Merge pull request 'PRD 0006: pipelock native TLS interception' (#9) from pipelock-tls-interception into main
This commit was merged in pull request #9.
This commit is contained in:
+4
-2
@@ -19,9 +19,11 @@ FROM node:22-slim
|
||||
# clarity in case the base ever drops it. socat is the privileged
|
||||
# forwarder for the in-container ssh-agent (see claude_bottle/ssh.py): the agent
|
||||
# runs as root and rejects non-root connections, so socat sits between
|
||||
# node and the agent socket.
|
||||
# node and the agent socket. curl is here so any HTTPS_PROXY-aware
|
||||
# tool (curl itself, plus anything that shells out to it) works
|
||||
# against pipelock's bumped TLS without the agent needing local DNS.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends git ca-certificates openssh-client socat \
|
||||
&& apt-get install -y --no-install-recommends git ca-certificates openssh-client socat curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install claude-code globally. Pinned to the version verified in the v1
|
||||
|
||||
@@ -204,24 +204,36 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
|
||||
"""Build/run the bottle and yield a handle; tear down on exit."""
|
||||
|
||||
def provision(self, plan: PlanT, target: str) -> str | None:
    """Copy host-side files (CA cert, prompt, skills, SSH keys, .git)
    into the running bottle.

    Called from `launch` after the container/machine is up. `target`
    identifies the running instance in backend-specific terms (Docker:
    resolved container name; fly: machine id).

    Default orchestration: ca → prompt → skills → ssh → git. The CA
    install runs first so the agent's trust store is rebuilt before
    anything inside the agent makes a TLS call. Subclasses typically
    don't override this method; they implement the `provision_*`
    sub-methods it delegates to.

    Returns the in-container prompt path if a prompt was provisioned,
    else None — the Bottle handle uses it to decide whether to add
    --append-system-prompt-file to claude's argv.
    """
    # Order matters: trust store first, then everything else.
    self.provision_ca(plan, target)
    prompt_path = self.provision_prompt(plan, target)
    self.provision_skills(plan, target)
    self.provision_ssh(plan, target)
    self.provision_git(plan, target)
    return prompt_path
|
||||
|
||||
def provision_ca(self, plan: PlanT, target: str) -> None:
    """Hook: install pipelock's per-bottle CA into the agent's trust
    store so the agent accepts the bumped CONNECT certificate that
    pipelock presents.

    The base implementation deliberately does nothing, so backends
    without TLS-interception support are not forced to implement it.
    The Docker backend overrides this hook to copy the cert into the
    container and run `update-ca-certificates`.
    """
    # Intentional no-op default; `plan` and `target` are unused here.
    return None
|
||||
|
||||
@abstractmethod
|
||||
def provision_prompt(self, plan: PlanT, target: str) -> str | None:
|
||||
"""Copy the prompt file into the running bottle. Returns the
|
||||
|
||||
@@ -24,6 +24,7 @@ from .bottle import DockerBottle
|
||||
from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .pipelock import DockerPipelockProxy
|
||||
from .provision import ca as _ca
|
||||
from .provision import git as _git
|
||||
from .provision import prompt as _prompt
|
||||
from .provision import skills as _skills
|
||||
@@ -47,6 +48,9 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
|
||||
with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle:
|
||||
yield bottle
|
||||
|
||||
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
|
||||
_ca.provision_ca(plan, target)
|
||||
|
||||
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
|
||||
return _prompt.provision_prompt(plan, target)
|
||||
|
||||
|
||||
@@ -93,6 +93,7 @@ class DockerBottlePlan(BottlePlan):
|
||||
else:
|
||||
info(" ssh hosts : (none)")
|
||||
info(f" egress : {self.allowlist_summary}")
|
||||
info(" tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)")
|
||||
info(
|
||||
f"prompt : {len(v.agent.prompt)} chars; "
|
||||
f"first line: {v.prompt_first_line or '(empty)'}"
|
||||
@@ -117,6 +118,15 @@ class DockerBottlePlan(BottlePlan):
|
||||
"egress": {
|
||||
"host_count": len(hosts),
|
||||
"hosts": hosts,
|
||||
# PRD 0006: pipelock's `tls_interception` block is on
|
||||
# for every launched bottle. ca_fingerprint is always
|
||||
# null at dry-run because the CA doesn't exist yet —
|
||||
# real launches print the fingerprint to stderr from
|
||||
# provision_ca. Reserved field for forward-compat.
|
||||
"tls_interception": {
|
||||
"enabled": True,
|
||||
"ca_fingerprint": None,
|
||||
},
|
||||
},
|
||||
"prompt": {
|
||||
"length": len(v.agent.prompt),
|
||||
|
||||
@@ -22,7 +22,8 @@ from . import network as network_mod
|
||||
from . import util as docker_mod
|
||||
from .bottle import DockerBottle
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .pipelock import DockerPipelockProxy, pipelock_proxy_url
|
||||
from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init
|
||||
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
|
||||
|
||||
|
||||
# Where the repo root lives, for `docker build` context. Computed once.
|
||||
@@ -63,12 +64,25 @@ def launch(
|
||||
egress_network = network_mod.network_create_egress(plan.slug)
|
||||
stack.callback(network_mod.network_remove, egress_network)
|
||||
|
||||
# Per-bottle ephemeral CA for pipelock's TLS interception
|
||||
# (PRD 0006). One-shot pipelock container writes ca.pem +
|
||||
# ca-key.pem under plan.stage_dir; .start docker-cp's them
|
||||
# into the sidecar. The private key is never copied into the agent
|
||||
# container; its host copy lives in the stage dir, which start.py's
|
||||
# outer finally `shutil.rmtree`s after the sidecar is torn down.
|
||||
ca_cert_host, ca_key_host = pipelock_tls_init(plan.stage_dir)
|
||||
proxy_plan = dataclasses.replace(
|
||||
plan.proxy_plan,
|
||||
internal_network=internal_network,
|
||||
egress_network=egress_network,
|
||||
ca_cert_host_path=ca_cert_host,
|
||||
ca_key_host_path=ca_key_host,
|
||||
)
|
||||
pipelock_name = proxy.start(proxy_plan)
|
||||
# Re-bind the outer plan so provision_ca (which runs later
|
||||
# from `provision(plan, container)`) can read the populated
|
||||
# CA paths off plan.proxy_plan.
|
||||
plan = dataclasses.replace(plan, proxy_plan=proxy_plan)
|
||||
pipelock_name = proxy.start(plan.proxy_plan)
|
||||
stack.callback(proxy.stop, pipelock_name)
|
||||
|
||||
container = _run_agent_container(plan, internal_network)
|
||||
@@ -93,6 +107,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
|
||||
"-e", f"HTTPS_PROXY={proxy_url}",
|
||||
"-e", f"HTTP_PROXY={proxy_url}",
|
||||
"-e", "NO_PROXY=localhost,127.0.0.1",
|
||||
# CA trust trio for the agent process. Docker propagates
|
||||
# run-time env into `docker exec`, so `claude` sees these
|
||||
# without per-exec threading. NODE_EXTRA_CA_CERTS points at
|
||||
# the cert file (Node appends it to its bundled roots);
|
||||
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
|
||||
# bundle that `update-ca-certificates` rebuilds in
|
||||
# provision_ca.
|
||||
"-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
|
||||
"-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
|
||||
"-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
|
||||
]
|
||||
if plan.use_runsc:
|
||||
docker_args.extend(["--runtime", "runsc"])
|
||||
|
||||
@@ -6,6 +6,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...log import die, info, warn
|
||||
from ...pipelock import PipelockProxy, PipelockProxyPlan
|
||||
@@ -21,6 +22,12 @@ PIPELOCK_IMAGE = os.environ.get(
|
||||
# Listening port for pipelock's forward proxy.
|
||||
PIPELOCK_PORT = os.environ.get("CLAUDE_BOTTLE_PIPELOCK_PORT", "8888")
|
||||
|
||||
# In-container paths where the per-bottle CA cert + key land after
|
||||
# `docker cp` in `DockerPipelockProxy.start`. Pipelock's rendered
|
||||
# YAML references these paths under `tls_interception`.
|
||||
PIPELOCK_CA_CERT_IN_CONTAINER = "/etc/pipelock-ca.pem"
|
||||
PIPELOCK_CA_KEY_IN_CONTAINER = "/etc/pipelock-ca-key.pem"
|
||||
|
||||
|
||||
def pipelock_container_name(slug: str) -> str:
|
||||
return f"claude-bottle-pipelock-{slug}"
|
||||
@@ -34,19 +41,56 @@ def pipelock_proxy_host_port(slug: str) -> str:
|
||||
return f"{pipelock_container_name(slug)}:{PIPELOCK_PORT}"
|
||||
|
||||
|
||||
def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
    """Generate a fresh per-bottle CA via a one-shot pipelock container.

    Runs `pipelock tls init` (image: `PIPELOCK_IMAGE`) with
    `<stage_dir>/pipelock-ca/` bind-mounted at `/h` and
    `PIPELOCK_HOME=/h`, then expects `ca.pem` (public cert) and
    `ca-key.pem` (private key) to exist in that directory.

    Returns `(cert_path, key_path)` as host paths under
    `<stage_dir>/pipelock-ca/`. Calls `die` if the container exits
    non-zero or if either file is missing afterwards.

    The files are owned by whatever UID the one-shot container ran as.
    """
    work = stage_dir / "pipelock-ca"
    work.mkdir(exist_ok=True)

    # `docker run -v SRC:DST` only treats SRC as a bind mount when it is
    # an absolute path; a relative SRC is taken as a volume name (or
    # rejected). Resolve so a relative stage_dir still bind-mounts the
    # directory we check below.
    mount_source = work.resolve()

    result = subprocess.run(
        [
            "docker", "run", "--rm",
            "-v", f"{mount_source}:/h",
            "-e", "PIPELOCK_HOME=/h",
            PIPELOCK_IMAGE, "tls", "init",
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        die(f"pipelock tls init failed: {result.stderr.strip()}")

    cert = work / "ca.pem"
    key = work / "ca-key.pem"
    # A zero exit status alone does not prove the files were written.
    if not cert.is_file() or not key.is_file():
        die(f"pipelock tls init did not produce ca files in {work}")
    return (cert, key)
|
||||
|
||||
|
||||
class DockerPipelockProxy(PipelockProxy):
|
||||
"""Brings the pipelock sidecar up and down via Docker."""
|
||||
|
||||
CA_CERT_IN_CONTAINER = PIPELOCK_CA_CERT_IN_CONTAINER
|
||||
CA_KEY_IN_CONTAINER = PIPELOCK_CA_KEY_IN_CONTAINER
|
||||
|
||||
def start(self, plan: PipelockProxyPlan) -> str:
|
||||
"""Boot the pipelock sidecar:
|
||||
1. `docker create` on the internal network with the canonical
|
||||
name and argv `run --config /etc/pipelock.yaml --listen
|
||||
0.0.0.0:<port>`.
|
||||
2. `docker cp` the YAML config to /etc/pipelock.yaml in the
|
||||
writable layer (parent dir must already exist; image is
|
||||
distroless).
|
||||
3. Attach to the per-agent egress network.
|
||||
4. `docker start`.
|
||||
2. `docker cp` the YAML config to /etc/pipelock.yaml.
|
||||
3. `docker cp` the CA cert + key to /etc/pipelock-ca.pem
|
||||
and /etc/pipelock-ca-key.pem (pipelock runs as root in
|
||||
its distroless image, so no chown is needed).
|
||||
4. Attach to the per-agent egress network.
|
||||
5. `docker start`.
|
||||
Returns the container name (the proxy_target passed to .stop)."""
|
||||
name = pipelock_container_name(plan.slug)
|
||||
if not plan.yaml_path.is_file():
|
||||
@@ -54,6 +98,11 @@ class DockerPipelockProxy(PipelockProxy):
|
||||
f"pipelock yaml not found at {plan.yaml_path}; "
|
||||
f"PipelockProxy.prepare must run first"
|
||||
)
|
||||
if not plan.ca_cert_host_path.is_file() or not plan.ca_key_host_path.is_file():
|
||||
die(
|
||||
f"pipelock CA missing at {plan.ca_cert_host_path} / "
|
||||
f"{plan.ca_key_host_path}; pipelock_tls_init must run first"
|
||||
)
|
||||
|
||||
info(f"starting pipelock sidecar {name} on network {plan.internal_network}")
|
||||
|
||||
@@ -68,15 +117,23 @@ class DockerPipelockProxy(PipelockProxy):
|
||||
if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0:
|
||||
die(f"failed to create pipelock sidecar {name}")
|
||||
|
||||
cp_result = subprocess.run(
|
||||
["docker", "cp", str(plan.yaml_path), f"{name}:/etc/pipelock.yaml"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if cp_result.returncode != 0:
|
||||
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
|
||||
die(f"failed to copy pipelock yaml into {name}: {cp_result.stderr.strip()}")
|
||||
for src, dst, label in (
|
||||
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
|
||||
(plan.ca_cert_host_path, PIPELOCK_CA_CERT_IN_CONTAINER, "ca cert"),
|
||||
(plan.ca_key_host_path, PIPELOCK_CA_KEY_IN_CONTAINER, "ca key"),
|
||||
):
|
||||
cp_result = subprocess.run(
|
||||
["docker", "cp", str(src), f"{name}:{dst}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if cp_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
|
||||
|
||||
if subprocess.run(
|
||||
["docker", "network", "connect", plan.egress_network, name],
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
"""Install pipelock's per-bottle CA into the agent container's trust
|
||||
store (PRD 0006).
|
||||
|
||||
By the time this provisioner runs, `pipelock_tls_init` has generated
|
||||
a fresh CA into `plan.stage_dir/pipelock-ca/` and the pipelock sidecar
|
||||
is up with `tls_interception: { enabled: true }` referencing the
|
||||
in-container CA paths. This step makes the agent trust certs signed
|
||||
by that CA so the agent's TLS handshake with the bumped CONNECT
|
||||
succeeds.
|
||||
|
||||
Cert lands on Debian's standard source path
|
||||
(`/usr/local/share/ca-certificates/`); `update-ca-certificates`
|
||||
rebuilds `/etc/ssl/certs/ca-certificates.crt`, which is what curl,
|
||||
Python `ssl`, and OpenSSL-based tools all read by default. The env
|
||||
trio set on the agent's `docker run` covers Node
|
||||
(`NODE_EXTRA_CA_CERTS`) and Python `requests` /
|
||||
`SSL_CERT_FILE`-honoring libraries that don't load the system
|
||||
bundle.
|
||||
|
||||
The fingerprint is computed via stdlib (`ssl.PEM_cert_to_DER_cert`
|
||||
+ `hashlib.sha256`) and logged once to stderr. The private key
|
||||
stays on the host (under `stage_dir`) until teardown wipes the
|
||||
stage dir; nothing in the agent ever sees it."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import ssl
|
||||
import subprocess
|
||||
|
||||
from ....log import info
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
# Debian-family path for sources that `update-ca-certificates` reads.
|
||||
# Bundle path is what the command rebuilds and what every standard
|
||||
# TLS consumer in the image reads.
|
||||
AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-pipelock-ca.crt"
|
||||
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
|
||||
|
||||
|
||||
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
    """Install pipelock's CA cert into the agent container.

    Reads the host cert path from `plan.proxy_plan.ca_cert_host_path`,
    copies it to `AGENT_CA_PATH` inside container `target`, makes it
    world-readable, runs `update-ca-certificates` as root, and logs a
    one-line (truncated) SHA-256 fingerprint of the cert.

    Calls `die` if the host cert path is unset or not a file. Each
    docker step runs with `check=True`, so a failing step raises
    `subprocess.CalledProcessError`.
    """
    host_cert = plan.proxy_plan.ca_cert_host_path
    if not (host_cert and host_cert.is_file()):
        # Defensive: launch is expected to attach the CA paths to the
        # plan (via dataclasses.replace) before provision runs; getting
        # here means that wiring was skipped.
        from ....log import die
        die(
            f"pipelock CA cert missing at {host_cert or '(empty)'}; "
            f"launch must have called pipelock_tls_init and re-bound "
            f"the plan before provision"
        )

    # Ordered docker steps: copy the cert in, fix its mode, rebuild the
    # trust bundle. The in-container steps run as uid 0.
    steps = (
        ["docker", "cp", str(host_cert), f"{target}:{AGENT_CA_PATH}"],
        ["docker", "exec", "-u", "0", target, "chmod", "644", AGENT_CA_PATH],
        ["docker", "exec", "-u", "0", target, "update-ca-certificates"],
    )
    for argv in steps:
        subprocess.run(argv, stdout=subprocess.DEVNULL, check=True)

    # SHA-256 over the cert's DER bytes (stdlib only). Only the public
    # cert is read here, never the private key.
    pem_text = host_cert.read_text()
    digest = hashlib.sha256(ssl.PEM_cert_to_DER_cert(pem_text)).hexdigest()
    info(f"pipelock ca fingerprint: sha256:{digest[:32]}...")
|
||||
+70
-15
@@ -89,13 +89,26 @@ def pipelock_allowlist_summary(bottle: Bottle) -> str:
|
||||
# --- Config build + YAML render --------------------------------------------
|
||||
|
||||
|
||||
def pipelock_build_config(bottle: Bottle) -> dict[str, object]:
|
||||
def pipelock_build_config(
|
||||
bottle: Bottle,
|
||||
*,
|
||||
ca_cert_path: str = "",
|
||||
ca_key_path: str = "",
|
||||
) -> dict[str, object]:
|
||||
"""Build the structured pipelock config dict the sidecar will load.
|
||||
|
||||
Deliberately carries no env values, no secrets, no per-agent
|
||||
customization beyond the resolved hostname list. The shape mirrors
|
||||
the YAML pipelock expects on disk; `pipelock_render_yaml` serializes
|
||||
it. Tests assert on this dict; production code renders it."""
|
||||
it. Tests assert on this dict; production code renders it.
|
||||
|
||||
`ca_cert_path` / `ca_key_path` are the **in-container** paths the
|
||||
pipelock sidecar will read its CA from at runtime (they're
|
||||
populated into the container at start time via `docker cp`).
|
||||
Pass both or neither: both → emit `tls_interception` block with
|
||||
`enabled: true`; neither → omit the block entirely (pipelock
|
||||
falls back to its built-in default of `enabled: false`). Used
|
||||
by PRD 0006 to turn on pipelock's native TLS interception."""
|
||||
cfg: dict[str, object] = {
|
||||
"version": 1,
|
||||
"mode": "strict",
|
||||
@@ -116,6 +129,17 @@ def pipelock_build_config(bottle: Bottle) -> dict[str, object]:
|
||||
# with a log line); claude-bottle's default is "block" so a hit
|
||||
# actually stops the request from leaving the egress network.
|
||||
cfg["request_body_scanning"] = {"action": bottle.egress.dlp_action}
|
||||
if ca_cert_path or ca_key_path:
|
||||
if not (ca_cert_path and ca_key_path):
|
||||
raise ValueError(
|
||||
"pipelock_build_config: pass both ca_cert_path and ca_key_path "
|
||||
"to enable tls_interception, or neither to leave it off"
|
||||
)
|
||||
cfg["tls_interception"] = {
|
||||
"enabled": True,
|
||||
"ca_cert": ca_cert_path,
|
||||
"ca_key": ca_key_path,
|
||||
}
|
||||
return cfg
|
||||
|
||||
|
||||
@@ -159,6 +183,13 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
|
||||
lines.append("request_body_scanning:")
|
||||
rbs = cast(dict[str, object], cfg["request_body_scanning"])
|
||||
lines.append(f' action: "{rbs["action"]}"')
|
||||
if "tls_interception" in cfg:
|
||||
lines.append("")
|
||||
lines.append("tls_interception:")
|
||||
tls = cast(dict[str, object], cfg["tls_interception"])
|
||||
lines.append(f" enabled: {_bool(tls['enabled'])}")
|
||||
lines.append(f' ca_cert: "{tls["ca_cert"]}"')
|
||||
lines.append(f' ca_key: "{tls["ca_key"]}"')
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
@@ -170,42 +201,66 @@ class PipelockProxyPlan:
|
||||
"""Output of PipelockProxy.prepare; consumed by .start when the
|
||||
sidecar needs to be brought up.
|
||||
|
||||
yaml_path + slug are filled in at prepare time. internal_network
|
||||
and egress_network default to empty and are populated by the
|
||||
backend's launch step (via dataclasses.replace) once those networks
|
||||
have actually been created."""
|
||||
yaml_path + slug are filled in at prepare time (host-side, side-
|
||||
effect-free; the YAML references the in-container CA paths
|
||||
already so it doesn't need the host paths to be valid). The
|
||||
remaining fields are populated by the backend's launch step
|
||||
via `dataclasses.replace`: internal/egress networks once
|
||||
those networks exist, and the CA host paths once the
|
||||
one-shot `pipelock tls init` has run. Empty defaults are
|
||||
sentinels meaning "not yet set"; `.start` validates that
|
||||
they are populated."""
|
||||
|
||||
yaml_path: Path
|
||||
slug: str
|
||||
internal_network: str = ""
|
||||
egress_network: str = ""
|
||||
ca_cert_host_path: Path = Path()
|
||||
ca_key_host_path: Path = Path()
|
||||
|
||||
|
||||
class PipelockProxy(ABC):
|
||||
"""The pipelock egress proxy. Encapsulates the YAML-config
|
||||
generation; the sidecar's start/stop lifecycle is backend-specific
|
||||
and lives on concrete subclasses."""
|
||||
and lives on concrete subclasses.
|
||||
|
||||
The class-level constants `CA_CERT_IN_CONTAINER` /
|
||||
`CA_KEY_IN_CONTAINER` are the in-container paths the YAML config
|
||||
references — they correspond to wherever the backend's `.start`
|
||||
places the CA cert and key inside the sidecar. Subclasses
|
||||
override the constants."""
|
||||
|
||||
CA_CERT_IN_CONTAINER: str = ""
|
||||
CA_KEY_IN_CONTAINER: str = ""
|
||||
|
||||
def prepare(
    self, bottle: Bottle, slug: str, stage_dir: Path
) -> PipelockProxyPlan:
    """Write the pipelock yaml config under `stage_dir` (then chmod it
    to 600) and return the plan for `.start`. Pure host-side, no docker
    subprocess.

    `slug` is the agent-derived identifier (lowercased,
    hyphen-normalized) used as the suffix in every per-agent resource
    name — the agent container, the pipelock container
    (`claude-bottle-pipelock-<slug>`), the internal/egress networks.
    It's stored on the returned plan so the backend's start step can
    derive the sidecar's container name.

    The CA paths the YAML references are the in-container paths from
    the concrete subclass's `CA_CERT_IN_CONTAINER` /
    `CA_KEY_IN_CONTAINER` constants. The host-side counterparts are
    generated by the launch step (not here, so prepare stays free of
    docker side effects) and added to the plan via
    `dataclasses.replace` before `.start`.
    """
    yaml_path = stage_dir / "pipelock.yaml"
    cfg = pipelock_build_config(
        bottle,
        ca_cert_path=self.CA_CERT_IN_CONTAINER,
        ca_key_path=self.CA_KEY_IN_CONTAINER,
    )
    yaml_path.write_text(pipelock_render_yaml(cfg))
    # Restrict the rendered config to the owning user.
    yaml_path.chmod(0o600)
    return PipelockProxyPlan(yaml_path=yaml_path, slug=slug)
|
||||
|
||||
@abstractmethod
|
||||
def start(self, plan: PipelockProxyPlan) -> str:
|
||||
|
||||
@@ -0,0 +1,291 @@
|
||||
# PRD 0006: pipelock native TLS interception
|
||||
|
||||
- **Status:** Draft
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-12
|
||||
|
||||
## Summary
|
||||
|
||||
Turn on pipelock's built-in `tls_interception` so its DLP / URL /
|
||||
header / MCP scanners fire on the plaintext of HTTPS requests
|
||||
instead of only the outer `CONNECT` hostname. Pipelock generates a
|
||||
per-bottle ephemeral CA at launch (`pipelock tls init`); the
|
||||
public cert is installed into the agent container's trust store
|
||||
and the private key dies with the sidecar on teardown. The
|
||||
existing per-agent sidecar topology from PRD 0001 is otherwise
|
||||
unchanged — one container, no addon, no second proxy.
|
||||
|
||||
This supersedes the closed PR #8 / branch `mitmproxy-tls-interception`,
|
||||
which built a mitmproxy + addon chain on the (falsified) premise
|
||||
that pipelock could not MITM. Empirical proof from the impl-time
|
||||
spike: with `tls_interception: { enabled: true, ca_cert, ca_key }`
|
||||
in the pipelock config, pipelock answered a credential POST over
|
||||
HTTPS with `STATUS=403 / body: blocked: request body contains
|
||||
secret: GitHub Token` and emitted both
|
||||
`scanner:"tls_intercept"` and `scanner:"body_dlp"` events.
|
||||
|
||||
## Problem
|
||||
|
||||
PRD 0001 wired pipelock onto every bottle's egress, but pipelock
|
||||
ran with its default `tls_interception.enabled: false`. The agent
|
||||
container's only egress route is pipelock, but pipelock only saw
|
||||
`CONNECT` hostnames and the encrypted bytes inside the tunnel.
|
||||
Pipelock's headline scanners — request body DLP (48 credential
|
||||
patterns), header DLP, URL DLP, subdomain entropy, MCP scanning,
|
||||
response-body scanning — all need plaintext to fire. Against the
|
||||
HTTPS-only hosts in `DEFAULT_ALLOWLIST` (`api.anthropic.com`,
|
||||
`raw.githubusercontent.com`, etc.) they are effectively disabled.
|
||||
|
||||
The existing `tests/integration/test_pipelock_blocks_secret_post`
|
||||
test only fires because it forces the agent to send plain HTTP
|
||||
through pipelock's forward-proxy mode. Real Claude Code traffic
|
||||
uses HTTPS via CONNECT and slips past the scanner.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
The feature works when all of the following are observable:
|
||||
|
||||
- A Node / curl request from inside a launched bottle to a
|
||||
CONNECT-bumped HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`)
|
||||
carrying a pipelock-recognized credential pattern in the body
|
||||
returns 403 from pipelock with the documented
|
||||
`blocked: request body contains secret: …` body. Pipelock's
|
||||
`body_dlp` event fires on the decrypted request.
|
||||
- A clean HTTPS GET from inside the bottle to an allowlisted host
|
||||
(e.g. `https://raw.githubusercontent.com/...`) returns the real
|
||||
upstream response — TLS interception doesn't break legitimate
|
||||
traffic.
|
||||
- The agent's TLS library trusts pipelock's bumped leaf certs
|
||||
(per the bottle's installed CA); no TLS-trust errors.
|
||||
- Claude Code reaches `api.anthropic.com` end-to-end through the
|
||||
bottle and completes a chat round-trip.
|
||||
|
||||
The feature is **done** when all of the following ship:
|
||||
|
||||
- `pipelock_build_config` / `pipelock_render_yaml` emit a
|
||||
`tls_interception` block with `enabled: true` and the per-bottle
|
||||
CA cert/key paths. The defaults
|
||||
(`cert_ttl: 24h`, `cert_cache_size: 10000`,
|
||||
`passthrough_domains: []`) are kept; only `enabled` and the
|
||||
cert paths are populated.
|
||||
- The prepare step generates a per-bottle CA via `pipelock tls init`
|
||||
in a one-shot container, writes `ca.pem` and `ca-key.pem` to
|
||||
`stage_dir`. Paths land on the `DockerBottlePlan`.
|
||||
- `DockerPipelockProxy.start` `docker cp`s the CA cert and key into
|
||||
the sidecar so the running pipelock can read its CA.
|
||||
- `BottleBackend.provision_ca` (new) copies the CA public cert
|
||||
into the agent at
|
||||
`/usr/local/share/ca-certificates/claude-bottle-pipelock-ca.crt`, runs
|
||||
`update-ca-certificates`, and sets the `NODE_EXTRA_CA_CERTS` /
|
||||
`SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` env trio on the agent
|
||||
container's runtime env. Default no-op on the abstract base so
|
||||
other backends aren't forced to implement.
|
||||
- The launch step prints a one-line stderr log with the SHA-256
|
||||
fingerprint of the public CA cert (computed via stdlib
|
||||
`ssl.PEM_cert_to_DER_cert` + `hashlib.sha256`).
|
||||
- On bottle teardown the sidecar is removed and the CA private
|
||||
key is gone with it.
|
||||
- Two new integration tests under `tests/integration/`:
|
||||
- HTTPS variant of the credential-post block test (proves the
|
||||
`tls_intercept` + `body_dlp` chain fires end-to-end).
|
||||
- Clean HTTPS GET test (proves the allow path doesn't break TLS
|
||||
trust and returns real upstream content).
|
||||
- The dry-run preflight (`start --dry-run`) renders the new TLS
|
||||
layer. Text: one line under the egress summary. JSON: a
|
||||
reserved `egress.tls_interception: { enabled: true,
|
||||
ca_fingerprint: null }` block — fingerprint is null at dry-run
|
||||
because the CA only exists after launch.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- A second proxy in the chain. Pipelock does the bumping
|
||||
natively; the mitmproxy approach was based on a wrong premise
|
||||
(closed PR #8).
|
||||
- Per-bottle override to disable interception. v1 always enables
|
||||
`tls_interception`. The pipelock-side `passthrough_domains`
|
||||
list is the right knob if a future allowlisted host turns out
|
||||
to pin certs — exposing it through the manifest is a follow-up.
|
||||
- A long-lived / shared CA across bottles. Each bottle gets a
|
||||
fresh CA generated by `pipelock tls init` and destroyed with the
|
||||
sidecar.
|
||||
- Tuning `cert_ttl`, `cert_cache_size`, `max_response_bytes`,
|
||||
`cross_request_detection`, or other pipelock advanced features.
|
||||
Defaults from `pipelock generate config --preset strict` are
|
||||
fine for v1.
|
||||
- Trust-store paths for non-Debian agent images.
|
||||
`node:22-slim` is Debian; `update-ca-certificates` is the right
|
||||
command. A Red-Hat-family base would need `update-ca-trust`.
|
||||
- HTTP/3 / QUIC. Pipelock's interception is HTTP/HTTPS-over-TLS;
|
||||
UDP/443 still needs an iptables layer (separate PRD).
|
||||
|
||||
## Scope
|
||||
|
||||
### In scope
|
||||
|
||||
- **`claude_bottle/pipelock.py`** changes:
|
||||
- Extend `pipelock_build_config` to include
|
||||
`tls_interception: { enabled: true, ca_cert: <path>, ca_key:
|
||||
<path> }`. Paths are populated from the plan; the function's
|
||||
signature grows a `cert_path` / `key_path` pair or reads them
|
||||
off `Bottle` once they're stored.
|
||||
- Extend `pipelock_render_yaml` to emit the new block.
|
||||
- **`claude_bottle/backend/docker/pipelock.py`** changes:
|
||||
- New helper `pipelock_tls_init(stage_dir)` runs the upstream
|
||||
image as a one-shot:
|
||||
`docker run --rm -v <stage>:/h -e PIPELOCK_HOME=/h pipelock tls init`,
|
||||
leaving `ca.pem` and `ca-key.pem` under `stage_dir`. The host
|
||||
file owner is whatever the upstream image's user is; the
|
||||
sidecar mount is read-only so this is fine.
|
||||
- `DockerPipelockProxy.start` `docker cp`s the CA cert + key
|
||||
into the sidecar at `/etc/pipelock-ca.pem` and
|
||||
`/etc/pipelock-ca-key.pem` between `docker create` and
|
||||
`docker start`, mirroring the existing pattern for the YAML
|
||||
config. If pipelock's image runs as non-root, a `docker exec
|
||||
-u 0 chown pipelock:pipelock /etc/pipelock-ca*.pem` lands
|
||||
between the `cp` and the `start`.
|
||||
- **`claude_bottle/backend/__init__.py`**: new overridable method
|
||||
`provision_ca(plan, target)` on `BottleBackend`, default no-op.
|
||||
`BottleBackend.provision` orchestrates `ca → prompt → skills →
|
||||
ssh → git`.
|
||||
- **`claude_bottle/backend/docker/provision/ca.py`** (new):
|
||||
- Reads the cert from `stage_dir` (already written by prepare).
|
||||
- `docker cp` into the agent.
|
||||
- `docker exec -u 0 ... chmod 644 ...` + `update-ca-certificates`.
|
||||
- Computes the SHA-256 fingerprint with stdlib (`ssl` +
|
||||
`hashlib`), emits one stderr log line.
|
||||
- **`claude_bottle/backend/docker/launch.py`**:
|
||||
- Three new `-e` flags on the agent's `docker run`:
|
||||
`NODE_EXTRA_CA_CERTS=/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
|
||||
`SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`,
|
||||
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`.
|
||||
- `HTTPS_PROXY` / `HTTP_PROXY` continue to point at pipelock
|
||||
(unchanged from PRD 0001 — the mitmproxy detour in PR #8 is
|
||||
abandoned).
|
||||
- **`claude_bottle/backend/docker/bottle_plan.py`**:
|
||||
- One new `info(...)` line in `print()` noting TLS interception
|
||||
is on.
|
||||
- `to_dict()` gains an `egress.tls_interception: { enabled:
|
||||
true, ca_fingerprint: null }` block. Reserved for future
|
||||
population.
|
||||
- **`claude_bottle/backend/docker/prepare.py`**: call
|
||||
`pipelock_tls_init(stage_dir)` and write the resolved cert/key
|
||||
paths onto the plan (either on the existing `proxy_plan` field
|
||||
or on the parent `DockerBottlePlan`).
|
||||
- **Tests:**
|
||||
- `tests/integration/test_pipelock_blocks_secret_https_post.py`
|
||||
(new) — HTTPS variant of the existing block test.
|
||||
- `tests/integration/test_pipelock_allows_normal_https.py`
|
||||
(new) — clean HTTPS GET succeeds.
|
||||
- `tests/unit/test_pipelock_yaml.py` updated to assert the new
|
||||
`tls_interception` block in the rendered config.
|
||||
- `tests/integration/test_dry_run_plan.py` updated to assert
|
||||
the new `egress.tls_interception` JSON block.
|
||||
|
||||
### Out of scope
|
||||
|
||||
- Modifying pipelock itself. We're using existing config knobs.
|
||||
- A manifest field to disable / customize interception per bottle.
|
||||
Doable but premature.
|
||||
- Wiring `passthrough_domains`. The default `[]` is correct for
|
||||
v1; add the manifest field when a pinning host shows up. The
|
||||
shape is pre-recorded so the follow-up is mechanical:
|
||||
`bottle.egress.tls_passthrough_domains: [host, ...]`,
|
||||
mirroring the existing `egress.allowlist`.
|
||||
- `cross_request_detection`, `entropy_budget`,
|
||||
`fragment_reassembly`, `reverse_proxy`, `scan_api` — features
|
||||
pipelock exposes but we don't need for the body-DLP gap.
|
||||
|
||||
## Proposed Design
|
||||
|
||||
### Topology
|
||||
|
||||
```
|
||||
agent --HTTPS_PROXY--> pipelock --[bumps TLS]--> internet
|
||||
(sees plaintext: URL, headers, body)
|
||||
```
|
||||
|
||||
Same single-sidecar shape as PRD 0001. The only addition is
|
||||
`tls_interception` in pipelock's config plus the per-bottle CA
|
||||
generated at prepare time.
|
||||
|
||||
### CA lifecycle
|
||||
|
||||
- **Generation.** Host-side, at prepare time, via a one-shot
|
||||
`docker run --rm -v <stage>:/h -e PIPELOCK_HOME=/h pipelock tls
|
||||
init`. Output: `<stage>/ca.pem` + `<stage>/ca-key.pem`, mode 600.
|
||||
- **Sidecar install.** `DockerPipelockProxy.start` `docker cp`s
|
||||
the CA cert + key into the sidecar at `/etc/pipelock/ca.pem`
|
||||
and `/etc/pipelock/ca-key.pem` between `docker create` and
|
||||
`docker start`. Same pattern the proxy already uses for the
|
||||
YAML config — no bind-mount, no UID/permission concern from
|
||||
the one-shot generation step. The rendered YAML references
|
||||
the in-container paths.
|
||||
- **Bottle install.** `provision_ca` (Docker impl) does
|
||||
`docker cp <stage>/ca.pem agent:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
|
||||
then `update-ca-certificates`. The CA env trio is set at
|
||||
`docker run -e` time (Docker propagates run-time env into
|
||||
`docker exec`).
|
||||
- **Per-bottle ephemerality.** Enforced by *regenerating per
|
||||
launch*, not by validity windows. Pipelock's defaults
|
||||
(`cert_ttl: 24h` for leaves, `--validity 87600h` for the CA)
|
||||
are fine — the CA lives only as long as the sidecar, which is
|
||||
the bottle's lifetime.
|
||||
- **Teardown.** Sidecar removed via `ExitStack` callback, then
|
||||
the launch context manager's outer `finally` `shutil.rmtree`s
|
||||
`stage_dir`. CA dies with both, in that order, so the sidecar
|
||||
is already removed before its host-side CA copies are deleted.
|
||||
- **Fingerprint.** Computed via stdlib in `provision_ca` and
|
||||
logged once to stderr (`claude-bottle: mitm ca fingerprint:
|
||||
sha256:<hex>…`). The private key never appears in any log.
|
||||
|
||||
### Data model changes
|
||||
|
||||
None to the manifest schema. The dry-run JSON contract grows a
|
||||
reserved `egress.tls_interception` block; the fingerprint is
|
||||
always null at dry-run because the CA doesn't exist yet.
|
||||
|
||||
### Existing code touched
|
||||
|
||||
Surgical, all on the existing pipelock path:
|
||||
|
||||
- `claude_bottle/pipelock.py` — config builder + YAML renderer.
|
||||
- `claude_bottle/backend/__init__.py` — new `provision_ca` hook (default no-op).
|
||||
- `claude_bottle/backend/docker/pipelock.py` — `tls init` helper,
|
||||
sidecar CA install via `docker cp`.
|
||||
- `claude_bottle/backend/docker/prepare.py` — CA paths on plan.
|
||||
- `claude_bottle/backend/docker/launch.py` — CA env trio on agent.
|
||||
- `claude_bottle/backend/docker/backend.py` — `provision_ca`
|
||||
dispatch + thread `self._proxy` through prepare/launch with unchanged
|
||||
shape.
|
||||
- `claude_bottle/backend/docker/bottle_plan.py` — preflight
|
||||
rendering.
|
||||
- `claude_bottle/backend/docker/provision/ca.py` (new).
|
||||
|
||||
Net diff is meaningfully smaller than PR #8 because pipelock
|
||||
already does the work — no addon, no second sidecar, no second
|
||||
backend module.
|
||||
|
||||
### External dependencies
|
||||
|
||||
- **Pipelock image** — unchanged pin from PRD 0001
|
||||
(`ghcr.io/luckypipewrench/pipelock@sha256:3b1a3941…`,
|
||||
matching pipelock v2.3.0). No new image dependency.
|
||||
- **No host-side crypto deps.** CA generation uses the pipelock
|
||||
image's own `tls init` command in a one-shot container.
|
||||
Fingerprint uses Python stdlib `ssl` + `hashlib`.
|
||||
|
||||
## References
|
||||
|
||||
- `docs/research/pipelock-assessment.md` (now corrected) —
|
||||
pipelock capability assessment including the
|
||||
`tls_interception` block.
|
||||
- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` —
|
||||
egress-proxy baseline this PRD extends.
|
||||
- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC
|
||||
contract this PRD adds a `provision_ca` method to.
|
||||
- `docs/prds/0004-split-out-provisioners.md` — per-provisioner
|
||||
module pattern reused for the new CA provisioner.
|
||||
- Pipelock `tls` CLI (in-image help):
|
||||
`pipelock tls init / install-ca / show-ca`.
|
||||
- Closed PR #8 — earlier mitmproxy-based design built on the
|
||||
falsified "pipelock can't MITM" premise; archived for context.
|
||||
@@ -222,10 +222,14 @@ The following threat-model items from `network-egress-guard.md` are
|
||||
intercept raw UDP 53 packets.
|
||||
- **Domain fronting**: an agent can send `CONNECT allowed-host.com:443`
|
||||
through the proxy but embed a different SNI inside the TLS session.
|
||||
Pipelock does not perform TLS inspection (no CA trust injection) and
|
||||
cannot verify SNI vs. CONNECT header. The same limitation is shared
|
||||
with smokescreen and is documented in `network-egress-guard.md` as a
|
||||
known gap for the non-TLS-terminating proxy approach.
|
||||
Pipelock supports TLS interception via its `tls_interception` config
|
||||
block (`enabled`, `ca_cert`, `ca_key`, `cert_ttl`, `cert_cache_size`,
|
||||
`passthrough_domains`, `max_response_bytes`) plus the `pipelock tls
|
||||
init` / `install-ca` / `show-ca` CLI; with interception on, the
|
||||
body and inner Host header become visible to its scanner pipeline,
|
||||
closing the domain-fronting gap. With interception off (default in
|
||||
the generated config), pipelock relays the CONNECT as an opaque
|
||||
tunnel and only sees the outer hostname.
|
||||
- **SSH egress content**: SSH sessions to permitted hosts are opaque.
|
||||
Same limitation noted in both prior research notes.
|
||||
- **Agent killing the proxy process**: if pipelock runs inside the same
|
||||
@@ -385,7 +389,7 @@ pipelock's differentiators.
|
||||
| Blocks RFC 1918 by default | only if explicitly added to rules | yes | yes, + DNS rebinding | no |
|
||||
| Content-based DLP (credential patterns) | no | no | yes, 48 patterns + encoding normalization | no |
|
||||
| MCP / WebSocket scanning | no | no | yes, bidirectional | no |
|
||||
| Domain fronting bypass | possible | possible | possible (no TLS termination) | n/a |
|
||||
| Domain fronting bypass | possible | possible | mitigated when `tls_interception` is enabled (CA trust required in client) | n/a |
|
||||
| macOS Docker Desktop (sidecar mode) | yes | yes | yes | yes |
|
||||
| macOS Docker Desktop (in-container sandbox) | yes | n/a | degraded (--best-effort) | yes |
|
||||
| NET_ADMIN / NET_RAW required | yes | no | no (sidecar) | no |
|
||||
|
||||
@@ -1,508 +0,0 @@
|
||||
# TLS interception for pipelock content scanning
|
||||
|
||||
Research into adding TLS termination ("MITM") to the egress path so that
|
||||
pipelock's scanning pipeline can see plaintext HTTP request and response
|
||||
bodies, instead of only the `CONNECT` host and opaque ciphertext.
|
||||
|
||||
## Summary
|
||||
|
||||
- Pipelock today sees `CONNECT` hostnames and the encrypted bytes that follow.
|
||||
Its DLP, subdomain-entropy, and MCP scanners cannot fire on TLS-encrypted
|
||||
bodies, which is the gap explicitly named under "Scope gaps" in
|
||||
`pipelock-assessment.md` ("Pipelock does not perform TLS inspection (no CA
|
||||
trust injection)").
|
||||
- Closing that gap requires a TLS-terminating proxy that bumps `CONNECT`,
|
||||
presents a leaf certificate for the target hostname signed by a CA the
|
||||
bottle's trust store accepts, decrypts the inner HTTP, and re-establishes
|
||||
TLS to the real upstream.
|
||||
- The mature open-source option is **mitmproxy**. Squid + `ssl_bump` is the
|
||||
heavier production-grade alternative. The Go ecosystem (`goproxy`,
|
||||
`gomitmproxy`, `martian`) is suitable only if we want a custom binary
|
||||
tightly coupled to pipelock.
|
||||
- Recommended v1 topology: **mitmproxy in front of pipelock** on the same
|
||||
egress route. mitmproxy terminates client TLS, forwards plaintext to
|
||||
pipelock as its upstream HTTP proxy, and re-encrypts to the real upstream.
|
||||
Pipelock stays unchanged.
|
||||
- Per-bottle ephemeral CA, generated at bottle start and destroyed on
|
||||
teardown. The CA private key lives only on the sidecar; the bottle's
|
||||
trust store only ever sees the public cert.
|
||||
- Cert pinning is a known caveat but a small one given the narrow allowlist
|
||||
in this project. Selective bumping is the mitigation if a future
|
||||
allowlisted host turns out to pin.
|
||||
|
||||
---
|
||||
|
||||
## What pipelock cannot see today
|
||||
|
||||
The current egress topology (per `pipelock-assessment.md`):
|
||||
|
||||
```
|
||||
agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet
|
||||
\____________________________
|
||||
opaque TLS bytes
|
||||
```
|
||||
|
||||
The agent's client (Claude Code, `curl`, an MCP server, a Python SDK)
|
||||
sends `CONNECT api.anthropic.com:443`. Pipelock checks the hostname
|
||||
against its `api_allowlist`, replies `200 Connection Established`, and
|
||||
then blindly relays bytes between the two TCP halves. The TLS handshake
|
||||
and everything inside it happens end-to-end between the agent and the
|
||||
real upstream.
|
||||
|
||||
What pipelock can scan in this mode:
|
||||
|
||||
- `CONNECT` target hostname (SNI is not even needed).
|
||||
- TLS record framing and lengths (useful for budgets, useless for DLP).
|
||||
- Plain HTTP/1.1 to non-HTTPS destinations (irrelevant — there are none
|
||||
in `DEFAULT_ALLOWLIST`).
|
||||
|
||||
What pipelock cannot scan in this mode:
|
||||
|
||||
- Request URL, method, headers, body.
|
||||
- Response status, headers, body.
|
||||
- MCP JSON-RPC payloads inside the TLS session.
|
||||
- WebSocket frames inside a TLS-wrapped upgrade.
|
||||
- Whether the inner SNI or HTTP `Host` / `:authority` matches the
|
||||
outer `CONNECT` target (domain-fronting check).
|
||||
|
||||
The 48-pattern DLP layer, the subdomain-entropy check (insofar as it
|
||||
inspects URLs rather than DNS-resolver queries), the request-redaction
|
||||
feature added in v2.3.0, and bidirectional MCP scanning all require
|
||||
plaintext to operate on. Without TLS termination, those layers are
|
||||
inert against any HTTPS destination — which is every destination in
|
||||
the current allowlist.
|
||||
|
||||
---
|
||||
|
||||
## How TLS interception works
|
||||
|
||||
The mechanics of `CONNECT` bumping, end to end:
|
||||
|
||||
1. **Agent issues `CONNECT`.** The HTTP client sees `HTTPS_PROXY` set,
|
||||
so it opens a TCP connection to the proxy and sends
|
||||
`CONNECT api.anthropic.com:443 HTTP/1.1`.
|
||||
2. **Proxy answers `200`.** Standard tunnel-established response.
|
||||
3. **Proxy starts TLS as the server.** Instead of relaying bytes, the
|
||||
proxy itself performs a TLS handshake with the agent. It needs a
|
||||
server certificate for `api.anthropic.com` — so on first contact for
|
||||
that hostname, the proxy generates a leaf certificate with
|
||||
`CN=api.anthropic.com` and a SAN for the same, signs it with its
|
||||
own CA private key, and presents that cert. Subsequent connections
|
||||
to the same hostname reuse the cached leaf.
|
||||
4. **Agent verifies the cert.** The agent's TLS library walks the chain
|
||||
to a trusted root. Because the bottle's trust store contains the
|
||||
proxy's CA cert, validation succeeds. The agent has no way to tell
|
||||
it isn't talking to the real `api.anthropic.com`.
|
||||
5. **Proxy opens its own TLS to the real upstream.** As a client this
|
||||
time, using the system root store, talking to the real
|
||||
`api.anthropic.com`. Real SNI, real cert chain validated normally.
|
||||
6. **Proxy bridges the two TLS sessions.** Decrypts on the server side,
|
||||
re-encrypts on the client side, and scans the plaintext in between.
|
||||
|
||||
This is what every TLS-terminating egress proxy does. The trade-offs
|
||||
live in three places:
|
||||
|
||||
- **CA trust injection.** Step 4 only works if the bottle's trust
|
||||
store contains the proxy's CA. Mechanics covered under "CA lifecycle"
|
||||
below.
|
||||
- **Cert generation cost.** Generating an RSA-2048 leaf cert takes
|
||||
~50 ms; ECDSA P-256 is ~5 ms. Cache leaves per (hostname, SAN list)
|
||||
to keep this off the steady-state hot path.
|
||||
- **Protocol coverage.** The proxy needs to speak HTTP/1.1, HTTP/2 (ALPN
|
||||
`h2`), and ideally WebSocket. HTTP/3 / QUIC is UDP and requires a
|
||||
separate code path; for v1, blocking UDP/443 at the iptables layer
|
||||
forces clients to fall back to HTTP/2, which we can inspect.
|
||||
|
||||
---
|
||||
|
||||
## Tools
|
||||
|
||||
### mitmproxy
|
||||
|
||||
- **What it is.** Python (with Rust crypto bits) interactive HTTPS proxy.
|
||||
Reference open-source implementation of the bump pattern. Ships as
|
||||
`mitmproxy` (TUI), `mitmweb` (browser UI), and `mitmdump` (headless).
|
||||
- **Cert handling.** Generates a CA on first run under `~/.mitmproxy/`.
|
||||
Per-host leaves are generated on demand and cached in memory. Cert
|
||||
cache keyed by (hostname, SAN extensions inferred from upstream cert).
|
||||
- **Protocols.** HTTP/1.1, HTTP/2, WebSocket fully supported. HTTP/3
|
||||
exists as experimental. Raw TCP / non-HTTP TLS supported via
|
||||
`--mode reverse:` but not in CONNECT-bump mode.
|
||||
- **Extensibility.** Python addon API. An addon module can inspect or
|
||||
modify any `request` / `response` / `tcp_message` flow. The pipelock
|
||||
integration in Topology D below uses this.
|
||||
- **Selective bumping.** `ignore_hosts` regex; matching CONNECTs are
|
||||
tunneled blindly instead of bumped. Critical for the cert-pinning
|
||||
mitigation.
|
||||
- **Docker image.** `mitmproxy/mitmproxy` on Docker Hub. Single binary
|
||||
for the CLI, ~80 MB image. Configurable via flags or `~/.mitmproxy/config.yaml`.
|
||||
- **Project URL.** <https://mitmproxy.org>, <https://github.com/mitmproxy/mitmproxy>.
|
||||
|
||||
Most mature, best-documented, lowest-effort integration. Default choice
|
||||
for v1.
|
||||
|
||||
### Squid + ssl_bump
|
||||
|
||||
- **What it is.** Squid is a long-running C++ caching proxy.
|
||||
`ssl_bump` is its TLS-interception feature, controlled by per-CONNECT
|
||||
actions: `splice` (tunnel blindly), `bump` (decrypt and re-encrypt),
|
||||
`peek` (look at TLS hello then decide), `stare` (look at server cert
|
||||
then decide), `terminate` (abort the connection).
|
||||
- **Cert handling.** Configured via `sslcrtd_program` — a helper that
|
||||
generates and caches per-host certs. CA cert and key referenced by
|
||||
PEM paths in `squid.conf`.
|
||||
- **Protocols.** HTTP/1.1 fully; HTTP/2 to clients via recent versions;
|
||||
no scripted addons.
|
||||
- **Extensibility.** ICAP (Internet Content Adaptation Protocol) for
|
||||
external scanners — Squid POSTs each request/response to an ICAP
|
||||
service that can modify or reject. This is the formal version of
|
||||
Topology D below.
|
||||
- **Production track record.** Used at corporate-proxy scale (large
|
||||
enterprises, ISPs). Heavyweight for a single-bottle sidecar.
|
||||
- **Project URL.** <https://wiki.squid-cache.org/Features/SslPeekAndSplice>.
|
||||
|
||||
Right tool if pipelock grows an ICAP server endpoint. Otherwise, more
|
||||
config surface than this project needs.
|
||||
|
||||
### Go libraries: goproxy, gomitmproxy, martian
|
||||
|
||||
- **`goproxy`** (elazarl) — long-lived Go library, basic CONNECT-bumping
|
||||
proxy with a handler API. Sparse on HTTP/2.
|
||||
<https://github.com/elazarl/goproxy>
|
||||
- **`gomitmproxy`** (AdGuard) — newer, cleaner API; built for AdGuard
|
||||
Home / DNS-filtering products. HTTP/2 support is partial.
|
||||
<https://github.com/AdguardTeam/gomitmproxy>
|
||||
- **`martian`** (Google) — request/response modifier framework with a
|
||||
JSON-configurable rule engine. Used internally at Google; public
|
||||
ecosystem thin.
|
||||
<https://github.com/google/martian>
|
||||
|
||||
These are relevant only if we decide to write a custom TLS-terminating
|
||||
binary that links pipelock's scanning packages directly — Topology C
|
||||
below. They are not faster than mitmproxy for the v1 sidecar shape;
|
||||
they are smaller and more direct, at the cost of writing more Go.
|
||||
|
||||
### Disqualified
|
||||
|
||||
- **Caddy, Envoy, HAProxy.** All can terminate TLS at a reverse-proxy
|
||||
vhost. None ship a "bump on CONNECT and forward plaintext to a
|
||||
downstream proxy" mode out of the box. Adapting any of them to this
|
||||
shape is more work than starting from mitmproxy.
|
||||
- **Cloudflare Gateway, Zscaler, NetSkope, Forcepoint.** Managed cloud
|
||||
egress with TLS inspection. Wrong topology — they live outside the
|
||||
host, not as a per-bottle sidecar, and they require trusting a vendor
|
||||
with full plaintext.
|
||||
- **Charles Proxy, Burp Suite.** Closed-source GUI tools for developer
|
||||
capture and security testing. Not appropriate as headless sidecars.
|
||||
- **`mitmdump` standalone vs. embedding mitmproxy as a library.** Both
|
||||
are mitmproxy. Calling out only to note: the project ships both a CLI
|
||||
and a Python API; addons can be loaded either way.
|
||||
|
||||
---
|
||||
|
||||
## Topologies
|
||||
|
||||
Five candidate topologies, ordered roughly from least to most coupled
|
||||
between the two components.
|
||||
|
||||
### A — mitmproxy in front of pipelock (recommended)
|
||||
|
||||
```
|
||||
agent --HTTPS_PROXY--> mitmproxy --HTTP_PROXY--> pipelock --> internet
|
||||
(bump TLS) (scan plain) (real TLS)
|
||||
```
|
||||
|
||||
mitmproxy terminates the agent's TLS connection, decrypts, and then
|
||||
forwards the inner HTTP request to pipelock by treating pipelock as
|
||||
its own upstream HTTP forward proxy. Pipelock receives plaintext HTTP
|
||||
exactly as if the agent had used HTTP, applies its full scanning
|
||||
pipeline, and forwards to mitmproxy's upstream client half — which
|
||||
re-establishes TLS to the real destination.
|
||||
|
||||
Concretely the agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's
|
||||
`upstream_proxy` config points at pipelock; pipelock's network reach
|
||||
includes the real internet.
|
||||
|
||||
- **Wins.** Pipelock unchanged. mitmproxy unchanged from default
|
||||
configuration. Each component has one job. Failure modes are clear
|
||||
per layer.
|
||||
- **Costs.** Two sidecars per bottle instead of one. One extra
|
||||
decrypt / re-encrypt hop, ~5–15 ms per request in steady state.
|
||||
- **Open question.** How exactly mitmproxy forwards to pipelock matters
|
||||
for whether pipelock sees TLS again or only HTTP. mitmproxy's
|
||||
`upstream` mode wraps the decrypted request in another CONNECT if the
|
||||
destination is HTTPS — which would re-encrypt before pipelock sees
|
||||
it, defeating the point. The correct mode is `upstream` with TLS
|
||||
re-origination disabled, or `regular` mode with a chained proxy. The
|
||||
v2 release of mitmproxy reworked this; needs verification against the
|
||||
current docs at integration time.
|
||||
|
||||
### B — pipelock in front of mitmproxy (ruled out)
|
||||
|
||||
```
|
||||
agent --HTTPS_PROXY--> pipelock --CONNECT?--> mitmproxy --> internet
|
||||
(sees CONNECT only) (bump TLS)
|
||||
```
|
||||
|
||||
Pipelock would receive a `CONNECT` and decide to allow or deny based
|
||||
on hostname, then tunnel to mitmproxy. mitmproxy would terminate TLS
|
||||
and see plaintext — but pipelock would never see the plaintext, which
|
||||
is the whole point of the exercise. The scanning still happens (in
|
||||
mitmproxy), but it isn't pipelock doing it, so we'd need an entirely
|
||||
different rule engine. Ruled out.
|
||||
|
||||
### C — Extend pipelock itself to terminate TLS
|
||||
|
||||
Two sub-variants:
|
||||
|
||||
**C.1 — Upstream a `tls_terminate` mode.** Submit a feature to
|
||||
pipelock that adds CONNECT bumping and per-host cert generation in Go,
|
||||
using `crypto/tls` and the existing scanning packages. Pipelock becomes
|
||||
a self-contained MITM proxy. License question matters here: the Apache
|
||||
2.0 core can grow new features in-tree, but if upstream insists this
|
||||
belongs in `enterprise/` (ELv2), we either accept ELv2 or fork.
|
||||
|
||||
**C.2 — Wrap pipelock in a thin Go binary in the same container.** A
|
||||
small Go program does the TLS half (`CONNECT` parsing, cert generation,
|
||||
TLS handshake) and pipes plaintext to pipelock over UDS or loopback.
|
||||
The wrapper is ours; pipelock is unmodified. No license question.
|
||||
|
||||
- **Wins.** Single component on the egress path. Pipelock owns the
|
||||
scanning end-to-end, including domain-fronting checks (SNI vs.
|
||||
`Host` vs. `CONNECT`).
|
||||
- **Costs.** Real Go engineering effort. CA generation, cert caching,
|
||||
TLS handshake, HTTP/2 ALPN negotiation, WebSocket upgrade — all
|
||||
things mitmproxy already solves.
|
||||
- **When.** Right shape for v2 or v3 once the v1 mitmproxy-in-front
|
||||
topology has proven the integration works and the scanning rules are
|
||||
stable.
|
||||
|
||||
### D — mitmproxy as the proxy, pipelock as a content-scan subroutine
|
||||
|
||||
```
|
||||
agent --HTTPS_PROXY--> mitmproxy --> internet
|
||||
(bump TLS)
|
||||
|
|
||||
v
|
||||
POST /scan to pipelock
|
||||
<- allow / block / redact
|
||||
```
|
||||
|
||||
A Python addon in mitmproxy sends each decrypted request (and response)
|
||||
to a pipelock HTTP `/scan` endpoint and gates the flow on the verdict.
|
||||
mitmproxy handles all networking; pipelock is the rule engine only.
|
||||
|
||||
- **Wins.** Clean separation of concerns. Pipelock doesn't have to
|
||||
speak TLS at all. The addon is small, ~100 lines of Python.
|
||||
- **Costs.** Requires pipelock to expose a scan API. The current Apache
|
||||
2.0 core does not document one. If `/scan` lives in `enterprise/`,
|
||||
ELv2 applies. If it doesn't exist, we'd be asking pipelock for a new
|
||||
surface.
|
||||
- **Variant.** Squid's ICAP path is the formalized version of the same
|
||||
pattern.
|
||||
|
||||
### E — Single container, two processes
|
||||
|
||||
mitmproxy and pipelock share a container, started by `supervisord` or
|
||||
`s6-overlay`. Networking simplifies to localhost. Lifecycle complicates:
|
||||
container restart now means restarting both; failure of one process is
|
||||
not visible at the Docker layer; logs interleave.
|
||||
|
||||
- **Wins.** Slightly less Docker plumbing in `cli.py`.
|
||||
- **Costs.** Operational complexity not worth the savings. The two
|
||||
containers are independent processes with independent failure modes;
|
||||
Docker is the right tool for that.
|
||||
|
||||
Net: not recommended.
|
||||
|
||||
---
|
||||
|
||||
## CA lifecycle
|
||||
|
||||
The CA private key is the asset to defend. With it, anyone can issue
|
||||
certs that the bottle's trust store will accept for any hostname. So:
|
||||
|
||||
**Per-bottle ephemeral CA.** At bottle start, generate a fresh
|
||||
RSA-2048 or ECDSA-P256 CA inside the mitmproxy sidecar. Export only
|
||||
the public cert (PEM) into the bottle's trust store at one of:
|
||||
|
||||
- `/usr/local/share/ca-certificates/claude-bottle-mitm.crt` followed by
|
||||
`update-ca-certificates` (Debian/Ubuntu base images).
|
||||
- `/etc/pki/ca-trust/source/anchors/` with `update-ca-trust`
|
||||
(Red-Hat-family).
|
||||
- `$NODE_EXTRA_CA_CERTS` for Node-based agents (Claude Code).
|
||||
- `$SSL_CERT_FILE` / `$REQUESTS_CA_BUNDLE` for Python SDKs.
|
||||
|
||||
The private key never leaves the sidecar's filesystem. The CA cert
|
||||
public half is the only artifact that crosses into the bottle.
|
||||
|
||||
On bottle teardown, the sidecar container is destroyed; the CA dies
|
||||
with it. The next bottle gets a fresh CA. No long-lived MITM CA on
|
||||
disk.
|
||||
|
||||
**Why not a shared per-host CA.** A persistent CA across bottles is
|
||||
faster (no generation at start) but is a real liability: if any bottle
|
||||
exfiltrates the CA cert public half (which it can — it's in the trust
|
||||
store by design), an attacker on the host network could in principle
|
||||
impersonate any host to any bottle. With a per-bottle CA, the exfil
|
||||
gains nothing: the CA is bottle-local and dies in minutes.
|
||||
|
||||
**Generation cost.** RSA-2048 CA generation is ~200 ms; ECDSA-P256 is
|
||||
~5 ms. Either is irrelevant against the per-bottle Docker pull and
|
||||
network setup cost.
|
||||
|
||||
**Where the CA lives in the bottle's trust store.** Both: a
|
||||
distribution-standard path with `update-ca-certificates`, and the
|
||||
env-var path. Belt and suspenders, because some Node and Python
|
||||
libraries honor the env vars only, and some load only `/etc/ssl/certs/`
|
||||
directly.
|
||||
|
||||
---
|
||||
|
||||
## Cert pinning (brief)
|
||||
|
||||
A client that pins ignores the trust store and refuses any cert whose
|
||||
public key isn't on a hardcoded list. Three observations for this
|
||||
project:
|
||||
|
||||
- The current `DEFAULT_ALLOWLIST` (`api.anthropic.com`,
|
||||
`statsig.anthropic.com`, `sentry.io`, `claude.ai`,
|
||||
`platform.claude.com`, `downloads.claude.ai`,
|
||||
`raw.githubusercontent.com`) does not appear to include any host that
|
||||
pins against server-side SDKs. Server-side SDKs (Node, Python) almost
|
||||
universally honor system trust and `NODE_EXTRA_CA_CERTS` /
|
||||
`SSL_CERT_FILE`. Mobile SDKs and Chromium pin; we don't run those.
|
||||
- If a future allowlisted host turns out to pin, the mitigation is
|
||||
selective bumping via mitmproxy `ignore_hosts`: that specific
|
||||
hostname tunnels blindly and pipelock loses DLP coverage for it.
|
||||
Coverage on every other host is unaffected.
|
||||
- The cost of finding out: a single 5-minute test before adding a host
|
||||
— point mitmproxy at the host, observe whether the client succeeds.
|
||||
|
||||
Not a v1 blocker. Document the failure mode and the mitigation.
|
||||
|
||||
---
|
||||
|
||||
## Comparison table
|
||||
|
||||
| | A: mitmproxy → pipelock | B: pipelock → mitmproxy | C: TLS in pipelock | D: mitmproxy + scan API | E: one container |
|
||||
|---|---|---|---|---|---|
|
||||
| Pipelock sees plaintext | yes | no | yes | yes (via /scan) | yes |
|
||||
| Code change to pipelock | none | none | substantial | adds /scan endpoint | none |
|
||||
| Sidecar count | 2 | 2 | 1 | 2 | 1 |
|
||||
| Cert generation owner | mitmproxy | mitmproxy | pipelock | mitmproxy | mitmproxy |
|
||||
| Selective bumping | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` | pipelock config | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` |
|
||||
| Failure isolation per process | yes | yes | n/a (one process) | yes | no (shared container) |
|
||||
| License question | none | none | ELv2 risk | ELv2 risk | none |
|
||||
| v1 effort | low | low (but pointless) | high | medium | low |
|
||||
| Long-term shape | interim | n/a | best | possible | not recommended |
|
||||
|
||||
---
|
||||
|
||||
## Recommendation
|
||||
|
||||
**Adopt Topology A for v1.** Add a mitmproxy sidecar to the egress
|
||||
topology, in front of pipelock on the same per-bottle internal network.
|
||||
The agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's upstream is
|
||||
pipelock; pipelock's upstream is the real internet.
|
||||
|
||||
Concretely:
|
||||
|
||||
1. Add a `MitmproxyProxy` class alongside `PipelockProxy`, with the
|
||||
same `prepare` / `start` / `stop` lifecycle. The class generates
|
||||
a per-bottle CA in `stage_dir`, exports the public cert into a
|
||||
second file, and writes a mitmproxy config that:
|
||||
- bumps every CONNECT by default
|
||||
- uses `upstream_proxy = http://pipelock-<slug>:<port>`
|
||||
- listens on a known port inside the per-bottle internal network
|
||||
2. Extend the bottle launch step to copy the CA public cert into the
|
||||
agent container under
|
||||
`/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, run
|
||||
`update-ca-certificates`, and set `NODE_EXTRA_CA_CERTS` /
|
||||
`SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` accordingly.
|
||||
3. Repoint the agent's `HTTPS_PROXY` and `HTTP_PROXY` from the pipelock
|
||||
container to the mitmproxy container.
|
||||
4. Verify mitmproxy's upstream-proxy mode forwards plaintext (not a
|
||||
re-wrapped CONNECT) to pipelock; if not, use `regular` mode with a
|
||||
chained proxy directive.
|
||||
5. Test that pipelock's DLP, subdomain-entropy, and MCP scanners now
|
||||
fire on real request bodies for `api.anthropic.com` traffic.
|
||||
|
||||
**Defer Topologies C and D.** Topology C (extending pipelock to
|
||||
terminate TLS) is the cleanest long-term shape but is a substantial
|
||||
build and runs into the Apache 2.0 vs. ELv2 question. Topology D
|
||||
(mitmproxy with pipelock as a scan API) is attractive but requires a
|
||||
pipelock surface that doesn't exist today. Both are valid v2 targets;
|
||||
neither is the right starting point.
|
||||
|
||||
The `network-egress-guard.md` v1 iptables + dnsmasq layer remains
|
||||
necessary alongside this — TLS interception covers HTTP/HTTPS only;
|
||||
raw TCP, UDP/443 (QUIC), UDP/53 (DNS), and ICMP still need the
|
||||
IP-level default-deny.
|
||||
|
||||
---
|
||||
|
||||
## Open questions
|
||||
|
||||
1. **mitmproxy upstream-proxy mode mechanics.** Does mitmproxy in
|
||||
`upstream_proxy` mode forward decrypted HTTP plaintext to the
|
||||
upstream, or does it wrap it in a new CONNECT? The documented
|
||||
behavior changed between mitmproxy 8 and 10. Needs verification
|
||||
against the version we pin.
|
||||
2. **Pipelock's behavior when receiving plain HTTP.** Pipelock's
|
||||
`forward_proxy.enabled: true` accepts both `GET http://...` (plain
|
||||
HTTP) and `CONNECT host:443` (HTTPS). After Topology A is wired up,
|
||||
pipelock will see only plain HTTP — does its DLP / MCP scanning
|
||||
pipeline run the full set of layers, or are some gated on the
|
||||
CONNECT path? Confirm by reading
|
||||
`github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md`.
|
||||
3. **CA installation in the Anthropic-provided Claude Code Docker image.**
|
||||
The base image's distribution determines whether `update-ca-certificates`
|
||||
(Debian/Ubuntu) or `update-ca-trust` (Red Hat) is the right command.
|
||||
The current `Dockerfile` should be inspected before assuming Debian.
|
||||
4. **HTTP/2 over the agent → mitmproxy hop.** Node's HTTP client
|
||||
negotiates `h2` via ALPN. mitmproxy speaks `h2` to clients in recent
|
||||
versions. Confirm the version we pin supports `h2` end-to-end and
|
||||
doesn't downgrade to `http/1.1` (which would be a silent
|
||||
performance regression).
|
||||
5. **Selective-bump policy surface.** Where does the
|
||||
"tunnel this hostname blindly" decision live? Options: a field on
|
||||
`bottle.egress` in the manifest, a fixed list of known-pinning
|
||||
hosts baked into the mitmproxy config, or pipelock-side opt-out.
|
||||
Manifest field is most consistent with the existing
|
||||
`bottle.egress.allowlist` shape.
|
||||
6. **Image pin for mitmproxy.** The `pipelock-assessment.md`
|
||||
recommendation is to pin by digest. The mitmproxy Docker Hub image
|
||||
should be pinned the same way. Which release line? `mitmproxy/mitmproxy`
|
||||
ships rolling and tagged versions; the tagged `:11.x` line is the
|
||||
right baseline.
|
||||
7. **CA generation in Python (mitmproxy) vs. as a separate step.**
|
||||
mitmproxy generates a CA on first launch if none is provided. For
|
||||
per-bottle ephemerality, we want the CA to be ours, not whatever
|
||||
mitmproxy chooses — so generate the CA in the host-side prepare
|
||||
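step and place it at `<confdir>/mitmproxy-ca.pem` (selected via `--set confdir=...`); `--certs *=...` supplies a leaf certificate, not the CA. Mechanics need confirming.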
|
||||
8. **Domain fronting verification.** Once pipelock sees plaintext, it
|
||||
has access to the inner `Host` / `:authority`. A new rule that
|
||||
compares it against the outer `CONNECT` target catches domain
|
||||
fronting. Worth a follow-up note on whether pipelock has such a
|
||||
rule or whether we add it.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- mitmproxy: <https://mitmproxy.org>, <https://github.com/mitmproxy/mitmproxy>
|
||||
- mitmproxy `upstream_proxy` mode: <https://docs.mitmproxy.org/stable/concepts/modes/#upstream-proxy>
|
||||
- mitmproxy CA cert installation: <https://docs.mitmproxy.org/stable/concepts/certificates/>
|
||||
- Squid `ssl_bump`: <https://wiki.squid-cache.org/Features/SslPeekAndSplice>
|
||||
- Squid ICAP: <https://wiki.squid-cache.org/Features/ICAP>
|
||||
- `goproxy`: <https://github.com/elazarl/goproxy>
|
||||
- `gomitmproxy`: <https://github.com/AdguardTeam/gomitmproxy>
|
||||
- `martian`: <https://github.com/google/martian>
|
||||
- Node TLS / `NODE_EXTRA_CA_CERTS`: <https://nodejs.org/api/cli.html#node_extra_ca_certsfile>
|
||||
- Python `SSL_CERT_FILE` and `REQUESTS_CA_BUNDLE`: <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.load_verify_locations>
|
||||
- Prior research — pipelock assessment: `docs/research/pipelock-assessment.md`
|
||||
- Prior research — network egress guard: `docs/research/network-egress-guard.md`
|
||||
- Prior research — secret exfil tripwire encodings: `docs/research/secret-exfil-tripwire-encodings.md`
|
||||
|
||||
Research conducted 2026-05-12.
|
||||
@@ -92,6 +92,14 @@ class TestDryRunPlan(unittest.TestCase):
|
||||
self.assertEqual(sorted(set(hosts)), hosts,
|
||||
"hosts must be sorted and deduplicated")
|
||||
|
||||
# PRD 0006: TLS interception is on for every launched
|
||||
# bottle. Fingerprint is null at dry-run (no CA exists
|
||||
# yet); real launches log it from provision_ca.
|
||||
self.assertEqual(
|
||||
{"enabled": True, "ca_fingerprint": None},
|
||||
plan["egress"]["tls_interception"],
|
||||
)
|
||||
|
||||
# No Docker side effects (see the GITEA_ACTIONS skip note
|
||||
# above — this guard runs locally only).
|
||||
if check_side_effects:
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Integration: with pipelock's tls_interception enabled (PRD 0006),
|
||||
a clean HTTPS GET to an allowlisted host succeeds end-to-end through
|
||||
the bumped tunnel.
|
||||
|
||||
Complement to test_pipelock_blocks_secret_https_post — together they
|
||||
pin pipelock's two paths (block on body match, allow on clean
|
||||
traffic). This test is also the implicit TLS-trust check: if
|
||||
provision_ca had failed to install pipelock's CA into the agent's
|
||||
trust store, curl would have rejected the bumped leaf cert and the
|
||||
fetch would have failed before any HTTP response could come back."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle.backend import BottleSpec, get_bottle_backend
|
||||
from tests._docker import skip_unless_docker
|
||||
from tests.fixtures import fixture_minimal
|
||||
|
||||
|
||||
# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST.
|
||||
# `git`'s own README on the master branch is a long-lived raw file
|
||||
# (~3 KB) that any CI runner with internet can fetch.
|
||||
_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md"
|
||||
|
||||
|
||||
@skip_unless_docker()
class TestPipelockAllowsNormalHttps(unittest.TestCase):
    """Clean HTTPS traffic to an allowlisted host must pass through
    pipelock's bumped tunnel and return the real upstream response."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_get_to_allowed_host_succeeds(self):
        # Shell probe run inside the bottle: fetch the target through the
        # proxy, print the HTTP status, then print the body size in bytes.
        probe = (
            "set -eu\n"
            'curl --proxy "$HTTPS_PROXY" -s --max-time 10 \\\n'
            " -w 'status=%{http_code}\\n' \\\n"
            " -o /tmp/probe-body.txt \\\n"
            f" {_TARGET_URL}\n"
            'echo "len=$(wc -c < /tmp/probe-body.txt)"\n'
        )

        backend = get_bottle_backend()
        staging = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            plan = backend.prepare(
                BottleSpec(
                    manifest=fixture_minimal(),
                    agent_name="demo",
                    copy_cwd=False,
                    user_cwd=str(staging),
                    forward_oauth_token=False,
                ),
                stage_dir=staging,
            )
            with backend.launch(plan) as bottle:
                outcome = bottle.exec(probe)
        finally:
            # Always drop the temporary stage directory, even when
            # prepare/launch raised.
            shutil.rmtree(staging, ignore_errors=True)

        self.assertEqual(
            0,
            outcome.returncode,
            msg=f"exec wrapper failed: stdout={outcome.stdout!r} stderr={outcome.stderr!r}",
        )
        # 200 from the upstream (pipelock forwarded after the body scan
        # passed). Had curl rejected the bumped leaf cert it would have
        # exited non-zero (tripping the check above under `set -e`) and
        # never reported a 200 status.
        self.assertIn(
            "status=200",
            outcome.stdout,
            msg=f"expected 200 from raw.githubusercontent.com; got: {outcome.stdout!r}",
        )
        # The git README is ~3 KB. A non-empty body proves the response
        # actually transferred — i.e. the CONNECT tunnel, bumped TLS and
        # body forwarding all worked.
        self.assertNotIn(
            "len=0\n",
            outcome.stdout,
            msg=f"response body was empty: {outcome.stdout!r}",
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,96 @@
|
||||
"""Integration: with pipelock's tls_interception enabled (PRD 0006),
|
||||
a credential POST sent over HTTPS is blocked by pipelock's body-scan
|
||||
layer — closing the gap that motivated this PRD.
|
||||
|
||||
End-to-end: drives `BottleBackend.prepare → launch` so the real
|
||||
image build, network plumbing, pipelock_tls_init, sidecar bring-up,
|
||||
and provision_ca (CA install in the agent's trust store) are all in
|
||||
the loop. The probe is a single `curl --proxy "$HTTPS_PROXY" -X POST
|
||||
... https://api.anthropic.com/...` — curl natively does CONNECT
|
||||
through the proxy, the agent's trust store now contains pipelock's
|
||||
per-bottle CA so curl trusts pipelock's bumped leaf, and pipelock
|
||||
sees the decrypted body and returns its known
|
||||
`blocked: request body contains secret: <pattern>` 403."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle.backend import BottleSpec, get_bottle_backend
|
||||
from claude_bottle.manifest import Manifest
|
||||
from tests._docker import skip_unless_docker
|
||||
|
||||
|
||||
# Synthetic value shaped like a GitHub Personal Access Token; not a
|
||||
# real credential. Carried into the bottle as an env var so the
|
||||
# probe shell can read it via $FAKE_TOKEN without ever interpolating
|
||||
# the value on the bash `bottle.exec` argv.
|
||||
_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
|
||||
|
||||
|
||||
@skip_unless_docker()
class TestPipelockBlocksSecretHttpsPost(unittest.TestCase):
    """An HTTPS POST whose body carries a credential-shaped value must be
    rejected by pipelock's body-scan layer."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_post_with_credential_body_is_blocked(self):
        # The fake token reaches the bottle as an env var, so the probe
        # reads it via $FAKE_TOKEN rather than having it on the argv.
        manifest = Manifest.from_json_obj({
            "bottles": {
                "dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}},
            },
            "agents": {
                "demo": {"skills": [], "prompt": "", "bottle": "dev"},
            },
        })

        # Shell probe run inside the bottle: POST the token through the
        # proxy, print the HTTP status, then the first 200 body bytes.
        probe = (
            "set -eu\n"
            'curl --proxy "$HTTPS_PROXY" -s --max-time 8 \\\n'
            " -w 'status=%{http_code}\\n' \\\n"
            " -o /tmp/probe-body.txt \\\n"
            ' -X POST -d "token=$FAKE_TOKEN" \\\n'
            " https://api.anthropic.com/dlp-probe\n"
            'echo "body=$(head -c 200 /tmp/probe-body.txt)"\n'
        )

        backend = get_bottle_backend()
        staging = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            plan = backend.prepare(
                BottleSpec(
                    manifest=manifest,
                    agent_name="demo",
                    copy_cwd=False,
                    user_cwd=str(staging),
                    forward_oauth_token=False,
                ),
                stage_dir=staging,
            )
            with backend.launch(plan) as bottle:
                outcome = bottle.exec(probe)
        finally:
            # Always drop the temporary stage directory, even when
            # prepare/launch raised.
            shutil.rmtree(staging, ignore_errors=True)

        self.assertEqual(
            0,
            outcome.returncode,
            msg=f"exec wrapper failed: stdout={outcome.stdout!r} stderr={outcome.stderr!r}",
        )
        # Pipelock's body-scan block returns 403 with a plain-text body
        # starting `blocked: ` (pinned empirically; see
        # tests/unit/test_mitmproxy_verdict.py for the
        # corresponding-fingerprint test, retained from PR #8 as general
        # pipelock-block-shape coverage).
        self.assertIn(
            "status=403",
            outcome.stdout,
            msg=f"expected 403 from pipelock; got: {outcome.stdout!r}",
        )
        self.assertIn(
            "body=blocked: ",
            outcome.stdout,
            msg=f"expected pipelock block body; got: {outcome.stdout!r}",
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -28,6 +28,7 @@ from claude_bottle.backend.docker.pipelock import (
|
||||
PIPELOCK_PORT,
|
||||
DockerPipelockProxy,
|
||||
pipelock_container_name,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
from tests._docker import skip_unless_docker
|
||||
from tests.fixtures import fixture_minimal
|
||||
@@ -79,10 +80,17 @@ class TestPipelockSidecarSmoke(unittest.TestCase):
|
||||
self.internal_net = network_create_internal(self.slug)
|
||||
self.egress_net = network_create_egress(self.slug)
|
||||
|
||||
# PRD 0006: pipelock's tls_interception block in the rendered
|
||||
# YAML references in-container CA paths; .start docker-cp's
|
||||
# those files in. The full launch flow generates the CA via
|
||||
# `pipelock_tls_init`; this smoke test calls it directly.
|
||||
ca_cert_host, ca_key_host = pipelock_tls_init(self.work_dir)
|
||||
plan = dataclasses.replace(
|
||||
prep,
|
||||
internal_network=self.internal_net,
|
||||
egress_network=self.egress_net,
|
||||
ca_cert_host_path=ca_cert_host,
|
||||
ca_key_host_path=ca_key_host,
|
||||
)
|
||||
|
||||
self.sidecar_name = proxy.start(plan)
|
||||
|
||||
@@ -37,6 +37,9 @@ class TestBuildConfig(unittest.TestCase):
|
||||
# No SSH entries → no trusted_domains, no ssrf.
|
||||
self.assertNotIn("trusted_domains", cfg)
|
||||
self.assertNotIn("ssrf", cfg)
|
||||
# Without CA paths, the tls_interception block is omitted —
|
||||
# pipelock falls back to its built-in default of `enabled: false`.
|
||||
self.assertNotIn("tls_interception", cfg)
|
||||
|
||||
def test_ssh_shape(self):
|
||||
cfg = pipelock_build_config(fixture_with_ssh().bottles["dev"])
|
||||
@@ -49,6 +52,31 @@ class TestBuildConfig(unittest.TestCase):
|
||||
# Strict mode: IPv4 host is also in the api_allowlist union.
|
||||
self.assertIn("100.78.141.42", cast(list[str], cfg["api_allowlist"]))
|
||||
|
||||
def test_tls_interception_block_emitted_when_paths_supplied(self):
    # PRD 0006: the CA paths arrive from DockerPipelockProxy's
    # in-container constants; this pins the exact dict shape that
    # pipelock_build_config emits when both are supplied.
    cert_path = "/etc/pipelock-ca.pem"
    key_path = "/etc/pipelock-ca-key.pem"
    cfg = pipelock_build_config(
        fixture_minimal().bottles["dev"],
        ca_cert_path=cert_path,
        ca_key_path=key_path,
    )
    expected_block = {
        "enabled": True,
        "ca_cert": cert_path,
        "ca_key": key_path,
    }
    self.assertEqual(expected_block, cfg["tls_interception"])
|
||||
|
||||
def test_tls_interception_requires_both_paths(self):
    # Supplying only one of the two CA paths is a programmer error and
    # must raise rather than silently omit the block.
    cert_only = {"ca_cert_path": "/etc/pipelock-ca.pem"}
    with self.assertRaises(ValueError):
        pipelock_build_config(fixture_minimal().bottles["dev"], **cert_only)
|
||||
|
||||
|
||||
class TestRenderAndWrite(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@@ -101,6 +129,21 @@ class TestRenderAndWrite(unittest.TestCase):
|
||||
self.assertNotIn("MY_SECRET", content)
|
||||
self.assertNotIn("prompt-message", content)
|
||||
|
||||
def test_render_emits_tls_interception_via_prepare(self):
    """`DockerPipelockProxy.prepare` plumbs its in-container CA
    constants through to the YAML. The block should land in the
    rendered output with `enabled: true` and the configured paths.
    The actual host-side CA generation happens in launch (not
    prepare), so this test exercises only the YAML rendering."""
    bottle_cfg = fixture_minimal().bottles["dev"]
    rendered = DockerPipelockProxy().prepare(
        bottle_cfg, "demo", self.out_dir
    ).yaml_path.read_text()
    # Checked in order, so the first missing fragment is the one reported.
    required_fragments = (
        "tls_interception:",
        "enabled: true",
        'ca_cert: "/etc/pipelock-ca.pem"',
        'ca_key: "/etc/pipelock-ca-key.pem"',
    )
    for fragment in required_fragments:
        self.assertIn(fragment, rendered)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user