def provision(self, plan: PlanT, target: str) -> str | None:
    """Copy host-side files into the running bottle; return the prompt path.

    Invoked from `launch` once the container/machine is up. `target`
    names the running instance in backend-specific terms (Docker:
    resolved container name; fly: machine id).

    The steps always run in this order: ca → prompt → skills → ssh →
    git. The CA is installed first because it changes how the agent
    process trusts the network; the remaining steps don't depend on
    it, but the ordering keeps trust setup adjacent to the launch
    step.

    Returns the in-container prompt path when a prompt was
    provisioned, otherwise None. The Bottle handle uses that value to
    decide whether to add --append-system-prompt-file to claude's
    argv.

    Subclasses normally leave this orchestration alone and implement
    the individual provision_* steps instead."""
    self.provision_ca(plan, target)
    prompt_path = self.provision_prompt(plan, target)
    # The remaining steps return nothing; look each one up right
    # before calling it so the sequence matches a plain call chain.
    for step_name in ("provision_skills", "provision_ssh", "provision_git"):
        getattr(self, step_name)(plan, target)
    return prompt_path


def provision_ca(self, plan: PlanT, target: str) -> None:
    """Install the egress proxy's CA into the running bottle's trust
    store.

    The base implementation deliberately does nothing, so backends
    without TLS-interception support are not forced to implement it.
    The Docker backend overrides this to pull mitmproxy's CA out of
    the sidecar and run `update-ca-certificates` inside the agent
    container."""
    return None
import prepare as _prepare from .bottle import DockerBottle from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan +from .mitmproxy import DockerMitmproxyProxy from .pipelock import DockerPipelockProxy +from .provision import ca as _ca from .provision import git as _git from .provision import prompt as _prompt from .provision import skills as _skills @@ -38,15 +40,23 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup def __init__(self) -> None: self._proxy = DockerPipelockProxy() + self._mitm = DockerMitmproxyProxy() def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan: - return _prepare.resolve_plan(spec, stage_dir=stage_dir, proxy=self._proxy) + return _prepare.resolve_plan( + spec, stage_dir=stage_dir, proxy=self._proxy, mitm=self._mitm, + ) @contextmanager def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]: - with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle: + with _launch.launch( + plan, proxy=self._proxy, mitm=self._mitm, provision=self.provision, + ) as bottle: yield bottle + def provision_ca(self, plan: DockerBottlePlan, target: str) -> None: + _ca.provision_ca(plan, target) + def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None: return _prompt.provision_prompt(plan, target) diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index 5ad3da8..293e965 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -13,6 +13,7 @@ from pathlib import Path from ...log import info from ...manifest import Agent, Bottle +from ...mitmproxy import MitmproxyProxyPlan from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist from .. 
import BottlePlan @@ -49,6 +50,7 @@ class DockerBottlePlan(BottlePlan): forwarded_env: dict[str, str] = field(repr=False) prompt_file: Path proxy_plan: PipelockProxyPlan + mitmproxy_plan: MitmproxyProxyPlan allowlist_summary: str use_runsc: bool @@ -93,6 +95,7 @@ class DockerBottlePlan(BottlePlan): else: info(" ssh hosts : (none)") info(f" egress : {self.allowlist_summary}") + info(" tls intercept : mitmproxy (per-bottle ephemeral CA, generated at launch)") info( f"prompt : {len(v.agent.prompt)} chars; " f"first line: {v.prompt_first_line or '(empty)'}" @@ -117,6 +120,14 @@ class DockerBottlePlan(BottlePlan): "egress": { "host_count": len(hosts), "hosts": hosts, + # Reserved for PRD 0005: TLS interception via mitmproxy. + # ca_fingerprint is always null at dry-run because the + # CA is generated by the sidecar at launch time. Real + # launches print the fingerprint to stderr. + "mitm": { + "enabled": True, + "ca_fingerprint": None, + }, }, "prompt": { "length": len(v.agent.prompt), diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index 45ad6dd..8664012 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -22,8 +22,15 @@ from . import network as network_mod from . import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan +from .mitmproxy import DockerMitmproxyProxy, mitmproxy_proxy_url from .pipelock import DockerPipelockProxy, pipelock_proxy_url +# Path inside the agent container where the mitmproxy CA cert lives +# after provision_ca runs. Exported as a module-level constant so +# both the agent's docker-run env trio and the provisioner agree. +AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-mitm.crt" +AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt" + # Where the repo root lives, for `docker build` context. Computed once. 
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) @@ -34,6 +41,7 @@ def launch( plan: DockerBottlePlan, *, proxy: DockerPipelockProxy, + mitm: DockerMitmproxyProxy, provision: Callable[[DockerBottlePlan, str], str | None], ) -> Generator[DockerBottle, None, None]: """Build, launch, and provision a Docker bottle. Teardown on exit. @@ -71,6 +79,17 @@ def launch( pipelock_name = proxy.start(proxy_plan) stack.callback(proxy.stop, pipelock_name) + # mitmproxy sits in front of pipelock on the agent's egress + # path. mitmproxy's `addon.py` reaches pipelock via the + # service-name URL we hand it here. + mitm_plan = dataclasses.replace( + plan.mitmproxy_plan, + internal_network=internal_network, + egress_network=egress_network, + ) + mitm_name = mitm.start(mitm_plan, pipelock_url=pipelock_proxy_url(plan.slug)) + stack.callback(mitm.stop, mitm_name) + container = _run_agent_container(plan, internal_network) stack.callback(docker_mod.force_remove_container, container) @@ -85,7 +104,10 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str: """Build the `docker run` argv and execute it, handling name- conflict races by incrementing the suffix (unless the name was user-pinned). Returns the resolved container name.""" - proxy_url = pipelock_proxy_url(plan.slug) + # Agent traffic routes through mitmproxy, not pipelock directly. + # mitmproxy decrypts and hands the plaintext to pipelock via its + # addon; pipelock is unchanged from PRD 0001. + proxy_url = mitmproxy_proxy_url(plan.slug) docker_args: list[str] = [ "--rm", "-d", "--name", plan.container_name, @@ -93,6 +115,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str: "-e", f"HTTPS_PROXY={proxy_url}", "-e", f"HTTP_PROXY={proxy_url}", "-e", "NO_PROXY=localhost,127.0.0.1", + # CA trust trio for the agent process. Docker propagates + # run-time env into `docker exec`, so `claude` sees these + # without per-exec threading. 
NODE_EXTRA_CA_CERTS points at + # the cert file (Node appends it to its bundled roots); + # SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system + # bundle that `update-ca-certificates` rebuilds in + # provision_ca. + "-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}", + "-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}", + "-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}", ] if plan.use_runsc: docker_args.extend(["--runtime", "runsc"]) diff --git a/claude_bottle/backend/docker/mitmproxy.py b/claude_bottle/backend/docker/mitmproxy.py new file mode 100644 index 0000000..9911bbf --- /dev/null +++ b/claude_bottle/backend/docker/mitmproxy.py @@ -0,0 +1,178 @@ +"""DockerMitmproxyProxy — the Docker-specific lifecycle for the +mitmproxy sidecar. Inherits the addon-bundling from MitmproxyProxy. + +The sidecar runs `mitmdump -s /addon/addon.py`, listens on +MITMPROXY_PORT inside the per-bottle internal network, and generates +its own ephemeral CA on first launch (extracted by provision_ca, +installed into the agent's trust store).""" + +from __future__ import annotations + +import os +import subprocess +import time +from pathlib import Path + +from ...log import die, info, warn +from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan + + +# mitmproxy/mitmproxy:12.2.3 (mitmproxy v12 release line). The digest +# is the multi-arch image index — pulls resolve to the right per-arch +# child digest. Bumped deliberately; see PRD 0005. +MITMPROXY_IMAGE = os.environ.get( + "CLAUDE_BOTTLE_MITMPROXY_IMAGE", + "mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f", +) + +# Listening port for mitmproxy's forward proxy (agent-facing). +MITMPROXY_PORT = os.environ.get("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080") + +# Path inside the sidecar where the addon is dropped by docker cp. +MITMPROXY_ADDON_PATH = "/addon/addon.py" + +# Path inside the sidecar where mitmproxy generates its CA. 
_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem"

# Upper bound on how long extract_ca_cert waits for the sidecar to
# write its CA, and the pause between polls.
_CA_WAIT_SECONDS = 15
_CA_POLL_SECONDS = 0.5


def mitmproxy_container_name(slug: str) -> str:
    """Return the name of the per-bottle mitmproxy sidecar container."""
    return f"claude-bottle-mitm-{slug}"


def mitmproxy_proxy_url(slug: str) -> str:
    """Return the forward-proxy URL (sidecar service name + port) that
    the agent's HTTP(S)_PROXY variables point at."""
    return f"http://{mitmproxy_container_name(slug)}:{MITMPROXY_PORT}"


def _docker(*args: str) -> subprocess.CompletedProcess[str]:
    """Run `docker <args>`, capturing stdout/stderr as text. Never
    raises on a non-zero exit; callers inspect `.returncode`."""
    return subprocess.run(
        ["docker", *args],
        capture_output=True,
        text=True,
        check=False,
    )


def _abort_start(name: str, message: str) -> None:
    """Remove the half-built sidecar `name` (best effort), then die
    with `message`."""
    _docker("rm", "-f", name)
    die(message)


class DockerMitmproxyProxy(MitmproxyProxy):
    """Brings the mitmproxy sidecar up and down via Docker."""

    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Boot the mitmproxy sidecar:
          1. `docker create` on the internal network with mitmdump
             argv `--listen-port <port> -s <addon path>`, plus the
             pipelock URL injected as an env var.
          2. `docker cp` the vendored addon to the sidecar.
          3. Attach to the per-agent egress network so mitmproxy
             can reach real upstreams.
          4. `docker start`.

        Any failure after step 1 removes the created container before
        dying, so a failed start leaves nothing behind. Docker's
        stderr is included in every failure message so the cause
        (name conflict, missing image, missing network, ...) is
        visible.

        Returns the container name (the proxy_target passed to .stop
        and .extract_ca_cert)."""
        name = mitmproxy_container_name(plan.slug)
        if not plan.addon_src.is_file():
            die(f"mitmproxy addon not found at {plan.addon_src}")

        info(f"starting mitmproxy sidecar {name} on network {plan.internal_network}")

        created = _docker(
            "create",
            "--name", name,
            "--network", plan.internal_network,
            "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}",
            MITMPROXY_IMAGE,
            "mitmdump",
            "--listen-port", MITMPROXY_PORT,
            "-s", MITMPROXY_ADDON_PATH,
        )
        if created.returncode != 0:
            # Nothing was created, so there is nothing to remove. In
            # particular a name conflict must not delete the container
            # that already owns the name.
            die(f"failed to create mitmproxy sidecar {name}: "
                f"{created.stderr.strip()}")

        copied = _docker(
            "cp", str(plan.addon_src), f"{name}:{MITMPROXY_ADDON_PATH}",
        )
        if copied.returncode != 0:
            _abort_start(
                name,
                f"failed to copy mitmproxy addon into {name}: "
                f"{copied.stderr.strip()}",
            )

        attached = _docker("network", "connect", plan.egress_network, name)
        if attached.returncode != 0:
            _abort_start(
                name,
                f"failed to attach mitmproxy sidecar {name} to egress "
                f"network {plan.egress_network}: {attached.stderr.strip()}",
            )

        started = _docker("start", name)
        if started.returncode != 0:
            _abort_start(
                name,
                f"failed to start mitmproxy sidecar {name}: "
                f"{started.stderr.strip()}",
            )

        return name

    def stop(self, proxy_target: str) -> None:
        """Remove the sidecar. Idempotent: a missing container is
        success. Mirrors DockerPipelockProxy.stop. A failed removal
        only warns, so teardown of the rest of the bottle continues."""
        if _docker("inspect", proxy_target).returncode != 0:
            return
        if _docker("rm", "-f", proxy_target).returncode != 0:
            warn(
                f"failed to remove mitmproxy sidecar {proxy_target}; "
                f"clean up with 'docker rm -f {proxy_target}'"
            )

    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Poll the running sidecar for the CA cert (mitmproxy
        generates it on first launch, typically <1s after start),
        then `docker cp` the public half to `dest_path`. The private
        key never leaves the container. Dies if the cert has not
        appeared within the wait window or the copy fails."""
        deadline = time.monotonic() + _CA_WAIT_SECONDS
        while time.monotonic() < deadline:
            probe = _docker(
                "exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR,
            )
            if probe.returncode == 0:
                break
            time.sleep(_CA_POLL_SECONDS)
        else:
            # Loop ran out without `break`: the cert never showed up.
            die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} "
                f"after {_CA_WAIT_SECONDS}s — sidecar {proxy_target} may have "
                f"failed to start")

        copied = _docker(
            "cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path),
        )
        if copied.returncode != 0:
            die(f"failed to extract mitmproxy CA cert from {proxy_target}: "
                f"{copied.stderr.strip()}")
def ca_fingerprint(pem: str) -> str:
    """Return the lowercase hex SHA-256 of a PEM certificate's DER
    bytes — the standard certificate fingerprint form. Stdlib only;
    operates on the public cert, never a private key.

    Raises ValueError if `pem` lacks the PEM certificate header or
    footer."""
    return hashlib.sha256(ssl.PEM_cert_to_DER_cert(pem)).hexdigest()


def _docker_or_die(args: list[str], failure: str) -> None:
    """Run `docker <args>`; on a non-zero exit, die with `failure`
    followed by docker's stderr instead of raising a raw
    CalledProcessError traceback."""
    # Local import: the module-level import only brings in `info`.
    from ....log import die

    result = subprocess.run(
        ["docker", *args],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        die(f"{failure}: {result.stderr.strip()}")


def provision_ca(plan: DockerBottlePlan, target: str) -> None:
    """Pull mitmproxy's CA cert, install it in the agent, log its
    fingerprint.

    Called from BottleBackend.provision after the agent container is
    up. The mitmproxy sidecar is already running (started during
    `launch`). `target` is the agent container name.

    Steps: extract the public cert from the sidecar into the host
    stage dir, copy it to AGENT_CA_PATH in the agent, make it
    world-readable, then rebuild the system bundle with
    `update-ca-certificates` (run as root inside the agent). Any
    failing docker step dies with docker's stderr in the message."""
    sidecar = mitmproxy_container_name(plan.mitmproxy_plan.slug)
    stage_cert = plan.stage_dir / "mitm-ca.crt"

    DockerMitmproxyProxy().extract_ca_cert(sidecar, stage_cert)

    _docker_or_die(
        ["cp", str(stage_cert), f"{target}:{AGENT_CA_PATH}"],
        f"failed to copy mitmproxy CA cert into {target}",
    )
    _docker_or_die(
        ["exec", "-u", "0", target, "chmod", "644", AGENT_CA_PATH],
        f"failed to set permissions on {AGENT_CA_PATH} in {target}",
    )
    _docker_or_die(
        ["exec", "-u", "0", target, "update-ca-certificates"],
        f"update-ca-certificates failed in {target}",
    )

    # Logged once at launch as an audit signal. Only the public cert
    # is read; the private key stays in the sidecar.
    fingerprint = ca_fingerprint(stage_cert.read_text())
    info(f"mitm ca fingerprint: sha256:{fingerprint[:32]}...")
@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Start plan produced by MitmproxyProxy.prepare and consumed by
    .start when the sidecar is brought up.

    `addon_src` is the host-side location of the vendored addon.py,
    resolved when the plan is prepared. `slug` is the per-agent
    identifier used as the suffix of every per-bottle resource name.
    Both network fields start out empty; the backend's launch step
    fills them in (via dataclasses.replace) once the networks
    actually exist — the same pattern PipelockProxyPlan uses."""

    addon_src: Path
    slug: str
    internal_network: str = ""
    egress_network: str = ""


class MitmproxyProxy(ABC):
    """The mitmproxy TLS-interception sidecar.

    Locating and bundling the addon is platform-agnostic and lives
    here. Starting and stopping the sidecar, and pulling the CA cert
    out of it, are backend-specific and are supplied by concrete
    subclasses."""

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Find the vendored addon next to this module and wrap it in
        a start plan for `slug`.

        The addon ships with the project and is the same for every
        bottle; per-bottle wiring (the pipelock URL) is passed through
        env vars at start time rather than a generated config.

        Raises FileNotFoundError when addon.py is missing from the
        installed package."""
        addon_src = Path(__file__).resolve().with_name("addon.py")
        if addon_src.is_file():
            return MitmproxyProxyPlan(addon_src=addon_src, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_src}; the "
            f"package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Bring the sidecar up as described by `plan`.

        `pipelock_url` is exported into the sidecar's environment (as
        CLAUDE_BOTTLE_PIPELOCK_URL) so the addon knows where to send
        requests for scanning. Returns the proxy_target string that
        identifies the running sidecar; pass the same value to
        `.stop` and `.extract_ca_cert`."""

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Tear down the sidecar named by `proxy_target`. Must be
        idempotent: a target that no longer exists counts as
        success."""

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Copy the sidecar's public CA cert to `dest_path` on the
        host, polling until the cert file exists (mitmproxy creates
        its CA on first launch). The private key never leaves the
        sidecar."""
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "proxy-connection",  # non-standard, but widely sent by clients
    "te",
    "trailer",  # the actual field name
    "trailers",  # legacy spelling, kept so existing behavior is a subset
    "transfer-encoding",
    "upgrade",
})

log = logging.getLogger("pipelock-bridge")


class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Never follow a 3xx seen during the scan request.

    The scan only needs pipelock's verdict. Following a redirect
    relayed from the upstream (typically http → https) would make
    urllib fetch the new URL itself; with only an "http" proxy
    configured, an https:// target would be contacted directly from
    the sidecar, bypassing pipelock. Returning None makes urllib
    raise HTTPError for the 3xx instead."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None


def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Return True iff pipelock's response indicates the proxy itself
    blocked (DLP / allowlist), distinguishing from a relayed upstream
    4xx that pipelock happened to forward back.

    Pipelock's block bodies are plain text starting with
    `blocked: <reason>` and the status is always 403. A relayed
    upstream response has whatever body the upstream sent —
    extremely unlikely to begin with `blocked: `. Pinned empirically
    against pipelock v2.3.0 in the impl spike (DLP block:
    "blocked: request body contains secret: GitHub Token";
    allowlist block: "blocked: domain not in allowlist: example.com").

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so we can match on a
    structured signal instead of pattern-matching the body."""
    return status == 403 and body_bytes.startswith(b"blocked: ")


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call. Returns (status, body_bytes). Raises on
    transport-level errors so the caller can fail closed.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Redirects are never followed: a relayed 3xx is returned as-is
    (and treated as "not a block" by the caller)."""
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    # With a non-empty body, urllib derives Content-Length from `data`;
    # drop the caller's value so a stale length (e.g. that of a
    # compressed original) can't produce a malformed request.
    dropped = _HOP_BY_HOP | {"content-length"} if body else _HOP_BY_HOP
    forwarded_headers = {
        name: value for name, value in headers.items()
        if name.lower() not in dropped
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirectHandler(),
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Context manager closes the connection once the body is read.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as err:
        # Non-2xx (including pipelock's 403 and any un-followed 3xx)
        # arrives as HTTPError, which is also a response object.
        try:
            return err.code, err.read()
        finally:
            err.close()


class PipelockBridge:
    """mitmproxy addon class. mitmproxy instantiates one of these via
    the `addons = [...]` module attribute at the bottom of this file."""

    def __init__(self) -> None:
        # Read once per sidecar lifetime. Empty string is allowed at
        # construction (so the module can be imported in test
        # environments) but the request handler fails closed if it's
        # missing at request time.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy callback. Called for each decrypted client
        request before mitmproxy forwards to the real upstream.
        Setting flow.response short-circuits the flow with that
        response; leaving it None lets mitmproxy proceed.

        Fails closed (503) when the pipelock URL is unset, when the
        request body cannot be decoded, or when pipelock cannot be
        reached."""
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            flow.response = http.Response.make(
                503,
                b"egress scanner not configured",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "misconfigured"},
            )
            return

        target_url = flow.request.pretty_url
        method = flow.request.method
        # The body handed to pipelock is mitmproxy's *decoded* content,
        # so the original Content-Encoding no longer describes it.
        headers = {
            name: value for name, value in flow.request.headers.items()
            if name.lower() != "content-encoding"
        }

        # NOTE(review): this hook is synchronous, so the urllib call
        # below presumably blocks mitmproxy's event loop for up to
        # PIPELOCK_TIMEOUT_SEC per request — confirm whether an async
        # hook is wanted under load.
        try:
            # `.content` can raise ValueError on an invalid
            # content-encoding; keep it inside the try so an
            # undecodable body is refused rather than escaping the
            # hook unscanned.
            body = bytes(flow.request.content or b"")
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as e:
            # Fail closed: scanner unreachable means no verdict, so
            # refuse rather than risk leaking.
            log.warning("pipelock unreachable; failing closed: %s", e)
            flow.response = http.Response.make(
                503,
                b"egress scanner unreachable",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "error"},
            )
            return

        if is_pipelock_block(status, response_body):
            flow.response = http.Response.make(
                status,
                response_body,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "block"},
            )
            return

        # Allow path: discard pipelock's response (it's the wasted
        # upstream-forward attempt). Leave flow.response as None;
        # mitmproxy proceeds to the real upstream on its own.


addons = [PipelockBridge()]
The +addon forwards the decrypted request to pipelock as a plain HTTP +forward-proxy call so pipelock's DLP, URL-scan, and header-scan +layers fire on real bodies. On the verdict, the addon either +short-circuits the flow with a 403 (block) or lets mitmproxy +proceed to the real upstream (allow). mitmproxy itself generates +the ephemeral per-bottle CA on startup; the public cert is copied +into the agent's trust store and the private key dies with the +sidecar on teardown. + +This is Topology A' from `docs/research/tls-mitm-for-pipelock.md` — +a variant of the research note's Topology A after a spike showed +mitmproxy's `upstream` mode re-wraps decrypted flows in a new +CONNECT to the upstream proxy (which would defeat the entire +point). The addon recovers the design by emitting plain HTTP to +pipelock explicitly instead of relying on mitmproxy's `upstream` +chaining. + +## Problem + +PRD 0001 wired pipelock onto every bottle's egress, but the current +topology only sees `CONNECT` hostnames and opaque TLS bytes: + +``` +agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet + \____________________________ + opaque TLS bytes +``` + +What pipelock cannot scan in this mode is documented in +`docs/research/tls-mitm-for-pipelock.md` §What pipelock cannot see +today: request URLs and methods, request and response headers, +request and response bodies, MCP JSON-RPC payloads, inner-vs-outer +hostname (the domain-fronting check), and WebSocket frames inside a +TLS-wrapped upgrade. The 48-pattern DLP layer this project relies on +in PRD 0001 is therefore inert against every host in the current +`DEFAULT_ALLOWLIST` — all of which are HTTPS-only. + +The integration test added in `tests/integration/test_pipelock_blocks_secret_post.py` +demonstrates the gap concretely: pipelock's body-scan layer only +fires when the agent is forced to send plain HTTP. Real Claude Code +traffic to `api.anthropic.com` goes over CONNECT-tunneled TLS and +slips past the scanner. 
+ +`pipelock-assessment.md` §Scope gaps names this as a known +limitation of the proxy-without-TLS-inspection shape. Closing it is +the explicit motivation for `tls-mitm-for-pipelock.md`, whose +recommendation this PRD implements (with the addon adjustment +forced by the upstream-mode spike). + +## Goals / Success Criteria + +The feature works when all of the following are observable: + +- A Node request from inside a launched bottle to a CONNECT-bumped + HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) carrying a + pipelock-recognized credential pattern in the body returns 403 + from the bottle's egress chain — not a response from the upstream. + The existing `test_pipelock_blocks_secret_post` test path becomes + the HTTPS variant of this assertion. +- A plain HTTPS GET from inside the bottle to an allowlisted host + with no credential pattern (e.g. `GET https://raw.githubusercontent.com/...`) + returns the real upstream response — the addon doesn't break + clean traffic. +- Claude Code itself reaches `api.anthropic.com` end-to-end through + the bottle and completes a chat round-trip. No TLS-trust errors + in the agent process. +- mitmproxy's flow log and pipelock's `body_dlp` / `header_dlp` / + `core_dlp` event lines both appear for the same outbound request, + confirming the two-stage path is active. + +The feature is **done** when all of the following ship: + +- A new `MitmproxyProxy` class with the same `prepare` / `start` / + `stop` lifecycle shape as `PipelockProxy`, wired into the Docker + backend's launch step. +- A vendored Python addon at `claude_bottle/mitmproxy/addon.py` + that mitmproxy loads on startup via `mitmdump -s ...`. The sidecar + runs in `regular` mode (default), not `upstream` mode. 
+- The bottle launch step starts the mitmproxy sidecar, waits for + the sidecar-internal CA to be generated, copies the CA public + cert into the agent at `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, + runs `update-ca-certificates` inside the agent, and threads the + `NODE_EXTRA_CA_CERTS` / `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` + env trio onto the agent container's runtime env. +- The agent's `HTTPS_PROXY` / `HTTP_PROXY` point at the mitmproxy + sidecar (where they pointed at pipelock under PRD 0001). +- pipelock is otherwise unchanged. It continues to load the YAML + PRD 0001 generates and runs its existing scanning pipeline; the + addon talks to it via the same forward-proxy interface today's + `test_pipelock_blocks_secret_post` uses. +- On bottle teardown the mitmproxy sidecar is removed and the + ephemeral CA private key is gone with it. +- An HTTPS variant of `test_pipelock_blocks_secret_post` proves + pipelock now blocks a credential POST over HTTPS rather than + plain HTTP. +- An integration test proves a non-credential HTTPS GET through + the chain returns the upstream's real response. +- The dry-run preflight (`start --dry-run`) shows the mitmproxy + sidecar in both text and `--format=json` output. The JSON + contract gains a reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }` + block; fingerprint is always null at dry-run because the CA + doesn't exist yet. Real launches emit a one-line stderr log: + `claude-bottle: mitm ca fingerprint: ...`. + +## Non-goals + +- **Topology C** — extending pipelock itself to terminate TLS. The + research note's recommended long-term shape, but substantial Go + work plus the Apache-2.0-vs-ELv2 question. Deferred. +- **Topology D as canonical** — mitmproxy with a pipelock `/scan` + HTTP endpoint. The addon in this PRD talks to pipelock via its + existing forward-proxy interface; no upstream pipelock change + needed. 
+- **Persistent or shared CA across bottles.** Each bottle gets a + fresh CA generated by its own mitmproxy at startup. +- **Selective bumping ("ignore_hosts") as a v1 manifest field.** + v1 bumps every CONNECT. If a future allowlisted host turns out + to pin (Mobile / Chromium-style cert pinning), a follow-up PRD + adds the per-host opt-out via `bottle.egress.tls_bump_ignore`. + Strictly additive. +- **HTTP/3 / QUIC.** mitmproxy's HTTP/3 support is experimental. + v1 relies on the v1-egress iptables layer blocking UDP/443 to + force clients onto HTTP/2 over TCP, which mitmproxy 12 inspects + natively (verified by spike). +- **Raw TCP / non-HTTP TLS interception.** mitmproxy supports it + via `--mode reverse:`, not in CONNECT-bump mode. SSH and any + future raw-TCP egress route around mitmproxy entirely. +- **Trust-store rewiring for non-Debian agent images.** The + current `Dockerfile` is `node:22-slim` (Debian). If a future base + switches to Red-Hat-family, the `update-ca-certificates` step + becomes `update-ca-trust`. Out of scope until the base changes. +- **Response-body scanning.** Pipelock supports it; we don't wire + it in v1 because the addon would need to ferry the upstream + response back through pipelock's scanner, which the forward- + proxy interface doesn't support cleanly. v2 candidate. +- **MCP scanning on the bumped path.** Only fires on MCP-formatted + JSON-RPC payloads inside tool calls. Not relevant to plain HTTPS + agent traffic and out of v1 scope. +- **Domain-fronting verification.** Once the addon sees the inner + `Host` / `:authority`, comparing it to the outer CONNECT target + catches domain fronting. Worth ~10 lines in the addon, but + defer until the rest of v1 is settled. +- **Host-side openssl / `cryptography` for CA generation.** The + research note's open question on this is resolved by letting + mitmproxy itself generate the CA (it does so on first launch). + No new host-side crypto. 
+ +## Scope + +### In scope + +- New `claude_bottle/mitmproxy/` package: + - `__init__.py` — backend-agnostic. Constants (sidecar port, + image-pin digest, the in-container addon path), the abstract + `MitmproxyProxy` class with `prepare` / `start` / `stop` shape + mirroring `PipelockProxy`, and the small helper that reads the + CA fingerprint from a PEM file via `openssl x509 -fingerprint` + shelled out. + - `addon.py` — the Python addon mitmproxy loads. ~80–150 lines. + For each `request` event: forward the decrypted request to + pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a + plain HTTP forward-proxy call (absolute-URI form). Inspect + pipelock's response. If status is 403 *and* the body matches + pipelock's known block-event shape, set the flow's response to + a 403 with pipelock's body and short-circuit. Otherwise, + discard pipelock's response (and any wasted upstream-leg + response from pipelock's forwarder) and let mitmproxy proceed + to the real upstream. +- New `claude_bottle/backend/docker/mitmproxy.py` — + `DockerMitmproxyProxy(MitmproxyProxy)` with the Docker-specific + start/stop lifecycle. `start(plan)` does `docker create` / + `docker cp addon.py …` / `docker network connect` / `docker start`, + analogous to the existing `DockerPipelockProxy.start`. Injects + `CLAUDE_BOTTLE_PIPELOCK_URL` into the sidecar env so the addon + knows where pipelock lives. +- New provisioner `claude_bottle/backend/docker/provision/ca.py`. + Polls mitmproxy for the cert file, copies it through a host + stage dir into the agent, runs `update-ca-certificates` inside + the agent, computes the SHA-256 fingerprint, and prints the + one-line stderr log. +- `BottleBackend.provision_ca(plan, target)` joins the four + existing provisioner methods on the abstract base. Default impl + is no-op so other backends don't break when they don't yet + implement TLS interception. +- `DockerBottlePlan` grows a `mitmproxy_plan` field mirroring the + existing `proxy_plan`. 
+- Agent container `docker run` invocation: + - `HTTPS_PROXY` / `HTTP_PROXY` change from the pipelock service + name to the mitmproxy service name. + - Three `-e` flags set the CA env trio so they're inherited by + the eventual `docker exec claude` (Docker propagates run-time + env into exec by default; fallback in Q1 below). +- Dry-run preflight rendering of the mitmproxy entry (text + JSON). + JSON gains `egress.mitm: { "enabled": true, "ca_fingerprint": null }`. +- One stderr log line at launch with the CA fingerprint. +- Two new integration tests under `tests/integration/`: + - `test_mitmproxy_blocks_secret_https_post.py` — HTTPS variant + of the existing block-secret test. Asserts pipelock's body + DLP fires on a credential POST tunneled through CONNECT. + - `test_mitmproxy_allows_normal_https.py` — confirms a plain + HTTPS GET on an allowlisted host returns the upstream response, + isolating the addon's pass-through path from the block path. +- Unit tests for the addon's verdict logic (block vs allow on + status + body shape, edge cases) using mitmproxy's `mitmproxy.test` + flow fixtures. Unit tests for the proxy config builder + (mirroring `tests/unit/test_pipelock_yaml.py`). + +### Out of scope + +- The v1 iptables + dnsmasq layer (separate PRD; see + `network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only; + raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer. +- Pipelock config changes. Pipelock continues to load the YAML + PRD 0001 generates; the addon talks to it via the existing + forward-proxy interface. +- A bottle-level toggle to skip mitmproxy entirely. v1 always + wires it in. +- Pinning-host detection automation. The cost of finding out (per + research) is a single 5-minute test before adding a host; it + stays a manual step. +- Pipelock upstream contributions for an `X-Pipelock-Verdict` header. + Possible follow-up. Until then the addon distinguishes blocks + from passes via status + body fingerprint. 
+ +## Proposed Design + +### Topology + +``` +agent --HTTPS_PROXY--> mitmproxy --addon--> pipelock (scan) + (bump TLS) | + ^ | (verdict via status code) + | v + +-- on allow ----- real upstream + (mitmproxy as client) +``` + +All three containers live on the same per-bottle internal Docker +network. mitmproxy and pipelock are both attached to the per-bottle +egress bridge for real-internet reach; the agent has no default +route. + +Concretely: + +- Agent sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`. + PRD 0001 had this pointing at pipelock; the hostname swap is the + only agent-side env change. +- mitmproxy runs in **`regular`** mode (default; no `--mode` flag). + It bumps every CONNECT, generates fake leaf certs signed by its + own CA, and presents them to the agent. +- The addon, loaded via `mitmdump -s /addon/addon.py`, intercepts + each decrypted `request` event. It forwards the request to + pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a + plain HTTP forward-proxy call (absolute-URI form), so pipelock + sees the full URL, headers, and body. +- The addon inspects pipelock's response. If status is 403 *and* + the response body matches pipelock's known block-event shape, + the addon sets the mitmproxy flow's response to a 403 with + pipelock's body and short-circuits. Otherwise — including the + case where pipelock's forwarder attempted the upstream and got + a 4xx — the addon discards pipelock's response and lets + mitmproxy proceed to the real upstream. +- mitmproxy completes the outbound TLS to the real destination + using its built-in trust store, just like any other forward + proxy. Pipelock is only involved as a scanner. + +The trade-off: pipelock makes a wasted upstream forward attempt +for every allowed request (it tries to forward over plain HTTP to +a real HTTPS-only host, which fails with the upstream's 4xx). 
This +is benign — the scan completes before forwarding, the verdict +reaches the addon, the upstream-side request happens to die in +pipelock's forwarder rather than reach the agent. Acceptable cost +for the visibility win. A pipelock-side improvement (skip the +forward when the addon only needs the scan verdict) is a future +optimization. + +### New components + +- `claude_bottle/mitmproxy/__init__.py` — backend-agnostic + abstract base, constants, the `openssl x509 -fingerprint` helper. +- `claude_bottle/mitmproxy/addon.py` — the scanning addon. + Reads pipelock's URL from `CLAUDE_BOTTLE_PIPELOCK_URL` (injected + into the sidecar env by the proxy's `start`). For each + `request` flow: synchronously forward it to pipelock; inspect status + + body; either short-circuit with 403 or fall through. +- `claude_bottle/backend/docker/mitmproxy.py` — + `DockerMitmproxyProxy(MitmproxyProxy)` with start/stop, the + `docker cp` of the addon into the sidecar before `docker start`, + and the `CLAUDE_BOTTLE_PIPELOCK_URL` wiring. + +### CA lifecycle + +Simplified by letting mitmproxy own the generation: + +- **Generation.** mitmproxy generates a fresh CA on startup + inside its container at `/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem` + (public) + `mitmproxy-ca.pem` (private). No host-side openssl + for *generation*; no host-side Python `cryptography` dep. +- **Volume strategy.** Container-internal only. No host bind + mount means the CA dies with the container. +- **Extraction.** `provision_ca` polls (~1s) for the cert file + via `docker exec`, then `docker cp` to host stage dir, then + `docker cp` into the agent. Host stage dir gets cleaned up by + the existing `start.py` `finally` block. +- **Bottle install.** + 1. `docker cp <stage>/mitm-ca.crt agent-<slug>:/usr/local/share/ca-certificates/claude-bottle-mitm.crt` + 2. `docker exec -u 0 agent-<slug> chmod 644 …` + 3. `docker exec -u 0 agent-<slug> update-ca-certificates` + 4. 
Three `-e` flags on `docker run` set the env trio + (`NODE_EXTRA_CA_CERTS=…/claude-bottle-mitm.crt`, + `SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`, + `REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`) so + `docker exec claude` inherits them. +- **Teardown.** Sidecar container removed; CA private key gone. +- **Fingerprint.** Computed post-extraction via shelled-out + `openssl x509 -fingerprint -sha256 -noout`. Logged once to + stderr at launch; never the private key. + +### Data model changes + +None to the manifest schema. The dry-run JSON contract gains a +reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }` +block. Fingerprint is always null at dry-run (CA doesn't exist +yet) but the field is reserved so future schema additions stay +non-breaking. + +A future selective-bump knob would add +`bottle.egress.tls_bump_ignore: [host, ...]` per the research +note. Strictly additive when it lands. + +### Existing code touched + +- **`claude_bottle/backend/docker/launch.py`** — bring up the + mitmproxy sidecar between pipelock and the agent. Repoint the + agent's `HTTPS_PROXY` / `HTTP_PROXY` env flags to mitmproxy. + Register an `ExitStack` callback for mitmproxy teardown. Print + the CA fingerprint once the sidecar reports ready. +- **`claude_bottle/backend/docker/prepare.py`** — call into + `MitmproxyProxy.prepare(...)` alongside `PipelockProxy.prepare(...)`, + populate `DockerBottlePlan.mitmproxy_plan`. +- **`claude_bottle/backend/docker/backend.py`** — add the + `DockerMitmproxyProxy` instance attribute (`self._mitm`) and + thread it through `launch` + cleanup, mirroring `self._proxy`. +- **`claude_bottle/backend/docker/bottle_plan.py`** — new + `mitmproxy_plan` field. `print()` and `to_dict()` learn to + render the mitmproxy entry and the `egress.mitm` JSON block. +- **`claude_bottle/backend/__init__.py`** — abstract + `BottleBackend.provision_ca` joins the four existing + provisioners; default no-op. 
+- **`tests/integration/`** — two new tests as described above. +- **`tests/unit/`** — addon-verdict tests, mitmproxy-config + builder tests, dry-run-plan test updated for the new + `egress.mitm` block. + +### External dependencies + +- **mitmproxy Docker image** pinned by digest on the `12.x` line. + Bumped deliberately, mirroring the pipelock pin. Verified by + spike to speak h2 on both halves. +- No new host-side runtimes. mitmproxy generates the CA; + fingerprint via the `openssl` already present on Debian / macOS + / ubuntu-latest runners. + +## Open questions + +(rewritten — most of the original v1 questions are now closed by +the walkthrough spikes; what remains is addon-implementation +specifics worth pinning during the first impl turn.) + +- **Pipelock's 403-body fingerprint.** The addon needs to + distinguish a pipelock block (DLP / host) from a real-upstream + 4xx that pipelock's forwarder relayed back. Most likely shape: + pipelock's 403 response carries a JSON body with `event` / + `scanner` fields, whereas a real-upstream 4xx carries whatever + the upstream sent. Pin the exact fingerprint by inspecting + pipelock's actual 403 body bytes at impl time. Long-term + cleanup: file an upstream feature request for an + `X-Pipelock-Verdict: block` response header so the addon can + read a structured signal instead of pattern-matching the body. +- **Docker run env-var inheritance through docker exec.** Plan + assumes `docker run -e VAR=value` propagates to subsequent + `docker exec` invocations. The Docker docs say so; not yet + empirically pinned on this project's runner setup. Verify in + the first impl turn. Trivial fallback: thread the three `-e` + flags onto every `DockerBottle.exec*` call. +- **Addon synchronous-call latency.** The addon makes a sync HTTP + call to pipelock per outbound flow. Pipelock is on the same + internal Docker network; expected per-call latency is well + under 10ms. 
Confirm under the parallel-request load Claude Code + generates (most likely a non-issue — Claude is single-stream + request-wise). +- **Addon test fixtures.** mitmproxy ships `mitmproxy.test` with + flow fixtures; addons can be unit-tested without a running + proxy. Confirm the import path and recommended fixture shape at + impl time; structure the addon so the verdict-decision is a + pure function that's trivially testable in isolation from any + HTTP I/O. +- **Pipelock allowing the addon's forwarded request through.** + pipelock will see the addon's request as coming from the + mitmproxy sidecar's IP on the internal network. Confirm + pipelock has no client-IP allowlist that would reject these. + Likely fine — pipelock's `client_ip` is informational in the + scan event, not a gate. + +## References + +- `docs/research/tls-mitm-for-pipelock.md` — primary source. This + PRD implements a variant of §Recommendation (Topology A) after + the spike documented under "Open questions" §1 falsified the + `upstream` mode assumption. +- `docs/research/pipelock-assessment.md` §Scope gaps — names the + TLS-inspection gap closed here. +- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` — + egress-proxy baseline this PRD extends. +- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC + contract this PRD adds a `provision_ca` method to. +- `docs/prds/0004-split-out-provisioners.md` — per-provisioner + module pattern reused for the new CA provisioner. 
+- mitmproxy: , + +- mitmproxy modes: +- mitmproxy CA cert installation: + +- mitmproxy addon API: +- Node `NODE_EXTRA_CA_CERTS`: + diff --git a/tests/integration/test_dry_run_plan.py b/tests/integration/test_dry_run_plan.py index c0ae3eb..45c4564 100644 --- a/tests/integration/test_dry_run_plan.py +++ b/tests/integration/test_dry_run_plan.py @@ -92,6 +92,12 @@ class TestDryRunPlan(unittest.TestCase): self.assertEqual(sorted(set(hosts)), hosts, "hosts must be sorted and deduplicated") + # PRD 0005: TLS interception block is part of the JSON + # contract. Fingerprint is null at dry-run (CA doesn't + # exist yet); real launches print it to stderr. + self.assertEqual({"enabled": True, "ca_fingerprint": None}, + plan["egress"]["mitm"]) + # No Docker side effects (see the GITEA_ACTIONS skip note # above — this guard runs locally only). if check_side_effects: diff --git a/tests/integration/test_mitmproxy_allows_normal_https.py b/tests/integration/test_mitmproxy_allows_normal_https.py new file mode 100644 index 0000000..35fc631 --- /dev/null +++ b/tests/integration/test_mitmproxy_allows_normal_https.py @@ -0,0 +1,167 @@ +"""Integration: with mitmproxy in front of pipelock, a plain HTTPS +GET to an allowlisted host with no credential pattern still gets +through end-to-end. + +The complement to test_mitmproxy_blocks_secret_https_post — together +they isolate the addon's two paths (block vs. allow). This test +also functions as the end-to-end TLS-trust check: if the agent's +trust store didn't have mitmproxy's CA installed, the TLS handshake +between the agent and mitmproxy's bumped cert would fail and the +fetch would throw before we ever saw a response. 
+""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from tests._docker import skip_unless_docker +from tests.fixtures import fixture_minimal + + +# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST. +# Pick a file path that's stable enough across runs — `git`'s own +# README.md on the master branch is a long-lived artifact and one +# of github's most-trafficked raw files. +_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md" + +# stdlib http (for CONNECT) + tls (for the bumped tunnel); see the +# block test for the rationale on not pulling undici in as a dep. +# +# Output contract: +# - "status=<code>" HTTP status from upstream (or addon, if +# blocked) +# - "bridge=<value>" X-Pipelock-Bridge header; empty on allow +# - "len=<n>" response body length, sanity-check it's a +# real response and not an empty proxy stub +# - "error=<...>" thrown error +_PROBE_JS = r""" +const http = require('http'); +const tls = require('tls'); + +const proxy = new URL(process.env.HTTPS_PROXY); + +const connectReq = http.request({ + host: proxy.hostname, + port: proxy.port, + method: 'CONNECT', + path: 'raw.githubusercontent.com:443', +}); +connectReq.setTimeout(10000, () => { + console.log('timeout=connect'); + connectReq.destroy(); +}); +connectReq.on('error', (e) => { + console.log('error=' + (e.code || '') + ' ' + e.message); +}); +connectReq.on('connect', (res, socket) => { + if (res.statusCode !== 200) { + console.log('status=' + res.statusCode); + console.log('bridge=' + (res.headers['x-pipelock-bridge'] || '')); + return; + } + const tlsSocket = tls.connect({ + socket: socket, + servername: 'raw.githubusercontent.com', + }); + tlsSocket.on('secureConnect', () => { + tlsSocket.write( + 'GET /git/git/master/README.md HTTP/1.1\r\n' + + 'Host: raw.githubusercontent.com\r\n' + + 'User-Agent: claude-bottle-mitm-test\r\n' 
+ + 'Accept: */*\r\n' + + 'Connection: close\r\n' + + '\r\n' + ); + }); + let buf = Buffer.alloc(0); + tlsSocket.on('data', (c) => { buf = Buffer.concat([buf, c]); }); + tlsSocket.on('end', () => { + const text = buf.toString('utf8'); + const headersEnd = text.indexOf('\r\n\r\n'); + const head = headersEnd >= 0 ? text.slice(0, headersEnd) : text; + const body = headersEnd >= 0 ? text.slice(headersEnd + 4) : ''; + const lines = head.split('\r\n'); + const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/); + let bridge = ''; + for (let i = 1; i < lines.length; i++) { + const ix = lines[i].indexOf(': '); + if (ix < 0) continue; + if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') { + bridge = lines[i].slice(ix + 2); + } + } + console.log('status=' + (m ? m[1] : '?')); + console.log('bridge=' + bridge); + console.log('len=' + body.length); + }); + tlsSocket.on('error', (e) => { + console.log('tls_error=' + (e.code || '') + ' ' + e.message); + }); +}); +connectReq.end(); +""" + + +@skip_unless_docker() +class TestMitmproxyAllowsNormalHttps(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_get_to_allowed_host_succeeds(self): + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=fixture_minimal(), + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -e\n" + "cat > /tmp/probe.js <<'PROBE_EOF'\n" + f"{_PROBE_JS}\n" + "PROBE_EOF\n" + "node /tmp/probe.js\n" + ) + result = bottle.exec(script) + finally: + shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} 
stderr={result.stderr!r}", + ) + # The TLS-trust setup is implicit here — if it had failed, + # fetch would have thrown rather than returned a status. + self.assertIn( + "status=200", result.stdout, + f"expected 200 from raw.githubusercontent.com; got: {result.stdout!r}", + ) + # X-Pipelock-Bridge is set only on the addon's short-circuit + # paths (block / misconfigured / scanner-unreachable). An + # allow flow goes straight through mitmproxy to upstream and + # the header should be absent. + self.assertIn( + "bridge=\n", result.stdout, + f"X-Pipelock-Bridge unexpectedly present on the allow " + f"path: {result.stdout!r}", + ) + # Sanity: the README is many KB. An empty body would suggest + # the response was synthesized by something in the chain + # rather than fetched from github. + self.assertNotIn("len=0\n", result.stdout) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_mitmproxy_blocks_secret_https_post.py b/tests/integration/test_mitmproxy_blocks_secret_https_post.py new file mode 100644 index 0000000..c6393df --- /dev/null +++ b/tests/integration/test_mitmproxy_blocks_secret_https_post.py @@ -0,0 +1,172 @@ +"""Integration: with mitmproxy in front of pipelock, a credential +POST sent over HTTPS is now blocked by pipelock's body-scan layer. + +This is the HTTPS variant of test_pipelock_blocks_secret_post — the +two together prove the TLS-interception layer is doing the work the +PRD targets. The earlier plain-HTTP test only fired because the agent +was forced to bypass TLS; real Claude Code traffic to api.anthropic.com +goes over CONNECT-tunneled HTTPS and would have slipped past pipelock +prior to this PRD. + +End-to-end: drives `BottleBackend.prepare → launch` so the real +image build, network plumbing, pipelock sidecar, mitmproxy sidecar, +ephemeral CA generation, and trust-store install are all in the +loop. 
+""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from claude_bottle.manifest import Manifest +from tests._docker import skip_unless_docker + + +# Synthetic value shaped like a GitHub Personal Access Token; not a +# real credential. Pipelock's default DLP rules pattern-match this +# format and mitmproxy's addon short-circuits with the 403 it +# receives back. +_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ" + + +# Build the request by hand using stdlib `http` (for CONNECT) and +# `tls` (for the bumped tunnel). Node 22's `fetch` doesn't expose +# proxy configuration without undici as an installable dep, and +# this project keeps the bottle image dep-light. NODE_EXTRA_CA_CERTS +# is wired by launch.py so the agent trusts mitmproxy's bumped cert. +# +# Output contract (parsed by the test): +# - "status=<code>" HTTP status of the decrypted response +# - "bridge=<value>" X-Pipelock-Bridge header from the addon's +# short-circuit, empty on the allow path +# - "error=<...>" thrown error +_PROBE_JS = r""" +const http = require('http'); +const tls = require('tls'); + +const proxy = new URL(process.env.HTTPS_PROXY); +const body = 'token=' + process.env.FAKE_TOKEN; + +const connectReq = http.request({ + host: proxy.hostname, + port: proxy.port, + method: 'CONNECT', + path: 'api.anthropic.com:443', +}); +connectReq.setTimeout(8000, () => { + console.log('timeout=connect'); + connectReq.destroy(); +}); +connectReq.on('error', (e) => { + console.log('error=' + (e.code || '') + ' ' + e.message); +}); +connectReq.on('connect', (res, socket) => { + if (res.statusCode !== 200) { + console.log('status=' + res.statusCode); + console.log('bridge=' + (res.headers['x-pipelock-bridge'] || '')); + return; + } + const tlsSocket = tls.connect({ + socket: socket, + servername: 'api.anthropic.com', + }); + tlsSocket.on('secureConnect', () => { + 
tlsSocket.write( + 'POST /dlp-probe HTTP/1.1\r\n' + + 'Host: api.anthropic.com\r\n' + + 'Content-Type: application/x-www-form-urlencoded\r\n' + + 'Content-Length: ' + Buffer.byteLength(body) + '\r\n' + + 'Connection: close\r\n' + + '\r\n' + body + ); + }); + let buf = ''; + tlsSocket.on('data', (c) => { buf += c.toString('utf8'); }); + tlsSocket.on('end', () => { + const lines = buf.split('\r\n'); + const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/); + let bridge = ''; + for (let i = 1; i < lines.length; i++) { + if (lines[i] === '') break; + const ix = lines[i].indexOf(': '); + if (ix < 0) continue; + if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') { + bridge = lines[i].slice(ix + 2); + } + } + console.log('status=' + (m ? m[1] : '?')); + console.log('bridge=' + bridge); + }); + tlsSocket.on('error', (e) => { + console.log('tls_error=' + (e.code || '') + ' ' + e.message); + }); +}); +connectReq.end(); +""" + + +@skip_unless_docker() +class TestMitmproxyBlocksSecretHttpsPost(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_post_with_credential_body_is_blocked(self): + manifest = Manifest.from_json_obj({ + "bottles": { + "dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}}, + }, + "agents": { + "demo": {"skills": [], "prompt": "", "bottle": "dev"}, + }, + }) + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=manifest, + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -e\n" + "cat > /tmp/probe.js <<'PROBE_EOF'\n" + f"{_PROBE_JS}\n" + "PROBE_EOF\n" + "node /tmp/probe.js\n" + ) + result = bottle.exec(script) + finally: + 
shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + # The addon short-circuits the flow with X-Pipelock-Bridge: block + # on a pipelock block — the cleanest signal that the chain + # mitmproxy(bump) -> addon(forward) -> pipelock(scan) -> block + # all happened, end to end. + self.assertIn( + "status=403", result.stdout, + f"expected 403 from pipelock block; got: {result.stdout!r}", + ) + self.assertIn( + "bridge=block", result.stdout, + f"X-Pipelock-Bridge header missing; the addon may not be " + f"in path: {result.stdout!r}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_mitmproxy_verdict.py b/tests/unit/test_mitmproxy_verdict.py new file mode 100644 index 0000000..fcba0cf --- /dev/null +++ b/tests/unit/test_mitmproxy_verdict.py @@ -0,0 +1,62 @@ +"""Unit: the addon's verdict function pinning pipelock-block vs. +relayed-upstream 4xx. + +The fingerprint shape is the contract the addon depends on; this +test should break loudly if pipelock changes its 403-body prefix +under a version bump.""" + +from __future__ import annotations + +import unittest + +from claude_bottle.mitmproxy.addon import is_pipelock_block + + +class TestIsPipelockBlock(unittest.TestCase): + def test_block_dlp_body(self): + # Pipelock v2.3.0 DLP block, captured in the impl spike. + self.assertTrue(is_pipelock_block( + 403, + b"blocked: request body contains secret: GitHub Token", + )) + + def test_block_allowlist_body(self): + # Pipelock v2.3.0 allowlist block, captured in the impl spike. + self.assertTrue(is_pipelock_block( + 403, + b"blocked: domain not in allowlist: example.com", + )) + + def test_block_header_dlp_body(self): + # Header DLP path; same body prefix per the spike. 
+ self.assertTrue(is_pipelock_block( + 403, + b"blocked: request header Authorization contains secret", + )) + + def test_403_without_blocked_prefix_is_not_a_block(self): + # A real-upstream 403 relayed by pipelock — body is whatever + # the upstream sent, almost certainly not starting with + # `blocked: `. Must be treated as allow so the addon hands + # the flow back to mitmproxy. + self.assertFalse(is_pipelock_block( + 403, + b'{"error":"forbidden","detail":"insufficient permissions"}', + )) + + def test_non_403_with_blocked_prefix_is_not_a_block(self): + # Defensive: if some intermediate ever returns 502/504 with + # a body that happens to begin `blocked: `, we should still + # not short-circuit. Block status is always 403 by contract. + self.assertFalse(is_pipelock_block(502, b"blocked: ...")) + + def test_200_is_not_a_block(self): + # Allow path, normal forwarded response. + self.assertFalse(is_pipelock_block(200, b'{"ok":true}')) + + def test_empty_body_is_not_a_block(self): + self.assertFalse(is_pipelock_block(403, b"")) + + +if __name__ == "__main__": + unittest.main()