"""DockerMitmproxyProxy — the Docker-specific lifecycle for the
mitmproxy sidecar. Inherits the addon-bundling from MitmproxyProxy.

The sidecar runs `mitmdump -s /addon/addon.py`, listens on
MITMPROXY_PORT inside the per-bottle internal network, and generates
its own ephemeral CA on first launch (extracted by provision_ca,
installed into the agent's trust store)."""

from __future__ import annotations

import os
import subprocess
import time
from pathlib import Path

from ...log import die, info, warn
from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan


# mitmproxy/mitmproxy:12.2.3 (mitmproxy v12 release line). The digest
# is the multi-arch image index — pulls resolve to the right per-arch
# child digest. Bumped deliberately; see PRD 0005.
MITMPROXY_IMAGE = os.environ.get(
    "CLAUDE_BOTTLE_MITMPROXY_IMAGE",
    "mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f",
)

# Listening port for mitmproxy's forward proxy (agent-facing).
MITMPROXY_PORT = os.environ.get("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080")

# Path inside the sidecar where the addon is dropped by docker cp.
MITMPROXY_ADDON_PATH = "/addon/addon.py"

# Path inside the sidecar where mitmproxy generates its CA.
_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem"

# How long extract_ca_cert waits for the CA file, and how often it
# re-checks while waiting.
_CA_POLL_TIMEOUT_SEC = 15
_CA_POLL_INTERVAL_SEC = 0.5


def mitmproxy_container_name(slug: str) -> str:
    """Return the sidecar container name for the bottle `slug`."""
    return f"claude-bottle-mitm-{slug}"


def mitmproxy_proxy_url(slug: str) -> str:
    """Return the forward-proxy URL of the sidecar for `slug`: the
    container name as host, MITMPROXY_PORT as port."""
    return f"http://{mitmproxy_container_name(slug)}:{MITMPROXY_PORT}"


def _docker(*args: str) -> subprocess.CompletedProcess[str]:
    """Run `docker <args>` with stdout/stderr captured as text.

    Never raises on a non-zero exit status; callers inspect
    `.returncode` and can fold `.stderr` into their error message."""
    return subprocess.run(
        ["docker", *args],
        capture_output=True,
        text=True,
        check=False,
    )


def _with_stderr(summary: str, result: subprocess.CompletedProcess[str]) -> str:
    """Append docker's stderr to `summary` when there is any, so a
    failure message says why the command failed, not just that it did."""
    detail = (result.stderr or "").strip()
    return f"{summary}: {detail}" if detail else summary


class DockerMitmproxyProxy(MitmproxyProxy):
    """Brings the mitmproxy sidecar up and down via Docker."""

    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Boot the mitmproxy sidecar:
          1. `docker create` on the internal network with mitmdump
             argv: `--listen-port <port> -s <addon path>` plus the
             pipelock URL injected as an env var.
          2. `docker cp` the vendored addon to the sidecar.
          3. Attach to the per-agent egress network so mitmproxy
             can reach real upstreams.
          4. `docker start`.
        Any failing step is reported through `die`, with docker's
        stderr included; if the container was already created it is
        force-removed first so no half-built sidecar is left behind.
        Returns the container name (the proxy_target passed to .stop
        and .extract_ca_cert)."""
        name = mitmproxy_container_name(plan.slug)
        if not plan.addon_src.is_file():
            die(f"mitmproxy addon not found at {plan.addon_src}")

        info(f"starting mitmproxy sidecar {name} on network {plan.internal_network}")

        created = _docker(
            "create",
            "--name", name,
            "--network", plan.internal_network,
            "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}",
            MITMPROXY_IMAGE,
            "mitmdump",
            "--listen-port", MITMPROXY_PORT,
            "-s", MITMPROXY_ADDON_PATH,
        )
        if created.returncode != 0:
            # Nothing to clean up: the container was never created.
            die(_with_stderr(f"failed to create mitmproxy sidecar {name}", created))

        # Steps that run against the created container, in order. Each
        # one pairs the docker argv with the message used if it fails.
        post_create_steps = (
            (
                ("cp", str(plan.addon_src), f"{name}:{MITMPROXY_ADDON_PATH}"),
                f"failed to copy mitmproxy addon into {name}",
            ),
            (
                ("network", "connect", plan.egress_network, name),
                f"failed to attach mitmproxy sidecar {name} to egress "
                f"network {plan.egress_network}",
            ),
            (
                ("start", name),
                f"failed to start mitmproxy sidecar {name}",
            ),
        )
        for docker_args, failure_summary in post_create_steps:
            result = _docker(*docker_args)
            if result.returncode != 0:
                # Best-effort removal of the half-built container; its
                # own outcome is deliberately ignored.
                _docker("rm", "-f", name)
                die(_with_stderr(failure_summary, result))

        return name

    def stop(self, proxy_target: str) -> None:
        """Idempotent: missing container is success. Mirrors
        DockerPipelockProxy.stop. A failed removal is only warned
        about, with the manual cleanup command in the message."""
        if _docker("inspect", proxy_target).returncode != 0:
            # `docker inspect` failed: treated as "container is gone".
            return
        if _docker("rm", "-f", proxy_target).returncode != 0:
            warn(
                f"failed to remove mitmproxy sidecar {proxy_target}; "
                f"clean up with 'docker rm -f {proxy_target}'"
            )

    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Poll the running sidecar for the CA cert (mitmproxy
        generates it on first launch), then `docker cp` the public
        half to `dest_path`. Only the certificate file is copied; the
        private key is never read from the container.

        Calls `die` if the cert has not appeared within
        _CA_POLL_TIMEOUT_SEC seconds or if the copy fails."""
        deadline = time.monotonic() + _CA_POLL_TIMEOUT_SEC
        while time.monotonic() < deadline:
            probe = _docker("exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR)
            if probe.returncode == 0:
                break
            time.sleep(_CA_POLL_INTERVAL_SEC)
        else:
            # Loop ran out without `break`: the file never showed up.
            die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} "
                f"after {_CA_POLL_TIMEOUT_SEC}s — sidecar {proxy_target} may have failed to start")

        copied = _docker("cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path))
        if copied.returncode != 0:
            die(_with_stderr(
                f"failed to extract mitmproxy CA cert from {proxy_target}",
                copied,
            ))
"""mitmproxy TLS-interception sidecar for the per-bottle egress
topology (PRD 0005).

Sits in front of pipelock on the bottle's egress path so pipelock's
body / header / URL DLP scanners see plaintext for HTTPS targets.
The sidecar runs in mitmproxy's `regular` mode and loads the
vendored addon at `addon.py`; the addon forwards each decrypted
request to pipelock as a plain HTTP forward-proxy call and gates
the mitmproxy flow on pipelock's verdict.

This module is platform-agnostic: it owns the abstract proxy
lifecycle (prepare / start / stop / extract_ca_cert). The
Docker-specific lifecycle lives in
`claude_bottle/backend/docker/mitmproxy.py`.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Immutable start plan produced by MitmproxyProxy.prepare and
    handed to MitmproxyProxy.start.

    Both network names start out empty; the backend's launch step is
    expected to fill them in (via dataclasses.replace) once the
    networks exist — the same pattern as PipelockProxyPlan."""

    # Host-side path of the vendored addon.py, resolved at prepare time.
    addon_src: Path
    # Per-agent identifier; suffix of every per-bottle resource name.
    slug: str
    # Name of the per-bottle internal network ("" until populated).
    internal_network: str = ""
    # Name of the per-agent egress network ("" until populated).
    egress_network: str = ""


class MitmproxyProxy(ABC):
    """Abstract mitmproxy TLS-interception sidecar.

    Locating the addon (`prepare`) is shared by every backend;
    bringing the sidecar up and down and pulling out its CA cert are
    left to concrete subclasses."""

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Build the start plan for the bottle `slug`.

        The addon is the `addon.py` that sits next to this module; it
        is the same file for every bottle. Per-bottle wiring (the
        pipelock URL) is supplied at start time, not baked in here.

        Raises FileNotFoundError when `addon.py` is not on disk."""
        addon_path = Path(__file__).resolve().with_name("addon.py")
        if addon_path.is_file():
            return MitmproxyProxyPlan(addon_src=addon_path, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_path}; "
            f"the package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Start the sidecar described by `plan`.

        `pipelock_url` is passed to the sidecar's environment as
        CLAUDE_BOTTLE_PIPELOCK_URL so the addon knows where to send
        requests for scanning. The return value is the proxy_target
        string that identifies the running sidecar; pass that same
        value to `.stop` and `.extract_ca_cert`."""

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Tear down the sidecar named by `proxy_target`. Must be
        idempotent: a target that no longer exists counts as success."""

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Copy the sidecar's public CA certificate to `dest_path` on
        the host, waiting for the file to appear first (mitmproxy
        creates its CA on first launch). The private key must stay
        inside the sidecar."""
"""mitmproxy addon: forward each decrypted request to pipelock for
scanning, then either short-circuit with pipelock's 403 (block) or
let mitmproxy proceed to the real upstream (allow).

Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
Must be self-contained — the sidecar image doesn't have claude_bottle
on its import path. Imports are limited to the Python stdlib plus
mitmproxy itself (which is the host).

Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
(injected by DockerMitmproxyProxy.start).

The verdict function `is_pipelock_block` is exported as a pure
function so unit tests can exercise it without importing mitmproxy.
"""

from __future__ import annotations

import logging
import os
import urllib.error
import urllib.request


PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
PIPELOCK_TIMEOUT_SEC = 5

# Upper bound on how much of pipelock's response body is read back.
# The verdict only looks at the first few bytes, and on the allow path
# the body is thrown away, so there is no reason to buffer an entire
# relayed upstream response in memory.
PIPELOCK_MAX_BODY_BYTES = 64 * 1024

# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
})

log = logging.getLogger("pipelock-bridge")


def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Return True iff pipelock's response indicates the proxy itself
    blocked (DLP / allowlist), distinguishing from a relayed upstream
    4xx that pipelock happened to forward back.

    Pipelock's block bodies are plain text starting with
    `blocked: <reason>` and the status is always 403. A relayed
    upstream response has whatever body the upstream sent —
    extremely unlikely to begin with `blocked: `. Pinned empirically
    against pipelock v2.3.0 in the impl spike (DLP block:
    "blocked: request body contains secret: GitHub Token";
    allowlist block: "blocked: domain not in allowlist: example.com").

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so we can match on a
    structured signal instead of pattern-matching the body."""
    return status == 403 and body_bytes.startswith(b"blocked: ")


class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that refuses to follow any redirect.

    Returning None makes urllib surface the 3xx as an HTTPError
    instead of issuing a follow-up request of its own."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call. Returns (status, body_bytes), where
    body_bytes is at most PIPELOCK_MAX_BODY_BYTES long. Raises on
    transport-level errors so the caller can fail closed.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Redirects are never followed: only the response to this one
    request matters for the verdict. Left to its defaults urllib
    would re-issue the request at the Location target, and an
    https:// target is not covered by the "http"-only proxy mapping
    below, so that follow-up would not go through pipelock at all."""
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirectHandler(),
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # `with` closes the connection even though the body may not
        # have been read to the end.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read(PIPELOCK_MAX_BODY_BYTES)
    except urllib.error.HTTPError as err:
        # Non-2xx statuses (including pipelock's 403 and unfollowed
        # 3xx) arrive as HTTPError; it carries the response and has to
        # be closed like one.
        try:
            return err.code, err.read(PIPELOCK_MAX_BODY_BYTES)
        finally:
            err.close()


class PipelockBridge:
    """mitmproxy addon class. mitmproxy instantiates one of these via
    the `addons = [...]` module attribute at the bottom of this file."""

    def __init__(self) -> None:
        # Read once per sidecar lifetime. Empty string is allowed at
        # construction (so the module can be imported in test
        # environments) but the request handler fails closed if it's
        # missing at request time.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    @staticmethod
    def _short_circuit(flow, status: int, body: bytes, verdict: str) -> None:
        """Answer `flow` directly with a plain-text response tagged
        `X-Pipelock-Bridge: <verdict>`, so mitmproxy never contacts
        the upstream for it."""
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        flow.response = http.Response.make(
            status,
            body,
            {"Content-Type": "text/plain",
             "X-Pipelock-Bridge": verdict},
        )

    def request(self, flow) -> None:
        """mitmproxy callback. Called for each decrypted client
        request before mitmproxy forwards to the real upstream.
        Setting flow.response short-circuits the flow with that
        response; leaving it None lets mitmproxy proceed.

        Outcomes: 503 when the pipelock URL is not configured, 503
        when the scan could not be completed, pipelock's own 403 when
        it blocked, and no response (allow) otherwise."""
        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            self._short_circuit(
                flow, 503, b"egress scanner not configured", "misconfigured",
            )
            return

        try:
            # Reading the request fields happens inside the try as
            # well, so an unexpected error there also fails closed
            # instead of escaping this handler.
            request = flow.request
            status, response_body = _scan_via_pipelock(
                self._pipelock_url,
                request.method,
                request.pretty_url,
                dict(request.headers.items()),
                bytes(request.content or b""),
            )
        except Exception as e:
            # Fail closed: scanner unreachable means no verdict, so
            # refuse rather than risk leaking. Catching broadly is
            # deliberate — this is the boundary that decides
            # allow/deny, and every failure must land on deny.
            log.warning("pipelock unreachable; failing closed: %s", e)
            self._short_circuit(
                flow, 503, b"egress scanner unreachable", "error",
            )
            return

        if is_pipelock_block(status, response_body):
            self._short_circuit(flow, status, response_body, "block")
            return

        # Allow path: discard pipelock's response (it's the wasted
        # upstream-forward attempt). Leave flow.response as None;
        # mitmproxy proceeds to the real upstream on its own.


addons = [PipelockBridge()]
"""Unit: the addon's verdict function pinning pipelock-block vs.
relayed-upstream 4xx.

The fingerprint shape is the contract the addon depends on; this
test should break loudly if pipelock changes its 403-body prefix
under a version bump."""

from __future__ import annotations

import unittest

from claude_bottle.mitmproxy.addon import is_pipelock_block


class TestIsPipelockBlock(unittest.TestCase):
    """Pins `is_pipelock_block`: a block is status 403 AND a body
    that starts with `blocked: `; anything else is not a block."""

    def test_block_dlp_body(self):
        # Pipelock v2.3.0 DLP block, captured in the impl spike.
        self.assertTrue(is_pipelock_block(
            403,
            b"blocked: request body contains secret: GitHub Token",
        ))

    def test_block_allowlist_body(self):
        # Pipelock v2.3.0 allowlist block, captured in the impl spike.
        self.assertTrue(is_pipelock_block(
            403,
            b"blocked: domain not in allowlist: example.com",
        ))

    def test_block_header_dlp_body(self):
        # Header DLP path; same body prefix per the spike.
        self.assertTrue(is_pipelock_block(
            403,
            b"blocked: request header Authorization contains secret",
        ))

    def test_403_without_blocked_prefix_is_not_a_block(self):
        # A real-upstream 403 relayed by pipelock — body is whatever
        # the upstream sent, almost certainly not starting with
        # `blocked: `. Must be treated as allow so the addon hands
        # the flow back to mitmproxy.
        self.assertFalse(is_pipelock_block(
            403,
            b'{"error":"forbidden","detail":"insufficient permissions"}',
        ))

    def test_403_with_blocked_marker_mid_body_is_not_a_block(self):
        # The fingerprint is a prefix match, not a substring match: an
        # upstream 403 that merely mentions `blocked: ` somewhere in
        # its body must not be mistaken for a pipelock verdict.
        self.assertFalse(is_pipelock_block(
            403,
            b"error: blocked: domain not in allowlist: example.com",
        ))

    def test_403_with_leading_whitespace_is_not_a_block(self):
        # Same contract from the other side: nothing may precede the
        # prefix, not even whitespace.
        self.assertFalse(is_pipelock_block(
            403,
            b" blocked: domain not in allowlist: example.com",
        ))

    def test_non_403_with_blocked_prefix_is_not_a_block(self):
        # Defensive: if some intermediate ever returns 502/504 with
        # a body that happens to begin `blocked: `, we should still
        # not short-circuit. Block status is always 403 by contract.
        self.assertFalse(is_pipelock_block(502, b"blocked: ..."))

    def test_200_is_not_a_block(self):
        # Allow path, normal forwarded response.
        self.assertFalse(is_pipelock_block(200, b'{"ok":true}'))

    def test_empty_body_is_not_a_block(self):
        self.assertFalse(is_pipelock_block(403, b""))


if __name__ == "__main__":
    unittest.main()