PRD 0005: mitmproxy TLS interception for pipelock content scanning #8

Closed
didericis wants to merge 6 commits from mitmproxy-tls-interception into main
14 changed files with 1409 additions and 14 deletions
+22 -11
View File
@@ -204,24 +204,35 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
"""Build/run the bottle and yield a handle; tear down on exit.""" """Build/run the bottle and yield a handle; tear down on exit."""
def provision(self, plan: PlanT, target: str) -> str | None: def provision(self, plan: PlanT, target: str) -> str | None:
"""Copy host-side files (prompt, skills, SSH keys, .git) into """Copy host-side files (CA cert, prompt, skills, SSH keys,
the running bottle. Called from `launch` after the container/ .git) into the running bottle. Called from `launch` after the
machine is up. `target` identifies the running instance in container/machine is up. `target` identifies the running
backend-specific terms (Docker: resolved container name; fly: instance in backend-specific terms (Docker: resolved container
machine id). Returns the in-container prompt path if a prompt name; fly: machine id). Returns the in-container prompt path
was provisioned, else None — the Bottle handle uses it to if a prompt was provisioned, else None — the Bottle handle
decide whether to add --append-system-prompt-file to claude's uses it to decide whether to add --append-system-prompt-file
argv. to claude's argv.
Default orchestration: prompt → skills → ssh → git. Subclasses Default orchestration: ca → prompt → skills → ssh → git.
typically don't override this; they implement the four CA goes first because it changes how the agent process trusts
sub-methods below.""" the network; the rest don't depend on it but the order keeps
trust setup adjacent to the launch step. Subclasses typically
don't override this; they implement the sub-methods below."""
self.provision_ca(plan, target)
prompt_path = self.provision_prompt(plan, target) prompt_path = self.provision_prompt(plan, target)
self.provision_skills(plan, target) self.provision_skills(plan, target)
self.provision_ssh(plan, target) self.provision_ssh(plan, target)
self.provision_git(plan, target) self.provision_git(plan, target)
return prompt_path return prompt_path
def provision_ca(self, plan: PlanT, target: str) -> None:
"""Install the egress-proxy's CA into the running bottle's
trust store. Default impl is a no-op so backends that don't
yet support TLS interception (every backend except Docker
today) aren't forced to implement it. The Docker backend
overrides to extract mitmproxy's CA and run
`update-ca-certificates` inside the agent container."""
@abstractmethod @abstractmethod
def provision_prompt(self, plan: PlanT, target: str) -> str | None: def provision_prompt(self, plan: PlanT, target: str) -> str | None:
"""Copy the prompt file into the running bottle. Returns the """Copy the prompt file into the running bottle. Returns the
+12 -2
View File
@@ -23,7 +23,9 @@ from . import prepare as _prepare
from .bottle import DockerBottle from .bottle import DockerBottle
from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy
from .pipelock import DockerPipelockProxy from .pipelock import DockerPipelockProxy
from .provision import ca as _ca
from .provision import git as _git from .provision import git as _git
from .provision import prompt as _prompt from .provision import prompt as _prompt
from .provision import skills as _skills from .provision import skills as _skills
@@ -38,15 +40,23 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
def __init__(self) -> None: def __init__(self) -> None:
self._proxy = DockerPipelockProxy() self._proxy = DockerPipelockProxy()
self._mitm = DockerMitmproxyProxy()
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan: def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
return _prepare.resolve_plan(spec, stage_dir=stage_dir, proxy=self._proxy) return _prepare.resolve_plan(
spec, stage_dir=stage_dir, proxy=self._proxy, mitm=self._mitm,
)
@contextmanager @contextmanager
def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]: def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]:
with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle: with _launch.launch(
plan, proxy=self._proxy, mitm=self._mitm, provision=self.provision,
) as bottle:
yield bottle yield bottle
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
_ca.provision_ca(plan, target)
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None: def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
return _prompt.provision_prompt(plan, target) return _prompt.provision_prompt(plan, target)
@@ -13,6 +13,7 @@ from pathlib import Path
from ...log import info from ...log import info
from ...manifest import Agent, Bottle from ...manifest import Agent, Bottle
from ...mitmproxy import MitmproxyProxyPlan
from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist
from .. import BottlePlan from .. import BottlePlan
@@ -49,6 +50,7 @@ class DockerBottlePlan(BottlePlan):
forwarded_env: dict[str, str] = field(repr=False) forwarded_env: dict[str, str] = field(repr=False)
prompt_file: Path prompt_file: Path
proxy_plan: PipelockProxyPlan proxy_plan: PipelockProxyPlan
mitmproxy_plan: MitmproxyProxyPlan
allowlist_summary: str allowlist_summary: str
use_runsc: bool use_runsc: bool
@@ -93,6 +95,7 @@ class DockerBottlePlan(BottlePlan):
else: else:
info(" ssh hosts : (none)") info(" ssh hosts : (none)")
info(f" egress : {self.allowlist_summary}") info(f" egress : {self.allowlist_summary}")
info(" tls intercept : mitmproxy (per-bottle ephemeral CA, generated at launch)")
info( info(
f"prompt : {len(v.agent.prompt)} chars; " f"prompt : {len(v.agent.prompt)} chars; "
f"first line: {v.prompt_first_line or '(empty)'}" f"first line: {v.prompt_first_line or '(empty)'}"
@@ -117,6 +120,14 @@ class DockerBottlePlan(BottlePlan):
"egress": { "egress": {
"host_count": len(hosts), "host_count": len(hosts),
"hosts": hosts, "hosts": hosts,
# Reserved for PRD 0005: TLS interception via mitmproxy.
# ca_fingerprint is always null at dry-run because the
# CA is generated by the sidecar at launch time. Real
# launches print the fingerprint to stderr.
"mitm": {
"enabled": True,
"ca_fingerprint": None,
},
}, },
"prompt": { "prompt": {
"length": len(v.agent.prompt), "length": len(v.agent.prompt),
+33 -1
View File
@@ -22,8 +22,15 @@ from . import network as network_mod
from . import util as docker_mod from . import util as docker_mod
from .bottle import DockerBottle from .bottle import DockerBottle
from .bottle_plan import DockerBottlePlan from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy, mitmproxy_proxy_url
from .pipelock import DockerPipelockProxy, pipelock_proxy_url from .pipelock import DockerPipelockProxy, pipelock_proxy_url
# Path inside the agent container where the mitmproxy CA cert lives
# after provision_ca runs. Exported as a module-level constant so
# both the agent's docker-run env trio and the provisioner agree.
AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-mitm.crt"
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
# Where the repo root lives, for `docker build` context. Computed once. # Where the repo root lives, for `docker build` context. Computed once.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) _REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
@@ -34,6 +41,7 @@ def launch(
plan: DockerBottlePlan, plan: DockerBottlePlan,
*, *,
proxy: DockerPipelockProxy, proxy: DockerPipelockProxy,
mitm: DockerMitmproxyProxy,
provision: Callable[[DockerBottlePlan, str], str | None], provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]: ) -> Generator[DockerBottle, None, None]:
"""Build, launch, and provision a Docker bottle. Teardown on exit. """Build, launch, and provision a Docker bottle. Teardown on exit.
@@ -71,6 +79,17 @@ def launch(
pipelock_name = proxy.start(proxy_plan) pipelock_name = proxy.start(proxy_plan)
stack.callback(proxy.stop, pipelock_name) stack.callback(proxy.stop, pipelock_name)
# mitmproxy sits in front of pipelock on the agent's egress
# path. mitmproxy's `addon.py` reaches pipelock via the
# service-name URL we hand it here.
mitm_plan = dataclasses.replace(
plan.mitmproxy_plan,
internal_network=internal_network,
egress_network=egress_network,
)
mitm_name = mitm.start(mitm_plan, pipelock_url=pipelock_proxy_url(plan.slug))
stack.callback(mitm.stop, mitm_name)
container = _run_agent_container(plan, internal_network) container = _run_agent_container(plan, internal_network)
stack.callback(docker_mod.force_remove_container, container) stack.callback(docker_mod.force_remove_container, container)
@@ -85,7 +104,10 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"""Build the `docker run` argv and execute it, handling name- """Build the `docker run` argv and execute it, handling name-
conflict races by incrementing the suffix (unless the name was conflict races by incrementing the suffix (unless the name was
user-pinned). Returns the resolved container name.""" user-pinned). Returns the resolved container name."""
proxy_url = pipelock_proxy_url(plan.slug) # Agent traffic routes through mitmproxy, not pipelock directly.
# mitmproxy decrypts and hands the plaintext to pipelock via its
# addon; pipelock is unchanged from PRD 0001.
proxy_url = mitmproxy_proxy_url(plan.slug)
docker_args: list[str] = [ docker_args: list[str] = [
"--rm", "-d", "--rm", "-d",
"--name", plan.container_name, "--name", plan.container_name,
@@ -93,6 +115,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"-e", f"HTTPS_PROXY={proxy_url}", "-e", f"HTTPS_PROXY={proxy_url}",
"-e", f"HTTP_PROXY={proxy_url}", "-e", f"HTTP_PROXY={proxy_url}",
"-e", "NO_PROXY=localhost,127.0.0.1", "-e", "NO_PROXY=localhost,127.0.0.1",
# CA trust trio for the agent process. Docker propagates
# run-time env into `docker exec`, so `claude` sees these
# without per-exec threading. NODE_EXTRA_CA_CERTS points at
# the cert file (Node appends it to its bundled roots);
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
# bundle that `update-ca-certificates` rebuilds in
# provision_ca.
"-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
"-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
"-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
] ]
if plan.use_runsc: if plan.use_runsc:
docker_args.extend(["--runtime", "runsc"]) docker_args.extend(["--runtime", "runsc"])
+178
View File
@@ -0,0 +1,178 @@
"""DockerMitmproxyProxy — the Docker-specific lifecycle for the
mitmproxy sidecar. Inherits the addon-bundling from MitmproxyProxy.
The sidecar runs `mitmdump -s /addon/addon.py`, listens on
MITMPROXY_PORT inside the per-bottle internal network, and generates
its own ephemeral CA on first launch (extracted by provision_ca,
installed into the agent's trust store)."""
from __future__ import annotations
import os
import subprocess
import time
from pathlib import Path
from ...log import die, info, warn
from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan
# mitmproxy/mitmproxy:12.2.3 (mitmproxy v12 release line). The digest
# is the multi-arch image index — pulls resolve to the right per-arch
# child digest. Bumped deliberately; see PRD 0005.
MITMPROXY_IMAGE = os.environ.get(
"CLAUDE_BOTTLE_MITMPROXY_IMAGE",
"mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f",
)
# Listening port for mitmproxy's forward proxy (agent-facing).
MITMPROXY_PORT = os.environ.get("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080")
# Path inside the sidecar where the addon is dropped by docker cp.
MITMPROXY_ADDON_PATH = "/addon/addon.py"
# Path inside the sidecar where mitmproxy generates its CA.
_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem"
def mitmproxy_container_name(slug: str) -> str:
    """Return the Docker container name of the mitmproxy sidecar for
    the bottle identified by `slug`."""
    return "claude-bottle-mitm-{}".format(slug)
def mitmproxy_proxy_url(slug: str) -> str:
    """Return the `http://<sidecar-name>:<port>` URL for the bottle's
    mitmproxy sidecar, built from the sidecar's container name and
    MITMPROXY_PORT."""
    sidecar_host = mitmproxy_container_name(slug)
    return f"http://{sidecar_host}:{MITMPROXY_PORT}"
class DockerMitmproxyProxy(MitmproxyProxy):
    """Docker implementation of the mitmproxy sidecar lifecycle:
    create/start, removal, and CA-cert extraction, all driven through
    the `docker` CLI."""

    @staticmethod
    def _docker_ok(*argv: str) -> bool:
        """Run `docker <argv...>` with stdout and stderr discarded.
        Returns True iff the command exited with status 0."""
        finished = subprocess.run(
            ["docker", *argv],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        return finished.returncode == 0

    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Bring the sidecar up in four steps:

          1. `docker create` on `plan.internal_network`, running
             `mitmdump --listen-port <port> -s <addon path>` with the
             pipelock URL passed as CLAUDE_BOTTLE_PIPELOCK_URL.
          2. `docker cp` the addon source into the created container.
          3. `docker network connect` to `plan.egress_network`.
          4. `docker start`.

        A failure in steps 2-4 force-removes the half-built container
        before reporting through `die`. Returns the container name,
        which is the value `.stop` and `.extract_ca_cert` expect."""
        sidecar = mitmproxy_container_name(plan.slug)

        if not plan.addon_src.is_file():
            die(f"mitmproxy addon not found at {plan.addon_src}")

        info(f"starting mitmproxy sidecar {sidecar} on network {plan.internal_network}")

        # Step 1: create (not run) so the addon can be copied in
        # before mitmdump starts and tries to load it.
        created = self._docker_ok(
            "create",
            "--name", sidecar,
            "--network", plan.internal_network,
            "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}",
            MITMPROXY_IMAGE,
            "mitmdump",
            "--listen-port", MITMPROXY_PORT,
            "-s", MITMPROXY_ADDON_PATH,
        )
        if not created:
            die(f"failed to create mitmproxy sidecar {sidecar}")

        # Step 2: stderr is captured here (unlike the other steps) so
        # the failure message can include docker's explanation.
        copied = subprocess.run(
            ["docker", "cp", str(plan.addon_src), f"{sidecar}:{MITMPROXY_ADDON_PATH}"],
            capture_output=True,
            text=True,
            check=False,
        )
        if copied.returncode != 0:
            self._docker_ok("rm", "-f", sidecar)
            die(f"failed to copy mitmproxy addon into {sidecar}: {copied.stderr.strip()}")

        # Step 3: second network attachment, used for upstream traffic.
        if not self._docker_ok("network", "connect", plan.egress_network, sidecar):
            self._docker_ok("rm", "-f", sidecar)
            die(f"failed to attach mitmproxy sidecar {sidecar} to egress "
                f"network {plan.egress_network}")

        # Step 4.
        if not self._docker_ok("start", sidecar):
            self._docker_ok("rm", "-f", sidecar)
            die(f"failed to start mitmproxy sidecar {sidecar}")

        return sidecar

    def stop(self, proxy_target: str) -> None:
        """Force-remove the sidecar container if it exists. A container
        that `docker inspect` cannot find counts as already stopped; a
        failed removal only emits a warning with the manual cleanup
        command."""
        if not self._docker_ok("inspect", proxy_target):
            return
        if self._docker_ok("rm", "-f", proxy_target):
            return
        warn(
            f"failed to remove mitmproxy sidecar {proxy_target}; "
            f"clean up with 'docker rm -f {proxy_target}'"
        )

    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Wait (up to 15s, probing every 0.5s) for the CA certificate
        file to exist inside the sidecar, then `docker cp` it to
        `dest_path` on the host. Only the certificate file at
        `_CA_PATH_IN_SIDECAR` is copied. Failures are reported through
        `die`."""
        give_up_at = time.monotonic() + 15
        cert_present = False
        while time.monotonic() < give_up_at:
            if self._docker_ok("exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR):
                cert_present = True
                break
            time.sleep(0.5)
        if not cert_present:
            die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} "
                f"after 15s — sidecar {proxy_target} may have failed to start")

        copied = subprocess.run(
            ["docker", "cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path)],
            capture_output=True,
            text=True,
            check=False,
        )
        if copied.returncode != 0:
            die(f"failed to extract mitmproxy CA cert from {proxy_target}: "
                f"{copied.stderr.strip()}")
+4
View File
@@ -19,6 +19,7 @@ from ...log import die
from .. import BottleSpec from .. import BottleSpec
from . import util as docker_mod from . import util as docker_mod
from .bottle_plan import DockerBottlePlan from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy
from .pipelock import DockerPipelockProxy from .pipelock import DockerPipelockProxy
@@ -27,6 +28,7 @@ def resolve_plan(
*, *,
stage_dir: Path, stage_dir: Path,
proxy: DockerPipelockProxy, proxy: DockerPipelockProxy,
mitm: DockerMitmproxyProxy,
) -> DockerBottlePlan: ) -> DockerBottlePlan:
"""Resolve Docker-specific names and write scratch files. Trusts """Resolve Docker-specific names and write scratch files. Trusts
that the agent and its skills/SSH keys are present — validation that the agent and its skills/SSH keys are present — validation
@@ -78,6 +80,7 @@ def resolve_plan(
prompt_file.chmod(0o600) prompt_file.chmod(0o600)
proxy_plan = proxy.prepare(bottle, slug, stage_dir) proxy_plan = proxy.prepare(bottle, slug, stage_dir)
mitmproxy_plan = mitm.prepare(slug)
resolved = resolve_env(manifest, spec.agent_name) resolved = resolve_env(manifest, spec.agent_name)
# Everything that should reach the bottle by-name (so its value # Everything that should reach the bottle by-name (so its value
# never lands on argv or in env_file) goes into one dict. The # never lands on argv or in env_file) goes into one dict. The
@@ -105,6 +108,7 @@ def resolve_plan(
forwarded_env=forwarded_env, forwarded_env=forwarded_env,
prompt_file=prompt_file, prompt_file=prompt_file,
proxy_plan=proxy_plan, proxy_plan=proxy_plan,
mitmproxy_plan=mitmproxy_plan,
allowlist_summary=allowlist_summary, allowlist_summary=allowlist_summary,
use_runsc=use_runsc, use_runsc=use_runsc,
) )
@@ -0,0 +1,55 @@
"""Extract mitmproxy's CA cert and install it into the agent
container's trust store.
mitmproxy generates a fresh CA on first launch inside its sidecar.
This provisioner pulls the public cert through a host stage dir,
drops it into the agent at `/usr/local/share/ca-certificates/...`,
runs `update-ca-certificates` to rebuild the system bundle, and
emits a single stderr log line with the SHA-256 fingerprint."""
from __future__ import annotations
import hashlib
import ssl
import subprocess
from ....log import info
from ..bottle_plan import DockerBottlePlan
from ..launch import AGENT_CA_PATH
from ..mitmproxy import DockerMitmproxyProxy, mitmproxy_container_name
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
    """Install the mitmproxy sidecar's CA certificate into the agent
    container `target` and log its fingerprint.

    The cert is staged at `<plan.stage_dir>/mitm-ca.crt` on the host,
    copied to AGENT_CA_PATH in the agent, made world-readable, and
    folded into the system bundle with `update-ca-certificates`. Any
    failing docker command raises `subprocess.CalledProcessError`
    (each runs with `check=True`)."""
    sidecar_name = mitmproxy_container_name(plan.mitmproxy_plan.slug)
    staged = plan.stage_dir / "mitm-ca.crt"
    DockerMitmproxyProxy().extract_ca_cert(sidecar_name, staged)

    # Order matters: the file must exist before chmod, and must be
    # readable before the bundle is rebuilt. The exec steps run as
    # uid 0 (`-u 0`).
    install_steps = (
        ["docker", "cp", str(staged), f"{target}:{AGENT_CA_PATH}"],
        ["docker", "exec", "-u", "0", target, "chmod", "644", AGENT_CA_PATH],
        ["docker", "exec", "-u", "0", target, "update-ca-certificates"],
    )
    for argv in install_steps:
        subprocess.run(argv, stdout=subprocess.DEVNULL, check=True)

    # Fingerprint = SHA-256 over the certificate's DER encoding,
    # computed from the public cert only. The log line shows the
    # first 32 hex digits.
    der_bytes = ssl.PEM_cert_to_DER_cert(staged.read_text())
    digest = hashlib.sha256(der_bytes).hexdigest()
    info(f"mitm ca fingerprint: sha256:{digest[:32]}...")
+81
View File
@@ -0,0 +1,81 @@
"""mitmproxy TLS-interception sidecar for the per-bottle egress
topology (PRD 0005).
Sits in front of pipelock on the bottle's egress path so pipelock's
body / header / URL DLP scanners see plaintext for HTTPS targets.
The sidecar runs in mitmproxy's `regular` mode and loads the
vendored addon at `addon.py`; the addon forwards each decrypted
request to pipelock as a plain HTTP forward-proxy call and gates
the mitmproxy flow on pipelock's verdict.
This module is platform-agnostic: it owns the abstract proxy
lifecycle (prepare / start / stop / extract_ca_cert). The
Docker-specific lifecycle lives in
`claude_bottle/backend/docker/mitmproxy.py`.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Immutable start plan produced by `MitmproxyProxy.prepare` and
    consumed by `.start`.

    The two network fields start out empty; the docstring of the
    original design states the backend's launch step fills them in
    with `dataclasses.replace` once the networks exist."""

    # Host-side path of the vendored addon.py, resolved at prepare time.
    addon_src: Path
    # Per-agent identifier used as the suffix of per-bottle resource names.
    slug: str
    # Name of the per-bottle internal network ("" until launch sets it).
    internal_network: str = ""
    # Name of the per-bottle egress network ("" until launch sets it).
    egress_network: str = ""
class MitmproxyProxy(ABC):
    """Abstract mitmproxy TLS-interception sidecar.

    `prepare` is shared by all backends; `start`, `stop`, and
    `extract_ca_cert` must be supplied by a concrete backend
    subclass."""

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Build the start plan for the bottle identified by `slug`.

        Looks for `addon.py` next to this module and records its path
        in the plan. Raises FileNotFoundError when the addon file is
        absent."""
        package_dir = Path(__file__).resolve().parent
        addon_src = package_dir / "addon.py"
        if addon_src.is_file():
            return MitmproxyProxyPlan(addon_src=addon_src, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_src}; the "
            f"package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Start the sidecar described by `plan`, making
        `pipelock_url` available to the addon through the
        CLAUDE_BOTTLE_PIPELOCK_URL environment variable. Returns the
        proxy_target identifier accepted by `.stop` and
        `.extract_ca_cert`."""

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Remove the sidecar named by `proxy_target`. Must be
        idempotent: an already-missing target is not an error."""

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Write the sidecar's public CA certificate to `dest_path`
        on the host, waiting for mitmproxy to generate it if needed.
        Implementations must not export the private key."""
+169
View File
@@ -0,0 +1,169 @@
"""mitmproxy addon: forward each decrypted request to pipelock for
scanning, then either short-circuit with pipelock's 403 (block) or
let mitmproxy proceed to the real upstream (allow).
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
Must be self-contained — the sidecar image doesn't have claude_bottle
on its import path. Imports are limited to the Python stdlib plus
mitmproxy itself (which is the host).
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
(injected by DockerMitmproxyProxy.start).
The verdict function `is_pipelock_block` is exported as a pure
function so unit tests can exercise it without importing mitmproxy.
"""
from __future__ import annotations
import logging
import os
import urllib.error
import urllib.request
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
PIPELOCK_TIMEOUT_SEC = 5
# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
_HOP_BY_HOP = frozenset({
"connection",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailers",
"transfer-encoding",
"upgrade",
})
log = logging.getLogger("pipelock-bridge")
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Decide whether a response came from pipelock's own block path
    rather than being an upstream error that pipelock relayed.

    A block is recognised by two signals together: HTTP status 403 and
    a body that begins with the literal bytes `blocked: `. Either
    signal alone is treated as "not a pipelock block". The body-prefix
    match is a heuristic; a structured verdict header from pipelock
    would be a more robust signal if one becomes available."""
    if status != 403:
        return False
    return body_bytes.startswith(b"blocked: ")
class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that never follows a redirect.

    Returning None from `redirect_request` makes urllib surface the
    3xx response as an HTTPError instead of issuing a follow-up
    request."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Send one decrypted request to pipelock as a plain-HTTP
    forward-proxy call and return `(status, body_bytes)` of whatever
    comes back.

    `target_url` is rewritten from https:// to http:// so the request
    goes to pipelock in absolute-URI form. Hop-by-hop headers are
    dropped. HTTP error statuses (including pipelock's 403 block and
    any 3xx) are returned as values; transport-level failures
    (connection refused, timeout, ...) propagate as exceptions so the
    caller can fail closed.

    Redirects are deliberately NOT followed. The default urllib opener
    would chase a relayed 3xx, and an https:// Location is not covered
    by the http-only proxy mapping below, so the follow-up request
    would leave this process directly, unscanned, with the original
    request headers attached."""
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirectHandler(),
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Context manager closes the underlying socket once the body
        # has been read.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object for non-2xx
        # statuses; read its body, then release the connection.
        try:
            return e.code, e.read()
        finally:
            e.close()
class PipelockBridge:
    """mitmproxy addon that gates every decrypted client request on
    pipelock's verdict. One instance is registered through the
    module-level `addons` list."""

    def __init__(self) -> None:
        # Read once at construction. An empty value is tolerated here
        # so the module stays importable without the env var set; the
        # request handler refuses traffic if it is still empty when a
        # request arrives.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy `request` hook.

        Assigning `flow.response` answers the client directly and
        stops the flow; leaving it unset lets mitmproxy forward the
        request upstream. Outcomes:

          - pipelock URL not configured -> 503 (fail closed)
          - scan call raises            -> 503 (fail closed)
          - pipelock block verdict      -> pipelock's status and body
          - anything else               -> no response set (allow)
        """
        # Imported lazily so the module can be loaded where mitmproxy
        # is not installed.
        from mitmproxy import http

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            flow.response = http.Response.make(
                503,
                b"egress scanner not configured",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "misconfigured"},
            )
            return

        # Scan the URL mitmproxy will actually connect to. `pretty_url`
        # is derived from the client-supplied Host header, so a spoofed
        # Host would have pipelock evaluate a different host than the
        # real destination. `url` is built from the request's actual
        # host; the original Host header still travels in `headers`.
        target_url = flow.request.url
        method = flow.request.method
        headers = {k: v for k, v in flow.request.headers.items()}
        body = bytes(flow.request.content or b"")

        try:
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as e:
            # Deliberately broad: any failure to obtain a verdict must
            # refuse the request rather than let it through unscanned.
            log.warning("pipelock unreachable; failing closed: %s", e)
            flow.response = http.Response.make(
                503,
                b"egress scanner unreachable",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "error"},
            )
            return

        if is_pipelock_block(status, response_body):
            flow.response = http.Response.make(
                status,
                response_body,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "block"},
            )
            return

        # Allow path: pipelock's response is discarded and
        # flow.response stays unset, so mitmproxy performs the real
        # upstream request itself.
addons = [PipelockBridge()]
@@ -0,0 +1,437 @@
# PRD 0005: mitmproxy TLS interception for pipelock content scanning
- **Status:** Draft (updated 2026-05-12 after open-question walkthrough)
- **Author:** didericis
- **Created:** 2026-05-12
## Summary
Add a per-bottle **mitmproxy** sidecar in front of pipelock on the
egress path. mitmproxy bumps the agent's TLS CONNECT, decrypts the
inner HTTP, and hands each request to a vendored Python addon. The
addon forwards the decrypted request to pipelock as a plain HTTP
forward-proxy call so pipelock's DLP, URL-scan, and header-scan
layers fire on real bodies. On the verdict, the addon either
short-circuits the flow with a 403 (block) or lets mitmproxy
proceed to the real upstream (allow). mitmproxy itself generates
the ephemeral per-bottle CA on startup; the public cert is copied
into the agent's trust store and the private key dies with the
sidecar on teardown.
This is Topology A' from `docs/research/tls-mitm-for-pipelock.md` —
a variant of the research note's Topology A after a spike showed
mitmproxy's `upstream` mode re-wraps decrypted flows in a new
CONNECT to the upstream proxy (which would defeat the entire
point). The addon recovers the design by emitting plain HTTP to
pipelock explicitly instead of relying on mitmproxy's `upstream`
chaining.
## Problem
PRD 0001 wired pipelock onto every bottle's egress, but the current
topology only sees `CONNECT` hostnames and opaque TLS bytes:
```
agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet
\____________________________
opaque TLS bytes
```
What pipelock cannot scan in this mode is documented in
`docs/research/tls-mitm-for-pipelock.md` §What pipelock cannot see
today: request URLs and methods, request and response headers,
request and response bodies, MCP JSON-RPC payloads, inner-vs-outer
hostname (the domain-fronting check), and WebSocket frames inside a
TLS-wrapped upgrade. The 48-pattern DLP layer this project relies on
in PRD 0001 is therefore inert against every host in the current
`DEFAULT_ALLOWLIST` — all of which are HTTPS-only.
The integration test added in `tests/integration/test_pipelock_blocks_secret_post.py`
demonstrates the gap concretely: pipelock's body-scan layer only
fires when the agent is forced to send plain HTTP. Real Claude Code
traffic to `api.anthropic.com` goes over CONNECT-tunneled TLS and
slips past the scanner.
`pipelock-assessment.md` §Scope gaps names this as a known
limitation of the proxy-without-TLS-inspection shape. Closing it is
the explicit motivation for `tls-mitm-for-pipelock.md`, whose
recommendation this PRD implements (with the addon adjustment
forced by the upstream-mode spike).
## Goals / Success Criteria
The feature works when all of the following are observable:
- A Node request from inside a launched bottle to a CONNECT-bumped
HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) carrying a
pipelock-recognized credential pattern in the body returns 403
from the bottle's egress chain — not a response from the upstream.
The existing `test_pipelock_blocks_secret_post` test path becomes
the HTTPS variant of this assertion.
- A plain HTTPS GET from inside the bottle to an allowlisted host
with no credential pattern (e.g. `GET https://raw.githubusercontent.com/...`)
returns the real upstream response — the addon doesn't break
clean traffic.
- Claude Code itself reaches `api.anthropic.com` end-to-end through
the bottle and completes a chat round-trip. No TLS-trust errors
in the agent process.
- mitmproxy's flow log and pipelock's `body_dlp` / `header_dlp` /
`core_dlp` event lines both appear for the same outbound request,
confirming the two-stage path is active.
The feature is **done** when all of the following ship:
- A new `MitmproxyProxy` class with the same `prepare` / `start` /
`stop` lifecycle shape as `PipelockProxy`, wired into the Docker
backend's launch step.
- A vendored Python addon at `claude_bottle/mitmproxy/addon.py`
that mitmproxy loads on startup via `mitmdump -s ...`. The sidecar
runs in `regular` mode (default), not `upstream` mode.
- The bottle launch step starts the mitmproxy sidecar, waits for
the sidecar-internal CA to be generated, copies the CA public
cert into the agent at `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
runs `update-ca-certificates` inside the agent, and threads the
`NODE_EXTRA_CA_CERTS` / `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE`
env trio onto the agent container's runtime env.
- The agent's `HTTPS_PROXY` / `HTTP_PROXY` point at the mitmproxy
sidecar (where they pointed at pipelock under PRD 0001).
- pipelock is otherwise unchanged. It continues to load the YAML
PRD 0001 generates and runs its existing scanning pipeline; the
addon talks to it via the same forward-proxy interface today's
`test_pipelock_blocks_secret_post` uses.
- On bottle teardown the mitmproxy sidecar is removed and the
ephemeral CA private key is gone with it.
- An HTTPS variant of `test_pipelock_blocks_secret_post` proves
pipelock now blocks a credential POST over HTTPS rather than
plain HTTP.
- An integration test proves a non-credential HTTPS GET through
the chain returns the upstream's real response.
- The dry-run preflight (`start --dry-run`) shows the mitmproxy
sidecar in both text and `--format=json` output. The JSON
contract gains a reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }`
block; fingerprint is always null at dry-run because the CA
doesn't exist yet. Real launches emit a one-line stderr log:
`claude-bottle: mitm ca fingerprint: <sha256-first-16>...`.
## Non-goals
- **Topology C** — extending pipelock itself to terminate TLS. The
research note's recommended long-term shape, but substantial Go
work plus the Apache-2.0-vs-ELv2 question. Deferred.
- **Topology D as canonical** — mitmproxy with a pipelock `/scan`
HTTP endpoint. The addon in this PRD talks to pipelock via its
existing forward-proxy interface; no upstream pipelock change
needed.
- **Persistent or shared CA across bottles.** Each bottle gets a
fresh CA generated by its own mitmproxy at startup.
- **Selective bumping ("ignore_hosts") as a v1 manifest field.**
v1 bumps every CONNECT. If a future allowlisted host turns out
to pin (Mobile / Chromium-style cert pinning), a follow-up PRD
adds the per-host opt-out via `bottle.egress.tls_bump_ignore`.
Strictly additive.
- **HTTP/3 / QUIC.** mitmproxy's HTTP/3 support is experimental.
v1 relies on the v1-egress iptables layer blocking UDP/443 to
force clients onto HTTP/2 over TCP, which mitmproxy 12 inspects
natively (verified by spike).
- **Raw TCP / non-HTTP TLS interception.** mitmproxy supports it
via `--mode reverse:`, not in CONNECT-bump mode. SSH and any
future raw-TCP egress route around mitmproxy entirely.
- **Trust-store rewiring for non-Debian agent images.** The
current `Dockerfile` is `node:22-slim` (Debian). If a future base
switches to Red-Hat-family, the `update-ca-certificates` step
becomes `update-ca-trust`. Out of scope until the base changes.
- **Response-body scanning.** Pipelock supports it; we don't wire
it in v1 because the addon would need to ferry the upstream
response back through pipelock's scanner, which the forward-
proxy interface doesn't support cleanly. v2 candidate.
- **MCP scanning on the bumped path.** Only fires on MCP-formatted
JSON-RPC payloads inside tool calls. Not relevant to plain HTTPS
agent traffic and out of v1 scope.
- **Domain-fronting verification.** Once the addon sees the inner
`Host` / `:authority`, comparing it to the outer CONNECT target
catches domain fronting. Worth ~10 lines in the addon, but
defer until the rest of v1 is settled.
- **Host-side openssl / `cryptography` for CA generation.** The
research note's open question on this is resolved by letting
mitmproxy itself generate the CA (it does so on first launch).
No new host-side crypto.
## Scope
### In scope
- New `claude_bottle/mitmproxy/` package:
- `__init__.py` — backend-agnostic. Constants (sidecar port,
image-pin digest, the in-container addon path), the abstract
`MitmproxyProxy` class with `prepare` / `start` / `stop` shape
mirroring `PipelockProxy`, and the small helper that reads the
CA fingerprint from a PEM file via `openssl x509 -fingerprint`
shelled out.
- `addon.py` — the Python addon mitmproxy loads. ~80–150 lines.
For each `request` event: forward the decrypted request to
pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a
plain HTTP forward-proxy call (absolute-URI form). Inspect
pipelock's response. If status is 403 *and* the body matches
pipelock's known block-event shape, set the flow's response to
a 403 with pipelock's body and short-circuit. Otherwise,
discard pipelock's response (and any wasted upstream-leg
response from pipelock's forwarder) and let mitmproxy proceed
to the real upstream.
- New `claude_bottle/backend/docker/mitmproxy.py` —
`DockerMitmproxyProxy(MitmproxyProxy)` with the Docker-specific
start/stop lifecycle. `start(plan)` does `docker create` /
`docker cp addon.py …` / `docker network connect` / `docker start`,
analogous to the existing `DockerPipelockProxy.start`. Injects
`CLAUDE_BOTTLE_PIPELOCK_URL` into the sidecar env so the addon
knows where pipelock lives.
- New provisioner `claude_bottle/backend/docker/provision/ca.py`.
Polls mitmproxy for the cert file, copies it through a host
stage dir into the agent, runs `update-ca-certificates` inside
the agent, computes the SHA-256 fingerprint, and prints the
one-line stderr log.
- `BottleBackend.provision_ca(plan, target)` joins the four
existing provisioner methods on the abstract base. Default impl
is no-op so other backends don't break when they don't yet
implement TLS interception.
- `DockerBottlePlan` grows a `mitmproxy_plan` field mirroring the
existing `proxy_plan`.
- Agent container `docker run` invocation:
- `HTTPS_PROXY` / `HTTP_PROXY` change from the pipelock service
name to the mitmproxy service name.
- Three `-e` flags set the CA env trio so they're inherited by
the eventual `docker exec claude` (Docker propagates run-time
env into exec by default; fallback in Q1 below).
- Dry-run preflight rendering of the mitmproxy entry (text + JSON).
JSON gains `egress.mitm: { "enabled": true, "ca_fingerprint": null }`.
- One stderr log line at launch with the CA fingerprint.
- Two new integration tests under `tests/integration/`:
- `test_mitmproxy_blocks_secret_https_post.py` — HTTPS variant
of the existing block-secret test. Asserts pipelock's body
DLP fires on a credential POST tunneled through CONNECT.
- `test_mitmproxy_allows_normal_https.py` — confirms a plain
HTTPS GET on an allowlisted host returns the upstream response,
isolating the addon's pass-through path from the block path.
- Unit tests for the addon's verdict logic (block vs allow on
status + body shape, edge cases) using mitmproxy's `mitmproxy.test`
flow fixtures. Unit tests for the proxy config builder
(mirroring `tests/unit/test_pipelock_yaml.py`).
### Out of scope
- The v1 iptables + dnsmasq layer (separate PRD; see
`network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only;
raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer.
- Pipelock config changes. Pipelock continues to load the YAML
PRD 0001 generates; the addon talks to it via the existing
forward-proxy interface.
- A bottle-level toggle to skip mitmproxy entirely. v1 always
wires it in.
- Pinning-host detection automation. The cost of finding out (per
research) is a single 5-minute test before adding a host; it
stays a manual step.
- Pipelock upstream contributions for an `X-Pipelock-Verdict` header.
Possible follow-up. Until then the addon distinguishes blocks
from passes via status + body fingerprint.
## Proposed Design
### Topology
```
agent --HTTPS_PROXY--> mitmproxy --addon--> pipelock (scan)
(bump TLS) |
^ | (verdict via status code)
| v
+-- on allow ----- real upstream
(mitmproxy as client)
```
All three containers live on the same per-bottle internal Docker
network. mitmproxy and pipelock are both attached to the per-bottle
egress bridge for real-internet reach; the agent has no default
route.
Concretely:
- Agent sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`.
PRD 0001 had this pointing at pipelock; the hostname swap is the
only agent-side env change.
- mitmproxy runs in **`regular`** mode (default; no `--mode` flag).
It bumps every CONNECT, generates fake leaf certs signed by its
own CA, and presents them to the agent.
- The addon, loaded via `mitmdump -s /addon/addon.py`, intercepts
each decrypted `request` event. It forwards the request to
pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a
plain HTTP forward-proxy call (absolute-URI form), so pipelock
sees the full URL, headers, and body.
- The addon inspects pipelock's response. If status is 403 *and*
the response body matches pipelock's known block-event shape,
the addon sets the mitmproxy flow's response to a 403 with
pipelock's body and short-circuits. Otherwise — including the
case where pipelock's forwarder attempted the upstream and got
a 4xx — the addon discards pipelock's response and lets
mitmproxy proceed to the real upstream.
- mitmproxy completes the outbound TLS to the real destination
using its built-in trust store, just like any other forward
proxy. Pipelock is only involved as a scanner.
The trade-off: pipelock makes a wasted upstream forward attempt
for every allowed request (it tries to forward over plain HTTP to
a real HTTPS-only host, which fails with the upstream's 4xx). This
is benign — the scan completes before forwarding, the verdict
reaches the addon, the upstream-side request happens to die in
pipelock's forwarder rather than reach the agent. Acceptable cost
for the visibility win. A pipelock-side improvement (skip the
forward when the addon only needs the scan verdict) is a future
optimization.
### New components
- `claude_bottle/mitmproxy/__init__.py` — backend-agnostic
abstract base, constants, the `openssl x509 -fingerprint` helper.
- `claude_bottle/mitmproxy/addon.py` — the scanning addon.
Reads pipelock's URL from `CLAUDE_BOTTLE_PIPELOCK_URL` (injected
into the sidecar env by the proxy's `start`). For each
`request` flow: synchronously forward it to pipelock; inspect status
+ body; either short-circuit with 403 or fall through.
- `claude_bottle/backend/docker/mitmproxy.py` —
`DockerMitmproxyProxy(MitmproxyProxy)` with start/stop, the
`docker cp` of the addon into the sidecar before `docker start`,
and the `CLAUDE_BOTTLE_PIPELOCK_URL` wiring.
### CA lifecycle
Simplified by letting mitmproxy own the generation:
- **Generation.** mitmproxy generates a fresh CA on startup
inside its container at `/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem`
(public) + `mitmproxy-ca.pem` (private). No host-side openssl
for *generation*; no host-side Python `cryptography` dep.
- **Volume strategy.** Container-internal only. No host bind
mount means the CA dies with the container.
- **Extraction.** `provision_ca` polls (~1s) for the cert file
via `docker exec`, then `docker cp` to host stage dir, then
`docker cp` into the agent. Host stage dir gets cleaned up by
the existing `start.py` `finally` block.
- **Bottle install.**
1. `docker cp <host stage>/mitm-ca.crt agent-<slug>:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`
2. `docker exec -u 0 agent-<slug> chmod 644 …`
3. `docker exec -u 0 agent-<slug> update-ca-certificates`
4. Three `-e` flags on `docker run` set the env trio
(`NODE_EXTRA_CA_CERTS=…/claude-bottle-mitm.crt`,
`SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`,
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`) so
`docker exec claude` inherits them.
- **Teardown.** Sidecar container removed; CA private key gone.
- **Fingerprint.** Computed post-extraction via shelled-out
`openssl x509 -fingerprint -sha256 -noout`. Logged once to
stderr at launch; never the private key.
### Data model changes
None to the manifest schema. The dry-run JSON contract gains a
reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }`
block. Fingerprint is always null at dry-run (CA doesn't exist
yet) but the field is reserved so future schema additions stay
non-breaking.
A future selective-bump knob would add
`bottle.egress.tls_bump_ignore: [host, ...]` per the research
note. Strictly additive when it lands.
### Existing code touched
- **`claude_bottle/backend/docker/launch.py`** — bring up the
mitmproxy sidecar between pipelock and the agent. Repoint the
agent's `HTTPS_PROXY` / `HTTP_PROXY` env flags to mitmproxy.
Register an `ExitStack` callback for mitmproxy teardown. Print
the CA fingerprint once the sidecar reports ready.
- **`claude_bottle/backend/docker/prepare.py`** — call into
`MitmproxyProxy.prepare(...)` alongside `PipelockProxy.prepare(...)`,
populate `DockerBottlePlan.mitmproxy_plan`.
- **`claude_bottle/backend/docker/backend.py`** — add the
`DockerMitmproxyProxy` instance attribute (`self._mitm`) and
thread it through `launch` + cleanup, mirroring `self._proxy`.
- **`claude_bottle/backend/docker/bottle_plan.py`** — new
`mitmproxy_plan` field. `print()` and `to_dict()` learn to
render the mitmproxy entry and the `egress.mitm` JSON block.
- **`claude_bottle/backend/__init__.py`** — abstract
`BottleBackend.provision_ca` joins the four existing
provisioners; default no-op.
- **`tests/integration/`** — two new tests as described above.
- **`tests/unit/`** — addon-verdict tests, mitmproxy-config
builder tests, dry-run-plan test updated for the new
`egress.mitm` block.
### External dependencies
- **mitmproxy Docker image** pinned by digest on the `12.x` line.
Bumped deliberately, mirroring the pipelock pin. Verified by
spike to speak h2 on both halves.
- No new host-side runtimes. mitmproxy generates the CA;
fingerprint via the `openssl` already present on Debian / macOS
/ ubuntu-latest runners.
## Open questions
(rewritten — most of the original v1 questions are now closed by
the walkthrough spikes; what remains is addon-implementation
specifics worth pinning during the first impl turn.)
- **Pipelock's 403-body fingerprint.** The addon needs to
distinguish a pipelock block (DLP / host) from a real-upstream
4xx that pipelock's forwarder relayed back. Most likely shape:
pipelock's 403 response carries a JSON body with `event` /
`scanner` fields, whereas a real-upstream 4xx carries whatever
the upstream sent. Pin the exact fingerprint by inspecting
pipelock's actual 403 body bytes at impl time. Long-term
cleanup: file an upstream feature request for an
`X-Pipelock-Verdict: block` response header so the addon can
read a structured signal instead of pattern-matching the body.
- **Docker run env-var inheritance through docker exec.** Plan
assumes `docker run -e VAR=value` propagates to subsequent
`docker exec` invocations. The Docker docs say so; not yet
empirically pinned on this project's runner setup. Verify in
the first impl turn. Trivial fallback: thread the three `-e`
flags onto every `DockerBottle.exec*` call.
- **Addon synchronous-call latency.** The addon makes a sync HTTP
call to pipelock per outbound flow. Pipelock is on the same
internal Docker network; expected per-call latency is well
under 10ms. Confirm under the parallel-request load Claude Code
generates (most likely a non-issue — Claude is single-stream
request-wise).
- **Addon test fixtures.** mitmproxy ships `mitmproxy.test` with
flow fixtures; addons can be unit-tested without a running
proxy. Confirm the import path and recommended fixture shape at
impl time; structure the addon so the verdict-decision is a
pure function that's trivially testable in isolation from any
HTTP I/O.
- **Pipelock allowing the addon's forwarded request through.**
pipelock will see the addon's request as coming from the
mitmproxy sidecar's IP on the internal network. Confirm
pipelock has no client-IP allowlist that would reject these.
Likely fine — pipelock's `client_ip` is informational in the
scan event, not a gate.
## References
- `docs/research/tls-mitm-for-pipelock.md` — primary source. This
PRD implements a variant of §Recommendation (Topology A) after
the spike documented under "Open questions" §1 falsified the
`upstream` mode assumption.
- `docs/research/pipelock-assessment.md` §Scope gaps — names the
TLS-inspection gap closed here.
- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` —
egress-proxy baseline this PRD extends.
- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC
contract this PRD adds a `provision_ca` method to.
- `docs/prds/0004-split-out-provisioners.md` — per-provisioner
module pattern reused for the new CA provisioner.
- mitmproxy: <https://mitmproxy.org>,
<https://github.com/mitmproxy/mitmproxy>
- mitmproxy modes: <https://docs.mitmproxy.org/stable/concepts/modes/>
- mitmproxy CA cert installation:
<https://docs.mitmproxy.org/stable/concepts/certificates/>
- mitmproxy addon API: <https://docs.mitmproxy.org/stable/addons-overview/>
- Node `NODE_EXTRA_CA_CERTS`:
<https://nodejs.org/api/cli.html#node_extra_ca_certsfile>
+6
View File
@@ -92,6 +92,12 @@ class TestDryRunPlan(unittest.TestCase):
self.assertEqual(sorted(set(hosts)), hosts, self.assertEqual(sorted(set(hosts)), hosts,
"hosts must be sorted and deduplicated") "hosts must be sorted and deduplicated")
# PRD 0005: TLS interception block is part of the JSON
# contract. Fingerprint is null at dry-run (CA doesn't
# exist yet); real launches print it to stderr.
self.assertEqual({"enabled": True, "ca_fingerprint": None},
plan["egress"]["mitm"])
# No Docker side effects (see the GITEA_ACTIONS skip note # No Docker side effects (see the GITEA_ACTIONS skip note
# above — this guard runs locally only). # above — this guard runs locally only).
if check_side_effects: if check_side_effects:
@@ -0,0 +1,167 @@
"""Integration: with mitmproxy in front of pipelock, a plain HTTPS
GET to an allowlisted host with no credential pattern still gets
through end-to-end.
The complement to test_mitmproxy_blocks_secret_https_post — together
they isolate the addon's two paths (block vs. allow). This test
also functions as the end-to-end TLS-trust check: if the agent's
trust store didn't have mitmproxy's CA installed, the TLS handshake
between the agent and mitmproxy's bumped cert would fail and the
fetch would throw before we ever saw a response.
"""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from tests._docker import skip_unless_docker
from tests.fixtures import fixture_minimal
# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST.
# Pick a file path that's stable enough across runs — `git`'s own
# README.md on the master branch is a long-lived artifact and one
# of github's most-trafficked raw files.
_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md"
# stdlib http (for CONNECT) + tls (for the bumped tunnel); see the
# block test for the rationale on not pulling undici in as a dep.
#
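# Output contract:
# - "status=<code>"    HTTP status from upstream (or addon, if
#                      blocked)
# - "bridge=<value>"   X-Pipelock-Bridge header; empty on allow
# - "len=<N>"          response body length, sanity-check it's a
#                      real response and not an empty proxy stub
# - "error=<...>"      error on the CONNECT request to the proxy
# - "tls_error=<...>"  error on the TLS socket inside the tunnel
# - "timeout=connect"  the CONNECT request timed out (10s)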
_PROBE_JS = r"""
const http = require('http');
const tls = require('tls');
const proxy = new URL(process.env.HTTPS_PROXY);
const connectReq = http.request({
host: proxy.hostname,
port: proxy.port,
method: 'CONNECT',
path: 'raw.githubusercontent.com:443',
});
connectReq.setTimeout(10000, () => {
console.log('timeout=connect');
connectReq.destroy();
});
connectReq.on('error', (e) => {
console.log('error=' + (e.code || '') + ' ' + e.message);
});
connectReq.on('connect', (res, socket) => {
if (res.statusCode !== 200) {
console.log('status=' + res.statusCode);
console.log('bridge=' + (res.headers['x-pipelock-bridge'] || ''));
return;
}
const tlsSocket = tls.connect({
socket: socket,
servername: 'raw.githubusercontent.com',
});
tlsSocket.on('secureConnect', () => {
tlsSocket.write(
'GET /git/git/master/README.md HTTP/1.1\r\n' +
'Host: raw.githubusercontent.com\r\n' +
'User-Agent: claude-bottle-mitm-test\r\n' +
'Accept: */*\r\n' +
'Connection: close\r\n' +
'\r\n'
);
});
let buf = Buffer.alloc(0);
tlsSocket.on('data', (c) => { buf = Buffer.concat([buf, c]); });
tlsSocket.on('end', () => {
const text = buf.toString('utf8');
const headersEnd = text.indexOf('\r\n\r\n');
const head = headersEnd >= 0 ? text.slice(0, headersEnd) : text;
const body = headersEnd >= 0 ? text.slice(headersEnd + 4) : '';
const lines = head.split('\r\n');
const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/);
let bridge = '';
for (let i = 1; i < lines.length; i++) {
const ix = lines[i].indexOf(': ');
if (ix < 0) continue;
if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') {
bridge = lines[i].slice(ix + 2);
}
}
console.log('status=' + (m ? m[1] : '?'));
console.log('bridge=' + bridge);
console.log('len=' + body.length);
});
tlsSocket.on('error', (e) => {
console.log('tls_error=' + (e.code || '') + ' ' + e.message);
});
});
connectReq.end();
"""
@skip_unless_docker()
class TestMitmproxyAllowsNormalHttps(unittest.TestCase):
    """Allow path: a credential-free HTTPS GET sent through the
    bottle's proxy chain must come back with a real 200 response and
    no X-Pipelock-Bridge header."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_get_to_allowed_host_succeeds(self):
        # Shell wrapper executed inside the bottle: drop the Node
        # probe into /tmp through a quoted heredoc (so the shell does
        # not expand anything inside the JS), then run it.
        probe_script = "".join((
            "set -e\n",
            "cat > /tmp/probe.js <<'PROBE_EOF'\n",
            f"{_PROBE_JS}\n",
            "PROBE_EOF\n",
            "node /tmp/probe.js\n",
        ))

        backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            plan = backend.prepare(
                BottleSpec(
                    manifest=fixture_minimal(),
                    agent_name="demo",
                    copy_cwd=False,
                    user_cwd=str(scratch),
                    forward_oauth_token=False,
                ),
                stage_dir=scratch,
            )
            with backend.launch(plan) as bottle:
                result = bottle.exec(probe_script)
        finally:
            # Always remove the host-side stage dir, even when the
            # launch or the exec raised.
            shutil.rmtree(scratch, ignore_errors=True)

        self.assertEqual(
            0, result.returncode,
            f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}",
        )

        # The TLS-trust check is implicit: when the handshake against
        # the bumped cert fails, the probe prints "tls_error=..." and
        # never reaches the code that prints a status line.
        self.assertIn(
            "status=200", result.stdout,
            f"expected 200 from raw.githubusercontent.com; got: {result.stdout!r}",
        )

        # X-Pipelock-Bridge is only set on the addon's short-circuit
        # paths (block / misconfigured / scanner-unreachable). On the
        # allow path mitmproxy talks straight to upstream, so the
        # probe must print an empty bridge value.
        self.assertIn(
            "bridge=\n", result.stdout,
            f"X-Pipelock-Bridge unexpectedly present on the allow "
            f"path: {result.stdout!r}",
        )

        # Sanity: the README is many KB. A zero-length body would
        # point at a response synthesized somewhere in the chain
        # instead of one fetched from github.
        self.assertNotIn("len=0\n", result.stdout)
if __name__ == "__main__":
unittest.main()
@@ -0,0 +1,172 @@
"""Integration: with mitmproxy in front of pipelock, a credential
POST sent over HTTPS is now blocked by pipelock's body-scan layer.
This is the HTTPS variant of test_pipelock_blocks_secret_post — the
two together prove the TLS-interception layer is doing the work the
PRD targets. The earlier plain-HTTP test only fired because the agent
was forced to bypass TLS; real Claude Code traffic to api.anthropic.com
goes over CONNECT-tunneled HTTPS and would have slipped past pipelock
prior to this PRD.
End-to-end: drives `BottleBackend.prepare → launch` so the real
image build, network plumbing, pipelock sidecar, mitmproxy sidecar,
ephemeral CA generation, and trust-store install are all in the
loop.
"""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from claude_bottle.manifest import Manifest
from tests._docker import skip_unless_docker
# Synthetic value shaped like a GitHub Personal Access Token; not a
# real credential. Pipelock's default DLP rules pattern-match this
# format and mitmproxy's addon short-circuits with the 403 it
# receives back.
_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
# Build the request by hand using stdlib `http` (for CONNECT) and
# `tls` (for the bumped tunnel). Node 22's `fetch` doesn't expose
# proxy configuration without undici as an installable dep, and
# this project keeps the bottle image dep-light. NODE_EXTRA_CA_CERTS
# is wired by launch.py so the agent trusts mitmproxy's bumped cert.
#
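# Output contract (parsed by the test):
# - "status=<code>"    HTTP status of the decrypted response
# - "bridge=<value>"   X-Pipelock-Bridge header from the addon's
#                      short-circuit, empty on the allow path
# - "error=<...>"      error on the CONNECT request to the proxy
# - "tls_error=<...>"  error on the TLS socket inside the tunnel
# - "timeout=connect"  the CONNECT request timed out (8s)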
_PROBE_JS = r"""
const http = require('http');
const tls = require('tls');
const proxy = new URL(process.env.HTTPS_PROXY);
const body = 'token=' + process.env.FAKE_TOKEN;
const connectReq = http.request({
host: proxy.hostname,
port: proxy.port,
method: 'CONNECT',
path: 'api.anthropic.com:443',
});
connectReq.setTimeout(8000, () => {
console.log('timeout=connect');
connectReq.destroy();
});
connectReq.on('error', (e) => {
console.log('error=' + (e.code || '') + ' ' + e.message);
});
connectReq.on('connect', (res, socket) => {
if (res.statusCode !== 200) {
console.log('status=' + res.statusCode);
console.log('bridge=' + (res.headers['x-pipelock-bridge'] || ''));
return;
}
const tlsSocket = tls.connect({
socket: socket,
servername: 'api.anthropic.com',
});
tlsSocket.on('secureConnect', () => {
tlsSocket.write(
'POST /dlp-probe HTTP/1.1\r\n' +
'Host: api.anthropic.com\r\n' +
'Content-Type: application/x-www-form-urlencoded\r\n' +
'Content-Length: ' + Buffer.byteLength(body) + '\r\n' +
'Connection: close\r\n' +
'\r\n' + body
);
});
let buf = '';
tlsSocket.on('data', (c) => { buf += c.toString('utf8'); });
tlsSocket.on('end', () => {
const lines = buf.split('\r\n');
const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/);
let bridge = '';
for (let i = 1; i < lines.length; i++) {
if (lines[i] === '') break;
const ix = lines[i].indexOf(': ');
if (ix < 0) continue;
if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') {
bridge = lines[i].slice(ix + 2);
}
}
console.log('status=' + (m ? m[1] : '?'));
console.log('bridge=' + bridge);
});
tlsSocket.on('error', (e) => {
console.log('tls_error=' + (e.code || '') + ' ' + e.message);
});
});
connectReq.end();
"""
@skip_unless_docker()
class TestMitmproxyBlocksSecretHttpsPost(unittest.TestCase):
    """Block path: an HTTPS POST whose body carries a token-shaped
    value must come back as a 403 tagged X-Pipelock-Bridge: block."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_post_with_credential_body_is_blocked(self):
        # Shell wrapper executed inside the bottle: write the Node
        # probe to /tmp through a quoted heredoc, then run it.
        probe_script = "".join((
            "set -e\n",
            "cat > /tmp/probe.js <<'PROBE_EOF'\n",
            f"{_PROBE_JS}\n",
            "PROBE_EOF\n",
            "node /tmp/probe.js\n",
        ))

        # The probe reads process.env.FAKE_TOKEN; the bottle-level
        # `env` entry is presumably what exports it into the agent
        # container (confirm against the manifest loader).
        bottles = {"dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}}}
        agents = {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}
        manifest = Manifest.from_json_obj({
            "bottles": bottles,
            "agents": agents,
        })

        backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            plan = backend.prepare(
                BottleSpec(
                    manifest=manifest,
                    agent_name="demo",
                    copy_cwd=False,
                    user_cwd=str(scratch),
                    forward_oauth_token=False,
                ),
                stage_dir=scratch,
            )
            with backend.launch(plan) as bottle:
                result = bottle.exec(probe_script)
        finally:
            # Always remove the host-side stage dir, even when the
            # launch or the exec raised.
            shutil.rmtree(scratch, ignore_errors=True)

        self.assertEqual(
            0, result.returncode,
            f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}",
        )

        # On a pipelock block the addon short-circuits the flow with
        # X-Pipelock-Bridge: block. Seeing both the 403 and that
        # header is the cleanest end-to-end signal that
        # mitmproxy(bump) -> addon(forward) -> pipelock(scan) -> block
        # all took place.
        self.assertIn(
            "status=403", result.stdout,
            f"expected 403 from pipelock block; got: {result.stdout!r}",
        )
        self.assertIn(
            "bridge=block", result.stdout,
            f"X-Pipelock-Bridge header missing; the addon may not be "
            f"in path: {result.stdout!r}",
        )
if __name__ == "__main__":
unittest.main()
+62
View File
@@ -0,0 +1,62 @@
"""Unit: the addon's verdict function pinning pipelock-block vs.
relayed-upstream 4xx.
The fingerprint shape is the contract the addon depends on; this
test should break loudly if pipelock changes its 403-body prefix
under a version bump."""
from __future__ import annotations
import unittest
from claude_bottle.mitmproxy.addon import is_pipelock_block
class TestIsPipelockBlock(unittest.TestCase):
    """Pins the verdict `is_pipelock_block(status, body)` returns for
    pipelock's own 403 bodies versus other responses."""

    def test_block_dlp_body(self):
        # Body of a Pipelock v2.3.0 DLP block, captured in the impl
        # spike.
        dlp_body = b"blocked: request body contains secret: GitHub Token"
        verdict = is_pipelock_block(403, dlp_body)
        self.assertTrue(verdict)

    def test_block_allowlist_body(self):
        # Body of a Pipelock v2.3.0 allowlist block, captured in the
        # impl spike.
        allowlist_body = b"blocked: domain not in allowlist: example.com"
        verdict = is_pipelock_block(403, allowlist_body)
        self.assertTrue(verdict)

    def test_block_header_dlp_body(self):
        # Header-DLP path; per the spike it shares the body prefix.
        header_body = b"blocked: request header Authorization contains secret"
        verdict = is_pipelock_block(403, header_body)
        self.assertTrue(verdict)

    def test_403_without_blocked_prefix_is_not_a_block(self):
        # A real-upstream 403 that pipelock merely relayed: the body
        # is whatever upstream sent and almost certainly does not
        # start with `blocked: `. It must count as allow so the addon
        # hands the flow back to mitmproxy.
        upstream_body = (
            b'{"error":"forbidden","detail":"insufficient permissions"}'
        )
        verdict = is_pipelock_block(403, upstream_body)
        self.assertFalse(verdict)

    def test_non_403_with_blocked_prefix_is_not_a_block(self):
        # Defensive: should an intermediate ever answer 502/504 with
        # a body that happens to begin `blocked: `, the addon still
        # must not short-circuit. By contract a block is always 403.
        verdict = is_pipelock_block(502, b"blocked: ...")
        self.assertFalse(verdict)

    def test_200_is_not_a_block(self):
        # Allow path: an ordinary forwarded response.
        verdict = is_pipelock_block(200, b'{"ok":true}')
        self.assertFalse(verdict)

    def test_empty_body_is_not_a_block(self):
        # A 403 with no body carries no pipelock fingerprint.
        verdict = is_pipelock_block(403, b"")
        self.assertFalse(verdict)
if __name__ == "__main__":
unittest.main()