PRD 0005: mitmproxy TLS interception for pipelock content scanning #8

Closed
didericis wants to merge 6 commits from mitmproxy-tls-interception into main
14 changed files with 1409 additions and 14 deletions
+22 -11
View File
@@ -204,24 +204,35 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
"""Build/run the bottle and yield a handle; tear down on exit."""
def provision(self, plan: PlanT, target: str) -> str | None:
"""Copy host-side files (prompt, skills, SSH keys, .git) into
the running bottle. Called from `launch` after the container/
machine is up. `target` identifies the running instance in
backend-specific terms (Docker: resolved container name; fly:
machine id). Returns the in-container prompt path if a prompt
was provisioned, else None — the Bottle handle uses it to
decide whether to add --append-system-prompt-file to claude's
argv.
"""Copy host-side files (CA cert, prompt, skills, SSH keys,
.git) into the running bottle. Called from `launch` after the
container/machine is up. `target` identifies the running
instance in backend-specific terms (Docker: resolved container
name; fly: machine id). Returns the in-container prompt path
if a prompt was provisioned, else None — the Bottle handle
uses it to decide whether to add --append-system-prompt-file
to claude's argv.
Default orchestration: prompt → skills → ssh → git. Subclasses
typically don't override this; they implement the four
sub-methods below."""
Default orchestration: ca → prompt → skills → ssh → git.
CA goes first because it changes how the agent process trusts
the network; the rest don't depend on it but the order keeps
trust setup adjacent to the launch step. Subclasses typically
don't override this; they implement the sub-methods below."""
self.provision_ca(plan, target)
prompt_path = self.provision_prompt(plan, target)
self.provision_skills(plan, target)
self.provision_ssh(plan, target)
self.provision_git(plan, target)
return prompt_path
def provision_ca(self, plan: PlanT, target: str) -> None:
    """Install the egress-proxy's CA into the running bottle's
    trust store. Default impl is a no-op so backends that don't
    yet support TLS interception (every backend except Docker
    today) aren't forced to implement it. The Docker backend
    overrides to extract mitmproxy's CA and run
    `update-ca-certificates` inside the agent container.

    Unlike the other provisioning sub-methods this one is
    deliberately not abstract. `plan` and `target` carry the same
    meaning as in `provision`, which invokes this hook before the
    prompt / skills / ssh / git steps."""
@abstractmethod
def provision_prompt(self, plan: PlanT, target: str) -> str | None:
"""Copy the prompt file into the running bottle. Returns the
+12 -2
View File
@@ -23,7 +23,9 @@ from . import prepare as _prepare
from .bottle import DockerBottle
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy
from .pipelock import DockerPipelockProxy
from .provision import ca as _ca
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
@@ -38,15 +40,23 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
def __init__(self) -> None:
self._proxy = DockerPipelockProxy()
self._mitm = DockerMitmproxyProxy()
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
return _prepare.resolve_plan(spec, stage_dir=stage_dir, proxy=self._proxy)
return _prepare.resolve_plan(
spec, stage_dir=stage_dir, proxy=self._proxy, mitm=self._mitm,
)
@contextmanager
def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]:
with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle:
with _launch.launch(
plan, proxy=self._proxy, mitm=self._mitm, provision=self.provision,
) as bottle:
yield bottle
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
    """Docker override of the CA-provisioning hook: delegates to the
    `provision.ca` module, passing `plan` and `target` (the resolved
    agent container name) through unchanged."""
    _ca.provision_ca(plan, target)
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
return _prompt.provision_prompt(plan, target)
@@ -13,6 +13,7 @@ from pathlib import Path
from ...log import info
from ...manifest import Agent, Bottle
from ...mitmproxy import MitmproxyProxyPlan
from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist
from .. import BottlePlan
@@ -49,6 +50,7 @@ class DockerBottlePlan(BottlePlan):
forwarded_env: dict[str, str] = field(repr=False)
prompt_file: Path
proxy_plan: PipelockProxyPlan
mitmproxy_plan: MitmproxyProxyPlan
allowlist_summary: str
use_runsc: bool
@@ -93,6 +95,7 @@ class DockerBottlePlan(BottlePlan):
else:
info(" ssh hosts : (none)")
info(f" egress : {self.allowlist_summary}")
info(" tls intercept : mitmproxy (per-bottle ephemeral CA, generated at launch)")
info(
f"prompt : {len(v.agent.prompt)} chars; "
f"first line: {v.prompt_first_line or '(empty)'}"
@@ -117,6 +120,14 @@ class DockerBottlePlan(BottlePlan):
"egress": {
"host_count": len(hosts),
"hosts": hosts,
# Reserved for PRD 0005: TLS interception via mitmproxy.
# ca_fingerprint is always null at dry-run because the
# CA is generated by the sidecar at launch time. Real
# launches print the fingerprint to stderr.
"mitm": {
"enabled": True,
"ca_fingerprint": None,
},
},
"prompt": {
"length": len(v.agent.prompt),
+33 -1
View File
@@ -22,8 +22,15 @@ from . import network as network_mod
from . import util as docker_mod
from .bottle import DockerBottle
from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy, mitmproxy_proxy_url
from .pipelock import DockerPipelockProxy, pipelock_proxy_url
# Path inside the agent container where the mitmproxy CA cert lives
# after provision_ca runs. Exported as a module-level constant so
# both the agent's docker-run env trio and the provisioner agree.
AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-mitm.crt"
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
# Where the repo root lives, for `docker build` context. Computed once.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
@@ -34,6 +41,7 @@ def launch(
plan: DockerBottlePlan,
*,
proxy: DockerPipelockProxy,
mitm: DockerMitmproxyProxy,
provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]:
"""Build, launch, and provision a Docker bottle. Teardown on exit.
@@ -71,6 +79,17 @@ def launch(
pipelock_name = proxy.start(proxy_plan)
stack.callback(proxy.stop, pipelock_name)
# mitmproxy sits in front of pipelock on the agent's egress
# path. mitmproxy's `addon.py` reaches pipelock via the
# service-name URL we hand it here.
mitm_plan = dataclasses.replace(
plan.mitmproxy_plan,
internal_network=internal_network,
egress_network=egress_network,
)
mitm_name = mitm.start(mitm_plan, pipelock_url=pipelock_proxy_url(plan.slug))
stack.callback(mitm.stop, mitm_name)
container = _run_agent_container(plan, internal_network)
stack.callback(docker_mod.force_remove_container, container)
@@ -85,7 +104,10 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"""Build the `docker run` argv and execute it, handling name-
conflict races by incrementing the suffix (unless the name was
user-pinned). Returns the resolved container name."""
proxy_url = pipelock_proxy_url(plan.slug)
# Agent traffic routes through mitmproxy, not pipelock directly.
# mitmproxy decrypts and hands the plaintext to pipelock via its
# addon; pipelock is unchanged from PRD 0001.
proxy_url = mitmproxy_proxy_url(plan.slug)
docker_args: list[str] = [
"--rm", "-d",
"--name", plan.container_name,
@@ -93,6 +115,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"-e", f"HTTPS_PROXY={proxy_url}",
"-e", f"HTTP_PROXY={proxy_url}",
"-e", "NO_PROXY=localhost,127.0.0.1",
# CA trust trio for the agent process. Docker propagates
# run-time env into `docker exec`, so `claude` sees these
# without per-exec threading. NODE_EXTRA_CA_CERTS points at
# the cert file (Node appends it to its bundled roots);
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
# bundle that `update-ca-certificates` rebuilds in
# provision_ca.
"-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
"-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
"-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
]
if plan.use_runsc:
docker_args.extend(["--runtime", "runsc"])
+178
View File
@@ -0,0 +1,178 @@
"""DockerMitmproxyProxy — the Docker-specific lifecycle for the
mitmproxy sidecar. Inherits the addon-bundling from MitmproxyProxy.
The sidecar runs `mitmdump -s /addon/addon.py`, listens on
MITMPROXY_PORT inside the per-bottle internal network, and generates
its own ephemeral CA on first launch (extracted by provision_ca,
installed into the agent's trust store)."""
from __future__ import annotations
import os
import subprocess
import time
from pathlib import Path
from ...log import die, info, warn
from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan
# Sidecar image, pinned by digest to mitmproxy/mitmproxy:12.2.3 (the
# mitmproxy v12 release line). The digest names the multi-arch image
# index, so a pull resolves to the matching per-arch child digest.
# Bump it deliberately (see PRD 0005). Setting
# CLAUDE_BOTTLE_MITMPROXY_IMAGE in the environment overrides the pin.
MITMPROXY_IMAGE = os.getenv(
    "CLAUDE_BOTTLE_MITMPROXY_IMAGE",
    "mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f",
)

# Agent-facing listening port of mitmproxy's forward proxy. Kept as a
# string because it is only ever spliced into the sidecar's argv and
# into the proxy URL.
MITMPROXY_PORT = os.getenv("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080")

# Destination of the `docker cp` that drops the addon into the sidecar.
MITMPROXY_ADDON_PATH = "/addon/addon.py"

# Location, inside the sidecar, of the CA cert mitmproxy generates.
_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem"
def mitmproxy_container_name(slug: str) -> str:
    """Return the Docker container name of the mitmproxy sidecar that
    belongs to the bottle identified by `slug`."""
    return "claude-bottle-mitm-{}".format(slug)
def mitmproxy_proxy_url(slug: str) -> str:
    """Return the forward-proxy URL for the mitmproxy sidecar of the
    bottle identified by `slug`: plain `http://`, addressed by the
    sidecar's container name, on MITMPROXY_PORT."""
    sidecar_host = mitmproxy_container_name(slug)
    return f"http://{sidecar_host}:{MITMPROXY_PORT}"
class DockerMitmproxyProxy(MitmproxyProxy):
    """Brings the mitmproxy sidecar up and down via Docker."""

    # How long extract_ca_cert waits for mitmproxy to write its CA,
    # and how often it re-checks while waiting.
    _CA_WAIT_SECONDS = 15
    _CA_POLL_INTERVAL_SECONDS = 0.5

    @staticmethod
    def _docker(*args: str) -> subprocess.CompletedProcess[str]:
        """Run `docker <args>` and return the completed process.

        Output is captured rather than inherited, so nothing reaches
        the launcher's terminal but stderr stays available for failure
        messages. A non-zero exit never raises; callers inspect
        `returncode`."""
        return subprocess.run(
            ["docker", *args],
            capture_output=True,
            text=True,
            check=False,
        )

    @staticmethod
    def _detail(result: subprocess.CompletedProcess[str]) -> str:
        """Return `": <stderr>"` for a failed docker call, or `""` when
        docker printed nothing, ready to append to an error message."""
        stderr = (result.stderr or "").strip()
        return f": {stderr}" if stderr else ""

    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Boot the mitmproxy sidecar:

          1. `docker create` on the internal network with mitmdump
             argv: `--listen-port <port> -s <addon path>` plus the
             pipelock URL injected as an env var.
          2. `docker cp` the vendored addon to the sidecar.
          3. Attach to the per-agent egress network so mitmproxy
             can reach real upstreams.
          4. `docker start`.

        Any failure ends in `die`, with docker's stderr appended when
        there is any. A failure in steps 2-4 first force-removes the
        container created in step 1 so no half-configured sidecar is
        left behind.

        Returns the container name (the proxy_target passed to .stop
        and .extract_ca_cert)."""
        name = mitmproxy_container_name(plan.slug)
        if not plan.addon_src.is_file():
            die(f"mitmproxy addon not found at {plan.addon_src}")

        info(f"starting mitmproxy sidecar {name} on network {plan.internal_network}")
        created = self._docker(
            "create",
            "--name", name,
            "--network", plan.internal_network,
            "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}",
            MITMPROXY_IMAGE,
            "mitmdump",
            "--listen-port", MITMPROXY_PORT,
            "-s", MITMPROXY_ADDON_PATH,
        )
        if created.returncode != 0:
            die(f"failed to create mitmproxy sidecar {name}{self._detail(created)}")

        # Steps 2-4, in order: (docker argv, message if the step fails).
        setup_steps = (
            (
                ("cp", str(plan.addon_src), f"{name}:{MITMPROXY_ADDON_PATH}"),
                f"failed to copy mitmproxy addon into {name}",
            ),
            (
                ("network", "connect", plan.egress_network, name),
                f"failed to attach mitmproxy sidecar {name} to egress "
                f"network {plan.egress_network}",
            ),
            (
                ("start", name),
                f"failed to start mitmproxy sidecar {name}",
            ),
        )
        for docker_args, failure in setup_steps:
            result = self._docker(*docker_args)
            if result.returncode != 0:
                # Best-effort cleanup; its own outcome is ignored since
                # we are about to die with the original failure.
                self._docker("rm", "-f", name)
                die(f"{failure}{self._detail(result)}")

        return name

    def stop(self, proxy_target: str) -> None:
        """Idempotent: missing container is success. Mirrors
        DockerPipelockProxy.stop. A container that exists but cannot
        be removed only produces a warning with the manual cleanup
        command."""
        if self._docker("inspect", proxy_target).returncode != 0:
            # `docker inspect` failed, so there is nothing to remove.
            return
        if self._docker("rm", "-f", proxy_target).returncode != 0:
            warn(
                f"failed to remove mitmproxy sidecar {proxy_target}; "
                f"clean up with 'docker rm -f {proxy_target}'"
            )

    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Poll the running sidecar for the CA cert (mitmproxy
        generates it on first launch, typically <1s after start),
        then `docker cp` the public half to `dest_path`. The private
        key never leaves the container.

        Dies if the cert has not appeared within the wait window or if
        the copy itself fails."""
        deadline = time.monotonic() + self._CA_WAIT_SECONDS
        while time.monotonic() < deadline:
            probe = self._docker(
                "exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR,
            )
            if probe.returncode == 0:
                break
            time.sleep(self._CA_POLL_INTERVAL_SECONDS)
        else:
            # Loop ran out without `break`: the cert never showed up.
            die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} "
                f"after {self._CA_WAIT_SECONDS}s — sidecar {proxy_target} "
                f"may have failed to start")

        copied = self._docker(
            "cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path),
        )
        if copied.returncode != 0:
            die(f"failed to extract mitmproxy CA cert from {proxy_target}"
                f"{self._detail(copied)}")
+4
View File
@@ -19,6 +19,7 @@ from ...log import die
from .. import BottleSpec
from . import util as docker_mod
from .bottle_plan import DockerBottlePlan
from .mitmproxy import DockerMitmproxyProxy
from .pipelock import DockerPipelockProxy
@@ -27,6 +28,7 @@ def resolve_plan(
*,
stage_dir: Path,
proxy: DockerPipelockProxy,
mitm: DockerMitmproxyProxy,
) -> DockerBottlePlan:
"""Resolve Docker-specific names and write scratch files. Trusts
that the agent and its skills/SSH keys are present — validation
@@ -78,6 +80,7 @@ def resolve_plan(
prompt_file.chmod(0o600)
proxy_plan = proxy.prepare(bottle, slug, stage_dir)
mitmproxy_plan = mitm.prepare(slug)
resolved = resolve_env(manifest, spec.agent_name)
# Everything that should reach the bottle by-name (so its value
# never lands on argv or in env_file) goes into one dict. The
@@ -105,6 +108,7 @@ def resolve_plan(
forwarded_env=forwarded_env,
prompt_file=prompt_file,
proxy_plan=proxy_plan,
mitmproxy_plan=mitmproxy_plan,
allowlist_summary=allowlist_summary,
use_runsc=use_runsc,
)
@@ -0,0 +1,55 @@
"""Extract mitmproxy's CA cert and install it into the agent
container's trust store.
mitmproxy generates a fresh CA on first launch inside its sidecar.
This provisioner pulls the public cert through a host stage dir,
drops it into the agent at `/usr/local/share/ca-certificates/...`,
runs `update-ca-certificates` to rebuild the system bundle, and
emits a single stderr log line with the SHA-256 fingerprint."""
from __future__ import annotations
import hashlib
import ssl
import subprocess
from ....log import info
from ..bottle_plan import DockerBottlePlan
from ..launch import AGENT_CA_PATH
from ..mitmproxy import DockerMitmproxyProxy, mitmproxy_container_name
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
    """Install the mitmproxy sidecar's CA cert into the agent container
    `target` and log its fingerprint.

    Called from BottleBackend.provision after the agent container is
    up. The mitmproxy sidecar is already running (started during
    `launch`). The cert is staged on the host under `plan.stage_dir`
    on its way from sidecar to agent. Any failing docker command
    raises `subprocess.CalledProcessError`."""
    sidecar_name = mitmproxy_container_name(plan.mitmproxy_plan.slug)
    staged = plan.stage_dir / "mitm-ca.crt"
    DockerMitmproxyProxy().extract_ca_cert(sidecar_name, staged)

    # Copy the cert in, make it world-readable, then rebuild the
    # system bundle. The last two run as uid 0 inside the agent.
    as_root = ["docker", "exec", "-u", "0", target]
    install_steps = [
        ["docker", "cp", str(staged), f"{target}:{AGENT_CA_PATH}"],
        [*as_root, "chmod", "644", AGENT_CA_PATH],
        [*as_root, "update-ca-certificates"],
    ]
    for argv in install_steps:
        subprocess.run(argv, stdout=subprocess.DEVNULL, check=True)

    # Fingerprint = SHA-256 over the cert's DER encoding, computed
    # with the stdlib from the public cert only (the private key stays
    # in the sidecar). Only the first 32 hex digits are logged, once,
    # as an audit signal.
    der_bytes = ssl.PEM_cert_to_DER_cert(staged.read_text())
    digest = hashlib.sha256(der_bytes).hexdigest()
    info(f"mitm ca fingerprint: sha256:{digest[:32]}...")
+81
View File
@@ -0,0 +1,81 @@
"""mitmproxy TLS-interception sidecar for the per-bottle egress
topology (PRD 0005).
Sits in front of pipelock on the bottle's egress path so pipelock's
body / header / URL DLP scanners see plaintext for HTTPS targets.
The sidecar runs in mitmproxy's `regular` mode and loads the
vendored addon at `addon.py`; the addon forwards each decrypted
request to pipelock as a plain HTTP forward-proxy call and gates
the mitmproxy flow on pipelock's verdict.
This module is platform-agnostic: it owns the abstract proxy
lifecycle (prepare / start / stop / extract_ca_cert). The
Docker-specific lifecycle lives in
`claude_bottle/backend/docker/mitmproxy.py`.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Output of MitmproxyProxy.prepare; consumed by .start when the
    sidecar needs to be brought up.

    `addon_src` is the host-side path to the vendored addon.py,
    resolved at prepare time. `slug` is the per-agent identifier
    used as the suffix in every per-bottle resource name. The
    network fields default to empty and are populated by the
    backend's launch step (via dataclasses.replace) once those
    networks have actually been created — same pattern as
    PipelockProxyPlan."""

    # Host-side path to the vendored addon.py.
    addon_src: Path
    # Per-agent identifier; suffix of every per-bottle resource name.
    slug: str
    # Per-bottle internal network name; "" until the launch step sets it.
    internal_network: str = ""
    # Egress network name; "" until the launch step sets it.
    egress_network: str = ""
class MitmproxyProxy(ABC):
    """Abstract mitmproxy TLS-interception sidecar.

    Locating the addon (`prepare`) is platform-agnostic and implemented
    here. Bringing the sidecar up, tearing it down, and pulling the CA
    cert out of it are backend-specific, so concrete subclasses supply
    `start`, `stop` and `extract_ca_cert`."""

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Build the start plan for the bottle identified by `slug`.

        The addon is the `addon.py` checked in next to this module and
        is identical across bottles; per-bottle wiring (pipelock URL)
        is injected via env vars at start time, not via a generated
        config. Raises FileNotFoundError when the addon file is
        missing."""
        addon_src = Path(__file__).resolve().with_name("addon.py")
        if addon_src.is_file():
            return MitmproxyProxyPlan(addon_src=addon_src, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_src}; the "
            f"package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Bring up the sidecar described by `plan`.

        `pipelock_url` is injected into the sidecar's env (as
        CLAUDE_BOTTLE_PIPELOCK_URL) so the addon knows where to scan.
        Returns the proxy_target string identifying the running
        sidecar — the value to hand to `.stop` and
        `.extract_ca_cert`."""

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Tear down the sidecar named by `proxy_target`. Must be
        idempotent: a target that no longer exists counts as
        success."""

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Copy the sidecar's public CA cert to `dest_path` on the
        host, polling until the file exists (mitmproxy generates the
        CA on first launch). The private key never leaves the
        sidecar."""
+169
View File
@@ -0,0 +1,169 @@
"""mitmproxy addon: forward each decrypted request to pipelock for
scanning, then either short-circuit with pipelock's 403 (block) or
let mitmproxy proceed to the real upstream (allow).
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
Must be self-contained — the sidecar image doesn't have claude_bottle
on its import path. Imports are limited to the Python stdlib plus
mitmproxy itself (which is the host).
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
(injected by DockerMitmproxyProxy.start).
The verdict function `is_pipelock_block` is exported as a pure
function so unit tests can exercise it without importing mitmproxy.
"""
from __future__ import annotations
import logging
import os
import urllib.error
import urllib.request
# Name of the env var carrying pipelock's proxy URL into the sidecar.
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
# Timeout, in seconds, applied to each scan request sent to pipelock.
PIPELOCK_TIMEOUT_SEC = 5

# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
# The real header field is `Trailer`; the fixed list in RFC 2616
# §13.5.1 misnamed it `Trailers`. Both spellings are listed so the
# actual field is stripped and the historical entry keeps matching.
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
})

log = logging.getLogger("pipelock-bridge")
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Decide whether a response from pipelock is pipelock's own block
    verdict (DLP / allowlist) rather than an upstream 4xx it merely
    relayed.

    A block is always status 403 with a plain-text body starting with
    `blocked: <reason>`. A relayed upstream response carries whatever
    body the upstream sent, which is extremely unlikely to begin with
    `blocked: `. Pinned empirically against pipelock v2.3.0 in the
    impl spike (DLP block:
    "blocked: request body contains secret: GitHub Token";
    allowlist block: "blocked: domain not in allowlist: example.com").

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so this can match on
    a structured signal instead of pattern-matching the body."""
    if status != 403:
        return False
    return body_bytes.startswith(b"blocked: ")
class _NoRedirect(urllib.request.HTTPRedirectHandler):
    """Redirect handler that never follows a redirect, so a 3xx
    surfaces to the caller as an HTTPError carrying that status."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Returning None declines the redirect; urllib then falls
        # through to its default error handling and raises HTTPError.
        return None


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call. Returns (status, body_bytes) of pipelock's
    answer to that single request. Raises on transport-level errors
    so the caller can fail closed.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Redirects are deliberately not followed. The verdict must describe
    the request being scanned; following a relayed 3xx would make this
    function issue further requests of its own. Only the `http` scheme
    is mapped to pipelock here, so a redirect to an `https://` URL
    would not go through this proxy mapping at all. HTTP error
    statuses (including unfollowed 3xx) are returned, not raised."""
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirect,
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Close the response explicitly instead of leaving the
        # connection to the garbage collector.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object for non-2xx
        # statuses; read it, then release its connection.
        try:
            return e.code, e.read()
        finally:
            e.close()
class PipelockBridge:
    """mitmproxy addon that gates every decrypted request on pipelock's
    verdict. One instance is registered through the module-level
    `addons = [...]` list at the bottom of this file."""

    def __init__(self) -> None:
        # Read once per sidecar lifetime. An empty value is tolerated
        # here (so the module can be imported in test environments);
        # the request handler fails closed if it is still empty when a
        # request arrives.
        self._pipelock_url = os.getenv(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy callback, invoked for each decrypted client
        request before mitmproxy forwards it to the real upstream.

        Assigning `flow.response` short-circuits the flow with that
        response; leaving it unset lets mitmproxy proceed. Outcomes:
        503 when the scanner is unconfigured or unreachable (fail
        closed), pipelock's own 403 when it blocks, and no response
        set when it allows."""
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        def refuse(status, body, tag):
            # Short-circuit the flow with a plain-text response,
            # tagged so the refusal's origin is visible to the client.
            flow.response = http.Response.make(
                status,
                body,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": tag},
            )

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            refuse(503, b"egress scanner not configured", "misconfigured")
            return

        client_request = flow.request
        target_url = client_request.pretty_url
        method = client_request.method
        headers = dict(client_request.headers.items())
        body = bytes(client_request.content or b"")

        try:
            status, verdict_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as exc:
            # Fail closed: no verdict means refuse rather than risk
            # leaking.
            log.warning("pipelock unreachable; failing closed: %s", exc)
            refuse(503, b"egress scanner unreachable", "error")
            return

        if is_pipelock_block(status, verdict_body):
            refuse(status, verdict_body, "block")
            return

        # Allow path: pipelock's response is discarded (it is the
        # wasted upstream-forward attempt). flow.response stays unset,
        # so mitmproxy proceeds to the real upstream on its own.
# Module-level attribute through which mitmproxy picks up the addon
# instance when this file is loaded via `mitmdump -s`.
addons = [PipelockBridge()]
@@ -0,0 +1,437 @@
# PRD 0005: mitmproxy TLS interception for pipelock content scanning
- **Status:** Draft (updated 2026-05-12 after open-question walkthrough)
- **Author:** didericis
- **Created:** 2026-05-12
## Summary
Add a per-bottle **mitmproxy** sidecar in front of pipelock on the
egress path. mitmproxy bumps the agent's TLS CONNECT, decrypts the
inner HTTP, and hands each request to a vendored Python addon. The
addon forwards the decrypted request to pipelock as a plain HTTP
forward-proxy call so pipelock's DLP, URL-scan, and header-scan
layers fire on real bodies. On the verdict, the addon either
short-circuits the flow with a 403 (block) or lets mitmproxy
proceed to the real upstream (allow). mitmproxy itself generates
the ephemeral per-bottle CA on startup; the public cert is copied
into the agent's trust store and the private key dies with the
sidecar on teardown.
This is Topology A' from `docs/research/tls-mitm-for-pipelock.md` —
a variant of the research note's Topology A, adopted after a spike showed
mitmproxy's `upstream` mode re-wraps decrypted flows in a new
CONNECT to the upstream proxy (which would defeat the entire
point). The addon recovers the design by emitting plain HTTP to
pipelock explicitly instead of relying on mitmproxy's `upstream`
chaining.
## Problem
PRD 0001 wired pipelock onto every bottle's egress, but the current
topology only sees `CONNECT` hostnames and opaque TLS bytes:
```
agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet
\____________________________
opaque TLS bytes
```
What pipelock cannot scan in this mode is documented in
`docs/research/tls-mitm-for-pipelock.md` §What pipelock cannot see
today: request URLs and methods, request and response headers,
request and response bodies, MCP JSON-RPC payloads, inner-vs-outer
hostname (the domain-fronting check), and WebSocket frames inside a
TLS-wrapped upgrade. The 48-pattern DLP layer this project relies on
in PRD 0001 is therefore inert against every host in the current
`DEFAULT_ALLOWLIST` — all of which are HTTPS-only.
The integration test added in `tests/integration/test_pipelock_blocks_secret_post.py`
demonstrates the gap concretely: pipelock's body-scan layer only
fires when the agent is forced to send plain HTTP. Real Claude Code
traffic to `api.anthropic.com` goes over CONNECT-tunneled TLS and
slips past the scanner.
`pipelock-assessment.md` §Scope gaps names this as a known
limitation of the proxy-without-TLS-inspection shape. Closing it is
the explicit motivation for `tls-mitm-for-pipelock.md`, whose
recommendation this PRD implements (with the addon adjustment
forced by the upstream-mode spike).
## Goals / Success Criteria
The feature works when all of the following are observable:
- A Node request from inside a launched bottle to a CONNECT-bumped
HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) carrying a
pipelock-recognized credential pattern in the body returns 403
from the bottle's egress chain — not a response from the upstream.
The existing `test_pipelock_blocks_secret_post` test path becomes
the HTTPS variant of this assertion.
- A plain HTTPS GET from inside the bottle to an allowlisted host
with no credential pattern (e.g. `GET https://raw.githubusercontent.com/...`)
returns the real upstream response — the addon doesn't break
clean traffic.
- Claude Code itself reaches `api.anthropic.com` end-to-end through
the bottle and completes a chat round-trip. No TLS-trust errors
in the agent process.
- mitmproxy's flow log and pipelock's `body_dlp` / `header_dlp` /
`core_dlp` event lines both appear for the same outbound request,
confirming the two-stage path is active.
The feature is **done** when all of the following ship:
- A new `MitmproxyProxy` class with the same `prepare` / `start` /
`stop` lifecycle shape as `PipelockProxy`, wired into the Docker
backend's launch step.
- A vendored Python addon at `claude_bottle/mitmproxy/addon.py`
that mitmproxy loads on startup via `mitmdump -s ...`. The sidecar
runs in `regular` mode (default), not `upstream` mode.
- The bottle launch step starts the mitmproxy sidecar, waits for
the sidecar-internal CA to be generated, copies the CA public
cert into the agent at `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
runs `update-ca-certificates` inside the agent, and threads the
`NODE_EXTRA_CA_CERTS` / `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE`
env trio onto the agent container's runtime env.
- The agent's `HTTPS_PROXY` / `HTTP_PROXY` point at the mitmproxy
sidecar (where they pointed at pipelock under PRD 0001).
- pipelock is otherwise unchanged. It continues to load the YAML
PRD 0001 generates and runs its existing scanning pipeline; the
addon talks to it via the same forward-proxy interface today's
`test_pipelock_blocks_secret_post` uses.
- On bottle teardown the mitmproxy sidecar is removed and the
ephemeral CA private key is gone with it.
- An HTTPS variant of `test_pipelock_blocks_secret_post` proves
pipelock now blocks a credential POST over HTTPS rather than
plain HTTP.
- An integration test proves a non-credential HTTPS GET through
the chain returns the upstream's real response.
- The dry-run preflight (`start --dry-run`) shows the mitmproxy
sidecar in both text and `--format=json` output. The JSON
contract gains a reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }`
block; fingerprint is always null at dry-run because the CA
doesn't exist yet. Real launches emit a one-line stderr log:
`claude-bottle: mitm ca fingerprint: <sha256-first-16>...`.
## Non-goals
- **Topology C** — extending pipelock itself to terminate TLS. The
research note's recommended long-term shape, but substantial Go
work plus the Apache-2.0-vs-ELv2 question. Deferred.
- **Topology D as canonical** — mitmproxy with a pipelock `/scan`
HTTP endpoint. The addon in this PRD talks to pipelock via its
existing forward-proxy interface; no upstream pipelock change
needed.
- **Persistent or shared CA across bottles.** Each bottle gets a
fresh CA generated by its own mitmproxy at startup.
- **Selective bumping ("ignore_hosts") as a v1 manifest field.**
v1 bumps every CONNECT. If a future allowlisted host turns out
to pin (Mobile / Chromium-style cert pinning), a follow-up PRD
adds the per-host opt-out via `bottle.egress.tls_bump_ignore`.
Strictly additive.
- **HTTP/3 / QUIC.** mitmproxy's HTTP/3 support is experimental.
v1 relies on the v1-egress iptables layer blocking UDP/443 to
force clients onto HTTP/2 over TCP, which mitmproxy 12 inspects
natively (verified by spike).
- **Raw TCP / non-HTTP TLS interception.** mitmproxy supports it
via `--mode reverse:`, not in CONNECT-bump mode. SSH and any
future raw-TCP egress route around mitmproxy entirely.
- **Trust-store rewiring for non-Debian agent images.** The
current `Dockerfile` is `node:22-slim` (Debian). If a future base
switches to Red-Hat-family, the `update-ca-certificates` step
becomes `update-ca-trust`. Out of scope until the base changes.
- **Response-body scanning.** Pipelock supports it; we don't wire
it in v1 because the addon would need to ferry the upstream
response back through pipelock's scanner, which the forward-
proxy interface doesn't support cleanly. v2 candidate.
- **MCP scanning on the bumped path.** Only fires on MCP-formatted
JSON-RPC payloads inside tool calls. Not relevant to plain HTTPS
agent traffic and out of v1 scope.
- **Domain-fronting verification.** Once the addon sees the inner
`Host` / `:authority`, comparing it to the outer CONNECT target
catches domain fronting. Worth ~10 lines in the addon, but
defer until the rest of v1 is settled.
- **Host-side openssl / `cryptography` for CA generation.** The
research note's open question on this is resolved by letting
mitmproxy itself generate the CA (it does so on first launch).
No new host-side crypto.
## Scope
### In scope
- New `claude_bottle/mitmproxy/` package:
- `__init__.py` — backend-agnostic. Constants (sidecar port,
image-pin digest, the in-container addon path), the abstract
`MitmproxyProxy` class with `prepare` / `start` / `stop` shape
mirroring `PipelockProxy`, and the small helper that reads the
CA fingerprint from a PEM file via `openssl x509 -fingerprint`
shelled out.
- `addon.py` — the Python addon mitmproxy loads. ~80–150 lines.
For each `request` event: forward the decrypted request to
pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a
plain HTTP forward-proxy call (absolute-URI form). Inspect
pipelock's response. If status is 403 *and* the body matches
pipelock's known block-event shape, set the flow's response to
a 403 with pipelock's body and short-circuit. Otherwise,
discard pipelock's response (and any wasted upstream-leg
response from pipelock's forwarder) and let mitmproxy proceed
to the real upstream.
- New `claude_bottle/backend/docker/mitmproxy.py` —
`DockerMitmproxyProxy(MitmproxyProxy)` with the Docker-specific
start/stop lifecycle. `start(plan)` does `docker create` /
`docker cp addon.py …` / `docker network connect` / `docker start`,
analogous to the existing `DockerPipelockProxy.start`. Injects
`CLAUDE_BOTTLE_PIPELOCK_URL` into the sidecar env so the addon
knows where pipelock lives.
- New provisioner `claude_bottle/backend/docker/provision/ca.py`.
Polls mitmproxy for the cert file, copies it through a host
stage dir into the agent, runs `update-ca-certificates` inside
the agent, computes the SHA-256 fingerprint, and prints the
one-line stderr log.
- `BottleBackend.provision_ca(plan, target)` joins the four
existing provisioner methods on the abstract base. Default impl
is no-op so other backends don't break when they don't yet
implement TLS interception.
- `DockerBottlePlan` grows a `mitmproxy_plan` field mirroring the
existing `proxy_plan`.
- Agent container `docker run` invocation:
- `HTTPS_PROXY` / `HTTP_PROXY` change from the pipelock service
name to the mitmproxy service name.
- Three `-e` flags set the CA env trio so they're inherited by
the eventual `docker exec claude` (Docker propagates run-time
env into exec by default; fallback in Q1 below).
- Dry-run preflight rendering of the mitmproxy entry (text + JSON).
JSON gains `egress.mitm: { "enabled": true, "ca_fingerprint": null }`.
- One stderr log line at launch with the CA fingerprint.
- Two new integration tests under `tests/integration/`:
- `test_mitmproxy_blocks_secret_https_post.py` — HTTPS variant
of the existing block-secret test. Asserts pipelock's body
DLP fires on a credential POST tunneled through CONNECT.
- `test_mitmproxy_allows_normal_https.py` — confirms a plain
HTTPS GET on an allowlisted host returns the upstream response,
isolating the addon's pass-through path from the block path.
- Unit tests for the addon's verdict logic (block vs allow on
status + body shape, edge cases) using mitmproxy's `mitmproxy.test`
flow fixtures. Unit tests for the proxy config builder
(mirroring `tests/unit/test_pipelock_yaml.py`).
### Out of scope
- The v1 iptables + dnsmasq layer (separate PRD; see
`network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only;
raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer.
- Pipelock config changes. Pipelock continues to load the YAML
PRD 0001 generates; the addon talks to it via the existing
forward-proxy interface.
- A bottle-level toggle to skip mitmproxy entirely. v1 always
wires it in.
- Pinning-host detection automation. The cost of finding out (per
research) is a single 5-minute test before adding a host; it
stays a manual step.
- Pipelock upstream contributions for an `X-Pipelock-Verdict` header.
Possible follow-up. Until then the addon distinguishes blocks
from passes via status + body fingerprint.
## Proposed Design
### Topology
```
agent --HTTPS_PROXY--> mitmproxy --addon--> pipelock (scan)
(bump TLS) |
^ | (verdict via status code)
| v
+-- on allow ----- real upstream
(mitmproxy as client)
```
All three containers live on the same per-bottle internal Docker
network. mitmproxy and pipelock are both attached to the per-bottle
egress bridge for real-internet reach; the agent has no default
route.
Concretely:
- Agent sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`.
PRD 0001 had this pointing at pipelock; the hostname swap is the
only agent-side env change.
- mitmproxy runs in **`regular`** mode (default; no `--mode` flag).
It bumps every CONNECT, generates fake leaf certs signed by its
own CA, and presents them to the agent.
- The addon, loaded via `mitmdump -s /addon/addon.py`, intercepts
each decrypted `request` event. It forwards the request to
pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a
plain HTTP forward-proxy call (absolute-URI form), so pipelock
sees the full URL, headers, and body.
- The addon inspects pipelock's response. If status is 403 *and*
the response body matches pipelock's known block-event shape,
the addon sets the mitmproxy flow's response to a 403 with
pipelock's body and short-circuits. Otherwise — including the
case where pipelock's forwarder attempted the upstream and got
a 4xx — the addon discards pipelock's response and lets
mitmproxy proceed to the real upstream.
- mitmproxy completes the outbound TLS to the real destination
using its built-in trust store, just like any other forward
proxy. Pipelock is only involved as a scanner.
The trade-off: pipelock makes a wasted upstream forward attempt
for every allowed request (it tries to forward over plain HTTP to
a real HTTPS-only host, which fails with the upstream's 4xx). This
is benign — the scan completes before forwarding, the verdict
reaches the addon, the upstream-side request happens to die in
pipelock's forwarder rather than reach the agent. Acceptable cost
for the visibility win. A pipelock-side improvement (skip the
forward when the addon only needs the scan verdict) is a future
optimization.
### New components
- `claude_bottle/mitmproxy/__init__.py` — backend-agnostic
abstract base, constants, the `openssl x509 -fingerprint` helper.
- `claude_bottle/mitmproxy/addon.py` — the scanning addon.
Reads pipelock's URL from `CLAUDE_BOTTLE_PIPELOCK_URL` (injected
into the sidecar env by the proxy's `start`). For each
`request` flow: synchronously forward the request to pipelock
(plain-HTTP forward-proxy call, absolute-URI form); inspect the
status and body; either short-circuit with 403 or fall through.
- `claude_bottle/backend/docker/mitmproxy.py` —
`DockerMitmproxyProxy(MitmproxyProxy)` with start/stop, the
`docker cp` of the addon into the sidecar before `docker start`,
and the `CLAUDE_BOTTLE_PIPELOCK_URL` wiring.
### CA lifecycle
Simplified by letting mitmproxy own the generation:
- **Generation.** mitmproxy generates a fresh CA on startup
inside its container at `/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem`
(public) + `mitmproxy-ca.pem` (private). No host-side openssl
for *generation*; no host-side Python `cryptography` dep.
- **Volume strategy.** Container-internal only. No host bind
mount means the CA dies with the container.
- **Extraction.** `provision_ca` polls (~1s) for the cert file
via `docker exec`, then `docker cp` to host stage dir, then
`docker cp` into the agent. Host stage dir gets cleaned up by
the existing `start.py` `finally` block.
- **Bottle install.**
1. `docker cp <host stage>/mitm-ca.crt agent-<slug>:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`
2. `docker exec -u 0 agent-<slug> chmod 644 …`
3. `docker exec -u 0 agent-<slug> update-ca-certificates`
4. Three `-e` flags on `docker run` set the env trio
(`NODE_EXTRA_CA_CERTS=…/claude-bottle-mitm.crt`,
`SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`,
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`) so
`docker exec claude` inherits them.
- **Teardown.** Sidecar container removed; CA private key gone.
- **Fingerprint.** Computed post-extraction via shelled-out
`openssl x509 -fingerprint -sha256 -noout`. Logged once to
stderr at launch; never the private key.
### Data model changes
None to the manifest schema. The dry-run JSON contract gains a
reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }`
block. Fingerprint is always null at dry-run (CA doesn't exist
yet) but the field is reserved so future schema additions stay
non-breaking.
A future selective-bump knob would add
`bottle.egress.tls_bump_ignore: [host, ...]` per the research
note. Strictly additive when it lands.
### Existing code touched
- **`claude_bottle/backend/docker/launch.py`** — bring up the
mitmproxy sidecar between pipelock and the agent. Repoint the
agent's `HTTPS_PROXY` / `HTTP_PROXY` env flags to mitmproxy.
Register an `ExitStack` callback for mitmproxy teardown. Print
the CA fingerprint once the sidecar reports ready.
- **`claude_bottle/backend/docker/prepare.py`** — call into
`MitmproxyProxy.prepare(...)` alongside `PipelockProxy.prepare(...)`,
populate `DockerBottlePlan.mitmproxy_plan`.
- **`claude_bottle/backend/docker/backend.py`** — add the
`DockerMitmproxyProxy` instance attribute (`self._mitm`) and
thread it through `launch` + cleanup, mirroring `self._proxy`.
- **`claude_bottle/backend/docker/bottle_plan.py`** — new
`mitmproxy_plan` field. `print()` and `to_dict()` learn to
render the mitmproxy entry and the `egress.mitm` JSON block.
- **`claude_bottle/backend/__init__.py`** — abstract
`BottleBackend.provision_ca` joins the four existing
provisioners; default no-op.
- **`tests/integration/`** — two new tests as described above.
- **`tests/unit/`** — addon-verdict tests, mitmproxy-config
builder tests, dry-run-plan test updated for the new
`egress.mitm` block.
### External dependencies
- **mitmproxy Docker image** pinned by digest on the `12.x` line.
Bumped deliberately, mirroring the pipelock pin. Verified by
spike to speak h2 on both halves.
- No new host-side runtimes. mitmproxy generates the CA;
fingerprint via the `openssl` already present on Debian / macOS
/ ubuntu-latest runners.
## Open questions
(rewritten — most of the original v1 questions are now closed by
the walkthrough spikes; what remains is addon-implementation
specifics worth pinning during the first impl turn.)
- **Pipelock's 403-body fingerprint.** The addon needs to
distinguish a pipelock block (DLP / host) from a real-upstream
4xx that pipelock's forwarder relayed back. Most likely shape:
pipelock's 403 response carries a JSON body with `event` /
`scanner` fields, whereas a real-upstream 4xx carries whatever
the upstream sent. Pin the exact fingerprint by inspecting
pipelock's actual 403 body bytes at impl time. Long-term
cleanup: file an upstream feature request for an
`X-Pipelock-Verdict: block` response header so the addon can
read a structured signal instead of pattern-matching the body.
- **Docker run env-var inheritance through docker exec.** Plan
assumes `docker run -e VAR=value` propagates to subsequent
`docker exec` invocations. The Docker docs say so; not yet
empirically pinned on this project's runner setup. Verify in
the first impl turn. Trivial fallback: thread the three `-e`
flags onto every `DockerBottle.exec*` call.
- **Addon synchronous-call latency.** The addon makes a sync HTTP
call to pipelock per outbound flow. Pipelock is on the same
internal Docker network; expected per-call latency is well
under 10ms. Confirm under the parallel-request load Claude Code
generates (most likely a non-issue — Claude is single-stream
request-wise).
- **Addon test fixtures.** mitmproxy ships `mitmproxy.test` with
flow fixtures; addons can be unit-tested without a running
proxy. Confirm the import path and recommended fixture shape at
impl time; structure the addon so the verdict-decision is a
pure function that's trivially testable in isolation from any
HTTP I/O.
- **Pipelock allowing the addon's forwarded request through.**
pipelock will see the addon's request as coming from the
mitmproxy sidecar's IP on the internal network. Confirm
pipelock has no client-IP allowlist that would reject these.
Likely fine — pipelock's `client_ip` is informational in the
scan event, not a gate.
## References
- `docs/research/tls-mitm-for-pipelock.md` — primary source. This
PRD implements a variant of §Recommendation (Topology A) after
the spike documented under "Open questions" §1 falsified the
`upstream` mode assumption.
- `docs/research/pipelock-assessment.md` §Scope gaps — names the
TLS-inspection gap closed here.
- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` —
egress-proxy baseline this PRD extends.
- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC
contract this PRD adds a `provision_ca` method to.
- `docs/prds/0004-split-out-provisioners.md` — per-provisioner
module pattern reused for the new CA provisioner.
- mitmproxy: <https://mitmproxy.org>,
<https://github.com/mitmproxy/mitmproxy>
- mitmproxy modes: <https://docs.mitmproxy.org/stable/concepts/modes/>
- mitmproxy CA cert installation:
<https://docs.mitmproxy.org/stable/concepts/certificates/>
- mitmproxy addon API: <https://docs.mitmproxy.org/stable/addons-overview/>
- Node `NODE_EXTRA_CA_CERTS`:
<https://nodejs.org/api/cli.html#node_extra_ca_certsfile>
+6
View File
@@ -92,6 +92,12 @@ class TestDryRunPlan(unittest.TestCase):
self.assertEqual(sorted(set(hosts)), hosts,
"hosts must be sorted and deduplicated")
# PRD 0005: TLS interception block is part of the JSON
# contract. Fingerprint is null at dry-run (CA doesn't
# exist yet); real launches print it to stderr.
self.assertEqual({"enabled": True, "ca_fingerprint": None},
plan["egress"]["mitm"])
# No Docker side effects (see the GITEA_ACTIONS skip note
# above — this guard runs locally only).
if check_side_effects:
@@ -0,0 +1,167 @@
"""Integration: with mitmproxy in front of pipelock, a plain HTTPS
GET to an allowlisted host with no credential pattern still gets
through end-to-end.
The complement to test_mitmproxy_blocks_secret_https_post — together
they isolate the addon's two paths (block vs. allow). This test
also functions as the end-to-end TLS-trust check: if the agent's
trust store didn't have mitmproxy's CA installed, the TLS handshake
between the agent and mitmproxy's bumped cert would fail and the
fetch would throw before we ever saw a response.
"""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from tests._docker import skip_unless_docker
from tests.fixtures import fixture_minimal
# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST.
# Pick a file path that's stable enough across runs — `git`'s own
# README.md on the master branch is a long-lived artifact and one
# of github's most-trafficked raw files.
# NOTE(review): the probe below hard-codes the same host and path
# instead of reading this constant; keep the two in sync by hand.
_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md"
# Node probe run inside the bottle. Uses stdlib http (for CONNECT) +
# tls (for the bumped tunnel); see the block test for the rationale
# on not pulling undici in as a dep. The proxy address comes from the
# HTTPS_PROXY env var of the process running the probe.
#
# Output contract (one "key=value" line each on stdout):
#   - "status=<code>"    HTTP status from upstream (or addon, if
#                        blocked); also printed, with "bridge=", when
#                        the CONNECT itself is answered with non-200
#   - "bridge=<value>"   X-Pipelock-Bridge header; empty on allow
#   - "len=<N>"          length of the UTF-8-decoded response body,
#                        sanity-check it's a real response and not an
#                        empty proxy stub
#   - "error=<...>"      error raised on the CONNECT request
#   - "tls_error=<...>"  error raised on the TLS socket in the tunnel
#   - "timeout=connect"  the CONNECT request hit its 10 s timeout
_PROBE_JS = r"""
const http = require('http');
const tls = require('tls');
const proxy = new URL(process.env.HTTPS_PROXY);
const connectReq = http.request({
host: proxy.hostname,
port: proxy.port,
method: 'CONNECT',
path: 'raw.githubusercontent.com:443',
});
connectReq.setTimeout(10000, () => {
console.log('timeout=connect');
connectReq.destroy();
});
connectReq.on('error', (e) => {
console.log('error=' + (e.code || '') + ' ' + e.message);
});
connectReq.on('connect', (res, socket) => {
if (res.statusCode !== 200) {
console.log('status=' + res.statusCode);
console.log('bridge=' + (res.headers['x-pipelock-bridge'] || ''));
return;
}
const tlsSocket = tls.connect({
socket: socket,
servername: 'raw.githubusercontent.com',
});
tlsSocket.on('secureConnect', () => {
tlsSocket.write(
'GET /git/git/master/README.md HTTP/1.1\r\n' +
'Host: raw.githubusercontent.com\r\n' +
'User-Agent: claude-bottle-mitm-test\r\n' +
'Accept: */*\r\n' +
'Connection: close\r\n' +
'\r\n'
);
});
let buf = Buffer.alloc(0);
tlsSocket.on('data', (c) => { buf = Buffer.concat([buf, c]); });
tlsSocket.on('end', () => {
const text = buf.toString('utf8');
const headersEnd = text.indexOf('\r\n\r\n');
const head = headersEnd >= 0 ? text.slice(0, headersEnd) : text;
const body = headersEnd >= 0 ? text.slice(headersEnd + 4) : '';
const lines = head.split('\r\n');
const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/);
let bridge = '';
for (let i = 1; i < lines.length; i++) {
const ix = lines[i].indexOf(': ');
if (ix < 0) continue;
if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') {
bridge = lines[i].slice(ix + 2);
}
}
console.log('status=' + (m ? m[1] : '?'));
console.log('bridge=' + bridge);
console.log('len=' + body.length);
});
tlsSocket.on('error', (e) => {
console.log('tls_error=' + (e.code || '') + ' ' + e.message);
});
});
connectReq.end();
"""
@skip_unless_docker()
class TestMitmproxyAllowsNormalHttps(unittest.TestCase):
    """Allow path: a credential-free HTTPS GET must survive the
    mitmproxy -> pipelock chain and come back from the real upstream."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_get_to_allowed_host_succeeds(self):
        docker_backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        # The heredoc delimiter is quoted so the shell writes the probe
        # source to disk verbatim before node runs it.
        probe_script = "".join([
            "set -e\n",
            "cat > /tmp/probe.js <<'PROBE_EOF'\n",
            f"{_PROBE_JS}\n",
            "PROBE_EOF\n",
            "node /tmp/probe.js\n",
        ])
        try:
            bottle_spec = BottleSpec(
                manifest=fixture_minimal(),
                agent_name="demo",
                copy_cwd=False,
                user_cwd=str(scratch),
                forward_oauth_token=False,
            )
            bottle_plan = docker_backend.prepare(bottle_spec, stage_dir=scratch)
            with docker_backend.launch(bottle_plan) as bottle:
                result = bottle.exec(probe_script)
        finally:
            # The stage dir is scratch space; remove it whether or not
            # the launch succeeded.
            shutil.rmtree(scratch, ignore_errors=True)

        # The shell wrapper itself (heredoc + node) has to exit cleanly.
        self.assertEqual(
            0,
            result.returncode,
            f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}",
        )

        # A 200 doubles as the TLS-trust check: without mitmproxy's CA
        # in the agent's trust store the probe would report an error
        # line instead of a status.
        self.assertIn(
            "status=200",
            result.stdout,
            f"expected 200 from raw.githubusercontent.com; got: {result.stdout!r}",
        )

        # The addon sets X-Pipelock-Bridge only when it short-circuits
        # (block / misconfigured / scanner-unreachable); on an allowed
        # flow the probe must print an empty value.
        self.assertIn(
            "bridge=\n",
            result.stdout,
            f"X-Pipelock-Bridge unexpectedly present on the allow "
            f"path: {result.stdout!r}",
        )

        # The README is many KB, so a zero-length body would mean the
        # response was synthesized somewhere in the chain rather than
        # fetched from github.
        self.assertNotIn("len=0\n", result.stdout)
if __name__ == "__main__":
unittest.main()
@@ -0,0 +1,172 @@
"""Integration: with mitmproxy in front of pipelock, a credential
POST sent over HTTPS is now blocked by pipelock's body-scan layer.
This is the HTTPS variant of test_pipelock_blocks_secret_post — the
two together prove the TLS-interception layer is doing the work the
PRD targets. The earlier plain-HTTP test only fired because the agent
was forced to bypass TLS; real Claude Code traffic to api.anthropic.com
goes over CONNECT-tunneled HTTPS and would have slipped past pipelock
prior to this PRD.
End-to-end: drives `BottleBackend.prepare → launch` so the real
image build, network plumbing, pipelock sidecar, mitmproxy sidecar,
ephemeral CA generation, and trust-store install are all in the
loop.
"""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from claude_bottle.manifest import Manifest
from tests._docker import skip_unless_docker
# Synthetic value shaped like a GitHub Personal Access Token; not a
# real credential. Pipelock's default DLP rules pattern-match this
# format and mitmproxy's addon short-circuits with the 403 it
# receives back. The probe reads it from the FAKE_TOKEN env var,
# which the test sets through the manifest's bottle env.
_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
# Build the request by hand using stdlib `http` (for CONNECT) and
# `tls` (for the bumped tunnel). Node 22's `fetch` doesn't expose
# proxy configuration without undici as an installable dep, and
# this project keeps the bottle image dep-light. NODE_EXTRA_CA_CERTS
# is wired by launch.py so the agent trusts mitmproxy's bumped cert.
#
# Output contract (parsed by the test; one "key=value" line each):
#   - "status=<code>"    HTTP status of the decrypted response; also
#                        printed, with "bridge=", when the CONNECT
#                        itself is answered with non-200
#   - "bridge=<value>"   X-Pipelock-Bridge header from the addon's
#                        short-circuit, empty on the allow path
#   - "error=<...>"      error raised on the CONNECT request
#   - "tls_error=<...>"  error raised on the TLS socket in the tunnel
#   - "timeout=connect"  the CONNECT request hit its 8 s timeout
_PROBE_JS = r"""
const http = require('http');
const tls = require('tls');
const proxy = new URL(process.env.HTTPS_PROXY);
const body = 'token=' + process.env.FAKE_TOKEN;
const connectReq = http.request({
host: proxy.hostname,
port: proxy.port,
method: 'CONNECT',
path: 'api.anthropic.com:443',
});
connectReq.setTimeout(8000, () => {
console.log('timeout=connect');
connectReq.destroy();
});
connectReq.on('error', (e) => {
console.log('error=' + (e.code || '') + ' ' + e.message);
});
connectReq.on('connect', (res, socket) => {
if (res.statusCode !== 200) {
console.log('status=' + res.statusCode);
console.log('bridge=' + (res.headers['x-pipelock-bridge'] || ''));
return;
}
const tlsSocket = tls.connect({
socket: socket,
servername: 'api.anthropic.com',
});
tlsSocket.on('secureConnect', () => {
tlsSocket.write(
'POST /dlp-probe HTTP/1.1\r\n' +
'Host: api.anthropic.com\r\n' +
'Content-Type: application/x-www-form-urlencoded\r\n' +
'Content-Length: ' + Buffer.byteLength(body) + '\r\n' +
'Connection: close\r\n' +
'\r\n' + body
);
});
let buf = '';
tlsSocket.on('data', (c) => { buf += c.toString('utf8'); });
tlsSocket.on('end', () => {
const lines = buf.split('\r\n');
const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/);
let bridge = '';
for (let i = 1; i < lines.length; i++) {
if (lines[i] === '') break;
const ix = lines[i].indexOf(': ');
if (ix < 0) continue;
if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') {
bridge = lines[i].slice(ix + 2);
}
}
console.log('status=' + (m ? m[1] : '?'));
console.log('bridge=' + bridge);
});
tlsSocket.on('error', (e) => {
console.log('tls_error=' + (e.code || '') + ' ' + e.message);
});
});
connectReq.end();
"""
@skip_unless_docker()
class TestMitmproxyBlocksSecretHttpsPost(unittest.TestCase):
    """Block path: a credential-shaped POST body sent over HTTPS must
    come back as the addon's 403 short-circuit, not an upstream reply."""

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_post_with_credential_body_is_blocked(self):
        # The fake token travels into the bottle as an env var; the
        # probe reads it back to build the POST body.
        bottle_env = {"FAKE_TOKEN": _FAKE_TOKEN}
        manifest = Manifest.from_json_obj({
            "bottles": {
                "dev": {"env": bottle_env},
            },
            "agents": {
                "demo": {"skills": [], "prompt": "", "bottle": "dev"},
            },
        })
        docker_backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        # The heredoc delimiter is quoted so the shell writes the probe
        # source to disk verbatim before node runs it.
        probe_script = "".join([
            "set -e\n",
            "cat > /tmp/probe.js <<'PROBE_EOF'\n",
            f"{_PROBE_JS}\n",
            "PROBE_EOF\n",
            "node /tmp/probe.js\n",
        ])
        try:
            bottle_spec = BottleSpec(
                manifest=manifest,
                agent_name="demo",
                copy_cwd=False,
                user_cwd=str(scratch),
                forward_oauth_token=False,
            )
            bottle_plan = docker_backend.prepare(bottle_spec, stage_dir=scratch)
            with docker_backend.launch(bottle_plan) as bottle:
                result = bottle.exec(probe_script)
        finally:
            # The stage dir is scratch space; remove it whether or not
            # the launch succeeded.
            shutil.rmtree(scratch, ignore_errors=True)

        # The shell wrapper itself (heredoc + node) has to exit cleanly.
        self.assertEqual(
            0,
            result.returncode,
            f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}",
        )

        # A 403 carrying X-Pipelock-Bridge: block is the cleanest
        # end-to-end signal that the whole chain ran:
        # mitmproxy(bump) -> addon(forward) -> pipelock(scan) -> block.
        self.assertIn(
            "status=403",
            result.stdout,
            f"expected 403 from pipelock block; got: {result.stdout!r}",
        )
        self.assertIn(
            "bridge=block",
            result.stdout,
            f"X-Pipelock-Bridge header missing; the addon may not be "
            f"in path: {result.stdout!r}",
        )
if __name__ == "__main__":
unittest.main()
+62
View File
@@ -0,0 +1,62 @@
"""Unit: the addon's verdict function — pinning pipelock-block vs.
relayed-upstream 4xx.
The fingerprint shape is the contract the addon depends on; this
test should break loudly if pipelock changes its 403-body prefix
under a version bump."""
from __future__ import annotations
import unittest
from claude_bottle.mitmproxy.addon import is_pipelock_block
class TestIsPipelockBlock(unittest.TestCase):
    """Pins which (status, body) pairs `is_pipelock_block` treats as a
    pipelock block versus a response the addon should let through."""

    def test_block_dlp_body(self):
        # Body-DLP block as emitted by pipelock v2.3.0 (captured in
        # the impl spike).
        body = b"blocked: request body contains secret: GitHub Token"
        self.assertTrue(is_pipelock_block(403, body))

    def test_block_allowlist_body(self):
        # Allowlist block as emitted by pipelock v2.3.0 (captured in
        # the impl spike).
        body = b"blocked: domain not in allowlist: example.com"
        self.assertTrue(is_pipelock_block(403, body))

    def test_block_header_dlp_body(self):
        # Header-DLP block; per the spike it shares the body prefix.
        body = b"blocked: request header Authorization contains secret"
        self.assertTrue(is_pipelock_block(403, body))

    def test_403_without_blocked_prefix_is_not_a_block(self):
        # A genuine upstream 403 relayed by pipelock carries the
        # upstream's own body, which almost certainly does not start
        # with `blocked: `. It must count as allow so the addon hands
        # the flow back to mitmproxy.
        upstream_body = b'{"error":"forbidden","detail":"insufficient permissions"}'
        self.assertFalse(is_pipelock_block(403, upstream_body))

    def test_non_403_with_blocked_prefix_is_not_a_block(self):
        # Defensive: a 502/504 from some intermediate whose body
        # happens to begin `blocked: ` must not short-circuit. By
        # contract a block is always a 403.
        verdict = is_pipelock_block(502, b"blocked: ...")
        self.assertFalse(verdict)

    def test_200_is_not_a_block(self):
        # Ordinary forwarded response on the allow path.
        verdict = is_pipelock_block(200, b'{"ok":true}')
        self.assertFalse(verdict)

    def test_empty_body_is_not_a_block(self):
        # A 403 with no body has no block prefix to match.
        verdict = is_pipelock_block(403, b"")
        self.assertFalse(verdict)
if __name__ == "__main__":
unittest.main()