feat(mitmproxy): vendor the addon and Docker sidecar lifecycle

First step of PRD 0005. Three new source files, plus a unit test,
for the mitmproxy-in-front-of-pipelock topology — wiring into the
bottle launch comes in the next commit.

- claude_bottle/mitmproxy/__init__.py: abstract MitmproxyProxy
  base + MitmproxyProxyPlan. Mirrors the PipelockProxy shape
  (prepare / start / stop) and adds extract_ca_cert for the CA
  cert hand-off into the agent.
- claude_bottle/mitmproxy/addon.py: the vendored Python addon
  mitmproxy loads inside the sidecar. Forwards each decrypted
  request to pipelock as a plain HTTP forward-proxy call,
  inspects the response, and short-circuits the flow with 403 on
  a pipelock block (status=403 + body starts with `blocked: `,
  pinned empirically against pipelock 2.3.0 in the impl spike).
  Self-contained — no claude_bottle imports — so it loads in a
  sidecar that doesn't have claude_bottle on its path.
- claude_bottle/backend/docker/mitmproxy.py: DockerMitmproxyProxy
  with create / cp / network connect / start lifecycle. Pinned
  to mitmproxy/mitmproxy@sha256:00b77b5d… (multi-arch manifest
  for v12.2.3).
- tests/unit/test_mitmproxy_verdict.py: pins the verdict
  fingerprint so a pipelock-side body shape change breaks loudly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-12 13:32:36 -04:00
parent c2eacac49f
commit e579c3d4fd
4 changed files with 490 additions and 0 deletions
+178
View File
@@ -0,0 +1,178 @@
"""DockerMitmproxyProxy — the Docker-specific lifecycle for the
mitmproxy sidecar. Inherits the addon-bundling from MitmproxyProxy.
The sidecar runs `mitmdump -s /addon/addon.py`, listens on
MITMPROXY_PORT inside the per-bottle internal network, and generates
its own ephemeral CA on first launch (extracted by provision_ca,
installed into the agent's trust store)."""
from __future__ import annotations
import os
import subprocess
import time
from pathlib import Path
from ...log import die, info, warn
from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan
# mitmproxy/mitmproxy:12.2.3 (mitmproxy v12 release line). The digest
# is the multi-arch image index — pulls resolve to the right per-arch
# child digest. Bumped deliberately; see PRD 0005.
# The pinned digest is only the default: setting
# CLAUDE_BOTTLE_MITMPROXY_IMAGE substitutes a different image reference.
MITMPROXY_IMAGE = os.environ.get(
"CLAUDE_BOTTLE_MITMPROXY_IMAGE",
"mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f",
)
# Listening port for mitmproxy's forward proxy (agent-facing).
# Overridable via CLAUDE_BOTTLE_MITMPROXY_PORT. Kept as a string because
# it is used verbatim as the `--listen-port` argv value and inside the
# proxy URL built by mitmproxy_proxy_url.
MITMPROXY_PORT = os.environ.get("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080")
# Path inside the sidecar where the addon is dropped by docker cp; the
# same path is handed to mitmdump via `-s`.
MITMPROXY_ADDON_PATH = "/addon/addon.py"
# Path inside the sidecar where mitmproxy generates its CA. This is the
# certificate file that extract_ca_cert polls for and then copies out.
_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem"
def mitmproxy_container_name(slug: str) -> str:
    """Return the Docker container name used for the mitmproxy sidecar
    of the bottle identified by `slug`."""
    container_name = f"claude-bottle-mitm-{slug}"
    return container_name
def mitmproxy_proxy_url(slug: str) -> str:
    """Return the plain-HTTP proxy URL for the sidecar belonging to
    `slug`: the sidecar's container name as host, MITMPROXY_PORT as
    port."""
    sidecar_host = mitmproxy_container_name(slug)
    return f"http://{sidecar_host}:{MITMPROXY_PORT}"
class DockerMitmproxyProxy(MitmproxyProxy):
    """Brings the mitmproxy sidecar up and down via Docker.

    Every Docker interaction shells out to the `docker` CLI with an
    argv list (never a shell string). Fatal problems in `start` and
    `extract_ca_cert` are reported through `die`, with Docker's own
    stderr appended whenever Docker printed any."""

    # How long extract_ca_cert waits for the CA cert to appear, and how
    # often it probes the sidecar while waiting (both in seconds).
    _CA_WAIT_SEC = 15
    _CA_POLL_INTERVAL_SEC = 0.5

    @staticmethod
    def _docker(*args: str) -> "subprocess.CompletedProcess[str]":
        """Run `docker <args>` without raising on a non-zero exit.

        Output is captured as text instead of being discarded so the
        caller can surface Docker's stderr in its error message."""
        return subprocess.run(
            ["docker", *args],
            capture_output=True,
            text=True,
            check=False,
        )

    @staticmethod
    def _with_detail(
        message: str, result: "subprocess.CompletedProcess[str]"
    ) -> str:
        """Return `message` with the stripped stderr of `result`
        appended after a colon, or `message` unchanged when stderr is
        empty."""
        detail = (result.stderr or "").strip()
        return f"{message}: {detail}" if detail else message

    def _abort_start(
        self,
        name: str,
        message: str,
        result: "subprocess.CompletedProcess[str]",
    ) -> None:
        """Force-remove the partially set-up sidecar `name` (best
        effort, result ignored), then report the failure via `die`."""
        self._docker("rm", "-f", name)
        die(self._with_detail(message, result))

    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Boot the mitmproxy sidecar:

          1. `docker create` on the internal network with mitmdump
             argv: `--listen-port <port> -s <addon path>` plus the
             pipelock URL injected as an env var.
          2. `docker cp` the vendored addon to the sidecar.
          3. Attach to the per-agent egress network so mitmproxy
             can reach real upstreams.
          4. `docker start`.

        If any step after the create fails, the container is
        force-removed before the failure is reported via `die`.

        Returns the container name (the proxy_target passed to .stop
        and .extract_ca_cert)."""
        name = mitmproxy_container_name(plan.slug)
        if not plan.addon_src.is_file():
            die(f"mitmproxy addon not found at {plan.addon_src}")
        info(f"starting mitmproxy sidecar {name} on network {plan.internal_network}")

        created = self._docker(
            "create",
            "--name", name,
            "--network", plan.internal_network,
            "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}",
            MITMPROXY_IMAGE,
            "mitmdump",
            "--listen-port", MITMPROXY_PORT,
            "-s", MITMPROXY_ADDON_PATH,
        )
        if created.returncode != 0:
            # No container exists yet, so there is nothing to remove.
            die(self._with_detail(
                f"failed to create mitmproxy sidecar {name}", created,
            ))

        # The remaining steps all act on the created container and share
        # the same failure handling: remove the container, then die.
        post_create_steps = (
            (
                ("cp", str(plan.addon_src), f"{name}:{MITMPROXY_ADDON_PATH}"),
                f"failed to copy mitmproxy addon into {name}",
            ),
            (
                ("network", "connect", plan.egress_network, name),
                f"failed to attach mitmproxy sidecar {name} to egress "
                f"network {plan.egress_network}",
            ),
            (
                ("start", name),
                f"failed to start mitmproxy sidecar {name}",
            ),
        )
        for docker_args, failure in post_create_steps:
            result = self._docker(*docker_args)
            if result.returncode != 0:
                self._abort_start(name, failure, result)
        return name

    def stop(self, proxy_target: str) -> None:
        """Remove the sidecar container `proxy_target`.

        Idempotent: missing container is success. Mirrors
        DockerPipelockProxy.stop. A failed removal only produces a
        warning that tells the operator how to clean up by hand."""
        if self._docker("inspect", proxy_target).returncode != 0:
            # `docker inspect` failed: treat the container as already gone.
            return
        if self._docker("rm", "-f", proxy_target).returncode != 0:
            warn(
                f"failed to remove mitmproxy sidecar {proxy_target}; "
                f"clean up with 'docker rm -f {proxy_target}'"
            )

    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Poll the running sidecar for the CA cert (mitmproxy
        generates it on first launch, typically <1s after start),
        then `docker cp` the public half to `dest_path`. Only the
        certificate file is copied; the private key never leaves the
        container.

        Reports via `die` when the cert does not appear within the
        wait window or when the copy fails."""
        deadline = time.monotonic() + self._CA_WAIT_SEC
        while time.monotonic() < deadline:
            probe = self._docker(
                "exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR,
            )
            if probe.returncode == 0:
                break
            time.sleep(self._CA_POLL_INTERVAL_SEC)
        else:
            # Loop ran out without ever seeing the file.
            die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} "
                f"after {self._CA_WAIT_SEC}s — sidecar {proxy_target} may have failed to start")

        copied = self._docker(
            "cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path),
        )
        if copied.returncode != 0:
            die(self._with_detail(
                f"failed to extract mitmproxy CA cert from {proxy_target}",
                copied,
            ))
+81
View File
@@ -0,0 +1,81 @@
"""mitmproxy TLS-interception sidecar for the per-bottle egress
topology (PRD 0005).
Sits in front of pipelock on the bottle's egress path so pipelock's
body / header / URL DLP scanners see plaintext for HTTPS targets.
The sidecar runs in mitmproxy's `regular` mode and loads the
vendored addon at `addon.py`; the addon forwards each decrypted
request to pipelock as a plain HTTP forward-proxy call and gates
the mitmproxy flow on pipelock's verdict.
This module is platform-agnostic: it owns the abstract proxy
lifecycle (prepare / start / stop / extract_ca_cert). The
Docker-specific lifecycle lives in
`claude_bottle/backend/docker/mitmproxy.py`.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Immutable start plan produced by MitmproxyProxy.prepare and
    handed to .start when the sidecar has to be brought up.

    The two network fields start out empty; the backend's launch step
    fills them in (via dataclasses.replace) once those networks have
    actually been created — same pattern as PipelockProxyPlan."""

    # Host-side path to the vendored addon.py, resolved at prepare time.
    addon_src: Path
    # Per-agent identifier used as the suffix in every per-bottle
    # resource name.
    slug: str
    # Name of the per-bottle internal network; empty until populated.
    internal_network: str = ""
    # Name of the per-agent egress network; empty until populated.
    egress_network: str = ""
class MitmproxyProxy(ABC):
    """Abstract mitmproxy TLS-interception sidecar.

    Locating the vendored addon is platform-agnostic and implemented
    here; bringing the sidecar up, tearing it down and extracting its
    CA cert are backend-specific and left to concrete subclasses."""

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Resolve the vendored addon next to this module and return
        the start plan for `slug`.

        The addon is checked into the project and identical across
        bottles; per-bottle wiring (pipelock URL) is injected via env
        vars at start time, not via a generated config.

        Raises FileNotFoundError when addon.py is missing."""
        package_dir = Path(__file__).resolve().parent
        addon_src = package_dir / "addon.py"
        if addon_src.is_file():
            return MitmproxyProxyPlan(addon_src=addon_src, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_src}; the "
            f"package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Bring the sidecar up as described by `plan`.

        `pipelock_url` is injected into the sidecar's env (as
        CLAUDE_BOTTLE_PIPELOCK_URL) so the addon knows where to
        scan. Returns the proxy_target string identifying the
        running sidecar — the value later passed to `.stop` and
        `.extract_ca_cert`."""

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Tear down the sidecar named by `proxy_target`.

        Idempotent: a missing target counts as success."""

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Copy the sidecar's public CA cert to `dest_path` on the host.

        Implementations poll the running sidecar until the cert file
        appears (mitmproxy generates the CA on first launch). The
        private key never leaves the sidecar."""
+169
View File
@@ -0,0 +1,169 @@
"""mitmproxy addon: forward each decrypted request to pipelock for
scanning, then either short-circuit with pipelock's 403 (block) or
let mitmproxy proceed to the real upstream (allow).
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
Must be self-contained — the sidecar image doesn't have claude_bottle
on its import path. Imports are limited to the Python stdlib plus
mitmproxy itself (which is the host).
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
(injected by DockerMitmproxyProxy.start).
The verdict function `is_pipelock_block` is exported as a pure
function so unit tests can exercise it without importing mitmproxy.
"""
from __future__ import annotations
import logging
import os
import urllib.error
import urllib.request
# Name of the env var that carries pipelock's proxy URL into the sidecar.
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
# Timeout, in seconds, for the scan call made to pipelock.
PIPELOCK_TIMEOUT_SEC = 5
# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
# The header field is actually named `Trailer` (RFC 7230 §4.4); the
# `Trailers` spelling comes from the RFC 2616 §13.5.1 list and is kept
# only so nothing that was filtered before starts being forwarded.
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
})
# Module logger for the addon's fail-closed diagnostics.
log = logging.getLogger("pipelock-bridge")
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Decide whether a response from pipelock is pipelock's own block
    verdict (DLP / allowlist) rather than an upstream 4xx it merely
    relayed.

    A block is recognised by two things together: the status is 403
    and the plain-text body starts with `blocked: <reason>`. A relayed
    upstream response carries whatever body the upstream sent and is
    extremely unlikely to begin with `blocked: `. Pinned empirically
    against pipelock v2.3.0 in the impl spike (DLP block:
    "blocked: request body contains secret: GitHub Token";
    allowlist block: "blocked: domain not in allowlist: example.com").

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so we can match on a
    structured signal instead of pattern-matching the body."""
    if status != 403:
        # Block status is always 403; anything else is never a verdict.
        return False
    return body_bytes.startswith(b"blocked: ")
class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that never follows a redirect.

    Returning None from redirect_request leaves the 3xx unhandled, so
    urllib raises it as an HTTPError and the scan call makes exactly
    one request."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call. Returns (status, body_bytes) of the single
    response pipelock sends back. Raises on transport-level errors so
    the caller can fail closed.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Redirects are deliberately not followed: a relayed 3xx is returned
    as-is. Following it would issue a second request, and because only
    the `http` scheme is proxied here, a redirect to an https:// URL
    would be fetched directly instead of through pipelock."""
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    # Content-Length is dropped along with the hop-by-hop headers so
    # urllib derives it from `body` itself; the caller's value may not
    # describe these exact bytes.
    not_forwarded = _HOP_BY_HOP | {"content-length"}
    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in not_forwarded
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirectHandler(),
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # `with` closes the response (and its socket) once it is read.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as err:
        # Non-2xx statuses (including pipelock's 403 block and any
        # unfollowed 3xx) arrive as HTTPError; they are still answers.
        try:
            return err.code, err.read()
        finally:
            err.close()
class PipelockBridge:
    """mitmproxy addon class. mitmproxy picks up the instance listed in
    the module-level `addons = [...]` attribute defined right after
    this class.

    Every path that cannot obtain a verdict from pipelock answers the
    client itself (fail closed); only an explicit non-block result
    leaves the flow untouched."""

    def __init__(self) -> None:
        # Read once per sidecar lifetime. Empty string is allowed at
        # construction (so the module can be imported in test
        # environments) but the request handler fails closed if it's
        # missing at request time.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    @staticmethod
    def _respond(flow, http, status: int, body: bytes, tag: str) -> None:
        """Short-circuit `flow` with a plain-text response; `tag` goes
        into the X-Pipelock-Bridge header so the origin of the response
        is visible to the client."""
        flow.response = http.Response.make(
            status,
            body,
            {"Content-Type": "text/plain",
             "X-Pipelock-Bridge": tag},
        )

    def request(self, flow) -> None:
        """mitmproxy callback. Called for each decrypted client
        request before mitmproxy forwards to the real upstream.
        Setting flow.response short-circuits the flow with that
        response; leaving it None lets mitmproxy proceed.

        NOTE(review): the scan is a blocking call of up to
        PIPELOCK_TIMEOUT_SEC seconds inside a synchronous hook;
        presumably other flows wait meanwhile — confirm against the
        mitmproxy addon docs."""
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            self._respond(
                flow, http, 503, b"egress scanner not configured",
                "misconfigured",
            )
            return

        try:
            # Snapshot inside the guard: reading the request can itself
            # raise (NOTE(review): the decoded `content` accessor is
            # expected to raise on an invalid content-encoding —
            # confirm). An exception escaping this hook would not set
            # flow.response, so the request would presumably go
            # upstream unscanned.
            target_url = flow.request.pretty_url
            method = flow.request.method
            headers = dict(flow.request.headers.items())
            body = bytes(flow.request.content or b"")
        except Exception as e:
            log.warning(
                "could not read request for scanning; failing closed: %s", e,
            )
            self._respond(
                flow, http, 503, b"egress scanner could not read request",
                "error",
            )
            return

        try:
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as e:
            # Fail closed: scanner unreachable means no verdict, so
            # refuse rather than risk leaking. The broad catch is
            # deliberate — this is the last boundary before the flow
            # would continue.
            log.warning("pipelock unreachable; failing closed: %s", e)
            self._respond(
                flow, http, 503, b"egress scanner unreachable", "error",
            )
            return

        if is_pipelock_block(status, response_body):
            # Relay pipelock's own status and reason text to the client.
            self._respond(flow, http, status, response_body, "block")
            return
        # Allow path: discard pipelock's response (it's the wasted
        # upstream-forward attempt). Leave flow.response as None;
        # mitmproxy proceeds to the real upstream on its own.


# Module attribute mitmproxy reads to discover the addon instance.
addons = [PipelockBridge()]
+62
View File
@@ -0,0 +1,62 @@
"""Unit: the addon's verdict function pinning pipelock-block vs.
relayed-upstream 4xx.
The fingerprint shape is the contract the addon depends on; this
test should break loudly if pipelock changes its 403-body prefix
under a version bump."""
from __future__ import annotations
import unittest
from claude_bottle.mitmproxy.addon import is_pipelock_block
class TestIsPipelockBlock(unittest.TestCase):
    """Pins the (status, body-prefix) fingerprint that
    is_pipelock_block treats as a pipelock block."""

    def test_block_dlp_body(self):
        # Body of a pipelock v2.3.0 DLP block, captured in the impl spike.
        body = b"blocked: request body contains secret: GitHub Token"
        self.assertTrue(is_pipelock_block(403, body))

    def test_block_allowlist_body(self):
        # Body of a pipelock v2.3.0 allowlist block, captured in the
        # impl spike.
        body = b"blocked: domain not in allowlist: example.com"
        self.assertTrue(is_pipelock_block(403, body))

    def test_block_header_dlp_body(self):
        # Header DLP path; same body prefix per the spike.
        body = b"blocked: request header Authorization contains secret"
        self.assertTrue(is_pipelock_block(403, body))

    def test_403_without_blocked_prefix_is_not_a_block(self):
        # A real-upstream 403 relayed by pipelock — body is whatever
        # the upstream sent, almost certainly not starting with
        # `blocked: `. Must be treated as allow so the addon hands
        # the flow back to mitmproxy.
        upstream_body = (
            b'{"error":"forbidden","detail":"insufficient permissions"}'
        )
        self.assertFalse(is_pipelock_block(403, upstream_body))

    def test_non_403_with_blocked_prefix_is_not_a_block(self):
        # Defensive: if some intermediate ever returns 502/504 with
        # a body that happens to begin `blocked: `, we should still
        # not short-circuit. Block status is always 403 by contract.
        verdict = is_pipelock_block(502, b"blocked: ...")
        self.assertFalse(verdict)

    def test_200_is_not_a_block(self):
        # Allow path, normal forwarded response.
        verdict = is_pipelock_block(200, b'{"ok":true}')
        self.assertFalse(verdict)

    def test_empty_body_is_not_a_block(self):
        # A 403 with nothing in the body carries no `blocked: ` prefix.
        verdict = is_pipelock_block(403, b"")
        self.assertFalse(verdict)


if __name__ == "__main__":
    unittest.main()