feat(mitmproxy): vendor the addon and Docker sidecar lifecycle
First step of PRD 0005. Three new source files (plus a unit test) for the mitmproxy-in-front-of-pipelock topology — wiring into the bottle launch comes in the next commit.

- claude_bottle/mitmproxy/__init__.py: abstract MitmproxyProxy base + MitmproxyProxyPlan. Mirrors the PipelockProxy shape (prepare / start / stop) and adds extract_ca_cert for the CA cert hand-off into the agent.
- claude_bottle/mitmproxy/addon.py: the vendored Python addon mitmproxy loads inside the sidecar. Forwards each decrypted request to pipelock as a plain HTTP forward-proxy call, inspects the response, and short-circuits the flow with 403 on a pipelock block (status=403 + body starts with `blocked: `, pinned empirically against pipelock 2.3.0 in the impl spike). Self-contained — no claude_bottle imports — so it loads in a sidecar that doesn't have claude_bottle on its path.
- claude_bottle/backend/docker/mitmproxy.py: DockerMitmproxyProxy with create / cp / network connect / start lifecycle. Pinned to mitmproxy/mitmproxy@sha256:00b77b5d… (multi-arch manifest for v12.2.3).
- tests/unit/test_mitmproxy_verdict.py: pins the verdict fingerprint so a pipelock-side body shape change breaks loudly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
"""mitmproxy TLS-interception sidecar for the per-bottle egress
|
||||
topology (PRD 0005).
|
||||
|
||||
Sits in front of pipelock on the bottle's egress path so pipelock's
|
||||
body / header / URL DLP scanners see plaintext for HTTPS targets.
|
||||
The sidecar runs in mitmproxy's `regular` mode and loads the
|
||||
vendored addon at `addon.py`; the addon forwards each decrypted
|
||||
request to pipelock as a plain HTTP forward-proxy call and gates
|
||||
the mitmproxy flow on pipelock's verdict.
|
||||
|
||||
This module is platform-agnostic: it owns the abstract proxy
|
||||
lifecycle (prepare / start / stop / extract_ca_cert). The
|
||||
Docker-specific lifecycle lives in
|
||||
`claude_bottle/backend/docker/mitmproxy.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MitmproxyProxyPlan:
    """Immutable start plan produced by MitmproxyProxy.prepare.

    `.start` consumes the plan when the sidecar has to be brought up.
    The two network fields begin empty; the backend's launch step
    fills them in (via dataclasses.replace) once those networks have
    actually been created — the same pattern PipelockProxyPlan uses.
    """

    # Host-side path to the vendored addon.py, resolved at prepare time.
    addon_src: Path
    # Per-agent identifier used as the suffix in every per-bottle
    # resource name.
    slug: str
    # Network names; empty until the backend's launch step sets them.
    internal_network: str = ""
    egress_network: str = ""
|
||||
|
||||
|
||||
class MitmproxyProxy(ABC):
    """Abstract mitmproxy TLS-interception sidecar.

    Locating and bundling the addon is platform-agnostic and handled
    here. Starting and stopping the sidecar, and extracting its CA
    cert, are backend-specific and implemented by concrete subclasses.
    """

    def prepare(self, slug: str) -> MitmproxyProxyPlan:
        """Resolve the vendored addon and build the start plan.

        The addon is checked into the project and is the same for
        every bottle. Per-bottle wiring (the pipelock URL) is injected
        through env vars at start time rather than through a generated
        config.

        Raises:
            FileNotFoundError: addon.py is not present next to this
                module.
        """
        package_dir = Path(__file__).resolve().parent
        addon_src = package_dir / "addon.py"
        if addon_src.is_file():
            return MitmproxyProxyPlan(addon_src=addon_src, slug=slug)
        raise FileNotFoundError(
            f"mitmproxy addon not found at {addon_src}; the "
            "package was installed incompletely"
        )

    @abstractmethod
    def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str:
        """Bring the sidecar up as described by `plan`.

        `pipelock_url` is injected into the sidecar's environment (as
        CLAUDE_BOTTLE_PIPELOCK_URL) so the addon knows where to send
        requests for scanning.

        Returns:
            The proxy_target string identifying the running sidecar —
            the value to hand to `.stop` and `.extract_ca_cert`.
        """

    @abstractmethod
    def stop(self, proxy_target: str) -> None:
        """Tear down the sidecar named by `proxy_target`.

        Must be idempotent: a target that no longer exists counts as
        success.
        """

    @abstractmethod
    def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None:
        """Copy the sidecar's public CA cert to `dest_path` on the host.

        Implementations poll the sidecar until the cert file appears
        (mitmproxy generates its CA on first launch). The private key
        never leaves the sidecar.
        """
|
||||
@@ -0,0 +1,169 @@
|
||||
"""mitmproxy addon: forward each decrypted request to pipelock for
|
||||
scanning, then either short-circuit with pipelock's 403 (block) or
|
||||
let mitmproxy proceed to the real upstream (allow).
|
||||
|
||||
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
|
||||
Must be self-contained — the sidecar image doesn't have claude_bottle
|
||||
on its import path. Imports are limited to the Python stdlib plus
|
||||
mitmproxy itself (which is the host).
|
||||
|
||||
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
|
||||
(injected by DockerMitmproxyProxy.start).
|
||||
|
||||
The verdict function `is_pipelock_block` is exported as a pure
|
||||
function so unit tests can exercise it without importing mitmproxy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
|
||||
# Environment variable that carries pipelock's URL into the sidecar.
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
# Timeout, in seconds, applied to the scan request sent to pipelock.
PIPELOCK_TIMEOUT_SEC = 5

# Hop-by-hop headers per RFC 7230 §6.1. They apply to a single
# connection and must not be forwarded across a proxy. Stored
# lower-cased so lookups can be case-insensitive.
_HOP_BY_HOP = frozenset(
    (
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailers",
        "transfer-encoding",
        "upgrade",
    )
)

log = logging.getLogger("pipelock-bridge")
|
||||
|
||||
|
||||
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Tell whether a pipelock response is a block issued by pipelock itself.

    Pipelock's own blocks (DLP / allowlist) must be told apart from an
    upstream 4xx that pipelock merely relayed. A pipelock block always
    has status 403 and a plain-text body starting with
    `blocked: <reason>`; a relayed upstream response carries whatever
    body the upstream sent, which is extremely unlikely to begin with
    `blocked: `.

    Pinned empirically against pipelock v2.3.0 in the impl spike:
      * DLP block: "blocked: request body contains secret: GitHub Token"
      * allowlist block: "blocked: domain not in allowlist: example.com"

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so the match can use a
    structured signal instead of pattern-matching the body.
    """
    if status != 403:
        return False
    return body_bytes.startswith(b"blocked: ")
|
||||
|
||||
|
||||
class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that refuses to follow any 3xx response.

    Returning None from `redirect_request` makes urllib fall through
    to its default error handling, which raises HTTPError carrying the
    3xx status. `_scan_via_pipelock` converts that back into a plain
    (status, body) result.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None


def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Send the decrypted request to pipelock as a forward-proxy call.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock scans it and
    may then attempt an upstream forward over plain HTTP; that
    response is read back too, but the caller discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Redirects are deliberately not followed. The upstream response is
    only a by-product of the scan, and following a 3xx would replay
    the decrypted headers to the redirect target. A redirect to an
    https:// URL would even bypass pipelock, because only the http
    scheme is routed through the proxy here.

    Args:
        pipelock_url: URL of the pipelock forward proxy.
        method: HTTP method of the intercepted request.
        target_url: Absolute URL of the intercepted request.
        headers: Request headers; hop-by-hop headers are dropped
            before forwarding.
        body: Request body; an empty body is sent as no body.

    Returns:
        (status, body_bytes) of pipelock's response. Non-2xx
        responses, including 3xx, are returned rather than raised.

    Raises:
        Transport-level errors (e.g. urllib.error.URLError, timeouts)
        propagate so the caller can fail closed.
    """
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({"http": pipelock_url}),
        _NoRedirectHandler(),
    )
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Context manager closes the connection even if read() fails.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError wraps the response; read the body for the verdict
        # check, then release the underlying socket.
        try:
            return e.code, e.read()
        finally:
            e.close()
|
||||
|
||||
|
||||
class PipelockBridge:
    """mitmproxy addon that gates every decrypted request on pipelock.

    mitmproxy picks up an instance of this class through the
    module-level `addons = [...]` attribute at the bottom of this file.
    """

    def __init__(self) -> None:
        # Read once per sidecar lifetime. An empty value is tolerated
        # at construction so the module stays importable in test
        # environments; `request` fails closed if the URL is still
        # missing at request time.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy `request` hook: scan the request, block or allow.

        Called for each decrypted client request before mitmproxy
        forwards it to the real upstream. Setting `flow.response`
        short-circuits the flow with that response; leaving it None
        lets mitmproxy proceed.

        Every failure path fails closed with a 503: missing pipelock
        URL, a request that cannot be read, or pipelock being
        unreachable. A pipelock block is relayed with pipelock's own
        status and body.

        NOTE(review): this hook is synchronous, so the pipelock
        round-trip (up to PIPELOCK_TIMEOUT_SEC) runs inline — confirm
        this does not stall other flows in the sidecar.
        """
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        def refuse(status_code, payload, marker):
            # All bridge-generated responses share this shape; the
            # X-Pipelock-Bridge header records why the flow was stopped.
            flow.response = http.Response.make(
                status_code,
                payload,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": marker},
            )

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            refuse(503, b"egress scanner not configured", "misconfigured")
            return

        # Reading the request can itself raise (e.g. a body whose
        # Content-Encoding cannot be decoded). An exception escaping
        # the hook would leave flow.response unset, so the request
        # would proceed upstream unscanned — refuse instead.
        try:
            target_url = flow.request.pretty_url
            method = flow.request.method
            headers = dict(flow.request.headers.items())
            body = bytes(flow.request.content or b"")
        except Exception as e:
            log.warning(
                "could not read request for scanning; failing closed: %s", e,
            )
            refuse(503, b"egress scanner could not read request", "error")
            return

        try:
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as e:
            # Fail closed: scanner unreachable means no verdict, so
            # refuse rather than risk leaking.
            log.warning("pipelock unreachable; failing closed: %s", e)
            refuse(503, b"egress scanner unreachable", "error")
            return

        if is_pipelock_block(status, response_body):
            refuse(status, response_body, "block")
            return

        # Allow path: discard pipelock's response (it's the wasted
        # upstream-forward attempt). Leave flow.response as None;
        # mitmproxy proceeds to the real upstream on its own.
|
||||
|
||||
|
||||
# Module-level attribute through which mitmproxy picks up the addon instance.
addons = [PipelockBridge()]
|
||||
Reference in New Issue
Block a user