Files
bot-bottle/claude_bottle/mitmproxy/addon.py
T
didericis e579c3d4fd feat(mitmproxy): vendor the addon and Docker sidecar lifecycle
First step of PRD 0005. Three new files for the
mitmproxy-in-front-of-pipelock topology — wiring into the bottle
launch comes in the next commit.

- claude_bottle/mitmproxy/__init__.py: abstract MitmproxyProxy
  base + MitmproxyProxyPlan. Mirrors the PipelockProxy shape
  (prepare / start / stop) and adds extract_ca_cert for the CA
  cert hand-off into the agent.
- claude_bottle/mitmproxy/addon.py: the vendored Python addon
  mitmproxy loads inside the sidecar. Forwards each decrypted
  request to pipelock as a plain HTTP forward-proxy call,
  inspects the response, and short-circuits the flow with 403 on
  a pipelock block (status=403 + body starts with `blocked: `,
  pinned empirically against pipelock 2.3.0 in the impl spike).
  Self-contained — no claude_bottle imports — so it loads in a
  sidecar that doesn't have claude_bottle on its path.
- claude_bottle/backend/docker/mitmproxy.py: DockerMitmproxyProxy
  with create / cp / network connect / start lifecycle. Pinned
  to mitmproxy/mitmproxy@sha256:00b77b5d… (multi-arch manifest
  for v12.2.3).
- tests/unit/test_mitmproxy_verdict.py: pins the verdict
  fingerprint so a pipelock-side body shape change breaks loudly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 13:32:36 -04:00

170 lines
6.1 KiB
Python

"""mitmproxy addon: forward each decrypted request to pipelock for
scanning, then either short-circuit with pipelock's 403 (block) or
let mitmproxy proceed to the real upstream (allow).
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
Must be self-contained — the sidecar image doesn't have claude_bottle
on its import path. Imports are limited to the Python stdlib plus
mitmproxy itself (which is the host).
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
(injected by DockerMitmproxyProxy.start).
The verdict function `is_pipelock_block` is exported as a pure
function so unit tests can exercise it without importing mitmproxy.
"""
from __future__ import annotations
import logging
import os
import urllib.error
import urllib.request
# Name of the environment variable that carries pipelock's proxy URL.
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
# Timeout, in seconds, passed to the scan request sent to pipelock.
PIPELOCK_TIMEOUT_SEC = 5

# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
# The actual header field is `Trailer` (RFC 7230 §4.4); the historical
# `trailers` spelling is kept so the set stays a superset of what it
# was. `Proxy-Connection` is non-standard but widely sent by clients
# talking to a proxy and is stripped by common proxy implementations.
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "proxy-connection",
    "te",
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
})

log = logging.getLogger("pipelock-bridge")
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Decide whether a response from pipelock is pipelock's own block.

    A block issued by the proxy itself (DLP / allowlist) has to be told
    apart from an upstream 4xx that pipelock merely relayed back.

    The fingerprint is: status 403 and a plain-text body that begins
    with `blocked: <reason>`. A relayed upstream response carries
    whatever body the upstream produced, which is extremely unlikely to
    start with `blocked: `. Pinned empirically against pipelock v2.3.0
    in the impl spike (DLP block:
    "blocked: request body contains secret: GitHub Token";
    allowlist block: "blocked: domain not in allowlist: example.com").

    Long-term cleanup: file an upstream feature request for an
    `X-Pipelock-Verdict: block` response header so the match can use a
    structured signal instead of pattern-matching the body.
    """
    # Anything other than 403 can never be a pipelock block.
    if status != 403:
        return False
    return body_bytes.startswith(b"blocked: ")
def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call and report what pipelock answered.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Args:
        pipelock_url: URL of the pipelock proxy to send the request through.
        method: HTTP method of the original request.
        target_url: Absolute URL the client asked for.
        headers: Request headers; hop-by-hop headers are dropped
            before forwarding.
        body: Request body; an empty body is sent as no body at all.

    Returns:
        (status, body_bytes) of the response. HTTP error statuses are
        returned the same way rather than raised.

    Raises:
        Transport-level errors (e.g. urllib.error.URLError, timeouts)
        propagate so the caller can fail closed.
    """
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    # RFC 7230 §6.1: every header nominated as a connection option in
    # the request's own `Connection` header is hop-by-hop as well, in
    # addition to the fixed set.
    connection_options = {
        token.strip().lower()
        for name, value in headers.items()
        if name.lower() == "connection"
        for token in value.split(",")
    }
    forwarded_headers = {
        name: value
        for name, value in headers.items()
        if name.lower() not in _HOP_BY_HOP
        and name.lower() not in connection_options
    }

    proxy_handler = urllib.request.ProxyHandler({"http": pipelock_url})
    opener = urllib.request.build_opener(proxy_handler)
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Close the response deterministically so the connection to
        # pipelock is released right away instead of whenever the
        # object happens to be garbage-collected.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # urllib raises for 4xx/5xx; the error object doubles as the
        # response and holds the socket, so it must be closed as well.
        try:
            return e.code, e.read()
        finally:
            e.close()
class PipelockBridge:
    """mitmproxy addon that gates every request on a pipelock verdict.

    mitmproxy instantiates one of these via the `addons = [...]` module
    attribute at the bottom of this file.
    """

    def __init__(self) -> None:
        # Read once per sidecar lifetime. Empty string is allowed at
        # construction (so the module can be imported in test
        # environments) but the request handler fails closed if it's
        # missing at request time.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy callback. Called for each decrypted client
        request before mitmproxy forwards to the real upstream.

        Setting flow.response short-circuits the flow with that
        response; leaving it None lets mitmproxy proceed. Every failure
        path sets a 503 so that no request proceeds without a verdict.

        Args:
            flow: the mitmproxy HTTP flow for the intercepted request.
        """
        # Late import so this module can be loaded in test
        # environments without mitmproxy installed (the verdict
        # function is unit-testable in isolation).
        from mitmproxy import http

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            flow.response = http.Response.make(
                503,
                b"egress scanner not configured",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "misconfigured"},
            )
            return

        try:
            # mitmproxy documents that `content` can raise ValueError
            # when the content-encoding is invalid. An exception that
            # escaped this hook would leave flow.response unset, which
            # is the allow path, so reading the request must fail
            # closed too.
            # NOTE(review): mitmproxy appears to log addon exceptions
            # and continue the flow — confirm.
            target_url = flow.request.pretty_url
            method = flow.request.method
            headers = dict(flow.request.headers.items())
            body = bytes(flow.request.content or b"")
        except Exception as e:
            log.warning(
                "could not read request for scanning; failing closed: %s", e,
            )
            flow.response = http.Response.make(
                503,
                b"request could not be scanned",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "error"},
            )
            return

        try:
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as e:
            # Fail closed: scanner unreachable means no verdict, so
            # refuse rather than risk leaking.
            log.warning("pipelock unreachable; failing closed: %s", e)
            flow.response = http.Response.make(
                503,
                b"egress scanner unreachable",
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "error"},
            )
            return

        if is_pipelock_block(status, response_body):
            # Relay pipelock's own status and reason text to the client.
            flow.response = http.Response.make(
                status,
                response_body,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": "block"},
            )
            return
        # Allow path: discard pipelock's response (it's the wasted
        # upstream-forward attempt). Leave flow.response as None;
        # mitmproxy proceeds to the real upstream on its own.
# Module-level hook: mitmproxy reads this `addons` attribute when the
# script is loaded via `mitmdump -s ...` to obtain the addon instance.
addons = [PipelockBridge()]