e579c3d4fd
First step of PRD 0005. Three new files for the mitmproxy-in-front-of-pipelock topology — wiring into the bottle launch comes in the next commit. - claude_bottle/mitmproxy/__init__.py: abstract MitmproxyProxy base + MitmproxyProxyPlan. Mirrors the PipelockProxy shape (prepare / start / stop) and adds extract_ca_cert for the CA cert hand-off into the agent. - claude_bottle/mitmproxy/addon.py: the vendored Python addon mitmproxy loads inside the sidecar. Forwards each decrypted request to pipelock as a plain HTTP forward-proxy call, inspects the response, and short-circuits the flow with 403 on a pipelock block (status=403 + body starts with `blocked: `, pinned empirically against pipelock 2.3.0 in the impl spike). Self-contained — no claude_bottle imports — so it loads in a sidecar that doesn't have claude_bottle on its path. - claude_bottle/backend/docker/mitmproxy.py: DockerMitmproxyProxy with create / cp / network connect / start lifecycle. Pinned to mitmproxy/mitmproxy@sha256:00b77b5d… (multi-arch manifest for v12.2.3). - tests/unit/test_mitmproxy_verdict.py: pins the verdict fingerprint so a pipelock-side body shape change breaks loudly. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
170 lines
6.1 KiB
Python
170 lines
6.1 KiB
Python
"""mitmproxy addon: forward each decrypted request to pipelock for
|
|
scanning, then either short-circuit with pipelock's 403 (block) or
|
|
let mitmproxy proceed to the real upstream (allow).
|
|
|
|
Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`.
|
|
Must be self-contained — the sidecar image doesn't have claude_bottle
|
|
on its import path. Imports are limited to the Python stdlib plus
|
|
mitmproxy itself (which is the host).
|
|
|
|
Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup
|
|
(injected by DockerMitmproxyProxy.start).
|
|
|
|
The verdict function `is_pipelock_block` is exported as a pure
|
|
function so unit tests can exercise it without importing mitmproxy.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import urllib.error
|
|
import urllib.request
|
|
|
|
|
|
# Name of the environment variable that carries pipelock's proxy URL.
PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL"
# Timeout, in seconds, applied to each scan request sent to pipelock.
PIPELOCK_TIMEOUT_SEC = 5

# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded
# across a proxy. Lower-cased for case-insensitive comparison.
#
# The real header field is `Trailer` (RFC 7230 §4.4); the plural
# "Trailers" comes from a known erratum in RFC 2616 §13.5.1. Both
# spellings are listed so the actual header is stripped while anything
# relying on the historical name keeps working.
_HOP_BY_HOP = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
})

log = logging.getLogger("pipelock-bridge")
|
|
|
|
|
|
def is_pipelock_block(status: int, body_bytes: bytes) -> bool:
    """Tell a pipelock-originated block apart from a relayed upstream 4xx.

    Returns True only when the status is 403 *and* the body begins with
    the literal `blocked: ` marker. That is the shape pipelock uses for
    its own DLP / allowlist refusals; a response pipelock merely relayed
    from the upstream carries the upstream's body, which is extremely
    unlikely to start with that marker.

    The fingerprint was pinned empirically against pipelock v2.3.0 in
    the impl spike. Observed bodies:
      - DLP block: "blocked: request body contains secret: GitHub Token"
      - allowlist block: "blocked: domain not in allowlist: example.com"

    Long-term cleanup: ask upstream for an `X-Pipelock-Verdict: block`
    response header so the match can use a structured signal rather
    than a body prefix.
    """
    # Status gate first: anything other than 403 is never a block.
    if status != 403:
        return False
    block_marker = b"blocked: "
    return body_bytes.startswith(block_marker)
|
|
|
|
|
|
def _scan_via_pipelock(
    pipelock_url: str,
    method: str,
    target_url: str,
    headers: dict[str, str],
    body: bytes,
) -> tuple[int, bytes]:
    """Forward the decrypted request to pipelock as a plain HTTP
    forward-proxy call. Returns (status, body_bytes). Raises on
    transport-level errors so the caller can fail closed.

    The target URL is rewritten to http:// so pipelock receives an
    absolute-URI forward-proxy request shape. Pipelock will scan,
    then may attempt an upstream forward over plain HTTP — that
    response is read back too, but the addon discards it on allow
    (mitmproxy makes the real HTTPS request itself).

    Parameters:
        pipelock_url: proxy URL of the pipelock instance; installed as
            the `http` proxy for this one request.
        method: HTTP method of the intercepted request.
        target_url: absolute URL the client asked for; an `https://`
            scheme is swapped for `http://` before sending.
        headers: request headers; hop-by-hop headers (see
            `_HOP_BY_HOP`) are dropped before forwarding.
        body: request payload; an empty payload is sent as no body.

    Returns:
        The status code and full response body pipelock answered with.
        Statuses that urllib reports as `HTTPError` are returned the
        same way, not raised.

    Raises:
        Any non-HTTPError failure from urllib (connection refused,
        timeout, ...) propagates unchanged.
    """
    rewritten_url = target_url
    if rewritten_url.startswith("https://"):
        rewritten_url = "http://" + rewritten_url[len("https://"):]

    forwarded_headers = {
        k: v for k, v in headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    proxy_handler = urllib.request.ProxyHandler({"http": pipelock_url})
    opener = urllib.request.build_opener(proxy_handler)
    req = urllib.request.Request(
        url=rewritten_url,
        data=body if body else None,
        headers=forwarded_headers,
        method=method,
    )
    try:
        # Close the response deterministically: this runs once per
        # proxied request, so relying on garbage collection would leak
        # sockets in a long-lived sidecar.
        with opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError wraps the response for error statuses (including
        # pipelock's 403 block); read it, then release the connection.
        try:
            return e.code, e.read()
        finally:
            e.close()
|
|
|
|
|
|
class PipelockBridge:
    """The addon object mitmproxy drives. A single instance is placed
    in the module-level `addons` list at the bottom of this file."""

    def __init__(self) -> None:
        # The pipelock URL is captured once, when the addon is built.
        # A missing value is tolerated here (stored as "") so the
        # module stays importable in test environments; `request`
        # refuses traffic if the value is still empty when a request
        # arrives.
        self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "")

    def request(self, flow) -> None:
        """mitmproxy `request` hook, run for each decrypted client
        request before it is forwarded to the real upstream.

        Assigning `flow.response` answers the client directly and
        skips the upstream; leaving it as None lets mitmproxy carry
        on with the real request."""
        # Imported here, not at module top, so the module (and the
        # pure verdict function) can be loaded where mitmproxy is not
        # installed.
        from mitmproxy import http

        def short_circuit(code, payload, tag):
            # Answer the client ourselves; `tag` records why in the
            # X-Pipelock-Bridge response header.
            flow.response = http.Response.make(
                code,
                payload,
                {"Content-Type": "text/plain",
                 "X-Pipelock-Bridge": tag},
            )

        if not self._pipelock_url:
            log.error("%s is unset; failing closed", PIPELOCK_URL_ENV)
            short_circuit(503, b"egress scanner not configured", "misconfigured")
            return

        target_url = flow.request.pretty_url
        method = flow.request.method
        headers = dict(flow.request.headers.items())
        body = bytes(flow.request.content or b"")

        try:
            status, response_body = _scan_via_pipelock(
                self._pipelock_url, method, target_url, headers, body,
            )
        except Exception as exc:
            # No verdict is obtainable when the scanner cannot be
            # reached, so refuse the request rather than risk a leak.
            log.warning("pipelock unreachable; failing closed: %s", exc)
            short_circuit(503, b"egress scanner unreachable", "error")
        else:
            if is_pipelock_block(status, response_body):
                # Relay pipelock's own status and body to the client.
                short_circuit(status, response_body, "block")
            # Otherwise: allow. Pipelock's response is dropped (it is
            # only the by-product of its own upstream-forward attempt)
            # and flow.response stays None, so mitmproxy performs the
            # real upstream request itself.
|
|
|
|
|
|
# Module-level `addons` list: the attribute through which mitmproxy
# picks up the bridge when this script is loaded with `mitmdump -s`.
addons = [PipelockBridge()]
|