From fe712490053956655351101195114652ff8fa7f1 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 12:20:24 -0400 Subject: [PATCH 1/6] docs(prd): add 0005 mitmproxy TLS interception Captures the design for putting a mitmproxy sidecar in front of pipelock on the egress path so pipelock's body / header / MCP scanners see plaintext for the HTTPS hosts in the default allowlist. Implements Topology A from docs/research/tls-mitm-for-pipelock.md with a per-bottle ephemeral CA, no manifest schema change in v1, and selective-bumping deferred until a pinning host appears. Co-Authored-By: Claude Opus 4.7 --- docs/prds/0005-mitmproxy-tls-interception.md | 371 +++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 docs/prds/0005-mitmproxy-tls-interception.md diff --git a/docs/prds/0005-mitmproxy-tls-interception.md b/docs/prds/0005-mitmproxy-tls-interception.md new file mode 100644 index 0000000..e0f3d95 --- /dev/null +++ b/docs/prds/0005-mitmproxy-tls-interception.md @@ -0,0 +1,371 @@ +# PRD 0005: mitmproxy TLS interception for pipelock content scanning + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-12 + +## Summary + +Add a per-bottle **mitmproxy** sidecar in front of pipelock on the +egress path so pipelock's DLP, subdomain-entropy, and MCP scanners +fire on the plaintext bodies of HTTPS requests instead of only the +opaque ciphertext that follows a `CONNECT`. mitmproxy terminates the +agent's TLS, hands plaintext HTTP to pipelock as an upstream +forward proxy, and re-establishes TLS to the real destination. A +fresh ephemeral CA is minted per bottle; the CA private key never +leaves the sidecar, and the public cert is wired into the agent +container's trust store at launch. 
+ +## Problem + +PRD 0001 wired pipelock onto every bottle's egress, but the current +topology only sees `CONNECT` hostnames and opaque TLS bytes: + +``` +agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet + \____________________________ + opaque TLS bytes +``` + +What pipelock cannot scan in this mode is documented in +`docs/research/tls-mitm-for-pipelock.md` §What pipelock cannot see +today: request URLs and methods, request and response headers, +request and response bodies, MCP JSON-RPC payloads, inner-vs-outer +hostname (the domain-fronting check), and WebSocket frames inside a +TLS-wrapped upgrade. The 48-pattern DLP layer this project relies on +in PRD 0001 is therefore inert against every host in the current +`DEFAULT_ALLOWLIST` — all of which are HTTPS-only. + +The integration test added in `tests/integration/test_pipelock_blocks_secret_post.py` +demonstrates the gap concretely: pipelock's body-scan layer only +fires when the agent is forced to send plain HTTP. Real Claude Code +traffic to `api.anthropic.com` goes over CONNECT-tunneled TLS and +slips past the scanner. + +`pipelock-assessment.md` §Scope gaps names this as a known +limitation of the proxy-without-TLS-inspection shape. Closing it is +the explicit motivation for `tls-mitm-for-pipelock.md`, whose +recommendation this PRD implements. + +## Goals / Success Criteria + +The feature works when all of the following are observable: + +- A Node request from inside a launched bottle to a CONNECT-bumped + HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) carrying a + pipelock-recognized credential pattern in the body returns 403 from + the proxy, not a response from the upstream. The existing + `test_pipelock_blocks_secret_post` test path becomes the HTTPS + variant of this assertion. +- Claude Code itself reaches `api.anthropic.com` end-to-end through + the bottle and completes a chat round-trip. No TLS-trust errors + in the agent process. 
+- mitmproxy's TLS-handshake log lines and pipelock's `body_dlp` + event lines both appear for the same outbound request, confirming + the two-stage path is active. + +The feature is **done** when all of the following ship: + +- A new `MitmproxyProxy` class with the same `prepare` / `start` / + `stop` lifecycle shape as `PipelockProxy`, wired into the Docker + backend's launch step. +- The bottle launch step generates a per-bottle ephemeral CA in + `stage_dir`, starts the mitmproxy sidecar with that CA on the + per-bottle internal network, copies the CA public cert into the + agent container's trust store, and points the agent's + `HTTPS_PROXY` / `HTTP_PROXY` at mitmproxy. +- mitmproxy's upstream is the existing pipelock sidecar; pipelock + sees plaintext HTTP from mitmproxy for every previously-HTTPS + request. +- On bottle teardown the mitmproxy sidecar is removed and the + ephemeral CA private key is gone with it. +- An integration test (variant of `test_pipelock_blocks_secret_post`) + proves pipelock now blocks a credential POST that goes out over + HTTPS rather than plain HTTP. +- An integration test proves a non-credential HTTPS request to an + allowlisted host (e.g. CONNECT-then-GET on `raw.githubusercontent.com`) + succeeds end-to-end with mitmproxy in the path (no TLS-trust + errors, response body received). +- The dry-run preflight (`start --dry-run`) shows the mitmproxy + sidecar in both the text and `--format=json` output alongside the + existing pipelock entry. + +## Non-goals + +- **Topology C** — extending pipelock itself to terminate TLS. That + is the cleanest long-term shape per the research note's + recommendation but is substantial Go work and hits the + Apache-2.0-vs-ELv2 question. Deferred. +- **Topology D** — driving mitmproxy with a pipelock `/scan` HTTP + endpoint. Requires a pipelock surface that doesn't exist today. + Deferred. 
+- **Persistent or shared CA across bottles.** Each bottle gets a + fresh CA generated at start and destroyed at teardown. No CA + storage on the host, no cross-bottle reuse. +- **Selective bumping ("ignore_hosts") as a v1 manifest field.** + v1 bumps every CONNECT. If a future allowlisted host turns out to + pin (Mobile / Chromium-style cert pinning), a follow-up PRD adds + the per-host opt-out — likely a `bottle.egress.tls_bump_ignore` + field. See Open questions. +- **HTTP/3 / QUIC.** mitmproxy's HTTP/3 support is experimental. + v1 relies on the v1-egress iptables layer (separate PRD) blocking + UDP/443 to force clients onto HTTP/2 over TCP, which mitmproxy + inspects normally. +- **Raw TCP / non-HTTP TLS interception.** mitmproxy supports it + via `--mode reverse:`, not in CONNECT-bump mode. SSH and any + future raw-TCP egress route around mitmproxy entirely. +- **Trust-store rewiring for non-Debian agent base images.** The + current `Dockerfile` is `node:22-slim` (Debian). If a future base + switches to Red-Hat-family, the `update-ca-certificates` step + becomes `update-ca-trust`. Out of scope until the base changes. + +## Scope + +### In scope + +- New `claude_bottle/mitmproxy.py` mirroring `claude_bottle/pipelock.py`: + config helpers (no backend-specific Docker calls), the + `MitmproxyProxy` abstract class, and the per-bottle CA generation + helpers. +- New `claude_bottle/backend/docker/mitmproxy.py` mirroring + `claude_bottle/backend/docker/pipelock.py`: `DockerMitmproxyProxy` + with the Docker-specific `start` / `stop` lifecycle, the sidecar + container name scheme, and the image pin. +- New provisioner: `claude_bottle/backend/docker/provision/ca.py`, + installing the CA public cert into the agent container at + `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, running + `update-ca-certificates`, and exporting `NODE_EXTRA_CA_CERTS` / + `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` env vars to the agent + process. 
The provisioner runs from `BottleBackend.provision` in + the same orchestration as `prompt`, `skills`, `ssh`, `git`. +- Per-agent network reshuffle in `DockerBottleBackend.launch`: + - internal network is unchanged (mitmproxy + pipelock + agent) + - agent's `HTTPS_PROXY` / `HTTP_PROXY` change from pointing at the + pipelock service name to the mitmproxy service name + - mitmproxy's `upstream_proxy` config points at the pipelock + service name on the internal network +- `DockerBottlePlan` grows a `mitmproxy_plan` field analogous to the + existing `proxy_plan` (the pipelock one) so prepare-time state + rides on the plan. +- Dry-run preflight (`start --dry-run` text + JSON) renders the + mitmproxy line and surfaces the CA fingerprint shown in the + bottle's trust store, so the operator can verify what's been + installed. +- Two new integration tests under `tests/integration/`: + - `test_mitmproxy_blocks_secret_https_post.py` — the HTTPS + variant of the existing `_blocks_secret_post` test. + - `test_mitmproxy_allows_normal_https.py` — confirms a plain + HTTPS GET to a non-credential-bearing path through mitmproxy + + pipelock returns the upstream response, asserting no trust / + handshake breakage. +- Unit tests for the new config builder (mirroring the pipelock + YAML unit tests) and for the CA generation helper. + +### Out of scope + +- The v1 iptables + dnsmasq layer (separate PRD; see + `network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only. + Raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer. +- Pipelock config changes. Pipelock continues to load the YAML PRD + 0001 already generates. mitmproxy is opaque to it; pipelock just + sees plain HTTP from a forward-proxy client. +- A bottle-level toggle to skip mitmproxy entirely. v1 always wires + it in. If a use case appears for an unintercepted bottle + (e.g. testing pipelock's CONNECT-mode behavior in isolation), + that's a follow-up. +- Pinning-host detection automation. 
The cost of finding out (per + the research note) is a single 5-minute test before adding a + host; it stays a manual step. + +## Proposed Design + +### Topology + +``` +agent --HTTPS_PROXY--> mitmproxy --HTTP_PROXY--> pipelock --> internet + (bump TLS) (scan plain) (real TLS) +``` + +All three containers live on the same per-bottle internal Docker +network. mitmproxy and pipelock are both attached to the per-bottle +egress bridge so they can reach the host network; the agent has no +default route, exactly as today. + +Concretely: + +- `agent` sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`. + Currently this points at `claude-bottle-pipelock-<slug>`. The + hostname swap is the only agent-side env change. +- `mitmproxy` runs with `--mode upstream:http://claude-bottle-pipelock-<slug>:<port>` + so its decrypted plaintext is forwarded to pipelock as a regular + upstream forward-proxy request. (Research open question #1 calls + this out: mitmproxy 10+ documentation says `upstream` mode forwards + the original request shape; verify against the pinned version at + implementation time. If forwarding wraps a new CONNECT, fall back + to `regular` mode with a chained proxy declared in mitmproxy's + config and route plain HTTP to pipelock by hand.) +- `pipelock` continues to listen on its existing port and receives + plain HTTP from mitmproxy. No pipelock config change. + +### New components + +Two new modules, matching PRD 0001's split between +backend-agnostic config and backend-specific lifecycle: + +- **`claude_bottle/mitmproxy.py`** — backend-agnostic. The config + builder (mitmproxy YAML / TOML — confirm format), the abstract + `MitmproxyProxy` class with `prepare(...)` writing the config and + the ephemeral CA into `stage_dir`, the CA generation helper + (RSA-2048 or ECDSA-P256 — pick at impl time, research suggests + ECDSA for cert-gen speed), and constants for the sidecar's + internal-network port and image pin. 
+- **`claude_bottle/backend/docker/mitmproxy.py`** — Docker + implementation. `DockerMitmproxyProxy(MitmproxyProxy)` with + `start(plan)` doing `docker create` / `docker cp` / `docker + network connect` / `docker start` analogous to + `DockerPipelockProxy.start`. `stop(target)` removes the sidecar + idempotently. + +The provisioner that installs the CA cert into the agent's trust +store lives at `claude_bottle/backend/docker/provision/ca.py` and +plugs into the existing `BottleBackend.provision` orchestration. The +abstract `BottleBackend.provision_ca` method joins +`provision_prompt` / `provision_skills` / `provision_ssh` / +`provision_git` on the base class (PRD 0004's pattern), with a +default no-op implementation so other backends don't break when +they don't yet implement it. + +### CA lifecycle + +Per `tls-mitm-for-pipelock.md` §CA lifecycle: + +- **Generation.** Host-side in `MitmproxyProxy.prepare`, written to + `stage_dir/mitm-ca.key` (mode 600) and `stage_dir/mitm-ca.crt` + (mode 644). The `.key` is copied into the mitmproxy container at + start; nothing else touches it. +- **Bottle injection.** `provision_ca` copies only the public + `.crt` into the agent container at + `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, runs + `update-ca-certificates` as root inside the container, and sets + `NODE_EXTRA_CA_CERTS=/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, + `SSL_CERT_FILE`, and `REQUESTS_CA_BUNDLE` for the agent process. + Belt-and-suspenders because some libraries honor only env vars. +- **Teardown.** The mitmproxy sidecar container is destroyed; the + CA key vanishes with it. Nothing persists on the host outside + `stage_dir`, which the start command already deletes in its + finally block. +- **Cost.** ECDSA-P256 CA + per-host leaf generation runs in + milliseconds; the per-bottle Docker pull and network plumbing + dominate startup time. + +### Data model changes + +None in v1. The manifest schema is unchanged. 
mitmproxy is always +on for every bottle once this PRD ships. + +A future selective-bump knob (per `tls-mitm-for-pipelock.md` open +question #5) would land on `bottle.egress.tls_bump_ignore` as a +list of hostnames. The shape mirrors `egress.allowlist`. Adding it +later is a strictly additive change. + +### Existing code touched + +- **`claude_bottle/backend/docker/launch.py`** — bring up the + mitmproxy sidecar after the pipelock sidecar but before the agent + container, repoint the agent's `HTTPS_PROXY` / `HTTP_PROXY` env + flags, register an `ExitStack` callback to stop mitmproxy on + teardown. +- **`claude_bottle/backend/docker/prepare.py`** — call into + `MitmproxyProxy.prepare(...)` alongside the existing + `PipelockProxy.prepare(...)`, populate + `DockerBottlePlan.mitmproxy_plan`. +- **`claude_bottle/backend/docker/backend.py`** — add the + `DockerMitmproxyProxy` instance attribute (`self._mitm`) and + thread it through `launch` + cleanup, mirroring the existing + `self._proxy` pattern. +- **`claude_bottle/backend/docker/bottle_plan.py`** — new + `mitmproxy_plan: MitmproxyProxyPlan` field on + `DockerBottlePlan`. `print()` and `to_dict()` learn to render it. +- **`claude_bottle/backend/__init__.py`** — abstract + `BottleBackend.provision_ca(plan, target)` joins the other four + provisioners. Default impl is a no-op (so a future fly backend + isn't forced to implement TLS interception in v1). +- **`tests/integration/`** — two new tests as described above. +- **`tests/unit/`** — config-builder unit tests; CA-helper unit + tests; updated dry-run-plan test pinning the mitmproxy entry. + +### External dependencies + +- **mitmproxy Docker image** pulled from + `mitmproxy/mitmproxy@sha256:`. The digest is pinned in + `claude_bottle/mitmproxy.py` and bumped deliberately, mirroring + the pipelock pin. Tag line `mitmproxy/mitmproxy:11.x` per + research §Image pin for mitmproxy. +- No new host-side runtimes. 
CA generation uses Python's `cryptography` + if it's already a transitive dep; otherwise use `openssl` shelled + out from the host-side prepare step. Decide at impl time after + confirming what's available on the runner without adding deps. + +## Open questions + +- **mitmproxy upstream-proxy mode mechanics.** Whether `upstream` + mode forwards decrypted plaintext to pipelock or re-wraps it in a + CONNECT. Documented behavior changed between mitmproxy 8 and 10. + Needs verification against the pinned version at impl time. If + `upstream` re-wraps, fall back to `regular` mode plus a chained + proxy directive routing plain HTTP to pipelock. +- **Pipelock plain-HTTP scanning coverage.** Pipelock's + `forward_proxy.enabled: true` accepts both `GET http://…` and + `CONNECT host:443`. Confirm by reading + `github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md` + that the full DLP / MCP / subdomain-entropy pipeline runs on the + HTTP path; some pipelock layers may be gated on CONNECT only. +- **CA installation in the Anthropic-provided Claude Code image.** + The base image determines whether `update-ca-certificates` + (Debian) or `update-ca-trust` (Red Hat) applies. Confirm against + the `Dockerfile` before writing the provisioner; v1 assumes + Debian (`node:22-slim`). +- **HTTP/2 ALPN end-to-end.** Node's HTTP client negotiates `h2` + via ALPN. Confirm the pinned mitmproxy version speaks `h2` to + both halves without silently downgrading to `http/1.1`, which + would be a noticeable performance regression on bulk transfers. +- **Selective-bump policy surface.** Where does the + "tunnel this hostname blindly" decision live when (not if) a + pinning host appears? Recommended shape per research: + `bottle.egress.tls_bump_ignore: ["example.com"]`, a list of + hostnames mitmproxy passes through via `ignore_hosts`. Defer + until needed; record the shape so the follow-up is mechanical. +- **CA generation: Python `cryptography` vs. 
shelled-out + `openssl`.** Adding `cryptography` brings a substantial transitive + graph; shelling to `openssl` keeps the host-side prepare step + dep-light. Decide at impl time based on what's already on the + runner. Either way, the CA is per-bottle and ephemeral. +- **Domain-fronting verification.** Once pipelock sees the inner + `Host` / `:authority`, comparing it to the outer `CONNECT` target + catches domain fronting. Whether pipelock has a rule for this or + we need to add one is a follow-up; out of scope here. +- **Dry-run preflight rendering of the CA.** Show the fingerprint + but never the private key. Confirm the exact dry-run JSON shape + during implementation; the field set is part of the CLI's user- + facing contract (per PRD 0003 §to_dict notes). + +## References + +- `docs/research/tls-mitm-for-pipelock.md` — primary source; this + PRD implements the recommendation in §Recommendation (Topology A). +- `docs/research/pipelock-assessment.md` §Scope gaps — names the + TLS-inspection gap closed here. +- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` — + egress-proxy baseline this PRD extends. +- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC + contract this PRD adds a `provision_ca` method to. +- `docs/prds/0004-split-out-provisioners.md` — per-provisioner + module pattern reused for the new CA provisioner. +- mitmproxy: <https://mitmproxy.org/>, + <https://docs.mitmproxy.org/stable/> +- mitmproxy `upstream_proxy` mode: + <https://docs.mitmproxy.org/stable/concepts/modes/#upstream-proxy> +- mitmproxy CA cert installation: + <https://docs.mitmproxy.org/stable/concepts/certificates/> +- Node `NODE_EXTRA_CA_CERTS`: + <https://nodejs.org/api/cli.html#node_extra_ca_certsfile> -- 2.52.0 From c2eacac49f926eee6d65ef42a63845865ad096ab Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 12:54:27 -0400 Subject: [PATCH 2/6] docs(prd): update 0005 after open-question walkthrough MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-grounds the design after walking the eight original open questions interactively. Two structural changes: - Topology A → A'. 
A spike confirmed mitmproxy's `upstream` mode re-wraps decrypted flows in a new CONNECT to the upstream proxy, which would have left pipelock seeing only ciphertext (the very gap this PRD set out to close). The fix is to run mitmproxy in `regular` mode and ship a vendored Python addon that forwards each decrypted request to pipelock as a plain HTTP forward-proxy call. Pipelock is unchanged. - mitmproxy owns CA generation. The research note's preference for a host-side openssl / cryptography CA turned out to be unnecessary — mitmproxy generates a fresh CA on startup; the public cert is `docker cp`'d into the agent. No new host-side crypto deps. Dry-run can't render a fingerprint (CA doesn't exist yet); launches print it once to stderr. Other Q3–Q8 resolutions folded in: Debian-base `update-ca-certificates` confirmed, mitmproxy 12 verified to speak h2 on both halves, selective-bump deferred to v2, response-body and MCP scanning deferred to v2, domain-fronting deferred to v2. Open questions rewritten — what remains is addon-implementation specifics (pipelock 403-body fingerprint, env-var inheritance through docker exec, addon test fixtures). 
Co-Authored-By: Claude Opus 4.7 --- docs/prds/0005-mitmproxy-tls-interception.md | 532 +++++++++++-------- 1 file changed, 299 insertions(+), 233 deletions(-) diff --git a/docs/prds/0005-mitmproxy-tls-interception.md b/docs/prds/0005-mitmproxy-tls-interception.md index e0f3d95..89f2a56 100644 --- a/docs/prds/0005-mitmproxy-tls-interception.md +++ b/docs/prds/0005-mitmproxy-tls-interception.md @@ -1,20 +1,30 @@ # PRD 0005: mitmproxy TLS interception for pipelock content scanning -- **Status:** Draft +- **Status:** Draft (updated 2026-05-12 after open-question walkthrough) - **Author:** didericis - **Created:** 2026-05-12 ## Summary Add a per-bottle **mitmproxy** sidecar in front of pipelock on the -egress path so pipelock's DLP, subdomain-entropy, and MCP scanners -fire on the plaintext bodies of HTTPS requests instead of only the -opaque ciphertext that follows a `CONNECT`. mitmproxy terminates the -agent's TLS, hands plaintext HTTP to pipelock as an upstream -forward proxy, and re-establishes TLS to the real destination. A -fresh ephemeral CA is minted per bottle; the CA private key never -leaves the sidecar, and the public cert is wired into the agent -container's trust store at launch. +egress path. mitmproxy bumps the agent's TLS CONNECT, decrypts the +inner HTTP, and hands each request to a vendored Python addon. The +addon forwards the decrypted request to pipelock as a plain HTTP +forward-proxy call so pipelock's DLP, URL-scan, and header-scan +layers fire on real bodies. On the verdict, the addon either +short-circuits the flow with a 403 (block) or lets mitmproxy +proceed to the real upstream (allow). mitmproxy itself generates +the ephemeral per-bottle CA on startup; the public cert is copied +into the agent's trust store and the private key dies with the +sidecar on teardown. 
+ +This is Topology A' from `docs/research/tls-mitm-for-pipelock.md` — +a variant of the research note's Topology A after a spike showed +mitmproxy's `upstream` mode re-wraps decrypted flows in a new +CONNECT to the upstream proxy (which would defeat the entire +point). The addon recovers the design by emitting plain HTTP to +pipelock explicitly instead of relying on mitmproxy's `upstream` +chaining. ## Problem @@ -45,7 +55,8 @@ slips past the scanner. `pipelock-assessment.md` §Scope gaps names this as a known limitation of the proxy-without-TLS-inspection shape. Closing it is the explicit motivation for `tls-mitm-for-pipelock.md`, whose -recommendation this PRD implements. +recommendation this PRD implements (with the addon adjustment +forced by the upstream-mode spike). ## Goals / Success Criteria @@ -53,306 +64,361 @@ The feature works when all of the following are observable: - A Node request from inside a launched bottle to a CONNECT-bumped HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) carrying a - pipelock-recognized credential pattern in the body returns 403 from - the proxy, not a response from the upstream. The existing - `test_pipelock_blocks_secret_post` test path becomes the HTTPS - variant of this assertion. + pipelock-recognized credential pattern in the body returns 403 + from the bottle's egress chain — not a response from the upstream. + The existing `test_pipelock_blocks_secret_post` test path becomes + the HTTPS variant of this assertion. +- A plain HTTPS GET from inside the bottle to an allowlisted host + with no credential pattern (e.g. `GET https://raw.githubusercontent.com/...`) + returns the real upstream response — the addon doesn't break + clean traffic. - Claude Code itself reaches `api.anthropic.com` end-to-end through the bottle and completes a chat round-trip. No TLS-trust errors in the agent process. 
-- mitmproxy's TLS-handshake log lines and pipelock's `body_dlp` - event lines both appear for the same outbound request, confirming - the two-stage path is active. +- mitmproxy's flow log and pipelock's `body_dlp` / `header_dlp` / + `core_dlp` event lines both appear for the same outbound request, + confirming the two-stage path is active. The feature is **done** when all of the following ship: - A new `MitmproxyProxy` class with the same `prepare` / `start` / `stop` lifecycle shape as `PipelockProxy`, wired into the Docker backend's launch step. -- The bottle launch step generates a per-bottle ephemeral CA in - `stage_dir`, starts the mitmproxy sidecar with that CA on the - per-bottle internal network, copies the CA public cert into the - agent container's trust store, and points the agent's - `HTTPS_PROXY` / `HTTP_PROXY` at mitmproxy. -- mitmproxy's upstream is the existing pipelock sidecar; pipelock - sees plaintext HTTP from mitmproxy for every previously-HTTPS - request. +- A vendored Python addon at `claude_bottle/mitmproxy/addon.py` + that mitmproxy loads on startup via `mitmdump -s ...`. The sidecar + runs in `regular` mode (default), not `upstream` mode. +- The bottle launch step starts the mitmproxy sidecar, waits for + the sidecar-internal CA to be generated, copies the CA public + cert into the agent at `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, + runs `update-ca-certificates` inside the agent, and threads the + `NODE_EXTRA_CA_CERTS` / `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` + env trio onto the agent container's runtime env. +- The agent's `HTTPS_PROXY` / `HTTP_PROXY` point at the mitmproxy + sidecar (where they pointed at pipelock under PRD 0001). +- pipelock is otherwise unchanged. It continues to load the YAML + PRD 0001 generates and runs its existing scanning pipeline; the + addon talks to it via the same forward-proxy interface today's + `test_pipelock_blocks_secret_post` uses. 
- On bottle teardown the mitmproxy sidecar is removed and the ephemeral CA private key is gone with it. -- An integration test (variant of `test_pipelock_blocks_secret_post`) - proves pipelock now blocks a credential POST that goes out over - HTTPS rather than plain HTTP. -- An integration test proves a non-credential HTTPS request to an - allowlisted host (e.g. CONNECT-then-GET on `raw.githubusercontent.com`) - succeeds end-to-end with mitmproxy in the path (no TLS-trust - errors, response body received). +- An HTTPS variant of `test_pipelock_blocks_secret_post` proves + pipelock now blocks a credential POST over HTTPS rather than + plain HTTP. +- An integration test proves a non-credential HTTPS GET through + the chain returns the upstream's real response. - The dry-run preflight (`start --dry-run`) shows the mitmproxy - sidecar in both the text and `--format=json` output alongside the - existing pipelock entry. + sidecar in both text and `--format=json` output. The JSON + contract gains a reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }` + block; fingerprint is always null at dry-run because the CA + doesn't exist yet. Real launches emit a one-line stderr log: + `claude-bottle: mitm ca fingerprint: ...`. ## Non-goals -- **Topology C** — extending pipelock itself to terminate TLS. That - is the cleanest long-term shape per the research note's - recommendation but is substantial Go work and hits the - Apache-2.0-vs-ELv2 question. Deferred. -- **Topology D** — driving mitmproxy with a pipelock `/scan` HTTP - endpoint. Requires a pipelock surface that doesn't exist today. - Deferred. +- **Topology C** — extending pipelock itself to terminate TLS. The + research note's recommended long-term shape, but substantial Go + work plus the Apache-2.0-vs-ELv2 question. Deferred. +- **Topology D as canonical** — mitmproxy with a pipelock `/scan` + HTTP endpoint. 
The addon in this PRD talks to pipelock via its + existing forward-proxy interface; no upstream pipelock change + needed. - **Persistent or shared CA across bottles.** Each bottle gets a - fresh CA generated at start and destroyed at teardown. No CA - storage on the host, no cross-bottle reuse. + fresh CA generated by its own mitmproxy at startup. - **Selective bumping ("ignore_hosts") as a v1 manifest field.** - v1 bumps every CONNECT. If a future allowlisted host turns out to - pin (Mobile / Chromium-style cert pinning), a follow-up PRD adds - the per-host opt-out — likely a `bottle.egress.tls_bump_ignore` - field. See Open questions. + v1 bumps every CONNECT. If a future allowlisted host turns out + to pin (Mobile / Chromium-style cert pinning), a follow-up PRD + adds the per-host opt-out via `bottle.egress.tls_bump_ignore`. + Strictly additive. - **HTTP/3 / QUIC.** mitmproxy's HTTP/3 support is experimental. - v1 relies on the v1-egress iptables layer (separate PRD) blocking - UDP/443 to force clients onto HTTP/2 over TCP, which mitmproxy - inspects normally. + v1 relies on the v1-egress iptables layer blocking UDP/443 to + force clients onto HTTP/2 over TCP, which mitmproxy 12 inspects + natively (verified by spike). - **Raw TCP / non-HTTP TLS interception.** mitmproxy supports it via `--mode reverse:`, not in CONNECT-bump mode. SSH and any future raw-TCP egress route around mitmproxy entirely. -- **Trust-store rewiring for non-Debian agent base images.** The +- **Trust-store rewiring for non-Debian agent images.** The current `Dockerfile` is `node:22-slim` (Debian). If a future base switches to Red-Hat-family, the `update-ca-certificates` step becomes `update-ca-trust`. Out of scope until the base changes. +- **Response-body scanning.** Pipelock supports it; we don't wire + it in v1 because the addon would need to ferry the upstream + response back through pipelock's scanner, which the forward- + proxy interface doesn't support cleanly. v2 candidate. 
+- **MCP scanning on the bumped path.** Pipelock's MCP scanner only fires on MCP-formatted + JSON-RPC payloads inside tool calls. That is not relevant to plain HTTPS + agent traffic, so it is out of v1 scope. +- **Domain-fronting verification.** Once the addon sees the inner + `Host` / `:authority`, comparing it to the outer CONNECT target + catches domain fronting. Worth ~10 lines in the addon, but + deferred until the rest of v1 is settled. +- **Host-side openssl / `cryptography` for CA generation.** The + research note's open question on this is resolved by letting + mitmproxy itself generate the CA (it does so on first launch). + No new host-side crypto. ## Scope ### In scope -- New `claude_bottle/mitmproxy.py` mirroring `claude_bottle/pipelock.py`: - config helpers (no backend-specific Docker calls), the - `MitmproxyProxy` abstract class, and the per-bottle CA generation - helpers. -- New `claude_bottle/backend/docker/mitmproxy.py` mirroring - `claude_bottle/backend/docker/pipelock.py`: `DockerMitmproxyProxy` - with the Docker-specific `start` / `stop` lifecycle, the sidecar - container name scheme, and the image pin. -- New provisioner: `claude_bottle/backend/docker/provision/ca.py`, - installing the CA public cert into the agent container at - `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, running - `update-ca-certificates`, and exporting `NODE_EXTRA_CA_CERTS` / - `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` env vars to the agent - process. The provisioner runs from `BottleBackend.provision` in - the same orchestration as `prompt`, `skills`, `ssh`, `git`. 
-- Per-agent network reshuffle in `DockerBottleBackend.launch`: - - internal network is unchanged (mitmproxy + pipelock + agent) - - agent's `HTTPS_PROXY` / `HTTP_PROXY` change from pointing at the - pipelock service name to the mitmproxy service name - - mitmproxy's `upstream_proxy` config points at the pipelock - service name on the internal network -- `DockerBottlePlan` grows a `mitmproxy_plan` field analogous to the - existing `proxy_plan` (the pipelock one) so prepare-time state - rides on the plan. -- Dry-run preflight (`start --dry-run` text + JSON) renders the - mitmproxy line and surfaces the CA fingerprint shown in the - bottle's trust store, so the operator can verify what's been - installed. +- New `claude_bottle/mitmproxy/` package: + - `__init__.py` — backend-agnostic. Constants (sidecar port, + image-pin digest, the in-container addon path), the abstract + `MitmproxyProxy` class with `prepare` / `start` / `stop` shape + mirroring `PipelockProxy`, and the small helper that reads the + CA fingerprint from a PEM file via `openssl x509 -fingerprint` + shelled out. + - `addon.py` — the Python addon mitmproxy loads. ~80–150 lines. + For each `request` event: forward the decrypted request to + pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a + plain HTTP forward-proxy call (absolute-URI form). Inspect + pipelock's response. If status is 403 *and* the body matches + pipelock's known block-event shape, set the flow's response to + a 403 with pipelock's body and short-circuit. Otherwise, + discard pipelock's response (and any wasted upstream-leg + response from pipelock's forwarder) and let mitmproxy proceed + to the real upstream. +- New `claude_bottle/backend/docker/mitmproxy.py` — + `DockerMitmproxyProxy(MitmproxyProxy)` with the Docker-specific + start/stop lifecycle. `start(plan)` does `docker create` / + `docker cp addon.py …` / `docker network connect` / `docker start`, + analogous to the existing `DockerPipelockProxy.start`. 
Injects + `CLAUDE_BOTTLE_PIPELOCK_URL` into the sidecar env so the addon + knows where pipelock lives. +- New provisioner `claude_bottle/backend/docker/provision/ca.py`. + Polls mitmproxy for the cert file, copies it through a host + stage dir into the agent, runs `update-ca-certificates` inside + the agent, computes the SHA-256 fingerprint, and prints the + one-line stderr log. +- `BottleBackend.provision_ca(plan, target)` joins the four + existing provisioner methods on the abstract base. Default impl + is no-op so other backends don't break when they don't yet + implement TLS interception. +- `DockerBottlePlan` grows a `mitmproxy_plan` field mirroring the + existing `proxy_plan`. +- Agent container `docker run` invocation: + - `HTTPS_PROXY` / `HTTP_PROXY` change from the pipelock service + name to the mitmproxy service name. + - Three `-e` flags set the CA env trio so they're inherited by + the eventual `docker exec claude` (Docker propagates run-time + env into exec by default; fallback in Q1 below). +- Dry-run preflight rendering of the mitmproxy entry (text + JSON). + JSON gains `egress.mitm: { "enabled": true, "ca_fingerprint": null }`. +- One stderr log line at launch with the CA fingerprint. - Two new integration tests under `tests/integration/`: - - `test_mitmproxy_blocks_secret_https_post.py` — the HTTPS - variant of the existing `_blocks_secret_post` test. + - `test_mitmproxy_blocks_secret_https_post.py` — HTTPS variant + of the existing block-secret test. Asserts pipelock's body + DLP fires on a credential POST tunneled through CONNECT. - `test_mitmproxy_allows_normal_https.py` — confirms a plain - HTTPS GET to a non-credential-bearing path through mitmproxy + - pipelock returns the upstream response, asserting no trust / - handshake breakage. -- Unit tests for the new config builder (mirroring the pipelock - YAML unit tests) and for the CA generation helper. 
+ HTTPS GET on an allowlisted host returns the upstream response, + isolating the addon's pass-through path from the block path. +- Unit tests for the addon's verdict logic (block vs allow on + status + body shape, edge cases) using mitmproxy's `mitmproxy.test` + flow fixtures. Unit tests for the proxy config builder + (mirroring `tests/unit/test_pipelock_yaml.py`). ### Out of scope - The v1 iptables + dnsmasq layer (separate PRD; see - `network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only. - Raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer. -- Pipelock config changes. Pipelock continues to load the YAML PRD - 0001 already generates. mitmproxy is opaque to it; pipelock just - sees plain HTTP from a forward-proxy client. -- A bottle-level toggle to skip mitmproxy entirely. v1 always wires - it in. If a use case appears for an unintercepted bottle - (e.g. testing pipelock's CONNECT-mode behavior in isolation), - that's a follow-up. + `network-egress-guard.md`). mitmproxy covers HTTP/HTTPS only; + raw TCP, UDP, ICMP, and direct DNS still need the IP-level layer. +- Pipelock config changes. Pipelock continues to load the YAML + PRD 0001 generates; the addon talks to it via the existing + forward-proxy interface. +- A bottle-level toggle to skip mitmproxy entirely. v1 always + wires it in. - Pinning-host detection automation. The cost of finding out (per - the research note) is a single 5-minute test before adding a - host; it stays a manual step. + research) is a single 5-minute test before adding a host; it + stays a manual step. +- Pipelock upstream contributions for an `X-Pipelock-Verdict` header. + Possible follow-up. Until then the addon distinguishes blocks + from passes via status + body fingerprint. 
## Proposed Design ### Topology ``` -agent --HTTPS_PROXY--> mitmproxy --HTTP_PROXY--> pipelock --> internet - (bump TLS) (scan plain) (real TLS) +agent --HTTPS_PROXY--> mitmproxy --addon--> pipelock (scan) + (bump TLS) | + ^ | (verdict via status code) + | v + +-- on allow ----- real upstream + (mitmproxy as client) ``` All three containers live on the same per-bottle internal Docker network. mitmproxy and pipelock are both attached to the per-bottle -egress bridge so they can reach the host network; the agent has no -default route, exactly as today. +egress bridge for real-internet reach; the agent has no default +route. Concretely: -- `agent` sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`. - Currently this points at `claude-bottle-pipelock-<slug>`. The - hostname swap is the only agent-side env change. -- `mitmproxy` runs with `--mode upstream:http://claude-bottle-pipelock-<slug>:<port>` - so its decrypted plaintext is forwarded to pipelock as a regular - upstream forward-proxy request. (Research open question #1 calls - this out: mitmproxy 10+ documentation says `upstream` mode forwards - the original request shape; verify against the pinned version at - implementation time. If forwarding wraps a new CONNECT, fall back - to `regular` mode with a chained proxy declared in mitmproxy's - config and route plain HTTP to pipelock by hand.) -- `pipelock` continues to listen on its existing port and receives - plain HTTP from mitmproxy. No pipelock config change. +- Agent sets `HTTPS_PROXY=http://claude-bottle-mitm-<slug>:<port>`. + PRD 0001 had this pointing at pipelock; the hostname swap is the + only agent-side env change. +- mitmproxy runs in **`regular`** mode (default; no `--mode` flag). + It bumps every CONNECT, generates fake leaf certs signed by its + own CA, and presents them to the agent. +- The addon, loaded via `mitmdump -s /addon/addon.py`, intercepts + each decrypted `request` event.
It forwards the request to + pipelock at `http://claude-bottle-pipelock-<slug>:8888` as a + plain HTTP forward-proxy call (absolute-URI form), so pipelock + sees the full URL, headers, and body. +- The addon inspects pipelock's response. If status is 403 *and* + the response body matches pipelock's known block-event shape, + the addon sets the mitmproxy flow's response to a 403 with + pipelock's body and short-circuits. Otherwise — including the + case where pipelock's forwarder attempted the upstream and got + a 4xx — the addon discards pipelock's response and lets + mitmproxy proceed to the real upstream. +- mitmproxy completes the outbound TLS to the real destination + using its built-in trust store, just like any other forward + proxy. Pipelock is only involved as a scanner. + +The trade-off: pipelock makes a wasted upstream forward attempt +for every allowed request (it tries to forward over plain HTTP to +a real HTTPS-only host, which fails with the upstream's 4xx). This +is benign — the scan completes before forwarding, the verdict +reaches the addon, the upstream-side request happens to die in +pipelock's forwarder rather than reach the agent. Acceptable cost +for the visibility win. A pipelock-side improvement (skip the +forward when the addon only needs the scan verdict) is a future +optimization. ### New components -Two new modules, matching PRD 0001's split between -backend-agnostic config and backend-specific lifecycle: - -- **`claude_bottle/mitmproxy.py`** — backend-agnostic. The config - builder (mitmproxy YAML / TOML — confirm format), the abstract - `MitmproxyProxy` class with `prepare(...)` writing the config and - the ephemeral CA into `stage_dir`, the CA generation helper - (RSA-2048 or ECDSA-P256 — pick at impl time, research suggests - ECDSA for cert-gen speed), and constants for the sidecar's - internal-network port and image pin. -- **`claude_bottle/backend/docker/mitmproxy.py`** — Docker - implementation.
`DockerMitmproxyProxy(MitmproxyProxy)` with - `start(plan)` doing `docker create` / `docker cp` / `docker - network connect` / `docker start` analogous to - `DockerPipelockProxy.start`. `stop(target)` removes the sidecar - idempotently. - -The provisioner that installs the CA cert into the agent's trust -store lives at `claude_bottle/backend/docker/provision/ca.py` and -plugs into the existing `BottleBackend.provision` orchestration. The -abstract `BottleBackend.provision_ca` method joins -`provision_prompt` / `provision_skills` / `provision_ssh` / -`provision_git` on the base class (PRD 0004's pattern), with a -default no-op implementation so other backends don't break when -they don't yet implement it. +- `claude_bottle/mitmproxy/__init__.py` — backend-agnostic + abstract base, constants, the `openssl x509 -fingerprint` helper. +- `claude_bottle/mitmproxy/addon.py` — the scanning addon. + Reads pipelock's URL from `CLAUDE_BOTTLE_PIPELOCK_URL` (injected + into the sidecar env by the proxy's `start`). For each + `request` flow: synchronously forward the request to pipelock with its original method; inspect status + + body; either short-circuit with 403 or fall through. +- `claude_bottle/backend/docker/mitmproxy.py` — + `DockerMitmproxyProxy(MitmproxyProxy)` with start/stop, the + `docker cp` of the addon into the sidecar before `docker start`, + and the `CLAUDE_BOTTLE_PIPELOCK_URL` wiring. ### CA lifecycle -Per `tls-mitm-for-pipelock.md` §CA lifecycle: +Simplified by letting mitmproxy own the generation: -- **Generation.** Host-side in `MitmproxyProxy.prepare`, written to - `stage_dir/mitm-ca.key` (mode 600) and `stage_dir/mitm-ca.crt` - (mode 644). The `.key` is copied into the mitmproxy container at - start; nothing else touches it.
-- **Bottle injection.** `provision_ca` copies only the public - `.crt` into the agent container at - `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, runs - `update-ca-certificates` as root inside the container, and sets - `NODE_EXTRA_CA_CERTS=/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, - `SSL_CERT_FILE`, and `REQUESTS_CA_BUNDLE` for the agent process. - Belt-and-suspenders because some libraries honor only env vars. -- **Teardown.** The mitmproxy sidecar container is destroyed; the - CA key vanishes with it. Nothing persists on the host outside - `stage_dir`, which the start command already deletes in its - finally block. -- **Cost.** ECDSA-P256 CA + per-host leaf generation runs in - milliseconds; the per-bottle Docker pull and network plumbing - dominate startup time. +- **Generation.** mitmproxy generates a fresh CA on startup + inside its container at `/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem` + (public) + `mitmproxy-ca.pem` (private). No host-side openssl + for *generation*; no host-side Python `cryptography` dep. +- **Volume strategy.** Container-internal only. No host bind + mount means the CA dies with the container. +- **Extraction.** `provision_ca` polls (~1s) for the cert file + via `docker exec`, then `docker cp` to host stage dir, then + `docker cp` into the agent. Host stage dir gets cleaned up by + the existing `start.py` `finally` block. +- **Bottle install.** + 1. `docker cp <stage_dir>/mitm-ca.crt agent-<slug>:/usr/local/share/ca-certificates/claude-bottle-mitm.crt` + 2. `docker exec -u 0 agent-<slug> chmod 644 …` + 3. `docker exec -u 0 agent-<slug> update-ca-certificates` + 4. Three `-e` flags on `docker run` set the env trio + (`NODE_EXTRA_CA_CERTS=…/claude-bottle-mitm.crt`, + `SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`, + `REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`) so + `docker exec claude` inherits them. +- **Teardown.** Sidecar container removed; CA private key gone.
+- **Fingerprint.** Computed post-extraction via shelled-out + `openssl x509 -fingerprint -sha256 -noout`. Logged once to + stderr at launch; never the private key. ### Data model changes -None in v1. The manifest schema is unchanged. mitmproxy is always -on for every bottle once this PRD ships. +None to the manifest schema. The dry-run JSON contract gains a +reserved `egress.mitm: { "enabled": true, "ca_fingerprint": null }` +block. Fingerprint is always null at dry-run (CA doesn't exist +yet) but the field is reserved so future schema additions stay +non-breaking. -A future selective-bump knob (per `tls-mitm-for-pipelock.md` open -question #5) would land on `bottle.egress.tls_bump_ignore` as a -list of hostnames. The shape mirrors `egress.allowlist`. Adding it -later is a strictly additive change. +A future selective-bump knob would add +`bottle.egress.tls_bump_ignore: [host, ...]` per the research +note. Strictly additive when it lands. ### Existing code touched - **`claude_bottle/backend/docker/launch.py`** — bring up the - mitmproxy sidecar after the pipelock sidecar but before the agent - container, repoint the agent's `HTTPS_PROXY` / `HTTP_PROXY` env - flags, register an `ExitStack` callback to stop mitmproxy on - teardown. + mitmproxy sidecar between pipelock and the agent. Repoint the + agent's `HTTPS_PROXY` / `HTTP_PROXY` env flags to mitmproxy. + Register an `ExitStack` callback for mitmproxy teardown. Print + the CA fingerprint once the sidecar reports ready. - **`claude_bottle/backend/docker/prepare.py`** — call into - `MitmproxyProxy.prepare(...)` alongside the existing - `PipelockProxy.prepare(...)`, populate - `DockerBottlePlan.mitmproxy_plan`. + `MitmproxyProxy.prepare(...)` alongside `PipelockProxy.prepare(...)`, + populate `DockerBottlePlan.mitmproxy_plan`. 
- **`claude_bottle/backend/docker/backend.py`** — add the `DockerMitmproxyProxy` instance attribute (`self._mitm`) and - thread it through `launch` + cleanup, mirroring the existing - `self._proxy` pattern. + thread it through `launch` + cleanup, mirroring `self._proxy`. - **`claude_bottle/backend/docker/bottle_plan.py`** — new - `mitmproxy_plan: MitmproxyProxyPlan` field on - `DockerBottlePlan`. `print()` and `to_dict()` learn to render it. + `mitmproxy_plan` field. `print()` and `to_dict()` learn to + render the mitmproxy entry and the `egress.mitm` JSON block. - **`claude_bottle/backend/__init__.py`** — abstract - `BottleBackend.provision_ca(plan, target)` joins the other four - provisioners. Default impl is a no-op (so a future fly backend - isn't forced to implement TLS interception in v1). + `BottleBackend.provision_ca` joins the four existing + provisioners; default no-op. - **`tests/integration/`** — two new tests as described above. -- **`tests/unit/`** — config-builder unit tests; CA-helper unit - tests; updated dry-run-plan test pinning the mitmproxy entry. +- **`tests/unit/`** — addon-verdict tests, mitmproxy-config + builder tests, dry-run-plan test updated for the new + `egress.mitm` block. ### External dependencies -- **mitmproxy Docker image** pulled from - `mitmproxy/mitmproxy@sha256:`. The digest is pinned in - `claude_bottle/mitmproxy.py` and bumped deliberately, mirroring - the pipelock pin. Tag line `mitmproxy/mitmproxy:11.x` per - research §Image pin for mitmproxy. -- No new host-side runtimes. CA generation uses Python's `cryptography` - if it's already a transitive dep; otherwise use `openssl` shelled - out from the host-side prepare step. Decide at impl time after - confirming what's available on the runner without adding deps. +- **mitmproxy Docker image** pinned by digest on the `12.x` line. + Bumped deliberately, mirroring the pipelock pin. Verified by + spike to speak h2 on both halves. +- No new host-side runtimes. 
mitmproxy generates the CA; + fingerprint via the `openssl` already present on Debian / macOS + / ubuntu-latest runners. ## Open questions -- **mitmproxy upstream-proxy mode mechanics.** Whether `upstream` - mode forwards decrypted plaintext to pipelock or re-wraps it in a - CONNECT. Documented behavior changed between mitmproxy 8 and 10. - Needs verification against the pinned version at impl time. If - `upstream` re-wraps, fall back to `regular` mode plus a chained - proxy directive routing plain HTTP to pipelock. -- **Pipelock plain-HTTP scanning coverage.** Pipelock's - `forward_proxy.enabled: true` accepts both `GET http://…` and - `CONNECT host:443`. Confirm by reading - `github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md` - that the full DLP / MCP / subdomain-entropy pipeline runs on the - HTTP path; some pipelock layers may be gated on CONNECT only. -- **CA installation in the Anthropic-provided Claude Code image.** - The base image determines whether `update-ca-certificates` - (Debian) or `update-ca-trust` (Red Hat) applies. Confirm against - the `Dockerfile` before writing the provisioner; v1 assumes - Debian (`node:22-slim`). -- **HTTP/2 ALPN end-to-end.** Node's HTTP client negotiates `h2` - via ALPN. Confirm the pinned mitmproxy version speaks `h2` to - both halves without silently downgrading to `http/1.1`, which - would be a noticeable performance regression on bulk transfers. -- **Selective-bump policy surface.** Where does the - "tunnel this hostname blindly" decision live when (not if) a - pinning host appears? Recommended shape per research: - `bottle.egress.tls_bump_ignore: ["example.com"]`, a list of - hostnames mitmproxy passes through via `ignore_hosts`. Defer - until needed; record the shape so the follow-up is mechanical. -- **CA generation: Python `cryptography` vs. shelled-out - `openssl`.** Adding `cryptography` brings a substantial transitive - graph; shelling to `openssl` keeps the host-side prepare step - dep-light. 
Decide at impl time based on what's already on the - runner. Either way, the CA is per-bottle and ephemeral. -- **Domain-fronting verification.** Once pipelock sees the inner - `Host` / `:authority`, comparing it to the outer `CONNECT` target - catches domain fronting. Whether pipelock has a rule for this or - we need to add one is a follow-up; out of scope here. -- **Dry-run preflight rendering of the CA.** Show the fingerprint - but never the private key. Confirm the exact dry-run JSON shape - during implementation; the field set is part of the CLI's user- - facing contract (per PRD 0003 §to_dict notes). +(rewritten — most of the original v1 questions are now closed by +the walkthrough spikes; what remains is addon-implementation +specifics worth pinning during the first impl turn.) + +- **Pipelock's 403-body fingerprint.** The addon needs to + distinguish a pipelock block (DLP / host) from a real-upstream + 4xx that pipelock's forwarder relayed back. Most likely shape: + pipelock's 403 response carries a JSON body with `event` / + `scanner` fields, whereas a real-upstream 4xx carries whatever + the upstream sent. Pin the exact fingerprint by inspecting + pipelock's actual 403 body bytes at impl time. Long-term + cleanup: file an upstream feature request for an + `X-Pipelock-Verdict: block` response header so the addon can + read a structured signal instead of pattern-matching the body. +- **Docker run env-var inheritance through docker exec.** Plan + assumes `docker run -e VAR=value` propagates to subsequent + `docker exec` invocations. The Docker docs say so; not yet + empirically pinned on this project's runner setup. Verify in + the first impl turn. Trivial fallback: thread the three `-e` + flags onto every `DockerBottle.exec*` call. +- **Addon synchronous-call latency.** The addon makes a sync HTTP + call to pipelock per outbound flow. Pipelock is on the same + internal Docker network; expected per-call latency is well + under 10ms. 
Confirm under the parallel-request load Claude Code + generates (most likely a non-issue — Claude is single-stream + request-wise). +- **Addon test fixtures.** mitmproxy ships `mitmproxy.test` with + flow fixtures; addons can be unit-tested without a running + proxy. Confirm the import path and recommended fixture shape at + impl time; structure the addon so the verdict-decision is a + pure function that's trivially testable in isolation from any + HTTP I/O. +- **Pipelock allowing the addon's forwarded request through.** + pipelock will see the addon's request as coming from the + mitmproxy sidecar's IP on the internal network. Confirm + pipelock has no client-IP allowlist that would reject these. + Likely fine — pipelock's `client_ip` is informational in the + scan event, not a gate. ## References -- `docs/research/tls-mitm-for-pipelock.md` — primary source; this - PRD implements the recommendation in §Recommendation (Topology A). +- `docs/research/tls-mitm-for-pipelock.md` — primary source. This + PRD implements a variant of §Recommendation (Topology A) after + the spike documented under "Open questions" §1 falsified the + `upstream` mode assumption. - `docs/research/pipelock-assessment.md` §Scope gaps — names the TLS-inspection gap closed here. - `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` — @@ -363,9 +429,9 @@ later is a strictly additive change. module pattern reused for the new CA provisioner. - mitmproxy: , -- mitmproxy `upstream_proxy` mode: - +- mitmproxy modes: - mitmproxy CA cert installation: +- mitmproxy addon API: - Node `NODE_EXTRA_CA_CERTS`: -- 2.52.0 From e579c3d4fde3b001e34b66a3693e02a989a3b530 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 13:32:36 -0400 Subject: [PATCH 3/6] feat(mitmproxy): vendor the addon and Docker sidecar lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of PRD 0005. 
Three new files for the mitmproxy-in-front-of-pipelock topology — wiring into the bottle launch comes in the next commit. - claude_bottle/mitmproxy/__init__.py: abstract MitmproxyProxy base + MitmproxyProxyPlan. Mirrors the PipelockProxy shape (prepare / start / stop) and adds extract_ca_cert for the CA cert hand-off into the agent. - claude_bottle/mitmproxy/addon.py: the vendored Python addon mitmproxy loads inside the sidecar. Forwards each decrypted request to pipelock as a plain HTTP forward-proxy call, inspects the response, and short-circuits the flow with 403 on a pipelock block (status=403 + body starts with `blocked: `, pinned empirically against pipelock 2.3.0 in the impl spike). Self-contained — no claude_bottle imports — so it loads in a sidecar that doesn't have claude_bottle on its path. - claude_bottle/backend/docker/mitmproxy.py: DockerMitmproxyProxy with create / cp / network connect / start lifecycle. Pinned to mitmproxy/mitmproxy@sha256:00b77b5d… (multi-arch manifest for v12.2.3). - tests/unit/test_mitmproxy_verdict.py: pins the verdict fingerprint so a pipelock-side body shape change breaks loudly. Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/docker/mitmproxy.py | 178 ++++++++++++++++++++++ claude_bottle/mitmproxy/__init__.py | 81 ++++++++++ claude_bottle/mitmproxy/addon.py | 169 ++++++++++++++++++++ tests/unit/test_mitmproxy_verdict.py | 62 ++++++++ 4 files changed, 490 insertions(+) create mode 100644 claude_bottle/backend/docker/mitmproxy.py create mode 100644 claude_bottle/mitmproxy/__init__.py create mode 100644 claude_bottle/mitmproxy/addon.py create mode 100644 tests/unit/test_mitmproxy_verdict.py diff --git a/claude_bottle/backend/docker/mitmproxy.py b/claude_bottle/backend/docker/mitmproxy.py new file mode 100644 index 0000000..9911bbf --- /dev/null +++ b/claude_bottle/backend/docker/mitmproxy.py @@ -0,0 +1,178 @@ +"""DockerMitmproxyProxy — the Docker-specific lifecycle for the +mitmproxy sidecar. 
Inherits the addon-bundling from MitmproxyProxy. + +The sidecar runs `mitmdump -s /addon/addon.py`, listens on +MITMPROXY_PORT inside the per-bottle internal network, and generates +its own ephemeral CA on first launch (extracted by provision_ca, +installed into the agent's trust store).""" + +from __future__ import annotations + +import os +import subprocess +import time +from pathlib import Path + +from ...log import die, info, warn +from ...mitmproxy import MitmproxyProxy, MitmproxyProxyPlan + + +# mitmproxy/mitmproxy:12.2.3 (mitmproxy v12 release line). The digest +# is the multi-arch image index — pulls resolve to the right per-arch +# child digest. Bumped deliberately; see PRD 0005. +MITMPROXY_IMAGE = os.environ.get( + "CLAUDE_BOTTLE_MITMPROXY_IMAGE", + "mitmproxy/mitmproxy@sha256:00b77b5d8804c8ad18cb6caefbf9d5849e895e8986c5ce011f4ae30f4385962f", +) + +# Listening port for mitmproxy's forward proxy (agent-facing). +MITMPROXY_PORT = os.environ.get("CLAUDE_BOTTLE_MITMPROXY_PORT", "8080") + +# Path inside the sidecar where the addon is dropped by docker cp. +MITMPROXY_ADDON_PATH = "/addon/addon.py" + +# Path inside the sidecar where mitmproxy generates its CA. +_CA_PATH_IN_SIDECAR = "/home/mitmproxy/.mitmproxy/mitmproxy-ca-cert.pem" + + +def mitmproxy_container_name(slug: str) -> str: + return f"claude-bottle-mitm-{slug}" + + +def mitmproxy_proxy_url(slug: str) -> str: + return f"http://{mitmproxy_container_name(slug)}:{MITMPROXY_PORT}" + + +class DockerMitmproxyProxy(MitmproxyProxy): + """Brings the mitmproxy sidecar up and down via Docker.""" + + def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str: + """Boot the mitmproxy sidecar: + 1. `docker create` on the internal network with mitmdump + argv: `--listen-port -s ` plus the + pipelock URL injected as an env var. + 2. `docker cp` the vendored addon to the sidecar. + 3. Attach to the per-agent egress network so mitmproxy + can reach real upstreams. + 4. `docker start`. 
+ Returns the container name (the proxy_target passed to .stop + and .extract_ca_cert).""" + name = mitmproxy_container_name(plan.slug) + if not plan.addon_src.is_file(): + die(f"mitmproxy addon not found at {plan.addon_src}") + + info(f"starting mitmproxy sidecar {name} on network {plan.internal_network}") + + create_args = [ + "docker", "create", + "--name", name, + "--network", plan.internal_network, + "-e", f"CLAUDE_BOTTLE_PIPELOCK_URL={pipelock_url}", + MITMPROXY_IMAGE, + "mitmdump", + "--listen-port", MITMPROXY_PORT, + "-s", MITMPROXY_ADDON_PATH, + ] + if subprocess.run( + create_args, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + die(f"failed to create mitmproxy sidecar {name}") + + cp_result = subprocess.run( + ["docker", "cp", str(plan.addon_src), f"{name}:{MITMPROXY_ADDON_PATH}"], + capture_output=True, + text=True, + check=False, + ) + if cp_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die(f"failed to copy mitmproxy addon into {name}: {cp_result.stderr.strip()}") + + if subprocess.run( + ["docker", "network", "connect", plan.egress_network, name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die(f"failed to attach mitmproxy sidecar {name} to egress " + f"network {plan.egress_network}") + + if subprocess.run( + ["docker", "start", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die(f"failed to start mitmproxy sidecar {name}") + + return name + + def stop(self, proxy_target: str) -> None: + """Idempotent: missing container is success. 
Mirrors + DockerPipelockProxy.stop.""" + if subprocess.run( + ["docker", "inspect", proxy_target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode == 0: + if subprocess.run( + ["docker", "rm", "-f", proxy_target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + warn( + f"failed to remove mitmproxy sidecar {proxy_target}; " + f"clean up with 'docker rm -f {proxy_target}'" + ) + + def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None: + """Poll the running sidecar for the CA cert (mitmproxy + generates it on first launch, typically <1s after start), + then `docker cp` the public half to `dest_path`. The private + key never leaves the container.""" + deadline = time.monotonic() + 15 + while time.monotonic() < deadline: + check = subprocess.run( + ["docker", "exec", proxy_target, "test", "-f", _CA_PATH_IN_SIDECAR], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + if check.returncode == 0: + break + time.sleep(0.5) + else: + die(f"mitmproxy CA cert did not appear at {_CA_PATH_IN_SIDECAR} " + f"after 15s — sidecar {proxy_target} may have failed to start") + + cp_result = subprocess.run( + ["docker", "cp", f"{proxy_target}:{_CA_PATH_IN_SIDECAR}", str(dest_path)], + capture_output=True, + text=True, + check=False, + ) + if cp_result.returncode != 0: + die(f"failed to extract mitmproxy CA cert from {proxy_target}: " + f"{cp_result.stderr.strip()}") diff --git a/claude_bottle/mitmproxy/__init__.py b/claude_bottle/mitmproxy/__init__.py new file mode 100644 index 0000000..74fb246 --- /dev/null +++ b/claude_bottle/mitmproxy/__init__.py @@ -0,0 +1,81 @@ +"""mitmproxy TLS-interception sidecar for the per-bottle egress +topology (PRD 0005). + +Sits in front of pipelock on the bottle's egress path so pipelock's +body / header / URL DLP scanners see plaintext for HTTPS targets. 
+The sidecar runs in mitmproxy's `regular` mode and loads the +vendored addon at `addon.py`; the addon forwards each decrypted +request to pipelock as a plain HTTP forward-proxy call and gates +the mitmproxy flow on pipelock's verdict. + +This module is platform-agnostic: it owns the abstract proxy +lifecycle (prepare / start / stop / extract_ca_cert). The +Docker-specific lifecycle lives in +`claude_bottle/backend/docker/mitmproxy.py`. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class MitmproxyProxyPlan: + """Output of MitmproxyProxy.prepare; consumed by .start when the + sidecar needs to be brought up. + + `addon_src` is the host-side path to the vendored addon.py, + resolved at prepare time. `slug` is the per-agent identifier + used as the suffix in every per-bottle resource name. The + network fields default to empty and are populated by the + backend's launch step (via dataclasses.replace) once those + networks have actually been created — same pattern as + PipelockProxyPlan.""" + + addon_src: Path + slug: str + internal_network: str = "" + egress_network: str = "" + + +class MitmproxyProxy(ABC): + """The mitmproxy TLS-interception sidecar. The proxy-config + addon + bundling are platform-agnostic; the sidecar's start/stop lifecycle + and the CA extraction step are backend-specific and live on + concrete subclasses.""" + + def prepare(self, slug: str) -> MitmproxyProxyPlan: + """Locate the vendored addon source and return the start + plan. 
The addon is checked into the project and identical + across bottles; per-bottle wiring (pipelock URL) is injected + via env vars at start time, not via a generated config.""" + addon_src = Path(__file__).resolve().parent / "addon.py" + if not addon_src.is_file(): + raise FileNotFoundError( + f"mitmproxy addon not found at {addon_src}; the " + f"package was installed incompletely" + ) + return MitmproxyProxyPlan(addon_src=addon_src, slug=slug) + + @abstractmethod + def start(self, plan: MitmproxyProxyPlan, *, pipelock_url: str) -> str: + """Bring up the mitmproxy sidecar according to `plan`. + `pipelock_url` is injected into the sidecar's env (as + CLAUDE_BOTTLE_PIPELOCK_URL) so the addon knows where to + scan. Returns the proxy_target string identifying the + running sidecar — the same value to pass to `.stop` and + `.extract_ca_cert`.""" + + @abstractmethod + def stop(self, proxy_target: str) -> None: + """Tear down the sidecar identified by `proxy_target`. + Idempotent: a missing target is success.""" + + @abstractmethod + def extract_ca_cert(self, proxy_target: str, dest_path: Path) -> None: + """Copy the public CA cert from the running sidecar to + `dest_path` on the host. Polls the sidecar for the cert + file to appear (mitmproxy generates the CA on first launch). + The private key never leaves the sidecar.""" diff --git a/claude_bottle/mitmproxy/addon.py b/claude_bottle/mitmproxy/addon.py new file mode 100644 index 0000000..c9c913d --- /dev/null +++ b/claude_bottle/mitmproxy/addon.py @@ -0,0 +1,169 @@ +"""mitmproxy addon: forward each decrypted request to pipelock for +scanning, then either short-circuit with pipelock's 403 (block) or +let mitmproxy proceed to the real upstream (allow). + +Loaded inside the mitmproxy sidecar container via `mitmdump -s ...`. +Must be self-contained — the sidecar image doesn't have claude_bottle +on its import path. Imports are limited to the Python stdlib plus +mitmproxy itself (which is the host). 
+ +Pipelock's URL is read from CLAUDE_BOTTLE_PIPELOCK_URL at startup +(injected by DockerMitmproxyProxy.start). + +The verdict function `is_pipelock_block` is exported as a pure +function so unit tests can exercise it without importing mitmproxy. +""" + +from __future__ import annotations + +import logging +import os +import urllib.error +import urllib.request + + +PIPELOCK_URL_ENV = "CLAUDE_BOTTLE_PIPELOCK_URL" +PIPELOCK_TIMEOUT_SEC = 5 + +# Hop-by-hop headers per RFC 7230 §6.1; should not be forwarded +# across a proxy. Lower-cased for case-insensitive comparison. +_HOP_BY_HOP = frozenset({ + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailers", + "transfer-encoding", + "upgrade", +}) + +log = logging.getLogger("pipelock-bridge") + + +def is_pipelock_block(status: int, body_bytes: bytes) -> bool: + """Return True iff pipelock's response indicates the proxy itself + blocked (DLP / allowlist), distinguishing from a relayed upstream + 4xx that pipelock happened to forward back. + + Pipelock's block bodies are plain text starting with + `blocked: ` and the status is always 403. A relayed + upstream response has whatever body the upstream sent — + extremely unlikely to begin with `blocked: `. Pinned empirically + against pipelock v2.3.0 in the impl spike (DLP block: + "blocked: request body contains secret: GitHub Token"; + allowlist block: "blocked: domain not in allowlist: example.com"). + + Long-term cleanup: file an upstream feature request for an + `X-Pipelock-Verdict: block` response header so we can match on a + structured signal instead of pattern-matching the body.""" + return status == 403 and body_bytes.startswith(b"blocked: ") + + +def _scan_via_pipelock( + pipelock_url: str, + method: str, + target_url: str, + headers: dict[str, str], + body: bytes, +) -> tuple[int, bytes]: + """Forward the decrypted request to pipelock as a plain HTTP + forward-proxy call. Returns (status, body_bytes). 
Raises on + transport-level errors so the caller can fail closed. + + The target URL is rewritten to http:// so pipelock receives an + absolute-URI forward-proxy request shape. Pipelock will scan, + then may attempt an upstream forward over plain HTTP — that + response is read back too, but the addon discards it on allow + (mitmproxy makes the real HTTPS request itself).""" + rewritten_url = target_url + if rewritten_url.startswith("https://"): + rewritten_url = "http://" + rewritten_url[len("https://"):] + + forwarded_headers = { + k: v for k, v in headers.items() + if k.lower() not in _HOP_BY_HOP + } + + proxy_handler = urllib.request.ProxyHandler({"http": pipelock_url}) + opener = urllib.request.build_opener(proxy_handler) + req = urllib.request.Request( + url=rewritten_url, + data=body if body else None, + headers=forwarded_headers, + method=method, + ) + try: + resp = opener.open(req, timeout=PIPELOCK_TIMEOUT_SEC) + return resp.status, resp.read() + except urllib.error.HTTPError as e: + return e.code, e.read() + + +class PipelockBridge: + """mitmproxy addon class. mitmproxy instantiates one of these via + the `addons = [...]` module attribute at the bottom of this file.""" + + def __init__(self) -> None: + # Read once per sidecar lifetime. Empty string is allowed at + # construction (so the module can be imported in test + # environments) but the request handler fails closed if it's + # missing at request time. + self._pipelock_url = os.environ.get(PIPELOCK_URL_ENV, "") + + def request(self, flow) -> None: + """mitmproxy callback. Called for each decrypted client + request before mitmproxy forwards to the real upstream. + Setting flow.response short-circuits the flow with that + response; leaving it None lets mitmproxy proceed.""" + # Late import so this module can be loaded in test + # environments without mitmproxy installed (the verdict + # function is unit-testable in isolation). 
+ from mitmproxy import http + + if not self._pipelock_url: + log.error("%s is unset; failing closed", PIPELOCK_URL_ENV) + flow.response = http.Response.make( + 503, + b"egress scanner not configured", + {"Content-Type": "text/plain", + "X-Pipelock-Bridge": "misconfigured"}, + ) + return + + target_url = flow.request.pretty_url + method = flow.request.method + headers = {k: v for k, v in flow.request.headers.items()} + body = bytes(flow.request.content or b"") + + try: + status, response_body = _scan_via_pipelock( + self._pipelock_url, method, target_url, headers, body, + ) + except Exception as e: + # Fail closed: scanner unreachable means no verdict, so + # refuse rather than risk leaking. + log.warning("pipelock unreachable; failing closed: %s", e) + flow.response = http.Response.make( + 503, + b"egress scanner unreachable", + {"Content-Type": "text/plain", + "X-Pipelock-Bridge": "error"}, + ) + return + + if is_pipelock_block(status, response_body): + flow.response = http.Response.make( + status, + response_body, + {"Content-Type": "text/plain", + "X-Pipelock-Bridge": "block"}, + ) + return + + # Allow path: discard pipelock's response (it's the wasted + # upstream-forward attempt). Leave flow.response as None; + # mitmproxy proceeds to the real upstream on its own. + + +addons = [PipelockBridge()] diff --git a/tests/unit/test_mitmproxy_verdict.py b/tests/unit/test_mitmproxy_verdict.py new file mode 100644 index 0000000..fcba0cf --- /dev/null +++ b/tests/unit/test_mitmproxy_verdict.py @@ -0,0 +1,62 @@ +"""Unit: the addon's verdict function pinning pipelock-block vs. +relayed-upstream 4xx. 
+ +The fingerprint shape is the contract the addon depends on; this +test should break loudly if pipelock changes its 403-body prefix +under a version bump.""" + +from __future__ import annotations + +import unittest + +from claude_bottle.mitmproxy.addon import is_pipelock_block + + +class TestIsPipelockBlock(unittest.TestCase): + def test_block_dlp_body(self): + # Pipelock v2.3.0 DLP block, captured in the impl spike. + self.assertTrue(is_pipelock_block( + 403, + b"blocked: request body contains secret: GitHub Token", + )) + + def test_block_allowlist_body(self): + # Pipelock v2.3.0 allowlist block, captured in the impl spike. + self.assertTrue(is_pipelock_block( + 403, + b"blocked: domain not in allowlist: example.com", + )) + + def test_block_header_dlp_body(self): + # Header DLP path; same body prefix per the spike. + self.assertTrue(is_pipelock_block( + 403, + b"blocked: request header Authorization contains secret", + )) + + def test_403_without_blocked_prefix_is_not_a_block(self): + # A real-upstream 403 relayed by pipelock — body is whatever + # the upstream sent, almost certainly not starting with + # `blocked: `. Must be treated as allow so the addon hands + # the flow back to mitmproxy. + self.assertFalse(is_pipelock_block( + 403, + b'{"error":"forbidden","detail":"insufficient permissions"}', + )) + + def test_non_403_with_blocked_prefix_is_not_a_block(self): + # Defensive: if some intermediate ever returns 502/504 with + # a body that happens to begin `blocked: `, we should still + # not short-circuit. Block status is always 403 by contract. + self.assertFalse(is_pipelock_block(502, b"blocked: ...")) + + def test_200_is_not_a_block(self): + # Allow path, normal forwarded response. 
+ self.assertFalse(is_pipelock_block(200, b'{"ok":true}')) + + def test_empty_body_is_not_a_block(self): + self.assertFalse(is_pipelock_block(403, b"")) + + +if __name__ == "__main__": + unittest.main() -- 2.52.0 From 21054212d411f534ca72e5a5c3412e4c832c26c6 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 13:38:51 -0400 Subject: [PATCH 4/6] feat(mitmproxy): wire the sidecar into the bottle launch lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second step of PRD 0005. The mitmproxy sidecar from the previous commit now actually runs alongside pipelock when a bottle launches. - BottleBackend gains a non-abstract provision_ca with a default no-op so non-Docker backends aren't forced to implement TLS interception. provision() orchestrates ca → prompt → skills → ssh → git; CA goes first so trust is set up before anything else runs inside the agent. - DockerBottlePlan gains `mitmproxy_plan: MitmproxyProxyPlan`. The prepare step builds it alongside the existing pipelock plan; no new manifest schema or host-side scratch files. - DockerBottleBackend grows self._mitm, threads it through prepare and launch. Mirror of the existing self._proxy pattern. - launch.py brings the mitmproxy sidecar up between pipelock and the agent container, passing pipelock's service-name URL via env. ExitStack callback handles teardown in reverse order. - The agent's HTTPS_PROXY / HTTP_PROXY now point at mitmproxy (not pipelock directly). Three new -e flags inject the CA trust trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE) at docker run time; Docker propagates those into docker exec so the claude process sees them without per-exec threading. 
- New provisioner backend/docker/provision/ca.py extracts the CA cert from the running mitmproxy sidecar, copies it into the agent at /usr/local/share/ca-certificates/claude-bottle-mitm.crt, runs update-ca-certificates, and emits a stderr line with the SHA-256 fingerprint (stdlib ssl + hashlib; no subprocess). Cleanup needs no change — `docker ps --filter name=^claude-bottle-` already catches the new claude-bottle-mitm- containers. Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/__init__.py | 33 ++++++++---- claude_bottle/backend/docker/backend.py | 14 ++++- claude_bottle/backend/docker/bottle_plan.py | 2 + claude_bottle/backend/docker/launch.py | 34 +++++++++++- claude_bottle/backend/docker/prepare.py | 4 ++ claude_bottle/backend/docker/provision/ca.py | 55 ++++++++++++++++++++ 6 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 claude_bottle/backend/docker/provision/ca.py diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index 4c85366..7990cf9 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -204,24 +204,35 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): """Build/run the bottle and yield a handle; tear down on exit.""" def provision(self, plan: PlanT, target: str) -> str | None: - """Copy host-side files (prompt, skills, SSH keys, .git) into - the running bottle. Called from `launch` after the container/ - machine is up. `target` identifies the running instance in - backend-specific terms (Docker: resolved container name; fly: - machine id). Returns the in-container prompt path if a prompt - was provisioned, else None — the Bottle handle uses it to - decide whether to add --append-system-prompt-file to claude's - argv. + """Copy host-side files (CA cert, prompt, skills, SSH keys, + .git) into the running bottle. Called from `launch` after the + container/machine is up. 
`target` identifies the running + instance in backend-specific terms (Docker: resolved container + name; fly: machine id). Returns the in-container prompt path + if a prompt was provisioned, else None — the Bottle handle + uses it to decide whether to add --append-system-prompt-file + to claude's argv. - Default orchestration: prompt → skills → ssh → git. Subclasses - typically don't override this; they implement the four - sub-methods below.""" + Default orchestration: ca → prompt → skills → ssh → git. + CA goes first because it changes how the agent process trusts + the network; the rest don't depend on it but the order keeps + trust setup adjacent to the launch step. Subclasses typically + don't override this; they implement the sub-methods below.""" + self.provision_ca(plan, target) prompt_path = self.provision_prompt(plan, target) self.provision_skills(plan, target) self.provision_ssh(plan, target) self.provision_git(plan, target) return prompt_path + def provision_ca(self, plan: PlanT, target: str) -> None: + """Install the egress-proxy's CA into the running bottle's + trust store. Default impl is a no-op so backends that don't + yet support TLS interception (every backend except Docker + today) aren't forced to implement it. The Docker backend + overrides to extract mitmproxy's CA and run + `update-ca-certificates` inside the agent container.""" + @abstractmethod def provision_prompt(self, plan: PlanT, target: str) -> str | None: """Copy the prompt file into the running bottle. Returns the diff --git a/claude_bottle/backend/docker/backend.py b/claude_bottle/backend/docker/backend.py index 97d1344..b8a5b36 100644 --- a/claude_bottle/backend/docker/backend.py +++ b/claude_bottle/backend/docker/backend.py @@ -23,7 +23,9 @@ from . 
import prepare as _prepare from .bottle import DockerBottle from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan +from .mitmproxy import DockerMitmproxyProxy from .pipelock import DockerPipelockProxy +from .provision import ca as _ca from .provision import git as _git from .provision import prompt as _prompt from .provision import skills as _skills @@ -38,15 +40,23 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup def __init__(self) -> None: self._proxy = DockerPipelockProxy() + self._mitm = DockerMitmproxyProxy() def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan: - return _prepare.resolve_plan(spec, stage_dir=stage_dir, proxy=self._proxy) + return _prepare.resolve_plan( + spec, stage_dir=stage_dir, proxy=self._proxy, mitm=self._mitm, + ) @contextmanager def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]: - with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle: + with _launch.launch( + plan, proxy=self._proxy, mitm=self._mitm, provision=self.provision, + ) as bottle: yield bottle + def provision_ca(self, plan: DockerBottlePlan, target: str) -> None: + _ca.provision_ca(plan, target) + def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None: return _prompt.provision_prompt(plan, target) diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index 5ad3da8..c3d76a7 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -13,6 +13,7 @@ from pathlib import Path from ...log import info from ...manifest import Agent, Bottle +from ...mitmproxy import MitmproxyProxyPlan from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist from .. 
import BottlePlan @@ -49,6 +50,7 @@ class DockerBottlePlan(BottlePlan): forwarded_env: dict[str, str] = field(repr=False) prompt_file: Path proxy_plan: PipelockProxyPlan + mitmproxy_plan: MitmproxyProxyPlan allowlist_summary: str use_runsc: bool diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index 45ad6dd..8664012 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -22,8 +22,15 @@ from . import network as network_mod from . import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan +from .mitmproxy import DockerMitmproxyProxy, mitmproxy_proxy_url from .pipelock import DockerPipelockProxy, pipelock_proxy_url +# Path inside the agent container where the mitmproxy CA cert lives +# after provision_ca runs. Exported as a module-level constant so +# both the agent's docker-run env trio and the provisioner agree. +AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-mitm.crt" +AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt" + # Where the repo root lives, for `docker build` context. Computed once. _REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) @@ -34,6 +41,7 @@ def launch( plan: DockerBottlePlan, *, proxy: DockerPipelockProxy, + mitm: DockerMitmproxyProxy, provision: Callable[[DockerBottlePlan, str], str | None], ) -> Generator[DockerBottle, None, None]: """Build, launch, and provision a Docker bottle. Teardown on exit. @@ -71,6 +79,17 @@ def launch( pipelock_name = proxy.start(proxy_plan) stack.callback(proxy.stop, pipelock_name) + # mitmproxy sits in front of pipelock on the agent's egress + # path. mitmproxy's `addon.py` reaches pipelock via the + # service-name URL we hand it here. 
+ mitm_plan = dataclasses.replace( + plan.mitmproxy_plan, + internal_network=internal_network, + egress_network=egress_network, + ) + mitm_name = mitm.start(mitm_plan, pipelock_url=pipelock_proxy_url(plan.slug)) + stack.callback(mitm.stop, mitm_name) + container = _run_agent_container(plan, internal_network) stack.callback(docker_mod.force_remove_container, container) @@ -85,7 +104,10 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str: """Build the `docker run` argv and execute it, handling name- conflict races by incrementing the suffix (unless the name was user-pinned). Returns the resolved container name.""" - proxy_url = pipelock_proxy_url(plan.slug) + # Agent traffic routes through mitmproxy, not pipelock directly. + # mitmproxy decrypts and hands the plaintext to pipelock via its + # addon; pipelock is unchanged from PRD 0001. + proxy_url = mitmproxy_proxy_url(plan.slug) docker_args: list[str] = [ "--rm", "-d", "--name", plan.container_name, @@ -93,6 +115,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str: "-e", f"HTTPS_PROXY={proxy_url}", "-e", f"HTTP_PROXY={proxy_url}", "-e", "NO_PROXY=localhost,127.0.0.1", + # CA trust trio for the agent process. Docker propagates + # run-time env into `docker exec`, so `claude` sees these + # without per-exec threading. NODE_EXTRA_CA_CERTS points at + # the cert file (Node appends it to its bundled roots); + # SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system + # bundle that `update-ca-certificates` rebuilds in + # provision_ca. 
+ "-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}", + "-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}", + "-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}", ] if plan.use_runsc: docker_args.extend(["--runtime", "runsc"]) diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py index d7be637..9df125e 100644 --- a/claude_bottle/backend/docker/prepare.py +++ b/claude_bottle/backend/docker/prepare.py @@ -19,6 +19,7 @@ from ...log import die from .. import BottleSpec from . import util as docker_mod from .bottle_plan import DockerBottlePlan +from .mitmproxy import DockerMitmproxyProxy from .pipelock import DockerPipelockProxy @@ -27,6 +28,7 @@ def resolve_plan( *, stage_dir: Path, proxy: DockerPipelockProxy, + mitm: DockerMitmproxyProxy, ) -> DockerBottlePlan: """Resolve Docker-specific names and write scratch files. Trusts that the agent and its skills/SSH keys are present — validation @@ -78,6 +80,7 @@ def resolve_plan( prompt_file.chmod(0o600) proxy_plan = proxy.prepare(bottle, slug, stage_dir) + mitmproxy_plan = mitm.prepare(slug) resolved = resolve_env(manifest, spec.agent_name) # Everything that should reach the bottle by-name (so its value # never lands on argv or in env_file) goes into one dict. The @@ -105,6 +108,7 @@ def resolve_plan( forwarded_env=forwarded_env, prompt_file=prompt_file, proxy_plan=proxy_plan, + mitmproxy_plan=mitmproxy_plan, allowlist_summary=allowlist_summary, use_runsc=use_runsc, ) diff --git a/claude_bottle/backend/docker/provision/ca.py b/claude_bottle/backend/docker/provision/ca.py new file mode 100644 index 0000000..bd68319 --- /dev/null +++ b/claude_bottle/backend/docker/provision/ca.py @@ -0,0 +1,55 @@ +"""Extract mitmproxy's CA cert and install it into the agent +container's trust store. + +mitmproxy generates a fresh CA on first launch inside its sidecar. 
+This provisioner pulls the public cert through a host stage dir, +drops it into the agent at `/usr/local/share/ca-certificates/...`, +runs `update-ca-certificates` to rebuild the system bundle, and +emits a single stderr log line with the SHA-256 fingerprint.""" + +from __future__ import annotations + +import hashlib +import ssl +import subprocess + +from ....log import info +from ..bottle_plan import DockerBottlePlan +from ..launch import AGENT_CA_PATH +from ..mitmproxy import DockerMitmproxyProxy, mitmproxy_container_name + + +def provision_ca(plan: DockerBottlePlan, target: str) -> None: + """Pull mitmproxy's CA cert, install in the agent, log fingerprint. + Called from BottleBackend.provision after the agent container is + up. The mitmproxy sidecar is already running (started during + `launch`).""" + sidecar = mitmproxy_container_name(plan.mitmproxy_plan.slug) + stage_cert = plan.stage_dir / "mitm-ca.crt" + + DockerMitmproxyProxy().extract_ca_cert(sidecar, stage_cert) + + container = target + subprocess.run( + ["docker", "cp", str(stage_cert), f"{container}:{AGENT_CA_PATH}"], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", "-u", "0", container, "chmod", "644", AGENT_CA_PATH], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", "-u", "0", container, "update-ca-certificates"], + stdout=subprocess.DEVNULL, + check=True, + ) + + # SHA-256 of the cert's DER bytes — the standard fingerprint + # form. stdlib only; never the private key (which stays in the + # sidecar). Logged once at launch as an audit signal. 
+ pem = stage_cert.read_text() + der = ssl.PEM_cert_to_DER_cert(pem) + fingerprint = hashlib.sha256(der).hexdigest() + info(f"mitm ca fingerprint: sha256:{fingerprint[:32]}...") -- 2.52.0 From c4de42ea3c4914e67f525e7378f30c28762e7043 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 13:40:31 -0400 Subject: [PATCH 5/6] feat(mitmproxy): render mitmproxy in the dry-run preflight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third step of PRD 0005. The preflight now surfaces the TLS- intercept layer so the operator sees it before agreeing to launch. - Text output: one new line under the egress summary — "tls intercept : mitmproxy (per-bottle ephemeral CA, generated at launch)". - JSON output (--format=json contract): new egress.mitm: { enabled: true, ca_fingerprint: null } block. Fingerprint is always null at dry-run because the CA only exists after the sidecar starts; real launches print it as a stderr log line from provision_ca. - Pin the new shape in the dry-run integration test. Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/docker/bottle_plan.py | 9 +++++++++ tests/integration/test_dry_run_plan.py | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index c3d76a7..293e965 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -95,6 +95,7 @@ class DockerBottlePlan(BottlePlan): else: info(" ssh hosts : (none)") info(f" egress : {self.allowlist_summary}") + info(" tls intercept : mitmproxy (per-bottle ephemeral CA, generated at launch)") info( f"prompt : {len(v.agent.prompt)} chars; " f"first line: {v.prompt_first_line or '(empty)'}" @@ -119,6 +120,14 @@ class DockerBottlePlan(BottlePlan): "egress": { "host_count": len(hosts), "hosts": hosts, + # Reserved for PRD 0005: TLS interception via mitmproxy. 
+ # ca_fingerprint is always null at dry-run because the + # CA is generated by the sidecar at launch time. Real + # launches print the fingerprint to stderr. + "mitm": { + "enabled": True, + "ca_fingerprint": None, + }, }, "prompt": { "length": len(v.agent.prompt), diff --git a/tests/integration/test_dry_run_plan.py b/tests/integration/test_dry_run_plan.py index c0ae3eb..45c4564 100644 --- a/tests/integration/test_dry_run_plan.py +++ b/tests/integration/test_dry_run_plan.py @@ -92,6 +92,12 @@ class TestDryRunPlan(unittest.TestCase): self.assertEqual(sorted(set(hosts)), hosts, "hosts must be sorted and deduplicated") + # PRD 0005: TLS interception block is part of the JSON + # contract. Fingerprint is null at dry-run (CA doesn't + # exist yet); real launches print it to stderr. + self.assertEqual({"enabled": True, "ca_fingerprint": None}, + plan["egress"]["mitm"]) + # No Docker side effects (see the GITEA_ACTIONS skip note # above — this guard runs locally only). if check_side_effects: -- 2.52.0 From 22bc13dc3cc27da1da49f7a40d78dee7f0aced5a Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 13:46:09 -0400 Subject: [PATCH 6/6] feat(mitmproxy): integration tests for the bumped HTTPS path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth and final step of PRD 0005. Two new end-to-end tests that exercise the full chain agent -> mitmproxy(bump) -> addon -> pipelock -> upstream and pin the two paths the addon implements. - test_mitmproxy_blocks_secret_https_post: HTTPS variant of the existing test_pipelock_blocks_secret_post. Posts a credential pattern in the body over HTTPS through the bottle. mitmproxy bumps the CONNECT (the agent trusts the per-bottle ephemeral CA installed by provision_ca), the addon forwards the decrypted request to pipelock, pipelock returns 403 with the known `blocked: ...` body shape, and the addon short-circuits the flow with status=403 + X-Pipelock-Bridge: block. 
The two-axis assertion (status + header) proves the addon-mediated path is what produced the block, not some other layer. - test_mitmproxy_allows_normal_https: hits raw.githubusercontent.com (a baked-in allowlist host) over HTTPS through the bottle. Verifies the addon's allow path: mitmproxy bumps, addon forwards to pipelock for the scan, pipelock allows, mitmproxy proceeds to the real upstream, response comes back through. The absence of X-Pipelock-Bridge on the response is the signal that the addon didn't short-circuit. Body length sanity-checks that the response is real upstream content, not a synthesized stub. Both probes are stdlib-only Node (http.request CONNECT + tls.connect on the tunneled socket) — pulling in undici as a dep would be the clean way to do HTTPS-through-proxy but is out of scope. The earlier integration tests still pass with mitmproxy in path: their assertions hold under the new topology, though their semantic coverage shifts (e.g. test_pipelock_allow_node now exercises mitmproxy's CONNECT-200 path rather than pipelock's host allowlist on CONNECT). Updating those tests is a follow-up. Co-Authored-By: Claude Opus 4.7 --- .../test_mitmproxy_allows_normal_https.py | 167 +++++++++++++++++ ...test_mitmproxy_blocks_secret_https_post.py | 172 ++++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 tests/integration/test_mitmproxy_allows_normal_https.py create mode 100644 tests/integration/test_mitmproxy_blocks_secret_https_post.py diff --git a/tests/integration/test_mitmproxy_allows_normal_https.py b/tests/integration/test_mitmproxy_allows_normal_https.py new file mode 100644 index 0000000..35fc631 --- /dev/null +++ b/tests/integration/test_mitmproxy_allows_normal_https.py @@ -0,0 +1,167 @@ +"""Integration: with mitmproxy in front of pipelock, a plain HTTPS +GET to an allowlisted host with no credential pattern still gets +through end-to-end. 
+ +The complement to test_mitmproxy_blocks_secret_https_post — together +they isolate the addon's two paths (block vs. allow). This test +also functions as the end-to-end TLS-trust check: if the agent's +trust store didn't have mitmproxy's CA installed, the TLS handshake +between the agent and mitmproxy's bumped cert would fail and the +probe would print `tls_error=...` instead of a `status=` line. +""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from tests._docker import skip_unless_docker +from tests.fixtures import fixture_minimal + + +# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST. +# Pick a file path that's stable enough across runs — `git`'s own +# README.md on the master branch is a long-lived artifact and one +# of github's most-trafficked raw files. +_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md" + +# stdlib http (for CONNECT) + tls (for the bumped tunnel); see the +# block test for the rationale on not pulling undici in as a dep.
+# +# Output contract: +# - "status=" HTTP status from upstream (or addon, if +# blocked) +# - "bridge=" X-Pipelock-Bridge header; empty on allow +# - "len=" response body length, sanity-check it's a +# real response and not an empty proxy stub +# - "error=<...>" thrown error +_PROBE_JS = r""" +const http = require('http'); +const tls = require('tls'); + +const proxy = new URL(process.env.HTTPS_PROXY); + +const connectReq = http.request({ + host: proxy.hostname, + port: proxy.port, + method: 'CONNECT', + path: 'raw.githubusercontent.com:443', +}); +connectReq.setTimeout(10000, () => { + console.log('timeout=connect'); + connectReq.destroy(); +}); +connectReq.on('error', (e) => { + console.log('error=' + (e.code || '') + ' ' + e.message); +}); +connectReq.on('connect', (res, socket) => { + if (res.statusCode !== 200) { + console.log('status=' + res.statusCode); + console.log('bridge=' + (res.headers['x-pipelock-bridge'] || '')); + return; + } + const tlsSocket = tls.connect({ + socket: socket, + servername: 'raw.githubusercontent.com', + }); + tlsSocket.on('secureConnect', () => { + tlsSocket.write( + 'GET /git/git/master/README.md HTTP/1.1\r\n' + + 'Host: raw.githubusercontent.com\r\n' + + 'User-Agent: claude-bottle-mitm-test\r\n' + + 'Accept: */*\r\n' + + 'Connection: close\r\n' + + '\r\n' + ); + }); + let buf = Buffer.alloc(0); + tlsSocket.on('data', (c) => { buf = Buffer.concat([buf, c]); }); + tlsSocket.on('end', () => { + const text = buf.toString('utf8'); + const headersEnd = text.indexOf('\r\n\r\n'); + const head = headersEnd >= 0 ? text.slice(0, headersEnd) : text; + const body = headersEnd >= 0 ? 
text.slice(headersEnd + 4) : ''; + const lines = head.split('\r\n'); + const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/); + let bridge = ''; + for (let i = 1; i < lines.length; i++) { + const ix = lines[i].indexOf(': '); + if (ix < 0) continue; + if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') { + bridge = lines[i].slice(ix + 2); + } + } + console.log('status=' + (m ? m[1] : '?')); + console.log('bridge=' + bridge); + console.log('len=' + body.length); + }); + tlsSocket.on('error', (e) => { + console.log('tls_error=' + (e.code || '') + ' ' + e.message); + }); +}); +connectReq.end(); +""" + + +@skip_unless_docker() +class TestMitmproxyAllowsNormalHttps(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_get_to_allowed_host_succeeds(self): + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=fixture_minimal(), + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -e\n" + "cat > /tmp/probe.js <<'PROBE_EOF'\n" + f"{_PROBE_JS}\n" + "PROBE_EOF\n" + "node /tmp/probe.js\n" + ) + result = bottle.exec(script) + finally: + shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + # The TLS-trust setup is implicit here — if it had failed, + # the probe would have printed `tls_error=...`, not a status.
+ self.assertIn( + "status=200", result.stdout, + f"expected 200 from raw.githubusercontent.com; got: {result.stdout!r}", + ) + # X-Pipelock-Bridge is set only on the addon's short-circuit + # paths (block / misconfigured / scanner-unreachable). An + # allow flow goes straight through mitmproxy to upstream and + # the header should be absent. + self.assertIn( + "bridge=\n", result.stdout, + f"X-Pipelock-Bridge unexpectedly present on the allow " + f"path: {result.stdout!r}", + ) + # Sanity: the README is many KB. An empty body would suggest + # the response was synthesized by something in the chain + # rather than fetched from github. + self.assertNotIn("len=0\n", result.stdout) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_mitmproxy_blocks_secret_https_post.py b/tests/integration/test_mitmproxy_blocks_secret_https_post.py new file mode 100644 index 0000000..c6393df --- /dev/null +++ b/tests/integration/test_mitmproxy_blocks_secret_https_post.py @@ -0,0 +1,172 @@ +"""Integration: with mitmproxy in front of pipelock, a credential +POST sent over HTTPS is now blocked by pipelock's body-scan layer. + +This is the HTTPS variant of test_pipelock_blocks_secret_post — the +two together prove the TLS-interception layer is doing the work the +PRD targets. The earlier plain-HTTP test only fired because the agent +was forced to bypass TLS; real Claude Code traffic to api.anthropic.com +goes over CONNECT-tunneled HTTPS and would have slipped past pipelock +prior to this PRD. + +End-to-end: drives `BottleBackend.prepare → launch` so the real +image build, network plumbing, pipelock sidecar, mitmproxy sidecar, +ephemeral CA generation, and trust-store install are all in the +loop. 
+""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from claude_bottle.manifest import Manifest +from tests._docker import skip_unless_docker + + +# Synthetic value shaped like a GitHub Personal Access Token; not a +# real credential. Pipelock's default DLP rules pattern-match this +# format and mitmproxy's addon short-circuits with the 403 it +# receives back. +_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ" + + +# Build the request by hand using stdlib `http` (for CONNECT) and +# `tls` (for the bumped tunnel). Node 22's `fetch` doesn't expose +# proxy configuration without undici as an installable dep, and +# this project keeps the bottle image dep-light. NODE_EXTRA_CA_CERTS +# is wired by launch.py so the agent trusts mitmproxy's bumped cert. +# +# Output contract (parsed by the test): +# - "status=NNN" HTTP status of the decrypted response +# - "bridge=VALUE" X-Pipelock-Bridge header from the addon's +# short-circuit, empty on the allow path +# - "error=", "tls_error=", "timeout=connect" failure diagnostics +_PROBE_JS = r""" +const http = require('http'); +const tls = require('tls'); + +const proxy = new URL(process.env.HTTPS_PROXY); +const body = 'token=' + process.env.FAKE_TOKEN; + +const connectReq = http.request({ + host: proxy.hostname, + port: proxy.port, + method: 'CONNECT', + path: 'api.anthropic.com:443', +}); +connectReq.setTimeout(8000, () => { + console.log('timeout=connect'); + connectReq.destroy(); +}); +connectReq.on('error', (e) => { + console.log('error=' + (e.code || '') + ' ' + e.message); +}); +connectReq.on('connect', (res, socket) => { + if (res.statusCode !== 200) { + console.log('status=' + res.statusCode); + console.log('bridge=' + (res.headers['x-pipelock-bridge'] || '')); + return; + } + const tlsSocket = tls.connect({ + socket: socket, + servername: 'api.anthropic.com', + }); + tlsSocket.on('secureConnect', () => { +
tlsSocket.write( + 'POST /dlp-probe HTTP/1.1\r\n' + + 'Host: api.anthropic.com\r\n' + + 'Content-Type: application/x-www-form-urlencoded\r\n' + + 'Content-Length: ' + Buffer.byteLength(body) + '\r\n' + + 'Connection: close\r\n' + + '\r\n' + body + ); + }); + let buf = ''; + tlsSocket.on('data', (c) => { buf += c.toString('utf8'); }); + tlsSocket.on('end', () => { + const lines = buf.split('\r\n'); + const m = lines[0].match(/HTTP\/[\d.]+ (\d+)/); + let bridge = ''; + for (let i = 1; i < lines.length; i++) { + if (lines[i] === '') break; + const ix = lines[i].indexOf(': '); + if (ix < 0) continue; + if (lines[i].slice(0, ix).toLowerCase() === 'x-pipelock-bridge') { + bridge = lines[i].slice(ix + 2); + } + } + console.log('status=' + (m ? m[1] : '?')); + console.log('bridge=' + bridge); + }); + tlsSocket.on('error', (e) => { + console.log('tls_error=' + (e.code || '') + ' ' + e.message); + }); +}); +connectReq.end(); +""" + + +@skip_unless_docker() +class TestMitmproxyBlocksSecretHttpsPost(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_post_with_credential_body_is_blocked(self): + manifest = Manifest.from_json_obj({ + "bottles": { + "dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}}, + }, + "agents": { + "demo": {"skills": [], "prompt": "", "bottle": "dev"}, + }, + }) + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=manifest, + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -e\n" + "cat > /tmp/probe.js <<'PROBE_EOF'\n" + f"{_PROBE_JS}\n" + "PROBE_EOF\n" + "node /tmp/probe.js\n" + ) + result = bottle.exec(script) + finally: + 
shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + # The addon short-circuits the flow with X-Pipelock-Bridge: block + # on a pipelock block — the cleanest signal that the chain + # mitmproxy(bump) -> addon(forward) -> pipelock(scan) -> block + # all happened, end to end. + self.assertIn( + "status=403", result.stdout, + f"expected 403 from pipelock block; got: {result.stdout!r}", + ) + self.assertIn( + "bridge=block", result.stdout, + f"X-Pipelock-Bridge header missing; the addon may not be " + f"in path: {result.stdout!r}", + ) + + +if __name__ == "__main__": + unittest.main() -- 2.52.0