From 6716f091c18cf9b4e63c3a4721c9c482f71e970b Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 14:15:44 -0400 Subject: [PATCH 1/6] docs(prd): add 0006, enable pipelock's native TLS interception MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supersedes the abandoned PR #8 (`mitmproxy-tls-interception`), which built a mitmproxy + addon chain on the (falsified) premise that pipelock could not MITM. Empirical proof from the impl-time spike: with `tls_interception: { enabled: true, ca_cert, ca_key }` in pipelock's config, pipelock answered a credential POST over HTTPS with `STATUS=403 / body: blocked: request body contains secret: GitHub Token` and emitted both `scanner:"tls_intercept"` and `scanner:"body_dlp"` events. Standalone, no second proxy. Net change vs PR #8: one sidecar instead of two, no vendored addon, no addon-verdict pattern matching, no HTTPS-trust / DNS / lookup workarounds. Same end-state behavior — pipelock's DLP fires on plaintext for HTTPS hosts in the allowlist. Also cleaning up the now-stale TLS-research notes: - `docs/research/tls-mitm-for-pipelock.md` is removed. Its entire premise (mitmproxy in front of pipelock) is moot now that pipelock does the work natively. The mechanics of CONNECT bumping and the CA-lifecycle considerations it documented are the same as what pipelock implements; the PRD restates the parts that matter for the integration. - `docs/research/pipelock-assessment.md` had two stale claims corrected: the "Pipelock does not perform TLS inspection (no CA trust injection)" line in §Scope gaps and the "no TLS termination" cell in the comparison table. Both now point at the `tls_interception` config and `pipelock tls` CLI instead. 
Co-Authored-By: Claude Opus 4.7 --- docs/prds/0006-pipelock-tls-interception.md | 303 ++++++++++++ docs/research/pipelock-assessment.md | 14 +- docs/research/tls-mitm-for-pipelock.md | 508 -------------------- 3 files changed, 312 insertions(+), 513 deletions(-) create mode 100644 docs/prds/0006-pipelock-tls-interception.md delete mode 100644 docs/research/tls-mitm-for-pipelock.md diff --git a/docs/prds/0006-pipelock-tls-interception.md b/docs/prds/0006-pipelock-tls-interception.md new file mode 100644 index 0000000..20bbe7b --- /dev/null +++ b/docs/prds/0006-pipelock-tls-interception.md @@ -0,0 +1,303 @@ +# PRD 0006: pipelock native TLS interception + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-12 + +## Summary + +Turn on pipelock's built-in `tls_interception` so its DLP / URL / +header / MCP scanners fire on the plaintext of HTTPS requests +instead of only the outer `CONNECT` hostname. Pipelock generates a +per-bottle ephemeral CA at launch (`pipelock tls init`); the +public cert is installed into the agent container's trust store +and the private key dies with the sidecar on teardown. The +existing per-agent sidecar topology from PRD 0001 is otherwise +unchanged — one container, no addon, no second proxy. + +This supersedes the closed PR #8 / branch `mitmproxy-tls-interception`, +which built a mitmproxy + addon chain on the (falsified) premise +that pipelock could not MITM. Empirical proof from the impl-time +spike: with `tls_interception: { enabled: true, ca_cert, ca_key }` +in the pipelock config, pipelock answered a credential POST over +HTTPS with `STATUS=403 / body: blocked: request body contains +secret: GitHub Token` and emitted both +`scanner:"tls_intercept"` and `scanner:"body_dlp"` events. + +## Problem + +PRD 0001 wired pipelock onto every bottle's egress, but pipelock +ran with its default `tls_interception.enabled: false`. 
The agent +container's only egress route is pipelock, but pipelock only saw +`CONNECT` hostnames and the encrypted bytes inside the tunnel. +Pipelock's headline scanners — request body DLP (48 credential +patterns), header DLP, URL DLP, subdomain entropy, MCP scanning, +response-body scanning — all need plaintext to fire. Against the +HTTPS-only hosts in `DEFAULT_ALLOWLIST` (`api.anthropic.com`, +`raw.githubusercontent.com`, etc.) they are effectively disabled. + +The existing `tests/integration/test_pipelock_blocks_secret_post` +test only fires because it forces the agent to send plain HTTP +through pipelock's forward-proxy mode. Real Claude Code traffic +uses HTTPS via CONNECT and slips past the scanner. + +## Goals / Success Criteria + +The feature works when all of the following are observable: + +- A Node / curl request from inside a launched bottle to a + CONNECT-bumped HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`) + carrying a pipelock-recognized credential pattern in the body + returns 403 from pipelock with the documented + `blocked: request body contains secret: …` body. Pipelock's + `body_dlp` event fires on the decrypted request. +- A clean HTTPS GET from inside the bottle to an allowlisted host + (e.g. `https://raw.githubusercontent.com/...`) returns the real + upstream response — TLS interception doesn't break legitimate + traffic. +- The agent's TLS library trusts pipelock's bumped leaf certs + (per the bottle's installed CA); no TLS-trust errors. +- Claude Code reaches `api.anthropic.com` end-to-end through the + bottle and completes a chat round-trip. + +The feature is **done** when all of the following ship: + +- `pipelock_build_config` / `pipelock_render_yaml` emit a + `tls_interception` block with `enabled: true` and the per-bottle + CA cert/key paths. The defaults + (`cert_ttl: 24h`, `cert_cache_size: 10000`, + `passthrough_domains: []`) are kept; only `enabled` and the + cert paths are populated. 
+- The prepare step generates a per-bottle CA via `pipelock tls init` + in a one-shot container, writes `ca.pem` and `ca-key.pem` to + `stage_dir`. Paths land on the `DockerBottlePlan`. +- `DockerPipelockProxy.start` mounts the stage dir into the + sidecar (read-only) so the running pipelock can read its CA. +- `BottleBackend.provision_ca` (new) copies the CA public cert + into the agent at + `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, runs + `update-ca-certificates`, and sets the `NODE_EXTRA_CA_CERTS` / + `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` env trio on the agent + container's runtime env. Default no-op on the abstract base so + other backends aren't forced to implement. +- The launch step prints a one-line stderr log with the SHA-256 + fingerprint of the public CA cert (computed via stdlib + `ssl.PEM_cert_to_DER_cert` + `hashlib.sha256`). +- On bottle teardown the sidecar is removed and the CA private + key is gone with it. +- Two new integration tests under `tests/integration/`: + - HTTPS variant of the credential-post block test (proves the + `tls_intercept` + `body_dlp` chain fires end-to-end). + - Clean HTTPS GET test (proves the allow path doesn't break TLS + trust and returns real upstream content). +- The dry-run preflight (`start --dry-run`) renders the new TLS + layer. Text: one line under the egress summary. JSON: a + reserved `egress.tls_interception: { enabled: true, + ca_fingerprint: null }` block — fingerprint is null at dry-run + because the CA only exists after launch. + +## Non-goals + +- A second proxy in the chain. Pipelock does the bumping + natively; the mitmproxy approach was based on a wrong premise + (closed PR #8). +- Per-bottle override to disable interception. v1 always enables + `tls_interception`. The pipelock-side `passthrough_domains` + list is the right knob if a future allowlisted host turns out + to pin certs — exposing it through the manifest is a follow-up. +- A long-lived / shared CA across bottles. 
Each bottle gets a + fresh CA generated by `pipelock tls init` and destroyed with the + sidecar. +- Tuning `cert_ttl`, `cert_cache_size`, `max_response_bytes`, + `cross_request_detection`, or other pipelock advanced features. + Defaults from `pipelock generate config --preset strict` are + fine for v1. +- Trust-store paths for non-Debian agent images. + `node:22-slim` is Debian; `update-ca-certificates` is the right + command. A Red-Hat-family base would need `update-ca-trust`. +- HTTP/3 / QUIC. Pipelock's interception is HTTP/HTTPS-over-TLS; + UDP/443 still needs an iptables layer (separate PRD). + +## Scope + +### In scope + +- **`claude_bottle/pipelock.py`** changes: + - Extend `pipelock_build_config` to include + `tls_interception: { enabled: true, ca_cert: <path>, ca_key: + <path> }`. Paths are populated from the plan; the function's + signature grows a `cert_path` / `key_path` pair or reads them + off `Bottle` once they're stored. + - Extend `pipelock_render_yaml` to emit the new block. +- **`claude_bottle/backend/docker/pipelock.py`** changes: + - New helper `pipelock_tls_init(stage_dir)` runs the upstream + image as a one-shot: + `docker run --rm -v <stage_dir>:/h -e PIPELOCK_HOME=/h pipelock tls init`, + leaving `ca.pem` and `ca-key.pem` under `stage_dir`. The host + file owner is whatever the upstream image's user is; the + sidecar mount is read-only so this is fine. + - `DockerPipelockProxy.start` mounts the stage dir into the + sidecar at `/h:ro` and references the CA paths in the rendered + YAML. +- **`claude_bottle/backend/__init__.py`**: new abstract method + `provision_ca(plan, target)` on `BottleBackend`, default no-op. + `BottleBackend.provision` orchestrates `ca → prompt → skills → + ssh → git`. +- **`claude_bottle/backend/docker/provision/ca.py`** (new): + - Reads the cert from `stage_dir` (already written by prepare). + - `docker cp` into the agent. + - `docker exec -u 0 ... chmod 644 ...` + `update-ca-certificates`. 
+ - Computes the SHA-256 fingerprint with stdlib (`ssl` + + `hashlib`), emits one stderr log line. +- **`claude_bottle/backend/docker/launch.py`**: + - Three new `-e` flags on the agent's `docker run`: + `NODE_EXTRA_CA_CERTS=/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, + `SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`, + `REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`. + - `HTTPS_PROXY` / `HTTP_PROXY` continue to point at pipelock + (unchanged from PRD 0001 — the mitmproxy detour in PR #8 is + abandoned). +- **`claude_bottle/backend/docker/bottle_plan.py`**: + - One new `info(...)` line in `print()` noting TLS interception + is on. + - `to_dict()` gains an `egress.tls_interception: { enabled: + true, ca_fingerprint: null }` block. Reserved for future + population. +- **`claude_bottle/backend/docker/prepare.py`**: call + `pipelock_tls_init(stage_dir)` and write the resolved cert/key + paths onto the plan (either on the existing `proxy_plan` field + or on the parent `DockerBottlePlan`). +- **Tests:** + - `tests/integration/test_pipelock_blocks_secret_https_post.py` + (new) — HTTPS variant of the existing block test. + - `tests/integration/test_pipelock_allows_normal_https.py` + (new) — clean HTTPS GET succeeds. + - `tests/unit/test_pipelock_yaml.py` updated to assert the new + `tls_interception` block in the rendered config. + - `tests/integration/test_dry_run_plan.py` updated to assert + the new `egress.tls_interception` JSON block. + +### Out of scope + +- Modifying pipelock itself. We're using existing config knobs. +- A manifest field to disable / customize interception per bottle. + Doable but premature. +- Wiring `passthrough_domains`. The default `[]` is correct for + v1; add the manifest field when a pinning host shows up. +- `cross_request_detection`, `entropy_budget`, + `fragment_reassembly`, `reverse_proxy`, `scan_api` — features + pipelock exposes but we don't need for the body-DLP gap. 
+ +## Proposed Design + +### Topology + +``` +agent --HTTPS_PROXY--> pipelock --[bumps TLS]--> internet + (sees plaintext: URL, headers, body) +``` + +Same single-sidecar shape as PRD 0001. The only addition is +`tls_interception` in pipelock's config plus the per-bottle CA +generated at prepare time. + +### CA lifecycle + +- **Generation.** Host-side, at prepare time, via a one-shot + `docker run --rm -v <stage_dir>:/h pipelock tls init`. Output is + `<stage_dir>/ca.pem` + `<stage_dir>/ca-key.pem`, both mode 600. +- **Sidecar mount.** `DockerPipelockProxy.start` adds + `-v <stage_dir>:/h:ro` to the sidecar's `docker run`. The rendered + YAML references `/h/ca.pem` and `/h/ca-key.pem`. The private + key is read-only from pipelock's perspective; the host stage + dir is owned by the launching user. +- **Bottle install.** `provision_ca` (Docker impl) does + `docker cp <stage_dir>/ca.pem agent:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, + then `update-ca-certificates`. The CA env trio is set at + `docker run -e` time (Docker propagates run-time env into + `docker exec`, verified in PR #8's spike). +- **Teardown.** The sidecar container is destroyed, the stage + dir is removed by `start.py`'s existing `finally` block, and + the CA dies with both. +- **Fingerprint.** Computed via stdlib in `provision_ca` and + logged once to stderr (`claude-bottle: mitm ca fingerprint: + sha256:…`). The private key never appears in any log. + +### Data model changes + +None to the manifest schema. The dry-run JSON contract grows a +reserved `egress.tls_interception` block; the fingerprint is +always null at dry-run because the CA doesn't exist yet. + +### Existing code touched + +Surgical, all on the existing pipelock path: + +- `claude_bottle/pipelock.py` — config builder + YAML renderer. +- `claude_bottle/backend/__init__.py` — abstract `provision_ca`. +- `claude_bottle/backend/docker/pipelock.py` — `tls init` helper, + sidecar volume mount. +- `claude_bottle/backend/docker/prepare.py` — CA paths on plan. 
+- `claude_bottle/backend/docker/launch.py` — CA env trio on agent. +- `claude_bottle/backend/docker/backend.py` — `provision_ca` + dispatch + thread `self._proxy` through prepare/launch unchanged + shape. +- `claude_bottle/backend/docker/bottle_plan.py` — preflight + rendering. +- `claude_bottle/backend/docker/provision/ca.py` (new). + +Net diff is meaningfully smaller than PR #8 because pipelock +already does the work — no addon, no second sidecar, no second +backend module. + +### External dependencies + +- **Pipelock image** — unchanged pin from PRD 0001 + (`ghcr.io/luckypipewrench/pipelock@sha256:3b1a3941…`, + matching pipelock v2.3.0). No new image dependency. +- **No host-side crypto deps.** CA generation uses the pipelock + image's own `tls init` command in a one-shot container. + Fingerprint uses Python stdlib `ssl` + `hashlib`. + +## Open questions + +- **Mount semantics for the stage dir.** The sidecar runs with a + `-v <stage_dir>:/h:ro` bind mount. The CA files were written by + the one-shot `pipelock tls init` container with whatever UID + pipelock's image uses; the sidecar reads them as that same UID. + Should work, but confirm on first impl by inspecting the file + modes/owners and that the sidecar actually loads them. Fallback: + `docker cp` the cert/key into the running sidecar after `docker + create` (mirror PR #8's mitmproxy lifecycle). +- **Cert validity / TTL.** Defaults are `cert_ttl: 24h` for + per-host leaves; the CA validity from `pipelock tls init` is + 10 years by default (`--validity 87600h`). The CA outlives the + bottle either way; per-bottle ephemerality is enforced by + *generating a fresh one each launch*, not by setting a short + CA validity. Document; no tuning in v1. +- **`passthrough_domains` shape.** Once we expose this through + the manifest in a follow-up, the natural place is + `bottle.egress.tls_passthrough_domains: [host, ...]`, mirroring + the existing `egress.allowlist` shape. 
+- **Stage-dir cleanup ordering.** The stage dir holds the CA + private key briefly. `start.py`'s existing `finally` block + `shutil.rmtree`s it. Confirm the rmtree fires after the sidecar + is stopped, so the sidecar isn't reading a deleted mount when + it shuts down. The current order is correct (teardown unwinds + via ExitStack before the outer `finally` runs); verify. + +## References + +- `docs/research/pipelock-assessment.md` (now corrected) — + pipelock capability assessment including the + `tls_interception` block. +- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` — + egress-proxy baseline this PRD extends. +- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC + contract this PRD adds a `provision_ca` method to. +- `docs/prds/0004-split-out-provisioners.md` — per-provisioner + module pattern reused for the new CA provisioner. +- Pipelock `tls` CLI (in-image help): + `pipelock tls init / install-ca / show-ca`. +- Closed PR #8 — earlier mitmproxy-based design built on the + falsified "pipelock can't MITM" premise; archived for context. diff --git a/docs/research/pipelock-assessment.md b/docs/research/pipelock-assessment.md index e547afc..5c6c1d2 100644 --- a/docs/research/pipelock-assessment.md +++ b/docs/research/pipelock-assessment.md @@ -222,10 +222,14 @@ The following threat-model items from `network-egress-guard.md` are intercept raw UDP 53 packets. - **Domain fronting**: an agent can send `CONNECT allowed-host.com:443` through the proxy but embed a different SNI inside the TLS session. - Pipelock does not perform TLS inspection (no CA trust injection) and - cannot verify SNI vs. CONNECT header. The same limitation is shared - with smokescreen and is documented in `network-egress-guard.md` as a - known gap for the non-TLS-terminating proxy approach. 
+ Pipelock supports TLS interception via its `tls_interception` config + block (`enabled`, `ca_cert`, `ca_key`, `cert_ttl`, `cert_cache_size`, + `passthrough_domains`, `max_response_bytes`) plus the `pipelock tls + init` / `install-ca` / `show-ca` CLI; with interception on, the + body and inner Host header become visible to its scanner pipeline, + closing the domain-fronting gap. With interception off (default in + the generated config), pipelock relays the CONNECT as an opaque + tunnel and only sees the outer hostname. - **SSH egress content**: SSH sessions to permitted hosts are opaque. Same limitation noted in both prior research notes. - **Agent killing the proxy process**: if pipelock runs inside the same @@ -385,7 +389,7 @@ pipelock's differentiators. | Blocks RFC 1918 by default | only if explicitly added to rules | yes | yes, + DNS rebinding | no | | Content-based DLP (credential patterns) | no | no | yes, 48 patterns + encoding normalization | no | | MCP / WebSocket scanning | no | no | yes, bidirectional | no | -| Domain fronting bypass | possible | possible | possible (no TLS termination) | n/a | +| Domain fronting bypass | possible | possible | mitigated when `tls_interception` is enabled (CA trust required in client) | n/a | | macOS Docker Desktop (sidecar mode) | yes | yes | yes | yes | | macOS Docker Desktop (in-container sandbox) | yes | n/a | degraded (--best-effort) | yes | | NET_ADMIN / NET_RAW required | yes | no | no (sidecar) | no | diff --git a/docs/research/tls-mitm-for-pipelock.md b/docs/research/tls-mitm-for-pipelock.md deleted file mode 100644 index aa8c4d0..0000000 --- a/docs/research/tls-mitm-for-pipelock.md +++ /dev/null @@ -1,508 +0,0 @@ -# TLS interception for pipelock content scanning - -Research into adding TLS termination ("MITM") to the egress path so that -pipelock's scanning pipeline can see plaintext HTTP request and response -bodies, instead of only the `CONNECT` host and opaque ciphertext. 
- -## Summary - -- Pipelock today sees `CONNECT` hostnames and the encrypted bytes that follow. - Its DLP, subdomain-entropy, and MCP scanners cannot fire on TLS-encrypted - bodies, which is the gap explicitly named under "Scope gaps" in - `pipelock-assessment.md` ("Pipelock does not perform TLS inspection (no CA - trust injection)"). -- Closing that gap requires a TLS-terminating proxy that bumps `CONNECT`, - presents a leaf certificate for the target hostname signed by a CA the - bottle's trust store accepts, decrypts the inner HTTP, and re-establishes - TLS to the real upstream. -- The mature open-source option is **mitmproxy**. Squid + `ssl_bump` is the - heavier production-grade alternative. The Go ecosystem (`goproxy`, - `gomitmproxy`, `martian`) is suitable only if we want a custom binary - tightly coupled to pipelock. -- Recommended v1 topology: **mitmproxy in front of pipelock** on the same - egress route. mitmproxy terminates client TLS, forwards plaintext to - pipelock as its upstream HTTP proxy, and re-encrypts to the real upstream. - Pipelock stays unchanged. -- Per-bottle ephemeral CA, generated at bottle start and destroyed on - teardown. The CA private key lives only on the sidecar; the bottle's - trust store only ever sees the public cert. -- Cert pinning is a known caveat but a small one given the narrow allowlist - in this project. Selective bumping is the mitigation if a future - allowlisted host turns out to pin. - ---- - -## What pipelock cannot see today - -The current egress topology (per `pipelock-assessment.md`): - -``` -agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet - \____________________________ - opaque TLS bytes -``` - -The agent's client (Claude Code, `curl`, an MCP server, a Python SDK) -sends `CONNECT api.anthropic.com:443`. Pipelock checks the hostname -against its `api_allowlist`, replies `200 Connection Established`, and -then blindly relays bytes between the two TCP halves. 
The TLS handshake -and everything inside it happens end-to-end between the agent and the -real upstream. - -What pipelock can scan in this mode: - -- `CONNECT` target hostname (SNI is not even needed). -- TLS record framing and lengths (useful for budgets, useless for DLP). -- Plain HTTP/1.1 to non-HTTPS destinations (irrelevant — there are none - in `DEFAULT_ALLOWLIST`). - -What pipelock cannot scan in this mode: - -- Request URL, method, headers, body. -- Response status, headers, body. -- MCP JSON-RPC payloads inside the TLS session. -- WebSocket frames inside a TLS-wrapped upgrade. -- Whether the inner SNI or HTTP `Host` / `:authority` matches the - outer `CONNECT` target (domain-fronting check). - -The 48-pattern DLP layer, the subdomain-entropy check (insofar as it -inspects URLs rather than DNS-resolver queries), the request-redaction -feature added in v2.3.0, and bidirectional MCP scanning all require -plaintext to operate on. Without TLS termination, those layers are -inert against any HTTPS destination — which is every destination in -the current allowlist. - ---- - -## How TLS interception works - -The mechanics of `CONNECT` bumping, end to end: - -1. **Agent issues `CONNECT`.** The HTTP client sees `HTTPS_PROXY` set, - so it opens a TCP connection to the proxy and sends - `CONNECT api.anthropic.com:443 HTTP/1.1`. -2. **Proxy answers `200`.** Standard tunnel-established response. -3. **Proxy starts TLS as the server.** Instead of relaying bytes, the - proxy itself performs a TLS handshake with the agent. It needs a - server certificate for `api.anthropic.com` — so on first contact for - that hostname, the proxy generates a leaf certificate with - `CN=api.anthropic.com` and a SAN for the same, signs it with its - own CA private key, and presents that cert. Subsequent connections - to the same hostname reuse the cached leaf. -4. **Agent verifies the cert.** The agent's TLS library walks the chain - to a trusted root. 
Because the bottle's trust store contains the - proxy's CA cert, validation succeeds. The agent has no way to tell - it isn't talking to the real `api.anthropic.com`. -5. **Proxy opens its own TLS to the real upstream.** As a client this - time, using the system root store, talking to the real - `api.anthropic.com`. Real SNI, real cert chain validated normally. -6. **Proxy bridges the two TLS sessions.** Decrypts on the server side, - re-encrypts on the client side, and scans the plaintext in between. - -This is what every TLS-terminating egress proxy does. The trade-offs -live in three places: - -- **CA trust injection.** Step 4 only works if the bottle's trust - store contains the proxy's CA. Mechanics covered under "CA lifecycle" - below. -- **Cert generation cost.** Generating an RSA-2048 leaf cert takes - ~50 ms; ECDSA P-256 is ~5 ms. Cache leaves per (hostname, SAN list) - to keep this off the steady-state hot path. -- **Protocol coverage.** The proxy needs to speak HTTP/1.1, HTTP/2 (ALPN - `h2`), and ideally WebSocket. HTTP/3 / QUIC is UDP and requires a - separate code path; for v1, blocking UDP/443 at the iptables layer - forces clients to fall back to HTTP/2, which we can inspect. - ---- - -## Tools - -### mitmproxy - -- **What it is.** Python (with Rust crypto bits) interactive HTTPS proxy. - Reference open-source implementation of the bump pattern. Ships as - `mitmproxy` (TUI), `mitmweb` (browser UI), and `mitmdump` (headless). -- **Cert handling.** Generates a CA on first run under `~/.mitmproxy/`. - Per-host leaves are generated on demand and cached in memory. Cert - cache keyed by (hostname, SAN extensions inferred from upstream cert). -- **Protocols.** HTTP/1.1, HTTP/2, WebSocket fully supported. HTTP/3 - exists as experimental. Raw TCP / non-HTTP TLS supported via - `--mode reverse:` but not in CONNECT-bump mode. -- **Extensibility.** Python addon API. An addon module can inspect or - modify any `request` / `response` / `tcp_message` flow. 
The pipelock - integration in Topology D below uses this. -- **Selective bumping.** `ignore_hosts` regex; matching CONNECTs are - tunneled blindly instead of bumped. Critical for the cert-pinning - mitigation. -- **Docker image.** `mitmproxy/mitmproxy` on Docker Hub. Single binary - for the CLI, ~80 MB image. Configurable via flags or `~/.mitmproxy/config.yaml`. -- **Project URL.** , . - -Most mature, best-documented, lowest-effort integration. Default choice -for v1. - -### Squid + ssl_bump - -- **What it is.** Squid is a long-running C++ caching proxy. - `ssl_bump` is its TLS-interception feature, controlled by per-CONNECT - actions: `splice` (tunnel blindly), `bump` (decrypt and re-encrypt), - `peek` (look at TLS hello then decide), `stare` (look at server cert - then decide), `terminate` (abort the connection). -- **Cert handling.** Configured via `sslcrtd_program` — a helper that - generates and caches per-host certs. CA cert and key referenced by - PEM paths in `squid.conf`. -- **Protocols.** HTTP/1.1 fully; HTTP/2 to clients via recent versions; - no scripted addons. -- **Extensibility.** ICAP (Internet Content Adaptation Protocol) for - external scanners — Squid POSTs each request/response to an ICAP - service that can modify or reject. This is the formal version of - Topology D below. -- **Production track record.** Used at corporate-proxy scale (large - enterprises, ISPs). Heavyweight for a single-bottle sidecar. -- **Project URL.** . - -Right tool if pipelock grows an ICAP server endpoint. Otherwise, more -config surface than this project needs. - -### Go libraries: goproxy, gomitmproxy, martian - -- **`goproxy`** (elazarl) — long-lived Go library, basic CONNECT-bumping - proxy with a handler API. Sparse on HTTP/2. - -- **`gomitmproxy`** (AdGuard) — newer, cleaner API; built for AdGuard - Home / DNS-filtering products. HTTP/2 support is partial. - -- **`martian`** (Google) — request/response modifier framework with a - JSON-configurable rule engine. 
Used internally at Google; public - ecosystem thin. - - -These are relevant only if we decide to write a custom TLS-terminating -binary that links pipelock's scanning packages directly — Topology C -below. They are not faster than mitmproxy for the v1 sidecar shape; -they are smaller and more direct, at the cost of writing more Go. - -### Disqualified - -- **Caddy, Envoy, HAProxy.** All can terminate TLS at a reverse-proxy - vhost. None ship a "bump on CONNECT and forward plaintext to a - downstream proxy" mode out of the box. Adapting any of them to this - shape is more work than starting from mitmproxy. -- **Cloudflare Gateway, Zscaler, NetSkope, Forcepoint.** Managed cloud - egress with TLS inspection. Wrong topology — they live outside the - host, not as a per-bottle sidecar, and they require trusting a vendor - with full plaintext. -- **Charles Proxy, Burp Suite.** Closed-source GUI tools for developer - capture and security testing. Not appropriate as headless sidecars. -- **`mitmdump` standalone vs. embedding mitmproxy as a library.** Both - are mitmproxy. Calling out only to note: the project ships both a CLI - and a Python API; addons can be loaded either way. - ---- - -## Topologies - -Five candidate topologies, ordered roughly from least to most coupled -between the two components. - -### A — mitmproxy in front of pipelock (recommended) - -``` -agent --HTTPS_PROXY--> mitmproxy --HTTP_PROXY--> pipelock --> internet - (bump TLS) (scan plain) (real TLS) -``` - -mitmproxy terminates the agent's TLS connection, decrypts, and then -forwards the inner HTTP request to pipelock by treating pipelock as -its own upstream HTTP forward proxy. Pipelock receives plaintext HTTP -exactly as if the agent had used HTTP, applies its full scanning -pipeline, and forwards to mitmproxy's upstream client half — which -re-establishes TLS to the real destination. 
- -Concretely the agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's -`upstream_proxy` config points at pipelock; pipelock's network reach -includes the real internet. - -- **Wins.** Pipelock unchanged. mitmproxy unchanged from default - configuration. Each component has one job. Failure modes are clear - per layer. -- **Costs.** Two sidecars per bottle instead of one. One extra - decrypt / re-encrypt hop, ~5–15 ms per request in steady state. -- **Open question.** How exactly mitmproxy forwards to pipelock matters - for whether pipelock sees TLS again or only HTTP. mitmproxy's - `upstream` mode wraps the decrypted request in another CONNECT if the - destination is HTTPS — which would re-encrypt before pipelock sees - it, defeating the point. The correct mode is `upstream` with TLS - re-origination disabled, or `regular` mode with a chained proxy. The - v2 release of mitmproxy reworked this; needs verification against the - current docs at integration time. - -### B — pipelock in front of mitmproxy (ruled out) - -``` -agent --HTTPS_PROXY--> pipelock --CONNECT?--> mitmproxy --> internet - (sees CONNECT only) (bump TLS) -``` - -Pipelock would receive a `CONNECT` and decide to allow or deny based -on hostname, then tunnel to mitmproxy. mitmproxy would terminate TLS -and see plaintext — but pipelock would never see the plaintext, which -is the whole point of the exercise. The scanning still happens (in -mitmproxy), but it isn't pipelock doing it, so we'd need an entirely -different rule engine. Ruled out. - -### C — Extend pipelock itself to terminate TLS - -Two sub-variants: - -**C.1 — Upstream a `tls_terminate` mode.** Submit a feature to -pipelock that adds CONNECT bumping and per-host cert generation in Go, -using `crypto/tls` and the existing scanning packages. Pipelock becomes -a self-contained MITM proxy. 
License question matters here: the Apache -2.0 core can grow new features in-tree, but if upstream insists this -belongs in `enterprise/` (ELv2), we either accept ELv2 or fork. - -**C.2 — Wrap pipelock in a thin Go binary in the same container.** A -small Go program does the TLS half (`CONNECT` parsing, cert generation, -TLS handshake) and pipes plaintext to pipelock over UDS or loopback. -The wrapper is ours; pipelock is unmodified. No license question. - -- **Wins.** Single component on the egress path. Pipelock owns the - scanning end-to-end, including domain-fronting checks (SNI vs. - `Host` vs. `CONNECT`). -- **Costs.** Real Go engineering effort. CA generation, cert caching, - TLS handshake, HTTP/2 ALPN negotiation, WebSocket upgrade — all - things mitmproxy already solves. -- **When.** Right shape for v2 or v3 once the v1 mitmproxy-in-front - topology has proven the integration works and the scanning rules are - stable. - -### D — mitmproxy as the proxy, pipelock as a content-scan subroutine - -``` -agent --HTTPS_PROXY--> mitmproxy --> internet - (bump TLS) - | - v - POST /scan to pipelock - <- allow / block / redact -``` - -A Python addon in mitmproxy sends each decrypted request (and response) -to a pipelock HTTP `/scan` endpoint and gates the flow on the verdict. -mitmproxy handles all networking; pipelock is the rule engine only. - -- **Wins.** Clean separation of concerns. Pipelock doesn't have to - speak TLS at all. The addon is small, ~100 lines of Python. -- **Costs.** Requires pipelock to expose a scan API. The current Apache - 2.0 core does not document one. If `/scan` lives in `enterprise/`, - ELv2 applies. If it doesn't exist, we'd be asking pipelock for a new - surface. -- **Variant.** Squid's ICAP path is the formalized version of the same - pattern. - -### E — Single container, two processes - -mitmproxy and pipelock share a container, started by `supervisord` or -`s6-overlay`. Networking simplifies to localhost. 
Lifecycle complicates: -container restart now means restarting both; failure of one process is -not visible at the Docker layer; logs interleave. - -- **Wins.** Slightly less Docker plumbing in `cli.py`. -- **Costs.** Operational complexity not worth the savings. The two - containers are independent processes with independent failure modes; - Docker is the right tool for that. - -Net: not recommended. - ---- - -## CA lifecycle - -The CA private key is the asset to defend. With it, anyone can issue -certs that the bottle's trust store will accept for any hostname. So: - -**Per-bottle ephemeral CA.** At bottle start, generate a fresh -RSA-2048 or ECDSA-P256 CA inside the mitmproxy sidecar. Export only -the public cert (PEM) into the bottle's trust store at one of: - -- `/usr/local/share/ca-certificates/claude-bottle-mitm.crt` followed by - `update-ca-certificates` (Debian/Ubuntu base images). -- `/etc/pki/ca-trust/source/anchors/` with `update-ca-trust` - (Red-Hat-family). -- `$NODE_EXTRA_CA_CERTS` for Node-based agents (Claude Code). -- `$SSL_CERT_FILE` / `$REQUESTS_CA_BUNDLE` for Python SDKs. - -The private key never leaves the sidecar's filesystem. The CA cert -public half is the only artifact that crosses into the bottle. - -On bottle teardown, the sidecar container is destroyed; the CA dies -with it. The next bottle gets a fresh CA. No long-lived MITM CA on -disk. - -**Why not a shared per-host CA.** A persistent CA across bottles is -faster (no generation at start) but is a real liability: if any bottle -exfiltrates the CA cert public half (which it can — it's in the trust -store by design), an attacker on the host network could in principle -impersonate any host to any bottle. With a per-bottle CA, the exfil -gains nothing: the CA is bottle-local and dies in minutes. - -**Generation cost.** RSA-2048 CA generation is ~200 ms; ECDSA-P256 is -~5 ms. Either is irrelevant against the per-bottle Docker pull and -network setup cost. 
- -**Where the CA lives in the bottle's trust store.** Both: a -distribution-standard path with `update-ca-certificates`, and the -env-var path. Belt and suspenders, because some Node and Python -libraries honor the env vars only, and some load only `/etc/ssl/certs/` -directly. - ---- - -## Cert pinning (brief) - -A client that pins ignores the trust store and refuses any cert whose -public key isn't on a hardcoded list. Three observations for this -project: - -- The current `DEFAULT_ALLOWLIST` (`api.anthropic.com`, - `statsig.anthropic.com`, `sentry.io`, `claude.ai`, - `platform.claude.com`, `downloads.claude.ai`, - `raw.githubusercontent.com`) does not appear to include any host that - pins against server-side SDKs. Server-side SDKs (Node, Python) almost - universally honor system trust and `NODE_EXTRA_CA_CERTS` / - `SSL_CERT_FILE`. Mobile SDKs and Chromium pin; we don't run those. -- If a future allowlisted host turns out to pin, the mitigation is - selective bumping via mitmproxy `ignore_hosts`: that specific - hostname tunnels blindly and pipelock loses DLP coverage for it. - Coverage on every other host is unaffected. -- The cost of finding out: a single 5-minute test before adding a host - — point mitmproxy at the host, observe whether the client succeeds. - -Not a v1 blocker. Document the failure mode and the mitigation. 
- ---- - -## Comparison table - -| | A: mitmproxy → pipelock | B: pipelock → mitmproxy | C: TLS in pipelock | D: mitmproxy + scan API | E: one container | -|---|---|---|---|---|---| -| Pipelock sees plaintext | yes | no | yes | yes (via /scan) | yes | -| Code change to pipelock | none | none | substantial | adds /scan endpoint | none | -| Sidecar count | 2 | 2 | 1 | 2 | 1 | -| Cert generation owner | mitmproxy | mitmproxy | pipelock | mitmproxy | mitmproxy | -| Selective bumping | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` | pipelock config | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` | -| Failure isolation per process | yes | yes | n/a (one process) | yes | no (shared container) | -| License question | none | none | ELv2 risk | ELv2 risk | none | -| v1 effort | low | low (but pointless) | high | medium | low | -| Long-term shape | interim | n/a | best | possible | not recommended | - ---- - -## Recommendation - -**Adopt Topology A for v1.** Add a mitmproxy sidecar to the egress -topology, in front of pipelock on the same per-bottle internal network. -The agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's upstream is -pipelock; pipelock's upstream is the real internet. - -Concretely: - -1. Add a `MitmproxyProxy` class alongside `PipelockProxy`, with the - same `prepare` / `start` / `stop` lifecycle. The class generates - a per-bottle CA in `stage_dir`, exports the public cert into a - second file, and writes a mitmproxy config that: - - bumps every CONNECT by default - - uses `upstream_proxy = http://pipelock-:` - - listens on a known port inside the per-bottle internal network -2. Extend the bottle launch step to copy the CA public cert into the - agent container under - `/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, run - `update-ca-certificates`, and set `NODE_EXTRA_CA_CERTS` / - `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` accordingly. -3. 
Repoint the agent's `HTTPS_PROXY` and `HTTP_PROXY` from the pipelock - container to the mitmproxy container. -4. Verify mitmproxy's upstream-proxy mode forwards plaintext (not a - re-wrapped CONNECT) to pipelock; if not, use `regular` mode with a - chained proxy directive. -5. Test that pipelock's DLP, subdomain-entropy, and MCP scanners now - fire on real request bodies for `api.anthropic.com` traffic. - -**Defer Topologies C and D.** Topology C (extending pipelock to -terminate TLS) is the cleanest long-term shape but is a substantial -build and runs into the Apache 2.0 vs. ELv2 question. Topology D -(mitmproxy with pipelock as a scan API) is attractive but requires a -pipelock surface that doesn't exist today. Both are valid v2 targets; -neither is the right starting point. - -The `network-egress-guard.md` v1 iptables + dnsmasq layer remains -necessary alongside this — TLS interception covers HTTP/HTTPS only; -raw TCP, UDP/443 (QUIC), UDP/53 (DNS), and ICMP still need the -IP-level default-deny. - ---- - -## Open questions - -1. **mitmproxy upstream-proxy mode mechanics.** Does mitmproxy in - `upstream_proxy` mode forward decrypted HTTP plaintext to the - upstream, or does it wrap it in a new CONNECT? The documented - behavior changed between mitmproxy 8 and 10. Needs verification - against the version we pin. -2. **Pipelock's behavior when receiving plain HTTP.** Pipelock's - `forward_proxy.enabled: true` accepts both `GET http://...` (plain - HTTP) and `CONNECT host:443` (HTTPS). After Topology A is wired up, - pipelock will see only plain HTTP — does its DLP / MCP scanning - pipeline run the full set of layers, or are some gated on the - CONNECT path? Confirm by reading - `github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md`. -3. 
**CA installation in the Anthropic-provided Claude Code Docker image.** - The base image's distribution determines whether `update-ca-certificates` - (Debian/Ubuntu) or `update-ca-trust` (Red Hat) is the right command. - The current `Dockerfile` should be inspected before assuming Debian. -4. **HTTP/2 over the agent → mitmproxy hop.** Node's HTTP client - negotiates `h2` via ALPN. mitmproxy speaks `h2` to clients in recent - versions. Confirm the version we pin supports `h2` end-to-end and - doesn't downgrade to `http/1.1` (which would be a silent - performance regression). -5. **Selective-bump policy surface.** Where does the - "tunnel this hostname blindly" decision live? Options: a field on - `bottle.egress` in the manifest, a fixed list of known-pinning - hosts baked into the mitmproxy config, or pipelock-side opt-out. - Manifest field is most consistent with the existing - `bottle.egress.allowlist` shape. -6. **Image pin for mitmproxy.** The `pipelock-assessment.md` - recommendation is to pin by digest. The mitmproxy Docker Hub image - should be pinned the same way. Which release line? `mitmproxy/mitmproxy` - ships rolling and tagged versions; the tagged `:11.x` line is the - right baseline. -7. **CA generation in Python (mitmproxy) vs. as a separate step.** - mitmproxy generates a CA on first launch if none is provided. For - per-bottle ephemerality, we want the CA to be ours, not whatever - mitmproxy chooses — so generate the CA in the host-side prepare - step and inject it via `--certs *=...`. Mechanics need confirming. -8. **Domain fronting verification.** Once pipelock sees plaintext, it - has access to the inner `Host` / `:authority`. A new rule that - compares it against the outer `CONNECT` target catches domain - fronting. Worth a follow-up note on whether pipelock has such a - rule or whether we add it. 
- ---- - -## References - -- mitmproxy: , -- mitmproxy `upstream_proxy` mode: -- mitmproxy CA cert installation: -- Squid `ssl_bump`: -- Squid ICAP: -- `goproxy`: -- `gomitmproxy`: -- `martian`: -- Node TLS / `NODE_EXTRA_CA_CERTS`: -- Python `SSL_CERT_FILE` and `REQUESTS_CA_BUNDLE`: -- Prior research — pipelock assessment: `docs/research/pipelock-assessment.md` -- Prior research — network egress guard: `docs/research/network-egress-guard.md` -- Prior research — secret exfil tripwire encodings: `docs/research/secret-exfil-tripwire-encodings.md` - -Research conducted 2026-05-12. -- 2.52.0 From f44e884d8afaa832366e97173b439f44be08c612 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 14:22:59 -0400 Subject: [PATCH 2/6] docs(prd): fold 0006 walkthrough resolutions into the design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the open-question walkthrough, all four collapsed: - Q1 (mount semantics): resolved to `docker cp` between `docker create` and `docker start`, mirroring the existing pipelock YAML handling. No bind mount, no UID/permission concern. Folded into §Proposed Design > CA lifecycle as "Sidecar install". - Q2 (cert validity / TTL): pre-decided in the question text. Per-bottle ephemerality is enforced by regenerating per launch, not by short validity windows. Pipelock's defaults are fine. Folded into §Proposed Design as a one-line "Per-bottle ephemerality" note. - Q3 (`passthrough_domains` shape): not v1 scope; the shape is pre-recorded so the follow-up is mechanical. Moved into §Out of scope. - Q4 (stage-dir cleanup ordering): reading start.py confirmed the ExitStack-then-outer-finally order is correct. Folded into §Proposed Design as a "Teardown" note. The §Open questions section is dropped. None of the four was a real design question — they were verifications and pre-decided items left in for defensiveness. 
Co-Authored-By: Claude Opus 4.7 --- docs/prds/0006-pipelock-tls-interception.md | 72 +++++++++------------ 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/docs/prds/0006-pipelock-tls-interception.md b/docs/prds/0006-pipelock-tls-interception.md index 20bbe7b..d416252 100644 --- a/docs/prds/0006-pipelock-tls-interception.md +++ b/docs/prds/0006-pipelock-tls-interception.md @@ -136,9 +136,13 @@ The feature is **done** when all of the following ship: leaving `ca.pem` and `ca-key.pem` under `stage_dir`. The host file owner is whatever the upstream image's user is; the sidecar mount is read-only so this is fine. - - `DockerPipelockProxy.start` mounts the stage dir into the - sidecar at `/h:ro` and references the CA paths in the rendered - YAML. + - `DockerPipelockProxy.start` `docker cp`s the CA cert + key + into the sidecar at `/etc/pipelock/ca.pem` and + `/etc/pipelock/ca-key.pem` between `docker create` and + `docker start`, mirroring the existing pattern for the YAML + config. If pipelock's image runs as non-root, a `docker exec + -u 0 chown pipelock:pipelock /etc/pipelock/ca*.pem` lands + between the `cp` and the `start`. - **`claude_bottle/backend/__init__.py`**: new abstract method `provision_ca(plan, target)` on `BottleBackend`, default no-op. `BottleBackend.provision` orchestrates `ca → prompt → skills → @@ -183,7 +187,10 @@ The feature is **done** when all of the following ship: - A manifest field to disable / customize interception per bottle. Doable but premature. - Wiring `passthrough_domains`. The default `[]` is correct for - v1; add the manifest field when a pinning host shows up. + v1; add the manifest field when a pinning host shows up. The + shape is pre-recorded so the follow-up is mechanical: + `bottle.egress.tls_passthrough_domains: [host, ...]`, + mirroring the existing `egress.allowlist`. 
- `cross_request_detection`, `entropy_budget`, `fragment_reassembly`, `reverse_proxy`, `scan_api` — features pipelock exposes but we don't need for the body-DLP gap. @@ -204,21 +211,29 @@ generated at prepare time. ### CA lifecycle - **Generation.** Host-side, at prepare time, via a one-shot - `docker run --rm -v <stage_dir>:/h pipelock tls init`. Output is - `<stage_dir>/ca.pem` + `<stage_dir>/ca-key.pem`, both mode 600. -- **Sidecar mount.** `DockerPipelockProxy.start` adds - `-v <stage_dir>:/h:ro` to the sidecar's `docker run`. The rendered - YAML references `/h/ca.pem` and `/h/ca-key.pem`. The private - key is read-only from pipelock's perspective; the host stage - dir is owned by the launching user. + `docker run --rm -v <stage_dir>:/h -e PIPELOCK_HOME=/h pipelock tls + init`. Output: `<stage_dir>/ca.pem` + `<stage_dir>/ca-key.pem`, mode 600. +- **Sidecar install.** `DockerPipelockProxy.start` `docker cp`s + the CA cert + key into the sidecar at `/etc/pipelock/ca.pem` + and `/etc/pipelock/ca-key.pem` between `docker create` and + `docker start`. Same pattern the proxy already uses for the + YAML config — no bind-mount, no UID/permission concern from + the one-shot generation step. The rendered YAML references + the in-container paths. - **Bottle install.** `provision_ca` (Docker impl) does `docker cp <stage_dir>/ca.pem agent:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, then `update-ca-certificates`. The CA env trio is set at `docker run -e` time (Docker propagates run-time env into - `docker exec`, verified in PR #8's spike). -- **Teardown.** The sidecar container is destroyed, the stage - dir is removed by `start.py`'s existing `finally` block, and - the CA dies with both. + `docker exec`). +- **Per-bottle ephemerality.** Enforced by *regenerating per + launch*, not by validity windows. Pipelock's defaults + (`cert_ttl: 24h` for leaves, `--validity 87600h` for the CA) + are fine — the CA lives only as long as the sidecar, which is + the bottle's lifetime. 
+- **Teardown.** Sidecar removed via `ExitStack` callback, then + the launch context manager's outer `finally` `shutil.rmtree`s + `stage_dir`. CA dies with both, in that order, so the sidecar + is never reading a deleted mount on shutdown. - **Fingerprint.** Computed via stdlib in `provision_ca` and logged once to stderr (`claude-bottle: mitm ca fingerprint: sha256:…`). The private key never appears in any log. @@ -259,33 +274,6 @@ backend module. image's own `tls init` command in a one-shot container. Fingerprint uses Python stdlib `ssl` + `hashlib`. -## Open questions - -- **Mount semantics for the stage dir.** The sidecar runs with a - `-v :/h:ro` bind mount. The CA files were written by - the one-shot `pipelock tls init` container with whatever UID - pipelock's image uses; the sidecar reads them as that same UID. - Should work, but confirm on first impl by inspecting the file - modes/owners and that the sidecar actually loads them. Fallback: - `docker cp` the cert/key into the running sidecar after `docker - create` (mirror PR #8's mitmproxy lifecycle). -- **Cert validity / TTL.** Defaults are `cert_ttl: 24h` for - per-host leaves; the CA validity from `pipelock tls init` is - 10 years by default (`--validity 87600h`). The CA outlives the - bottle either way; per-bottle ephemerality is enforced by - *generating a fresh one each launch*, not by setting a short - CA validity. Document; no tuning in v1. -- **`passthrough_domains` shape.** Once we expose this through - the manifest in a follow-up, the natural place is - `bottle.egress.tls_passthrough_domains: [host, ...]`, mirroring - the existing `egress.allowlist` shape. -- **Stage-dir cleanup ordering.** The stage dir holds the CA - private key briefly. `start.py`'s existing `finally` block - `shutil.rmtree`s it. Confirm the rmtree fires after the sidecar - is stopped, so the sidecar isn't reading a deleted mount when - it shuts down. 
The current order is correct (teardown unwinds - via ExitStack before the outer `finally` runs); verify. - ## References - `docs/research/pipelock-assessment.md` (now corrected) — -- 2.52.0 From 3755e66abe66412eb37b51f7acd77d41c54153c7 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 14:45:36 -0400 Subject: [PATCH 3/6] feat(pipelock): enable tls_interception with per-bottle ephemeral CA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of PRD 0006. Pipelock now does the CONNECT bumping that PR #8's mitmproxy chain was supposed to provide — natively, in the same single sidecar PRD 0001 wired up. - claude_bottle/pipelock.py: pipelock_build_config grows optional ca_cert_path / ca_key_path kwargs. When both are passed the rendered YAML carries a `tls_interception: { enabled: true, ca_cert, ca_key }` block. PipelockProxy gains class-level CA_CERT_IN_CONTAINER / CA_KEY_IN_CONTAINER constants that subclasses set to wherever they place the CA inside the sidecar. PipelockProxyPlan gains ca_cert_host_path / ca_key_host_path fields (default empty Path() — sentinel for "not yet populated", filled by launch via dataclasses.replace). - claude_bottle/backend/docker/pipelock.py: new pipelock_tls_init(stage_dir) helper runs `pipelock tls init` in a one-shot container against a host-mounted scratch dir. DockerPipelockProxy sets its class constants to /etc/pipelock-ca.pem and /etc/pipelock-ca-key.pem; .start docker-cp's the cert + key into those paths between `docker create` and `docker start`. Pipelock runs as root in its distroless image, so no chown is needed (verified). - claude_bottle/backend/docker/launch.py: calls pipelock_tls_init between network creation and proxy.start. Prepare stays side-effect-free on docker; the one-shot ca-init container only runs on a real launch, not on `start --dry-run`. 
- tests/unit/test_pipelock_yaml.py: new assertions that pipelock_build_config emits the tls_interception block only when both paths are supplied (and rejects a half-set pair), plus a test that the docker proxy's prepare plumbs the in-container paths through to the rendered YAML. The end-to-end "bumping actually fires" assertion lands in chunk 4 (HTTPS integration tests). Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/docker/launch.py | 11 ++- claude_bottle/backend/docker/pipelock.py | 85 ++++++++++++++++++++---- claude_bottle/pipelock.py | 85 +++++++++++++++++++----- tests/unit/test_pipelock_yaml.py | 43 ++++++++++++ 4 files changed, 194 insertions(+), 30 deletions(-) diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index 45ad6dd..218100a 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -22,7 +22,7 @@ from . import network as network_mod from . import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan -from .pipelock import DockerPipelockProxy, pipelock_proxy_url +from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init # Where the repo root lives, for `docker build` context. Computed once. @@ -63,10 +63,19 @@ def launch( egress_network = network_mod.network_create_egress(plan.slug) stack.callback(network_mod.network_remove, egress_network) + # Per-bottle ephemeral CA for pipelock's TLS interception + # (PRD 0006). One-shot pipelock container writes ca.pem + + # ca-key.pem under plan.stage_dir; .start docker-cp's them + # into the sidecar. The host-side key copy stays in the + # stage dir, which start.py's outer finally `shutil.rmtree`s + # after the sidecar is torn down. 
+ ca_cert_host, ca_key_host = pipelock_tls_init(plan.stage_dir) proxy_plan = dataclasses.replace( plan.proxy_plan, internal_network=internal_network, egress_network=egress_network, + ca_cert_host_path=ca_cert_host, + ca_key_host_path=ca_key_host, ) pipelock_name = proxy.start(proxy_plan) stack.callback(proxy.stop, pipelock_name) diff --git a/claude_bottle/backend/docker/pipelock.py b/claude_bottle/backend/docker/pipelock.py index f2ab4be..73c431d 100644 --- a/claude_bottle/backend/docker/pipelock.py +++ b/claude_bottle/backend/docker/pipelock.py @@ -6,6 +6,7 @@ from __future__ import annotations import os import subprocess +from pathlib import Path from ...log import die, info, warn from ...pipelock import PipelockProxy, PipelockProxyPlan @@ -21,6 +22,12 @@ PIPELOCK_IMAGE = os.environ.get( # Listening port for pipelock's forward proxy. PIPELOCK_PORT = os.environ.get("CLAUDE_BOTTLE_PIPELOCK_PORT", "8888") +# In-container paths where the per-bottle CA cert + key land after +# `docker cp` in `DockerPipelockProxy.start`. Pipelock's rendered +# YAML references these paths under `tls_interception`. +PIPELOCK_CA_CERT_IN_CONTAINER = "/etc/pipelock-ca.pem" +PIPELOCK_CA_KEY_IN_CONTAINER = "/etc/pipelock-ca-key.pem" + def pipelock_container_name(slug: str) -> str: return f"claude-bottle-pipelock-{slug}" @@ -34,19 +41,56 @@ def pipelock_proxy_host_port(slug: str) -> str: return f"{pipelock_container_name(slug)}:{PIPELOCK_PORT}" +def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]: + """Generate a fresh per-bottle CA via a one-shot pipelock container. + + Runs `pipelock tls init` against a host-mounted scratch dir, leaving + `ca.pem` (public cert, mode 600) and `ca-key.pem` (private key, mode + 600) under `<stage_dir>/pipelock-ca/`. Returns the two host paths. + + The image is pinned (same digest the running sidecar uses) so the + generated CA matches what the sidecar expects. Output is owned by 
Output is owned by + whatever UID the one-shot ran as; `DockerPipelockProxy.start` + `docker cp`s the files into the sidecar's filesystem layer, so + runtime ownership inside the sidecar (root in pipelock's + distroless image) is independent.""" + work = stage_dir / "pipelock-ca" + work.mkdir(exist_ok=True) + result = subprocess.run( + ["docker", "run", "--rm", + "-v", f"{work}:/h", + "-e", "PIPELOCK_HOME=/h", + PIPELOCK_IMAGE, "tls", "init"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + die(f"pipelock tls init failed: {result.stderr.strip()}") + cert = work / "ca.pem" + key = work / "ca-key.pem" + if not cert.is_file() or not key.is_file(): + die(f"pipelock tls init did not produce ca files in {work}") + return (cert, key) + + class DockerPipelockProxy(PipelockProxy): """Brings the pipelock sidecar up and down via Docker.""" + CA_CERT_IN_CONTAINER = PIPELOCK_CA_CERT_IN_CONTAINER + CA_KEY_IN_CONTAINER = PIPELOCK_CA_KEY_IN_CONTAINER + def start(self, plan: PipelockProxyPlan) -> str: """Boot the pipelock sidecar: 1. `docker create` on the internal network with the canonical name and argv `run --config /etc/pipelock.yaml --listen 0.0.0.0:`. - 2. `docker cp` the YAML config to /etc/pipelock.yaml in the - writable layer (parent dir must already exist; image is - distroless). - 3. Attach to the per-agent egress network. - 4. `docker start`. + 2. `docker cp` the YAML config to /etc/pipelock.yaml. + 3. `docker cp` the CA cert + key to /etc/pipelock-ca.pem + and /etc/pipelock-ca-key.pem (pipelock runs as root in + its distroless image, so no chown is needed). + 4. Attach to the per-agent egress network. + 5. `docker start`. 
Returns the container name (the proxy_target passed to .stop).""" name = pipelock_container_name(plan.slug) if not plan.yaml_path.is_file(): @@ -54,6 +98,11 @@ class DockerPipelockProxy(PipelockProxy): f"pipelock yaml not found at {plan.yaml_path}; " f"PipelockProxy.prepare must run first" ) + if not plan.ca_cert_host_path.is_file() or not plan.ca_key_host_path.is_file(): + die( + f"pipelock CA missing at {plan.ca_cert_host_path} / " + f"{plan.ca_key_host_path}; pipelock_tls_init must run first" + ) info(f"starting pipelock sidecar {name} on network {plan.internal_network}") @@ -68,15 +117,23 @@ class DockerPipelockProxy(PipelockProxy): if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0: die(f"failed to create pipelock sidecar {name}") - cp_result = subprocess.run( - ["docker", "cp", str(plan.yaml_path), f"{name}:/etc/pipelock.yaml"], - capture_output=True, - text=True, - check=False, - ) - if cp_result.returncode != 0: - subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False) - die(f"failed to copy pipelock yaml into {name}: {cp_result.stderr.strip()}") + for src, dst, label in ( + (plan.yaml_path, "/etc/pipelock.yaml", "yaml"), + (plan.ca_cert_host_path, PIPELOCK_CA_CERT_IN_CONTAINER, "ca cert"), + (plan.ca_key_host_path, PIPELOCK_CA_KEY_IN_CONTAINER, "ca key"), + ): + cp_result = subprocess.run( + ["docker", "cp", str(src), f"{name}:{dst}"], + capture_output=True, + text=True, + check=False, + ) + if cp_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}") if subprocess.run( ["docker", "network", "connect", plan.egress_network, name], diff --git a/claude_bottle/pipelock.py b/claude_bottle/pipelock.py index 2c0aa7f..e9238c7 100644 --- a/claude_bottle/pipelock.py +++ 
b/claude_bottle/pipelock.py @@ -89,13 +89,26 @@ def pipelock_allowlist_summary(bottle: Bottle) -> str: # --- Config build + YAML render -------------------------------------------- -def pipelock_build_config(bottle: Bottle) -> dict[str, object]: +def pipelock_build_config( + bottle: Bottle, + *, + ca_cert_path: str = "", + ca_key_path: str = "", +) -> dict[str, object]: """Build the structured pipelock config dict the sidecar will load. Deliberately carries no env values, no secrets, no per-agent customization beyond the resolved hostname list. The shape mirrors the YAML pipelock expects on disk; `pipelock_render_yaml` serializes - it. Tests assert on this dict; production code renders it.""" + it. Tests assert on this dict; production code renders it. + + `ca_cert_path` / `ca_key_path` are the **in-container** paths the + pipelock sidecar will read its CA from at runtime (they're + populated into the container at start time via `docker cp`). + Pass both or neither: both → emit `tls_interception` block with + `enabled: true`; neither → omit the block entirely (pipelock + falls back to its built-in default of `enabled: false`). Used + by PRD 0006 to turn on pipelock's native TLS interception.""" cfg: dict[str, object] = { "version": 1, "mode": "strict", @@ -116,6 +129,17 @@ def pipelock_build_config(bottle: Bottle) -> dict[str, object]: # with a log line); claude-bottle's default is "block" so a hit # actually stops the request from leaving the egress network. 
cfg["request_body_scanning"] = {"action": bottle.egress.dlp_action} + if ca_cert_path or ca_key_path: + if not (ca_cert_path and ca_key_path): + raise ValueError( + "pipelock_build_config: pass both ca_cert_path and ca_key_path " + "to enable tls_interception, or neither to leave it off" + ) + cfg["tls_interception"] = { + "enabled": True, + "ca_cert": ca_cert_path, + "ca_key": ca_key_path, + } return cfg @@ -159,6 +183,13 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str: lines.append("request_body_scanning:") rbs = cast(dict[str, object], cfg["request_body_scanning"]) lines.append(f' action: "{rbs["action"]}"') + if "tls_interception" in cfg: + lines.append("") + lines.append("tls_interception:") + tls = cast(dict[str, object], cfg["tls_interception"]) + lines.append(f" enabled: {_bool(tls['enabled'])}") + lines.append(f' ca_cert: "{tls["ca_cert"]}"') + lines.append(f' ca_key: "{tls["ca_key"]}"') return "\n".join(lines) + "\n" @@ -170,42 +201,66 @@ class PipelockProxyPlan: """Output of PipelockProxy.prepare; consumed by .start when the sidecar needs to be brought up. - yaml_path + slug are filled in at prepare time. internal_network - and egress_network default to empty and are populated by the - backend's launch step (via dataclasses.replace) once those networks - have actually been created.""" + yaml_path + slug are filled in at prepare time (host-side, side- + effect-free; the YAML references the in-container CA paths + already so it doesn't need the host paths to be valid). The + remaining fields are populated by the backend's launch step + via `dataclasses.replace`: internal/egress networks once + those networks exist, and the CA host paths once the + one-shot `pipelock tls init` has run. 
Empty defaults are + sentinels meaning "not yet set"; `.start` validates that + they are populated.""" yaml_path: Path slug: str internal_network: str = "" egress_network: str = "" + ca_cert_host_path: Path = Path() + ca_key_host_path: Path = Path() class PipelockProxy(ABC): """The pipelock egress proxy. Encapsulates the YAML-config generation; the sidecar's start/stop lifecycle is backend-specific - and lives on concrete subclasses.""" + and lives on concrete subclasses. + + The class-level constants `CA_CERT_IN_CONTAINER` / + `CA_KEY_IN_CONTAINER` are the in-container paths the YAML config + references — they correspond to wherever the backend's `.start` + places the CA cert and key inside the sidecar. Subclasses + override the constants.""" + + CA_CERT_IN_CONTAINER: str = "" + CA_KEY_IN_CONTAINER: str = "" def prepare( self, bottle: Bottle, slug: str, stage_dir: Path ) -> PipelockProxyPlan: """Write the pipelock yaml config (mode 600) under `stage_dir` - and return the plan for `.start`. + and return the plan for `.start`. Pure host-side, no docker + subprocess. `slug` is the agent-derived identifier (lowercased, hyphen-normalized) used as the suffix in every per-agent resource name — the agent container, the pipelock container (`claude-bottle-pipelock-`), the internal/egress networks. It's stored on the returned plan so the backend's - start step can derive the sidecar's container name.""" - yaml_path = stage_dir / "pipelock.yaml" - self._build_pipelock_yaml(bottle, yaml_path) - return PipelockProxyPlan(yaml_path=yaml_path, slug=slug) + start step can derive the sidecar's container name. - def _build_pipelock_yaml(self, bottle: Bottle, yaml_path: Path): - """Write the pipelock yaml config (mode 600) to `yaml_path`.""" - yaml_path.write_text(pipelock_render_yaml(pipelock_build_config(bottle))) + The CA paths the YAML references are the in-container paths + from the concrete subclass's class-level constants. 
The + host-side counterparts are generated by the launch step + (not here, so prepare stays side-effect-free on docker) and + added to the plan via `dataclasses.replace` before `.start`.""" + yaml_path = stage_dir / "pipelock.yaml" + cfg = pipelock_build_config( + bottle, + ca_cert_path=self.CA_CERT_IN_CONTAINER, + ca_key_path=self.CA_KEY_IN_CONTAINER, + ) + yaml_path.write_text(pipelock_render_yaml(cfg)) yaml_path.chmod(0o600) + return PipelockProxyPlan(yaml_path=yaml_path, slug=slug) @abstractmethod def start(self, plan: PipelockProxyPlan) -> str: diff --git a/tests/unit/test_pipelock_yaml.py b/tests/unit/test_pipelock_yaml.py index 53d3ff7..f039752 100644 --- a/tests/unit/test_pipelock_yaml.py +++ b/tests/unit/test_pipelock_yaml.py @@ -37,6 +37,9 @@ class TestBuildConfig(unittest.TestCase): # No SSH entries → no trusted_domains, no ssrf. self.assertNotIn("trusted_domains", cfg) self.assertNotIn("ssrf", cfg) + # Without CA paths, the tls_interception block is omitted — + # pipelock falls back to its built-in default of `enabled: false`. + self.assertNotIn("tls_interception", cfg) def test_ssh_shape(self): cfg = pipelock_build_config(fixture_with_ssh().bottles["dev"]) @@ -49,6 +52,31 @@ class TestBuildConfig(unittest.TestCase): # Strict mode: IPv4 host is also in the api_allowlist union. self.assertIn("100.78.141.42", cast(list[str], cfg["api_allowlist"])) + def test_tls_interception_block_emitted_when_paths_supplied(self): + # PRD 0006: paths flow in via DockerPipelockProxy's in-container + # constants; this directly pins the dict shape. + cfg = pipelock_build_config( + fixture_minimal().bottles["dev"], + ca_cert_path="/etc/pipelock-ca.pem", + ca_key_path="/etc/pipelock-ca-key.pem", + ) + self.assertEqual( + { + "enabled": True, + "ca_cert": "/etc/pipelock-ca.pem", + "ca_key": "/etc/pipelock-ca-key.pem", + }, + cfg["tls_interception"], + ) + + def test_tls_interception_requires_both_paths(self): + # Half-set is a programmer error, not a silent omission. 
+ with self.assertRaises(ValueError): + pipelock_build_config( + fixture_minimal().bottles["dev"], + ca_cert_path="/etc/pipelock-ca.pem", + ) + class TestRenderAndWrite(unittest.TestCase): def setUp(self): @@ -101,6 +129,21 @@ class TestRenderAndWrite(unittest.TestCase): self.assertNotIn("MY_SECRET", content) self.assertNotIn("prompt-message", content) + def test_render_emits_tls_interception_via_prepare(self): + """`DockerPipelockProxy.prepare` plumbs its in-container CA + constants through to the YAML. The block should land in the + rendered output with `enabled: true` and the configured paths. + The actual host-side CA generation happens in launch (not + prepare), so this test exercises only the YAML rendering.""" + plan = DockerPipelockProxy().prepare( + fixture_minimal().bottles["dev"], "demo", self.out_dir + ) + content = plan.yaml_path.read_text() + self.assertIn("tls_interception:", content) + self.assertIn("enabled: true", content) + self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content) + self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content) + if __name__ == "__main__": unittest.main() -- 2.52.0 From 86a9b499bc816c4d330e49acaa6288fef2fdfe90 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 14:50:20 -0400 Subject: [PATCH 4/6] feat(provision): install pipelock CA into the agent + add curl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second step of PRD 0006. With pipelock now doing the bumping, the agent's TLS library has to trust pipelock's per-bottle CA — or every CONNECT to api.anthropic.com is a self-signed-cert error. - BottleBackend.provision gains a non-abstract `provision_ca` with a default no-op (so non-Docker backends aren't forced to implement TLS interception) and orchestrates ca → prompt → skills → ssh → git. CA install runs first so the agent's trust store is rebuilt before anything else in the agent makes a TLS call. 
- New backend/docker/provision/ca.py: docker-cp's the CA cert into the agent at /usr/local/share/ca-certificates/..., runs `update-ca-certificates`, then emits a one-line stderr log with the truncated SHA-256 fingerprint (stdlib `ssl` + `hashlib`; no subprocess for crypto). Module-level constants AGENT_CA_PATH and AGENT_CA_BUNDLE are imported by launch.py so the env trio set at docker run time matches the paths the provisioner writes. - launch.py: rebinds `plan` after `dataclasses.replace`s on the pipelock proxy plan so provision_ca (which reads `plan.proxy_plan.ca_cert_host_path`) sees the populated CA paths. Three new -e flags on the agent's docker run for the NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE trio. - Dockerfile: adds curl to the apt-get install line. curl natively respects HTTPS_PROXY and sends CONNECT directly — the agent doesn't need OS-level DNS for external hostnames (pipelock resolves them on its side of the bumped tunnel). This is the "simple HTTPS request" path the earlier turn needed and Node's stdlib https.request couldn't provide. Co-Authored-By: Claude Opus 4.7 --- Dockerfile | 6 +- claude_bottle/backend/__init__.py | 34 ++++++--- claude_bottle/backend/docker/backend.py | 4 + claude_bottle/backend/docker/launch.py | 17 ++++- claude_bottle/backend/docker/provision/ca.py | 79 ++++++++++++++++++++ 5 files changed, 126 insertions(+), 14 deletions(-) create mode 100644 claude_bottle/backend/docker/provision/ca.py diff --git a/Dockerfile b/Dockerfile index 06e2911..abe1c19 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,9 +19,11 @@ FROM node:22-slim # clarity in case the base ever drops it. socat is the privileged # forwarder for the in-container ssh-agent (see claude_bottle/ssh.py): the agent # runs as root and rejects non-root connections, so socat sits between -# node and the agent socket. +# node and the agent socket.
curl is here so any HTTPS_PROXY-aware +# tool (curl itself, plus anything that shells out to it) works +# against pipelock's bumped TLS without the agent needing local DNS. RUN apt-get update \ - && apt-get install -y --no-install-recommends git ca-certificates openssh-client socat \ + && apt-get install -y --no-install-recommends git ca-certificates openssh-client socat curl \ && rm -rf /var/lib/apt/lists/* # Install claude-code globally. Pinned to the version verified in the v1 diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index 4c85366..8e0dc63 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -204,24 +204,36 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): """Build/run the bottle and yield a handle; tear down on exit.""" def provision(self, plan: PlanT, target: str) -> str | None: - """Copy host-side files (prompt, skills, SSH keys, .git) into - the running bottle. Called from `launch` after the container/ - machine is up. `target` identifies the running instance in - backend-specific terms (Docker: resolved container name; fly: - machine id). Returns the in-container prompt path if a prompt - was provisioned, else None — the Bottle handle uses it to - decide whether to add --append-system-prompt-file to claude's - argv. + """Copy host-side files (CA cert, prompt, skills, SSH keys, + .git) into the running bottle. Called from `launch` after the + container/machine is up. `target` identifies the running + instance in backend-specific terms (Docker: resolved + container name; fly: machine id). Returns the in-container + prompt path if a prompt was provisioned, else None — the + Bottle handle uses it to decide whether to add + --append-system-prompt-file to claude's argv. - Default orchestration: prompt → skills → ssh → git. Subclasses - typically don't override this; they implement the four - sub-methods below.""" + Default orchestration: ca → prompt → skills → ssh → git. 
+ CA install runs first so the agent's trust store is rebuilt + before anything inside the agent makes a TLS call. Subclasses + typically don't override this; they implement the sub-methods + below.""" + self.provision_ca(plan, target) prompt_path = self.provision_prompt(plan, target) self.provision_skills(plan, target) self.provision_ssh(plan, target) self.provision_git(plan, target) return prompt_path + def provision_ca(self, plan: PlanT, target: str) -> None: + """Install pipelock's per-bottle CA into the agent's trust + store so the agent trusts the bumped CONNECT cert pipelock + presents. Default impl is a no-op so backends that don't + yet support TLS interception (every backend except Docker + today) aren't forced to implement it. The Docker backend + overrides to docker-cp the cert in and run + `update-ca-certificates`.""" + @abstractmethod def provision_prompt(self, plan: PlanT, target: str) -> str | None: """Copy the prompt file into the running bottle. Returns the diff --git a/claude_bottle/backend/docker/backend.py b/claude_bottle/backend/docker/backend.py index 97d1344..0ba0a5c 100644 --- a/claude_bottle/backend/docker/backend.py +++ b/claude_bottle/backend/docker/backend.py @@ -24,6 +24,7 @@ from .bottle import DockerBottle from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan from .pipelock import DockerPipelockProxy +from .provision import ca as _ca from .provision import git as _git from .provision import prompt as _prompt from .provision import skills as _skills @@ -47,6 +48,9 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle: yield bottle + def provision_ca(self, plan: DockerBottlePlan, target: str) -> None: + _ca.provision_ca(plan, target) + def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None: return _prompt.provision_prompt(plan, target) diff --git 
a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index 218100a..5e1d09d 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -23,6 +23,7 @@ from . import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init +from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH # Where the repo root lives, for `docker build` context. Computed once. @@ -77,7 +78,11 @@ def launch( ca_cert_host_path=ca_cert_host, ca_key_host_path=ca_key_host, ) - pipelock_name = proxy.start(proxy_plan) + # Re-bind the outer plan so provision_ca (which runs later + # from `provision(plan, container)`) can read the populated + # CA paths off plan.proxy_plan. + plan = dataclasses.replace(plan, proxy_plan=proxy_plan) + pipelock_name = proxy.start(plan.proxy_plan) stack.callback(proxy.stop, pipelock_name) container = _run_agent_container(plan, internal_network) @@ -102,6 +107,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str: "-e", f"HTTPS_PROXY={proxy_url}", "-e", f"HTTP_PROXY={proxy_url}", "-e", "NO_PROXY=localhost,127.0.0.1", + # CA trust trio for the agent process. Docker propagates + # run-time env into `docker exec`, so `claude` sees these + # without per-exec threading. NODE_EXTRA_CA_CERTS points at + # the cert file (Node appends it to its bundled roots); + # SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system + # bundle that `update-ca-certificates` rebuilds in + # provision_ca. 
+ "-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}", + "-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}", + "-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}", ] if plan.use_runsc: docker_args.extend(["--runtime", "runsc"]) diff --git a/claude_bottle/backend/docker/provision/ca.py b/claude_bottle/backend/docker/provision/ca.py new file mode 100644 index 0000000..1d30192 --- /dev/null +++ b/claude_bottle/backend/docker/provision/ca.py @@ -0,0 +1,79 @@ +"""Install pipelock's per-bottle CA into the agent container's trust +store (PRD 0006). + +By the time this provisioner runs, `pipelock_tls_init` has generated +a fresh CA into `plan.stage_dir/pipelock-ca/` and the pipelock sidecar +is up with `tls_interception: { enabled: true }` referencing the +in-container CA paths. This step makes the agent trust certs signed +by that CA so the agent's TLS handshake with the bumped CONNECT +succeeds. + +Cert lands on Debian's standard source path +(`/usr/local/share/ca-certificates/`); `update-ca-certificates` +rebuilds `/etc/ssl/certs/ca-certificates.crt`, which is what curl, +Python `ssl`, and OpenSSL-based tools all read by default. The env +trio set on the agent's `docker run` covers Node +(`NODE_EXTRA_CA_CERTS`) and Python `requests` / +`SSL_CERT_FILE`-honoring libraries that don't load the system +bundle. + +The fingerprint is computed via stdlib (`ssl.PEM_cert_to_DER_cert` ++ `hashlib.sha256`) and logged once to stderr. The private key +stays on the host (under `stage_dir`) until teardown wipes the +stage dir; nothing in the agent ever sees it.""" + +from __future__ import annotations + +import hashlib +import ssl +import subprocess + +from ....log import info +from ..bottle_plan import DockerBottlePlan + + +# Debian-family path for sources that `update-ca-certificates` reads. +# Bundle path is what the command rebuilds and what every standard +# TLS consumer in the image reads. 
+AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-pipelock-ca.crt" +AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt" + + +def provision_ca(plan: DockerBottlePlan, target: str) -> None: + """Copy pipelock's CA cert into the agent, rebuild the trust + bundle, emit a one-line fingerprint log. Called from + `BottleBackend.provision` after the agent container is up.""" + container = target + cert_host_path = plan.proxy_plan.ca_cert_host_path + if not cert_host_path or not cert_host_path.is_file(): + # Defensive: provision runs after launch wires CA paths + # onto the plan via dataclasses.replace; an empty path here + # would mean that wiring was skipped. + from ....log import die + die( + f"pipelock CA cert missing at {cert_host_path or '(empty)'}; " + f"launch must have called pipelock_tls_init and re-bound " + f"the plan before provision" + ) + + subprocess.run( + ["docker", "cp", str(cert_host_path), f"{container}:{AGENT_CA_PATH}"], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", "-u", "0", container, "chmod", "644", AGENT_CA_PATH], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", "-u", "0", container, "update-ca-certificates"], + stdout=subprocess.DEVNULL, + check=True, + ) + + # Stdlib SHA-256 of the cert's DER bytes — the standard + # fingerprint form. Never the private key. + der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text()) + fingerprint = hashlib.sha256(der).hexdigest() + info(f"pipelock ca fingerprint: sha256:{fingerprint[:32]}...") -- 2.52.0 From fb10c8dd8af44878e3429e0c8847a5e05134a7a9 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 14:52:53 -0400 Subject: [PATCH 5/6] feat(bottle-plan): render TLS interception in the dry-run preflight Third step of PRD 0006. The preflight now surfaces the TLS- intercept layer so the operator sees it before agreeing to launch. 
- Text output: one new line under the egress summary ("tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)"). - JSON output (--format=json contract): new egress.tls_interception: { enabled: true, ca_fingerprint: null } block. Fingerprint is always null at dry-run because the CA only exists after launch; real launches print it as a stderr log line from provision_ca. - Pin the new shape in the dry-run integration test. Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/docker/bottle_plan.py | 10 ++++++++++ tests/integration/test_dry_run_plan.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index 5ad3da8..cd9dc19 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -93,6 +93,7 @@ class DockerBottlePlan(BottlePlan): else: info(" ssh hosts : (none)") info(f" egress : {self.allowlist_summary}") + info(" tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)") info( f"prompt : {len(v.agent.prompt)} chars; " f"first line: {v.prompt_first_line or '(empty)'}" @@ -117,6 +118,15 @@ class DockerBottlePlan(BottlePlan): "egress": { "host_count": len(hosts), "hosts": hosts, + # PRD 0006: pipelock's `tls_interception` block is on + # for every launched bottle. ca_fingerprint is always + # null at dry-run because the CA doesn't exist yet — + # real launches print the fingerprint to stderr from + # provision_ca. Reserved field for forward-compat. 
+ "tls_interception": { + "enabled": True, + "ca_fingerprint": None, + }, }, "prompt": { "length": len(v.agent.prompt), diff --git a/tests/integration/test_dry_run_plan.py b/tests/integration/test_dry_run_plan.py index c0ae3eb..b564ae3 100644 --- a/tests/integration/test_dry_run_plan.py +++ b/tests/integration/test_dry_run_plan.py @@ -92,6 +92,14 @@ class TestDryRunPlan(unittest.TestCase): self.assertEqual(sorted(set(hosts)), hosts, "hosts must be sorted and deduplicated") + # PRD 0006: TLS interception is on for every launched + # bottle. Fingerprint is null at dry-run (no CA exists + # yet); real launches log it from provision_ca. + self.assertEqual( + {"enabled": True, "ca_fingerprint": None}, + plan["egress"]["tls_interception"], + ) + # No Docker side effects (see the GITEA_ACTIONS skip note # above — this guard runs locally only). if check_side_effects: -- 2.52.0 From d3115ae5fdb71d449f031458dacf84afaa6ff5d2 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 15:01:17 -0400 Subject: [PATCH 6/6] test(pipelock): HTTPS integration tests for the bumped path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth and final step of PRD 0006. Two new end-to-end tests pin the two paths through pipelock's tls_interception layer. - test_pipelock_blocks_secret_https_post: posts a GitHub-PAT-shaped body to api.anthropic.com over HTTPS through the bottle. With pipelock now bumping the CONNECT and seeing the decrypted body, it returns 403 with the documented `blocked: request body contains secret: GitHub Token` body. The probe is a single curl invocation — curl natively does CONNECT through HTTPS_PROXY, the agent's trust store now contains pipelock's CA, no hand-rolled TLS in the test. - test_pipelock_allows_normal_https: GETs git's README from raw.githubusercontent.com (a baked-in allowlist host). 
200 + non-zero body length proves the full chain works: pipelock_tls_init → docker cp of CA into sidecar → bumped CONNECT → provision_ca installed CA in agent → curl trusts pipelock's bumped leaf → body forwarded back through the tunnel. - test_pipelock_sidecar_smoke: pre-existing direct-start smoke test updated to call pipelock_tls_init and populate the CA paths on the plan. (The full launch flow does this in launch.py; this test exercises the proxy class in isolation.) Co-Authored-By: Claude Opus 4.7 --- .../test_pipelock_allows_normal_https.py | 84 ++++++++++++++++ .../test_pipelock_blocks_secret_https_post.py | 96 +++++++++++++++++++ .../test_pipelock_sidecar_smoke.py | 8 ++ 3 files changed, 188 insertions(+) create mode 100644 tests/integration/test_pipelock_allows_normal_https.py create mode 100644 tests/integration/test_pipelock_blocks_secret_https_post.py diff --git a/tests/integration/test_pipelock_allows_normal_https.py b/tests/integration/test_pipelock_allows_normal_https.py new file mode 100644 index 0000000..97b1732 --- /dev/null +++ b/tests/integration/test_pipelock_allows_normal_https.py @@ -0,0 +1,84 @@ +"""Integration: with pipelock's tls_interception enabled (PRD 0006), +a clean HTTPS GET to an allowlisted host succeeds end-to-end through +the bumped tunnel. + +Complement to test_pipelock_blocks_secret_https_post — together they +pin pipelock's two paths (block on body match, allow on clean +traffic). 
This test is also the implicit TLS-trust check: if +provision_ca had failed to install pipelock's CA into the agent's +trust store, curl would have rejected the bumped leaf cert and the +fetch would have failed before any HTTP response could come back.""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from tests._docker import skip_unless_docker +from tests.fixtures import fixture_minimal + + +# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST. +# `git`'s own README on the master branch is a long-lived raw file +# (~3 KB) that any CI runner with internet can fetch. +_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md" + + +@skip_unless_docker() +class TestPipelockAllowsNormalHttps(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_get_to_allowed_host_succeeds(self): + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=fixture_minimal(), + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -eu\n" + 'curl --proxy "$HTTPS_PROXY" -s --max-time 10 \\\n' + " -w 'status=%{http_code}\\n' \\\n" + " -o /tmp/probe-body.txt \\\n" + f" {_TARGET_URL}\n" + 'echo "len=$(wc -c < /tmp/probe-body.txt)"\n' + ) + result = bottle.exec(script) + finally: + shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + # 200 from the upstream (pipelock forwarded after the body + # 
scan passed). If curl had failed the bumped-cert trust + # check, the exit code or status would be non-200 here. + self.assertIn( + "status=200", result.stdout, + f"expected 200 from raw.githubusercontent.com; got: {result.stdout!r}", + ) + # The git README is ~3 KB. Anything substantially non-zero + # proves the response body actually transferred — i.e. the + # CONNECT tunnel + bumped TLS + body forwarding all worked. + self.assertNotIn( + "len=0\n", result.stdout, + f"response body was empty: {result.stdout!r}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_pipelock_blocks_secret_https_post.py b/tests/integration/test_pipelock_blocks_secret_https_post.py new file mode 100644 index 0000000..0863d05 --- /dev/null +++ b/tests/integration/test_pipelock_blocks_secret_https_post.py @@ -0,0 +1,96 @@ +"""Integration: with pipelock's tls_interception enabled (PRD 0006), +a credential POST sent over HTTPS is blocked by pipelock's body-scan +layer — closing the gap that motivated this PRD. + +End-to-end: drives `BottleBackend.prepare → launch` so the real +image build, network plumbing, pipelock_tls_init, sidecar bring-up, +and provision_ca (CA install in the agent's trust store) are all in +the loop. The probe is a single `curl --proxy "$HTTPS_PROXY" -X POST +... https://api.anthropic.com/...` — curl natively does CONNECT +through the proxy, the agent's trust store now contains pipelock's +per-bottle CA so curl trusts pipelock's bumped leaf, and pipelock +sees the decrypted body and returns its known +`blocked: request body contains secret: ` 403.""" + +from __future__ import annotations + +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend import BottleSpec, get_bottle_backend +from claude_bottle.manifest import Manifest +from tests._docker import skip_unless_docker + + +# Synthetic value shaped like a GitHub Personal Access Token; not a +# real credential. 
Carried into the bottle as an env var so the +# probe shell can read it via $FAKE_TOKEN without ever interpolating +# the value on the bash `bottle.exec` argv. +_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ" + + +@skip_unless_docker() +class TestPipelockBlocksSecretHttpsPost(unittest.TestCase): + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_https_post_with_credential_body_is_blocked(self): + manifest = Manifest.from_json_obj({ + "bottles": { + "dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}}, + }, + "agents": { + "demo": {"skills": [], "prompt": "", "bottle": "dev"}, + }, + }) + backend = get_bottle_backend() + stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage.")) + try: + spec = BottleSpec( + manifest=manifest, + agent_name="demo", + copy_cwd=False, + user_cwd=str(stage_dir), + forward_oauth_token=False, + ) + plan = backend.prepare(spec, stage_dir=stage_dir) + with backend.launch(plan) as bottle: + script = ( + "set -eu\n" + 'curl --proxy "$HTTPS_PROXY" -s --max-time 8 \\\n' + " -w 'status=%{http_code}\\n' \\\n" + " -o /tmp/probe-body.txt \\\n" + ' -X POST -d "token=$FAKE_TOKEN" \\\n' + " https://api.anthropic.com/dlp-probe\n" + 'echo "body=$(head -c 200 /tmp/probe-body.txt)"\n' + ) + result = bottle.exec(script) + finally: + shutil.rmtree(stage_dir, ignore_errors=True) + + self.assertEqual( + 0, result.returncode, + f"exec wrapper failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + # Pipelock's body-scan block returns 403 with a plain-text + # body starting `blocked: ` (pinned empirically; see + # tests/unit/test_mitmproxy_verdict.py for the + # corresponding-fingerprint test, retained from PR #8 as + # general pipelock-block-shape coverage). 
+ self.assertIn( + "status=403", result.stdout, + f"expected 403 from pipelock; got: {result.stdout!r}", + ) + self.assertIn( + "body=blocked: ", result.stdout, + f"expected pipelock block body; got: {result.stdout!r}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_pipelock_sidecar_smoke.py b/tests/integration/test_pipelock_sidecar_smoke.py index 682e61f..cb96a8b 100644 --- a/tests/integration/test_pipelock_sidecar_smoke.py +++ b/tests/integration/test_pipelock_sidecar_smoke.py @@ -28,6 +28,7 @@ from claude_bottle.backend.docker.pipelock import ( PIPELOCK_PORT, DockerPipelockProxy, pipelock_container_name, + pipelock_tls_init, ) from tests._docker import skip_unless_docker from tests.fixtures import fixture_minimal @@ -79,10 +80,17 @@ class TestPipelockSidecarSmoke(unittest.TestCase): self.internal_net = network_create_internal(self.slug) self.egress_net = network_create_egress(self.slug) + # PRD 0006: pipelock's tls_interception block in the rendered + # YAML references in-container CA paths; .start docker-cp's + # those files in. The full launch flow generates the CA via + # `pipelock_tls_init`; this smoke test calls it directly. + ca_cert_host, ca_key_host = pipelock_tls_init(self.work_dir) plan = dataclasses.replace( prep, internal_network=self.internal_net, egress_network=self.egress_net, + ca_cert_host_path=ca_cert_host, + ca_key_host_path=ca_key_host, ) self.sidecar_name = proxy.start(plan) -- 2.52.0