Merge pull request 'PRD 0006: pipelock native TLS interception' (#9) from pipelock-tls-interception into main
test / unit (push) Successful in 12s
test / integration (push) Successful in 13s

This commit was merged in pull request #9.
This commit is contained in:
2026-05-12 15:03:23 -04:00
16 changed files with 826 additions and 557 deletions
+4 -2
View File
@@ -19,9 +19,11 @@ FROM node:22-slim
# clarity in case the base ever drops it. socat is the privileged
# forwarder for the in-container ssh-agent (see claude_bottle/ssh.py): the agent
# runs as root and rejects non-root connections, so socat sits between
# node and the agent socket.
# node and the agent socket. curl is here so any HTTPS_PROXY-aware
# tool (curl itself, plus anything that shells out to it) works
# against pipelock's bumped TLS without the agent needing local DNS.
RUN apt-get update \
&& apt-get install -y --no-install-recommends git ca-certificates openssh-client socat \
&& apt-get install -y --no-install-recommends git ca-certificates openssh-client socat curl \
&& rm -rf /var/lib/apt/lists/*
# Install claude-code globally. Pinned to the version verified in the v1
+23 -11
View File
@@ -204,24 +204,36 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
"""Build/run the bottle and yield a handle; tear down on exit."""
def provision(self, plan: PlanT, target: str) -> str | None:
"""Copy host-side files (prompt, skills, SSH keys, .git) into
the running bottle. Called from `launch` after the container/
machine is up. `target` identifies the running instance in
backend-specific terms (Docker: resolved container name; fly:
machine id). Returns the in-container prompt path if a prompt
was provisioned, else None — the Bottle handle uses it to
decide whether to add --append-system-prompt-file to claude's
argv.
"""Copy host-side files (CA cert, prompt, skills, SSH keys,
.git) into the running bottle. Called from `launch` after the
container/machine is up. `target` identifies the running
instance in backend-specific terms (Docker: resolved
container name; fly: machine id). Returns the in-container
prompt path if a prompt was provisioned, else None — the
Bottle handle uses it to decide whether to add
--append-system-prompt-file to claude's argv.
Default orchestration: prompt → skills → ssh → git. Subclasses
typically don't override this; they implement the four
sub-methods below."""
Default orchestration: ca → prompt → skills → ssh → git.
CA install runs first so the agent's trust store is rebuilt
before anything inside the agent makes a TLS call. Subclasses
typically don't override this; they implement the sub-methods
below."""
self.provision_ca(plan, target)
prompt_path = self.provision_prompt(plan, target)
self.provision_skills(plan, target)
self.provision_ssh(plan, target)
self.provision_git(plan, target)
return prompt_path
def provision_ca(self, plan: PlanT, target: str) -> None:
    """Hook: make the agent trust pipelock's per-bottle CA.

    The base implementation deliberately does nothing, so a backend
    without TLS-interception support does not have to implement it.
    A backend that does support interception overrides this to place
    the CA cert in the agent's trust store (the Docker backend
    docker-cp's the cert in and runs `update-ca-certificates`), which
    lets the agent accept the bumped CONNECT cert pipelock presents.

    `plan` and `target` carry the same meaning as in `provision`.
    """
    return None
@abstractmethod
def provision_prompt(self, plan: PlanT, target: str) -> str | None:
"""Copy the prompt file into the running bottle. Returns the
+4
View File
@@ -24,6 +24,7 @@ from .bottle import DockerBottle
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan
from .pipelock import DockerPipelockProxy
from .provision import ca as _ca
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
@@ -47,6 +48,9 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
with _launch.launch(plan, proxy=self._proxy, provision=self.provision) as bottle:
yield bottle
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
    """Docker implementation of the CA-provisioning hook.

    Passes `plan` and `target` unchanged to `provision_ca` in the
    `.provision.ca` module (imported as `_ca`); returns nothing.
    """
    _ca.provision_ca(plan, target)
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
    """Docker implementation of the prompt-provisioning step.

    Passes `plan` and `target` unchanged to `provision_prompt` in the
    `.provision.prompt` module (imported as `_prompt`) and returns
    whatever that function returns (a `str` or `None`).
    """
    return _prompt.provision_prompt(plan, target)
@@ -93,6 +93,7 @@ class DockerBottlePlan(BottlePlan):
else:
info(" ssh hosts : (none)")
info(f" egress : {self.allowlist_summary}")
info(" tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)")
info(
f"prompt : {len(v.agent.prompt)} chars; "
f"first line: {v.prompt_first_line or '(empty)'}"
@@ -117,6 +118,15 @@ class DockerBottlePlan(BottlePlan):
"egress": {
"host_count": len(hosts),
"hosts": hosts,
# PRD 0006: pipelock's `tls_interception` block is on
# for every launched bottle. ca_fingerprint is always
# null at dry-run because the CA doesn't exist yet —
# real launches print the fingerprint to stderr from
# provision_ca. Reserved field for forward-compat.
"tls_interception": {
"enabled": True,
"ca_fingerprint": None,
},
},
"prompt": {
"length": len(v.agent.prompt),
+26 -2
View File
@@ -22,7 +22,8 @@ from . import network as network_mod
from . import util as docker_mod
from .bottle import DockerBottle
from .bottle_plan import DockerBottlePlan
from .pipelock import DockerPipelockProxy, pipelock_proxy_url
from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
# Where the repo root lives, for `docker build` context. Computed once.
@@ -63,12 +64,25 @@ def launch(
egress_network = network_mod.network_create_egress(plan.slug)
stack.callback(network_mod.network_remove, egress_network)
# Per-bottle ephemeral CA for pipelock's TLS interception
# (PRD 0006). One-shot pipelock container writes ca.pem +
# ca-key.pem under plan.stage_dir; .start docker-cp's them
# into the sidecar. The private key is never copied into the
# agent container: it exists only inside the sidecar and in the
# host stage dir, which start.py's outer finally `shutil.rmtree`s
# after the sidecar is torn down.
ca_cert_host, ca_key_host = pipelock_tls_init(plan.stage_dir)
proxy_plan = dataclasses.replace(
plan.proxy_plan,
internal_network=internal_network,
egress_network=egress_network,
ca_cert_host_path=ca_cert_host,
ca_key_host_path=ca_key_host,
)
pipelock_name = proxy.start(proxy_plan)
# Re-bind the outer plan so provision_ca (which runs later
# from `provision(plan, container)`) can read the populated
# CA paths off plan.proxy_plan.
plan = dataclasses.replace(plan, proxy_plan=proxy_plan)
pipelock_name = proxy.start(plan.proxy_plan)
stack.callback(proxy.stop, pipelock_name)
container = _run_agent_container(plan, internal_network)
@@ -93,6 +107,16 @@ def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"-e", f"HTTPS_PROXY={proxy_url}",
"-e", f"HTTP_PROXY={proxy_url}",
"-e", "NO_PROXY=localhost,127.0.0.1",
# CA trust trio for the agent process. Docker propagates
# run-time env into `docker exec`, so `claude` sees these
# without per-exec threading. NODE_EXTRA_CA_CERTS points at
# the cert file (Node appends it to its bundled roots);
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
# bundle that `update-ca-certificates` rebuilds in
# provision_ca.
"-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
"-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
"-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
]
if plan.use_runsc:
docker_args.extend(["--runtime", "runsc"])
+71 -14
View File
@@ -6,6 +6,7 @@ from __future__ import annotations
import os
import subprocess
from pathlib import Path
from ...log import die, info, warn
from ...pipelock import PipelockProxy, PipelockProxyPlan
@@ -21,6 +22,12 @@ PIPELOCK_IMAGE = os.environ.get(
# Listening port for pipelock's forward proxy.
PIPELOCK_PORT = os.environ.get("CLAUDE_BOTTLE_PIPELOCK_PORT", "8888")
# In-container paths where the per-bottle CA cert + key land after
# `docker cp` in `DockerPipelockProxy.start`. Pipelock's rendered
# YAML references these paths under `tls_interception`.
PIPELOCK_CA_CERT_IN_CONTAINER = "/etc/pipelock-ca.pem"
PIPELOCK_CA_KEY_IN_CONTAINER = "/etc/pipelock-ca-key.pem"
def pipelock_container_name(slug: str) -> str:
    """Return the container name of the pipelock sidecar for `slug`.

    The name is the fixed prefix `claude-bottle-pipelock-` followed by
    the slug.
    """
    return "claude-bottle-pipelock-{}".format(slug)
@@ -34,19 +41,56 @@ def pipelock_proxy_host_port(slug: str) -> str:
return f"{pipelock_container_name(slug)}:{PIPELOCK_PORT}"
def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
    """Create a brand-new CA for one bottle using a throwaway container.

    A `pipelock-ca` directory is created under `stage_dir` and
    bind-mounted at `/h` in a `docker run --rm` of `PIPELOCK_IMAGE`,
    invoked with `PIPELOCK_HOME=/h` and the arguments `tls init`.
    Afterwards `ca.pem` (public cert) and `ca-key.pem` (private key)
    are expected to exist in that directory.

    Returns `(cert_path, key_path)`, both host-side paths.

    Calls `die` when the docker command exits non-zero (reporting its
    stripped stderr) or when either file is absent afterwards.

    NOTE(review): ownership and mode of the generated files are decided
    by pipelock and the user the one-shot container runs as, not by
    this function.
    """
    ca_dir = stage_dir / "pipelock-ca"
    ca_dir.mkdir(exist_ok=True)

    argv = [
        "docker", "run", "--rm",
        "-v", f"{ca_dir}:/h",
        "-e", "PIPELOCK_HOME=/h",
        PIPELOCK_IMAGE, "tls", "init",
    ]
    proc = subprocess.run(argv, capture_output=True, text=True, check=False)
    if proc.returncode != 0:
        die(f"pipelock tls init failed: {proc.stderr.strip()}")

    cert_path = ca_dir / "ca.pem"
    key_path = ca_dir / "ca-key.pem"
    # Both files must be present; a clean exit code alone is not trusted.
    if not (cert_path.is_file() and key_path.is_file()):
        die(f"pipelock tls init did not produce ca files in {ca_dir}")
    return (cert_path, key_path)
class DockerPipelockProxy(PipelockProxy):
"""Brings the pipelock sidecar up and down via Docker."""
CA_CERT_IN_CONTAINER = PIPELOCK_CA_CERT_IN_CONTAINER
CA_KEY_IN_CONTAINER = PIPELOCK_CA_KEY_IN_CONTAINER
def start(self, plan: PipelockProxyPlan) -> str:
"""Boot the pipelock sidecar:
1. `docker create` on the internal network with the canonical
name and argv `run --config /etc/pipelock.yaml --listen
0.0.0.0:<port>`.
2. `docker cp` the YAML config to /etc/pipelock.yaml in the
writable layer (parent dir must already exist; image is
distroless).
3. Attach to the per-agent egress network.
4. `docker start`.
2. `docker cp` the YAML config to /etc/pipelock.yaml.
3. `docker cp` the CA cert + key to /etc/pipelock-ca.pem
and /etc/pipelock-ca-key.pem (pipelock runs as root in
its distroless image, so no chown is needed).
4. Attach to the per-agent egress network.
5. `docker start`.
Returns the container name (the proxy_target passed to .stop)."""
name = pipelock_container_name(plan.slug)
if not plan.yaml_path.is_file():
@@ -54,6 +98,11 @@ class DockerPipelockProxy(PipelockProxy):
f"pipelock yaml not found at {plan.yaml_path}; "
f"PipelockProxy.prepare must run first"
)
if not plan.ca_cert_host_path.is_file() or not plan.ca_key_host_path.is_file():
die(
f"pipelock CA missing at {plan.ca_cert_host_path} / "
f"{plan.ca_key_host_path}; pipelock_tls_init must run first"
)
info(f"starting pipelock sidecar {name} on network {plan.internal_network}")
@@ -68,15 +117,23 @@ class DockerPipelockProxy(PipelockProxy):
if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0:
die(f"failed to create pipelock sidecar {name}")
cp_result = subprocess.run(
["docker", "cp", str(plan.yaml_path), f"{name}:/etc/pipelock.yaml"],
capture_output=True,
text=True,
check=False,
)
if cp_result.returncode != 0:
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
die(f"failed to copy pipelock yaml into {name}: {cp_result.stderr.strip()}")
for src, dst, label in (
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
(plan.ca_cert_host_path, PIPELOCK_CA_CERT_IN_CONTAINER, "ca cert"),
(plan.ca_key_host_path, PIPELOCK_CA_KEY_IN_CONTAINER, "ca key"),
):
cp_result = subprocess.run(
["docker", "cp", str(src), f"{name}:{dst}"],
capture_output=True,
text=True,
check=False,
)
if cp_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
if subprocess.run(
["docker", "network", "connect", plan.egress_network, name],
@@ -0,0 +1,79 @@
"""Install pipelock's per-bottle CA into the agent container's trust
store (PRD 0006).
By the time this provisioner runs, `pipelock_tls_init` has generated
a fresh CA into `plan.stage_dir/pipelock-ca/` and the pipelock sidecar
is up with `tls_interception: { enabled: true }` referencing the
in-container CA paths. This step makes the agent trust certs signed
by that CA so the agent's TLS handshake with the bumped CONNECT
succeeds.
Cert lands on Debian's standard source path
(`/usr/local/share/ca-certificates/`); `update-ca-certificates`
rebuilds `/etc/ssl/certs/ca-certificates.crt`, which is what curl,
Python `ssl`, and OpenSSL-based tools all read by default. The env
trio set on the agent's `docker run` covers Node
(`NODE_EXTRA_CA_CERTS`) and Python `requests` /
`SSL_CERT_FILE`-honoring libraries that don't load the system
bundle.
The fingerprint is computed via stdlib (`ssl.PEM_cert_to_DER_cert`
+ `hashlib.sha256`) and logged once to stderr. The private key
stays on the host (under `stage_dir`) until teardown wipes the
stage dir; nothing in the agent ever sees it."""
from __future__ import annotations
import hashlib
import ssl
import subprocess
from ....log import info
from ..bottle_plan import DockerBottlePlan
# Debian-family path for sources that `update-ca-certificates` reads.
# Bundle path is what the command rebuilds and what every standard
# TLS consumer in the image reads.
AGENT_CA_PATH = "/usr/local/share/ca-certificates/claude-bottle-pipelock-ca.crt"
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
    """Install pipelock's CA cert into the agent container's trust store.

    Copies the host-side cert at `plan.proxy_plan.ca_cert_host_path` to
    `AGENT_CA_PATH` inside the container, sets it to mode 644, runs
    `update-ca-certificates` as UID 0, and finally logs a truncated
    SHA-256 fingerprint of the cert. Only the public cert is read here;
    the private key path on the plan is never touched.

    Args:
        plan: Bottle plan; `plan.proxy_plan.ca_cert_host_path` must point
            at the PEM-encoded CA cert on the host.
        target: Name of the running agent container, as passed to
            `docker cp` / `docker exec`.

    Raises:
        subprocess.CalledProcessError: a `docker` step exited non-zero.
        ValueError: the cert file is not a valid PEM certificate.

    Calls `die` when the cert path is unset or is not an existing file.
    """
    container = target
    cert_host_path = plan.proxy_plan.ca_cert_host_path

    # pathlib paths are always truthy, so `not path` cannot detect an
    # unset value. The plan's "not yet set" default is an empty `Path()`,
    # which stringifies as ".", so test for that explicitly.
    unset = cert_host_path is None or str(cert_host_path) in ("", ".")
    if unset or not cert_host_path.is_file():
        # Defensive: provision runs after launch wires CA paths
        # onto the plan via dataclasses.replace; an unset path here
        # would mean that wiring was skipped.
        from ....log import die

        shown = "(empty)" if unset else cert_host_path
        die(
            f"pipelock CA cert missing at {shown}; "
            f"launch must have called pipelock_tls_init and re-bound "
            f"the plan before provision"
        )

    subprocess.run(
        ["docker", "cp", str(cert_host_path), f"{container}:{AGENT_CA_PATH}"],
        stdout=subprocess.DEVNULL,
        check=True,
    )
    # The copied file keeps whatever mode it had on the host; make it
    # world-readable so non-root processes in the agent can load it.
    subprocess.run(
        ["docker", "exec", "-u", "0", container, "chmod", "644", AGENT_CA_PATH],
        stdout=subprocess.DEVNULL,
        check=True,
    )
    subprocess.run(
        ["docker", "exec", "-u", "0", container, "update-ca-certificates"],
        stdout=subprocess.DEVNULL,
        check=True,
    )

    # Stdlib SHA-256 of the cert's DER bytes — the standard
    # fingerprint form. Never the private key. PEM is pure ASCII, so
    # decode it explicitly instead of depending on the locale encoding.
    der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text(encoding="ascii"))
    fingerprint = hashlib.sha256(der).hexdigest()
    info(f"pipelock ca fingerprint: sha256:{fingerprint[:32]}...")
+70 -15
View File
@@ -89,13 +89,26 @@ def pipelock_allowlist_summary(bottle: Bottle) -> str:
# --- Config build + YAML render --------------------------------------------
def pipelock_build_config(bottle: Bottle) -> dict[str, object]:
def pipelock_build_config(
bottle: Bottle,
*,
ca_cert_path: str = "",
ca_key_path: str = "",
) -> dict[str, object]:
"""Build the structured pipelock config dict the sidecar will load.
Deliberately carries no env values, no secrets, no per-agent
customization beyond the resolved hostname list. The shape mirrors
the YAML pipelock expects on disk; `pipelock_render_yaml` serializes
it. Tests assert on this dict; production code renders it."""
it. Tests assert on this dict; production code renders it.
`ca_cert_path` / `ca_key_path` are the **in-container** paths the
pipelock sidecar will read its CA from at runtime (they're
populated into the container at start time via `docker cp`).
Pass both or neither: both → emit `tls_interception` block with
`enabled: true`; neither → omit the block entirely (pipelock
falls back to its built-in default of `enabled: false`). Used
by PRD 0006 to turn on pipelock's native TLS interception."""
cfg: dict[str, object] = {
"version": 1,
"mode": "strict",
@@ -116,6 +129,17 @@ def pipelock_build_config(bottle: Bottle) -> dict[str, object]:
# with a log line); claude-bottle's default is "block" so a hit
# actually stops the request from leaving the egress network.
cfg["request_body_scanning"] = {"action": bottle.egress.dlp_action}
if ca_cert_path or ca_key_path:
if not (ca_cert_path and ca_key_path):
raise ValueError(
"pipelock_build_config: pass both ca_cert_path and ca_key_path "
"to enable tls_interception, or neither to leave it off"
)
cfg["tls_interception"] = {
"enabled": True,
"ca_cert": ca_cert_path,
"ca_key": ca_key_path,
}
return cfg
@@ -159,6 +183,13 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
lines.append("request_body_scanning:")
rbs = cast(dict[str, object], cfg["request_body_scanning"])
lines.append(f' action: "{rbs["action"]}"')
if "tls_interception" in cfg:
lines.append("")
lines.append("tls_interception:")
tls = cast(dict[str, object], cfg["tls_interception"])
lines.append(f" enabled: {_bool(tls['enabled'])}")
lines.append(f' ca_cert: "{tls["ca_cert"]}"')
lines.append(f' ca_key: "{tls["ca_key"]}"')
return "\n".join(lines) + "\n"
@@ -170,42 +201,66 @@ class PipelockProxyPlan:
"""Output of PipelockProxy.prepare; consumed by .start when the
sidecar needs to be brought up.
yaml_path + slug are filled in at prepare time. internal_network
and egress_network default to empty and are populated by the
backend's launch step (via dataclasses.replace) once those networks
have actually been created."""
yaml_path + slug are filled in at prepare time (host-side, side-
effect-free; the YAML references the in-container CA paths
already so it doesn't need the host paths to be valid). The
remaining fields are populated by the backend's launch step
via `dataclasses.replace`: internal/egress networks once
those networks exist, and the CA host paths once the
one-shot `pipelock tls init` has run. Empty defaults are
sentinels meaning "not yet set"; `.start` validates that
they are populated."""
yaml_path: Path
slug: str
internal_network: str = ""
egress_network: str = ""
ca_cert_host_path: Path = Path()
ca_key_host_path: Path = Path()
class PipelockProxy(ABC):
"""The pipelock egress proxy. Encapsulates the YAML-config
generation; the sidecar's start/stop lifecycle is backend-specific
and lives on concrete subclasses."""
and lives on concrete subclasses.
The class-level constants `CA_CERT_IN_CONTAINER` /
`CA_KEY_IN_CONTAINER` are the in-container paths the YAML config
references — they correspond to wherever the backend's `.start`
places the CA cert and key inside the sidecar. Subclasses
override the constants."""
CA_CERT_IN_CONTAINER: str = ""
CA_KEY_IN_CONTAINER: str = ""
def prepare(
self, bottle: Bottle, slug: str, stage_dir: Path
) -> PipelockProxyPlan:
"""Write the pipelock yaml config (mode 600) under `stage_dir`
and return the plan for `.start`.
and return the plan for `.start`. Pure host-side, no docker
subprocess.
`slug` is the agent-derived identifier (lowercased,
hyphen-normalized) used as the suffix in every per-agent
resource name — the agent container, the pipelock container
(`claude-bottle-pipelock-<slug>`), the internal/egress
networks. It's stored on the returned plan so the backend's
start step can derive the sidecar's container name."""
yaml_path = stage_dir / "pipelock.yaml"
self._build_pipelock_yaml(bottle, yaml_path)
return PipelockProxyPlan(yaml_path=yaml_path, slug=slug)
start step can derive the sidecar's container name.
def _build_pipelock_yaml(self, bottle: Bottle, yaml_path: Path):
"""Write the pipelock yaml config (mode 600) to `yaml_path`."""
yaml_path.write_text(pipelock_render_yaml(pipelock_build_config(bottle)))
The CA paths the YAML references are the in-container paths
from the concrete subclass's class-level constants. The
host-side counterparts are generated by the launch step
(not here, so prepare stays side-effect-free on docker) and
added to the plan via `dataclasses.replace` before `.start`."""
yaml_path = stage_dir / "pipelock.yaml"
cfg = pipelock_build_config(
bottle,
ca_cert_path=self.CA_CERT_IN_CONTAINER,
ca_key_path=self.CA_KEY_IN_CONTAINER,
)
yaml_path.write_text(pipelock_render_yaml(cfg))
yaml_path.chmod(0o600)
return PipelockProxyPlan(yaml_path=yaml_path, slug=slug)
@abstractmethod
def start(self, plan: PipelockProxyPlan) -> str:
+291
View File
@@ -0,0 +1,291 @@
# PRD 0006: pipelock native TLS interception
- **Status:** Draft
- **Author:** didericis
- **Created:** 2026-05-12
## Summary
Turn on pipelock's built-in `tls_interception` so its DLP / URL /
header / MCP scanners fire on the plaintext of HTTPS requests
instead of only the outer `CONNECT` hostname. Pipelock generates a
per-bottle ephemeral CA at launch (`pipelock tls init`); the
public cert is installed into the agent container's trust store
and the private key dies with the sidecar on teardown. The
existing per-agent sidecar topology from PRD 0001 is otherwise
unchanged — one container, no addon, no second proxy.
This supersedes the closed PR #8 / branch `mitmproxy-tls-interception`,
which built a mitmproxy + addon chain on the (falsified) premise
that pipelock could not MITM. Empirical proof from the impl-time
spike: with `tls_interception: { enabled: true, ca_cert, ca_key }`
in the pipelock config, pipelock answered a credential POST over
HTTPS with `STATUS=403 / body: blocked: request body contains
secret: GitHub Token` and emitted both
`scanner:"tls_intercept"` and `scanner:"body_dlp"` events.
## Problem
PRD 0001 wired pipelock onto every bottle's egress, but pipelock
ran with its default `tls_interception.enabled: false`. The agent
container's only egress route is pipelock, but pipelock only saw
`CONNECT` hostnames and the encrypted bytes inside the tunnel.
Pipelock's headline scanners — request body DLP (48 credential
patterns), header DLP, URL DLP, subdomain entropy, MCP scanning,
response-body scanning — all need plaintext to fire. Against the
HTTPS-only hosts in `DEFAULT_ALLOWLIST` (`api.anthropic.com`,
`raw.githubusercontent.com`, etc.) they are effectively disabled.
The existing `tests/integration/test_pipelock_blocks_secret_post`
test only fires because it forces the agent to send plain HTTP
through pipelock's forward-proxy mode. Real Claude Code traffic
uses HTTPS via CONNECT and slips past the scanner.
## Goals / Success Criteria
The feature works when all of the following are observable:
- A Node / curl request from inside a launched bottle to a
CONNECT-bumped HTTPS host (e.g. `https://api.anthropic.com/dlp-probe`)
carrying a pipelock-recognized credential pattern in the body
returns 403 from pipelock with the documented
`blocked: request body contains secret: …` body. Pipelock's
`body_dlp` event fires on the decrypted request.
- A clean HTTPS GET from inside the bottle to an allowlisted host
(e.g. `https://raw.githubusercontent.com/...`) returns the real
upstream response — TLS interception doesn't break legitimate
traffic.
- The agent's TLS library trusts pipelock's bumped leaf certs
(per the bottle's installed CA); no TLS-trust errors.
- Claude Code reaches `api.anthropic.com` end-to-end through the
bottle and completes a chat round-trip.
The feature is **done** when all of the following ship:
- `pipelock_build_config` / `pipelock_render_yaml` emit a
`tls_interception` block with `enabled: true` and the per-bottle
CA cert/key paths. The defaults
(`cert_ttl: 24h`, `cert_cache_size: 10000`,
`passthrough_domains: []`) are kept; only `enabled` and the
cert paths are populated.
- The prepare step generates a per-bottle CA via `pipelock tls init`
in a one-shot container, writes `ca.pem` and `ca-key.pem` to
`stage_dir`. Paths land on the `DockerBottlePlan`.
- `DockerPipelockProxy.start` `docker cp`s the CA cert and key into
the sidecar (between `docker create` and `docker start`) so the
running pipelock can read its CA.
- `BottleBackend.provision_ca` (new) copies the CA public cert
into the agent at
`/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, runs
`update-ca-certificates`, and sets the `NODE_EXTRA_CA_CERTS` /
`SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` env trio on the agent
container's runtime env. Default no-op on the abstract base so
other backends aren't forced to implement.
- The launch step prints a one-line stderr log with the SHA-256
fingerprint of the public CA cert (computed via stdlib
`ssl.PEM_cert_to_DER_cert` + `hashlib.sha256`).
- On bottle teardown the sidecar is removed and the CA private
key is gone with it.
- Two new integration tests under `tests/integration/`:
- HTTPS variant of the credential-post block test (proves the
`tls_intercept` + `body_dlp` chain fires end-to-end).
- Clean HTTPS GET test (proves the allow path doesn't break TLS
trust and returns real upstream content).
- The dry-run preflight (`start --dry-run`) renders the new TLS
layer. Text: one line under the egress summary. JSON: a
reserved `egress.tls_interception: { enabled: true,
ca_fingerprint: null }` block — fingerprint is null at dry-run
because the CA only exists after launch.
## Non-goals
- A second proxy in the chain. Pipelock does the bumping
natively; the mitmproxy approach was based on a wrong premise
(closed PR #8).
- Per-bottle override to disable interception. v1 always enables
`tls_interception`. The pipelock-side `passthrough_domains`
list is the right knob if a future allowlisted host turns out
to pin certs — exposing it through the manifest is a follow-up.
- A long-lived / shared CA across bottles. Each bottle gets a
fresh CA generated by `pipelock tls init` and destroyed with the
sidecar.
- Tuning `cert_ttl`, `cert_cache_size`, `max_response_bytes`,
`cross_request_detection`, or other pipelock advanced features.
Defaults from `pipelock generate config --preset strict` are
fine for v1.
- Trust-store paths for non-Debian agent images.
`node:22-slim` is Debian; `update-ca-certificates` is the right
command. A Red-Hat-family base would need `update-ca-trust`.
- HTTP/3 / QUIC. Pipelock's interception is HTTP/HTTPS-over-TLS;
UDP/443 still needs an iptables layer (separate PRD).
## Scope
### In scope
- **`claude_bottle/pipelock.py`** changes:
- Extend `pipelock_build_config` to include
`tls_interception: { enabled: true, ca_cert: <path>, ca_key:
<path> }`. Paths are populated from the plan; the function's
signature grows a `cert_path` / `key_path` pair or reads them
off `Bottle` once they're stored.
- Extend `pipelock_render_yaml` to emit the new block.
- **`claude_bottle/backend/docker/pipelock.py`** changes:
- New helper `pipelock_tls_init(stage_dir)` runs the upstream
image as a one-shot:
`docker run --rm -v <stage>:/h -e PIPELOCK_HOME=/h pipelock tls init`,
leaving `ca.pem` and `ca-key.pem` under `stage_dir`. The host
file owner is whatever the upstream image's user is; the files
are `docker cp`'d into the sidecar rather than bind-mounted, so
host-side ownership does not affect the running sidecar.
- `DockerPipelockProxy.start` `docker cp`s the CA cert + key
into the sidecar at `/etc/pipelock/ca.pem` and
`/etc/pipelock/ca-key.pem` between `docker create` and
`docker start`, mirroring the existing pattern for the YAML
config. If pipelock's image runs as non-root, a `docker exec
-u 0 chown pipelock:pipelock /etc/pipelock/ca*.pem` lands
between the `cp` and the `start`.
- **`claude_bottle/backend/__init__.py`**: new overridable
(non-abstract) method `provision_ca(plan, target)` on
`BottleBackend`, default no-op.
`BottleBackend.provision` orchestrates `ca → prompt → skills →
ssh → git`.
- **`claude_bottle/backend/docker/provision/ca.py`** (new):
- Reads the cert from `stage_dir` (already written by prepare).
- `docker cp` into the agent.
- `docker exec -u 0 ... chmod 644 ...` + `update-ca-certificates`.
- Computes the SHA-256 fingerprint with stdlib (`ssl` +
`hashlib`), emits one stderr log line.
- **`claude_bottle/backend/docker/launch.py`**:
- Three new `-e` flags on the agent's `docker run`:
`NODE_EXTRA_CA_CERTS=/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
`SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt`,
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt`.
- `HTTPS_PROXY` / `HTTP_PROXY` continue to point at pipelock
(unchanged from PRD 0001 — the mitmproxy detour in PR #8 is
abandoned).
- **`claude_bottle/backend/docker/bottle_plan.py`**:
- One new `info(...)` line in `print()` noting TLS interception
is on.
- `to_dict()` gains an `egress.tls_interception: { enabled:
true, ca_fingerprint: null }` block. Reserved for future
population.
- **`claude_bottle/backend/docker/prepare.py`**: call
`pipelock_tls_init(stage_dir)` and write the resolved cert/key
paths onto the plan (either on the existing `proxy_plan` field
or on the parent `DockerBottlePlan`).
- **Tests:**
- `tests/integration/test_pipelock_blocks_secret_https_post.py`
(new) — HTTPS variant of the existing block test.
- `tests/integration/test_pipelock_allows_normal_https.py`
(new) — clean HTTPS GET succeeds.
- `tests/unit/test_pipelock_yaml.py` updated to assert the new
`tls_interception` block in the rendered config.
- `tests/integration/test_dry_run_plan.py` updated to assert
the new `egress.tls_interception` JSON block.
### Out of scope
- Modifying pipelock itself. We're using existing config knobs.
- A manifest field to disable / customize interception per bottle.
Doable but premature.
- Wiring `passthrough_domains`. The default `[]` is correct for
v1; add the manifest field when a pinning host shows up. The
shape is pre-recorded so the follow-up is mechanical:
`bottle.egress.tls_passthrough_domains: [host, ...]`,
mirroring the existing `egress.allowlist`.
- `cross_request_detection`, `entropy_budget`,
`fragment_reassembly`, `reverse_proxy`, `scan_api` — features
pipelock exposes but we don't need for the body-DLP gap.
## Proposed Design
### Topology
```
agent --HTTPS_PROXY--> pipelock --[bumps TLS]--> internet
(sees plaintext: URL, headers, body)
```
Same single-sidecar shape as PRD 0001. The only addition is
`tls_interception` in pipelock's config plus the per-bottle CA
generated at prepare time.
### CA lifecycle
- **Generation.** Host-side, at prepare time, via a one-shot
`docker run --rm -v <stage>:/h -e PIPELOCK_HOME=/h pipelock tls
init`. Output: `<stage>/ca.pem` + `<stage>/ca-key.pem`, mode 600.
- **Sidecar install.** `DockerPipelockProxy.start` `docker cp`s
the CA cert + key into the sidecar at `/etc/pipelock/ca.pem`
and `/etc/pipelock/ca-key.pem` between `docker create` and
`docker start`. Same pattern the proxy already uses for the
YAML config — no bind-mount, no UID/permission concern from
the one-shot generation step. The rendered YAML references
the in-container paths.
- **Bottle install.** `provision_ca` (Docker impl) does
`docker cp <stage>/ca.pem agent:/usr/local/share/ca-certificates/claude-bottle-mitm.crt`,
then `update-ca-certificates`. The CA env trio is set at
`docker run -e` time (Docker propagates run-time env into
`docker exec`).
- **Per-bottle ephemerality.** Enforced by *regenerating per
launch*, not by validity windows. Pipelock's defaults
(`cert_ttl: 24h` for leaves, `--validity 87600h` for the CA)
are fine — the CA lives only as long as the sidecar, which is
the bottle's lifetime.
- **Teardown.** Sidecar removed via `ExitStack` callback, then
the launch context manager's outer `finally` `shutil.rmtree`s
`stage_dir`. CA dies with both, in that order: the copy that was
`docker cp`'d into the sidecar goes first, then the host copy
under `stage_dir`.
- **Fingerprint.** Computed via stdlib in `provision_ca` and
logged once to stderr (`claude-bottle: mitm ca fingerprint:
sha256:<hex>…`). The private key never appears in any log.
### Data model changes
None to the manifest schema. The dry-run JSON contract grows a
reserved `egress.tls_interception` block; the fingerprint is
always null at dry-run because the CA doesn't exist yet.
### Existing code touched
Surgical, all on the existing pipelock path:
- `claude_bottle/pipelock.py` — config builder + YAML renderer.
- `claude_bottle/backend/__init__.py` — overridable `provision_ca`
(default no-op).
- `claude_bottle/backend/docker/pipelock.py` — `tls init` helper,
`docker cp` of the CA cert + key into the sidecar.
- `claude_bottle/backend/docker/prepare.py` — CA paths on plan.
- `claude_bottle/backend/docker/launch.py` — CA env trio on agent.
- `claude_bottle/backend/docker/backend.py` — `provision_ca`
dispatch + thread `self._proxy` through prepare/launch unchanged
shape.
- `claude_bottle/backend/docker/bottle_plan.py` — preflight
rendering.
- `claude_bottle/backend/docker/provision/ca.py` (new).
Net diff is meaningfully smaller than PR #8 because pipelock
already does the work — no addon, no second sidecar, no second
backend module.
### External dependencies
- **Pipelock image** — unchanged pin from PRD 0001
(`ghcr.io/luckypipewrench/pipelock@sha256:3b1a3941`,
matching pipelock v2.3.0). No new image dependency.
- **No host-side crypto deps.** CA generation uses the pipelock
image's own `tls init` command in a one-shot container.
Fingerprint uses Python stdlib `ssl` + `hashlib`.
## References
- `docs/research/pipelock-assessment.md` (now corrected) —
pipelock capability assessment including the
`tls_interception` block.
- `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` —
egress-proxy baseline this PRD extends.
- `docs/prds/0003-bottle-backend-abstraction.md` — backend ABC
contract this PRD adds a `provision_ca` method to.
- `docs/prds/0004-split-out-provisioners.md` — per-provisioner
module pattern reused for the new CA provisioner.
- Pipelock `tls` CLI (in-image help):
`pipelock tls init / install-ca / show-ca`.
- Closed PR #8 — earlier mitmproxy-based design built on the
falsified "pipelock can't MITM" premise; archived for context.
+9 -5
View File
@@ -222,10 +222,14 @@ The following threat-model items from `network-egress-guard.md` are
intercept raw UDP 53 packets.
- **Domain fronting**: an agent can send `CONNECT allowed-host.com:443`
through the proxy but embed a different SNI inside the TLS session.
Pipelock does not perform TLS inspection (no CA trust injection) and
cannot verify SNI vs. CONNECT header. The same limitation is shared
with smokescreen and is documented in `network-egress-guard.md` as a
known gap for the non-TLS-terminating proxy approach.
Pipelock supports TLS interception via its `tls_interception` config
block (`enabled`, `ca_cert`, `ca_key`, `cert_ttl`, `cert_cache_size`,
`passthrough_domains`, `max_response_bytes`) plus the `pipelock tls
init` / `install-ca` / `show-ca` CLI; with interception on, the
body and inner Host header become visible to its scanner pipeline,
closing the domain-fronting gap. With interception off (default in
the generated config), pipelock relays the CONNECT as an opaque
tunnel and only sees the outer hostname.
- **SSH egress content**: SSH sessions to permitted hosts are opaque.
Same limitation noted in both prior research notes.
- **Agent killing the proxy process**: if pipelock runs inside the same
@@ -385,7 +389,7 @@ pipelock's differentiators.
| Blocks RFC 1918 by default | only if explicitly added to rules | yes | yes, + DNS rebinding | no |
| Content-based DLP (credential patterns) | no | no | yes, 48 patterns + encoding normalization | no |
| MCP / WebSocket scanning | no | no | yes, bidirectional | no |
| Domain fronting bypass | possible | possible | possible (no TLS termination) | n/a |
| Domain fronting bypass | possible | possible | mitigated when `tls_interception` is enabled (CA trust required in client) | n/a |
| macOS Docker Desktop (sidecar mode) | yes | yes | yes | yes |
| macOS Docker Desktop (in-container sandbox) | yes | n/a | degraded (--best-effort) | yes |
| NET_ADMIN / NET_RAW required | yes | no | no (sidecar) | no |
-508
View File
@@ -1,508 +0,0 @@
# TLS interception for pipelock content scanning
Research into adding TLS termination ("MITM") to the egress path so that
pipelock's scanning pipeline can see plaintext HTTP request and response
bodies, instead of only the `CONNECT` host and opaque ciphertext.
## Summary
- Pipelock today sees `CONNECT` hostnames and the encrypted bytes that follow.
Its DLP, subdomain-entropy, and MCP scanners cannot fire on TLS-encrypted
bodies, which is the gap explicitly named under "Scope gaps" in
`pipelock-assessment.md` ("Pipelock does not perform TLS inspection (no CA
trust injection)").
- Closing that gap requires a TLS-terminating proxy that bumps `CONNECT`,
presents a leaf certificate for the target hostname signed by a CA the
bottle's trust store accepts, decrypts the inner HTTP, and re-establishes
TLS to the real upstream.
- The mature open-source option is **mitmproxy**. Squid + `ssl_bump` is the
heavier production-grade alternative. The Go ecosystem (`goproxy`,
`gomitmproxy`, `martian`) is suitable only if we want a custom binary
tightly coupled to pipelock.
- Recommended v1 topology: **mitmproxy in front of pipelock** on the same
egress route. mitmproxy terminates client TLS, forwards plaintext to
pipelock as its upstream HTTP proxy, and re-encrypts to the real upstream.
Pipelock stays unchanged.
- Per-bottle ephemeral CA, generated at bottle start and destroyed on
teardown. The CA private key lives only on the sidecar; the bottle's
trust store only ever sees the public cert.
- Cert pinning is a known caveat but a small one given the narrow allowlist
in this project. Selective bumping is the mitigation if a future
allowlisted host turns out to pin.
---
## What pipelock cannot see today
The current egress topology (per `pipelock-assessment.md`):
```
agent --HTTPS_PROXY--> pipelock --CONNECT host:443--> internet
\____________________________
opaque TLS bytes
```
The agent's client (Claude Code, `curl`, an MCP server, a Python SDK)
sends `CONNECT api.anthropic.com:443`. Pipelock checks the hostname
against its `api_allowlist`, replies `200 Connection Established`, and
then blindly relays bytes between the two TCP halves. The TLS handshake
and everything inside it happens end-to-end between the agent and the
real upstream.
What pipelock can scan in this mode:
- `CONNECT` target hostname (SNI is not even needed).
- TLS record framing and lengths (useful for budgets, useless for DLP).
- Plain HTTP/1.1 to non-HTTPS destinations (irrelevant — there are none
in `DEFAULT_ALLOWLIST`).
What pipelock cannot scan in this mode:
- Request URL, method, headers, body.
- Response status, headers, body.
- MCP JSON-RPC payloads inside the TLS session.
- WebSocket frames inside a TLS-wrapped upgrade.
- Whether the inner SNI or HTTP `Host` / `:authority` matches the
outer `CONNECT` target (domain-fronting check).
The 48-pattern DLP layer, the subdomain-entropy check (insofar as it
inspects URLs rather than DNS-resolver queries), the request-redaction
feature added in v2.3.0, and bidirectional MCP scanning all require
plaintext to operate on. Without TLS termination, those layers are
inert against any HTTPS destination — which is every destination in
the current allowlist.
---
## How TLS interception works
The mechanics of `CONNECT` bumping, end to end:
1. **Agent issues `CONNECT`.** The HTTP client sees `HTTPS_PROXY` set,
so it opens a TCP connection to the proxy and sends
`CONNECT api.anthropic.com:443 HTTP/1.1`.
2. **Proxy answers `200`.** Standard tunnel-established response.
3. **Proxy starts TLS as the server.** Instead of relaying bytes, the
proxy itself performs a TLS handshake with the agent. It needs a
server certificate for `api.anthropic.com` — so on first contact for
that hostname, the proxy generates a leaf certificate with
`CN=api.anthropic.com` and a SAN for the same, signs it with its
own CA private key, and presents that cert. Subsequent connections
to the same hostname reuse the cached leaf.
4. **Agent verifies the cert.** The agent's TLS library walks the chain
to a trusted root. Because the bottle's trust store contains the
proxy's CA cert, validation succeeds. The agent has no way to tell
it isn't talking to the real `api.anthropic.com`.
5. **Proxy opens its own TLS to the real upstream.** As a client this
time, using the system root store, talking to the real
`api.anthropic.com`. Real SNI, real cert chain validated normally.
6. **Proxy bridges the two TLS sessions.** Decrypts on the server side,
re-encrypts on the client side, and scans the plaintext in between.
This is what every TLS-terminating egress proxy does. The trade-offs
live in three places:
- **CA trust injection.** Step 4 only works if the bottle's trust
store contains the proxy's CA. Mechanics covered under "CA lifecycle"
below.
- **Cert generation cost.** Generating an RSA-2048 leaf cert takes
~50 ms; ECDSA P-256 is ~5 ms. Cache leaves per (hostname, SAN list)
to keep this off the steady-state hot path.
- **Protocol coverage.** The proxy needs to speak HTTP/1.1, HTTP/2 (ALPN
`h2`), and ideally WebSocket. HTTP/3 / QUIC is UDP and requires a
separate code path; for v1, blocking UDP/443 at the iptables layer
forces clients to fall back to HTTP/2, which we can inspect.
---
## Tools
### mitmproxy
- **What it is.** Python (with Rust crypto bits) interactive HTTPS proxy.
Reference open-source implementation of the bump pattern. Ships as
`mitmproxy` (TUI), `mitmweb` (browser UI), and `mitmdump` (headless).
- **Cert handling.** Generates a CA on first run under `~/.mitmproxy/`.
Per-host leaves are generated on demand and cached in memory. Cert
cache keyed by (hostname, SAN extensions inferred from upstream cert).
- **Protocols.** HTTP/1.1, HTTP/2, WebSocket fully supported. HTTP/3
exists as experimental. Raw TCP / non-HTTP TLS supported via
`--mode reverse:` but not in CONNECT-bump mode.
- **Extensibility.** Python addon API. An addon module can inspect or
modify any `request` / `response` / `tcp_message` flow. The pipelock
integration in Topology D below uses this.
- **Selective bumping.** `ignore_hosts` regex; matching CONNECTs are
tunneled blindly instead of bumped. Critical for the cert-pinning
mitigation.
- **Docker image.** `mitmproxy/mitmproxy` on Docker Hub. Single binary
for the CLI, ~80 MB image. Configurable via flags or `~/.mitmproxy/config.yaml`.
- **Project URL.** <https://mitmproxy.org>, <https://github.com/mitmproxy/mitmproxy>.
Most mature, best-documented, lowest-effort integration. Default choice
for v1.
### Squid + ssl_bump
- **What it is.** Squid is a long-running C++ caching proxy.
`ssl_bump` is its TLS-interception feature, controlled by per-CONNECT
actions: `splice` (tunnel blindly), `bump` (decrypt and re-encrypt),
`peek` (look at TLS hello then decide), `stare` (look at server cert
then decide), `terminate` (abort the connection).
- **Cert handling.** Configured via `sslcrtd_program` — a helper that
generates and caches per-host certs. CA cert and key referenced by
PEM paths in `squid.conf`.
- **Protocols.** HTTP/1.1 fully; HTTP/2 to clients via recent versions;
no scripted addons.
- **Extensibility.** ICAP (Internet Content Adaptation Protocol) for
external scanners — Squid POSTs each request/response to an ICAP
service that can modify or reject. This is the formal version of
Topology D below.
- **Production track record.** Used at corporate-proxy scale (large
enterprises, ISPs). Heavyweight for a single-bottle sidecar.
- **Project URL.** <https://wiki.squid-cache.org/Features/SslPeekAndSplice>.
Right tool if pipelock grows an ICAP server endpoint. Otherwise, more
config surface than this project needs.
### Go libraries: goproxy, gomitmproxy, martian
- **`goproxy`** (elazarl) — long-lived Go library, basic CONNECT-bumping
proxy with a handler API. Sparse on HTTP/2.
<https://github.com/elazarl/goproxy>
- **`gomitmproxy`** (AdGuard) — newer, cleaner API; built for AdGuard
Home / DNS-filtering products. HTTP/2 support is partial.
<https://github.com/AdguardTeam/gomitmproxy>
- **`martian`** (Google) — request/response modifier framework with a
JSON-configurable rule engine. Used internally at Google; public
ecosystem thin.
<https://github.com/google/martian>
These are relevant only if we decide to write a custom TLS-terminating
binary that links pipelock's scanning packages directly — Topology C
below. They are not faster than mitmproxy for the v1 sidecar shape;
they are smaller and more direct, at the cost of writing more Go.
### Disqualified
- **Caddy, Envoy, HAProxy.** All can terminate TLS at a reverse-proxy
vhost. None ship a "bump on CONNECT and forward plaintext to a
downstream proxy" mode out of the box. Adapting any of them to this
shape is more work than starting from mitmproxy.
- **Cloudflare Gateway, Zscaler, NetSkope, Forcepoint.** Managed cloud
egress with TLS inspection. Wrong topology — they live outside the
host, not as a per-bottle sidecar, and they require trusting a vendor
with full plaintext.
- **Charles Proxy, Burp Suite.** Closed-source GUI tools for developer
capture and security testing. Not appropriate as headless sidecars.
- **`mitmdump` standalone vs. embedding mitmproxy as a library.** Both
are mitmproxy. Calling out only to note: the project ships both a CLI
and a Python API; addons can be loaded either way.
---
## Topologies
Five candidate topologies, ordered roughly from least to most coupled
between the two components.
### A — mitmproxy in front of pipelock (recommended)
```
agent --HTTPS_PROXY--> mitmproxy --HTTP_PROXY--> pipelock --> internet
(bump TLS) (scan plain) (real TLS)
```
mitmproxy terminates the agent's TLS connection, decrypts, and then
forwards the inner HTTP request to pipelock by treating pipelock as
its own upstream HTTP forward proxy. Pipelock receives plaintext HTTP
exactly as if the agent had used HTTP, applies its full scanning
pipeline, and forwards to mitmproxy's upstream client half — which
re-establishes TLS to the real destination.
Concretely the agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's
`upstream_proxy` config points at pipelock; pipelock's network reach
includes the real internet.
- **Wins.** Pipelock unchanged. mitmproxy unchanged from default
configuration. Each component has one job. Failure modes are clear
per layer.
- **Costs.** Two sidecars per bottle instead of one. One extra
decrypt / re-encrypt hop, ~5–15 ms per request in steady state.
- **Open question.** How exactly mitmproxy forwards to pipelock matters
for whether pipelock sees TLS again or only HTTP. mitmproxy's
`upstream` mode wraps the decrypted request in another CONNECT if the
destination is HTTPS — which would re-encrypt before pipelock sees
it, defeating the point. The correct mode is `upstream` with TLS
re-origination disabled, or `regular` mode with a chained proxy. The
v2 release of mitmproxy reworked this; needs verification against the
current docs at integration time.
### B — pipelock in front of mitmproxy (ruled out)
```
agent --HTTPS_PROXY--> pipelock --CONNECT?--> mitmproxy --> internet
(sees CONNECT only) (bump TLS)
```
Pipelock would receive a `CONNECT` and decide to allow or deny based
on hostname, then tunnel to mitmproxy. mitmproxy would terminate TLS
and see plaintext — but pipelock would never see the plaintext, which
is the whole point of the exercise. The scanning still happens (in
mitmproxy), but it isn't pipelock doing it, so we'd need an entirely
different rule engine. Ruled out.
### C — Extend pipelock itself to terminate TLS
Two sub-variants:
**C.1 — Upstream a `tls_terminate` mode.** Submit a feature to
pipelock that adds CONNECT bumping and per-host cert generation in Go,
using `crypto/tls` and the existing scanning packages. Pipelock becomes
a self-contained MITM proxy. License question matters here: the Apache
2.0 core can grow new features in-tree, but if upstream insists this
belongs in `enterprise/` (ELv2), we either accept ELv2 or fork.
**C.2 — Wrap pipelock in a thin Go binary in the same container.** A
small Go program does the TLS half (`CONNECT` parsing, cert generation,
TLS handshake) and pipes plaintext to pipelock over UDS or loopback.
The wrapper is ours; pipelock is unmodified. No license question.
- **Wins.** Single component on the egress path. Pipelock owns the
scanning end-to-end, including domain-fronting checks (SNI vs.
`Host` vs. `CONNECT`).
- **Costs.** Real Go engineering effort. CA generation, cert caching,
TLS handshake, HTTP/2 ALPN negotiation, WebSocket upgrade — all
things mitmproxy already solves.
- **When.** Right shape for v2 or v3 once the v1 mitmproxy-in-front
topology has proven the integration works and the scanning rules are
stable.
### D — mitmproxy as the proxy, pipelock as a content-scan subroutine
```
agent --HTTPS_PROXY--> mitmproxy --> internet
(bump TLS)
|
v
POST /scan to pipelock
<- allow / block / redact
```
A Python addon in mitmproxy sends each decrypted request (and response)
to a pipelock HTTP `/scan` endpoint and gates the flow on the verdict.
mitmproxy handles all networking; pipelock is the rule engine only.
- **Wins.** Clean separation of concerns. Pipelock doesn't have to
speak TLS at all. The addon is small, ~100 lines of Python.
- **Costs.** Requires pipelock to expose a scan API. The current Apache
2.0 core does not document one. If `/scan` lives in `enterprise/`,
ELv2 applies. If it doesn't exist, we'd be asking pipelock for a new
surface.
- **Variant.** Squid's ICAP path is the formalized version of the same
pattern.
### E — Single container, two processes
mitmproxy and pipelock share a container, started by `supervisord` or
`s6-overlay`. Networking simplifies to localhost. Lifecycle complicates:
container restart now means restarting both; failure of one process is
not visible at the Docker layer; logs interleave.
- **Wins.** Slightly less Docker plumbing in `cli.py`.
- **Costs.** Operational complexity not worth the savings. The two
containers are independent processes with independent failure modes;
Docker is the right tool for that.
Net: not recommended.
---
## CA lifecycle
The CA private key is the asset to defend. With it, anyone can issue
certs that the bottle's trust store will accept for any hostname. So:
**Per-bottle ephemeral CA.** At bottle start, generate a fresh
RSA-2048 or ECDSA-P256 CA inside the mitmproxy sidecar. Export only
the public cert (PEM) into the bottle's trust store at one of:
- `/usr/local/share/ca-certificates/claude-bottle-mitm.crt` followed by
`update-ca-certificates` (Debian/Ubuntu base images).
- `/etc/pki/ca-trust/source/anchors/` with `update-ca-trust`
(Red-Hat-family).
- `$NODE_EXTRA_CA_CERTS` for Node-based agents (Claude Code).
- `$SSL_CERT_FILE` / `$REQUESTS_CA_BUNDLE` for Python SDKs.
The private key never leaves the sidecar's filesystem. The CA cert
public half is the only artifact that crosses into the bottle.
On bottle teardown, the sidecar container is destroyed; the CA dies
with it. The next bottle gets a fresh CA. No long-lived MITM CA on
disk.
**Why not a shared per-host CA.** A persistent CA across bottles is
faster (no generation at start) but is a real liability: if any bottle
exfiltrates the CA cert public half (which it can — it's in the trust
store by design), an attacker on the host network could in principle
impersonate any host to any bottle. With a per-bottle CA, the exfil
gains nothing: the CA is bottle-local and dies in minutes.
**Generation cost.** RSA-2048 CA generation is ~200 ms; ECDSA-P256 is
~5 ms. Either is irrelevant against the per-bottle Docker pull and
network setup cost.
**Where the CA lives in the bottle's trust store.** Both: a
distribution-standard path with `update-ca-certificates`, and the
env-var path. Belt and suspenders, because some Node and Python
libraries honor the env vars only, and some load only `/etc/ssl/certs/`
directly.
---
## Cert pinning (brief)
A client that pins ignores the trust store and refuses any cert whose
public key isn't on a hardcoded list. Three observations for this
project:
- The current `DEFAULT_ALLOWLIST` (`api.anthropic.com`,
`statsig.anthropic.com`, `sentry.io`, `claude.ai`,
`platform.claude.com`, `downloads.claude.ai`,
`raw.githubusercontent.com`) does not appear to include any host that
pins against server-side SDKs. Server-side SDKs (Node, Python) almost
universally honor system trust and `NODE_EXTRA_CA_CERTS` /
`SSL_CERT_FILE`. Mobile SDKs and Chromium pin; we don't run those.
- If a future allowlisted host turns out to pin, the mitigation is
selective bumping via mitmproxy `ignore_hosts`: that specific
hostname tunnels blindly and pipelock loses DLP coverage for it.
Coverage on every other host is unaffected.
- The cost of finding out: a single 5-minute test before adding a host
— point mitmproxy at the host, observe whether the client succeeds.
Not a v1 blocker. Document the failure mode and the mitigation.
---
## Comparison table
| | A: mitmproxy → pipelock | B: pipelock → mitmproxy | C: TLS in pipelock | D: mitmproxy + scan API | E: one container |
|---|---|---|---|---|---|
| Pipelock sees plaintext | yes | no | yes | yes (via /scan) | yes |
| Code change to pipelock | none | none | substantial | adds /scan endpoint | none |
| Sidecar count | 2 | 2 | 1 | 2 | 1 |
| Cert generation owner | mitmproxy | mitmproxy | pipelock | mitmproxy | mitmproxy |
| Selective bumping | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` | pipelock config | mitmproxy `ignore_hosts` | mitmproxy `ignore_hosts` |
| Failure isolation per process | yes | yes | n/a (one process) | yes | no (shared container) |
| License question | none | none | ELv2 risk | ELv2 risk | none |
| v1 effort | low | low (but pointless) | high | medium | low |
| Long-term shape | interim | n/a | best | possible | not recommended |
---
## Recommendation
**Adopt Topology A for v1.** Add a mitmproxy sidecar to the egress
topology, in front of pipelock on the same per-bottle internal network.
The agent's `HTTPS_PROXY` points at mitmproxy; mitmproxy's upstream is
pipelock; pipelock's upstream is the real internet.
Concretely:
1. Add a `MitmproxyProxy` class alongside `PipelockProxy`, with the
same `prepare` / `start` / `stop` lifecycle. The class generates
a per-bottle CA in `stage_dir`, exports the public cert into a
second file, and writes a mitmproxy config that:
- bumps every CONNECT by default
- uses `upstream_proxy = http://pipelock-<slug>:<port>`
- listens on a known port inside the per-bottle internal network
2. Extend the bottle launch step to copy the CA public cert into the
agent container under
`/usr/local/share/ca-certificates/claude-bottle-mitm.crt`, run
`update-ca-certificates`, and set `NODE_EXTRA_CA_CERTS` /
`SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` accordingly.
3. Repoint the agent's `HTTPS_PROXY` and `HTTP_PROXY` from the pipelock
container to the mitmproxy container.
4. Verify mitmproxy's upstream-proxy mode forwards plaintext (not a
re-wrapped CONNECT) to pipelock; if not, use `regular` mode with a
chained proxy directive.
5. Test that pipelock's DLP, subdomain-entropy, and MCP scanners now
fire on real request bodies for `api.anthropic.com` traffic.
**Defer Topologies C and D.** Topology C (extending pipelock to
terminate TLS) is the cleanest long-term shape but is a substantial
build and runs into the Apache 2.0 vs. ELv2 question. Topology D
(mitmproxy with pipelock as a scan API) is attractive but requires a
pipelock surface that doesn't exist today. Both are valid v2 targets;
neither is the right starting point.
The `network-egress-guard.md` v1 iptables + dnsmasq layer remains
necessary alongside this — TLS interception covers HTTP/HTTPS only;
raw TCP, UDP/443 (QUIC), UDP/53 (DNS), and ICMP still need the
IP-level default-deny.
---
## Open questions
1. **mitmproxy upstream-proxy mode mechanics.** Does mitmproxy in
`upstream_proxy` mode forward decrypted HTTP plaintext to the
upstream, or does it wrap it in a new CONNECT? The documented
behavior changed between mitmproxy 8 and 10. Needs verification
against the version we pin.
2. **Pipelock's behavior when receiving plain HTTP.** Pipelock's
`forward_proxy.enabled: true` accepts both `GET http://...` (plain
HTTP) and `CONNECT host:443` (HTTPS). After Topology A is wired up,
pipelock will see only plain HTTP — does its DLP / MCP scanning
pipeline run the full set of layers, or are some gated on the
CONNECT path? Confirm by reading
`github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md`.
3. **CA installation in the Anthropic-provided Claude Code Docker image.**
The base image's distribution determines whether `update-ca-certificates`
(Debian/Ubuntu) or `update-ca-trust` (Red Hat) is the right command.
The current `Dockerfile` should be inspected before assuming Debian.
4. **HTTP/2 over the agent → mitmproxy hop.** Node's HTTP client
negotiates `h2` via ALPN. mitmproxy speaks `h2` to clients in recent
versions. Confirm the version we pin supports `h2` end-to-end and
doesn't downgrade to `http/1.1` (which would be a silent
performance regression).
5. **Selective-bump policy surface.** Where does the
"tunnel this hostname blindly" decision live? Options: a field on
`bottle.egress` in the manifest, a fixed list of known-pinning
hosts baked into the mitmproxy config, or pipelock-side opt-out.
Manifest field is most consistent with the existing
`bottle.egress.allowlist` shape.
6. **Image pin for mitmproxy.** The `pipelock-assessment.md`
recommendation is to pin by digest. The mitmproxy Docker Hub image
should be pinned the same way. Which release line? `mitmproxy/mitmproxy`
ships rolling and tagged versions; the tagged `:11.x` line is the
right baseline.
7. **CA generation in Python (mitmproxy) vs. as a separate step.**
mitmproxy generates a CA on first launch if none is provided. For
per-bottle ephemerality, we want the CA to be ours, not whatever
mitmproxy chooses — so generate the CA in the host-side prepare
step and inject it via `--certs *=...`. Mechanics need confirming.
8. **Domain fronting verification.** Once pipelock sees plaintext, it
has access to the inner `Host` / `:authority`. A new rule that
compares it against the outer `CONNECT` target catches domain
fronting. Worth a follow-up note on whether pipelock has such a
rule or whether we add it.
---
## References
- mitmproxy: <https://mitmproxy.org>, <https://github.com/mitmproxy/mitmproxy>
- mitmproxy `upstream_proxy` mode: <https://docs.mitmproxy.org/stable/concepts/modes/#upstream-proxy>
- mitmproxy CA cert installation: <https://docs.mitmproxy.org/stable/concepts/certificates/>
- Squid `ssl_bump`: <https://wiki.squid-cache.org/Features/SslPeekAndSplice>
- Squid ICAP: <https://wiki.squid-cache.org/Features/ICAP>
- `goproxy`: <https://github.com/elazarl/goproxy>
- `gomitmproxy`: <https://github.com/AdguardTeam/gomitmproxy>
- `martian`: <https://github.com/google/martian>
- Node TLS / `NODE_EXTRA_CA_CERTS`: <https://nodejs.org/api/cli.html#node_extra_ca_certsfile>
- Python `SSL_CERT_FILE` and `REQUESTS_CA_BUNDLE`: <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.load_verify_locations>
- Prior research — pipelock assessment: `docs/research/pipelock-assessment.md`
- Prior research — network egress guard: `docs/research/network-egress-guard.md`
- Prior research — secret exfil tripwire encodings: `docs/research/secret-exfil-tripwire-encodings.md`
Research conducted 2026-05-12.
+8
View File
@@ -92,6 +92,14 @@ class TestDryRunPlan(unittest.TestCase):
self.assertEqual(sorted(set(hosts)), hosts,
"hosts must be sorted and deduplicated")
# PRD 0006: TLS interception is on for every launched
# bottle. Fingerprint is null at dry-run (no CA exists
# yet); real launches log it from provision_ca.
self.assertEqual(
{"enabled": True, "ca_fingerprint": None},
plan["egress"]["tls_interception"],
)
# No Docker side effects (see the GITEA_ACTIONS skip note
# above — this guard runs locally only).
if check_side_effects:
@@ -0,0 +1,84 @@
"""Integration: with pipelock's tls_interception enabled (PRD 0006),
a clean HTTPS GET to an allowlisted host succeeds end-to-end through
the bumped tunnel.
Complement to test_pipelock_blocks_secret_https_post — together they
pin pipelock's two paths (block on body match, allow on clean
traffic). This test is also the implicit TLS-trust check: if
provision_ca had failed to install pipelock's CA into the agent's
trust store, curl would have rejected the bumped leaf cert and the
fetch would have failed before any HTTP response could come back."""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from tests._docker import skip_unless_docker
from tests.fixtures import fixture_minimal
# raw.githubusercontent.com is in the baked-in DEFAULT_ALLOWLIST.
# `git`'s own README on the master branch is a long-lived raw file
# (~3 KB) that any CI runner with internet can fetch.
_TARGET_URL = "https://raw.githubusercontent.com/git/git/master/README.md"
@skip_unless_docker()
class TestPipelockAllowsNormalHttps(unittest.TestCase):
    """Allow-path check: a clean HTTPS GET issued from inside a bottle.

    Launches a bottle via the backend, runs curl against `_TARGET_URL`
    through `$HTTPS_PROXY`, and expects an HTTP 200 with a non-empty
    response body.
    """

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_get_to_allowed_host_succeeds(self):
        # Shell probe executed inside the bottle: fetch the target
        # through the proxy, print the HTTP status, then print the
        # number of body bytes written to the scratch file.
        probe = "".join([
            "set -eu\n",
            'curl --proxy "$HTTPS_PROXY" -s --max-time 10 \\\n',
            " -w 'status=%{http_code}\\n' \\\n",
            " -o /tmp/probe-body.txt \\\n",
            f" {_TARGET_URL}\n",
            'echo "len=$(wc -c < /tmp/probe-body.txt)"\n',
        ])

        bottle_backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            bottle_spec = BottleSpec(
                manifest=fixture_minimal(),
                agent_name="demo",
                copy_cwd=False,
                user_cwd=str(scratch),
                forward_oauth_token=False,
            )
            launch_plan = bottle_backend.prepare(bottle_spec, stage_dir=scratch)
            with bottle_backend.launch(launch_plan) as bottle:
                outcome = bottle.exec(probe)
        finally:
            # Always remove the staging directory, pass or fail.
            shutil.rmtree(scratch, ignore_errors=True)

        self.assertEqual(
            0,
            outcome.returncode,
            f"exec wrapper failed: stdout={outcome.stdout!r} stderr={outcome.stderr!r}",
        )

        # The upstream must answer 200. A rejected bumped leaf cert
        # would show up as a non-zero exit or a non-200 status.
        self.assertIn(
            "status=200",
            outcome.stdout,
            f"expected 200 from raw.githubusercontent.com; got: {outcome.stdout!r}",
        )

        # A non-empty body shows the response actually transferred
        # through the tunnel rather than just the status line.
        self.assertNotIn(
            "len=0\n",
            outcome.stdout,
            f"response body was empty: {outcome.stdout!r}",
        )
if __name__ == "__main__":
unittest.main()
@@ -0,0 +1,96 @@
"""Integration: with pipelock's tls_interception enabled (PRD 0006),
a credential POST sent over HTTPS is blocked by pipelock's body-scan
layer — closing the gap that motivated this PRD.
End-to-end: drives `BottleBackend.prepare → launch` so the real
image build, network plumbing, pipelock_tls_init, sidecar bring-up,
and provision_ca (CA install in the agent's trust store) are all in
the loop. The probe is a single `curl --proxy "$HTTPS_PROXY" -X POST
... https://api.anthropic.com/...` — curl natively does CONNECT
through the proxy, the agent's trust store now contains pipelock's
per-bottle CA so curl trusts pipelock's bumped leaf, and pipelock
sees the decrypted body and returns its known
`blocked: request body contains secret: <pattern>` 403."""
from __future__ import annotations
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from claude_bottle.backend import BottleSpec, get_bottle_backend
from claude_bottle.manifest import Manifest
from tests._docker import skip_unless_docker
# Synthetic value shaped like a GitHub Personal Access Token; not a
# real credential. Carried into the bottle as an env var so the
# probe shell can read it via $FAKE_TOKEN without ever interpolating
# the value on the bash `bottle.exec` argv.
_FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
@skip_unless_docker()
class TestPipelockBlocksSecretHttpsPost(unittest.TestCase):
    """Block-path check: an HTTPS POST whose body carries a fake token.

    Launches a bottle whose environment holds `FAKE_TOKEN`, POSTs that
    value through `$HTTPS_PROXY`, and expects a 403 whose body starts
    with `blocked: `.
    """

    @unittest.skipIf(
        os.environ.get("GITEA_ACTIONS") == "true",
        "skipped under act_runner: docker socket mount topology breaks "
        "in-process visibility of networks created on the host daemon",
    )
    def test_https_post_with_credential_body_is_blocked(self):
        # One bottle ("dev") exporting the synthetic token, and one
        # agent ("demo") bound to it.
        bottles = {"dev": {"env": {"FAKE_TOKEN": _FAKE_TOKEN}}}
        agents = {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}
        manifest = Manifest.from_json_obj({
            "bottles": bottles,
            "agents": agents,
        })

        # Shell probe executed inside the bottle: POST the token read
        # from the environment, print the HTTP status, then echo the
        # first 200 bytes of whatever body came back.
        probe = "".join([
            "set -eu\n",
            'curl --proxy "$HTTPS_PROXY" -s --max-time 8 \\\n',
            " -w 'status=%{http_code}\\n' \\\n",
            " -o /tmp/probe-body.txt \\\n",
            ' -X POST -d "token=$FAKE_TOKEN" \\\n',
            " https://api.anthropic.com/dlp-probe\n",
            'echo "body=$(head -c 200 /tmp/probe-body.txt)"\n',
        ])

        bottle_backend = get_bottle_backend()
        scratch = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
        try:
            bottle_spec = BottleSpec(
                manifest=manifest,
                agent_name="demo",
                copy_cwd=False,
                user_cwd=str(scratch),
                forward_oauth_token=False,
            )
            launch_plan = bottle_backend.prepare(bottle_spec, stage_dir=scratch)
            with bottle_backend.launch(launch_plan) as bottle:
                outcome = bottle.exec(probe)
        finally:
            # Always remove the staging directory, pass or fail.
            shutil.rmtree(scratch, ignore_errors=True)

        self.assertEqual(
            0,
            outcome.returncode,
            f"exec wrapper failed: stdout={outcome.stdout!r} stderr={outcome.stderr!r}",
        )

        # Expected block shape: a 403 whose plain-text body begins
        # with `blocked: ` (pinned empirically; see
        # tests/unit/test_mitmproxy_verdict.py, retained from PR #8
        # as general pipelock-block-shape coverage).
        self.assertIn(
            "status=403",
            outcome.stdout,
            f"expected 403 from pipelock; got: {outcome.stdout!r}",
        )
        self.assertIn(
            "body=blocked: ",
            outcome.stdout,
            f"expected pipelock block body; got: {outcome.stdout!r}",
        )
if __name__ == "__main__":
unittest.main()
@@ -28,6 +28,7 @@ from claude_bottle.backend.docker.pipelock import (
PIPELOCK_PORT,
DockerPipelockProxy,
pipelock_container_name,
pipelock_tls_init,
)
from tests._docker import skip_unless_docker
from tests.fixtures import fixture_minimal
@@ -79,10 +80,17 @@ class TestPipelockSidecarSmoke(unittest.TestCase):
self.internal_net = network_create_internal(self.slug)
self.egress_net = network_create_egress(self.slug)
# PRD 0006: pipelock's tls_interception block in the rendered
# YAML references in-container CA paths; .start docker-cp's
# those files in. The full launch flow generates the CA via
# `pipelock_tls_init`; this smoke test calls it directly.
ca_cert_host, ca_key_host = pipelock_tls_init(self.work_dir)
plan = dataclasses.replace(
prep,
internal_network=self.internal_net,
egress_network=self.egress_net,
ca_cert_host_path=ca_cert_host,
ca_key_host_path=ca_key_host,
)
self.sidecar_name = proxy.start(plan)
+43
View File
@@ -37,6 +37,9 @@ class TestBuildConfig(unittest.TestCase):
# No SSH entries → no trusted_domains, no ssrf.
self.assertNotIn("trusted_domains", cfg)
self.assertNotIn("ssrf", cfg)
# Without CA paths, the tls_interception block is omitted —
# pipelock falls back to its built-in default of `enabled: false`.
self.assertNotIn("tls_interception", cfg)
def test_ssh_shape(self):
cfg = pipelock_build_config(fixture_with_ssh().bottles["dev"])
@@ -49,6 +52,31 @@ class TestBuildConfig(unittest.TestCase):
# Strict mode: IPv4 host is also in the api_allowlist union.
self.assertIn("100.78.141.42", cast(list[str], cfg["api_allowlist"]))
def test_tls_interception_block_emitted_when_paths_supplied(self):
    """Supplying both CA paths yields an enabled ``tls_interception`` dict."""
    # PRD 0006: in the real flow these paths come from
    # DockerPipelockProxy's in-container constants; here they are
    # passed explicitly so the resulting dict shape is pinned directly.
    cert_path = "/etc/pipelock-ca.pem"
    key_path = "/etc/pipelock-ca-key.pem"
    cfg = pipelock_build_config(
        fixture_minimal().bottles["dev"],
        ca_cert_path=cert_path,
        ca_key_path=key_path,
    )
    expected_block = dict(enabled=True, ca_cert=cert_path, ca_key=key_path)
    self.assertEqual(expected_block, cfg["tls_interception"])
def test_tls_interception_requires_both_paths(self):
    """A cert path given without a key path raises ``ValueError``."""
    # Only half of the CA pair is supplied. That is treated as a
    # programmer error rather than silently dropping the block.
    half_set = {"ca_cert_path": "/etc/pipelock-ca.pem"}
    with self.assertRaises(ValueError):
        pipelock_build_config(fixture_minimal().bottles["dev"], **half_set)
class TestRenderAndWrite(unittest.TestCase):
def setUp(self):
@@ -101,6 +129,21 @@ class TestRenderAndWrite(unittest.TestCase):
self.assertNotIn("MY_SECRET", content)
self.assertNotIn("prompt-message", content)
def test_render_emits_tls_interception_via_prepare(self):
    """The YAML written by ``DockerPipelockProxy.prepare`` carries the
    ``tls_interception`` block.

    Only the rendering is exercised: the output must contain the block
    header, ``enabled: true``, and the configured in-container CA
    paths. Host-side CA generation belongs to launch (not prepare) and
    is not touched by this test.
    """
    proxy = DockerPipelockProxy()
    plan = proxy.prepare(fixture_minimal().bottles["dev"], "demo", self.out_dir)
    rendered = plan.yaml_path.read_text()
    expected_fragments = (
        "tls_interception:",
        "enabled: true",
        'ca_cert: "/etc/pipelock-ca.pem"',
        'ca_key: "/etc/pipelock-ca-key.pem"',
    )
    # Checked in order; the first missing fragment fails the test.
    for fragment in expected_fragments:
        self.assertIn(fragment, rendered)
# Allow running this test module directly as a script; unittest.main()
# discovers and runs the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()