feat(pipelock): allow route tls passthrough policy
test / unit (pull_request) Successful in 37s
test / integration (pull_request) Successful in 58s

This commit is contained in:
2026-05-28 19:19:40 -04:00
parent 3299674c30
commit bcadc07d09
11 changed files with 164 additions and 78 deletions
+4
View File
@@ -314,6 +314,8 @@ egress:
auth: auth:
scheme: Bearer scheme: Bearer
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
pipelock:
tls_passthrough: true
--- ---
Common Claude provider boundary. Common Claude provider boundary.
@@ -429,6 +431,8 @@ egress:
auth: auth:
scheme: Bearer scheme: Bearer
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
pipelock:
tls_passthrough: true
``` ```
At launch, `cli.py` reads `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN` from the host At launch, `cli.py` reads `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN` from the host
+1 -1
View File
@@ -14,7 +14,7 @@
# combined trust bundle (system roots + pipelock CA) and point # combined trust bundle (system roots + pipelock CA) and point
# mitmproxy at it. The option REPLACES mitmproxy's default # mitmproxy at it. The option REPLACES mitmproxy's default
# trust store, so passing pipelock's CA alone would break # trust store, so passing pipelock's CA alone would break
# pipelock-passthrough hosts (api.anthropic.com etc.). # route-configured pipelock passthrough hosts.
# * `-s /app/egress_addon.py` loads the addon that reads # * `-s /app/egress_addon.py` loads the addon that reads
# /etc/egress/routes.yaml. # /etc/egress/routes.yaml.
+45 -2
View File
@@ -18,6 +18,8 @@ Bottle schema (frontmatter):
user: { name: <str>, email: <str> } # optional user: { name: <str>, email: <str> } # optional
remotes: { <host>: <git-entry>, ... } # optional remotes: { <host>: <git-entry>, ... } # optional
egress: { routes: [ <egress-route>, ... ] } egress: { routes: [ <egress-route>, ... ] }
# route keys: host, path_allowlist, auth, role, pipelock
# pipelock: { tls_passthrough: <bool> }
supervise: <bool> # optional supervise: <bool> # optional
Agent schema (frontmatter): Agent schema (frontmatter):
@@ -319,6 +321,39 @@ def _parse_git_config(
return git, git_user return git, git_user
@dataclass(frozen=True)
class PipelockRoutePolicy:
    """Pipelock policy overrides attached to a single egress route.

    When `TlsPassthrough` is set, the route's host is added to
    pipelock's `tls_interception.passthrough_domains`: the hostname
    allowlist still applies, but pipelock does not MITM/decrypt the
    request bodies or headers for that host.
    """

    # Defaults to False, i.e. the route does not opt into passthrough.
    TlsPassthrough: bool = False

    @classmethod
    def from_dict(
        cls, bottle_name: str, idx: int, raw: object,
    ) -> "PipelockRoutePolicy":
        """Validate a route's raw `pipelock` value and build the policy.

        `bottle_name` and `idx` are used only to label error messages.
        `raw` goes through `_as_json_object`; after that, any key other
        than `tls_passthrough`, or a non-boolean `tls_passthrough`
        value, is reported through `die`.
        """
        where = f"bottle '{bottle_name}' egress.routes[{idx}] pipelock"
        policy = _as_json_object(raw, where)

        accepted_keys = ("tls_passthrough",)
        for key in policy:
            if key in accepted_keys:
                continue
            die(
                f"{where} has unknown key {key!r}; "
                f"only 'tls_passthrough' is accepted"
            )

        # A missing key means the route keeps the default (no passthrough).
        flag = policy.get("tls_passthrough", False)
        if not isinstance(flag, bool):
            die(
                f"{where}.tls_passthrough must be a boolean "
                f"(was {type(flag).__name__})"
            )
        return cls(TlsPassthrough=flag)
@dataclass(frozen=True) @dataclass(frozen=True)
class EgressRoute: class EgressRoute:
"""One route on the per-bottle egress sidecar (PRD 0017). """One route on the per-bottle egress sidecar (PRD 0017).
@@ -355,6 +390,7 @@ class EgressRoute:
AuthScheme: str = "" AuthScheme: str = ""
TokenRef: str = "" TokenRef: str = ""
Role: tuple[str, ...] = () Role: tuple[str, ...] = ()
Pipelock: PipelockRoutePolicy = field(default_factory=PipelockRoutePolicy)
@classmethod @classmethod
def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute": def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute":
@@ -451,11 +487,17 @@ class EgressRoute:
f"{', '.join(sorted(EGRESS_ROLES))}" f"{', '.join(sorted(EGRESS_ROLES))}"
) )
pipelock = (
PipelockRoutePolicy.from_dict(bottle_name, idx, d["pipelock"])
if "pipelock" in d
else PipelockRoutePolicy()
)
for k in d: for k in d:
if k not in ("host", "path_allowlist", "auth", "role"): if k not in ("host", "path_allowlist", "auth", "role", "pipelock"):
die( die(
f"{label} has unknown key {k!r}; accepted keys are " f"{label} has unknown key {k!r}; accepted keys are "
f"'host', 'path_allowlist', 'auth', 'role'" f"'host', 'path_allowlist', 'auth', 'role', 'pipelock'"
) )
return cls( return cls(
@@ -464,6 +506,7 @@ class EgressRoute:
AuthScheme=auth_scheme, AuthScheme=auth_scheme,
TokenRef=token_ref, TokenRef=token_ref,
Role=roles, Role=roles,
Pipelock=pipelock,
) )
+16 -32
View File
@@ -26,20 +26,10 @@ from .supervise import SUPERVISE_HOSTNAME
from .manifest import Bottle from .manifest import Bottle
# Hosts pipelock should NOT TLS-MITM, even when tls_interception is # Hosts pipelock should NOT TLS-MITM, even when tls_interception is
# enabled. The Claude API endpoint is an LLM provider — its request # enabled. This is now route-owned manifest policy via
# bodies are user-authored conversation text that legitimately can # `egress.routes[].pipelock.tls_passthrough`; no provider hosts are
# trigger DLP scanners (notably the BIP-39 seed-phrase detector, which # injected implicitly.
# fires on any 12+ consecutive English words that happen to be on the DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = ()
# BIP-39 wordlist and pass the checksum). Per pipelock's own
# configuration.md, the recommended treatment for LLM API endpoints is
# `passthrough_domains`: pipelock still proxies the CONNECT (so the
# api_allowlist gate applies), but it does not generate a leaf cert or
# decrypt the body. Body scanning happens on hosts that aren't
# passthrough'd, so DLP protection against agent exfil to other
# allowlisted hosts is unchanged.
DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = (
"api.anthropic.com",
)
# In-container paths the rendered pipelock YAML references under # In-container paths the rendered pipelock YAML references under
@@ -109,25 +99,19 @@ def pipelock_seed_phrase_detection_enabled(bottle: Bottle) -> bool:
def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]: def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]:
"""Hostnames pipelock should pass through (no TLS MITM, no body """Hostnames pipelock should pass through (no TLS MITM).
scan). Default carries the LLM API endpoint its request bodies
are user-authored conversation text that legitimately trips DLP
scanners (notably pipelock's BIP-39 seed-phrase detector). Every
other allowlisted host is MITM'd by pipelock's per-bottle CA so
its body scanner sees the cleartext.
egress route hosts (github, gitea, npm) are deliberately A route opts in with `pipelock.tls_passthrough: true`. This is
NOT auto-added here. egress's HTTPS client trusts pipelock's useful for provider API routes where egress injects the
CA at runtime (folded into its trust store via docker cp), so Authorization header after the agent boundary; pipelock still
pipelock MITMs and body-scans the egress upstream leg the enforces the host allowlist but does not decrypt and scan that
same way it body-scanned the agent's direct HTTPS traffic before provider request.
the PRD 0017 cutover. """
seen: dict[str, None] = {host: None for host in DEFAULT_TLS_PASSTHROUGH}
`bottle` is kept on the signature for forward-compat (a future for route in bottle.egress.routes:
knob might let a manifest opt a host into passthrough); today if route.Pipelock.TlsPassthrough:
the returned list is independent of the bottle.""" seen.setdefault(route.Host, None)
del bottle # not consulted; see docstring. return sorted(seen.keys())
return sorted(DEFAULT_TLS_PASSTHROUGH)
+2
View File
@@ -9,6 +9,8 @@ egress:
auth: auth:
scheme: Bearer scheme: Bearer
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
pipelock:
tls_passthrough: true
--- ---
Common Claude provider boundary. Drop this file into Common Claude provider boundary. Drop this file into
@@ -12,12 +12,11 @@ pipelock's per-bottle CA so curl trusts pipelock's bumped leaf, and
pipelock sees the decrypted body and returns its known pipelock sees the decrypted body and returns its known
`blocked: request body contains secret: <pattern>` 403. `blocked: request body contains secret: <pattern>` 403.
The host has to be allowlisted (so the CONNECT is accepted) but NOT The host has to be allowlisted (so the CONNECT is accepted) but must
in `tls_interception.passthrough_domains` (so the body actually gets not opt into `pipelock.tls_passthrough` (so the body actually gets
scanned). `api.anthropic.com` is passthrough'd to skip MITM on the scanned). This probe targets `raw.githubusercontent.com`, which is on
LLM endpoint, so this probe targets `raw.githubusercontent.com` the baked allowlist and intercepted+scanned like any non-passthrough
also on the baked allowlist (Claude Code fetches release assets from host."""
it) and intercepted+scanned like any non-passthrough host."""
from __future__ import annotations from __future__ import annotations
@@ -1,17 +1,14 @@
"""Integration: pipelock's `tls_interception.passthrough_domains` """Integration: route-owned `pipelock.tls_passthrough` renders into
exempts api.anthropic.com from MITM, so request bodies that would pipelock's `tls_interception.passthrough_domains`, so request bodies
otherwise trip the body-scan layer (notably the BIP-39 seed-phrase that would otherwise trip the body-scan layer are not inspected and the
detector firing on user-authored Claude conversation text) are not request reaches the provider TLS endpoint.
inspected and the request reaches Anthropic's TLS endpoint.
Probe: POST the canonical zero-entropy 12-word BIP-39 mnemonic Probe: POST the canonical zero-entropy 12-word BIP-39 mnemonic
(`abandon` × 11 + `about`) checksum-valid by construction to (`abandon` × 11 + `about`) checksum-valid by construction to
`https://api.anthropic.com/v1/messages`. Without the passthrough, `https://api.anthropic.com/v1/messages`. With the route policy,
pipelock returns a 403 `blocked: request body contains secret: pipelock relays the CONNECT opaquely and the upstream replies with
BIP-39 Seed Phrase`. With it, pipelock relays the CONNECT opaquely whatever it likes (401/4xx from Anthropic for an unauthenticated junk
and the upstream replies with whatever it likes (401/4xx from POST). We assert that the verdict is NOT pipelock's block.
Anthropic for an unauthenticated junk POST). We assert that the
verdict is NOT pipelock's block.
""" """
from __future__ import annotations from __future__ import annotations
@@ -46,7 +43,13 @@ class TestPipelockLlmPassthrough(unittest.TestCase):
def test_bip39_body_to_anthropic_is_not_blocked(self): def test_bip39_body_to_anthropic_is_not_blocked(self):
manifest = Manifest.from_json_obj({ manifest = Manifest.from_json_obj({
"bottles": { "bottles": {
"dev": {"env": {"SEED": _BIP39_PHRASE}}, "dev": {
"env": {"SEED": _BIP39_PHRASE},
"egress": {"routes": [{
"host": "api.anthropic.com",
"pipelock": {"tls_passthrough": True},
}]},
},
}, },
"agents": { "agents": {
"demo": {"skills": [], "prompt": "", "bottle": "dev"}, "demo": {"skills": [], "prompt": "", "bottle": "dev"},
+4 -9
View File
@@ -310,15 +310,10 @@ class TestSandboxEscape(unittest.TestCase):
remediation lands as its own PRD before this test merges. remediation lands as its own PRD before this test merges.
DON'T mark expectedFailure to silence it. DON'T mark expectedFailure to silence it.
Destination note: we use `raw.githubusercontent.com` (one Destination note: we use `raw.githubusercontent.com`, one
of the DEFAULT_ALLOWLIST hosts) rather than of the DEFAULT_ALLOWLIST hosts. It is not route-configured
api.anthropic.com because pipelock passthrough's the for pipelock TLS passthrough, so pipelock MITMs it and the
Anthropic API endpoint specifically its DLP scanners DLP scan applies, which is what this attack exercises."""
false-positive on real LLM conversation bodies (BIP-39
seed phrases, etc.). That trade-off is documented in
`pipelock.DEFAULT_TLS_PASSTHROUGH`. For non-passthrough
hosts pipelock MITMs and the DLP scan applies, which is
what this attack exercises."""
# Capture HTTP code via curl's -w; don't use --fail so # Capture HTTP code via curl's -w; don't use --fail so
# we get the response body even on 4xx. # we get the response body even on 4xx.
url_base = "https://raw.githubusercontent.com" url_base = "https://raw.githubusercontent.com"
+28
View File
@@ -215,6 +215,34 @@ class TestRole(unittest.TestCase):
}]) }])
class TestPipelockPolicy(unittest.TestCase):
    """Parsing of the per-route `pipelock` policy object."""

    def test_tls_passthrough_route_policy(self):
        # An explicit opt-in is carried onto the parsed route.
        route = {
            "host": "api.openai.com",
            "pipelock": {"tls_passthrough": True},
        }
        parsed = _bottle([route]).egress.routes[0]
        self.assertTrue(parsed.Pipelock.TlsPassthrough)

    def test_tls_passthrough_defaults_false(self):
        # A route with no `pipelock` key does not opt in.
        parsed = _bottle([{"host": "api.openai.com"}]).egress.routes[0]
        self.assertFalse(parsed.Pipelock.TlsPassthrough)

    def test_pipelock_policy_must_be_object(self):
        # A bare boolean in place of the policy object is rejected.
        self.assertRaises(
            Die, _bottle, [{"host": "x.example", "pipelock": True}],
        )

    def test_tls_passthrough_must_be_bool(self):
        # A string such as "yes" is not accepted as a boolean.
        routes = [{
            "host": "x.example",
            "pipelock": {"tls_passthrough": "yes"},
        }]
        self.assertRaises(Die, _bottle, routes)

    def test_unknown_pipelock_key_rejected(self):
        # Keys other than `tls_passthrough` are rejected.
        self.assertRaises(
            Die, _bottle, [{"host": "x.example", "pipelock": {"wat": True}}],
        )
class TestRouteValidation(unittest.TestCase): class TestRouteValidation(unittest.TestCase):
def test_duplicate_hosts_rejected(self): def test_duplicate_hosts_rejected(self):
# Routes match by exact host; duplicates leave the choice # Routes match by exact host; duplicates leave the choice
+14 -4
View File
@@ -89,18 +89,28 @@ class TestAllowlistWithRoutes(unittest.TestCase):
class TestTlsPassthrough(unittest.TestCase): class TestTlsPassthrough(unittest.TestCase):
def test_default_includes_api_anthropic(self): def test_default_empty(self):
passthrough = pipelock_effective_tls_passthrough(_bottle({})) passthrough = pipelock_effective_tls_passthrough(_bottle({}))
self.assertEqual(["api.anthropic.com"], passthrough) self.assertEqual([], passthrough)
def test_route_hosts_NOT_added_to_passthrough(self): def test_route_hosts_not_added_to_passthrough_by_default(self):
passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([ passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([
{"host": "api.github.com", {"host": "api.github.com",
"auth": {"scheme": "Bearer", "token_ref": "G"}}, "auth": {"scheme": "Bearer", "token_ref": "G"}},
{"host": "registry.npmjs.org", {"host": "registry.npmjs.org",
"auth": {"scheme": "Bearer", "token_ref": "N"}}, "auth": {"scheme": "Bearer", "token_ref": "N"}},
]))) ])))
self.assertEqual(["api.anthropic.com"], passthrough) self.assertEqual([], passthrough)
def test_route_policy_adds_tls_passthrough(self):
passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([
{"host": "api.openai.com",
"auth": {"scheme": "Bearer", "token_ref": "O"},
"pipelock": {"tls_passthrough": True}},
{"host": "api.github.com",
"auth": {"scheme": "Bearer", "token_ref": "G"}},
])))
self.assertEqual(["api.openai.com"], passthrough)
if __name__ == "__main__": if __name__ == "__main__":
+30 -12
View File
@@ -54,11 +54,7 @@ class TestBuildConfig(unittest.TestCase):
def test_tls_interception_block_emitted_when_paths_supplied(self): def test_tls_interception_block_emitted_when_paths_supplied(self):
# PRD 0006: paths flow in via the platform-neutral in-container # PRD 0006: paths flow in via the platform-neutral in-container
# constants; this directly pins the dict shape. passthrough_domains # constants; this directly pins the dict shape.
# is baked in so LLM provider endpoints (api.anthropic.com) skip
# MITM — pipelock's docs explicitly recommend this for LLM hosts,
# and without it the BIP-39 body scanner false-positives on
# Claude conversation traffic.
cfg = pipelock_build_config( cfg = pipelock_build_config(
fixture_minimal().bottles["dev"], fixture_minimal().bottles["dev"],
ca_cert_path="/etc/pipelock-ca.pem", ca_cert_path="/etc/pipelock-ca.pem",
@@ -69,11 +65,28 @@ class TestBuildConfig(unittest.TestCase):
"enabled": True, "enabled": True,
"ca_cert": "/etc/pipelock-ca.pem", "ca_cert": "/etc/pipelock-ca.pem",
"ca_key": "/etc/pipelock-ca-key.pem", "ca_key": "/etc/pipelock-ca-key.pem",
"passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH), "passthrough_domains": [],
}, },
cfg["tls_interception"], cfg["tls_interception"],
) )
self.assertIn("api.anthropic.com", DEFAULT_TLS_PASSTHROUGH) self.assertEqual((), DEFAULT_TLS_PASSTHROUGH)
def test_tls_passthrough_route_policy_emits_domain(self):
    # A route that opts into `pipelock.tls_passthrough` should show up
    # in the built config's tls_interception.passthrough_domains.
    opted_in_route = {
        "host": "api.openai.com",
        "auth": {"scheme": "Bearer", "token_ref": "T"},
        "pipelock": {"tls_passthrough": True},
    }
    manifest = Manifest.from_json_obj({
        "bottles": {"dev": {"egress": {"routes": [opted_in_route]}}},
        "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
    })
    cfg = pipelock_build_config(
        manifest.bottles["dev"],
        ca_cert_path="/etc/pipelock-ca.pem",
        ca_key_path="/etc/pipelock-ca-key.pem",
    )
    tls_block = cast(dict[str, object], cfg["tls_interception"])
    self.assertEqual(["api.openai.com"], tls_block["passthrough_domains"])
def test_tls_interception_requires_both_paths(self): def test_tls_interception_requires_both_paths(self):
# Half-set is a programmer error, not a silent omission. # Half-set is a programmer error, not a silent omission.
@@ -179,19 +192,24 @@ class TestRenderAndWrite(unittest.TestCase):
"""`PipelockProxy.prepare` plumbs the module-level in-container """`PipelockProxy.prepare` plumbs the module-level in-container
CA constants through to the YAML. The block should land in the CA constants through to the YAML. The block should land in the
rendered output with `enabled: true`, the configured paths, rendered output with `enabled: true`, the configured paths,
and the baked LLM-provider passthrough list. The actual and any route-owned passthrough domains. The actual
host-side CA generation happens in launch (not prepare), so host-side CA generation happens in launch (not prepare), so
this test exercises only the YAML rendering.""" this test exercises only the YAML rendering."""
plan = PipelockProxy().prepare( bottle = Manifest.from_json_obj({
fixture_minimal().bottles["dev"], "demo", self.out_dir "bottles": {"dev": {"egress": {"routes": [
) {"host": "api.openai.com",
"pipelock": {"tls_passthrough": True}},
]}}},
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
}).bottles["dev"]
plan = PipelockProxy().prepare(bottle, "demo", self.out_dir)
content = plan.yaml_path.read_text() content = plan.yaml_path.read_text()
self.assertIn("tls_interception:", content) self.assertIn("tls_interception:", content)
self.assertIn("enabled: true", content) self.assertIn("enabled: true", content)
self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content) self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content)
self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content) self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content)
self.assertIn("passthrough_domains:", content) self.assertIn("passthrough_domains:", content)
self.assertIn('- "api.anthropic.com"', content) self.assertIn('- "api.openai.com"', content)
def test_render_emits_ssrf_block_when_allowlist_given(self): def test_render_emits_ssrf_block_when_allowlist_given(self):
cfg = pipelock_build_config( cfg = pipelock_build_config(