diff --git a/README.md b/README.md index 030ac6b..07280eb 100644 --- a/README.md +++ b/README.md @@ -314,6 +314,8 @@ egress: auth: scheme: Bearer token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN + pipelock: + tls_passthrough: true --- Common Claude provider boundary. @@ -429,6 +431,8 @@ egress: auth: scheme: Bearer token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN + pipelock: + tls_passthrough: true ``` At launch, `cli.py` reads `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN` from the host diff --git a/bot_bottle/egress_entrypoint.sh b/bot_bottle/egress_entrypoint.sh index 90e8a03..c56de23 100644 --- a/bot_bottle/egress_entrypoint.sh +++ b/bot_bottle/egress_entrypoint.sh @@ -14,7 +14,7 @@ # combined trust bundle (system roots + pipelock CA) and point # mitmproxy at it. The option REPLACES mitmproxy's default # trust store, so passing pipelock's CA alone would break -# pipelock-passthrough hosts (api.anthropic.com etc.). +# route-configured pipelock passthrough hosts. # * `-s /app/egress_addon.py` loads the addon that reads # /etc/egress/routes.yaml. diff --git a/bot_bottle/manifest.py b/bot_bottle/manifest.py index 619d13e..43a05b3 100644 --- a/bot_bottle/manifest.py +++ b/bot_bottle/manifest.py @@ -18,6 +18,8 @@ Bottle schema (frontmatter): user: { name: , email: } # optional remotes: { : , ... } # optional egress: { routes: [ , ... ] } + # route keys: host, path_allowlist, auth, role, pipelock + # pipelock: { tls_passthrough: } supervise: # optional Agent schema (frontmatter): @@ -319,6 +321,39 @@ def _parse_git_config( return git, git_user +@dataclass(frozen=True) +class PipelockRoutePolicy: + """Per-route pipelock policy overrides. + + `TlsPassthrough` adds the route host to pipelock's + `tls_interception.passthrough_domains`, so pipelock still enforces + the hostname allowlist but does not MITM/decrypt request bodies or + headers for that host. + """ + + TlsPassthrough: bool = False + + @classmethod + def from_dict( + cls, bottle_name: str, idx: int, raw: object, + ) -> "PipelockRoutePolicy": + label = f"bottle '{bottle_name}' egress.routes[{idx}] pipelock" + d = _as_json_object(raw, label) + for k in d: + if k not in ("tls_passthrough",): + die( + f"{label} has unknown key {k!r}; " + f"only 'tls_passthrough' is accepted" + ) + tls_passthrough_raw = d.get("tls_passthrough", False) + if not isinstance(tls_passthrough_raw, bool): + die( + f"{label}.tls_passthrough must be a boolean " + f"(was {type(tls_passthrough_raw).__name__})" + ) + return cls(TlsPassthrough=tls_passthrough_raw) + + @dataclass(frozen=True) class EgressRoute: """One route on the per-bottle egress sidecar (PRD 0017). @@ -355,6 +390,7 @@ class EgressRoute: AuthScheme: str = "" TokenRef: str = "" Role: tuple[str, ...] = () + Pipelock: PipelockRoutePolicy = field(default_factory=PipelockRoutePolicy) @classmethod def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute": @@ -451,11 +487,17 @@ class EgressRoute: f"{', '.join(sorted(EGRESS_ROLES))}" ) + pipelock = ( + PipelockRoutePolicy.from_dict(bottle_name, idx, d["pipelock"]) + if "pipelock" in d + else PipelockRoutePolicy() + ) + for k in d: - if k not in ("host", "path_allowlist", "auth", "role"): + if k not in ("host", "path_allowlist", "auth", "role", "pipelock"): die( f"{label} has unknown key {k!r}; accepted keys are " - f"'host', 'path_allowlist', 'auth', 'role'" + f"'host', 'path_allowlist', 'auth', 'role', 'pipelock'" ) return cls( @@ -464,6 +506,7 @@ class EgressRoute: AuthScheme=auth_scheme, TokenRef=token_ref, Role=roles, + Pipelock=pipelock, ) diff --git a/bot_bottle/pipelock.py b/bot_bottle/pipelock.py index 59fdcd3..6d137fb 100644 --- a/bot_bottle/pipelock.py +++ b/bot_bottle/pipelock.py @@ -26,20 +26,10 @@ from .supervise import SUPERVISE_HOSTNAME from .manifest import Bottle # Hosts pipelock should NOT TLS-MITM, even when tls_interception is -# enabled. The Claude API endpoint is an LLM provider — its request -# bodies are user-authored conversation text that legitimately can -# trigger DLP scanners (notably the BIP-39 seed-phrase detector, which -# fires on any 12+ consecutive English words that happen to be on the -# BIP-39 wordlist and pass the checksum). Per pipelock's own -# configuration.md, the recommended treatment for LLM API endpoints is -# `passthrough_domains`: pipelock still proxies the CONNECT (so the -# api_allowlist gate applies), but it does not generate a leaf cert or -# decrypt the body. Body scanning happens on hosts that aren't -# passthrough'd, so DLP protection against agent exfil to other -# allowlisted hosts is unchanged. -DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = ( - "api.anthropic.com", -) +# enabled. This is now route-owned manifest policy via +# `egress.routes[].pipelock.tls_passthrough`; no provider hosts are +# injected implicitly. +DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = () # In-container paths the rendered pipelock YAML references under @@ -109,25 +99,19 @@ def pipelock_seed_phrase_detection_enabled(bottle: Bottle) -> bool: def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]: - """Hostnames pipelock should pass through (no TLS MITM, no body - scan). Default carries the LLM API endpoint — its request bodies - are user-authored conversation text that legitimately trips DLP - scanners (notably pipelock's BIP-39 seed-phrase detector). Every - other allowlisted host is MITM'd by pipelock's per-bottle CA so - its body scanner sees the cleartext. + """Hostnames pipelock should pass through (no TLS MITM). - egress route hosts (github, gitea, npm) are deliberately - NOT auto-added here. egress's HTTPS client trusts pipelock's - CA at runtime (folded into its trust store via docker cp), so - pipelock MITMs and body-scans the egress → upstream leg the - same way it body-scanned the agent's direct HTTPS traffic before - the PRD 0017 cutover. - - `bottle` is kept on the signature for forward-compat (a future - knob might let a manifest opt a host into passthrough); today - the returned list is independent of the bottle.""" - del bottle # not consulted; see docstring. - return sorted(DEFAULT_TLS_PASSTHROUGH) + A route opts in with `pipelock.tls_passthrough: true`. This is + useful for provider API routes where egress injects the + Authorization header after the agent boundary; pipelock still + enforces the host allowlist but does not decrypt and scan that + provider request. + """ + seen: dict[str, None] = {host: None for host in DEFAULT_TLS_PASSTHROUGH} + for route in bottle.egress.routes: + if route.Pipelock.TlsPassthrough: + seen.setdefault(route.Host, None) + return sorted(seen.keys()) diff --git a/examples/bottles/claude.md b/examples/bottles/claude.md index a47037a..766dfc5 100644 --- a/examples/bottles/claude.md +++ b/examples/bottles/claude.md @@ -9,6 +9,8 @@ egress: auth: scheme: Bearer token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN + pipelock: + tls_passthrough: true --- Common Claude provider boundary. Drop this file into diff --git a/tests/integration/test_pipelock_blocks_secret_https_post.py b/tests/integration/test_pipelock_blocks_secret_https_post.py index dee7e6e..b1d1320 100644 --- a/tests/integration/test_pipelock_blocks_secret_https_post.py +++ b/tests/integration/test_pipelock_blocks_secret_https_post.py @@ -12,12 +12,11 @@ pipelock's per-bottle CA so curl trusts pipelock's bumped leaf, and pipelock sees the decrypted body and returns its known `blocked: request body contains secret: ` 403. -The host has to be allowlisted (so the CONNECT is accepted) but NOT -in `tls_interception.passthrough_domains` (so the body actually gets -scanned). `api.anthropic.com` is passthrough'd to skip MITM on the -LLM endpoint, so this probe targets `raw.githubusercontent.com` — -also on the baked allowlist (Claude Code fetches release assets from -it) and intercepted+scanned like any non-passthrough host.""" +The host has to be allowlisted (so the CONNECT is accepted) but must +not opt into `pipelock.tls_passthrough` (so the body actually gets +scanned). This probe targets `raw.githubusercontent.com`, which is on +the baked allowlist and intercepted+scanned like any non-passthrough +host.""" from __future__ import annotations diff --git a/tests/integration/test_pipelock_llm_passthrough.py b/tests/integration/test_pipelock_llm_passthrough.py index f243fe3..f2b008d 100644 --- a/tests/integration/test_pipelock_llm_passthrough.py +++ b/tests/integration/test_pipelock_llm_passthrough.py @@ -1,17 +1,14 @@ -"""Integration: pipelock's `tls_interception.passthrough_domains` -exempts api.anthropic.com from MITM, so request bodies that would -otherwise trip the body-scan layer (notably the BIP-39 seed-phrase -detector firing on user-authored Claude conversation text) are not -inspected and the request reaches Anthropic's TLS endpoint. +"""Integration: route-owned `pipelock.tls_passthrough` renders into +pipelock's `tls_interception.passthrough_domains`, so request bodies +that would otherwise trip the body-scan layer are not inspected and the +request reaches the provider TLS endpoint. Probe: POST the canonical zero-entropy 12-word BIP-39 mnemonic (`abandon` × 11 + `about`) — checksum-valid by construction — to -`https://api.anthropic.com/v1/messages`. Without the passthrough, -pipelock returns a 403 `blocked: request body contains secret: -BIP-39 Seed Phrase`. With it, pipelock relays the CONNECT opaquely -and the upstream replies with whatever it likes (401/4xx from -Anthropic for an unauthenticated junk POST). We assert that the -verdict is NOT pipelock's block. +`https://api.anthropic.com/v1/messages`. With the route policy, +pipelock relays the CONNECT opaquely and the upstream replies with +whatever it likes (401/4xx from Anthropic for an unauthenticated junk +POST). We assert that the verdict is NOT pipelock's block. """ from __future__ import annotations @@ -46,7 +43,13 @@ class TestPipelockLlmPassthrough(unittest.TestCase): def test_bip39_body_to_anthropic_is_not_blocked(self): manifest = Manifest.from_json_obj({ "bottles": { - "dev": {"env": {"SEED": _BIP39_PHRASE}}, + "dev": { + "env": {"SEED": _BIP39_PHRASE}, + "egress": {"routes": [{ + "host": "api.anthropic.com", + "pipelock": {"tls_passthrough": True}, + }]}, + }, }, "agents": { "demo": {"skills": [], "prompt": "", "bottle": "dev"}, diff --git a/tests/integration/test_sandbox_escape.py b/tests/integration/test_sandbox_escape.py index c9fcad2..9bf75a0 100644 --- a/tests/integration/test_sandbox_escape.py +++ b/tests/integration/test_sandbox_escape.py @@ -310,15 +310,10 @@ class TestSandboxEscape(unittest.TestCase): remediation lands as its own PRD before this test merges. DON'T mark expectedFailure to silence it. - Destination note: we use `raw.githubusercontent.com` (one - of the DEFAULT_ALLOWLIST hosts) rather than - api.anthropic.com because pipelock passthrough's the - Anthropic API endpoint specifically — its DLP scanners - false-positive on real LLM conversation bodies (BIP-39 - seed phrases, etc.). That trade-off is documented in - `pipelock.DEFAULT_TLS_PASSTHROUGH`. For non-passthrough - hosts pipelock MITMs and the DLP scan applies, which is - what this attack exercises.""" + Destination note: we use `raw.githubusercontent.com`, one + of the DEFAULT_ALLOWLIST hosts. It is not route-configured + for pipelock TLS passthrough, so pipelock MITMs it and the + DLP scan applies, which is what this attack exercises.""" # Capture HTTP code via curl's -w; don't use --fail so # we get the response body even on 4xx. url_base = "https://raw.githubusercontent.com" diff --git a/tests/unit/test_manifest_egress.py b/tests/unit/test_manifest_egress.py index edf14dc..43f89ef 100644 --- a/tests/unit/test_manifest_egress.py +++ b/tests/unit/test_manifest_egress.py @@ -215,6 +215,34 @@ class TestRole(unittest.TestCase): }]) +class TestPipelockPolicy(unittest.TestCase): + def test_tls_passthrough_route_policy(self): + b = _bottle([{ + "host": "api.openai.com", + "pipelock": {"tls_passthrough": True}, + }]) + self.assertTrue(b.egress.routes[0].Pipelock.TlsPassthrough) + + def test_tls_passthrough_defaults_false(self): + b = _bottle([{"host": "api.openai.com"}]) + self.assertFalse(b.egress.routes[0].Pipelock.TlsPassthrough) + + def test_pipelock_policy_must_be_object(self): + with self.assertRaises(Die): + _bottle([{"host": "x.example", "pipelock": True}]) + + def test_tls_passthrough_must_be_bool(self): + with self.assertRaises(Die): + _bottle([{ + "host": "x.example", + "pipelock": {"tls_passthrough": "yes"}, + }]) + + def test_unknown_pipelock_key_rejected(self): + with self.assertRaises(Die): + _bottle([{"host": "x.example", "pipelock": {"wat": True}}]) + + class TestRouteValidation(unittest.TestCase): def test_duplicate_hosts_rejected(self): # Routes match by exact host; duplicates leave the choice diff --git a/tests/unit/test_pipelock_allowlist.py b/tests/unit/test_pipelock_allowlist.py index c28418b..5a85b0c 100644 --- a/tests/unit/test_pipelock_allowlist.py +++ b/tests/unit/test_pipelock_allowlist.py @@ -89,18 +89,28 @@ class TestAllowlistWithRoutes(unittest.TestCase): class TestTlsPassthrough(unittest.TestCase): - def test_default_includes_api_anthropic(self): + def test_default_empty(self): passthrough = pipelock_effective_tls_passthrough(_bottle({})) - self.assertEqual(["api.anthropic.com"], passthrough) + self.assertEqual([], passthrough) - def test_route_hosts_NOT_added_to_passthrough(self): + def test_route_hosts_not_added_to_passthrough_by_default(self): passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([ {"host": "api.github.com", "auth": {"scheme": "Bearer", "token_ref": "G"}}, {"host": "registry.npmjs.org", "auth": {"scheme": "Bearer", "token_ref": "N"}}, ]))) - self.assertEqual(["api.anthropic.com"], passthrough) + self.assertEqual([], passthrough) + + def test_route_policy_adds_tls_passthrough(self): + passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([ + {"host": "api.openai.com", + "auth": {"scheme": "Bearer", "token_ref": "O"}, + "pipelock": {"tls_passthrough": True}}, + {"host": "api.github.com", + "auth": {"scheme": "Bearer", "token_ref": "G"}}, + ]))) + self.assertEqual(["api.openai.com"], passthrough) if __name__ == "__main__": diff --git a/tests/unit/test_pipelock_yaml.py b/tests/unit/test_pipelock_yaml.py index 7461234..913826d 100644 --- a/tests/unit/test_pipelock_yaml.py +++ b/tests/unit/test_pipelock_yaml.py @@ -54,11 +54,7 @@ class TestBuildConfig(unittest.TestCase): def test_tls_interception_block_emitted_when_paths_supplied(self): # PRD 0006: paths flow in via the platform-neutral in-container - # constants; this directly pins the dict shape. passthrough_domains - # is baked in so LLM provider endpoints (api.anthropic.com) skip - # MITM — pipelock's docs explicitly recommend this for LLM hosts, - # and without it the BIP-39 body scanner false-positives on - # Claude conversation traffic. + # constants; this directly pins the dict shape. cfg = pipelock_build_config( fixture_minimal().bottles["dev"], ca_cert_path="/etc/pipelock-ca.pem", @@ -69,11 +65,28 @@ class TestBuildConfig(unittest.TestCase): "enabled": True, "ca_cert": "/etc/pipelock-ca.pem", "ca_key": "/etc/pipelock-ca-key.pem", - "passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH), + "passthrough_domains": [], }, cfg["tls_interception"], ) - self.assertIn("api.anthropic.com", DEFAULT_TLS_PASSTHROUGH) + self.assertEqual((), DEFAULT_TLS_PASSTHROUGH) + + def test_tls_passthrough_route_policy_emits_domain(self): + bottle = Manifest.from_json_obj({ + "bottles": {"dev": {"egress": {"routes": [ + {"host": "api.openai.com", + "auth": {"scheme": "Bearer", "token_ref": "T"}, + "pipelock": {"tls_passthrough": True}}, + ]}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + cfg = pipelock_build_config( + bottle, + ca_cert_path="/etc/pipelock-ca.pem", + ca_key_path="/etc/pipelock-ca-key.pem", + ) + tls = cast(dict[str, object], cfg["tls_interception"]) + self.assertEqual(["api.openai.com"], tls["passthrough_domains"]) def test_tls_interception_requires_both_paths(self): # Half-set is a programmer error, not a silent omission. @@ -179,19 +192,24 @@ class TestRenderAndWrite(unittest.TestCase): """`PipelockProxy.prepare` plumbs the module-level in-container CA constants through to the YAML. The block should land in the rendered output with `enabled: true`, the configured paths, - and the baked LLM-provider passthrough list. The actual + and any route-owned passthrough domains. The actual host-side CA generation happens in launch (not prepare), so this test exercises only the YAML rendering.""" - plan = PipelockProxy().prepare( - fixture_minimal().bottles["dev"], "demo", self.out_dir - ) + bottle = Manifest.from_json_obj({ + "bottles": {"dev": {"egress": {"routes": [ + {"host": "api.openai.com", + "pipelock": {"tls_passthrough": True}}, + ]}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + plan = PipelockProxy().prepare(bottle, "demo", self.out_dir) content = plan.yaml_path.read_text() self.assertIn("tls_interception:", content) self.assertIn("enabled: true", content) self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content) self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content) self.assertIn("passthrough_domains:", content) - self.assertIn('- "api.anthropic.com"', content) + self.assertIn('- "api.openai.com"', content) def test_render_emits_ssrf_block_when_allowlist_given(self): cfg = pipelock_build_config(