feat(pipelock): allow route tls passthrough policy
This commit is contained in:
@@ -314,6 +314,8 @@ egress:
|
||||
auth:
|
||||
scheme: Bearer
|
||||
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
|
||||
pipelock:
|
||||
tls_passthrough: true
|
||||
---
|
||||
|
||||
Common Claude provider boundary.
|
||||
@@ -429,6 +431,8 @@ egress:
|
||||
auth:
|
||||
scheme: Bearer
|
||||
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
|
||||
pipelock:
|
||||
tls_passthrough: true
|
||||
```
|
||||
|
||||
At launch, `cli.py` reads `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN` from the host
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
# combined trust bundle (system roots + pipelock CA) and point
|
||||
# mitmproxy at it. The option REPLACES mitmproxy's default
|
||||
# trust store, so passing pipelock's CA alone would break
|
||||
# pipelock-passthrough hosts (api.anthropic.com etc.).
|
||||
# route-configured pipelock passthrough hosts.
|
||||
# * `-s /app/egress_addon.py` loads the addon that reads
|
||||
# /etc/egress/routes.yaml.
|
||||
|
||||
|
||||
+45
-2
@@ -18,6 +18,8 @@ Bottle schema (frontmatter):
|
||||
user: { name: <str>, email: <str> } # optional
|
||||
remotes: { <host>: <git-entry>, ... } # optional
|
||||
egress: { routes: [ <egress-route>, ... ] }
|
||||
# route keys: host, path_allowlist, auth, role, pipelock
|
||||
# pipelock: { tls_passthrough: <bool> }
|
||||
supervise: <bool> # optional
|
||||
|
||||
Agent schema (frontmatter):
|
||||
@@ -319,6 +321,39 @@ def _parse_git_config(
|
||||
return git, git_user
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PipelockRoutePolicy:
    """Pipelock policy overrides attached to a single egress route.

    Setting `TlsPassthrough` places the route host in pipelock's
    `tls_interception.passthrough_domains`. Pipelock keeps enforcing
    the hostname allowlist for that host, but it no longer
    MITMs/decrypts the request headers or bodies sent to it.
    """

    # Off by default: a route must opt in explicitly.
    TlsPassthrough: bool = False

    @classmethod
    def from_dict(
        cls, bottle_name: str, idx: int, raw: object,
    ) -> "PipelockRoutePolicy":
        """Build the policy from a route's raw `pipelock` manifest value.

        `bottle_name` and `idx` are used only to label error messages
        with the offending bottle and route position. Validation
        failures are reported through `die`: an unrecognised key, or a
        `tls_passthrough` value that is not a real boolean.
        """
        where = f"bottle '{bottle_name}' egress.routes[{idx}] pipelock"
        policy = _as_json_object(raw, where)

        accepted_keys = ("tls_passthrough",)
        for key in policy:
            if key in accepted_keys:
                continue
            die(
                f"{where} has unknown key {key!r}; "
                f"only 'tls_passthrough' is accepted"
            )

        # A missing key means "no override", i.e. passthrough stays off.
        passthrough = policy.get("tls_passthrough", False)
        if not isinstance(passthrough, bool):
            die(
                f"{where}.tls_passthrough must be a boolean "
                f"(was {type(passthrough).__name__})"
            )
        return cls(TlsPassthrough=passthrough)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EgressRoute:
|
||||
"""One route on the per-bottle egress sidecar (PRD 0017).
|
||||
@@ -355,6 +390,7 @@ class EgressRoute:
|
||||
AuthScheme: str = ""
|
||||
TokenRef: str = ""
|
||||
Role: tuple[str, ...] = ()
|
||||
Pipelock: PipelockRoutePolicy = field(default_factory=PipelockRoutePolicy)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute":
|
||||
@@ -451,11 +487,17 @@ class EgressRoute:
|
||||
f"{', '.join(sorted(EGRESS_ROLES))}"
|
||||
)
|
||||
|
||||
pipelock = (
|
||||
PipelockRoutePolicy.from_dict(bottle_name, idx, d["pipelock"])
|
||||
if "pipelock" in d
|
||||
else PipelockRoutePolicy()
|
||||
)
|
||||
|
||||
for k in d:
|
||||
if k not in ("host", "path_allowlist", "auth", "role"):
|
||||
if k not in ("host", "path_allowlist", "auth", "role", "pipelock"):
|
||||
die(
|
||||
f"{label} has unknown key {k!r}; accepted keys are "
|
||||
f"'host', 'path_allowlist', 'auth', 'role'"
|
||||
f"'host', 'path_allowlist', 'auth', 'role', 'pipelock'"
|
||||
)
|
||||
|
||||
return cls(
|
||||
@@ -464,6 +506,7 @@ class EgressRoute:
|
||||
AuthScheme=auth_scheme,
|
||||
TokenRef=token_ref,
|
||||
Role=roles,
|
||||
Pipelock=pipelock,
|
||||
)
|
||||
|
||||
|
||||
|
||||
+16
-32
@@ -26,20 +26,10 @@ from .supervise import SUPERVISE_HOSTNAME
|
||||
from .manifest import Bottle
|
||||
|
||||
# Hosts pipelock should NOT TLS-MITM, even when tls_interception is
|
||||
# enabled. The Claude API endpoint is an LLM provider — its request
|
||||
# bodies are user-authored conversation text that legitimately can
|
||||
# trigger DLP scanners (notably the BIP-39 seed-phrase detector, which
|
||||
# fires on any 12+ consecutive English words that happen to be on the
|
||||
# BIP-39 wordlist and pass the checksum). Per pipelock's own
|
||||
# configuration.md, the recommended treatment for LLM API endpoints is
|
||||
# `passthrough_domains`: pipelock still proxies the CONNECT (so the
|
||||
# api_allowlist gate applies), but it does not generate a leaf cert or
|
||||
# decrypt the body. Body scanning happens on hosts that aren't
|
||||
# passthrough'd, so DLP protection against agent exfil to other
|
||||
# allowlisted hosts is unchanged.
|
||||
DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = (
|
||||
"api.anthropic.com",
|
||||
)
|
||||
# enabled. This is now route-owned manifest policy via
|
||||
# `egress.routes[].pipelock.tls_passthrough`; no provider hosts are
|
||||
# injected implicitly.
|
||||
DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = ()
|
||||
|
||||
|
||||
# In-container paths the rendered pipelock YAML references under
|
||||
@@ -109,25 +99,19 @@ def pipelock_seed_phrase_detection_enabled(bottle: Bottle) -> bool:
|
||||
|
||||
|
||||
def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]:
|
||||
"""Hostnames pipelock should pass through (no TLS MITM, no body
|
||||
scan). Default carries the LLM API endpoint — its request bodies
|
||||
are user-authored conversation text that legitimately trips DLP
|
||||
scanners (notably pipelock's BIP-39 seed-phrase detector). Every
|
||||
other allowlisted host is MITM'd by pipelock's per-bottle CA so
|
||||
its body scanner sees the cleartext.
|
||||
"""Hostnames pipelock should pass through (no TLS MITM).
|
||||
|
||||
egress route hosts (github, gitea, npm) are deliberately
|
||||
NOT auto-added here. egress's HTTPS client trusts pipelock's
|
||||
CA at runtime (folded into its trust store via docker cp), so
|
||||
pipelock MITMs and body-scans the egress → upstream leg the
|
||||
same way it body-scanned the agent's direct HTTPS traffic before
|
||||
the PRD 0017 cutover.
|
||||
|
||||
`bottle` is kept on the signature for forward-compat (a future
|
||||
knob might let a manifest opt a host into passthrough); today
|
||||
the returned list is independent of the bottle."""
|
||||
del bottle # not consulted; see docstring.
|
||||
return sorted(DEFAULT_TLS_PASSTHROUGH)
|
||||
A route opts in with `pipelock.tls_passthrough: true`. This is
|
||||
useful for provider API routes where egress injects the
|
||||
Authorization header after the agent boundary; pipelock still
|
||||
enforces the host allowlist but does not decrypt and scan that
|
||||
provider request.
|
||||
"""
|
||||
seen: dict[str, None] = {host: None for host in DEFAULT_TLS_PASSTHROUGH}
|
||||
for route in bottle.egress.routes:
|
||||
if route.Pipelock.TlsPassthrough:
|
||||
seen.setdefault(route.Host, None)
|
||||
return sorted(seen.keys())
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@ egress:
|
||||
auth:
|
||||
scheme: Bearer
|
||||
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
|
||||
pipelock:
|
||||
tls_passthrough: true
|
||||
---
|
||||
|
||||
Common Claude provider boundary. Drop this file into
|
||||
|
||||
@@ -12,12 +12,11 @@ pipelock's per-bottle CA so curl trusts pipelock's bumped leaf, and
|
||||
pipelock sees the decrypted body and returns its known
|
||||
`blocked: request body contains secret: <pattern>` 403.
|
||||
|
||||
The host has to be allowlisted (so the CONNECT is accepted) but NOT
|
||||
in `tls_interception.passthrough_domains` (so the body actually gets
|
||||
scanned). `api.anthropic.com` is passthrough'd to skip MITM on the
|
||||
LLM endpoint, so this probe targets `raw.githubusercontent.com` —
|
||||
also on the baked allowlist (Claude Code fetches release assets from
|
||||
it) and intercepted+scanned like any non-passthrough host."""
|
||||
The host has to be allowlisted (so the CONNECT is accepted) but must
|
||||
not opt into `pipelock.tls_passthrough` (so the body actually gets
|
||||
scanned). This probe targets `raw.githubusercontent.com`, which is on
|
||||
the baked allowlist and intercepted+scanned like any non-passthrough
|
||||
host."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
"""Integration: pipelock's `tls_interception.passthrough_domains`
|
||||
exempts api.anthropic.com from MITM, so request bodies that would
|
||||
otherwise trip the body-scan layer (notably the BIP-39 seed-phrase
|
||||
detector firing on user-authored Claude conversation text) are not
|
||||
inspected and the request reaches Anthropic's TLS endpoint.
|
||||
"""Integration: route-owned `pipelock.tls_passthrough` renders into
|
||||
pipelock's `tls_interception.passthrough_domains`, so request bodies
|
||||
that would otherwise trip the body-scan layer are not inspected and the
|
||||
request reaches the provider TLS endpoint.
|
||||
|
||||
Probe: POST the canonical zero-entropy 12-word BIP-39 mnemonic
|
||||
(`abandon` × 11 + `about`) — checksum-valid by construction — to
|
||||
`https://api.anthropic.com/v1/messages`. Without the passthrough,
|
||||
pipelock returns a 403 `blocked: request body contains secret:
|
||||
BIP-39 Seed Phrase`. With it, pipelock relays the CONNECT opaquely
|
||||
and the upstream replies with whatever it likes (401/4xx from
|
||||
Anthropic for an unauthenticated junk POST). We assert that the
|
||||
verdict is NOT pipelock's block.
|
||||
`https://api.anthropic.com/v1/messages`. With the route policy,
|
||||
pipelock relays the CONNECT opaquely and the upstream replies with
|
||||
whatever it likes (401/4xx from Anthropic for an unauthenticated junk
|
||||
POST). We assert that the verdict is NOT pipelock's block.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -46,7 +43,13 @@ class TestPipelockLlmPassthrough(unittest.TestCase):
|
||||
def test_bip39_body_to_anthropic_is_not_blocked(self):
|
||||
manifest = Manifest.from_json_obj({
|
||||
"bottles": {
|
||||
"dev": {"env": {"SEED": _BIP39_PHRASE}},
|
||||
"dev": {
|
||||
"env": {"SEED": _BIP39_PHRASE},
|
||||
"egress": {"routes": [{
|
||||
"host": "api.anthropic.com",
|
||||
"pipelock": {"tls_passthrough": True},
|
||||
}]},
|
||||
},
|
||||
},
|
||||
"agents": {
|
||||
"demo": {"skills": [], "prompt": "", "bottle": "dev"},
|
||||
|
||||
@@ -310,15 +310,10 @@ class TestSandboxEscape(unittest.TestCase):
|
||||
remediation lands as its own PRD before this test merges.
|
||||
DON'T mark expectedFailure to silence it.
|
||||
|
||||
Destination note: we use `raw.githubusercontent.com` (one
|
||||
of the DEFAULT_ALLOWLIST hosts) rather than
|
||||
api.anthropic.com because pipelock passthrough's the
|
||||
Anthropic API endpoint specifically — its DLP scanners
|
||||
false-positive on real LLM conversation bodies (BIP-39
|
||||
seed phrases, etc.). That trade-off is documented in
|
||||
`pipelock.DEFAULT_TLS_PASSTHROUGH`. For non-passthrough
|
||||
hosts pipelock MITMs and the DLP scan applies, which is
|
||||
what this attack exercises."""
|
||||
Destination note: we use `raw.githubusercontent.com`, one
|
||||
of the DEFAULT_ALLOWLIST hosts. It is not route-configured
|
||||
for pipelock TLS passthrough, so pipelock MITMs it and the
|
||||
DLP scan applies, which is what this attack exercises."""
|
||||
# Capture HTTP code via curl's -w; don't use --fail so
|
||||
# we get the response body even on 4xx.
|
||||
url_base = "https://raw.githubusercontent.com"
|
||||
|
||||
@@ -215,6 +215,34 @@ class TestRole(unittest.TestCase):
|
||||
}])
|
||||
|
||||
|
||||
class TestPipelockPolicy(unittest.TestCase):
    """Parsing and validation of the per-route `pipelock` policy object."""

    def test_tls_passthrough_route_policy(self):
        # An explicit opt-in is carried through to the parsed route.
        routes = [{
            "host": "api.openai.com",
            "pipelock": {"tls_passthrough": True},
        }]
        parsed_route = _bottle(routes).egress.routes[0]
        self.assertTrue(parsed_route.Pipelock.TlsPassthrough)

    def test_tls_passthrough_defaults_false(self):
        # A route without a `pipelock` key leaves passthrough off.
        routes = [{"host": "api.openai.com"}]
        parsed_route = _bottle(routes).egress.routes[0]
        self.assertFalse(parsed_route.Pipelock.TlsPassthrough)

    def test_pipelock_policy_must_be_object(self):
        # A bare boolean in place of the policy object is rejected.
        routes = [{"host": "x.example", "pipelock": True}]
        with self.assertRaises(Die):
            _bottle(routes)

    def test_tls_passthrough_must_be_bool(self):
        # String look-alikes such as "yes" are not coerced to booleans.
        routes = [{
            "host": "x.example",
            "pipelock": {"tls_passthrough": "yes"},
        }]
        with self.assertRaises(Die):
            _bottle(routes)

    def test_unknown_pipelock_key_rejected(self):
        # Keys other than `tls_passthrough` are refused.
        routes = [{"host": "x.example", "pipelock": {"wat": True}}]
        with self.assertRaises(Die):
            _bottle(routes)
|
||||
|
||||
|
||||
class TestRouteValidation(unittest.TestCase):
|
||||
def test_duplicate_hosts_rejected(self):
|
||||
# Routes match by exact host; duplicates leave the choice
|
||||
|
||||
@@ -89,18 +89,28 @@ class TestAllowlistWithRoutes(unittest.TestCase):
|
||||
|
||||
|
||||
class TestTlsPassthrough(unittest.TestCase):
|
||||
def test_default_includes_api_anthropic(self):
|
||||
def test_default_empty(self):
|
||||
passthrough = pipelock_effective_tls_passthrough(_bottle({}))
|
||||
self.assertEqual(["api.anthropic.com"], passthrough)
|
||||
self.assertEqual([], passthrough)
|
||||
|
||||
def test_route_hosts_NOT_added_to_passthrough(self):
|
||||
def test_route_hosts_not_added_to_passthrough_by_default(self):
|
||||
passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([
|
||||
{"host": "api.github.com",
|
||||
"auth": {"scheme": "Bearer", "token_ref": "G"}},
|
||||
{"host": "registry.npmjs.org",
|
||||
"auth": {"scheme": "Bearer", "token_ref": "N"}},
|
||||
])))
|
||||
self.assertEqual(["api.anthropic.com"], passthrough)
|
||||
self.assertEqual([], passthrough)
|
||||
|
||||
def test_route_policy_adds_tls_passthrough(self):
|
||||
passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([
|
||||
{"host": "api.openai.com",
|
||||
"auth": {"scheme": "Bearer", "token_ref": "O"},
|
||||
"pipelock": {"tls_passthrough": True}},
|
||||
{"host": "api.github.com",
|
||||
"auth": {"scheme": "Bearer", "token_ref": "G"}},
|
||||
])))
|
||||
self.assertEqual(["api.openai.com"], passthrough)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -54,11 +54,7 @@ class TestBuildConfig(unittest.TestCase):
|
||||
|
||||
def test_tls_interception_block_emitted_when_paths_supplied(self):
|
||||
# PRD 0006: paths flow in via the platform-neutral in-container
|
||||
# constants; this directly pins the dict shape. passthrough_domains
|
||||
# is baked in so LLM provider endpoints (api.anthropic.com) skip
|
||||
# MITM — pipelock's docs explicitly recommend this for LLM hosts,
|
||||
# and without it the BIP-39 body scanner false-positives on
|
||||
# Claude conversation traffic.
|
||||
# constants; this directly pins the dict shape.
|
||||
cfg = pipelock_build_config(
|
||||
fixture_minimal().bottles["dev"],
|
||||
ca_cert_path="/etc/pipelock-ca.pem",
|
||||
@@ -69,11 +65,28 @@ class TestBuildConfig(unittest.TestCase):
|
||||
"enabled": True,
|
||||
"ca_cert": "/etc/pipelock-ca.pem",
|
||||
"ca_key": "/etc/pipelock-ca-key.pem",
|
||||
"passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH),
|
||||
"passthrough_domains": [],
|
||||
},
|
||||
cfg["tls_interception"],
|
||||
)
|
||||
self.assertIn("api.anthropic.com", DEFAULT_TLS_PASSTHROUGH)
|
||||
self.assertEqual((), DEFAULT_TLS_PASSTHROUGH)
|
||||
|
||||
def test_tls_passthrough_route_policy_emits_domain(self):
    # A route that opts into pipelock TLS passthrough must have its host
    # listed under tls_interception.passthrough_domains in the built config.
    passthrough_route = {
        "host": "api.openai.com",
        "auth": {"scheme": "Bearer", "token_ref": "T"},
        "pipelock": {"tls_passthrough": True},
    }
    manifest = Manifest.from_json_obj({
        "bottles": {"dev": {"egress": {"routes": [passthrough_route]}}},
        "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
    })
    dev_bottle = manifest.bottles["dev"]

    cfg = pipelock_build_config(
        dev_bottle,
        ca_cert_path="/etc/pipelock-ca.pem",
        ca_key_path="/etc/pipelock-ca-key.pem",
    )

    tls_block = cast(dict[str, object], cfg["tls_interception"])
    self.assertEqual(["api.openai.com"], tls_block["passthrough_domains"])
|
||||
|
||||
def test_tls_interception_requires_both_paths(self):
|
||||
# Half-set is a programmer error, not a silent omission.
|
||||
@@ -179,19 +192,24 @@ class TestRenderAndWrite(unittest.TestCase):
|
||||
"""`PipelockProxy.prepare` plumbs the module-level in-container
|
||||
CA constants through to the YAML. The block should land in the
|
||||
rendered output with `enabled: true`, the configured paths,
|
||||
and the baked LLM-provider passthrough list. The actual
|
||||
and any route-owned passthrough domains. The actual
|
||||
host-side CA generation happens in launch (not prepare), so
|
||||
this test exercises only the YAML rendering."""
|
||||
plan = PipelockProxy().prepare(
|
||||
fixture_minimal().bottles["dev"], "demo", self.out_dir
|
||||
)
|
||||
bottle = Manifest.from_json_obj({
|
||||
"bottles": {"dev": {"egress": {"routes": [
|
||||
{"host": "api.openai.com",
|
||||
"pipelock": {"tls_passthrough": True}},
|
||||
]}}},
|
||||
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
|
||||
}).bottles["dev"]
|
||||
plan = PipelockProxy().prepare(bottle, "demo", self.out_dir)
|
||||
content = plan.yaml_path.read_text()
|
||||
self.assertIn("tls_interception:", content)
|
||||
self.assertIn("enabled: true", content)
|
||||
self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content)
|
||||
self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content)
|
||||
self.assertIn("passthrough_domains:", content)
|
||||
self.assertIn('- "api.anthropic.com"', content)
|
||||
self.assertIn('- "api.openai.com"', content)
|
||||
|
||||
def test_render_emits_ssrf_block_when_allowlist_given(self):
|
||||
cfg = pipelock_build_config(
|
||||
|
||||
Reference in New Issue
Block a user