feat(pipelock): allow route tls passthrough policy
test / unit (pull_request) Successful in 37s
test / integration (pull_request) Successful in 58s

This commit is contained in:
2026-05-28 19:19:40 -04:00
parent 3299674c30
commit bcadc07d09
11 changed files with 164 additions and 78 deletions
+1 -1
View File
@@ -14,7 +14,7 @@
# combined trust bundle (system roots + pipelock CA) and point
# mitmproxy at it. The option REPLACES mitmproxy's default
# trust store, so passing pipelock's CA alone would break
# pipelock-passthrough hosts (api.anthropic.com etc.).
# route-configured pipelock passthrough hosts.
# * `-s /app/egress_addon.py` loads the addon that reads
# /etc/egress/routes.yaml.
+45 -2
View File
@@ -18,6 +18,8 @@ Bottle schema (frontmatter):
user: { name: <str>, email: <str> } # optional
remotes: { <host>: <git-entry>, ... } # optional
egress: { routes: [ <egress-route>, ... ] }
# route keys: host, path_allowlist, auth, role, pipelock
# pipelock: { tls_passthrough: <bool> }
supervise: <bool> # optional
Agent schema (frontmatter):
@@ -319,6 +321,39 @@ def _parse_git_config(
return git, git_user
@dataclass(frozen=True)
class PipelockRoutePolicy:
    """Pipelock policy overrides attached to a single egress route.

    Setting `TlsPassthrough` puts the route's host into pipelock's
    `tls_interception.passthrough_domains`: the hostname allowlist is
    still enforced for that host, but pipelock does not MITM/decrypt
    its request bodies or headers.
    """

    # Manifest key `pipelock.tls_passthrough`; off unless the route opts in.
    TlsPassthrough: bool = False

    @classmethod
    def from_dict(
        cls, bottle_name: str, idx: int, raw: object,
    ) -> "PipelockRoutePolicy":
        """Build a policy from the raw `pipelock` value of one route.

        `bottle_name` and `idx` are used only to label error messages.
        Unknown keys and a non-boolean `tls_passthrough` are reported
        through `die`.
        """
        label = f"bottle '{bottle_name}' egress.routes[{idx}] pipelock"
        policy = _as_json_object(raw, label)

        # Reject anything other than the single supported key.
        for key in policy:
            if key == "tls_passthrough":
                continue
            die(
                f"{label} has unknown key {key!r}; "
                f"only 'tls_passthrough' is accepted"
            )

        flag = policy.get("tls_passthrough", False)
        # Strict bool check: truthy strings/ints are not accepted.
        if not isinstance(flag, bool):
            type_name = type(flag).__name__
            die(
                f"{label}.tls_passthrough must be a boolean "
                f"(was {type_name})"
            )
        return cls(TlsPassthrough=flag)
@dataclass(frozen=True)
class EgressRoute:
"""One route on the per-bottle egress sidecar (PRD 0017).
@@ -355,6 +390,7 @@ class EgressRoute:
AuthScheme: str = ""
TokenRef: str = ""
Role: tuple[str, ...] = ()
Pipelock: PipelockRoutePolicy = field(default_factory=PipelockRoutePolicy)
@classmethod
def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute":
@@ -451,11 +487,17 @@ class EgressRoute:
f"{', '.join(sorted(EGRESS_ROLES))}"
)
pipelock = (
PipelockRoutePolicy.from_dict(bottle_name, idx, d["pipelock"])
if "pipelock" in d
else PipelockRoutePolicy()
)
for k in d:
if k not in ("host", "path_allowlist", "auth", "role"):
if k not in ("host", "path_allowlist", "auth", "role", "pipelock"):
die(
f"{label} has unknown key {k!r}; accepted keys are "
f"'host', 'path_allowlist', 'auth', 'role'"
f"'host', 'path_allowlist', 'auth', 'role', 'pipelock'"
)
return cls(
@@ -464,6 +506,7 @@ class EgressRoute:
AuthScheme=auth_scheme,
TokenRef=token_ref,
Role=roles,
Pipelock=pipelock,
)
+16 -32
View File
@@ -26,20 +26,10 @@ from .supervise import SUPERVISE_HOSTNAME
from .manifest import Bottle
# Hosts pipelock should NOT TLS-MITM, even when tls_interception is
# enabled. The Claude API endpoint is an LLM provider — its request
# bodies are user-authored conversation text that legitimately can
# trigger DLP scanners (notably the BIP-39 seed-phrase detector, which
# fires on any 12+ consecutive English words that happen to be on the
# BIP-39 wordlist and pass the checksum). Per pipelock's own
# configuration.md, the recommended treatment for LLM API endpoints is
# `passthrough_domains`: pipelock still proxies the CONNECT (so the
# api_allowlist gate applies), but it does not generate a leaf cert or
# decrypt the body. Body scanning happens on hosts that aren't
# passthrough'd, so DLP protection against agent exfil to other
# allowlisted hosts is unchanged.
DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = (
"api.anthropic.com",
)
# enabled. This is now route-owned manifest policy via
# `egress.routes[].pipelock.tls_passthrough`; no provider hosts are
# injected implicitly.
DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = ()
# In-container paths the rendered pipelock YAML references under
@@ -109,25 +99,19 @@ def pipelock_seed_phrase_detection_enabled(bottle: Bottle) -> bool:
def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]:
    """Return the hostnames pipelock should pass through (no TLS MITM).

    The result is the union of `DEFAULT_TLS_PASSTHROUGH` and the host of
    every egress route that opts in with `pipelock.tls_passthrough: true`.
    Routes that do not opt in are not added.

    Passthrough is useful for provider API routes where egress injects
    the Authorization header after the agent boundary; pipelock still
    enforces the host allowlist but does not decrypt and scan that
    provider request.

    Args:
        bottle: Parsed bottle manifest; only `bottle.egress.routes` is read.

    Returns:
        De-duplicated hostnames in sorted order.
    """
    # A set is enough for de-duplication: the output is sorted, so
    # insertion order never matters.
    passthrough = set(DEFAULT_TLS_PASSTHROUGH)
    passthrough.update(
        route.Host
        for route in bottle.egress.routes
        if route.Pipelock.TlsPassthrough
    )
    return sorted(passthrough)