docs(prd-0022): end-to-end sandbox-escape integration test #51

Merged
didericis merged 5 commits from sandbox-escape-integration-test into main 2026-05-26 22:47:50 -04:00
3 changed files with 44 additions and 8 deletions
Showing only changes of commit 23f50f7720 - Show all commits
+17 -1
View File
@@ -179,7 +179,19 @@ def pipelock_build_config(
# built-in default for request_body_scanning is "warn" (forward # built-in default for request_body_scanning is "warn" (forward
# with a log line); claude-bottle hard-codes "block" so a hit # with a log line); claude-bottle hard-codes "block" so a hit
# actually stops the request from leaving the egress network. # actually stops the request from leaving the egress network.
cfg["request_body_scanning"] = {"action": "block"} #
# `scan_headers: true` + `header_mode: all` extends the scan to
# every request header — pipelock's default `header_mode:
# sensitive` only checks Authorization / Cookie / X-Api-Key /
# X-Token / Proxy-Authorization / X-Goog-Api-Key, which an
# agent attempting to exfil could trivially avoid by picking
# a non-sensitive header name. "all" closes the gap; pipelock
# caps it at the same max_body_bytes the body scan uses.
cfg["request_body_scanning"] = {
"action": "block",
"scan_headers": True,
"header_mode": "all",
}
if ca_cert_path or ca_key_path: if ca_cert_path or ca_key_path:
if not (ca_cert_path and ca_key_path): if not (ca_cert_path and ca_key_path):
raise ValueError( raise ValueError(
@@ -230,6 +242,10 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
lines.append("request_body_scanning:") lines.append("request_body_scanning:")
rbs = cast(dict[str, object], cfg["request_body_scanning"]) rbs = cast(dict[str, object], cfg["request_body_scanning"])
lines.append(f' action: "{rbs["action"]}"') lines.append(f' action: "{rbs["action"]}"')
if "scan_headers" in rbs:
lines.append(f" scan_headers: {_bool(rbs['scan_headers'])}")
if "header_mode" in rbs:
lines.append(f' header_mode: "{rbs["header_mode"]}"')
if "tls_interception" in cfg: if "tls_interception" in cfg:
lines.append("") lines.append("")
lines.append("tls_interception:") lines.append("tls_interception:")
+19 -6
View File
@@ -228,10 +228,13 @@ class TestSandboxEscape(unittest.TestCase):
# ---- attack 3: HTTP exfil shapes --------------------------------- # ---- attack 3: HTTP exfil shapes ---------------------------------
# Sandbox-block signature: pipelock / egress return HTTP 403 on # Sandbox-block signature: pipelock / egress return HTTP 403 on
# policy reject; the response body carries `"egress:"` (egress # policy reject; the response body carries a recognizable
# sidecar) or `"pipelock"` (pipelock sidecar). Both are # marker. Egress's reject message starts `"egress: host '...'
# observable from inside the agent via curl. # is not in the bottle's egress.routes allowlist"`; pipelock's
_SANDBOX_BLOCK_MARKERS = ("egress:", "pipelock") # DLP rejects start `"blocked: "` (e.g.
# `"blocked: DLP match: Anthropic API Key (critical)"`,
# `"blocked: request body contains secret"`).
_SANDBOX_BLOCK_MARKERS = ("egress:", "pipelock", "blocked:")
def _assert_sandbox_block(self, label: str, r) -> None: def _assert_sandbox_block(self, label: str, r) -> None:
"""A real sandbox block produces an HTTP 403 with a """A real sandbox block produces an HTTP 403 with a
@@ -276,10 +279,20 @@ class TestSandboxEscape(unittest.TestCase):
PRD 0022 Q1 resolution: this assertion is AUTHORITATIVE. PRD 0022 Q1 resolution: this assertion is AUTHORITATIVE.
If a shape fails here, the leak is real and the If a shape fails here, the leak is real and the
remediation lands as its own PRD before this test merges. remediation lands as its own PRD before this test merges.
DON'T mark expectedFailure to silence it.""" DON'T mark expectedFailure to silence it.
Destination note: we use `raw.githubusercontent.com` (one
of the DEFAULT_ALLOWLIST hosts) rather than
api.anthropic.com because pipelock passes through the
Anthropic API endpoint specifically — its DLP scanners
false-positive on real LLM conversation bodies (BIP-39
seed phrases, etc.). That trade-off is documented in
`pipelock.DEFAULT_TLS_PASSTHROUGH`. For non-passthrough
hosts pipelock MITMs and the DLP scan applies, which is
what this attack exercises."""
# Capture HTTP code via curl's -w; don't use --fail so # Capture HTTP code via curl's -w; don't use --fail so
# we get the response body even on 4xx. # we get the response body even on 4xx.
url_base = "https://api.anthropic.com" url_base = "https://raw.githubusercontent.com"
wfmt = '\\nHTTP_CODE:%{http_code}' wfmt = '\\nHTTP_CODE:%{http_code}'
shapes = [ shapes = [
( (
+8 -1
View File
@@ -32,8 +32,15 @@ class TestBuildConfig(unittest.TestCase):
{"include_defaults": True, "scan_env": True}, cfg["dlp"] {"include_defaults": True, "scan_env": True}, cfg["dlp"]
) )
# Body-scan action is hard-coded "block" in pipelock_build_config. # Body-scan action is hard-coded "block" in pipelock_build_config.
# `scan_headers: True` + `header_mode: "all"` close the
# header-shape exfil gap surfaced by PRD 0022 attack 3.
self.assertEqual( self.assertEqual(
{"action": "block"}, cfg["request_body_scanning"] {
"action": "block",
"scan_headers": True,
"header_mode": "all",
},
cfg["request_body_scanning"],
) )
# Baked defaults always present. # Baked defaults always present.
self.assertIn("api.anthropic.com", cast(list[str], cfg["api_allowlist"])) self.assertIn("api.anthropic.com", cast(list[str], cfg["api_allowlist"]))