fix(pipelock): passthrough api.anthropic.com so Claude auth/chat works

Pipelock's BIP-39 seed-phrase scanner fires on Anthropic Messages API bodies because user-authored conversation text can hit 12 consecutive BIP-39 dictionary words that pass the checksum, returning a 403 `blocked: request body contains secret: BIP-39 Seed Phrase` that the Claude CLI surfaces as `Please run /login`. Pipelock's `suppress` section only covers git/file findings, not the inline body scanner, so the recommended treatment for LLM endpoints is `tls_interception.passthrough_domains`: CONNECT is still allowlist-gated, but the body is not MITM'd. The existing body-scan integration test moves to `raw.githubusercontent.com` so it still pins TLS body DLP on non-passthrough'd hosts. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 17:55:05 -04:00
parent 96d2c7b7a1
commit 4f0cd0f782
4 changed files with 158 additions and 11 deletions
@@ -30,6 +30,22 @@ DEFAULT_ALLOWLIST: tuple[str, ...] = (
    "raw.githubusercontent.com",
 )

+# Hosts pipelock should NOT TLS-MITM, even when tls_interception is
+# enabled. The Claude API endpoint is an LLM provider — its request
+# bodies are user-authored conversation text that legitimately can
+# trigger DLP scanners (notably the BIP-39 seed-phrase detector, which
+# fires on any 12+ consecutive English words that happen to be on the
+# BIP-39 wordlist and pass the checksum). Per pipelock's own
+# configuration.md, the recommended treatment for LLM API endpoints is
+# `passthrough_domains`: pipelock still proxies the CONNECT (so the
+# api_allowlist gate applies), but it does not generate a leaf cert or
+# decrypt the body. Body scanning still happens on every host that is
+# not passthrough'd, so DLP protection against agent exfil to the other
+# allowlisted hosts is unchanged.
+DEFAULT_TLS_PASSTHROUGH: tuple[str, ...] = (
+    "api.anthropic.com",
+)
+

 # --- Allowlist resolution --------------------------------------------------

@@ -119,6 +135,7 @@ def pipelock_build_config(
            "enabled": True,
            "ca_cert": ca_cert_path,
            "ca_key": ca_key_path,
+            "passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH),
        }
    return cfg

@@ -158,6 +175,11 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
        lines.append(f"  enabled: {_bool(tls['enabled'])}")
        lines.append(f'  ca_cert: "{tls["ca_cert"]}"')
        lines.append(f'  ca_key: "{tls["ca_key"]}"')
+        passthrough = cast(list[str], tls.get("passthrough_domains", []))
+        if passthrough:
+            lines.append("  passthrough_domains:")
+            for d in passthrough:
+                lines.append(f'    - "{d}"')
    return "\n".join(lines) + "\n"


@@ -6,11 +6,18 @@ End-to-end: drives `BottleBackend.prepare → launch` so the real
 image build, network plumbing, pipelock_tls_init, sidecar bring-up,
 and provision_ca (CA install in the agent's trust store) are all in
 the loop. The probe is a single `curl --proxy "$HTTPS_PROXY" -X POST
-... https://api.anthropic.com/...` — curl natively does CONNECT
-through the proxy, the agent's trust store now contains pipelock's
-per-bottle CA so curl trusts pipelock's bumped leaf, and pipelock
-sees the decrypted body and returns its known
-`blocked: request body contains secret: <pattern>` 403."""
+... https://raw.githubusercontent.com/...` — curl natively does
+CONNECT through the proxy, the agent's trust store now contains
+pipelock's per-bottle CA so curl trusts pipelock's bumped leaf, and
+pipelock sees the decrypted body and returns its known
+`blocked: request body contains secret: <pattern>` 403.
+
+The host has to be allowlisted (so the CONNECT is accepted) but NOT
+in `tls_interception.passthrough_domains` (so the body actually gets
+scanned). `api.anthropic.com` is passthrough'd to skip MITM on the
+LLM endpoint, so this probe targets `raw.githubusercontent.com` —
+also on the baked allowlist (Claude Code fetches release assets from
+it) and intercepted+scanned like any non-passthrough host."""

 from __future__ import annotations

@@ -66,7 +73,7 @@ class TestPipelockBlocksSecretHttpsPost(unittest.TestCase):
                    "  -w 'status=%{http_code}\\n' \\\n"
                    "  -o /tmp/probe-body.txt \\\n"
                    '  -X POST -d "token=$FAKE_TOKEN" \\\n'
-                    "  https://api.anthropic.com/dlp-probe\n"
+                    "  https://raw.githubusercontent.com/dlp-probe\n"
                    'echo "body=$(head -c 200 /tmp/probe-body.txt)"\n'
                )
                result = bottle.exec(script)
@@ -0,0 +1,105 @@
+"""Integration: pipelock's `tls_interception.passthrough_domains`
+exempts api.anthropic.com from MITM, so request bodies that would
+otherwise trip the body-scan layer (notably the BIP-39 seed-phrase
+detector firing on user-authored Claude conversation text) are not
+inspected and the request reaches Anthropic's TLS endpoint.
+
+Probe: POST the canonical zero-entropy 12-word BIP-39 mnemonic
+(`abandon` × 11 + `about`) — checksum-valid by construction — to
+`https://api.anthropic.com/v1/messages`. Without the passthrough,
+pipelock returns a 403 `blocked: request body contains secret:
+BIP-39 Seed Phrase`. With it, pipelock relays the CONNECT opaquely
+and the upstream replies with whatever it likes (401/4xx from
+Anthropic for an unauthenticated junk POST). We assert that the
+verdict is NOT pipelock's block.
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+
+from claude_bottle.backend import BottleSpec, get_bottle_backend
+from claude_bottle.manifest import Manifest
+from tests._docker import skip_unless_docker
+
+
+# Canonical BIP-39 12-word test mnemonic. Valid SHA-256 checksum —
+# pipelock's seed-phrase scanner (default `verify_checksum: true`)
+# fires on this exact string if it ever sees the cleartext body.
+_BIP39_PHRASE = (
+    "abandon abandon abandon abandon abandon abandon "
+    "abandon abandon abandon abandon abandon about"
+)
+
+
+@skip_unless_docker()
+class TestPipelockLlmPassthrough(unittest.TestCase):
+    @unittest.skipIf(
+        os.environ.get("GITEA_ACTIONS") == "true",
+        "skipped under act_runner: docker socket mount topology breaks "
+        "in-process visibility of networks created on the host daemon",
+    )
+    def test_bip39_body_to_anthropic_is_not_blocked(self):
+        manifest = Manifest.from_json_obj({
+            "bottles": {
+                "dev": {"env": {"SEED": _BIP39_PHRASE}},
+            },
+            "agents": {
+                "demo": {"skills": [], "prompt": "", "bottle": "dev"},
+            },
+        })
+        backend = get_bottle_backend()
+        stage_dir = Path(tempfile.mkdtemp(prefix="cb-test-stage."))
+        try:
+            spec = BottleSpec(
+                manifest=manifest,
+                agent_name="demo",
+                copy_cwd=False,
+                user_cwd=str(stage_dir),
+                forward_oauth_token=False,
+            )
+            plan = backend.prepare(spec, stage_dir=stage_dir)
+            with backend.launch(plan) as bottle:
+                script = (
+                    "set -eu\n"
+                    'curl --proxy "$HTTPS_PROXY" -s --max-time 10 \\\n'
+                    "  -w 'status=%{http_code}\\n' \\\n"
+                    "  -o /tmp/probe-body.txt \\\n"
+                    '  -X POST -H "content-type: application/json" \\\n'
+                    '  --data "{\\"phrase\\": \\"$SEED\\"}" \\\n'
+                    "  https://api.anthropic.com/v1/messages\n"
+                    'echo "body=$(head -c 200 /tmp/probe-body.txt)"\n'
+                )
+                result = bottle.exec(script)
+        finally:
+            shutil.rmtree(stage_dir, ignore_errors=True)
+
+        self.assertEqual(
+            0, result.returncode,
+            f"exec wrapper failed: stdout={result.stdout!r} "
+            f"stderr={result.stderr!r}",
+        )
+        # The pipelock block verdict starts with `blocked: ` in the
+        # body. Anything else (auth error, 401, 4xx from Anthropic) is
+        # an acceptable outcome — it means the body was NOT inspected
+        # by the proxy and the request was relayed to the upstream
+        # TLS endpoint.
+        self.assertNotIn(
+            "body=blocked: ", result.stdout,
+            f"unexpected pipelock body-scan block on api.anthropic.com; "
+            f"expected passthrough to skip MITM. got: {result.stdout!r}",
+        )
+        self.assertNotIn(
+            "BIP-39", result.stdout,
+            f"BIP-39 verdict should never appear for api.anthropic.com "
+            f"requests under tls_interception.passthrough_domains; "
+            f"got: {result.stdout!r}",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -14,7 +14,11 @@ from typing import Any, cast

 from claude_bottle.backend.docker.pipelock import DockerPipelockProxy
 from claude_bottle.manifest import Manifest
-from claude_bottle.pipelock import pipelock_build_config, pipelock_render_yaml
+from claude_bottle.pipelock import (
+    DEFAULT_TLS_PASSTHROUGH,
+    pipelock_build_config,
+    pipelock_render_yaml,
+)
 from tests.fixtures import fixture_minimal, fixture_with_ssh


@@ -56,7 +60,11 @@ class TestBuildConfig(unittest.TestCase):

    def test_tls_interception_block_emitted_when_paths_supplied(self):
        # PRD 0006: paths flow in via DockerPipelockProxy's in-container
-        # constants; this directly pins the dict shape.
+        # constants; this directly pins the dict shape. passthrough_domains
+        # is baked in so LLM provider endpoints (api.anthropic.com) skip
+        # MITM — pipelock's docs explicitly recommend this for LLM hosts,
+        # and without it the BIP-39 body scanner false-positives on
+        # Claude conversation traffic.
        cfg = pipelock_build_config(
            fixture_minimal().bottles["dev"],
            ca_cert_path="/etc/pipelock-ca.pem",
@@ -67,9 +75,11 @@ class TestBuildConfig(unittest.TestCase):
                "enabled": True,
                "ca_cert": "/etc/pipelock-ca.pem",
                "ca_key": "/etc/pipelock-ca-key.pem",
+                "passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH),
            },
            cfg["tls_interception"],
        )
+        self.assertIn("api.anthropic.com", DEFAULT_TLS_PASSTHROUGH)

    def test_tls_interception_requires_both_paths(self):
        # Half-set is a programmer error, not a silent omission.
@@ -135,9 +145,10 @@ class TestRenderAndWrite(unittest.TestCase):
    def test_render_emits_tls_interception_via_prepare(self):
        """`DockerPipelockProxy.prepare` plumbs its in-container CA
        constants through to the YAML. The block should land in the
-        rendered output with `enabled: true` and the configured paths.
-        The actual host-side CA generation happens in launch (not
-        prepare), so this test exercises only the YAML rendering."""
+        rendered output with `enabled: true`, the configured paths,
+        and the baked LLM-provider passthrough list. The actual
+        host-side CA generation happens in launch (not prepare), so
+        this test exercises only the YAML rendering."""
        plan = DockerPipelockProxy().prepare(
            fixture_minimal().bottles["dev"], "demo", self.out_dir
        )
@@ -146,6 +157,8 @@ class TestRenderAndWrite(unittest.TestCase):
        self.assertIn("enabled: true", content)
        self.assertIn('ca_cert: "/etc/pipelock-ca.pem"', content)
        self.assertIn('ca_key: "/etc/pipelock-ca-key.pem"', content)
+        self.assertIn("passthrough_domains:", content)
+        self.assertIn('- "api.anthropic.com"', content)


 if __name__ == "__main__":