From 77a51702fcb7e9e46000b53203efbccfdefecea7 Mon Sep 17 00:00:00 2001 From: didericis Date: Sun, 24 May 2026 14:08:35 -0400 Subject: [PATCH] fix(cred_proxy): force identity encoding on upstream requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claude-code sends Accept-Encoding: gzip, deflate, br on every request. api.anthropic.com honors it and returns gzip-compressed SSE responses. Pipelock 2.3.0 has no decompression path; its response scanner fails closed with "blocked: compressed sse_stream response cannot be scanned" — and that gate fires even with response_scanning.enabled=false and sse_streaming disabled. Verified empirically against the real pipelock image. Cleanest fix that preserves DLP coverage end-to-end: have cred-proxy ask upstream for uncompressed bytes. Strip the agent's Accept-Encoding when building the upstream headers and inject `Accept-Encoding: identity`. Upstream returns plaintext; pipelock can scan; no 403. Bandwidth cost is the gzip ratio one-way (cred-proxy ↔ upstream through pipelock). For LLM SSE streams that's a few KB extra per turn — trivial compared to the alternative of leaving pipelock's response scanner blind. --- claude_bottle/cred_proxy_server.py | 15 ++++++++++++++- tests/unit/test_cred_proxy_server.py | 13 +++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/claude_bottle/cred_proxy_server.py b/claude_bottle/cred_proxy_server.py index 1a0f4a3..5fc1c8a 100644 --- a/claude_bottle/cred_proxy_server.py +++ b/claude_bottle/cred_proxy_server.py @@ -157,7 +157,16 @@ _HOP_BY_HOP = frozenset({ "upgrade", }) -_STRIPPED = _HOP_BY_HOP | frozenset({"host", "authorization", "content-length"}) +# Strip the agent's Accept-Encoding on the upstream leg and force +# `identity` instead. 
The response then flows back uncompressed, +# which lets pipelock's response scanner read the body — pipelock +# 2.3.0 has no decompression path and otherwise blocks with +# "compressed sse_stream response cannot be scanned". The cost is +# bandwidth from upstream; for LLM SSE streams this is negligible +# and the DLP coverage on the agent leg is the win. +_STRIPPED = _HOP_BY_HOP | frozenset({ + "host", "authorization", "content-length", "accept-encoding", +}) def build_forward_headers( @@ -177,6 +186,9 @@ def build_forward_headers( every listed header name. - Inject `Authorization: <auth_scheme> <token>` and a Host header pointing at the upstream. + - Force `Accept-Encoding: identity` so the upstream returns + uncompressed bytes — pipelock's response scanner can't read + gzip/br/deflate and would otherwise 403 the response. """ incoming_list = list(incoming) # Headers listed in `Connection:` are also hop-by-hop for this hop. @@ -193,6 +205,7 @@ def build_forward_headers( forwarded.append((name, value)) forwarded.append(("Host", upstream_host)) forwarded.append(("Authorization", f"{auth_scheme} {token}")) + forwarded.append(("Accept-Encoding", "identity")) return forwarded diff --git a/tests/unit/test_cred_proxy_server.py b/tests/unit/test_cred_proxy_server.py index ce22889..bace39a 100644 --- a/tests/unit/test_cred_proxy_server.py +++ b/tests/unit/test_cred_proxy_server.py @@ -141,6 +141,19 @@ class TestBuildForwardHeaders(unittest.TestCase): self.assertNotIn("x-custom", names) # listed in Connection: -> hop-by-hop self.assertIn("x-real", names) + def test_forces_identity_accept_encoding(self): + # The agent's gzip/br Accept-Encoding gets replaced with + # `identity` so the upstream returns uncompressed bytes — + # pipelock's response scanner can't read compressed bodies + # and would 403 with "compressed sse_stream response cannot + # be scanned". 
+ headers = build_forward_headers( + [("Accept-Encoding", "gzip, deflate, br")], + auth_scheme="Bearer", token="t", upstream_host="x.example", + ) + ae = [v for n, v in headers if n.lower() == "accept-encoding"] + self.assertEqual(["identity"], ae) + def test_strips_content_length(self): # http.client recomputes Content-Length; passing it through # double-counts and breaks the upstream.