Stop scanning the request body for CRLF injection
lint / lint (push) Successful in 1m41s
test / unit (pull_request) Successful in 31s
test / integration (pull_request) Successful in 18s

A 403 "egress DLP: URL-encoded CRLF (%0d%0a)" was firing on legitimate
requests (e.g. the Claude Code login flow) and bypassing the on-match
policy entirely, because CRLF blocks carry no matched value and were
routed straight to a hard 403.

Root cause: CRLF injection is only an attack in the request line and
headers. An HTTP body is delimited by Content-Length, so CRLF bytes in
the body cannot split the request — but the scan flattened the body into
the same blob it checked, so form-encoded / multi-line body content
(which legitimately contains %0d%0a) tripped it.

Fix:
- scan_outbound takes a crlf_text param; the addon scans CRLF only over
  the body-excluded request line + headers. crlf_text=None keeps the
  old full-blob behavior for host-side callers/tests; the websocket path
  passes "" since a data frame is not a request line.
- The redact policy now also scrubs CRLF (new strip_crlf helper) from the
  path and headers, so redact is a complete escape hatch: a request with
  structural CRLF in the URL/headers can be forwarded, with the CRLF
  stripped, when a route opts into it.

Tests: strip_crlf unit tests; scan_outbound crlf_text body-exclusion and
backward-compat tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01HnvBjPZC5V7qeQpFbQdDmS
This commit is contained in:
2026-06-24 20:37:26 -04:00
parent cdfaaa3de8
commit b411577e76
5 changed files with 108 additions and 31 deletions
+16
View File
@@ -487,5 +487,21 @@ class TestMatchedAndSafeTokens(unittest.TestCase):
self.assertEqual("", result.matched)
class TestStripCrlf(unittest.TestCase):
    """Exercise strip_crlf on encoded, literal, and CRLF-free input."""

    @staticmethod
    def _strip(text):
        """Import strip_crlf at call time and apply it to ``text``."""
        from bot_bottle.dlp_detectors import strip_crlf

        return strip_crlf(text)

    def test_removes_url_encoded_crlf(self):
        """No percent-encoded CR LF pair (either hex case) is left behind."""
        cleaned = self._strip("next=%0d%0aX-Injected: evil")
        self.assertNotRegex(cleaned, r"%0[dD]%0[aA]")

    def test_removes_literal_header_injection(self):
        """The stripped text no longer trips the CRLF-injection scanner."""
        cleaned = self._strip("value\r\nX-Injected: evil")
        finding = scan_crlf_injection(cleaned)
        self.assertIsNone(finding)

    def test_leaves_clean_text_unchanged(self):
        """Text without any CRLF comes back exactly as it went in."""
        sample = "/api/v1/data?q=hello"
        self.assertEqual(sample, self._strip(sample))
# Run the unittest runner only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()
+37
View File
@@ -1212,6 +1212,43 @@ class TestScanOutboundSafeTokens(unittest.TestCase):
self.assertEqual(_AWS_KEY, result.matched)
class TestScanOutboundCrlfText(unittest.TestCase):
    """PRD 0062: the CRLF check runs over crlf_text only.

    crlf_text carries the request line + headers and never the body,
    because a body is not an injection vector.
    """

    @staticmethod
    def _scan_text(*, path, query="", body=""):
        """Build scan text for api.example.com with no headers."""
        return build_outbound_scan_text(
            host="api.example.com",
            path=path,
            query=query,
            headers={},
            body=body,
        )

    def test_body_crlf_not_flagged_when_crlf_text_excludes_body(self):
        """An encoded CRLF that appears only in the body is not reported."""
        # A form-encoded multi-line body legitimately contains %0d%0a.
        with_body = self._scan_text(
            path="/submit", body="comment=line1%0d%0aline2"
        )
        without_body = self._scan_text(path="/submit")
        verdict = scan_outbound(_ROUTE, with_body, {}, crlf_text=without_body)
        self.assertIsNone(verdict)

    def test_request_line_crlf_still_flagged(self):
        """An encoded CRLF in the query string is reported as a block."""
        blob = self._scan_text(path="/p", query="next=%0d%0aX:evil")
        # Nothing to exclude here: the same text serves as the CRLF scan input.
        result = scan_outbound(_ROUTE, blob, {}, crlf_text=blob)
        self.assertIsNotNone(result)
        assert result is not None
        self.assertEqual("block", result.severity)

    def test_default_crlf_text_scans_full_blob(self):
        """Without crlf_text, a CRLF in the body is still reported."""
        # Backward compatibility: crlf_text=None scans everything (body too).
        blob = self._scan_text(path="/submit", body="x=%0d%0aX:evil")
        verdict = scan_outbound(_ROUTE, blob, {})
        self.assertIsNotNone(verdict)
class TestBuildTokenAllowPayload(unittest.TestCase):
def test_payload_includes_context_and_no_raw_token(self):
result = ScanResult(