Stop scanning the request body for CRLF injection

A 403 "egress DLP: URL-encoded CRLF (%0d%0a)" was firing on legitimate requests (e.g. the Claude Code login flow) and bypassing the on-match policy entirely, because CRLF blocks carry no matched value and were routed straight to a hard 403.

Root cause: CRLF injection is only an attack in the request line and headers. An HTTP body is delimited by Content-Length, so CRLF bytes in the body cannot split the request — but the scan flattened the body into the same blob it checked, so form-encoded / multi-line body content (which legitimately contains %0d%0a) tripped it.

Fix:
- scan_outbound takes a crlf_text param; the addon scans CRLF only over the body-excluded request line + headers. crlf_text=None keeps the old full-blob behavior for host-side callers/tests; the websocket path passes "" since a data frame is not a request line.
- The redact policy now also scrubs CRLF (new strip_crlf helper) from the path and headers, so redact is a complete escape hatch and structural CRLF in the URL/headers can be forwarded when a route opts into it.

Tests: strip_crlf unit tests; scan_outbound crlf_text body-exclusion and backward-compat tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01HnvBjPZC5V7qeQpFbQdDmS
2026-06-24 20:37:26 -04:00
parent cdfaaa3de8
commit b411577e76
5 changed files with 108 additions and 31 deletions
@@ -487,5 +487,21 @@ class TestMatchedAndSafeTokens(unittest.TestCase):
        self.assertEqual("", result.matched)


+class TestStripCrlf(unittest.TestCase):
+    def test_removes_url_encoded_crlf(self):
+        from bot_bottle.dlp_detectors import strip_crlf
+        out = strip_crlf("next=%0d%0aX-Injected: evil")
+        self.assertNotRegex(out, r"%0[dD]%0[aA]")
+
+    def test_removes_literal_header_injection(self):
+        from bot_bottle.dlp_detectors import strip_crlf
+        out = strip_crlf("value\r\nX-Injected: evil")
+        self.assertIsNone(scan_crlf_injection(out))
+
+    def test_leaves_clean_text_unchanged(self):
+        from bot_bottle.dlp_detectors import strip_crlf
+        self.assertEqual("/api/v1/data?q=hello", strip_crlf("/api/v1/data?q=hello"))
+
+
 if __name__ == "__main__":
    unittest.main()
@@ -1212,6 +1212,43 @@ class TestScanOutboundSafeTokens(unittest.TestCase):
        self.assertEqual(_AWS_KEY, result.matched)


+class TestScanOutboundCrlfText(unittest.TestCase):
+    """PRD 0062: CRLF is scanned only over the request line + headers
+    (crlf_text), never the body — a body is not an injection vector."""
+
+    def test_body_crlf_not_flagged_when_crlf_text_excludes_body(self):
+        # A form-encoded multi-line body legitimately contains %0d%0a.
+        body = "comment=line1%0d%0aline2"
+        full = build_outbound_scan_text(
+            host="api.example.com", path="/submit", query="",
+            headers={}, body=body,
+        )
+        crlf_text = build_outbound_scan_text(
+            host="api.example.com", path="/submit", query="",
+            headers={}, body="",
+        )
+        self.assertIsNone(scan_outbound(_ROUTE, full, {}, crlf_text=crlf_text))
+
+    def test_request_line_crlf_still_flagged(self):
+        full = build_outbound_scan_text(
+            host="api.example.com", path="/p", query="next=%0d%0aX:evil",
+            headers={}, body="",
+        )
+        crlf_text = full
+        result = scan_outbound(_ROUTE, full, {}, crlf_text=crlf_text)
+        self.assertIsNotNone(result)
+        assert result is not None
+        self.assertEqual("block", result.severity)
+
+    def test_default_crlf_text_scans_full_blob(self):
+        # Backward compatibility: crlf_text=None scans everything (body too).
+        full = build_outbound_scan_text(
+            host="api.example.com", path="/submit", query="",
+            headers={}, body="x=%0d%0aX:evil",
+        )
+        self.assertIsNotNone(scan_outbound(_ROUTE, full, {}))
+
+
 class TestBuildTokenAllowPayload(unittest.TestCase):
    def test_payload_includes_context_and_no_raw_token(self):
        result = ScanResult(