feat(dlp): add 7 token patterns, Unicode normalization, CRLF injection detection (PRD 0053)
Token patterns: HuggingFace (hf_), Databricks (dapi), Slack (xox[baprs]-), npm (npm_), SendGrid (SG.x.y), PyPI (pypi-), HashiCorp Vault (hvs.). Unicode normalization (_normalize_text) applies NFKD + strips combining marks and control chars before pattern matching, defeating fullwidth-char and combining-mark evasion. CRLF injection (scan_crlf_injection) detects %0d%0a in URLs and literal \r\n header-injection patterns; runs unconditionally in scan_outbound regardless of outbound_detectors config.
This commit is contained in:
@@ -926,6 +926,35 @@ class TestScanOutbound(unittest.TestCase):
|
||||
assert result is not None
|
||||
self.assertEqual("block", result.severity)
|
||||
|
||||
def test_crlf_in_query_blocked(self):
    # A query parameter carrying a URL-encoded CRLF (%0d%0a) followed by a
    # header-looking token must produce a "block" severity result.
    injected_query = "next=%0d%0aX-Injected%3A+evil"
    scan_text = build_outbound_scan_text(
        host="api.example.com",
        path="/search",
        query=injected_query,
        headers={},
        body="",
    )
    finding = scan_outbound(_ROUTE, scan_text, {})
    self.assertIsNotNone(finding)
    # Redundant at runtime after assertIsNotNone; presumably kept so static
    # type checkers narrow the optional result before attribute access.
    assert finding is not None
    self.assertEqual("block", finding.severity)
|
||||
|
||||
def test_crlf_blocked_even_when_detectors_disabled(self):
    # The CRLF scan runs unconditionally: a route configured with an empty
    # outbound_detectors tuple must still get a "block" result when a header
    # value contains a literal \r\n followed by an injected header line.
    detectorless_route = Route(host="api.example.com", outbound_detectors=())
    tainted_headers = {"x-redirect": "value\r\nX-Injected: evil"}
    scan_text = build_outbound_scan_text(
        host="api.example.com",
        path="/data",
        query="",
        headers=tainted_headers,
        body="",
    )
    finding = scan_outbound(detectorless_route, scan_text, {})
    self.assertIsNotNone(finding)
    # Redundant at runtime after assertIsNotNone; presumably kept so static
    # type checkers narrow the optional result before attribute access.
    assert finding is not None
    self.assertEqual("block", finding.severity)
|
||||
|
||||
|
||||
# --- build_inbound_scan_text --------------------------------------------
|
||||
|
||||
|
||||
Reference in New Issue
Block a user