feat(egress): add location, context snippets, and token redaction to DLP logging

Each DLP block/warn result now reports where the match was found (body, authorization header, or response body) and includes a context snippet: up to SNIPPET_CONTEXT (40) chars before and after the match, with newlines flattened to spaces and the matched value replaced by REDACT ("********"). scan_token_patterns, scan_known_secrets, and scan_naive_injection all set the new `location` and `context` fields on the ScanResult they return. The outbound scanner takes `auth_header` as a separate kwarg so that the request body and the authorization header are scanned and reported independently. redact_tokens() is added to dlp_detectors and used in egress_addon.py to scrub token patterns and provisioned secrets from host/path fields before they appear in any log output (levels 1 and 2). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:47:42 -04:00
parent 79212481c9
commit 86b0a4d285
4 changed files with 210 additions and 35 deletions
@@ -21,6 +21,21 @@ except ImportError:  # pragma: no cover - host-side path
    from .egress_addon_core import ScanResult


+# ---------------------------------------------------------------------------
+# Snippet helpers
+# ---------------------------------------------------------------------------
+
+SNIPPET_CONTEXT = 40  # chars of surrounding text to include on each side
+REDACT = "********"   # fixed-width replacement for the matched sensitive value
+
+
+def _snippet(text: str, start: int, end: int) -> str:
+    """Return context around a match with the matched span replaced by REDACT."""
+    before = text[max(0, start - SNIPPET_CONTEXT):start].replace("\n", " ").replace("\r", " ")
+    after = text[end:end + SNIPPET_CONTEXT].replace("\n", " ").replace("\r", " ")
+    return f"{before}{REDACT}{after}"
+
+
 # ---------------------------------------------------------------------------
 # Token patterns detector (Phase 1a)
 # ---------------------------------------------------------------------------
@@ -36,16 +51,35 @@ TOKEN_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
 )


-def scan_token_patterns(text: str) -> ScanResult | None:
+def scan_token_patterns(text: str, *, location: str = "body") -> ScanResult | None:
    for name, pattern in TOKEN_PATTERNS:
-        if pattern.search(text):
+        m = pattern.search(text)
+        if m is not None:
            return ScanResult(
                severity="block",
-                reason=f"outbound request contains {name}",
+                reason=f"{name} found in {location}",
+                location=location,
+                context=_snippet(text, m.start(), m.end()),
            )
    return None


+def redact_tokens(
+    text: str,
+    *,
+    env: typing.Mapping[str, str] | None = None,
+) -> str:
+    """Replace token pattern matches and (if env given) provisioned secrets with REDACT."""
+    for _, pattern in TOKEN_PATTERNS:
+        text = pattern.sub(REDACT, text)
+    if env is not None:
+        for key, value in env.items():
+            if key.startswith("EGRESS_TOKEN_") and value:
+                for variant in _encoded_variants(value):
+                    text = text.replace(variant, REDACT)
+    return text
+
+
 # ---------------------------------------------------------------------------
 # Known secrets detector (Phase 1b)
 # ---------------------------------------------------------------------------
@@ -69,6 +103,7 @@ def _encoded_variants(secret: str) -> list[str]:
 def scan_known_secrets(
    text: str,
    *,
+    location: str = "body",
    env: typing.Mapping[str, str] | None = None,
 ) -> ScanResult | None:
    if env is None:
@@ -77,13 +112,13 @@ def scan_known_secrets(
        if not key.startswith("EGRESS_TOKEN_") or not value:
            continue
        for variant in _encoded_variants(value):
-            if variant in text:
+            pos = text.find(variant)
+            if pos >= 0:
                return ScanResult(
                    severity="block",
-                    reason=(
-                        f"outbound request contains provisioned secret "
-                        f"from {key}"
-                    ),
+                    reason=f"provisioned secret from {key} found in {location}",
+                    location=location,
+                    context=_snippet(text, pos, pos + len(variant)),
                )
    return None

@@ -128,38 +163,69 @@ def _min_distance(
    return best


+def _closest_pair(
+    a_matches: list[re.Match[str]],
+    b_matches: list[re.Match[str]],
+) -> tuple[re.Match[str], re.Match[str]] | None:
+    """Return the pair (a, b) with the smallest character gap, or None."""
+    best: tuple[re.Match[str], re.Match[str]] | None = None
+    best_gap: int | None = None
+    for a in a_matches:
+        for b in b_matches:
+            gap = max(0, max(a.start(), b.start()) - min(a.end(), b.end()))
+            if best_gap is None or gap < best_gap:
+                best_gap = gap
+                best = (a, b)
+    return best
+
+
 def scan_naive_injection(text: str) -> ScanResult | None:
+    location = "response body"
    disclosure_hits = [m for p in DISCLOSURE_PHRASES for m in p.finditer(text)]
    jailbreak_hits = [m for p in JAILBREAK_PHRASES for m in p.finditer(text)]

    if disclosure_hits and jailbreak_hits:
-        dist = _min_distance(disclosure_hits, jailbreak_hits)
-        if dist is not None and dist <= PROXIMITY_CHARS:
-            return ScanResult(
-                severity="block",
-                reason=(
-                    f"disclosure and jailbreak phrases within "
-                    f"{dist} chars in response"
-                ),
-            )
+        pair = _closest_pair(disclosure_hits, jailbreak_hits)
+        if pair is not None:
+            dist = max(0, max(pair[0].start(), pair[1].start()) - min(pair[0].end(), pair[1].end()))
+            if dist <= PROXIMITY_CHARS:
+                first = pair[0] if pair[0].start() <= pair[1].start() else pair[1]
+                return ScanResult(
+                    severity="block",
+                    reason=(
+                        f"disclosure and jailbreak phrases within "
+                        f"{dist} chars in {location}"
+                    ),
+                    location=location,
+                    context=_snippet(text, first.start(), first.end()),
+                )

    if disclosure_hits:
+        m = disclosure_hits[0]
        return ScanResult(
            severity="warn",
-            reason="prompt disclosure phrase detected in response",
+            reason=f"prompt disclosure phrase detected in {location}",
+            location=location,
+            context=_snippet(text, m.start(), m.end()),
        )

    if jailbreak_hits:
+        m = jailbreak_hits[0]
        return ScanResult(
            severity="warn",
-            reason="jailbreak phrase detected in response",
+            reason=f"jailbreak phrase detected in {location}",
+            location=location,
+            context=_snippet(text, m.start(), m.end()),
        )

    return None


 __all__ = [
+    "REDACT",
+    "SNIPPET_CONTEXT",
    "TOKEN_PATTERNS",
+    "redact_tokens",
    "scan_known_secrets",
    "scan_naive_injection",
    "scan_token_patterns",