feat(egress): add location, context snippets, and token redaction to DLP logging

Each DLP block or warn event now reports where the match was found (body, authorization header, or response body) and includes a context snippet: SNIPPET_CONTEXT chars before and after the match, with the matched value replaced by REDACT ("********"). scan_token_patterns, scan_known_secrets, and scan_naive_injection all gain `location` and `context` fields on the ScanResult they return. The outbound scanner takes `auth_header` as a separate kwarg so that the two locations are scanned and reported independently. redact_tokens() is added to dlp_detectors and used in egress_addon.py to scrub token patterns and provisioned secrets from the host and path fields before they appear in any log output (log levels 1 and 2).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:47:42 -04:00
parent 79212481c9
commit 86b0a4d285
4 changed files with 210 additions and 35 deletions
@@ -27,6 +27,11 @@ from egress_addon_core import (  # type: ignore[import-not-found]
    scan_outbound,
 )

+try:
+    from dlp_detectors import redact_tokens  # type: ignore[import-not-found]
+except ImportError:  # pragma: no cover - host-side path
+    from bot_bottle.dlp_detectors import redact_tokens  # type: ignore[import-not-found]
+

 DEFAULT_ROUTES_PATH = "/etc/egress/routes.yaml"

@@ -89,9 +94,9 @@ class EgressAddon:

    def _req_ctx(self, flow: http.HTTPFlow) -> dict[str, object]:
        return {
-            "host": flow.request.pretty_host,
+            "host": redact_tokens(flow.request.pretty_host, env=os.environ),
            "method": flow.request.method,
-            "path": flow.request.path,
+            "path": redact_tokens(flow.request.path, env=os.environ),
        }

    def _block(
@@ -115,9 +120,9 @@ class EgressAddon:
        sys.stderr.write(
            json.dumps({
                "event": "egress_request",
-                "host": flow.request.pretty_host,
+                "host": redact_tokens(flow.request.pretty_host, env=os.environ),
                "method": flow.request.method,
-                "path": flow.request.path,
+                "path": redact_tokens(flow.request.path, env=os.environ),
                "headers": dict(flow.request.headers),
                "body": flow.request.get_text(strict=False) or "",
            })
@@ -149,16 +154,12 @@ class EgressAddon:
        if route is not None:
            body = flow.request.get_text(strict=False) or ""
            auth_header = flow.request.headers.get("authorization", "")
-            scan_text = body
-            if auth_header:
-                scan_text = auth_header + "\n" + body
-            dlp_result = scan_outbound(route, scan_text, os.environ)
+            dlp_result = scan_outbound(route, body, os.environ, auth_header=auth_header)
            if dlp_result is not None and dlp_result.severity == "block":
-                self._block(
-                    flow,
-                    f"egress DLP: {dlp_result.reason}",
-                    ctx=self._req_ctx(flow),
-                )
+                ctx = self._req_ctx(flow)
+                if dlp_result.context:
+                    ctx = {**ctx, "context": dlp_result.context}
+                self._block(flow, f"egress DLP: {dlp_result.reason}", ctx=ctx)
                return

        # Strip inbound Authorization — agent cannot smuggle tokens.
@@ -211,7 +212,12 @@ class EgressAddon:
        result = scan_inbound(route, body)
        if result is None:
            return
-        resp_ctx = {**self._req_ctx(flow), "response_status": flow.response.status_code}
+        resp_ctx: dict[str, object] = {
+            **self._req_ctx(flow),
+            "response_status": flow.response.status_code,
+        }
+        if result.context:
+            resp_ctx = {**resp_ctx, "context": result.context}
        if result.severity == "block":
            self._block(flow, f"egress DLP: {result.reason}", ctx=resp_ctx)
        elif result.severity == "warn" and self.config.log >= LOG_BLOCKS: