PRD 0062: supervisor override for egress token blocks

When the outbound DLP catches a token, route the block through the existing supervisor approval queue instead of returning 403 outright. The egress proxy holds the request open until the operator answers, then remembers an approved value for the life of the proxy so the request -- and later ones carrying it -- flow through. Fails closed on rejection, timeout, malformed response, or when supervise is disabled.

- ScanResult.matched carries the raw matched substring (sidecar-only; never logged or written to the proposal). scan_outbound and the token detectors take a safe_tokens set and skip approved values, continuing past a safelisted match so a second secret in the same request is still caught.
- New egress-token-allow proposal tool, written directly to the queue by the addon (the gitleaks-allow pattern from PRD 0061). build_token_allow_payload renders host/method/path/detector reason + redacted context.
- Async request hook polls the queue without stalling the proxy event loop; EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS (default 300) bounds the wait.
- Supervisor TUI renders egress-token-allow like gitleaks-allow: report only, modify unavailable, approval requires a recorded reason.
- Unit tests for the matched/safe-tokens plumbing, payload builder, tool constant round-trip, and TUI paths; README + PRD 0062.

Closes #261.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01HnvBjPZC5V7qeQpFbQdDmS
2026-06-24 16:12:50 -04:00
parent 7cb967770e
commit 7f2352287e
11 changed files with 525 additions and 24 deletions
@@ -95,6 +95,11 @@ class ScanResult:
    reason: str
    location: str = ""  # where the match was found, e.g. "body", "authorization header"
    context: str = ""   # surrounding text with the match replaced by REDACT
+    # Raw substring the detector matched. Used inside the sidecar to key the
+    # supervisor-approved "safe tokens" set (PRD 0062); never logged or written
+    # to a proposal file. Empty for structural detectors (CRLF) that carry no
+    # safelist-able value.
+    matched: str = ""


 # ---------------------------------------------------------------------------
@@ -690,6 +695,8 @@ def scan_outbound(
    route: Route,
    body: str | bytes,
    environ: typing.Mapping[str, str],
+    *,
+    safe_tokens: typing.AbstractSet[str] | None = None,
 ) -> ScanResult | None:
    # Lazy import to avoid circular deps and keep dlp_detectors optional
    # at import time (the sidecar copies it flat alongside this file).
@@ -709,24 +716,47 @@ def scan_outbound(
    text = body if isinstance(body, str) else body.decode("utf-8", errors="replace")

    # CRLF injection is never legitimate — runs unconditionally, not gated
-    # by outbound_detectors config.
+    # by outbound_detectors config, and never override-able by safe_tokens.
    result = scan_crlf_injection(text)
    if result is not None:
        return result

    if _detector_enabled(route.outbound_detectors, "token_patterns"):
-        result = scan_token_patterns(text, location="body")
+        result = scan_token_patterns(text, location="body", safe_tokens=safe_tokens)
        if result is not None:
            return result

    if _detector_enabled(route.outbound_detectors, "known_secrets"):
-        result = scan_known_secrets(text, location="body", env=environ)
+        result = scan_known_secrets(
+            text, location="body", env=environ, safe_tokens=safe_tokens,
+        )
        if result is not None:
            return result

    return None


+def build_token_allow_payload(
+    host: str,
+    method: str,
+    path: str,
+    result: ScanResult,
+) -> str:
+    """Render the human-readable supervisor proposal body for an outbound
+    token block (PRD 0062). Carries the host/method/path, the detector
+    reason, and the redacted context snippet — never the raw token value.
+
+    Args:
+        host: Destination host of the blocked request; emitted as ``host:``.
+        method: HTTP method of the blocked request; emitted as ``method:``.
+        path: Request path of the blocked request; emitted as ``path:``.
+        result: The scan result that triggered the block. Only ``reason``
+            and ``context`` are read; ``matched`` is never touched here.
+
+    Returns:
+        A newline-terminated, line-oriented text block: one fixed headline,
+        then ``host:``, ``method:``, ``path:`` and ``detector:`` lines, and
+        a trailing ``context:`` line only when ``result.context`` is
+        non-empty.
+    """
+    # NOTE(review): host, method and path are interpolated verbatim. This
+    # assumes callers pass values that contain neither line breaks (which
+    # would let a request forge extra "field:" lines in the proposal) nor
+    # the detected token itself — confirm against the call site.
+    lines = [
+        "egress blocked an outbound request carrying a detected token",
+        f"host: {host}",
+        f"method: {method}",
+        f"path: {path}",
+        f"detector: {result.reason}",
+    ]
+    # The context line is optional: it is omitted when the scan result
+    # carries an empty context string.
+    if result.context:
+        lines.append(f"context: {result.context}")
+    # Always end with a newline so the payload is a well-formed text file.
+    return "\n".join(lines) + "\n"
+
+
 def scan_inbound(
    route: Route,
    body: str | bytes,
@@ -760,6 +790,7 @@ __all__ = [
    "ScanResult",
    "build_inbound_scan_text",
    "build_outbound_scan_text",
+    "build_token_allow_payload",
    "decide",
    "decide_git_fetch",
    "evaluate_matches",