diff --git a/README.md b/README.md index f2ad087..0a49a1b 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ ## Features - **Per-bottle egress allowlist** — TLS-bumped HTTP/HTTPS chokepoint with a per-manifest host allowlist; per-route path/method/header `matches` filtering; outbound DLP scanning for known tokens and secrets, inbound DLP scanning for prompt-injection attempts; DoH and arbitrary hosts blocked by default. +- **Supervisor override for token blocks** — when the outbound DLP catches a token, the request is held and surfaced in `./cli.py supervise` instead of failing outright. The operator approves or rejects; an approved value is remembered for the life of the egress proxy so the request — and later ones carrying it — flow through. Fails closed on rejection or timeout. - **Tokens the agent never sees** — host secrets live in a sidecar; the agent dials `http://sidecar:9099/` and the proxy strips inbound `Authorization` and injects the real token before forwarding. `printenv` in the agent shows proxy URLs only. - **Gitleaks-scanned push (git-gate)** — `bottle.git` remotes route through a per-bottle `git daemon` that gitleaks-scans incoming refs pre-receive and forwards clean refs upstream over SSH. The agent never holds the upstream credential. - **Manifest-scoped skills + secrets** — each bottle declares its skills, env, git identity, remotes, and egress routes; unknown keys die at load. @@ -150,6 +151,8 @@ You help maintain Gitea-hosted projects. | `dlp.inbound_detectors` | no | `false` disables inbound scanning; list restricts to named detectors (`naive_injection_detection`). | | `git.fetch` | no | `true` permits smart HTTP clone/fetch (`git-upload-pack`) for this host. Push (`git-receive-pack`) remains blocked. 
| +When an outbound request is blocked because a DLP detector matched a token, the proxy queues an `egress-token-allow` proposal for the operator's `./cli.py supervise` TUI and holds the request open until it is answered (or `EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS`, default 300s, elapses — after which it fails closed). The operator never sees the raw token, only the host, method, path, and a redacted snippet. Approving adds the value to an in-memory safelist for the life of the egress proxy. Structural blocks (CRLF injection) and not-in-allowlist host blocks stay hard `403`s. + More examples in `examples/`. Full design lives under `docs/prds/`; the trust-boundary rationale is in `docs/prds/0011-per-file-md-manifest.md`. ## Trademarks diff --git a/bot_bottle/cli/supervise.py b/bot_bottle/cli/supervise.py index fb76958..9db0b93 100644 --- a/bot_bottle/cli/supervise.py +++ b/bot_bottle/cli/supervise.py @@ -54,6 +54,7 @@ from ..supervise import ( TOOL_ALLOW, TOOL_EGRESS_BLOCK, TOOL_GITLEAKS_ALLOW, + TOOL_EGRESS_TOKEN_ALLOW, archive_proposal, list_pending_proposals, render_diff, @@ -65,6 +66,11 @@ from ._common import PROG _REFRESH_INTERVAL_MS = 1000 +# Proposal tools whose payload is a read-only report, not a file the operator +# edits: modify is unavailable and approval requires a recorded reason for the +# audit trail. +_REPORT_ONLY_TOOLS: tuple[str, ...] 
= (TOOL_GITLEAKS_ALLOW, TOOL_EGRESS_TOKEN_ALLOW) + @dataclass(frozen=True) class QueuedProposal: @@ -141,7 +147,7 @@ def _suffix_for_tool(tool: str) -> str: return ".dockerfile" if tool in (TOOL_ALLOW, TOOL_EGRESS_BLOCK): return ".yaml" - if tool == TOOL_GITLEAKS_ALLOW: + if tool in (TOOL_GITLEAKS_ALLOW, TOOL_EGRESS_TOKEN_ALLOW): return ".txt" return ".txt" @@ -212,8 +218,8 @@ def _approve_from_tui( notes: str = "", ) -> str: """Approve from curses, prompting for any tool-specific audit note.""" - if qp.proposal.tool == TOOL_GITLEAKS_ALLOW and final_file is None: - notes = _prompt(stdscr, "allow reason (test fixture/false positive): ") + if qp.proposal.tool in _REPORT_ONLY_TOOLS and final_file is None: + notes = _prompt(stdscr, "allow reason (false positive / legitimately needed): ") if not notes: return "approve aborted (empty reason)" approve(qp, final_file=final_file, notes=notes) @@ -411,8 +417,8 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None: # type: ignore except ApplyError as e: status_line = f"apply failed: {e}" elif key == ord("m"): - if qp.proposal.tool == TOOL_GITLEAKS_ALLOW: - status_line = "modify unavailable for gitleaks-allow" + if qp.proposal.tool in _REPORT_ONLY_TOOLS: + status_line = f"modify unavailable for {qp.proposal.tool}" continue edited = _modify(stdscr, qp) if edited is None: @@ -525,7 +531,7 @@ def _detail_view( pass return elif key == ord("m"): - if qp.proposal.tool == TOOL_GITLEAKS_ALLOW: + if qp.proposal.tool in _REPORT_ONLY_TOOLS: return edited = _modify(stdscr, qp) if edited is not None: diff --git a/bot_bottle/dlp_detectors.py b/bot_bottle/dlp_detectors.py index c2c038b..fec3df4 100644 --- a/bot_bottle/dlp_detectors.py +++ b/bot_bottle/dlp_detectors.py @@ -78,16 +78,27 @@ TOKEN_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] 
= ( ) -def scan_token_patterns(text: str, *, location: str = "body") -> ScanResult | None: +def scan_token_patterns( + text: str, + *, + location: str = "body", + safe_tokens: typing.AbstractSet[str] | None = None, +) -> ScanResult | None: normalized = _normalize_text(text) for name, pattern in TOKEN_PATTERNS: - m = pattern.search(normalized) - if m is not None: + for m in pattern.finditer(normalized): + value = m.group(0) + # A value the supervisor has approved (PRD 0062) is no longer a + # block — keep scanning so a second, un-approved token in the + # same request is still caught. + if safe_tokens is not None and value in safe_tokens: + continue return ScanResult( severity="block", reason=f"{name} found in {location}", location=location, - context=_snippet(text, m.start(), m.end()), + context=_snippet(normalized, m.start(), m.end()), + matched=value, ) return None @@ -155,6 +166,7 @@ def scan_known_secrets( *, location: str = "body", env: typing.Mapping[str, str] | None = None, + safe_tokens: typing.AbstractSet[str] | None = None, ) -> ScanResult | None: if env is None: return None @@ -164,11 +176,17 @@ def scan_known_secrets( for variant in _encoded_variants(value): pos = text.find(variant) if pos >= 0: + # The supervisor approves the exact encoded variant found + # (PRD 0062); a different encoding of the same secret is a + # fresh block. 
+ if safe_tokens is not None and variant in safe_tokens: + continue return ScanResult( severity="block", reason=f"provisioned secret from {key} found in {location}", location=location, context=_snippet(text, pos, pos + len(variant)), + matched=variant, ) return None diff --git a/bot_bottle/egress_addon.py b/bot_bottle/egress_addon.py index c0df317..f075d60 100644 --- a/bot_bottle/egress_addon.py +++ b/bot_bottle/egress_addon.py @@ -5,6 +5,7 @@ egress container.""" from __future__ import annotations +import asyncio import json import os import signal @@ -17,8 +18,10 @@ from egress_addon_core import ( # type: ignore[import-not-found] # pylint: dis LOG_BLOCKS, LOG_FULL, Config, + ScanResult, build_inbound_scan_text, build_outbound_scan_text, + build_token_allow_payload, decide, decide_git_fetch, is_git_fetch_request, @@ -36,19 +39,48 @@ try: except ImportError: # pragma: no cover - host-side path from bot_bottle.dlp_detectors import redact_tokens # type: ignore[import-not-found] +try: + import supervise as _sv # type: ignore[import-not-found] +except ImportError: # pragma: no cover - host-side path + from bot_bottle import supervise as _sv # type: ignore[import-not-found] + DEFAULT_ROUTES_PATH = "/etc/egress/routes.yaml" INTROSPECT_HOST = "_egress.local" +# Seconds the egress proxy holds a token-blocked request open waiting for the +# operator's supervisor decision (PRD 0062), overridable via env. +DEFAULT_TOKEN_ALLOW_TIMEOUT_SECONDS = 300.0 +# Filesystem poll cadence while awaiting the operator's response. +TOKEN_ALLOW_POLL_INTERVAL_SECONDS = 0.5 + +# Fixed operator guidance attached to every token-allow proposal. +_TOKEN_ALLOW_JUSTIFICATION = ( + "egress DLP blocked an outbound request carrying a detected token. " + "Approve only if this value is a false positive or a credential this " + "request legitimately needs; the value is then allowed for the life of " + "this bottle's egress proxy." 
+) + class EgressAddon: def __init__(self) -> None: self.routes_path = os.environ.get("EGRESS_ROUTES", DEFAULT_ROUTES_PATH) self.config: Config = Config(routes=()) + # Tokens the operator has approved this session (PRD 0062). In-memory + # only — a restart re-prompts. Mutated only from the asyncio loop that + # runs the addon hooks, so no lock is needed. + self.safe_tokens: set[str] = set() + self._supervise_queue_dir = os.environ.get("SUPERVISE_QUEUE_DIR", "").strip() + self._supervise_slug = os.environ.get("SUPERVISE_BOTTLE_SLUG", "").strip() + self._token_allow_timeout = _token_allow_timeout_from_env(os.environ) self._reload(initial=True) self._install_sighup() + def _supervise_available(self) -> bool: + return bool(self._supervise_queue_dir and self._supervise_slug) + def _reload(self, *, initial: bool = False) -> None: try: text = Path(self.routes_path).read_text(encoding="utf-8") @@ -145,7 +177,7 @@ class EgressAddon: + "\n" ) - def request(self, flow: http.HTTPFlow) -> None: + async def request(self, flow: http.HTTPFlow) -> None: request_path, _, query = flow.request.path.partition("?") if flow.request.pretty_host == INTROSPECT_HOST: @@ -158,15 +190,31 @@ class EgressAddon: route = match_route(self.config.routes, flow.request.pretty_host) if route is not None: body = flow.request.get_text(strict=False) or "" - scan_text = build_outbound_scan_text( - flow.request.pretty_host, - request_path, - query, - outbound_scan_headers(route, dict(flow.request.headers)), - body, - ) - dlp_result = scan_outbound(route, scan_text, os.environ) - if dlp_result is not None and dlp_result.severity == "block": + # Re-scan after each operator approval so a second, un-approved + # token in the same request is still caught (PRD 0062). 
+ while True: + scan_text = build_outbound_scan_text( + flow.request.pretty_host, + request_path, + query, + outbound_scan_headers(route, dict(flow.request.headers)), + body, + ) + dlp_result = scan_outbound( + route, scan_text, os.environ, safe_tokens=self.safe_tokens, + ) + if dlp_result is None or dlp_result.severity != "block": + break + # Token blocks (a match with a safelist-able value) can be + # routed to the operator; structural blocks (CRLF, matched="") + # and any block when supervise is disabled stay hard 403s. + if dlp_result.matched and self._supervise_available(): + approved = await self._supervise_token_block( + flow, request_path, dlp_result, + ) + if approved: + continue # re-scan; matched value now in safe_tokens + return # _supervise_token_block wrote the 403 response ctx = self._req_ctx(flow) if dlp_result.context: ctx = {**ctx, "context": dlp_result.context} @@ -221,6 +269,95 @@ class EgressAddon: if self.config.log >= LOG_FULL: self._log_request(flow) + async def _supervise_token_block( + self, + flow: http.HTTPFlow, + request_path: str, + result: ScanResult, + ) -> bool: + """Route a token DLP block to the operator's supervisor queue and wait. 
+ + Returns True if the operator approved (the matched value is added to + `self.safe_tokens` and the caller re-scans); False if the request must + be blocked (a 403 response has been written to `flow`).""" + host = flow.request.pretty_host + payload = build_token_allow_payload( + redact_tokens(host, env=os.environ), + flow.request.method, + redact_tokens(request_path, env=os.environ), + result, + ) + proposal = _sv.Proposal.new( + bottle_slug=self._supervise_slug, + tool=_sv.TOOL_EGRESS_TOKEN_ALLOW, + proposed_file=payload, + justification=_TOKEN_ALLOW_JUSTIFICATION, + current_file_hash=_sv.sha256_hex(payload), + ) + queue_dir = Path(self._supervise_queue_dir) + try: + _sv.write_proposal(queue_dir, proposal) + except OSError as e: + sys.stderr.write( + f"egress: could not queue token-allow proposal: {e}; " + "blocking request\n" + ) + self._block(flow, f"egress DLP: {result.reason}", ctx=self._req_ctx(flow)) + return False + + sys.stderr.write(json.dumps({ + "event": "egress_token_supervise", + "reason": f"egress DLP: {result.reason}", + "proposal": proposal.id, + **self._req_ctx(flow), + }) + "\n") + + response = await self._await_token_response(queue_dir, proposal.id) + _sv.archive_proposal(queue_dir, proposal.id) + + if response is not None and response.status in ( + _sv.STATUS_APPROVED, _sv.STATUS_MODIFIED, + ): + self.safe_tokens.add(result.matched) + if self.config.log >= LOG_BLOCKS: + sys.stderr.write(json.dumps({ + "event": "egress_token_allowed", + "reason": f"egress DLP: {result.reason}", + "proposal": proposal.id, + **self._req_ctx(flow), + }) + "\n") + return True + + if response is None: + reason = ( + f"egress DLP: {result.reason}; supervisor approval timed out " + f"after {self._token_allow_timeout:g}s" + ) + else: + reason = f"egress DLP: {result.reason}; supervisor rejected the request" + self._block(flow, reason, ctx=self._req_ctx(flow)) + return False + + async def _await_token_response( + self, + queue_dir: Path, + proposal_id: str, + ) -> 
"_sv.Response | None": + """Poll the queue dir for the operator's response without blocking the + proxy event loop. Returns the Response, or None on timeout.""" + loop = asyncio.get_running_loop() + deadline = loop.time() + self._token_allow_timeout + while True: + try: + return _sv.read_response(queue_dir, proposal_id) + except (OSError, ValueError, KeyError): + # Not written yet, or a partial/malformed write — retry until + # the deadline, then fail closed. + pass + if loop.time() >= deadline: + return None + await asyncio.sleep(TOKEN_ALLOW_POLL_INTERVAL_SECONDS) + def response(self, flow: http.HTTPFlow) -> None: """DLP inbound scan on response headers and body.""" route = match_route(self.config.routes, flow.request.pretty_host) @@ -272,7 +409,9 @@ class EgressAddon: message = flow.websocket.messages[-1] # type: ignore[union-attr] content = message.content.decode("utf-8", errors="replace") if message.from_client: - result = scan_outbound(route, content, os.environ) + result = scan_outbound( + route, content, os.environ, safe_tokens=self.safe_tokens, + ) if result is not None and result.severity == "block": sys.stderr.write(f"egress DLP: {result.reason}\n") flow.kill() # type: ignore[union-attr] @@ -286,4 +425,23 @@ class EgressAddon: sys.stderr.write(f"egress DLP warn: {result.reason}\n") +def _token_allow_timeout_from_env(env: "os._Environ[str]") -> float: + """Read EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS; fall back to the default on an + unset or invalid value (a bad value should not wedge egress at boot).""" + raw = env.get("EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS", "").strip() + if not raw: + return DEFAULT_TOKEN_ALLOW_TIMEOUT_SECONDS + try: + value = float(raw) + except ValueError: + value = 0.0 + if value <= 0: + sys.stderr.write( + "egress: invalid EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS=" + f"{raw!r}; using default {DEFAULT_TOKEN_ALLOW_TIMEOUT_SECONDS:g}s\n" + ) + return DEFAULT_TOKEN_ALLOW_TIMEOUT_SECONDS + return value + + addons = [EgressAddon()] diff --git 
a/bot_bottle/egress_addon_core.py b/bot_bottle/egress_addon_core.py index 595baeb..f24ce3e 100644 --- a/bot_bottle/egress_addon_core.py +++ b/bot_bottle/egress_addon_core.py @@ -95,6 +95,11 @@ class ScanResult: reason: str location: str = "" # where the match was found, e.g. "body", "authorization header" context: str = "" # surrounding text with the match replaced by REDACT + # Raw substring the detector matched. Used inside the sidecar to key the + # supervisor-approved "safe tokens" set (PRD 0062); never logged or written + # to a proposal file. Empty for structural detectors (CRLF) that carry no + # safelist-able value. + matched: str = "" # --------------------------------------------------------------------------- @@ -690,6 +695,8 @@ def scan_outbound( route: Route, body: str | bytes, environ: typing.Mapping[str, str], + *, + safe_tokens: typing.AbstractSet[str] | None = None, ) -> ScanResult | None: # Lazy import to avoid circular deps and keep dlp_detectors optional # at import time (the sidecar copies it flat alongside this file). @@ -709,24 +716,47 @@ def scan_outbound( text = body if isinstance(body, str) else body.decode("utf-8", errors="replace") # CRLF injection is never legitimate — runs unconditionally, not gated - # by outbound_detectors config. + # by outbound_detectors config, and never override-able by safe_tokens. 
result = scan_crlf_injection(text) if result is not None: return result if _detector_enabled(route.outbound_detectors, "token_patterns"): - result = scan_token_patterns(text, location="body") + result = scan_token_patterns(text, location="body", safe_tokens=safe_tokens) if result is not None: return result if _detector_enabled(route.outbound_detectors, "known_secrets"): - result = scan_known_secrets(text, location="body", env=environ) + result = scan_known_secrets( + text, location="body", env=environ, safe_tokens=safe_tokens, + ) if result is not None: return result return None +def build_token_allow_payload( + host: str, + method: str, + path: str, + result: ScanResult, +) -> str: + """Render the human-readable supervisor proposal body for an outbound + token block (PRD 0062). Carries the host/method/path, the detector + reason, and the redacted context snippet — never the raw token value.""" + lines = [ + "egress blocked an outbound request carrying a detected token", + f"host: {host}", + f"method: {method}", + f"path: {path}", + f"detector: {result.reason}", + ] + if result.context: + lines.append(f"context: {result.context}") + return "\n".join(lines) + "\n" + + def scan_inbound( route: Route, body: str | bytes, @@ -760,6 +790,7 @@ __all__ = [ "ScanResult", "build_inbound_scan_text", "build_outbound_scan_text", + "build_token_allow_payload", "decide", "decide_git_fetch", "evaluate_matches", diff --git a/bot_bottle/supervise.py b/bot_bottle/supervise.py index b27ab5e..7b1d56d 100644 --- a/bot_bottle/supervise.py +++ b/bot_bottle/supervise.py @@ -52,12 +52,16 @@ TOOL_CAPABILITY_BLOCK = "capability-block" TOOL_EGRESS_BLOCK = "egress-block" TOOL_ALLOW = "allow" TOOL_GITLEAKS_ALLOW = "gitleaks-allow" +# Written directly by the egress addon (not an agent-facing MCP tool) when an +# outbound DLP token block is routed to the operator for override (PRD 0062). 
+TOOL_EGRESS_TOKEN_ALLOW = "egress-token-allow" TOOL_LIST_EGRESS_ROUTES = "list-egress-routes" TOOLS: tuple[str, ...] = ( TOOL_ALLOW, TOOL_CAPABILITY_BLOCK, TOOL_EGRESS_BLOCK, TOOL_GITLEAKS_ALLOW, + TOOL_EGRESS_TOKEN_ALLOW, TOOL_LIST_EGRESS_ROUTES, ) @@ -556,6 +560,7 @@ __all__ = [ "EGRESS_INTROSPECT_URL", "TOOL_CAPABILITY_BLOCK", "TOOL_GITLEAKS_ALLOW", + "TOOL_EGRESS_TOKEN_ALLOW", "TOOL_LIST_EGRESS_ROUTES", "archive_proposal", "audit_dir", diff --git a/docs/prds/0062-egress-supervisor-token-override.md b/docs/prds/0062-egress-supervisor-token-override.md new file mode 100644 index 0000000..db6c1de --- /dev/null +++ b/docs/prds/0062-egress-supervisor-token-override.md @@ -0,0 +1,140 @@ +# PRD 0062: Supervisor override for egress token blocks + +- **Status:** Active +- **Author:** claude +- **Created:** 2026-06-24 +- **Issue:** #261 + +## Summary + +When the egress proxy blocks an outbound request because a DLP detector +matched a token/secret, route that block through the existing supervisor +approval queue instead of returning `403` immediately. The proxy holds the +request open until the operator approves or rejects it. On approval, the +matched token is added to an in-memory "safe tokens" set so the request — and +any later request carrying the same token — flows through without re-prompting. + +## Problem + +The outbound DLP detectors (`token_patterns`, `known_secrets`) are +deliberately aggressive: any string that looks like a credential is blocked +before it leaves the bottle. That is the right default, but it produces false +positives — a token-shaped value that is not actually a secret, or a credential +the agent legitimately needs to send to a declared host. Today the only +recovery is for the operator to notice the `egress DLP` 403 in the logs and +hand-edit the route's `dlp.outbound_detectors`, which disables the detector for +the whole route rather than allowing the one value. 
+ +The operator has no in-the-loop signal that a token block happened and no +fine-grained way to say "this specific value is fine." + +## Goals / Success Criteria + +1. An outbound DLP **token** block (a `ScanResult` carrying a matched secret + value) creates a supervisor proposal instead of an immediate `403`. +2. The egress proxy holds the blocked request open, polling for the operator's + response up to a bounded timeout. +3. The proposal shows the operator the host, method, path, the detector reason, + and a **redacted** context snippet — never the raw token value. +4. On `approved`/`modified`, the matched token value is added to an in-memory + safe-tokens set and the request proceeds normally; later requests carrying + the same value skip the block. +5. On `rejected`, timeout, malformed response, or missing supervisor wiring, + the request fails closed with the same `403` as today. +6. Structural blocks that carry no token value (CRLF injection) and the + route-not-allowlisted / git blocks are unchanged — they stay hard `403`s and + keep their existing agent-driven `allow` / `egress-block` MCP path. +7. The proxy event loop is not stalled while waiting: the wait is asynchronous, + so other flows keep being served. + +## Non-goals + +- Persisting the safe-tokens set across egress restarts. It lives in process + memory only; a restart re-prompts. (The issue explicitly defers persistence.) +- Supervising inbound (prompt-injection) blocks or WebSocket frame blocks. + WebSocket frames still honour the safe-tokens set for already-approved values + but cannot wait for approval (there is no response surface after upgrade). +- Generalising an approved secret across encodings. The safe-tokens set matches + the exact value the detector found. +- Replacing the per-route `dlp.outbound_detectors` override. That remains the + way to turn a detector off wholesale. 
+ +## Design + +### Detected-value plumbing + +`ScanResult` gains a `matched: str = ""` field carrying the raw substring the +detector matched. The token detectors (`scan_token_patterns`, +`scan_known_secrets`) populate it; the structural CRLF detector leaves it +empty. The value stays inside the egress sidecar process — it is never written +to a log line (logs already use the redacted `context`) or to the proposal +file. + +`scan_outbound` (and the token detectors it calls) accept a `safe_tokens` +set. A match whose value is in `safe_tokens` is skipped, so an approved token +no longer blocks. The scanners keep searching past a safelisted match so a +second, un-approved secret in the same request is still caught. + +### Supervisor proposal + +A new proposal tool constant `egress-token-allow` is added to +`supervise.TOOLS`. The egress addon writes the proposal directly to +`SUPERVISE_QUEUE_DIR` (the queue is bind-mounted into the sidecar bundle and +shared by every daemon, exactly as git-gate's `gitleaks-allow` proposal in PRD +0061 does). The proposal's `proposed_file` is a human-readable text payload: + +``` +egress blocked an outbound request carrying a detected token +host: api.example.com +method: POST +path: /v1/ingest +detector: OpenAI API key found in body +context: ...before ******** after... +``` + +The justification tells the operator to approve only if the value is a false +positive or a credential the request legitimately needs. + +The addon then polls for the proposal's `.response.json` for up to +`EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS` seconds (default 300). `approved`/`modified` +allow the request and add the value to the safe-tokens set; `rejected`, +malformed responses, and timeout fail the request closed. The proposal and its +response are archived to `processed/` once the wait ends, whether by decision +or by timeout. + +Because the wait happens inside mitmproxy's asyncio loop, the addon's +`request` hook is async and polls with `asyncio.sleep`, so concurrent flows +are unaffected. 
+ +### Supervisor UI + +`cli/supervise.py` renders `egress-token-allow` like `gitleaks-allow`: the +text payload is shown, modify is unavailable (there is no file patch to edit), +and approval prompts for a non-empty reason that is recorded in the response +notes. There is no on-disk config diff, so — like `gitleaks-allow` and +`capability-block` — it writes no egress audit-log entry. + +### Failure handling + +If either `SUPERVISE_QUEUE_DIR` or `SUPERVISE_BOTTLE_SLUG` is unset (supervise +disabled for the bottle), the addon skips the queue and returns the existing +`403`. An error writing the proposal fails closed immediately; an unreadable +or malformed response is retried until the timeout and then fails closed. + +## Implementation chunks + +1. **Core** — `ScanResult.matched`; thread `safe_tokens` through + `scan_outbound` / token detectors; `build_token_allow_payload`. +2. **Supervise + TUI** — `TOOL_EGRESS_TOKEN_ALLOW`; TUI suffix, modify guard, + required approval reason. +3. **Addon glue** — async `request`, safe-tokens set, proposal write + async + poll, allow/block decision; pass `safe_tokens` into the WebSocket path. +4. **Tests + docs** — core/supervise/TUI unit tests; README egress + supervisor + notes. + +## Open questions + +- Should `known_secrets` (provisioned `EGRESS_TOKEN_*` exfiltration) be + override-able at all, or only `token_patterns`? This PRD allows both — + approval is an explicit operator decision and the safe-tokens set matches the + exact found value — but a future revision could restrict `known_secrets` to + reject-only. 
diff --git a/tests/unit/test_dlp_detectors.py b/tests/unit/test_dlp_detectors.py index 03ddae6..d723fb1 100644 --- a/tests/unit/test_dlp_detectors.py +++ b/tests/unit/test_dlp_detectors.py @@ -445,5 +445,47 @@ class TestKnownSecretsNewVariants(unittest.TestCase): self.assertIsNotNone(result) +class TestMatchedAndSafeTokens(unittest.TestCase): + """PRD 0062: detectors carry the raw matched value, and a safelisted + value is skipped so the supervisor can approve a specific token.""" + + def test_token_pattern_sets_matched(self): + token = "ghp_" + "A" * 36 + result = scan_token_patterns(f"token: {token}") + assert result is not None + self.assertEqual(token, result.matched) + + def test_safe_token_is_skipped(self): + token = "ghp_" + "A" * 36 + self.assertIsNone( + scan_token_patterns(f"token: {token}", safe_tokens={token}) + ) + + def test_safe_token_does_not_mask_other_token(self): + safe = "ghp_" + "A" * 36 + other = "AKIAIOSFODNN7EXAMPLE" + result = scan_token_patterns( + f"a={safe} b={other}", safe_tokens={safe}, + ) + assert result is not None + self.assertEqual(other, result.matched) + self.assertIn("AWS", result.reason) + + def test_known_secret_sets_matched_and_safelist_skips(self): + secret = "supersecretvalue123" + env = {"EGRESS_TOKEN_FOO": secret} + result = scan_known_secrets(f"x={secret}", env=env) + assert result is not None + self.assertEqual(secret, result.matched) + self.assertIsNone( + scan_known_secrets(f"x={secret}", env=env, safe_tokens={secret}) + ) + + def test_crlf_block_has_no_matched_value(self): + result = scan_crlf_injection("path%0d%0aHost: evil") + assert result is not None + self.assertEqual("", result.matched) + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_egress_addon_core.py b/tests/unit/test_egress_addon_core.py index 09d79e3..06e932a 100644 --- a/tests/unit/test_egress_addon_core.py +++ b/tests/unit/test_egress_addon_core.py @@ -22,8 +22,10 @@ from bot_bottle.egress_addon_core import ( MatchEntry, 
PathMatch, Route, + ScanResult, build_inbound_scan_text, build_outbound_scan_text, + build_token_allow_payload, decide, decide_git_fetch, evaluate_matches, @@ -1167,5 +1169,55 @@ class TestScanInbound(unittest.TestCase): self.assertEqual("block", result.severity) +class TestScanOutboundSafeTokens(unittest.TestCase): + """PRD 0062: scan_outbound threads the supervisor-approved safe-tokens + set into the token detectors.""" + + def test_safe_token_allows_request(self): + text = build_outbound_scan_text( + host="api.example.com", path="/v1/data", query="", + headers={}, body=f"key={_AWS_KEY}", + ) + self.assertIsNone( + scan_outbound(_ROUTE, text, {}, safe_tokens={_AWS_KEY}) + ) + + def test_unrelated_safe_token_still_blocks(self): + text = build_outbound_scan_text( + host="api.example.com", path="/v1/data", query="", + headers={}, body=f"key={_AWS_KEY}", + ) + result = scan_outbound(_ROUTE, text, {}, safe_tokens={"ghp_" + "A" * 36}) + self.assertIsNotNone(result) + assert result is not None + self.assertEqual(_AWS_KEY, result.matched) + + +class TestBuildTokenAllowPayload(unittest.TestCase): + def test_payload_includes_context_and_no_raw_token(self): + result = ScanResult( + severity="block", + reason="AWS access key found in body", + location="body", + context="key=******** tail", + matched=_AWS_KEY, + ) + payload = build_token_allow_payload( + "api.example.com", "POST", "/v1/ingest", result, + ) + self.assertIn("host: api.example.com", payload) + self.assertIn("method: POST", payload) + self.assertIn("path: /v1/ingest", payload) + self.assertIn("AWS access key found in body", payload) + self.assertIn("key=******** tail", payload) + # The raw matched value must never appear in the proposal file. 
+ self.assertNotIn(_AWS_KEY, payload) + + def test_payload_omits_context_line_when_empty(self): + result = ScanResult(severity="block", reason="r", matched="x") + payload = build_token_allow_payload("h", "GET", "/", result) + self.assertNotIn("context:", payload) + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_supervise.py b/tests/unit/test_supervise.py index 87ded1b..ed30ef2 100644 --- a/tests/unit/test_supervise.py +++ b/tests/unit/test_supervise.py @@ -322,11 +322,22 @@ class TestToolConstants(unittest.TestCase): TOOL_CAPABILITY_BLOCK, supervise.TOOL_EGRESS_BLOCK, TOOL_GITLEAKS_ALLOW, + supervise.TOOL_EGRESS_TOKEN_ALLOW, supervise.TOOL_LIST_EGRESS_ROUTES, ), supervise.TOOLS, ) + def test_token_allow_proposal_roundtrips(self): + p = Proposal.new( + bottle_slug="dev", + tool=supervise.TOOL_EGRESS_TOKEN_ALLOW, + proposed_file="host: api.example.com\n", + justification="false positive", + current_file_hash="h", + ) + self.assertEqual(p, Proposal.from_dict(p.to_dict())) + def test_component_map_has_egress_entries(self): self.assertEqual( { diff --git a/tests/unit/test_supervise_cli.py b/tests/unit/test_supervise_cli.py index 8b9f354..63a7b47 100644 --- a/tests/unit/test_supervise_cli.py +++ b/tests/unit/test_supervise_cli.py @@ -20,6 +20,7 @@ from bot_bottle.supervise import ( STATUS_REJECTED, TOOL_CAPABILITY_BLOCK, TOOL_GITLEAKS_ALLOW, + TOOL_EGRESS_TOKEN_ALLOW, read_audit_entries, read_response, sha256_hex, @@ -35,6 +36,7 @@ def _proposal(slug: str = "dev", tool: str = TOOL_CAPABILITY_BLOCK) -> Proposal: supervise.TOOL_ALLOW: "routes:\n - host: example.com\n", supervise.TOOL_EGRESS_BLOCK: "routes:\n - host: example.com\n", TOOL_GITLEAKS_ALLOW: "file: tests/test_fixture.py\nline: 3\n", + TOOL_EGRESS_TOKEN_ALLOW: "host: api.example.com\ndetector: token\n", } payload = payloads.get(tool, "") return Proposal.new( @@ -196,6 +198,39 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase): resp = read_response(qp.queue_dir, 
qp.proposal.id) self.assertEqual("test fixture", resp.notes) + def test_approve_token_allow_leaves_response_for_egress(self): + qp = self._enqueue(tool=TOOL_EGRESS_TOKEN_ALLOW) + supervise_cli.approve(qp, notes="false positive") + # The egress addon polls the queue dir for the response; the TUI must + # not archive it (the addon archives after reading). + resp = read_response(qp.queue_dir, qp.proposal.id) + self.assertEqual(STATUS_APPROVED, resp.status) + self.assertEqual("false positive", resp.notes) + self.assertFalse((qp.queue_dir / "processed").exists()) + + def test_token_allow_writes_no_audit_log(self): + qp = self._enqueue(tool=TOOL_EGRESS_TOKEN_ALLOW) + supervise_cli.approve(qp, notes="false positive") + self.assertEqual([], read_audit_entries("egress", "dev")) + + def test_tui_token_allow_requires_reason(self): + qp = self._enqueue(tool=TOOL_EGRESS_TOKEN_ALLOW) + with patch.object(supervise_cli, "_prompt", return_value=""): + status = supervise_cli._approve_from_tui(None, qp) # type: ignore[arg-type] + self.assertEqual("approve aborted (empty reason)", status) + self.assertFalse((qp.queue_dir / "processed").exists()) + + def test_tui_token_allow_writes_reason(self): + qp = self._enqueue(tool=TOOL_EGRESS_TOKEN_ALLOW) + with patch.object(supervise_cli, "_prompt", return_value="legit"): + status = supervise_cli._approve_from_tui(None, qp) # type: ignore[arg-type] + self.assertIn("approved egress-token-allow", status) + resp = read_response(qp.queue_dir, qp.proposal.id) + self.assertEqual("legit", resp.notes) + + def test_suffix_for_token_allow_is_txt(self): + self.assertEqual(".txt", supervise_cli._suffix_for_tool(TOOL_EGRESS_TOKEN_ALLOW)) + # class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase): # # DISABLED — capability_apply functionality is currently commented out.