From 1ad710a04156be1b862782e10593a4ce6de726ee Mon Sep 17 00:00:00 2001 From: didericis Date: Wed, 24 Jun 2026 20:40:36 -0400 Subject: [PATCH] Default agent-provider routes to the redact on-match policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provider routes (the agent talking to its own LLM API — api.anthropic.com, the Codex backend, etc.) carry the whole conversation payload, which is the worst source of token-shaped false positives. egress_routes_for_bottle now fills outbound_on_match=redact on any provider route that doesn't set it explicitly, so a match there is scrubbed and forwarded rather than blocked or queued for the operator. A provider that sets the policy keeps its choice; manifest routes still default to supervise. Tests: provider route gets redact default, explicit provider policy preserved, manifest route unaffected. README + PRD 0062 updated. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01HnvBjPZC5V7qeQpFbQdDmS --- README.md | 2 +- bot_bottle/egress.py | 18 +++++++++++++++++- .../0062-egress-supervisor-token-override.md | 9 +++++++++ tests/unit/test_egress.py | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2493b40..63dcafe 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ You help maintain Gitea-hosted projects. | `dlp` | no | Per-route DLP overrides. Omit to use defaults (all detectors on). | | `dlp.outbound_detectors` | no | `false` disables outbound scanning; list restricts to named detectors (`token_patterns`, `known_secrets`). | | `dlp.inbound_detectors` | no | `false` disables inbound scanning; list restricts to named detectors (`naive_injection_detection`). | -| `dlp.outbound_on_match` | no | What to do when an outbound token is detected: `supervise` (default — hold for operator approval), `redact` (scrub the value and forward), or `block` (hard 403). | +| `dlp.outbound_on_match` | no | What to do when an outbound token is detected: `supervise` (default for manifest routes — hold for operator approval), `redact` (scrub the value and forward), or `block` (hard 403). Agent-provider routes (e.g. `api.anthropic.com`) default to `redact`. | | `git.fetch` | no | `true` permits smart HTTP clone/fetch (`git-upload-pack`) for this host. Push (`git-receive-pack`) remains blocked. | When an outbound DLP detector matches a token, the route's `dlp.outbound_on_match` policy decides what happens. Under the default `supervise`, the proxy queues an `egress-token-allow` proposal for the operator's `./cli.py supervise` TUI and holds the request open until it is answered (or `EGRESS_TOKEN_ALLOW_TIMEOUT_SECONDS`, default 300s, elapses — after which it fails closed). The operator never sees the raw token, only the host, method, path, and a redacted snippet; approving adds the value to an in-memory safelist for the life of the egress proxy. Under `redact`, the matched value is scrubbed from the body, headers, and path and the request is forwarded (failing closed if a match lands somewhere unredactable, like the hostname). Under `block` it stays a hard `403`. Structural blocks (CRLF injection) and not-in-allowlist host blocks are always hard `403`s regardless of policy. diff --git a/bot_bottle/egress.py b/bot_bottle/egress.py index 01785e4..8943049 100644 --- a/bot_bottle/egress.py +++ b/bot_bottle/egress.py @@ -16,6 +16,7 @@ from pathlib import Path from typing import TYPE_CHECKING from .egress_addon_core import ( + ON_MATCH_REDACT, HeaderMatch as CoreHeaderMatch, MatchEntry as CoreMatchEntry, PathMatch as CorePathMatch, @@ -106,12 +107,27 @@ def egress_routes_for_bottle( ) -> tuple[EgressRoute, ...]: manifest = egress_manifest_routes(bottle) provisioned_hosts = {pr.host.lower() for pr in provider_routes} - merged = list(provider_routes) + [ + merged = list(_default_provider_on_match(provider_routes)) + [ r for r in manifest if r.host.lower() not in provisioned_hosts ] return _assign_token_slots(merged) +def _default_provider_on_match( + provider_routes: tuple[EgressRoute, ...], +) -> tuple[EgressRoute, ...]: + """Provider routes (the agent talking to its own LLM API) default to the + `redact` on-match policy (PRD 0062): high-volume conversation payloads are + the worst source of token-shaped false positives, so a match is scrubbed + and forwarded rather than hard-blocked or queued for the operator. A + provider that sets `outbound_on_match` explicitly keeps its choice.""" + return tuple( + r if r.outbound_on_match + else dataclasses.replace(r, outbound_on_match=ON_MATCH_REDACT) + for r in provider_routes + ) + + def _assign_token_slots( routes: list[EgressRoute], ) -> tuple[EgressRoute, ...]: diff --git a/docs/prds/0062-egress-supervisor-token-override.md b/docs/prds/0062-egress-supervisor-token-override.md index 23d5d5a..6844d47 100644 --- a/docs/prds/0062-egress-supervisor-token-override.md +++ b/docs/prds/0062-egress-supervisor-token-override.md @@ -87,6 +87,15 @@ rendered `routes.yaml` (`egress_render_routes`), and the addon's `Route` request time. The `list-egress-routes` introspection endpoint round-trips it so the agent's proposals preserve it. +**Provider routes default to `redact`.** Agent-provider routes (the agent +talking to its own LLM API — `api.anthropic.com`, the Codex backend, etc.) are +the worst source of token-shaped false positives because the whole +conversation payload flows through them. `egress_routes_for_bottle` fills +`outbound_on_match=redact` on any provider route that doesn't set it +explicitly, so a match there is scrubbed and forwarded rather than blocked or +queued. A provider that sets the policy keeps its choice; manifest routes are +unaffected (they default to `supervise`). + On an outbound block the addon dispatches on the resolved policy: - **Structural blocks always 403.** A `ScanResult` with no `matched` value diff --git a/tests/unit/test_egress.py b/tests/unit/test_egress.py index 83ac82c..4fdae02 100644 --- a/tests/unit/test_egress.py +++ b/tests/unit/test_egress.py @@ -202,6 +202,23 @@ class TestProviderRouteMerge(unittest.TestCase): self.assertEqual((), routes[0].matches) self.assertEqual({}, egress_token_env_map(routes)) + def test_provider_route_defaults_to_redact_on_match(self): + b = _bottle([]) + pr = EgressRoute(host="api.anthropic.com") + routes = egress_routes_for_bottle(b, (pr,)) + self.assertEqual("redact", routes[0].outbound_on_match) + + def test_provider_route_explicit_on_match_preserved(self): + b = _bottle([]) + pr = EgressRoute(host="api.anthropic.com", outbound_on_match="supervise") + routes = egress_routes_for_bottle(b, (pr,)) + self.assertEqual("supervise", routes[0].outbound_on_match) + + def test_manifest_route_does_not_get_redact_default(self): + b = _bottle([{"host": "api.example.com"}]) + routes = egress_routes_for_bottle(b) + self.assertEqual("", routes[0].outbound_on_match) + def test_two_provider_routes_with_same_token_ref_share_slot(self): b = _bottle([]) routes = egress_routes_for_bottle(b, (