From 9b929d06846a6b11c2a62d214683cbe9ea22daaa Mon Sep 17 00:00:00 2001 From: claude Date: Thu, 25 Jun 2026 02:39:54 +0000 Subject: [PATCH] fix(egress): strip injected Authorization and redact bodies in LOG_FULL path _log_request and _log_response wrote headers and bodies to stderr verbatim. _log_request also included the sidecar-injected upstream Authorization value, exposing live bearer tokens on every allowed request under LOG_FULL. Apply redact_tokens to all header values and bodies in both log functions; exclude the authorization header from _log_request entirely since its value is always a live sidecar-injected credential by the time _log_request runs. Closes #257 --- bot_bottle/egress_addon.py | 19 +- .../prd-new-log-full-credential-redaction.md | 85 ++++++ tests/unit/test_egress_addon_log_redaction.py | 273 ++++++++++++++++++ 3 files changed, 373 insertions(+), 4 deletions(-) create mode 100644 docs/prds/prd-new-log-full-credential-redaction.md create mode 100644 tests/unit/test_egress_addon_log_redaction.py diff --git a/bot_bottle/egress_addon.py b/bot_bottle/egress_addon.py index 41f4ca5..a28094e 100644 --- a/bot_bottle/egress_addon.py +++ b/bot_bottle/egress_addon.py @@ -160,26 +160,37 @@ class EgressAddon: ) def _log_request(self, flow: http.HTTPFlow) -> None: + headers = { + k: redact_tokens(v, env=os.environ) + for k, v in flow.request.headers.items() + if k.lower() != "authorization" + } + body = redact_tokens(flow.request.get_text(strict=False) or "", env=os.environ) sys.stderr.write( json.dumps({ "event": "egress_request", "host": redact_tokens(flow.request.pretty_host, env=os.environ), "method": flow.request.method, "path": redact_tokens(flow.request.path, env=os.environ), - "headers": dict(flow.request.headers), - "body": flow.request.get_text(strict=False) or "", + "headers": headers, + "body": body, }) + "\n" ) def _log_response(self, flow: http.HTTPFlow) -> None: + headers = { + k: redact_tokens(v, env=os.environ) + for k, v in flow.response.headers.items() + } + body = redact_tokens(flow.response.get_text(strict=False) or "", env=os.environ) sys.stderr.write( json.dumps({ "event": "egress_response", "host": flow.request.pretty_host, "status": flow.response.status_code, - "headers": dict(flow.response.headers), - "body": flow.response.get_text(strict=False) or "", + "headers": headers, + "body": body, }) + "\n" ) diff --git a/docs/prds/prd-new-log-full-credential-redaction.md b/docs/prds/prd-new-log-full-credential-redaction.md new file mode 100644 index 0000000..85e9267 --- /dev/null +++ b/docs/prds/prd-new-log-full-credential-redaction.md @@ -0,0 +1,85 @@ +# PRD prd-new: LOG_FULL egress logging credential redaction + +- **Status:** Draft +- **Author:** claude +- **Created:** 2026-06-25 +- **Issue:** #257 + +## Summary + +The `LOG_FULL` egress logging path (`_log_request` and `_log_response` in `egress_addon.py`) writes request/response headers and bodies to stderr without redaction and includes the sidecar-injected upstream `Authorization` header verbatim. This PR applies `redact_tokens` to header values and bodies in both log functions and strips the injected `Authorization` header from request logs entirely. + +## Problem + +`LOG_FULL` (log level 2) is intended for debugging egress traffic. When active it calls `_log_request` and `_log_response`. Both functions have two related bugs: + +1. **Injected `Authorization` header exposure.** `_log_request` is called *after* the sidecar injects upstream credentials (`flow.request.headers["authorization"] = decision.inject_authorization`). The full header dict — including the live credential — is serialized to stderr. Any log collector that ingests the egress container's stderr will receive the upstream bearer token in plaintext. + +2. **Unredacted bodies and header values.** Neither `_log_request` nor `_log_response` passes body or header values through `redact_tokens`. By contrast, `_req_ctx` (used for block/warn events) already calls `redact_tokens` on path and host. Any provisioned secret or recognized token pattern that appears in a request body, response body, or non-Authorization header value will be logged verbatim under `LOG_FULL`. + +These two bugs compose: an agent that enables `LOG_FULL` and simultaneously triggers a request that carries a known token gains a write path from credentials → egress logs. + +## Goals / Success Criteria + +- `_log_request` never logs the `authorization` header in any form. +- `_log_request` applies `redact_tokens(value, env=os.environ)` to every other header value before serializing. +- `_log_request` applies `redact_tokens(body, env=os.environ)` to the request body before logging. +- `_log_response` applies `redact_tokens(value, env=os.environ)` to every response header value before logging. +- `_log_response` applies `redact_tokens(body, env=os.environ)` to the response body before logging. +- Unit tests cover each of the five cases above. + +## Non-goals + +- Redacting host or path in the full-log path (already covered by `_req_ctx` for block/warn events; `_log_request` already calls `redact_tokens` on host and path). +- Suppressing `LOG_FULL` or adding a new log level. +- Changing the outbound DLP scan logic. + +## Design + +### `_log_request` + +```python +def _log_request(self, flow: http.HTTPFlow) -> None: + headers = { + k: redact_tokens(v, env=os.environ) + for k, v in flow.request.headers.items() + if k.lower() != "authorization" + } + body = redact_tokens(flow.request.get_text(strict=False) or "", env=os.environ) + sys.stderr.write( + json.dumps({ + "event": "egress_request", + "host": redact_tokens(flow.request.pretty_host, env=os.environ), + "method": flow.request.method, + "path": redact_tokens(flow.request.path, env=os.environ), + "headers": headers, + "body": body, + }) + + "\n" + ) +``` + +The `authorization` key is excluded because by the time `_log_request` is called the sidecar has already injected the upstream credential (`decision.inject_authorization`). Logging it would write a live bearer token to stderr on every allowed request. There is no safe subset to log — the value is always a live credential or empty. + +### `_log_response` + +```python +def _log_response(self, flow: http.HTTPFlow) -> None: + headers = { + k: redact_tokens(v, env=os.environ) + for k, v in flow.response.headers.items() + } + body = redact_tokens(flow.response.get_text(strict=False) or "", env=os.environ) + sys.stderr.write( + json.dumps({ + "event": "egress_response", + "host": flow.request.pretty_host, + "status": flow.response.status_code, + "headers": headers, + "body": body, + }) + + "\n" + ) +``` + +Response headers don't carry injected credentials, so no header name is suppressed — only the values are scrubbed by `redact_tokens`. diff --git a/tests/unit/test_egress_addon_log_redaction.py b/tests/unit/test_egress_addon_log_redaction.py new file mode 100644 index 0000000..59302fb --- /dev/null +++ b/tests/unit/test_egress_addon_log_redaction.py @@ -0,0 +1,273 @@ +"""Unit: LOG_FULL credential redaction in _log_request / _log_response (issue #257). + +egress_addon.py is sidecar-only code that depends on mitmproxy, which is +not installed on the host. This file pre-populates sys.modules with the +minimum mocks needed so EgressAddon can be imported and tested without the +real mitmproxy package.""" + +from __future__ import annotations + +import json +import sys +import types +import unittest +from io import StringIO +from unittest.mock import patch + + +# --------------------------------------------------------------------------- +# Sidecar-import shims — must run before importing egress_addon +# --------------------------------------------------------------------------- + +def _ensure_shims() -> None: + if "mitmproxy" not in sys.modules: + _mm = types.ModuleType("mitmproxy") + _mh = types.ModuleType("mitmproxy.http") + _mm.http = _mh + sys.modules["mitmproxy"] = _mm + sys.modules["mitmproxy.http"] = _mh + if "egress_addon_core" not in sys.modules: + import bot_bottle.egress_addon_core as _core + sys.modules["egress_addon_core"] = _core + + +_ensure_shims() + +from bot_bottle.egress_addon import EgressAddon # noqa: E402 (import after shims) +from bot_bottle.egress_addon_core import Config, LOG_FULL # noqa: E402 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _addon() -> EgressAddon: + """Return a bare EgressAddon with LOG_FULL config and no routes file.""" + a: EgressAddon = EgressAddon.__new__(EgressAddon) + a.config = Config(routes=(), log=LOG_FULL) + a.safe_tokens = set() + a._supervise_queue_dir = "" + a._supervise_slug = "" + a._token_allow_timeout = 300.0 + return a + + +class _Headers: + def __init__(self, d: dict[str, str]) -> None: + self._d = d + + def items(self) -> list[tuple[str, str]]: + return list(self._d.items()) + + +class _Request: + def __init__( + self, + host: str = "api.example.com", + method: str = "POST", + path: str = "/v1/messages", + headers: dict[str, str] | None = None, + body: str = "", + ) -> None: + self.pretty_host = host + self.method = method + self.path = path + self.headers = _Headers(headers or {}) + self._body = body + + def get_text(self, *, strict: bool = True) -> str: + return self._body + + +class _Response: + def __init__( + self, + status_code: int = 200, + headers: dict[str, str] | None = None, + body: str = "", + ) -> None: + self.status_code = status_code + self.headers = _Headers(headers or {}) + self._body = body + + def get_text(self, *, strict: bool = True) -> str: + return self._body + + +class _Flow: + def __init__( + self, + request: _Request | None = None, + response: _Response | None = None, + ) -> None: + self.request = request or _Request() + self.response = response or _Response() + + +def _log_request(addon: EgressAddon, flow: _Flow) -> dict: + buf = StringIO() + with patch("sys.stderr", buf): + addon._log_request(flow) # type: ignore[arg-type] + return json.loads(buf.getvalue()) + + +def _log_response(addon: EgressAddon, flow: _Flow) -> dict: + buf = StringIO() + with patch("sys.stderr", buf): + addon._log_response(flow) # type: ignore[arg-type] + return json.loads(buf.getvalue()) + + +# --------------------------------------------------------------------------- +# _log_request — authorization header stripped +# --------------------------------------------------------------------------- + + +class TestLogRequestAuthorizationStripped(unittest.TestCase): + def test_lowercase_authorization_excluded(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={"authorization": "Bearer sk-real-secret"})) + entry = _log_request(addon, flow) + self.assertNotIn("authorization", entry["headers"]) + + def test_titlecase_authorization_excluded(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={"Authorization": "Bearer sk-real-secret"})) + entry = _log_request(addon, flow) + self.assertNotIn("Authorization", entry["headers"]) + self.assertNotIn("authorization", entry["headers"]) + + def test_non_auth_headers_retained(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={ + "authorization": "Bearer sk-real-secret", + "content-type": "application/json", + })) + entry = _log_request(addon, flow) + self.assertIn("content-type", entry["headers"]) + self.assertEqual("application/json", entry["headers"]["content-type"]) + + def test_no_authorization_header_logs_all_others(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={"x-request-id": "abc"})) + entry = _log_request(addon, flow) + self.assertEqual({"x-request-id": "abc"}, entry["headers"]) + + +# --------------------------------------------------------------------------- +# _log_request — body redaction +# --------------------------------------------------------------------------- + + +_OPENAI_KEY = "sk-" + "A" * 48 + + +class TestLogRequestBodyRedacted(unittest.TestCase): + def test_token_pattern_in_body_scrubbed(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(body=f"key={_OPENAI_KEY}")) + entry = _log_request(addon, flow) + self.assertNotIn(_OPENAI_KEY, entry["body"]) + self.assertIn("********", entry["body"]) + + def test_provisioned_secret_in_body_scrubbed(self) -> None: + addon = _addon() + secret = "provisioned-egress-secret-xyz" + flow = _Flow(request=_Request(body=f"token={secret}")) + with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}): + entry = _log_request(addon, flow) + self.assertNotIn(secret, entry["body"]) + self.assertIn("********", entry["body"]) + + def test_clean_body_preserved(self) -> None: + addon = _addon() + payload = '{"model": "claude-3", "max_tokens": 1024}' + flow = _Flow(request=_Request(body=payload)) + entry = _log_request(addon, flow) + self.assertEqual(payload, entry["body"]) + + +# --------------------------------------------------------------------------- +# _log_request — non-authorization header value redaction +# --------------------------------------------------------------------------- + + +class TestLogRequestHeaderValuesRedacted(unittest.TestCase): + def test_token_in_custom_header_scrubbed(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={"x-api-key": _OPENAI_KEY})) + entry = _log_request(addon, flow) + self.assertNotIn(_OPENAI_KEY, entry["headers"].get("x-api-key", "")) + self.assertIn("********", entry["headers"].get("x-api-key", "")) + + def test_clean_header_value_preserved(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(headers={"accept": "application/json"})) + entry = _log_request(addon, flow) + self.assertEqual("application/json", entry["headers"]["accept"]) + + +# --------------------------------------------------------------------------- +# _log_response — body redaction +# --------------------------------------------------------------------------- + + +class TestLogResponseBodyRedacted(unittest.TestCase): + def test_token_pattern_in_response_body_scrubbed(self) -> None: + addon = _addon() + flow = _Flow( + request=_Request(), + response=_Response(body=f'{{"key": "{_OPENAI_KEY}"}}'), + ) + entry = _log_response(addon, flow) + self.assertNotIn(_OPENAI_KEY, entry["body"]) + self.assertIn("********", entry["body"]) + + def test_provisioned_secret_in_response_body_scrubbed(self) -> None: + addon = _addon() + secret = "provisioned-egress-secret-xyz" + flow = _Flow( + request=_Request(), + response=_Response(body=f'{{"token": "{secret}"}}'), + ) + with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}): + entry = _log_response(addon, flow) + self.assertNotIn(secret, entry["body"]) + self.assertIn("********", entry["body"]) + + def test_clean_response_body_preserved(self) -> None: + addon = _addon() + flow = _Flow(request=_Request(), response=_Response(body='{"result": "ok"}')) + entry = _log_response(addon, flow) + self.assertEqual('{"result": "ok"}', entry["body"]) + + +# --------------------------------------------------------------------------- +# _log_response — response header value redaction +# --------------------------------------------------------------------------- + + +class TestLogResponseHeaderValuesRedacted(unittest.TestCase): + def test_token_in_response_header_scrubbed(self) -> None: + addon = _addon() + flow = _Flow( + request=_Request(), + response=_Response(headers={"set-cookie": f"token={_OPENAI_KEY}"}), + ) + entry = _log_response(addon, flow) + cookie_val = entry["headers"].get("set-cookie", "") + self.assertNotIn(_OPENAI_KEY, cookie_val) + self.assertIn("********", cookie_val) + + def test_clean_response_header_preserved(self) -> None: + addon = _addon() + flow = _Flow( + request=_Request(), + response=_Response(headers={"content-type": "application/json"}), + ) + entry = _log_response(addon, flow) + self.assertEqual("application/json", entry["headers"]["content-type"]) + + +if __name__ == "__main__": + unittest.main()