Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1f96619c6a |
@@ -160,26 +160,37 @@ class EgressAddon:
|
||||
)
|
||||
|
||||
def _log_request(self, flow: http.HTTPFlow) -> None:
|
||||
headers = {
|
||||
k: redact_tokens(v, env=os.environ)
|
||||
for k, v in flow.request.headers.items()
|
||||
if k.lower() != "authorization"
|
||||
}
|
||||
body = redact_tokens(flow.request.get_text(strict=False) or "", env=os.environ)
|
||||
sys.stderr.write(
|
||||
json.dumps({
|
||||
"event": "egress_request",
|
||||
"host": redact_tokens(flow.request.pretty_host, env=os.environ),
|
||||
"method": flow.request.method,
|
||||
"path": redact_tokens(flow.request.path, env=os.environ),
|
||||
"headers": dict(flow.request.headers),
|
||||
"body": flow.request.get_text(strict=False) or "",
|
||||
"headers": headers,
|
||||
"body": body,
|
||||
})
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
def _log_response(self, flow: http.HTTPFlow) -> None:
|
||||
headers = {
|
||||
k: redact_tokens(v, env=os.environ)
|
||||
for k, v in flow.response.headers.items()
|
||||
}
|
||||
body = redact_tokens(flow.response.get_text(strict=False) or "", env=os.environ)
|
||||
sys.stderr.write(
|
||||
json.dumps({
|
||||
"event": "egress_response",
|
||||
"host": flow.request.pretty_host,
|
||||
"status": flow.response.status_code,
|
||||
"headers": dict(flow.response.headers),
|
||||
"body": flow.response.get_text(strict=False) or "",
|
||||
"headers": headers,
|
||||
"body": body,
|
||||
})
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
# PRD prd-new: LOG_FULL egress logging credential redaction
|
||||
|
||||
- **Status:** Draft
|
||||
- **Author:** claude
|
||||
- **Created:** 2026-06-25
|
||||
- **Issue:** #257
|
||||
|
||||
## Summary
|
||||
|
||||
The `LOG_FULL` egress logging path (`_log_request` and `_log_response` in `egress_addon.py`) writes request/response headers and bodies to stderr without redaction and includes the sidecar-injected upstream `Authorization` header verbatim. This PR applies `redact_tokens` to header values and bodies in both log functions and strips the injected `Authorization` header from request logs entirely.
|
||||
|
||||
## Problem
|
||||
|
||||
`LOG_FULL` (log level 2) is intended for debugging egress traffic. When active it calls `_log_request` and `_log_response`. Both functions have two related bugs:
|
||||
|
||||
1. **Injected `Authorization` header exposure.** `_log_request` is called *after* the sidecar injects upstream credentials (`flow.request.headers["authorization"] = decision.inject_authorization`). The full header dict — including the live credential — is serialized to stderr. Any log collector that ingests the egress container's stderr will receive the upstream bearer token in plaintext.
|
||||
|
||||
2. **Unredacted bodies and header values.** Neither `_log_request` nor `_log_response` passes body or header values through `redact_tokens`. By contrast, `_req_ctx` (used for block/warn events) already calls `redact_tokens` on path and host. Any provisioned secret or recognized token pattern that appears in a request body, response body, or non-Authorization header value will be logged verbatim under `LOG_FULL`.
|
||||
|
||||
These two bugs compose: an agent that enables `LOG_FULL` and simultaneously triggers a request that carries a known token gains a write path from credentials → egress logs.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
- `_log_request` never logs the `authorization` header in any form.
|
||||
- `_log_request` applies `redact_tokens(value, env=os.environ)` to every other header value before serializing.
|
||||
- `_log_request` applies `redact_tokens(body, env=os.environ)` to the request body before logging.
|
||||
- `_log_response` applies `redact_tokens(value, env=os.environ)` to every response header value before logging.
|
||||
- `_log_response` applies `redact_tokens(body, env=os.environ)` to the response body before logging.
|
||||
- Unit tests cover each of the five cases above.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Redacting host or path in the full-log path (already covered by `_req_ctx` for block/warn events; `_log_request` already calls `redact_tokens` on host and path).
|
||||
- Suppressing `LOG_FULL` or adding a new log level.
|
||||
- Changing the outbound DLP scan logic.
|
||||
|
||||
## Design
|
||||
|
||||
### `_log_request`
|
||||
|
||||
```python
|
||||
def _log_request(self, flow: http.HTTPFlow) -> None:
|
||||
headers = {
|
||||
k: redact_tokens(v, env=os.environ)
|
||||
for k, v in flow.request.headers.items()
|
||||
if k.lower() != "authorization"
|
||||
}
|
||||
body = redact_tokens(flow.request.get_text(strict=False) or "", env=os.environ)
|
||||
sys.stderr.write(
|
||||
json.dumps({
|
||||
"event": "egress_request",
|
||||
"host": redact_tokens(flow.request.pretty_host, env=os.environ),
|
||||
"method": flow.request.method,
|
||||
"path": redact_tokens(flow.request.path, env=os.environ),
|
||||
"headers": headers,
|
||||
"body": body,
|
||||
})
|
||||
+ "\n"
|
||||
)
|
||||
```
|
||||
|
||||
The `authorization` key is excluded because by the time `_log_request` is called the sidecar has already injected the upstream credential (`decision.inject_authorization`). Logging it would write a live bearer token to stderr on every allowed request. There is no safe subset to log — the value is always a live credential or empty.
|
||||
|
||||
### `_log_response`
|
||||
|
||||
```python
|
||||
def _log_response(self, flow: http.HTTPFlow) -> None:
|
||||
headers = {
|
||||
k: redact_tokens(v, env=os.environ)
|
||||
for k, v in flow.response.headers.items()
|
||||
}
|
||||
body = redact_tokens(flow.response.get_text(strict=False) or "", env=os.environ)
|
||||
sys.stderr.write(
|
||||
json.dumps({
|
||||
"event": "egress_response",
|
||||
"host": flow.request.pretty_host,
|
||||
"status": flow.response.status_code,
|
||||
"headers": headers,
|
||||
"body": body,
|
||||
})
|
||||
+ "\n"
|
||||
)
|
||||
```
|
||||
|
||||
Response headers don't carry injected credentials, so no header name is suppressed — only the values are scrubbed by `redact_tokens`.
|
||||
@@ -0,0 +1,273 @@
|
||||
"""Unit: LOG_FULL credential redaction in _log_request / _log_response (issue #257).
|
||||
|
||||
egress_addon.py is sidecar-only code that depends on mitmproxy, which is
|
||||
not installed on the host. This file pre-populates sys.modules with the
|
||||
minimum mocks needed so EgressAddon can be imported and tested without the
|
||||
real mitmproxy package."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import types
|
||||
import unittest
|
||||
from io import StringIO
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sidecar-import shims — must run before importing egress_addon
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ensure_shims() -> None:
|
||||
if "mitmproxy" not in sys.modules:
|
||||
_mm = types.ModuleType("mitmproxy")
|
||||
_mh = types.ModuleType("mitmproxy.http")
|
||||
_mm.http = _mh
|
||||
sys.modules["mitmproxy"] = _mm
|
||||
sys.modules["mitmproxy.http"] = _mh
|
||||
if "egress_addon_core" not in sys.modules:
|
||||
import bot_bottle.egress_addon_core as _core
|
||||
sys.modules["egress_addon_core"] = _core
|
||||
|
||||
|
||||
_ensure_shims()
|
||||
|
||||
from bot_bottle.egress_addon import EgressAddon # noqa: E402 (import after shims)
|
||||
from bot_bottle.egress_addon_core import Config, LOG_FULL # noqa: E402
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _addon() -> EgressAddon:
|
||||
"""Return a bare EgressAddon with LOG_FULL config and no routes file."""
|
||||
a: EgressAddon = EgressAddon.__new__(EgressAddon)
|
||||
a.config = Config(routes=(), log=LOG_FULL)
|
||||
a.safe_tokens = set()
|
||||
a._supervise_queue_dir = ""
|
||||
a._supervise_slug = ""
|
||||
a._token_allow_timeout = 300.0
|
||||
return a
|
||||
|
||||
|
||||
class _Headers:
|
||||
def __init__(self, d: dict[str, str]) -> None:
|
||||
self._d = d
|
||||
|
||||
def items(self) -> list[tuple[str, str]]:
|
||||
return list(self._d.items())
|
||||
|
||||
|
||||
class _Request:
|
||||
def __init__(
|
||||
self,
|
||||
host: str = "api.example.com",
|
||||
method: str = "POST",
|
||||
path: str = "/v1/messages",
|
||||
headers: dict[str, str] | None = None,
|
||||
body: str = "",
|
||||
) -> None:
|
||||
self.pretty_host = host
|
||||
self.method = method
|
||||
self.path = path
|
||||
self.headers = _Headers(headers or {})
|
||||
self._body = body
|
||||
|
||||
def get_text(self, *, strict: bool = True) -> str:
|
||||
return self._body
|
||||
|
||||
|
||||
class _Response:
|
||||
def __init__(
|
||||
self,
|
||||
status_code: int = 200,
|
||||
headers: dict[str, str] | None = None,
|
||||
body: str = "",
|
||||
) -> None:
|
||||
self.status_code = status_code
|
||||
self.headers = _Headers(headers or {})
|
||||
self._body = body
|
||||
|
||||
def get_text(self, *, strict: bool = True) -> str:
|
||||
return self._body
|
||||
|
||||
|
||||
class _Flow:
|
||||
def __init__(
|
||||
self,
|
||||
request: _Request | None = None,
|
||||
response: _Response | None = None,
|
||||
) -> None:
|
||||
self.request = request or _Request()
|
||||
self.response = response or _Response()
|
||||
|
||||
|
||||
def _log_request(addon: EgressAddon, flow: _Flow) -> dict:
|
||||
buf = StringIO()
|
||||
with patch("sys.stderr", buf):
|
||||
addon._log_request(flow) # type: ignore[arg-type]
|
||||
return json.loads(buf.getvalue())
|
||||
|
||||
|
||||
def _log_response(addon: EgressAddon, flow: _Flow) -> dict:
|
||||
buf = StringIO()
|
||||
with patch("sys.stderr", buf):
|
||||
addon._log_response(flow) # type: ignore[arg-type]
|
||||
return json.loads(buf.getvalue())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _log_request — authorization header stripped
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLogRequestAuthorizationStripped(unittest.TestCase):
|
||||
def test_lowercase_authorization_excluded(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={"authorization": "Bearer sk-real-secret"}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertNotIn("authorization", entry["headers"])
|
||||
|
||||
def test_titlecase_authorization_excluded(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={"Authorization": "Bearer sk-real-secret"}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertNotIn("Authorization", entry["headers"])
|
||||
self.assertNotIn("authorization", entry["headers"])
|
||||
|
||||
def test_non_auth_headers_retained(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={
|
||||
"authorization": "Bearer sk-real-secret",
|
||||
"content-type": "application/json",
|
||||
}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertIn("content-type", entry["headers"])
|
||||
self.assertEqual("application/json", entry["headers"]["content-type"])
|
||||
|
||||
def test_no_authorization_header_logs_all_others(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={"x-request-id": "abc"}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertEqual({"x-request-id": "abc"}, entry["headers"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _log_request — body redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_OPENAI_KEY = "sk-" + "A" * 48
|
||||
|
||||
|
||||
class TestLogRequestBodyRedacted(unittest.TestCase):
|
||||
def test_token_pattern_in_body_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(body=f"key={_OPENAI_KEY}"))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertNotIn(_OPENAI_KEY, entry["body"])
|
||||
self.assertIn("********", entry["body"])
|
||||
|
||||
def test_provisioned_secret_in_body_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
secret = "provisioned-egress-secret-xyz"
|
||||
flow = _Flow(request=_Request(body=f"token={secret}"))
|
||||
with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}):
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertNotIn(secret, entry["body"])
|
||||
self.assertIn("********", entry["body"])
|
||||
|
||||
def test_clean_body_preserved(self) -> None:
|
||||
addon = _addon()
|
||||
payload = '{"model": "claude-3", "max_tokens": 1024}'
|
||||
flow = _Flow(request=_Request(body=payload))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertEqual(payload, entry["body"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _log_request — non-authorization header value redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLogRequestHeaderValuesRedacted(unittest.TestCase):
|
||||
def test_token_in_custom_header_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={"x-api-key": _OPENAI_KEY}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertNotIn(_OPENAI_KEY, entry["headers"].get("x-api-key", ""))
|
||||
self.assertIn("********", entry["headers"].get("x-api-key", ""))
|
||||
|
||||
def test_clean_header_value_preserved(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(headers={"accept": "application/json"}))
|
||||
entry = _log_request(addon, flow)
|
||||
self.assertEqual("application/json", entry["headers"]["accept"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _log_response — body redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLogResponseBodyRedacted(unittest.TestCase):
|
||||
def test_token_pattern_in_response_body_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(
|
||||
request=_Request(),
|
||||
response=_Response(body=f'{{"key": "{_OPENAI_KEY}"}}'),
|
||||
)
|
||||
entry = _log_response(addon, flow)
|
||||
self.assertNotIn(_OPENAI_KEY, entry["body"])
|
||||
self.assertIn("********", entry["body"])
|
||||
|
||||
def test_provisioned_secret_in_response_body_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
secret = "provisioned-egress-secret-xyz"
|
||||
flow = _Flow(
|
||||
request=_Request(),
|
||||
response=_Response(body=f'{{"token": "{secret}"}}'),
|
||||
)
|
||||
with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}):
|
||||
entry = _log_response(addon, flow)
|
||||
self.assertNotIn(secret, entry["body"])
|
||||
self.assertIn("********", entry["body"])
|
||||
|
||||
def test_clean_response_body_preserved(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(request=_Request(), response=_Response(body='{"result": "ok"}'))
|
||||
entry = _log_response(addon, flow)
|
||||
self.assertEqual('{"result": "ok"}', entry["body"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _log_response — response header value redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLogResponseHeaderValuesRedacted(unittest.TestCase):
|
||||
def test_token_in_response_header_scrubbed(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(
|
||||
request=_Request(),
|
||||
response=_Response(headers={"set-cookie": f"token={_OPENAI_KEY}"}),
|
||||
)
|
||||
entry = _log_response(addon, flow)
|
||||
cookie_val = entry["headers"].get("set-cookie", "")
|
||||
self.assertNotIn(_OPENAI_KEY, cookie_val)
|
||||
self.assertIn("********", cookie_val)
|
||||
|
||||
def test_clean_response_header_preserved(self) -> None:
|
||||
addon = _addon()
|
||||
flow = _Flow(
|
||||
request=_Request(),
|
||||
response=_Response(headers={"content-type": "application/json"}),
|
||||
)
|
||||
entry = _log_response(addon, flow)
|
||||
self.assertEqual("application/json", entry["headers"]["content-type"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user