Compare commits

...

2 Commits

Author SHA1 Message Date
didericis-claude 1f96619c6a fix(egress): strip injected Authorization and redact bodies in LOG_FULL path
lint / lint (push) Failing after 2m15s
test / unit (pull_request) Successful in 43s
test / integration (pull_request) Successful in 25s
_log_request and _log_response wrote headers and bodies to stderr verbatim.
_log_request also included the sidecar-injected upstream Authorization value,
exposing live bearer tokens on every allowed request under LOG_FULL.

Apply redact_tokens to all header values and bodies in both log functions;
exclude the authorization header from _log_request entirely since its value
is always a live sidecar-injected credential by the time _log_request runs.

Closes #257
2026-06-24 23:04:22 -04:00
didericis ecaae708f7 feat(provider): support startup args settings
test / unit (pull_request) Successful in 41s
test / integration (pull_request) Successful in 26s
lint / lint (push) Successful in 2m12s
test / unit (push) Successful in 41s
test / integration (push) Successful in 26s
Update Quality Badges / update-badges (push) Successful in 2m9s
2026-06-24 22:51:27 -04:00
10 changed files with 495 additions and 13 deletions
+9
View File
@@ -371,6 +371,15 @@ def build_agent_provision_plan(
)
def provider_startup_args(
    provider_settings: dict[str, object] | None,
) -> tuple[str, ...]:
    """Return the ``startup_args`` setting as a tuple of strings.

    A missing or empty settings mapping, a missing ``startup_args`` key, or a
    value that is neither a list nor a tuple all yield an empty tuple.
    Non-string entries inside the sequence are dropped.
    """
    settings = provider_settings if provider_settings else {}
    candidate = settings.get("startup_args", ())
    if isinstance(candidate, (list, tuple)):
        return tuple(filter(lambda item: isinstance(item, str), candidate))
    return ()
def prompt_args(
prompt_mode: PromptMode,
prompt_path: str | None,
+4 -1
View File
@@ -20,6 +20,7 @@ from ...agent_provider import (
AgentProvisionDir,
AgentProvisionFile,
AgentProvisionPlan,
provider_startup_args,
)
from ...backend.docker import util as docker_mod
from ...egress import EgressRoute
@@ -115,8 +116,9 @@ class ClaudeAgentProvider(AgentProvider):
color: str = "",
provider_settings: dict[str, object] | None = None,
) -> AgentProvisionPlan:
del forward_host_credentials, host_env, provider_settings
del forward_host_credentials, host_env
resolved_guest_env = dict(guest_env or {})
startup_args = provider_startup_args(provider_settings)
guest_home = self.guest_home
trusted_path = trusted_project_path or guest_home
@@ -199,6 +201,7 @@ class ClaudeAgentProvider(AgentProvider):
env_vars=env_vars,
guest_env=resolved_guest_env,
has_prompt=has_prompt,
startup_args=startup_args,
dirs=dirs,
files=tuple(files),
egress_routes=egress_routes,
+4 -1
View File
@@ -22,6 +22,7 @@ from ...agent_provider import (
AgentProvisionCommand,
AgentProvisionFile,
AgentProvisionPlan,
provider_startup_args,
)
from .codex_auth import codex_host_access_token, write_codex_dummy_auth_file
from ...egress import CODEX_HOST_CREDENTIAL_TOKEN_REF, EgressRoute
@@ -79,8 +80,9 @@ class CodexAgentProvider(AgentProvider):
color: str = "",
provider_settings: dict[str, object] | None = None,
) -> AgentProvisionPlan:
del auth_token, label, color, provider_settings
del auth_token, label, color
resolved_guest_env = dict(guest_env or {})
startup_args = provider_startup_args(provider_settings)
guest_home = self.guest_home
trusted_path = trusted_project_path or guest_home
@@ -163,6 +165,7 @@ class CodexAgentProvider(AgentProvider):
env_vars=env_vars,
guest_env=resolved_guest_env,
has_prompt=has_prompt,
startup_args=startup_args,
dirs=tuple(dirs),
files=tuple(files),
pre_copy=tuple(pre_copy),
+3
View File
@@ -21,6 +21,7 @@ from ...agent_provider import (
AgentProvisionDir,
AgentProvisionFile,
AgentProvisionPlan,
provider_startup_args,
)
from ...egress import EgressRoute
from ...log import die, info
@@ -199,6 +200,7 @@ class PiAgentProvider(AgentProvider):
models_payload, base_url, api_key_env, models, provider_name = (
_pi_models_json(settings)
)
extra_startup_args = provider_startup_args(provider_settings)
models_file = state_dir / "pi-models.json"
models_file.write_text(json.dumps(models_payload, indent=2) + "\n")
models_file.chmod(0o600)
@@ -219,6 +221,7 @@ class PiAgentProvider(AgentProvider):
startup_args=(
"--models",
",".join(f"{provider_name}/{model}" for model in models),
*extra_startup_args,
),
dirs=(AgentProvisionDir(f"{guest_home}/.pi/agent"),),
files=(AgentProvisionFile(models_file, _models_path(guest_home)),),
+15 -4
View File
@@ -160,26 +160,37 @@ class EgressAddon:
)
def _log_request(self, flow: http.HTTPFlow) -> None:
    """Write one ``egress_request`` JSON line for *flow* to stderr.

    The ``authorization`` header (matched case-insensitively) is left out of
    the logged headers entirely. Host, path, every remaining header value and
    the body are passed through ``redact_tokens`` with the process
    environment before serialization. A falsy body is logged as ``""``.
    """
    request = flow.request
    headers = {
        name: redact_tokens(value, env=os.environ)
        for name, value in request.headers.items()
        if name.lower() != "authorization"
    }
    body = redact_tokens(request.get_text(strict=False) or "", env=os.environ)
    entry = {
        "event": "egress_request",
        "host": redact_tokens(request.pretty_host, env=os.environ),
        "method": request.method,
        "path": redact_tokens(request.path, env=os.environ),
        # Only the redacted copies go into the payload; the raw header dict
        # and raw body are never evaluated here.
        "headers": headers,
        "body": body,
    }
    sys.stderr.write(json.dumps(entry) + "\n")
def _log_response(self, flow: http.HTTPFlow) -> None:
    """Write one ``egress_response`` JSON line for *flow* to stderr.

    Every response header value and the response body are passed through
    ``redact_tokens`` with the process environment. The request host is
    redacted the same way, matching what ``_log_request`` does for its
    ``host`` field. No header name is dropped. A falsy body is logged as
    ``""``.
    """
    response = flow.response
    headers = {
        name: redact_tokens(value, env=os.environ)
        for name, value in response.headers.items()
    }
    body = redact_tokens(response.get_text(strict=False) or "", env=os.environ)
    entry = {
        "event": "egress_response",
        # Redacted for parity with the request log line.
        "host": redact_tokens(flow.request.pretty_host, env=os.environ),
        "status": response.status_code,
        # Only the redacted copies go into the payload; the raw header dict
        # and raw body are never evaluated here.
        "headers": headers,
        "body": body,
    }
    sys.stderr.write(json.dumps(entry) + "\n")
+28 -6
View File
@@ -199,13 +199,10 @@ def _parse_provider_settings(
) -> dict[str, object]:
if raw is None:
return {}
if template != "pi":
raise ManifestError(
f"bottle '{bottle_name}' agent_provider.settings is only "
"supported for template 'pi'"
)
settings = as_json_object(raw, f"bottle '{bottle_name}' agent_provider.settings")
allowed = {
common_allowed = {"startup_args"}
pi_allowed = {
"provider",
"base_url",
"api",
@@ -218,12 +215,37 @@ def _parse_provider_settings(
"supports_developer_role",
"supports_reasoning_effort",
}
if template == "pi":
allowed = common_allowed | pi_allowed
elif template in ("claude", "codex"):
allowed = common_allowed
elif template not in PROVIDER_TEMPLATES:
return dict(settings)
else:
allowed = common_allowed
for key in settings:
if key not in allowed:
raise ManifestError(
f"bottle '{bottle_name}' agent_provider.settings has unknown "
f"key {key!r}; allowed: {', '.join(sorted(allowed))}"
)
startup_args = settings.get("startup_args")
if startup_args is not None:
if not isinstance(startup_args, list):
raise ManifestError(
f"bottle '{bottle_name}' agent_provider.settings.startup_args "
f"must be an array of strings"
)
for i, arg in enumerate(startup_args):
if not isinstance(arg, str) or not arg:
raise ManifestError(
f"bottle '{bottle_name}' agent_provider.settings."
f"startup_args[{i}] must be a non-empty string"
)
if template != "pi":
return dict(settings)
for key in ("provider", "base_url", "api", "api_key", "api_key_env"):
value = settings.get(key)
if value is not None and (not isinstance(value, str) or not value):
@@ -0,0 +1,85 @@
# PRD prd-new: LOG_FULL egress logging credential redaction
- **Status:** Draft
- **Author:** claude
- **Created:** 2026-06-25
- **Issue:** #257
## Summary
The `LOG_FULL` egress logging path (`_log_request` and `_log_response` in `egress_addon.py`) writes request/response headers and bodies to stderr without redaction and includes the sidecar-injected upstream `Authorization` header verbatim. This PR applies `redact_tokens` to header values and bodies in both log functions and strips the injected `Authorization` header from request logs entirely.
## Problem
`LOG_FULL` (log level 2) is intended for debugging egress traffic. When active it calls `_log_request` and `_log_response`. Both functions have two related bugs:
1. **Injected `Authorization` header exposure.** `_log_request` is called *after* the sidecar injects upstream credentials (`flow.request.headers["authorization"] = decision.inject_authorization`). The full header dict — including the live credential — is serialized to stderr. Any log collector that ingests the egress container's stderr will receive the upstream bearer token in plaintext.
2. **Unredacted bodies and header values.** Neither `_log_request` nor `_log_response` passes body or header values through `redact_tokens`. By contrast, `_req_ctx` (used for block/warn events) already calls `redact_tokens` on path and host. Any provisioned secret or recognized token pattern that appears in a request body, response body, or non-Authorization header value will be logged verbatim under `LOG_FULL`.
These two bugs compound: an agent that enables `LOG_FULL` and then triggers a request carrying a known token gains a way to write credentials into the egress logs.
## Goals / Success Criteria
- `_log_request` never logs the `authorization` header in any form.
- `_log_request` applies `redact_tokens(value, env=os.environ)` to every other header value before serializing.
- `_log_request` applies `redact_tokens(body, env=os.environ)` to the request body before logging.
- `_log_response` applies `redact_tokens(value, env=os.environ)` to every response header value before logging.
- `_log_response` applies `redact_tokens(body, env=os.environ)` to the response body before logging.
- Unit tests cover each of the five cases above.
## Non-goals
- Changing how host and path are redacted for requests: `_log_request` already passes both through `redact_tokens`, as `_req_ctx` does for block/warn events.
- Suppressing `LOG_FULL` or adding a new log level.
- Changing the outbound DLP scan logic.
## Design
### `_log_request`
```python
def _log_request(self, flow: http.HTTPFlow) -> None:
headers = {
k: redact_tokens(v, env=os.environ)
for k, v in flow.request.headers.items()
if k.lower() != "authorization"
}
body = redact_tokens(flow.request.get_text(strict=False) or "", env=os.environ)
sys.stderr.write(
json.dumps({
"event": "egress_request",
"host": redact_tokens(flow.request.pretty_host, env=os.environ),
"method": flow.request.method,
"path": redact_tokens(flow.request.path, env=os.environ),
"headers": headers,
"body": body,
})
+ "\n"
)
```
The `authorization` key is excluded because by the time `_log_request` is called the sidecar has already injected the upstream credential (`decision.inject_authorization`). Logging it would write a live bearer token to stderr on every allowed request. There is no safe subset to log — the value is always a live credential or empty.
### `_log_response`
```python
def _log_response(self, flow: http.HTTPFlow) -> None:
headers = {
k: redact_tokens(v, env=os.environ)
for k, v in flow.response.headers.items()
}
body = redact_tokens(flow.response.get_text(strict=False) or "", env=os.environ)
sys.stderr.write(
json.dumps({
"event": "egress_response",
"host": flow.request.pretty_host,
"status": flow.response.status_code,
"headers": headers,
"body": body,
})
+ "\n"
)
```
Response headers don't carry injected credentials, so no header name is suppressed — only the values are scrubbed by `redact_tokens`.
+46
View File
@@ -168,6 +168,34 @@ class TestAgentProviderRuntime(unittest.TestCase):
self.assertEqual("~/.claude/statusline.sh", settings["statusLine"]["command"])
self.assertEqual("custom:bot-bottle-research-ui", settings["theme"])
def test_claude_plan_uses_startup_args_from_provider_settings(self):
    """A claude plan exposes ``startup_args`` from provider settings as a tuple."""
    settings = {"startup_args": ["--model", "opus"]}
    with tempfile.TemporaryDirectory(prefix="bb-provider.") as workdir:
        root = Path(workdir)
        plan = build_agent_provision_plan(
            template="claude",
            dockerfile="",
            state_dir=root,
            instance_name="bot-bottle-test",
            prompt_file=root / "prompt.txt",
            provider_settings=settings,
        )
        self.assertEqual(("--model", "opus"), plan.startup_args)
def test_codex_plan_uses_startup_args_from_provider_settings(self):
    """A codex plan exposes ``startup_args`` from provider settings as a tuple."""
    settings = {"startup_args": ["--model", "gpt-5-codex"]}
    with tempfile.TemporaryDirectory(prefix="bb-provider.") as workdir:
        root = Path(workdir)
        plan = build_agent_provision_plan(
            template="codex",
            dockerfile="",
            state_dir=root,
            instance_name="bot-bottle-test",
            prompt_file=root / "prompt.txt",
            provider_settings=settings,
        )
        self.assertEqual(("--model", "gpt-5-codex"), plan.startup_args)
def test_codex_forward_host_credentials_populates_egress_routes(self):
with tempfile.TemporaryDirectory(prefix="bb-provider.") as tmp:
home = Path(tmp) / "host-codex"
@@ -394,6 +422,24 @@ class TestAgentProviderRuntime(unittest.TestCase):
self.assertNotIn("OPENROUTER_API_KEY", plan.guest_env)
self.assertTrue(provider["compat"]["supportsReasoningEffort"])
def test_pi_plan_appends_startup_args_from_provider_settings(self):
    """A pi plan lists the ``--models`` pair first, then the configured extras."""
    settings = {
        "models": ["qwen3:14b"],
        "startup_args": ["--no-stream"],
    }
    expected = ("--models", "ollama/qwen3:14b", "--no-stream")
    with tempfile.TemporaryDirectory(prefix="bb-provider.") as workdir:
        root = Path(workdir)
        plan = build_agent_provision_plan(
            template="pi",
            dockerfile="",
            state_dir=root,
            instance_name="bot-bottle-test",
            prompt_file=root / "prompt.txt",
            provider_settings=settings,
        )
        self.assertEqual(expected, plan.startup_args)
def test_pi_prompt_mode_appends_system_prompt_interactively(self):
self.assertEqual(
["--append-system-prompt", "/home/node/.bot-bottle-prompt.txt"],
@@ -0,0 +1,273 @@
"""Unit: LOG_FULL credential redaction in _log_request / _log_response (issue #257).
egress_addon.py is sidecar-only code that depends on mitmproxy, which is
not installed on the host. This file pre-populates sys.modules with the
minimum mocks needed so EgressAddon can be imported and tested without the
real mitmproxy package."""
from __future__ import annotations
import json
import sys
import types
import unittest
from io import StringIO
from unittest.mock import patch
# ---------------------------------------------------------------------------
# Sidecar-import shims — must run before importing egress_addon
# ---------------------------------------------------------------------------
def _ensure_shims() -> None:
    """Register stand-in modules so ``egress_addon`` can be imported.

    Installs empty ``mitmproxy`` and ``mitmproxy.http`` modules when no
    ``mitmproxy`` entry exists in ``sys.modules``, and aliases
    ``bot_bottle.egress_addon_core`` under the bare name
    ``egress_addon_core`` when that name is not registered yet.
    """
    registry = sys.modules
    if "mitmproxy" not in registry:
        stub_pkg = types.ModuleType("mitmproxy")
        stub_http = types.ModuleType("mitmproxy.http")
        stub_pkg.http = stub_http
        registry["mitmproxy"] = stub_pkg
        registry["mitmproxy.http"] = stub_http
    if "egress_addon_core" not in registry:
        import bot_bottle.egress_addon_core as core_module

        registry["egress_addon_core"] = core_module
_ensure_shims()
from bot_bottle.egress_addon import EgressAddon # noqa: E402 (import after shims)
from bot_bottle.egress_addon_core import Config, LOG_FULL # noqa: E402
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _addon() -> EgressAddon:
    """Build an ``EgressAddon`` without running ``__init__``.

    The instance gets a ``Config`` with no routes and ``LOG_FULL`` logging,
    plus the handful of attributes assigned below.
    """
    instance: EgressAddon = EgressAddon.__new__(EgressAddon)
    attributes = {
        "config": Config(routes=(), log=LOG_FULL),
        "safe_tokens": set(),
        "_supervise_queue_dir": "",
        "_supervise_slug": "",
        "_token_allow_timeout": 300.0,
    }
    for attr_name, attr_value in attributes.items():
        setattr(instance, attr_name, attr_value)
    return instance
class _Headers:
    """Header stand-in that wraps a plain dict and exposes only ``items()``."""

    def __init__(self, d: dict[str, str]) -> None:
        self._d = d

    def items(self) -> list[tuple[str, str]]:
        """Return the wrapped dict's (name, value) pairs as a new list."""
        return [(name, value) for name, value in self._d.items()]
class _Request:
    """Request stand-in carrying only the attributes the log helpers read."""

    def __init__(
        self,
        host: str = "api.example.com",
        method: str = "POST",
        path: str = "/v1/messages",
        headers: dict[str, str] | None = None,
        body: str = "",
    ) -> None:
        self.pretty_host, self.method, self.path = host, method, path
        self.headers = _Headers(headers if headers else {})
        self._body = body

    def get_text(self, *, strict: bool = True) -> str:
        """Return the stored body; ``strict`` is accepted but ignored."""
        return self._body
class _Response:
    """Response stand-in carrying only the attributes the log helpers read."""

    def __init__(
        self,
        status_code: int = 200,
        headers: dict[str, str] | None = None,
        body: str = "",
    ) -> None:
        self.status_code = status_code
        self.headers = _Headers(headers if headers else {})
        self._body = body

    def get_text(self, *, strict: bool = True) -> str:
        """Return the stored body; ``strict`` is accepted but ignored."""
        return self._body
class _Flow:
    """Flow stand-in pairing a request with a response.

    Either side falls back to a default-constructed stand-in when omitted.
    """

    def __init__(
        self,
        request: _Request | None = None,
        response: _Response | None = None,
    ) -> None:
        self.request = request if request else _Request()
        self.response = response if response else _Response()
def _log_request(addon: EgressAddon, flow: _Flow) -> dict:
    """Call ``addon._log_request(flow)`` and return the JSON it wrote to stderr."""
    captured = StringIO()
    with patch.object(sys, "stderr", captured):
        addon._log_request(flow)  # type: ignore[arg-type]
    written = captured.getvalue()
    return json.loads(written)
def _log_response(addon: EgressAddon, flow: _Flow) -> dict:
    """Call ``addon._log_response(flow)`` and return the JSON it wrote to stderr."""
    captured = StringIO()
    with patch.object(sys, "stderr", captured):
        addon._log_response(flow)  # type: ignore[arg-type]
    written = captured.getvalue()
    return json.loads(written)
# ---------------------------------------------------------------------------
# _log_request — authorization header stripped
# ---------------------------------------------------------------------------
class TestLogRequestAuthorizationStripped(unittest.TestCase):
    """Checks that the request log omits the authorization header only."""

    @staticmethod
    def _logged_headers(request_headers: dict[str, str]) -> dict:
        # Log one request carrying the given headers; return what was logged.
        entry = _log_request(_addon(), _Flow(request=_Request(headers=request_headers)))
        return entry["headers"]

    def test_lowercase_authorization_excluded(self) -> None:
        logged = self._logged_headers({"authorization": "Bearer sk-real-secret"})
        self.assertNotIn("authorization", logged)

    def test_titlecase_authorization_excluded(self) -> None:
        logged = self._logged_headers({"Authorization": "Bearer sk-real-secret"})
        self.assertNotIn("Authorization", logged)
        self.assertNotIn("authorization", logged)

    def test_non_auth_headers_retained(self) -> None:
        logged = self._logged_headers({
            "authorization": "Bearer sk-real-secret",
            "content-type": "application/json",
        })
        self.assertIn("content-type", logged)
        self.assertEqual("application/json", logged["content-type"])

    def test_no_authorization_header_logs_all_others(self) -> None:
        logged = self._logged_headers({"x-request-id": "abc"})
        self.assertEqual({"x-request-id": "abc"}, logged)
# ---------------------------------------------------------------------------
# _log_request — body redaction
# ---------------------------------------------------------------------------
_OPENAI_KEY = "sk-" + "A" * 48
class TestLogRequestBodyRedacted(unittest.TestCase):
    """Checks that request bodies are scrubbed of tokens before logging."""

    @staticmethod
    def _flow_with_body(text: str) -> _Flow:
        # Wrap the given text as the body of an otherwise default request.
        return _Flow(request=_Request(body=text))

    def test_token_pattern_in_body_scrubbed(self) -> None:
        logged = _log_request(_addon(), self._flow_with_body(f"key={_OPENAI_KEY}"))["body"]
        self.assertNotIn(_OPENAI_KEY, logged)
        self.assertIn("********", logged)

    def test_provisioned_secret_in_body_scrubbed(self) -> None:
        secret = "provisioned-egress-secret-xyz"
        addon = _addon()
        flow = self._flow_with_body(f"token={secret}")
        # The secret is only present in the environment while logging runs.
        with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}):
            logged = _log_request(addon, flow)["body"]
        self.assertNotIn(secret, logged)
        self.assertIn("********", logged)

    def test_clean_body_preserved(self) -> None:
        payload = '{"model": "claude-3", "max_tokens": 1024}'
        logged = _log_request(_addon(), self._flow_with_body(payload))["body"]
        self.assertEqual(payload, logged)
# ---------------------------------------------------------------------------
# _log_request — non-authorization header value redaction
# ---------------------------------------------------------------------------
class TestLogRequestHeaderValuesRedacted(unittest.TestCase):
    """Checks that non-authorization request header values are scrubbed."""

    def test_token_in_custom_header_scrubbed(self) -> None:
        request = _Request(headers={"x-api-key": _OPENAI_KEY})
        logged_headers = _log_request(_addon(), _Flow(request=request))["headers"]
        self.assertNotIn(_OPENAI_KEY, logged_headers.get("x-api-key", ""))
        self.assertIn("********", logged_headers.get("x-api-key", ""))

    def test_clean_header_value_preserved(self) -> None:
        request = _Request(headers={"accept": "application/json"})
        logged_headers = _log_request(_addon(), _Flow(request=request))["headers"]
        self.assertEqual("application/json", logged_headers["accept"])
# ---------------------------------------------------------------------------
# _log_response — body redaction
# ---------------------------------------------------------------------------
class TestLogResponseBodyRedacted(unittest.TestCase):
    """Checks that response bodies are scrubbed of tokens before logging."""

    @staticmethod
    def _flow_with_response_body(text: str) -> _Flow:
        # Pair a default request with a response carrying the given body.
        return _Flow(request=_Request(), response=_Response(body=text))

    def test_token_pattern_in_response_body_scrubbed(self) -> None:
        flow = self._flow_with_response_body(f'{{"key": "{_OPENAI_KEY}"}}')
        logged = _log_response(_addon(), flow)["body"]
        self.assertNotIn(_OPENAI_KEY, logged)
        self.assertIn("********", logged)

    def test_provisioned_secret_in_response_body_scrubbed(self) -> None:
        secret = "provisioned-egress-secret-xyz"
        addon = _addon()
        flow = self._flow_with_response_body(f'{{"token": "{secret}"}}')
        # The secret is only present in the environment while logging runs.
        with patch.dict("os.environ", {"EGRESS_TOKEN_0": secret}):
            logged = _log_response(addon, flow)["body"]
        self.assertNotIn(secret, logged)
        self.assertIn("********", logged)

    def test_clean_response_body_preserved(self) -> None:
        flow = self._flow_with_response_body('{"result": "ok"}')
        logged = _log_response(_addon(), flow)["body"]
        self.assertEqual('{"result": "ok"}', logged)
# ---------------------------------------------------------------------------
# _log_response — response header value redaction
# ---------------------------------------------------------------------------
class TestLogResponseHeaderValuesRedacted(unittest.TestCase):
    """Checks that response header values are scrubbed of tokens before logging."""

    @staticmethod
    def _logged_headers(response_headers: dict[str, str]) -> dict:
        # Log one response carrying the given headers; return what was logged.
        flow = _Flow(request=_Request(), response=_Response(headers=response_headers))
        return _log_response(_addon(), flow)["headers"]

    def test_token_in_response_header_scrubbed(self) -> None:
        logged = self._logged_headers({"set-cookie": f"token={_OPENAI_KEY}"})
        cookie_val = logged.get("set-cookie", "")
        self.assertNotIn(_OPENAI_KEY, cookie_val)
        self.assertIn("********", cookie_val)

    def test_clean_response_header_preserved(self) -> None:
        logged = self._logged_headers({"content-type": "application/json"})
        self.assertEqual("application/json", logged["content-type"])
if __name__ == "__main__":
unittest.main()
+28 -1
View File
@@ -167,13 +167,40 @@ class TestAgentProviderHostCredentials(unittest.TestCase):
},
})
def test_settings_rejected_for_claude(self):
def test_startup_args_allowed_for_claude(self):
    """``startup_args`` is accepted in settings for the claude template."""
    expected = {"startup_args": ["--model", "opus"]}
    bottle = _provider_config_bottle({
        "template": "claude",
        "settings": {"startup_args": ["--model", "opus"]},
    })
    self.assertEqual(expected, bottle.agent_provider.settings)
def test_startup_args_allowed_for_codex(self):
    """``startup_args`` is accepted in settings for the codex template."""
    expected = {"startup_args": ["--model", "gpt-5-codex"]}
    bottle = _provider_config_bottle({
        "template": "codex",
        "settings": {"startup_args": ["--model", "gpt-5-codex"]},
    })
    self.assertEqual(expected, bottle.agent_provider.settings)
def test_provider_specific_settings_still_rejected_for_claude(self):
    """A ``models`` setting on the claude template raises ``ManifestError``."""
    config = {
        "template": "claude",
        "settings": {"models": ["qwen2.5-coder:7b"]},
    }
    self.assertRaises(ManifestError, _provider_config_bottle, config)
def test_startup_args_must_be_string_array(self):
    """A non-string entry in ``startup_args`` raises ``ManifestError``."""
    config = {
        "template": "codex",
        "settings": {"startup_args": ["--model", 42]},
    }
    self.assertRaises(ManifestError, _provider_config_bottle, config)
def test_settings_models_must_be_non_empty_string_array(self):
with self.assertRaises(ManifestError):
_provider_config_bottle({