docs(prd): add codex auth redaction policy

complete(prd): mark PRD 0035 active
fix(supervise): bound response waits
2026-06-02 04:09:18 -04:00 · 2026-06-02 08:06:53 +00:00 · 2026-06-02 08:06:45 +00:00 · 2026-06-02 07:58:39 +00:00
4 changed files with 339 additions and 4 deletions
@@ -35,6 +35,7 @@ import json
 import os
 import socketserver
 import sys
+import time
 import typing
 import urllib.error
 import urllib.parse
@@ -63,6 +64,10 @@ ERR_METHOD_NOT_FOUND = -32601
 ERR_INVALID_PARAMS = -32602
 ERR_INTERNAL = -32603

+DEFAULT_RESPONSE_TIMEOUT_SECONDS = 30.0
+MIN_RESPONSE_POLL_INTERVAL_SECONDS = 0.05
+EGRESS_LIST_TIMEOUT_SECONDS = 5.0
+

@dataclass(frozen=True)
 class JsonRpcRequest:
@@ -412,6 +417,7 @@ def _validate_and_bundle_egress_route(
 class ServerConfig:
    bottle_slug: str
    queue_dir: Path
+    response_timeout_seconds: float = DEFAULT_RESPONSE_TIMEOUT_SECONDS


 def handle_initialize(_params: dict[str, object]) -> dict[str, object]:
@@ -442,7 +448,7 @@ def handle_list_egress_routes(
    })
    opener = urllib.request.build_opener(proxy_handler)
    try:
-        with opener.open(_sv.EGRESS_INTROSPECT_URL, timeout=5) as resp:
+        with opener.open(_sv.EGRESS_INTROSPECT_URL, timeout=EGRESS_LIST_TIMEOUT_SECONDS) as resp:
            body = resp.read().decode("utf-8")
    except (urllib.error.URLError, OSError) as e:
        return {
@@ -520,7 +526,20 @@ def handle_tools_call(
        f"for bottle {config.bottle_slug}; waiting for operator...\n"
    )
    sys.stderr.flush()
-    response = _sv.wait_for_response(config.queue_dir, proposal.id)
+    deadline = time.monotonic() + config.response_timeout_seconds
+    try:
+        response = _sv.wait_for_response(
+            config.queue_dir,
+            proposal.id,
+            poll_interval=MIN_RESPONSE_POLL_INTERVAL_SECONDS,
+            deadline=deadline,
+        )
+    except TimeoutError:
+        text = format_pending_response_text(config.response_timeout_seconds)
+        return {
+            "content": [{"type": "text", "text": text}],
+            "isError": False,
+        }
    _sv.archive_proposal(config.queue_dir, proposal.id)

    text = format_response_text(response)
@@ -542,6 +561,16 @@ def format_response_text(response: "_sv.Response") -> str:
    return "\n".join(lines)


+def format_pending_response_text(timeout_seconds: float) -> str:
+    return "\n".join([
+        "status: pending",
+        (
+            "notes: operator response timed out after "
+            f"{timeout_seconds:g}s; proposal remains queued"
+        ),
+    ])
+
+
 # --- HTTP transport --------------------------------------------------------


@@ -654,10 +683,15 @@ def serve(
    queue_dir: Path,
    port: int = _sv.SUPERVISE_PORT,
    bind: str = "0.0.0.0",
+    response_timeout_seconds: float = DEFAULT_RESPONSE_TIMEOUT_SECONDS,
 ) -> typing.NoReturn:
    queue_dir.mkdir(parents=True, exist_ok=True)
    server = MCPServer((bind, port), MCPHandler)
-    server.config = ServerConfig(bottle_slug=bottle_slug, queue_dir=queue_dir)
+    server.config = ServerConfig(
+        bottle_slug=bottle_slug,
+        queue_dir=queue_dir,
+        response_timeout_seconds=response_timeout_seconds,
+    )
    sys.stderr.write(
        f"supervise listening on {bind}:{port}; "
        f"slug={bottle_slug!r}; queue={queue_dir}; "
@@ -682,9 +716,37 @@ def main(argv: list[str]) -> int:
    queue_dir = Path(os.environ.get("SUPERVISE_QUEUE_DIR", _sv.QUEUE_DIR_IN_CONTAINER))
    port = int(os.environ.get("SUPERVISE_PORT", str(_sv.SUPERVISE_PORT)))
    bind = os.environ.get("SUPERVISE_BIND", "0.0.0.0")
-    serve(bottle_slug=bottle_slug, queue_dir=queue_dir, port=port, bind=bind)
+    try:
+        response_timeout_seconds = _response_timeout_from_env(os.environ)
+    except ValueError as e:
+        sys.stderr.write(f"supervise: {e}\n")
+        return 2
+    serve(
+        bottle_slug=bottle_slug,
+        queue_dir=queue_dir,
+        port=port,
+        bind=bind,
+        response_timeout_seconds=response_timeout_seconds,
+    )
    return 0  # serve() does not return


+def _response_timeout_from_env(env: typing.Mapping[str, str]) -> float:
+    raw = env.get("SUPERVISE_RESPONSE_TIMEOUT_SECONDS", "").strip()
+    if not raw:
+        return DEFAULT_RESPONSE_TIMEOUT_SECONDS
+    try:
+        value = float(raw)
+    except ValueError as e:
+        raise ValueError(
+            "SUPERVISE_RESPONSE_TIMEOUT_SECONDS must be a positive number"
+        ) from e
+    if value <= 0:
+        raise ValueError(
+            "SUPERVISE_RESPONSE_TIMEOUT_SECONDS must be a positive number"
+        )
+    return value
+
+
 if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
@@ -0,0 +1,99 @@
+# PRD 0035: Supervise Wait Bounds
+
+- **Status:** Active
+- **Author:** didericis-codex
+- **Created:** 2026-06-02
+- **Issue:** #128
+
+## Summary
+
+Bound the supervise sidecar's request-thread waits so an agent tool call cannot
+hold an HTTP worker forever while waiting for operator action. Preserve the MCP
+tool surface, but make timeout behavior explicit, observable, and tested.
+
+## Problem
+
+`bot_bottle/supervise_server.py` handles MCP over a threaded stdlib HTTP
+server. Tool calls validate a proposal, write it to the supervise queue, and
+then wait for the operator response file. Today that wait can last forever.
+Each outstanding tool call consumes one server thread until the operator acts.
+
+The route-listing helper also performs a live HTTP request to egress inside the
+request thread. It has a short timeout today, but the behavior is not described
+as part of a broader request-thread budget.
+
+This is operationally risky in multi-agent or repeated-call scenarios: a stuck
+or ignored proposal can accumulate blocked threads, and callers do not get a
+clear "still pending" answer they can reason about.
+
+## Goals / Success Criteria
+
+- Add a bounded wait for operator responses to supervise tool calls.
+- Return a clear JSON-RPC tool result when the proposal remains pending after
+  the timeout; do not treat pending operator action as an internal server error.
+- Keep the queued proposal on disk after timeout so the operator can still act.
+- Make the wait duration configurable by environment with a conservative
+  default.
+- Preserve current success and rejection result shapes for completed operator
+  responses.
+- Keep `list-egress-routes` bounded and document its timeout behavior.
+- Add focused tests for approved responses, timed-out pending responses, and
+  route-list timeout/error handling.
+
+## Non-goals
+
+- No asynchronous framework or new runtime dependency.
+- No replacement of the stdlib threaded HTTP server.
+- No change to the host-side supervise queue format.
+- No cancellation protocol between the agent and operator UI.
+- No dashboard or TUI changes.
+
+## Scope
+
+In scope:
+
+- `bot_bottle/supervise_server.py` request handling.
+- Any small helper in `bot_bottle/supervise.py` needed to support a bounded
+  wait cleanly.
+- Unit tests around tool-call response waiting and route-list behavior.
+
+Out of scope:
+
+- Reworking proposal persistence.
+- Changing egress apply or pipelock apply flows.
+- Adding background workers to complete HTTP requests after the client returns.
+
+## Design
+
+Introduce a supervise response wait budget,
+`SUPERVISE_RESPONSE_TIMEOUT_SECONDS`, with a 30 second default. The existing
+poll loop should stop after that budget and return a normal tool result such as
+`{"status": "pending", "notes": "operator response timed out; proposal remains queued"}`.
+The exact field names should fit the existing response schema so agents can
+handle success, rejection, and pending with one result parser.
+
+The proposal file must remain in the queue when the HTTP call times out. The
+operator can still approve or reject it later, but that later response will not
+resume the original HTTP request.
+
+Route-listing should continue to use a short HTTP timeout to egress. Errors
+should be returned as tool results or JSON-RPC errors consistently with the
+existing server behavior; the implementation should avoid an unbounded socket
+wait in the request thread.
+
+## Testing Strategy
+
+- Unit-test a tool call whose response appears before the timeout.
+- Unit-test a tool call whose response never appears and assert the request
+  returns a pending result while the proposal remains queued.
+- Unit-test invalid timeout env values fall back or fail clearly.
+- Unit-test `list-egress-routes` timeout/error behavior with a fake URL opener.
+
+Run:
+
+- `python3 -m unittest tests.unit.test_supervise_server`
+- `python3 -m unittest discover -s tests/unit`
+
+## Open Questions
+
+None.
@@ -0,0 +1,107 @@
+# PRD 0036: Codex Auth Redaction Policy
+
+- **Status:** Draft
+- **Author:** didericis-codex
+- **Created:** 2026-06-02
+- **Issue:** #129
+
+## Summary
+
+Make Codex host-auth redaction explicit and fixture-driven so dummy
+`auth.json` generation cannot accidentally preserve future sensitive fields.
+Keep forwarding only the short-lived host access token through egress, while the
+guest receives a non-secret auth file whose schema remains useful to Codex.
+
+## Problem
+
+`bot_bottle/codex_auth.py` reads the host Codex auth file, extracts the access
+token for egress, and writes a dummy guest `auth.json`. The code redacts JWT
+claims and auth JSON fields with a mix of schema-specific handling and generic
+placeholder behavior.
+
+That is safer than copying raw auth, but it is still coverage-sensitive. If
+Codex adds a new field that carries a token, session identifier, refresh secret,
+or account metadata and the field name does not match current heuristics, the
+dummy auth file could preserve more information than intended. Because this is
+credential-adjacent code, the desired behavior should be allowlist-oriented and
+backed by explicit fixtures.
+
+## Goals / Success Criteria
+
+- Define a durable redaction policy for Codex `auth.json`:
+  - host access token is read for egress only.
+  - guest dummy auth contains no bearer, refresh, session, or secret values.
+  - selected non-secret fields may be preserved only when needed by Codex.
+- Prefer explicit per-field preservation over broad heuristic pass-through.
+- Add representative fixture tests for current Codex auth shapes.
+- Add regression tests for unknown nested fields, sensitive-looking field names,
+  lists, dictionaries, and JWT custom claims.
+- Preserve dummy token expiration alignment with the host access token.
+- Keep existing errors for missing, invalid, non-device, or expired auth.
+
+## Non-goals
+
+- No change to the egress credential-forwarding contract.
+- No attempt to refresh Codex tokens inside the bottle.
+- No copying of refresh tokens or raw host auth into the guest.
+- No dependency on a Codex SDK or external schema package.
+- No user-facing CLI changes.
+
+## Scope
+
+In scope:
+
+- `bot_bottle/codex_auth.py` redaction helpers.
+- Unit tests in `tests/unit/test_codex_auth.py`.
+- Small documentation comments that distinguish preserved non-secret fields from
+  redacted credential material.
+
+Out of scope:
+
+- Provider provisioning outside Codex auth file generation.
+- Egress route construction for Codex.
+- Runtime calls to Codex/OpenAI services.
+
+## Design
+
+Treat the dummy guest `auth.json` as a deliberately synthesized compatibility
+file, not as a redacted copy of the host file. The implementation may continue
+to start from the host object for convenience, but preserved fields should be
+controlled by explicit allowlists at known schema locations.
+
+At the top level, preserve only fields required to keep Codex in the same auth
+branch. In token blocks, replace access, ID, and refresh-like token values with
+dummy values. In JWT payloads, preserve only claims that are known to be
+non-secret and required for Codex behavior; unknown scalar claims should become
+placeholders, unknown lists should become empty lists, and unknown objects
+should recurse or become empty objects according to the local policy.
+
+For the OpenAI auth claim, preserve only currently necessary non-secret values
+such as plan type and selected account id. Everything else should be
+placeholder, empty object, empty list, or omitted according to the policy. The
+policy should be easy to audit from constants or named helper functions.
+
+Tests should use fixture auth objects that include both current expected fields
+and intentionally hostile future-looking fields such as `session_context`,
+`bearer`, `refreshSecret`, nested `token_value`, and opaque arrays. The dummy
+output must not contain the original secret strings.
+
+## Testing Strategy
+
+- Existing `tests/unit/test_codex_auth.py` should continue to pass.
+- Add tests that assert original access/refresh/session strings do not appear in
+  `codex_dummy_auth_json`.
+- Add tests for nested JWT and auth-claim redaction behavior.
+- Add tests that the dummy access/id token `exp` still matches the host access
+  token expiry.
+
+Run:
+
+- `python3 -m unittest tests.unit.test_codex_auth`
+- `python3 -m unittest discover -s tests/unit`
+
+## Open Questions
+
+- Which Codex auth fields are strictly required for the guest CLI to stay in
+  the device-auth branch? If a field is not demonstrably required, the default
+  should be to redact or omit it.
@@ -8,6 +8,7 @@ import threading
 import time
 import unittest
 from pathlib import Path
+from unittest.mock import patch


 # The server module loads `supervise` via same-directory import inside
@@ -29,8 +30,10 @@ from bot_bottle.supervise_server import (
    ServerConfig,
    TOOL_DEFINITIONS,
    _RpcError,
+    _response_timeout_from_env,
    format_response_text,
    handle_initialize,
+    handle_list_egress_routes,
    handle_tools_call,
    handle_tools_list,
    jsonrpc_error,
@@ -300,6 +303,70 @@ class TestHandleToolsCall(unittest.TestCase):
        processed = list((self.queue_dir / "processed").glob("*.json"))
        self.assertEqual(2, len(processed))

+    def test_pending_response_times_out_without_archive(self):
+        config = ServerConfig(
+            bottle_slug="dev",
+            queue_dir=self.queue_dir,
+            response_timeout_seconds=0.05,
+        )
+        result = handle_tools_call(
+            {
+                "name": _sv.TOOL_EGRESS_BLOCK,
+                "arguments": {
+                    "host": "example.com",
+                    "justification": "need a route",
+                },
+            },
+            config,
+        )
+
+        self.assertFalse(result["isError"])  # type: ignore[index]
+        text = result["content"][0]["text"]  # type: ignore[index]
+        self.assertIn("status: pending", text)
+        self.assertIn("proposal remains queued", text)
+        self.assertEqual(1, len(_sv.list_pending_proposals(self.queue_dir)))
+        self.assertFalse((self.queue_dir / "processed").exists())
+
+
+class TestHandleListEgressRoutes(unittest.TestCase):
+    def test_url_error_returns_tool_error(self):
+        class _Opener:
+            def open(self, *args, **kwargs):  # noqa: ANN001, ANN002, ANN003
+                raise OSError("egress unavailable")
+
+        with patch.object(supervise_server.urllib.request, "build_opener", return_value=_Opener()):
+            result = handle_list_egress_routes(
+                {},
+                ServerConfig(bottle_slug="dev", queue_dir=Path("/unused")),
+            )
+
+        self.assertTrue(result["isError"])  # type: ignore[index]
+        text = result["content"][0]["text"]  # type: ignore[index]
+        self.assertIn("could not reach", text)
+        self.assertIn("egress unavailable", text)
+
+
+class TestResponseTimeoutEnv(unittest.TestCase):
+    def test_unset_uses_default(self):
+        self.assertEqual(
+            supervise_server.DEFAULT_RESPONSE_TIMEOUT_SECONDS,
+            _response_timeout_from_env({}),
+        )
+
+    def test_positive_float_accepted(self):
+        self.assertEqual(
+            12.5,
+            _response_timeout_from_env({"SUPERVISE_RESPONSE_TIMEOUT_SECONDS": "12.5"}),
+        )
+
+    def test_invalid_value_rejected(self):
+        with self.assertRaises(ValueError):
+            _response_timeout_from_env({"SUPERVISE_RESPONSE_TIMEOUT_SECONDS": "soon"})
+
+    def test_nonpositive_value_rejected(self):
+        with self.assertRaises(ValueError):
+            _response_timeout_from_env({"SUPERVISE_RESPONSE_TIMEOUT_SECONDS": "0"})
+

 # --- Response text formatting ---------------------------------------------
Author	SHA1	Message	Date
didericis-codex	2247d730cd	docs(prd): add codex auth redaction policy test / unit (pull_request) Successful in 35s Details test / integration (pull_request) Successful in 42s Details	2026-06-02 04:09:18 -04:00
didericis-codex	3472e06efb	complete(prd): mark PRD 0035 active test / integration (pull_request) Successful in 1m4s Details test / unit (pull_request) Successful in 45s Details test / unit (push) Successful in 36s Details test / integration (push) Successful in 46s Details	2026-06-02 08:06:53 +00:00
didericis-codex	82ce5d3034	fix(supervise): bound response waits	2026-06-02 08:06:45 +00:00
didericis-codex	7c260eeff9	docs(prd): add supervise wait bounds test / unit (pull_request) Successful in 36s Details test / integration (pull_request) Successful in 54s Details	2026-06-02 07:58:39 +00:00