2026-06-02 04:14:31 -04:00
3 changed files with 236 additions and 31 deletions
@@ -187,8 +187,10 @@ def _redact_claims(value: object) -> object:
                out[key] = inner if isinstance(inner, list) else []
            elif isinstance(inner, bool):
                out[key] = inner
-            elif isinstance(inner, (dict, list)):
-                out[key] = _redact_claims(inner)
+            elif isinstance(inner, dict):
+                out[key] = {}
+            elif isinstance(inner, list):
+                out[key] = []
            else:
                out[key] = "bot-bottle-placeholder"
        return out
@@ -237,28 +239,49 @@ def _redact_auth_claim(value: object) -> dict:
 def _redact_codex_auth(
    value: object, *, now: datetime | None = None, exp_ts: int | None = None,
 ) -> object:
+    auth = value if isinstance(value, dict) else {}
+    out: dict[str, object] = {}
+    for key, inner in auth.items():
+        lower = key.lower()
+        if lower == "auth_mode" and isinstance(inner, str) and inner:
+            out[key] = inner
+        elif lower == "openai_api_key":
+            out[key] = None
+        elif lower == "tokens":
+            out[key] = _redact_token_block(inner, now=now, exp_ts=exp_ts)
+        else:
+            out[key] = _redact_unknown_auth_value(inner)
+    return out
+
+
+def _redact_token_block(
+    value: object, *, now: datetime | None = None, exp_ts: int | None = None,
+) -> dict[str, object]:
+    tokens = value if isinstance(value, dict) else {}
+    out: dict[str, object] = {}
+    for key, inner in tokens.items():
+        lower = key.lower()
+        if lower in {"access_token", "id_token"}:
+            out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts)
+        elif lower == "account_id" and isinstance(inner, str) and inner:
+            # Current Codex uses this non-secret selected account id
+            # while egress owns the real bearer token.
+            out[key] = inner
+        else:
+            out[key] = _redact_unknown_auth_value(inner)
+    return out
+
+
+def _redact_unknown_auth_value(value: object) -> object:
+    if isinstance(value, bool):
+        return value
    if isinstance(value, dict):
-        out: dict[str, object] = {}
-        for key, inner in value.items():
-            lower = key.lower()
-            if lower == "openai_api_key":
-                out[key] = None
-            elif lower == "tokens":
-                out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
-            elif lower in {"access_token", "id_token"}:
-                out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts)
-            elif "token" in lower or "secret" in lower or lower.endswith("_key"):
-                out[key] = "bot-bottle-placeholder"
-            elif lower == "account_id" and isinstance(inner, str) and inner:
-                out[key] = inner
-            elif lower in {"account_id", "user_id", "email"}:
-                out[key] = "bot-bottle-placeholder"
-            else:
-                out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
-        return out
+        return {}
    if isinstance(value, list):
-        return [_redact_codex_auth(v, now=now, exp_ts=exp_ts) for v in value]
-    return value
+        return []
+    if value is None:
+        return None
+    return "bot-bottle-placeholder"


 def _jwt_exp(token: str) -> datetime | None:
@@ -0,0 +1,111 @@
+# PRD 0036: Codex Auth Redaction Policy
+
+- **Status:** Active
+- **Author:** didericis-codex
+- **Created:** 2026-06-02
+- **Issue:** #129
+
+## Summary
+
+Make Codex host-auth redaction explicit and fixture-driven so dummy
+`auth.json` generation cannot accidentally preserve future sensitive fields.
+Keep forwarding only the short-lived host access token through egress, while the
+guest receives a non-secret auth file whose schema remains useful to Codex.
+
+## Problem
+
+`bot_bottle/codex_auth.py` reads the host Codex auth file, extracts the access
+token for egress, and writes a dummy guest `auth.json`. The code redacts JWT
+claims and auth JSON fields with a mix of schema-specific handling and generic
+placeholder behavior.
+
+That is safer than copying raw auth, but its safety depends on the heuristics
+covering every sensitive field. If Codex adds a field that carries a token,
+session identifier, refresh secret, or account metadata under a name the current
+heuristics do not match, the dummy auth file could preserve more information
+than intended. Because this is credential-adjacent code, the desired behavior
+should be allowlist-oriented and backed by explicit fixtures.
+
+## Goals / Success Criteria
+
+- Define a durable redaction policy for Codex `auth.json`:
+  - host access token is read for egress only.
+  - guest dummy auth contains no bearer, refresh, session, or secret values.
+  - selected non-secret fields may be preserved only when needed by Codex.
+- Prefer explicit per-field preservation over broad heuristic pass-through.
+- Add representative fixture tests for current Codex auth shapes.
+- Add regression tests for unknown nested fields, sensitive-looking field names,
+  lists, dictionaries, and JWT custom claims.
+- Preserve dummy token expiration alignment with the host access token.
+- Keep existing errors for missing, invalid, non-device, or expired auth.
+
+## Non-goals
+
+- No change to the egress credential-forwarding contract.
+- No attempt to refresh Codex tokens inside the bottle.
+- No copying of refresh tokens or raw host auth into the guest.
+- No dependency on a Codex SDK or external schema package.
+- No user-facing CLI changes.
+
+## Scope
+
+In scope:
+
+- `bot_bottle/codex_auth.py` redaction helpers.
+- Unit tests in `tests/unit/test_codex_auth.py`.
+- Small documentation comments that distinguish preserved non-secret fields from
+  redacted credential material.
+
+Out of scope:
+
+- Provider provisioning outside Codex auth file generation.
+- Egress route construction for Codex.
+- Runtime calls to Codex/OpenAI services.
+
+## Design
+
+Treat the dummy guest `auth.json` as a deliberately synthesized compatibility
+file, not as a redacted copy of the host file. The implementation may continue
+to start from the host object for convenience, but preserved fields should be
+controlled by explicit allowlists at known schema locations.
+
+At the top level, preserve only `auth_mode`, replace `OPENAI_API_KEY` /
+`openai_api_key` with `null`, and synthesize the `tokens` block. Unknown
+string and numeric fields become placeholders, booleans and `null` pass
+through, lists become empty lists, and dictionaries become empty objects.
+
+In token blocks, replace `access_token` and `id_token` with dummy JWTs, preserve
+the selected non-secret `account_id`, and redact every other token-block field
+with the same placeholder / empty container policy. Refresh, session, and future
+token values are never copied to the guest.
+
+In JWT payloads, preserve only claims that are known to be non-secret and
+required for Codex behavior. Unknown scalar claims become placeholders, unknown
+lists become empty lists, and unknown objects become empty objects.
+
+For the OpenAI auth claim, preserve only currently necessary non-secret values
+such as plan type, selected account id, and boolean localhost state. Everything
+else becomes a placeholder, empty object, or empty list according to the policy.
+
+Tests should use fixture auth objects that include both current expected fields
+and intentionally hostile future-looking fields such as `session_context`,
+`bearer`, `refreshSecret`, nested `token_value`, and opaque arrays. The dummy
+output must not contain the original secret strings.
+
+## Testing Strategy
+
+- Existing `tests/unit/test_codex_auth.py` should continue to pass.
+- Add tests that assert original access/refresh/session strings do not appear in
+  `codex_dummy_auth_json`.
+- Add tests for nested JWT and auth-claim redaction behavior.
+- Add tests that the dummy access/id token `exp` still matches the host access
+  token expiry.
+
+Run:
+
+- `python3 -m unittest tests.unit.test_codex_auth`
+- `python3 -m unittest discover -s tests/unit`
+
+## Open Questions
+
+None.
@@ -18,10 +18,14 @@ from bot_bottle.log import Die


 def _jwt(exp: int) -> str:
+    return _jwt_with_payload({"exp": exp})
+
+
+def _jwt_with_payload(payload: dict) -> str:
    def enc(obj: dict) -> str:
        raw = json.dumps(obj, separators=(",", ":")).encode()
        return base64.urlsafe_b64encode(raw).decode().rstrip("=")
-    return f"{enc({'alg': 'none'})}.{enc({'exp': exp})}.sig"
+    return f"{enc({'alg': 'none'})}.{enc(payload)}.sig"


 def _jwt_payload(token: str) -> dict:
@@ -154,16 +158,10 @@ class TestCodexHostAccessToken(unittest.TestCase):
        )

    def test_dummy_auth_keeps_required_account_claim_shape(self):
-        def jwt(payload: dict) -> str:
-            def enc(obj: dict) -> str:
-                raw = json.dumps(obj, separators=(",", ":")).encode()
-                return base64.urlsafe_b64encode(raw).decode().rstrip("=")
-            return f"{enc({'alg': 'none'})}.{enc(payload)}.sig"
-
        self._write({
            "auth_mode": "chatgpt",
            "tokens": {
-                "access_token": jwt({
+                "access_token": _jwt_with_payload({
                    "exp": 2000000000,
                    "https://api.openai.com/auth": {
                        "chatgpt_plan_type": "plus",
@@ -177,7 +175,7 @@ class TestCodexHostAccessToken(unittest.TestCase):
                        "email_verified": True,
                    },
                }),
-                "id_token": jwt({
+                "id_token": _jwt_with_payload({
                    "exp": 2000000000,
                    "email": "real@example.invalid",
                    "email_verified": True,
@@ -202,6 +200,79 @@ class TestCodexHostAccessToken(unittest.TestCase):
        self.assertEqual("bot-bottle@example.invalid", profile["email"])
        self.assertTrue(profile["email_verified"])

+    def test_dummy_auth_redacts_unknown_future_auth_fields(self):
+        secrets = [
+            "top-session-secret",
+            "top-nested-secret",
+            "refresh-secret",
+            "session-token-secret",
+            "jwt-custom-secret",
+            "jwt-nested-secret",
+            "jwt-list-secret",
+            "id-token-secret",
+            "auth-claim-secret",
+            "auth-claim-nested-secret",
+            "top-list-secret",
+            "token-nested-secret",
+            "token-list-secret",
+        ]
+        self._write({
+            "auth_mode": "chatgpt",
+            "session_context": "top-session-secret",
+            "future_nested": {"value": "top-nested-secret"},
+            "future_list": ["top-list-secret"],
+            "tokens": {
+                "access_token": _jwt_with_payload({
+                    "exp": 2000000000,
+                    "custom_session": "jwt-custom-secret",
+                    "future_nested": {"value": "jwt-nested-secret"},
+                    "future_list": ["jwt-list-secret"],
+                    "https://api.openai.com/auth": {
+                        "chatgpt_plan_type": "plus",
+                        "chatgpt_account_id": "acct-real",
+                        "session_context": "auth-claim-secret",
+                        "nested": {"value": "auth-claim-nested-secret"},
+                    },
+                }),
+                "id_token": _jwt_with_payload({
+                    "exp": 2000000000,
+                    "opaque": "id-token-secret",
+                }),
+                "refresh_token": "refresh-secret",
+                "session_token": "session-token-secret",
+                "future_object": {"value": "token-nested-secret"},
+                "future_list": ["token-list-secret"],
+                "account_id": "acct-host",
+            },
+        })
+
+        dummy_json = codex_dummy_auth_json(
+            {"CODEX_HOME": str(self.home)},
+            now=datetime(2026, 1, 1, tzinfo=timezone.utc),
+        )
+        for secret in secrets:
+            self.assertNotIn(secret, dummy_json)
+
+        dummy = json.loads(dummy_json)
+        self.assertEqual("bot-bottle-placeholder", dummy["session_context"])
+        self.assertEqual({}, dummy["future_nested"])
+        self.assertEqual([], dummy["future_list"])
+        self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["refresh_token"])
+        self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["session_token"])
+        self.assertEqual({}, dummy["tokens"]["future_object"])
+        self.assertEqual([], dummy["tokens"]["future_list"])
+
+        access_payload = _jwt_payload(dummy["tokens"]["access_token"])
+        self.assertEqual(
+            "bot-bottle-placeholder",
+            access_payload["custom_session"],
+        )
+        self.assertEqual({}, access_payload["future_nested"])
+        self.assertEqual([], access_payload["future_list"])
+        auth = access_payload["https://api.openai.com/auth"]
+        self.assertEqual("bot-bottle-placeholder", auth["session_context"])
+        self.assertEqual({}, auth["nested"])
+

 if __name__ == "__main__":
    unittest.main()