diff --git a/bot_bottle/codex_auth.py b/bot_bottle/codex_auth.py index df5e03a..73e9d1c 100644 --- a/bot_bottle/codex_auth.py +++ b/bot_bottle/codex_auth.py @@ -187,8 +187,10 @@ def _redact_claims(value: object) -> object: out[key] = inner if isinstance(inner, list) else [] elif isinstance(inner, bool): out[key] = inner - elif isinstance(inner, (dict, list)): - out[key] = _redact_claims(inner) + elif isinstance(inner, dict): + out[key] = {} + elif isinstance(inner, list): + out[key] = [] else: out[key] = "bot-bottle-placeholder" return out @@ -237,28 +239,49 @@ def _redact_auth_claim(value: object) -> dict: def _redact_codex_auth( value: object, *, now: datetime | None = None, exp_ts: int | None = None, ) -> object: + auth = value if isinstance(value, dict) else {} + out: dict[str, object] = {} + for key, inner in auth.items(): + lower = key.lower() + if lower == "auth_mode" and isinstance(inner, str) and inner: + out[key] = inner + elif lower == "openai_api_key": + out[key] = None + elif lower == "tokens": + out[key] = _redact_token_block(inner, now=now, exp_ts=exp_ts) + else: + out[key] = _redact_unknown_auth_value(inner) + return out + + +def _redact_token_block( + value: object, *, now: datetime | None = None, exp_ts: int | None = None, +) -> dict[str, object]: + tokens = value if isinstance(value, dict) else {} + out: dict[str, object] = {} + for key, inner in tokens.items(): + lower = key.lower() + if lower in {"access_token", "id_token"}: + out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts) + elif lower == "account_id" and isinstance(inner, str) and inner: + # Current Codex uses this non-secret selected account id + # while egress owns the real bearer token. 
+ out[key] = inner + else: + out[key] = _redact_unknown_auth_value(inner) + return out + + +def _redact_unknown_auth_value(value: object) -> object: + if isinstance(value, bool): + return value if isinstance(value, dict): - out: dict[str, object] = {} - for key, inner in value.items(): - lower = key.lower() - if lower == "openai_api_key": - out[key] = None - elif lower == "tokens": - out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts) - elif lower in {"access_token", "id_token"}: - out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts) - elif "token" in lower or "secret" in lower or lower.endswith("_key"): - out[key] = "bot-bottle-placeholder" - elif lower == "account_id" and isinstance(inner, str) and inner: - out[key] = inner - elif lower in {"account_id", "user_id", "email"}: - out[key] = "bot-bottle-placeholder" - else: - out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts) - return out + return {} if isinstance(value, list): - return [_redact_codex_auth(v, now=now, exp_ts=exp_ts) for v in value] - return value + return [] + if value is None: + return None + return "bot-bottle-placeholder" def _jwt_exp(token: str) -> datetime | None: diff --git a/docs/prds/0036-codex-auth-redaction-policy.md b/docs/prds/0036-codex-auth-redaction-policy.md new file mode 100644 index 0000000..7dfb8f6 --- /dev/null +++ b/docs/prds/0036-codex-auth-redaction-policy.md @@ -0,0 +1,111 @@ +# PRD 0036: Codex Auth Redaction Policy + +- **Status:** Active +- **Author:** didericis-codex +- **Created:** 2026-06-02 +- **Issue:** #129 + +## Summary + +Make Codex host-auth redaction explicit and fixture-driven so dummy +`auth.json` generation cannot accidentally preserve future sensitive fields. +Keep forwarding only the short-lived host access token through egress, while the +guest receives a non-secret auth file whose schema remains useful to Codex. 
+ +## Problem + +`bot_bottle/codex_auth.py` reads the host Codex auth file, extracts the access +token for egress, and writes a dummy guest `auth.json`. The code redacts JWT +claims and auth JSON fields with a mix of schema-specific handling and generic +placeholder behavior. + +That is safer than copying raw auth, but it is still coverage-sensitive. If +Codex adds a new field that carries a token, session identifier, refresh secret, +or account metadata and the field name does not match current heuristics, the +dummy auth file could preserve more information than intended. Because this is +credential-adjacent code, the desired behavior should be allowlist-oriented and +backed by explicit fixtures. + +## Goals / Success Criteria + +- Define a durable redaction policy for Codex `auth.json`: + - host access token is read for egress only. + - guest dummy auth contains no bearer, refresh, session, or secret values. + - selected non-secret fields may be preserved only when needed by Codex. +- Prefer explicit per-field preservation over broad heuristic pass-through. +- Add representative fixture tests for current Codex auth shapes. +- Add regression tests for unknown nested fields, sensitive-looking field names, + lists, dictionaries, and JWT custom claims. +- Preserve dummy token expiration alignment with the host access token. +- Keep existing errors for missing, invalid, non-device, or expired auth. + +## Non-goals + +- No change to the egress credential-forwarding contract. +- No attempt to refresh Codex tokens inside the bottle. +- No copying of refresh tokens or raw host auth into the guest. +- No dependency on a Codex SDK or external schema package. +- No user-facing CLI changes. + +## Scope + +In scope: + +- `bot_bottle/codex_auth.py` redaction helpers. +- Unit tests in `tests/unit/test_codex_auth.py`. +- Small documentation comments that distinguish preserved non-secret fields from + redacted credential material. 
+ +Out of scope: + +- Provider provisioning outside Codex auth file generation. +- Egress route construction for Codex. +- Runtime calls to Codex/OpenAI services. + +## Design + +Treat the dummy guest `auth.json` as a deliberately synthesized compatibility +file, not as a redacted copy of the host file. The implementation may continue +to start from the host object for convenience, but preserved fields should be +controlled by explicit allowlists at known schema locations. + +At the top level, preserve only `auth_mode`, replace `OPENAI_API_KEY` / +`openai_api_key` with `null`, and synthesize the `tokens` block. Unknown +top-level strings and numbers become placeholders, booleans and `null` are kept as-is, +unknown lists become empty lists, and unknown dictionaries become empty objects. + +In token blocks, replace `access_token` and `id_token` with dummy JWTs, preserve +the selected non-secret `account_id`, and redact every other token-block field +with the same placeholder / empty container policy. Refresh, session, and future +token values are never copied to the guest. + +In JWT payloads, preserve only claims that are known to be non-secret and +required for Codex behavior. Unknown scalar claims become placeholders, unknown +lists become empty lists, and unknown objects become empty objects. + +For the OpenAI auth claim, preserve only currently necessary non-secret values +such as plan type, selected account id, and boolean localhost state. Everything +else is placeholder, empty object, or empty list according to the policy. + +Tests should use fixture auth objects that include both current expected fields +and intentionally hostile future-looking fields such as `session_context`, +`bearer`, `refreshSecret`, nested `token_value`, and opaque arrays. The dummy +output must not contain the original secret strings. + +## Testing Strategy + +- Existing `tests/unit/test_codex_auth.py` should continue to pass. 
+- Add tests that assert original access/refresh/session strings do not appear in + `codex_dummy_auth_json`. +- Add tests for nested JWT and auth-claim redaction behavior. +- Add tests that the dummy access/id token `exp` still matches the host access + token expiry. + +Run: + +- `python3 -m unittest tests.unit.test_codex_auth` +- `python3 -m unittest discover -s tests/unit` + +## Open Questions + +None. diff --git a/tests/unit/test_codex_auth.py b/tests/unit/test_codex_auth.py index 7731110..ef9d575 100644 --- a/tests/unit/test_codex_auth.py +++ b/tests/unit/test_codex_auth.py @@ -18,10 +18,14 @@ from bot_bottle.log import Die def _jwt(exp: int) -> str: + return _jwt_with_payload({"exp": exp}) + + +def _jwt_with_payload(payload: dict) -> str: def enc(obj: dict) -> str: raw = json.dumps(obj, separators=(",", ":")).encode() return base64.urlsafe_b64encode(raw).decode().rstrip("=") - return f"{enc({'alg': 'none'})}.{enc({'exp': exp})}.sig" + return f"{enc({'alg': 'none'})}.{enc(payload)}.sig" def _jwt_payload(token: str) -> dict: @@ -154,16 +158,10 @@ class TestCodexHostAccessToken(unittest.TestCase): ) def test_dummy_auth_keeps_required_account_claim_shape(self): - def jwt(payload: dict) -> str: - def enc(obj: dict) -> str: - raw = json.dumps(obj, separators=(",", ":")).encode() - return base64.urlsafe_b64encode(raw).decode().rstrip("=") - return f"{enc({'alg': 'none'})}.{enc(payload)}.sig" - self._write({ "auth_mode": "chatgpt", "tokens": { - "access_token": jwt({ + "access_token": _jwt_with_payload({ "exp": 2000000000, "https://api.openai.com/auth": { "chatgpt_plan_type": "plus", @@ -177,7 +175,7 @@ class TestCodexHostAccessToken(unittest.TestCase): "email_verified": True, }, }), - "id_token": jwt({ + "id_token": _jwt_with_payload({ "exp": 2000000000, "email": "real@example.invalid", "email_verified": True, @@ -202,6 +200,79 @@ class TestCodexHostAccessToken(unittest.TestCase): self.assertEqual("bot-bottle@example.invalid", profile["email"]) 
self.assertTrue(profile["email_verified"]) + def test_dummy_auth_redacts_unknown_future_auth_fields(self): + secrets = [ + "top-session-secret", + "top-nested-secret", + "refresh-secret", + "session-token-secret", + "jwt-custom-secret", + "jwt-nested-secret", + "jwt-list-secret", + "id-token-secret", + "auth-claim-secret", + "auth-claim-nested-secret", + "top-list-secret", + "token-nested-secret", + "token-list-secret", + ] + self._write({ + "auth_mode": "chatgpt", + "session_context": "top-session-secret", + "future_nested": {"value": "top-nested-secret"}, + "future_list": ["top-list-secret"], + "tokens": { + "access_token": _jwt_with_payload({ + "exp": 2000000000, + "custom_session": "jwt-custom-secret", + "future_nested": {"value": "jwt-nested-secret"}, + "future_list": ["jwt-list-secret"], + "https://api.openai.com/auth": { + "chatgpt_plan_type": "plus", + "chatgpt_account_id": "acct-real", + "session_context": "auth-claim-secret", + "nested": {"value": "auth-claim-nested-secret"}, + }, + }), + "id_token": _jwt_with_payload({ + "exp": 2000000000, + "opaque": "id-token-secret", + }), + "refresh_token": "refresh-secret", + "session_token": "session-token-secret", + "future_object": {"value": "token-nested-secret"}, + "future_list": ["token-list-secret"], + "account_id": "acct-host", + }, + }) + + dummy_json = codex_dummy_auth_json( + {"CODEX_HOME": str(self.home)}, + now=datetime(2026, 1, 1, tzinfo=timezone.utc), + ) + for secret in secrets: + self.assertNotIn(secret, dummy_json) + + dummy = json.loads(dummy_json) + self.assertEqual("bot-bottle-placeholder", dummy["session_context"]) + self.assertEqual({}, dummy["future_nested"]) + self.assertEqual([], dummy["future_list"]) + self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["refresh_token"]) + self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["session_token"]) + self.assertEqual({}, dummy["tokens"]["future_object"]) + self.assertEqual([], dummy["tokens"]["future_list"]) + + access_payload = 
_jwt_payload(dummy["tokens"]["access_token"]) + self.assertEqual( + "bot-bottle-placeholder", + access_payload["custom_session"], + ) + self.assertEqual({}, access_payload["future_nested"]) + self.assertEqual([], access_payload["future_list"]) + auth = access_payload["https://api.openai.com/auth"] + self.assertEqual("bot-bottle-placeholder", auth["session_context"]) + self.assertEqual({}, auth["nested"]) + if __name__ == "__main__": unittest.main()