From 2247d730cddd0e39d784aabff0ad2b3e23bf5ff4 Mon Sep 17 00:00:00 2001 From: codex Date: Tue, 2 Jun 2026 07:59:37 +0000 Subject: [PATCH 1/3] docs(prd): add codex auth redaction policy --- docs/prds/0036-codex-auth-redaction-policy.md | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 docs/prds/0036-codex-auth-redaction-policy.md diff --git a/docs/prds/0036-codex-auth-redaction-policy.md b/docs/prds/0036-codex-auth-redaction-policy.md new file mode 100644 index 0000000..cf88a5a --- /dev/null +++ b/docs/prds/0036-codex-auth-redaction-policy.md @@ -0,0 +1,107 @@ +# PRD 0036: Codex Auth Redaction Policy + +- **Status:** Draft +- **Author:** didericis-codex +- **Created:** 2026-06-02 +- **Issue:** #129 + +## Summary + +Make Codex host-auth redaction explicit and fixture-driven so dummy +`auth.json` generation cannot accidentally preserve future sensitive fields. +Keep forwarding only the short-lived host access token through egress, while the +guest receives a non-secret auth file whose schema remains useful to Codex. + +## Problem + +`bot_bottle/codex_auth.py` reads the host Codex auth file, extracts the access +token for egress, and writes a dummy guest `auth.json`. The code redacts JWT +claims and auth JSON fields with a mix of schema-specific handling and generic +placeholder behavior. + +That is safer than copying raw auth, but it is still coverage-sensitive. If +Codex adds a new field that carries a token, session identifier, refresh secret, +or account metadata and the field name does not match current heuristics, the +dummy auth file could preserve more information than intended. Because this is +credential-adjacent code, the desired behavior should be allowlist-oriented and +backed by explicit fixtures. + +## Goals / Success Criteria + +- Define a durable redaction policy for Codex `auth.json`: + - host access token is read for egress only. + - guest dummy auth contains no bearer, refresh, session, or secret values. 
+ - selected non-secret fields may be preserved only when needed by Codex. +- Prefer explicit per-field preservation over broad heuristic pass-through. +- Add representative fixture tests for current Codex auth shapes. +- Add regression tests for unknown nested fields, sensitive-looking field names, + lists, dictionaries, and JWT custom claims. +- Preserve dummy token expiration alignment with the host access token. +- Keep existing errors for missing, invalid, non-device, or expired auth. + +## Non-goals + +- No change to the egress credential-forwarding contract. +- No attempt to refresh Codex tokens inside the bottle. +- No copying of refresh tokens or raw host auth into the guest. +- No dependency on a Codex SDK or external schema package. +- No user-facing CLI changes. + +## Scope + +In scope: + +- `bot_bottle/codex_auth.py` redaction helpers. +- Unit tests in `tests/unit/test_codex_auth.py`. +- Small documentation comments that distinguish preserved non-secret fields from + redacted credential material. + +Out of scope: + +- Provider provisioning outside Codex auth file generation. +- Egress route construction for Codex. +- Runtime calls to Codex/OpenAI services. + +## Design + +Treat the dummy guest `auth.json` as a deliberately synthesized compatibility +file, not as a redacted copy of the host file. The implementation may continue +to start from the host object for convenience, but preserved fields should be +controlled by explicit allowlists at known schema locations. + +At the top level, preserve only fields required to keep Codex in the same auth +branch. In token blocks, replace access, ID, and refresh-like token values with +dummy values. In JWT payloads, preserve only claims that are known to be +non-secret and required for Codex behavior; unknown scalar claims should become +placeholders, unknown lists should become empty lists, and unknown objects +should recurse or become empty objects according to the local policy. 
+ +For the OpenAI auth claim, preserve only currently necessary non-secret values +such as plan type and selected account id. Everything else should be +placeholder, empty object, empty list, or omitted according to the policy. The +policy should be easy to audit from constants or named helper functions. + +Tests should use fixture auth objects that include both current expected fields +and intentionally hostile future-looking fields such as `session_context`, +`bearer`, `refreshSecret`, nested `token_value`, and opaque arrays. The dummy +output must not contain the original secret strings. + +## Testing Strategy + +- Existing `tests/unit/test_codex_auth.py` should continue to pass. +- Add tests that assert original access/refresh/session strings do not appear in + `codex_dummy_auth_json`. +- Add tests for nested JWT and auth-claim redaction behavior. +- Add tests that the dummy access/id token `exp` still matches the host access + token expiry. + +Run: + +- `python3 -m unittest tests.unit.test_codex_auth` +- `python3 -m unittest discover -s tests/unit` + +## Open Questions + +- Which Codex auth fields are strictly required for the guest CLI to stay in + the device-auth branch? If a field is not demonstrably required, the default + should be to redact or omit it. 
-- 2.52.0 From 0a8bba58c70fa33ab2d2344e47aef9c10c539d95 Mon Sep 17 00:00:00 2001 From: codex Date: Tue, 2 Jun 2026 08:10:01 +0000 Subject: [PATCH 2/3] fix(codex): harden auth redaction --- bot_bottle/codex_auth.py | 67 +++++++++----- docs/prds/0036-codex-auth-redaction-policy.md | 28 +++--- tests/unit/test_codex_auth.py | 89 +++++++++++++++++-- 3 files changed, 141 insertions(+), 43 deletions(-) diff --git a/bot_bottle/codex_auth.py b/bot_bottle/codex_auth.py index df5e03a..73e9d1c 100644 --- a/bot_bottle/codex_auth.py +++ b/bot_bottle/codex_auth.py @@ -187,8 +187,10 @@ def _redact_claims(value: object) -> object: out[key] = inner if isinstance(inner, list) else [] elif isinstance(inner, bool): out[key] = inner - elif isinstance(inner, (dict, list)): - out[key] = _redact_claims(inner) + elif isinstance(inner, dict): + out[key] = {} + elif isinstance(inner, list): + out[key] = [] else: out[key] = "bot-bottle-placeholder" return out @@ -237,28 +239,49 @@ def _redact_auth_claim(value: object) -> dict: def _redact_codex_auth( value: object, *, now: datetime | None = None, exp_ts: int | None = None, ) -> object: + auth = value if isinstance(value, dict) else {} + out: dict[str, object] = {} + for key, inner in auth.items(): + lower = key.lower() + if lower == "auth_mode" and isinstance(inner, str) and inner: + out[key] = inner + elif lower == "openai_api_key": + out[key] = None + elif lower == "tokens": + out[key] = _redact_token_block(inner, now=now, exp_ts=exp_ts) + else: + out[key] = _redact_unknown_auth_value(inner) + return out + + +def _redact_token_block( + value: object, *, now: datetime | None = None, exp_ts: int | None = None, +) -> dict[str, object]: + tokens = value if isinstance(value, dict) else {} + out: dict[str, object] = {} + for key, inner in tokens.items(): + lower = key.lower() + if lower in {"access_token", "id_token"}: + out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts) + elif lower == "account_id" and isinstance(inner, str) and 
inner: + # Current Codex uses this non-secret selected account id + # while egress owns the real bearer token. + out[key] = inner + else: + out[key] = _redact_unknown_auth_value(inner) + return out + + +def _redact_unknown_auth_value(value: object) -> object: + if isinstance(value, bool): + return value if isinstance(value, dict): - out: dict[str, object] = {} - for key, inner in value.items(): - lower = key.lower() - if lower == "openai_api_key": - out[key] = None - elif lower == "tokens": - out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts) - elif lower in {"access_token", "id_token"}: - out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts) - elif "token" in lower or "secret" in lower or lower.endswith("_key"): - out[key] = "bot-bottle-placeholder" - elif lower == "account_id" and isinstance(inner, str) and inner: - out[key] = inner - elif lower in {"account_id", "user_id", "email"}: - out[key] = "bot-bottle-placeholder" - else: - out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts) - return out + return {} if isinstance(value, list): - return [_redact_codex_auth(v, now=now, exp_ts=exp_ts) for v in value] - return value + return [] + if value is None: + return None + return "bot-bottle-placeholder" def _jwt_exp(token: str) -> datetime | None: diff --git a/docs/prds/0036-codex-auth-redaction-policy.md b/docs/prds/0036-codex-auth-redaction-policy.md index cf88a5a..84dc417 100644 --- a/docs/prds/0036-codex-auth-redaction-policy.md +++ b/docs/prds/0036-codex-auth-redaction-policy.md @@ -69,17 +69,23 @@ file, not as a redacted copy of the host file. The implementation may continue to start from the host object for convenience, but preserved fields should be controlled by explicit allowlists at known schema locations. -At the top level, preserve only fields required to keep Codex in the same auth -branch. In token blocks, replace access, ID, and refresh-like token values with -dummy values. 
In JWT payloads, preserve only claims that are known to be -non-secret and required for Codex behavior; unknown scalar claims should become -placeholders, unknown lists should become empty lists, and unknown objects -should recurse or become empty objects according to the local policy. +At the top level, preserve only `auth_mode`, replace `OPENAI_API_KEY` / +`openai_api_key` with `null`, and synthesize the `tokens` block. Unknown +top-level booleans and `null` values pass through unchanged; every other unknown +scalar becomes a placeholder, unknown lists become empty lists, and +unknown dictionaries become empty objects. + +In token blocks, replace `access_token` and `id_token` with dummy JWTs, preserve +the selected non-secret `account_id`, and redact every other token-block field +with the same placeholder / empty container policy. Refresh, session, and future +token values are never copied to the guest. + +In JWT payloads, preserve only claims that are known to be non-secret and +required for Codex behavior. Unknown scalar claims become placeholders, unknown +lists become empty lists, and unknown objects become empty objects. For the OpenAI auth claim, preserve only currently necessary non-secret values -such as plan type and selected account id. Everything else should be -placeholder, empty object, empty list, or omitted according to the policy. The -policy should be easy to audit from constants or named helper functions. +such as plan type, selected account id, and boolean localhost state. Everything +else is placeholder, empty object, or empty list according to the policy. Tests should use fixture auth objects that include both current expected fields and intentionally hostile future-looking fields such as `session_context`, @@ -102,6 +108,4 @@ Run: ## Open Questions -- Which Codex auth fields are strictly required for the guest CLI to stay in - the device-auth branch? If a field is not demonstrably required, the default - should be to redact or omit it. +None.
diff --git a/tests/unit/test_codex_auth.py b/tests/unit/test_codex_auth.py index 7731110..ef9d575 100644 --- a/tests/unit/test_codex_auth.py +++ b/tests/unit/test_codex_auth.py @@ -18,10 +18,14 @@ from bot_bottle.log import Die def _jwt(exp: int) -> str: + return _jwt_with_payload({"exp": exp}) + + +def _jwt_with_payload(payload: dict) -> str: def enc(obj: dict) -> str: raw = json.dumps(obj, separators=(",", ":")).encode() return base64.urlsafe_b64encode(raw).decode().rstrip("=") - return f"{enc({'alg': 'none'})}.{enc({'exp': exp})}.sig" + return f"{enc({'alg': 'none'})}.{enc(payload)}.sig" def _jwt_payload(token: str) -> dict: @@ -154,16 +158,10 @@ class TestCodexHostAccessToken(unittest.TestCase): ) def test_dummy_auth_keeps_required_account_claim_shape(self): - def jwt(payload: dict) -> str: - def enc(obj: dict) -> str: - raw = json.dumps(obj, separators=(",", ":")).encode() - return base64.urlsafe_b64encode(raw).decode().rstrip("=") - return f"{enc({'alg': 'none'})}.{enc(payload)}.sig" - self._write({ "auth_mode": "chatgpt", "tokens": { - "access_token": jwt({ + "access_token": _jwt_with_payload({ "exp": 2000000000, "https://api.openai.com/auth": { "chatgpt_plan_type": "plus", @@ -177,7 +175,7 @@ class TestCodexHostAccessToken(unittest.TestCase): "email_verified": True, }, }), - "id_token": jwt({ + "id_token": _jwt_with_payload({ "exp": 2000000000, "email": "real@example.invalid", "email_verified": True, @@ -202,6 +200,79 @@ class TestCodexHostAccessToken(unittest.TestCase): self.assertEqual("bot-bottle@example.invalid", profile["email"]) self.assertTrue(profile["email_verified"]) + def test_dummy_auth_redacts_unknown_future_auth_fields(self): + secrets = [ + "top-session-secret", + "top-nested-secret", + "refresh-secret", + "session-token-secret", + "jwt-custom-secret", + "jwt-nested-secret", + "jwt-list-secret", + "id-token-secret", + "auth-claim-secret", + "auth-claim-nested-secret", + "top-list-secret", + "token-nested-secret", + "token-list-secret", + ] + 
self._write({ + "auth_mode": "chatgpt", + "session_context": "top-session-secret", + "future_nested": {"value": "top-nested-secret"}, + "future_list": ["top-list-secret"], + "tokens": { + "access_token": _jwt_with_payload({ + "exp": 2000000000, + "custom_session": "jwt-custom-secret", + "future_nested": {"value": "jwt-nested-secret"}, + "future_list": ["jwt-list-secret"], + "https://api.openai.com/auth": { + "chatgpt_plan_type": "plus", + "chatgpt_account_id": "acct-real", + "session_context": "auth-claim-secret", + "nested": {"value": "auth-claim-nested-secret"}, + }, + }), + "id_token": _jwt_with_payload({ + "exp": 2000000000, + "opaque": "id-token-secret", + }), + "refresh_token": "refresh-secret", + "session_token": "session-token-secret", + "future_object": {"value": "token-nested-secret"}, + "future_list": ["token-list-secret"], + "account_id": "acct-host", + }, + }) + + dummy_json = codex_dummy_auth_json( + {"CODEX_HOME": str(self.home)}, + now=datetime(2026, 1, 1, tzinfo=timezone.utc), + ) + for secret in secrets: + self.assertNotIn(secret, dummy_json) + + dummy = json.loads(dummy_json) + self.assertEqual("bot-bottle-placeholder", dummy["session_context"]) + self.assertEqual({}, dummy["future_nested"]) + self.assertEqual([], dummy["future_list"]) + self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["refresh_token"]) + self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["session_token"]) + self.assertEqual({}, dummy["tokens"]["future_object"]) + self.assertEqual([], dummy["tokens"]["future_list"]) + + access_payload = _jwt_payload(dummy["tokens"]["access_token"]) + self.assertEqual( + "bot-bottle-placeholder", + access_payload["custom_session"], + ) + self.assertEqual({}, access_payload["future_nested"]) + self.assertEqual([], access_payload["future_list"]) + auth = access_payload["https://api.openai.com/auth"] + self.assertEqual("bot-bottle-placeholder", auth["session_context"]) + self.assertEqual({}, auth["nested"]) + if __name__ == 
"__main__": unittest.main() -- 2.52.0 From a79ef61b629bfec4c25e2109b032b6b9e8173f55 Mon Sep 17 00:00:00 2001 From: codex Date: Tue, 2 Jun 2026 08:10:08 +0000 Subject: [PATCH 3/3] complete(prd): mark PRD 0036 active --- docs/prds/0036-codex-auth-redaction-policy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/prds/0036-codex-auth-redaction-policy.md b/docs/prds/0036-codex-auth-redaction-policy.md index 84dc417..7dfb8f6 100644 --- a/docs/prds/0036-codex-auth-redaction-policy.md +++ b/docs/prds/0036-codex-auth-redaction-policy.md @@ -1,6 +1,6 @@ # PRD 0036: Codex Auth Redaction Policy -- **Status:** Draft +- **Status:** Active - **Author:** didericis-codex - **Created:** 2026-06-02 - **Issue:** #129 -- 2.52.0