fix(codex): harden auth redaction

This commit is contained in:
2026-06-02 08:10:01 +00:00
parent 2247d730cd
commit 0a8bba58c7
3 changed files with 141 additions and 43 deletions
+45 -22
View File
@@ -187,8 +187,10 @@ def _redact_claims(value: object) -> object:
out[key] = inner if isinstance(inner, list) else [] out[key] = inner if isinstance(inner, list) else []
elif isinstance(inner, bool): elif isinstance(inner, bool):
out[key] = inner out[key] = inner
elif isinstance(inner, (dict, list)): elif isinstance(inner, dict):
out[key] = _redact_claims(inner) out[key] = {}
elif isinstance(inner, list):
out[key] = []
else: else:
out[key] = "bot-bottle-placeholder" out[key] = "bot-bottle-placeholder"
return out return out
@@ -237,28 +239,49 @@ def _redact_auth_claim(value: object) -> dict:
def _redact_codex_auth( def _redact_codex_auth(
value: object, *, now: datetime | None = None, exp_ts: int | None = None, value: object, *, now: datetime | None = None, exp_ts: int | None = None,
) -> object: ) -> object:
auth = value if isinstance(value, dict) else {}
out: dict[str, object] = {}
for key, inner in auth.items():
lower = key.lower()
if lower == "auth_mode" and isinstance(inner, str) and inner:
out[key] = inner
elif lower == "openai_api_key":
out[key] = None
elif lower == "tokens":
out[key] = _redact_token_block(inner, now=now, exp_ts=exp_ts)
else:
out[key] = _redact_unknown_auth_value(inner)
return out
def _redact_token_block(
value: object, *, now: datetime | None = None, exp_ts: int | None = None,
) -> dict[str, object]:
tokens = value if isinstance(value, dict) else {}
out: dict[str, object] = {}
for key, inner in tokens.items():
lower = key.lower()
if lower in {"access_token", "id_token"}:
out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts)
elif lower == "account_id" and isinstance(inner, str) and inner:
# Current Codex uses this non-secret selected account id
# while egress owns the real bearer token.
out[key] = inner
else:
out[key] = _redact_unknown_auth_value(inner)
return out
def _redact_unknown_auth_value(value: object) -> object:
if isinstance(value, bool):
return value
if isinstance(value, dict): if isinstance(value, dict):
out: dict[str, object] = {} return {}
for key, inner in value.items():
lower = key.lower()
if lower == "openai_api_key":
out[key] = None
elif lower == "tokens":
out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
elif lower in {"access_token", "id_token"}:
out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts)
elif "token" in lower or "secret" in lower or lower.endswith("_key"):
out[key] = "bot-bottle-placeholder"
elif lower == "account_id" and isinstance(inner, str) and inner:
out[key] = inner
elif lower in {"account_id", "user_id", "email"}:
out[key] = "bot-bottle-placeholder"
else:
out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
return out
if isinstance(value, list): if isinstance(value, list):
return [_redact_codex_auth(v, now=now, exp_ts=exp_ts) for v in value] return []
return value if value is None:
return None
return "bot-bottle-placeholder"
def _jwt_exp(token: str) -> datetime | None: def _jwt_exp(token: str) -> datetime | None:
+16 -12
View File
@@ -69,17 +69,23 @@ file, not as a redacted copy of the host file. The implementation may continue
to start from the host object for convenience, but preserved fields should be to start from the host object for convenience, but preserved fields should be
controlled by explicit allowlists at known schema locations. controlled by explicit allowlists at known schema locations.
At the top level, preserve only fields required to keep Codex in the same auth At the top level, preserve only `auth_mode`, replace `OPENAI_API_KEY` /
branch. In token blocks, replace access, ID, and refresh-like token values with `openai_api_key` with `null`, and synthesize the `tokens` block. Unknown scalar
dummy values. In JWT payloads, preserve only claims that are known to be top-level fields become placeholders, unknown lists become empty lists, and
non-secret and required for Codex behavior; unknown scalar claims should become unknown dictionaries become empty objects.
placeholders, unknown lists should become empty lists, and unknown objects
should recurse or become empty objects according to the local policy. In token blocks, replace `access_token` and `id_token` with dummy JWTs, preserve
the selected non-secret `account_id`, and redact every other token-block field
with the same placeholder / empty container policy. Refresh, session, and future
token values are never copied to the guest.
In JWT payloads, preserve only claims that are known to be non-secret and
required for Codex behavior. Unknown scalar claims become placeholders, unknown
lists become empty lists, and unknown objects become empty objects.
For the OpenAI auth claim, preserve only currently necessary non-secret values For the OpenAI auth claim, preserve only currently necessary non-secret values
such as plan type and selected account id. Everything else should be such as plan type, selected account id, and boolean localhost state. Everything
placeholder, empty object, empty list, or omitted according to the policy. The else is placeholder, empty object, or empty list according to the policy.
policy should be easy to audit from constants or named helper functions.
Tests should use fixture auth objects that include both current expected fields Tests should use fixture auth objects that include both current expected fields
and intentionally hostile future-looking fields such as `session_context`, and intentionally hostile future-looking fields such as `session_context`,
@@ -102,6 +108,4 @@ Run:
## Open Questions ## Open Questions
- Which Codex auth fields are strictly required for the guest CLI to stay in None.
the device-auth branch? If a field is not demonstrably required, the default
should be to redact or omit it.
+80 -9
View File
@@ -18,10 +18,14 @@ from bot_bottle.log import Die
def _jwt(exp: int) -> str:
    """Return a test JWT whose payload contains only the ``exp`` claim."""
    return _jwt_with_payload({"exp": exp})
def _jwt_with_payload(payload: dict) -> str:
def enc(obj: dict) -> str: def enc(obj: dict) -> str:
raw = json.dumps(obj, separators=(",", ":")).encode() raw = json.dumps(obj, separators=(",", ":")).encode()
return base64.urlsafe_b64encode(raw).decode().rstrip("=") return base64.urlsafe_b64encode(raw).decode().rstrip("=")
return f"{enc({'alg': 'none'})}.{enc({'exp': exp})}.sig" return f"{enc({'alg': 'none'})}.{enc(payload)}.sig"
def _jwt_payload(token: str) -> dict: def _jwt_payload(token: str) -> dict:
@@ -154,16 +158,10 @@ class TestCodexHostAccessToken(unittest.TestCase):
) )
def test_dummy_auth_keeps_required_account_claim_shape(self): def test_dummy_auth_keeps_required_account_claim_shape(self):
def jwt(payload: dict) -> str:
def enc(obj: dict) -> str:
raw = json.dumps(obj, separators=(",", ":")).encode()
return base64.urlsafe_b64encode(raw).decode().rstrip("=")
return f"{enc({'alg': 'none'})}.{enc(payload)}.sig"
self._write({ self._write({
"auth_mode": "chatgpt", "auth_mode": "chatgpt",
"tokens": { "tokens": {
"access_token": jwt({ "access_token": _jwt_with_payload({
"exp": 2000000000, "exp": 2000000000,
"https://api.openai.com/auth": { "https://api.openai.com/auth": {
"chatgpt_plan_type": "plus", "chatgpt_plan_type": "plus",
@@ -177,7 +175,7 @@ class TestCodexHostAccessToken(unittest.TestCase):
"email_verified": True, "email_verified": True,
}, },
}), }),
"id_token": jwt({ "id_token": _jwt_with_payload({
"exp": 2000000000, "exp": 2000000000,
"email": "real@example.invalid", "email": "real@example.invalid",
"email_verified": True, "email_verified": True,
@@ -202,6 +200,79 @@ class TestCodexHostAccessToken(unittest.TestCase):
self.assertEqual("bot-bottle@example.invalid", profile["email"]) self.assertEqual("bot-bottle@example.invalid", profile["email"])
self.assertTrue(profile["email_verified"]) self.assertTrue(profile["email_verified"])
def test_dummy_auth_redacts_unknown_future_auth_fields(self):
    """Unknown host auth fields must not appear in the dummy auth JSON.

    The fixture plants a distinct marker string at every level (top-level
    object, ``tokens`` block, JWT payloads, and the OpenAI auth claim) in
    fields the redaction policy does not allowlist.
    """
    # Each marker below is placed exactly once in the host fixture; none of
    # them may show up anywhere in the serialized dummy auth.
    secrets = [
        "top-session-secret",
        "top-nested-secret",
        "refresh-secret",
        "session-token-secret",
        "jwt-custom-secret",
        "jwt-nested-secret",
        "jwt-list-secret",
        "id-token-secret",
        "auth-claim-secret",
        "auth-claim-nested-secret",
        "top-list-secret",
        "token-nested-secret",
        "token-list-secret",
    ]
    self._write({
        "auth_mode": "chatgpt",
        "session_context": "top-session-secret",
        "future_nested": {"value": "top-nested-secret"},
        "future_list": ["top-list-secret"],
        "tokens": {
            "access_token": _jwt_with_payload({
                "exp": 2000000000,
                "custom_session": "jwt-custom-secret",
                "future_nested": {"value": "jwt-nested-secret"},
                "future_list": ["jwt-list-secret"],
                "https://api.openai.com/auth": {
                    "chatgpt_plan_type": "plus",
                    "chatgpt_account_id": "acct-real",
                    "session_context": "auth-claim-secret",
                    "nested": {"value": "auth-claim-nested-secret"},
                },
            }),
            "id_token": _jwt_with_payload({
                "exp": 2000000000,
                "opaque": "id-token-secret",
            }),
            "refresh_token": "refresh-secret",
            "session_token": "session-token-secret",
            "future_object": {"value": "token-nested-secret"},
            "future_list": ["token-list-secret"],
            "account_id": "acct-host",
        },
    })

    dummy_json = codex_dummy_auth_json(
        {"CODEX_HOME": str(self.home)},
        now=datetime(2026, 1, 1, tzinfo=timezone.utc),
    )

    # Check the raw serialized text first so a marker hidden in any field
    # that is still plain text in the output is caught.
    for secret in secrets:
        self.assertNotIn(secret, dummy_json)

    # Top-level and token-block unknowns: scalars become the placeholder,
    # containers become empty.
    dummy = json.loads(dummy_json)
    self.assertEqual("bot-bottle-placeholder", dummy["session_context"])
    self.assertEqual({}, dummy["future_nested"])
    self.assertEqual([], dummy["future_list"])
    self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["refresh_token"])
    self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["session_token"])
    self.assertEqual({}, dummy["tokens"]["future_object"])
    self.assertEqual([], dummy["tokens"]["future_list"])

    # Unknown claims inside the dummy access token follow the same policy.
    access_payload = _jwt_payload(dummy["tokens"]["access_token"])
    self.assertEqual(
        "bot-bottle-placeholder",
        access_payload["custom_session"],
    )
    self.assertEqual({}, access_payload["future_nested"])
    self.assertEqual([], access_payload["future_list"])

    # ...and so do unknown entries inside the OpenAI auth claim.
    auth = access_payload["https://api.openai.com/auth"]
    self.assertEqual("bot-bottle-placeholder", auth["session_context"])
    self.assertEqual({}, auth["nested"])
# Run the test suite when this module is executed directly.
if __name__ == "__main__":
    unittest.main()