fix(codex): make host-credential bottles actually authenticate

Debugging a live codex smolmachines bottle surfaced three independent
failures past the sign-in screen; fix each so forward_host_credentials
works end to end:

- codex_auth: dummy access/id tokens now inherit the *real* host token's
  exp instead of now+1h. Codex (0.135) refreshes when its local token's
  JWT exp lapses; with a placeholder refresh_token that refresh fails and
  drops to the sign-in screen. Aligning exp tracks the real token's life.

- prepare: set CODEX_CA_CERTIFICATE to the agent CA bundle for codex
  bottles. Codex is rustls and ignores the system store / NODE_EXTRA_CA_
  CERTS; it reads CODEX_CA_CERTIFICATE (fallback SSL_CERT_FILE) for custom
  roots across HTTPS + wss, so it must be pointed at the egress MITM CA or
  injection can't work without tls_passthrough.

- pipelock: auto tls_passthrough the Codex API hosts when
  forward_host_credentials is on. Egress injects the bearer before
  pipelock, whose header DLP then flags the JWT ("request header contains
  secret") and the retry storm trips its 429. passthrough host-gates the
  CONNECT but skips decrypt+rescan of egress-owned auth. The auto-added
  routes aren't in bottle.egress.routes, so the hosts are added explicitly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-01 16:38:34 -04:00
committed by didericis
parent f8a4e6f40b
commit 68e5097534
11 changed files with 197 additions and 58 deletions
+6 -5
View File
@@ -373,11 +373,12 @@ launcher reads `tokens.access_token` from the host's
`~/.codex/auth.json`, verifies it is fresh user/device auth, and passes
it to the sidecar's `EGRESS_TOKEN_N` env slot. The agent container gets
a dummy `~/.codex/auth.json` that preserves the host auth-mode shape
but replaces credential values with placeholders, so Codex chooses the
user/device auth path without receiving real access tokens, refresh
tokens, or `OPENAI_API_KEY`. The effective egress table automatically
adds or upgrades `api.openai.com` and `chatgpt.com` to authenticated
routes when `forward_host_credentials` is true.
but replaces credential values with placeholders. It keeps the selected
ChatGPT account id so Codex sends requests for the same account while
egress owns the real bearer token. The agent never receives real access
tokens, refresh tokens, or `OPENAI_API_KEY`. The effective egress table
automatically adds or upgrades `api.openai.com` and `chatgpt.com` to
authenticated routes when `forward_host_credentials` is true.
The built-in Codex template uses `Dockerfile.codex`; set
`agent_provider.dockerfile` to build the agent from a custom Dockerfile
+15 -24
View File
@@ -45,19 +45,11 @@ _HOME_FOR = {
}
def _env_flags_for(user: str) -> list[str]:
def _env_assignments_for(user: str, env: Mapping[str, str]) -> list[str]:
home = _HOME_FOR.get(user, f"/home/{user}")
return ["-e", f"HOME={home}", "-e", f"USER={user}"]
def _guest_env_flags(env: Mapping[str, str]) -> list[str]:
"""Render `{K: V}` into a flat `-e K=V` argv slice for
`smolvm machine exec`. `smolvm machine create -e` set env
on PID 1 but it doesn't propagate to fresh exec process
trees, so we have to re-pass them every call."""
out: list[str] = []
out = [f"HOME={home}", f"USER={user}"]
for k, v in env.items():
out += ["-e", f"{k}={v}"]
out.append(f"{k}={v}")
return out
@@ -98,9 +90,8 @@ class SmolmachinesBottle(Bottle):
flags = ["smolvm", "machine", "exec", "--name", self.name]
if tty:
flags += ["-i", "-t"]
flags += _env_flags_for("node")
flags += _guest_env_flags(self._guest_env)
agent_tail = [self.agent_command]
agent_tail = ["env", *_env_assignments_for("node", self._guest_env),
self.agent_command]
provider_prompt_args = prompt_args(
self._agent_prompt_mode, self._prompt_path, argv=argv,
)
@@ -148,16 +139,16 @@ class SmolmachinesBottle(Bottle):
on both backends. Pass `user="root"` for tests that need
root.
`runuser -u <user> -- /bin/sh -c <script>` switches UID
without invoking a login shell; HOME / USER are set via
`smolvm -e` (see `_env_flags_for`)."""
argv = (
_env_flags_for(user)
+ _guest_env_flags(self._guest_env)
+ ["--", "runuser", "-u", user, "--", "/bin/sh", "-c", script]
)
# _smolvm.machine_exec expects argv (the bit after `--`);
# the -e flags go before, so call smolvm directly.
`runuser -u <user> -- env ... /bin/sh -c <script>` switches UID
without invoking a login shell, then sets HOME / USER and the
bottle env in the child process."""
argv = [
"--", "runuser", "-u", user, "--",
"env", *_env_assignments_for(user, self._guest_env),
"/bin/sh", "-c", script,
]
# Call smolvm directly because this path needs the host-side
# subprocess capture shape used by the Docker backend.
r = subprocess.run(
["smolvm", "machine", "exec", "--name", self.name] + argv,
capture_output=True, text=True, check=False,
@@ -129,6 +129,24 @@ def resolve_plan(
if provider.template == "claude" and has_provider_auth:
guest_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
guest_env.setdefault("DISABLE_ERROR_REPORTING", "1")
if provider.template == "codex":
# Codex is a Rust/rustls client: unlike the Node agents it does
# NOT consult the system trust store or honor NODE_EXTRA_CA_CERTS.
# It reads CODEX_CA_CERTIFICATE (falling back to SSL_CERT_FILE)
# for custom roots, across HTTPS *and* the wss responses channel.
# Point it at the bundle update-ca-certificates rebuilt with the
# egress MITM CA so Codex trusts the proxy and egress can inject
# the host bearer — without this, codex bottles need
# pipelock tls_passthrough, which disables auth injection.
guest_env["CODEX_CA_CERTIFICATE"] = (
"/etc/ssl/certs/ca-certificates.crt"
)
if provider.template == "codex" and provider.forward_host_credentials:
# Smolvm exec process trees do not reliably inherit the image
# user's login environment. Pin CODEX_HOME to the same path
# provision_provider_auth writes so Codex never falls back to a
# root or unset home and shows the sign-in picker.
guest_env["CODEX_HOME"] = "/home/node/.codex"
supervise_plan = None
if bottle.supervise:
@@ -4,6 +4,7 @@ from __future__ import annotations
import os
from ....log import die
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
@@ -28,3 +29,21 @@ def provision_provider_auth(plan: SmolmachinesBottlePlan, target: str) -> None:
_smolvm.machine_cp(str(plan.codex_auth_file), f"{target}:{auth_path}")
_smolvm.machine_exec(target, ["chown", "node:node", auth_path])
_smolvm.machine_exec(target, ["chmod", "600", auth_path])
result = _smolvm.machine_exec(
target,
[
"runuser", "-u", "node", "--",
"env",
f"HOME={guest_home}",
f"CODEX_HOME={auth_dir}",
"codex", "login", "status",
],
)
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(
"codex host credentials: dummy auth was copied into the "
f"smolmachine, but Codex did not accept it{detail}"
)
+45 -19
View File
@@ -75,11 +75,22 @@ def codex_dummy_auth_json(
now: datetime | None = None,
) -> str:
"""Return a non-secret `auth.json` that keeps Codex in the host's
auth branch while egress owns the real bearer token."""
auth branch while egress owns the real bearer token.
The dummy access/id tokens carry the *host* token's real `exp` so
Codex's proactive refresh lifecycle (it refreshes when its local
access token is at/past expiry) tracks the real token instead of
firing after an artificial TTL. Codex cannot refresh inside the
bottle — the refresh token is a placeholder and the OpenAI token
endpoint is off-route — so a shorter dummy exp would drop Codex to
the sign-in screen the moment it lapsed, even while egress still
holds a valid bearer."""
path = codex_auth_path(host_env)
codex_host_access_token(host_env, now=now)
access = codex_host_access_token(host_env, now=now)
raw = _read_auth_object(path)
dummy = _redact_codex_auth(deepcopy(raw), now=now)
host_exp = _jwt_exp(access)
exp_ts = int(host_exp.timestamp()) if host_exp is not None else None
dummy = _redact_codex_auth(deepcopy(raw), now=now, exp_ts=exp_ts)
return json.dumps(dummy, indent=2, sort_keys=True) + "\n"
@@ -104,29 +115,35 @@ def _read_auth_object(path: Path) -> dict:
return raw
def _dummy_jwt(now: datetime | None = None) -> str:
def _dummy_exp(now: datetime | None, exp_ts: int | None) -> int:
if exp_ts is not None:
return exp_ts
check_now = now or datetime.now(timezone.utc)
exp = int(check_now.timestamp()) + 3600
return int(check_now.timestamp()) + 3600
def _dummy_jwt(now: datetime | None = None, *, exp_ts: int | None = None) -> str:
return _encode_dummy_jwt({
"exp": exp,
"exp": _dummy_exp(now, exp_ts),
"sub": "bot-bottle-placeholder",
})
def _dummy_jwt_from_host(value: object, *, now: datetime | None = None) -> str:
def _dummy_jwt_from_host(
value: object, *, now: datetime | None = None, exp_ts: int | None = None,
) -> str:
if not isinstance(value, str):
return _dummy_jwt(now)
return _dummy_jwt(now, exp_ts=exp_ts)
parts = value.split(".")
if len(parts) < 2:
return _dummy_jwt(now)
return _dummy_jwt(now, exp_ts=exp_ts)
try:
payload = json.loads(_b64url_decode(parts[1]))
except (ValueError, json.JSONDecodeError):
return _dummy_jwt(now)
return _dummy_jwt(now, exp_ts=exp_ts)
if not isinstance(payload, dict):
return _dummy_jwt(now)
return _encode_dummy_jwt(_redact_jwt_payload(payload, now=now))
return _dummy_jwt(now, exp_ts=exp_ts)
return _encode_dummy_jwt(_redact_jwt_payload(payload, now=now, exp_ts=exp_ts))
def _encode_dummy_jwt(payload: dict) -> str:
@@ -141,12 +158,12 @@ def _redact_jwt_payload(
payload: dict,
*,
now: datetime | None = None,
exp_ts: int | None = None,
) -> dict:
check_now = now or datetime.now(timezone.utc)
out = _redact_claims(payload)
if not isinstance(out, dict):
out = {}
out["exp"] = int(check_now.timestamp()) + 3600
out["exp"] = _dummy_exp(now, exp_ts)
out.setdefault("sub", "bot-bottle-placeholder")
return out
@@ -195,6 +212,11 @@ def _redact_auth_claim(value: object) -> dict:
lower = key.lower()
if lower == "chatgpt_plan_type" and isinstance(inner, str) and inner:
out[key] = inner
elif lower == "chatgpt_account_id" and isinstance(inner, str) and inner:
# Current Codex uses the selected account id when building
# ChatGPT requests. Keep that non-secret identifier aligned
# with the host while egress owns the real bearer token.
out[key] = inner
elif lower == "localhost" and isinstance(inner, bool):
out[key] = inner
elif isinstance(inner, bool):
@@ -212,7 +234,9 @@ def _redact_auth_claim(value: object) -> dict:
return out
def _redact_codex_auth(value: object, *, now: datetime | None = None) -> object:
def _redact_codex_auth(
value: object, *, now: datetime | None = None, exp_ts: int | None = None,
) -> object:
if isinstance(value, dict):
out: dict[str, object] = {}
for key, inner in value.items():
@@ -220,18 +244,20 @@ def _redact_codex_auth(value: object, *, now: datetime | None = None) -> object:
if lower == "openai_api_key":
out[key] = None
elif lower == "tokens":
out[key] = _redact_codex_auth(inner, now=now)
out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
elif lower in {"access_token", "id_token"}:
out[key] = _dummy_jwt_from_host(inner, now=now)
out[key] = _dummy_jwt_from_host(inner, now=now, exp_ts=exp_ts)
elif "token" in lower or "secret" in lower or lower.endswith("_key"):
out[key] = "bot-bottle-placeholder"
elif lower == "account_id" and isinstance(inner, str) and inner:
out[key] = inner
elif lower in {"account_id", "user_id", "email"}:
out[key] = "bot-bottle-placeholder"
else:
out[key] = _redact_codex_auth(inner, now=now)
out[key] = _redact_codex_auth(inner, now=now, exp_ts=exp_ts)
return out
if isinstance(value, list):
return [_redact_codex_auth(v, now=now) for v in value]
return [_redact_codex_auth(v, now=now, exp_ts=exp_ts) for v in value]
return value
+18 -1
View File
@@ -21,7 +21,11 @@ from dataclasses import dataclass
from pathlib import Path
from typing import cast
from .egress import EGRESS_HOSTNAME, egress_routes_for_bottle
from .egress import (
CODEX_HOST_CREDENTIAL_HOSTS,
EGRESS_HOSTNAME,
egress_routes_for_bottle,
)
from .supervise import SUPERVISE_HOSTNAME
from .manifest import Bottle
@@ -111,6 +115,19 @@ def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]:
for route in bottle.egress.routes:
if route.Pipelock.TlsPassthrough:
seen.setdefault(route.Host, None)
# forward_host_credentials makes egress inject the host ChatGPT bearer
# on the Codex API hosts AFTER the agent boundary. Pipelock sits
# downstream of egress and DLP-scans request headers; left to MITM
# these routes it flags the injected JWT as a leaked secret
# ("request header contains secret") and blocks. Pass them through so
# pipelock still enforces the host allowlist on CONNECT but does not
# decrypt + rescan egress-owned auth. The auto-added routes live in
# egress_routes_for_bottle, not bottle.egress.routes, so add the
# hosts explicitly here.
provider = bottle.agent_provider
if provider.forward_host_credentials and provider.template == "codex":
for host in CODEX_HOST_CREDENTIAL_HOSTS:
seen.setdefault(host, None)
return sorted(seen.keys())
@@ -37,8 +37,8 @@ possible, not in the agent.
`CODEX_ACCESS_TOKEN`, or real `tokens.access_token` /
`tokens.refresh_token` values.
- The agent container receives only a dummy Codex `auth.json` that
preserves the host auth-mode shape and replaces credential values
with placeholders.
preserves the host auth-mode shape, keeps the selected ChatGPT
account id, and replaces credential values with placeholders.
- Egress route files remain non-secret: they contain only host/path/auth
slot metadata, never token values.
- Missing, API-key, malformed, or expired host Codex auth fails
+27 -2
View File
@@ -120,7 +120,7 @@ class TestCodexHostAccessToken(unittest.TestCase):
self.assertNotEqual(access, dummy["tokens"]["access_token"])
self.assertNotEqual(refresh, dummy["tokens"]["refresh_token"])
self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["refresh_token"])
self.assertEqual("bot-bottle-placeholder", dummy["tokens"]["account_id"])
self.assertEqual("acct-host", dummy["tokens"]["account_id"])
self.assertIsNotNone(
codex_host_access_token(
{"CODEX_HOME": str(self.home)},
@@ -128,6 +128,31 @@ class TestCodexHostAccessToken(unittest.TestCase):
)
)
def test_dummy_auth_tokens_inherit_host_token_exp(self):
# Codex refreshes when its local access token is at/past exp;
# the dummy must carry the host token's real exp so Codex does
# not drop to the sign-in screen after an artificial TTL while
# egress still holds a valid bearer.
host_exp = 2000000000
self._write({
"auth_mode": "chatgpt",
"tokens": {
"access_token": _jwt(host_exp),
"id_token": _jwt(host_exp),
"refresh_token": "hidden",
},
})
dummy = json.loads(codex_dummy_auth_json(
{"CODEX_HOME": str(self.home)},
now=datetime(2026, 1, 1, tzinfo=timezone.utc),
))
self.assertEqual(
host_exp, _jwt_payload(dummy["tokens"]["access_token"])["exp"],
)
self.assertEqual(
host_exp, _jwt_payload(dummy["tokens"]["id_token"])["exp"],
)
def test_dummy_auth_keeps_required_account_claim_shape(self):
def jwt(payload: dict) -> str:
def enc(obj: dict) -> str:
@@ -172,7 +197,7 @@ class TestCodexHostAccessToken(unittest.TestCase):
auth = access_payload["https://api.openai.com/auth"]
profile = access_payload["https://api.openai.com/profile"]
self.assertEqual("plus", auth["chatgpt_plan_type"])
self.assertEqual("bot-bottle-placeholder", auth["chatgpt_account_id"])
self.assertEqual("acct-real", auth["chatgpt_account_id"])
self.assertEqual("bot-bottle-placeholder", auth["chatgpt_user_id"])
self.assertEqual("bot-bottle@example.invalid", profile["email"])
self.assertTrue(profile["email_verified"])
+19
View File
@@ -113,6 +113,25 @@ class TestTlsPassthrough(unittest.TestCase):
])))
self.assertEqual(["api.openai.com"], passthrough)
def test_forward_host_credentials_passes_through_codex_hosts(self):
# Egress injects the host bearer on the Codex API hosts; pipelock
# must pass them through or its header DLP blocks the injected JWT
# ("request header contains secret"). These routes are auto-added
# (not in bottle.egress.routes), so passthrough is host-derived.
passthrough = pipelock_effective_tls_passthrough(_bottle({
"agent_provider": {
"template": "codex",
"forward_host_credentials": True,
},
}))
self.assertEqual(["api.openai.com", "chatgpt.com"], passthrough)
def test_no_codex_passthrough_without_forward_host_credentials(self):
passthrough = pipelock_effective_tls_passthrough(_bottle({
"agent_provider": {"template": "codex"},
}))
self.assertEqual([], passthrough)
class TestSsrfIpAllowlist(unittest.TestCase):
def test_default_empty(self):
+4 -5
View File
@@ -59,10 +59,9 @@ class TestClaudeArgvWrapped(unittest.TestCase):
"smolvm", "machine", "exec", "--name",
"bot-bottle-dev-abc",
"-i", "-t",
"-e", "HOME=/home/node",
"-e", "USER=node",
"--",
"runuser", "-u", "node", "--",
"env", "HOME=/home/node", "USER=node",
"claude",
],
argv,
@@ -107,7 +106,7 @@ class TestClaudeArgvWrapped(unittest.TestCase):
HTTPS_PROXY="http://127.0.0.1:1234",
NO_PROXY="localhost",
).agent_argv([]))
self.assertIn("-e", argv)
self.assertIn("env", argv)
self.assertIn("HTTPS_PROXY=http://127.0.0.1:1234", argv)
self.assertIn("NO_PROXY=localhost", argv)
@@ -119,8 +118,8 @@ class TestClaudeArgvWrapped(unittest.TestCase):
argv = _bottle().agent_argv([])
agent_idx = argv.index("claude")
self.assertEqual(
["runuser", "-u", "node", "--"],
argv[agent_idx - 4:agent_idx],
["runuser", "-u", "node", "--", "env"],
argv[agent_idx - 7:agent_idx - 2],
)
+24
View File
@@ -211,6 +211,7 @@ class TestProvisionProviderAuth(unittest.TestCase):
) as cp, patch(
"bot_bottle.backend.smolmachines.provision.provider_auth._smolvm.machine_exec"
) as ex:
ex.return_value = SmolvmRunResult(0, "Logged in using ChatGPT\n", "")
_provider_auth.provision_provider_auth(
_plan(codex_auth_file=Path("/tmp/codex-auth.json")),
"bot-bottle-demo-abc12",
@@ -226,6 +227,29 @@ class TestProvisionProviderAuth(unittest.TestCase):
argv_seen,
)
self.assertIn(["chmod", "600", "/home/node/.codex/auth.json"], argv_seen)
self.assertIn(
[
"runuser", "-u", "node", "--",
"env",
"HOME=/home/node",
"CODEX_HOME=/home/node/.codex",
"codex", "login", "status",
],
argv_seen,
)
def test_dies_when_codex_rejects_dummy_auth(self):
with patch(
"bot_bottle.backend.smolmachines.provision.provider_auth._smolvm.machine_cp"
), patch(
"bot_bottle.backend.smolmachines.provision.provider_auth._smolvm.machine_exec"
) as ex:
ex.return_value = SmolvmRunResult(1, "Not logged in\n", "")
with self.assertRaises(SystemExit):
_provider_auth.provision_provider_auth(
_plan(codex_auth_file=Path("/tmp/codex-auth.json")),
"bot-bottle-demo-abc12",
)
class TestProvisionSkills(unittest.TestCase):