refactor(agent): group provider provisioning into plan
test / unit (pull_request) Successful in 33s
test / integration (pull_request) Successful in 46s

This commit is contained in:
2026-06-01 22:07:14 +00:00
parent a8b2237964
commit 10c009c37b
13 changed files with 450 additions and 226 deletions
+118
View File
@@ -7,10 +7,13 @@ command, default image, and prompt/auth behavior.
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
from .codex_auth import write_codex_dummy_auth_file
PROVIDER_CLAUDE = "claude"
PROVIDER_CODEX = "codex"
@@ -32,6 +35,48 @@ class AgentProviderRuntime:
remote_control_args: tuple[str, ...]
@dataclass(frozen=True)
class AgentProvisionDir:
guest_path: str
mode: str = "700"
owner: str = "node:node"
@dataclass(frozen=True)
class AgentProvisionFile:
host_path: Path
guest_path: str
mode: str = "600"
owner: str = "node:node"
@dataclass(frozen=True)
class AgentProvisionCommand:
argv: tuple[str, ...]
error: str = ""
@dataclass(frozen=True)
class AgentProvisionPlan:
"""Provider-owned guest setup.
Backends interpret this plan with their own copy/exec primitives.
Provider-specific content stays here so future provider plugins can
return the same shape without adding backend-plan fields.
"""
template: str
command: str
prompt_mode: PromptMode
image: str
dockerfile: str
guest_env: dict[str, str]
dirs: tuple[AgentProvisionDir, ...] = ()
files: tuple[AgentProvisionFile, ...] = ()
pre_copy: tuple[AgentProvisionCommand, ...] = ()
verify: tuple[AgentProvisionCommand, ...] = ()
_REPO_ROOT = Path(__file__).resolve().parent.parent
@@ -67,6 +112,79 @@ def runtime_for(template: str) -> AgentProviderRuntime:
return _RUNTIMES[template]
def agent_provision_plan(
*,
template: str,
dockerfile: str,
state_dir: Path,
guest_home: str = "/home/node",
guest_env: dict[str, str] | None = None,
forward_host_credentials: bool = False,
host_env: dict[str, str] | None = None,
) -> AgentProvisionPlan:
runtime = runtime_for(template)
resolved_guest_env = dict(guest_env or {})
dirs: list[AgentProvisionDir] = []
files: list[AgentProvisionFile] = []
pre_copy: list[AgentProvisionCommand] = []
verify: list[AgentProvisionCommand] = []
if template == PROVIDER_CODEX:
resolved_guest_env.setdefault(
"CODEX_CA_CERTIFICATE",
"/etc/ssl/certs/ca-certificates.crt",
)
auth_dir = resolved_guest_env.get("CODEX_HOME", f"{guest_home}/.codex")
dirs.append(AgentProvisionDir(auth_dir))
config_path = f"{auth_dir}/config.toml"
config_file = state_dir / "codex-config.toml"
config_file.write_text(
f'[projects."{guest_home}"]\n'
'trust_level = "trusted"\n'
)
config_file.chmod(0o600)
files.append(AgentProvisionFile(config_file, config_path))
if forward_host_credentials:
auth_file = state_dir / "codex-auth.json"
write_codex_dummy_auth_file(auth_file, host_env or dict(os.environ))
files.append(AgentProvisionFile(auth_file, f"{auth_dir}/auth.json"))
pre_copy.append(AgentProvisionCommand((
"find", auth_dir,
"-maxdepth", "1",
"-type", "f",
"(",
"-name", "*.sqlite",
"-o", "-name", "*.sqlite-*",
"-o", "-name", "*.codex-repair-*.bak",
")",
"-delete",
), "codex host credentials: could not reset runtime db files"))
verify.append(AgentProvisionCommand((
"runuser", "-u", "node", "--",
"env",
f"HOME={guest_home}",
f"CODEX_HOME={auth_dir}",
"codex", "login", "status",
), (
"codex host credentials: dummy auth was copied into the "
"guest, but Codex did not accept it"
)))
return AgentProvisionPlan(
template=template,
command=runtime.command,
prompt_mode=runtime.prompt_mode,
image=runtime.image,
dockerfile=dockerfile,
guest_env=resolved_guest_env,
dirs=tuple(dirs),
files=tuple(files),
pre_copy=tuple(pre_copy),
verify=tuple(verify),
)
def prompt_args(
prompt_mode: PromptMode,
prompt_path: str | None,
+19 -6
View File
@@ -11,7 +11,7 @@ import sys
from dataclasses import dataclass, field
from pathlib import Path
from ...agent_provider import PromptMode
from ...agent_provider import AgentProvisionPlan, PromptMode
from ...egress import EgressPlan
from ...git_gate import GitGatePlan
from ...log import info
@@ -52,10 +52,19 @@ class DockerBottlePlan(BottlePlan):
# is opt-in via the manifest's bottle.supervise field.
supervise_plan: SupervisePlan | None
use_runsc: bool
agent_command: str = "claude"
agent_prompt_mode: PromptMode = "append_file"
agent_provider_template: str = "claude"
codex_auth_file: Path | None = None
agent_provision: AgentProvisionPlan
@property
def agent_command(self) -> str:
return self.agent_provision.command
@property
def agent_prompt_mode(self) -> PromptMode:
return self.agent_provision.prompt_mode
@property
def agent_provider_template(self) -> str:
return self.agent_provision.template
def print(self, *, remote_control: bool) -> None:
"""Render the y/N preflight summary to stderr — compact form
@@ -75,7 +84,11 @@ class DockerBottlePlan(BottlePlan):
# upstream tokens in its own environ, so no token forwarding
# from the agent to the proxy is needed.
env_names = visible_agent_env_names(
sorted(set(bottle.env.keys()) | set(self.forwarded_env.keys())),
sorted(
set(bottle.env.keys())
| set(self.forwarded_env.keys())
| set(self.agent_provision.guest_env.keys())
),
agent_provider_template=self.agent_provider_template,
)
+2
View File
@@ -286,6 +286,8 @@ def _agent_service(plan: DockerBottlePlan) -> dict[str, Any]:
f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
]
for name, value in sorted(plan.agent_provision.guest_env.items()):
env.append(f"{name}={value}")
# Forwarded vars (OAuth token, manifest host-interpolations):
# bare name → inherits from compose-up process env, value
# never lands on argv or in the compose file.
+10 -9
View File
@@ -14,8 +14,7 @@ import os
from datetime import datetime, timezone
from pathlib import Path
from ...agent_provider import runtime_for
from ...codex_auth import write_codex_dummy_auth_file
from ...agent_provider import agent_provision_plan, runtime_for
from ...egress import Egress
from ...env import ResolvedEnv, resolve_env
from ...git_gate import GitGate
@@ -156,7 +155,6 @@ def resolve_plan(
agent_dir.mkdir(parents=True, exist_ok=True)
env_file = agent_dir / "agent.env"
prompt_file = agent_dir / "prompt.txt"
codex_auth_file = agent_dir / "codex-auth.json"
prompt_file.write_text("")
prompt_file.chmod(0o600)
@@ -221,12 +219,18 @@ def resolve_plan(
# error reporting) that egress can't gate by auth.
forwarded_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
forwarded_env.setdefault("DISABLE_ERROR_REPORTING", "1")
if provider.forward_host_credentials:
write_codex_dummy_auth_file(codex_auth_file, dict(os.environ))
_write_env_file(resolved, env_file)
prompt_file.write_text(agent.prompt)
use_runsc = docker_mod.runsc_available()
agent_provision = agent_provision_plan(
template=provider.template,
dockerfile=dockerfile_path,
state_dir=agent_dir,
guest_home=os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node"),
forward_host_credentials=provider.forward_host_credentials,
host_env=dict(os.environ),
)
return DockerBottlePlan(
spec=spec,
@@ -246,10 +250,7 @@ def resolve_plan(
egress_plan=egress_plan,
supervise_plan=supervise_plan,
use_runsc=use_runsc,
agent_command=provider_runtime.command,
agent_prompt_mode=provider_runtime.prompt_mode,
agent_provider_template=provider.template,
codex_auth_file=codex_auth_file if provider.forward_host_credentials else None,
agent_provision=agent_provision,
)
@@ -2,87 +2,35 @@
from __future__ import annotations
import os
import shlex
import subprocess
from ..bottle_plan import DockerBottlePlan
_DEFAULT_GUEST_HOME = "/home/node"
def provision_provider_auth(plan: DockerBottlePlan, target: str) -> None:
"""Prepare Codex home state inside a Docker bottle.
"""Apply provider-owned guest setup through Docker primitives."""
provision = plan.agent_provision
for d in provision.dirs:
_exec(target, ["mkdir", "-p", d.guest_path])
_exec(target, ["chown", d.owner, d.guest_path])
_exec(target, ["chmod", d.mode, d.guest_path])
for command in provision.pre_copy:
_exec(target, list(command.argv))
for f in provision.files:
subprocess.run(
["docker", "cp", str(f.host_path), f"{target}:{f.guest_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
_exec(target, ["chown", f.owner, f.guest_path])
_exec(target, ["chmod", f.mode, f.guest_path])
for command in provision.verify:
_exec(target, list(command.argv))
Every Codex bottle gets a minimal config.toml that trusts the
in-container launch directory. When host credentials are forwarded,
auth.json contains no real access or refresh token values; it only
nudges Codex into the same user/device auth branch as the host.
"""
if plan.agent_provider_template != "codex":
return
container_home = os.environ.get(
"BOT_BOTTLE_CONTAINER_HOME", _DEFAULT_GUEST_HOME,
)
auth_dir = f"{container_home}/.codex"
def _exec(target: str, argv: list[str]) -> None:
subprocess.run(
["docker", "exec", "-u", "0", target, "mkdir", "-p", auth_dir],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chown", "node:node", auth_dir],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chmod", "700", auth_dir],
stdout=subprocess.DEVNULL,
check=True,
)
config_path = f"{auth_dir}/config.toml"
config = (
f'[projects."{container_home}"]\n'
'trust_level = "trusted"\n'
)
subprocess.run(
[
"docker", "exec", "-u", "0", target,
"sh", "-c",
f"printf %s {shlex.quote(config)} > {shlex.quote(config_path)}",
],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chown", "node:node", config_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chmod", "600", config_path],
stdout=subprocess.DEVNULL,
check=True,
)
if not plan.codex_auth_file:
return
auth_path = f"{auth_dir}/auth.json"
subprocess.run(
["docker", "cp", str(plan.codex_auth_file), f"{target}:{auth_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chown", "node:node", auth_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", target, "chmod", "600", auth_path],
["docker", "exec", "-u", "0", target, *argv],
stdout=subprocess.DEVNULL,
check=True,
)
+22 -7
View File
@@ -12,7 +12,7 @@ import sys
from dataclasses import dataclass
from pathlib import Path
from ...agent_provider import PromptMode
from ...agent_provider import AgentProvisionPlan, PromptMode
from ...egress import EgressPlan
from ...git_gate import GitGatePlan
from ...log import info
@@ -82,6 +82,7 @@ class SmolmachinesBottlePlan(BottlePlan):
# None when bottle.supervise is False, matching the docker
# backend's convention.
supervise_plan: SupervisePlan | None
agent_provision: AgentProvisionPlan
# Agent-side endpoints. On Docker Desktop the docker bridge
# IPs aren't reachable from the smolvm guest (TSI uses macOS
# networking; docker container IPs live in the daemon's VM),
@@ -93,11 +94,22 @@ class SmolmachinesBottlePlan(BottlePlan):
agent_proxy_url: str = ""
agent_git_gate_host: str = ""
agent_supervise_url: str = ""
agent_command: str = "claude"
agent_prompt_mode: PromptMode = "append_file"
agent_provider_template: str = "claude"
agent_dockerfile_path: str = ""
codex_auth_file: Path | None = None
@property
def agent_command(self) -> str:
return self.agent_provision.command
@property
def agent_prompt_mode(self) -> PromptMode:
return self.agent_provision.prompt_mode
@property
def agent_provider_template(self) -> str:
return self.agent_provision.template
@property
def agent_dockerfile_path(self) -> str:
return self.agent_provision.dockerfile
def print(self, *, remote_control: bool) -> None:
"""Compact y/N preflight. Same shape as the Docker
@@ -109,7 +121,10 @@ class SmolmachinesBottlePlan(BottlePlan):
bottle = manifest.bottle_for(spec.agent_name)
env_names = visible_agent_env_names(
sorted(bottle.env.keys()),
sorted(
set(bottle.env.keys())
| set(self.agent_provision.guest_env.keys())
),
agent_provider_template=self.agent_provider_template,
)
upstreams = [
+12 -11
View File
@@ -14,9 +14,8 @@ import os
from datetime import datetime, timezone
from pathlib import Path
from ...agent_provider import runtime_for
from ...agent_provider import agent_provision_plan, runtime_for
from ...backend import BottleSpec
from ...codex_auth import write_codex_dummy_auth_file
from ...backend.docker.bottle_state import (
BottleMetadata,
agent_state_dir,
@@ -163,12 +162,9 @@ def resolve_plan(
agent_dir = agent_state_dir(slug)
agent_dir.mkdir(parents=True, exist_ok=True)
prompt_file = agent_dir / "prompt.txt"
codex_auth_file = agent_dir / "codex-auth.json"
agent = manifest.agents[spec.agent_name]
prompt_file.write_text(agent.prompt or "")
prompt_file.chmod(0o600)
if provider.forward_host_credentials:
write_codex_dummy_auth_file(codex_auth_file, dict(os.environ))
machine_name = f"bot-bottle-{slug}"
# Stash the agent image ref — `launch.launch` runs the
@@ -184,6 +180,15 @@ def resolve_plan(
else:
image_default = provider_runtime.image
agent_image_ref = os.environ.get("BOT_BOTTLE_IMAGE", image_default)
agent_provision = agent_provision_plan(
template=provider.template,
dockerfile=agent_dockerfile_path,
state_dir=agent_dir,
guest_home=os.environ.get("BOT_BOTTLE_GUEST_HOME", "/home/node"),
guest_env=guest_env,
forward_host_credentials=provider.forward_host_credentials,
host_env=dict(os.environ),
)
return SmolmachinesBottlePlan(
spec=spec,
@@ -194,17 +199,13 @@ def resolve_plan(
bundle_ip=bundle_ip,
machine_name=machine_name,
agent_image_ref=agent_image_ref,
guest_env=guest_env,
guest_env=agent_provision.guest_env,
prompt_file=prompt_file,
proxy_plan=proxy_plan,
git_gate_plan=git_gate_plan,
egress_plan=egress_plan,
supervise_plan=supervise_plan,
agent_command=provider_runtime.command,
agent_prompt_mode=provider_runtime.prompt_mode,
agent_provider_template=provider.template,
agent_dockerfile_path=agent_dockerfile_path,
codex_auth_file=codex_auth_file if provider.forward_host_credentials else None,
agent_provision=agent_provision,
)
@@ -2,113 +2,32 @@
from __future__ import annotations
import os
import shlex
from ....log import die
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
_DEFAULT_GUEST_HOME = "/home/node"
def provision_provider_auth(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Prepare Codex home state inside the smolmachine.
"""Apply provider-owned guest setup through smolvm primitives."""
provision = plan.agent_provision
for d in provision.dirs:
_exec(target, ["mkdir", "-p", d.guest_path], f"could not create {d.guest_path}")
_exec(target, ["chown", d.owner, d.guest_path], f"could not chown {d.guest_path}")
_exec(target, ["chmod", d.mode, d.guest_path], f"could not chmod {d.guest_path}")
for command in provision.pre_copy:
_exec(target, list(command.argv), command.error)
for f in provision.files:
_smolvm.machine_cp(str(f.host_path), f"{target}:{f.guest_path}")
_exec(target, ["chown", f.owner, f.guest_path], f"could not chown {f.guest_path}")
_exec(target, ["chmod", f.mode, f.guest_path], f"could not chmod {f.guest_path}")
for command in provision.verify:
_exec(target, list(command.argv), command.error)
Every Codex bottle gets a minimal config.toml that trusts the
in-guest launch directory. When host credentials are forwarded,
the real host access token remains in the egress bundle env;
auth.json only selects Codex's user/device auth code path.
"""
if plan.agent_provider_template != "codex":
return
guest_home = os.environ.get("BOT_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
auth_dir = plan.guest_env.get("CODEX_HOME", f"{guest_home}/.codex")
result = _smolvm.machine_exec(
target,
["mkdir", "-p", auth_dir],
)
def _exec(target: str, argv: list[str], error: str) -> None:
result = _smolvm.machine_exec(target, argv)
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(f"codex host credentials: could not create {auth_dir}{detail}")
result = _smolvm.machine_exec(target, ["chown", "node:node", auth_dir])
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(f"codex host credentials: could not chown {auth_dir}{detail}")
result = _smolvm.machine_exec(target, ["chmod", "700", auth_dir])
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(f"codex host credentials: could not chmod {auth_dir}{detail}")
result = _smolvm.machine_exec(
target,
[
"find", auth_dir,
"-maxdepth", "1",
"-type", "f",
"(",
"-name", "*.sqlite",
"-o", "-name", "*.sqlite-*",
"-o", "-name", "*.codex-repair-*.bak",
")",
"-delete",
],
)
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(f"codex host credentials: could not reset runtime db files{detail}")
config_path = f"{auth_dir}/config.toml"
config = (
f'[projects."{guest_home}"]\n'
'trust_level = "trusted"\n'
)
result = _smolvm.machine_exec(
target,
[
"sh", "-c",
f"printf %s {shlex.quote(config)} > {shlex.quote(config_path)}",
],
)
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(f"codex host credentials: could not write {config_path}{detail}")
_smolvm.machine_exec(target, ["chown", "node:node", config_path])
_smolvm.machine_exec(target, ["chmod", "600", config_path])
if not plan.codex_auth_file:
return
auth_path = f"{auth_dir}/auth.json"
_smolvm.machine_cp(str(plan.codex_auth_file), f"{target}:{auth_path}")
_smolvm.machine_exec(target, ["chown", "node:node", auth_path])
_smolvm.machine_exec(target, ["chmod", "600", auth_path])
result = _smolvm.machine_exec(
target,
[
"runuser", "-u", "node", "--",
"env",
f"HOME={guest_home}",
f"CODEX_HOME={auth_dir}",
"codex", "login", "status",
],
)
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
detail = f": {detail}"
die(
"codex host credentials: dummy auth was copied into the "
f"smolmachine, but Codex did not accept it{detail}"
)
die(f"agent provider provisioning: {error}{detail}")