refactor(backend): pass Bottle to provisioners instead of target string
test / unit (pull_request) Successful in 50s
test / integration (pull_request) Successful in 59s
test / unit (push) Successful in 43s
test / integration (push) Successful in 1m3s

Closes #178.

The backend provision functions now receive a Bottle handle with
exec / cp_in methods instead of a raw target string. Provisioner
modules use bottle.exec and bottle.cp_in in place of inlined
subprocess.run(["docker", "exec"/"cp", ...]) and direct
_smolvm.machine_cp / machine_exec calls. This decouples the
provisioners from backend-specific runtime primitives so future
refactors (e.g. the supervise rework) can swap the bottle's exec
implementation without touching every provisioner.

Each launch.py constructs the Bottle handle before calling
provision so it can be passed in; provision_prompt's return value
is wired back onto the bottle's prompt path attribute after the
fact.
This commit was merged in pull request #179.
This commit is contained in:
2026-06-03 20:47:37 +00:00
parent f12b0f754e
commit 0efc07ba67
22 changed files with 662 additions and 884 deletions
+17 -17
View File
@@ -2,8 +2,8 @@
trust store (PRD 0023 chunk 4d).
Mirrors `backend.docker.provision.ca`: select the right CA (egress
when the bottle has routes, else pipelock), `smolvm machine cp` it
to Debian's `/usr/local/share/ca-certificates/` path,
when the bottle has routes, else pipelock), copy it to Debian's
`/usr/local/share/ca-certificates/` path,
`update-ca-certificates` to rebuild the trust bundle, and log the
fingerprint once. The selected cert depends on the agent's
HTTP_PROXY target — same logic as the docker backend, since the
@@ -24,20 +24,20 @@ from ...util import (
log_ca_fingerprint,
select_ca_cert,
)
from .. import smolvm as _smolvm
from ... import Bottle, ExecResult
from ..bottle_plan import SmolmachinesBottlePlan
_SIGKILL_EXIT = 128 + 9
def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
def provision_ca(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Copy the agent-facing CA cert into the guest, rebuild the
trust bundle, emit a one-line fingerprint log. Called from
`BottleBackend.provision` after the smolvm guest is up."""
cert_host_path, label = select_ca_cert(plan.egress_plan, plan.proxy_plan)
_smolvm.machine_cp(str(cert_host_path), f"{target}:{AGENT_CA_PATH}")
bottle.cp_in(str(cert_host_path), AGENT_CA_PATH)
# Mode 0644 — readable to non-root tools in the guest.
# update-ca-certificates rebuilds the bundle at AGENT_CA_BUNDLE,
# which is what curl / Python ssl / OpenSSL-based tools read by
@@ -45,21 +45,21 @@ def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
# `requests` / libraries that don't load the system bundle.
#
r = _install_ca(target)
r = _install_ca(bottle)
if r.returncode == _SIGKILL_EXIT:
# smolvm/libkrun can SIGKILL an otherwise-normal exec
# during early-VM provisioning. `update-ca-certificates`
# is idempotent, so retry the same install once after a
# short settle delay before treating it as fatal.
time.sleep(1.0)
r = _install_ca(target)
r = _install_ca(bottle)
if r.returncode != 0:
# update-ca-certificates not adding our cert is fatal —
# claude-code's TLS handshake against the egress-MITM'd
# api.anthropic.com would fail downstream. Bail early
# with what we can see (output is captured by smolvm so
# we can surface it).
# with what we can see (output is captured so we can
# surface it).
die(
f"update-ca-certificates didn't add the agent CA "
f"(exit {r.returncode}): "
@@ -70,21 +70,21 @@ def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
log_ca_fingerprint(cert_host_path, label)
def _install_ca(target: str) -> _smolvm.SmolvmRunResult:
def _install_ca(bottle: Bottle) -> ExecResult:
# chown + chmod + update-ca-certificates + bundle
# verification run in one `sh -c` so we only pay one
# machine_exec round trip; the `&&` chaining surfaces the
# first failure as the return code. The verify check is more
# stable than requiring "1 added" in stdout: a retry after a
# verification run in one exec so we only pay one
# round trip; the `&&` chaining surfaces the first failure
# as the return code. The verify check is more stable than
# requiring "1 added" in stdout: a retry after a
# partially-completed first run may legitimately report "0
# added" while the cert is already installed.
return _smolvm.machine_exec(target, [
"sh", "-c",
return bottle.exec(
f"chown root:root {AGENT_CA_PATH} && "
f"chmod 644 {AGENT_CA_PATH} && "
f"update-ca-certificates && "
f"openssl verify -CAfile {AGENT_CA_BUNDLE} {AGENT_CA_PATH}",
])
user="root",
)
# Re-exported for the launch/provision_ca caller + tests. The path
@@ -26,12 +26,13 @@ git_gate module."""
from __future__ import annotations
import os
import shlex
import tempfile
from pathlib import Path
from ....git_gate import git_gate_render_gitconfig
from ....log import info
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
@@ -46,15 +47,15 @@ def _guest_home() -> str:
return os.environ.get("BOT_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
def provision_git(plan: SmolmachinesBottlePlan, target: str) -> None:
def provision_git(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Set up git inside the guest. Runs all three subcases; each
no-ops when its condition isn't met."""
_provision_cwd_git(plan, target)
_provision_git_gate_config(plan, target)
_provision_git_user(plan, target)
_provision_cwd_git(plan, bottle)
_provision_git_gate_config(plan, bottle)
_provision_git_user(plan, bottle)
def _provision_cwd_git(plan: SmolmachinesBottlePlan, target: str) -> None:
def _provision_cwd_git(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""If --cwd was set and the host cwd has a .git directory, copy
it into <guest_home>/workspace/.git and fix ownership. No-op
otherwise."""
@@ -63,25 +64,26 @@ def _provision_cwd_git(plan: SmolmachinesBottlePlan, target: str) -> None:
return
guest_workspace_git = f"{workspace.guest_path}/.git"
host_git = str(workspace.host_path / ".git")
info(f"copying {host_git} -> {target}:{guest_workspace_git}")
# mkdir -p the workspace dir so `machine cp` lands the .git
info(f"copying {host_git} -> {bottle.name}:{guest_workspace_git}")
# mkdir -p the workspace dir so cp_in lands the .git
# directly there even on first-time bottles.
_smolvm.machine_exec(target, ["mkdir", "-p", workspace.guest_path])
_smolvm.machine_cp(
host_git, f"{target}:{guest_workspace_git}",
)
# `machine cp` lands files as root; the agent runs as node so
bottle.exec(f"mkdir -p {shlex.quote(workspace.guest_path)}", user="root")
bottle.cp_in(host_git, guest_workspace_git)
# cp_in lands files as root; the agent runs as node so
# the workspace tree must be chowned over.
_smolvm.machine_exec(
target, ["chown", "-R", workspace.owner, guest_workspace_git],
bottle.exec(
f"chown -R {shlex.quote(workspace.owner)} {shlex.quote(guest_workspace_git)}",
user="root",
)
def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> None:
def _provision_git_gate_config(
plan: SmolmachinesBottlePlan, bottle: Bottle
) -> None:
"""Write ~/.gitconfig in the guest with the git-gate insteadOf
rules. No-op when the bottle has no `git` entries."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not bottle.git:
manifest_bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not manifest_bottle.git:
return
# `<loopback alias>:<host port>` form: the bundle's git-gate
@@ -90,11 +92,11 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non
# TSI, not the docker bridge IP) can dial it. launch.py
# populates `plan.agent_git_gate_host` after bundle bringup.
content = git_gate_render_gitconfig(
bottle.git, plan.agent_git_gate_host, scheme="http",
manifest_bottle.git, plan.agent_git_gate_host, scheme="http",
)
guest_gitconfig = f"{_guest_home()}/.gitconfig"
# Stage the file under the plan's stage_dir so `machine cp`
# Stage the file under the plan's stage_dir so cp_in
# has a stable host path. The plan's stage_dir is cleaned up
# by start.py's session-end teardown.
with tempfile.NamedTemporaryFile(
@@ -105,41 +107,38 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non
config_file = Path(f.name)
os.chmod(config_file, 0o600)
info(f"writing {guest_gitconfig} with {len(bottle.git)} insteadOf rule(s)")
_smolvm.machine_cp(str(config_file), f"{target}:{guest_gitconfig}")
_smolvm.machine_exec(target, ["chown", "node:node", guest_gitconfig])
_smolvm.machine_exec(target, ["chmod", "644", guest_gitconfig])
info(f"writing {guest_gitconfig} with {len(manifest_bottle.git)} insteadOf rule(s)")
bottle.cp_in(str(config_file), guest_gitconfig)
bottle.exec(
f"chown node:node {shlex.quote(guest_gitconfig)} && "
f"chmod 644 {shlex.quote(guest_gitconfig)}",
user="root",
)
def _provision_git_user(
plan: SmolmachinesBottlePlan, target: str,
plan: SmolmachinesBottlePlan, bottle: Bottle,
) -> None:
"""Apply `git config --global user.{name,email}` inside the
guest as the node user so --global lands in the same
`/home/node/.gitconfig` that `_provision_git_gate_config`
writes to. No-op when the bottle didn't declare `git.user`.
Runs via `runuser -u node --`; HOME is forced via smolvm's
`-e` flag because runuser (without -l) inherits root's
HOME=/root, which would put --global in the wrong file."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
gu = bottle.git_user
SmolmachinesBottle.exec(user="node") automatically sets
HOME=/home/node so --global writes to /home/node/.gitconfig."""
manifest_bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
gu = manifest_bottle.git_user
if gu.is_empty():
return
env = {"HOME": _guest_home(), "USER": "node"}
if gu.name:
info(f"git config --global user.name = {gu.name!r}")
_smolvm.machine_exec(
target,
["runuser", "-u", "node", "--",
"git", "config", "--global", "user.name", gu.name],
env=env,
bottle.exec(
f"git config --global user.name {shlex.quote(gu.name)}",
user="node",
)
if gu.email:
info(f"git config --global user.email = {gu.email!r}")
_smolvm.machine_exec(
target,
["runuser", "-u", "node", "--",
"git", "config", "--global", "user.email", gu.email],
env=env,
bottle.exec(
f"git config --global user.email {shlex.quote(gu.email)}",
user="node",
)
@@ -5,7 +5,7 @@ exists) but `--append-system-prompt-file` only fires when the
agent actually has a prompt — the return value signals which
case, mirroring the docker backend's contract.
`smolvm machine cp` lands files as root inside the VM; the claude
cp_in lands files as root inside the VM; the claude
process runs as `node`, so we chown + chmod the prompt after the
copy. Same flow as the docker backend's provision_prompt."""
@@ -13,7 +13,7 @@ from __future__ import annotations
import os
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
@@ -23,7 +23,7 @@ from ..bottle_plan import SmolmachinesBottlePlan
_DEFAULT_GUEST_HOME = "/home/node"
def provision_prompt(plan: SmolmachinesBottlePlan, target: str) -> str | None:
def provision_prompt(plan: SmolmachinesBottlePlan, bottle: Bottle) -> str | None:
"""Copy the prompt file into the running smolvm guest, fix
ownership/mode. Returns the in-guest path if the agent has a
non-empty prompt (drives --append-system-prompt-file), else
@@ -32,11 +32,13 @@ def provision_prompt(plan: SmolmachinesBottlePlan, target: str) -> str | None:
guest_home = os.environ.get("BOT_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
in_guest_prompt_path = f"{guest_home}/.bot-bottle-prompt.txt"
_smolvm.machine_cp(str(plan.prompt_file), f"{target}:{in_guest_prompt_path}")
# machine cp lands as root, source's 0o600 mode is preserved —
bottle.cp_in(str(plan.prompt_file), in_guest_prompt_path)
# cp_in lands as root, source's 0o600 mode is preserved —
# node can't read its own prompt without these two.
_smolvm.machine_exec(target, ["chown", "node:node", in_guest_prompt_path])
_smolvm.machine_exec(target, ["chmod", "600", in_guest_prompt_path])
bottle.exec(
f"chown node:node {in_guest_prompt_path} && chmod 600 {in_guest_prompt_path}",
user="root",
)
agent = plan.spec.manifest.agents[plan.spec.agent_name]
return in_guest_prompt_path if agent.prompt else None
@@ -2,30 +2,32 @@
from __future__ import annotations
import shlex
from ....log import die
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
def provision_provider_auth(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Apply provider-owned guest setup through smolvm primitives."""
def provision_provider_auth(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Apply provider-owned guest setup through the bottle's exec / cp_in."""
provision = plan.agent_provision
for d in provision.dirs:
_exec(target, ["mkdir", "-p", d.guest_path], f"could not create {d.guest_path}")
_exec(target, ["chown", d.owner, d.guest_path], f"could not chown {d.guest_path}")
_exec(target, ["chmod", d.mode, d.guest_path], f"could not chmod {d.guest_path}")
_exec(bottle, f"mkdir -p {shlex.quote(d.guest_path)}", f"could not create {d.guest_path}")
_exec(bottle, f"chown {shlex.quote(d.owner)} {shlex.quote(d.guest_path)}", f"could not chown {d.guest_path}")
_exec(bottle, f"chmod {shlex.quote(d.mode)} {shlex.quote(d.guest_path)}", f"could not chmod {d.guest_path}")
for command in provision.pre_copy:
_exec(target, list(command.argv), command.error)
_exec(bottle, shlex.join(command.argv), command.error)
for f in provision.files:
_smolvm.machine_cp(str(f.host_path), f"{target}:{f.guest_path}")
_exec(target, ["chown", f.owner, f.guest_path], f"could not chown {f.guest_path}")
_exec(target, ["chmod", f.mode, f.guest_path], f"could not chmod {f.guest_path}")
bottle.cp_in(str(f.host_path), f.guest_path)
_exec(bottle, f"chown {shlex.quote(f.owner)} {shlex.quote(f.guest_path)}", f"could not chown {f.guest_path}")
_exec(bottle, f"chmod {shlex.quote(f.mode)} {shlex.quote(f.guest_path)}", f"could not chmod {f.guest_path}")
for command in provision.verify:
_exec(target, list(command.argv), command.error)
_exec(bottle, shlex.join(command.argv), command.error)
def _exec(target: str, argv: list[str], error: str) -> None:
result = _smolvm.machine_exec(target, argv)
def _exec(bottle: Bottle, script: str, error: str) -> None:
result = bottle.exec(script, user="root")
if result.returncode != 0:
detail = (result.stderr or result.stdout).strip()
if detail:
@@ -13,7 +13,7 @@ import os
from ....log import die, info
from ...util import host_skill_dir
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
@@ -24,18 +24,18 @@ from ..bottle_plan import SmolmachinesBottlePlan
_DEFAULT_SKILLS_DIR = "/home/node/.claude/skills"
def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
def provision_skills(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Copy each of the agent's named skills from the host's
~/.claude/skills/<name>/ into the guest's equivalent path.
For each skill: `mkdir -p` the destination, `smolvm machine cp`
the host source dir over, then chown the result to node:node so
the agent can read it. No-op when the agent has no skills.
For each skill: `mkdir -p` the destination, cp_in the host
source dir over, then chown the result to node:node so the
agent can read it. No-op when the agent has no skills.
smolvm machine cp on a directory copies recursively (same
semantics as `cp -r`); unlike docker cp's trailing-slash
convention, smolvm doesn't need the `/.` suffix dance.
cp_in on a directory copies recursively; unlike docker cp's
trailing-slash convention, smolvm doesn't need the `/.` suffix
dance.
machine cp lands files as root inside the VM, so we chown each
cp_in lands files as root inside the VM, so we chown each
skill tree over to node:node after the copy — same pattern as
the docker backend's provision_prompt."""
agent = plan.spec.manifest.agents[plan.spec.agent_name]
@@ -46,7 +46,7 @@ def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
"BOT_BOTTLE_GUEST_SKILLS_DIR", _DEFAULT_SKILLS_DIR,
)
_smolvm.machine_exec(target, ["mkdir", "-p", skills_dir])
bottle.exec(f"mkdir -p {skills_dir}", user="root")
for name in agent.skills:
src = host_skill_dir(name)
@@ -56,8 +56,8 @@ def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
f"validation and copy at {src}."
)
dst = f"{skills_dir}/{name}"
info(f"copying skill {name} into {target}:{dst}")
info(f"copying skill {name} into {bottle.name}:{dst}")
# Wipe any prior copy so re-runs don't accumulate.
_smolvm.machine_exec(target, ["rm", "-rf", dst])
_smolvm.machine_cp(src, f"{target}:{dst}")
_smolvm.machine_exec(target, ["chown", "-R", "node:node", dst])
bottle.exec(f"rm -rf {dst}", user="root")
bottle.cp_in(src, dst)
bottle.exec(f"chown -R node:node {dst}", user="root")
@@ -7,21 +7,21 @@ stuck-recovery MCP tools (pipelock-block, capability-block) at
startup.
Mirrors `backend.docker.provision.supervise` — same `claude mcp
add` call, just dispatched via `smolvm machine exec` instead of
add` call, just dispatched via bottle.exec instead of
`docker exec`, and against `<bundle_ip>:<port>` instead of the
short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
from __future__ import annotations
from ....log import info, warn
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
_SUPERVISE_MCP_NAME = "supervise"
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
def provision_supervise(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Run `claude mcp add` inside the guest to register the
supervise sidecar in claude-code's user config. No-op when
bottle.supervise is False.
@@ -38,22 +38,13 @@ def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
return
url = plan.agent_supervise_url
info(f"registering supervise MCP server in agent claude config → {url}")
# `claude mcp add --scope user` writes to ~/.claude.json. The
# agent is the `node` user; smolvm machine_exec runs as root
# by default, so we have to switch user explicitly and set
# HOME so the config lands in /home/node/.claude.json (where
# the agent's claude actually reads it from).
r = _smolvm.machine_exec(
target,
[
"runuser", "-u", "node", "--",
"env", "HOME=/home/node",
"claude", "mcp", "add",
"--scope", "user",
"--transport", "http",
_SUPERVISE_MCP_NAME,
url,
],
# `claude mcp add --scope user` writes to ~/.claude.json. Run
# as node so the config lands in /home/node/.claude.json.
# SmolmachinesBottle.exec sets HOME and USER automatically
# for the requested user.
r = bottle.exec(
f"claude mcp add --scope user --transport http {_SUPERVISE_MCP_NAME} {url}",
user="node",
)
if r.returncode != 0:
warn(
@@ -5,11 +5,11 @@ from __future__ import annotations
import shlex
from ....log import info
from .. import smolvm as _smolvm
from ... import Bottle
from ..bottle_plan import SmolmachinesBottlePlan
def provision_workspace(plan: SmolmachinesBottlePlan, target: str) -> None:
def provision_workspace(plan: SmolmachinesBottlePlan, bottle: Bottle) -> None:
"""Copy host cwd contents to the planned guest workspace."""
workspace = plan.workspace_plan
if not (workspace.enabled and workspace.copy_contents):
@@ -20,17 +20,13 @@ def provision_workspace(plan: SmolmachinesBottlePlan, target: str) -> None:
guest_parent_q = shlex.quote(guest_parent)
owner_q = shlex.quote(workspace.owner)
mode_q = shlex.quote(workspace.mode)
info(f"copying {workspace.host_path} -> {target}:{workspace.guest_path}")
_smolvm.machine_exec(
target,
["sh", "-c", f"rm -rf {guest_path_q} && mkdir -p {guest_parent_q}"],
info(f"copying {workspace.host_path} -> {bottle.name}:{workspace.guest_path}")
bottle.exec(
f"rm -rf {guest_path_q} && mkdir -p {guest_parent_q}",
user="root",
)
_smolvm.machine_cp(str(workspace.host_path), f"{target}:{workspace.guest_path}")
_smolvm.machine_exec(
target,
[
"sh", "-c",
f"chown -R {owner_q} {guest_path_q} && "
f"chmod {mode_q} {guest_path_q}",
],
bottle.cp_in(str(workspace.host_path), workspace.guest_path)
bottle.exec(
f"chown -R {owner_q} {guest_path_q} && chmod {mode_q} {guest_path_q}",
user="root",
)