feat(smolmachines): provision_ca + provision_git + provision_supervise (PRD 0023 chunk 4d)
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 43s
test / unit (push) Successful in 26s
test / integration (push) Successful in 42s

End-to-end provisioning parity with the docker backend. After this
chunk a smolmachines bottle has a working trust store, git-gate
gitconfig, and supervise MCP registration — same shape as docker,
dispatched via `smolvm machine cp` / `smolvm machine exec` instead
of `docker cp` / `docker exec`.

Adds three new provision modules:
- ca.py:        select egress vs pipelock CA (same logic as
                docker), machine cp + update-ca-certificates,
                log sha256 fingerprint.
- git.py:       copy host .git when --cwd was passed; render
                ~/.gitconfig with insteadOf URLs. URL prefix is
                `git://<bundle_ip>:9418/...` (no DNS in the
                TSI-allowlisted guest) vs docker's
                `git://git-gate/...`.
- supervise.py: `claude mcp add` via machine_exec; URL is
                `http://<bundle_ip>:9100/`. Failure is logged but
                non-fatal (matches docker).

Shared render: `render_git_gate_gitconfig` moves out of
backend/docker/provision/git.py into the platform-neutral
claude_bottle/git_gate.py (renamed to git_gate_render_gitconfig
for consistency with the existing git_gate_render_* helpers),
parameterized on a `gate_host` argument so both backends use the
same logic with different addresses.

Path/user fixups for the post-chunk-4c agent image (real
claude-bottle image, USER node, $HOME=/home/node):
- prompt.py default path moves from /root/... to
  /home/node/.claude-bottle-prompt.txt; chown + chmod after
  machine cp.
- skills.py default skills dir moves from /root/.claude/skills to
  /home/node/.claude/skills; chown -R per skill.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit was merged in pull request #72.
This commit is contained in:
2026-05-27 14:15:58 -04:00
parent 1fa17d1822
commit ac8c7ba696
10 changed files with 661 additions and 77 deletions
+14 -4
View File
@@ -13,8 +13,11 @@ from . import prepare as _prepare
from .bottle import SmolmachinesBottle
from .bottle_cleanup_plan import SmolmachinesBottleCleanupPlan
from .bottle_plan import SmolmachinesBottlePlan
from .provision import ca as _ca
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
from .provision import supervise as _supervise
class SmolmachinesBottleBackend(
@@ -37,6 +40,11 @@ class SmolmachinesBottleBackend(
with _launch.launch(plan, provision=self.provision) as bottle:
yield bottle
def provision_ca(
self, plan: SmolmachinesBottlePlan, target: str
) -> None:
_ca.provision_ca(plan, target)
def provision_prompt(
self, plan: SmolmachinesBottlePlan, target: str
) -> str | None:
@@ -50,10 +58,12 @@ class SmolmachinesBottleBackend(
def provision_git(
self, plan: SmolmachinesBottlePlan, target: str
) -> None:
# Chunk 4 follow-on: needs the git-gate inner Plan (so the
# gitconfig insteadOf URL points at the gate's host) and
# the agent image must contain `git`. Stub for chunk 4a.
pass
_git.provision_git(plan, target)
def provision_supervise(
self, plan: SmolmachinesBottlePlan, target: str
) -> None:
_supervise.provision_supervise(plan, target)
def prepare_cleanup(self) -> SmolmachinesBottleCleanupPlan:
return SmolmachinesBottleCleanupPlan()
@@ -0,0 +1,83 @@
"""Install the per-bottle MITM CA into the smolmachines guest's
trust store (PRD 0023 chunk 4d).
Mirrors `backend.docker.provision.ca`: select the right CA (egress
when the bottle has routes, else pipelock), `smolvm machine cp` it
to Debian's `/usr/local/share/ca-certificates/` path,
`update-ca-certificates` to rebuild the trust bundle, and log the
fingerprint once. The selected cert depends on the agent's
HTTP_PROXY target — same logic as the docker backend, since the
agent dials the same daemons through the same bundle.
`smolvm machine exec` runs commands as root in the VM (no `-u`
flag exists; the VM init is root), so we don't need the explicit
`-u 0` the docker backend uses on its `docker exec` calls."""
from __future__ import annotations
import hashlib
import ssl
from pathlib import Path
from ....log import die, info
from ...docker.provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
def _select_ca_cert(plan: SmolmachinesBottlePlan) -> tuple[Path, str]:
"""Pick the CA cert (and a short label for the log line) that
matches the proxy the agent's HTTP_PROXY points at. Egress-proxy
wins when the bottle declares any routes; else pipelock.
The launch step minted both CAs (pipelock always; egress when
routes are declared) and stored their host paths back into the
inner Plans via `dataclasses.replace`. If those paths are empty
here something has gone wrong in launch's bringup."""
if plan.egress_plan.routes:
cert = plan.egress_plan.mitmproxy_ca_cert_only_host_path
if cert == Path() or not cert.is_file():
die(
f"egress CA cert missing at {cert or '(empty)'}; "
f"launch must have called egress_tls_init and "
f"re-bound the plan before provision"
)
return cert, "egress"
cert = plan.proxy_plan.ca_cert_host_path
if not cert or not cert.is_file():
die(
f"pipelock CA cert missing at {cert or '(empty)'}; "
f"launch must have called pipelock_tls_init and re-bound "
f"the plan before provision"
)
return cert, "pipelock"
def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Copy the agent-facing CA cert into the guest, rebuild the
trust bundle, emit a one-line fingerprint log. Called from
`BottleBackend.provision` after the smolvm guest is up."""
cert_host_path, label = _select_ca_cert(plan)
_smolvm.machine_cp(str(cert_host_path), f"{target}:{AGENT_CA_PATH}")
# Mode 0644 — readable to non-root tools in the guest.
# update-ca-certificates rebuilds the bundle at AGENT_CA_BUNDLE,
# which is what curl / Python ssl / OpenSSL-based tools read by
# default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
# `requests` / libraries that don't load the system bundle.
_smolvm.machine_exec(target, ["chmod", "644", AGENT_CA_PATH])
_smolvm.machine_exec(target, ["update-ca-certificates"])
# Stdlib SHA-256 of the cert's DER bytes — the standard
# fingerprint form. Never the private key.
der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text())
fingerprint = hashlib.sha256(der).hexdigest()
info(f"{label} ca fingerprint: sha256:{fingerprint[:32]}...")
# Re-exported for the launch/provision_ca caller + tests. The path
# constants come from the docker module because they're tied to
# Debian's `update-ca-certificates` layout — same in both backends
# since both guest images are Debian-family.
__all__ = ["AGENT_CA_BUNDLE", "AGENT_CA_PATH", "provision_ca"]
@@ -0,0 +1,102 @@
"""Git provisioning inside a running smolmachines bottle
(PRD 0023 chunk 4d).
Two concerns, both about git in the agent:
1. If --cwd was passed AND the host cwd has a .git, copy that
.git into /home/node/workspace/.git so the agent operates on
the user's repo.
2. If the bottle declares `git` entries (PRD 0008), write a
~/.gitconfig with insteadOf rules so every git operation
against a declared upstream transparently hits the per-bottle
git-gate. The gate mirrors the upstream in both directions,
so URL rewriting is symmetric.
Differs from `backend.docker.provision.git` in one address detail:
the TSI-allowlisted guest can only reach the bundle's pinned IP
(no DNS resolver in the /32 allowlist), so the insteadOf URLs
are `git://<bundle_ip>:<port>/<name>.git` rather than the
docker backend's `git://git-gate/<name>.git`. The render itself
is the shared `git_gate_render_gitconfig` on the platform-neutral
git_gate module."""
from __future__ import annotations
import os
import tempfile
from pathlib import Path
from ....git_gate import git_gate_render_gitconfig
from ....log import info
from ...docker.git_gate import GIT_GATE_PORT
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
# `node` is the agent user from the repo Dockerfile. Override via
# CLAUDE_BOTTLE_GUEST_HOME mirrors the docker backend's
# CLAUDE_BOTTLE_CONTAINER_HOME knob — same purpose, different
# transport.
_DEFAULT_GUEST_HOME = "/home/node"
def _guest_home() -> str:
return os.environ.get("CLAUDE_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
def provision_git(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Set up git inside the guest. Runs both subcases; each
no-ops when its condition isn't met."""
_provision_cwd_git(plan, target)
_provision_git_gate_config(plan, target)
def _provision_cwd_git(plan: SmolmachinesBottlePlan, target: str) -> None:
"""If --cwd was set and the host cwd has a .git directory, copy
it into <guest_home>/workspace/.git and fix ownership. No-op
otherwise."""
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
return
guest_workspace_git = f"{_guest_home()}/workspace/.git"
info(f"copying {plan.spec.user_cwd}/.git -> {target}:{guest_workspace_git}")
# mkdir -p the workspace dir so `machine cp` lands the .git
# directly there even on first-time bottles.
_smolvm.machine_exec(target, ["mkdir", "-p", f"{_guest_home()}/workspace"])
_smolvm.machine_cp(
f"{plan.spec.user_cwd}/.git", f"{target}:{guest_workspace_git}",
)
# `machine cp` lands files as root; the agent runs as node so
# the workspace tree must be chowned over.
_smolvm.machine_exec(
target, ["chown", "-R", "node:node", guest_workspace_git],
)
def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Write ~/.gitconfig in the guest with the git-gate insteadOf
rules. No-op when the bottle has no `git` entries."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not bottle.git:
return
# IP-literal form: the TSI allowlist passes <bundle_ip>/32 and
# nothing else, so the agent has to dial the gate by IP+port.
gate_host = f"{plan.bundle_ip}:{GIT_GATE_PORT}"
content = git_gate_render_gitconfig(bottle.git, gate_host)
guest_gitconfig = f"{_guest_home()}/.gitconfig"
# Stage the file under the plan's stage_dir so `machine cp`
# has a stable host path. The plan's stage_dir is cleaned up
# by start.py's session-end teardown.
with tempfile.NamedTemporaryFile(
"w", dir=str(plan.stage_dir), prefix="gitconfig.",
delete=False,
) as f:
f.write(content)
config_file = Path(f.name)
os.chmod(config_file, 0o600)
info(f"writing {guest_gitconfig} with {len(bottle.git)} insteadOf rule(s)")
_smolvm.machine_cp(str(config_file), f"{target}:{guest_gitconfig}")
_smolvm.machine_exec(target, ["chown", "node:node", guest_gitconfig])
_smolvm.machine_exec(target, ["chmod", "644", guest_gitconfig])
@@ -3,30 +3,40 @@
The prompt file is always copied (so the in-guest path always
exists) but `--append-system-prompt-file` only fires when the
agent actually has a prompt — the return value signals which
case, mirroring the docker backend's contract."""
case, mirroring the docker backend's contract.
`smolvm machine cp` lands files as root inside the VM; the claude
process runs as `node`, so we chown + chmod the prompt after the
copy. Same flow as the docker backend's provision_prompt."""
from __future__ import annotations
import os
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
# In-guest path for the prompt. Smolvm's default agent image
# (alpine for now; the real claude-bottle image later) runs as
# root with $HOME=/root. The path is also surfaced as the return
# value so the caller can pass it via --append-system-prompt-file.
_IN_GUEST_PROMPT_PATH = "/root/.claude-bottle-prompt.txt"
# `node` is the agent user from the repo Dockerfile.
# CLAUDE_BOTTLE_GUEST_HOME mirrors the docker backend's
# CLAUDE_BOTTLE_CONTAINER_HOME knob.
_DEFAULT_GUEST_HOME = "/home/node"
def provision_prompt(plan: SmolmachinesBottlePlan, target: str) -> str | None:
"""Copy the prompt file into the running smolvm guest. Returns
the in-guest path if the agent has a non-empty prompt (drives
--append-system-prompt-file), else None. The file is copied
either way so the path always exists — mirrors the docker
backend's behavior."""
_smolvm.machine_cp(
str(plan.prompt_file),
f"{target}:{_IN_GUEST_PROMPT_PATH}",
)
"""Copy the prompt file into the running smolvm guest, fix
ownership/mode. Returns the in-guest path if the agent has a
non-empty prompt (drives --append-system-prompt-file), else
None. The file is copied either way so the path always
exists — mirrors the docker backend's behavior."""
guest_home = os.environ.get("CLAUDE_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
in_guest_prompt_path = f"{guest_home}/.claude-bottle-prompt.txt"
_smolvm.machine_cp(str(plan.prompt_file), f"{target}:{in_guest_prompt_path}")
# machine cp lands as root, source's 0o600 mode is preserved —
# node can't read its own prompt without these two.
_smolvm.machine_exec(target, ["chown", "node:node", in_guest_prompt_path])
_smolvm.machine_exec(target, ["chmod", "600", in_guest_prompt_path])
agent = plan.spec.manifest.agents[plan.spec.agent_name]
return _IN_GUEST_PROMPT_PATH if agent.prompt else None
return in_guest_prompt_path if agent.prompt else None
@@ -18,23 +18,26 @@ from ..bottle_plan import SmolmachinesBottlePlan
# In-guest path mirrors the docker backend's claude-skills
# convention (~/.claude/skills/<name>/). For smolmachines the
# agent is root by default; chunk 5+ may swap to a node user
# in the real claude-bottle image, at which point this path
# follows /home/node/ — the env knob below provides the override.
_DEFAULT_SKILLS_DIR = "/root/.claude/skills"
# convention (~/.claude/skills/<name>/) under the node user's
# home — same path as the real claude-bottle image's
# /home/node/.claude/skills (pre-created in the Dockerfile).
_DEFAULT_SKILLS_DIR = "/home/node/.claude/skills"
def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Copy each of the agent's named skills from the host's
~/.claude/skills/<name>/ into the guest's equivalent path.
For each skill: `mkdir -p` the destination, then
`smolvm machine cp` the host source dir over. No-op when the
agent has no skills.
For each skill: `mkdir -p` the destination, `smolvm machine cp`
the host source dir over, then chown the result to node:node so
the agent can read it. No-op when the agent has no skills.
smolvm machine cp on a directory copies recursively (same
semantics as `cp -r`); unlike docker cp's trailing-slash
convention, smolvm doesn't need the `/.` suffix dance."""
convention, smolvm doesn't need the `/.` suffix dance.
machine cp lands files as root inside the VM, so we chown each
skill tree over to node:node after the copy — same pattern as
the docker backend's provision_prompt."""
agent = plan.spec.manifest.agents[plan.spec.agent_name]
if not agent.skills:
return
@@ -57,3 +60,4 @@ def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
# Wipe any prior copy so re-runs don't accumulate.
_smolvm.machine_exec(target, ["rm", "-rf", dst])
_smolvm.machine_cp(src, f"{target}:{dst}")
_smolvm.machine_exec(target, ["chown", "-R", "node:node", dst])
@@ -0,0 +1,60 @@
"""Supervise sidecar provisioning inside a running smolmachines
bottle (PRD 0023 chunk 4d; PRD 0013 supervise plane).
Registers the per-bottle supervise sidecar as an HTTP MCP server
in the agent's claude-code config so the agent discovers the
stuck-recovery MCP tools (pipelock-block, capability-block) at
startup.
Mirrors `backend.docker.provision.supervise` — same `claude mcp
add` call, just dispatched via `smolvm machine exec` instead of
`docker exec`, and against `<bundle_ip>:<port>` instead of the
short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
from __future__ import annotations
from ....log import info, warn
from ....supervise import SUPERVISE_PORT
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
_SUPERVISE_MCP_NAME = "supervise"
def supervise_mcp_url(bundle_ip: str) -> str:
return f"http://{bundle_ip}:{SUPERVISE_PORT}/"
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Run `claude mcp add` inside the guest to register the
supervise sidecar in claude-code's user config. No-op when
bottle.supervise is False.
Failure is logged but not fatal: the bottle still works (you
just can't call supervise tools from the agent until the entry
is added manually). The operator sees the warning at launch."""
if plan.supervise_plan is None:
return
url = supervise_mcp_url(plan.bundle_ip)
info(f"registering supervise MCP server in agent claude config → {url}")
r = _smolvm.machine_exec(
target,
[
"claude", "mcp", "add",
"--scope", "user",
"--transport", "http",
_SUPERVISE_MCP_NAME,
url,
],
)
if r.returncode != 0:
warn(
f"`claude mcp add supervise` failed (exit {r.returncode}): "
f"{(r.stderr or r.stdout or '').strip()}. Inside the bottle, "
f"register manually with: "
f"claude mcp add --scope user --transport http supervise {url}"
)
__all__ = ["provision_supervise", "supervise_mcp_url"]