fix(smolmachines): build agent image in launch, not prepare
test / unit (pull_request) Successful in 27s
test / integration (pull_request) Successful in 39s

When starting a smolmachines agent from the dashboard, the
docker-build output rendered on top of the curses preflight
modal — the build was kicked off before the operator had
confirmed launch. The docker backend's `prepare` is pure
resolution (no docker calls); smolmachines was inconsistent
because `prepare` called `_ensure_smolmachine` which ran
`docker build` → `docker save` → `crane push` → `smolvm pack
create` — several seconds of stderr noise rendered before the
y/N prompt.

Move the pipeline:

- `_ensure_smolmachine` (+ `_SMOLMACHINE_CACHE_DIR` + `_REPO_DIR`
  + the local-registry / smolvm imports) moves from
  `backend/smolmachines/prepare.py` to
  `backend/smolmachines/launch.py`. Called right before
  `_smolvm.machine_create` so the resulting `.smolmachine`
  sidecar path lands as a local in `launch`, not on the plan.

- `SmolmachinesBottlePlan.agent_from_path: Path` becomes
  `agent_image_ref: str`. `prepare` stashes only the docker tag
  (`$CLAUDE_BOTTLE_IMAGE` || `claude-bottle:latest`); `launch`
  resolves it into the artifact at bringup.

This puts smolmachines on the same prepare-vs-launch boundary
the docker backend uses: the preflight summary in the dashboard
prints, the operator confirms, then `launch` runs — and its
stderr is routed via `_route_op_to_right_pane` (in tmux) or via
`curses.endwin` (foreground handoff) so the build output lands
cleanly.

Tests:
- `tests/unit/test_smolmachines_prepare_image.py` →
  `tests/unit/test_smolmachines_launch_image.py`, updated to
  import `_ensure_smolmachine` from `launch` rather than
  `prepare`.
- `test_smolmachines_provision.py`: plan fixture switches
  `agent_from_path` → `agent_image_ref`.

593 unit tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 19:37:15 -04:00
parent 1e82aed54b
commit 7afedaabf3
5 changed files with 117 additions and 104 deletions
@@ -48,7 +48,13 @@ class SmolmachinesBottlePlan(BottlePlan):
# (push to a registry first, or smolvm grows a docker-daemon # (push to a registry first, or smolvm grows a docker-daemon
# transport). # transport).
machine_name: str machine_name: str
agent_from_path: Path # Agent image ref (docker tag). `launch` runs the
# build → save → registry push → smolvm pack pipeline against
# this and feeds the resulting `.smolmachine` artifact to
# `machine_create --from`. The pipeline runs at launch time
# (not prepare time) so the docker build output doesn't garble
# the dashboard's preflight modal.
agent_image_ref: str
# In-guest env vars (HTTPS_PROXY etc) — IP-literal URLs since # In-guest env vars (HTTPS_PROXY etc) — IP-literal URLs since
# the guest has no DNS resolver inside the TSI allowlist. # the guest has no DNS resolver inside the TSI allowlist.
# Passed to `smolvm machine create` as `-e K=V` flags. # Passed to `smolvm machine create` as `-e K=V` flags.
+78 -6
View File
@@ -23,6 +23,7 @@ import dataclasses
import os import os
import time import time
from contextlib import ExitStack, contextmanager from contextlib import ExitStack, contextmanager
from pathlib import Path
from typing import Callable, Generator from typing import Callable, Generator
from ...egress import EGRESS_ROUTES_IN_CONTAINER, egress_resolve_token_values from ...egress import EGRESS_ROUTES_IN_CONTAINER, egress_resolve_token_values
@@ -32,6 +33,7 @@ from ...pipelock import (
) )
from ...supervise import QUEUE_DIR_IN_CONTAINER, SUPERVISE_PORT from ...supervise import QUEUE_DIR_IN_CONTAINER, SUPERVISE_PORT
from ...util import expand_tilde from ...util import expand_tilde
from ..docker import util as docker_mod
from ..docker.egress import ( from ..docker.egress import (
EGRESS_CA_IN_CONTAINER, EGRESS_CA_IN_CONTAINER,
EGRESS_PIPELOCK_CA_IN_CONTAINER, EGRESS_PIPELOCK_CA_IN_CONTAINER,
@@ -55,6 +57,18 @@ from . import sidecar_bundle as _bundle
from . import smolvm as _smolvm from . import smolvm as _smolvm
from .bottle import SmolmachinesBottle from .bottle import SmolmachinesBottle
from .bottle_plan import SmolmachinesBottlePlan from .bottle_plan import SmolmachinesBottlePlan
from .local_registry import crane_push_tarball, ephemeral_registry
# Repo root, used as the `docker build` context for the agent image.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
# Per-host cache for `smolvm pack create` outputs. Keyed by the
# docker image ID so a Dockerfile change automatically invalidates
# the cache. `pack create` is idempotent on the smolvm side but
# takes several seconds even on a no-op rebuild.
_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines"
# Container-internal listening ports for each bundle daemon. The # Container-internal listening ports for each bundle daemon. The
@@ -199,17 +213,25 @@ def launch(
agent_supervise_url=agent_supervise_url, agent_supervise_url=agent_supervise_url,
) )
# 5. smolvm VM. --from carries the pre-packed .smolmachine # 5. Build the agent image and pack it into a
# artifact (built by prepare); --allow-cidr + -e carry the # `.smolmachine` artifact (or hit the per-Dockerfile-digest
# per-bottle TSI allowlist + env. The allowlist is the # cache). Runs here, not in prepare, so the docker-build
# per-bottle loopback alias — narrowing it to one /32 keeps # output doesn't garble the dashboard's preflight modal:
# the agent from reaching other host loopback services or # both the curses-endwin path and the tmux pane-routing
# path redirect stderr around `launch` already.
agent_from_path = _ensure_smolmachine(plan.agent_image_ref)
# smolvm VM. --from carries the pre-packed .smolmachine
# artifact; --allow-cidr + -e carry the per-bottle TSI
# allowlist + env. The allowlist is the per-bottle
# loopback alias — narrowing it to one /32 keeps the
# agent from reaching other host loopback services or
# other bottles' published ports. Smolfile isn't usable # other bottles' published ports. Smolfile isn't usable
# here — smolvm 0.8.0 makes `--from` and `--smolfile` # here — smolvm 0.8.0 makes `--from` and `--smolfile`
# mutually exclusive. # mutually exclusive.
_smolvm.machine_create( _smolvm.machine_create(
plan.machine_name, plan.machine_name,
from_path=plan.agent_from_path, from_path=agent_from_path,
allow_cidrs=[f"{loopback_ip}/32"], allow_cidrs=[f"{loopback_ip}/32"],
env=plan.guest_env, env=plan.guest_env,
) )
@@ -389,3 +411,53 @@ def _resolve_token_env(
if not ep.routes: if not ep.routes:
return {} return {}
return egress_resolve_token_values(ep.token_env_map, dict(host_env)) return egress_resolve_token_values(ep.token_env_map, dict(host_env))
def _ensure_smolmachine(image_ref: str) -> Path:
    """Build the agent docker image and convert it into a
    `.smolmachine` artifact, caching the result under
    `~/.cache/claude-bottle/smolmachines/` keyed by the docker image
    ID (so a Dockerfile change automatically invalidates the cache).

    Returns the `.smolmachine.smolmachine` sidecar path — that's
    the file `machine create --from` consumes (pack create produces
    a launcher binary at `.smolmachine` plus the sidecar alongside
    it; the sidecar is the actual artifact).

    Conversion path: `docker build` (the existing layer cache
    makes no-change rebuilds cheap) → `docker save` to a tarball
    → spin up an ephemeral registry on a private docker network →
    `crane push --insecure` from a one-shot container on the same
    network → `smolvm pack create --image localhost:<host port>/...`
    → tear down the registry + network. The crane push detour
    sidesteps the Docker-Desktop daemon's HTTPS preference for
    non-loopback registries — see the `local_registry` module
    docstring for the gory details.

    Each pack-create costs several seconds even on a hot cache,
    so we skip the whole pipeline when the cached sidecar is
    already on disk for this image ID. The `docker build` itself
    always runs, so the image ID reflects the current Dockerfile
    before the cache is consulted.

    The intermediate tarball is removed on every exit path past the
    cache check, including a failed or interrupted `docker save`, so
    a partial multi-hundred-MB file is never left in the cache dir.
    """
    _SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    docker_mod.build_image(image_ref, _REPO_DIR)

    # `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
    # keep filenames manageable, long enough to make collisions
    # astronomically unlikely.
    digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
    binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
    sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
    if sidecar.is_file():
        return sidecar

    tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
    try:
        # The save lives inside the try so a failure part-way through
        # still hits the cleanup below instead of stranding a partial
        # tarball next to the cached artifacts.
        docker_mod.save(image_ref, str(tarball))
        with ephemeral_registry() as handle:
            push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
            pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
            crane_push_tarball(handle, str(tarball), push_ref)
            _smolvm.pack_create(pack_ref, binary)
    finally:
        # Tarball is ~500MB-1GB for the agent image; reclaim once
        # the smolmachine artifact exists (or the pipeline failed).
        # The artifact itself is the long-lived cache entry.
        tarball.unlink(missing_ok=True)
    return sidecar
+9 -79
View File
@@ -1,12 +1,10 @@
"""smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c). """smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c).
Resolves the per-bottle docker subnet + bundle IP, builds the Resolves the per-bottle docker subnet + bundle IP and assembles
agent's docker image from the repo Dockerfile, converts it into a the guest env. The agent's docker image build → smolmachine
`.smolmachine` artifact via an ephemeral local registry (smolvm's pack pipeline runs in `launch.launch`, not here, so the
crane backend only reads registry refs), and assembles the guest dashboard's preflight modal isn't garbled by docker-build output
env. The `.smolmachine` is cached under before the operator has confirmed.
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
ID so Dockerfile changes invalidate the cache automatically.
No VM bringup — that's `launch.launch`'s job.""" No VM bringup — that's `launch.launch`'s job."""
@@ -17,7 +15,6 @@ from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from ...backend import BottleSpec from ...backend import BottleSpec
from ...backend.docker import util as docker_mod
from ...backend.docker.bottle_state import ( from ...backend.docker.bottle_state import (
BottleMetadata, BottleMetadata,
agent_state_dir, agent_state_dir,
@@ -32,23 +29,10 @@ from ...egress import Egress
from ...git_gate import GitGate from ...git_gate import GitGate
from ...pipelock import PipelockProxy from ...pipelock import PipelockProxy
from ...supervise import Supervise from ...supervise import Supervise
from . import smolvm as _smolvm
from .bottle_plan import SmolmachinesBottlePlan from .bottle_plan import SmolmachinesBottlePlan
from .local_registry import crane_push_tarball, ephemeral_registry
from .util import smolmachines_bundle_subnet, smolmachines_preflight from .util import smolmachines_bundle_subnet, smolmachines_preflight
# Repo root, used as the `docker build` context for the agent image.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
# Per-host cache for `smolvm pack create` outputs. Keyed by the
# image ref so re-prepares for the same image hit the cache
# (pack create is idempotent on the smolvm side but takes several
# seconds even when no layer is fetched).
_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines"
# Gateway ports the bundle exposes inside its container — pipelock # Gateway ports the bundle exposes inside its container — pipelock
# HTTPS proxy, git-gate's git-daemon, supervise's MCP. The agent # HTTPS proxy, git-gate's git-daemon, supervise's MCP. The agent
# inside the smolvm guest dials these on the bundle's pinned IP. # inside the smolvm guest dials these on the bundle's pinned IP.
@@ -158,16 +142,12 @@ def resolve_plan(
prompt_file.chmod(0o600) prompt_file.chmod(0o600)
machine_name = f"claude-bottle-{slug}" machine_name = f"claude-bottle-{slug}"
# Build the agent image from the repo Dockerfile (shared with # Stash the agent image ref — `launch.launch` runs the
# the docker backend, layer-cached) and convert it into a # build → pack pipeline at bringup. Honors CLAUDE_BOTTLE_IMAGE
# `.smolmachine` artifact via an ephemeral local registry. The # to match the docker backend's `resolve_plan` default.
# CLAUDE_BOTTLE_IMAGE env var match the docker backend's
# resolve_plan default so both backends use the same image when
# one is built.
agent_image_ref = os.environ.get( agent_image_ref = os.environ.get(
"CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest" "CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest"
) )
agent_from_path = _ensure_smolmachine(agent_image_ref)
return SmolmachinesBottlePlan( return SmolmachinesBottlePlan(
spec=spec, spec=spec,
@@ -177,7 +157,7 @@ def resolve_plan(
bundle_gateway=gateway, bundle_gateway=gateway,
bundle_ip=bundle_ip, bundle_ip=bundle_ip,
machine_name=machine_name, machine_name=machine_name,
agent_from_path=agent_from_path, agent_image_ref=agent_image_ref,
guest_env=guest_env, guest_env=guest_env,
prompt_file=prompt_file, prompt_file=prompt_file,
proxy_plan=proxy_plan, proxy_plan=proxy_plan,
@@ -185,53 +165,3 @@ def resolve_plan(
egress_plan=egress_plan, egress_plan=egress_plan,
supervise_plan=supervise_plan, supervise_plan=supervise_plan,
) )
def _ensure_smolmachine(image_ref: str) -> Path:
"""Build the agent docker image and convert it into a
`.smolmachine` artifact, caching the result under
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
ID (so a Dockerfile change automatically invalidates the cache).
Returns the `.smolmachine.smolmachine` sidecar path — that's
the file `machine create --from` consumes (pack create produces
a launcher binary at `.smolmachine` plus the sidecar alongside
it; the sidecar is the actual artifact).
Conversion path: `docker build` (the existing layer cache
makes no-change rebuilds cheap) → `docker save` to a tarball
→ spin up an ephemeral registry on a private docker network →
`crane push --insecure` from a one-shot container on the same
network → `smolvm pack create --image localhost:<host port>/...`
→ tear down the registry + network. The crane push detour
sidesteps the Docker-Desktop daemon's HTTPS preference for
non-loopback registries — see the `local_registry` module
docstring for the gory details.
Each pack-create costs several seconds even on a hot cache,
so we skip the whole pipeline when the cached sidecar is
already on disk for this image ID."""
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
docker_mod.build_image(image_ref, _REPO_DIR)
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
# keep filenames manageable, long enough to make collisions
# astronomically unlikely.
digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
if sidecar.is_file():
return sidecar
tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
docker_mod.save(image_ref, str(tarball))
try:
with ephemeral_registry() as handle:
push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
crane_push_tarball(handle, str(tarball), push_ref)
_smolvm.pack_create(pack_ref, binary)
finally:
# Tarball is ~500MB-1GB for the agent image; reclaim once
# the smolmachine artifact exists. The artifact itself is
# the long-lived cache entry.
tarball.unlink(missing_ok=True)
return sidecar
@@ -4,7 +4,12 @@
Asserts that the cache-hit path returns without touching the Asserts that the cache-hit path returns without touching the
registry / pack pipeline, and that the cache-miss path runs registry / pack pipeline, and that the cache-miss path runs
build tag push pack in order against a registry port the build tag push pack in order against a registry port the
helper yields.""" helper yields.
The pipeline lives in `launch.py` (moved from `prepare.py` so the
docker build doesn't run before the dashboard's preflight modal;
the curses-endwin / tmux pane-routing handoff happens around
`launch`)."""
from __future__ import annotations from __future__ import annotations
@@ -13,14 +18,14 @@ import unittest
from pathlib import Path from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
from claude_bottle.backend.smolmachines import prepare as _prepare from claude_bottle.backend.smolmachines import launch as _launch_mod
class TestEnsureSmolmachine(unittest.TestCase): class TestEnsureSmolmachine(unittest.TestCase):
def setUp(self): def setUp(self):
self._tmp = tempfile.TemporaryDirectory(prefix="cb-cache.") self._tmp = tempfile.TemporaryDirectory(prefix="cb-cache.")
self._cache_patch = patch.object( self._cache_patch = patch.object(
_prepare, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name), _launch_mod, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name),
) )
self._cache_patch.start() self._cache_patch.start()
@@ -35,20 +40,20 @@ class TestEnsureSmolmachine(unittest.TestCase):
sidecar.write_text("") sidecar.write_text("")
with patch.object( with patch.object(
_prepare.docker_mod, "build_image", _launch_mod.docker_mod, "build_image",
) as build, patch.object( ) as build, patch.object(
_prepare.docker_mod, "image_id", _launch_mod.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff", return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object( ), patch.object(
_prepare.docker_mod, "save", _launch_mod.docker_mod, "save",
) as save, patch.object( ) as save, patch.object(
_prepare, "ephemeral_registry", _launch_mod, "ephemeral_registry",
) as registry, patch.object( ) as registry, patch.object(
_prepare, "crane_push_tarball", _launch_mod, "crane_push_tarball",
) as push, patch.object( ) as push, patch.object(
_prepare._smolvm, "pack_create", _launch_mod._smolvm, "pack_create",
) as pack: ) as pack:
result = _prepare._ensure_smolmachine("claude-bottle:latest") result = _launch_mod._ensure_smolmachine("claude-bottle:latest")
self.assertEqual(sidecar, result) self.assertEqual(sidecar, result)
# build still runs (Dockerfile edits land without manual rmi). # build still runs (Dockerfile edits land without manual rmi).
@@ -88,25 +93,25 @@ class TestEnsureSmolmachine(unittest.TestCase):
return _f return _f
with patch.object( with patch.object(
_prepare.docker_mod, "build_image", _launch_mod.docker_mod, "build_image",
side_effect=record("build"), side_effect=record("build"),
), patch.object( ), patch.object(
_prepare.docker_mod, "image_id", _launch_mod.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff", return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object( ), patch.object(
_prepare.docker_mod, "save", _launch_mod.docker_mod, "save",
side_effect=record("save"), side_effect=record("save"),
) as save, patch.object( ) as save, patch.object(
_prepare, "ephemeral_registry", _launch_mod, "ephemeral_registry",
return_value=_Reg(), return_value=_Reg(),
), patch.object( ), patch.object(
_prepare, "crane_push_tarball", _launch_mod, "crane_push_tarball",
side_effect=record("push"), side_effect=record("push"),
) as push, patch.object( ) as push, patch.object(
_prepare._smolvm, "pack_create", _launch_mod._smolvm, "pack_create",
side_effect=record("pack"), side_effect=record("pack"),
) as pack: ) as pack:
_prepare._ensure_smolmachine("claude-bottle:latest") _launch_mod._ensure_smolmachine("claude-bottle:latest")
# Build → save → push → pack in that order. No `docker # Build → save → push → pack in that order. No `docker
# push` (the daemon's HTTPS-by-default path is what we're # push` (the daemon's HTTPS-by-default path is what we're
+1 -1
View File
@@ -90,7 +90,7 @@ def _plan(
bundle_gateway="192.168.50.1", bundle_gateway="192.168.50.1",
bundle_ip=bundle_ip, bundle_ip=bundle_ip,
machine_name="claude-bottle-demo-abc12", machine_name="claude-bottle-demo-abc12",
agent_from_path=Path("/tmp/agent.smolmachine"), agent_image_ref="claude-bottle:latest",
guest_env={}, guest_env={},
prompt_file=Path("/tmp/state/demo-abc12/agent/prompt.txt"), prompt_file=Path("/tmp/state/demo-abc12/agent/prompt.txt"),
proxy_plan=PipelockProxyPlan( proxy_plan=PipelockProxyPlan(