fix(smolmachines): build agent image in launch, not prepare
test / unit (push) Successful in 26s
test / integration (push) Successful in 43s

When starting a smolmachines agent from the dashboard the
docker-build output rendered on top of the curses preflight
modal — the build was kicked off before the operator had
confirmed launch. The docker backend's `prepare` is pure
resolution (no docker calls); smolmachines was inconsistent
because `prepare` called `_ensure_smolmachine` which ran
`docker build` → `docker save` → `crane push` → `smolvm pack
create`, several seconds of stderr noise rendered before the
y/N prompt.

Move the pipeline:

- `_ensure_smolmachine` (+ `_SMOLMACHINE_CACHE_DIR` + `_REPO_DIR`
  + the local-registry / smolvm imports) moves from
  `backend/smolmachines/prepare.py` to
  `backend/smolmachines/launch.py`. Called right before
  `_smolvm.machine_create` so the resulting `.smolmachine`
  sidecar path lands as a local in `launch`, not on the plan.

- `SmolmachinesBottlePlan.agent_from_path: Path` becomes
  `agent_image_ref: str`. `prepare` stashes only the docker tag
  (`$CLAUDE_BOTTLE_IMAGE` || `claude-bottle:latest`); `launch`
  resolves it into the artifact at bringup.

This puts smolmachines on the same prepare-vs-launch boundary
the docker backend uses: the preflight summary in the dashboard
prints, the operator confirms, then `launch` runs — and its
stderr is routed via `_route_op_to_right_pane` (in tmux) or via
`curses.endwin` (foreground handoff) so the build output lands
cleanly.

Tests:
- `tests/unit/test_smolmachines_prepare_image.py` →
  `tests/unit/test_smolmachines_launch_image.py`, updated to
  import `_ensure_smolmachine` from `launch` rather than
  `prepare`.
- `test_smolmachines_provision.py`: plan fixture switches
  `agent_from_path` → `agent_image_ref`.

593 unit tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit was merged in pull request #80.
This commit is contained in:
2026-05-27 19:37:15 -04:00
committed by didericis
parent 5d740a6948
commit 5e0130b56f
5 changed files with 117 additions and 104 deletions
@@ -48,7 +48,13 @@ class SmolmachinesBottlePlan(BottlePlan):
# (push to a registry first, or smolvm grows a docker-daemon
# transport).
machine_name: str
agent_from_path: Path
# Agent image ref (docker tag). `launch` runs the
# build → save → registry push → smolvm pack pipeline against
# this and feeds the resulting `.smolmachine` artifact to
# `machine_create --from`. The pipeline runs at launch time
# (not prepare time) so the docker build output doesn't garble
# the dashboard's preflight modal.
agent_image_ref: str
# In-guest env vars (HTTPS_PROXY etc) — IP-literal URLs since
# the guest has no DNS resolver inside the TSI allowlist.
# Passed to `smolvm machine create` as `-e K=V` flags.
+78 -6
View File
@@ -23,6 +23,7 @@ import dataclasses
import os
import time
from contextlib import ExitStack, contextmanager
from pathlib import Path
from typing import Callable, Generator
from ...egress import EGRESS_ROUTES_IN_CONTAINER, egress_resolve_token_values
@@ -32,6 +33,7 @@ from ...pipelock import (
)
from ...supervise import QUEUE_DIR_IN_CONTAINER, SUPERVISE_PORT
from ...util import expand_tilde
from ..docker import util as docker_mod
from ..docker.egress import (
EGRESS_CA_IN_CONTAINER,
EGRESS_PIPELOCK_CA_IN_CONTAINER,
@@ -55,6 +57,18 @@ from . import sidecar_bundle as _bundle
from . import smolvm as _smolvm
from .bottle import SmolmachinesBottle
from .bottle_plan import SmolmachinesBottlePlan
from .local_registry import crane_push_tarball, ephemeral_registry
# Repo root, used as the `docker build` context for the agent image.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
# Per-host cache for `smolvm pack create` outputs. Keyed by the
# docker image ID so a Dockerfile change automatically invalidates
# the cache. `pack create` is idempotent on the smolvm side but
# takes several seconds even on a no-op rebuild.
_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines"
# Container-internal listening ports for each bundle daemon. The
@@ -199,17 +213,25 @@ def launch(
agent_supervise_url=agent_supervise_url,
)
# 5. smolvm VM. --from carries the pre-packed .smolmachine
# artifact (built by prepare); --allow-cidr + -e carry the
# per-bottle TSI allowlist + env. The allowlist is the
# per-bottle loopback alias — narrowing it to one /32 keeps
# the agent from reaching other host loopback services or
# 5. Build the agent image and pack it into a
# `.smolmachine` artifact (or hit the per-Dockerfile-digest
# cache). Runs here, not in prepare, so the docker-build
# output doesn't garble the dashboard's preflight modal:
# both the curses-endwin path and the tmux pane-routing
# path redirect stderr around `launch` already.
agent_from_path = _ensure_smolmachine(plan.agent_image_ref)
# smolvm VM. --from carries the pre-packed .smolmachine
# artifact; --allow-cidr + -e carry the per-bottle TSI
# allowlist + env. The allowlist is the per-bottle
# loopback alias — narrowing it to one /32 keeps the
# agent from reaching other host loopback services or
# other bottles' published ports. Smolfile isn't usable
# here — smolvm 0.8.0 makes `--from` and `--smolfile`
# mutually exclusive.
_smolvm.machine_create(
plan.machine_name,
from_path=plan.agent_from_path,
from_path=agent_from_path,
allow_cidrs=[f"{loopback_ip}/32"],
env=plan.guest_env,
)
@@ -389,3 +411,53 @@ def _resolve_token_env(
if not ep.routes:
return {}
return egress_resolve_token_values(ep.token_env_map, dict(host_env))
def _ensure_smolmachine(image_ref: str) -> Path:
"""Build the agent docker image and convert it into a
`.smolmachine` artifact, caching the result under
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
ID (so a Dockerfile change automatically invalidates the cache).
Returns the `.smolmachine.smolmachine` sidecar path — that's
the file `machine create --from` consumes (pack create produces
a launcher binary at `.smolmachine` plus the sidecar alongside
it; the sidecar is the actual artifact).
Conversion path: `docker build` (the existing layer cache
makes no-change rebuilds cheap) → `docker save` to a tarball
→ spin up an ephemeral registry on a private docker network →
`crane push --insecure` from a one-shot container on the same
network → `smolvm pack create --image localhost:<host port>/...`
→ tear down the registry + network. The crane push detour
sidesteps the Docker-Desktop daemon's HTTPS preference for
non-loopback registries — see the `local_registry` module
docstring for the gory details.
Each pack-create costs several seconds even on a hot cache,
so we skip the whole pipeline when the cached sidecar is
already on disk for this image ID."""
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
docker_mod.build_image(image_ref, _REPO_DIR)
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
# keep filenames manageable, long enough to make collisions
# astronomically unlikely.
digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
if sidecar.is_file():
return sidecar
tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
docker_mod.save(image_ref, str(tarball))
try:
with ephemeral_registry() as handle:
push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
crane_push_tarball(handle, str(tarball), push_ref)
_smolvm.pack_create(pack_ref, binary)
finally:
# Tarball is ~500MB-1GB for the agent image; reclaim once
# the smolmachine artifact exists. The artifact itself is
# the long-lived cache entry.
tarball.unlink(missing_ok=True)
return sidecar
+9 -79
View File
@@ -1,12 +1,10 @@
"""smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c).
Resolves the per-bottle docker subnet + bundle IP, builds the
agent's docker image from the repo Dockerfile, converts it into a
`.smolmachine` artifact via an ephemeral local registry (smolvm's
crane backend only reads registry refs), and assembles the guest
env. The `.smolmachine` is cached under
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
ID so Dockerfile changes invalidate the cache automatically.
Resolves the per-bottle docker subnet + bundle IP and assembles
the guest env. The agent's docker image build → smolmachine
pack pipeline runs in `launch.launch`, not here, so the
dashboard's preflight modal isn't garbled by docker-build output
before the operator has confirmed.
No VM bringup — that's `launch.launch`'s job."""
@@ -17,7 +15,6 @@ from datetime import datetime, timezone
from pathlib import Path
from ...backend import BottleSpec
from ...backend.docker import util as docker_mod
from ...backend.docker.bottle_state import (
BottleMetadata,
agent_state_dir,
@@ -32,23 +29,10 @@ from ...egress import Egress
from ...git_gate import GitGate
from ...pipelock import PipelockProxy
from ...supervise import Supervise
from . import smolvm as _smolvm
from .bottle_plan import SmolmachinesBottlePlan
from .local_registry import crane_push_tarball, ephemeral_registry
from .util import smolmachines_bundle_subnet, smolmachines_preflight
# Repo root, used as the `docker build` context for the agent image.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
# Per-host cache for `smolvm pack create` outputs. Keyed by the
# image ref so re-prepares for the same image hit the cache
# (pack create is idempotent on the smolvm side but takes several
# seconds even when no layer is fetched).
_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines"
# Gateway ports the bundle exposes inside its container — pipelock
# HTTPS proxy, git-gate's git-daemon, supervise's MCP. The agent
# inside the smolvm guest dials these on the bundle's pinned IP.
@@ -158,16 +142,12 @@ def resolve_plan(
prompt_file.chmod(0o600)
machine_name = f"claude-bottle-{slug}"
# Build the agent image from the repo Dockerfile (shared with
# the docker backend, layer-cached) and convert it into a
# `.smolmachine` artifact via an ephemeral local registry. The
# CLAUDE_BOTTLE_IMAGE env var match the docker backend's
# resolve_plan default so both backends use the same image when
# one is built.
# Stash the agent image ref — `launch.launch` runs the
# build → pack pipeline at bringup. Honors CLAUDE_BOTTLE_IMAGE
# to match the docker backend's `resolve_plan` default.
agent_image_ref = os.environ.get(
"CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest"
)
agent_from_path = _ensure_smolmachine(agent_image_ref)
return SmolmachinesBottlePlan(
spec=spec,
@@ -177,7 +157,7 @@ def resolve_plan(
bundle_gateway=gateway,
bundle_ip=bundle_ip,
machine_name=machine_name,
agent_from_path=agent_from_path,
agent_image_ref=agent_image_ref,
guest_env=guest_env,
prompt_file=prompt_file,
proxy_plan=proxy_plan,
@@ -185,53 +165,3 @@ def resolve_plan(
egress_plan=egress_plan,
supervise_plan=supervise_plan,
)
def _ensure_smolmachine(image_ref: str) -> Path:
"""Build the agent docker image and convert it into a
`.smolmachine` artifact, caching the result under
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
ID (so a Dockerfile change automatically invalidates the cache).
Returns the `.smolmachine.smolmachine` sidecar path — that's
the file `machine create --from` consumes (pack create produces
a launcher binary at `.smolmachine` plus the sidecar alongside
it; the sidecar is the actual artifact).
Conversion path: `docker build` (the existing layer cache
makes no-change rebuilds cheap) → `docker save` to a tarball
→ spin up an ephemeral registry on a private docker network →
`crane push --insecure` from a one-shot container on the same
network → `smolvm pack create --image localhost:<host port>/...`
→ tear down the registry + network. The crane push detour
sidesteps the Docker-Desktop daemon's HTTPS preference for
non-loopback registries — see the `local_registry` module
docstring for the gory details.
Each pack-create costs several seconds even on a hot cache,
so we skip the whole pipeline when the cached sidecar is
already on disk for this image ID."""
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
docker_mod.build_image(image_ref, _REPO_DIR)
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
# keep filenames manageable, long enough to make collisions
# astronomically unlikely.
digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
if sidecar.is_file():
return sidecar
tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
docker_mod.save(image_ref, str(tarball))
try:
with ephemeral_registry() as handle:
push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
crane_push_tarball(handle, str(tarball), push_ref)
_smolvm.pack_create(pack_ref, binary)
finally:
# Tarball is ~500MB-1GB for the agent image; reclaim once
# the smolmachine artifact exists. The artifact itself is
# the long-lived cache entry.
tarball.unlink(missing_ok=True)
return sidecar
@@ -4,7 +4,12 @@
Asserts that the cache-hit path returns without touching the
registry / pack pipeline, and that the cache-miss path runs
build tag push pack in order against a registry port the
helper yields."""
helper yields.
The pipeline lives in `launch.py` (moved from `prepare.py` so the
docker build doesn't run before the dashboard's preflight modal;
the curses-endwin / tmux pane-routing handoff happens around
`launch`)."""
from __future__ import annotations
@@ -13,14 +18,14 @@ import unittest
from pathlib import Path
from unittest.mock import patch
from claude_bottle.backend.smolmachines import prepare as _prepare
from claude_bottle.backend.smolmachines import launch as _launch_mod
class TestEnsureSmolmachine(unittest.TestCase):
def setUp(self):
self._tmp = tempfile.TemporaryDirectory(prefix="cb-cache.")
self._cache_patch = patch.object(
_prepare, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name),
_launch_mod, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name),
)
self._cache_patch.start()
@@ -35,20 +40,20 @@ class TestEnsureSmolmachine(unittest.TestCase):
sidecar.write_text("")
with patch.object(
_prepare.docker_mod, "build_image",
_launch_mod.docker_mod, "build_image",
) as build, patch.object(
_prepare.docker_mod, "image_id",
_launch_mod.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object(
_prepare.docker_mod, "save",
_launch_mod.docker_mod, "save",
) as save, patch.object(
_prepare, "ephemeral_registry",
_launch_mod, "ephemeral_registry",
) as registry, patch.object(
_prepare, "crane_push_tarball",
_launch_mod, "crane_push_tarball",
) as push, patch.object(
_prepare._smolvm, "pack_create",
_launch_mod._smolvm, "pack_create",
) as pack:
result = _prepare._ensure_smolmachine("claude-bottle:latest")
result = _launch_mod._ensure_smolmachine("claude-bottle:latest")
self.assertEqual(sidecar, result)
# build still runs (Dockerfile edits land without manual rmi).
@@ -88,25 +93,25 @@ class TestEnsureSmolmachine(unittest.TestCase):
return _f
with patch.object(
_prepare.docker_mod, "build_image",
_launch_mod.docker_mod, "build_image",
side_effect=record("build"),
), patch.object(
_prepare.docker_mod, "image_id",
_launch_mod.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object(
_prepare.docker_mod, "save",
_launch_mod.docker_mod, "save",
side_effect=record("save"),
) as save, patch.object(
_prepare, "ephemeral_registry",
_launch_mod, "ephemeral_registry",
return_value=_Reg(),
), patch.object(
_prepare, "crane_push_tarball",
_launch_mod, "crane_push_tarball",
side_effect=record("push"),
) as push, patch.object(
_prepare._smolvm, "pack_create",
_launch_mod._smolvm, "pack_create",
side_effect=record("pack"),
) as pack:
_prepare._ensure_smolmachine("claude-bottle:latest")
_launch_mod._ensure_smolmachine("claude-bottle:latest")
# Build → save → push → pack in that order. No `docker
# push` (the daemon's HTTPS-by-default path is what we're
+1 -1
View File
@@ -90,7 +90,7 @@ def _plan(
bundle_gateway="192.168.50.1",
bundle_ip=bundle_ip,
machine_name="claude-bottle-demo-abc12",
agent_from_path=Path("/tmp/agent.smolmachine"),
agent_image_ref="claude-bottle:latest",
guest_env={},
prompt_file=Path("/tmp/state/demo-abc12/agent/prompt.txt"),
proxy_plan=PipelockProxyPlan(