From 5e0130b56f9a4277927283998d2cfbe66e8e834d Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 19:37:15 -0400 Subject: [PATCH] fix(smolmachines): build agent image in launch, not prepare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When starting a smolmachines agent from the dashboard the docker-build output rendered on top of the curses preflight modal — the build was kicked off before the operator had confirmed launch. The docker backend's `prepare` is pure resolution (no docker calls); smolmachines was inconsistent because `prepare` called `_ensure_smolmachine` which ran `docker build` → `docker save` → `crane push` → `smolvm pack create`, several seconds of stderr noise rendered before the y/N prompt. Move the pipeline: - `_ensure_smolmachine` (+ `_SMOLMACHINE_CACHE_DIR` + `_REPO_DIR` + the local-registry / smolvm imports) moves from `backend/smolmachines/prepare.py` to `backend/smolmachines/launch.py`. Called right before `_smolvm.machine_create` so the resulting `.smolmachine` sidecar path lands as a local in `launch`, not on the plan. - `SmolmachinesBottlePlan.agent_from_path: Path` becomes `agent_image_ref: str`. `prepare` stashes only the docker tag (`$CLAUDE_BOTTLE_IMAGE` || `claude-bottle:latest`); `launch` resolves it into the artifact at bringup. This puts smolmachines on the same prepare-vs-launch boundary the docker backend uses: the preflight summary in the dashboard prints, the operator confirms, then `launch` runs — and its stderr is routed via `_route_op_to_right_pane` (in tmux) or via `curses.endwin` (foreground handoff) so the build output lands cleanly. Tests: - `tests/unit/test_smolmachines_prepare_image.py` → `tests/unit/test_smolmachines_launch_image.py`, updated to import `_ensure_smolmachine` from `launch` rather than `prepare`. - `test_smolmachines_provision.py`: plan fixture switches `agent_from_path` → `agent_image_ref`. 593 unit tests pass. 
Co-Authored-By: Claude Opus 4.7 --- .../backend/smolmachines/bottle_plan.py | 8 +- claude_bottle/backend/smolmachines/launch.py | 84 ++++++++++++++++-- claude_bottle/backend/smolmachines/prepare.py | 88 ++----------------- ...e.py => test_smolmachines_launch_image.py} | 39 ++++---- tests/unit/test_smolmachines_provision.py | 2 +- 5 files changed, 117 insertions(+), 104 deletions(-) rename tests/unit/{test_smolmachines_prepare_image.py => test_smolmachines_launch_image.py} (79%) diff --git a/claude_bottle/backend/smolmachines/bottle_plan.py b/claude_bottle/backend/smolmachines/bottle_plan.py index c515002..76a1d6b 100644 --- a/claude_bottle/backend/smolmachines/bottle_plan.py +++ b/claude_bottle/backend/smolmachines/bottle_plan.py @@ -48,7 +48,13 @@ class SmolmachinesBottlePlan(BottlePlan): # (push to a registry first, or smolvm grows a docker-daemon # transport). machine_name: str - agent_from_path: Path + # Agent image ref (docker tag). `launch` runs the + # build → save → registry push → smolvm pack pipeline against + # this and feeds the resulting `.smolmachine` artifact to + # `machine_create --from`. The pipeline runs at launch time + # (not prepare time) so the docker build output doesn't garble + # the dashboard's preflight modal. + agent_image_ref: str # In-guest env vars (HTTPS_PROXY etc) — IP-literal URLs since # the guest has no DNS resolver inside the TSI allowlist. # Passed to `smolvm machine create` as `-e K=V` flags. 
diff --git a/claude_bottle/backend/smolmachines/launch.py b/claude_bottle/backend/smolmachines/launch.py index 7bafedc..db8c1da 100644 --- a/claude_bottle/backend/smolmachines/launch.py +++ b/claude_bottle/backend/smolmachines/launch.py @@ -23,6 +23,7 @@ import dataclasses import os import time from contextlib import ExitStack, contextmanager +from pathlib import Path from typing import Callable, Generator from ...egress import EGRESS_ROUTES_IN_CONTAINER, egress_resolve_token_values @@ -32,6 +33,7 @@ from ...pipelock import ( ) from ...supervise import QUEUE_DIR_IN_CONTAINER, SUPERVISE_PORT from ...util import expand_tilde +from ..docker import util as docker_mod from ..docker.egress import ( EGRESS_CA_IN_CONTAINER, EGRESS_PIPELOCK_CA_IN_CONTAINER, @@ -55,6 +57,18 @@ from . import sidecar_bundle as _bundle from . import smolvm as _smolvm from .bottle import SmolmachinesBottle from .bottle_plan import SmolmachinesBottlePlan +from .local_registry import crane_push_tarball, ephemeral_registry + + +# Repo root, used as the `docker build` context for the agent image. +_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) + + +# Per-host cache for `smolvm pack create` outputs. Keyed by the +# docker image ID so a Dockerfile change automatically invalidates +# the cache. `pack create` is idempotent on the smolvm side but +# takes several seconds even on a no-op rebuild. +_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines" # Container-internal listening ports for each bundle daemon. The @@ -199,17 +213,25 @@ def launch( agent_supervise_url=agent_supervise_url, ) - # 5. smolvm VM. --from carries the pre-packed .smolmachine - # artifact (built by prepare); --allow-cidr + -e carry the - # per-bottle TSI allowlist + env. The allowlist is the - # per-bottle loopback alias — narrowing it to one /32 keeps - # the agent from reaching other host loopback services or + # 5. 
Build the agent image and pack it into a + # `.smolmachine` artifact (or hit the per-image-ID + # cache). Runs here, not in prepare, so the docker-build + # output doesn't garble the dashboard's preflight modal: + # both the curses-endwin path and the tmux pane-routing + # path redirect stderr around `launch` already. + agent_from_path = _ensure_smolmachine(plan.agent_image_ref) + + # smolvm VM. --from carries the pre-packed .smolmachine + # artifact; --allow-cidr + -e carry the per-bottle TSI + # allowlist + env. The allowlist is the per-bottle + # loopback alias — narrowing it to one /32 keeps the + # agent from reaching other host loopback services or + # other bottles' published ports. Smolfile isn't usable # here — smolvm 0.8.0 makes `--from` and `--smolfile` # mutually exclusive. _smolvm.machine_create( plan.machine_name, - from_path=plan.agent_from_path, + from_path=agent_from_path, allow_cidrs=[f"{loopback_ip}/32"], env=plan.guest_env, ) @@ -389,3 +411,53 @@ def _resolve_token_env( if not ep.routes: return {} return egress_resolve_token_values(ep.token_env_map, dict(host_env)) + + +def _ensure_smolmachine(image_ref: str) -> Path: + """Build the agent docker image and convert it into a + `.smolmachine` artifact, caching the result under + `~/.cache/claude-bottle/smolmachines/` keyed by the docker image + ID (so a Dockerfile change automatically invalidates the cache). + + Returns the `<digest>.smolmachine.smolmachine` sidecar path — that's + the file `machine create --from` consumes (pack create produces + a launcher binary at `<digest>.smolmachine` plus the sidecar alongside + it; the sidecar is the actual artifact). + + Conversion path: `docker build` (the existing layer cache + makes no-change rebuilds cheap) → `docker save` to a tarball + → spin up an ephemeral registry on a private docker network → + `crane push --insecure` from a one-shot container on the same + network → `smolvm pack create --image localhost:<port>/...` + → tear down the registry + network.
The crane push detour + sidesteps the Docker-Desktop daemon's HTTPS preference for + non-loopback registries — see the `local_registry` module + docstring for the gory details. + + Each pack-create costs several seconds even on a hot cache, + so we skip the whole pipeline when the cached sidecar is + already on disk for this image ID.""" + _SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True) + docker_mod.build_image(image_ref, _REPO_DIR) + # `sha256:abcd...` -> `abcd...` first 16 chars: short enough to + # keep filenames manageable, long enough to make collisions + # astronomically unlikely. + digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16] + binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine" + sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine" + if sidecar.is_file(): + return sidecar + tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar" + docker_mod.save(image_ref, str(tarball)) + try: + with ephemeral_registry() as handle: + push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}" + pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}" + crane_push_tarball(handle, str(tarball), push_ref) + _smolvm.pack_create(pack_ref, binary) + finally: + # Tarball is ~500MB-1GB for the agent image; reclaim once + # the smolmachine artifact exists. The artifact itself is + # the long-lived cache entry. + tarball.unlink(missing_ok=True) + return sidecar diff --git a/claude_bottle/backend/smolmachines/prepare.py b/claude_bottle/backend/smolmachines/prepare.py index fd5b4a3..b43191b 100644 --- a/claude_bottle/backend/smolmachines/prepare.py +++ b/claude_bottle/backend/smolmachines/prepare.py @@ -1,12 +1,10 @@ """smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c). 
-Resolves the per-bottle docker subnet + bundle IP, builds the -agent's docker image from the repo Dockerfile, converts it into a -`.smolmachine` artifact via an ephemeral local registry (smolvm's -crane backend only reads registry refs), and assembles the guest -env. The `.smolmachine` is cached under -`~/.cache/claude-bottle/smolmachines/` keyed by the docker image -ID so Dockerfile changes invalidate the cache automatically. +Resolves the per-bottle docker subnet + bundle IP and assembles +the guest env. The agent's docker image build → smolmachine +pack pipeline runs in `launch.launch`, not here, so the +dashboard's preflight modal isn't garbled by docker-build output +before the operator has confirmed. No VM bringup — that's `launch.launch`'s job.""" @@ -17,7 +15,6 @@ from datetime import datetime, timezone from pathlib import Path from ...backend import BottleSpec -from ...backend.docker import util as docker_mod from ...backend.docker.bottle_state import ( BottleMetadata, agent_state_dir, @@ -32,23 +29,10 @@ from ...egress import Egress from ...git_gate import GitGate from ...pipelock import PipelockProxy from ...supervise import Supervise -from . import smolvm as _smolvm from .bottle_plan import SmolmachinesBottlePlan -from .local_registry import crane_push_tarball, ephemeral_registry from .util import smolmachines_bundle_subnet, smolmachines_preflight -# Repo root, used as the `docker build` context for the agent image. -_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) - - -# Per-host cache for `smolvm pack create` outputs. Keyed by the -# image ref so re-prepares for the same image hit the cache -# (pack create is idempotent on the smolvm side but takes several -# seconds even when no layer is fetched). -_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "claude-bottle" / "smolmachines" - - # Gateway ports the bundle exposes inside its container — pipelock # HTTPS proxy, git-gate's git-daemon, supervise's MCP. 
The agent # inside the smolvm guest dials these on the bundle's pinned IP. @@ -158,16 +142,12 @@ def resolve_plan( prompt_file.chmod(0o600) machine_name = f"claude-bottle-{slug}" - # Build the agent image from the repo Dockerfile (shared with - # the docker backend, layer-cached) and convert it into a - # `.smolmachine` artifact via an ephemeral local registry. The - # CLAUDE_BOTTLE_IMAGE env var match the docker backend's - # resolve_plan default so both backends use the same image when - # one is built. + # Stash the agent image ref — `launch.launch` runs the + # build → pack pipeline at bringup. Honors CLAUDE_BOTTLE_IMAGE + # to match the docker backend's `resolve_plan` default. agent_image_ref = os.environ.get( "CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest" ) - agent_from_path = _ensure_smolmachine(agent_image_ref) return SmolmachinesBottlePlan( spec=spec, @@ -177,7 +157,7 @@ def resolve_plan( bundle_gateway=gateway, bundle_ip=bundle_ip, machine_name=machine_name, - agent_from_path=agent_from_path, + agent_image_ref=agent_image_ref, guest_env=guest_env, prompt_file=prompt_file, proxy_plan=proxy_plan, @@ -185,53 +165,3 @@ def resolve_plan( egress_plan=egress_plan, supervise_plan=supervise_plan, ) - - -def _ensure_smolmachine(image_ref: str) -> Path: - """Build the agent docker image and convert it into a - `.smolmachine` artifact, caching the result under - `~/.cache/claude-bottle/smolmachines/` keyed by the docker image - ID (so a Dockerfile change automatically invalidates the cache). - - Returns the `.smolmachine.smolmachine` sidecar path — that's - the file `machine create --from` consumes (pack create produces - a launcher binary at `.smolmachine` plus the sidecar alongside - it; the sidecar is the actual artifact). 
- - Conversion path: `docker build` (the existing layer cache - makes no-change rebuilds cheap) → `docker save` to a tarball - → spin up an ephemeral registry on a private docker network → - `crane push --insecure` from a one-shot container on the same - network → `smolvm pack create --image localhost:/...` - → tear down the registry + network. The crane push detour - sidesteps the Docker-Desktop daemon's HTTPS preference for - non-loopback registries — see the `local_registry` module - docstring for the gory details. - - Each pack-create costs several seconds even on a hot cache, - so we skip the whole pipeline when the cached sidecar is - already on disk for this image ID.""" - _SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True) - docker_mod.build_image(image_ref, _REPO_DIR) - # `sha256:abcd...` -> `abcd...` first 16 chars: short enough to - # keep filenames manageable, long enough to make collisions - # astronomically unlikely. - digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16] - binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine" - sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine" - if sidecar.is_file(): - return sidecar - tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar" - docker_mod.save(image_ref, str(tarball)) - try: - with ephemeral_registry() as handle: - push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}" - pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}" - crane_push_tarball(handle, str(tarball), push_ref) - _smolvm.pack_create(pack_ref, binary) - finally: - # Tarball is ~500MB-1GB for the agent image; reclaim once - # the smolmachine artifact exists. The artifact itself is - # the long-lived cache entry. 
- tarball.unlink(missing_ok=True) - return sidecar diff --git a/tests/unit/test_smolmachines_prepare_image.py b/tests/unit/test_smolmachines_launch_image.py similarity index 79% rename from tests/unit/test_smolmachines_prepare_image.py rename to tests/unit/test_smolmachines_launch_image.py index f1fc64b..bac3c5d 100644 --- a/tests/unit/test_smolmachines_prepare_image.py +++ b/tests/unit/test_smolmachines_launch_image.py @@ -4,7 +4,12 @@ Asserts that the cache-hit path returns without touching the registry / pack pipeline, and that the cache-miss path runs build → tag → push → pack in order against a registry port the -helper yields.""" +helper yields. + +The pipeline lives in `launch.py` (moved from `prepare.py` so the +docker build doesn't run before the dashboard's preflight modal; +the curses-endwin / tmux pane-routing handoff happens around +`launch`).""" from __future__ import annotations @@ -13,14 +18,14 @@ import unittest from pathlib import Path from unittest.mock import patch -from claude_bottle.backend.smolmachines import prepare as _prepare +from claude_bottle.backend.smolmachines import launch as _launch_mod class TestEnsureSmolmachine(unittest.TestCase): def setUp(self): self._tmp = tempfile.TemporaryDirectory(prefix="cb-cache.") self._cache_patch = patch.object( - _prepare, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name), + _launch_mod, "_SMOLMACHINE_CACHE_DIR", Path(self._tmp.name), ) self._cache_patch.start() @@ -35,20 +40,20 @@ class TestEnsureSmolmachine(unittest.TestCase): sidecar.write_text("") with patch.object( - _prepare.docker_mod, "build_image", + _launch_mod.docker_mod, "build_image", ) as build, patch.object( - _prepare.docker_mod, "image_id", + _launch_mod.docker_mod, "image_id", return_value=f"sha256:{digest}fffffffffffffffff", ), patch.object( - _prepare.docker_mod, "save", + _launch_mod.docker_mod, "save", ) as save, patch.object( - _prepare, "ephemeral_registry", + _launch_mod, "ephemeral_registry", ) as registry, patch.object( - 
_prepare, "crane_push_tarball", + _launch_mod, "crane_push_tarball", ) as push, patch.object( - _prepare._smolvm, "pack_create", + _launch_mod._smolvm, "pack_create", ) as pack: - result = _prepare._ensure_smolmachine("claude-bottle:latest") + result = _launch_mod._ensure_smolmachine("claude-bottle:latest") self.assertEqual(sidecar, result) # build still runs (Dockerfile edits land without manual rmi). @@ -88,25 +93,25 @@ class TestEnsureSmolmachine(unittest.TestCase): return _f with patch.object( - _prepare.docker_mod, "build_image", + _launch_mod.docker_mod, "build_image", side_effect=record("build"), ), patch.object( - _prepare.docker_mod, "image_id", + _launch_mod.docker_mod, "image_id", return_value=f"sha256:{digest}fffffffffffffffff", ), patch.object( - _prepare.docker_mod, "save", + _launch_mod.docker_mod, "save", side_effect=record("save"), ) as save, patch.object( - _prepare, "ephemeral_registry", + _launch_mod, "ephemeral_registry", return_value=_Reg(), ), patch.object( - _prepare, "crane_push_tarball", + _launch_mod, "crane_push_tarball", side_effect=record("push"), ) as push, patch.object( - _prepare._smolvm, "pack_create", + _launch_mod._smolvm, "pack_create", side_effect=record("pack"), ) as pack: - _prepare._ensure_smolmachine("claude-bottle:latest") + _launch_mod._ensure_smolmachine("claude-bottle:latest") # Build → save → push → pack in that order. No `docker # push` (the daemon's HTTPS-by-default path is what we're diff --git a/tests/unit/test_smolmachines_provision.py b/tests/unit/test_smolmachines_provision.py index f00ea79..d738363 100644 --- a/tests/unit/test_smolmachines_provision.py +++ b/tests/unit/test_smolmachines_provision.py @@ -90,7 +90,7 @@ def _plan( bundle_gateway="192.168.50.1", bundle_ip=bundle_ip, machine_name="claude-bottle-demo-abc12", - agent_from_path=Path("/tmp/agent.smolmachine"), + agent_image_ref="claude-bottle:latest", guest_env={}, prompt_file=Path("/tmp/state/demo-abc12/agent/prompt.txt"), proxy_plan=PipelockProxyPlan(