feat(smolmachines): build agent image from repo Dockerfile (PRD 0023 chunk 4c)
Replaces the alpine:latest placeholder with a real claude-bottle agent image, converted into a .smolmachine artifact via an ephemeral local OCI registry. Why the registry hop: smolvm pack create only accepts OCI registry refs. Empirically it rejects docker-daemon://, oci-layout://, docker-archive: tarballs, and every other transport tested — the crane backend treats anything with a scheme prefix as a registry hostname. To convert a locally-built docker image into a .smolmachine we have to push it somewhere smolvm can pull from. Smallest path: bring up registry:2.8.3 bound to 127.0.0.1:<random>, docker tag + docker push into it, smolvm pack create --image localhost:<port>/claude-bottle:<id>, tear down the registry. The .smolmachine is cached under ~/.cache/claude-bottle/smolmachines/ keyed by the docker image ID (first 16 hex chars of the sha256), so a Dockerfile change picks up a new image ID and invalidates the cache. Unchanged rebuilds skip the whole build → registry → pack pipeline. This puts `docker build` in smolmachines prepare (the docker backend defers it to launch). Necessary because pack_create needs the image ID to derive the cache key, and prepare is the only hook ahead of launch that runs once per slug. Adds: - claude_bottle/backend/docker/util.py: image_id / tag / push helpers (thin docker CLI wrappers). - claude_bottle/backend/smolmachines/local_registry.py: ephemeral_registry() context manager; pins registry:2.8.3 by digest, binds 127.0.0.1::5000 (loopback-only), force-removes on exit. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit was merged in pull request #71.
This commit is contained in:
@@ -147,6 +147,39 @@ def build_image_with_cwd(derived: str, base: str, cwd: str) -> None:
|
||||
)
|
||||
|
||||
|
||||
def image_id(ref: str) -> str:
    """Look up the content-addressed ID of a local docker image.

    Runs `docker image inspect --format {{.Id}} <ref>` and returns the
    stripped stdout (e.g. `sha256:abcd...`). The smolmachines backend
    uses this value as the key of its `.smolmachine` artifact cache,
    so a rebuilt image with a new ID never matches a stale artifact.

    Calls `die` with docker's stderr when the inspect command exits
    non-zero.
    """
    inspect_cmd = ["docker", "image", "inspect", "--format", "{{.Id}}", ref]
    result = subprocess.run(
        inspect_cmd,
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        # Surface docker's own explanation; fall back to a marker when
        # it printed nothing.
        reason = (result.stderr or "").strip() or "<no stderr>"
        die(f"docker image inspect for {ref!r} failed: {reason}")
    return result.stdout.strip()
|
||||
|
||||
|
||||
def tag(src: str, dst: str) -> None:
    """Run `docker tag SRC DST`.

    The smolmachines prepare step uses this to give the locally-built
    image a `localhost:<port>/...` name that the ephemeral registry
    will accept on push. Re-tagging the same pair is harmless.

    Raises `subprocess.CalledProcessError` if docker exits non-zero.
    """
    argv = ["docker", "tag", src, dst]
    subprocess.run(argv, check=True)
|
||||
|
||||
|
||||
def push(ref: str) -> None:
    """Run `docker push REF`.

    The smolmachines prepare step uses this to upload the agent image
    into the ephemeral local registry, which is where smolvm's crane
    backend pulls it from.

    Raises `subprocess.CalledProcessError` if docker exits non-zero.
    """
    argv = ["docker", "push", ref]
    subprocess.run(argv, check=True)
|
||||
|
||||
|
||||
def _silent_run(cmd: Iterable[str]) -> int:
|
||||
return subprocess.run(
|
||||
list(cmd),
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
"""Ephemeral local OCI registry for the smolmachines agent-image
|
||||
conversion path (PRD 0023 chunk 4c).
|
||||
|
||||
`smolvm pack create --image <ref>` only accepts registry refs — it
|
||||
can't read the local docker daemon's image cache, an OCI layout
|
||||
directory, or a `docker save` tarball. To convert the agent's
|
||||
Dockerfile-built image into a `.smolmachine` artifact we run a
|
||||
short-lived `registry:2.8.3` container on `127.0.0.1:<random>`,
|
||||
push the locally-tagged image into it, and let smolvm pull from
|
||||
there. The registry container is torn down as soon as the pack
|
||||
completes.
|
||||
|
||||
Loopback-only bind + the host's docker layer cache mean the round
|
||||
trip is fast (~5s) and there's no exposed surface on the LAN."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from typing import Iterator
|
||||
|
||||
from ...log import die
|
||||
|
||||
|
||||
# Registry image used for the ephemeral registry: registry:2.8.3,
# pinned by digest. The CLAUDE_BOTTLE_REGISTRY_IMAGE environment
# variable overrides it (read once, at import time) — the same
# env-override pattern as the pipelock image pin in
# claude_bottle/backend/docker/pipelock.py.
REGISTRY_IMAGE = os.getenv(
    "CLAUDE_BOTTLE_REGISTRY_IMAGE",
    "registry@sha256:a3d8aaa63ed8681a604f1dea0aa03f100d5895b6a58ace528858a7b332415373",
)


# Upper bound, in seconds, on how long we wait for the registry's HTTP
# layer to bind. Two seconds is empirically enough; 10s leaves headroom
# for slow CI runners without making the failure mode chatty.
_READY_TIMEOUT_S = 10.0
|
||||
|
||||
|
||||
@contextmanager
def ephemeral_registry() -> Iterator[int]:
    """Run a throwaway registry container and yield its host port.

    Starts `REGISTRY_IMAGE` detached under a unique
    `claude-bottle-registry-<hex>` name, publishes container port 5000
    on a random loopback-only host port, waits until that port accepts
    TCP connections, and yields the port number.

    Cleanup: the `finally` block runs `docker rm -f` on every exit from
    the `with` body, normal or exceptional, and also when startup
    itself fails part-way. `--rm` is only a backstop that lets docker
    remove the container if it stops on its own.

    NOTE: if this process is killed outright, `finally` never runs and
    the registry container keeps running until it is removed by hand.

    Calls `die` (with docker's stderr) when `docker run` fails, instead
    of raising a `CalledProcessError` whose captured stderr would never
    be shown.
    """
    name = f"claude-bottle-registry-{uuid.uuid4().hex[:12]}"
    try:
        started = subprocess.run(
            [
                "docker", "run", "-d", "--rm",
                "--name", name,
                # `127.0.0.1::5000` = bind to loopback, pick a random host
                # port. No LAN exposure; the container hangs around just
                # long enough for one push + one pack-create.
                "-p", "127.0.0.1::5000",
                REGISTRY_IMAGE,
            ],
            check=False,
            capture_output=True,
            text=True,
        )
        if started.returncode != 0:
            # stderr is captured, so it has to be reported explicitly
            # or the user only sees an exit status.
            die(
                f"docker run for local registry {name} failed: "
                f"{(started.stderr or '').strip() or '<no stderr>'}"
            )
        port = _host_port(name)
        _wait_ready(port)
        yield port
    finally:
        # Best effort: the container may already be gone (or may never
        # have been created), so ignore the exit status and output.
        subprocess.run(
            ["docker", "rm", "-f", name],
            check=False,
            capture_output=True,
        )
|
||||
|
||||
|
||||
def _host_port(name: str) -> int:
    """Return the host port docker mapped to the registry's port 5000.

    Runs `docker port <name> 5000/tcp`, which prints one or more
    `host:port` lines; with the loopback-only `-p` binding the expected
    output is a single `127.0.0.1:<port>` line. The first non-blank
    line is parsed.

    Calls `die` when the command exits non-zero, prints nothing, or
    prints a line whose port part is not an integer.
    """
    r = subprocess.run(
        ["docker", "port", name, "5000/tcp"],
        capture_output=True,
        text=True,
        check=False,
    )
    if r.returncode != 0:
        die(
            f"docker port {name} 5000/tcp failed: "
            f"{(r.stderr or '').strip() or '<no stderr>'}"
        )
    # Take the first non-blank line. Empty output yields "" here, which
    # falls through to the ValueError branch below rather than raising
    # IndexError on `splitlines()[0]`.
    line = next(
        (ln.strip() for ln in (r.stdout or "").splitlines() if ln.strip()),
        "",
    )
    # `127.0.0.1:54321` — split on the last colon to handle the
    # `host:port` shape without parsing IP literals.
    _, _, port_str = line.rpartition(":")
    try:
        return int(port_str)
    except ValueError:
        die(f"unexpected `docker port` output: {line!r}")
    return -1  # unreachable; die() never returns
|
||||
|
||||
|
||||
def _wait_ready(port: int) -> None:
    """Poll `127.0.0.1:<port>` until a TCP connect succeeds.

    Returns as soon as one connection attempt succeeds. Each attempt
    has a 0.5s timeout and failed attempts are retried after a 0.1s
    pause, until `_READY_TIMEOUT_S` seconds have passed; then `die` is
    called with the most recent connection error.

    A bare TCP connect is treated as "ready": per the original author's
    note, registry:2.8.3 binds only once it can serve `/v2/` requests,
    so the push that follows should land on a working server.
    """
    give_up_at = time.monotonic() + _READY_TIMEOUT_S
    address = ("127.0.0.1", port)
    failure: Exception | None = None
    while time.monotonic() < give_up_at:
        try:
            probe = socket.create_connection(address, timeout=0.5)
        except OSError as exc:
            failure = exc
            time.sleep(0.1)
            continue
        # Connected: the probe socket has served its purpose.
        probe.close()
        return
    die(
        f"local registry on 127.0.0.1:{port} did not accept "
        f"connections within {_READY_TIMEOUT_S:.0f}s "
        f"(last error: {failure})"
    )
|
||||
@@ -1,16 +1,23 @@
|
||||
"""smolmachines `_resolve_plan` (PRD 0023 chunk 2d).
|
||||
"""smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c).
|
||||
|
||||
Resolves the per-bottle docker subnet + bundle IP, pre-packs the
|
||||
agent's `.smolmachine` artifact (cached under
|
||||
`~/.cache/claude-bottle/smolmachines/`), and assembles the guest
|
||||
env. No VM bringup — that's `launch.launch`'s job."""
|
||||
Resolves the per-bottle docker subnet + bundle IP, builds the
|
||||
agent's docker image from the repo Dockerfile, converts it into a
|
||||
`.smolmachine` artifact via an ephemeral local registry (smolvm's
|
||||
crane backend only reads registry refs), and assembles the guest
|
||||
env. The `.smolmachine` is cached under
|
||||
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
|
||||
ID so Dockerfile changes invalidate the cache automatically.
|
||||
|
||||
No VM bringup — that's `launch.launch`'s job."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ...backend import BottleSpec
|
||||
from ...backend.docker import util as docker_mod
|
||||
from ...backend.docker.bottle_state import (
|
||||
BottleMetadata,
|
||||
agent_state_dir,
|
||||
@@ -27,9 +34,14 @@ from ...pipelock import PipelockProxy
|
||||
from ...supervise import Supervise
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
from .local_registry import ephemeral_registry
|
||||
from .util import smolmachines_bundle_subnet, smolmachines_preflight
|
||||
|
||||
|
||||
# Repo root, used as the `docker build` context for the agent image.
|
||||
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
|
||||
|
||||
|
||||
# Per-host cache for `smolvm pack create` outputs. Keyed by the
|
||||
# docker image ID so re-prepares for the same image hit the cache
|
||||
# (pack create is idempotent on the smolvm side but takes several
|
||||
@@ -132,11 +144,15 @@ def resolve_plan(
|
||||
prompt_file.chmod(0o600)
|
||||
|
||||
machine_name = f"claude-bottle-{slug}"
|
||||
# Chunk 2d placeholder until the agent-image work lands.
|
||||
# alpine pulls cleanly from docker.io via smolvm's crane
|
||||
# backend; the real claude-bottle image lives in the local
|
||||
# docker daemon and isn't reachable that way.
|
||||
agent_image_ref = "alpine:latest"
|
||||
# Build the agent image from the repo Dockerfile (shared with
|
||||
# the docker backend, layer-cached) and convert it into a
|
||||
# `.smolmachine` artifact via an ephemeral local registry. The
|
||||
# CLAUDE_BOTTLE_IMAGE env var matches the docker backend's
|
||||
# resolve_plan default so both backends use the same image when
|
||||
# one is built.
|
||||
agent_image_ref = os.environ.get(
|
||||
"CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest"
|
||||
)
|
||||
agent_from_path = _ensure_smolmachine(agent_image_ref)
|
||||
|
||||
return SmolmachinesBottlePlan(
|
||||
@@ -158,21 +174,37 @@ def resolve_plan(
|
||||
|
||||
|
||||
def _ensure_smolmachine(image_ref: str) -> Path:
|
||||
"""Cache `smolvm pack create --image <image_ref>` output under
|
||||
`~/.cache/claude-bottle/smolmachines/<slug>`. Returns the
|
||||
`.smolmachine.smolmachine` sidecar path — that's the file
|
||||
`machine create --from` consumes (pack create produces a
|
||||
launcher binary at `.smolmachine` plus the sidecar alongside
|
||||
"""Build the agent docker image and convert it into a
|
||||
`.smolmachine` artifact, caching the result under
|
||||
`~/.cache/claude-bottle/smolmachines/` keyed by the docker image
|
||||
ID (so a Dockerfile change automatically invalidates the cache).
|
||||
|
||||
Returns the `.smolmachine.smolmachine` sidecar path — that's
|
||||
the file `machine create --from` consumes (pack create produces
|
||||
a launcher binary at `.smolmachine` plus the sidecar alongside
|
||||
it; the sidecar is the actual artifact).
|
||||
|
||||
Re-runs of pack create against the same image hit smolvm's
|
||||
layer cache; we still skip the call entirely when the
|
||||
sidecar is already on disk, since each invocation costs
|
||||
several seconds even on a hot cache."""
|
||||
Conversion path: `docker build` (the existing layer cache makes
|
||||
no-change rebuilds cheap) → `docker tag` with a
|
||||
`localhost:<port>/...` ref → bring up the ephemeral registry
|
||||
container → `docker push` into it → `smolvm pack create --image
|
||||
<localhost ref>` → tear down the registry. Each pack-create
|
||||
costs several seconds even on a hot cache, so we skip the whole
|
||||
pipeline when the cached sidecar is already on disk for this
|
||||
image ID."""
|
||||
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
slug = image_ref.replace(":", "_").replace("/", "_")
|
||||
binary = _SMOLMACHINE_CACHE_DIR / f"{slug}.smolmachine"
|
||||
sidecar = _SMOLMACHINE_CACHE_DIR / f"{slug}.smolmachine.smolmachine"
|
||||
if not sidecar.is_file():
|
||||
_smolvm.pack_create(image_ref, binary)
|
||||
docker_mod.build_image(image_ref, _REPO_DIR)
|
||||
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
|
||||
# keep filenames manageable, long enough to make collisions
|
||||
# astronomically unlikely.
|
||||
digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
|
||||
binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
|
||||
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
|
||||
if sidecar.is_file():
|
||||
return sidecar
|
||||
with ephemeral_registry() as port:
|
||||
local_ref = f"localhost:{port}/claude-bottle:{digest}"
|
||||
docker_mod.tag(image_ref, local_ref)
|
||||
docker_mod.push(local_ref)
|
||||
_smolvm.pack_create(local_ref, binary)
|
||||
return sidecar
|
||||
|
||||
Reference in New Issue
Block a user