fix(smolmachines): docker push fails on Docker Desktop — daemon-side route differs from host loopback #74

Merged
didericis-claude merged 13 commits from fix-local-registry-docker-desktop into main 2026-05-27 16:10:46 -04:00
20 changed files with 872 additions and 237 deletions
+21
View File
@@ -190,6 +190,27 @@ The container is removed automatically when the session ends. If the script
is killed with SIGKILL the exit trap won't fire and the container may be is killed with SIGKILL the exit trap won't fire and the container may be
left running; remove it with `docker rm -f <container-name>`. left running; remove it with `docker rm -f <container-name>`.
### Smolmachines backend (experimental, macOS-only)
A second backend runs the agent in a smolvm micro-VM (libkrun) with the
sidecar bundle still in Docker. Selected via
`CLAUDE_BOTTLE_BACKEND=smolmachines ./cli.py start <agent>`. Requires
`smolvm` on PATH (`curl -sSL https://smolmachines.com/install.sh | sh`).
**Known limitation, v1:** smolvm's TSI uses macOS networking, and
Docker Desktop's container IPs aren't reachable from macOS, so the
smolmachines bottle dials the sidecar bundle through host loopback
port-forwards (`127.0.0.1:<random>`). TSI filters by IP only, so the
allowlist is `127.0.0.1/32` — meaning the agent VM can reach **any
service bound to macOS's loopback**, not just the bundle's published
ports. Practical implication: while a smolmachines bottle is running,
host-local dev services (postgres on 5432, dev servers, etc.) are
reachable from inside the agent even if you intended them to be
host-private. The docker backend keeps the bottle on a `--internal`
docker network and doesn't have this issue. A future revision will
narrow this via a per-bottle loopback alias + host-side proxy (see
PRD 0023's "loopback scoping" section).
## Manifest ## Manifest
Bottles and agents live as Markdown files with YAML frontmatter under Bottles and agents live as Markdown files with YAML frontmatter under
+14 -6
View File
@@ -119,12 +119,20 @@ class Bottle(ABC):
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: ... def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: ...
@abstractmethod @abstractmethod
def exec(self, script: str) -> ExecResult: def exec(self, script: str, *, user: str = "node") -> ExecResult:
"""Run `script` as a POSIX shell script inside the bottle and """Run `script` as a POSIX shell script inside the bottle as
return the captured stdout/stderr/returncode. The bottle's `user` (default `node`, matching the agent image's USER
environment (including HTTPS_PROXY pointing at the pipelock directive) and return the captured stdout/stderr/returncode.
sidecar) is inherited by the child. Non-zero exit does not The bottle's environment (including HTTPS_PROXY pointing at
raise — callers inspect `returncode` themselves.""" the pipelock sidecar) is inherited by the child. Non-zero
exit does not raise — callers inspect `returncode`
themselves.
Pass `user="root"` for shell-outs that need privileged file
writes or package installs. Provisioning calls that need root
bypass `Bottle.exec` and use the backend-specific raw
machine-exec helper instead, but the tests have a legitimate
use case for arbitrary-user runs."""
@abstractmethod @abstractmethod
def cp_in(self, host_path: str, container_path: str) -> None: ... def cp_in(self, host_path: str, container_path: str) -> None: ...
+6 -3
View File
@@ -51,12 +51,15 @@ class DockerBottle(Bottle):
self.claude_docker_argv(argv, tty=tty), check=False, self.claude_docker_argv(argv, tty=tty), check=False,
).returncode ).returncode
def exec(self, script: str) -> ExecResult: def exec(self, script: str, *, user: str = "node") -> ExecResult:
# Pipe via stdin to `sh -s` so the caller never has to worry # Pipe via stdin to `sh -s` so the caller never has to worry
# about quoting; the script source lands inside the container # about quoting; the script source lands inside the container
# without crossing argv. # without crossing argv. `-u <user>` overrides the image's
# default USER — defaults to `node` which is already the
# image's USER, so the explicit flag is a no-op there but
# keeps the cross-backend contract uniform.
result = subprocess.run( result = subprocess.run(
["docker", "exec", "-i", self.name, "sh", "-s"], ["docker", "exec", "-u", user, "-i", self.name, "sh", "-s"],
input=script, input=script,
capture_output=True, capture_output=True,
text=True, text=True,
+9 -12
View File
@@ -166,18 +166,15 @@ def image_id(ref: str) -> str:
return r.stdout.strip() return r.stdout.strip()
def tag(src: str, dst: str) -> None: def save(ref: str, output: str) -> None:
"""`docker tag SRC DST`. Idempotent. Used by smolmachines prepare """`docker save REF -o OUTPUT`. Writes a tarball of the image
to retag the locally-built image into a localhost:<port>/... ref layers + manifest to the host path. Used by smolmachines
that the ephemeral registry will accept.""" prepare to hand the agent image to a containerized crane that
subprocess.run(["docker", "tag", src, dst], check=True) pushes it to the ephemeral registry — bypassing the docker
daemon's `docker push` (which on Docker Desktop can't reach a
host-loopback registry and refuses plain-HTTP pushes to
def push(ref: str) -> None: non-loopback hosts)."""
"""`docker push REF`. Used by smolmachines prepare to push the subprocess.run(["docker", "save", ref, "-o", output], check=True)
agent image into the ephemeral local registry so smolvm's crane
backend can pull it."""
subprocess.run(["docker", "push", ref], check=True)
def _silent_run(cmd: Iterable[str]) -> int: def _silent_run(cmd: Iterable[str]) -> int:
+86 -19
View File
@@ -4,63 +4,130 @@ Routes `exec_claude` / `exec` / `cp_in` through `smolvm machine
exec` / `smolvm machine cp`. The handle is yielded by `launch` exec` / `smolvm machine cp`. The handle is yielded by `launch`
and torn down via the surrounding ExitStack on context exit; and torn down via the surrounding ExitStack on context exit;
`close` is a no-op idempotent alias so the BottleBackend ABC's `close` is a no-op idempotent alias so the BottleBackend ABC's
context-manager contract is satisfied.""" context-manager contract is satisfied.
User context: `smolvm machine exec` runs commands as root in the
VM, but the agent image's USER is `node` and claude-code refuses
to run as root with `--dangerously-skip-permissions`. Both
`exec_claude` and `exec` switch to the requested user (default
`node`) via `runuser -u <user> --` and set `HOME` / `USER`
through `smolvm -e` — avoiding `runuser -l`'s login-shell wiring
(PAM session setup, /etc/profile sourcing) which can hang on a
minimal Debian VM with no PAM session config."""
from __future__ import annotations from __future__ import annotations
import subprocess import subprocess
import sys from typing import Mapping
from .. import Bottle, ExecResult from .. import Bottle, ExecResult
from . import smolvm as _smolvm from . import smolvm as _smolvm
# Per-user env the agent image's USER (node) expects. claude
# reads ~/.claude.json + writes session state under ~/.claude/;
# bare `runuser -u` inherits root's HOME=/root, which claude
# can't write to. Set HOME / USER explicitly through smolvm -e
# so the child process sees them.
_HOME_FOR = {
"node": "/home/node",
"root": "/root",
}
def _env_flags_for(user: str) -> list[str]:
    """Build the `-e HOME=... -e USER=...` argv slice for `user`.

    The home directory is looked up in `_HOME_FOR`; a user missing
    from that table falls back to `/home/<user>`."""
    try:
        home_dir = _HOME_FOR[user]
    except KeyError:
        home_dir = f"/home/{user}"
    flags = ["-e", f"HOME={home_dir}"]
    flags += ["-e", f"USER={user}"]
    return flags
def _guest_env_flags(env: Mapping[str, str]) -> list[str]:
    """Flatten `{K: V}` into the `-e K=V` argv slice passed to
    `smolvm machine exec`, one `-e` pair per entry in mapping order.

    `smolvm machine create -e` sets env on PID 1, but that env
    doesn't propagate to fresh exec process trees, so the flags
    have to be re-passed on every call."""
    return [
        token
        for key, value in env.items()
        for token in ("-e", f"{key}={value}")
    ]
class SmolmachinesBottle(Bottle): class SmolmachinesBottle(Bottle):
"""Handle returned by `SmolmachinesBottleBackend.launch`. The """Handle returned by `SmolmachinesBottleBackend.launch`. The
underlying VM lifecycle (create / start / stop / delete) lives underlying VM lifecycle (create / start / stop / delete) lives
on the launch ExitStack — this class only routes runtime on the launch ExitStack — this class only routes runtime
operations to the right `smolvm machine ...` subcommand.""" operations to the right `smolvm machine ...` subcommand."""
def __init__(self, machine_name: str, *, prompt_path: str | None = None) -> None: def __init__(
self,
machine_name: str,
*,
prompt_path: str | None = None,
guest_env: Mapping[str, str] | None = None,
) -> None:
self.name = machine_name self.name = machine_name
# In-VM path to the agent's prompt file. None when the # In-VM path to the agent's prompt file. None when the
# agent declared no prompt (file still exists; we just # agent declared no prompt (file still exists; we just
# don't pass --append-system-prompt-file). # don't pass --append-system-prompt-file).
self._prompt_path = prompt_path self._prompt_path = prompt_path
# Env vars the agent process needs (HTTPS_PROXY,
# CLAUDE_CODE_OAUTH_TOKEN, manifest-declared bottle env, …).
# Forwarded on every `smolvm machine exec` via `-e K=V`
# because exec doesn't inherit from machine_create's env.
self._guest_env = dict(guest_env or {})
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
"""Run `claude` interactively inside the VM. Inherits the """Run `claude` interactively inside the VM as the `node`
operator's terminal (stdin / stdout / stderr) so the user. Inherits the operator's terminal (stdin / stdout /
session feels native. Blocks until claude exits; returns stderr) so the session feels native. Blocks until claude
the in-VM exit code. exits; returns the in-VM exit code.
We bypass the captured-output `machine_exec` helper here We bypass the captured-output `machine_exec` helper here
because that one wraps stdout/stderr in pipes — fine for because that one wraps stdout/stderr in pipes — fine for
scripted exec, wrong for an interactive shell. Drop down scripted exec, wrong for an interactive shell. Drop down
to `subprocess.run` with the TTY inherited.""" to `subprocess.run` with the TTY inherited.
UID switches via `runuser -u node --` (not `-l`) so we
avoid login-shell wiring. HOME / USER come from `smolvm
-e` instead, which sets them on the process env."""
flags = ["smolvm", "machine", "exec", "--name", self.name] flags = ["smolvm", "machine", "exec", "--name", self.name]
if tty: if tty:
flags += ["-i", "-t"] flags += ["-i", "-t"]
flags += _env_flags_for("node")
flags += _guest_env_flags(self._guest_env)
claude_argv = ["claude"] claude_argv = ["claude"]
if self._prompt_path: if self._prompt_path:
claude_argv += ["--append-system-prompt-file", self._prompt_path] claude_argv += ["--append-system-prompt-file", self._prompt_path]
flags += ["--", *claude_argv, *argv] claude_argv += argv
flags += ["--", "runuser", "-u", "node", "--", *claude_argv]
result = subprocess.run(flags, check=False) result = subprocess.run(flags, check=False)
return result.returncode return result.returncode
def exec(self, script: str) -> ExecResult: def exec(self, script: str, *, user: str = "node") -> ExecResult:
"""Run a POSIX shell script and capture the result. The """Run a POSIX shell script as `user` (default `node`) and
script runs under `/bin/sh -c`, matching what the docker capture the result. Matches the docker backend's `exec`,
backend's `exec` does — callers can write shell-y test which defaults to the image's USER (also node) — so test
helpers without worrying about argv splitting.""" helpers / provision shell-outs run with the same identity
r = _smolvm.machine_exec( on both backends. Pass `user="root"` for tests that need
self.name, root.
["/bin/sh", "-c", script],
`runuser -u <user> -- /bin/sh -c <script>` switches UID
without invoking a login shell; HOME / USER are set via
`smolvm -e` (see `_env_flags_for`)."""
argv = (
_env_flags_for(user)
+ _guest_env_flags(self._guest_env)
+ ["--", "runuser", "-u", user, "--", "/bin/sh", "-c", script]
)
# _smolvm.machine_exec expects argv (the bit after `--`);
# the -e flags go before, so call smolvm directly.
r = subprocess.run(
["smolvm", "machine", "exec", "--name", self.name] + argv,
capture_output=True, text=True, check=False,
) )
return ExecResult( return ExecResult(
returncode=r.returncode, returncode=r.returncode,
stdout=r.stdout, stdout=r.stdout or "",
stderr=r.stderr, stderr=r.stderr or "",
) )
def cp_in(self, host_path: str, container_path: str) -> None: def cp_in(self, host_path: str, container_path: str) -> None:
@@ -75,6 +75,17 @@ class SmolmachinesBottlePlan(BottlePlan):
# None when bottle.supervise is False, matching the docker # None when bottle.supervise is False, matching the docker
# backend's convention. # backend's convention.
supervise_plan: SupervisePlan | None supervise_plan: SupervisePlan | None
# Agent-side endpoints. On Docker Desktop the docker bridge
# IPs aren't reachable from the smolvm guest (TSI uses macOS
# networking; docker container IPs live in the daemon's VM),
# so the agent dials the bundle via host loopback +
# docker-published random ports. Empty at prepare time;
# launch populates these after bundle bringup via
# `dataclasses.replace`. Format: a `host:port` for git-gate
# (insteadOf URL prefix) + full URLs for proxy / supervise.
agent_proxy_url: str = ""
agent_git_gate_host: str = ""
agent_supervise_url: str = ""
def print(self, *, remote_control: bool) -> None: def print(self, *, remote_control: bool) -> None:
"""Compact y/N preflight. Same shape as the Docker """Compact y/N preflight. Same shape as the Docker
@@ -89,7 +100,10 @@ class SmolmachinesBottlePlan(BottlePlan):
upstreams = [ upstreams = [
f"{g.Name}{g.Upstream}" for g in bottle.git f"{g.Name}{g.Upstream}" for g in bottle.git
] ]
routes = [r.host for r in bottle.egress.routes] # Use the resolved egress_plan (lowercase `host` on the
# plan-level EgressRoute) rather than `bottle.egress.routes`,
# which is the manifest's capitalized-attr form.
routes = [r.host for r in self.egress_plan.routes]
print(file=sys.stderr) print(file=sys.stderr)
info(f"agent : {spec.agent_name}") info(f"agent : {spec.agent_name}")
+147 -12
View File
@@ -21,6 +21,7 @@ from __future__ import annotations
import dataclasses import dataclasses
import os import os
import time
from contextlib import ExitStack, contextmanager from contextlib import ExitStack, contextmanager
from typing import Callable, Generator from typing import Callable, Generator
@@ -34,6 +35,7 @@ from ...util import expand_tilde
from ..docker.egress import ( from ..docker.egress import (
EGRESS_CA_IN_CONTAINER, EGRESS_CA_IN_CONTAINER,
EGRESS_PIPELOCK_CA_IN_CONTAINER, EGRESS_PIPELOCK_CA_IN_CONTAINER,
EGRESS_PORT as _EGRESS_PORT,
egress_tls_init, egress_tls_init,
) )
from ..docker.git_gate import ( from ..docker.git_gate import (
@@ -41,14 +43,28 @@ from ..docker.git_gate import (
GIT_GATE_CREDS_DIR_IN_CONTAINER, GIT_GATE_CREDS_DIR_IN_CONTAINER,
GIT_GATE_ENTRYPOINT_IN_CONTAINER, GIT_GATE_ENTRYPOINT_IN_CONTAINER,
GIT_GATE_HOOK_IN_CONTAINER, GIT_GATE_HOOK_IN_CONTAINER,
GIT_GATE_PORT as _GIT_GATE_PORT,
)
from ..docker.pipelock import (
BUNDLE_LOCAL_PIPELOCK_URL,
PIPELOCK_PORT as _PIPELOCK_PORT_STR,
pipelock_tls_init,
) )
from ..docker.pipelock import BUNDLE_LOCAL_PIPELOCK_URL, pipelock_tls_init
from . import sidecar_bundle as _bundle from . import sidecar_bundle as _bundle
from . import smolvm as _smolvm from . import smolvm as _smolvm
from .bottle import SmolmachinesBottle from .bottle import SmolmachinesBottle
from .bottle_plan import SmolmachinesBottlePlan from .bottle_plan import SmolmachinesBottlePlan
# Container-internal listening ports for each bundle daemon. The
# bundle publishes each one on a random host loopback port (see
# `_bundle.start_bundle`), and `_bundle.bundle_host_port` looks
# them up post-start. Pipelock's port is an env-overridable string
# in docker.pipelock; coerce to int here.
_PIPELOCK_PORT = int(_PIPELOCK_PORT_STR)
_SUPERVISE_PORT = SUPERVISE_PORT
@contextmanager @contextmanager
def launch( def launch(
plan: SmolmachinesBottlePlan, plan: SmolmachinesBottlePlan,
@@ -96,31 +112,129 @@ def launch(
) )
# 3. Build the BundleLaunchSpec from the (now-resolved) # 3. Build the BundleLaunchSpec from the (now-resolved)
# inner Plans: daemon subset, env, bind-mounts. # inner Plans: daemon subset, env, bind-mounts. The spec's
# ports_to_publish list expands depending on which daemons
# the agent needs to reach from the smolvm guest.
bundle_spec = _bundle_launch_spec(plan, network) bundle_spec = _bundle_launch_spec(plan, network)
token_env = _resolve_token_env(plan, os.environ) token_env = _resolve_token_env(plan, os.environ)
_bundle.start_bundle(bundle_spec, env={**os.environ, **token_env}) _bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
stack.callback(_bundle.stop_bundle, plan.slug) stack.callback(_bundle.stop_bundle, plan.slug)
# 4. smolvm VM. --from carries the pre-packed .smolmachine # 4. Discover the host-side ports docker assigned for the
# bundle's published container ports, and bind the
# agent's URLs to `127.0.0.1:<host port>`. Docker container
# IPs (192.168.x.x in the daemon's bridge) aren't
# reachable from the smolvm guest on macOS — TSI uses
# macOS networking, and macOS sees the daemon's bridge
# via the published-port loopback forward only.
#
# Proxy hop order matches the docker backend: when the
# bottle declares egress routes, the agent's first hop is
# egress (for token injection), then pipelock. Without
# routes, the agent dials pipelock directly. Whichever
# one is "agent-facing" is the daemon whose port we
# publish on host loopback; the other stays bundle-
# internal as the upstream proxy.
if plan.egress_plan.routes:
agent_facing_port = _EGRESS_PORT
else:
agent_facing_port = _PIPELOCK_PORT
agent_facing_host_port = _bundle.bundle_host_port(
plan.slug, agent_facing_port,
)
agent_proxy_url = f"http://127.0.0.1:{agent_facing_host_port}"
agent_git_gate_host = ""
if plan.git_gate_plan.upstreams:
git_gate_host_port = _bundle.bundle_host_port(
plan.slug, _GIT_GATE_PORT,
)
agent_git_gate_host = f"127.0.0.1:{git_gate_host_port}"
agent_supervise_url = ""
if plan.supervise_plan is not None:
supervise_host_port = _bundle.bundle_host_port(
plan.slug, _SUPERVISE_PORT,
)
agent_supervise_url = f"http://127.0.0.1:{supervise_host_port}/"
# Stamp the URLs onto the plan + guest_env. provision_git
# and provision_supervise read the plan fields; the agent
# reads guest_env on every exec_claude.
guest_env = {
**plan.guest_env,
"HTTPS_PROXY": agent_proxy_url,
"HTTP_PROXY": agent_proxy_url,
}
if agent_git_gate_host:
guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}"
if agent_supervise_url:
guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
plan = dataclasses.replace(
plan,
guest_env=guest_env,
agent_proxy_url=agent_proxy_url,
agent_git_gate_host=agent_git_gate_host,
agent_supervise_url=agent_supervise_url,
)
# 5. smolvm VM. --from carries the pre-packed .smolmachine
# artifact (built by prepare); --allow-cidr + -e carry the # artifact (built by prepare); --allow-cidr + -e carry the
# per-bottle TSI allowlist + env. Smolfile isn't usable # per-bottle TSI allowlist + env. The allowlist is
# here — smolvm 0.8.0 makes `--from` and `--smolfile` # `127.0.0.1/32` because every bundle daemon the agent
# mutually exclusive. # reaches is fronted by a host loopback port-forward.
# Smolfile isn't usable here — smolvm 0.8.0 makes `--from`
# and `--smolfile` mutually exclusive.
_smolvm.machine_create( _smolvm.machine_create(
plan.machine_name, plan.machine_name,
from_path=plan.agent_from_path, from_path=plan.agent_from_path,
allow_cidrs=[f"{plan.bundle_ip}/32"], allow_cidrs=["127.0.0.1/32"],
env=plan.guest_env, env=plan.guest_env,
) )
stack.callback(_smolvm.machine_delete, plan.machine_name) stack.callback(_smolvm.machine_delete, plan.machine_name)
_smolvm.machine_start(plan.machine_name) _smolvm.machine_start(plan.machine_name)
stack.callback(_smolvm.machine_stop, plan.machine_name) stack.callback(_smolvm.machine_stop, plan.machine_name)
# 5. Provision (CA / prompt / skills / git / supervise). # 6. Repair filesystem ownership + perms that smolvm's
# pack process remapped to the host invoker's uid (501
# on macOS) rather than preserving the image's expected
# ownership.
#
# - /home/node → node:node so the node user can write
# its own dotfiles (claude appendFileSync on
# ~/.claude.json otherwise bails with ENOENT/EPERM
# and the TUI hangs without surfacing the error).
# - /tmp + /var/tmp → root:root mode 1777 so non-root
# processes can create their per-uid scratch dirs
# (claude-code creates /tmp/claude-<uid>/ as soon as
# it spawns a Bash tool call).
#
# All folded into one sh -c so we only pay one
# machine_exec round trip — back-to-back exec calls
# right after machine_start hit a SIGKILL race in
# libkrun's exec channel (see provision_ca for the
# other half of this same workaround).
_smolvm.machine_exec(plan.machine_name, [
"sh", "-c",
"chown -R node:node /home/node && "
"chown root:root /tmp /var/tmp && "
"chmod 1777 /tmp /var/tmp",
])
# Wait briefly for the VM to settle. Back-to-back smolvm
# machine_exec calls immediately after machine_start
# occasionally SIGKILL the in-VM child at ~100ms (looks
# like a VM warm-up race in libkrun's exec channel).
# 1.5s is empirically enough to dodge it; provisioning
# already takes seconds so the wait is amortized.
time.sleep(1.5)
# 7. Provision (CA / prompt / skills / git / supervise).
prompt_path = provision(plan, plan.machine_name) prompt_path = provision(plan, plan.machine_name)
yield SmolmachinesBottle(plan.machine_name, prompt_path=prompt_path) yield SmolmachinesBottle(
plan.machine_name,
prompt_path=prompt_path,
guest_env=plan.guest_env,
)
finally: finally:
stack.close() stack.close()
@@ -144,9 +258,14 @@ def _bundle_launch_spec(
env: list[str] = [] env: list[str] = []
volumes: list[tuple[str, str, bool]] = [] volumes: list[tuple[str, str, bool]] = []
# PRD 0023 chunk 3: egress binds 127.0.0.1 inside the bundle # In this Docker-Desktop-compatible topology, whichever daemon
# so TSI's IP-only allowlist can't bypass pipelock. # is "agent-facing" gets its port published on the host
env.append("EGRESS_LISTEN_HOST=127.0.0.1") # loopback (see `_ensure_smolmachine`'s discovery loop) and the
# other stays bundle-internal. The bundle is NOT reachable by
# bridge IP from the smolvm guest, so the
# PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation
# isn't needed: the agent can only dial whatever daemon's
# host port we publish, period.
# --- pipelock --------------------------------------------- # --- pipelock ---------------------------------------------
pp = plan.proxy_plan pp = plan.proxy_plan
@@ -201,6 +320,21 @@ def _bundle_launch_spec(
] ]
volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False)) volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False))
# Container ports the agent reaches from the smolvm guest —
# published on host loopback so the guest can dial via TSI +
# macOS networking. The HTTP/HTTPS chokepoint is whichever
# daemon's port we publish: egress when routes are declared
# (token injection first, then forwards to bundle-internal
# pipelock), pipelock otherwise.
if ep.routes:
ports_to_publish: list[int] = [_EGRESS_PORT]
else:
ports_to_publish = [_PIPELOCK_PORT]
if gp.upstreams:
ports_to_publish.append(_GIT_GATE_PORT)
if sp is not None:
ports_to_publish.append(_SUPERVISE_PORT)
return _bundle.BundleLaunchSpec( return _bundle.BundleLaunchSpec(
slug=plan.slug, slug=plan.slug,
network_name=network, network_name=network,
@@ -210,6 +344,7 @@ def _bundle_launch_spec(
daemons_csv=",".join(daemons), daemons_csv=",".join(daemons),
environment=tuple(env), environment=tuple(env),
volumes=tuple(volumes), volumes=tuple(volumes),
ports_to_publish=tuple(ports_to_publish),
) )
@@ -1,17 +1,37 @@
"""Ephemeral local OCI registry for the smolmachines agent-image """Ephemeral local OCI registry for the smolmachines agent-image
conversion path (PRD 0023 chunk 4c). conversion path (PRD 0023 chunk 4c).
`smolvm pack create --image <ref>` only accepts registry refs it `smolvm pack create --image <ref>` only accepts OCI registry refs
can't read the local docker daemon's image cache, an OCI layout it can't read the local docker daemon's image cache, an OCI
directory, or a `docker save` tarball. To convert the agent's layout directory, or a `docker save` tarball. To convert the
Dockerfile-built image into a `.smolmachine` artifact we run a agent's Dockerfile-built image into a `.smolmachine` artifact we
short-lived `registry:2.8.3` container on `127.0.0.1:<random>`, spin up a short-lived `registry:2.8.3` container alongside a
push the locally-tagged image into it, and let smolvm pull from `crane` helper container on a private docker network, push via
there. The registry container is torn down as soon as the pack `crane push --insecure <tarball> <registry-container>:5000/...`,
completes. and let smolvm pull from the registry's published host port. The
network + both containers are torn down after the pack completes.
Loopback-only bind + the host's docker layer cache mean the round Why this two-container dance instead of plain `docker push`:
trip is fast (~5s) and there's no exposed surface on the LAN.""" - Docker Desktop's daemon runs in its own Linux VM, so its
`localhost` is not the host's loopback. A registry bound to
the host's 127.0.0.1 is unreachable from the daemon side.
- `host.docker.internal` is reachable from the daemon but isn't
in Docker's default insecure-registries CIDRs (only `::1/128`
and `127.0.0.0/8` are), so `docker push` to it tries HTTPS,
hits a plain-HTTP registry, and dies with
`http: server gave HTTP response to HTTPS client`. Adding
`host.docker.internal` to daemon.json works but is a one-time
manual step the user has to do in Docker Desktop's UI.
- Going through a docker network sidesteps the host-vs-daemon
loopback mismatch (crane and registry containers see each
other on the network) AND the HTTPS preference (crane has an
`--insecure` flag that forces plain HTTP).
The registry is also published on a random host port so smolvm
(a host process) can pull from `localhost:<port>` via Docker's
port-forward. smolvm's bundled crane auto-falls-back to HTTP for
localhost addresses, so no insecure-registries config is needed
on that side either."""
from __future__ import annotations from __future__ import annotations
@@ -21,6 +41,7 @@ import subprocess
import time import time
import uuid import uuid
from contextlib import contextmanager from contextlib import contextmanager
from dataclasses import dataclass
from typing import Iterator from typing import Iterator
from ...log import die from ...log import die
@@ -34,64 +55,150 @@ REGISTRY_IMAGE = os.environ.get(
) )
# gcr.io/go-containerregistry/crane:latest, pinned by digest. ~10MB,
# stable upstream from Google; we only invoke `crane push --insecure`
# against a localhost-equivalent registry, so the trust surface is
# narrow.
CRANE_IMAGE = os.environ.get(
"CLAUDE_BOTTLE_CRANE_IMAGE",
"gcr.io/go-containerregistry/crane@sha256:0ae17ecb34315aa7cbff28f6eddee3b7adae0b2f90101260d990804db1eb0084",
)
# Internal port the registry binds to inside its container — fixed
# by the registry:2 image. The host-side mapping is random.
_REGISTRY_CONTAINER_PORT = "5000"
# How long to wait for the registry's HTTP layer to bind before # How long to wait for the registry's HTTP layer to bind before
# giving up. Two seconds is empirically enough; bumping to 10s leaves # giving up. Two seconds is empirically enough; 10s leaves headroom
# headroom for slow CI runners without making the failure mode chatty. # for slow CI runners without making the failure mode chatty.
_READY_TIMEOUT_S = 10.0 _READY_TIMEOUT_S = 10.0
@dataclass(frozen=True)
class RegistryHandle:
    """Everything callers need to push to + pull from the ephemeral
    registry.

    `network` is the per-session docker network that a `crane push`
    container has to join in order to reach the registry by name.
    `push_endpoint` is the `<host>:<port>` form to embed in image
    refs given to the crane push container (resolves via docker
    network DNS). `pull_endpoint` is the `<host>:<port>` form a
    host process (smolvm) uses; the registry's host port mapping
    backs this."""

    # Name of the per-session docker network.
    network: str
    # `<host>:<port>` as seen from containers on `network`.
    push_endpoint: str
    # `<host>:<port>` as seen from a process on the host.
    pull_endpoint: str
@contextmanager @contextmanager
def ephemeral_registry() -> Iterator[int]: def ephemeral_registry() -> Iterator[RegistryHandle]:
"""Bring up a `registry:2.8.3` container on a random loopback """Bring up a per-session docker network + a `registry:2.8.3`
port, yield the port, force-remove the container on exit. container on it (published on a random host port), yield a
`RegistryHandle`, force-remove both on exit.
The container is started with `--rm` so a clean exit cleans up The container is started with `--rm` so a clean exit cleans up
on its own; the `finally` block force-removes on abnormal exit on its own; the `finally` block force-removes on abnormal exit
(the calling process crashes between yield and close).""" (the calling process crashes between yield and close)."""
name = f"claude-bottle-registry-{uuid.uuid4().hex[:12]}" session_id = uuid.uuid4().hex[:12]
network = f"claude-bottle-registry-net-{session_id}"
registry_name = f"claude-bottle-registry-{session_id}"
subprocess.run( subprocess.run(
[ ["docker", "network", "create", network],
"docker", "run", "-d", "--rm",
"--name", name,
# `127.0.0.1::5000` = bind to loopback, pick a random host
# port. No LAN exposure; the container hangs around just
# long enough for one push + one pack-create.
"-p", "127.0.0.1::5000",
REGISTRY_IMAGE,
],
check=True, check=True,
capture_output=True, capture_output=True,
) )
try: try:
port = _host_port(name) subprocess.run(
_wait_ready(port) [
yield port "docker", "run", "-d", "--rm",
"--name", registry_name,
"--network", network,
# `-p :5000` (no IP prefix) binds the container's
# port 5000 on a random host port across all
# interfaces. The host side reaches the registry
# via this port — smolvm's `pack create` pulls from
# `localhost:<port>` and the docker port-forward
# routes there.
"-p", _REGISTRY_CONTAINER_PORT,
REGISTRY_IMAGE,
],
check=True,
capture_output=True,
)
try:
port = _host_port(registry_name)
_wait_ready(port)
yield RegistryHandle(
network=network,
push_endpoint=f"{registry_name}:{_REGISTRY_CONTAINER_PORT}",
pull_endpoint=f"localhost:{port}",
)
finally:
subprocess.run(
["docker", "rm", "-f", registry_name],
check=False,
capture_output=True,
)
finally: finally:
subprocess.run( subprocess.run(
["docker", "rm", "-f", name], ["docker", "network", "rm", network],
check=False, check=False,
capture_output=True, capture_output=True,
) )
def _host_port(name: str) -> int: def crane_push_tarball(handle: RegistryHandle, tarball_path: str, ref: str) -> None:
"""Resolve the host-side port docker mapped to the registry's """Run `crane push --insecure <tarball> <ref>` inside a one-shot
container port 5000. `docker port <name> 5000/tcp` returns one or container on the registry's docker network. `ref` should
more `host:port` lines; the loopback-only -p binding ensures we reference the registry by `handle.push_endpoint` so the crane
get exactly `127.0.0.1:<port>`.""" container resolves it via docker network DNS.
Doesn't go through `docker push` to avoid the Docker-Desktop
daemon's HTTPS preference for non-loopback hostnames — crane's
`--insecure` flag forces plain HTTP, which is what the
registry container speaks."""
r = subprocess.run( r = subprocess.run(
["docker", "port", name, "5000/tcp"], [
"docker", "run", "--rm",
"--network", handle.network,
"-v", f"{tarball_path}:/img.tar:ro",
CRANE_IMAGE,
"push", "--insecure", "/img.tar", ref,
],
capture_output=True, capture_output=True,
text=True, text=True,
check=False, check=False,
) )
if r.returncode != 0: if r.returncode != 0:
die( die(
f"docker port {name} 5000/tcp failed: " f"crane push of {tarball_path!r} to {ref!r} failed: "
f"{(r.stderr or r.stdout or '').strip() or '<no output>'}"
)
def _host_port(name: str) -> int:
"""Resolve the host-side port docker mapped to the registry's
container port. `docker port <name> 5000/tcp` returns one or
more `host:port` lines (one per address family) we take the
first."""
r = subprocess.run(
["docker", "port", name, f"{_REGISTRY_CONTAINER_PORT}/tcp"],
capture_output=True,
text=True,
check=False,
)
if r.returncode != 0:
die(
f"docker port {name} {_REGISTRY_CONTAINER_PORT}/tcp failed: "
f"{(r.stderr or '').strip() or '<no stderr>'}" f"{(r.stderr or '').strip() or '<no stderr>'}"
) )
# `127.0.0.1:54321\n` — split on the last colon to handle the # `0.0.0.0:54321\n[::]:54321\n` — split on the last colon to
# `host:port` shape without parsing IP literals. # handle either IPv4 or IPv6 host syntax.
line = (r.stdout or "").splitlines()[0].strip() line = (r.stdout or "").splitlines()[0].strip()
_, _, port_str = line.rpartition(":") _, _, port_str = line.rpartition(":")
try: try:
@@ -102,12 +209,15 @@ def _host_port(name: str) -> int:
def _wait_ready(port: int) -> None: def _wait_ready(port: int) -> None:
"""Block until the registry's HTTP layer accepts a TCP connection """Block until the registry's HTTP layer accepts a TCP
on `127.0.0.1:<port>`, or `_READY_TIMEOUT_S` elapses. connection on `127.0.0.1:<port>`, or `_READY_TIMEOUT_S`
elapses.
A successful TCP connect is sufficient registry:2.8.3 binds A successful TCP connect is sufficient registry:2.8.3 binds
after it's ready to serve `/v2/` requests, so the push that after it's ready to serve `/v2/` requests, so the push that
follows will land on a working server.""" follows will land on a working server. We probe loopback
specifically (not via the docker network) because this helper
runs on the host."""
deadline = time.monotonic() + _READY_TIMEOUT_S deadline = time.monotonic() + _READY_TIMEOUT_S
last_err: Exception | None = None last_err: Exception | None = None
while time.monotonic() < deadline: while time.monotonic() < deadline:
+53 -26
View File
@@ -34,7 +34,7 @@ from ...pipelock import PipelockProxy
from ...supervise import Supervise from ...supervise import Supervise
from . import smolvm as _smolvm from . import smolvm as _smolvm
from .bottle_plan import SmolmachinesBottlePlan from .bottle_plan import SmolmachinesBottlePlan
from .local_registry import ephemeral_registry from .local_registry import crane_push_tarball, ephemeral_registry
from .util import smolmachines_bundle_subnet, smolmachines_preflight from .util import smolmachines_bundle_subnet, smolmachines_preflight
@@ -89,22 +89,23 @@ def resolve_plan(
subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug) subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug)
# Agent's env. IP literals; no DNS resolution inside the guest # Agent's env: the prepare-time view doesn't yet know the
# (TSI allowlist contains only `<bundle_ip>/32` — no resolver). # host loopback ports the bundle's daemons get published on
# (those come from docker AFTER `docker run` returns), so
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are
# populated in launch.py and stamped onto guest_env there.
# What we set here is the part that doesn't depend on
# bundle bringup — bottle.env literals, the empty-NO_PROXY
# safe default, and the TLS trust env trio
# (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE)
# pointing at Debian's update-ca-certificates output bundle.
guest_env: dict[str, str] = { guest_env: dict[str, str] = {
**bottle.env, **bottle.env,
"HTTPS_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
"HTTP_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
"NO_PROXY": "localhost,127.0.0.1", "NO_PROXY": "localhost,127.0.0.1",
"NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt",
"SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt",
"REQUESTS_CA_BUNDLE": "/etc/ssl/certs/ca-certificates.crt",
} }
if bottle.git:
guest_env["GIT_GATE_URL"] = (
f"git://{bundle_ip}:{_BUNDLE_GIT_GATE_PORT}"
)
if bottle.supervise:
guest_env["MCP_SUPERVISE_URL"] = (
f"http://{bundle_ip}:{_BUNDLE_SUPERVISE_PORT}"
)
# Inner Plans for the four bundle daemons. The ABCs are # Inner Plans for the four bundle daemons. The ABCs are
# platform-neutral — `.prepare()` writes config files + returns # platform-neutral — `.prepare()` writes config files + returns
@@ -124,6 +125,19 @@ def resolve_plan(
egress_dir.mkdir(parents=True, exist_ok=True) egress_dir.mkdir(parents=True, exist_ok=True)
egress_plan = Egress().prepare(bottle, slug, egress_dir) egress_plan = Egress().prepare(bottle, slug, egress_dir)
# Claude-code refuses to start without *something* it
# recognises as a credential. When the bottle has an egress
# route carrying the `claude_code_oauth` role marker, egress
# strips + re-injects the real Authorization header on the
# outbound leg using a token held in egress's own environ — so
# the agent gets a non-secret placeholder here (matches the
# docker backend's forwarded_env logic in
# claude_bottle/backend/docker/prepare.py).
if any("claude_code_oauth" in r.roles for r in egress_plan.routes):
guest_env["CLAUDE_CODE_OAUTH_TOKEN"] = "egress-placeholder"
guest_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
guest_env.setdefault("DISABLE_ERROR_REPORTING", "1")
supervise_plan = None supervise_plan = None
if bottle.supervise: if bottle.supervise:
supervise_dir = supervise_state_dir(slug) supervise_dir = supervise_state_dir(slug)
@@ -184,14 +198,19 @@ def _ensure_smolmachine(image_ref: str) -> Path:
a launcher binary at `.smolmachine` plus the sidecar alongside a launcher binary at `.smolmachine` plus the sidecar alongside
it; the sidecar is the actual artifact). it; the sidecar is the actual artifact).
Conversion path: `docker build` (the existing layer cache makes Conversion path: `docker build` (the existing layer cache
no-change rebuilds cheap) `docker tag` with a makes no-change rebuilds cheap) `docker save` to a tarball
`localhost:<port>/...` ref bring up the ephemeral registry spin up an ephemeral registry on a private docker network
container `docker push` into it `smolvm pack create --image `crane push --insecure` from a one-shot container on the same
<localhost ref>` tear down the registry. Each pack-create network `smolvm pack create --image localhost:<host port>/...`
costs several seconds even on a hot cache, so we skip the whole tear down the registry + network. The crane push detour
pipeline when the cached sidecar is already on disk for this sidesteps the Docker-Desktop daemon's HTTPS preference for
image ID.""" non-loopback registries see the `local_registry` module
docstring for the gory details.
Each pack-create costs several seconds even on a hot cache,
so we skip the whole pipeline when the cached sidecar is
already on disk for this image ID."""
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True) _SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
docker_mod.build_image(image_ref, _REPO_DIR) docker_mod.build_image(image_ref, _REPO_DIR)
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to # `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
@@ -202,9 +221,17 @@ def _ensure_smolmachine(image_ref: str) -> Path:
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine" sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
if sidecar.is_file(): if sidecar.is_file():
return sidecar return sidecar
with ephemeral_registry() as port: tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
local_ref = f"localhost:{port}/claude-bottle:{digest}" docker_mod.save(image_ref, str(tarball))
docker_mod.tag(image_ref, local_ref) try:
docker_mod.push(local_ref) with ephemeral_registry() as handle:
_smolvm.pack_create(local_ref, binary) push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
crane_push_tarball(handle, str(tarball), push_ref)
_smolvm.pack_create(pack_ref, binary)
finally:
# Tarball is ~500MB-1GB for the agent image; reclaim once
# the smolmachine artifact exists. The artifact itself is
# the long-lived cache entry.
tarball.unlink(missing_ok=True)
return sidecar return sidecar
@@ -66,8 +66,29 @@ def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
# default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / # default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python # REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
# `requests` / libraries that don't load the system bundle. # `requests` / libraries that don't load the system bundle.
_smolvm.machine_exec(target, ["chmod", "644", AGENT_CA_PATH]) #
_smolvm.machine_exec(target, ["update-ca-certificates"]) # chown + chmod + update-ca-certificates run in one
# `sh -c` so we only pay one machine_exec round trip; the
# `&&` chaining surfaces the first failure as the return
# code.
r = _smolvm.machine_exec(target, [
"sh", "-c",
f"chown root:root {AGENT_CA_PATH} && "
f"chmod 644 {AGENT_CA_PATH} && "
f"update-ca-certificates",
])
if r.returncode != 0 or "1 added" not in (r.stdout or ""):
# update-ca-certificates not adding our cert is fatal —
# claude-code's TLS handshake against the egress-MITM'd
# api.anthropic.com would fail downstream. Bail early
# with what we can see (output is captured by smolvm so
# we can surface it).
die(
f"update-ca-certificates didn't add the agent CA "
f"(exit {r.returncode}): "
f"stdout={(r.stdout or '').strip()!r} "
f"stderr={(r.stderr or '').strip()!r}"
)
# Stdlib SHA-256 of the cert's DER bytes — the standard # Stdlib SHA-256 of the cert's DER bytes — the standard
# fingerprint form. Never the private key. # fingerprint form. Never the private key.
@@ -28,7 +28,6 @@ from pathlib import Path
from ....git_gate import git_gate_render_gitconfig from ....git_gate import git_gate_render_gitconfig
from ....log import info from ....log import info
from ...docker.git_gate import GIT_GATE_PORT
from .. import smolvm as _smolvm from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan from ..bottle_plan import SmolmachinesBottlePlan
@@ -79,10 +78,12 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non
if not bottle.git: if not bottle.git:
return return
# IP-literal form: the TSI allowlist passes <bundle_ip>/32 and # `127.0.0.1:<host port>` form: the bundle's git-gate port
# nothing else, so the agent has to dial the gate by IP+port. # is published on host loopback at launch time so the
gate_host = f"{plan.bundle_ip}:{GIT_GATE_PORT}" # smolvm guest (which can only reach macOS networking via
content = git_gate_render_gitconfig(bottle.git, gate_host) # TSI, not the docker bridge IP) can dial it. launch.py
# populates `plan.agent_git_gate_host` after bundle bringup.
content = git_gate_render_gitconfig(bottle.git, plan.agent_git_gate_host)
guest_gitconfig = f"{_guest_home()}/.gitconfig" guest_gitconfig = f"{_guest_home()}/.gitconfig"
# Stage the file under the plan's stage_dir so `machine cp` # Stage the file under the plan's stage_dir so `machine cp`
@@ -14,7 +14,6 @@ short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
from __future__ import annotations from __future__ import annotations
from ....log import info, warn from ....log import info, warn
from ....supervise import SUPERVISE_PORT
from .. import smolvm as _smolvm from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan from ..bottle_plan import SmolmachinesBottlePlan
@@ -22,25 +21,33 @@ from ..bottle_plan import SmolmachinesBottlePlan
_SUPERVISE_MCP_NAME = "supervise" _SUPERVISE_MCP_NAME = "supervise"
def supervise_mcp_url(bundle_ip: str) -> str:
return f"http://{bundle_ip}:{SUPERVISE_PORT}/"
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None: def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Run `claude mcp add` inside the guest to register the """Run `claude mcp add` inside the guest to register the
supervise sidecar in claude-code's user config. No-op when supervise sidecar in claude-code's user config. No-op when
bottle.supervise is False. bottle.supervise is False.
The URL is the agent-side endpoint launch.py populated after
bundle bringup `http://127.0.0.1:<host port>/` rather than
the bundle's docker bridge IP, because that bridge isn't
reachable from the smolvm guest on macOS.
Failure is logged but not fatal: the bottle still works (you Failure is logged but not fatal: the bottle still works (you
just can't call supervise tools from the agent until the entry just can't call supervise tools from the agent until the entry
is added manually). The operator sees the warning at launch.""" is added manually). The operator sees the warning at launch."""
if plan.supervise_plan is None: if plan.supervise_plan is None:
return return
url = supervise_mcp_url(plan.bundle_ip) url = plan.agent_supervise_url
info(f"registering supervise MCP server in agent claude config → {url}") info(f"registering supervise MCP server in agent claude config → {url}")
# `claude mcp add --scope user` writes to ~/.claude.json. The
# agent is the `node` user; smolvm machine_exec runs as root
# by default, so we have to switch user explicitly and set
# HOME so the config lands in /home/node/.claude.json (where
# the agent's claude actually reads it from).
r = _smolvm.machine_exec( r = _smolvm.machine_exec(
target, target,
[ [
"runuser", "-u", "node", "--",
"env", "HOME=/home/node",
"claude", "mcp", "add", "claude", "mcp", "add",
"--scope", "user", "--scope", "user",
"--transport", "http", "--transport", "http",
@@ -57,4 +64,4 @@ def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
) )
__all__ = ["provision_supervise", "supervise_mcp_url"] __all__ = ["provision_supervise"]
@@ -70,6 +70,13 @@ class BundleLaunchSpec:
environment: Sequence[str] = field(default_factory=tuple) environment: Sequence[str] = field(default_factory=tuple)
# (host_path, container_path, read_only) bind mounts. # (host_path, container_path, read_only) bind mounts.
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple) volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
# Container ports to publish on the host's 127.0.0.1, random
# host-side port per entry. The smolvm guest's TSI talks via
# macOS networking, so docker container IPs (192.168.x.x in
# the daemon's bridge) aren't directly reachable from the
# guest — host-loopback port-forwards are. Egress's port
# is bundle-internal and never published.
ports_to_publish: Sequence[int] = field(default_factory=tuple)
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None: def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
@@ -135,6 +142,11 @@ def start_bundle(spec: BundleLaunchSpec, *,
for host_path, container_path, read_only in spec.volumes: for host_path, container_path, read_only in spec.volumes:
suffix = ":ro" if read_only else "" suffix = ":ro" if read_only else ""
argv += ["-v", f"{host_path}:{container_path}{suffix}"] argv += ["-v", f"{host_path}:{container_path}{suffix}"]
# Loopback-only host port-forwards — the smolvm guest's TSI
# uses macOS networking, and macOS loopback is the only host
# surface that round-trips into Docker Desktop's daemon VM.
for port in spec.ports_to_publish:
argv += ["-p", f"127.0.0.1::{port}"]
argv.append(spec.image) argv.append(spec.image)
result = subprocess.run( result = subprocess.run(
argv, capture_output=True, text=True, argv, capture_output=True, text=True,
@@ -147,6 +159,33 @@ def start_bundle(spec: BundleLaunchSpec, *,
) )
def bundle_host_port(slug: str, container_port: int) -> int:
    """Return the host-side port docker mapped to `container_port`.

    Runs `docker port <bundle> <container_port>/tcp` against the
    bundle container for `slug` and parses the port out of the first
    `host:port` line of output. Intended to be called after
    `start_bundle` for each entry in
    `BundleLaunchSpec.ports_to_publish`, so the launch step can build
    the agent's HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
    `127.0.0.1:<host port>` form.

    Calls `die()` when `docker port` exits non-zero, or when its
    output is empty or does not end in an integer port."""
    container = bundle_container_name(slug)
    result = subprocess.run(
        ["docker", "port", container, f"{container_port}/tcp"],
        capture_output=True, text=True, check=False,
    )
    if result.returncode != 0:
        die(
            f"docker port {container} {container_port}/tcp failed: "
            f"{(result.stderr or '').strip() or '<no stderr>'}"
        )
    # `127.0.0.1:54321\n` — rpartition on last colon gives the port.
    # An empty stdout yields no lines at all; fall back to "" so the
    # failure is reported through die() below instead of escaping as
    # an IndexError from `[0]`.
    lines = (result.stdout or "").splitlines()
    line = lines[0].strip() if lines else ""
    _, _, port_str = line.rpartition(":")
    try:
        return int(port_str)
    except ValueError:
        die(f"unexpected `docker port` output: {line!r}")
    return -1  # unreachable; die() never returns
def stop_bundle(slug: str) -> None: def stop_bundle(slug: str) -> None:
"""Idempotent: a missing container returns success.""" """Idempotent: a missing container returns success."""
container = bundle_container_name(slug) container = bundle_container_name(slug)
+10 -1
View File
@@ -117,12 +117,21 @@ def machine_create(
Smolfile because `--from` and `--smolfile` are themselves Smolfile because `--from` and `--smolfile` are themselves
mutually exclusive in smolvm 0.8.0 and we want `--from`'s mutually exclusive in smolvm 0.8.0 and we want `--from`'s
no-pull-at-start property. The flag form gives the same no-pull-at-start property. The flag form gives the same
result without the Smolfile complication.""" result without the Smolfile complication.
`--net` is sent explicitly when `allow_cidrs` is non-empty.
smolvm 0.8.0's docs say `--allow-cidr` implies `--net`, but
empirically the implication only fires when no `--from` is
set `--from PATH --allow-cidr X/32` silently produces a
machine with `network: false` and no routes in the guest, so
the agent can't reach the bundle's pinned IP."""
args: list[str] = ["machine", "create"] args: list[str] = ["machine", "create"]
if image is not None: if image is not None:
args += ["--image", image] args += ["--image", image]
if from_path is not None: if from_path is not None:
args += ["--from", str(from_path)] args += ["--from", str(from_path)]
if allow_cidrs:
args.append("--net")
for cidr in allow_cidrs: for cidr in allow_cidrs:
args += ["--allow-cidr", cidr] args += ["--allow-cidr", cidr]
if env: if env:
+16
View File
@@ -600,6 +600,22 @@ PRD 0024's bundle image is a prerequisite — this PRD assumes
the plan is to filter on a deterministic name prefix the plan is to filter on a deterministic name prefix
`claude-bottle-<slug>` + cross-reference with on-disk metadata `claude-bottle-<slug>` + cross-reference with on-disk metadata
under `state/<slug>/`. under `state/<slug>/`.
8. **Loopback scoping (Docker Desktop pivot).** The original
design pinned the bundle at a docker bridge IP and set TSI's
allowlist to `<bundle-ip>/32`. On Docker Desktop / macOS the
daemon runs inside its own Linux VM, so bridge IPs aren't
reachable from macOS networking — TSI's syscall impersonation
can't reach them. Resolution: publish each agent-facing bundle
port on host loopback (`-p 127.0.0.1::<port>`) and set TSI to
`127.0.0.1/32`. **This widens the TSI allowlist to anything
bound to macOS's loopback** — postgres, dev servers, other
bottles' published ports, mDNSResponder, etc. The agent isn't
meant to reach them, but TSI can't filter by port. Follow-up
to scope back: bind each bottle's bundle ports on a per-bottle
loopback alias (e.g. `127.0.0.2` for bottle A, `127.0.0.3` for
B) added via `ifconfig lo0 alias`, set TSI to that single /32.
Needs sudo for alias setup; a small daemon-or-script we ship
alongside the launcher could handle it.
## References ## References
+4 -12
View File
@@ -54,26 +54,18 @@ class TestImageId(unittest.TestCase):
self.assertIn("missing:tag", die.call_args.args[0]) self.assertIn("missing:tag", die.call_args.args[0])
class TestTagPush(unittest.TestCase): class TestSave(unittest.TestCase):
def test_tag_runs_docker_tag(self): def test_save_runs_docker_save(self):
with patch.object( with patch.object(
docker_mod.subprocess, "run", return_value=_ok(), docker_mod.subprocess, "run", return_value=_ok(),
) as run: ) as run:
docker_mod.tag("claude-bottle:latest", "localhost:5000/cb:abc") docker_mod.save("claude-bottle:latest", "/tmp/img.tar")
argv = run.call_args.args[0] argv = run.call_args.args[0]
self.assertEqual( self.assertEqual(
["docker", "tag", "claude-bottle:latest", "localhost:5000/cb:abc"], ["docker", "save", "claude-bottle:latest", "-o", "/tmp/img.tar"],
argv, argv,
) )
def test_push_runs_docker_push(self):
with patch.object(
docker_mod.subprocess, "run", return_value=_ok(),
) as run:
docker_mod.push("localhost:5000/cb:abc")
argv = run.call_args.args[0]
self.assertEqual(["docker", "push", "localhost:5000/cb:abc"], argv)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
+159 -50
View File
@@ -1,15 +1,16 @@
"""Unit: ephemeral local-registry helper (PRD 0023 chunk 4c). """Unit: ephemeral local-registry helper (PRD 0023 chunk 4c).
The helper brings up a `registry:2.8.3` container on a random The helper brings up a `registry:2.8.3` container on a private
loopback port, yields the port, and tears the container down on docker network with a random host-side port, yields a
exit. Tests mock `subprocess.run` + `socket.create_connection` so `RegistryHandle`, and tears the container + network down on exit.
they run without docker.""" Tests mock `subprocess.run` + `socket.create_connection` so they
run without docker."""
from __future__ import annotations from __future__ import annotations
import subprocess import subprocess
import unittest import unittest
from unittest.mock import call, patch from unittest.mock import patch
from claude_bottle.backend.smolmachines import local_registry from claude_bottle.backend.smolmachines import local_registry
@@ -20,38 +21,81 @@ def _ok(stdout: str = "", stderr: str = "") -> subprocess.CompletedProcess:
) )
def _fail(stderr: str = "boom") -> subprocess.CompletedProcess:
return subprocess.CompletedProcess(
args=[], returncode=1, stdout="", stderr=stderr,
)
# Run sequence per ephemeral_registry() call:
# docker network create -> ok
# docker run -d (registry) -> ok (container id)
# docker port (host port) -> ok (mapping line)
# docker rm -f (registry) -> ok (in finally)
# docker network rm -> ok (in finally)
def _stock_run_sequence(port_line: str = "0.0.0.0:54321\n"):
    """Return the five successful `subprocess.run` results one
    `ephemeral_registry()` session consumes, in call order.
    `port_line` is the stdout handed back for the `docker port`
    step."""
    network_create = _ok()
    registry_run = _ok(stdout="<container-id>\n")
    port_lookup = _ok(stdout=port_line)
    container_rm = _ok()
    network_rm = _ok()
    return [
        network_create,
        registry_run,
        port_lookup,
        container_rm,
        network_rm,
    ]
class TestEphemeralRegistry(unittest.TestCase): class TestEphemeralRegistry(unittest.TestCase):
def test_yields_host_port_parsed_from_docker_port(self): def test_yields_handle_with_network_and_endpoints(self):
# docker run + docker port + docker rm in that order; the
# port command returns `127.0.0.1:54321` for the loopback
# binding.
with patch.object( with patch.object(
local_registry.subprocess, "run", local_registry.subprocess, "run",
side_effect=[ side_effect=_stock_run_sequence(),
_ok(stdout="<container-id>\n"),
_ok(stdout="127.0.0.1:54321\n"),
_ok(),
],
) as run, patch.object( ) as run, patch.object(
local_registry.socket, "create_connection", local_registry.socket, "create_connection",
return_value=_FakeSocket(), return_value=_FakeSocket(),
): ):
with local_registry.ephemeral_registry() as port: with local_registry.ephemeral_registry() as handle:
self.assertEqual(54321, port) # push_endpoint points at the registry container by
# its docker-network name on its container port.
self.assertTrue(
handle.push_endpoint.startswith(
"claude-bottle-registry-"
)
)
self.assertTrue(handle.push_endpoint.endswith(":5000"))
# pull_endpoint is the host-side mapping for smolvm.
self.assertEqual("localhost:54321", handle.pull_endpoint)
# network name is the per-session bridge crane joins.
self.assertTrue(
handle.network.startswith("claude-bottle-registry-net-")
)
# docker network create + docker run + docker port + rm -f + network rm
self.assertEqual(5, run.call_count)
# docker run, docker port, docker rm -f def test_registry_run_publishes_random_port_across_interfaces(self):
self.assertEqual(3, run.call_count)
run_argv = run.call_args_list[0].args[0]
self.assertEqual(["docker", "run"], run_argv[:2])
self.assertIn("--rm", run_argv)
# Loopback-only port binding so the registry isn't exposed
# on the LAN even briefly.
self.assertIn("127.0.0.1::5000", run_argv)
def test_force_removes_container_on_clean_exit(self):
with patch.object( with patch.object(
local_registry.subprocess, "run", local_registry.subprocess, "run",
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()], side_effect=_stock_run_sequence(),
) as run, patch.object(
local_registry.socket, "create_connection",
return_value=_FakeSocket(),
):
with local_registry.ephemeral_registry():
pass
# second call is the docker run for the registry
run_argv = run.call_args_list[1].args[0]
self.assertEqual(["docker", "run"], run_argv[:2])
self.assertIn("--rm", run_argv)
# `-p 5000` (no IP prefix) — needed so the host-published
# port is reachable from BOTH the host (for smolvm) and the
# docker daemon (for the docker port command to find it).
self.assertIn("5000", run_argv)
# And the registry is attached to the same per-session
# network the crane push container joins.
self.assertIn("--network", run_argv)
def test_force_removes_container_and_network_on_clean_exit(self):
with patch.object(
local_registry.subprocess, "run",
side_effect=_stock_run_sequence(),
) as run, patch.object( ) as run, patch.object(
local_registry.socket, "create_connection", local_registry.socket, "create_connection",
return_value=_FakeSocket(), return_value=_FakeSocket(),
@@ -59,14 +103,16 @@ class TestEphemeralRegistry(unittest.TestCase):
with local_registry.ephemeral_registry(): with local_registry.ephemeral_registry():
pass pass
# Last call is `docker rm -f <name>`. # Last two calls are `docker rm -f <container>` then
last_argv = run.call_args_list[-1].args[0] # `docker network rm <network>`.
self.assertEqual(["docker", "rm", "-f"], last_argv[:3]) argvs = [c.args[0] for c in run.call_args_list]
self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
def test_force_removes_container_on_exception_inside_with(self): def test_force_removes_on_exception_inside_with(self):
with patch.object( with patch.object(
local_registry.subprocess, "run", local_registry.subprocess, "run",
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()], side_effect=_stock_run_sequence(),
) as run, patch.object( ) as run, patch.object(
local_registry.socket, "create_connection", local_registry.socket, "create_connection",
return_value=_FakeSocket(), return_value=_FakeSocket(),
@@ -75,15 +121,15 @@ class TestEphemeralRegistry(unittest.TestCase):
with local_registry.ephemeral_registry(): with local_registry.ephemeral_registry():
raise RuntimeError("inside with") raise RuntimeError("inside with")
# rm -f still ran on exception. # Both teardowns still ran.
last_argv = run.call_args_list[-1].args[0] argvs = [c.args[0] for c in run.call_args_list]
self.assertEqual(["docker", "rm", "-f"], last_argv[:3]) self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
def test_wait_ready_times_out_when_socket_never_connects(self): def test_wait_ready_times_out(self):
# Drop the timeout to a value that fits the test budget.
with patch.object(local_registry, "_READY_TIMEOUT_S", 0.1), patch.object( with patch.object(local_registry, "_READY_TIMEOUT_S", 0.1), patch.object(
local_registry.subprocess, "run", local_registry.subprocess, "run",
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()], side_effect=_stock_run_sequence(),
) as run, patch.object( ) as run, patch.object(
local_registry.socket, "create_connection", local_registry.socket, "create_connection",
side_effect=OSError("conn refused"), side_effect=OSError("conn refused"),
@@ -95,18 +141,27 @@ class TestEphemeralRegistry(unittest.TestCase):
with local_registry.ephemeral_registry(): with local_registry.ephemeral_registry():
self.fail("yield reached despite unreachable registry") self.fail("yield reached despite unreachable registry")
die.assert_called_once() die.assert_called_once()
# rm -f still ran (cleanup goes through the finally block). # Teardown still ran via the finally blocks.
last_argv = run.call_args_list[-1].args[0] argvs = [c.args[0] for c in run.call_args_list]
self.assertEqual(["docker", "rm", "-f"], last_argv[:3]) self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
def test_unique_container_name_per_call(self): def test_unique_session_ids_per_call(self):
names: list[str] = [] sessions: list[tuple[str, str]] = []
def capture(argv, *a, **kw): def capture(argv, *a, **kw):
if argv[:3] == ["docker", "network", "create"]:
return _ok()
if argv[:2] == ["docker", "run"]: if argv[:2] == ["docker", "run"]:
names.append(argv[argv.index("--name") + 1]) # `--name <registry-name>` and `--network <net-name>`
return _ok(stdout="cid\n" if argv[:2] == ["docker", "run"] # both encode the session id.
else "127.0.0.1:1\n") name = argv[argv.index("--name") + 1]
network = argv[argv.index("--network") + 1]
sessions.append((name, network))
return _ok(stdout="cid\n")
if argv[:2] == ["docker", "port"]:
return _ok(stdout="0.0.0.0:1\n")
return _ok()
with patch.object( with patch.object(
local_registry.subprocess, "run", side_effect=capture, local_registry.subprocess, "run", side_effect=capture,
@@ -119,10 +174,64 @@ class TestEphemeralRegistry(unittest.TestCase):
with local_registry.ephemeral_registry(): with local_registry.ephemeral_registry():
pass pass
self.assertEqual(2, len(names)) self.assertEqual(2, len(sessions))
self.assertNotEqual(names[0], names[1]) self.assertNotEqual(sessions[0], sessions[1])
for n in names:
self.assertTrue(n.startswith("claude-bottle-registry-"))
class TestCranePushTarball(unittest.TestCase):
    """`crane_push_tarball`: checks the argv it hands to
    `subprocess.run` on success and that it calls `die()` when the
    crane container exits non-zero."""

    def test_runs_crane_container_on_registry_network_with_insecure_flag(self):
        registry = local_registry.RegistryHandle(
            network="cb-registry-net-x",
            push_endpoint="cb-registry-x:5000",
            pull_endpoint="localhost:54321",
        )
        ref = "cb-registry-x:5000/cb:abc"
        run_patch = patch.object(
            local_registry.subprocess, "run", return_value=_ok(),
        )
        with run_patch as run:
            local_registry.crane_push_tarball(registry, "/tmp/img.tar", ref)

        cmd = run.call_args.args[0]
        # A one-shot `docker run --rm` joined to the registry's own
        # docker network, so the registry is reachable by container
        # name and the push leg needs no host port-forward.
        self.assertEqual("docker", cmd[0])
        self.assertEqual("run", cmd[1])
        for flag in ("--rm", "--network"):
            self.assertIn(flag, cmd)
        network_arg = cmd[cmd.index("--network") + 1]
        self.assertEqual("cb-registry-net-x", network_arg)
        # Tarball bind-mounted read-only at /img.tar.
        for mount_token in ("-v", "/tmp/img.tar:/img.tar:ro"):
            self.assertIn(mount_token, cmd)
        # crane itself is invoked with --insecure so plain HTTP is
        # accepted against the registry container.
        for crane_token in ("push", "--insecure", "/img.tar", ref):
            self.assertIn(crane_token, cmd)

    def test_dies_when_crane_returns_non_zero(self):
        registry = local_registry.RegistryHandle(
            network="cb-net", push_endpoint="cb:5000", pull_endpoint="localhost:1",
        )
        failing_run = patch.object(
            local_registry.subprocess, "run",
            return_value=_fail("push failed"),
        )
        fatal = patch.object(
            local_registry, "die", side_effect=SystemExit("die"),
        )
        with failing_run, fatal as die:
            with self.assertRaises(SystemExit):
                local_registry.crane_push_tarball(
                    registry, "/tmp/img.tar", "cb:5000/cb:abc",
                )
        die.assert_called_once()
        # The error message names both what was pushed and where.
        message = die.call_args.args[0]
        for fragment in ("/tmp/img.tar", "cb:5000/cb:abc"):
            self.assertIn(fragment, message)
class _FakeSocket: class _FakeSocket:
+46 -27
View File
@@ -40,32 +40,43 @@ class TestEnsureSmolmachine(unittest.TestCase):
_prepare.docker_mod, "image_id", _prepare.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff", return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object( ), patch.object(
_prepare.docker_mod, "save",
) as save, patch.object(
_prepare, "ephemeral_registry", _prepare, "ephemeral_registry",
) as registry, patch.object( ) as registry, patch.object(
_prepare.docker_mod, "tag", _prepare, "crane_push_tarball",
) as tag, patch.object(
_prepare.docker_mod, "push",
) as push, patch.object( ) as push, patch.object(
_prepare._smolvm, "pack_create", _prepare._smolvm, "pack_create",
) as pack: ) as pack:
result = _prepare._ensure_smolmachine("claude-bottle:latest") result = _prepare._ensure_smolmachine("claude-bottle:latest")
self.assertEqual(sidecar, result) self.assertEqual(sidecar, result)
# build still runs (Dockerfile edits land without manual rmi) # build still runs (Dockerfile edits land without manual rmi).
build.assert_called_once() build.assert_called_once()
# No registry, no tag, no push, no pack on cache hit. # No save (500MB tarball), no registry, no push, no pack on
# cache hit.
save.assert_not_called()
registry.assert_not_called() registry.assert_not_called()
tag.assert_not_called()
push.assert_not_called() push.assert_not_called()
pack.assert_not_called() pack.assert_not_called()
def test_cache_miss_runs_build_tag_push_pack_in_order(self): def test_cache_miss_runs_build_save_push_pack_in_order(self):
digest = "0123456789abcdef" digest = "0123456789abcdef"
# ephemeral_registry is a context manager yielding the port. # ephemeral_registry yields a RegistryHandle with the
# docker network + a push endpoint (container DNS) and
# pull endpoint (host port-forward).
from claude_bottle.backend.smolmachines.local_registry import (
RegistryHandle,
)
class _Reg: class _Reg:
def __enter__(self_inner): def __enter__(self_inner):
return 54321 return RegistryHandle(
network="cb-net-xyz",
push_endpoint="cb-registry-xyz:5000",
pull_endpoint="localhost:54321",
)
def __exit__(self_inner, *exc): def __exit__(self_inner, *exc):
return False return False
@@ -83,13 +94,13 @@ class TestEnsureSmolmachine(unittest.TestCase):
_prepare.docker_mod, "image_id", _prepare.docker_mod, "image_id",
return_value=f"sha256:{digest}fffffffffffffffff", return_value=f"sha256:{digest}fffffffffffffffff",
), patch.object( ), patch.object(
_prepare.docker_mod, "save",
side_effect=record("save"),
) as save, patch.object(
_prepare, "ephemeral_registry", _prepare, "ephemeral_registry",
return_value=_Reg(), return_value=_Reg(),
), patch.object( ), patch.object(
_prepare.docker_mod, "tag", _prepare, "crane_push_tarball",
side_effect=record("tag"),
) as tag, patch.object(
_prepare.docker_mod, "push",
side_effect=record("push"), side_effect=record("push"),
) as push, patch.object( ) as push, patch.object(
_prepare._smolvm, "pack_create", _prepare._smolvm, "pack_create",
@@ -97,23 +108,31 @@ class TestEnsureSmolmachine(unittest.TestCase):
) as pack: ) as pack:
_prepare._ensure_smolmachine("claude-bottle:latest") _prepare._ensure_smolmachine("claude-bottle:latest")
# build first (no point pushing if the build fails), then # Build → save → push → pack in that order. No `docker
# tag → push → pack against the registry port. # push` (the daemon's HTTPS-by-default path is what we're
self.assertEqual(["build", "tag", "push", "pack"], calls) # sidestepping).
self.assertEqual(["build", "save", "push", "pack"], calls)
# tag goes from the source ref to a localhost:<port> ref # docker save targets a per-digest tarball alongside the
# with the digest as the tag suffix (so different builds # cached sidecar.
# land on different tags in the registry). save_args = save.call_args.args
tag_args = tag.call_args.args self.assertEqual("claude-bottle:latest", save_args[0])
self.assertEqual("claude-bottle:latest", tag_args[0]) self.assertTrue(save_args[1].endswith(f"{digest}.image.tar"))
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", tag_args[1])
# push targets the same localhost ref tag picks. # crane push runs against the push_endpoint (container DNS
# on the registry network) with the digest as the tag.
push_args = push.call_args.args push_args = push.call_args.args
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", push_args[0]) self.assertEqual(
# pack_create reads from the registry ref, writes the f"cb-registry-xyz:5000/claude-bottle:{digest}", push_args[2],
# binary alongside the cached sidecar. )
# pack_create reads from the pull_endpoint (host port-
# forward, smolvm is on the host). Same repo+tag, just a
# different routing hostname — the registry stores one blob.
pack_args = pack.call_args.args pack_args = pack.call_args.args
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", pack_args[0]) self.assertEqual(
f"localhost:54321/claude-bottle:{digest}", pack_args[0],
)
self.assertTrue(str(pack_args[1]).endswith(f"{digest}.smolmachine")) self.assertTrue(str(pack_args[1]).endswith(f"{digest}.smolmachine"))
+45 -13
View File
@@ -44,6 +44,8 @@ def _plan(
pipelock_ca_path: Path = Path(), pipelock_ca_path: Path = Path(),
supervise: bool = False, supervise: bool = False,
bundle_ip: str = "192.168.50.2", bundle_ip: str = "192.168.50.2",
agent_git_gate_host: str = "127.0.0.1:55555",
agent_supervise_url: str = "http://127.0.0.1:55556/",
) -> SmolmachinesBottlePlan: ) -> SmolmachinesBottlePlan:
bottle_json: dict = {} bottle_json: dict = {}
if git: if git:
@@ -111,6 +113,8 @@ def _plan(
mitmproxy_ca_cert_only_host_path=egress_ca_path, mitmproxy_ca_cert_only_host_path=egress_ca_path,
), ),
supervise_plan=supervise_plan, supervise_plan=supervise_plan,
agent_git_gate_host=agent_git_gate_host,
agent_supervise_url=agent_supervise_url,
) )
@@ -303,21 +307,38 @@ class TestProvisionCA(unittest.TestCase):
def tearDown(self): def tearDown(self):
self._tmp.cleanup() self._tmp.cleanup()
# provision_ca dies hard if update-ca-certificates' stdout
# doesn't include "1 added"; supply a stock success return
# so the bulk of the tests below exercise the happy path.
_UPDATE_OK = SmolvmRunResult(
returncode=0,
stdout="Updating certificates in /etc/ssl/certs...\n1 added, 0 removed; done.\n",
stderr="",
)
def test_pipelock_path_when_no_routes(self): def test_pipelock_path_when_no_routes(self):
plan = _plan(pipelock_ca_path=self.pipelock_ca) plan = _plan(pipelock_ca_path=self.pipelock_ca)
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
) as cp, patch( ) as cp, patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
return_value=self._UPDATE_OK,
) as ex: ) as ex:
_ca.provision_ca(plan, "claude-bottle-demo-abc12") _ca.provision_ca(plan, "claude-bottle-demo-abc12")
cp.assert_called_once_with( cp.assert_called_once_with(
str(self.pipelock_ca), str(self.pipelock_ca),
"claude-bottle-demo-abc12:" + _ca.AGENT_CA_PATH, "claude-bottle-demo-abc12:" + _ca.AGENT_CA_PATH,
) )
argvs = [c.args[1] for c in ex.call_args_list] # chmod + chown + update-ca-certificates are now folded
self.assertIn(["chmod", "644", _ca.AGENT_CA_PATH], argvs) # into one `sh -c` invocation (working around a smolvm
self.assertIn(["update-ca-certificates"], argvs) # exec warm-up SIGKILL race), so we look at the single
# exec's argv rather than expecting separate calls.
ex.assert_called_once()
argv = ex.call_args.args[1]
self.assertEqual("sh", argv[0])
self.assertEqual("-c", argv[1])
self.assertIn("chmod 644", argv[2])
self.assertIn("update-ca-certificates", argv[2])
def test_egress_path_when_routes_declared(self): def test_egress_path_when_routes_declared(self):
plan = _plan( plan = _plan(
@@ -328,7 +349,8 @@ class TestProvisionCA(unittest.TestCase):
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
) as cp, patch( ) as cp, patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
return_value=self._UPDATE_OK,
): ):
_ca.provision_ca(plan, "claude-bottle-demo-abc12") _ca.provision_ca(plan, "claude-bottle-demo-abc12")
# When routes are declared, egress is the agent's first hop, # When routes are declared, egress is the agent's first hop,
@@ -412,9 +434,10 @@ class TestProvisionGit(unittest.TestCase):
cp.assert_not_called() cp.assert_not_called()
def test_writes_gitconfig_with_ip_port_form_for_smolmachines(self): def test_writes_gitconfig_with_ip_port_form_for_smolmachines(self):
# Smolmachines's TSI-allowlisted guest has no DNS resolver, # Smolmachines's TSI-allowlisted guest dials git-gate via
# so the insteadOf URL has to be IP+port rather than the # `127.0.0.1:<host port>` — the bundle's git-gate port is
# docker backend's `git-gate` short alias. # published on host loopback at launch time, and the plan
# carries the discovered host port (here mocked to 9418).
plan = _plan( plan = _plan(
git=[GitEntry( git=[GitEntry(
Name="claude-bottle", Name="claude-bottle",
@@ -422,7 +445,7 @@ class TestProvisionGit(unittest.TestCase):
IdentityFile="~/.ssh/id_ed25519", IdentityFile="~/.ssh/id_ed25519",
)], )],
stage_dir=self.stage, stage_dir=self.stage,
bundle_ip="192.168.99.2", agent_git_gate_host="127.0.0.1:9418",
) )
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.git._smolvm.machine_cp" "claude_bottle.backend.smolmachines.provision.git._smolvm.machine_cp"
@@ -437,7 +460,7 @@ class TestProvisionGit(unittest.TestCase):
self.assertEqual(self.stage, staged_path.parent) self.assertEqual(self.stage, staged_path.parent)
content = staged_path.read_text() content = staged_path.read_text()
self.assertIn( self.assertIn(
'[url "git://192.168.99.2:9418/claude-bottle.git"]', content, '[url "git://127.0.0.1:9418/claude-bottle.git"]', content,
) )
self.assertIn( self.assertIn(
"\tinsteadOf = ssh://git@host/repo.git", content, "\tinsteadOf = ssh://git@host/repo.git", content,
@@ -453,7 +476,10 @@ class TestProvisionSupervise(unittest.TestCase):
ex.assert_not_called() ex.assert_not_called()
def test_calls_claude_mcp_add_when_supervise_enabled(self): def test_calls_claude_mcp_add_when_supervise_enabled(self):
plan = _plan(supervise=True, bundle_ip="192.168.50.2") plan = _plan(
supervise=True,
agent_supervise_url="http://127.0.0.1:9100/",
)
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.supervise._smolvm.machine_exec", "claude_bottle.backend.smolmachines.provision.supervise._smolvm.machine_exec",
return_value=SmolvmRunResult(returncode=0, stdout="", stderr=""), return_value=SmolvmRunResult(returncode=0, stdout="", stderr=""),
@@ -461,14 +487,20 @@ class TestProvisionSupervise(unittest.TestCase):
_supervise.provision_supervise(plan, "claude-bottle-demo-abc12") _supervise.provision_supervise(plan, "claude-bottle-demo-abc12")
ex.assert_called_once() ex.assert_called_once()
argv = ex.call_args.args[1] argv = ex.call_args.args[1]
# claude mcp add --scope user --transport http supervise <url> # `claude mcp add --scope user` writes to ~/.claude.json,
# and the agent is the `node` user — switch UID + set
# HOME so the config lands in /home/node/.claude.json,
# not root's. URL is the agent-side endpoint (host
# loopback + discovered port), not the docker bridge IP.
self.assertEqual( self.assertEqual(
[ [
"runuser", "-u", "node", "--",
"env", "HOME=/home/node",
"claude", "mcp", "add", "claude", "mcp", "add",
"--scope", "user", "--scope", "user",
"--transport", "http", "--transport", "http",
"supervise", "supervise",
"http://192.168.50.2:9100/", "http://127.0.0.1:9100/",
], ],
argv, argv,
) )
+8
View File
@@ -82,12 +82,20 @@ class TestArgvShapes(unittest.TestCase):
self.assertEqual("smolvm", argv[0]) self.assertEqual("smolvm", argv[0])
self.assertIn("--from", argv) self.assertIn("--from", argv)
self.assertIn("/stage/agent.smolmachine", argv) self.assertIn("/stage/agent.smolmachine", argv)
# `--net` is explicit because smolvm 0.8.0's implied-net
# from --allow-cidr doesn't fire when --from is set.
self.assertIn("--net", argv)
self.assertIn("--allow-cidr", argv) self.assertIn("--allow-cidr", argv)
self.assertIn("192.168.50.2/32", argv) self.assertIn("192.168.50.2/32", argv)
self.assertIn("-e", argv) self.assertIn("-e", argv)
self.assertIn("HTTPS_PROXY=http://192.168.50.2:8888", argv) self.assertIn("HTTPS_PROXY=http://192.168.50.2:8888", argv)
self.assertEqual("agent-xyz", argv[-1]) self.assertEqual("agent-xyz", argv[-1])
def test_machine_create_omits_net_when_no_allow_cidrs(self):
    """machine_create called without allow-CIDRs must not emit `--net`."""
    with self._patch_run() as run:
        machine_create("agent-xyz", from_path=Path("/x.smolmachine"))
    # First positional argument of the patched run call is the argv.
    argv = run.call_args.args[0]
    self.assertNotIn("--net", argv)
def test_machine_start_uses_dash_name(self): def test_machine_start_uses_dash_name(self):
# `start` is the --name flag form, NOT positional. # `start` is the --name flag form, NOT positional.
with self._patch_run() as m: with self._patch_run() as m: