Merge pull request 'fix(smolmachines): docker push fails on Docker Desktop — daemon-side route differs from host loopback' (#74) from fix-local-registry-docker-desktop into main
This commit was merged in pull request #74.
This commit is contained in:
@@ -190,6 +190,27 @@ The container is removed automatically when the session ends. If the script
|
||||
is killed with SIGKILL the exit trap won't fire and the container may be
|
||||
left running; remove it with `docker rm -f <container-name>`.
|
||||
|
||||
### Smolmachines backend (experimental, macOS-only)
|
||||
|
||||
A second backend runs the agent in a smolvm micro-VM (libkrun) with the
|
||||
sidecar bundle still in Docker. Selected via
|
||||
`CLAUDE_BOTTLE_BACKEND=smolmachines ./cli.py start <agent>`. Requires
|
||||
`smolvm` on PATH (`curl -sSL https://smolmachines.com/install.sh | sh`).
|
||||
|
||||
**Known limitation, v1:** smolvm's TSI (libkrun's Transparent Socket Impersonation) uses macOS networking, and
|
||||
Docker Desktop's container IPs aren't reachable from macOS, so the
|
||||
smolmachines bottle dials the sidecar bundle through host loopback
|
||||
port-forwards (`127.0.0.1:<random>`). TSI filters by IP only, so the
|
||||
allowlist is `127.0.0.1/32` — meaning the agent VM can reach **any
|
||||
service bound to macOS's loopback**, not just the bundle's published
|
||||
ports. Practical implication: while a smolmachines bottle is running,
|
||||
host-local dev services (postgres on 5432, dev servers, etc.) are
|
||||
reachable from inside the agent even if you intended them to be
|
||||
host-private. The docker backend keeps the bottle on a `--internal`
|
||||
docker network and doesn't have this issue. A future revision will
|
||||
narrow this via a per-bottle loopback alias + host-side proxy (see
|
||||
PRD 0023's "loopback scoping" section).
|
||||
|
||||
## Manifest
|
||||
|
||||
Bottles and agents live as Markdown files with YAML frontmatter under
|
||||
|
||||
@@ -119,12 +119,20 @@ class Bottle(ABC):
|
||||
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: ...
|
||||
|
||||
@abstractmethod
|
||||
def exec(self, script: str) -> ExecResult:
|
||||
"""Run `script` as a POSIX shell script inside the bottle and
|
||||
return the captured stdout/stderr/returncode. The bottle's
|
||||
environment (including HTTPS_PROXY pointing at the pipelock
|
||||
sidecar) is inherited by the child. Non-zero exit does not
|
||||
raise — callers inspect `returncode` themselves."""
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
"""Run `script` as a POSIX shell script inside the bottle as
|
||||
`user` (default `node`, matching the agent image's USER
|
||||
directive) and return the captured stdout/stderr/returncode.
|
||||
The bottle's environment (including HTTPS_PROXY pointing at
|
||||
the pipelock sidecar) is inherited by the child. Non-zero
|
||||
exit does not raise — callers inspect `returncode`
|
||||
themselves.
|
||||
|
||||
Pass `user="root"` for shell-outs that need privileged file
|
||||
writes / package install — provisioning calls that need root
|
||||
bypass `Bottle.exec` and use the backend-specific raw
|
||||
machine-exec helper, but the tests have a legitimate use
|
||||
case for arbitrary-user runs."""
|
||||
|
||||
@abstractmethod
|
||||
def cp_in(self, host_path: str, container_path: str) -> None: ...
|
||||
|
||||
@@ -51,12 +51,15 @@ class DockerBottle(Bottle):
|
||||
self.claude_docker_argv(argv, tty=tty), check=False,
|
||||
).returncode
|
||||
|
||||
def exec(self, script: str) -> ExecResult:
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
# Pipe via stdin to `sh -s` so the caller never has to worry
|
||||
# about quoting; the script source lands inside the container
|
||||
# without crossing argv.
|
||||
# without crossing argv. `-u <user>` overrides the image's
|
||||
# default USER — defaults to `node` which is already the
|
||||
# image's USER, so the explicit flag is a no-op there but
|
||||
# keeps the cross-backend contract uniform.
|
||||
result = subprocess.run(
|
||||
["docker", "exec", "-i", self.name, "sh", "-s"],
|
||||
["docker", "exec", "-u", user, "-i", self.name, "sh", "-s"],
|
||||
input=script,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
|
||||
@@ -166,18 +166,15 @@ def image_id(ref: str) -> str:
|
||||
return r.stdout.strip()
|
||||
|
||||
|
||||
def tag(src: str, dst: str) -> None:
    """Give the image `src` the additional name `dst` (`docker tag SRC DST`).

    Idempotent. The smolmachines prepare step uses it to retag the
    locally-built image into a `localhost:<port>/...` ref that the
    ephemeral registry will accept.

    Raises `subprocess.CalledProcessError` if docker exits non-zero.
    """
    argv = ["docker", "tag", src, dst]
    subprocess.run(argv, check=True)
|
||||
|
||||
|
||||
def push(ref: str) -> None:
    """Upload the image `ref` to its registry (`docker push REF`).

    The smolmachines prepare step uses it to place the agent image in
    the ephemeral local registry so smolvm's crane backend can pull it.

    Raises `subprocess.CalledProcessError` if docker exits non-zero.
    """
    argv = ["docker", "push", ref]
    subprocess.run(argv, check=True)
|
||||
def save(ref: str, output: str) -> None:
    """Export the image `ref` to a tarball at `output` (`docker save REF -o OUTPUT`).

    The tarball holds the image layers plus manifest and is written to
    a host path. The smolmachines prepare step hands it to a
    containerized crane, which pushes it to the ephemeral registry.
    That route avoids the docker daemon's own `docker push`, which on
    Docker Desktop can't reach a host-loopback registry and refuses
    plain-HTTP pushes to non-loopback hosts.

    Raises `subprocess.CalledProcessError` if docker exits non-zero.
    """
    argv = ["docker", "save", ref, "-o", output]
    subprocess.run(argv, check=True)
|
||||
|
||||
|
||||
def _silent_run(cmd: Iterable[str]) -> int:
|
||||
|
||||
@@ -4,63 +4,130 @@ Routes `exec_claude` / `exec` / `cp_in` through `smolvm machine
|
||||
exec` / `smolvm machine cp`. The handle is yielded by `launch`
|
||||
and torn down via the surrounding ExitStack on context exit;
|
||||
`close` is a no-op idempotent alias so the BottleBackend ABC's
|
||||
context-manager contract is satisfied."""
|
||||
context-manager contract is satisfied.
|
||||
|
||||
User context: `smolvm machine exec` runs commands as root in the
|
||||
VM, but the agent image's USER is `node` and claude-code refuses
|
||||
to run as root with `--dangerously-skip-permissions`. Both
|
||||
`exec_claude` and `exec` switch to the requested user (default
|
||||
`node`) via `runuser -u <user> --` and set `HOME` / `USER`
|
||||
through `smolvm -e` — avoiding `runuser -l`'s login-shell wiring
|
||||
(PAM session setup, /etc/profile sourcing) which can hang on a
|
||||
minimal Debian VM with no PAM session config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Mapping
|
||||
|
||||
from .. import Bottle, ExecResult
|
||||
from . import smolvm as _smolvm
|
||||
|
||||
|
||||
# Per-user env the agent image's USER (node) expects. claude
|
||||
# reads ~/.claude.json + writes session state under ~/.claude/;
|
||||
# bare `runuser -u` inherits root's HOME=/root, which claude
|
||||
# can't write to. Set HOME / USER explicitly through smolvm -e
|
||||
# so the child process sees them.
|
||||
# Home directory per known user; users not listed here fall back to
# `/home/<user>` in `_env_flags_for`.
_HOME_FOR = {
    "node": "/home/node",
    "root": "/root",
}


def _env_flags_for(user: str) -> list[str]:
    """Return the `-e HOME=<dir> -e USER=<user>` argv slice for `user`.

    The home directory is looked up in `_HOME_FOR`; a user that is not
    listed there gets `/home/<user>`.
    """
    if user in _HOME_FOR:
        home_dir = _HOME_FOR[user]
    else:
        home_dir = f"/home/{user}"
    return ["-e", f"HOME={home_dir}", "-e", f"USER={user}"]
|
||||
|
||||
|
||||
def _guest_env_flags(env: Mapping[str, str]) -> list[str]:
    """Flatten `{K: V}` into a `-e K=V -e K2=V2 ...` argv slice.

    The slice is meant for `smolvm machine exec`. `smolvm machine
    create -e` sets env on PID 1, but that env doesn't propagate to
    fresh exec process trees, so the variables have to be passed again
    on every call. Pairs are emitted in the mapping's iteration order.
    """
    return [
        token
        for key, value in env.items()
        for token in ("-e", f"{key}={value}")
    ]
|
||||
|
||||
|
||||
class SmolmachinesBottle(Bottle):
|
||||
"""Handle returned by `SmolmachinesBottleBackend.launch`. The
|
||||
underlying VM lifecycle (create / start / stop / delete) lives
|
||||
on the launch ExitStack — this class only routes runtime
|
||||
operations to the right `smolvm machine ...` subcommand."""
|
||||
|
||||
def __init__(self, machine_name: str, *, prompt_path: str | None = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
machine_name: str,
|
||||
*,
|
||||
prompt_path: str | None = None,
|
||||
guest_env: Mapping[str, str] | None = None,
|
||||
) -> None:
|
||||
self.name = machine_name
|
||||
# In-VM path to the agent's prompt file. None when the
|
||||
# agent declared no prompt (file still exists; we just
|
||||
# don't pass --append-system-prompt-file).
|
||||
self._prompt_path = prompt_path
|
||||
# Env vars the agent process needs (HTTPS_PROXY,
|
||||
# CLAUDE_CODE_OAUTH_TOKEN, manifest-declared bottle env, …).
|
||||
# Forwarded on every `smolvm machine exec` via `-e K=V`
|
||||
# because exec doesn't inherit from machine_create's env.
|
||||
self._guest_env = dict(guest_env or {})
|
||||
|
||||
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
|
||||
"""Run `claude` interactively inside the VM. Inherits the
|
||||
operator's terminal (stdin / stdout / stderr) so the
|
||||
session feels native. Blocks until claude exits; returns
|
||||
the in-VM exit code.
|
||||
"""Run `claude` interactively inside the VM as the `node`
|
||||
user. Inherits the operator's terminal (stdin / stdout /
|
||||
stderr) so the session feels native. Blocks until claude
|
||||
exits; returns the in-VM exit code.
|
||||
|
||||
We bypass the captured-output `machine_exec` helper here
|
||||
because that one wraps stdout/stderr in pipes — fine for
|
||||
scripted exec, wrong for an interactive shell. Drop down
|
||||
to `subprocess.run` with the TTY inherited."""
|
||||
to `subprocess.run` with the TTY inherited.
|
||||
|
||||
UID switches via `runuser -u node --` (not `-l`) so we
|
||||
avoid login-shell wiring. HOME / USER come from `smolvm
|
||||
-e` instead, which sets them on the process env."""
|
||||
flags = ["smolvm", "machine", "exec", "--name", self.name]
|
||||
if tty:
|
||||
flags += ["-i", "-t"]
|
||||
flags += _env_flags_for("node")
|
||||
flags += _guest_env_flags(self._guest_env)
|
||||
claude_argv = ["claude"]
|
||||
if self._prompt_path:
|
||||
claude_argv += ["--append-system-prompt-file", self._prompt_path]
|
||||
flags += ["--", *claude_argv, *argv]
|
||||
claude_argv += argv
|
||||
flags += ["--", "runuser", "-u", "node", "--", *claude_argv]
|
||||
result = subprocess.run(flags, check=False)
|
||||
return result.returncode
|
||||
|
||||
def exec(self, script: str) -> ExecResult:
|
||||
"""Run a POSIX shell script and capture the result. The
|
||||
script runs under `/bin/sh -c`, matching what the docker
|
||||
backend's `exec` does — callers can write shell-y test
|
||||
helpers without worrying about argv splitting."""
|
||||
r = _smolvm.machine_exec(
|
||||
self.name,
|
||||
["/bin/sh", "-c", script],
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
"""Run a POSIX shell script as `user` (default `node`) and
|
||||
capture the result. Matches the docker backend's `exec`,
|
||||
which defaults to the image's USER (also node) — so test
|
||||
helpers / provision shell-outs run with the same identity
|
||||
on both backends. Pass `user="root"` for tests that need
|
||||
root.
|
||||
|
||||
`runuser -u <user> -- /bin/sh -c <script>` switches UID
|
||||
without invoking a login shell; HOME / USER are set via
|
||||
`smolvm -e` (see `_env_flags_for`)."""
|
||||
argv = (
|
||||
_env_flags_for(user)
|
||||
+ _guest_env_flags(self._guest_env)
|
||||
+ ["--", "runuser", "-u", user, "--", "/bin/sh", "-c", script]
|
||||
)
|
||||
# _smolvm.machine_exec expects argv (the bit after `--`);
|
||||
# the -e flags go before, so call smolvm directly.
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "exec", "--name", self.name] + argv,
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return ExecResult(
|
||||
returncode=r.returncode,
|
||||
stdout=r.stdout,
|
||||
stderr=r.stderr,
|
||||
stdout=r.stdout or "",
|
||||
stderr=r.stderr or "",
|
||||
)
|
||||
|
||||
def cp_in(self, host_path: str, container_path: str) -> None:
|
||||
|
||||
@@ -75,6 +75,17 @@ class SmolmachinesBottlePlan(BottlePlan):
|
||||
# None when bottle.supervise is False, matching the docker
|
||||
# backend's convention.
|
||||
supervise_plan: SupervisePlan | None
|
||||
# Agent-side endpoints. On Docker Desktop the docker bridge
|
||||
# IPs aren't reachable from the smolvm guest (TSI uses macOS
|
||||
# networking; docker container IPs live in the daemon's VM),
|
||||
# so the agent dials the bundle via host loopback +
|
||||
# docker-published random ports. Empty at prepare time;
|
||||
# launch populates these after bundle bringup via
|
||||
# `dataclasses.replace`. Format: a `host:port` for git-gate
|
||||
# (insteadOf URL prefix) + full URLs for proxy / supervise.
|
||||
agent_proxy_url: str = ""
|
||||
agent_git_gate_host: str = ""
|
||||
agent_supervise_url: str = ""
|
||||
|
||||
def print(self, *, remote_control: bool) -> None:
|
||||
"""Compact y/N preflight. Same shape as the Docker
|
||||
@@ -89,7 +100,10 @@ class SmolmachinesBottlePlan(BottlePlan):
|
||||
upstreams = [
|
||||
f"{g.Name} → {g.Upstream}" for g in bottle.git
|
||||
]
|
||||
routes = [r.host for r in bottle.egress.routes]
|
||||
# Use the resolved egress_plan (lowercase `host` on the
|
||||
# plan-level EgressRoute) rather than `bottle.egress.routes`,
|
||||
# which is the manifest's capitalized-attr form.
|
||||
routes = [r.host for r in self.egress_plan.routes]
|
||||
|
||||
print(file=sys.stderr)
|
||||
info(f"agent : {spec.agent_name}")
|
||||
|
||||
@@ -21,6 +21,7 @@ from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import time
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from typing import Callable, Generator
|
||||
|
||||
@@ -34,6 +35,7 @@ from ...util import expand_tilde
|
||||
from ..docker.egress import (
|
||||
EGRESS_CA_IN_CONTAINER,
|
||||
EGRESS_PIPELOCK_CA_IN_CONTAINER,
|
||||
EGRESS_PORT as _EGRESS_PORT,
|
||||
egress_tls_init,
|
||||
)
|
||||
from ..docker.git_gate import (
|
||||
@@ -41,14 +43,28 @@ from ..docker.git_gate import (
|
||||
GIT_GATE_CREDS_DIR_IN_CONTAINER,
|
||||
GIT_GATE_ENTRYPOINT_IN_CONTAINER,
|
||||
GIT_GATE_HOOK_IN_CONTAINER,
|
||||
GIT_GATE_PORT as _GIT_GATE_PORT,
|
||||
)
|
||||
from ..docker.pipelock import (
|
||||
BUNDLE_LOCAL_PIPELOCK_URL,
|
||||
PIPELOCK_PORT as _PIPELOCK_PORT_STR,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
from ..docker.pipelock import BUNDLE_LOCAL_PIPELOCK_URL, pipelock_tls_init
|
||||
from . import sidecar_bundle as _bundle
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle import SmolmachinesBottle
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
# Container-internal listening ports for each bundle daemon. The
|
||||
# bundle publishes each one on a random host loopback port (see
|
||||
# `_bundle.start_bundle`), and `_bundle.bundle_host_port` looks
|
||||
# them up post-start. Pipelock's port is an env-overridable string
|
||||
# in docker.pipelock; coerce to int here.
|
||||
_PIPELOCK_PORT = int(_PIPELOCK_PORT_STR)
|
||||
_SUPERVISE_PORT = SUPERVISE_PORT
|
||||
|
||||
|
||||
@contextmanager
|
||||
def launch(
|
||||
plan: SmolmachinesBottlePlan,
|
||||
@@ -96,31 +112,129 @@ def launch(
|
||||
)
|
||||
|
||||
# 3. Build the BundleLaunchSpec from the (now-resolved)
|
||||
# inner Plans: daemon subset, env, bind-mounts.
|
||||
# inner Plans: daemon subset, env, bind-mounts. The spec's
|
||||
# ports_to_publish list expands depending on which daemons
|
||||
# the agent needs to reach from the smolvm guest.
|
||||
bundle_spec = _bundle_launch_spec(plan, network)
|
||||
token_env = _resolve_token_env(plan, os.environ)
|
||||
_bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
|
||||
stack.callback(_bundle.stop_bundle, plan.slug)
|
||||
|
||||
# 4. smolvm VM. --from carries the pre-packed .smolmachine
|
||||
# 4. Discover the host-side ports docker assigned for the
|
||||
# bundle's published container ports, and bind the
|
||||
# agent's URLs to `127.0.0.1:<host port>`. Docker container
|
||||
# IPs (192.168.x.x in the daemon's bridge) aren't
|
||||
# reachable from the smolvm guest on macOS — TSI uses
|
||||
# macOS networking, and macOS sees the daemon's bridge
|
||||
# via the published-port loopback forward only.
|
||||
#
|
||||
# Proxy hop order matches the docker backend: when the
|
||||
# bottle declares egress routes, the agent's first hop is
|
||||
# egress (for token injection), then pipelock. Without
|
||||
# routes, the agent dials pipelock directly. Whichever
|
||||
# one is "agent-facing" is the daemon whose port we
|
||||
# publish on host loopback; the other stays bundle-
|
||||
# internal as the upstream proxy.
|
||||
if plan.egress_plan.routes:
|
||||
agent_facing_port = _EGRESS_PORT
|
||||
else:
|
||||
agent_facing_port = _PIPELOCK_PORT
|
||||
agent_facing_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, agent_facing_port,
|
||||
)
|
||||
agent_proxy_url = f"http://127.0.0.1:{agent_facing_host_port}"
|
||||
agent_git_gate_host = ""
|
||||
if plan.git_gate_plan.upstreams:
|
||||
git_gate_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _GIT_GATE_PORT,
|
||||
)
|
||||
agent_git_gate_host = f"127.0.0.1:{git_gate_host_port}"
|
||||
agent_supervise_url = ""
|
||||
if plan.supervise_plan is not None:
|
||||
supervise_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _SUPERVISE_PORT,
|
||||
)
|
||||
agent_supervise_url = f"http://127.0.0.1:{supervise_host_port}/"
|
||||
|
||||
# Stamp the URLs onto the plan + guest_env. provision_git
|
||||
# and provision_supervise read the plan fields; the agent
|
||||
# reads guest_env on every exec_claude.
|
||||
guest_env = {
|
||||
**plan.guest_env,
|
||||
"HTTPS_PROXY": agent_proxy_url,
|
||||
"HTTP_PROXY": agent_proxy_url,
|
||||
}
|
||||
if agent_git_gate_host:
|
||||
guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}"
|
||||
if agent_supervise_url:
|
||||
guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
|
||||
plan = dataclasses.replace(
|
||||
plan,
|
||||
guest_env=guest_env,
|
||||
agent_proxy_url=agent_proxy_url,
|
||||
agent_git_gate_host=agent_git_gate_host,
|
||||
agent_supervise_url=agent_supervise_url,
|
||||
)
|
||||
|
||||
# 5. smolvm VM. --from carries the pre-packed .smolmachine
|
||||
# artifact (built by prepare); --allow-cidr + -e carry the
|
||||
# per-bottle TSI allowlist + env. Smolfile isn't usable
|
||||
# here — smolvm 0.8.0 makes `--from` and `--smolfile`
|
||||
# mutually exclusive.
|
||||
# per-bottle TSI allowlist + env. The allowlist is
|
||||
# `127.0.0.1/32` because every bundle daemon the agent
|
||||
# reaches is fronted by a host loopback port-forward.
|
||||
# Smolfile isn't usable here — smolvm 0.8.0 makes `--from`
|
||||
# and `--smolfile` mutually exclusive.
|
||||
_smolvm.machine_create(
|
||||
plan.machine_name,
|
||||
from_path=plan.agent_from_path,
|
||||
allow_cidrs=[f"{plan.bundle_ip}/32"],
|
||||
allow_cidrs=["127.0.0.1/32"],
|
||||
env=plan.guest_env,
|
||||
)
|
||||
stack.callback(_smolvm.machine_delete, plan.machine_name)
|
||||
_smolvm.machine_start(plan.machine_name)
|
||||
stack.callback(_smolvm.machine_stop, plan.machine_name)
|
||||
|
||||
# 5. Provision (CA / prompt / skills / git / supervise).
|
||||
# 6. Repair filesystem ownership + perms that smolvm's
|
||||
# pack process remapped to the host invoker's uid (501
|
||||
# on macOS) rather than preserving the image's expected
|
||||
# ownership.
|
||||
#
|
||||
# - /home/node → node:node so the node user can write
|
||||
# its own dotfiles (claude appendFileSync on
|
||||
# ~/.claude.json otherwise bails with ENOENT/EPERM
|
||||
# and the TUI hangs without surfacing the error).
|
||||
# - /tmp + /var/tmp → root:root mode 1777 so non-root
|
||||
# processes can create their per-uid scratch dirs
|
||||
# (claude-code creates /tmp/claude-<uid>/ as soon as
|
||||
# it spawns a Bash tool call).
|
||||
#
|
||||
# All folded into one sh -c so we only pay one
|
||||
# machine_exec round trip — back-to-back exec calls
|
||||
# right after machine_start hit a SIGKILL race in
|
||||
# libkrun's exec channel (see provision_ca for the
|
||||
# other half of this same workaround).
|
||||
_smolvm.machine_exec(plan.machine_name, [
|
||||
"sh", "-c",
|
||||
"chown -R node:node /home/node && "
|
||||
"chown root:root /tmp /var/tmp && "
|
||||
"chmod 1777 /tmp /var/tmp",
|
||||
])
|
||||
|
||||
# Wait briefly for the VM to settle. Back-to-back smolvm
|
||||
# machine_exec calls immediately after machine_start
|
||||
# occasionally SIGKILL the in-VM child at ~100ms (looks
|
||||
# like a VM warm-up race in libkrun's exec channel).
|
||||
# 1.5s is empirically enough to dodge it; provisioning
|
||||
# already takes seconds so the wait is amortized.
|
||||
time.sleep(1.5)
|
||||
|
||||
# 7. Provision (CA / prompt / skills / git / supervise).
|
||||
prompt_path = provision(plan, plan.machine_name)
|
||||
|
||||
yield SmolmachinesBottle(plan.machine_name, prompt_path=prompt_path)
|
||||
yield SmolmachinesBottle(
|
||||
plan.machine_name,
|
||||
prompt_path=prompt_path,
|
||||
guest_env=plan.guest_env,
|
||||
)
|
||||
finally:
|
||||
stack.close()
|
||||
|
||||
@@ -144,9 +258,14 @@ def _bundle_launch_spec(
|
||||
env: list[str] = []
|
||||
volumes: list[tuple[str, str, bool]] = []
|
||||
|
||||
# PRD 0023 chunk 3: egress binds 127.0.0.1 inside the bundle
|
||||
# so TSI's IP-only allowlist can't bypass pipelock.
|
||||
env.append("EGRESS_LISTEN_HOST=127.0.0.1")
|
||||
# In this Docker-Desktop-compatible topology, whichever daemon
|
||||
# is "agent-facing" gets its port published on the host
|
||||
# loopback (see `_ensure_smolmachine`'s discovery loop) and the
|
||||
# other stays bundle-internal. The bundle is NOT reachable by
|
||||
# bridge IP from the smolvm guest, so the
|
||||
# PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation
|
||||
# isn't needed: the agent can only dial whatever daemon's
|
||||
# host port we publish, period.
|
||||
|
||||
# --- pipelock ---------------------------------------------
|
||||
pp = plan.proxy_plan
|
||||
@@ -201,6 +320,21 @@ def _bundle_launch_spec(
|
||||
]
|
||||
volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False))
|
||||
|
||||
# Container ports the agent reaches from the smolvm guest —
|
||||
# published on host loopback so the guest can dial via TSI +
|
||||
# macOS networking. The HTTP/HTTPS chokepoint is whichever
|
||||
# daemon's port we publish: egress when routes are declared
|
||||
# (token injection first, then forwards to bundle-internal
|
||||
# pipelock), pipelock otherwise.
|
||||
if ep.routes:
|
||||
ports_to_publish: list[int] = [_EGRESS_PORT]
|
||||
else:
|
||||
ports_to_publish = [_PIPELOCK_PORT]
|
||||
if gp.upstreams:
|
||||
ports_to_publish.append(_GIT_GATE_PORT)
|
||||
if sp is not None:
|
||||
ports_to_publish.append(_SUPERVISE_PORT)
|
||||
|
||||
return _bundle.BundleLaunchSpec(
|
||||
slug=plan.slug,
|
||||
network_name=network,
|
||||
@@ -210,6 +344,7 @@ def _bundle_launch_spec(
|
||||
daemons_csv=",".join(daemons),
|
||||
environment=tuple(env),
|
||||
volumes=tuple(volumes),
|
||||
ports_to_publish=tuple(ports_to_publish),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,17 +1,37 @@
|
||||
"""Ephemeral local OCI registry for the smolmachines agent-image
|
||||
conversion path (PRD 0023 chunk 4c).
|
||||
|
||||
`smolvm pack create --image <ref>` only accepts registry refs — it
|
||||
can't read the local docker daemon's image cache, an OCI layout
|
||||
directory, or a `docker save` tarball. To convert the agent's
|
||||
Dockerfile-built image into a `.smolmachine` artifact we run a
|
||||
short-lived `registry:2.8.3` container on `127.0.0.1:<random>`,
|
||||
push the locally-tagged image into it, and let smolvm pull from
|
||||
there. The registry container is torn down as soon as the pack
|
||||
completes.
|
||||
`smolvm pack create --image <ref>` only accepts OCI registry refs
|
||||
— it can't read the local docker daemon's image cache, an OCI
|
||||
layout directory, or a `docker save` tarball. To convert the
|
||||
agent's Dockerfile-built image into a `.smolmachine` artifact we
|
||||
spin up a short-lived `registry:2.8.3` container alongside a
|
||||
`crane` helper container on a private docker network, push via
|
||||
`crane push --insecure <tarball> <registry-container>:5000/...`,
|
||||
and let smolvm pull from the registry's published host port. The
|
||||
network + both containers are torn down after the pack completes.
|
||||
|
||||
Loopback-only bind + the host's docker layer cache mean the round
|
||||
trip is fast (~5s) and there's no exposed surface on the LAN."""
|
||||
Why this two-container dance instead of plain `docker push`:
|
||||
- Docker Desktop's daemon runs in its own Linux VM, so its
|
||||
`localhost` is not the host's loopback. A registry bound to
|
||||
the host's 127.0.0.1 is unreachable from the daemon side.
|
||||
- `host.docker.internal` is reachable from the daemon but isn't
|
||||
in Docker's default insecure-registries CIDRs (only `::1/128`
|
||||
and `127.0.0.0/8` are), so `docker push` to it tries HTTPS,
|
||||
hits a plain-HTTP registry, and dies with
|
||||
`http: server gave HTTP response to HTTPS client`. Adding
|
||||
`host.docker.internal` to daemon.json works but is a one-time
|
||||
manual step the user has to do in Docker Desktop's UI.
|
||||
- Going through a docker network sidesteps the host-vs-daemon
|
||||
loopback mismatch (crane and registry containers see each
|
||||
other on the network) AND the HTTPS preference (crane has an
|
||||
`--insecure` flag that forces plain HTTP).
|
||||
|
||||
The registry is also published on a random host port so smolvm
|
||||
— a host process — can pull from `localhost:<port>` via Docker's
|
||||
port-forward. smolvm's bundled crane auto-falls-back to HTTP for
|
||||
localhost addresses, so no insecure-registries config is needed
|
||||
on that side either."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -21,6 +41,7 @@ import subprocess
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterator
|
||||
|
||||
from ...log import die
|
||||
@@ -34,64 +55,150 @@ REGISTRY_IMAGE = os.environ.get(
|
||||
)
|
||||
|
||||
|
||||
# gcr.io/go-containerregistry/crane:latest, pinned by digest. ~10MB,
|
||||
# stable upstream from Google; we only invoke `crane push --insecure`
|
||||
# against a localhost-equivalent registry, so the trust surface is
|
||||
# narrow.
|
||||
CRANE_IMAGE = os.environ.get(
|
||||
"CLAUDE_BOTTLE_CRANE_IMAGE",
|
||||
"gcr.io/go-containerregistry/crane@sha256:0ae17ecb34315aa7cbff28f6eddee3b7adae0b2f90101260d990804db1eb0084",
|
||||
)
|
||||
|
||||
|
||||
# Internal port the registry binds to inside its container — fixed
|
||||
# by the registry:2 image. The host-side mapping is random.
|
||||
_REGISTRY_CONTAINER_PORT = "5000"
|
||||
|
||||
|
||||
# How long to wait for the registry's HTTP layer to bind before
|
||||
# giving up. Two seconds is empirically enough; bumping to 10s leaves
|
||||
# headroom for slow CI runners without making the failure mode chatty.
|
||||
# giving up. Two seconds is empirically enough; 10s leaves headroom
|
||||
# for slow CI runners without making the failure mode chatty.
|
||||
_READY_TIMEOUT_S = 10.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RegistryHandle:
    """Immutable coordinates of the ephemeral registry.

    Holds everything a caller needs both to push an image into the
    registry and to pull it back out again.
    """

    # Per-session docker network. A `crane push` container has to join
    # it to reach the registry by name.
    network: str
    # `<host>:<port>` to embed in image refs given to the crane push
    # container; resolves via docker network DNS.
    push_endpoint: str
    # `<host>:<port>` a host process (smolvm) uses; backed by the
    # registry's host port mapping.
    pull_endpoint: str
|
||||
|
||||
|
||||
@contextmanager
|
||||
def ephemeral_registry() -> Iterator[int]:
|
||||
"""Bring up a `registry:2.8.3` container on a random loopback
|
||||
port, yield the port, force-remove the container on exit.
|
||||
def ephemeral_registry() -> Iterator[RegistryHandle]:
|
||||
"""Bring up a per-session docker network + a `registry:2.8.3`
|
||||
container on it (published on a random host port), yield a
|
||||
`RegistryHandle`, force-remove both on exit.
|
||||
|
||||
The container is started with `--rm` so a clean exit cleans up
|
||||
on its own; the `finally` block force-removes on abnormal exit
|
||||
(the calling process crashes between yield and close)."""
|
||||
name = f"claude-bottle-registry-{uuid.uuid4().hex[:12]}"
|
||||
session_id = uuid.uuid4().hex[:12]
|
||||
network = f"claude-bottle-registry-net-{session_id}"
|
||||
registry_name = f"claude-bottle-registry-{session_id}"
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
"docker", "run", "-d", "--rm",
|
||||
"--name", name,
|
||||
# `127.0.0.1::5000` = bind to loopback, pick a random host
|
||||
# port. No LAN exposure; the container hangs around just
|
||||
# long enough for one push + one pack-create.
|
||||
"-p", "127.0.0.1::5000",
|
||||
REGISTRY_IMAGE,
|
||||
],
|
||||
["docker", "network", "create", network],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
try:
|
||||
port = _host_port(name)
|
||||
_wait_ready(port)
|
||||
yield port
|
||||
subprocess.run(
|
||||
[
|
||||
"docker", "run", "-d", "--rm",
|
||||
"--name", registry_name,
|
||||
"--network", network,
|
||||
# `-p :5000` (no IP prefix) binds the container's
|
||||
# port 5000 on a random host port across all
|
||||
# interfaces. The host side reaches the registry
|
||||
# via this port — smolvm's `pack create` pulls from
|
||||
# `localhost:<port>` and the docker port-forward
|
||||
# routes there.
|
||||
"-p", _REGISTRY_CONTAINER_PORT,
|
||||
REGISTRY_IMAGE,
|
||||
],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
try:
|
||||
port = _host_port(registry_name)
|
||||
_wait_ready(port)
|
||||
yield RegistryHandle(
|
||||
network=network,
|
||||
push_endpoint=f"{registry_name}:{_REGISTRY_CONTAINER_PORT}",
|
||||
pull_endpoint=f"localhost:{port}",
|
||||
)
|
||||
finally:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", registry_name],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
finally:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
["docker", "network", "rm", network],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
|
||||
def _host_port(name: str) -> int:
|
||||
"""Resolve the host-side port docker mapped to the registry's
|
||||
container port 5000. `docker port <name> 5000/tcp` returns one or
|
||||
more `host:port` lines; the loopback-only -p binding ensures we
|
||||
get exactly `127.0.0.1:<port>`."""
|
||||
def crane_push_tarball(handle: RegistryHandle, tarball_path: str, ref: str) -> None:
|
||||
"""Run `crane push --insecure <tarball> <ref>` inside a one-shot
|
||||
container on the registry's docker network. `ref` should
|
||||
reference the registry by `handle.push_endpoint` so the crane
|
||||
container resolves it via docker network DNS.
|
||||
|
||||
Doesn't go through `docker push` to avoid the Docker-Desktop
|
||||
daemon's HTTPS preference for non-loopback hostnames — crane's
|
||||
`--insecure` flag forces plain HTTP, which is what the
|
||||
registry container speaks."""
|
||||
r = subprocess.run(
|
||||
["docker", "port", name, "5000/tcp"],
|
||||
[
|
||||
"docker", "run", "--rm",
|
||||
"--network", handle.network,
|
||||
"-v", f"{tarball_path}:/img.tar:ro",
|
||||
CRANE_IMAGE,
|
||||
"push", "--insecure", "/img.tar", ref,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
die(
|
||||
f"docker port {name} 5000/tcp failed: "
|
||||
f"crane push of {tarball_path!r} to {ref!r} failed: "
|
||||
f"{(r.stderr or r.stdout or '').strip() or '<no output>'}"
|
||||
)
|
||||
|
||||
|
||||
def _host_port(name: str) -> int:
|
||||
"""Resolve the host-side port docker mapped to the registry's
|
||||
container port. `docker port <name> 5000/tcp` returns one or
|
||||
more `host:port` lines (one per address family) — we take the
|
||||
first."""
|
||||
r = subprocess.run(
|
||||
["docker", "port", name, f"{_REGISTRY_CONTAINER_PORT}/tcp"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
die(
|
||||
f"docker port {name} {_REGISTRY_CONTAINER_PORT}/tcp failed: "
|
||||
f"{(r.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
# `127.0.0.1:54321\n` — split on the last colon to handle the
|
||||
# `host:port` shape without parsing IP literals.
|
||||
# `0.0.0.0:54321\n[::]:54321\n` — split on the last colon to
|
||||
# handle either IPv4 or IPv6 host syntax.
|
||||
line = (r.stdout or "").splitlines()[0].strip()
|
||||
_, _, port_str = line.rpartition(":")
|
||||
try:
|
||||
@@ -102,12 +209,15 @@ def _host_port(name: str) -> int:
|
||||
|
||||
|
||||
def _wait_ready(port: int) -> None:
|
||||
"""Block until the registry's HTTP layer accepts a TCP connection
|
||||
on `127.0.0.1:<port>`, or `_READY_TIMEOUT_S` elapses.
|
||||
"""Block until the registry's HTTP layer accepts a TCP
|
||||
connection on `127.0.0.1:<port>`, or `_READY_TIMEOUT_S`
|
||||
elapses.
|
||||
|
||||
A successful TCP connect is sufficient — registry:2.8.3 binds
|
||||
after it's ready to serve `/v2/` requests, so the push that
|
||||
follows will land on a working server."""
|
||||
follows will land on a working server. We probe loopback
|
||||
specifically (not via the docker network) because this helper
|
||||
runs on the host."""
|
||||
deadline = time.monotonic() + _READY_TIMEOUT_S
|
||||
last_err: Exception | None = None
|
||||
while time.monotonic() < deadline:
|
||||
|
||||
@@ -34,7 +34,7 @@ from ...pipelock import PipelockProxy
|
||||
from ...supervise import Supervise
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
from .local_registry import ephemeral_registry
|
||||
from .local_registry import crane_push_tarball, ephemeral_registry
|
||||
from .util import smolmachines_bundle_subnet, smolmachines_preflight
|
||||
|
||||
|
||||
@@ -89,22 +89,23 @@ def resolve_plan(
|
||||
|
||||
subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug)
|
||||
|
||||
# Agent's env. IP literals; no DNS resolution inside the guest
|
||||
# (TSI allowlist contains only `<bundle_ip>/32` — no resolver).
|
||||
# Agent's env: the prepare-time view doesn't yet know the
|
||||
# host loopback ports the bundle's daemons get published on
|
||||
# (those come from docker AFTER `docker run` returns), so
|
||||
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are
|
||||
# populated in launch.py and stamped onto guest_env there.
|
||||
# What we set here is the part that doesn't depend on
|
||||
# bundle bringup — bottle.env literals, the empty-NO_PROXY
|
||||
# safe default, and the TLS trust env trio
|
||||
# (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE)
|
||||
# pointing at Debian's update-ca-certificates output bundle.
|
||||
guest_env: dict[str, str] = {
|
||||
**bottle.env,
|
||||
"HTTPS_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
|
||||
"HTTP_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
|
||||
"NO_PROXY": "localhost,127.0.0.1",
|
||||
"NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt",
|
||||
"SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt",
|
||||
"REQUESTS_CA_BUNDLE": "/etc/ssl/certs/ca-certificates.crt",
|
||||
}
|
||||
if bottle.git:
|
||||
guest_env["GIT_GATE_URL"] = (
|
||||
f"git://{bundle_ip}:{_BUNDLE_GIT_GATE_PORT}"
|
||||
)
|
||||
if bottle.supervise:
|
||||
guest_env["MCP_SUPERVISE_URL"] = (
|
||||
f"http://{bundle_ip}:{_BUNDLE_SUPERVISE_PORT}"
|
||||
)
|
||||
|
||||
# Inner Plans for the four bundle daemons. The ABCs are
|
||||
# platform-neutral — `.prepare()` writes config files + returns
|
||||
@@ -124,6 +125,19 @@ def resolve_plan(
|
||||
egress_dir.mkdir(parents=True, exist_ok=True)
|
||||
egress_plan = Egress().prepare(bottle, slug, egress_dir)
|
||||
|
||||
# Claude-code refuses to start without *something* it
|
||||
# recognises as a credential. When the bottle has an egress
|
||||
# route carrying the `claude_code_oauth` role marker, egress
|
||||
# strips + re-injects the real Authorization header on the
|
||||
# outbound leg using a token held in egress's own environ — so
|
||||
# the agent gets a non-secret placeholder here (matches the
|
||||
# docker backend's forwarded_env logic in
|
||||
# claude_bottle/backend/docker/prepare.py).
|
||||
if any("claude_code_oauth" in r.roles for r in egress_plan.routes):
|
||||
guest_env["CLAUDE_CODE_OAUTH_TOKEN"] = "egress-placeholder"
|
||||
guest_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
|
||||
guest_env.setdefault("DISABLE_ERROR_REPORTING", "1")
|
||||
|
||||
supervise_plan = None
|
||||
if bottle.supervise:
|
||||
supervise_dir = supervise_state_dir(slug)
|
||||
@@ -184,14 +198,19 @@ def _ensure_smolmachine(image_ref: str) -> Path:
|
||||
a launcher binary at `.smolmachine` plus the sidecar alongside
|
||||
it; the sidecar is the actual artifact).
|
||||
|
||||
Conversion path: `docker build` (the existing layer cache makes
|
||||
no-change rebuilds cheap) → `docker tag` with a
|
||||
`localhost:<port>/...` ref → bring up the ephemeral registry
|
||||
container → `docker push` into it → `smolvm pack create --image
|
||||
<localhost ref>` → tear down the registry. Each pack-create
|
||||
costs several seconds even on a hot cache, so we skip the whole
|
||||
pipeline when the cached sidecar is already on disk for this
|
||||
image ID."""
|
||||
Conversion path: `docker build` (the existing layer cache
|
||||
makes no-change rebuilds cheap) → `docker save` to a tarball
|
||||
→ spin up an ephemeral registry on a private docker network →
|
||||
`crane push --insecure` from a one-shot container on the same
|
||||
network → `smolvm pack create --image localhost:<host port>/...`
|
||||
→ tear down the registry + network. The crane push detour
|
||||
sidesteps the Docker-Desktop daemon's HTTPS preference for
|
||||
non-loopback registries — see the `local_registry` module
|
||||
docstring for the gory details.
|
||||
|
||||
Each pack-create costs several seconds even on a hot cache,
|
||||
so we skip the whole pipeline when the cached sidecar is
|
||||
already on disk for this image ID."""
|
||||
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
docker_mod.build_image(image_ref, _REPO_DIR)
|
||||
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
|
||||
@@ -202,9 +221,17 @@ def _ensure_smolmachine(image_ref: str) -> Path:
|
||||
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
|
||||
if sidecar.is_file():
|
||||
return sidecar
|
||||
with ephemeral_registry() as port:
|
||||
local_ref = f"localhost:{port}/claude-bottle:{digest}"
|
||||
docker_mod.tag(image_ref, local_ref)
|
||||
docker_mod.push(local_ref)
|
||||
_smolvm.pack_create(local_ref, binary)
|
||||
tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
|
||||
docker_mod.save(image_ref, str(tarball))
|
||||
try:
|
||||
with ephemeral_registry() as handle:
|
||||
push_ref = f"{handle.push_endpoint}/claude-bottle:{digest}"
|
||||
pack_ref = f"{handle.pull_endpoint}/claude-bottle:{digest}"
|
||||
crane_push_tarball(handle, str(tarball), push_ref)
|
||||
_smolvm.pack_create(pack_ref, binary)
|
||||
finally:
|
||||
# Tarball is ~500MB-1GB for the agent image; reclaim once
|
||||
# the smolmachine artifact exists. The artifact itself is
|
||||
# the long-lived cache entry.
|
||||
tarball.unlink(missing_ok=True)
|
||||
return sidecar
|
||||
|
||||
@@ -66,8 +66,29 @@ def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
# default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
|
||||
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
|
||||
# `requests` / libraries that don't load the system bundle.
|
||||
_smolvm.machine_exec(target, ["chmod", "644", AGENT_CA_PATH])
|
||||
_smolvm.machine_exec(target, ["update-ca-certificates"])
|
||||
#
|
||||
# chown + chmod + update-ca-certificates run in one
|
||||
# `sh -c` so we only pay one machine_exec round trip; the
|
||||
# `&&` chaining surfaces the first failure as the return
|
||||
# code.
|
||||
r = _smolvm.machine_exec(target, [
|
||||
"sh", "-c",
|
||||
f"chown root:root {AGENT_CA_PATH} && "
|
||||
f"chmod 644 {AGENT_CA_PATH} && "
|
||||
f"update-ca-certificates",
|
||||
])
|
||||
if r.returncode != 0 or "1 added" not in (r.stdout or ""):
|
||||
# update-ca-certificates not adding our cert is fatal —
|
||||
# claude-code's TLS handshake against the egress-MITM'd
|
||||
# api.anthropic.com would fail downstream. Bail early
|
||||
# with what we can see (output is captured by smolvm so
|
||||
# we can surface it).
|
||||
die(
|
||||
f"update-ca-certificates didn't add the agent CA "
|
||||
f"(exit {r.returncode}): "
|
||||
f"stdout={(r.stdout or '').strip()!r} "
|
||||
f"stderr={(r.stderr or '').strip()!r}"
|
||||
)
|
||||
|
||||
# Stdlib SHA-256 of the cert's DER bytes — the standard
|
||||
# fingerprint form. Never the private key.
|
||||
|
||||
@@ -28,7 +28,6 @@ from pathlib import Path
|
||||
|
||||
from ....git_gate import git_gate_render_gitconfig
|
||||
from ....log import info
|
||||
from ...docker.git_gate import GIT_GATE_PORT
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
@@ -79,10 +78,12 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non
|
||||
if not bottle.git:
|
||||
return
|
||||
|
||||
# IP-literal form: the TSI allowlist passes <bundle_ip>/32 and
|
||||
# nothing else, so the agent has to dial the gate by IP+port.
|
||||
gate_host = f"{plan.bundle_ip}:{GIT_GATE_PORT}"
|
||||
content = git_gate_render_gitconfig(bottle.git, gate_host)
|
||||
# `127.0.0.1:<host port>` form: the bundle's git-gate port
|
||||
# is published on host loopback at launch time so the
|
||||
# smolvm guest (which can only reach macOS networking via
|
||||
# TSI, not the docker bridge IP) can dial it. launch.py
|
||||
# populates `plan.agent_git_gate_host` after bundle bringup.
|
||||
content = git_gate_render_gitconfig(bottle.git, plan.agent_git_gate_host)
|
||||
|
||||
guest_gitconfig = f"{_guest_home()}/.gitconfig"
|
||||
# Stage the file under the plan's stage_dir so `machine cp`
|
||||
|
||||
@@ -14,7 +14,6 @@ short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from ....log import info, warn
|
||||
from ....supervise import SUPERVISE_PORT
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
@@ -22,25 +21,33 @@ from ..bottle_plan import SmolmachinesBottlePlan
|
||||
_SUPERVISE_MCP_NAME = "supervise"
|
||||
|
||||
|
||||
def supervise_mcp_url(bundle_ip: str) -> str:
|
||||
return f"http://{bundle_ip}:{SUPERVISE_PORT}/"
|
||||
|
||||
|
||||
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Run `claude mcp add` inside the guest to register the
|
||||
supervise sidecar in claude-code's user config. No-op when
|
||||
bottle.supervise is False.
|
||||
|
||||
The URL is the agent-side endpoint launch.py populated after
|
||||
bundle bringup — `http://127.0.0.1:<host port>/` rather than
|
||||
the bundle's docker bridge IP, because that bridge isn't
|
||||
reachable from the smolvm guest on macOS.
|
||||
|
||||
Failure is logged but not fatal: the bottle still works (you
|
||||
just can't call supervise tools from the agent until the entry
|
||||
is added manually). The operator sees the warning at launch."""
|
||||
if plan.supervise_plan is None:
|
||||
return
|
||||
url = supervise_mcp_url(plan.bundle_ip)
|
||||
url = plan.agent_supervise_url
|
||||
info(f"registering supervise MCP server in agent claude config → {url}")
|
||||
# `claude mcp add --scope user` writes to ~/.claude.json. The
|
||||
# agent is the `node` user; smolvm machine_exec runs as root
|
||||
# by default, so we have to switch user explicitly and set
|
||||
# HOME so the config lands in /home/node/.claude.json (where
|
||||
# the agent's claude actually reads it from).
|
||||
r = _smolvm.machine_exec(
|
||||
target,
|
||||
[
|
||||
"runuser", "-u", "node", "--",
|
||||
"env", "HOME=/home/node",
|
||||
"claude", "mcp", "add",
|
||||
"--scope", "user",
|
||||
"--transport", "http",
|
||||
@@ -57,4 +64,4 @@ def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["provision_supervise", "supervise_mcp_url"]
|
||||
__all__ = ["provision_supervise"]
|
||||
|
||||
@@ -70,6 +70,13 @@ class BundleLaunchSpec:
|
||||
environment: Sequence[str] = field(default_factory=tuple)
|
||||
# (host_path, container_path, read_only) bind mounts.
|
||||
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
|
||||
# Container ports to publish on the host's 127.0.0.1, random
|
||||
# host-side port per entry. The smolvm guest's TSI talks via
|
||||
# macOS networking, so docker container IPs (192.168.x.x in
|
||||
# the daemon's bridge) aren't directly reachable from the
|
||||
# guest — host-loopback port-forwards are. Egress's port
|
||||
# is bundle-internal and never published.
|
||||
ports_to_publish: Sequence[int] = field(default_factory=tuple)
|
||||
|
||||
|
||||
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
|
||||
@@ -135,6 +142,11 @@ def start_bundle(spec: BundleLaunchSpec, *,
|
||||
for host_path, container_path, read_only in spec.volumes:
|
||||
suffix = ":ro" if read_only else ""
|
||||
argv += ["-v", f"{host_path}:{container_path}{suffix}"]
|
||||
# Loopback-only host port-forwards — the smolvm guest's TSI
|
||||
# uses macOS networking, and macOS loopback is the only host
|
||||
# surface that round-trips into Docker Desktop's daemon VM.
|
||||
for port in spec.ports_to_publish:
|
||||
argv += ["-p", f"127.0.0.1::{port}"]
|
||||
argv.append(spec.image)
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True,
|
||||
@@ -147,6 +159,33 @@ def start_bundle(spec: BundleLaunchSpec, *,
|
||||
)
|
||||
|
||||
|
||||
def bundle_host_port(slug: str, container_port: int) -> int:
    """Return the host-side port docker assigned to a published bundle port.

    Runs `docker port <bundle> <container_port>/tcp` and parses the
    first non-blank `host:port` line of its output. Called after
    `start_bundle` on each container port listed in
    `BundleLaunchSpec.ports_to_publish` so the launch step can build
    the agent's HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
    `127.0.0.1:<host port>` form.

    Args:
        slug: Bottle slug; mapped to the bundle container's name via
            `bundle_container_name`.
        container_port: Container-side TCP port to look up.

    Returns:
        The host port as an int.

    Every failure is reported through `die()`: a non-zero exit from
    `docker port`, empty output, or a port field that isn't an integer.
    """
    container = bundle_container_name(slug)
    result = subprocess.run(
        ["docker", "port", container, f"{container_port}/tcp"],
        capture_output=True, text=True, check=False,
    )
    if result.returncode != 0:
        die(
            f"docker port {container} {container_port}/tcp failed: "
            f"{(result.stderr or '').strip() or '<no stderr>'}"
        )
    # `127.0.0.1:54321\n` — rpartition on last colon gives the port.
    # Take the first non-blank line and fall back to "" so that empty
    # output lands in the die() below instead of raising IndexError.
    stripped = (raw.strip() for raw in (result.stdout or "").splitlines())
    line = next((entry for entry in stripped if entry), "")
    _, _, port_str = line.rpartition(":")
    try:
        return int(port_str)
    except ValueError:
        die(f"unexpected `docker port` output: {line!r}")
    return -1  # unreachable; die() never returns
|
||||
|
||||
|
||||
def stop_bundle(slug: str) -> None:
|
||||
"""Idempotent: a missing container returns success."""
|
||||
container = bundle_container_name(slug)
|
||||
|
||||
@@ -117,12 +117,21 @@ def machine_create(
|
||||
Smolfile because `--from` and `--smolfile` are themselves
|
||||
mutually exclusive in smolvm 0.8.0 — and we want `--from`'s
|
||||
no-pull-at-start property. The flag form gives the same
|
||||
result without the Smolfile complication."""
|
||||
result without the Smolfile complication.
|
||||
|
||||
`--net` is sent explicitly when `allow_cidrs` is non-empty.
|
||||
smolvm 0.8.0's docs say `--allow-cidr` implies `--net`, but
|
||||
empirically the implication only fires when no `--from` is
|
||||
set — `--from PATH --allow-cidr X/32` silently produces a
|
||||
machine with `network: false` and no routes in the guest, so
|
||||
the agent can't reach the bundle's pinned IP."""
|
||||
args: list[str] = ["machine", "create"]
|
||||
if image is not None:
|
||||
args += ["--image", image]
|
||||
if from_path is not None:
|
||||
args += ["--from", str(from_path)]
|
||||
if allow_cidrs:
|
||||
args.append("--net")
|
||||
for cidr in allow_cidrs:
|
||||
args += ["--allow-cidr", cidr]
|
||||
if env:
|
||||
|
||||
@@ -600,6 +600,22 @@ PRD 0024's bundle image is a prerequisite — this PRD assumes
|
||||
the plan is to filter on a deterministic name prefix
|
||||
`claude-bottle-<slug>` + cross-reference with on-disk metadata
|
||||
under `state/<slug>/`.
|
||||
8. **Loopback scoping (Docker Desktop pivot).** The original
|
||||
design pinned the bundle at a docker bridge IP and set TSI's
|
||||
allowlist to `<bundle-ip>/32`. On Docker Desktop / macOS the
|
||||
daemon runs inside its own Linux VM, so bridge IPs aren't
|
||||
reachable from macOS networking — TSI's syscall impersonation
|
||||
can't reach them. Resolution: publish each agent-facing bundle
|
||||
port on host loopback (`-p 127.0.0.1::<port>`) and set TSI to
|
||||
`127.0.0.1/32`. **This widens the TSI allowlist to anything
|
||||
bound to macOS's loopback** — postgres, dev servers, other
|
||||
bottles' published ports, mDNSResponder, etc. The agent isn't
|
||||
meant to reach them, but TSI can't filter by port. Follow-up
|
||||
to scope back: bind each bottle's bundle ports on a per-bottle
|
||||
loopback alias (e.g. `127.0.0.2` for bottle A, `127.0.0.3` for
|
||||
B) added via `ifconfig lo0 alias`, set TSI to that single /32.
|
||||
Needs sudo for alias setup; a small daemon-or-script we ship
|
||||
alongside the launcher could handle it.
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -54,26 +54,18 @@ class TestImageId(unittest.TestCase):
|
||||
self.assertIn("missing:tag", die.call_args.args[0])
|
||||
|
||||
|
||||
class TestTagPush(unittest.TestCase):
|
||||
def test_tag_runs_docker_tag(self):
|
||||
class TestSave(unittest.TestCase):
|
||||
def test_save_runs_docker_save(self):
|
||||
with patch.object(
|
||||
docker_mod.subprocess, "run", return_value=_ok(),
|
||||
) as run:
|
||||
docker_mod.tag("claude-bottle:latest", "localhost:5000/cb:abc")
|
||||
docker_mod.save("claude-bottle:latest", "/tmp/img.tar")
|
||||
argv = run.call_args.args[0]
|
||||
self.assertEqual(
|
||||
["docker", "tag", "claude-bottle:latest", "localhost:5000/cb:abc"],
|
||||
["docker", "save", "claude-bottle:latest", "-o", "/tmp/img.tar"],
|
||||
argv,
|
||||
)
|
||||
|
||||
def test_push_runs_docker_push(self):
|
||||
with patch.object(
|
||||
docker_mod.subprocess, "run", return_value=_ok(),
|
||||
) as run:
|
||||
docker_mod.push("localhost:5000/cb:abc")
|
||||
argv = run.call_args.args[0]
|
||||
self.assertEqual(["docker", "push", "localhost:5000/cb:abc"], argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
"""Unit: ephemeral local-registry helper (PRD 0023 chunk 4c).
|
||||
|
||||
The helper brings up a `registry:2.8.3` container on a random
|
||||
loopback port, yields the port, and tears the container down on
|
||||
exit. Tests mock `subprocess.run` + `socket.create_connection` so
|
||||
they run without docker."""
|
||||
The helper brings up a `registry:2.8.3` container on a private
|
||||
docker network with a random host-side port, yields a
|
||||
`RegistryHandle`, and tears the container + network down on exit.
|
||||
Tests mock `subprocess.run` + `socket.create_connection` so they
|
||||
run without docker."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import unittest
|
||||
from unittest.mock import call, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
from claude_bottle.backend.smolmachines import local_registry
|
||||
|
||||
@@ -20,38 +21,81 @@ def _ok(stdout: str = "", stderr: str = "") -> subprocess.CompletedProcess:
|
||||
)
|
||||
|
||||
|
||||
def _fail(stderr: str = "boom") -> subprocess.CompletedProcess:
|
||||
return subprocess.CompletedProcess(
|
||||
args=[], returncode=1, stdout="", stderr=stderr,
|
||||
)
|
||||
|
||||
|
||||
# Run sequence per ephemeral_registry() call:
|
||||
# docker network create -> ok
|
||||
# docker run -d (registry) -> ok (container id)
|
||||
# docker port (host port) -> ok (mapping line)
|
||||
# docker rm -f (registry) -> ok (in finally)
|
||||
# docker network rm -> ok (in finally)
|
||||
def _stock_run_sequence(port_line: str = "0.0.0.0:54321\n"):
    """Canned `subprocess.run` results for one happy-path
    `ephemeral_registry()` session, in the order the calls are made.
    `port_line` is the stdout handed back for the `docker port` call."""
    network_create = _ok()
    registry_run = _ok(stdout="<container-id>\n")
    port_lookup = _ok(stdout=port_line)
    registry_rm = _ok()
    network_rm = _ok()
    return [
        network_create,
        registry_run,
        port_lookup,
        registry_rm,
        network_rm,
    ]
|
||||
|
||||
|
||||
class TestEphemeralRegistry(unittest.TestCase):
|
||||
def test_yields_host_port_parsed_from_docker_port(self):
|
||||
# docker run + docker port + docker rm in that order; the
|
||||
# port command returns `127.0.0.1:54321` for the loopback
|
||||
# binding.
|
||||
def test_yields_handle_with_network_and_endpoints(self):
|
||||
with patch.object(
|
||||
local_registry.subprocess, "run",
|
||||
side_effect=[
|
||||
_ok(stdout="<container-id>\n"),
|
||||
_ok(stdout="127.0.0.1:54321\n"),
|
||||
_ok(),
|
||||
],
|
||||
side_effect=_stock_run_sequence(),
|
||||
) as run, patch.object(
|
||||
local_registry.socket, "create_connection",
|
||||
return_value=_FakeSocket(),
|
||||
):
|
||||
with local_registry.ephemeral_registry() as port:
|
||||
self.assertEqual(54321, port)
|
||||
with local_registry.ephemeral_registry() as handle:
|
||||
# push_endpoint points at the registry container by
|
||||
# its docker-network name on its container port.
|
||||
self.assertTrue(
|
||||
handle.push_endpoint.startswith(
|
||||
"claude-bottle-registry-"
|
||||
)
|
||||
)
|
||||
self.assertTrue(handle.push_endpoint.endswith(":5000"))
|
||||
# pull_endpoint is the host-side mapping for smolvm.
|
||||
self.assertEqual("localhost:54321", handle.pull_endpoint)
|
||||
# network name is the per-session bridge crane joins.
|
||||
self.assertTrue(
|
||||
handle.network.startswith("claude-bottle-registry-net-")
|
||||
)
|
||||
# docker network create + docker run + docker port + rm -f + network rm
|
||||
self.assertEqual(5, run.call_count)
|
||||
|
||||
# docker run, docker port, docker rm -f
|
||||
self.assertEqual(3, run.call_count)
|
||||
run_argv = run.call_args_list[0].args[0]
|
||||
self.assertEqual(["docker", "run"], run_argv[:2])
|
||||
self.assertIn("--rm", run_argv)
|
||||
# Loopback-only port binding so the registry isn't exposed
|
||||
# on the LAN even briefly.
|
||||
self.assertIn("127.0.0.1::5000", run_argv)
|
||||
|
||||
def test_force_removes_container_on_clean_exit(self):
|
||||
def test_registry_run_publishes_random_port_across_interfaces(self):
|
||||
with patch.object(
|
||||
local_registry.subprocess, "run",
|
||||
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()],
|
||||
side_effect=_stock_run_sequence(),
|
||||
) as run, patch.object(
|
||||
local_registry.socket, "create_connection",
|
||||
return_value=_FakeSocket(),
|
||||
):
|
||||
with local_registry.ephemeral_registry():
|
||||
pass
|
||||
# second call is the docker run for the registry
|
||||
run_argv = run.call_args_list[1].args[0]
|
||||
self.assertEqual(["docker", "run"], run_argv[:2])
|
||||
self.assertIn("--rm", run_argv)
|
||||
# `-p 5000` (no IP prefix) — needed so the host-published
|
||||
# port is reachable from BOTH the host (for smolvm) and the
|
||||
# docker daemon (for the docker port command to find it).
|
||||
self.assertIn("5000", run_argv)
|
||||
# And the registry is attached to the same per-session
|
||||
# network the crane push container joins.
|
||||
self.assertIn("--network", run_argv)
|
||||
|
||||
def test_force_removes_container_and_network_on_clean_exit(self):
|
||||
with patch.object(
|
||||
local_registry.subprocess, "run",
|
||||
side_effect=_stock_run_sequence(),
|
||||
) as run, patch.object(
|
||||
local_registry.socket, "create_connection",
|
||||
return_value=_FakeSocket(),
|
||||
@@ -59,14 +103,16 @@ class TestEphemeralRegistry(unittest.TestCase):
|
||||
with local_registry.ephemeral_registry():
|
||||
pass
|
||||
|
||||
# Last call is `docker rm -f <name>`.
|
||||
last_argv = run.call_args_list[-1].args[0]
|
||||
self.assertEqual(["docker", "rm", "-f"], last_argv[:3])
|
||||
# Last two calls are `docker rm -f <container>` then
|
||||
# `docker network rm <network>`.
|
||||
argvs = [c.args[0] for c in run.call_args_list]
|
||||
self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
|
||||
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
|
||||
|
||||
def test_force_removes_container_on_exception_inside_with(self):
|
||||
def test_force_removes_on_exception_inside_with(self):
|
||||
with patch.object(
|
||||
local_registry.subprocess, "run",
|
||||
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()],
|
||||
side_effect=_stock_run_sequence(),
|
||||
) as run, patch.object(
|
||||
local_registry.socket, "create_connection",
|
||||
return_value=_FakeSocket(),
|
||||
@@ -75,15 +121,15 @@ class TestEphemeralRegistry(unittest.TestCase):
|
||||
with local_registry.ephemeral_registry():
|
||||
raise RuntimeError("inside with")
|
||||
|
||||
# rm -f still ran on exception.
|
||||
last_argv = run.call_args_list[-1].args[0]
|
||||
self.assertEqual(["docker", "rm", "-f"], last_argv[:3])
|
||||
# Both teardowns still ran.
|
||||
argvs = [c.args[0] for c in run.call_args_list]
|
||||
self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
|
||||
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
|
||||
|
||||
def test_wait_ready_times_out_when_socket_never_connects(self):
|
||||
# Drop the timeout to a value that fits the test budget.
|
||||
def test_wait_ready_times_out(self):
|
||||
with patch.object(local_registry, "_READY_TIMEOUT_S", 0.1), patch.object(
|
||||
local_registry.subprocess, "run",
|
||||
side_effect=[_ok(stdout="cid\n"), _ok(stdout="127.0.0.1:1234\n"), _ok()],
|
||||
side_effect=_stock_run_sequence(),
|
||||
) as run, patch.object(
|
||||
local_registry.socket, "create_connection",
|
||||
side_effect=OSError("conn refused"),
|
||||
@@ -95,18 +141,27 @@ class TestEphemeralRegistry(unittest.TestCase):
|
||||
with local_registry.ephemeral_registry():
|
||||
self.fail("yield reached despite unreachable registry")
|
||||
die.assert_called_once()
|
||||
# rm -f still ran (cleanup goes through the finally block).
|
||||
last_argv = run.call_args_list[-1].args[0]
|
||||
self.assertEqual(["docker", "rm", "-f"], last_argv[:3])
|
||||
# Teardown still ran via the finally blocks.
|
||||
argvs = [c.args[0] for c in run.call_args_list]
|
||||
self.assertEqual(["docker", "rm", "-f"], argvs[-2][:3])
|
||||
self.assertEqual(["docker", "network", "rm"], argvs[-1][:3])
|
||||
|
||||
def test_unique_container_name_per_call(self):
|
||||
names: list[str] = []
|
||||
def test_unique_session_ids_per_call(self):
|
||||
sessions: list[tuple[str, str]] = []
|
||||
|
||||
def capture(argv, *a, **kw):
|
||||
if argv[:3] == ["docker", "network", "create"]:
|
||||
return _ok()
|
||||
if argv[:2] == ["docker", "run"]:
|
||||
names.append(argv[argv.index("--name") + 1])
|
||||
return _ok(stdout="cid\n" if argv[:2] == ["docker", "run"]
|
||||
else "127.0.0.1:1\n")
|
||||
# `--name <registry-name>` and `--network <net-name>`
|
||||
# both encode the session id.
|
||||
name = argv[argv.index("--name") + 1]
|
||||
network = argv[argv.index("--network") + 1]
|
||||
sessions.append((name, network))
|
||||
return _ok(stdout="cid\n")
|
||||
if argv[:2] == ["docker", "port"]:
|
||||
return _ok(stdout="0.0.0.0:1\n")
|
||||
return _ok()
|
||||
|
||||
with patch.object(
|
||||
local_registry.subprocess, "run", side_effect=capture,
|
||||
@@ -119,10 +174,64 @@ class TestEphemeralRegistry(unittest.TestCase):
|
||||
with local_registry.ephemeral_registry():
|
||||
pass
|
||||
|
||||
self.assertEqual(2, len(names))
|
||||
self.assertNotEqual(names[0], names[1])
|
||||
for n in names:
|
||||
self.assertTrue(n.startswith("claude-bottle-registry-"))
|
||||
self.assertEqual(2, len(sessions))
|
||||
self.assertNotEqual(sessions[0], sessions[1])
|
||||
|
||||
|
||||
class TestCranePushTarball(unittest.TestCase):
    """`crane_push_tarball` shells out to a one-shot `docker run` and
    reports a failed push through `die()`."""

    def test_runs_crane_container_on_registry_network_with_insecure_flag(self):
        registry = local_registry.RegistryHandle(
            network="cb-registry-net-x",
            push_endpoint="cb-registry-x:5000",
            pull_endpoint="localhost:54321",
        )
        run_patch = patch.object(
            local_registry.subprocess, "run", return_value=_ok(),
        )
        with run_patch as run:
            local_registry.crane_push_tarball(
                registry, "/tmp/img.tar", "cb-registry-x:5000/cb:abc",
            )

        cmd = run.call_args.args[0]
        # A throwaway `docker run` joined to the registry's network,
        # so the registry is reachable by container name (no host
        # port-forward involved in the push leg).
        self.assertEqual("docker", cmd[0])
        self.assertEqual("run", cmd[1])
        self.assertIn("--rm", cmd)
        self.assertIn("--network", cmd)
        network_arg = cmd[cmd.index("--network") + 1]
        self.assertEqual("cb-registry-net-x", network_arg)
        # Tarball is bind-mounted read-only at /img.tar.
        self.assertIn("-v", cmd)
        self.assertIn("/tmp/img.tar:/img.tar:ro", cmd)
        # crane itself is told to push with --insecure so plain HTTP
        # against the registry container is accepted.
        self.assertIn("push", cmd)
        self.assertIn("--insecure", cmd)
        self.assertIn("/img.tar", cmd)
        self.assertIn("cb-registry-x:5000/cb:abc", cmd)

    def test_dies_when_crane_returns_non_zero(self):
        registry = local_registry.RegistryHandle(
            network="cb-net", push_endpoint="cb:5000", pull_endpoint="localhost:1",
        )
        run_patch = patch.object(
            local_registry.subprocess, "run",
            return_value=_fail("push failed"),
        )
        die_patch = patch.object(
            local_registry, "die", side_effect=SystemExit("die"),
        )
        with run_patch, die_patch as die:
            with self.assertRaises(SystemExit):
                local_registry.crane_push_tarball(
                    registry, "/tmp/img.tar", "cb:5000/cb:abc",
                )

        die.assert_called_once()
        # The failure message identifies both the tarball and the
        # destination ref.
        message = die.call_args.args[0]
        self.assertIn("/tmp/img.tar", message)
        self.assertIn("cb:5000/cb:abc", message)
|
||||
|
||||
|
||||
class _FakeSocket:
|
||||
|
||||
@@ -40,32 +40,43 @@ class TestEnsureSmolmachine(unittest.TestCase):
|
||||
_prepare.docker_mod, "image_id",
|
||||
return_value=f"sha256:{digest}fffffffffffffffff",
|
||||
), patch.object(
|
||||
_prepare.docker_mod, "save",
|
||||
) as save, patch.object(
|
||||
_prepare, "ephemeral_registry",
|
||||
) as registry, patch.object(
|
||||
_prepare.docker_mod, "tag",
|
||||
) as tag, patch.object(
|
||||
_prepare.docker_mod, "push",
|
||||
_prepare, "crane_push_tarball",
|
||||
) as push, patch.object(
|
||||
_prepare._smolvm, "pack_create",
|
||||
) as pack:
|
||||
result = _prepare._ensure_smolmachine("claude-bottle:latest")
|
||||
|
||||
self.assertEqual(sidecar, result)
|
||||
# build still runs (Dockerfile edits land without manual rmi)
|
||||
# build still runs (Dockerfile edits land without manual rmi).
|
||||
build.assert_called_once()
|
||||
# No registry, no tag, no push, no pack on cache hit.
|
||||
# No save (500MB tarball), no registry, no push, no pack on
|
||||
# cache hit.
|
||||
save.assert_not_called()
|
||||
registry.assert_not_called()
|
||||
tag.assert_not_called()
|
||||
push.assert_not_called()
|
||||
pack.assert_not_called()
|
||||
|
||||
def test_cache_miss_runs_build_tag_push_pack_in_order(self):
|
||||
def test_cache_miss_runs_build_save_push_pack_in_order(self):
|
||||
digest = "0123456789abcdef"
|
||||
|
||||
# ephemeral_registry is a context manager yielding the port.
|
||||
# ephemeral_registry yields a RegistryHandle with the
|
||||
# docker network + a push endpoint (container DNS) and
|
||||
# pull endpoint (host port-forward).
|
||||
from claude_bottle.backend.smolmachines.local_registry import (
|
||||
RegistryHandle,
|
||||
)
|
||||
|
||||
class _Reg:
|
||||
def __enter__(self_inner):
|
||||
return 54321
|
||||
return RegistryHandle(
|
||||
network="cb-net-xyz",
|
||||
push_endpoint="cb-registry-xyz:5000",
|
||||
pull_endpoint="localhost:54321",
|
||||
)
|
||||
def __exit__(self_inner, *exc):
|
||||
return False
|
||||
|
||||
@@ -83,13 +94,13 @@ class TestEnsureSmolmachine(unittest.TestCase):
|
||||
_prepare.docker_mod, "image_id",
|
||||
return_value=f"sha256:{digest}fffffffffffffffff",
|
||||
), patch.object(
|
||||
_prepare.docker_mod, "save",
|
||||
side_effect=record("save"),
|
||||
) as save, patch.object(
|
||||
_prepare, "ephemeral_registry",
|
||||
return_value=_Reg(),
|
||||
), patch.object(
|
||||
_prepare.docker_mod, "tag",
|
||||
side_effect=record("tag"),
|
||||
) as tag, patch.object(
|
||||
_prepare.docker_mod, "push",
|
||||
_prepare, "crane_push_tarball",
|
||||
side_effect=record("push"),
|
||||
) as push, patch.object(
|
||||
_prepare._smolvm, "pack_create",
|
||||
@@ -97,23 +108,31 @@ class TestEnsureSmolmachine(unittest.TestCase):
|
||||
) as pack:
|
||||
_prepare._ensure_smolmachine("claude-bottle:latest")
|
||||
|
||||
# build first (no point pushing if the build fails), then
|
||||
# tag → push → pack against the registry port.
|
||||
self.assertEqual(["build", "tag", "push", "pack"], calls)
|
||||
# Build → save → push → pack in that order. No `docker
|
||||
# push` (the daemon's HTTPS-by-default path is what we're
|
||||
# sidestepping).
|
||||
self.assertEqual(["build", "save", "push", "pack"], calls)
|
||||
|
||||
# tag goes from the source ref to a localhost:<port> ref
|
||||
# with the digest as the tag suffix (so different builds
|
||||
# land on different tags in the registry).
|
||||
tag_args = tag.call_args.args
|
||||
self.assertEqual("claude-bottle:latest", tag_args[0])
|
||||
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", tag_args[1])
|
||||
# push targets the same localhost ref tag picks.
|
||||
# docker save targets a per-digest tarball alongside the
|
||||
# cached sidecar.
|
||||
save_args = save.call_args.args
|
||||
self.assertEqual("claude-bottle:latest", save_args[0])
|
||||
self.assertTrue(save_args[1].endswith(f"{digest}.image.tar"))
|
||||
|
||||
# crane push runs against the push_endpoint (container DNS
|
||||
# on the registry network) with the digest as the tag.
|
||||
push_args = push.call_args.args
|
||||
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", push_args[0])
|
||||
# pack_create reads from the registry ref, writes the
|
||||
# binary alongside the cached sidecar.
|
||||
self.assertEqual(
|
||||
f"cb-registry-xyz:5000/claude-bottle:{digest}", push_args[2],
|
||||
)
|
||||
|
||||
# pack_create reads from the pull_endpoint (host port-
|
||||
# forward, smolvm is on the host). Same repo+tag, just a
|
||||
# different routing hostname — the registry stores one blob.
|
||||
pack_args = pack.call_args.args
|
||||
self.assertEqual(f"localhost:54321/claude-bottle:{digest}", pack_args[0])
|
||||
self.assertEqual(
|
||||
f"localhost:54321/claude-bottle:{digest}", pack_args[0],
|
||||
)
|
||||
self.assertTrue(str(pack_args[1]).endswith(f"{digest}.smolmachine"))
|
||||
|
||||
|
||||
|
||||
@@ -44,6 +44,8 @@ def _plan(
|
||||
pipelock_ca_path: Path = Path(),
|
||||
supervise: bool = False,
|
||||
bundle_ip: str = "192.168.50.2",
|
||||
agent_git_gate_host: str = "127.0.0.1:55555",
|
||||
agent_supervise_url: str = "http://127.0.0.1:55556/",
|
||||
) -> SmolmachinesBottlePlan:
|
||||
bottle_json: dict = {}
|
||||
if git:
|
||||
@@ -111,6 +113,8 @@ def _plan(
|
||||
mitmproxy_ca_cert_only_host_path=egress_ca_path,
|
||||
),
|
||||
supervise_plan=supervise_plan,
|
||||
agent_git_gate_host=agent_git_gate_host,
|
||||
agent_supervise_url=agent_supervise_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -303,21 +307,38 @@ class TestProvisionCA(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
self._tmp.cleanup()
|
||||
|
||||
# provision_ca dies hard if update-ca-certificates' stdout
|
||||
# doesn't include "1 added"; supply a stock success return
|
||||
# so the bulk of the tests below exercise the happy path.
|
||||
_UPDATE_OK = SmolvmRunResult(
|
||||
returncode=0,
|
||||
stdout="Updating certificates in /etc/ssl/certs...\n1 added, 0 removed; done.\n",
|
||||
stderr="",
|
||||
)
|
||||
|
||||
def test_pipelock_path_when_no_routes(self):
|
||||
plan = _plan(pipelock_ca_path=self.pipelock_ca)
|
||||
with patch(
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
|
||||
) as cp, patch(
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec"
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
|
||||
return_value=self._UPDATE_OK,
|
||||
) as ex:
|
||||
_ca.provision_ca(plan, "claude-bottle-demo-abc12")
|
||||
cp.assert_called_once_with(
|
||||
str(self.pipelock_ca),
|
||||
"claude-bottle-demo-abc12:" + _ca.AGENT_CA_PATH,
|
||||
)
|
||||
argvs = [c.args[1] for c in ex.call_args_list]
|
||||
self.assertIn(["chmod", "644", _ca.AGENT_CA_PATH], argvs)
|
||||
self.assertIn(["update-ca-certificates"], argvs)
|
||||
# chmod + chown + update-ca-certificates are now folded
|
||||
# into one `sh -c` invocation (working around a smolvm
|
||||
# exec warm-up SIGKILL race), so we look at the single
|
||||
# exec's argv rather than expecting separate calls.
|
||||
ex.assert_called_once()
|
||||
argv = ex.call_args.args[1]
|
||||
self.assertEqual("sh", argv[0])
|
||||
self.assertEqual("-c", argv[1])
|
||||
self.assertIn("chmod 644", argv[2])
|
||||
self.assertIn("update-ca-certificates", argv[2])
|
||||
|
||||
def test_egress_path_when_routes_declared(self):
|
||||
plan = _plan(
|
||||
@@ -328,7 +349,8 @@ class TestProvisionCA(unittest.TestCase):
|
||||
with patch(
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
|
||||
) as cp, patch(
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec"
|
||||
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
|
||||
return_value=self._UPDATE_OK,
|
||||
):
|
||||
_ca.provision_ca(plan, "claude-bottle-demo-abc12")
|
||||
# When routes are declared, egress is the agent's first hop,
|
||||
@@ -412,9 +434,10 @@ class TestProvisionGit(unittest.TestCase):
|
||||
cp.assert_not_called()
|
||||
|
||||
def test_writes_gitconfig_with_ip_port_form_for_smolmachines(self):
|
||||
# Smolmachines's TSI-allowlisted guest has no DNS resolver,
|
||||
# so the insteadOf URL has to be IP+port rather than the
|
||||
# docker backend's `git-gate` short alias.
|
||||
# Smolmachines's TSI-allowlisted guest dials git-gate via
|
||||
# `127.0.0.1:<host port>` — the bundle's git-gate port is
|
||||
# published on host loopback at launch time, and the plan
|
||||
# carries the discovered host port (here mocked to 9418).
|
||||
plan = _plan(
|
||||
git=[GitEntry(
|
||||
Name="claude-bottle",
|
||||
@@ -422,7 +445,7 @@ class TestProvisionGit(unittest.TestCase):
|
||||
IdentityFile="~/.ssh/id_ed25519",
|
||||
)],
|
||||
stage_dir=self.stage,
|
||||
bundle_ip="192.168.99.2",
|
||||
agent_git_gate_host="127.0.0.1:9418",
|
||||
)
|
||||
with patch(
|
||||
"claude_bottle.backend.smolmachines.provision.git._smolvm.machine_cp"
|
||||
@@ -437,7 +460,7 @@ class TestProvisionGit(unittest.TestCase):
|
||||
self.assertEqual(self.stage, staged_path.parent)
|
||||
content = staged_path.read_text()
|
||||
self.assertIn(
|
||||
'[url "git://192.168.99.2:9418/claude-bottle.git"]', content,
|
||||
'[url "git://127.0.0.1:9418/claude-bottle.git"]', content,
|
||||
)
|
||||
self.assertIn(
|
||||
"\tinsteadOf = ssh://git@host/repo.git", content,
|
||||
@@ -453,7 +476,10 @@ class TestProvisionSupervise(unittest.TestCase):
|
||||
ex.assert_not_called()
|
||||
|
||||
def test_calls_claude_mcp_add_when_supervise_enabled(self):
|
||||
plan = _plan(supervise=True, bundle_ip="192.168.50.2")
|
||||
plan = _plan(
|
||||
supervise=True,
|
||||
agent_supervise_url="http://127.0.0.1:9100/",
|
||||
)
|
||||
with patch(
|
||||
"claude_bottle.backend.smolmachines.provision.supervise._smolvm.machine_exec",
|
||||
return_value=SmolvmRunResult(returncode=0, stdout="", stderr=""),
|
||||
@@ -461,14 +487,20 @@ class TestProvisionSupervise(unittest.TestCase):
|
||||
_supervise.provision_supervise(plan, "claude-bottle-demo-abc12")
|
||||
ex.assert_called_once()
|
||||
argv = ex.call_args.args[1]
|
||||
# claude mcp add --scope user --transport http supervise <url>
|
||||
# `claude mcp add --scope user` writes to ~/.claude.json,
|
||||
# and the agent is the `node` user — switch UID + set
|
||||
# HOME so the config lands in /home/node/.claude.json,
|
||||
# not root's. URL is the agent-side endpoint (host
|
||||
# loopback + discovered port), not the docker bridge IP.
|
||||
self.assertEqual(
|
||||
[
|
||||
"runuser", "-u", "node", "--",
|
||||
"env", "HOME=/home/node",
|
||||
"claude", "mcp", "add",
|
||||
"--scope", "user",
|
||||
"--transport", "http",
|
||||
"supervise",
|
||||
"http://192.168.50.2:9100/",
|
||||
"http://127.0.0.1:9100/",
|
||||
],
|
||||
argv,
|
||||
)
|
||||
|
||||
@@ -82,12 +82,20 @@ class TestArgvShapes(unittest.TestCase):
|
||||
self.assertEqual("smolvm", argv[0])
|
||||
self.assertIn("--from", argv)
|
||||
self.assertIn("/stage/agent.smolmachine", argv)
|
||||
# `--net` is explicit because smolvm 0.8.0's implied-net
|
||||
# from --allow-cidr doesn't fire when --from is set.
|
||||
self.assertIn("--net", argv)
|
||||
self.assertIn("--allow-cidr", argv)
|
||||
self.assertIn("192.168.50.2/32", argv)
|
||||
self.assertIn("-e", argv)
|
||||
self.assertIn("HTTPS_PROXY=http://192.168.50.2:8888", argv)
|
||||
self.assertEqual("agent-xyz", argv[-1])
|
||||
|
||||
def test_machine_create_omits_net_when_no_allow_cidrs(self):
|
||||
with self._patch_run() as m:
|
||||
machine_create("agent-xyz", from_path=Path("/x.smolmachine"))
|
||||
self.assertNotIn("--net", m.call_args.args[0])
|
||||
|
||||
def test_machine_start_uses_dash_name(self):
|
||||
# `start` is the --name flag form, NOT positional.
|
||||
with self._patch_run() as m:
|
||||
|
||||
Reference in New Issue
Block a user