fix(smolmachines): agent dials bundle via host loopback ports, not docker bridge IP
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 39s

Claude hung on outbound network calls under
CLAUDE_BOTTLE_BACKEND=smolmachines:

  Unable to connect to API (FailedToOpenSocket)

Root cause: the PRD-0023 design pinned the bundle at a docker
bridge IP (192.168.X.2) and set the smolvm guest's TSI allowlist
to `<bundle-ip>/32`. On native Linux this works — host shares
the docker bridge's network namespace, TSI's syscall
impersonation reaches the bridge IP directly. On Docker Desktop
(macOS), the daemon runs in its own Linux VM and docker bridge
IPs aren't reachable from macOS networking, so the smolvm
guest's TSI requests die "Network is unreachable" before they
hit pipelock.

Fix: publish each agent-facing bundle daemon's port on host
loopback (-p 127.0.0.1::PORT), discover the random host-side
ports after start, and route the agent through
`127.0.0.1:<host port>` instead of the bridge IP. macOS loopback
is the surface Docker Desktop's gvproxy forwards into the
daemon's VM, so the chain (guest TSI -> macOS loopback ->
daemon VM port-forward -> bundle container) works on both
Docker Desktop and native Linux.

Concrete changes:
- BundleLaunchSpec: add `ports_to_publish` so start_bundle adds
  `-p 127.0.0.1::PORT` for the agent-facing ports (pipelock
  always; git-gate when upstreams declared; supervise when
  enabled). Egress's port stays bundle-internal.
- sidecar_bundle.bundle_host_port(): wrap `docker port <bundle>
  <container_port>/tcp` so launch can look up the random
  host-side mapping after start.
- launch.py: discover the host ports, build URLs of the form
  `http://127.0.0.1:<host port>` / `git://127.0.0.1:<host port>`,
  stamp onto guest_env + new agent_*_url fields on the plan.
- launch.py: TSI allow_cidrs flips to `["127.0.0.1/32"]`. The
  bundle IP is no longer the agent's target.
- prepare.py: stop synthesizing HTTPS_PROXY / GIT_GATE_URL /
  MCP_SUPERVISE_URL at prepare time — launch owns those now
  (the values depend on a port docker hasn't assigned yet).
- provision_git: gate_host from plan.agent_git_gate_host.
- provision_supervise: URL from plan.agent_supervise_url.

End-to-end verified on Docker Desktop / macOS: guest dials
pipelock through TSI, pipelock forwards to api.anthropic.com,
the API responds with 401 (i.e. it received the request).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 15:31:44 -04:00
parent da1e5e1ba8
commit 4f136a9932
7 changed files with 170 additions and 44 deletions
@@ -75,6 +75,17 @@ class SmolmachinesBottlePlan(BottlePlan):
# None when bottle.supervise is False, matching the docker
# backend's convention.
supervise_plan: SupervisePlan | None
# Agent-side endpoints. On Docker Desktop the docker bridge
# IPs aren't reachable from the smolvm guest (TSI uses macOS
# networking; docker container IPs live in the daemon's VM),
# so the agent dials the bundle via host loopback +
# docker-published random ports. Empty at prepare time;
# launch populates these after bundle bringup via
# `dataclasses.replace`. Format: a `host:port` for git-gate
# (insteadOf URL prefix) + full URLs for proxy / supervise.
agent_proxy_url: str = ""
agent_git_gate_host: str = ""
agent_supervise_url: str = ""
def print(self, *, remote_control: bool) -> None:
"""Compact y/N preflight. Same shape as the Docker
+80 -9
View File
@@ -41,14 +41,28 @@ from ..docker.git_gate import (
GIT_GATE_CREDS_DIR_IN_CONTAINER,
GIT_GATE_ENTRYPOINT_IN_CONTAINER,
GIT_GATE_HOOK_IN_CONTAINER,
GIT_GATE_PORT as _GIT_GATE_PORT,
)
from ..docker.pipelock import (
BUNDLE_LOCAL_PIPELOCK_URL,
PIPELOCK_PORT as _PIPELOCK_PORT_STR,
pipelock_tls_init,
)
from ..docker.pipelock import BUNDLE_LOCAL_PIPELOCK_URL, pipelock_tls_init
from . import sidecar_bundle as _bundle
from . import smolvm as _smolvm
from .bottle import SmolmachinesBottle
from .bottle_plan import SmolmachinesBottlePlan
# Container-internal listening ports for each bundle daemon. The
# bundle publishes each one on a random host loopback port (see
# `_bundle.start_bundle`), and `_bundle.bundle_host_port` looks
# them up post-start. Pipelock's port is an env-overridable string
# in docker.pipelock; coerce to int here.
_PIPELOCK_PORT = int(_PIPELOCK_PORT_STR)
_SUPERVISE_PORT = SUPERVISE_PORT
@contextmanager
def launch(
plan: SmolmachinesBottlePlan,
@@ -96,28 +110,74 @@ def launch(
)
# 3. Build the BundleLaunchSpec from the (now-resolved)
# inner Plans: daemon subset, env, bind-mounts.
# inner Plans: daemon subset, env, bind-mounts. The spec's
# ports_to_publish list expands depending on which daemons
# the agent needs to reach from the smolvm guest.
bundle_spec = _bundle_launch_spec(plan, network)
token_env = _resolve_token_env(plan, os.environ)
_bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
stack.callback(_bundle.stop_bundle, plan.slug)
# 4. smolvm VM. --from carries the pre-packed .smolmachine
# 4. Discover the host-side ports docker assigned for the
# bundle's published container ports, and bind the
# agent's URLs to `127.0.0.1:<host port>`. Docker container
# IPs (192.168.x.x in the daemon's bridge) aren't
# reachable from the smolvm guest on macOS — TSI uses
# macOS networking, and macOS sees the daemon's bridge
# via the published-port loopback forward only.
pipelock_host_port = _bundle.bundle_host_port(plan.slug, _PIPELOCK_PORT)
agent_proxy_url = f"http://127.0.0.1:{pipelock_host_port}"
agent_git_gate_host = ""
if plan.git_gate_plan.upstreams:
git_gate_host_port = _bundle.bundle_host_port(
plan.slug, _GIT_GATE_PORT,
)
agent_git_gate_host = f"127.0.0.1:{git_gate_host_port}"
agent_supervise_url = ""
if plan.supervise_plan is not None:
supervise_host_port = _bundle.bundle_host_port(
plan.slug, _SUPERVISE_PORT,
)
agent_supervise_url = f"http://127.0.0.1:{supervise_host_port}/"
# Stamp the URLs onto the plan + guest_env. provision_git
# and provision_supervise read the plan fields; the agent
# reads guest_env on every exec_claude.
guest_env = {
**plan.guest_env,
"HTTPS_PROXY": agent_proxy_url,
"HTTP_PROXY": agent_proxy_url,
}
if agent_git_gate_host:
guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}"
if agent_supervise_url:
guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
plan = dataclasses.replace(
plan,
guest_env=guest_env,
agent_proxy_url=agent_proxy_url,
agent_git_gate_host=agent_git_gate_host,
agent_supervise_url=agent_supervise_url,
)
# 5. smolvm VM. --from carries the pre-packed .smolmachine
# artifact (built by prepare); --allow-cidr + -e carry the
# per-bottle TSI allowlist + env. Smolfile isn't usable
# here — smolvm 0.8.0 makes `--from` and `--smolfile`
# mutually exclusive.
# per-bottle TSI allowlist + env. The allowlist is
# `127.0.0.1/32` because every bundle daemon the agent
# reaches is fronted by a host loopback port-forward.
# Smolfile isn't usable here — smolvm 0.8.0 makes `--from`
# and `--smolfile` mutually exclusive.
_smolvm.machine_create(
plan.machine_name,
from_path=plan.agent_from_path,
allow_cidrs=[f"{plan.bundle_ip}/32"],
allow_cidrs=["127.0.0.1/32"],
env=plan.guest_env,
)
stack.callback(_smolvm.machine_delete, plan.machine_name)
_smolvm.machine_start(plan.machine_name)
stack.callback(_smolvm.machine_stop, plan.machine_name)
# 5. Reclaim /home/node for the node user. smolvm's pack
# 6. Reclaim /home/node for the node user. smolvm's pack
# process remaps OCI-layer ownership to the host invoker's
# uid (501 on macOS) rather than preserving the image's
# uid 1000 — so without this chown, node can't write its
@@ -129,7 +189,7 @@ def launch(
["chown", "-R", "node:node", "/home/node"],
)
# 6. Provision (CA / prompt / skills / git / supervise).
# 7. Provision (CA / prompt / skills / git / supervise).
prompt_path = provision(plan, plan.machine_name)
yield SmolmachinesBottle(
@@ -217,6 +277,16 @@ def _bundle_launch_spec(
]
volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False))
# Container ports the agent reaches from the smolvm guest —
# published on host loopback so the guest can dial via TSI +
# macOS networking. Egress is bundle-internal and never
# published.
ports_to_publish: list[int] = [_PIPELOCK_PORT]
if gp.upstreams:
ports_to_publish.append(_GIT_GATE_PORT)
if sp is not None:
ports_to_publish.append(_SUPERVISE_PORT)
return _bundle.BundleLaunchSpec(
slug=plan.slug,
network_name=network,
@@ -226,6 +296,7 @@ def _bundle_launch_spec(
daemons_csv=",".join(daemons),
environment=tuple(env),
volumes=tuple(volumes),
ports_to_publish=tuple(ports_to_publish),
)
+10 -16
View File
@@ -89,29 +89,23 @@ def resolve_plan(
subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug)
# Agent's env. IP literals; no DNS resolution inside the guest
# (TSI allowlist contains only `<bundle_ip>/32` — no resolver).
# TLS trust env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
# REQUESTS_CA_BUNDLE) points at Debian's
# update-ca-certificates output bundle — provision_ca writes
# the per-bottle MITM CA there at launch time.
# Agent's env: the prepare-time view doesn't yet know the
# host loopback ports the bundle's daemons get published on
# (those come from docker AFTER `docker run` returns), so
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are
# populated in launch.py and stamped onto guest_env there.
# What we set here is the part that doesn't depend on
# bundle bringup — bottle.env literals, the empty-NO_PROXY
# safe default, and the TLS trust env trio
# (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE)
# pointing at Debian's update-ca-certificates output bundle.
guest_env: dict[str, str] = {
**bottle.env,
"HTTPS_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
"HTTP_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}",
"NO_PROXY": "localhost,127.0.0.1",
"NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt",
"SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt",
"REQUESTS_CA_BUNDLE": "/etc/ssl/certs/ca-certificates.crt",
}
if bottle.git:
guest_env["GIT_GATE_URL"] = (
f"git://{bundle_ip}:{_BUNDLE_GIT_GATE_PORT}"
)
if bottle.supervise:
guest_env["MCP_SUPERVISE_URL"] = (
f"http://{bundle_ip}:{_BUNDLE_SUPERVISE_PORT}"
)
# Inner Plans for the four bundle daemons. The ABCs are
# platform-neutral — `.prepare()` writes config files + returns
@@ -28,7 +28,6 @@ from pathlib import Path
from ....git_gate import git_gate_render_gitconfig
from ....log import info
from ...docker.git_gate import GIT_GATE_PORT
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
@@ -79,10 +78,12 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non
if not bottle.git:
return
# IP-literal form: the TSI allowlist passes <bundle_ip>/32 and
# nothing else, so the agent has to dial the gate by IP+port.
gate_host = f"{plan.bundle_ip}:{GIT_GATE_PORT}"
content = git_gate_render_gitconfig(bottle.git, gate_host)
# `127.0.0.1:<host port>` form: the bundle's git-gate port
# is published on host loopback at launch time so the
# smolvm guest (which can only reach macOS networking via
# TSI, not the docker bridge IP) can dial it. launch.py
# populates `plan.agent_git_gate_host` after bundle bringup.
content = git_gate_render_gitconfig(bottle.git, plan.agent_git_gate_host)
guest_gitconfig = f"{_guest_home()}/.gitconfig"
# Stage the file under the plan's stage_dir so `machine cp`
@@ -14,7 +14,6 @@ short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
from __future__ import annotations
from ....log import info, warn
from ....supervise import SUPERVISE_PORT
from .. import smolvm as _smolvm
from ..bottle_plan import SmolmachinesBottlePlan
@@ -22,21 +21,22 @@ from ..bottle_plan import SmolmachinesBottlePlan
_SUPERVISE_MCP_NAME = "supervise"
def supervise_mcp_url(bundle_ip: str) -> str:
return f"http://{bundle_ip}:{SUPERVISE_PORT}/"
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
"""Run `claude mcp add` inside the guest to register the
supervise sidecar in claude-code's user config. No-op when
bottle.supervise is False.
The URL is the agent-side endpoint launch.py populated after
bundle bringup — `http://127.0.0.1:<host port>/` rather than
the bundle's docker bridge IP, because that bridge isn't
reachable from the smolvm guest on macOS.
Failure is logged but not fatal: the bottle still works (you
just can't call supervise tools from the agent until the entry
is added manually). The operator sees the warning at launch."""
if plan.supervise_plan is None:
return
url = supervise_mcp_url(plan.bundle_ip)
url = plan.agent_supervise_url
info(f"registering supervise MCP server in agent claude config → {url}")
r = _smolvm.machine_exec(
target,
@@ -57,4 +57,4 @@ def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
)
__all__ = ["provision_supervise", "supervise_mcp_url"]
__all__ = ["provision_supervise"]
@@ -70,6 +70,13 @@ class BundleLaunchSpec:
environment: Sequence[str] = field(default_factory=tuple)
# (host_path, container_path, read_only) bind mounts.
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
# Container ports to publish on the host's 127.0.0.1, random
# host-side port per entry. The smolvm guest's TSI talks via
# macOS networking, so docker container IPs (192.168.x.x in
# the daemon's bridge) aren't directly reachable from the
# guest — host-loopback port-forwards are. Egress's port
# is bundle-internal and never published.
ports_to_publish: Sequence[int] = field(default_factory=tuple)
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
@@ -135,6 +142,11 @@ def start_bundle(spec: BundleLaunchSpec, *,
for host_path, container_path, read_only in spec.volumes:
suffix = ":ro" if read_only else ""
argv += ["-v", f"{host_path}:{container_path}{suffix}"]
# Loopback-only host port-forwards — the smolvm guest's TSI
# uses macOS networking, and macOS loopback is the only host
# surface that round-trips into Docker Desktop's daemon VM.
for port in spec.ports_to_publish:
argv += ["-p", f"127.0.0.1::{port}"]
argv.append(spec.image)
result = subprocess.run(
argv, capture_output=True, text=True,
@@ -147,6 +159,33 @@ def start_bundle(spec: BundleLaunchSpec, *,
)
def bundle_host_port(slug: str, container_port: int) -> int:
"""`docker port <bundle> <container_port>/tcp` → the random
host-side port docker assigned. Called after `start_bundle`
on each container port listed in `BundleLaunchSpec
.ports_to_publish` so the launch step can build the agent's
HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
`127.0.0.1:<host port>` form."""
container = bundle_container_name(slug)
result = subprocess.run(
["docker", "port", container, f"{container_port}/tcp"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
die(
f"docker port {container} {container_port}/tcp failed: "
f"{(result.stderr or '').strip() or '<no stderr>'}"
)
# `127.0.0.1:54321\n` — rpartition on last colon gives the port.
line = (result.stdout or "").splitlines()[0].strip()
_, _, port_str = line.rpartition(":")
try:
return int(port_str)
except ValueError:
die(f"unexpected `docker port` output: {line!r}")
return -1 # unreachable; die() never returns
def stop_bundle(slug: str) -> None:
"""Idempotent: a missing container returns success."""
container = bundle_container_name(slug)
+17 -7
View File
@@ -44,6 +44,8 @@ def _plan(
pipelock_ca_path: Path = Path(),
supervise: bool = False,
bundle_ip: str = "192.168.50.2",
agent_git_gate_host: str = "127.0.0.1:55555",
agent_supervise_url: str = "http://127.0.0.1:55556/",
) -> SmolmachinesBottlePlan:
bottle_json: dict = {}
if git:
@@ -111,6 +113,8 @@ def _plan(
mitmproxy_ca_cert_only_host_path=egress_ca_path,
),
supervise_plan=supervise_plan,
agent_git_gate_host=agent_git_gate_host,
agent_supervise_url=agent_supervise_url,
)
@@ -412,9 +416,10 @@ class TestProvisionGit(unittest.TestCase):
cp.assert_not_called()
def test_writes_gitconfig_with_ip_port_form_for_smolmachines(self):
# Smolmachines's TSI-allowlisted guest has no DNS resolver,
# so the insteadOf URL has to be IP+port rather than the
# docker backend's `git-gate` short alias.
# Smolmachines's TSI-allowlisted guest dials git-gate via
# `127.0.0.1:<host port>` — the bundle's git-gate port is
# published on host loopback at launch time, and the plan
# carries the discovered host port (here mocked to 9418).
plan = _plan(
git=[GitEntry(
Name="claude-bottle",
@@ -422,7 +427,7 @@ class TestProvisionGit(unittest.TestCase):
IdentityFile="~/.ssh/id_ed25519",
)],
stage_dir=self.stage,
bundle_ip="192.168.99.2",
agent_git_gate_host="127.0.0.1:9418",
)
with patch(
"claude_bottle.backend.smolmachines.provision.git._smolvm.machine_cp"
@@ -437,7 +442,7 @@ class TestProvisionGit(unittest.TestCase):
self.assertEqual(self.stage, staged_path.parent)
content = staged_path.read_text()
self.assertIn(
'[url "git://192.168.99.2:9418/claude-bottle.git"]', content,
'[url "git://127.0.0.1:9418/claude-bottle.git"]', content,
)
self.assertIn(
"\tinsteadOf = ssh://git@host/repo.git", content,
@@ -453,7 +458,10 @@ class TestProvisionSupervise(unittest.TestCase):
ex.assert_not_called()
def test_calls_claude_mcp_add_when_supervise_enabled(self):
plan = _plan(supervise=True, bundle_ip="192.168.50.2")
plan = _plan(
supervise=True,
agent_supervise_url="http://127.0.0.1:9100/",
)
with patch(
"claude_bottle.backend.smolmachines.provision.supervise._smolvm.machine_exec",
return_value=SmolvmRunResult(returncode=0, stdout="", stderr=""),
@@ -462,13 +470,15 @@ class TestProvisionSupervise(unittest.TestCase):
ex.assert_called_once()
argv = ex.call_args.args[1]
# claude mcp add --scope user --transport http supervise <url>
# — URL is the agent-side endpoint (host loopback +
# discovered port), not the docker bridge IP.
self.assertEqual(
[
"claude", "mcp", "add",
"--scope", "user",
"--transport", "http",
"supervise",
"http://192.168.50.2:9100/",
"http://127.0.0.1:9100/",
],
argv,
)