From 4f136a9932ead5d432765b78bff27052077a604e Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 15:31:44 -0400 Subject: [PATCH] fix(smolmachines): agent dials bundle via host loopback ports, not docker bridge IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude hung on outbound network calls under CLAUDE_BOTTLE_BACKEND=smolmachines: Unable to connect to API (FailedToOpenSocket) Root cause: the PRD-0023 design pinned the bundle at a docker bridge IP (192.168.X.2) and set the smolvm guest's TSI allowlist to `<bundle_ip>/32`. On native Linux this works — host shares the docker bridge's network namespace, TSI's syscall impersonation reaches the bridge IP directly. On Docker Desktop (macOS), the daemon runs in its own Linux VM and docker bridge IPs aren't reachable from macOS networking, so the smolvm guest's TSI requests die "Network is unreachable" before they hit pipelock. Fix: publish each agent-facing bundle daemon's port on host loopback (-p 127.0.0.1::PORT), discover the random host-side ports after start, and route the agent through `127.0.0.1:<host_port>` instead of the bridge IP. macOS loopback is the surface Docker Desktop's gvproxy forwards into the daemon's VM, so the chain (guest TSI -> macOS loopback -> daemon VM port-forward -> bundle container) works on both Docker Desktop and native Linux. Concrete changes: - BundleLaunchSpec: add `ports_to_publish` so start_bundle adds `-p 127.0.0.1::PORT` for the agent-facing ports (pipelock always; git-gate when upstreams declared; supervise when enabled). Egress's port stays bundle-internal. - sidecar_bundle.bundle_host_port(): wrap `docker port <container> <container_port>/tcp` so launch can look up the random host-side mapping after start. - launch.py: discover the host ports, build URLs of the form `http://127.0.0.1:<host_port>` / `git://127.0.0.1:<host_port>`, stamp onto guest_env + new agent_*_url fields on the plan. - launch.py: TSI allow_cidrs flips to `["127.0.0.1/32"]`. The bundle IP is no longer the agent's target. 
- prepare.py: stop synthesizing HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL at prepare time — launch owns those now (the values depend on a port docker hasn't assigned yet). - provision_git: gate_host from plan.agent_git_gate_host. - provision_supervise: URL from plan.agent_supervise_url. End-to-end verified on Docker Desktop / macOS: guest dials pipelock through TSI, pipelock forwards to api.anthropic.com, the API responds with 401 (i.e. it received the request). Co-Authored-By: Claude Opus 4.7 --- .../backend/smolmachines/bottle_plan.py | 11 +++ claude_bottle/backend/smolmachines/launch.py | 89 +++++++++++++++++-- claude_bottle/backend/smolmachines/prepare.py | 26 +++--- .../backend/smolmachines/provision/git.py | 11 +-- .../smolmachines/provision/supervise.py | 14 +-- .../backend/smolmachines/sidecar_bundle.py | 39 ++++++++ tests/unit/test_smolmachines_provision.py | 24 +++-- 7 files changed, 170 insertions(+), 44 deletions(-) diff --git a/claude_bottle/backend/smolmachines/bottle_plan.py b/claude_bottle/backend/smolmachines/bottle_plan.py index 801be03..c515002 100644 --- a/claude_bottle/backend/smolmachines/bottle_plan.py +++ b/claude_bottle/backend/smolmachines/bottle_plan.py @@ -75,6 +75,17 @@ class SmolmachinesBottlePlan(BottlePlan): # None when bottle.supervise is False, matching the docker # backend's convention. supervise_plan: SupervisePlan | None + # Agent-side endpoints. On Docker Desktop the docker bridge + # IPs aren't reachable from the smolvm guest (TSI uses macOS + # networking; docker container IPs live in the daemon's VM), + # so the agent dials the bundle via host loopback + + # docker-published random ports. Empty at prepare time; + # launch populates these after bundle bringup via + # `dataclasses.replace`. Format: a `host:port` for git-gate + # (insteadOf URL prefix) + full URLs for proxy / supervise. 
+ agent_proxy_url: str = "" + agent_git_gate_host: str = "" + agent_supervise_url: str = "" def print(self, *, remote_control: bool) -> None: """Compact y/N preflight. Same shape as the Docker diff --git a/claude_bottle/backend/smolmachines/launch.py b/claude_bottle/backend/smolmachines/launch.py index 6f1839c..8b3f409 100644 --- a/claude_bottle/backend/smolmachines/launch.py +++ b/claude_bottle/backend/smolmachines/launch.py @@ -41,14 +41,28 @@ from ..docker.git_gate import ( GIT_GATE_CREDS_DIR_IN_CONTAINER, GIT_GATE_ENTRYPOINT_IN_CONTAINER, GIT_GATE_HOOK_IN_CONTAINER, + GIT_GATE_PORT as _GIT_GATE_PORT, +) +from ..docker.pipelock import ( + BUNDLE_LOCAL_PIPELOCK_URL, + PIPELOCK_PORT as _PIPELOCK_PORT_STR, + pipelock_tls_init, ) -from ..docker.pipelock import BUNDLE_LOCAL_PIPELOCK_URL, pipelock_tls_init from . import sidecar_bundle as _bundle from . import smolvm as _smolvm from .bottle import SmolmachinesBottle from .bottle_plan import SmolmachinesBottlePlan +# Container-internal listening ports for each bundle daemon. The +# bundle publishes each one on a random host loopback port (see +# `_bundle.start_bundle`), and `_bundle.bundle_host_port` looks +# them up post-start. Pipelock's port is an env-overridable string +# in docker.pipelock; coerce to int here. +_PIPELOCK_PORT = int(_PIPELOCK_PORT_STR) +_SUPERVISE_PORT = SUPERVISE_PORT + + @contextmanager def launch( plan: SmolmachinesBottlePlan, @@ -96,28 +110,74 @@ def launch( ) # 3. Build the BundleLaunchSpec from the (now-resolved) - # inner Plans: daemon subset, env, bind-mounts. + # inner Plans: daemon subset, env, bind-mounts. The spec's + # ports_to_publish list expands depending on which daemons + # the agent needs to reach from the smolvm guest. bundle_spec = _bundle_launch_spec(plan, network) token_env = _resolve_token_env(plan, os.environ) _bundle.start_bundle(bundle_spec, env={**os.environ, **token_env}) stack.callback(_bundle.stop_bundle, plan.slug) - # 4. smolvm VM. 
--from carries the pre-packed .smolmachine + # 4. Discover the host-side ports docker assigned for the + # bundle's published container ports, and bind the + # agent's URLs to `127.0.0.1:`. Docker container + # IPs (192.168.x.x in the daemon's bridge) aren't + # reachable from the smolvm guest on macOS — TSI uses + # macOS networking, and macOS sees the daemon's bridge + # via the published-port loopback forward only. + pipelock_host_port = _bundle.bundle_host_port(plan.slug, _PIPELOCK_PORT) + agent_proxy_url = f"http://127.0.0.1:{pipelock_host_port}" + agent_git_gate_host = "" + if plan.git_gate_plan.upstreams: + git_gate_host_port = _bundle.bundle_host_port( + plan.slug, _GIT_GATE_PORT, + ) + agent_git_gate_host = f"127.0.0.1:{git_gate_host_port}" + agent_supervise_url = "" + if plan.supervise_plan is not None: + supervise_host_port = _bundle.bundle_host_port( + plan.slug, _SUPERVISE_PORT, + ) + agent_supervise_url = f"http://127.0.0.1:{supervise_host_port}/" + + # Stamp the URLs onto the plan + guest_env. provision_git + # and provision_supervise read the plan fields; the agent + # reads guest_env on every exec_claude. + guest_env = { + **plan.guest_env, + "HTTPS_PROXY": agent_proxy_url, + "HTTP_PROXY": agent_proxy_url, + } + if agent_git_gate_host: + guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}" + if agent_supervise_url: + guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url + plan = dataclasses.replace( + plan, + guest_env=guest_env, + agent_proxy_url=agent_proxy_url, + agent_git_gate_host=agent_git_gate_host, + agent_supervise_url=agent_supervise_url, + ) + + # 5. smolvm VM. --from carries the pre-packed .smolmachine # artifact (built by prepare); --allow-cidr + -e carry the - # per-bottle TSI allowlist + env. Smolfile isn't usable - # here — smolvm 0.8.0 makes `--from` and `--smolfile` - # mutually exclusive. + # per-bottle TSI allowlist + env. 
The allowlist is + # `127.0.0.1/32` because every bundle daemon the agent + # reaches is fronted by a host loopback port-forward. + # Smolfile isn't usable here — smolvm 0.8.0 makes `--from` + # and `--smolfile` mutually exclusive. _smolvm.machine_create( plan.machine_name, from_path=plan.agent_from_path, - allow_cidrs=[f"{plan.bundle_ip}/32"], + allow_cidrs=["127.0.0.1/32"], env=plan.guest_env, ) stack.callback(_smolvm.machine_delete, plan.machine_name) _smolvm.machine_start(plan.machine_name) stack.callback(_smolvm.machine_stop, plan.machine_name) - # 5. Reclaim /home/node for the node user. smolvm's pack + # 6. Reclaim /home/node for the node user. smolvm's pack # process remaps OCI-layer ownership to the host invoker's # uid (501 on macOS) rather than preserving the image's # uid 1000 — so without this chown, node can't write its @@ -129,7 +189,7 @@ def launch( ["chown", "-R", "node:node", "/home/node"], ) - # 6. Provision (CA / prompt / skills / git / supervise). + # 7. Provision (CA / prompt / skills / git / supervise). prompt_path = provision(plan, plan.machine_name) yield SmolmachinesBottle( @@ -217,6 +277,16 @@ def _bundle_launch_spec( ] volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False)) + # Container ports the agent reaches from the smolvm guest — + # published on host loopback so the guest can dial via TSI + + # macOS networking. Egress is bundle-internal and never + # published. 
+ ports_to_publish: list[int] = [_PIPELOCK_PORT] + if gp.upstreams: + ports_to_publish.append(_GIT_GATE_PORT) + if sp is not None: + ports_to_publish.append(_SUPERVISE_PORT) + return _bundle.BundleLaunchSpec( slug=plan.slug, network_name=network, @@ -226,6 +296,7 @@ def _bundle_launch_spec( daemons_csv=",".join(daemons), environment=tuple(env), volumes=tuple(volumes), + ports_to_publish=tuple(ports_to_publish), ) diff --git a/claude_bottle/backend/smolmachines/prepare.py b/claude_bottle/backend/smolmachines/prepare.py index 5718bf8..fd5b4a3 100644 --- a/claude_bottle/backend/smolmachines/prepare.py +++ b/claude_bottle/backend/smolmachines/prepare.py @@ -89,29 +89,23 @@ def resolve_plan( subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug) - # Agent's env. IP literals; no DNS resolution inside the guest - # (TSI allowlist contains only `/32` — no resolver). - # TLS trust env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / - # REQUESTS_CA_BUNDLE) points at Debian's - # update-ca-certificates output bundle — provision_ca writes - # the per-bottle MITM CA there at launch time. + # Agent's env: the prepare-time view doesn't yet know the + # host loopback ports the bundle's daemons get published on + # (those come from docker AFTER `docker run` returns), so + # HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are + # populated in launch.py and stamped onto guest_env there. + # What we set here is the part that doesn't depend on + # bundle bringup — bottle.env literals, the empty-NO_PROXY + # safe default, and the TLS trust env trio + # (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE) + # pointing at Debian's update-ca-certificates output bundle. 
guest_env: dict[str, str] = { **bottle.env, - "HTTPS_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}", - "HTTP_PROXY": f"http://{bundle_ip}:{_BUNDLE_PIPELOCK_PORT}", "NO_PROXY": "localhost,127.0.0.1", "NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt", "SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt", "REQUESTS_CA_BUNDLE": "/etc/ssl/certs/ca-certificates.crt", } - if bottle.git: - guest_env["GIT_GATE_URL"] = ( - f"git://{bundle_ip}:{_BUNDLE_GIT_GATE_PORT}" - ) - if bottle.supervise: - guest_env["MCP_SUPERVISE_URL"] = ( - f"http://{bundle_ip}:{_BUNDLE_SUPERVISE_PORT}" - ) # Inner Plans for the four bundle daemons. The ABCs are # platform-neutral — `.prepare()` writes config files + returns diff --git a/claude_bottle/backend/smolmachines/provision/git.py b/claude_bottle/backend/smolmachines/provision/git.py index 65ec15d..dc3fefb 100644 --- a/claude_bottle/backend/smolmachines/provision/git.py +++ b/claude_bottle/backend/smolmachines/provision/git.py @@ -28,7 +28,6 @@ from pathlib import Path from ....git_gate import git_gate_render_gitconfig from ....log import info -from ...docker.git_gate import GIT_GATE_PORT from .. import smolvm as _smolvm from ..bottle_plan import SmolmachinesBottlePlan @@ -79,10 +78,12 @@ def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> Non if not bottle.git: return - # IP-literal form: the TSI allowlist passes /32 and - # nothing else, so the agent has to dial the gate by IP+port. - gate_host = f"{plan.bundle_ip}:{GIT_GATE_PORT}" - content = git_gate_render_gitconfig(bottle.git, gate_host) + # `127.0.0.1:` form: the bundle's git-gate port + # is published on host loopback at launch time so the + # smolvm guest (which can only reach macOS networking via + # TSI, not the docker bridge IP) can dial it. launch.py + # populates `plan.agent_git_gate_host` after bundle bringup. 
+ content = git_gate_render_gitconfig(bottle.git, plan.agent_git_gate_host) guest_gitconfig = f"{_guest_home()}/.gitconfig" # Stage the file under the plan's stage_dir so `machine cp` diff --git a/claude_bottle/backend/smolmachines/provision/supervise.py b/claude_bottle/backend/smolmachines/provision/supervise.py index 1ac95d0..724dafe 100644 --- a/claude_bottle/backend/smolmachines/provision/supervise.py +++ b/claude_bottle/backend/smolmachines/provision/supervise.py @@ -14,7 +14,6 @@ short `supervise` alias (no DNS in the TSI-allowlisted guest).""" from __future__ import annotations from ....log import info, warn -from ....supervise import SUPERVISE_PORT from .. import smolvm as _smolvm from ..bottle_plan import SmolmachinesBottlePlan @@ -22,21 +21,22 @@ from ..bottle_plan import SmolmachinesBottlePlan _SUPERVISE_MCP_NAME = "supervise" -def supervise_mcp_url(bundle_ip: str) -> str: - return f"http://{bundle_ip}:{SUPERVISE_PORT}/" - - def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None: """Run `claude mcp add` inside the guest to register the supervise sidecar in claude-code's user config. No-op when bottle.supervise is False. + The URL is the agent-side endpoint launch.py populated after + bundle bringup — `http://127.0.0.1:/` rather than + the bundle's docker bridge IP, because that bridge isn't + reachable from the smolvm guest on macOS. + Failure is logged but not fatal: the bottle still works (you just can't call supervise tools from the agent until the entry is added manually). 
The operator sees the warning at launch.""" if plan.supervise_plan is None: return - url = supervise_mcp_url(plan.bundle_ip) + url = plan.agent_supervise_url info(f"registering supervise MCP server in agent claude config → {url}") r = _smolvm.machine_exec( target, @@ -57,4 +57,4 @@ def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None: ) -__all__ = ["provision_supervise", "supervise_mcp_url"] +__all__ = ["provision_supervise"] diff --git a/claude_bottle/backend/smolmachines/sidecar_bundle.py b/claude_bottle/backend/smolmachines/sidecar_bundle.py index 010ee5b..286692c 100644 --- a/claude_bottle/backend/smolmachines/sidecar_bundle.py +++ b/claude_bottle/backend/smolmachines/sidecar_bundle.py @@ -70,6 +70,13 @@ class BundleLaunchSpec: environment: Sequence[str] = field(default_factory=tuple) # (host_path, container_path, read_only) bind mounts. volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple) + # Container ports to publish on the host's 127.0.0.1, random + # host-side port per entry. The smolvm guest's TSI talks via + # macOS networking, so docker container IPs (192.168.x.x in + # the daemon's bridge) aren't directly reachable from the + # guest — host-loopback port-forwards are. Egress's port + # is bundle-internal and never published. + ports_to_publish: Sequence[int] = field(default_factory=tuple) def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None: @@ -135,6 +142,11 @@ def start_bundle(spec: BundleLaunchSpec, *, for host_path, container_path, read_only in spec.volumes: suffix = ":ro" if read_only else "" argv += ["-v", f"{host_path}:{container_path}{suffix}"] + # Loopback-only host port-forwards — the smolvm guest's TSI + # uses macOS networking, and macOS loopback is the only host + # surface that round-trips into Docker Desktop's daemon VM. 
+ for port in spec.ports_to_publish: + argv += ["-p", f"127.0.0.1::{port}"] argv.append(spec.image) result = subprocess.run( argv, capture_output=True, text=True, @@ -147,6 +159,33 @@ def start_bundle(spec: BundleLaunchSpec, *, ) +def bundle_host_port(slug: str, container_port: int) -> int: + """`docker port /tcp` → the random + host-side port docker assigned. Called after `start_bundle` + on each container port listed in `BundleLaunchSpec + .ports_to_publish` so the launch step can build the agent's + HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in + `127.0.0.1:` form.""" + container = bundle_container_name(slug) + result = subprocess.run( + ["docker", "port", container, f"{container_port}/tcp"], + capture_output=True, text=True, check=False, + ) + if result.returncode != 0: + die( + f"docker port {container} {container_port}/tcp failed: " + f"{(result.stderr or '').strip() or ''}" + ) + # `127.0.0.1:54321\n` — rpartition on last colon gives the port. + line = (result.stdout or "").splitlines()[0].strip() + _, _, port_str = line.rpartition(":") + try: + return int(port_str) + except ValueError: + die(f"unexpected `docker port` output: {line!r}") + return -1 # unreachable; die() never returns + + def stop_bundle(slug: str) -> None: """Idempotent: a missing container returns success.""" container = bundle_container_name(slug) diff --git a/tests/unit/test_smolmachines_provision.py b/tests/unit/test_smolmachines_provision.py index a4dd52a..c3780a7 100644 --- a/tests/unit/test_smolmachines_provision.py +++ b/tests/unit/test_smolmachines_provision.py @@ -44,6 +44,8 @@ def _plan( pipelock_ca_path: Path = Path(), supervise: bool = False, bundle_ip: str = "192.168.50.2", + agent_git_gate_host: str = "127.0.0.1:55555", + agent_supervise_url: str = "http://127.0.0.1:55556/", ) -> SmolmachinesBottlePlan: bottle_json: dict = {} if git: @@ -111,6 +113,8 @@ def _plan( mitmproxy_ca_cert_only_host_path=egress_ca_path, ), supervise_plan=supervise_plan, + 
agent_git_gate_host=agent_git_gate_host, + agent_supervise_url=agent_supervise_url, ) @@ -412,9 +416,10 @@ class TestProvisionGit(unittest.TestCase): cp.assert_not_called() def test_writes_gitconfig_with_ip_port_form_for_smolmachines(self): - # Smolmachines's TSI-allowlisted guest has no DNS resolver, - # so the insteadOf URL has to be IP+port rather than the - # docker backend's `git-gate` short alias. + # Smolmachines's TSI-allowlisted guest dials git-gate via + # `127.0.0.1:` — the bundle's git-gate port is + # published on host loopback at launch time, and the plan + # carries the discovered host port (here mocked to 9418). plan = _plan( git=[GitEntry( Name="claude-bottle", @@ -422,7 +427,7 @@ class TestProvisionGit(unittest.TestCase): IdentityFile="~/.ssh/id_ed25519", )], stage_dir=self.stage, - bundle_ip="192.168.99.2", + agent_git_gate_host="127.0.0.1:9418", ) with patch( "claude_bottle.backend.smolmachines.provision.git._smolvm.machine_cp" @@ -437,7 +442,7 @@ class TestProvisionGit(unittest.TestCase): self.assertEqual(self.stage, staged_path.parent) content = staged_path.read_text() self.assertIn( - '[url "git://192.168.99.2:9418/claude-bottle.git"]', content, + '[url "git://127.0.0.1:9418/claude-bottle.git"]', content, ) self.assertIn( "\tinsteadOf = ssh://git@host/repo.git", content, @@ -453,7 +458,10 @@ class TestProvisionSupervise(unittest.TestCase): ex.assert_not_called() def test_calls_claude_mcp_add_when_supervise_enabled(self): - plan = _plan(supervise=True, bundle_ip="192.168.50.2") + plan = _plan( + supervise=True, + agent_supervise_url="http://127.0.0.1:9100/", + ) with patch( "claude_bottle.backend.smolmachines.provision.supervise._smolvm.machine_exec", return_value=SmolvmRunResult(returncode=0, stdout="", stderr=""), @@ -462,13 +470,15 @@ class TestProvisionSupervise(unittest.TestCase): ex.assert_called_once() argv = ex.call_args.args[1] # claude mcp add --scope user --transport http supervise + # — URL is the agent-side endpoint (host loopback 
+ + # discovered port), not the docker bridge IP. self.assertEqual( [ "claude", "mcp", "add", "--scope", "user", "--transport", "http", "supervise", - "http://192.168.50.2:9100/", + "http://127.0.0.1:9100/", ], argv, )