fix(smolmachines): docker push fails on Docker Desktop — daemon-side route differs from host loopback #74

Merged
didericis-claude merged 13 commits from fix-local-registry-docker-desktop into main 2026-05-27 16:10:46 -04:00
3 changed files with 88 additions and 15 deletions
Showing only changes of commit 5486170be1 - Show all commits
+42 -8
View File
@@ -21,6 +21,7 @@ from __future__ import annotations
import dataclasses import dataclasses
import os import os
import time
from contextlib import ExitStack, contextmanager from contextlib import ExitStack, contextmanager
from typing import Callable, Generator from typing import Callable, Generator
@@ -34,6 +35,7 @@ from ...util import expand_tilde
from ..docker.egress import ( from ..docker.egress import (
EGRESS_CA_IN_CONTAINER, EGRESS_CA_IN_CONTAINER,
EGRESS_PIPELOCK_CA_IN_CONTAINER, EGRESS_PIPELOCK_CA_IN_CONTAINER,
EGRESS_PORT as _EGRESS_PORT,
egress_tls_init, egress_tls_init,
) )
from ..docker.git_gate import ( from ..docker.git_gate import (
@@ -125,8 +127,22 @@ def launch(
# reachable from the smolvm guest on macOS — TSI uses # reachable from the smolvm guest on macOS — TSI uses
# macOS networking, and macOS sees the daemon's bridge # macOS networking, and macOS sees the daemon's bridge
# via the published-port loopback forward only. # via the published-port loopback forward only.
pipelock_host_port = _bundle.bundle_host_port(plan.slug, _PIPELOCK_PORT) #
agent_proxy_url = f"http://127.0.0.1:{pipelock_host_port}" # Proxy hop order matches the docker backend: when the
# bottle declares egress routes, the agent's first hop is
# egress (for token injection), then pipelock. Without
# routes, the agent dials pipelock directly. Whichever
# one is "agent-facing" is the daemon whose port we
# publish on host loopback; the other stays bundle-
# internal as the upstream proxy.
if plan.egress_plan.routes:
agent_facing_port = _EGRESS_PORT
else:
agent_facing_port = _PIPELOCK_PORT
agent_facing_host_port = _bundle.bundle_host_port(
plan.slug, agent_facing_port,
)
agent_proxy_url = f"http://127.0.0.1:{agent_facing_host_port}"
agent_git_gate_host = "" agent_git_gate_host = ""
if plan.git_gate_plan.upstreams: if plan.git_gate_plan.upstreams:
git_gate_host_port = _bundle.bundle_host_port( git_gate_host_port = _bundle.bundle_host_port(
@@ -189,6 +205,14 @@ def launch(
["chown", "-R", "node:node", "/home/node"], ["chown", "-R", "node:node", "/home/node"],
) )
# Wait briefly for the VM to settle. Back-to-back smolvm
# machine_exec calls immediately after machine_start
# occasionally SIGKILL the in-VM child at ~100ms (looks
# like a VM warm-up race in libkrun's exec channel).
# 1.5s is empirically enough to dodge it; provisioning
# already takes seconds so the wait is amortized.
time.sleep(1.5)
# 7. Provision (CA / prompt / skills / git / supervise). # 7. Provision (CA / prompt / skills / git / supervise).
prompt_path = provision(plan, plan.machine_name) prompt_path = provision(plan, plan.machine_name)
@@ -220,9 +244,14 @@ def _bundle_launch_spec(
env: list[str] = [] env: list[str] = []
volumes: list[tuple[str, str, bool]] = [] volumes: list[tuple[str, str, bool]] = []
# PRD 0023 chunk 3: egress binds 127.0.0.1 inside the bundle # In this Docker-Desktop-compatible topology, whichever daemon
# so TSI's IP-only allowlist can't bypass pipelock. # is "agent-facing" gets its port published on the host
env.append("EGRESS_LISTEN_HOST=127.0.0.1") # loopback (see `_ensure_smolmachine`'s discovery loop) and the
# other stays bundle-internal. The bundle is NOT reachable by
# bridge IP from the smolvm guest, so the
# PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation
# isn't needed: the agent can only dial whatever daemon's
# host port we publish, period.
# --- pipelock --------------------------------------------- # --- pipelock ---------------------------------------------
pp = plan.proxy_plan pp = plan.proxy_plan
@@ -279,9 +308,14 @@ def _bundle_launch_spec(
# Container ports the agent reaches from the smolvm guest — # Container ports the agent reaches from the smolvm guest —
# published on host loopback so the guest can dial via TSI + # published on host loopback so the guest can dial via TSI +
# macOS networking. Egress is bundle-internal and never # macOS networking. The HTTP/HTTPS chokepoint is whichever
# published. # daemon's port we publish: egress when routes are declared
ports_to_publish: list[int] = [_PIPELOCK_PORT] # (token injection first, then forwards to bundle-internal
# pipelock), pipelock otherwise.
if ep.routes:
ports_to_publish: list[int] = [_EGRESS_PORT]
else:
ports_to_publish = [_PIPELOCK_PORT]
if gp.upstreams: if gp.upstreams:
ports_to_publish.append(_GIT_GATE_PORT) ports_to_publish.append(_GIT_GATE_PORT)
if sp is not None: if sp is not None:
@@ -66,8 +66,29 @@ def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
# default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / # default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python # REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
# `requests` / libraries that don't load the system bundle. # `requests` / libraries that don't load the system bundle.
_smolvm.machine_exec(target, ["chmod", "644", AGENT_CA_PATH]) #
_smolvm.machine_exec(target, ["update-ca-certificates"]) # chown + chmod + update-ca-certificates run in one
# `sh -c` so we only pay one machine_exec round trip; the
# `&&` chaining stops at the first failing command and
# surfaces its exit status as the return code.
r = _smolvm.machine_exec(target, [
"sh", "-c",
f"chown root:root {AGENT_CA_PATH} && "
f"chmod 644 {AGENT_CA_PATH} && "
f"update-ca-certificates",
])
if r.returncode != 0 or "1 added" not in (r.stdout or ""):
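# update-ca-certificates not adding our cert is fatal —
# claude-code's TLS handshake against the egress-MITM'd
# api.anthropic.com would fail downstream. Bail early
# with what we can see (output is captured by smolvm so
# we can surface it).
# NOTE(review): the check is a plain substring match on
# "1 added", so it also accepts counts such as "11 added"
# and rejects a re-run that reports "0 added" — confirm
# that both behaviors are intended.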
die(
f"update-ca-certificates didn't add the agent CA "
f"(exit {r.returncode}): "
f"stdout={(r.stdout or '').strip()!r} "
f"stderr={(r.stderr or '').strip()!r}"
)
# Stdlib SHA-256 of the cert's DER bytes — the standard # Stdlib SHA-256 of the cert's DER bytes — the standard
# fingerprint form. Never the private key. # fingerprint form. Never the private key.
+23 -5
View File
@@ -307,21 +307,38 @@ class TestProvisionCA(unittest.TestCase):
def tearDown(self): def tearDown(self):
self._tmp.cleanup() self._tmp.cleanup()
# provision_ca dies hard if the exec returns non-zero or if
# update-ca-certificates' stdout doesn't include "1 added";
# supply a stock success return so the bulk of the tests
# below exercise the happy path.
_UPDATE_OK = SmolvmRunResult(
returncode=0,
stdout="Updating certificates in /etc/ssl/certs...\n1 added, 0 removed; done.\n",
stderr="",
)
def test_pipelock_path_when_no_routes(self): def test_pipelock_path_when_no_routes(self):
plan = _plan(pipelock_ca_path=self.pipelock_ca) plan = _plan(pipelock_ca_path=self.pipelock_ca)
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
) as cp, patch( ) as cp, patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
return_value=self._UPDATE_OK,
) as ex: ) as ex:
_ca.provision_ca(plan, "claude-bottle-demo-abc12") _ca.provision_ca(plan, "claude-bottle-demo-abc12")
cp.assert_called_once_with( cp.assert_called_once_with(
str(self.pipelock_ca), str(self.pipelock_ca),
"claude-bottle-demo-abc12:" + _ca.AGENT_CA_PATH, "claude-bottle-demo-abc12:" + _ca.AGENT_CA_PATH,
) )
argvs = [c.args[1] for c in ex.call_args_list] # chmod + chown + update-ca-certificates are now folded
self.assertIn(["chmod", "644", _ca.AGENT_CA_PATH], argvs) # into one `sh -c` invocation (working around a smolvm
self.assertIn(["update-ca-certificates"], argvs) # exec warm-up SIGKILL race), so we look at the single
# exec's argv rather than expecting separate calls.
ex.assert_called_once()
argv = ex.call_args.args[1]
self.assertEqual("sh", argv[0])
self.assertEqual("-c", argv[1])
self.assertIn("chmod 644", argv[2])
self.assertIn("update-ca-certificates", argv[2])
def test_egress_path_when_routes_declared(self): def test_egress_path_when_routes_declared(self):
plan = _plan( plan = _plan(
@@ -332,7 +349,8 @@ class TestProvisionCA(unittest.TestCase):
with patch( with patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_cp"
) as cp, patch( ) as cp, patch(
"claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec" "claude_bottle.backend.smolmachines.provision.ca._smolvm.machine_exec",
return_value=self._UPDATE_OK,
): ):
_ca.provision_ca(plan, "claude-bottle-demo-abc12") _ca.provision_ca(plan, "claude-bottle-demo-abc12")
# When routes are declared, egress is the agent's first hop, # When routes are declared, egress is the agent's first hop,