Files
bot-bottle/claude_bottle/backend/smolmachines/sidecar_bundle.py
T
didericis-claude 4f136a9932
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 39s
fix(smolmachines): agent dials bundle via host loopback ports, not docker bridge IP
Claude hung on outbound network calls under
CLAUDE_BOTTLE_BACKEND=smolmachines:

  Unable to connect to API (FailedToOpenSocket)

Root cause: the PRD-0023 design pinned the bundle at a docker
bridge IP (192.168.X.2) and set the smolvm guest's TSI allowlist
to `<bundle-ip>/32`. On native Linux this works — host shares
the docker bridge's network namespace, TSI's syscall
impersonation reaches the bridge IP directly. On Docker Desktop
(macOS), the daemon runs in its own Linux VM and docker bridge
IPs aren't reachable from macOS networking, so the smolvm
guest's TSI requests die "Network is unreachable" before they
hit pipelock.

Fix: publish each agent-facing bundle daemon's port on host
loopback (-p 127.0.0.1::PORT), discover the random host-side
ports after start, and route the agent through
`127.0.0.1:<host port>` instead of the bridge IP. macOS loopback
is the surface Docker Desktop's gvproxy forwards into the
daemon's VM, so the chain (guest TSI -> macOS loopback ->
daemon VM port-forward -> bundle container) works on both
Docker Desktop and native Linux.

Concrete changes:
- BundleLaunchSpec: add `ports_to_publish` so start_bundle adds
  `-p 127.0.0.1::PORT` for the agent-facing ports (pipelock
  always; git-gate when upstreams declared; supervise when
  enabled). Egress's port stays bundle-internal.
- sidecar_bundle.bundle_host_port(): wrap `docker port <bundle>
  <container_port>/tcp` so launch can look up the random
  host-side mapping after start.
- launch.py: discover the host ports, build URLs of the form
  `http://127.0.0.1:<host port>` / `git://127.0.0.1:<host port>`,
  stamp onto guest_env + new agent_*_url fields on the plan.
- launch.py: TSI allow_cidrs flips to `["127.0.0.1/32"]`. The
  bundle IP is no longer the agent's target.
- prepare.py: stop synthesizing HTTPS_PROXY / GIT_GATE_URL /
  MCP_SUPERVISE_URL at prepare time — launch owns those now
  (the values depend on a port docker hasn't assigned yet).
- provision_git: gate_host from plan.agent_git_gate_host.
- provision_supervise: URL from plan.agent_supervise_url.

End-to-end verified on Docker Desktop / macOS: guest dials
pipelock through TSI, pipelock forwards to api.anthropic.com,
the API responds with 401 (i.e. it received the request).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 15:31:44 -04:00

204 lines
7.8 KiB
Python

"""Per-bottle sidecar bundle bringup for the smolmachines backend
(PRD 0023).
Two docker resources per bottle live here:
- **A dedicated bridge network**, subnet derived from the slug.
The bundle container gets a pinned IP at `<subnet>.2` so the
smolvm guest's TSI allowlist (`<bundle-ip>/32`) has a stable
target. Without pinning, we'd have to inspect the container's
assigned IP after start and feed it back into the Smolfile
— a race we can sidestep with `--ip`.
- **The bundle container itself**, running the PRD 0024 bundle
image (`claude-bottle-sidecars:latest` by default). Same
image, same daemons, same daemon-private env / bind-mounts
as the docker backend.
This module ships the lifecycle primitives only — create
network, start bundle, stop bundle, remove network — wrapped
around `subprocess.run(["docker", ...])`. Wiring them into the
launch flow + populating the `BundleLaunchSpec` from the inner
Plans (PipelockProxyPlan, EgressPlan, …) lands in chunk 2d."""
from __future__ import annotations
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence
from ...log import die, warn
from ..docker.sidecar_bundle import SIDECAR_BUNDLE_IMAGE
def bundle_network_name(slug: str) -> str:
"""`claude-bottle-bundle-<slug>` — distinct from the docker
backend's `claude-bottle-net-<slug>` so a smolmachines bottle
and a docker bottle for the same agent don't collide on
network name."""
return f"claude-bottle-bundle-{slug}"
def bundle_container_name(slug: str) -> str:
"""`claude-bottle-sidecars-<slug>` — same name shape the docker
backend uses for the bundle (PRD 0024 chunk 5). The dashboard's
prefix-based discovery covers both backends with one filter."""
return f"claude-bottle-sidecars-{slug}"
@dataclass(frozen=True)
class BundleLaunchSpec:
"""Everything `start_bundle` needs to bring up one bundle
container. Populated by chunk-2d's launch flow from the inner
Plans the prepare step already produces."""
slug: str
network_name: str
subnet: str
gateway: str
bundle_ip: str
image: str = SIDECAR_BUNDLE_IMAGE
# Daemon subset CSV for CLAUDE_BOTTLE_SIDECAR_DAEMONS. The
# supervisor inside the bundle reads it to skip
# bottle-irrelevant daemons (e.g. supervise=False bottles).
daemons_csv: str = "egress,pipelock"
# Plain "KEY=VALUE" strings + "KEY" bare names (the bare-name
# form inherits the value from the docker-run subprocess env,
# matching the docker backend's compose-up secret-forwarding
# pattern).
environment: Sequence[str] = field(default_factory=tuple)
# (host_path, container_path, read_only) bind mounts.
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
# Container ports to publish on the host's 127.0.0.1, random
# host-side port per entry. The smolvm guest's TSI talks via
# macOS networking, so docker container IPs (192.168.x.x in
# the daemon's bridge) aren't directly reachable from the
# guest — host-loopback port-forwards are. Egress's port
# is bundle-internal and never published.
ports_to_publish: Sequence[int] = field(default_factory=tuple)
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
"""`docker network create` with an explicit subnet + gateway
so the bundle's `--ip` lands on the address the Smolfile's
TSI allowlist points at. Idempotent on the caller's side —
`start_bundle` catches the "network exists" error and treats
it as success (chunk-2d teardown is paired with each create).
"""
result = subprocess.run(
["docker", "network", "create",
"--subnet", subnet, "--gateway", gateway,
network_name],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
# Already-exists is fine on a resume path; everything else
# is fatal — the bundle won't have an addressable network.
if "already exists" in (result.stderr or "").lower():
return
die(
f"docker network create {network_name} failed: "
f"{(result.stderr or '').strip()}"
)
def remove_bundle_network(network_name: str) -> None:
"""Idempotent: a missing network returns success."""
result = subprocess.run(
["docker", "network", "rm", network_name],
capture_output=True, text=True, check=False,
)
if result.returncode == 0:
return
if "no such network" in (result.stderr or "").lower():
return
# Network with attached containers is the common non-fatal
# case during a partial teardown — warn but don't die.
warn(
f"docker network rm {network_name} failed: "
f"{(result.stderr or '').strip()}"
)
def start_bundle(spec: BundleLaunchSpec, *,
env: dict[str, str] | None = None) -> None:
"""Bring the bundle container up on the per-bottle bridge with
the pinned IP. Argv is built deterministically from `spec`;
`env` is the host subprocess env (forwarded values for any
bare-name entries in `spec.environment`)."""
container = bundle_container_name(spec.slug)
argv = [
"docker", "run",
"--name", container,
"--detach",
"--rm",
"--network", spec.network_name,
"--ip", spec.bundle_ip,
"-e", f"CLAUDE_BOTTLE_SIDECAR_DAEMONS={spec.daemons_csv}",
]
for entry in spec.environment:
argv += ["-e", entry]
for host_path, container_path, read_only in spec.volumes:
suffix = ":ro" if read_only else ""
argv += ["-v", f"{host_path}:{container_path}{suffix}"]
# Loopback-only host port-forwards — the smolvm guest's TSI
# uses macOS networking, and macOS loopback is the only host
# surface that round-trips into Docker Desktop's daemon VM.
for port in spec.ports_to_publish:
argv += ["-p", f"127.0.0.1::{port}"]
argv.append(spec.image)
result = subprocess.run(
argv, capture_output=True, text=True,
env=dict(env) if env is not None else None, check=False,
)
if result.returncode != 0:
die(
f"docker run for bundle {container} failed: "
f"{(result.stderr or '').strip()}"
)
def bundle_host_port(slug: str, container_port: int) -> int:
"""`docker port <bundle> <container_port>/tcp` → the random
host-side port docker assigned. Called after `start_bundle`
on each container port listed in `BundleLaunchSpec
.ports_to_publish` so the launch step can build the agent's
HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
`127.0.0.1:<host port>` form."""
container = bundle_container_name(slug)
result = subprocess.run(
["docker", "port", container, f"{container_port}/tcp"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
die(
f"docker port {container} {container_port}/tcp failed: "
f"{(result.stderr or '').strip() or '<no stderr>'}"
)
# `127.0.0.1:54321\n` — rpartition on last colon gives the port.
line = (result.stdout or "").splitlines()[0].strip()
_, _, port_str = line.rpartition(":")
try:
return int(port_str)
except ValueError:
die(f"unexpected `docker port` output: {line!r}")
return -1 # unreachable; die() never returns
def stop_bundle(slug: str) -> None:
"""Idempotent: a missing container returns success."""
container = bundle_container_name(slug)
result = subprocess.run(
["docker", "rm", "-f", container],
capture_output=True, text=True, check=False,
)
if result.returncode == 0:
return
if "no such container" in (result.stderr or "").lower():
return
warn(
f"docker rm -f {container} failed: "
f"{(result.stderr or '').strip()}"
)