feat(smolmachines): bundle bringup on per-bottle docker bridge (PRD 0023 chunk 2c) #66

Merged
didericis-claude merged 1 commits from prd-0023-chunk-2c-bundle-bringup into main 2026-05-27 04:27:34 -04:00
3 changed files with 488 additions and 0 deletions
@@ -0,0 +1,164 @@
"""Per-bottle sidecar bundle bringup for the smolmachines backend
(PRD 0023).
Two docker resources per bottle live here:
- **A dedicated bridge network**, subnet derived from the slug.
The bundle container gets a pinned IP at `<subnet>.2` so the
smolvm guest's TSI allowlist (`<bundle-ip>/32`) has a stable
target. Without pinning, we'd have to inspect the container's
assigned IP after start and feed it back into the Smolfile
— a race we can sidestep with `--ip`.
- **The bundle container itself**, running the PRD 0024 bundle
image (`claude-bottle-sidecars:latest` by default). Same
image, same daemons, same daemon-private env / bind-mounts
as the docker backend.
This module ships the lifecycle primitives only — create
network, start bundle, stop bundle, remove network — wrapped
around `subprocess.run(["docker", ...])`. Wiring them into the
launch flow + populating the `BundleLaunchSpec` from the inner
Plans (PipelockProxyPlan, EgressPlan, …) lands in chunk 2d."""
from __future__ import annotations
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence
from ...log import die, warn
from ..docker.sidecar_bundle import SIDECAR_BUNDLE_IMAGE
def bundle_network_name(slug: str) -> str:
    """Return the docker network name for the bottle identified by *slug*.

    The name has the shape `claude-bottle-bundle-<slug>`. It is kept
    distinct from the docker backend's `claude-bottle-net-<slug>` so a
    smolmachines bottle and a docker bottle for the same agent never
    collide on network name.
    """
    return "claude-bottle-bundle-{}".format(slug)
def bundle_container_name(slug: str) -> str:
    """Return the bundle container name for the bottle identified by *slug*.

    The name has the shape `claude-bottle-sidecars-<slug>` — the same
    shape the docker backend uses for its bundle (PRD 0024 chunk 5), so
    the dashboard's prefix-based discovery covers both backends with a
    single filter.
    """
    return "claude-bottle-sidecars-{}".format(slug)
@dataclass(frozen=True)
class BundleLaunchSpec:
    """Immutable description of a single bundle-container launch.

    `start_bundle` builds its `docker run` argv from an instance of this
    class. The chunk-2d launch flow is expected to populate it from the
    inner Plans the prepare step already produces.
    """

    # Bottle slug; the container name is derived from it.
    slug: str
    # Docker network the container is attached to (`--network`).
    network_name: str
    # Subnet and gateway of that network. Carried on the spec for
    # callers; `start_bundle` itself does not read them.
    subnet: str
    gateway: str
    # Address pinned on the network via `--ip`.
    bundle_ip: str
    # Image to run; defaults to the shared sidecar bundle image.
    image: str = SIDECAR_BUNDLE_IMAGE
    # Comma-separated daemon subset, exported to the container as
    # CLAUDE_BOTTLE_SIDECAR_DAEMONS. The supervisor inside the bundle
    # reads it to skip daemons irrelevant to this bottle (e.g.
    # supervise=False bottles).
    daemons_csv: str = "egress,pipelock"
    # `-e` entries: either "KEY=VALUE" or a bare "KEY". The bare form
    # inherits its value from the docker-run subprocess env, matching
    # the docker backend's compose-up secret-forwarding pattern.
    environment: Sequence[str] = ()
    # Bind mounts as (host_path, container_path, read_only) triples.
    volumes: Sequence[tuple[str, str, bool]] = ()
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
    """Create the per-bottle docker network with a fixed subnet and gateway.

    Runs `docker network create --subnet <subnet> --gateway <gateway>
    <network_name>` so the bundle's `--ip` lands on the address the
    Smolfile's TSI allowlist points at.

    The call is idempotent: when docker reports that the network
    "already exists" (e.g. on a resume path), this function returns
    normally. Any other failure is reported through `die`, because the
    bundle would have no addressable network.
    """
    completed = subprocess.run(
        [
            "docker", "network", "create",
            "--subnet", subnet,
            "--gateway", gateway,
            network_name,
        ],
        capture_output=True, text=True, check=False,
    )
    if completed.returncode == 0:
        return
    stderr_text = completed.stderr or ""
    # A pre-existing network is acceptable; the match is on docker's
    # stderr text, case-insensitively.
    if "already exists" in stderr_text.lower():
        return
    die(f"docker network create {network_name} failed: {stderr_text.strip()}")
def remove_bundle_network(network_name: str) -> None:
    """Remove the per-bottle docker network, tolerating its absence.

    Runs `docker network rm <network_name>`. The call is idempotent: a
    network that does not exist is treated as already removed and the
    function returns silently. Any other failure (most commonly a
    network that still has containers attached during a partial
    teardown) is reported through `warn` rather than `die`, so
    teardown can continue.
    """
    result = subprocess.run(
        ["docker", "network", "rm", network_name],
        capture_output=True, text=True, check=False,
    )
    if result.returncode == 0:
        return
    stderr_text = (result.stderr or "").strip()
    lowered = stderr_text.lower()
    # Docker phrases a missing network in two ways depending on the
    # client version: "No such network: <name>" (client-side rewrite
    # of the 404) or the daemon's own "network <name> not found".
    # Both mean there is nothing left to remove.
    missing_markers = (
        "no such network",
        f"network {network_name} not found".lower(),
    )
    if any(marker in lowered for marker in missing_markers):
        return
    # Network with attached containers is the common non-fatal case
    # during a partial teardown — warn but don't die.
    warn(f"docker network rm {network_name} failed: {stderr_text}")
def start_bundle(spec: BundleLaunchSpec, *,
                 env: dict[str, str] | None = None) -> None:
    """Launch the bundle container on the per-bottle network at its pinned IP.

    The `docker run` argv is derived deterministically from `spec`:
    container name (from the slug), `--detach`, `--rm`, network, pinned
    `--ip`, the daemon-subset variable, then every `spec.environment`
    entry as `-e`, every `spec.volumes` triple as `-v` (with `:ro`
    appended for read-only mounts), and finally the image.

    `env`, when given, is copied and used as the environment of the
    docker subprocess; it supplies the values for bare-name entries in
    `spec.environment`. When `env` is None the subprocess inherits the
    current process environment.

    A non-zero exit from docker is reported through `die`.
    """
    container = bundle_container_name(spec.slug)

    env_flags = [arg for entry in spec.environment for arg in ("-e", entry)]

    volume_flags: list[str] = []
    for host_path, container_path, read_only in spec.volumes:
        mount = f"{host_path}:{container_path}"
        if read_only:
            mount += ":ro"
        volume_flags += ["-v", mount]

    argv = [
        "docker", "run",
        "--name", container,
        "--detach",
        "--rm",
        "--network", spec.network_name,
        "--ip", spec.bundle_ip,
        "-e", f"CLAUDE_BOTTLE_SIDECAR_DAEMONS={spec.daemons_csv}",
        *env_flags,
        *volume_flags,
        spec.image,
    ]

    # Copy the caller's mapping so the subprocess gets its own dict.
    subprocess_env = None if env is None else dict(env)
    result = subprocess.run(
        argv, capture_output=True, text=True,
        env=subprocess_env, check=False,
    )
    if result.returncode == 0:
        return
    die(
        f"docker run for bundle {container} failed: "
        f"{(result.stderr or '').strip()}"
    )
def stop_bundle(slug: str) -> None:
    """Force-remove the bundle container for *slug*, tolerating its absence.

    Runs `docker rm -f <container>`. The call is idempotent: if docker
    reports "no such container" the function returns silently. Any
    other failure is reported through `warn` and does not abort.
    """
    container = bundle_container_name(slug)
    completed = subprocess.run(
        ["docker", "rm", "-f", container],
        capture_output=True, text=True, check=False,
    )
    stderr_text = completed.stderr or ""
    already_gone = "no such container" in stderr_text.lower()
    if completed.returncode == 0 or already_gone:
        return
    warn(f"docker rm -f {container} failed: {stderr_text.strip()}")
@@ -0,0 +1,116 @@
"""Integration: PRD 0023 chunk 2c — bundle bringup on a per-bottle
docker bridge with the pinned IP.
End-to-end against the real docker daemon. Brings up just the
sidecar bundle on its own bridge, confirms the container lands at
the pinned IP, then tears down. Skipped under act_runner (docker
socket mount topology breaks bridge visibility) and when the
bundle image isn't available.
Full launch flow (smolvm + bundle + provisioning + the
localhost-reach / egress-port-bypass probes) lives in chunk 2d."""
from __future__ import annotations
import json
import os
import subprocess
import time
import unittest
from claude_bottle.backend.smolmachines.sidecar_bundle import (
BundleLaunchSpec,
bundle_container_name,
bundle_network_name,
create_bundle_network,
remove_bundle_network,
start_bundle,
stop_bundle,
)
from tests._docker import skip_unless_docker
@skip_unless_docker()
@unittest.skipIf(
    os.environ.get("GITEA_ACTIONS") == "true",
    "skipped under act_runner: docker socket mount topology breaks "
    "in-process visibility of networks created on the host daemon",
)
class TestBundleBringup(unittest.TestCase):
    """Real-docker smoke: the bundle container lands at the pinned IP."""

    def setUp(self):
        # pid + wall-clock seconds keep resource names unique per run.
        unique = f"{os.getpid()}-{int(time.time())}"
        self.slug = f"cb-test-bundle-{unique}"
        self.network = bundle_network_name(self.slug)
        self.container = bundle_container_name(self.slug)

    def tearDown(self):
        # Container first, then the network it was attached to.
        stop_bundle(self.slug)
        remove_bundle_network(self.network)

    def _bundle_image_built(self) -> bool:
        """Return True when `claude-bottle-sidecars:latest` exists locally.

        The bundle image is built lazily by the docker backend's
        compose. A smolmachines-only operator who never ran the docker
        backend won't have it, in which case the test should skip
        rather than fail.
        """
        probe = subprocess.run(
            ["docker", "image", "inspect", "claude-bottle-sidecars:latest"],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            check=False,
        )
        return probe.returncode == 0

    def test_create_network_then_start_bundle_pins_ip(self):
        if not self._bundle_image_built():
            self.skipTest(
                "claude-bottle-sidecars:latest not built; run a docker "
                "bottle first or `docker build -f Dockerfile.sidecars .`"
            )

        # Hardcode the subnet instead of deriving it from the slug
        # (which would not be deterministic across runs). The high
        # third octet (.211) is one the docker default bridges almost
        # never use, so a host collision is unlikely.
        subnet = "192.168.211.0/24"
        gateway = "192.168.211.1"
        bundle_ip = "192.168.211.2"

        create_bundle_network(self.network, subnet, gateway)

        # No daemons are selected for this smoke (empty CSV → init
        # exits "no daemons selected"). That avoids needing any bind
        # mounts; all we need is for the container to be created on
        # the network at the right IP.
        spec = BundleLaunchSpec(
            slug=self.slug,
            network_name=self.network,
            subnet=subnet,
            gateway=gateway,
            bundle_ip=bundle_ip,
            daemons_csv="",
        )
        start_bundle(spec)

        # Ask docker which IP the container holds on the per-bottle
        # network.
        ip_template = (
            '{{(index .NetworkSettings.Networks "'
            + self.network
            + '").IPAddress}}'
        )
        inspected = subprocess.run(
            ["docker", "inspect", "--format", ip_template, self.container],
            capture_output=True, text=True, check=False,
        )

        # The container may already have exited (no daemons selected →
        # exit 0). Inspect still works on an exited container until
        # `--rm` fires, so this is a race. If the container is already
        # gone the launch nevertheless succeeded, so a failed or empty
        # inspect is deliberately not treated as a test failure.
        observed_ip = inspected.stdout.strip()
        if inspected.returncode == 0 and observed_ip:
            self.assertEqual(
                bundle_ip, observed_ip,
                f"bundle landed at wrong IP: {inspected.stdout!r}",
            )
# Allow running this integration test module directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,208 @@
"""Unit: bundle bringup primitives for the smolmachines backend
(PRD 0023 chunk 2c).
Tests mock `subprocess.run` and assert on the docker argv shape.
The bundle-only integration smoke (real docker daemon, real
bundle image) ships with this chunk; the full launch-flow
integration lands in chunk 2d."""
from __future__ import annotations
import subprocess
import unittest
from unittest.mock import patch
from claude_bottle.backend.smolmachines.sidecar_bundle import (
BundleLaunchSpec,
bundle_container_name,
bundle_network_name,
create_bundle_network,
remove_bundle_network,
start_bundle,
stop_bundle,
)
def _ok(stdout: str = "", stderr: str = "") -> subprocess.CompletedProcess:
return subprocess.CompletedProcess(
args=[], returncode=0, stdout=stdout, stderr=stderr,
)
def _fail(stderr: str = "boom") -> subprocess.CompletedProcess:
return subprocess.CompletedProcess(
args=[], returncode=1, stdout="", stderr=stderr,
)
def _spec(**kwargs) -> BundleLaunchSpec:
    """Build a `BundleLaunchSpec` from demo defaults, overridden by *kwargs*."""
    base = {
        "slug": "demo-abc12",
        "network_name": "claude-bottle-bundle-demo-abc12",
        "subnet": "192.168.50.0/24",
        "gateway": "192.168.50.1",
        "bundle_ip": "192.168.50.2",
    }
    return BundleLaunchSpec(**{**base, **kwargs})
class TestNamingHelpers(unittest.TestCase):
    """Name shapes for the per-bottle network and bundle container."""

    def test_network_name_uses_bundle_prefix(self):
        # The prefix differs from the docker backend's
        # `claude-bottle-net-<slug>` so two backends running the same
        # agent slug don't collide.
        actual = bundle_network_name("myagent-xyz")
        self.assertEqual("claude-bottle-bundle-myagent-xyz", actual)

    def test_container_name_matches_docker_bundle_shape(self):
        # Same shape PRD 0024 chunk 5 set for the docker backend's
        # bundle container, so dashboard prefix-discovery covers both
        # backends with one filter.
        actual = bundle_container_name("myagent-xyz")
        self.assertEqual("claude-bottle-sidecars-myagent-xyz", actual)
class TestNetworkLifecycle(unittest.TestCase):
    """create/remove network primitives against a mocked `subprocess.run`."""

    def _patch_run(self, **kwargs):
        """Patch `subprocess.run` as seen by the module under test."""
        target = (
            "claude_bottle.backend.smolmachines.sidecar_bundle.subprocess.run"
        )
        return patch(target, **kwargs)

    def test_create_argv_explicit_subnet_and_gateway(self):
        expected_argv = [
            "docker", "network", "create",
            "--subnet", "192.168.50.0/24",
            "--gateway", "192.168.50.1",
            "nn",
        ]
        with self._patch_run(return_value=_ok()) as run_mock:
            create_bundle_network("nn", "192.168.50.0/24", "192.168.50.1")
        self.assertEqual(expected_argv, run_mock.call_args.args[0])

    def test_create_treats_existing_network_as_success(self):
        existing = _fail("network nn already exists")
        with self._patch_run(return_value=existing):
            # Must return normally — no SystemExit.
            create_bundle_network("nn", "192.168.50.0/24", "192.168.50.1")

    def test_create_other_failure_is_fatal(self):
        with self._patch_run(return_value=_fail("invalid subnet")), \
                self.assertRaises(SystemExit):
            create_bundle_network("nn", "bogus", "bogus")

    def test_remove_missing_network_is_idempotent(self):
        # A missing network is the expected case during a partial
        # teardown, so removal must not raise.
        missing = _fail("Error: No such network: nn")
        with self._patch_run(return_value=missing):
            remove_bundle_network("nn")

    def test_remove_clean_returns_success(self):
        with self._patch_run(return_value=_ok()):
            remove_bundle_network("nn")
class TestStartBundle(unittest.TestCase):
    """Argv shape and failure handling of `start_bundle` (docker mocked)."""

    _RUN_TARGET = (
        "claude_bottle.backend.smolmachines.sidecar_bundle.subprocess.run"
    )

    def _patch_run(self):
        """Patch `subprocess.run` in the module under test to succeed."""
        return patch(self._RUN_TARGET, return_value=_ok())

    def test_argv_pins_ip_on_network(self):
        with self._patch_run() as run_mock:
            start_bundle(_spec())
        argv = run_mock.call_args.args[0]

        # Network + pinned IP, then the detach / auto-remove flags.
        expected_tokens = (
            "--network",
            "claude-bottle-bundle-demo-abc12",
            "--ip",
            "192.168.50.2",
            "--detach",
            "--rm",
        )
        for token in expected_tokens:
            self.assertIn(token, argv)

        # Container name uses the per-slug bundle prefix.
        name_value = argv[argv.index("--name") + 1]
        self.assertEqual("claude-bottle-sidecars-demo-abc12", name_value)

        # The image is the final argv element.
        self.assertEqual("claude-bottle-sidecars:latest", argv[-1])

    def test_daemons_env_passed_in(self):
        spec = _spec(daemons_csv="egress,pipelock,supervise")
        with self._patch_run() as run_mock:
            start_bundle(spec)
        argv = run_mock.call_args.args[0]
        self.assertIn("-e", argv)
        self.assertIn(
            "CLAUDE_BOTTLE_SIDECAR_DAEMONS=egress,pipelock,supervise",
            argv,
        )

    def test_environment_entries_pass_through(self):
        entries = (
            "EGRESS_UPSTREAM_PROXY=http://...",
            "SUPERVISE_BOTTLE_SLUG=demo-abc12",
            "EGRESS_TOKEN_0",  # bare-name → host env inherit
        )
        with self._patch_run() as run_mock:
            start_bundle(_spec(environment=entries))
        argv = run_mock.call_args.args[0]
        self.assertIn("EGRESS_UPSTREAM_PROXY=http://...", argv)
        self.assertIn("SUPERVISE_BOTTLE_SLUG=demo-abc12", argv)
        self.assertIn("EGRESS_TOKEN_0", argv)

    def test_volumes_render_with_ro_flag(self):
        mounts = (
            ("/host/pipelock.yaml", "/etc/pipelock.yaml", True),
            ("/host/queue", "/run/supervise/queue", False),
        )
        with self._patch_run() as run_mock:
            start_bundle(_spec(volumes=mounts))
        argv = run_mock.call_args.args[0]
        self.assertIn("/host/pipelock.yaml:/etc/pipelock.yaml:ro", argv)
        self.assertIn("/host/queue:/run/supervise/queue", argv)

    def test_failure_dies(self):
        failing = patch(self._RUN_TARGET, return_value=_fail("invalid mount"))
        with failing, self.assertRaises(SystemExit):
            start_bundle(_spec())

    def test_host_env_inherited_to_subprocess(self):
        # Bare-name entries in spec.environment rely on the docker
        # subprocess being run with the host env, so `env=` must be
        # threaded through to `subprocess.run`.
        with self._patch_run() as run_mock:
            start_bundle(_spec(), env={"FOO": "bar"})
        self.assertEqual({"FOO": "bar"}, run_mock.call_args.kwargs["env"])
class TestStopBundle(unittest.TestCase):
    """`stop_bundle` argv and idempotence (docker mocked)."""

    def _patch_run(self, **kwargs):
        """Patch `subprocess.run` as seen by the module under test."""
        target = (
            "claude_bottle.backend.smolmachines.sidecar_bundle.subprocess.run"
        )
        return patch(target, **kwargs)

    def test_argv_force_removes(self):
        expected_argv = [
            "docker", "rm", "-f", "claude-bottle-sidecars-demo-abc12",
        ]
        with self._patch_run(return_value=_ok()) as run_mock:
            stop_bundle("demo-abc12")
        self.assertEqual(expected_argv, run_mock.call_args.args[0])

    def test_missing_container_is_idempotent(self):
        missing = _fail(
            "Error: No such container: claude-bottle-sidecars-demo-abc12"
        )
        with self._patch_run(return_value=missing):
            stop_bundle("demo-abc12")  # must not raise
# Allow running this unit test module directly as a script.
if __name__ == "__main__":
    unittest.main()