feat(smolmachines): per-bottle loopback alias scopes TSI to single /32 #76
@@ -200,19 +200,25 @@ sidecar bundle still in Docker. Selected via
|
||||
The integration tests run against whichever backend the env var
|
||||
selects and skip cleanly when its prerequisites are missing.
|
||||
|
||||
**Known limitation, v1:** smolvm's TSI uses macOS networking, and
|
||||
Docker Desktop's container IPs aren't reachable from macOS, so the
|
||||
smolmachines bottle dials the sidecar bundle through host loopback
|
||||
port-forwards (`127.0.0.1:<random>`). TSI filters by IP only, so the
|
||||
allowlist is `127.0.0.1/32` — meaning the agent VM can reach **any
|
||||
service bound to macOS's loopback**, not just the bundle's published
|
||||
ports. Practical implication: while a smolmachines bottle is running,
|
||||
host-local dev services (postgres on 5432, dev servers, etc.) are
|
||||
reachable from inside the agent even if you intended them to be
|
||||
host-private. The docker backend keeps the bottle on a `--internal`
|
||||
docker network and doesn't have this issue. A future revision will
|
||||
narrow this via a per-bottle loopback alias + host-side proxy (see
|
||||
PRD 0023's "loopback scoping" section).
|
||||
**One-time sudo on first launch (macOS):** smolmachines bottles
|
||||
each reserve a loopback alias from a pool (`127.0.0.16` ..
|
||||
`127.0.0.31`) and bind their bundle's port-forwards to it; the
|
||||
first `./cli.py start` after each reboot prompts for sudo to add
|
||||
missing aliases via `ifconfig lo0 alias`. Aliases persist until
|
||||
reboot; subsequent launches don't prompt. The agent's TSI
|
||||
allowlist is the alias's `/32`, so each bottle can only reach
|
||||
its own bundle's published ports — not other bottles' ports,
|
||||
not other host loopback services (postgres, dev servers, etc.).
|
||||
|
||||
This enforcement requires a workaround for a smolvm 0.8.0 bug:
|
||||
the CLI's `--allow-cidr` flag is silently dropped when combined
|
||||
with `--from <smolmachine>`. The launcher patches smolvm's
|
||||
persistent state DB
|
||||
(`~/Library/Application Support/smolvm/server/smolvm.db`)
|
||||
directly between `machine create` and `machine start` to set
|
||||
the allowlist. The hack falls away automatically when smolvm
|
||||
honors the flag upstream — see the `loopback_alias` module's
|
||||
docstring for the investigation trail.
|
||||
|
||||
## Manifest
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ from ..docker.pipelock import (
|
||||
PIPELOCK_PORT as _PIPELOCK_PORT_STR,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
from . import loopback_alias as _loopback
|
||||
from . import sidecar_bundle as _bundle
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle import SmolmachinesBottle
|
||||
@@ -76,7 +77,16 @@ def launch(
|
||||
via the ExitStack."""
|
||||
stack = ExitStack()
|
||||
try:
|
||||
# 1. Per-bottle docker bridge.
|
||||
# 1. Reserve a loopback alias for this bottle. macOS only
|
||||
# routes 127.0.0.1 by default; the per-bottle alias is
|
||||
# what the bundle's docker port-publishes and TSI allowlist bind
|
||||
# against, so this bottle can't reach other bottles' (or
|
||||
# other host services') ports on the loopback. Lazy
|
||||
# sudo-driven on first use per boot. No-op on Linux.
|
||||
_loopback.ensure_pool()
|
||||
loopback_ip = _loopback.allocate(plan.slug)
|
||||
|
||||
# 2. Per-bottle docker bridge.
|
||||
network = _bundle.bundle_network_name(plan.slug)
|
||||
_bundle.create_bundle_network(network, plan.bundle_subnet, plan.bundle_gateway)
|
||||
stack.callback(_bundle.remove_bundle_network, network)
|
||||
@@ -112,21 +122,22 @@ def launch(
|
||||
)
|
||||
|
||||
# 3. Build the BundleLaunchSpec from the (now-resolved)
|
||||
# inner Plans: daemon subset, env, bind-mounts. The spec's
|
||||
# ports_to_publish list expands depending on which daemons
|
||||
# the agent needs to reach from the smolvm guest.
|
||||
bundle_spec = _bundle_launch_spec(plan, network)
|
||||
# inner Plans: daemon subset, env, bind-mounts, and the
|
||||
# loopback alias to bind published ports against. The
|
||||
# spec's ports_to_publish list expands depending on which
|
||||
# daemons the agent needs to reach from the smolvm guest.
|
||||
bundle_spec = _bundle_launch_spec(plan, network, loopback_ip)
|
||||
token_env = _resolve_token_env(plan, os.environ)
|
||||
_bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
|
||||
stack.callback(_bundle.stop_bundle, plan.slug)
|
||||
|
||||
# 4. Discover the host-side ports docker assigned for the
|
||||
# bundle's published container ports, and bind the
|
||||
# agent's URLs to `127.0.0.1:<host port>`. Docker container
|
||||
# IPs (192.168.x.x in the daemon's bridge) aren't
|
||||
# reachable from the smolvm guest on macOS — TSI uses
|
||||
# macOS networking, and macOS sees the daemon's bridge
|
||||
# via the published-port loopback forward only.
|
||||
# agent's URLs to `<loopback_ip>:<host port>`. Docker
|
||||
# container IPs (192.168.x.x in the daemon's bridge)
|
||||
# aren't reachable from the smolvm guest on macOS — TSI
|
||||
# uses macOS networking, and macOS sees the daemon's
|
||||
# bridge via the published-port loopback forward only.
|
||||
#
|
||||
# Proxy hop order matches the docker backend: when the
|
||||
# bottle declares egress routes, the agent's first hop is
|
||||
@@ -140,29 +151,41 @@ def launch(
|
||||
else:
|
||||
agent_facing_port = _PIPELOCK_PORT
|
||||
agent_facing_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, agent_facing_port,
|
||||
plan.slug, agent_facing_port, host_ip=loopback_ip,
|
||||
)
|
||||
agent_proxy_url = f"http://127.0.0.1:{agent_facing_host_port}"
|
||||
agent_proxy_url = f"http://{loopback_ip}:{agent_facing_host_port}"
|
||||
agent_git_gate_host = ""
|
||||
if plan.git_gate_plan.upstreams:
|
||||
git_gate_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _GIT_GATE_PORT,
|
||||
plan.slug, _GIT_GATE_PORT, host_ip=loopback_ip,
|
||||
)
|
||||
agent_git_gate_host = f"127.0.0.1:{git_gate_host_port}"
|
||||
agent_git_gate_host = f"{loopback_ip}:{git_gate_host_port}"
|
||||
agent_supervise_url = ""
|
||||
if plan.supervise_plan is not None:
|
||||
supervise_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _SUPERVISE_PORT,
|
||||
plan.slug, _SUPERVISE_PORT, host_ip=loopback_ip,
|
||||
)
|
||||
agent_supervise_url = f"http://127.0.0.1:{supervise_host_port}/"
|
||||
agent_supervise_url = f"http://{loopback_ip}:{supervise_host_port}/"
|
||||
|
||||
# Stamp the URLs onto the plan + guest_env. provision_git
|
||||
# and provision_supervise read the plan fields; the agent
|
||||
# reads guest_env on every exec_claude.
|
||||
#
|
||||
# NO_PROXY has to include the per-bottle loopback alias —
|
||||
# otherwise claude's HTTPS_PROXY catches direct calls to
|
||||
# the supervise URL (`http://<alias>:<port>/`) and proxies
|
||||
# them through egress, which has no route for the alias
|
||||
# and rejects with "Failed to connect". The git-gate URL
|
||||
# uses git://, not affected by HTTP_PROXY, so the alias
|
||||
# only has to be in NO_PROXY for the MCP / supervise
|
||||
# path. Append rather than overwrite so prepare.py's
|
||||
# `localhost,127.0.0.1` baseline stays in place.
|
||||
existing_no_proxy = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1")
|
||||
guest_env = {
|
||||
**plan.guest_env,
|
||||
"HTTPS_PROXY": agent_proxy_url,
|
||||
"HTTP_PROXY": agent_proxy_url,
|
||||
"NO_PROXY": f"{existing_no_proxy},{loopback_ip}",
|
||||
}
|
||||
if agent_git_gate_host:
|
||||
guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}"
|
||||
@@ -178,18 +201,25 @@ def launch(
|
||||
|
||||
# 5. smolvm VM. --from carries the pre-packed .smolmachine
|
||||
# artifact (built by prepare); --allow-cidr + -e carry the
|
||||
# per-bottle TSI allowlist + env. The allowlist is
|
||||
# `127.0.0.1/32` because every bundle daemon the agent
|
||||
# reaches is fronted by a host loopback port-forward.
|
||||
# Smolfile isn't usable here — smolvm 0.8.0 makes `--from`
|
||||
# and `--smolfile` mutually exclusive.
|
||||
# per-bottle TSI allowlist + env. The allowlist is the
|
||||
# per-bottle loopback alias — narrowing it to one /32 keeps
|
||||
# the agent from reaching other host loopback services or
|
||||
# other bottles' published ports. Smolfile isn't usable
|
||||
# here — smolvm 0.8.0 makes `--from` and `--smolfile`
|
||||
# mutually exclusive.
|
||||
_smolvm.machine_create(
|
||||
plan.machine_name,
|
||||
from_path=plan.agent_from_path,
|
||||
allow_cidrs=["127.0.0.1/32"],
|
||||
allow_cidrs=[f"{loopback_ip}/32"],
|
||||
env=plan.guest_env,
|
||||
)
|
||||
stack.callback(_smolvm.machine_delete, plan.machine_name)
|
||||
# Workaround smolvm 0.8.0: `--allow-cidr` is silently
|
||||
# dropped when combined with `--from`. Patch the persisted
|
||||
# state DB to set the allowlist before start so the booted
|
||||
# VM's TSI actually enforces. See loopback_alias's module
|
||||
# docstring for the investigation that led here.
|
||||
_loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"])
|
||||
_smolvm.machine_start(plan.machine_name)
|
||||
stack.callback(_smolvm.machine_stop, plan.machine_name)
|
||||
|
||||
@@ -240,7 +270,7 @@ def launch(
|
||||
|
||||
|
||||
def _bundle_launch_spec(
|
||||
plan: SmolmachinesBottlePlan, network: str
|
||||
plan: SmolmachinesBottlePlan, network: str, loopback_ip: str,
|
||||
) -> _bundle.BundleLaunchSpec:
|
||||
"""Build a BundleLaunchSpec from the resolved inner Plans.
|
||||
|
||||
@@ -345,6 +375,7 @@ def _bundle_launch_spec(
|
||||
environment=tuple(env),
|
||||
volumes=tuple(volumes),
|
||||
ports_to_publish=tuple(ports_to_publish),
|
||||
publish_host_ip=loopback_ip,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Per-bottle loopback alias allocation + TSI allowlist
|
||||
enforcement (PRD 0023, follow-up to PR #74).
|
||||
|
||||
After the pivot to host-loopback port-forwards, the smolmachines
|
||||
TSI allowlist was `127.0.0.1/32` — which meant the agent VM could
|
||||
reach **any** service bound to macOS's loopback, not just the
|
||||
bundle's published ports. Real downgrade from the docker
|
||||
backend's `--internal` network isolation.
|
||||
|
||||
This module narrows the allowlist by allocating each bottle a
|
||||
unique loopback alias (`127.0.0.16` .. `127.0.0.31`). The
|
||||
bundle's port-forwards bind to that alias, and the alias's /32
|
||||
is what TSI allows.
|
||||
|
||||
**Smolvm 0.8.0 quirk + workaround.** `smolvm machine create
|
||||
--from <smolmachine> --net --allow-cidr X/32` silently drops the
|
||||
flag — verified empirically that the agent process's allowlist
|
||||
ends up `null` in smolvm's persistent state DB (`~/Library/
|
||||
Application Support/smolvm/server/smolvm.db`, `vms` table,
|
||||
`data` BLOB), and the booted VM reaches all of `127.0.0.0/8`
|
||||
regardless of what we passed. Workaround: after machine_create,
|
||||
open the SQLite DB and patch the row's `allowed_cidrs` field
|
||||
directly. Smolvm reads the DB at machine_start, so the patched
|
||||
value takes effect on boot. Tested: enforcement is real — the
|
||||
guest's connect to a non-allowlisted IP fails with `Permission
|
||||
denied`. Other paths we tried (machine update, stop-edit-
|
||||
agent.config.json-restart, --smolfile, --image localhost:N/...)
|
||||
were dead ends.
|
||||
|
||||
macOS only configures `127.0.0.1` on `lo0` by default; the
|
||||
additional aliases require `sudo ifconfig lo0 alias`. We lazily
|
||||
sudo-add the missing pool on first use per boot — the aliases
|
||||
persist on `lo0` until reboot, so subsequent launches don't
|
||||
prompt.
|
||||
|
||||
Linux native daemons share the host's network namespace; the
|
||||
whole `127.0.0.0/8` is reachable by default and aliases are
|
||||
unnecessary. The pool logic detects native-Linux and skips sudo
|
||||
entirely; the DB patch is also gated on macOS.
|
||||
|
||||
Allocation is coordinated by inspecting running bundle
|
||||
containers' published host IPs — each bottle's bundle owns the
|
||||
alias appearing in its port bindings. The lowest-numbered free
|
||||
alias gets handed to a new bottle."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from ...log import die, info
|
||||
|
||||
|
||||
# smolvm's persistent VM state on macOS — a SQLite DB whose `vms`
|
||||
# table holds one JSON BLOB per machine. The Linux path is
|
||||
# different, but smolmachines is macOS-only in v1 (PRD 0023) so
|
||||
# we hard-code this. If the file moves under us we'll see a
|
||||
# clear FileNotFoundError; not worth defensive cross-platform
|
||||
# detection until the backend actually needs Linux.
|
||||
_SMOLVM_DB_PATH = (
|
||||
Path.home()
|
||||
/ "Library"
|
||||
/ "Application Support"
|
||||
/ "smolvm"
|
||||
/ "server"
|
||||
/ "smolvm.db"
|
||||
)
|
||||
|
||||
|
||||
# Sixteen aliases by default. Tunable for hosts that want more
|
||||
# concurrent bottles (each bottle reserves one alias for its
|
||||
# bundle bringup). The range is chosen to avoid the reserved
|
||||
# 127.0.0.1/2/3 addresses (1 is the default, 2 is sometimes used by
|
||||
# CUPS, 3 by other macOS services) and stay well clear of
|
||||
# 127.0.0.53 (systemd-resolved) and 127.0.0.54 (libvirt).
|
||||
_POOL_START = 16
|
||||
_POOL_END = 31 # inclusive
|
||||
|
||||
|
||||
# Loopback aliases pool: 127.0.0.<start>..127.0.0.<end>.
|
||||
def _pool_addresses() -> list[str]:
    """Return every address in the alias pool, lowest first.

    Expands the inclusive `_POOL_START`..`_POOL_END` range of
    final octets into `127.0.0.<n>` strings."""
    last_octets = range(_POOL_START, _POOL_END + 1)
    addresses = []
    for octet in last_octets:
        addresses.append(f"127.0.0.{octet}")
    return addresses
|
||||
|
||||
|
||||
def _is_macos() -> bool:
|
||||
return platform.system() == "Darwin"
|
||||
|
||||
|
||||
def ensure_pool() -> None:
    """Bring up every pool address that `lo0` doesn't carry yet.

    Each pool address is probed via `_alias_present`; the ones
    that are absent get added one at a time with
    `sudo ifconfig lo0 alias <ip>/32 up`, after a single `info`
    message telling the operator why sudo is about to prompt.
    A non-zero exit from any sudo call goes to `die` with the
    manual command to run instead.

    Returns without spawning anything on non-macOS hosts, and
    without touching sudo when nothing is missing."""
    if not _is_macos():
        return

    absent = []
    for candidate in _pool_addresses():
        if not _alias_present(candidate):
            absent.append(candidate)
    if len(absent) == 0:
        return

    # Name at most three addresses in the notice; elide the rest.
    preview = ", ".join(absent[:3])
    elision = ", ..." if len(absent) > 3 else ""
    info(
        f"smolmachines needs {len(absent)} loopback alias(es) on lo0 "
        f"({preview}{elision}) "
        f"to scope per-bottle TSI allowlists. sudo will prompt once; "
        f"aliases persist until reboot."
    )

    sudo_prefix = ["sudo", "-p", "claude-bottle (loopback alias): "]
    for candidate in absent:
        argv = sudo_prefix + ["ifconfig", "lo0", "alias", f"{candidate}/32", "up"]
        outcome = subprocess.run(argv, check=False)
        exit_code = outcome.returncode
        if exit_code == 0:
            continue
        die(
            f"sudo ifconfig lo0 alias {candidate} failed (exit "
            f"{exit_code}). Re-run with sudo available, "
            f"or add manually: sudo ifconfig lo0 alias {candidate}/32 up"
        )
|
||||
|
||||
|
||||
def force_allowlist(machine_name: str, allowed_cidrs: list[str]) -> None:
    """Overwrite the `allowed_cidrs` field of `machine_name`'s row
    in smolvm's persistent VM-state DB with the given list.
    Workaround for smolvm 0.8.0 silently dropping `--allow-cidr`
    when it is combined with `--from`.

    Ordering matters: call this AFTER `smolvm machine create`
    (otherwise there is no row to patch) and BEFORE `smolvm
    machine start` (smolvm reads the row on start, so an
    already-running VM does not pick up the change). Once smolvm
    honors the CLI flag upstream this function is redundant:
    rely on the flag at create time and drop this call from
    launch.

    Dies if the DB file is missing or has no row for
    `machine_name`. No-op on non-macOS — the DB path differs and
    the Linux smolmachines code path isn't exercised in v1."""
    if not _is_macos():
        return

    db_path = _SMOLVM_DB_PATH
    if not db_path.is_file():
        die(
            f"smolvm state DB not found at {db_path}. "
            f"smolvm 0.8.0 expected? `smolvm --version` to check."
        )

    connection = sqlite3.connect(str(db_path))
    try:
        cursor = connection.cursor()
        cursor.execute(
            "SELECT data FROM vms WHERE name = ?", (machine_name,),
        )
        found = cursor.fetchone()
        if found is None:
            die(
                f"smolvm DB has no row for machine {machine_name!r} — "
                f"machine_create must run before force_allowlist."
            )

        # The `data` column holds the machine config as JSON.
        machine_cfg = json.loads(found[0])
        machine_cfg["allowed_cidrs"] = list(allowed_cidrs)

        # Hand sqlite a Binary so the value is stored as a BLOB
        # (the column type smolvm uses). A plain str would be
        # stored as Text, which smolvm then fails to read.
        payload = sqlite3.Binary(json.dumps(machine_cfg).encode())
        cursor.execute(
            "UPDATE vms SET data = ? WHERE name = ?",
            (payload, machine_name),
        )
        connection.commit()
    finally:
        connection.close()
|
||||
|
||||
|
||||
def allocate(slug: str) -> str:
    """Return the lowest-numbered pool alias that no running
    smolmachines bundle currently publishes ports on. Dies when
    every alias in the pool is taken, so the operator sees the
    limit.

    `slug` identifies the bottle at the call site only — this
    function does not read it. There is no on-disk reservation;
    the choice is driven purely by what `_aliases_in_use`
    reports from docker state.

    On non-macOS the whole `127.0.0.0/8` is loopback by default,
    so `127.0.0.1` is fine to share and the alias dance is
    skipped. A fixed address is still returned so launch.py's
    callers don't have to branch on platform."""
    if not _is_macos():
        return "127.0.0.1"

    taken = _aliases_in_use()
    first_free = next(
        (addr for addr in _pool_addresses() if addr not in taken),
        None,
    )
    if first_free is not None:
        return first_free

    die(
        f"smolmachines loopback alias pool exhausted "
        f"({_POOL_END - _POOL_START + 1} aliases, all in use). "
        f"Stop a running bottle (`smolvm machine ls --json`) or "
        f"raise _POOL_END in loopback_alias.py."
    )
    return ""  # unreachable; die() never returns
|
||||
|
||||
|
||||
def _alias_present(ip: str) -> bool:
    """Report whether `/sbin/ifconfig lo0` lists `<ip>` as an
    `inet` address. Returns False if the probe exits non-zero.

    The match is word-bounded on both sides, so asking for
    `127.0.0.1` does not hit a `127.0.0.16` line."""
    probe = subprocess.run(
        ["/sbin/ifconfig", "lo0"],
        capture_output=True, text=True, check=False,
    )
    if probe.returncode != 0:
        return False
    listing = probe.stdout or ""
    needle = rf"\binet {re.escape(ip)}\b"
    return re.search(needle, listing) is not None
|
||||
|
||||
|
||||
def _aliases_in_use() -> set[str]:
    """Collect the host IPs that running smolmachines bundles
    have published ports on.

    Lists running containers whose name matches the
    `claude-bottle-sidecars-` filter via `docker ps`, then
    unions the `HostIp` values `_host_ips_for_container` finds
    for each one. Returns an empty set if `docker ps` exits
    non-zero."""
    listing = subprocess.run(
        ["docker", "ps", "--format", "{{.Names}}",
         "--filter", "name=claude-bottle-sidecars-"],
        capture_output=True, text=True, check=False,
    )
    if listing.returncode != 0:
        return set()

    # Resolve the full name list first, then inspect each
    # container in the order docker printed them.
    stripped = (line.strip() for line in (listing.stdout or "").splitlines())
    containers = [container for container in stripped if container]

    claimed: set[str] = set()
    for container in containers:
        claimed.update(_host_ips_for_container(container))
    return claimed
|
||||
|
||||
|
||||
def _host_ips_for_container(name: str) -> Iterable[str]:
    """Collect the distinct non-empty `HostIp` values across all
    port bindings of container `name`.

    Reads `.HostConfig.PortBindings` as JSON via `docker
    inspect`. Returns an empty tuple when the inspect exits
    non-zero or its output is not valid JSON; otherwise a set
    (possibly empty) of host IPs."""
    inspected = subprocess.run(
        ["docker", "inspect", name,
         "--format", "{{json .HostConfig.PortBindings}}"],
        capture_output=True, text=True, check=False,
    )
    if inspected.returncode != 0:
        return ()
    try:
        parsed = json.loads(inspected.stdout or "{}")
    except json.JSONDecodeError:
        return ()

    # Both the top-level mapping and any per-port list may be
    # JSON null; treat either as "no bindings".
    port_map = parsed or {}
    host_ips = (
        entry.get("HostIp")
        for entries in port_map.values()
        for entry in entries or []
    )
    return {host_ip for host_ip in host_ips if host_ip}
|
||||
|
||||
|
||||
__all__ = ["allocate", "ensure_pool", "force_allowlist"]
|
||||
@@ -70,13 +70,19 @@ class BundleLaunchSpec:
|
||||
environment: Sequence[str] = field(default_factory=tuple)
|
||||
# (host_path, container_path, read_only) bind mounts.
|
||||
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
|
||||
# Container ports to publish on the host's 127.0.0.1, random
|
||||
# Container ports to publish on `publish_host_ip`, random
|
||||
# host-side port per entry. The smolvm guest's TSI talks via
|
||||
# macOS networking, so docker container IPs (192.168.x.x in
|
||||
# the daemon's bridge) aren't directly reachable from the
|
||||
# guest — host-loopback port-forwards are. Egress's port
|
||||
# is bundle-internal and never published.
|
||||
ports_to_publish: Sequence[int] = field(default_factory=tuple)
|
||||
# Loopback IP to bind published ports against. Per-bottle
|
||||
# loopback aliases (`127.0.0.16` etc., added via sudo
|
||||
# ifconfig lo0 alias) narrow the TSI allowlist so a bottle
|
||||
# can't reach other bottles' (or other host services') ports
|
||||
# via 127.0.0.1.
|
||||
publish_host_ip: str = "127.0.0.1"
|
||||
|
||||
|
||||
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
|
||||
@@ -145,8 +151,10 @@ def start_bundle(spec: BundleLaunchSpec, *,
|
||||
# Loopback-only host port-forwards — the smolvm guest's TSI
|
||||
# uses macOS networking, and macOS loopback is the only host
|
||||
# surface that round-trips into Docker Desktop's daemon VM.
|
||||
# Binds to the per-bottle alias so TSI's IP-only allowlist
|
||||
# narrows reachability to this bottle's bundle only.
|
||||
for port in spec.ports_to_publish:
|
||||
argv += ["-p", f"127.0.0.1::{port}"]
|
||||
argv += ["-p", f"{spec.publish_host_ip}::{port}"]
|
||||
argv.append(spec.image)
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True,
|
||||
@@ -159,13 +167,15 @@ def start_bundle(spec: BundleLaunchSpec, *,
|
||||
)
|
||||
|
||||
|
||||
def bundle_host_port(slug: str, container_port: int) -> int:
|
||||
def bundle_host_port(
|
||||
slug: str, container_port: int, *, host_ip: str = "127.0.0.1",
|
||||
) -> int:
|
||||
"""`docker port <bundle> <container_port>/tcp` → the random
|
||||
host-side port docker assigned. Called after `start_bundle`
|
||||
on each container port listed in `BundleLaunchSpec
|
||||
.ports_to_publish` so the launch step can build the agent's
|
||||
HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
|
||||
`127.0.0.1:<host port>` form."""
|
||||
host-side port docker assigned for the binding on `host_ip`.
|
||||
Called after `start_bundle` on each container port listed in
|
||||
`BundleLaunchSpec.ports_to_publish` so the launch step can
|
||||
build the agent's HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
|
||||
`<host_ip>:<host port>` form."""
|
||||
container = bundle_container_name(slug)
|
||||
result = subprocess.run(
|
||||
["docker", "port", container, f"{container_port}/tcp"],
|
||||
@@ -176,14 +186,22 @@ def bundle_host_port(slug: str, container_port: int) -> int:
|
||||
f"docker port {container} {container_port}/tcp failed: "
|
||||
f"{(result.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
# `127.0.0.1:54321\n` — rpartition on last colon gives the port.
|
||||
line = (result.stdout or "").splitlines()[0].strip()
|
||||
_, _, port_str = line.rpartition(":")
|
||||
try:
|
||||
return int(port_str)
|
||||
except ValueError:
|
||||
die(f"unexpected `docker port` output: {line!r}")
|
||||
return -1 # unreachable; die() never returns
|
||||
# Each line looks like `127.0.0.16:54321` — one per address
|
||||
# family / host IP. Match on the expected host_ip prefix so
|
||||
# bottles bound to per-bottle aliases pick the right line.
|
||||
for raw in (result.stdout or "").splitlines():
|
||||
line = raw.strip()
|
||||
if line.startswith(f"{host_ip}:"):
|
||||
_, _, port_str = line.rpartition(":")
|
||||
try:
|
||||
return int(port_str)
|
||||
except ValueError:
|
||||
die(f"unexpected `docker port` output: {line!r}")
|
||||
die(
|
||||
f"no port mapping on {host_ip} for {container} "
|
||||
f"{container_port}/tcp; got: {(result.stdout or '').strip()!r}"
|
||||
)
|
||||
return -1 # unreachable; die() never returns
|
||||
|
||||
|
||||
def stop_bundle(slug: str) -> None:
|
||||
|
||||
@@ -609,13 +609,33 @@ PRD 0024's bundle image is a prerequisite — this PRD assumes
|
||||
port on host loopback (`-p 127.0.0.1::<port>`) and set TSI to
|
||||
`127.0.0.1/32`. **This widens the TSI allowlist to anything
|
||||
bound to macOS's loopback** — postgres, dev servers, other
|
||||
bottles' published ports, mDNSResponder, etc. The agent can't
|
||||
reach them by intent, but TSI can't filter by port. Follow-up
|
||||
to scope back: bind each bottle's bundle ports on a per-bottle
|
||||
loopback alias (e.g. `127.0.0.2` for bottle A, `127.0.0.3` for
|
||||
B) added via `ifconfig lo0 alias`, set TSI to that single /32.
|
||||
Needs sudo for alias setup; a small daemon-or-script we ship
|
||||
alongside the launcher could handle it.
|
||||
bottles' published ports, mDNSResponder, etc.
|
||||
|
||||
**Fix + smolvm 0.8.0 workaround.** Allocate each bottle a
|
||||
unique loopback alias (`127.0.0.16` .. `127.0.0.31`), bind
|
||||
bundle port-forwards to it, set TSI's allowlist to that
|
||||
alias's /32. The agent can only reach its own bundle; other
|
||||
bottles' ports, host loopback services, and the internet are
|
||||
all denied.
|
||||
|
||||
Smolvm 0.8.0 silently drops `--allow-cidr` when combined
|
||||
with `--from <smolmachine>` (verified empirically:
|
||||
`agent.config.json` shows `allowed_cidrs:null` despite the
|
||||
flag). The launcher patches smolvm's persistent state DB
|
||||
(`~/Library/Application Support/smolvm/server/smolvm.db`,
|
||||
`vms.data` BLOB) between `machine create` and `machine
|
||||
start` to set the allowlist directly. Smolvm reads the DB
|
||||
at start, so TSI enforces. Tested end-to-end: VM → `127.0.0.1`
|
||||
= "Permission denied"; VM → `<alias>:<bundle-port>` =
|
||||
connects.
|
||||
|
||||
Other paths tried that didn't work: `machine update
|
||||
--allow-cidr` doesn't exist; stop-edit-`agent.config.json`-
|
||||
restart fails (file removed on stop); `--smolfile` mutually
|
||||
exclusive with `--from`; `--image localhost:<port>/...` fails
|
||||
because smolvm's pull agent can't reach host loopback during
|
||||
pull. When smolvm honors `--allow-cidr` with `--from`
|
||||
upstream, the DB patch becomes redundant and can be removed.
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -0,0 +1,278 @@
|
||||
"""Unit: per-bottle loopback alias pool (follow-up to the
|
||||
Docker-Desktop fix in PR #74).
|
||||
|
||||
`ensure_pool` lazily sudo-adds missing aliases on macOS; no-ops
|
||||
on Linux. `allocate` picks the lowest-numbered unused alias by
|
||||
inspecting running bundle containers' port bindings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from claude_bottle.backend.smolmachines import loopback_alias
|
||||
|
||||
|
||||
def _ok(stdout: str = "") -> subprocess.CompletedProcess:
|
||||
return subprocess.CompletedProcess(
|
||||
args=[], returncode=0, stdout=stdout, stderr="",
|
||||
)
|
||||
|
||||
|
||||
def _fail(stderr: str = "boom") -> subprocess.CompletedProcess:
|
||||
return subprocess.CompletedProcess(
|
||||
args=[], returncode=1, stdout="", stderr=stderr,
|
||||
)
|
||||
|
||||
|
||||
# `ifconfig lo0` on macOS with the default lo0 config: just
|
||||
# 127.0.0.1. We craft fixtures around this shape.
|
||||
_LO0_DEFAULT = (
|
||||
"lo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> mtu 16384\n"
|
||||
"\tinet 127.0.0.1 netmask 0xff000000\n"
|
||||
"\tinet6 ::1 prefixlen 128\n"
|
||||
)
|
||||
|
||||
_LO0_PARTIAL = (
|
||||
_LO0_DEFAULT
|
||||
+ "\tinet 127.0.0.16 netmask 0xffffffff\n"
|
||||
+ "\tinet 127.0.0.17 netmask 0xffffffff\n"
|
||||
)
|
||||
|
||||
|
||||
def _lo0_full() -> str:
    """`ifconfig lo0` fixture with all 16 pool addresses
    (127.0.0.16 .. 127.0.0.31) appended to the default output."""
    alias_lines = [
        f"\tinet 127.0.0.{octet} netmask 0xffffffff\n"
        for octet in range(16, 32)
    ]
    return "".join([_LO0_DEFAULT, *alias_lines])
|
||||
|
||||
|
||||
class TestEnsurePool(unittest.TestCase):
    """`ensure_pool` probes lo0 and sudo-adds only the pool
    aliases that are missing; it does nothing off macOS."""

    def test_noop_on_linux(self):
        # With `_is_macos` reporting False, ensure_pool has to
        # return before spawning anything — no probe, no sudo.
        with patch.object(loopback_alias, "_is_macos", return_value=False):
            with patch.object(loopback_alias.subprocess, "run") as spawned:
                loopback_alias.ensure_pool()
        spawned.assert_not_called()

    def test_all_present_skips_sudo(self):
        fully_aliased = _ok(stdout=_lo0_full())
        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias.subprocess, "run",
                return_value=fully_aliased,
            ) as spawned:
                loopback_alias.ensure_pool()
        # Whatever ran was an ifconfig probe; nothing went via sudo.
        for invocation in spawned.call_args_list:
            self.assertNotIn("sudo", invocation.args[0])

    def test_missing_aliases_dispatch_sudo(self):
        # lo0 already carries .16 and .17, so sudo has to run for
        # the remaining fourteen addresses, .18 through .31.
        recorded: list[list[str]] = []

        def fake_run(argv, *a, **kw):
            recorded.append(argv)
            is_probe = argv[:2] == ["/sbin/ifconfig", "lo0"]
            return _ok(stdout=_LO0_PARTIAL) if is_probe else _ok()

        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias.subprocess, "run", side_effect=fake_run,
            ):
                loopback_alias.ensure_pool()

        via_sudo = [argv for argv in recorded if argv and argv[0] == "sudo"]
        self.assertEqual(14, len(via_sudo))

        # The address follows the literal "alias" argument, as
        # `<ip>/32`; strip the prefix length before comparing.
        added = set()
        for argv in via_sudo:
            cidr = argv[argv.index("alias") + 1]
            added.add(cidr.split("/")[0])
        expected = {f"127.0.0.{octet}" for octet in range(18, 32)}
        self.assertEqual(expected, added)

    def test_sudo_failure_dies(self):
        # Default lo0 (no aliases) forces a sudo call, which the
        # fake fails; ensure_pool must route that to `die`.
        def fake_run(argv, *a, **kw):
            probing = argv[:2] == ["/sbin/ifconfig", "lo0"]
            if probing:
                return _ok(stdout=_LO0_DEFAULT)
            return _fail() if argv[:1] == ["sudo"] else _ok()

        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias.subprocess, "run", side_effect=fake_run,
            ):
                with patch.object(
                    loopback_alias, "die", side_effect=SystemExit("die"),
                ):
                    with self.assertRaises(SystemExit):
                        loopback_alias.ensure_pool()
|
||||
|
||||
|
||||
class TestAllocate(unittest.TestCase):
    """`allocate` hands out the lowest free pool alias on macOS,
    `127.0.0.1` elsewhere, and dies when the pool is full."""

    def test_returns_loopback_on_linux(self):
        with patch.object(loopback_alias, "_is_macos", return_value=False):
            chosen = loopback_alias.allocate("demo")
        self.assertEqual("127.0.0.1", chosen)

    def test_picks_lowest_unused_on_macos(self):
        # Nothing is in use, so the first pool entry wins.
        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias, "_aliases_in_use", return_value=set(),
            ):
                chosen = loopback_alias.allocate("demo-1")
        self.assertEqual("127.0.0.16", chosen)

    def test_skips_in_use_aliases(self):
        # .16, .17 and .19 are taken; .18 is the lowest gap.
        taken = {"127.0.0.16", "127.0.0.17", "127.0.0.19"}
        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias, "_aliases_in_use", return_value=taken,
            ):
                chosen = loopback_alias.allocate("demo-3")
        self.assertEqual("127.0.0.18", chosen)

    def test_dies_when_pool_exhausted(self):
        every_alias = {f"127.0.0.{octet}" for octet in range(16, 32)}
        with patch.object(loopback_alias, "_is_macos", return_value=True):
            with patch.object(
                loopback_alias, "_aliases_in_use", return_value=every_alias,
            ):
                with patch.object(
                    loopback_alias, "die", side_effect=SystemExit("die"),
                ):
                    with self.assertRaises(SystemExit):
                        loopback_alias.allocate("demo-overflow")
|
||||
|
||||
|
||||
class TestAliasInUseDetection(unittest.TestCase):
    """Cover `_aliases_in_use` with `subprocess.run` stubbed out.

    The function inspects every running bundle and collects the
    `HostIp` of each container's port bindings. Scenarios here: no
    running bundles, several bundles, and a failing docker inspect.
    """

    def test_no_bundles_returns_empty(self):
        empty_listing = patch.object(
            loopback_alias.subprocess, "run",
            return_value=_ok(stdout=""),
        )
        with empty_listing:
            found = loopback_alias._aliases_in_use()
        self.assertEqual(set(), found)

    def test_walks_bundles_and_pulls_host_ips(self):
        # Replies are consumed in call order: the docker ps listing of
        # two bundle names first, then one docker inspect result per
        # bundle, each a port-bindings JSON blob whose HostIp is that
        # bottle's alias.
        bundle_names = "claude-bottle-sidecars-a\nclaude-bottle-sidecars-b\n"
        bindings_a = (
            '{"8888/tcp":[{"HostIp":"127.0.0.16","HostPort":"54000"}]}'
        )
        bindings_b = (
            '{"9099/tcp":[{"HostIp":"127.0.0.17","HostPort":"54001"}]}'
        )
        replies = [
            _ok(stdout=bundle_names),
            _ok(stdout=bindings_a),
            _ok(stdout=bindings_b),
        ]

        scripted_run = patch.object(
            loopback_alias.subprocess, "run", side_effect=replies,
        )
        with scripted_run:
            found = loopback_alias._aliases_in_use()
        self.assertEqual({"127.0.0.16", "127.0.0.17"}, found)

    def test_inspect_failures_are_skipped(self):
        bundle_names = "claude-bottle-sidecars-c\n"
        # The listing succeeds but the follow-up inspect call fails.
        replies = [_ok(stdout=bundle_names), _fail("inspect failed")]
        scripted_run = patch.object(
            loopback_alias.subprocess, "run", side_effect=replies,
        )
        with scripted_run:
            found = loopback_alias._aliases_in_use()
        self.assertEqual(set(), found)
|
||||
|
||||
|
||||
class TestForceAllowlist(unittest.TestCase):
    """Round-trip `force_allowlist` against a real SQLite file.

    Smolvm 0.8.0 silently drops `--allow-cidr` when it is combined
    with `--from`, so `force_allowlist` opens the state DB directly
    and sets the row's `allowed_cidrs` field. Using a real database
    here locks down the BLOB encoding of the stored row.
    """

    def setUp(self):
        self._tmp = tempfile.TemporaryDirectory(prefix="smolvm-db.")
        self.db = Path(self._tmp.name) / "smolvm.db"

        # Row shaped like the one smolvm writes on creation;
        # `allowed_cidrs` is the field the code under test patches.
        cfg = {
            "name": "demo-vm",
            "cpus": 4,
            "mem": 8192,
            "network": True,
            "allowed_cidrs": None,
        }
        payload = sqlite3.Binary(json.dumps(cfg).encode())

        con = sqlite3.connect(str(self.db))
        con.execute(
            "CREATE TABLE vms (name TEXT PRIMARY KEY NOT NULL, data BLOB NOT NULL)"
        )
        con.execute(
            "INSERT INTO vms (name, data) VALUES (?, ?)",
            ("demo-vm", payload),
        )
        con.commit()
        con.close()

    def tearDown(self):
        self._tmp.cleanup()

    def test_patches_allowed_cidrs_on_row(self):
        on_macos = patch.object(loopback_alias, "_is_macos", return_value=True)
        db_path = patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db)
        with on_macos, db_path:
            loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"])

        con = sqlite3.connect(str(self.db))
        row = con.execute(
            "SELECT typeof(data), data FROM vms WHERE name='demo-vm'",
        ).fetchone()
        con.close()

        storage_class, raw = row
        # The column must still hold a BLOB (the type smolvm reads).
        self.assertEqual("blob", storage_class)
        stored = json.loads(raw)
        self.assertEqual(["127.0.0.16/32"], stored["allowed_cidrs"])
        # Fields other than `allowed_cidrs` keep their original values.
        self.assertEqual(4, stored["cpus"])
        self.assertTrue(stored["network"])

    def test_noop_on_linux(self):
        not_macos = patch.object(
            loopback_alias, "_is_macos", return_value=False,
        )
        db_path = patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db)
        with not_macos, db_path:
            loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"])

        # The stored row should be exactly what setUp inserted.
        con = sqlite3.connect(str(self.db))
        raw = con.execute(
            "SELECT data FROM vms WHERE name='demo-vm'",
        ).fetchone()[0]
        con.close()
        stored = json.loads(raw)
        self.assertIsNone(stored["allowed_cidrs"])

    def test_dies_on_missing_db(self):
        on_macos = patch.object(loopback_alias, "_is_macos", return_value=True)
        missing_db = patch.object(
            loopback_alias, "_SMOLVM_DB_PATH",
            Path("/nonexistent/smolvm.db"),
        )
        die_stub = patch.object(
            loopback_alias, "die", side_effect=SystemExit("die"),
        )
        with on_macos, missing_db, die_stub, self.assertRaises(SystemExit):
            loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"])

    def test_dies_on_missing_row(self):
        on_macos = patch.object(loopback_alias, "_is_macos", return_value=True)
        db_path = patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db)
        die_stub = patch.object(
            loopback_alias, "die", side_effect=SystemExit("die"),
        )
        # The database exists, but it has no row under this VM name.
        with on_macos, db_path, die_stub, self.assertRaises(SystemExit):
            loopback_alias.force_allowlist("not-in-db", ["127.0.0.16/32"])
|
||||
|
||||
|
||||
# Let the module be executed directly as a script to run its tests.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user