From 7eda2a66eccb386bfdcbad24ec2d43b6e9bb234a Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 16:55:03 -0400 Subject: [PATCH] feat(smolmachines): patch smolvm state DB to actually enforce per-bottle allowlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier commit framed this PR as "infrastructure landed, TSI enforcement blocked on upstream smolvm 0.8.0." Found a clean workaround that lets us enforce now. Smolvm persists each machine's config (including `allowed_cidrs`) as a JSON BLOB in `~/Library/Application Support/smolvm/server/smolvm.db`, `vms.data`. `machine create --allow-cidr X/32` silently writes `allowed_cidrs: null` to that row when combined with `--from`, but smolvm reads the row at `machine start` — so patching the row between create and start sets the allowlist for real. New `loopback_alias.force_allowlist(machine_name, cidrs)` opens the SQLite DB, JSON-decodes the row, sets `allowed_cidrs`, and writes back as BLOB (Text type silently corrupts smolvm's later reads). launch.py calls it immediately after `machine_create` and before `machine_start`. Verified end-to-end on macOS / Docker Desktop: VM allowlist after start: ["127.0.0.16/32"] VM → 127.0.0.1:3000 → BLOCKED (Permission denied) VM → 8.8.8.8:53 → BLOCKED (Permission denied) VM → 127.0.0.16: → CONNECTED The DB-patch hack is correct only because smolvm reads `allowed_cidrs` from the row at start time (not derived in- process). When upstream honors `--allow-cidr` with `--from`, the call becomes redundant — drop the call and the workaround is gone. Tests: 4 new for `force_allowlist` (BLOB round-trip; Linux no-op; missing DB; missing row). Total 593 unit tests pass. README + PRD updated to reflect the fix landed (no longer "infrastructure pending upstream"). gitea#75 can close. Co-Authored-By: Claude Opus 4.7 --- README.md | 27 ++--- claude_bottle/backend/smolmachines/launch.py | 6 + .../backend/smolmachines/loopback_alias.py | 111 ++++++++++++++---- docs/prds/0023-smolmachines-backend.md | 40 ++++--- .../unit/test_smolmachines_loopback_alias.py | 87 ++++++++++++++ 5 files changed, 219 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 5031c68..0677cd6 100644 --- a/README.md +++ b/README.md @@ -205,21 +205,20 @@ each reserve a loopback alias from a pool (`127.0.0.16` .. `127.0.0.31`) and bind their bundle's port-forwards to it; the first `./cli.py start` after each reboot prompts for sudo to add missing aliases via `ifconfig lo0 alias`. Aliases persist until -reboot; subsequent launches don't prompt. +reboot; subsequent launches don't prompt. The agent's TSI +allowlist is the alias's `/32`, so each bottle can only reach +its own bundle's published ports — not other bottles' ports, +not other host loopback services (postgres, dev servers, etc.). -**Known v1 limitation — agent can reach the whole host -loopback:** the alias-allocation infrastructure exists, but TSI -allowlist enforcement is blocked on a smolvm 0.8.0 upstream bug: -`smolvm machine create --from --net --allow-cidr -X/32` silently drops the allowlist (the persisted -`agent.config.json` shows `allowed_cidrs: null`, and the running -VM reaches `127.0.0.0/8` regardless). So while a smolmachines -bottle is running, host-local dev services (postgres on 5432, -dev servers, etc.) are reachable from inside the agent even -though the launcher's `--allow-cidr` says otherwise. The docker -backend keeps the bottle on a `--internal` docker network and -doesn't have this issue. Tracked in gitea issue #75; will -auto-resolve once smolvm honors the flag. +This enforcement requires a workaround for a smolvm 0.8.0 bug: +the CLI's `--allow-cidr` flag is silently dropped when combined +with `--from `. The launcher patches smolvm's +persistent state DB +(`~/Library/Application Support/smolvm/server/smolvm.db`) +directly between `machine create` and `machine start` to set +the allowlist. The hack falls away automatically when smolvm +honors the flag upstream — see the `loopback_alias` module's +docstring for the investigation trail. ## Manifest diff --git a/claude_bottle/backend/smolmachines/launch.py b/claude_bottle/backend/smolmachines/launch.py index fc7c306..b3b3498 100644 --- a/claude_bottle/backend/smolmachines/launch.py +++ b/claude_bottle/backend/smolmachines/launch.py @@ -202,6 +202,12 @@ def launch( env=plan.guest_env, ) stack.callback(_smolvm.machine_delete, plan.machine_name) + # Workaround smolvm 0.8.0: `--allow-cidr` is silently + # dropped when combined with `--from`. Patch the persisted + # state DB to set the allowlist before start so the booted + # VM's TSI actually enforces. See loopback_alias's module + # docstring for the investigation that led here. + _loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"]) _smolvm.machine_start(plan.machine_name) stack.callback(_smolvm.machine_stop, plan.machine_name) diff --git a/claude_bottle/backend/smolmachines/loopback_alias.py b/claude_bottle/backend/smolmachines/loopback_alias.py index 65a1a0d..7897e4c 100644 --- a/claude_bottle/backend/smolmachines/loopback_alias.py +++ b/claude_bottle/backend/smolmachines/loopback_alias.py @@ -1,30 +1,31 @@ -"""Per-bottle loopback alias allocation (PRD 0023, follow-up to -the Docker-Desktop fix in PR #74). +"""Per-bottle loopback alias allocation + TSI allowlist +enforcement (PRD 0023, follow-up to PR #74). After the pivot to host-loopback port-forwards, the smolmachines TSI allowlist was `127.0.0.1/32` — which meant the agent VM could reach **any** service bound to macOS's loopback, not just the -bundle's published ports. That's a real downgrade from the -docker backend's `--internal` network isolation. +bundle's published ports. Real downgrade from the docker +backend's `--internal` network isolation. -This module is the host-side half of the eventual fix: allocate -each bottle a unique loopback alias (`127.0.0.16` .. `127.0.0.31` -by default), bind the bundle's port-forwards to that alias, and -pass the alias's /32 as smolvm's `--allow-cidr`. If TSI enforced -the allowlist, the agent could only reach its own bundle. +This module narrows the allowlist by allocating each bottle a +unique loopback alias (`127.0.0.16` .. `127.0.0.31`). The +bundle's port-forwards bind to that alias, and the alias's /32 +is what TSI allows. -**Upstream block, smolvm 0.8.0:** verified empirically that -`smolvm machine create --from --net --allow-cidr -X/32` silently drops the allowlist. The persisted -`agent.config.json` shows `allowed_cidrs: null`, and the running -VM can reach any host loopback service regardless of the -flag. `machine update --allow-cidr` doesn't exist; stop-edit- -start of `agent.config.json` doesn't work (the file is removed -on stop); `--smolfile` is mutually exclusive with `--from`. So -the alias scoping infrastructure lives here, ready, but the -TSI enforcement is blocked on a smolvm upstream fix. Until that -lands, the agent can still reach the whole `127.0.0.0/8`. The -README + gitea issue #75 spell this out. +**Smolvm 0.8.0 quirk + workaround.** `smolvm machine create +--from --net --allow-cidr X/32` silently drops the +flag — verified empirically that the agent process's allowlist +ends up `null` in smolvm's persistent state DB (`~/Library/ +Application Support/smolvm/server/smolvm.db`, `vms` table, +`data` BLOB), and the booted VM reaches all of `127.0.0.0/8` +regardless of what we passed. Workaround: after machine_create, +open the SQLite DB and patch the row's `allowed_cidrs` field +directly. Smolvm reads the DB at machine_start, so the patched +value takes effect on boot. Tested: enforcement is real — the +guest's connect to a non-allowlisted IP fails with `Permission +denied`. Other paths we tried (machine update, stop-edit- +agent.config.json-restart, --smolfile, --image localhost:N/...) +were dead ends. macOS only configures `127.0.0.1` on `lo0` by default; the additional aliases require `sudo ifconfig lo0 alias`. We lazily @@ -35,7 +36,7 @@ prompt. Linux native daemons share the host's network namespace; the whole `127.0.0.0/8` is reachable by default and aliases are unnecessary. The pool logic detects native-Linux and skips sudo -entirely. +entirely; the DB patch is also gated on macOS. Allocation is coordinated by inspecting running bundle containers' published host IPs — each bottle's bundle owns the @@ -48,12 +49,30 @@ import json import os import platform import re +import sqlite3 import subprocess +from pathlib import Path from typing import Iterable from ...log import die, info +# smolvm's persistent VM state on macOS — a SQLite DB whose `vms` +# table holds one JSON BLOB per machine. The Linux path is +# different, but smolmachines is macOS-only in v1 (PRD 0023) so +# we hard-code this. If the file moves under us we'll see a +# clear FileNotFoundError; not worth defensive cross-platform +# detection until the backend actually needs Linux. +_SMOLVM_DB_PATH = ( + Path.home() + / "Library" + / "Application Support" + / "smolvm" + / "server" + / "smolvm.db" +) + + # Sixteen aliases by default. Tunable for hosts that want more # concurrent bottles (each bottle reserves one alias for its # bundle bringup). The range is chosen to avoid the reserved @@ -103,6 +122,52 @@ def ensure_pool() -> None: ) +def force_allowlist(machine_name: str, allowed_cidrs: list[str]) -> None: + """Patch smolvm's persistent VM-state DB to set the machine's + `allowed_cidrs` to the given list. Workaround for smolvm + 0.8.0's silent-drop of `--allow-cidr` when used with `--from`. + + Must run AFTER `smolvm machine create` (the row has to + exist) and BEFORE `smolvm machine start` (smolvm reads the + row on start; in-flight VMs don't pick up changes). Once + smolvm honors the CLI flag upstream this whole function is + redundant — flag-respecting create + remove this call from + launch. + + No-op on non-macOS — the DB path differs and the Linux + smolmachines code path isn't exercised in v1.""" + if not _is_macos(): + return + if not _SMOLVM_DB_PATH.is_file(): + die( + f"smolvm state DB not found at {_SMOLVM_DB_PATH}. " + f"smolvm 0.8.0 expected? `smolvm --version` to check." + ) + con = sqlite3.connect(str(_SMOLVM_DB_PATH)) + try: + cur = con.cursor() + row = cur.execute( + "SELECT data FROM vms WHERE name = ?", (machine_name,), + ).fetchone() + if row is None: + die( + f"smolvm DB has no row for machine {machine_name!r} — " + f"machine_create must run before force_allowlist." + ) + cfg = json.loads(row[0]) + cfg["allowed_cidrs"] = list(allowed_cidrs) + # Write as BLOB (the column type smolvm uses) — passing a + # plain str makes sqlite store it as Text and smolvm then + # fails to read it. + cur.execute( + "UPDATE vms SET data = ? WHERE name = ?", + (sqlite3.Binary(json.dumps(cfg).encode()), machine_name), + ) + con.commit() + finally: + con.close() + + def allocate(slug: str) -> str: """Pick the lowest-numbered alias from the pool not already in use by a running smolmachines bundle. Bails when the pool @@ -186,4 +251,4 @@ def _host_ips_for_container(name: str) -> Iterable[str]: return seen -__all__ = ["allocate", "ensure_pool"] +__all__ = ["allocate", "ensure_pool", "force_allowlist"] diff --git a/docs/prds/0023-smolmachines-backend.md b/docs/prds/0023-smolmachines-backend.md index f326081..4bf2b6b 100644 --- a/docs/prds/0023-smolmachines-backend.md +++ b/docs/prds/0023-smolmachines-backend.md @@ -611,21 +611,31 @@ PRD 0024's bundle image is a prerequisite — this PRD assumes bound to macOS's loopback** — postgres, dev servers, other bottles' published ports, mDNSResponder, etc. - **Attempted fix + upstream block (`smolmachines-loopback- - alias-scoping` branch).** Allocate each bottle a unique - loopback alias (`127.0.0.16` .. `127.0.0.31`), bind bundle - port-forwards to it, set TSI's `--allow-cidr` to that /32. - Verified empirically that `smolvm 0.8.0 machine create --from - --net --allow-cidr X/32` **silently drops the - allowlist** — `agent.config.json` shows `allowed_cidrs:null` - and the VM reaches all of `127.0.0.0/8` regardless of the - flag. Workarounds tried: `machine update --allow-cidr` - doesn't exist; stop-edit-`agent.config.json`-restart fails - (file is removed on stop); `--smolfile` is mutually exclusive - with `--from`. Alias-allocation infrastructure is in place - so the day smolvm honors `--allow-cidr` with `--from`, the - scoping starts working. Until then the agent can reach the - whole host loopback. Tracked in gitea issue #75. + **Fix + smolvm 0.8.0 workaround.** Allocate each bottle a + unique loopback alias (`127.0.0.16` .. `127.0.0.31`), bind + bundle port-forwards to it, set TSI's allowlist to that + alias's /32. The agent can only reach its own bundle; other + bottles' ports, host loopback services, and the internet are + all denied. + + Smolvm 0.8.0 silently drops `--allow-cidr` when combined + with `--from ` (verified empirically: + `agent.config.json` shows `allowed_cidrs:null` despite the + flag). The launcher patches smolvm's persistent state DB + (`~/Library/Application Support/smolvm/server/smolvm.db`, + `vms.data` BLOB) between `machine create` and `machine + start` to set the allowlist directly. Smolvm reads the DB + at start, so TSI enforces. Tested end-to-end: VM → `127.0.0.1` + = "Permission denied"; VM → `:` = + connects. + + Other paths tried that didn't work: `machine update + --allow-cidr` doesn't exist; stop-edit-`agent.config.json`- + restart fails (file removed on stop); `--smolfile` mutually + exclusive with `--from`; `--image localhost:/...` fails + because smolvm's pull agent can't reach host loopback during + pull. When smolvm honors `--allow-cidr` with `--from` + upstream, the DB patch becomes redundant and can be removed. ## References diff --git a/tests/unit/test_smolmachines_loopback_alias.py b/tests/unit/test_smolmachines_loopback_alias.py index d3e8223..f7b428d 100644 --- a/tests/unit/test_smolmachines_loopback_alias.py +++ b/tests/unit/test_smolmachines_loopback_alias.py @@ -7,8 +7,12 @@ inspecting running bundle containers' port bindings.""" from __future__ import annotations +import json +import sqlite3 import subprocess +import tempfile import unittest +from pathlib import Path from unittest.mock import patch from claude_bottle.backend.smolmachines import loopback_alias @@ -187,5 +191,88 @@ class TestAliasInUseDetection(unittest.TestCase): self.assertEqual(set(), loopback_alias._aliases_in_use()) +class TestForceAllowlist(unittest.TestCase): + """Smolvm 0.8.0 silently drops `--allow-cidr` with `--from`, + so `force_allowlist` opens the state DB directly and sets + the row's `allowed_cidrs` field. Round-trip tests against a + real SQLite DB to lock down the BLOB encoding.""" + + def setUp(self): + self._tmp = tempfile.TemporaryDirectory(prefix="smolvm-db.") + self.db = Path(self._tmp.name) / "smolvm.db" + con = sqlite3.connect(str(self.db)) + con.execute( + "CREATE TABLE vms (name TEXT PRIMARY KEY NOT NULL, data BLOB NOT NULL)" + ) + # Mimic smolvm's row shape (the JSON keys that exist on + # creation; allowed_cidrs is the field we patch). + cfg = { + "name": "demo-vm", + "cpus": 4, + "mem": 8192, + "network": True, + "allowed_cidrs": None, + } + con.execute( + "INSERT INTO vms (name, data) VALUES (?, ?)", + ("demo-vm", sqlite3.Binary(json.dumps(cfg).encode())), + ) + con.commit() + con.close() + + def tearDown(self): + self._tmp.cleanup() + + def test_patches_allowed_cidrs_on_row(self): + with patch.object(loopback_alias, "_is_macos", return_value=True), \ + patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db): + loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) + + con = sqlite3.connect(str(self.db)) + row = con.execute( + "SELECT typeof(data), data FROM vms WHERE name='demo-vm'", + ).fetchone() + con.close() + # Must round-trip as BLOB (the column type smolvm reads). + self.assertEqual("blob", row[0]) + cfg = json.loads(row[1]) + self.assertEqual(["127.0.0.16/32"], cfg["allowed_cidrs"]) + # Other fields preserved verbatim. + self.assertEqual(4, cfg["cpus"]) + self.assertTrue(cfg["network"]) + + def test_noop_on_linux(self): + with patch.object(loopback_alias, "_is_macos", return_value=False), \ + patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db): + loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) + # DB row should be untouched. + con = sqlite3.connect(str(self.db)) + cfg = json.loads(con.execute( + "SELECT data FROM vms WHERE name='demo-vm'", + ).fetchone()[0]) + con.close() + self.assertIsNone(cfg["allowed_cidrs"]) + + def test_dies_on_missing_db(self): + with patch.object(loopback_alias, "_is_macos", return_value=True), \ + patch.object( + loopback_alias, "_SMOLVM_DB_PATH", + Path("/nonexistent/smolvm.db"), + ), patch.object( + loopback_alias, "die", side_effect=SystemExit("die"), + ): + with self.assertRaises(SystemExit): + loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) + + def test_dies_on_missing_row(self): + with patch.object(loopback_alias, "_is_macos", return_value=True), \ + patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db), \ + patch.object( + loopback_alias, "die", side_effect=SystemExit("die"), + ): + with self.assertRaises(SystemExit): + loopback_alias.force_allowlist("not-in-db", ["127.0.0.16/32"]) + + if __name__ == "__main__": unittest.main()