From a4413406df5ac9458d56e2f8b248b41798f62f75 Mon Sep 17 00:00:00 2001 From: didericis Date: Thu, 25 Jun 2026 16:49:04 -0400 Subject: [PATCH] feat(smolmachines): run backend on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port the smolmachines backend so BOT_BOTTLE_BACKEND=smolmachines works on Linux (KVM), not just macOS: - Preflight gates /dev/kvm presence + accessibility on Linux with actionable remediation (kvm module, kvm group). - smolvm state-DB path is platform-derived (XDG on Linux). - force_allowlist runs on both platforms and is fail-closed: it verifies the persisted TSI allowlist and dies rather than booting a VM whose egress confinement it can't confirm. Previously it no-oped on Linux, failing OPEN. - allocate() does per-bottle 127.0.0.<N> scoping on Linux too (no ifconfig needed — all of 127/8 is already loopback); only ensure_pool's lo0 aliasing stays macOS-only. - README documents Linux + NixOS host setup. Linux/KVM integration (the sandbox-escape acceptance gate) is pending verification on a NixOS host; unit tests cover the new platform branches. Issue: #283 Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01NkwFXLFff9PYPy4wgVBJp9 --- README.md | 20 ++- bot_bottle/backend/smolmachines/launch.py | 38 +++-- .../backend/smolmachines/loopback_alias.py | 156 +++++++++++------- bot_bottle/backend/smolmachines/util.py | 60 +++++-- .../unit/test_smolmachines_loopback_alias.py | 135 +++++++++++++-- tests/unit/test_smolmachines_util.py | 63 +++++++ 6 files changed, 368 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index 876a95f..d7315c7 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ - **Provider templates (Claude, Codex)** — `Dockerfile.claude` / `Dockerfile.codex`, or a bottle-supplied Dockerfile. Claude auth via long-lived OAuth token; Codex via opt-in host device-auth forwarding. 
- **gVisor auto-detect** — on Linux hosts where `runsc` is registered with Docker, every bottle launches under it for a userspace syscall barrier; no manifest config required. - **Apple Container backend (macOS default when available)** — runs the agent and sidecar bundle with Apple's `container` CLI, using a host-only agent network plus a separate sidecar egress network. -- **Smolmachines backend** — runs the agent in a libkrun micro-VM while the sidecar bundle stays in Docker. TSI and smolmachines DNS filtering close the raw DNS exfiltration gap that exists in the legacy Docker backend. +- **Smolmachines backend** — runs the agent in a libkrun micro-VM while the sidecar bundle stays in Docker. TSI and smolmachines DNS filtering close the raw DNS exfiltration gap that exists in the legacy Docker backend. Runs on macOS (Hypervisor.framework) and Linux (KVM, `/dev/kvm`). - **Legacy Docker backend** — still available for examples, CI, and hosts without Apple Container via `BOT_BOTTLE_BACKEND=docker` or `--backend=docker`. ## Architecture @@ -72,10 +72,26 @@ When the agent exits, `cli.py` tears down every sidecar and both networks; nothi ## Quickstart -On compatible macOS hosts, the default backend requires Apple's `container` CLI and does not require Docker. The smolmachines backend requires Docker on the host for the sidecar bundle plus smolvm. The legacy Docker backend requires Docker. Claude bottles also need a long-lived Claude Code OAuth token (`claude setup-token`) exported as `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN`. +On compatible macOS hosts, the default backend requires Apple's `container` CLI and does not require Docker. The smolmachines backend requires Docker on the host for the sidecar bundle plus `smolvm` (macOS or Linux). The legacy Docker backend requires Docker. Claude bottles also need a long-lived Claude Code OAuth token (`claude setup-token`) exported as `BOT_BOTTLE_CLAUDE_OAUTH_TOKEN`. 
Use `BOT_BOTTLE_BACKEND=docker ./cli.py start ` on hosts where Apple Container is not installed and Docker is the desired backend. +### smolmachines on Linux + +The smolmachines backend runs on Linux as well as macOS. On Linux, `smolvm`/libkrun use KVM, so the host needs: + +- **`/dev/kvm`** present and accessible. Load `kvm-intel` or `kvm-amd` (and enable virtualization in BIOS/firmware). The invoking user must be in the `kvm` group: `sudo usermod -aG kvm "$USER"` then re-login. bot-bottle preflights this and reports exactly what's missing. +- **`smolvm`** on `PATH`: `curl -sSL https://smolmachines.com/install.sh | sh`. +- **Docker** for the sidecar bundle and image build, same as macOS. + +Per-bottle isolation works the same as macOS without any `ifconfig`/sudo step — all of `127.0.0.0/8` is already loopback on Linux, so each bottle's sidecar bundle is published on its own `127.0.0.<N>` and TSI's allowlist is scoped to that `/32`. + +```sh +BOT_BOTTLE_BACKEND=smolmachines ./cli.py start +``` + +> **NixOS:** enable `virtualisation.docker`, ensure the KVM module is loaded (`boot.kernelModules = [ "kvm-intel" ];` or `kvm-amd`), and add your user to the `kvm` and `docker` groups. If you run bottles from a Gitea Actions runner, use a `host`-label runner so Docker, `smolvm`, and `/dev/kvm` are all reachable from the job. `smolvm` isn't in nixpkgs — install the release binary (pin the version) and put it on the runner's `PATH`. + ```sh ./cli.py start # builds the image on first run, drops you into claude ``` diff --git a/bot_bottle/backend/smolmachines/launch.py b/bot_bottle/backend/smolmachines/launch.py index 8b67e25..f1fe440 100644 --- a/bot_bottle/backend/smolmachines/launch.py +++ b/bot_bottle/backend/smolmachines/launch.py @@ -141,10 +141,12 @@ def _allocate_resources( ) -> tuple[str, str]: """Reserve a loopback alias and create the per-bottle docker bridge. 
- macOS only routes 127.0.0.1 by default; the per-bottle alias - scopes TSI's allowlist to this bottle's published ports so the - agent can't reach other bottles' or host services' ports on - loopback. No-op on Linux.""" + The per-bottle alias scopes TSI's allowlist to this bottle's + published ports so the agent can't reach other bottles' or host + services' ports on loopback. On macOS `ensure_pool` first + sudo-aliases the pool on `lo0`; on Linux that's a no-op since + all of 127.0.0.0/8 is already loopback, but the per-bottle + allocation runs on both.""" _loopback.ensure_pool() loopback_ip = _loopback.allocate(plan.slug) network = _bundle.bundle_network_name(plan.slug) @@ -190,9 +192,11 @@ def _discover_urls( return the plan with URLs + guest_env stamped in. Docker container IPs (192.168.x.x in the daemon's bridge) - aren't reachable from the smolvm guest on macOS — TSI uses - macOS networking, and macOS sees the daemon's bridge via the - published-port loopback forward only. + aren't reachable from the smolvm guest — TSI proxies the + guest's connects through the host, and the host reaches the + bundle only via its published-port loopback forward (the + daemon's bridge isn't on the TSI allowlist). The agent dials + the published port on the per-bottle loopback alias. NO_PROXY includes the per-bottle loopback alias so the supervise + git-gate URLs bypass HTTPS_PROXY.""" @@ -252,10 +256,11 @@ def _launch_vm( """Create, patch, and start the smolvm VM; register teardown. --allow-cidr is the per-bottle loopback alias so the guest can - only reach this bottle's bundle ports. force_allowlist patches - smolvm 0.8.0's silent-drop of --allow-cidr when combined with - --from. Smolfile isn't usable here — smolvm 0.8.0 makes --from - and --smolfile mutually exclusive.""" + only reach this bottle's bundle ports. 
force_allowlist then + confirms the allowlist persisted (patching smolvm 0.8.0's + silent-drop of --allow-cidr when combined with --from) and + fails closed if it can't. Smolfile isn't usable here — smolvm + 0.8.0 makes --from and --smolfile mutually exclusive.""" _smolvm.machine_create( plan.machine_name, from_path=agent_from_path, @@ -263,9 +268,10 @@ def _launch_vm( env=plan.guest_env, ) stack.callback(_smolvm.machine_delete, plan.machine_name) - # Workaround smolvm 0.8.0: `--allow-cidr` is silently dropped - # when combined with `--from`. Patch the persisted state DB - # before start so the booted VM's TSI actually enforces. + # Confirm the booted VM's TSI allowlist will actually enforce the + # /32 before start (smolvm 0.8.0 silently drops `--allow-cidr` + # with `--from`, so the persisted state DB is patched if needed). + # Fails closed if enforcement can't be confirmed. _loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"]) _smolvm.machine_start(plan.machine_name) stack.callback(_smolvm.machine_stop, plan.machine_name) @@ -275,7 +281,9 @@ def _init_vm(plan: SmolmachinesBottlePlan) -> None: """Repair filesystem ownership and wait for exec channel readiness. Ownership repair: smolvm's pack process remaps files to the host - invoker's uid (501 on macOS). /home/node must be node:node so + invoker's uid (e.g. 501 on macOS, 1000 on Linux). The chowns use + names not numbers so they're correct on either. /home/node must + be node:node so Claude Code can write ~/.claude.json; /tmp + /var/tmp need root mode 1777 so non-root processes can create per-uid scratch dirs. 
All folded into one sh -c to avoid back-to-back exec calls diff --git a/bot_bottle/backend/smolmachines/loopback_alias.py b/bot_bottle/backend/smolmachines/loopback_alias.py index 4608088..50b4ba6 100644 --- a/bot_bottle/backend/smolmachines/loopback_alias.py +++ b/bot_bottle/backend/smolmachines/loopback_alias.py @@ -33,10 +33,13 @@ sudo-add the missing pool on first use per boot — the aliases persist on `lo0` until reboot, so subsequent launches don't prompt. -Linux native daemons share the host's network namespace; the -whole `127.0.0.0/8` is reachable by default and aliases are -unnecessary. The pool logic detects native-Linux and skips sudo -entirely; the DB patch is also gated on macOS. +On Linux the whole `127.0.0.0/8` is already routed to `lo`, so +docker can publish a bundle's ports directly on `127.0.0.<N>` +with no `ifconfig`/sudo step. `ensure_pool` is therefore a no-op +on Linux, but per-bottle alias *allocation* and the TSI allowlist +DB patch run on both platforms — the isolation property is +identical, it's just cheaper to set up on Linux. The state-DB +path differs per platform (see `_smolvm_db_path`). Allocation is coordinated by inspecting running bundle containers' published host IPs — each bottle's bundle owns the @@ -47,6 +50,7 @@ from __future__ import annotations import fcntl import json +import os import platform import re import sqlite3 @@ -57,20 +61,34 @@ from typing import Iterable from ...log import die, info -# smolvm's persistent VM state on macOS — a SQLite DB whose `vms` -# table holds one JSON BLOB per machine. The Linux path is -# different, but smolmachines is macOS-only in v1 (PRD 0023) so -# we hard-code this. If the file moves under us we'll see a -# clear FileNotFoundError; not worth defensive cross-platform -# detection until the backend actually needs Linux. 
-_SMOLVM_DB_PATH = ( - Path.home() - / "Library" - / "Application Support" - / "smolvm" - / "server" - / "smolvm.db" -) +def _smolvm_db_path() -> Path: + """smolvm's persistent VM state — a SQLite DB whose `vms` table + holds one JSON BLOB per machine. macOS stores it under + `Application Support`; Linux follows the XDG base-dir spec + (`$XDG_DATA_HOME`, default `~/.local/share`). + + NOTE: the Linux location is inferred from smolvm's documented + `~/.local/share` install layout and must be confirmed against a + real Linux smolvm install. If it's wrong, `force_allowlist`'s + fail-closed check turns it into a clear launch-time error rather + than a silent escape.""" + if platform.system() == "Darwin": + return ( + Path.home() + / "Library" + / "Application Support" + / "smolvm" + / "server" + / "smolvm.db" + ) + xdg_data = os.environ.get("XDG_DATA_HOME") + base = Path(xdg_data) if xdg_data else Path.home() / ".local" / "share" + return base / "smolvm" / "server" / "smolvm.db" + + +# Resolved once at import: the host platform doesn't change within a +# process. Tests patch this attribute directly. +_SMOLVM_DB_PATH = _smolvm_db_path() # Sixteen aliases by default. Tunable for hosts that want more @@ -131,51 +149,74 @@ def ensure_pool() -> None: def force_allowlist(machine_name: str, allowed_cidrs: list[str]) -> None: - """Patch smolvm's persistent VM-state DB to set the machine's - `allowed_cidrs` to the given list. Workaround for smolvm - 0.8.0's silent-drop of `--allow-cidr` when used with `--from`. + """Ensure the machine's persisted TSI allowlist equals + `allowed_cidrs`, failing **closed** if that can't be confirmed. - Must run AFTER `smolvm machine create` (the row has to - exist) and BEFORE `smolvm machine start` (smolvm reads the - row on start; in-flight VMs don't pick up changes). Once - smolvm honors the CLI flag upstream this whole function is - redundant — flag-respecting create + remove this call from - launch. + Runs on both macOS and Linux. 
It exists because smolvm 0.8.0 + silently drops `--allow-cidr` when combined with `--from`, so + the allowlist has to be written into smolvm's persistent state + DB before `machine start`. Rather than assume the flag was + dropped, we read the persisted row and only patch when it + doesn't already match — so a newer smolvm that honors the flag + is left untouched. - No-op on non-macOS — the DB path differs and the Linux - smolmachines code path isn't exercised in v1.""" - if not _is_macos(): - return + Must run AFTER `smolvm machine create` (the row has to exist) + and BEFORE `smolvm machine start` (smolvm reads the row on + start; in-flight VMs don't pick up changes). + + Fail-closed: if the state DB is missing, the row is missing, or + the allowlist still doesn't match after patching, we `die()` + rather than boot a VM whose egress confinement we can't verify + — an unconfirmed allowlist is a sandbox-escape risk (the agent + VM could reach all of host loopback).""" + want = list(allowed_cidrs) if not _SMOLVM_DB_PATH.is_file(): die( - f"smolvm state DB not found at {_SMOLVM_DB_PATH}. " - f"smolvm 0.8.0 expected? `smolvm --version` to check." + f"smolvm state DB not found at {_SMOLVM_DB_PATH}; cannot " + f"confirm the TSI allowlist is enforced. Refusing to launch " + f"(fail-closed). Check `smolvm --version` and the DB " + f"location for your platform." ) con = sqlite3.connect(str(_SMOLVM_DB_PATH)) try: - cur = con.cursor() - row = cur.execute( - "SELECT data FROM vms WHERE name = ?", (machine_name,), - ).fetchone() - if row is None: - die( - f"smolvm DB has no row for machine {machine_name!r} — " - f"machine_create must run before force_allowlist." + cfg = _read_machine_cfg(con, machine_name) + if cfg.get("allowed_cidrs") != want: + cfg["allowed_cidrs"] = want + # Write as BLOB (the column type smolvm uses) — passing a + # plain str makes sqlite store it as Text and smolvm then + # fails to read it. + con.execute( + "UPDATE vms SET data = ? 
WHERE name = ?", + (sqlite3.Binary(json.dumps(cfg).encode()), machine_name), + ) + con.commit() + cfg = _read_machine_cfg(con, machine_name) + if cfg.get("allowed_cidrs") != want: + die( + f"could not enforce TSI allowlist {want!r} for machine " + f"{machine_name!r} (persisted value is " + f"{cfg.get('allowed_cidrs')!r}). Refusing to launch " + f"(fail-closed)." ) - cfg = json.loads(row[0]) - cfg["allowed_cidrs"] = list(allowed_cidrs) - # Write as BLOB (the column type smolvm uses) — passing a - # plain str makes sqlite store it as Text and smolvm then - # fails to read it. - cur.execute( - "UPDATE vms SET data = ? WHERE name = ?", - (sqlite3.Binary(json.dumps(cfg).encode()), machine_name), - ) - con.commit() finally: con.close() +def _read_machine_cfg(con: sqlite3.Connection, machine_name: str) -> dict[str, object]: + """Read + JSON-decode a machine's `data` BLOB from the smolvm + state DB. Dies (fail-closed) if the row is missing — the caller + can't confirm enforcement without it.""" + row = con.execute( + "SELECT data FROM vms WHERE name = ?", (machine_name,), + ).fetchone() + if row is None: + die( + f"smolvm DB has no row for machine {machine_name!r} — " + f"machine_create must run before force_allowlist." + ) + return json.loads(row[0]) + + def allocate(_slug: str) -> str: """Pick the lowest-numbered alias from the pool not already in use by a running smolmachines bundle. Bails when the pool @@ -184,16 +225,17 @@ def allocate(_slug: str) -> str: used (no on-disk reservation, allocation is purely docker-state-driven). - On non-macOS the whole `127.0.0.0/8` is loopback by default; - `127.0.0.1` is fine to share and we skip the alias dance. - This still returns a deterministic address so launch.py's - callers don't have to branch on platform. + Runs on both platforms: the allocation logic (docker-state + inspection + the file lock) is platform-independent. 
macOS + needs `ensure_pool` to have aliased the addresses on `lo0` + first; on Linux all of `127.0.0.0/8` is already loopback, so + docker can publish on the chosen `127.0.0.<N>` with no setup. + Per-bottle scoping (so the agent can't reach other bottles' or + host services' loopback ports) therefore holds on both. An exclusive file lock serialises concurrent calls so two simultaneous launches don't read the same docker state and claim the same alias.""" - if not _is_macos(): - return "127.0.0.1" _ALLOC_LOCK_PATH.parent.mkdir(parents=True, exist_ok=True) with open(_ALLOC_LOCK_PATH, "w", encoding="utf-8") as lf: fcntl.flock(lf, fcntl.LOCK_EX) diff --git a/bot_bottle/backend/smolmachines/util.py b/bot_bottle/backend/smolmachines/util.py index cadb8a7..938610a 100644 --- a/bot_bottle/backend/smolmachines/util.py +++ b/bot_bottle/backend/smolmachines/util.py @@ -5,26 +5,58 @@ unit-tested without importing the docker subprocess paths.""" from __future__ import annotations import hashlib +import os +import platform import shutil from ...log import die +# libkrun's Linux backend drives the guest through KVM, so the host +# must expose `/dev/kvm` and the invoking user must be able to open +# it. macOS uses Hypervisor.framework and needs no device node. +_KVM_DEVICE = "/dev/kvm" + def smolmachines_preflight() -> None: - """Ensure `smolvm` is on PATH before the launch flow runs. - Called from `_resolve_plan`; gives the operator a clear - install pointer rather than a cryptic FileNotFoundError - later. `gvproxy` is no longer required — see the PRD's design - pivot section.""" - if shutil.which("smolvm") is not None: - return - die( - "BOT_BOTTLE_BACKEND=smolmachines requires `smolvm` on " - "PATH. Install with: " - "curl -sSL https://smolmachines.com/install.sh | sh. " - "To use the legacy Docker backend instead, set " - "BOT_BOTTLE_BACKEND=docker or pass --backend=docker." - ) + """Ensure the host can run the smolmachines backend before the + launch flow starts. 
Called from `_resolve_plan`; surfaces a + clear, actionable error instead of a cryptic `smolvm` failure + deep in launch. + + Checks `smolvm` is on PATH (both platforms) and, on Linux, + that `/dev/kvm` exists and is accessible. `gvproxy` is no + longer required — see the PRD's design pivot section.""" + if shutil.which("smolvm") is None: + die( + "BOT_BOTTLE_BACKEND=smolmachines requires `smolvm` on " + "PATH. Install with: " + "curl -sSL https://smolmachines.com/install.sh | sh. " + "To use the legacy Docker backend instead, set " + "BOT_BOTTLE_BACKEND=docker or pass --backend=docker." + ) + if platform.system() == "Linux": + _preflight_kvm() + + +def _preflight_kvm() -> None: + """Linux-only: libkrun needs `/dev/kvm`. Distinguish 'KVM not + enabled' from 'no permission' so the operator knows which to + fix.""" + if not os.path.exists(_KVM_DEVICE): + die( + f"BOT_BOTTLE_BACKEND=smolmachines needs {_KVM_DEVICE} on " + "Linux but it is missing. Enable KVM: load the kvm-intel " + "or kvm-amd kernel module (and confirm virtualization is " + "enabled in BIOS/firmware). To use the legacy Docker " + "backend instead, set BOT_BOTTLE_BACKEND=docker." + ) + if not os.access(_KVM_DEVICE, os.R_OK | os.W_OK): + die( + f"{_KVM_DEVICE} exists but is not readable/writable by the " + "current user. Add your user to the `kvm` group " + "(`sudo usermod -aG kvm \"$USER\"`) and re-login, or run " + "with access to the device." 
+ ) def smolmachines_bundle_subnet(slug: str) -> tuple[str, str, str]: diff --git a/tests/unit/test_smolmachines_loopback_alias.py b/tests/unit/test_smolmachines_loopback_alias.py index 7751af6..b020821 100644 --- a/tests/unit/test_smolmachines_loopback_alias.py +++ b/tests/unit/test_smolmachines_loopback_alias.py @@ -8,6 +8,7 @@ inspecting running bundle containers' port bindings.""" from __future__ import annotations import json +import os import sqlite3 import subprocess import tempfile @@ -112,9 +113,16 @@ class TestEnsurePool(unittest.TestCase): class TestAllocate(unittest.TestCase): - def test_returns_loopback_on_linux(self): - with patch.object(loopback_alias, "_is_macos", return_value=False): - self.assertEqual("127.0.0.1", loopback_alias.allocate("demo")) + def test_per_bottle_alias_on_linux(self): + # Linux gets the same per-bottle scoping as macOS (127/8 is + # already loopback, so no ifconfig is needed). A fresh host + # with no running bundles allocates the first pool entry. + with tempfile.TemporaryDirectory() as tmp: + lock_path = Path(tmp) / "smolmachines.lock" + with patch.object(loopback_alias, "_is_macos", return_value=False), \ + patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \ + patch.object(loopback_alias, "_aliases_in_use", return_value=set()): + self.assertEqual("127.0.0.16", loopback_alias.allocate("demo")) def test_picks_lowest_unused_on_macos(self): # No bundles running -> first pool entry. @@ -166,12 +174,25 @@ class TestAllocateLock(unittest.TestCase): self.assertIn(fcntl_mod.LOCK_EX, flock_calls) - def test_no_lock_on_linux(self): - # Linux early-returns before touching the lock file. 
- with patch.object(loopback_alias, "_is_macos", return_value=False), \ - patch.object(loopback_alias.fcntl, "flock") as flock: - loopback_alias.allocate("demo") - flock.assert_not_called() + def test_acquires_exclusive_lock_on_linux(self): + # Linux allocates per-bottle too, so it must take the same + # lock to serialise concurrent launches. + import fcntl as fcntl_mod + flock_calls: list[int] = [] + + def record_flock(fd, op): # type: ignore + flock_calls.append(op) + + with tempfile.TemporaryDirectory() as tmp: + lock_path = Path(tmp) / "smolmachines.lock" + with patch.object(loopback_alias, "_is_macos", return_value=False), \ + patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \ + patch.object(loopback_alias, "_aliases_in_use", return_value=set()), \ + patch.object(loopback_alias.fcntl, "flock", + side_effect=record_flock): + loopback_alias.allocate("demo") + + self.assertIn(fcntl_mod.LOCK_EX, flock_calls) def test_sequential_allocations_with_shared_lock_are_serialised(self): # Two sequential calls share the same lock file. The second @@ -241,10 +262,12 @@ class TestAliasInUseDetection(unittest.TestCase): class TestForceAllowlist(unittest.TestCase): - """Smolvm 0.8.0 silently drops `--allow-cidr` with `--from`, - so `force_allowlist` opens the state DB directly and sets - the row's `allowed_cidrs` field. Round-trip tests against a - real SQLite DB to lock down the BLOB encoding.""" + """Smolvm 0.8.0 silently drops `--allow-cidr` with `--from`, so + `force_allowlist` opens the state DB directly and sets the row's + `allowed_cidrs` field — on both macOS and Linux. It is + fail-closed: it dies rather than launching a VM whose allowlist + it can't confirm. 
Round-trip tests against a real SQLite DB to + lock down the BLOB encoding.""" def setUp(self): self._tmp = tempfile.TemporaryDirectory(prefix="smolvm-db.") @@ -290,17 +313,67 @@ class TestForceAllowlist(unittest.TestCase): self.assertEqual(4, cfg["cpus"]) self.assertTrue(cfg["network"]) - def test_noop_on_linux(self): + def test_patches_on_linux_too(self): + # force_allowlist no longer no-ops on Linux — the TSI + # allowlist must be enforced there as well. with patch.object(loopback_alias, "_is_macos", return_value=False), \ patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db): loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) - # DB row should be untouched. con = sqlite3.connect(str(self.db)) cfg = json.loads(con.execute( "SELECT data FROM vms WHERE name='demo-vm'", ).fetchone()[0]) con.close() - self.assertIsNone(cfg["allowed_cidrs"]) + self.assertEqual(["127.0.0.16/32"], cfg["allowed_cidrs"]) + + def test_skips_write_when_already_matching(self): + # A newer smolvm that honors --allow-cidr at create leaves the + # row already correct; force_allowlist must not rewrite it. We + # detect a no-write by comparing the raw BLOB byte-for-byte + # (a rewrite re-serialises the JSON, changing key order/bytes + # is not guaranteed, but mtime/identity isn't observable — so + # we assert the stored bytes are exactly what we pre-seeded). + seeded = json.dumps({ + "name": "demo-vm", "cpus": 4, "mem": 8192, + "network": True, "allowed_cidrs": ["127.0.0.16/32"], + }).encode() + con = sqlite3.connect(str(self.db)) + con.execute( + "UPDATE vms SET data=? 
WHERE name='demo-vm'", + (sqlite3.Binary(seeded),), + ) + con.commit() + con.close() + + with patch.object(loopback_alias, "_is_macos", return_value=True), \ + patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db): + loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) + + con = sqlite3.connect(str(self.db)) + stored = con.execute( + "SELECT data FROM vms WHERE name='demo-vm'").fetchone()[0] + con.close() + self.assertEqual(seeded, bytes(stored)) + + def test_dies_when_patch_does_not_take(self): + # If the persisted allowlist still doesn't match after the + # patch (e.g. wrong schema / smolvm stores it elsewhere), + # force_allowlist must fail closed rather than boot the VM. + original = loopback_alias._read_machine_cfg + + def stale_cfg(con, name): + # Always report the un-patched row so the post-write + # verification never sees the requested cidrs. + cfg = original(con, name) + cfg["allowed_cidrs"] = None + return cfg + + with patch.object(loopback_alias, "_is_macos", return_value=True), \ + patch.object(loopback_alias, "_SMOLVM_DB_PATH", self.db), \ + patch.object(loopback_alias, "_read_machine_cfg", side_effect=stale_cfg), \ + patch.object(loopback_alias, "die", side_effect=SystemExit("die")): + with self.assertRaises(SystemExit): + loopback_alias.force_allowlist("demo-vm", ["127.0.0.16/32"]) def test_dies_on_missing_db(self): with patch.object(loopback_alias, "_is_macos", return_value=True), \ @@ -323,5 +396,35 @@ class TestForceAllowlist(unittest.TestCase): loopback_alias.force_allowlist("not-in-db", ["127.0.0.16/32"]) +class TestSmolvmDbPath(unittest.TestCase): + """The smolvm state-DB path is platform-derived: Application + Support on macOS, XDG data dir on Linux.""" + + def test_macos_path(self): + with patch.object(loopback_alias.platform, "system", return_value="Darwin"): + p = loopback_alias._smolvm_db_path() + self.assertEqual( + ("Library", "Application Support", "smolvm", "server", "smolvm.db"), + p.parts[-5:], + ) + + def 
test_linux_default_xdg_path(self): + env = {k: v for k, v in os.environ.items() if k != "XDG_DATA_HOME"} + with patch.object(loopback_alias.platform, "system", return_value="Linux"), \ + patch.dict(loopback_alias.os.environ, env, clear=True): + p = loopback_alias._smolvm_db_path() + self.assertEqual( + (".local", "share", "smolvm", "server", "smolvm.db"), + p.parts[-5:], + ) + + def test_linux_respects_xdg_data_home(self): + with patch.object(loopback_alias.platform, "system", return_value="Linux"), \ + patch.dict(loopback_alias.os.environ, + {"XDG_DATA_HOME": "/custom/data"}, clear=False): + p = loopback_alias._smolvm_db_path() + self.assertEqual(Path("/custom/data/smolvm/server/smolvm.db"), p) + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_smolmachines_util.py b/tests/unit/test_smolmachines_util.py index 436cce6..4d932aa 100644 --- a/tests/unit/test_smolmachines_util.py +++ b/tests/unit/test_smolmachines_util.py @@ -56,9 +56,14 @@ class TestBundleSubnet(unittest.TestCase): class TestPreflight(unittest.TestCase): def test_smolvm_present_returns_none(self): + # Pin macOS so the Linux KVM gate doesn't fire on a CI runner + # (ubuntu, no /dev/kvm) — this test isolates the PATH check. with patch( "bot_bottle.backend.smolmachines.util.shutil.which", return_value="/usr/local/bin/smolvm", + ), patch( + "bot_bottle.backend.smolmachines.util.platform.system", + return_value="Darwin", ): self.assertIsNone(smolmachines_preflight()) @@ -88,5 +93,63 @@ class TestPreflight(unittest.TestCase): self.assertIn("BOT_BOTTLE_BACKEND=docker", msg) +class TestKvmPreflight(unittest.TestCase): + """Linux-only KVM gate: smolvm needs /dev/kvm present and + accessible. 
macOS skips this entirely (Hypervisor.framework).""" + + def _run(self, *, system, exists, access): + with patch( + "bot_bottle.backend.smolmachines.util.shutil.which", + return_value="/usr/bin/smolvm", + ), patch( + "bot_bottle.backend.smolmachines.util.platform.system", + return_value=system, + ), patch( + "bot_bottle.backend.smolmachines.util.os.path.exists", + return_value=exists, + ), patch( + "bot_bottle.backend.smolmachines.util.os.access", + return_value=access, + ): + return smolmachines_preflight() + + def test_macos_skips_kvm_check(self): + # Even with /dev/kvm absent, macOS must not run the gate. + self.assertIsNone(self._run(system="Darwin", exists=False, access=False)) + + def test_linux_ok_returns_none(self): + self.assertIsNone(self._run(system="Linux", exists=True, access=True)) + + def test_linux_missing_device_dies(self): + with self.assertRaises(SystemExit): + self._run(system="Linux", exists=False, access=False) + + def test_linux_no_access_dies(self): + with self.assertRaises(SystemExit): + self._run(system="Linux", exists=True, access=False) + + def test_linux_missing_device_message(self): + import io + import sys + captured = io.StringIO() + with patch.object(sys, "stderr", captured): + with self.assertRaises(SystemExit): + self._run(system="Linux", exists=False, access=False) + msg = captured.getvalue() + self.assertIn("/dev/kvm", msg) + self.assertIn("kvm-intel", msg) + + def test_linux_no_access_message(self): + import io + import sys + captured = io.StringIO() + with patch.object(sys, "stderr", captured): + with self.assertRaises(SystemExit): + self._run(system="Linux", exists=True, access=False) + msg = captured.getvalue() + self.assertIn("kvm", msg) + self.assertIn("group", msg) + + if __name__ == "__main__": unittest.main()