diff --git a/bot_bottle/backend/smolmachines/launch.py b/bot_bottle/backend/smolmachines/launch.py index 14d5a33..fa043ba 100644 --- a/bot_bottle/backend/smolmachines/launch.py +++ b/bot_bottle/backend/smolmachines/launch.py @@ -21,7 +21,6 @@ from __future__ import annotations import dataclasses import os -import time from contextlib import ExitStack, contextmanager from pathlib import Path from typing import Callable, Generator @@ -94,200 +93,23 @@ def launch( via the ExitStack.""" stack = ExitStack() try: - # 1. Reserve a loopback alias for this bottle. macOS only - # routes 127.0.0.1 by default; the per-bottle alias is - # what bundles the docker port-publishes and TSI allowlist - # against, so this bottle can't reach other bottles' (or - # other host services') ports on the loopback. Lazy - # sudo-driven on first use per boot. No-op on Linux. - _loopback.ensure_pool() - loopback_ip = _loopback.allocate(plan.slug) + loopback_ip, network = _allocate_resources(plan, stack) + plan = _mint_certs(plan) + plan = _start_bundle(plan, network, loopback_ip, stack) + plan = _discover_urls(plan, loopback_ip) - # 2. Per-bottle docker bridge. - network = _bundle.bundle_network_name(plan.slug) - _bundle.create_bundle_network(network, plan.bundle_subnet, plan.bundle_gateway) - stack.callback(_bundle.remove_bundle_network, network) - - # 2. Mint per-bottle CAs and update the inner Plans with - # their launch-time paths. pipelock always runs in the - # bundle; egress's CA is only minted when the bottle - # declares routes (otherwise egress runs idle without - # MITM and the CA files would be unused). 
- ca_cert_host, ca_key_host = pipelock_tls_init(plan.proxy_plan.yaml_path.parent) - proxy_plan = dataclasses.replace( - plan.proxy_plan, - ca_cert_host_path=ca_cert_host, - ca_key_host_path=ca_key_host, - ) - egress_plan = plan.egress_plan - if egress_plan.routes: - egress_ca_host, egress_ca_cert_only = egress_tls_init( - plan.egress_plan.routes_path.parent, - ) - egress_plan = dataclasses.replace( - egress_plan, - mitmproxy_ca_host_path=egress_ca_host, - mitmproxy_ca_cert_only_host_path=egress_ca_cert_only, - pipelock_ca_host_path=ca_cert_host, - # On smolmachines, egress's upstream is pipelock - # on the bundle's localhost — they're in the same - # container's network namespace. - pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL, - ) - plan = dataclasses.replace( - plan, proxy_plan=proxy_plan, egress_plan=egress_plan, - ) - - # 3. Build the BundleLaunchSpec from the (now-resolved) - # inner Plans: daemon subset, env, bind-mounts, and the - # loopback alias to bind published ports against. The - # spec's ports_to_publish list expands depending on which - # daemons the agent needs to reach from the smolvm guest. - bundle_spec = _bundle_launch_spec(plan, network, loopback_ip) - token_env = _resolve_token_env(plan, dict(os.environ)) - _bundle.ensure_bundle_image(bundle_spec.image) - _bundle.start_bundle(bundle_spec, env={**os.environ, **token_env}) - stack.callback(_bundle.stop_bundle, plan.slug) - - # 4. Discover the host-side ports docker assigned for the - # bundle's published container ports, and bind the - # agent's URLs to `:`. Docker - # container IPs (192.168.x.x in the daemon's bridge) - # aren't reachable from the smolvm guest on macOS — TSI - # uses macOS networking, and macOS sees the daemon's - # bridge via the published-port loopback forward only. - # - # Proxy hop order matches the docker backend: when the - # bottle declares egress routes, the agent's first hop is - # egress (for token injection), then pipelock. 
Without - # routes, the agent dials pipelock directly. Whichever - # one is "agent-facing" is the daemon whose port we - # publish on host loopback; the other stays bundle- - # internal as the upstream proxy. - if plan.egress_plan.routes: - agent_facing_port = _EGRESS_PORT - else: - agent_facing_port = _PIPELOCK_PORT - agent_facing_host_port = _bundle.bundle_host_port( - plan.slug, agent_facing_port, host_ip=loopback_ip, - ) - agent_proxy_url = f"http://{loopback_ip}:{agent_facing_host_port}" - agent_git_gate_host = "" - if plan.git_gate_plan.upstreams: - git_gate_host_port = _bundle.bundle_host_port( - plan.slug, _GIT_HTTP_PORT, host_ip=loopback_ip, - ) - agent_git_gate_host = f"{loopback_ip}:{git_gate_host_port}" - agent_supervise_url = "" - if plan.supervise_plan is not None: - supervise_host_port = _bundle.bundle_host_port( - plan.slug, _SUPERVISE_PORT, host_ip=loopback_ip, - ) - agent_supervise_url = f"http://{loopback_ip}:{supervise_host_port}/" - - # Stamp the URLs onto the plan + guest_env. provision_git - # and provision_supervise read the plan fields; the agent - # reads guest_env on every exec_agent. - # - # NO_PROXY has to include the per-bottle loopback alias — - # otherwise claude's HTTPS_PROXY catches direct calls to - # the supervise URL (`http://:/`) and proxies - # them through egress, which has no route for the alias - # and rejects with "Failed to connect". The smolmachines - # git-gate URL uses smart HTTP, so it also has to bypass - # the agent's HTTP_PROXY and go straight to the host- - # published git HTTP endpoint. Append rather than overwrite - # so prepare.py's - # `localhost,127.0.0.1` baseline stays in place. 
- existing_no_proxy = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1") - guest_env = { - **plan.guest_env, - "HTTPS_PROXY": agent_proxy_url, - "HTTP_PROXY": agent_proxy_url, - "NO_PROXY": f"{existing_no_proxy},{loopback_ip}", - } - if agent_git_gate_host: - guest_env["GIT_GATE_URL"] = f"http://{agent_git_gate_host}" - if agent_supervise_url: - guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url - plan = dataclasses.replace( - plan, - guest_env=guest_env, - agent_proxy_url=agent_proxy_url, - agent_git_gate_host=agent_git_gate_host, - agent_supervise_url=agent_supervise_url, - ) - - # 5. Build the agent image and pack it into a - # `.smolmachine` artifact (or hit the per-Dockerfile-digest - # cache). Runs here, not in prepare, so the docker-build - # output doesn't garble the dashboard's preflight modal: - # both the curses-endwin path and the tmux pane-routing - # path redirect stderr around `launch` already. + # Build the agent image and pack it into a `.smolmachine` + # artifact (or hit the per-Dockerfile-digest cache). Runs + # here, not in prepare, so the docker-build output doesn't + # garble the dashboard's preflight modal. agent_from_path = _ensure_smolmachine( plan.agent_image_ref, dockerfile=plan.agent_dockerfile_path, ) - # smolvm VM. --from carries the pre-packed .smolmachine - # artifact; --allow-cidr + -e carry the per-bottle TSI - # allowlist + env. The allowlist is the per-bottle - # loopback alias — narrowing it to one /32 keeps the - # agent from reaching other host loopback services or - # other bottles' published ports. Smolfile isn't usable - # here — smolvm 0.8.0 makes `--from` and `--smolfile` - # mutually exclusive. - _smolvm.machine_create( - plan.machine_name, - from_path=agent_from_path, - allow_cidrs=[f"{loopback_ip}/32"], - env=plan.guest_env, - ) - stack.callback(_smolvm.machine_delete, plan.machine_name) - # Workaround smolvm 0.8.0: `--allow-cidr` is silently - # dropped when combined with `--from`. 
Patch the persisted - # state DB to set the allowlist before start so the booted - # VM's TSI actually enforces. See loopback_alias's module - # docstring for the investigation that led here. - _loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"]) - _smolvm.machine_start(plan.machine_name) - stack.callback(_smolvm.machine_stop, plan.machine_name) + _launch_vm(plan, agent_from_path, loopback_ip, stack) + _init_vm(plan) - # 6. Repair filesystem ownership + perms that smolvm's - # pack process remapped to the host invoker's uid (501 - # on macOS) rather than preserving the image's expected - # ownership. - # - # - /home/node → node:node so the node user can write - # its own dotfiles (claude appendFileSync on - # ~/.claude.json otherwise bails with ENOENT/EPERM - # and the TUI hangs without surfacing the error). - # - /tmp + /var/tmp → root:root mode 1777 so non-root - # processes can create their per-uid scratch dirs - # (claude-code creates /tmp/claude-/ as soon as - # it spawns a Bash tool call). - # - # All folded into one sh -c so we only pay one - # machine_exec round trip — back-to-back exec calls - # right after machine_start hit a SIGKILL race in - # libkrun's exec channel (see provision_ca for the - # other half of this same workaround). - _smolvm.machine_exec(plan.machine_name, [ - "sh", "-c", - "chown -R node:node /home/node && " - "chown root:root /tmp /var/tmp && " - "chmod 1777 /tmp /var/tmp", - ]) - - # Wait briefly for the VM to settle. Back-to-back smolvm - # machine_exec calls immediately after machine_start - # occasionally SIGKILL the in-VM child at ~100ms (looks - # like a VM warm-up race in libkrun's exec channel). - # 1.5s is empirically enough to dodge it; provisioning - # already takes seconds so the wait is amortized. - time.sleep(1.5) - - # 7. Provision (CA / prompt / skills / git / supervise). 
def _allocate_resources(
    plan: SmolmachinesBottlePlan,
    stack: ExitStack,
) -> tuple[str, str]:
    """Claim the bottle's loopback alias and its docker bridge network.

    The alias is what the bundle's published ports and the guest's
    TSI allowlist are bound to, so the agent cannot reach other
    bottles' (or other host services') ports on loopback. macOS only
    routes 127.0.0.1 by default, hence the alias pool; on Linux the
    loopback step is a no-op.

    Returns `(loopback_ip, network_name)`. Removal of the bridge
    network is registered on `stack`."""
    _loopback.ensure_pool()
    alias_ip = _loopback.allocate(plan.slug)

    bridge_name = _bundle.bundle_network_name(plan.slug)
    _bundle.create_bundle_network(
        bridge_name, plan.bundle_subnet, plan.bundle_gateway,
    )
    stack.callback(_bundle.remove_bundle_network, bridge_name)
    return alias_ip, bridge_name


def _mint_certs(plan: SmolmachinesBottlePlan) -> SmolmachinesBottlePlan:
    """Mint the per-bottle CAs and return a plan carrying their paths.

    pipelock's CA is always minted because pipelock always runs in
    the bundle. The egress CA is minted only for bottles that declare
    routes; without routes egress idles without MITM and the CA files
    would go unused."""
    pipelock_cert, pipelock_key = pipelock_tls_init(
        plan.proxy_plan.yaml_path.parent,
    )
    resolved_proxy = dataclasses.replace(
        plan.proxy_plan,
        ca_cert_host_path=pipelock_cert,
        ca_key_host_path=pipelock_key,
    )

    # No routes: egress keeps its prepare-time plan untouched.
    if not plan.egress_plan.routes:
        return dataclasses.replace(
            plan, proxy_plan=resolved_proxy, egress_plan=plan.egress_plan,
        )

    mitm_ca, mitm_ca_cert_only = egress_tls_init(
        plan.egress_plan.routes_path.parent,
    )
    resolved_egress = dataclasses.replace(
        plan.egress_plan,
        mitmproxy_ca_host_path=mitm_ca,
        mitmproxy_ca_cert_only_host_path=mitm_ca_cert_only,
        pipelock_ca_host_path=pipelock_cert,
        # egress and pipelock share the bundle container's network
        # namespace, so egress reaches its upstream on localhost.
        pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
    )
    return dataclasses.replace(
        plan, proxy_plan=resolved_proxy, egress_plan=resolved_egress,
    )


def _start_bundle(
    plan: SmolmachinesBottlePlan,
    network: str,
    loopback_ip: str,
    stack: ExitStack,
) -> SmolmachinesBottlePlan:
    """Start the sidecar bundle container for this bottle.

    Builds the BundleLaunchSpec from the resolved plan, resolves the
    token env, makes sure the bundle image exists, starts the
    container, and registers its teardown on `stack`. The plan is
    returned as received."""
    spec = _bundle_launch_spec(plan, network, loopback_ip)
    resolved_tokens = _resolve_token_env(plan, dict(os.environ))
    _bundle.ensure_bundle_image(spec.image)

    # Token values are layered over the host environment.
    launch_env = dict(os.environ)
    launch_env.update(resolved_tokens)
    _bundle.start_bundle(spec, env=launch_env)

    stack.callback(_bundle.stop_bundle, plan.slug)
    return plan


def _discover_urls(
    plan: SmolmachinesBottlePlan,
    loopback_ip: str,
) -> SmolmachinesBottlePlan:
    """Resolve the bundle's published host ports into agent-facing URLs.

    Docker container IPs aren't reachable from the smolvm guest on
    macOS, so every URL points at `loopback_ip` plus the host port
    docker assigned to the published container port.

    Hop order: with egress routes declared the agent dials egress
    first (token injection) and egress forwards to pipelock; without
    routes the agent dials pipelock directly. NO_PROXY gains the
    loopback alias so the supervise and git-gate URLs bypass the
    proxy; it is appended so the existing baseline stays in place.

    Returns the plan with the URLs and `guest_env` stamped in."""

    def published(container_port):
        # Host-side port docker bound for `container_port` on the alias.
        return _bundle.bundle_host_port(
            plan.slug, container_port, host_ip=loopback_ip,
        )

    first_hop_port = _EGRESS_PORT if plan.egress_plan.routes else _PIPELOCK_PORT
    proxy_url = f"http://{loopback_ip}:{published(first_hop_port)}"

    git_gate_host = ""
    if plan.git_gate_plan.upstreams:
        git_gate_host = f"{loopback_ip}:{published(_GIT_HTTP_PORT)}"

    supervise_url = ""
    if plan.supervise_plan is not None:
        supervise_url = f"http://{loopback_ip}:{published(_SUPERVISE_PORT)}/"

    no_proxy_base = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1")
    env = dict(plan.guest_env)
    env.update({
        "HTTPS_PROXY": proxy_url,
        "HTTP_PROXY": proxy_url,
        "NO_PROXY": f"{no_proxy_base},{loopback_ip}",
    })
    if git_gate_host:
        env["GIT_GATE_URL"] = f"http://{git_gate_host}"
    if supervise_url:
        env["MCP_SUPERVISE_URL"] = supervise_url

    return dataclasses.replace(
        plan,
        guest_env=env,
        agent_proxy_url=proxy_url,
        agent_git_gate_host=git_gate_host,
        agent_supervise_url=supervise_url,
    )


def _launch_vm(
    plan: SmolmachinesBottlePlan,
    agent_from_path: Path,
    loopback_ip: str,
    stack: ExitStack,
) -> None:
    """Create the smolvm machine, pin its allowlist, and start it.

    The allowlist is the bottle's loopback alias as a /32, so the
    guest can only reach this bottle's bundle ports. Delete and stop
    are registered on `stack` right after the step they undo.
    Smolfile isn't usable here: smolvm 0.8.0 makes --from and
    --smolfile mutually exclusive."""
    machine = plan.machine_name
    guest_cidr = f"{loopback_ip}/32"

    _smolvm.machine_create(
        machine,
        from_path=agent_from_path,
        allow_cidrs=[guest_cidr],
        env=plan.guest_env,
    )
    stack.callback(_smolvm.machine_delete, machine)

    # smolvm 0.8.0 silently drops `--allow-cidr` when it is combined
    # with `--from`; write the allowlist into the persisted state DB
    # before boot so the VM's TSI actually enforces it.
    _loopback.force_allowlist(machine, [guest_cidr])

    _smolvm.machine_start(machine)
    stack.callback(_smolvm.machine_stop, machine)


def _init_vm(plan: SmolmachinesBottlePlan) -> None:
    """Fix guest filesystem ownership, then wait for the exec channel.

    smolvm's pack step remaps files to the host invoker's uid, so
    /home/node is handed back to node:node and /tmp + /var/tmp become
    root-owned with mode 1777. The three commands run as a single
    `sh -c` so only one exec follows machine_start (back-to-back
    execs hit a libkrun exec-channel race). wait_exec_ready then
    polls until the channel is usable for the provision calls that
    follow."""
    repair_script = " && ".join([
        "chown -R node:node /home/node",
        "chown root:root /tmp /var/tmp",
        "chmod 1777 /tmp /var/tmp",
    ])
    _smolvm.machine_exec(plan.machine_name, ["sh", "-c", repair_script])
    _smolvm.wait_exec_ready(plan.machine_name)
The bundle is NOT reachable by - # bridge IP from the smolvm guest, so the - # PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation - # isn't needed: the agent can only dial whatever daemon's - # host port we publish, period. + # bridge IP from the smolvm guest on macOS — TSI uses macOS + # networking, and macOS sees the daemon's bridge via the + # published-port loopback forward only. # --- pipelock --------------------------------------------- pp = plan.proxy_plan diff --git a/bot_bottle/backend/smolmachines/loopback_alias.py b/bot_bottle/backend/smolmachines/loopback_alias.py index 7fc65e6..6f3033c 100644 --- a/bot_bottle/backend/smolmachines/loopback_alias.py +++ b/bot_bottle/backend/smolmachines/loopback_alias.py @@ -45,6 +45,7 @@ alias gets handed to a new bottle.""" from __future__ import annotations +import fcntl import json import os import platform @@ -83,6 +84,14 @@ _POOL_START = 16 _POOL_END = 31 # inclusive +# File lock that serialises concurrent allocate() calls so two +# simultaneous launches can't read the same docker state and claim +# the same alias. Narrowed to the allocate() call itself; docker run +# runs after the lock is released. Once the container is running it +# appears in docker state and future allocate() calls will see it. +_ALLOC_LOCK_PATH = Path.home() / ".cache" / "bot-bottle" / "smolmachines.lock" + + # Loopback aliases pool: 127.0.0...127.0.0.. def _pool_addresses() -> list[str]: return [f"127.0.0.{i}" for i in range(_POOL_START, _POOL_END + 1)] @@ -179,9 +188,20 @@ def allocate(slug: str) -> str: On non-macOS the whole `127.0.0.0/8` is loopback by default; `127.0.0.1` is fine to share and we skip the alias dance. This still returns a deterministic address so launch.py's - callers don't have to branch on platform.""" + callers don't have to branch on platform. 
def wait_exec_ready(name: str, *, timeout: float = 5.0) -> None:
    """Poll `machine exec true` until exit 0 or `timeout` elapses.

    Replaces `time.sleep(1.5)` after `machine_start`: libkrun's exec
    channel needs a brief warm-up before back-to-back exec calls are
    safe. Polling returns as soon as a probe exits 0.

    Probes are spaced by a delay that starts at 0.1s, doubles after
    each failure, and is capped at 0.5s (and at the time remaining).
    If no probe succeeds before the deadline, `die()` is called with
    a message naming the machine, the timeout, and the exit code of
    the last failed probe so the failure mode can be diagnosed."""
    deadline = time.monotonic() + timeout
    delay = 0.1
    last_returncode: int | None = None
    while time.monotonic() < deadline:
        r = machine_exec(name, ["true"])
        if r.returncode == 0:
            return
        last_returncode = r.returncode
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(delay, remaining))
        delay = min(delay * 2, 0.5)
    # A non-positive timeout skips the loop entirely; say so rather
    # than reporting a probe result that never existed.
    if last_returncode is None:
        detail = "no probe was attempted"
    else:
        detail = f"last probe exit code {last_returncode}"
    die(
        f"smolvm machine {name!r}: exec channel not ready after "
        f"{timeout:g}s ({detail}) — VM may have failed to boot."
    )
class TestAllocateLock(unittest.TestCase):
    """allocate() on macOS acquires a file lock so concurrent calls
    serialise rather than racing on docker state."""

    def test_acquires_exclusive_lock_on_macos(self):
        import fcntl as fcntl_mod
        flock_calls: list[int] = []

        def record_flock(fd, op):
            flock_calls.append(op)

        with tempfile.TemporaryDirectory() as tmp:
            lock_path = Path(tmp) / "smolmachines.lock"
            with patch.object(loopback_alias, "_is_macos", return_value=True), \
                 patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \
                 patch.object(loopback_alias, "_aliases_in_use", return_value=set()), \
                 patch.object(loopback_alias.fcntl, "flock",
                              side_effect=record_flock):
                loopback_alias.allocate("demo")

        self.assertIn(fcntl_mod.LOCK_EX, flock_calls)

    def test_no_lock_on_linux(self):
        # Linux early-returns before touching the lock file.
        with patch.object(loopback_alias, "_is_macos", return_value=False), \
             patch.object(loopback_alias.fcntl, "flock") as flock:
            loopback_alias.allocate("demo")
        flock.assert_not_called()

    def test_sequential_allocations_with_shared_lock_are_serialised(self):
        # Two sequential calls share the same lock file. The second
        # call sees {127.0.0.16} in use (as if the first caller's
        # docker run completed between the two lock acquisitions) and
        # returns the next alias. This covers lock-file reuse only;
        # contention is covered by the concurrent test below.
        in_use_seq = [set(), {"127.0.0.16"}]

        with tempfile.TemporaryDirectory() as tmp:
            lock_path = Path(tmp) / "smolmachines.lock"
            results: list[str] = []
            for _ in range(2):
                with patch.object(loopback_alias, "_is_macos", return_value=True), \
                     patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \
                     patch.object(loopback_alias, "_aliases_in_use",
                                  return_value=in_use_seq.pop(0)):
                    results.append(loopback_alias.allocate("demo"))

        self.assertEqual(["127.0.0.16", "127.0.0.17"], results)

    def test_concurrent_allocations_enter_critical_section_one_at_a_time(self):
        # Real flock, real threads. Each allocate() opens the lock
        # file itself, so the two callers hold separate open file
        # descriptions and the second must block until the first
        # leaves. The stub lingers inside the critical section and
        # records how many callers were ever inside at once.
        counter_guard = threading.Lock()
        linger = threading.Event()  # never set; wait() is a bounded pause
        inside = 0
        peak = 0

        def slow_aliases_in_use():
            nonlocal inside, peak
            with counter_guard:
                inside += 1
                peak = max(peak, inside)
            linger.wait(0.05)
            with counter_guard:
                inside -= 1
            return set()

        with tempfile.TemporaryDirectory() as tmp:
            lock_path = Path(tmp) / "smolmachines.lock"
            with patch.object(loopback_alias, "_is_macos", return_value=True), \
                 patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \
                 patch.object(loopback_alias, "_aliases_in_use",
                              side_effect=slow_aliases_in_use):
                workers = [
                    threading.Thread(
                        target=loopback_alias.allocate, args=("demo",),
                    )
                    for _ in range(2)
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join(timeout=5)

        self.assertFalse(any(worker.is_alive() for worker in workers))
        self.assertEqual(1, peak)


class TestWaitExecReady(unittest.TestCase):
    """wait_exec_ready polls machine_exec(name, ["true"]) until it
    returns 0, then exits. On timeout it calls die()."""

    def test_returns_immediately_when_exec_succeeds_first_try(self):
        with patch.object(smolvm_mod, "machine_exec",
                          return_value=SmolvmRunResult(0, "", "")) as m:
            wait_exec_ready("vm-x")
        m.assert_called_once_with("vm-x", ["true"])

    def test_retries_on_nonzero_and_returns_on_success(self):
        results = [
            SmolvmRunResult(1, "", "not ready"),
            SmolvmRunResult(1, "", "not ready"),
            SmolvmRunResult(0, "", ""),
        ]
        with patch.object(smolvm_mod, "machine_exec",
                          side_effect=results) as m, \
             patch.object(smolvm_mod.time, "sleep"):
            wait_exec_ready("vm-x")
        self.assertEqual(3, m.call_count)

    def test_dies_on_timeout(self):
        # machine_exec always returns non-zero. monotonic() is read
        # three times: once to set the deadline, once for the loop
        # check, and once for the remaining-time check after the
        # failed probe. The third value is past the deadline, so the
        # loop breaks before any sleep and die() is called.
        ticks = [0.0, 0.0, 10.0]
        with patch.object(smolvm_mod, "machine_exec",
                          return_value=SmolvmRunResult(1, "", "")), \
             patch.object(smolvm_mod.time, "monotonic",
                          side_effect=ticks), \
             patch.object(smolvm_mod.time, "sleep"), \
             patch.object(smolvm_mod, "die",
                          side_effect=SystemExit("die")) as die_mock:
            with self.assertRaises(SystemExit):
                wait_exec_ready("vm-x", timeout=5.0)
        die_mock.assert_called_once()
        self.assertIn("vm-x", die_mock.call_args.args[0])