refactor(smolmachines): decompose launch(), add wait_exec_ready, file-lock allocate() (PRD 0032)

Decompose the 207-line launch() into six named helpers: _allocate_resources, _mint_certs, _start_bundle, _discover_urls, _launch_vm, _init_vm. Each has explicit inputs/outputs and is independently testable. Replace time.sleep(1.5) with smolvm.wait_exec_ready(), which polls `machine exec true` with exponential backoff. Exits as soon as the exec channel is ready; dies loudly with a timeout message instead of silently leaving the VM in an unknown state. File-lock loopback_alias.allocate() with fcntl.flock(LOCK_EX) so concurrent bottle launches can't race on docker state and claim the same alias.
2026-06-02 06:23:39 +00:00
parent fe97b6014d
commit 0d922371b0
5 changed files with 326 additions and 193 deletions
@@ -21,7 +21,6 @@ from __future__ import annotations
 import dataclasses
 import os
 import time
 from contextlib import ExitStack, contextmanager
 from pathlib import Path
 from typing import Callable, Generator
@@ -94,200 +93,23 @@ def launch(
    via the ExitStack."""
    stack = ExitStack()
    try:
-        # 1. Reserve a loopback alias for this bottle. macOS only
+        loopback_ip, network = _allocate_resources(plan, stack)
-        # routes 127.0.0.1 by default; the per-bottle alias is
+        plan = _mint_certs(plan)
-        # what bundles the docker port-publishes and TSI allowlist
+        plan = _start_bundle(plan, network, loopback_ip, stack)
-        # against, so this bottle can't reach other bottles' (or
+        plan = _discover_urls(plan, loopback_ip)
        # other host services') ports on the loopback. Lazy
        # sudo-driven on first use per boot. No-op on Linux.
        _loopback.ensure_pool()
        loopback_ip = _loopback.allocate(plan.slug)
-        # 2. Per-bottle docker bridge.
+        # Build the agent image and pack it into a `.smolmachine`
-        network = _bundle.bundle_network_name(plan.slug)
+        # artifact (or hit the per-Dockerfile-digest cache). Runs
-        _bundle.create_bundle_network(network, plan.bundle_subnet, plan.bundle_gateway)
+        # here, not in prepare, so the docker-build output doesn't
-        stack.callback(_bundle.remove_bundle_network, network)
+        # garble the dashboard's preflight modal.
        # 2. Mint per-bottle CAs and update the inner Plans with
        # their launch-time paths. pipelock always runs in the
        # bundle; egress's CA is only minted when the bottle
        # declares routes (otherwise egress runs idle without
        # MITM and the CA files would be unused).
        ca_cert_host, ca_key_host = pipelock_tls_init(plan.proxy_plan.yaml_path.parent)
        proxy_plan = dataclasses.replace(
            plan.proxy_plan,
            ca_cert_host_path=ca_cert_host,
            ca_key_host_path=ca_key_host,
        )
        egress_plan = plan.egress_plan
        if egress_plan.routes:
            egress_ca_host, egress_ca_cert_only = egress_tls_init(
                plan.egress_plan.routes_path.parent,
            )
            egress_plan = dataclasses.replace(
                egress_plan,
                mitmproxy_ca_host_path=egress_ca_host,
                mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
                pipelock_ca_host_path=ca_cert_host,
                # On smolmachines, egress's upstream is pipelock
                # on the bundle's localhost — they're in the same
                # container's network namespace.
                pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
            )
        plan = dataclasses.replace(
            plan, proxy_plan=proxy_plan, egress_plan=egress_plan,
        )
        # 3. Build the BundleLaunchSpec from the (now-resolved)
        # inner Plans: daemon subset, env, bind-mounts, and the
        # loopback alias to bind published ports against. The
        # spec's ports_to_publish list expands depending on which
        # daemons the agent needs to reach from the smolvm guest.
        bundle_spec = _bundle_launch_spec(plan, network, loopback_ip)
        token_env = _resolve_token_env(plan, dict(os.environ))
        _bundle.ensure_bundle_image(bundle_spec.image)
        _bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
        stack.callback(_bundle.stop_bundle, plan.slug)
        # 4. Discover the host-side ports docker assigned for the
        # bundle's published container ports, and bind the
        # agent's URLs to `<loopback_ip>:<host port>`. Docker
        # container IPs (192.168.x.x in the daemon's bridge)
        # aren't reachable from the smolvm guest on macOS — TSI
        # uses macOS networking, and macOS sees the daemon's
        # bridge via the published-port loopback forward only.
        #
        # Proxy hop order matches the docker backend: when the
        # bottle declares egress routes, the agent's first hop is
        # egress (for token injection), then pipelock. Without
        # routes, the agent dials pipelock directly. Whichever
        # one is "agent-facing" is the daemon whose port we
        # publish on host loopback; the other stays bundle-
        # internal as the upstream proxy.
        if plan.egress_plan.routes:
            agent_facing_port = _EGRESS_PORT
        else:
            agent_facing_port = _PIPELOCK_PORT
        agent_facing_host_port = _bundle.bundle_host_port(
            plan.slug, agent_facing_port, host_ip=loopback_ip,
        )
        agent_proxy_url = f"http://{loopback_ip}:{agent_facing_host_port}"
        agent_git_gate_host = ""
        if plan.git_gate_plan.upstreams:
            git_gate_host_port = _bundle.bundle_host_port(
                plan.slug, _GIT_HTTP_PORT, host_ip=loopback_ip,
            )
            agent_git_gate_host = f"{loopback_ip}:{git_gate_host_port}"
        agent_supervise_url = ""
        if plan.supervise_plan is not None:
            supervise_host_port = _bundle.bundle_host_port(
                plan.slug, _SUPERVISE_PORT, host_ip=loopback_ip,
            )
            agent_supervise_url = f"http://{loopback_ip}:{supervise_host_port}/"
        # Stamp the URLs onto the plan + guest_env. provision_git
        # and provision_supervise read the plan fields; the agent
        # reads guest_env on every exec_agent.
        #
        # NO_PROXY has to include the per-bottle loopback alias —
        # otherwise claude's HTTPS_PROXY catches direct calls to
        # the supervise URL (`http://<alias>:<port>/`) and proxies
        # them through egress, which has no route for the alias
        # and rejects with "Failed to connect". The smolmachines
        # git-gate URL uses smart HTTP, so it also has to bypass
        # the agent's HTTP_PROXY and go straight to the host-
        # published git HTTP endpoint. Append rather than overwrite
        # so prepare.py's
        # `localhost,127.0.0.1` baseline stays in place.
        existing_no_proxy = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1")
        guest_env = {
            **plan.guest_env,
            "HTTPS_PROXY": agent_proxy_url,
            "HTTP_PROXY":  agent_proxy_url,
            "NO_PROXY":    f"{existing_no_proxy},{loopback_ip}",
        }
        if agent_git_gate_host:
            guest_env["GIT_GATE_URL"] = f"http://{agent_git_gate_host}"
        if agent_supervise_url:
            guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
        plan = dataclasses.replace(
            plan,
            guest_env=guest_env,
            agent_proxy_url=agent_proxy_url,
            agent_git_gate_host=agent_git_gate_host,
            agent_supervise_url=agent_supervise_url,
        )
        # 5. Build the agent image and pack it into a
        # `.smolmachine` artifact (or hit the per-Dockerfile-digest
        # cache). Runs here, not in prepare, so the docker-build
        # output doesn't garble the dashboard's preflight modal:
        # both the curses-endwin path and the tmux pane-routing
        # path redirect stderr around `launch` already.
        agent_from_path = _ensure_smolmachine(
            plan.agent_image_ref,
            dockerfile=plan.agent_dockerfile_path,
        )
-        # smolvm VM. --from carries the pre-packed .smolmachine
+        _launch_vm(plan, agent_from_path, loopback_ip, stack)
-        # artifact; --allow-cidr + -e carry the per-bottle TSI
+        _init_vm(plan)
        # allowlist + env. The allowlist is the per-bottle
        # loopback alias — narrowing it to one /32 keeps the
        # agent from reaching other host loopback services or
        # other bottles' published ports. Smolfile isn't usable
        # here — smolvm 0.8.0 makes `--from` and `--smolfile`
        # mutually exclusive.
        _smolvm.machine_create(
            plan.machine_name,
            from_path=agent_from_path,
            allow_cidrs=[f"{loopback_ip}/32"],
            env=plan.guest_env,
        )
        stack.callback(_smolvm.machine_delete, plan.machine_name)
        # Workaround smolvm 0.8.0: `--allow-cidr` is silently
        # dropped when combined with `--from`. Patch the persisted
        # state DB to set the allowlist before start so the booted
        # VM's TSI actually enforces. See loopback_alias's module
        # docstring for the investigation that led here.
        _loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"])
        _smolvm.machine_start(plan.machine_name)
        stack.callback(_smolvm.machine_stop, plan.machine_name)
        # 6. Repair filesystem ownership + perms that smolvm's
        # pack process remapped to the host invoker's uid (501
        # on macOS) rather than preserving the image's expected
        # ownership.
        #
        #  - /home/node → node:node so the node user can write
        #    its own dotfiles (claude appendFileSync on
        #    ~/.claude.json otherwise bails with ENOENT/EPERM
        #    and the TUI hangs without surfacing the error).
        #  - /tmp + /var/tmp → root:root mode 1777 so non-root
        #    processes can create their per-uid scratch dirs
        #    (claude-code creates /tmp/claude-<uid>/ as soon as
        #    it spawns a Bash tool call).
        #
        # All folded into one sh -c so we only pay one
        # machine_exec round trip — back-to-back exec calls
        # right after machine_start hit a SIGKILL race in
        # libkrun's exec channel (see provision_ca for the
        # other half of this same workaround).
        _smolvm.machine_exec(plan.machine_name, [
            "sh", "-c",
            "chown -R node:node /home/node && "
            "chown root:root /tmp /var/tmp && "
            "chmod 1777 /tmp /var/tmp",
        ])
        # Wait briefly for the VM to settle. Back-to-back smolvm
        # machine_exec calls immediately after machine_start
        # occasionally SIGKILL the in-VM child at ~100ms (looks
        # like a VM warm-up race in libkrun's exec channel).
        # 1.5s is empirically enough to dodge it; provisioning
        # already takes seconds so the wait is amortized.
        time.sleep(1.5)
        # 7. Provision (CA / prompt / skills / git / supervise).
        prompt_path = provision(plan, plan.machine_name)
        yield SmolmachinesBottle(
@@ -301,6 +123,180 @@ def launch(
        stack.close()
 def _allocate_resources(
    plan: SmolmachinesBottlePlan,
    stack: ExitStack,
 ) -> tuple[str, str]:
    """Reserve a loopback alias and create the per-bottle docker bridge.
    macOS only routes 127.0.0.1 by default; the per-bottle alias
    scopes TSI's allowlist to this bottle's published ports so the
    agent can't reach other bottles' or host services' ports on
    loopback. No-op on Linux."""
    _loopback.ensure_pool()
    loopback_ip = _loopback.allocate(plan.slug)
    network = _bundle.bundle_network_name(plan.slug)
    _bundle.create_bundle_network(network, plan.bundle_subnet, plan.bundle_gateway)
    stack.callback(_bundle.remove_bundle_network, network)
    return loopback_ip, network
 def _mint_certs(plan: SmolmachinesBottlePlan) -> SmolmachinesBottlePlan:
    """Mint per-bottle CAs and return the plan with CA paths filled.
    Pipelock always runs in the bundle. Egress's CA is only minted
    when the bottle declares routes — otherwise egress runs idle
    without MITM and the CA files would be unused."""
    ca_cert_host, ca_key_host = pipelock_tls_init(plan.proxy_plan.yaml_path.parent)
    proxy_plan = dataclasses.replace(
        plan.proxy_plan,
        ca_cert_host_path=ca_cert_host,
        ca_key_host_path=ca_key_host,
    )
    egress_plan = plan.egress_plan
    if egress_plan.routes:
        egress_ca_host, egress_ca_cert_only = egress_tls_init(
            plan.egress_plan.routes_path.parent,
        )
        egress_plan = dataclasses.replace(
            egress_plan,
            mitmproxy_ca_host_path=egress_ca_host,
            mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
            pipelock_ca_host_path=ca_cert_host,
            # On smolmachines, egress's upstream is pipelock on the
            # bundle's localhost — they're in the same container's
            # network namespace.
            pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
        )
    return dataclasses.replace(plan, proxy_plan=proxy_plan, egress_plan=egress_plan)
 def _start_bundle(
    plan: SmolmachinesBottlePlan,
    network: str,
    loopback_ip: str,
    stack: ExitStack,
 ) -> SmolmachinesBottlePlan:
    """Build the BundleLaunchSpec, resolve token env, start the
    sidecar bundle container, and register teardown."""
    bundle_spec = _bundle_launch_spec(plan, network, loopback_ip)
    token_env = _resolve_token_env(plan, dict(os.environ))
    _bundle.ensure_bundle_image(bundle_spec.image)
    _bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
    stack.callback(_bundle.stop_bundle, plan.slug)
    return plan
 def _discover_urls(
    plan: SmolmachinesBottlePlan,
    loopback_ip: str,
 ) -> SmolmachinesBottlePlan:
    """Discover host-side ports for published container ports and
    return the plan with URLs + guest_env stamped in.
    Docker container IPs (192.168.x.x in the daemon's bridge)
    aren't reachable from the smolvm guest on macOS — TSI uses
    macOS networking, and macOS sees the daemon's bridge via the
    published-port loopback forward only.
    Proxy hop order: when the bottle declares egress routes, the
    agent's first hop is egress (for token injection), then
    pipelock. Without routes, the agent dials pipelock directly.
    NO_PROXY includes the per-bottle loopback alias so the
    supervise + git-gate URLs bypass HTTPS_PROXY."""
    if plan.egress_plan.routes:
        agent_facing_port = _EGRESS_PORT
    else:
        agent_facing_port = _PIPELOCK_PORT
    agent_facing_host_port = _bundle.bundle_host_port(
        plan.slug, agent_facing_port, host_ip=loopback_ip,
    )
    agent_proxy_url = f"http://{loopback_ip}:{agent_facing_host_port}"
    agent_git_gate_host = ""
    if plan.git_gate_plan.upstreams:
        git_gate_host_port = _bundle.bundle_host_port(
            plan.slug, _GIT_HTTP_PORT, host_ip=loopback_ip,
        )
        agent_git_gate_host = f"{loopback_ip}:{git_gate_host_port}"
    agent_supervise_url = ""
    if plan.supervise_plan is not None:
        supervise_host_port = _bundle.bundle_host_port(
            plan.slug, _SUPERVISE_PORT, host_ip=loopback_ip,
        )
        agent_supervise_url = f"http://{loopback_ip}:{supervise_host_port}/"
    existing_no_proxy = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1")
    guest_env = {
        **plan.guest_env,
        "HTTPS_PROXY": agent_proxy_url,
        "HTTP_PROXY":  agent_proxy_url,
        "NO_PROXY":    f"{existing_no_proxy},{loopback_ip}",
    }
    if agent_git_gate_host:
        guest_env["GIT_GATE_URL"] = f"http://{agent_git_gate_host}"
    if agent_supervise_url:
        guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
    return dataclasses.replace(
        plan,
        guest_env=guest_env,
        agent_proxy_url=agent_proxy_url,
        agent_git_gate_host=agent_git_gate_host,
        agent_supervise_url=agent_supervise_url,
    )
 def _launch_vm(
    plan: SmolmachinesBottlePlan,
    agent_from_path: Path,
    loopback_ip: str,
    stack: ExitStack,
 ) -> None:
    """Create, patch, and start the smolvm VM; register teardown.
    --allow-cidr is the per-bottle loopback alias so the guest can
    only reach this bottle's bundle ports. force_allowlist patches
    smolvm 0.8.0's silent-drop of --allow-cidr when combined with
    --from. Smolfile isn't usable here — smolvm 0.8.0 makes --from
    and --smolfile mutually exclusive."""
    _smolvm.machine_create(
        plan.machine_name,
        from_path=agent_from_path,
        allow_cidrs=[f"{loopback_ip}/32"],
        env=plan.guest_env,
    )
    stack.callback(_smolvm.machine_delete, plan.machine_name)
    # Workaround smolvm 0.8.0: `--allow-cidr` is silently dropped
    # when combined with `--from`. Patch the persisted state DB
    # before start so the booted VM's TSI actually enforces.
    _loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"])
    _smolvm.machine_start(plan.machine_name)
    stack.callback(_smolvm.machine_stop, plan.machine_name)
 def _init_vm(plan: SmolmachinesBottlePlan) -> None:
    """Repair filesystem ownership and wait for exec channel readiness.
    Ownership repair: smolvm's pack process remaps files to the host
    invoker's uid (501 on macOS). /home/node must be node:node so
    Claude Code can write ~/.claude.json; /tmp + /var/tmp need root
    mode 1777 so non-root processes can create per-uid scratch dirs.
    All folded into one sh -c to avoid back-to-back exec calls
    immediately after machine_start (libkrun exec-channel race).
    wait_exec_ready polls until the exec channel is ready for the
    subsequent provision calls, replacing the empirical sleep."""
    _smolvm.machine_exec(plan.machine_name, [
        "sh", "-c",
        "chown -R node:node /home/node && "
        "chown root:root /tmp /var/tmp && "
        "chmod 1777 /tmp /var/tmp",
    ])
    _smolvm.wait_exec_ready(plan.machine_name)
 def _bundle_launch_spec(
    plan: SmolmachinesBottlePlan, network: str, loopback_ip: str,
 ) -> _bundle.BundleLaunchSpec:
@@ -324,10 +320,9 @@ def _bundle_launch_spec(
    # is "agent-facing" gets its port published on the host
    # loopback (see `_ensure_smolmachine`'s discovery loop) and the
    # other stays bundle-internal. The bundle is NOT reachable by
-    # bridge IP from the smolvm guest, so the
+    # bridge IP from the smolvm guest on macOS — TSI uses macOS
-    # PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation
+    # networking, and macOS sees the daemon's bridge via the
-    # isn't needed: the agent can only dial whatever daemon's
+    # published-port loopback forward only.
    # host port we publish, period.
    # --- pipelock ---------------------------------------------
    pp = plan.proxy_plan
@@ -45,6 +45,7 @@ alias gets handed to a new bottle."""
 from __future__ import annotations
 import fcntl
 import json
 import os
 import platform
@@ -83,6 +84,14 @@ _POOL_START = 16
 _POOL_END = 31  # inclusive
 # File lock that serialises concurrent allocate() calls so two
 # simultaneous launches can't read the same docker state and claim
 # the same alias. Narrowed to the allocate() call itself; docker run
 # runs after the lock is released. Once the container is running it
 # appears in docker state and future allocate() calls will see it.
 _ALLOC_LOCK_PATH = Path.home() / ".cache" / "bot-bottle" / "smolmachines.lock"
 # Loopback aliases pool: 127.0.0.<start>..127.0.0.<end>.
 def _pool_addresses() -> list[str]:
    return [f"127.0.0.{i}" for i in range(_POOL_START, _POOL_END + 1)]
@@ -179,9 +188,20 @@ def allocate(slug: str) -> str:
    On non-macOS the whole `127.0.0.0/8` is loopback by default;
    `127.0.0.1` is fine to share and we skip the alias dance.
    This still returns a deterministic address so launch.py's
-    callers don't have to branch on platform."""
+    callers don't have to branch on platform.
    An exclusive file lock serialises concurrent calls so two
    simultaneous launches don't read the same docker state and
    claim the same alias."""
    if not _is_macos():
        return "127.0.0.1"
    _ALLOC_LOCK_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(_ALLOC_LOCK_PATH, "w") as lf:
        fcntl.flock(lf, fcntl.LOCK_EX)
        return _allocate_locked()
 def _allocate_locked() -> str:
    in_use = _aliases_in_use()
    for ip in _pool_addresses():
        if ip not in in_use:
@@ -27,10 +27,13 @@ from __future__ import annotations
 import shutil
 import subprocess
 import time
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Mapping, Sequence
 from ...log import die
 _SMOLVM = "smolvm"
@@ -197,6 +200,30 @@ def machine_exec(
    )
 def wait_exec_ready(name: str, *, timeout: float = 5.0) -> None:
    """Poll `machine exec true` until exit 0 or `timeout` elapses.
    Replaces `time.sleep(1.5)` after `machine_start`: libkrun's exec
    channel needs a brief warm-up before back-to-back exec calls are
    safe. Polling exits as soon as the channel is ready and fails
    loudly if the VM never responds."""
    deadline = time.monotonic() + timeout
    delay = 0.1
    while time.monotonic() < deadline:
        r = machine_exec(name, ["true"])
        if r.returncode == 0:
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(delay, remaining))
        delay = min(delay * 2, 0.5)
    die(
        f"smolvm machine {name!r}: exec channel not ready after "
        f"{timeout:.0f}s — VM may have failed to boot."
    )
 def machine_cp(src: str, dst: str) -> None:
    """`smolvm machine cp SRC DST`. Path syntax: `machine:path` to
    reference a path inside the VM, bare path for the host. Both
@@ -11,6 +11,7 @@ import json
 import sqlite3
 import subprocess
 import tempfile
 import threading
 import unittest
 from pathlib import Path
 from unittest.mock import patch
@@ -144,6 +145,55 @@ class TestAllocate(unittest.TestCase):
                loopback_alias.allocate("demo-overflow")
 class TestAllocateLock(unittest.TestCase):
    """allocate() on macOS acquires a file lock so concurrent calls
    serialise rather than racing on docker state."""
    def test_acquires_exclusive_lock_on_macos(self):
        import fcntl as fcntl_mod
        flock_calls: list[int] = []
        def record_flock(fd, op):
            flock_calls.append(op)
        with tempfile.TemporaryDirectory() as tmp:
            lock_path = Path(tmp) / "smolmachines.lock"
            with patch.object(loopback_alias, "_is_macos", return_value=True), \
                 patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \
                 patch.object(loopback_alias, "_aliases_in_use", return_value=set()), \
                 patch.object(loopback_alias.fcntl, "flock",
                              side_effect=record_flock):
                loopback_alias.allocate("demo")
        self.assertIn(fcntl_mod.LOCK_EX, flock_calls)
    def test_no_lock_on_linux(self):
        # Linux early-returns before touching the lock file.
        with patch.object(loopback_alias, "_is_macos", return_value=False), \
             patch.object(loopback_alias.fcntl, "flock") as flock:
            loopback_alias.allocate("demo")
        flock.assert_not_called()
    def test_sequential_allocations_with_shared_lock_are_serialised(self):
        # Two sequential calls share the same lock file. The second
        # call sees {127.0.0.16} in use (as if the first caller's
        # docker run completed between the two lock acquisitions) and
        # returns the next alias.
        in_use_seq = [set(), {"127.0.0.16"}]
        with tempfile.TemporaryDirectory() as tmp:
            lock_path = Path(tmp) / "smolmachines.lock"
            results: list[str] = []
            for _ in range(2):
                with patch.object(loopback_alias, "_is_macos", return_value=True), \
                     patch.object(loopback_alias, "_ALLOC_LOCK_PATH", lock_path), \
                     patch.object(loopback_alias, "_aliases_in_use",
                                  return_value=in_use_seq.pop(0)):
                    results.append(loopback_alias.allocate("demo"))
        self.assertEqual(["127.0.0.16", "127.0.0.17"], results)
 class TestAliasInUseDetection(unittest.TestCase):
    """`_aliases_in_use` inspects every running bundle and pulls
    each container's port-binding `HostIp` out. The detection has
@@ -12,6 +12,7 @@ import unittest
 from pathlib import Path
 from unittest.mock import patch
 from bot_bottle.backend.smolmachines import smolvm as smolvm_mod
 from bot_bottle.backend.smolmachines.smolvm import (
    SmolvmError,
    SmolvmRunResult,
@@ -23,6 +24,7 @@ from bot_bottle.backend.smolmachines.smolvm import (
    machine_start,
    machine_stop,
    pack_create,
    wait_exec_ready,
 )
@@ -204,6 +206,45 @@ class TestErrorPath(unittest.TestCase):
        self.assertEqual(SmolvmRunResult(42, "", "nope"), r)
 class TestWaitExecReady(unittest.TestCase):
    """wait_exec_ready polls machine_exec(name, ["true"]) until it
    returns 0, then exits. On timeout it calls die()."""
    def test_returns_immediately_when_exec_succeeds_first_try(self):
        with patch.object(smolvm_mod, "machine_exec",
                          return_value=SmolvmRunResult(0, "", "")) as m:
            wait_exec_ready("vm-x")
        m.assert_called_once_with("vm-x", ["true"])
    def test_retries_on_nonzero_and_returns_on_success(self):
        results = [
            SmolvmRunResult(1, "", "not ready"),
            SmolvmRunResult(1, "", "not ready"),
            SmolvmRunResult(0, "", ""),
        ]
        with patch.object(smolvm_mod, "machine_exec",
                          side_effect=results) as m, \
             patch.object(smolvm_mod.time, "sleep"):
            wait_exec_ready("vm-x")
        self.assertEqual(3, m.call_count)
    def test_dies_on_timeout(self):
        # machine_exec always returns non-zero; monotonic advances past
        # the deadline after the first sleep so the loop exits.
        ticks = [0.0, 0.0, 10.0]  # third call puts us past deadline
        with patch.object(smolvm_mod, "machine_exec",
                          return_value=SmolvmRunResult(1, "", "")), \
             patch.object(smolvm_mod.time, "monotonic",
                          side_effect=ticks), \
             patch.object(smolvm_mod.time, "sleep"), \
             patch.object(smolvm_mod, "die",
                          side_effect=SystemExit("die")) as die_mock:
            with self.assertRaises(SystemExit):
                wait_exec_ready("vm-x", timeout=5.0)
        die_mock.assert_called_once()
        self.assertIn("vm-x", die_mock.call_args.args[0])
 class TestIsAvailable(unittest.TestCase):
    def test_true_when_on_path(self):
        with patch(