feat(cleanup): walk every backend, reap smolmachines orphans too
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 42s

`./cli.py cleanup` previously called only the env-var-selected
backend's `prepare_cleanup` / `cleanup` — so a leftover smolvm
machine + bundle container + bundle network from a crashed
smolmachines bottle would survive a default `docker`-mode cleanup
indefinitely.

Smolmachines now has a real `cleanup` module (alongside
`enumerate.py` from issue #77) that walks:

  - smolvm machines named `claude-bottle-*` (via
    `smolvm machine ls --json`)
  - bundle containers `claude-bottle-sidecars-*`
  - bundle networks `claude-bottle-bundle-*`

Cleanup runs stop+delete on the machines, force-rm on the
containers, network rm on the networks. Each step is best-effort
so a failed rm doesn't block the rest.

`cli.py cleanup` walks every backend in `known_backend_names()`
and runs each backend's `cleanup` after a single y/N prompt that
shows a combined plan.

State dirs (`~/.claude-bottle/state/<slug>/`) are shared layout
with the docker backend, which still owns the orphan-state-dir
bucket. It now consults `enumerate_active_bottles()` for the
cross-backend live identity set so a running smolmachines
bottle's state dir isn't reaped during a cleanup.

Tests: smolmachines cleanup (prepare + cleanup ordering + failure
handling); cross-backend orphan protection on the docker
state-dir check; CLI cmd_cleanup walks both backends, short-
circuits on all-empty, aborts on N. 617 unit tests pass.

End-to-end verified on this host:
  $ smolvm machine ls --json | jq '.[].name'
  "claude-bottle-researcher-m3hxd"
  $ ./cli.py cleanup
  --- smolmachines backend ---
  smolvm machine:  claude-bottle-researcher-m3hxd
  remove all of the above? [y/N]

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 18:38:00 -04:00
parent 5d740a6948
commit 5323fc1b53
8 changed files with 538 additions and 37 deletions
+23 -4
View File
@@ -83,11 +83,18 @@ def _list_prefixed_networks() -> list[str]:
return sorted(set(out))
def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
def _list_orphan_state_dirs(
live_projects: set[str], protected_identities: set[str],
) -> list[str]:
"""State identities whose compose project isn't running and
that don't have a `.preserve` marker. `.preserve` means the
user (or an auto-preserve-on-crash) wants the state kept for
`resume`."""
`resume`.
`protected_identities` is the set of slugs that are live in
ANY backend — used so this docker-side check doesn't reap a
running smolmachines bottle's state dir (the layout is shared
across both backends)."""
state_root = _supervise.claude_bottle_root() / "state"
if not state_root.is_dir():
return []
@@ -99,6 +106,8 @@ def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
project = f"{COMPOSE_PROJECT_PREFIX}{identity}"
if project in live_projects:
continue
if identity in protected_identities:
continue
if is_preserved(identity):
continue
orphans.append(identity)
@@ -106,15 +115,25 @@ def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
def prepare_cleanup() -> DockerBottleCleanupPlan:
"""Enumerate everything cleanup will touch. No removals."""
"""Enumerate everything cleanup will touch. No removals.
Pulls the union of live identities across backends via
`enumerate_active_agents()` so the orphan-state-dir bucket
doesn't include slugs whose smolmachines VM is still up."""
docker_mod.require_docker()
projects = list_compose_projects()
project_set = set(projects)
# Late import to avoid a circular at module-load time —
# the backend package's __init__ imports this module.
from .. import enumerate_active_agents
protected = {a.slug for a in enumerate_active_agents()}
return DockerBottleCleanupPlan(
projects=tuple(projects),
stray_containers=tuple(_list_prefixed_containers()),
stray_networks=tuple(_list_prefixed_networks()),
orphan_state_dirs=tuple(_list_orphan_state_dirs(project_set)),
orphan_state_dirs=tuple(
_list_orphan_state_dirs(project_set, protected),
),
)
@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Generator, Sequence
from .. import ActiveAgent, BottleBackend, BottleSpec
from . import cleanup as _cleanup
from . import enumerate as _enumerate
from . import launch as _launch
from . import prepare as _prepare
@@ -76,12 +77,10 @@ class SmolmachinesBottleBackend(
_supervise.provision_supervise(plan, target)
def prepare_cleanup(self) -> SmolmachinesBottleCleanupPlan:
return SmolmachinesBottleCleanupPlan()
return _cleanup.prepare_cleanup()
def cleanup(self, plan: SmolmachinesBottleCleanupPlan) -> None:
del plan
# Nothing to clean in chunks 1-3 — see
# SmolmachinesBottleCleanupPlan docstring.
_cleanup.cleanup(plan)
def enumerate_active(self) -> Sequence[ActiveAgent]:
return _enumerate.enumerate_active()
@@ -1,13 +1,29 @@
"""SmolmachinesBottleCleanupPlan — concrete BottleCleanupPlan stub
(PRD 0023 chunk 1).
"""SmolmachinesBottleCleanupPlan — concrete BottleCleanupPlan (issue #77).
Chunk 1 always reports nothing-to-clean. Real enumeration —
orphaned smolvm machines, stranded gvproxy sockets, leftover
sidecar bundle containers — lands in chunk 4 alongside the
integration-test sweep that exercises teardown."""
Tracks the resources `SmolmachinesBottleBackend.cleanup` will
remove:
- machines: smolvm machines whose name starts with
`claude-bottle-` (running or stopped). Stopped +
deleted via `smolvm machine stop` + `machine delete -f`.
- bundles: docker containers `claude-bottle-sidecars-<slug>`
left over from a smolmachines bottle (the bundle's
port-forwards stay published on lo0 aliases until
the container is gone). Removed via `docker rm -f`.
- networks: docker networks `claude-bottle-bundle-<slug>`
attached to the bundles. Removed via
`docker network rm`.
Smolmachines state dirs live under the same `~/.claude-bottle/state/`
path the docker backend uses; the docker backend's
`prepare_cleanup` already enumerates orphan state dirs and is the
single source of truth for that bucket (consults
`enumerate_active_bottles()` so it doesn't reap a live
smolmachines bottle's dir)."""
from __future__ import annotations
import sys
from dataclasses import dataclass
from ...log import info
@@ -16,10 +32,24 @@ from .. import BottleCleanupPlan
@dataclass(frozen=True)
class SmolmachinesBottleCleanupPlan(BottleCleanupPlan):
def print(self) -> None:
info("smolmachines cleanup: nothing to remove (chunk 4 will "
"enumerate orphan machines + gvproxy sockets)")
"""Resources SmolmachinesBottleBackend.cleanup will remove.
Produced by `prepare_cleanup`; sorted so the y/N output is
stable."""
machines: tuple[str, ...] = ()
bundles: tuple[str, ...] = ()
networks: tuple[str, ...] = ()
@property
def empty(self) -> bool:
return True
return not self.machines and not self.bundles and not self.networks
def print(self) -> None:
print(file=sys.stderr)
for name in self.machines:
info(f"smolvm machine: {name}")
for name in self.bundles:
info(f"bundle container:{name}")
for name in self.networks:
info(f"bundle network: {name}")
print(file=sys.stderr)
@@ -0,0 +1,159 @@
"""Cleanup + active-listing for the smolmachines backend (issue #77).
`prepare_cleanup` enumerates leftover smolmachines resources:
- smolvm machines (`smolvm machine ls --json`) whose name starts
with `claude-bottle-`.
- bundle docker containers (`claude-bottle-sidecars-<slug>`).
- bundle docker networks (`claude-bottle-bundle-<slug>`).
State dirs live under `~/.claude-bottle/state/<identity>/` —
shared layout with the docker backend, which has the single
orphan-state-dir enumerator (it already consults
`enumerate_active_agents()` so a live smolmachines bottle's dir
is preserved).
`cleanup` removes everything in the plan: stop + delete each VM,
force-rm each container, rm each network. Each step is
best-effort — a failure on one resource doesn't block the others."""
from __future__ import annotations
import json
import subprocess
from ...log import info, warn
from . import sidecar_bundle as _bundle
from . import smolvm as _smolvm
from .bottle_cleanup_plan import SmolmachinesBottleCleanupPlan
# Both names start with the same prefix the launcher uses.
_VM_PREFIX = "claude-bottle-"
_BUNDLE_PREFIX = _bundle.bundle_container_name("") # `claude-bottle-sidecars-`
_NETWORK_PREFIX = _bundle.bundle_network_name("") # `claude-bottle-bundle-`
def prepare_cleanup() -> SmolmachinesBottleCleanupPlan:
"""Enumerate every smolmachines-owned resource on the host.
No side effects. Returns an empty plan when smolvm isn't on
PATH (no machines to reap) — `cleanup` is a no-op in that
case too."""
machines = _list_claude_bottle_machines()
bundles = _list_bundle_containers()
networks = _list_bundle_networks()
return SmolmachinesBottleCleanupPlan(
machines=tuple(sorted(machines)),
bundles=tuple(sorted(bundles)),
networks=tuple(sorted(networks)),
)
def cleanup(plan: SmolmachinesBottleCleanupPlan) -> None:
"""Remove everything in the plan. Order matters: stop VMs
first (they hold ports on lo0 aliases via libkrun), then the
bundle containers (which hold the host port-forwards), then
the networks (which docker won't reap until the containers
are gone)."""
for name in plan.machines:
info(f"stopping smolvm machine {name}")
subprocess.run(
["smolvm", "machine", "stop", "--name", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
check=False,
)
info(f"deleting smolvm machine {name}")
r = subprocess.run(
["smolvm", "machine", "delete", "-f", name],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"smolvm machine delete -f {name} failed: "
f"{(r.stderr or '').strip()}"
)
for name in plan.bundles:
info(f"removing bundle container {name}")
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
check=False,
)
for name in plan.networks:
info(f"removing bundle network {name}")
r = subprocess.run(
["docker", "network", "rm", name],
capture_output=True, text=True, check=False,
)
if r.returncode != 0 and "no such network" not in (r.stderr or "").lower():
warn(
f"docker network rm {name} failed: "
f"{(r.stderr or '').strip()}"
)
def _list_claude_bottle_machines() -> list[str]:
"""All smolvm machines named `claude-bottle-*`, regardless of
state (running / stopped / created). Empty when smolvm isn't
installed."""
if not _smolvm.is_available():
return []
r = subprocess.run(
["smolvm", "machine", "ls", "--json"],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
return []
try:
machines = json.loads(r.stdout or "[]")
except json.JSONDecodeError:
return []
return [
m["name"] for m in machines
if isinstance(m, dict)
and m.get("name", "").startswith(_VM_PREFIX)
]
def _list_bundle_containers() -> list[str]:
"""All docker containers named `claude-bottle-sidecars-*`,
running or stopped. Empty when docker isn't installed."""
# Late import: `backend/__init__` imports this module
# transitively via the smolmachines backend.
from .. import has_backend
if not has_backend("docker"):
return []
r = subprocess.run(
["docker", "ps", "-a",
"--filter", f"name=^{_BUNDLE_PREFIX}",
"--format", "{{.Names}}"],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
return []
return [
line for line in (r.stdout or "").splitlines()
if line and line.startswith(_BUNDLE_PREFIX)
]
def _list_bundle_networks() -> list[str]:
"""All docker networks named `claude-bottle-bundle-*`. Empty
when docker isn't installed."""
from .. import has_backend
if not has_backend("docker"):
return []
r = subprocess.run(
["docker", "network", "ls",
"--filter", f"name={_NETWORK_PREFIX}",
"--format", "{{.Name}}"],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
return []
return [
line for line in (r.stdout or "").splitlines()
if line and line.startswith(_NETWORK_PREFIX)
]