feat(cleanup): walk every backend, reap smolmachines orphans too
`./cli.py cleanup` previously called only the env-var-selected backend's `prepare_cleanup` / `cleanup` — so a leftover smolvm machine + bundle container + bundle network from a crashed smolmachines bottle would survive a default `docker`-mode cleanup indefinitely. Smolmachines now has a real `cleanup` module (alongside `enumerate.py` from issue #77) that walks: - smolvm machines named `claude-bottle-*` (via `smolvm machine ls --json`) - bundle containers `claude-bottle-sidecars-*` - bundle networks `claude-bottle-bundle-*` Cleanup runs stop+delete on the machines, force-rm on the containers, network rm on the networks. Each step is best-effort so a failed rm doesn't block the rest. `cli.py cleanup` walks every backend in `known_backend_names()` and runs each backend's `cleanup` after a single y/N prompt that shows a combined plan. State dirs (`~/.claude-bottle/state/<slug>/`) are shared layout with the docker backend, which still owns the orphan-state-dir bucket. It now consults `enumerate_active_agents()` for the cross-backend live identity set so a running smolmachines bottle's state dir isn't reaped during a cleanup. Tests: smolmachines cleanup (prepare + cleanup ordering + failure handling); cross-backend orphan protection on the docker state-dir check; CLI cmd_cleanup walks both backends, short-circuits on all-empty, aborts on N. 617 unit tests pass. End-to-end verified on this host: $ smolvm machine ls --json | jq '.[].name' "claude-bottle-researcher-m3hxd" $ ./cli.py cleanup --- smolmachines backend --- smolvm machine: claude-bottle-researcher-m3hxd remove all of the above? [y/N] Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit was merged in pull request #79.
This commit is contained in:
@@ -83,11 +83,18 @@ def _list_prefixed_networks() -> list[str]:
|
||||
return sorted(set(out))
|
||||
|
||||
|
||||
def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
|
||||
def _list_orphan_state_dirs(
|
||||
live_projects: set[str], protected_identities: set[str],
|
||||
) -> list[str]:
|
||||
"""State identities whose compose project isn't running and
|
||||
that don't have a `.preserve` marker. `.preserve` means the
|
||||
user (or an auto-preserve-on-crash) wants the state kept for
|
||||
`resume`."""
|
||||
`resume`.
|
||||
|
||||
`protected_identities` is the set of slugs that are live in
|
||||
ANY backend — used so this docker-side check doesn't reap a
|
||||
running smolmachines bottle's state dir (the layout is shared
|
||||
across both backends)."""
|
||||
state_root = _supervise.claude_bottle_root() / "state"
|
||||
if not state_root.is_dir():
|
||||
return []
|
||||
@@ -99,6 +106,8 @@ def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
|
||||
project = f"{COMPOSE_PROJECT_PREFIX}{identity}"
|
||||
if project in live_projects:
|
||||
continue
|
||||
if identity in protected_identities:
|
||||
continue
|
||||
if is_preserved(identity):
|
||||
continue
|
||||
orphans.append(identity)
|
||||
@@ -106,15 +115,25 @@ def _list_orphan_state_dirs(live_projects: set[str]) -> list[str]:
|
||||
|
||||
|
||||
def prepare_cleanup() -> DockerBottleCleanupPlan:
|
||||
"""Enumerate everything cleanup will touch. No removals."""
|
||||
"""Enumerate everything cleanup will touch. No removals.
|
||||
|
||||
Pulls the union of live identities across backends via
|
||||
`enumerate_active_agents()` so the orphan-state-dir bucket
|
||||
doesn't include slugs whose smolmachines VM is still up."""
|
||||
docker_mod.require_docker()
|
||||
projects = list_compose_projects()
|
||||
project_set = set(projects)
|
||||
# Late import to avoid a circular import at module-load time —
|
||||
# the backend package's __init__ imports this module.
|
||||
from .. import enumerate_active_agents
|
||||
protected = {a.slug for a in enumerate_active_agents()}
|
||||
return DockerBottleCleanupPlan(
|
||||
projects=tuple(projects),
|
||||
stray_containers=tuple(_list_prefixed_containers()),
|
||||
stray_networks=tuple(_list_prefixed_networks()),
|
||||
orphan_state_dirs=tuple(_list_orphan_state_dirs(project_set)),
|
||||
orphan_state_dirs=tuple(
|
||||
_list_orphan_state_dirs(project_set, protected),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||
from typing import Generator, Sequence
|
||||
|
||||
from .. import ActiveAgent, BottleBackend, BottleSpec
|
||||
from . import cleanup as _cleanup
|
||||
from . import enumerate as _enumerate
|
||||
from . import launch as _launch
|
||||
from . import prepare as _prepare
|
||||
@@ -76,12 +77,10 @@ class SmolmachinesBottleBackend(
|
||||
_supervise.provision_supervise(plan, target)
|
||||
|
||||
def prepare_cleanup(self) -> SmolmachinesBottleCleanupPlan:
|
||||
return SmolmachinesBottleCleanupPlan()
|
||||
return _cleanup.prepare_cleanup()
|
||||
|
||||
def cleanup(self, plan: SmolmachinesBottleCleanupPlan) -> None:
|
||||
del plan
|
||||
# Nothing to clean in chunks 1-3 — see
|
||||
# SmolmachinesBottleCleanupPlan docstring.
|
||||
_cleanup.cleanup(plan)
|
||||
|
||||
def enumerate_active(self) -> Sequence[ActiveAgent]:
|
||||
return _enumerate.enumerate_active()
|
||||
|
||||
@@ -1,13 +1,29 @@
|
||||
"""SmolmachinesBottleCleanupPlan — concrete BottleCleanupPlan stub
|
||||
(PRD 0023 chunk 1).
|
||||
"""SmolmachinesBottleCleanupPlan — concrete BottleCleanupPlan (issue #77).
|
||||
|
||||
Chunk 1 always reports nothing-to-clean. Real enumeration —
|
||||
orphaned smolvm machines, stranded gvproxy sockets, leftover
|
||||
sidecar bundle containers — lands in chunk 4 alongside the
|
||||
integration-test sweep that exercises teardown."""
|
||||
Tracks the resources `SmolmachinesBottleBackend.cleanup` will
|
||||
remove:
|
||||
|
||||
- machines: smolvm machines whose name starts with
|
||||
`claude-bottle-` (running or stopped). Stopped +
|
||||
deleted via `smolvm machine stop` + `machine delete -f`.
|
||||
- bundles: docker containers `claude-bottle-sidecars-<slug>`
|
||||
left over from a smolmachines bottle (the bundle's
|
||||
port-forwards stay published on lo0 aliases until
|
||||
the container is gone). Removed via `docker rm -f`.
|
||||
- networks: docker networks `claude-bottle-bundle-<slug>`
|
||||
attached to the bundles. Removed via
|
||||
`docker network rm`.
|
||||
|
||||
Smolmachines state dirs live under the same `~/.claude-bottle/state/`
|
||||
path the docker backend uses; the docker backend's
|
||||
`prepare_cleanup` already enumerates orphan state dirs and is the
|
||||
single source of truth for that bucket (consults
|
||||
`enumerate_active_agents()` so it doesn't reap a live
|
||||
smolmachines bottle's dir)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...log import info
|
||||
@@ -16,10 +32,24 @@ from .. import BottleCleanupPlan
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SmolmachinesBottleCleanupPlan(BottleCleanupPlan):
|
||||
def print(self) -> None:
|
||||
info("smolmachines cleanup: nothing to remove (chunk 4 will "
|
||||
"enumerate orphan machines + gvproxy sockets)")
|
||||
"""Resources SmolmachinesBottleBackend.cleanup will remove.
|
||||
Produced by `prepare_cleanup`; sorted so the y/N output is
|
||||
stable."""
|
||||
|
||||
machines: tuple[str, ...] = ()
|
||||
bundles: tuple[str, ...] = ()
|
||||
networks: tuple[str, ...] = ()
|
||||
|
||||
@property
|
||||
def empty(self) -> bool:
|
||||
return True
|
||||
return not self.machines and not self.bundles and not self.networks
|
||||
|
||||
def print(self) -> None:
|
||||
print(file=sys.stderr)
|
||||
for name in self.machines:
|
||||
info(f"smolvm machine: {name}")
|
||||
for name in self.bundles:
|
||||
info(f"bundle container:{name}")
|
||||
for name in self.networks:
|
||||
info(f"bundle network: {name}")
|
||||
print(file=sys.stderr)
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
"""Cleanup + active-listing for the smolmachines backend (issue #77).
|
||||
|
||||
`prepare_cleanup` enumerates leftover smolmachines resources:
|
||||
|
||||
- smolvm machines (`smolvm machine ls --json`) whose name starts
|
||||
with `claude-bottle-`.
|
||||
- bundle docker containers (`claude-bottle-sidecars-<slug>`).
|
||||
- bundle docker networks (`claude-bottle-bundle-<slug>`).
|
||||
|
||||
State dirs live under `~/.claude-bottle/state/<identity>/` —
|
||||
shared layout with the docker backend, which has the single
|
||||
orphan-state-dir enumerator (it already consults
|
||||
`enumerate_active_agents()` so a live smolmachines bottle's dir
|
||||
is preserved).
|
||||
|
||||
`cleanup` removes everything in the plan: stop + delete each VM,
|
||||
force-rm each container, rm each network. Each step is
|
||||
best-effort — a failure on one resource doesn't block the others."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from ...log import info, warn
|
||||
from . import sidecar_bundle as _bundle
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle_cleanup_plan import SmolmachinesBottleCleanupPlan
|
||||
|
||||
|
||||
# All three prefixes start with the same `claude-bottle-` prefix the launcher uses.
|
||||
_VM_PREFIX = "claude-bottle-"
|
||||
_BUNDLE_PREFIX = _bundle.bundle_container_name("") # `claude-bottle-sidecars-`
|
||||
_NETWORK_PREFIX = _bundle.bundle_network_name("") # `claude-bottle-bundle-`
|
||||
|
||||
|
||||
def prepare_cleanup() -> SmolmachinesBottleCleanupPlan:
|
||||
"""Enumerate every smolmachines-owned resource on the host.
|
||||
No side effects. The machine list is empty when smolvm isn't
|
||||
available (no machines to reap); bundle containers and
|
||||
networks are still enumerated through docker."""
|
||||
machines = _list_claude_bottle_machines()
|
||||
bundles = _list_bundle_containers()
|
||||
networks = _list_bundle_networks()
|
||||
return SmolmachinesBottleCleanupPlan(
|
||||
machines=tuple(sorted(machines)),
|
||||
bundles=tuple(sorted(bundles)),
|
||||
networks=tuple(sorted(networks)),
|
||||
)
|
||||
|
||||
|
||||
def cleanup(plan: SmolmachinesBottleCleanupPlan) -> None:
|
||||
"""Remove everything in the plan. Order matters: stop VMs
|
||||
first (they hold ports on lo0 aliases via libkrun), then the
|
||||
bundle containers (which hold the host port-forwards), then
|
||||
the networks (which docker won't reap until the containers
|
||||
are gone)."""
|
||||
for name in plan.machines:
|
||||
info(f"stopping smolvm machine {name}")
|
||||
subprocess.run(
|
||||
["smolvm", "machine", "stop", "--name", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
info(f"deleting smolvm machine {name}")
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "delete", "-f", name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"smolvm machine delete -f {name} failed: "
|
||||
f"{(r.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
for name in plan.bundles:
|
||||
info(f"removing bundle container {name}")
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
for name in plan.networks:
|
||||
info(f"removing bundle network {name}")
|
||||
r = subprocess.run(
|
||||
["docker", "network", "rm", name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0 and "no such network" not in (r.stderr or "").lower():
|
||||
warn(
|
||||
f"docker network rm {name} failed: "
|
||||
f"{(r.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
|
||||
def _list_claude_bottle_machines() -> list[str]:
|
||||
"""All smolvm machines named `claude-bottle-*`, regardless of
|
||||
state (running / stopped / created). Empty when smolvm isn't
|
||||
installed."""
|
||||
if not _smolvm.is_available():
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "ls", "--json"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
try:
|
||||
machines = json.loads(r.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
return [
|
||||
m["name"] for m in machines
|
||||
if isinstance(m, dict)
|
||||
and m.get("name", "").startswith(_VM_PREFIX)
|
||||
]
|
||||
|
||||
|
||||
def _list_bundle_containers() -> list[str]:
|
||||
"""All docker containers named `claude-bottle-sidecars-*`,
|
||||
running or stopped. Empty when docker isn't installed."""
|
||||
# Late import: `backend/__init__` imports this module
|
||||
# transitively via the smolmachines backend.
|
||||
from .. import has_backend
|
||||
if not has_backend("docker"):
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["docker", "ps", "-a",
|
||||
"--filter", f"name=^{_BUNDLE_PREFIX}",
|
||||
"--format", "{{.Names}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
return [
|
||||
line for line in (r.stdout or "").splitlines()
|
||||
if line and line.startswith(_BUNDLE_PREFIX)
|
||||
]
|
||||
|
||||
|
||||
def _list_bundle_networks() -> list[str]:
|
||||
"""All docker networks named `claude-bottle-bundle-*`. Empty
|
||||
when docker isn't installed."""
|
||||
from .. import has_backend
|
||||
if not has_backend("docker"):
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["docker", "network", "ls",
|
||||
"--filter", f"name={_NETWORK_PREFIX}",
|
||||
"--format", "{{.Name}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
return [
|
||||
line for line in (r.stdout or "").splitlines()
|
||||
if line and line.startswith(_NETWORK_PREFIX)
|
||||
]
|
||||
@@ -1,11 +1,16 @@
|
||||
"""cleanup: stop and remove all orphaned claude-bottle resources.
|
||||
|
||||
PRD 0018 chunk 4: backend's prepare_cleanup carries everything in
|
||||
one plan — live compose projects (whose `compose down` removes
|
||||
containers + networks atomically), legacy stray containers/networks
|
||||
that aren't in any project, and orphan state dirs (per-bottle
|
||||
state with no live project AND no `.preserve` marker). One prompt,
|
||||
one cleanup call.
|
||||
Walks every registered backend (docker + smolmachines) so a single
|
||||
`./cli.py cleanup` reaps both backends' leftovers — orphaned
|
||||
smolvm machines won't survive a docker-only cleanup pass (issue
|
||||
addressed alongside #77).
|
||||
|
||||
Each backend's `prepare_cleanup` enumerates its own resources;
|
||||
docker's `_list_orphan_state_dirs` consults
|
||||
`enumerate_active_agents()` for the union of live identities so
|
||||
state dirs of running smolmachines bottles aren't reaped. State
|
||||
dirs are shared layout, so docker is the single owner of that
|
||||
bucket.
|
||||
|
||||
State dirs with `.preserve` are intentionally never touched — they
|
||||
hold capability-block rebuilds or crash snapshots the operator may
|
||||
@@ -17,25 +22,37 @@ from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from ..backend import get_bottle_backend
|
||||
from ..backend import get_bottle_backend, known_backend_names
|
||||
from ..log import info
|
||||
from ._common import read_tty_line
|
||||
|
||||
|
||||
def cmd_cleanup(_argv: list[str]) -> int:
|
||||
backend = get_bottle_backend()
|
||||
plan = backend.prepare_cleanup()
|
||||
# Order: stable backend iteration so the y/N output is
|
||||
# deterministic across runs.
|
||||
plans = [
|
||||
(name, get_bottle_backend(name)) for name in known_backend_names()
|
||||
]
|
||||
prepared = [(name, b, b.prepare_cleanup()) for name, b in plans]
|
||||
|
||||
if plan.empty:
|
||||
if all(p.empty for _, _, p in prepared):
|
||||
info("no claude-bottle resources to clean up")
|
||||
return 0
|
||||
|
||||
plan.print()
|
||||
for name, _, plan in prepared:
|
||||
if plan.empty:
|
||||
continue
|
||||
info(f"--- {name} backend ---")
|
||||
plan.print()
|
||||
|
||||
if not _prompt_yes("remove all of the above?"):
|
||||
info("cleanup: skipped")
|
||||
return 0
|
||||
|
||||
backend.cleanup(plan)
|
||||
for name, backend, plan in prepared:
|
||||
if plan.empty:
|
||||
continue
|
||||
backend.cleanup(plan)
|
||||
info("cleanup: done")
|
||||
return 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user