4859040c6f
`./cli.py cleanup` previously called only the env-var-selected backend's `prepare_cleanup` / `cleanup` — so a leftover smolvm machine + bundle container + bundle network from a crashed smolmachines bottle would survive a default `docker`-mode cleanup indefinitely. Smolmachines now has a real `cleanup` module (alongside `enumerate.py` from issue #77) that walks: - smolvm machines named `claude-bottle-*` (via `smolvm machine ls --json`) - bundle containers `claude-bottle-sidecars-*` - bundle networks `claude-bottle-bundle-*` Cleanup runs stop+delete on the machines, force-rm on the containers, network rm on the networks. Each step is best-effort so a failed rm doesn't block the rest. `cli.py cleanup` walks every backend in `known_backend_names()` and runs each backend's `cleanup` after a single y/N prompt that shows a combined plan. State dirs (`~/.claude-bottle/state/<slug>/`) are shared layout with the docker backend, which still owns the orphan-state-dir bucket. It now consults `enumerate_active_bottles()` for the cross-backend live identity set so a running smolmachines bottle's state dir isn't reaped during a cleanup. Tests: smolmachines cleanup (prepare + cleanup ordering + failure handling); cross-backend orphan protection on the docker state-dir check; CLI cmd_cleanup walks both backends, short- circuits on all-empty, aborts on N. 617 unit tests pass. End-to-end verified on this host: $ smolvm machine ls --json | jq '.[].name' "claude-bottle-researcher-m3hxd" $ ./cli.py cleanup --- smolmachines backend --- smolvm machine: claude-bottle-researcher-m3hxd remove all of the above? [y/N] Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
253 lines
9.0 KiB
Python
253 lines
9.0 KiB
Python
"""Cleanup + active-listing for the Docker bottle backend.
|
|
|
|
PRD 0018 chunk 4: cleanup is centered on `docker compose ls`.
|
|
Pre-compose code paths could leave bare containers / networks
|
|
without a compose project; those still show up via the prefix
|
|
scan, just as a fallback bucket alongside the project list.
|
|
|
|
`prepare_cleanup` enumerates:
|
|
|
|
- Live compose projects whose name starts with `claude-bottle-`.
|
|
- `claude-bottle-*` containers that aren't part of any compose
|
|
project (legacy orphans).
|
|
- `claude-bottle-*` networks that aren't tied to a compose
|
|
project (legacy orphans; compose-managed networks come down
|
|
with `compose down --volumes` and don't appear here).
|
|
- State dirs under ~/.claude-bottle/state/<identity>/ with no
|
|
live compose project AND no `.preserve` marker.
|
|
|
|
`cleanup` removes everything in the plan.
|
|
|
|
`enumerate_active` queries the same compose project namespace and
returns each running project's services as `ActiveBottle` records.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import shutil
|
|
import subprocess
|
|
|
|
from ... import supervise as _supervise
|
|
from ...log import info, warn
|
|
from .. import ActiveBottle
|
|
from . import util as docker_mod
|
|
from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
|
from .bottle_state import bottle_state_dir, is_preserved, read_metadata
|
|
from .compose import (
|
|
COMPOSE_PROJECT_PREFIX,
|
|
compose_project_name,
|
|
list_active_slugs,
|
|
list_compose_projects,
|
|
)
|
|
|
|
|
|
def _list_prefixed_containers() -> list[str]:
    """Return names of prefixed containers that carry no compose label.

    Runs `docker ps -a` (so stopped containers are included) with a
    name filter built from `COMPOSE_PROJECT_PREFIX`, and asks docker to
    print each container's name and its `com.docker.compose.project`
    label separated by a tab.

    Only containers whose label column is empty are returned: labelled
    ones belong to a compose project and are reaped via `compose down`
    rather than `docker rm`.

    Returns:
        Sorted, de-duplicated container names. An empty list when
        `docker ps` exits non-zero (a warning is logged in that case).
    """
    argv = [
        "docker", "ps", "-a",
        "--filter", f"name=^{COMPOSE_PROJECT_PREFIX}",
        "--format", '{{.Names}}\t{{.Label "com.docker.compose.project"}}',
    ]
    proc = subprocess.run(argv, capture_output=True, text=True, check=False)
    if proc.returncode != 0:
        warn(f"docker ps failed: {proc.stderr.strip()}")
        return []

    # Each non-empty row is "<name>\t<compose project label>"; the label
    # part is empty for containers created outside compose.
    rows = (
        row.partition("\t")
        for row in (proc.stdout or "").splitlines()
        if row
    )
    unlabeled = {container for container, _sep, label in rows if not label}
    return sorted(unlabeled)
|
|
|
|
|
|
def _list_prefixed_networks() -> list[str]:
    """Return names of prefixed networks that carry no compose label.

    Compose-managed networks have a `com.docker.compose.project`
    label; bare ones (from pre-compose code paths) don't. Only the
    bare ones are returned, because compose-managed networks are
    removed by `compose down`.

    Returns:
        Sorted, de-duplicated network names that start with
        `COMPOSE_PROJECT_PREFIX` and have an empty compose project
        label. An empty list when `docker network ls` exits non-zero
        (a warning is logged in that case).
    """
    result = subprocess.run(
        ["docker", "network", "ls",
         "--filter", f"name={COMPOSE_PROJECT_PREFIX}",
         "--format", "{{.Name}}\t{{.Label \"com.docker.compose.project\"}}"],
        capture_output=True, text=True, check=False,
    )
    if result.returncode != 0:
        warn(f"docker network ls failed: {result.stderr.strip()}")
        return []
    out: set[str] = set()
    for line in (result.stdout or "").splitlines():
        if not line:
            continue
        name, _, project = line.partition("\t")
        # docker's network `name` filter matches on any part of the
        # name, so a network that merely contains the prefix somewhere
        # in the middle would slip through. Re-check the prefix here so
        # cleanup never removes an unrelated network.
        if not name.startswith(COMPOSE_PROJECT_PREFIX):
            continue
        # Stray = no compose label; labelled networks come down with
        # their compose project.
        if not project:
            out.add(name)
    return sorted(out)
|
|
|
|
|
|
def _list_orphan_state_dirs(
    live_projects: set[str], protected_identities: set[str],
) -> list[str]:
    """Return state-dir identities that are safe to reap.

    Scans the `state` directory under the claude-bottle root and keeps
    every sub-directory name (the identity) for which all of these hold:

    - `COMPOSE_PROJECT_PREFIX + identity` is not in `live_projects`;
    - the identity is not in `protected_identities`, i.e. the slugs
      that are live in ANY backend (the state layout is shared with the
      smolmachines backend, so a running smolmachines bottle must not
      lose its state dir here);
    - `is_preserved(identity)` is false. `.preserve` means the user
      (or an auto-preserve-on-crash) wants the state kept for `resume`.

    Returns:
        Identities in sorted directory order; an empty list when the
        state root does not exist.
    """
    state_root = _supervise.claude_bottle_root() / "state"
    if not state_root.is_dir():
        return []

    # The conditions short-circuit in the same order as the guard
    # clauses they replace, so `is_preserved` is only consulted for
    # entries that passed every cheaper check.
    return [
        entry.name
        for entry in sorted(state_root.iterdir())
        if entry.is_dir()
        and f"{COMPOSE_PROJECT_PREFIX}{entry.name}" not in live_projects
        and entry.name not in protected_identities
        and not is_preserved(entry.name)
    ]
|
|
|
|
|
|
def prepare_cleanup() -> DockerBottleCleanupPlan:
    """Build the cleanup plan without removing anything.

    Collects, in order: the compose projects, the set of slugs that are
    live in any backend (via `enumerate_active_bottles()`), the stray
    containers, the stray networks, and finally the orphan state dirs.
    The cross-backend slug set is passed to the state-dir scan so a
    slug whose smolmachines VM is still up is not listed as an orphan.

    Returns:
        A `DockerBottleCleanupPlan` whose fields are tuples of names.
    """
    docker_mod.require_docker()
    compose_projects = list_compose_projects()
    live_project_names = set(compose_projects)

    # Imported here rather than at module level: the backend package's
    # __init__ imports this module, so a top-level import would be
    # circular at load time.
    from .. import enumerate_active_bottles

    live_slugs = {bottle.slug for bottle in enumerate_active_bottles()}

    containers = _list_prefixed_containers()
    networks = _list_prefixed_networks()
    state_dirs = _list_orphan_state_dirs(live_project_names, live_slugs)

    return DockerBottleCleanupPlan(
        projects=tuple(compose_projects),
        stray_containers=tuple(containers),
        stray_networks=tuple(networks),
        orphan_state_dirs=tuple(state_dirs),
    )
|
|
|
|
|
|
def cleanup(plan: DockerBottleCleanupPlan) -> None:
    """Remove everything in the plan, best-effort.

    Order: compose projects first (`compose down --volumes` takes
    their containers and networks with them), then stray legacy
    containers and networks, then orphan state dirs. A failing step
    never aborts the remaining ones.

    Args:
        plan: The plan produced by `prepare_cleanup`; its `projects`,
            `stray_containers`, `stray_networks` and
            `orphan_state_dirs` fields are iterated.
    """
    for project in plan.projects:
        info(f"docker compose down ({project})")
        result = subprocess.run(
            ["docker", "compose", "-p", project, "down", "--volumes"],
            capture_output=True, text=True, check=False,
        )
        if result.returncode != 0:
            warn(
                f"compose down failed for {project}: "
                f"{result.stderr.strip()}"
            )

    for name in plan.stray_containers:
        info(f"removing stray container {name}")
        # Output and exit status are deliberately discarded.
        subprocess.run(
            ["docker", "rm", "-f", name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )

    for name in plan.stray_networks:
        info(f"removing stray network {name}")
        subprocess.run(
            ["docker", "network", "rm", name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )

    for identity in plan.orphan_state_dirs:
        path = bottle_state_dir(identity)
        info(f"removing orphan state dir {path}")
        try:
            # No `ignore_errors=True`: that flag swallows every failure
            # inside rmtree, which made the warning below unreachable.
            shutil.rmtree(path)
        except FileNotFoundError:
            # Treated as already removed — nothing to report.
            continue
        except OSError as e:
            warn(f"failed to remove {path}: {e}")
|
|
|
|
|
|
def enumerate_active() -> list[ActiveBottle]:
    """Describe every running docker-backed bottle.

    Each running slug is turned into an `ActiveBottle` with
    `backend_name="docker"`, the slug, the agent name and start time
    from the bottle's metadata (`"?"` and `""` when no metadata is
    available), and the sorted compose service names found for its
    project.

    Returns:
        One record per running slug. An empty list when the `docker`
        executable is not on PATH or no slugs are running.
    """
    # A host without docker must not break callers that merely want a
    # listing across backends.
    if shutil.which("docker") is None:
        return []

    running = list_active_slugs(include_stopped=False)
    if not running:
        return []

    by_project = _query_services_by_project()

    def _describe(slug):
        # One record for one slug; lookups happen in the order
        # project name -> services -> metadata.
        service_names = by_project.get(compose_project_name(slug), set())
        meta = read_metadata(slug)
        if meta:
            agent, started = meta.agent_name, meta.started_at
        else:
            agent, started = "?", ""
        return ActiveBottle(
            backend_name="docker",
            slug=slug,
            agent_name=agent,
            started_at=started,
            services=tuple(sorted(service_names)),
        )

    return [_describe(slug) for slug in running]
|
|
|
|
|
|
def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
|
|
"""Parse `docker ps` output formatted as
|
|
`<project-label>\\t<service-label>` (one line per container)
|
|
into a `{project: {service, ...}}` mapping. Pure function for
|
|
testing — the docker invocation is in `_query_services_by_project`."""
|
|
out: dict[str, set[str]] = {}
|
|
for line in stdout.splitlines():
|
|
project, _, service = line.partition("\t")
|
|
if not project or not service:
|
|
continue
|
|
out.setdefault(project, set()).add(service)
|
|
return out
|
|
|
|
|
|
def _query_services_by_project() -> dict[str, set[str]]:
    """Run one `docker ps` call and return `{project: {service, ...}}`.

    The call is filtered to containers that have a
    `com.docker.compose.project` label and prints the project and
    service labels separated by a tab; parsing is delegated to
    `_parse_services_by_project`.

    Returns:
        The parsed mapping, or an empty dict when the `docker`
        executable cannot be found or the command exits non-zero.
    """
    label_columns = (
        '{{.Label "com.docker.compose.project"}}'
        "\t"
        '{{.Label "com.docker.compose.service"}}'
    )
    argv = [
        "docker", "ps",
        "--filter", "label=com.docker.compose.project",
        "--format", label_columns,
    ]
    try:
        proc = subprocess.run(
            argv, capture_output=True, text=True, check=False,
        )
    except FileNotFoundError:
        # docker executable missing — report "nothing running".
        return {}
    if proc.returncode == 0:
        return _parse_services_by_project(proc.stdout or "")
    return {}
|