Files
bot-bottle/claude_bottle/backend/docker/cleanup.py
T
didericis-claude 4859040c6f
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 40s
feat(cleanup): walk every backend, reap smolmachines orphans too
`./cli.py cleanup` previously called only the env-var-selected
backend's `prepare_cleanup` / `cleanup` — so a leftover smolvm
machine + bundle container + bundle network from a crashed
smolmachines bottle would survive a default `docker`-mode cleanup
indefinitely.

Smolmachines now has a real `cleanup` module (alongside
`enumerate.py` from issue #77) that walks:

  - smolvm machines named `claude-bottle-*` (via
    `smolvm machine ls --json`)
  - bundle containers `claude-bottle-sidecars-*`
  - bundle networks `claude-bottle-bundle-*`

Cleanup runs stop+delete on the machines, force-rm on the
containers, network rm on the networks. Each step is best-effort
so a failed rm doesn't block the rest.

`cli.py cleanup` walks every backend in `known_backend_names()`
and runs each backend's `cleanup` after a single y/N prompt that
shows a combined plan.

State dirs (`~/.claude-bottle/state/<slug>/`) are shared layout
with the docker backend, which still owns the orphan-state-dir
bucket. It now consults `enumerate_active_bottles()` for the
cross-backend live identity set so a running smolmachines
bottle's state dir isn't reaped during a cleanup.

Tests: smolmachines cleanup (prepare + cleanup ordering + failure
handling); cross-backend orphan protection on the docker
state-dir check; CLI cmd_cleanup walks both backends, short-
circuits on all-empty, aborts on N. 617 unit tests pass.

End-to-end verified on this host:
  $ smolvm machine ls --json | jq '.[].name'
  "claude-bottle-researcher-m3hxd"
  $ ./cli.py cleanup
  --- smolmachines backend ---
  smolvm machine:  claude-bottle-researcher-m3hxd
  remove all of the above? [y/N]

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 18:38:00 -04:00

253 lines
9.0 KiB
Python

"""Cleanup + active-listing for the Docker bottle backend.
PRD 0018 chunk 4: cleanup is centered on `docker compose ls`.
Pre-compose code paths could leave bare containers / networks
without a compose project; those still show up via the prefix
scan, just as a fallback bucket alongside the project list.
`prepare_cleanup` enumerates:
- Live compose projects whose name starts with `claude-bottle-`.
- `claude-bottle-*` containers that aren't part of any compose
project (legacy orphans).
- `claude-bottle-*` networks that aren't tied to a compose
project (legacy orphans; compose-managed networks come down
with `compose down --volumes` and don't appear here).
- State dirs under ~/.claude-bottle/state/<identity>/ with no
live compose project AND no `.preserve` marker.
`cleanup` removes everything in the plan.
`list_active` queries the same compose project namespace and prints
each project's services for ad-hoc inspection.
"""
from __future__ import annotations
import shutil
import subprocess
from ... import supervise as _supervise
from ...log import info, warn
from .. import ActiveBottle
from . import util as docker_mod
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_state import bottle_state_dir, is_preserved, read_metadata
from .compose import (
COMPOSE_PROJECT_PREFIX,
compose_project_name,
list_active_slugs,
list_compose_projects,
)
def _list_prefixed_containers() -> list[str]:
"""All claude-bottle-prefixed containers, running or stopped."""
result = subprocess.run(
["docker", "ps", "-a",
"--filter", f"name=^{COMPOSE_PROJECT_PREFIX}",
"--format", "{{.Names}}\t{{.Label \"com.docker.compose.project\"}}"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(f"docker ps failed: {result.stderr.strip()}")
return []
out: list[str] = []
for line in (result.stdout or "").splitlines():
if not line:
continue
name, _, project = line.partition("\t")
# Stray = no compose label. Compose-managed containers carry
# `com.docker.compose.project=<name>`; we'll reap those via
# `compose down`, not via container rm.
if not project:
out.append(name)
return sorted(set(out))
def _list_prefixed_networks() -> list[str]:
"""All claude-bottle-prefixed networks not currently attached
to a compose project. Compose-managed networks have a
`com.docker.compose.project` label; bare ones (from pre-compose
code paths) don't."""
result = subprocess.run(
["docker", "network", "ls",
"--filter", f"name={COMPOSE_PROJECT_PREFIX}",
"--format", "{{.Name}}\t{{.Label \"com.docker.compose.project\"}}"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(f"docker network ls failed: {result.stderr.strip()}")
return []
out: list[str] = []
for line in (result.stdout or "").splitlines():
if not line:
continue
name, _, project = line.partition("\t")
if not project:
out.append(name)
return sorted(set(out))
def _list_orphan_state_dirs(
live_projects: set[str], protected_identities: set[str],
) -> list[str]:
"""State identities whose compose project isn't running and
that don't have a `.preserve` marker. `.preserve` means the
user (or an auto-preserve-on-crash) wants the state kept for
`resume`.
`protected_identities` is the set of slugs that are live in
ANY backend — used so this docker-side check doesn't reap a
running smolmachines bottle's state dir (the layout is shared
across both backends)."""
state_root = _supervise.claude_bottle_root() / "state"
if not state_root.is_dir():
return []
orphans: list[str] = []
for child in sorted(state_root.iterdir()):
if not child.is_dir():
continue
identity = child.name
project = f"{COMPOSE_PROJECT_PREFIX}{identity}"
if project in live_projects:
continue
if identity in protected_identities:
continue
if is_preserved(identity):
continue
orphans.append(identity)
return orphans
def prepare_cleanup() -> DockerBottleCleanupPlan:
"""Enumerate everything cleanup will touch. No removals.
Pulls the union of live identities across backends via
`enumerate_active_bottles()` so the orphan-state-dir bucket
doesn't include slugs whose smolmachines VM is still up."""
docker_mod.require_docker()
projects = list_compose_projects()
project_set = set(projects)
# Late import to avoid a circular at module-load time —
# the backend package's __init__ imports this module.
from .. import enumerate_active_bottles
protected = {b.slug for b in enumerate_active_bottles()}
return DockerBottleCleanupPlan(
projects=tuple(projects),
stray_containers=tuple(_list_prefixed_containers()),
stray_networks=tuple(_list_prefixed_networks()),
orphan_state_dirs=tuple(
_list_orphan_state_dirs(project_set, protected),
),
)
def cleanup(plan: DockerBottleCleanupPlan) -> None:
"""Remove everything in the plan. Projects first (whose `compose
down` reaps their containers + networks atomically), then stray
legacy resources, then orphan state dirs."""
for project in plan.projects:
info(f"docker compose down ({project})")
result = subprocess.run(
["docker", "compose", "-p", project, "down", "--volumes"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(
f"compose down failed for {project}: "
f"{result.stderr.strip()}"
)
for name in plan.stray_containers:
info(f"removing stray container {name}")
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
for name in plan.stray_networks:
info(f"removing stray network {name}")
subprocess.run(
["docker", "network", "rm", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
for identity in plan.orphan_state_dirs:
path = bottle_state_dir(identity)
info(f"removing orphan state dir {path}")
try:
shutil.rmtree(path, ignore_errors=True)
except OSError as e:
warn(f"failed to remove {path}: {e}")
def enumerate_active() -> list[ActiveBottle]:
"""All currently-running docker-backed bottles as
`ActiveBottle` records. Backend-agnostic shape — the CLI
`list active` command and the dashboard agents pane both
consume this. Empty list when docker is unreachable or
nothing's running."""
# docker on PATH? Defensive — `list active` shouldn't die
# just because the docker backend isn't usable on this host.
if shutil.which("docker") is None:
return []
slugs = list_active_slugs(include_stopped=False)
if not slugs:
return []
services_by_project = _query_services_by_project()
out: list[ActiveBottle] = []
for slug in slugs:
project = compose_project_name(slug)
services = services_by_project.get(project, set())
metadata = read_metadata(slug)
out.append(ActiveBottle(
backend_name="docker",
slug=slug,
agent_name=metadata.agent_name if metadata else "?",
started_at=metadata.started_at if metadata else "",
services=tuple(sorted(services)),
))
return out
def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
"""Parse `docker ps` output formatted as
`<project-label>\\t<service-label>` (one line per container)
into a `{project: {service, ...}}` mapping. Pure function for
testing — the docker invocation is in `_query_services_by_project`."""
out: dict[str, set[str]] = {}
for line in stdout.splitlines():
project, _, service = line.partition("\t")
if not project or not service:
continue
out.setdefault(project, set()).add(service)
return out
def _query_services_by_project() -> dict[str, set[str]]:
"""One `docker ps` call → `{project: {service, ...}}`. Moved
here from the dashboard so the same query backs the CLI's
`list active` and the dashboard's agents pane."""
try:
r = subprocess.run(
[
"docker", "ps",
"--filter", "label=com.docker.compose.project",
"--format",
'{{.Label "com.docker.compose.project"}}'
"\t"
'{{.Label "com.docker.compose.service"}}',
],
capture_output=True, text=True, check=False,
)
except FileNotFoundError:
return {}
if r.returncode != 0:
return {}
return _parse_services_by_project(r.stdout or "")