diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index 45c65ac..04132ad 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -53,6 +53,11 @@ class BottleSpec: agent_name: str copy_cwd: bool user_cwd: str + # PRD 0016 follow-up: when set, the backend's prepare step uses + # this identity instead of minting a fresh one — the resume path + # (`cli.py resume `) sets this to continue an existing + # bottle's state. Empty string for a fresh `start`. + identity: str = "" @dataclass(frozen=True) diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index ce0d26f..9349753 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -44,6 +44,11 @@ class DockerBottlePlan(BottlePlan): image: str derived_image: str # "" -> no derived image runtime_image: str # image actually launched (derived or base) + # Absolute path to the Dockerfile that builds `image`. Empty means + # use the repo's default Dockerfile. Populated to a per-bottle + # state file (~/.claude-bottle/state//Dockerfile) after a + # capability-block remediation (PRD 0016). + dockerfile_path: str env_file: Path # docker --env-file: NAME=VALUE literals # name -> value for vars forwarded into the docker-run child process # via subprocess env (so values never land on argv or in a file). @@ -89,6 +94,11 @@ class DockerBottlePlan(BottlePlan): print(file=sys.stderr) info(f"agent : {spec.agent_name}") info(f"image : {self.image}") + if self.dockerfile_path: + info( + f"dockerfile : {self.dockerfile_path} " + f"(per-bottle override from PRD 0016 capability rebuild)" + ) if self.derived_image: info( f"cwd : {spec.user_cwd} -> /home/node/workspace " diff --git a/claude_bottle/backend/docker/bottle_state.py b/claude_bottle/backend/docker/bottle_state.py new file mode 100644 index 0000000..912a794 --- /dev/null +++ b/claude_bottle/backend/docker/bottle_state.py @@ -0,0 +1,170 @@ +"""Per-bottle persistent state (PRD 0016). + +Holds the per-bottle Dockerfile override that capability-block +remediation writes, the transcript snapshot the state-preservation +helper saves before teardown, and the launch metadata that lets +`cli.py resume ` reconstruct a bottle's spec. State +lives at: + + ~/.claude-bottle/state// + metadata.json — agent_name + cwd + started_at (for resume) + Dockerfile — per-bottle override (absent → use repo's) + transcript/ — last snapshotted agent state (best-effort) + +When the per-bottle Dockerfile is present, the launch step builds +the agent image with a per-bottle tag (claude-bottle-rebuilt-) +from this file rather than the repo's. The build context is still +the repo root so the Dockerfile can COPY claude_bottle source files +the same way the original does. + +Identity model: +- Every `cli.py start ` mints a fresh identity via + `bottle_identity(agent_name)`: slug-prefix for readability plus a + 5-char random suffix for parallel-safe uniqueness. The metadata + written at launch time pins (agent_name, cwd) to that identity. +- `cli.py resume ` reads the metadata and re-launches a + bottle pinned to the same identity, picking up any per-bottle + Dockerfile and transcript snapshot. +""" + +from __future__ import annotations + +import dataclasses +import json +import secrets +import string +from dataclasses import dataclass +from pathlib import Path + +from ... import supervise as _supervise +from . import util as docker_mod + + +# Directory layout: ~/.claude-bottle/state//... +_STATE_SUBDIR = "state" +_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile" +_TRANSCRIPT_SUBDIR = "transcript" +_METADATA_NAME = "metadata.json" + +# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human +# operators starting bottles by hand; collision-free in practice. +_RANDOM_SUFFIX_LEN = 5 +_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits + + +def bottle_identity(agent_name: str) -> str: + """Mint a fresh per-launch bottle identity. The slug-prefix is + `slugify(agent_name)` for readability; the suffix is 5 random + base36 chars so two simultaneous `start ` invocations + don't collide on container/network names. + + Every call produces a different identity (non-deterministic). + To continue an existing bottle's state, use the recorded + identity from BottleMetadata via `cli.py resume `, + not this function.""" + slug = docker_mod.slugify(agent_name) + suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN)) + return f"{slug}-{suffix}" + + +@dataclass(frozen=True) +class BottleMetadata: + """Persistent record of how a bottle was launched, written at + start time and read by `cli.py resume`. Lives at + ~/.claude-bottle/state//metadata.json.""" + + identity: str + agent_name: str + cwd: str # empty string when --cwd was not passed + copy_cwd: bool + started_at: str # ISO 8601 UTC + + +def metadata_path(identity: str) -> Path: + return bottle_state_dir(identity) / _METADATA_NAME + + +def write_metadata(metadata: BottleMetadata) -> Path: + """Persist `metadata` to ~/.claude-bottle/state//metadata.json. + Mode 0o644 — no secrets, just (agent_name, cwd, timestamp).""" + path = metadata_path(metadata.identity) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n") + path.chmod(0o644) + return path + + +def read_metadata(identity: str) -> BottleMetadata | None: + """Return the metadata for `identity`, or None if no state has + been recorded for it. Used by `cli.py resume` to reconstruct + the launch spec.""" + path = metadata_path(identity) + if not path.is_file(): + return None + raw = json.loads(path.read_text()) + if not isinstance(raw, dict): + return None + return BottleMetadata( + identity=str(raw.get("identity", identity)), + agent_name=str(raw.get("agent_name", "")), + cwd=str(raw.get("cwd", "")), + copy_cwd=bool(raw.get("copy_cwd", False)), + started_at=str(raw.get("started_at", "")), + ) + + +def bottle_state_dir(identity: str) -> Path: + """Per-bottle state directory on the host. Created lazily by the + write helpers; readers tolerate its absence.""" + return _supervise.claude_bottle_root() / _STATE_SUBDIR / identity + + +def per_bottle_dockerfile_path(identity: str) -> Path: + return bottle_state_dir(identity) / _PER_BOTTLE_DOCKERFILE_NAME + + +def per_bottle_dockerfile(identity: str) -> str | None: + """Return the per-bottle Dockerfile content if present, else + None. None means: use the repo's Dockerfile (the original + pre-capability-block behavior).""" + p = per_bottle_dockerfile_path(identity) + if p.is_file(): + return p.read_text() + return None + + +def write_per_bottle_dockerfile(identity: str, content: str) -> Path: + p = per_bottle_dockerfile_path(identity) + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + p.chmod(0o644) + return p + + +def per_bottle_image_tag(identity: str) -> str: + """Image tag for a rebuilt bottle. Distinct from the base + claude-bottle:latest so per-bottle rebuilds don't collide in + the docker image cache.""" + return f"claude-bottle-rebuilt-{identity}:latest" + + +def transcript_snapshot_dir(identity: str) -> Path: + """Where capability_apply stashes the agent's transcript before + teardown, so the next `cli.py start ` can offer to + resume from it.""" + return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR + + +__all__ = [ + "BottleMetadata", + "bottle_identity", + "bottle_state_dir", + "metadata_path", + "per_bottle_dockerfile", + "per_bottle_dockerfile_path", + "per_bottle_image_tag", + "read_metadata", + "transcript_snapshot_dir", + "write_metadata", + "write_per_bottle_dockerfile", +] diff --git a/claude_bottle/backend/docker/capability_apply.py b/claude_bottle/backend/docker/capability_apply.py new file mode 100644 index 0000000..414e59f --- /dev/null +++ b/claude_bottle/backend/docker/capability_apply.py @@ -0,0 +1,210 @@ +"""capability_apply — host-side orchestrator for capability-block +remediation (PRD 0016). + +On approval of a capability-block proposal, the dashboard calls +apply_capability_change(slug, new_dockerfile) which: + + 1. Snapshots the agent's transcript dir to + ~/.claude-bottle/state//transcript/ (best-effort). + 2. Pushes the agent's working tree via `git push` (best-effort — + no upstream / no commits / no git repo all skip with a log). + 3. Writes the new Dockerfile to + ~/.claude-bottle/state//Dockerfile (PRD 0016 Phase 1 + state). The next `cli.py start ` picks it up. + 4. Force-removes the agent container + all sidecars + the + per-bottle networks. Idempotent — missing resources are not + errors. + +Returns (before, after) Dockerfile contents so the dashboard can +record / render the diff. (capability-block has no audit log per +PRD 0013 — the per-bottle Dockerfile state is its own record.) + +This is "fire-and-forget" from the agent's perspective: by the time +the dashboard writes the response file the supervise sidecar is +gone, so the agent's tool call connection drops without ever +receiving the response. The replacement agent (next manual +`cli.py start`) sees the new Dockerfile and starts from there. +v1 does not auto-relaunch — see PRD 0016's capability-block return +semantics open question. +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +from pathlib import Path + +from ...log import info, warn +from .bottle_state import ( + per_bottle_dockerfile, + per_bottle_dockerfile_path, + transcript_snapshot_dir, + write_per_bottle_dockerfile, +) + + +# Agent home inside the container (per the repo Dockerfile's +# `USER node` + `WORKDIR /home/node`). Used to locate the transcript +# dir + the workspace dir for git push. +_AGENT_HOME_IN_CONTAINER = "/home/node" +_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude" +_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace" + +# Per-bottle resource name patterns (mirroring prepare.py / +# the various sidecar modules). The agent container's name is the +# slug with no infix; sidecars carry an infix like cred-proxy. +def _agent_container_name(slug: str) -> str: + return f"claude-bottle-{slug}" + + +def _per_bottle_container_names(slug: str) -> list[str]: + """All container names that belong to this bottle. Missing + containers are silently skipped by the teardown helper, so it's + fine to include names that don't exist for a given bottle.""" + return [ + _agent_container_name(slug), + f"claude-bottle-cred-proxy-{slug}", + f"claude-bottle-pipelock-{slug}", + f"claude-bottle-git-gate-{slug}", + f"claude-bottle-supervise-{slug}", + ] + + +def _per_bottle_network_names(slug: str) -> list[str]: + return [ + f"claude-bottle-net-{slug}", + f"claude-bottle-egress-{slug}", + ] + + +class CapabilityApplyError(RuntimeError): + """Raised when the apply fails in a way that should keep the + proposal pending (so the operator can retry). Best-effort + failures (transcript snapshot, git push) do not raise — they + just log and proceed.""" + + +# --- Public helpers -------------------------------------------------------- + + +def fetch_current_dockerfile(slug: str) -> str: + """Return the Dockerfile content the next `cli.py start ` + would use for this bottle. If a per-bottle override exists, that + one; otherwise the repo's Dockerfile. + + Used by the operator-edit verb to show the current source of + truth, and by apply_capability_change for the before-diff.""" + override = per_bottle_dockerfile(slug) + if override is not None: + return override + repo_dockerfile = _repo_dockerfile_path() + if repo_dockerfile.is_file(): + return repo_dockerfile.read_text() + raise CapabilityApplyError( + f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at " + f"{repo_dockerfile}" + ) + + +def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]: + """End-to-end capability-block remediation. See module docstring + for the sequence. Returns (before, after) Dockerfile content.""" + if not new_dockerfile.strip(): + raise CapabilityApplyError("proposed Dockerfile is empty") + before = fetch_current_dockerfile(slug) + + _snapshot_transcript(slug) + _push_working_tree(slug) + write_per_bottle_dockerfile(slug, new_dockerfile) + _teardown_bottle(slug) + + return before, new_dockerfile + + +# --- Internals ------------------------------------------------------------- + + +def _repo_dockerfile_path() -> Path: + """Path to the repo's Dockerfile (one dir above this module's + package root). Resolved at call time so the path is correct + regardless of where this module is imported from.""" + # claude_bottle/backend/docker/capability_apply.py -> repo root + return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile" + + +def _snapshot_transcript(slug: str) -> None: + """`docker cp` /home/node/.claude out of the agent container into + ~/.claude-bottle/state//transcript/. Best-effort: missing + container, missing dir, or cp error all log a warning and return. + The transcript is what `claude --resume` reads to pick up where + the agent left off.""" + container = _agent_container_name(slug) + dest = transcript_snapshot_dir(slug) + if dest.exists(): + # Remove any prior snapshot so the new one is a clean copy. + shutil.rmtree(dest, ignore_errors=True) + dest.parent.mkdir(parents=True, exist_ok=True) + r = subprocess.run( + ["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)], + capture_output=True, text=True, check=False, + ) + if r.returncode != 0: + warn( + f"capability-apply: transcript snapshot skipped " + f"({(r.stderr or '').strip() or 'no transcript dir in container?'})" + ) + return + info(f"capability-apply: transcript snapshotted to {dest}") + + +def _push_working_tree(slug: str) -> None: + """`docker exec git push` from /home/node/workspace. + Best-effort: not-a-git-repo, no upstream, nothing-to-push, no + network all log a warning and return. The replacement bottle + will pick up whatever's actually upstream.""" + container = _agent_container_name(slug) + r = subprocess.run( + [ + "docker", "exec", container, "sh", "-c", + f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && " + f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && " + f"git push origin HEAD 2>&1 || true", + ], + capture_output=True, text=True, check=False, + ) + if r.returncode != 0: + warn( + f"capability-apply: git push skipped " + f"({(r.stderr or '').strip() or 'docker exec failed'})" + ) + return + output = (r.stdout or "").strip() + if output: + info(f"capability-apply: git push: {output}") + else: + info("capability-apply: git push ran (no output — likely not a git workspace)") + + +def _teardown_bottle(slug: str) -> None: + """Force-remove all per-bottle docker resources. Idempotent — + `docker rm -f` / `docker network rm` silently ignore missing + names, so this can be called even mid-rebuild.""" + info(f"capability-apply: tearing down bottle {slug}") + for name in _per_bottle_container_names(slug): + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + for net in _per_bottle_network_names(slug): + subprocess.run( + ["docker", "network", "rm", net], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + + +__all__ = [ + "CapabilityApplyError", + "apply_capability_change", + "fetch_current_dockerfile", +] diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index f8a6def..b2da057 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -66,7 +66,10 @@ def launch( pass try: - docker_mod.build_image(plan.image, _REPO_DIR) + docker_mod.build_image( + plan.image, _REPO_DIR, + dockerfile=plan.dockerfile_path, + ) if plan.derived_image: docker_mod.build_image_with_cwd( plan.derived_image, plan.image, plan.spec.user_cwd diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py index 531eb43..d3ba481 100644 --- a/claude_bottle/backend/docker/prepare.py +++ b/claude_bottle/backend/docker/prepare.py @@ -11,6 +11,7 @@ via the base class's `prepare` template before this is called. from __future__ import annotations import os +from datetime import datetime, timezone from pathlib import Path from ... import pipelock @@ -26,6 +27,14 @@ from .cred_proxy import ( cred_proxy_url, ) from .git_gate import DockerGitGate, git_gate_container_name +from .bottle_state import ( + BottleMetadata, + bottle_identity, + per_bottle_dockerfile, + per_bottle_dockerfile_path, + per_bottle_image_tag, + write_metadata, +) from .pipelock import DockerPipelockProxy, pipelock_container_name from .supervise import DockerSupervise, supervise_container_name @@ -48,9 +57,34 @@ def resolve_plan( agent = manifest.agents[spec.agent_name] bottle = manifest.bottle_for(spec.agent_name) - slug = docker_mod.slugify(spec.agent_name) + # PRD 0016 follow-up: identity, not bare slug. A fresh `start` + # mints a random-suffixed identity (so parallel runs of the same + # agent in the same cwd don't collide on container/network + # names); a `resume` passes the recorded identity in via + # spec.identity to continue an existing bottle's state. + slug = spec.identity or bottle_identity(spec.agent_name) + # Record the launch metadata so `cli.py resume ` can + # reconstruct the spec. Idempotent — re-writes on resume with a + # refreshed started_at. + write_metadata(BottleMetadata( + identity=slug, + agent_name=spec.agent_name, + cwd=spec.user_cwd if spec.copy_cwd else "", + copy_cwd=spec.copy_cwd, + started_at=datetime.now(timezone.utc).isoformat(), + )) - image = os.environ.get("CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest") + # PRD 0016 capability-block: if a per-bottle Dockerfile has been + # written (via apply_capability_change), the base image becomes + # per_bottle_image_tag(slug) built from that file. --cwd still + # layers a derived image on top. + dockerfile_path = "" + if per_bottle_dockerfile(slug) is not None: + image_default = per_bottle_image_tag(slug) + dockerfile_path = str(per_bottle_dockerfile_path(slug)) + else: + image_default = "claude-bottle:latest" + image = os.environ.get("CLAUDE_BOTTLE_IMAGE", image_default) derived_image = "" runtime_image = image if spec.copy_cwd: @@ -184,6 +218,7 @@ def resolve_plan( image=image, derived_image=derived_image, runtime_image=runtime_image, + dockerfile_path=dockerfile_path, env_file=env_file, forwarded_env=forwarded_env, prompt_file=prompt_file, diff --git a/claude_bottle/cli/__init__.py b/claude_bottle/cli/__init__.py index 6d24aea..f6ee37a 100644 --- a/claude_bottle/cli/__init__.py +++ b/claude_bottle/cli/__init__.py @@ -1,6 +1,6 @@ """Main CLI dispatcher. -Commands: cleanup, dashboard, edit, info, init, list, start +Commands: cleanup, dashboard, edit, info, init, list, resume, start """ from __future__ import annotations @@ -15,6 +15,7 @@ from .dashboard import cmd_dashboard from .edit import cmd_edit from .info import cmd_info from .init import cmd_init +from .resume import cmd_resume from .start import cmd_start cmd_list = _list_mod.cmd_list @@ -26,6 +27,7 @@ COMMANDS = { "info": cmd_info, "init": cmd_init, "list": cmd_list, + "resume": cmd_resume, "start": cmd_start, } @@ -39,6 +41,7 @@ def usage() -> None: sys.stderr.write(" info print env, skills, and prompt details for a named agent\n") sys.stderr.write(" init interactively create a new agent and add it to claude-bottle.json\n") sys.stderr.write(" list list available agents or active containers\n") + sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n") sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n") sys.stderr.write(f"Run '{PROG} --help' for command-specific usage.\n") diff --git a/claude_bottle/cli/dashboard.py b/claude_bottle/cli/dashboard.py index c379fb6..4ced7e6 100644 --- a/claude_bottle/cli/dashboard.py +++ b/claude_bottle/cli/dashboard.py @@ -22,6 +22,10 @@ from datetime import datetime, timezone from pathlib import Path from .. import supervise as _supervise +from ..backend.docker.capability_apply import ( + CapabilityApplyError, + apply_capability_change, +) from ..backend.docker.cred_proxy_apply import ( CredProxyApplyError, apply_routes_change, @@ -45,6 +49,7 @@ from ..supervise import ( TOOL_CAPABILITY_BLOCK, TOOL_CRED_PROXY_BLOCK, TOOL_PIPELOCK_BLOCK, + archive_proposal, list_pending_proposals, render_diff, write_audit_entry, @@ -56,7 +61,7 @@ from ._common import PROG # Errors any remediation engine may raise. Caught by the TUI key # handlers and surfaced in the status line so a failed apply keeps # the proposal pending rather than crashing curses. -ApplyError = (CredProxyApplyError, PipelockApplyError) +ApplyError = (CredProxyApplyError, PipelockApplyError, CapabilityApplyError) # --- Discovery ------------------------------------------------------------- @@ -107,6 +112,16 @@ def discover_pipelock_slugs() -> list[str]: return _discover_sidecar_slugs("claude-bottle-pipelock-") +def _approval_status(qp: QueuedProposal, verb: str) -> str: + """Status-line text after a successful approval. For capability- + block, append the `resume ` hint so the operator can + bring the rebuilt bottle back up with one copy-paste.""" + base = f"{verb} {qp.proposal.tool} for [{qp.proposal.bottle_slug}]" + if qp.proposal.tool == TOOL_CAPABILITY_BLOCK: + return f"{base}; resume: ./cli.py resume {qp.proposal.bottle_slug}" + return base + + def discover_pending() -> list[QueuedProposal]: """Walk ~/.claude-bottle/queue/* and collect pending proposals from every bottle's queue. Sorted by arrival time across the @@ -155,9 +170,10 @@ def approve( diff_before, diff_after = apply_allowlist_change( qp.proposal.bottle_slug, file_to_apply, ) - # capability-block remediation lands in PRD 0016; until then - # it stays a no-op approval and the audit (none for capability) - # is skipped. + elif qp.proposal.tool == TOOL_CAPABILITY_BLOCK: + diff_before, diff_after = apply_capability_change( + qp.proposal.bottle_slug, file_to_apply, + ) response = Response( proposal_id=qp.proposal.id, @@ -170,6 +186,12 @@ def approve( qp, action=status, notes=notes, diff_before=diff_before, diff_after=diff_after, ) + if qp.proposal.tool == TOOL_CAPABILITY_BLOCK: + # The supervise sidecar was torn down by apply_capability_change, + # so it can't archive its own proposal+response. Archive here so + # dashboard.discover_pending stops surfacing the resolved + # proposal forever. + archive_proposal(qp.queue_dir, qp.proposal.id) def reject(qp: QueuedProposal, *, reason: str) -> None: @@ -359,7 +381,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None: elif key == ord("a"): try: approve(qp) - status_line = f"approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]" + status_line = _approval_status(qp, "approved") except ApplyError as e: status_line = f"apply failed: {e}" elif key == ord("m"): @@ -369,7 +391,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None: else: try: approve(qp, final_file=edited, notes="operator modified before approving") - status_line = f"modified+approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]" + status_line = _approval_status(qp, "modified+approved") except ApplyError as e: status_line = f"apply failed: {e}" elif key == ord("r"): diff --git a/claude_bottle/cli/resume.py b/claude_bottle/cli/resume.py new file mode 100644 index 0000000..ca4cd6e --- /dev/null +++ b/claude_bottle/cli/resume.py @@ -0,0 +1,66 @@ +"""resume: re-launch a bottle by its identity. + +Reads ~/.claude-bottle/state//metadata.json to recover the +(agent_name, cwd, copy_cwd) the bottle was originally started with, +then runs the same launch core as `start` — but pinned to the +recorded identity so the new bottle picks up any per-bottle Dockerfile +(from capability-block apply) and transcript snapshot under the same +state dir. + +Use case: an agent calls capability-block, the dashboard approves +and tears down the bottle, the operator runs + ./cli.py resume +to bring up the replacement with the new capabilities baked in. +""" + +from __future__ import annotations + +import argparse + +from ..backend import BottleSpec +from ..backend.docker.bottle_state import read_metadata +from ..log import die +from ..manifest import Manifest +from ._common import PROG, USER_CWD +from .start import _launch_bottle + + +def cmd_resume(argv: list[str]) -> int: + parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True) + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--remote-control", action="store_true") + parser.add_argument( + "--format", + choices=("text", "json"), + default="text", + help="preflight output format; --format=json requires --dry-run", + ) + parser.add_argument( + "identity", + help="bottle identity from a prior `start` (see its session-end output)", + ) + args = parser.parse_args(argv) + + metadata = read_metadata(args.identity) + if metadata is None: + die( + f"no state recorded for identity {args.identity!r}; " + f"check ~/.claude-bottle/state/ or run `cli.py start` to create a new bottle" + ) + + manifest = Manifest.resolve(USER_CWD) + manifest.require_agent(metadata.agent_name) + + spec = BottleSpec( + manifest=manifest, + agent_name=metadata.agent_name, + copy_cwd=metadata.copy_cwd, + user_cwd=metadata.cwd or USER_CWD, + identity=metadata.identity, + ) + return _launch_bottle( + spec, + dry_run=args.dry_run, + output_format=args.format, + remote_control=args.remote_control, + ) diff --git a/claude_bottle/cli/start.py b/claude_bottle/cli/start.py index a98e330..81fdbfc 100644 --- a/claude_bottle/cli/start.py +++ b/claude_bottle/cli/start.py @@ -1,6 +1,10 @@ """start: boot a sandboxed container for a named agent and attach an interactive claude-code session. The container is torn down when the -session ends.""" +session ends. + +The launch core is shared with `cli.py resume `: see +_launch_bottle below. +""" from __future__ import annotations @@ -43,18 +47,35 @@ def cmd_start(argv: list[str]) -> int: copy_cwd=args.cwd, user_cwd=USER_CWD, ) + return _launch_bottle( + spec, + dry_run=dry_run, + output_format=args.format, + remote_control=args.remote_control, + ) + +def _launch_bottle( + spec: BottleSpec, + *, + dry_run: bool, + output_format: str, + remote_control: bool, +) -> int: + """Shared launch core for `start` and `resume`. Builds the plan, + prints / dry-runs / prompts as appropriate, brings the bottle up, + attaches claude, and prints the resume hint on session end.""" stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage.")) try: backend = get_bottle_backend() plan = backend.prepare(spec, stage_dir=stage_dir) - if args.format == "json": - json.dump(plan.to_dict(remote_control=args.remote_control), sys.stdout, indent=2) + if output_format == "json": + json.dump(plan.to_dict(remote_control=remote_control), sys.stdout, indent=2) sys.stdout.write("\n") return 0 - plan.print(remote_control=args.remote_control) + plan.print(remote_control=remote_control) if dry_run: info("dry-run requested; not starting container.") @@ -67,16 +88,27 @@ def cmd_start(argv: list[str]) -> int: info("aborted by user") return 0 + identity = _identity_from_plan(plan) with backend.launch(plan) as bottle: info( "attaching interactive claude session " "(Ctrl-D or 'exit' to leave; container will be removed)" ) claude_args = ["--dangerously-skip-permissions"] - if args.remote_control: + if remote_control: claude_args.append("--remote-control") bottle.exec_claude(claude_args, tty=True) info(f"session ended; container {bottle.name} will be removed") + if identity: + info(f"to resume this bottle: ./cli.py resume {identity}") return 0 finally: shutil.rmtree(stage_dir, ignore_errors=True) + + +def _identity_from_plan(plan: object) -> str: + """Backend-specific: the docker plan exposes the identity as + `.slug`. Other backends in the future would expose their own + identity attribute; for now we duck-type to keep this layer + backend-agnostic.""" + return getattr(plan, "slug", "") diff --git a/docs/prds/0016-capability-block-remediation.md b/docs/prds/0016-capability-block-remediation.md new file mode 100644 index 0000000..f70025e --- /dev/null +++ b/docs/prds/0016-capability-block-remediation.md @@ -0,0 +1,70 @@ +# PRD 0016: capability block remediation + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-25 +- **Parent:** PRD 0012 +- **Depends on:** PRD 0013 + +## Summary + +Wires the **capability block** path (PRD 0012 *Stuck categories*) end-to-end. On operator approval of a `capability-block` proposal, the rebuild orchestrator tears down the existing bottle, builds from the new Dockerfile, and starts a replacement bottle on the same branch via the state-preservation helper. The replacement agent picks up where the original left off, now with the missing capability. Heaviest of the three remediation PRDs because the orchestrator and state-preservation helper are non-trivial. + +## Problem + +See PRD 0012. This PRD specifically addresses: with 0013 in place, the operator can approve a `capability-block` proposal but nothing happens — the bottle is not rebuilt, the agent stays stuck. This PRD closes the loop. Unlike 0014 and 0015, the remediation requires container teardown + rebuild + state hand-off, so the design surface is larger. + +## Goals / Success Criteria + +A real capability block recovers end-to-end: the agent's invocation of a tool / command / skill fails (not found, permission denied), the agent calls `capability-block` with a proposed Dockerfile and justification, the operator approves in the TUI, the orchestrator tears down the bottle and starts a replacement built from the new Dockerfile, the replacement agent inherits the working tree and best-effort transcript and continues on the same branch. + +## Non-goals + +- Live mutation of the running container (re-stated from PRD 0012 non-goals). +- Forking into multiple parallel rebuilt bottles. One-for-one replacement only. +- cred-proxy or pipelock handling (covered by 0014 and 0015). + +## Scope + +### In scope + +- A rebuild orchestrator that, on operator approval, tears down the existing bottle, builds from the approved Dockerfile, and starts a replacement on the same branch. +- A state-preservation helper that handles the hand-off across the rebuild: working tree push is mandatory; transcript / reasoning context is best-effort. +- `capability-block` approval handler in the MCP sidecar (replacing the 0013 no-op): on approval, hand off to the orchestrator. +- Bottle lifecycle script changes for orchestrated teardown + rebuild (distinct from a fresh-spawn). +- Bottle manifest schema changes: record originating manifest version / change history per agent run so the dashboard can show "what changed" rather than "what is." +- A per-agent-run record that maps a running bottle back to its PR / branch, so the orchestrator knows which branch to resume on. + +### Out of scope + +- Rolling back a rebuild that the replacement agent regrets. The audit trail (git history + bottle rebuild record) shows what changed; a follow-up `capability-block` proposal can revert. + +## Proposed Design + +### New services / components + +- **Rebuild orchestrator.** On approval, tears down the existing bottle, builds from the new Dockerfile, snapshots state via the state-preservation helper, and starts a fresh bottle on the same branch. +- **State-preservation helper.** Mandatory: ensures the working tree is pushed before teardown. Best-effort: carries forward the agent's transcript / reasoning context — including the approved `capability-block` proposal — into the replacement container so the new agent starts warm rather than cold. + +### Existing code touched + +- **MCP sidecar** (PRD 0013) — the `capability-block` approval handler stops being a no-op; on approval, hands off to the rebuild orchestrator. +- **Bottle lifecycle scripts** — extended for orchestrated teardown + rebuild with state hand-off, distinct from a fresh-spawn. +- **Bottle manifest schema** — records the originating manifest version / change history per agent run. +- **`cli.py`** — gains the rebuild path. + +### Data model changes + +- A per-agent-run record sufficient to map a running bottle back to its PR / branch. + +## Open questions + +- **`capability-block` return semantics.** The current agent is torn down on approval, so the tool's return value never reaches it. Options: (a) fire-and-forget, the tool returns immediately with "queued" and the agent halts; (b) block the tool, let the rebuild orchestrator's teardown kill the connection, the replacement agent gets the approval record via state-preservation; (c) the tool blocks, returns "approved" right before teardown, the agent has milliseconds to log it. (b) seems cleanest but is worth confirming during implementation. +- **Best-effort transcript preservation.** Mount the agent's state directory, snapshot on teardown, remount in the replacement? How much fidelity is "good enough" for the new agent to pick up? +- **Bottle → PR/branch mapping.** Recorded at bottle-spawn time, derived from the working tree, or specified in the manifest? +- **Rejection semantics.** Does the agent receive a tool reply explaining the rejection, or does the bottle just stay torn down? + +## References + +- PRD 0012 — stuck-agent recovery flow overview. +- PRD 0013 — supervise plane foundation (prerequisite). diff --git a/tests/integration/test_capability_apply.py b/tests/integration/test_capability_apply.py new file mode 100644 index 0000000..e45395c --- /dev/null +++ b/tests/integration/test_capability_apply.py @@ -0,0 +1,217 @@ +"""Integration: drive `apply_capability_change` against a real +container that mimics the agent's name + filesystem layout (PRD 0016). + +The real `cli.py start ` flow is too heavy for an integration +test (it builds the agent image, brings up all the sidecars, attaches +an interactive claude session). Instead, this test stages the +minimum the orchestrator interacts with: + + - A lightweight `alpine:latest sleep infinity` container named + `claude-bottle-` (matches the agent container name pattern) + on the per-bottle internal network. + - A marker file under `/home/node/.claude/` so we can assert the + transcript snapshot path actually transferred bytes. + +Then `apply_capability_change` runs and we verify: + - Per-bottle Dockerfile written. + - Containers + networks removed. + - Transcript snapshot dir on the host has the marker file. + +docker exec / cp / rm work across the docker socket boundary, so +this test runs in DinD too — no act_runner skip needed. +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +import tempfile +import time +import unittest +from pathlib import Path + +from claude_bottle import supervise +from claude_bottle.backend.docker import bottle_state, capability_apply +from claude_bottle.backend.docker.capability_apply import apply_capability_change +from claude_bottle.backend.docker.network import ( + network_create_egress, + network_create_internal, + network_remove, +) +from tests._docker import skip_unless_docker + + +ALPINE_IMAGE = "alpine:latest" + + +@skip_unless_docker() +class TestCapabilityApply(unittest.TestCase): + @classmethod + def setUpClass(cls): + r = subprocess.run( + ["docker", "pull", ALPINE_IMAGE], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + if r.returncode != 0: + raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}") + + def setUp(self): + self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}" + self.agent_name = f"claude-bottle-{self.slug}" + self.sidecar_names: list[str] = [] + self.internal_net = "" + self.egress_net = "" + # Fake home so tests don't touch ~/.claude-bottle/. + self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.") + self._original_root = supervise.claude_bottle_root + + def fake_root() -> Path: + return Path(self._tmp.name) / ".claude-bottle" + + supervise.claude_bottle_root = fake_root # type: ignore[assignment] + + def tearDown(self): + supervise.claude_bottle_root = self._original_root # type: ignore[assignment] + for name in [self.agent_name, *self.sidecar_names]: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + for n in (self.internal_net, self.egress_net): + if n: + network_remove(n) + self._tmp.cleanup() + + def _bring_up_fake_bottle(self) -> None: + self.internal_net = network_create_internal(self.slug) + self.egress_net = network_create_egress(self.slug) + # Agent container with the canonical name. + r = subprocess.run( + [ + "docker", "run", "-d", + "--name", self.agent_name, + "--network", self.internal_net, + ALPINE_IMAGE, + "sh", "-c", + "mkdir -p /home/node/.claude && " + "echo 'transcript-marker' > /home/node/.claude/sessions.json && " + "sleep 3600", + ], + capture_output=True, text=True, check=False, + ) + self.assertEqual(0, r.returncode, r.stderr) + # Also start a fake supervise sidecar so teardown has something + # extra to clean up (mirrors a real bottle's container set). + sidecar = f"claude-bottle-supervise-{self.slug}" + subprocess.run( + [ + "docker", "run", "-d", + "--name", sidecar, + "--network", self.internal_net, + ALPINE_IMAGE, "sleep", "3600", + ], + capture_output=True, text=True, check=False, + ) + self.sidecar_names.append(sidecar) + + def _containers_named_like(self) -> list[str]: + """All running/stopped containers whose names start with + the bottle's slug — both agent + sidecars.""" + r = subprocess.run( + [ + "docker", "ps", "-a", + "--filter", f"name={self.agent_name}", + "--format", "{{.Names}}", + ], + capture_output=True, text=True, check=False, + ) + return [line for line in (r.stdout or "").splitlines() if line] + + def _networks_named_like(self) -> list[str]: + r = subprocess.run( + [ + "docker", "network", "ls", + "--filter", f"name={self.slug}", + "--format", "{{.Name}}", + ], + capture_output=True, text=True, check=False, + ) + return [line for line in (r.stdout or "").splitlines() if line] + + def test_apply_writes_dockerfile_and_tears_down(self): + self._bring_up_fake_bottle() + self.assertIn(self.agent_name, self._containers_named_like()) + + new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n" + before, after = apply_capability_change(self.slug, new_dockerfile) + + # Before is the repo Dockerfile (no prior per-bottle override); + # after is what we passed in. + self.assertIn("FROM ", before) + self.assertEqual(new_dockerfile, after) + + # Per-bottle Dockerfile written on the host. + self.assertEqual( + new_dockerfile, + bottle_state.per_bottle_dockerfile(self.slug), + ) + + # Agent + sidecars gone. + self.assertEqual([], self._containers_named_like()) + # Networks removed (matching the slug substring). + nets = self._networks_named_like() + self.assertEqual([], nets) + # Mark them as already cleaned so tearDown is idempotent. + self.internal_net = "" + self.egress_net = "" + self.sidecar_names = [] + + def test_transcript_snapshot_captured(self): + self._bring_up_fake_bottle() + apply_capability_change(self.slug, "FROM x\n") + snap = bottle_state.transcript_snapshot_dir(self.slug) + self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing") + # docker cp :/home/node/.claude produces + # /.claude/sessions.json (it preserves the source dir name + # inside the destination if the destination already exists). + # Walk the snapshot looking for the marker contents. + marker_found = False + for path in snap.rglob("sessions.json"): + if "transcript-marker" in path.read_text(): + marker_found = True + break + self.assertTrue(marker_found, f"marker not found under {snap}") + # Cleaned up by apply already. + self.internal_net = "" + self.egress_net = "" + self.sidecar_names = [] + + def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self): + # First change: before is repo's Dockerfile. + self._bring_up_fake_bottle() + first_before, _ = apply_capability_change(self.slug, "FROM v1\n") + self.assertIn("FROM ", first_before) + + # Second change: before is "FROM v1\n" (the per-bottle override + # from the first change), proving the state persists across + # rebuilds. + self._bring_up_fake_bottle() + second_before, second_after = apply_capability_change(self.slug, "FROM v2\n") + self.assertEqual("FROM v1\n", second_before) + self.assertEqual("FROM v2\n", second_after) + self.internal_net = "" + self.egress_net = "" + self.sidecar_names = [] + + def test_teardown_idempotent_when_nothing_running(self): + # No bottle ever brought up — teardown still doesn't raise. + apply_capability_change(self.slug, "FROM x\n") + self.assertEqual( + "FROM x\n", + bottle_state.per_bottle_dockerfile(self.slug), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_bottle_state.py b/tests/unit/test_bottle_state.py new file mode 100644 index 0000000..4fe2287 --- /dev/null +++ b/tests/unit/test_bottle_state.py @@ -0,0 +1,166 @@ +"""Unit: per-bottle state helpers (PRD 0016 Phase 1) + identity + +launch metadata.""" + +import re +import tempfile +import unittest +from pathlib import Path + +from claude_bottle import supervise +from claude_bottle.backend.docker import bottle_state +from claude_bottle.backend.docker.bottle_state import ( + BottleMetadata, + read_metadata, + write_metadata, +) + + +class _FakeHomeMixin: + def _setup_fake_home(self): + self._tmp = tempfile.TemporaryDirectory(prefix="bottle-state-test.") + original = supervise.claude_bottle_root + + def fake_root() -> Path: + return Path(self._tmp.name) / ".claude-bottle" + + supervise.claude_bottle_root = fake_root # type: ignore[assignment] + self._restore = lambda: setattr(supervise, "claude_bottle_root", original) + + def _teardown_fake_home(self): + self._restore() + self._tmp.cleanup() + + +class TestPerBottleDockerfile(_FakeHomeMixin, unittest.TestCase): + def setUp(self): + self._setup_fake_home() + + def tearDown(self): + self._teardown_fake_home() + + def test_returns_none_when_absent(self): + self.assertIsNone(bottle_state.per_bottle_dockerfile("dev")) + + def test_write_then_read_roundtrip(self): + bottle_state.write_per_bottle_dockerfile( + "dev", "FROM python:3.13\nRUN apk add ripgrep\n", + ) + self.assertEqual( + "FROM python:3.13\nRUN apk add ripgrep\n", + bottle_state.per_bottle_dockerfile("dev"), + ) + + def test_isolated_per_slug(self): + bottle_state.write_per_bottle_dockerfile("dev", "FROM dev\n") + bottle_state.write_per_bottle_dockerfile("api", "FROM api\n") + self.assertEqual("FROM dev\n", bottle_state.per_bottle_dockerfile("dev")) + self.assertEqual("FROM api\n", bottle_state.per_bottle_dockerfile("api")) + + def test_dockerfile_path_under_state_dir(self): + path = bottle_state.per_bottle_dockerfile_path("dev") + self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/Dockerfile")) + + def test_image_tag_unique_per_slug(self): + self.assertEqual( + "claude-bottle-rebuilt-dev:latest", + bottle_state.per_bottle_image_tag("dev"), + ) + self.assertNotEqual( + bottle_state.per_bottle_image_tag("dev"), + bottle_state.per_bottle_image_tag("api"), + ) + + def test_transcript_dir_under_state_dir(self): + path = bottle_state.transcript_snapshot_dir("dev") + self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/transcript")) + + +class TestBottleIdentity(unittest.TestCase): + """bottle_identity(agent_name) — PRD 0016 follow-up. + + Every call mints a fresh identity with a random 5-char suffix + so multiple instances of the same agent can run in parallel + without container name collisions. The slug-prefix is for + readability; the suffix is for uniqueness. To continue an + existing bottle, use the recorded identity via + `cli.py resume `, not this function.""" + + def test_format_is_slug_dash_5_alnum(self): + identity = bottle_state.bottle_identity("dev") + self.assertTrue(identity.startswith("dev-")) + suffix = identity[len("dev-"):] + self.assertEqual(5, len(suffix)) + self.assertTrue( + re.fullmatch(r"[a-z0-9]+", suffix), + f"suffix {suffix!r} must be lowercase base36", + ) + + def test_two_calls_yield_different_identities(self): + # 5-char base36 gives ~60M combinations; collision in two + # calls is astronomically unlikely. If this ever flakes it's + # almost certainly a regression, not a bad-luck collision. + a = bottle_state.bottle_identity("dev") + b = bottle_state.bottle_identity("dev") + self.assertNotEqual(a, b) + + def test_different_agents_get_different_prefixes(self): + a = bottle_state.bottle_identity("dev") + b = bottle_state.bottle_identity("api") + self.assertTrue(a.startswith("dev-")) + self.assertTrue(b.startswith("api-")) + + def test_agent_name_slugified(self): + identity = bottle_state.bottle_identity("My Agent") + self.assertTrue(identity.startswith("my-agent-")) + + +class TestBottleMetadata(_FakeHomeMixin, unittest.TestCase): + def setUp(self): + self._setup_fake_home() + + def tearDown(self): + self._teardown_fake_home() + + def test_read_missing_returns_none(self): + self.assertIsNone(read_metadata("does-not-exist")) + + def test_write_then_read_roundtrip(self): + meta = BottleMetadata( + identity="dev-a4f8c", + agent_name="dev", + cwd="/proj/A", + copy_cwd=True, + started_at="2026-05-25T12:00:00+00:00", + ) + write_metadata(meta) + loaded = read_metadata("dev-a4f8c") + self.assertEqual(meta, loaded) + + def test_metadata_lives_under_state_dir(self): + meta = BottleMetadata( + identity="dev-x", agent_name="dev", + cwd="", copy_cwd=False, started_at="t", + ) + path = write_metadata(meta) + self.assertTrue( + str(path).endswith("/.claude-bottle/state/dev-x/metadata.json"), + ) + + def test_overwriting_metadata_updates_timestamp(self): + # `resume` re-writes metadata with a fresh started_at; + # everything else stays the same. + write_metadata(BottleMetadata( + identity="dev-y", agent_name="dev", + cwd="/proj/A", copy_cwd=True, started_at="t1", + )) + write_metadata(BottleMetadata( + identity="dev-y", agent_name="dev", + cwd="/proj/A", copy_cwd=True, started_at="t2", + )) + loaded = read_metadata("dev-y") + assert loaded is not None + self.assertEqual("t2", loaded.started_at) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_capability_apply.py b/tests/unit/test_capability_apply.py new file mode 100644 index 0000000..e3fca9f --- /dev/null +++ b/tests/unit/test_capability_apply.py @@ -0,0 +1,123 @@ +"""Unit: capability_apply helpers (PRD 0016 Phase 2). + +docker cp / exec / rm / network rm paths are covered by the +integration test in Phase 4. Here we cover: + - fetch_current_dockerfile fallback chain (per-bottle → repo) + - apply_capability_change writes the per-bottle Dockerfile and + returns the correct (before, after). + - apply_capability_change rejects empty input. +""" + +import tempfile +import unittest +from pathlib import Path + +from claude_bottle import supervise +from claude_bottle.backend.docker import bottle_state, capability_apply +from claude_bottle.backend.docker.capability_apply import ( + CapabilityApplyError, + apply_capability_change, + fetch_current_dockerfile, +) + + +class _FakeHomeMixin: + def _setup_fake_home(self): + self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-test.") + original = supervise.claude_bottle_root + + def fake_root() -> Path: + return Path(self._tmp.name) / ".claude-bottle" + + supervise.claude_bottle_root = fake_root # type: ignore[assignment] + self._restore = lambda: setattr(supervise, "claude_bottle_root", original) + + def _teardown_fake_home(self): + self._restore() + self._tmp.cleanup() + + +class TestFetchCurrentDockerfile(_FakeHomeMixin, unittest.TestCase): + def setUp(self): + self._setup_fake_home() + + def tearDown(self): + self._teardown_fake_home() + + def test_returns_per_bottle_dockerfile_when_present(self): + bottle_state.write_per_bottle_dockerfile("dev", "FROM rebuilt\n") + self.assertEqual("FROM rebuilt\n", fetch_current_dockerfile("dev")) + + def test_falls_back_to_repo_dockerfile_when_no_override(self): + # The repo's Dockerfile actually exists; the test just checks + # we get its content (non-empty) when no per-bottle override + # is set. + content = fetch_current_dockerfile("dev-no-override") + self.assertIn("FROM ", content) + + +class TestApplyCapabilityChange(_FakeHomeMixin, unittest.TestCase): + def setUp(self): + self._setup_fake_home() + # Stub out the docker-dependent helpers. The orchestrator's + # job is to sequence write + snapshot + push + teardown; we + # validate that sequence here, not the docker primitives. + self._calls: list[str] = [] + self._orig_snapshot = capability_apply._snapshot_transcript + self._orig_push = capability_apply._push_working_tree + self._orig_teardown = capability_apply._teardown_bottle + + def stub_snapshot(slug): + self._calls.append(f"snapshot:{slug}") + + def stub_push(slug): + self._calls.append(f"push:{slug}") + + def stub_teardown(slug): + self._calls.append(f"teardown:{slug}") + + capability_apply._snapshot_transcript = stub_snapshot # type: ignore[assignment] + capability_apply._push_working_tree = stub_push # type: ignore[assignment] + capability_apply._teardown_bottle = stub_teardown # type: ignore[assignment] + + def tearDown(self): + capability_apply._snapshot_transcript = self._orig_snapshot # type: ignore[assignment] + capability_apply._push_working_tree = self._orig_push # type: ignore[assignment] + capability_apply._teardown_bottle = self._orig_teardown # type: ignore[assignment] + self._teardown_fake_home() + + def test_writes_per_bottle_dockerfile_and_returns_before_after(self): + bottle_state.write_per_bottle_dockerfile("dev", "FROM old\n") + before, after = apply_capability_change("dev", "FROM new\nRUN apk add ripgrep\n") + self.assertEqual("FROM old\n", before) + self.assertEqual("FROM new\nRUN apk add ripgrep\n", after) + self.assertEqual( + "FROM new\nRUN apk add ripgrep\n", + bottle_state.per_bottle_dockerfile("dev"), + ) + + def test_calls_snapshot_push_teardown_in_order(self): + apply_capability_change("dev", "FROM new\n") + # Snapshot + push must happen BEFORE write_per_bottle_dockerfile + # (so they capture pre-rebuild state) and BEFORE teardown (so + # the agent container still exists to docker exec / cp from). + # Teardown must be last. + self.assertEqual( + ["snapshot:dev", "push:dev", "teardown:dev"], + self._calls, + ) + + def test_first_change_falls_back_to_repo_dockerfile_for_before(self): + # No per-bottle override yet — before-diff comes from the + # repo's Dockerfile. + before, after = apply_capability_change("dev-fresh", "FROM new\n") + self.assertIn("FROM ", before) + self.assertEqual("FROM new\n", after) + + def test_empty_dockerfile_rejected(self): + with self.assertRaises(CapabilityApplyError): + apply_capability_change("dev", " \n\t\n") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_dashboard.py b/tests/unit/test_dashboard.py index 6e00994..b6f1911 100644 --- a/tests/unit/test_dashboard.py +++ b/tests/unit/test_dashboard.py @@ -16,6 +16,7 @@ from datetime import datetime, timezone from pathlib import Path from claude_bottle import supervise +from claude_bottle.backend.docker.capability_apply import CapabilityApplyError from claude_bottle.backend.docker.cred_proxy_apply import CredProxyApplyError from claude_bottle.backend.docker.pipelock_apply import PipelockApplyError from claude_bottle.cli import dashboard @@ -118,6 +119,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase): self._setup_fake_home() self._original_apply_routes = dashboard.apply_routes_change self._original_apply_allowlist = dashboard.apply_allowlist_change + self._original_apply_capability = dashboard.apply_capability_change # Default stubs: succeed with deterministic before/after so the # audit log shows a non-empty diff. dashboard.apply_routes_change = lambda slug, content: ( @@ -126,10 +128,14 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase): dashboard.apply_allowlist_change = lambda slug, content: ( "old.example\n", content, ) + dashboard.apply_capability_change = lambda slug, content: ( + "FROM old\n", content, + ) def tearDown(self): dashboard.apply_routes_change = self._original_apply_routes dashboard.apply_allowlist_change = self._original_apply_allowlist + dashboard.apply_capability_change = self._original_apply_capability self._teardown_fake_home() def _enqueue(self, tool: str = TOOL_CRED_PROXY_BLOCK): @@ -333,6 +339,75 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase): self.assertIn("+new.example", entries[0].diff) +class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase): + """PRD 0016 Phase 3: approve() on a capability-block proposal + calls apply_capability_change, archives the proposal afterward + (sidecar is gone so it can't archive itself), and writes no + audit entry (capability-block has none per PRD 0013).""" + + def setUp(self): + self._setup_fake_home() + self._original = dashboard.apply_capability_change + + def tearDown(self): + dashboard.apply_capability_change = self._original + self._teardown_fake_home() + + def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"): + p = Proposal.new( + bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK, + proposed_file=proposed, + justification="need ripgrep", + current_file_hash=sha256_hex(proposed), + now=FIXED, + ) + qdir = supervise.queue_dir_for_slug("dev") + qdir.mkdir(parents=True, exist_ok=True) + supervise.write_proposal(qdir, p) + return dashboard.QueuedProposal(proposal=p, queue_dir=qdir) + + def test_capability_block_calls_apply_with_proposed_file(self): + calls = [] + dashboard.apply_capability_change = lambda slug, content: ( + calls.append((slug, content)) or ("FROM old\n", content) + ) + qp = self._enqueue_capability("FROM bookworm\n") + dashboard.approve(qp) + self.assertEqual([("dev", "FROM bookworm\n")], calls) + + def test_apply_failure_blocks_response_and_keeps_pending(self): + dashboard.apply_capability_change = lambda slug, content: (_ for _ in ()).throw( + CapabilityApplyError("teardown failed") + ) + qp = self._enqueue_capability() + with self.assertRaises(CapabilityApplyError): + dashboard.approve(qp) + self.assertEqual( + [qp.proposal.id], + [p.id for p in supervise.list_pending_proposals(qp.queue_dir)], + ) + + def test_no_audit_log_for_capability(self): + dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content) + qp = self._enqueue_capability() + dashboard.approve(qp) + # capability-block has no audit log per PRD 0013 — its record + # lives in the per-bottle Dockerfile + transcript state. + self.assertEqual([], read_audit_entries("cred-proxy", "dev")) + self.assertEqual([], read_audit_entries("pipelock", "dev")) + + def test_proposal_archived_after_apply(self): + dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content) + qp = self._enqueue_capability() + dashboard.approve(qp) + # Sidecar would normally archive after delivering the response, + # but it's gone by then. The dashboard archives so + # discover_pending stops surfacing the resolved proposal. + self.assertEqual([], supervise.list_pending_proposals(qp.queue_dir)) + processed = list((qp.queue_dir / "processed").glob("*.json")) + self.assertEqual(2, len(processed)) + + class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase): """PRD 0014 Phase 4: operator-initiated routes edit (not gated on a pending proposal)."""