Merge pull request 'PRD 0016: capability block remediation' (#22) from prd-0016-capability-block into main
test / unit (push) Successful in 18s
test / integration (push) Successful in 1m34s

This commit was merged in pull request #22.
This commit is contained in:
2026-05-25 06:14:39 -04:00
15 changed files with 1222 additions and 15 deletions
+5
View File
@@ -53,6 +53,11 @@ class BottleSpec:
agent_name: str
copy_cwd: bool
user_cwd: str
# PRD 0016 follow-up: when set, the backend's prepare step uses
# this identity instead of minting a fresh one — the resume path
# (`cli.py resume <identity>`) sets this to continue an existing
# bottle's state. Empty string for a fresh `start`.
identity: str = ""
@dataclass(frozen=True)
@@ -44,6 +44,11 @@ class DockerBottlePlan(BottlePlan):
image: str
derived_image: str # "" -> no derived image
runtime_image: str # image actually launched (derived or base)
# Absolute path to the Dockerfile that builds `image`. Empty means
# use the repo's default Dockerfile. Populated to a per-bottle
# state file (~/.claude-bottle/state/<slug>/Dockerfile) after a
# capability-block remediation (PRD 0016).
dockerfile_path: str
env_file: Path # docker --env-file: NAME=VALUE literals
# name -> value for vars forwarded into the docker-run child process
# via subprocess env (so values never land on argv or in a file).
@@ -89,6 +94,11 @@ class DockerBottlePlan(BottlePlan):
print(file=sys.stderr)
info(f"agent : {spec.agent_name}")
info(f"image : {self.image}")
if self.dockerfile_path:
info(
f"dockerfile : {self.dockerfile_path} "
f"(per-bottle override from PRD 0016 capability rebuild)"
)
if self.derived_image:
info(
f"cwd : {spec.user_cwd} -> /home/node/workspace "
@@ -0,0 +1,170 @@
"""Per-bottle persistent state (PRD 0016).
Holds the per-bottle Dockerfile override that capability-block
remediation writes, the transcript snapshot the state-preservation
helper saves before teardown, and the launch metadata that lets
`cli.py resume <identity>` reconstruct a bottle's spec. State
lives at:
~/.claude-bottle/state/<identity>/
metadata.json — agent_name + cwd + started_at (for resume)
Dockerfile — per-bottle override (absent → use repo's)
transcript/ — last snapshotted agent state (best-effort)
When the per-bottle Dockerfile is present, the launch step builds
the agent image with a per-bottle tag (claude-bottle-rebuilt-<id>)
from this file rather than the repo's. The build context is still
the repo root so the Dockerfile can COPY claude_bottle source files
the same way the original does.
Identity model:
- Every `cli.py start <agent>` mints a fresh identity via
`bottle_identity(agent_name)`: slug-prefix for readability plus a
5-char random suffix for parallel-safe uniqueness. The metadata
written at launch time pins (agent_name, cwd) to that identity.
- `cli.py resume <identity>` reads the metadata and re-launches a
bottle pinned to the same identity, picking up any per-bottle
Dockerfile and transcript snapshot.
"""
from __future__ import annotations
import dataclasses
import json
import secrets
import string
from dataclasses import dataclass
from pathlib import Path
from ... import supervise as _supervise
from . import util as docker_mod
# Directory layout: ~/.claude-bottle/state/<identity>/...
# The names below are the fixed entries inside (and the parent of) that
# per-identity directory; the path helpers in this module join them.
_STATE_SUBDIR = "state"
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
_TRANSCRIPT_SUBDIR = "transcript"
_METADATA_NAME = "metadata.json"
# Identity suffix: 5 chars from a 36-symbol alphabet (a-z, 0-9), i.e.
# 36**5 = 60,466,176 combinations per slug. Plenty for human operators
# starting bottles by hand; collisions are not checked for, only
# made improbable.
_RANDOM_SUFFIX_LEN = 5
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
def bottle_identity(agent_name: str) -> str:
    """Mint a new identity for one launch of `agent_name`.

    The result is `<slug>-<suffix>`: the slugified agent name keeps the
    identity readable, and the random suffix keeps two simultaneous
    `start <agent>` invocations from colliding on container/network
    names.

    This function is non-deterministic — every call returns a different
    identity. To continue an existing bottle's state, reuse the identity
    recorded in its BottleMetadata via `cli.py resume <identity>`
    instead of calling this again.
    """
    readable_prefix = docker_mod.slugify(agent_name)
    random_chars = [
        secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN)
    ]
    return f"{readable_prefix}-{''.join(random_chars)}"
@dataclass(frozen=True)
class BottleMetadata:
    """Launch record for a single bottle.

    Written when the bottle starts and read back by `cli.py resume`.
    Stored at ~/.claude-bottle/state/<identity>/metadata.json.
    """

    # Per-launch identity; also names the bottle's state directory.
    identity: str
    agent_name: str
    # Empty string when --cwd was not passed.
    cwd: str
    copy_cwd: bool
    # ISO 8601 UTC timestamp.
    started_at: str
def metadata_path(identity: str) -> Path:
    """Return where `identity`'s metadata.json lives inside its state dir."""
    state_dir = bottle_state_dir(identity)
    return state_dir / _METADATA_NAME
def write_metadata(metadata: BottleMetadata) -> Path:
    """Serialize `metadata` as indented JSON to its metadata.json.

    The state directory is created if needed. The file is left at mode
    0o644 because it holds no secrets — only the agent name, cwd,
    copy_cwd flag and timestamp. Returns the path written.
    """
    target = metadata_path(metadata.identity)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(dataclasses.asdict(metadata), indent=2)
    target.write_text(serialized + "\n")
    target.chmod(0o644)
    return target
def read_metadata(identity: str) -> BottleMetadata | None:
    """Return the metadata recorded for `identity`, or None when there
    is no usable record. Used by `cli.py resume` to reconstruct the
    launch spec.

    "No usable record" covers: the file does not exist, it cannot be
    read, it is not valid JSON, or its top-level value is not a JSON
    object. Keys missing from an otherwise valid record fall back to
    defaults: `identity` to the argument, the string fields to "", and
    `copy_cwd` to False.
    """
    path = metadata_path(identity)
    if not path.is_file():
        return None
    try:
        raw = json.loads(path.read_text())
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError. A truncated or hand-mangled file is
        # treated like the non-dict case below — no record — so the
        # caller reports it instead of surfacing a traceback.
        return None
    if not isinstance(raw, dict):
        return None
    return BottleMetadata(
        identity=str(raw.get("identity", identity)),
        agent_name=str(raw.get("agent_name", "")),
        cwd=str(raw.get("cwd", "")),
        copy_cwd=bool(raw.get("copy_cwd", False)),
        started_at=str(raw.get("started_at", "")),
    )
def bottle_state_dir(identity: str) -> Path:
    """Return the host directory holding `identity`'s state.

    Pure path computation: nothing is created here. The write helpers
    create the directory on demand, and readers cope with it missing.
    """
    state_root = _supervise.claude_bottle_root() / _STATE_SUBDIR
    return state_root / identity
def per_bottle_dockerfile_path(identity: str) -> Path:
    """Return the path of `identity`'s Dockerfile override (which may not exist)."""
    state_dir = bottle_state_dir(identity)
    return state_dir / _PER_BOTTLE_DOCKERFILE_NAME
def per_bottle_dockerfile(identity: str) -> str | None:
    """Return the text of `identity`'s Dockerfile override, or None.

    None signals that no override has been written, so the repo's own
    Dockerfile applies (the behavior from before capability-block
    remediation existed).
    """
    override_file = per_bottle_dockerfile_path(identity)
    if not override_file.is_file():
        return None
    return override_file.read_text()
def write_per_bottle_dockerfile(identity: str, content: str) -> Path:
    """Store `content` as `identity`'s Dockerfile override.

    Creates the state directory if needed, writes the file, sets it to
    mode 0o644, and returns the path written.
    """
    override_file = per_bottle_dockerfile_path(identity)
    override_file.parent.mkdir(parents=True, exist_ok=True)
    override_file.write_text(content)
    override_file.chmod(0o644)
    return override_file
def per_bottle_image_tag(identity: str) -> str:
    """Return the docker image tag used for `identity`'s rebuilt image.

    The tag embeds the identity, keeping it distinct from the shared
    claude-bottle:latest base so per-bottle rebuilds don't collide in
    the docker image cache.
    """
    return "claude-bottle-rebuilt-{}:latest".format(identity)
def transcript_snapshot_dir(identity: str) -> Path:
    """Return the directory where capability_apply stashes the agent's
    transcript before teardown, so a later launch pinned to the same
    identity can find it under the same state dir."""
    state_dir = bottle_state_dir(identity)
    return state_dir / _TRANSCRIPT_SUBDIR
# Public API of the per-bottle state module, in alphabetical order.
__all__ = [
"BottleMetadata",
"bottle_identity",
"bottle_state_dir",
"metadata_path",
"per_bottle_dockerfile",
"per_bottle_dockerfile_path",
"per_bottle_image_tag",
"read_metadata",
"transcript_snapshot_dir",
"write_metadata",
"write_per_bottle_dockerfile",
]
@@ -0,0 +1,210 @@
"""capability_apply — host-side orchestrator for capability-block
remediation (PRD 0016).
On approval of a capability-block proposal, the dashboard calls
apply_capability_change(slug, new_dockerfile) which:
1. Snapshots the agent's transcript dir to
~/.claude-bottle/state/<slug>/transcript/ (best-effort).
2. Pushes the agent's working tree via `git push` (best-effort —
no upstream / no commits / no git repo all skip with a log).
3. Writes the new Dockerfile to
~/.claude-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
state). The next `cli.py resume <slug>` picks it up; a fresh
`cli.py start <agent>` mints a new identity and does not.
4. Force-removes the agent container + all sidecars + the
per-bottle networks. Idempotent — missing resources are not
errors.
Returns (before, after) Dockerfile contents so the dashboard can
record / render the diff. (capability-block has no audit log per
PRD 0013 — the per-bottle Dockerfile state is its own record.)
This is "fire-and-forget" from the agent's perspective: by the time
the dashboard writes the response file the supervise sidecar is
gone, so the agent's tool call connection drops without ever
receiving the response. The replacement agent (next manual
`cli.py resume <slug>`) sees the new Dockerfile and starts from there.
v1 does not auto-relaunch — see PRD 0016's capability-block return
semantics open question.
"""
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
from ...log import info, warn
from .bottle_state import (
per_bottle_dockerfile,
per_bottle_dockerfile_path,
transcript_snapshot_dir,
write_per_bottle_dockerfile,
)
# Agent home inside the container (per the repo Dockerfile's
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
# dir + the workspace dir for git push.
_AGENT_HOME_IN_CONTAINER = "/home/node"
# Source directory for the `docker cp` transcript snapshot.
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
# Directory the in-container `git push` is run from.
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
# Per-bottle resource name patterns (mirroring prepare.py /
# the various sidecar modules). The agent container's name is the
# slug with no infix; sidecars carry an infix like cred-proxy.
def _agent_container_name(slug: str) -> str:
return f"claude-bottle-{slug}"
def _per_bottle_container_names(slug: str) -> list[str]:
    """List every container name that can belong to this bottle: the
    agent first, then one per sidecar kind.

    Listing a name that doesn't exist for a given bottle is fine — the
    teardown helper does not treat a failed removal as an error.
    """
    sidecar_infixes = ("cred-proxy", "pipelock", "git-gate", "supervise")
    names = [_agent_container_name(slug)]
    names.extend(f"claude-bottle-{infix}-{slug}" for infix in sidecar_infixes)
    return names
def _per_bottle_network_names(slug: str) -> list[str]:
return [
f"claude-bottle-net-{slug}",
f"claude-bottle-egress-{slug}",
]
class CapabilityApplyError(RuntimeError):
    """Signals an apply failure that should leave the proposal pending
    so the operator can retry.

    Best-effort steps (transcript snapshot, git push) never raise this;
    they log and let the apply proceed.
    """
# --- Public helpers --------------------------------------------------------
def fetch_current_dockerfile(slug: str) -> str:
    """Return the Dockerfile content currently in effect for this bottle.

    The per-bottle override wins when one exists; otherwise the repo's
    Dockerfile is returned. Raises CapabilityApplyError when neither is
    available.

    Used by the operator-edit verb to show the current source of truth,
    and by apply_capability_change for the "before" side of the diff.
    """
    per_bottle = per_bottle_dockerfile(slug)
    if per_bottle is not None:
        return per_bottle
    fallback = _repo_dockerfile_path()
    if not fallback.is_file():
        raise CapabilityApplyError(
            f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at "
            f"{fallback}"
        )
    return fallback.read_text()
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
    """End-to-end capability-block remediation for the bottle `slug`.

    Sequence: validate the proposal, capture the current Dockerfile,
    snapshot the transcript and push the working tree (both
    best-effort), persist the new per-bottle Dockerfile, then tear the
    bottle down.

    Returns (before, after) Dockerfile content.

    Raises CapabilityApplyError when the proposed Dockerfile is blank,
    when no current Dockerfile can be found, or when the new Dockerfile
    cannot be written. In the write-failure case the bottle has not
    been torn down yet, so the proposal can stay pending and be retried.
    """
    if not new_dockerfile.strip():
        raise CapabilityApplyError("proposed Dockerfile is empty")
    before = fetch_current_dockerfile(slug)
    _snapshot_transcript(slug)
    _push_working_tree(slug)
    try:
        write_per_bottle_dockerfile(slug, new_dockerfile)
    except OSError as exc:
        # Callers only catch the remediation engines' own error types;
        # a raw OSError (disk full, permissions) would escape them.
        raise CapabilityApplyError(
            f"could not write per-bottle Dockerfile for {slug}: {exc}"
        ) from exc
    # Teardown comes last so every failure above leaves the bottle running.
    _teardown_bottle(slug)
    return before, new_dockerfile
# --- Internals -------------------------------------------------------------
def _repo_dockerfile_path() -> Path:
"""Path to the repo's Dockerfile (one dir above this module's
package root). Resolved at call time so the path is correct
regardless of where this module is imported from."""
# claude_bottle/backend/docker/capability_apply.py -> repo root
return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile"
def _snapshot_transcript(slug: str) -> None:
    """`docker cp` /home/node/.claude out of the agent container into
    ~/.claude-bottle/state/<slug>/transcript/. Best-effort: missing
    container, missing dir, or cp error all log a warning and return.

    The copy lands in a sibling staging directory first and only
    replaces the existing snapshot once `docker cp` has succeeded, so a
    failed attempt (e.g. the container is already gone on a retry)
    keeps the previous snapshot instead of destroying it.

    The transcript is what `claude --resume` reads to pick up where
    the agent left off."""
    container = _agent_container_name(slug)
    dest = transcript_snapshot_dir(slug)
    staging = dest.with_name(f"{dest.name}.incoming")
    dest.parent.mkdir(parents=True, exist_ok=True)
    # `docker cp` nests the source directory inside a destination that
    # already exists, so the staging path must be absent for a clean copy.
    shutil.rmtree(staging, ignore_errors=True)
    r = subprocess.run(
        ["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(staging)],
        capture_output=True, text=True, check=False,
    )
    if r.returncode != 0:
        shutil.rmtree(staging, ignore_errors=True)
        warn(
            f"capability-apply: transcript snapshot skipped "
            f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
        )
        return
    # The new copy is complete; only now drop the prior snapshot.
    shutil.rmtree(dest, ignore_errors=True)
    try:
        staging.rename(dest)
    except OSError as exc:
        shutil.rmtree(staging, ignore_errors=True)
        warn(f"capability-apply: transcript snapshot skipped ({exc})")
        return
    info(f"capability-apply: transcript snapshotted to {dest}")
def _push_working_tree(slug: str) -> None:
    """`docker exec <agent> git push` from /home/node/workspace.

    Best-effort: not-a-git-repo, no upstream, no network, a missing
    container — any failure logs a warning and returns; nothing is
    raised. The replacement bottle will pick up whatever's actually
    upstream.

    The shell pipeline's exit status is left intact (no `|| true`), so
    a failing `cd`, `git rev-parse` or `git push` shows up as a
    non-zero return code and is reported as a warning rather than being
    logged as if the push had run successfully.
    """
    container = _agent_container_name(slug)
    r = subprocess.run(
        [
            "docker", "exec", container, "sh", "-c",
            f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
            f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
            f"git push origin HEAD 2>&1",
        ],
        capture_output=True, text=True, check=False,
    )
    output = (r.stdout or "").strip()
    if r.returncode != 0:
        # docker's and sh's own errors arrive on stderr; git push's
        # errors arrive on stdout because of the 2>&1 above. The
        # rev-parse probe is silenced, so a failure with no output at
        # all most likely means the workspace is not a git work tree.
        detail = (
            (r.stderr or "").strip()
            or output
            or f"exit status {r.returncode}, no output — likely not a git workspace"
        )
        warn(f"capability-apply: git push skipped ({detail})")
        return
    if output:
        info(f"capability-apply: git push: {output}")
    else:
        info("capability-apply: git push ran (no output)")
def _teardown_bottle(slug: str) -> None:
    """Force-remove every per-bottle docker resource: containers
    first, then networks.

    Idempotent in effect — each command's output is discarded and its
    exit status is not checked, so names that don't exist are harmless
    and this can be called even mid-rebuild.
    """
    info(f"capability-apply: tearing down bottle {slug}")
    removals = [
        ["docker", "rm", "-f", container]
        for container in _per_bottle_container_names(slug)
    ]
    removals += [
        ["docker", "network", "rm", network]
        for network in _per_bottle_network_names(slug)
    ]
    for argv in removals:
        subprocess.run(
            argv,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
        )
# Public API of the capability-apply orchestrator; the underscore-
# prefixed helpers above are internal.
__all__ = [
"CapabilityApplyError",
"apply_capability_change",
"fetch_current_dockerfile",
]
+4 -1
View File
@@ -66,7 +66,10 @@ def launch(
pass
try:
docker_mod.build_image(plan.image, _REPO_DIR)
docker_mod.build_image(
plan.image, _REPO_DIR,
dockerfile=plan.dockerfile_path,
)
if plan.derived_image:
docker_mod.build_image_with_cwd(
plan.derived_image, plan.image, plan.spec.user_cwd
+37 -2
View File
@@ -11,6 +11,7 @@ via the base class's `prepare` template before this is called.
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from ... import pipelock
@@ -26,6 +27,14 @@ from .cred_proxy import (
cred_proxy_url,
)
from .git_gate import DockerGitGate, git_gate_container_name
from .bottle_state import (
BottleMetadata,
bottle_identity,
per_bottle_dockerfile,
per_bottle_dockerfile_path,
per_bottle_image_tag,
write_metadata,
)
from .pipelock import DockerPipelockProxy, pipelock_container_name
from .supervise import DockerSupervise, supervise_container_name
@@ -48,9 +57,34 @@ def resolve_plan(
agent = manifest.agents[spec.agent_name]
bottle = manifest.bottle_for(spec.agent_name)
slug = docker_mod.slugify(spec.agent_name)
# PRD 0016 follow-up: identity, not bare slug. A fresh `start`
# mints a random-suffixed identity (so parallel runs of the same
# agent in the same cwd don't collide on container/network
# names); a `resume` passes the recorded identity in via
# spec.identity to continue an existing bottle's state.
slug = spec.identity or bottle_identity(spec.agent_name)
# Record the launch metadata so `cli.py resume <identity>` can
# reconstruct the spec. Idempotent — re-writes on resume with a
# refreshed started_at.
write_metadata(BottleMetadata(
identity=slug,
agent_name=spec.agent_name,
cwd=spec.user_cwd if spec.copy_cwd else "",
copy_cwd=spec.copy_cwd,
started_at=datetime.now(timezone.utc).isoformat(),
))
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest")
# PRD 0016 capability-block: if a per-bottle Dockerfile has been
# written (via apply_capability_change), the base image becomes
# per_bottle_image_tag(slug) built from that file. --cwd still
# layers a derived image on top.
dockerfile_path = ""
if per_bottle_dockerfile(slug) is not None:
image_default = per_bottle_image_tag(slug)
dockerfile_path = str(per_bottle_dockerfile_path(slug))
else:
image_default = "claude-bottle:latest"
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", image_default)
derived_image = ""
runtime_image = image
if spec.copy_cwd:
@@ -184,6 +218,7 @@ def resolve_plan(
image=image,
derived_image=derived_image,
runtime_image=runtime_image,
dockerfile_path=dockerfile_path,
env_file=env_file,
forwarded_env=forwarded_env,
prompt_file=prompt_file,
+4 -1
View File
@@ -1,6 +1,6 @@
"""Main CLI dispatcher.
Commands: cleanup, dashboard, edit, info, init, list, start
Commands: cleanup, dashboard, edit, info, init, list, resume, start
"""
from __future__ import annotations
@@ -15,6 +15,7 @@ from .dashboard import cmd_dashboard
from .edit import cmd_edit
from .info import cmd_info
from .init import cmd_init
from .resume import cmd_resume
from .start import cmd_start
cmd_list = _list_mod.cmd_list
@@ -26,6 +27,7 @@ COMMANDS = {
"info": cmd_info,
"init": cmd_init,
"list": cmd_list,
"resume": cmd_resume,
"start": cmd_start,
}
@@ -39,6 +41,7 @@ def usage() -> None:
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
sys.stderr.write(" init interactively create a new agent and add it to claude-bottle.json\n")
sys.stderr.write(" list list available agents or active containers\n")
sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n")
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
+28 -6
View File
@@ -22,6 +22,10 @@ from datetime import datetime, timezone
from pathlib import Path
from .. import supervise as _supervise
from ..backend.docker.capability_apply import (
CapabilityApplyError,
apply_capability_change,
)
from ..backend.docker.cred_proxy_apply import (
CredProxyApplyError,
apply_routes_change,
@@ -45,6 +49,7 @@ from ..supervise import (
TOOL_CAPABILITY_BLOCK,
TOOL_CRED_PROXY_BLOCK,
TOOL_PIPELOCK_BLOCK,
archive_proposal,
list_pending_proposals,
render_diff,
write_audit_entry,
@@ -56,7 +61,7 @@ from ._common import PROG
# Errors any remediation engine may raise. Caught by the TUI key
# handlers and surfaced in the status line so a failed apply keeps
# the proposal pending rather than crashing curses.
ApplyError = (CredProxyApplyError, PipelockApplyError)
ApplyError = (CredProxyApplyError, PipelockApplyError, CapabilityApplyError)
# --- Discovery -------------------------------------------------------------
@@ -107,6 +112,16 @@ def discover_pipelock_slugs() -> list[str]:
return _discover_sidecar_slugs("claude-bottle-pipelock-")
def _approval_status(qp: QueuedProposal, verb: str) -> str:
    """Build the status-line text shown after a successful approval.

    `verb` is the leading word(s), e.g. "approved". For capability-block
    proposals the `resume <identity>` command is appended so the
    operator can bring the rebuilt bottle back up with one copy-paste.
    """
    proposal = qp.proposal
    status = f"{verb} {proposal.tool} for [{proposal.bottle_slug}]"
    if proposal.tool != TOOL_CAPABILITY_BLOCK:
        return status
    return f"{status}; resume: ./cli.py resume {proposal.bottle_slug}"
def discover_pending() -> list[QueuedProposal]:
"""Walk ~/.claude-bottle/queue/* and collect pending proposals
from every bottle's queue. Sorted by arrival time across the
@@ -155,9 +170,10 @@ def approve(
diff_before, diff_after = apply_allowlist_change(
qp.proposal.bottle_slug, file_to_apply,
)
# capability-block remediation lands in PRD 0016; until then
# it stays a no-op approval and the audit (none for capability)
# is skipped.
elif qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
diff_before, diff_after = apply_capability_change(
qp.proposal.bottle_slug, file_to_apply,
)
response = Response(
proposal_id=qp.proposal.id,
@@ -170,6 +186,12 @@ def approve(
qp, action=status, notes=notes,
diff_before=diff_before, diff_after=diff_after,
)
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
# The supervise sidecar was torn down by apply_capability_change,
# so it can't archive its own proposal+response. Archive here so
# dashboard.discover_pending stops surfacing the resolved
# proposal forever.
archive_proposal(qp.queue_dir, qp.proposal.id)
def reject(qp: QueuedProposal, *, reason: str) -> None:
@@ -359,7 +381,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
elif key == ord("a"):
try:
approve(qp)
status_line = f"approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
status_line = _approval_status(qp, "approved")
except ApplyError as e:
status_line = f"apply failed: {e}"
elif key == ord("m"):
@@ -369,7 +391,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
else:
try:
approve(qp, final_file=edited, notes="operator modified before approving")
status_line = f"modified+approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
status_line = _approval_status(qp, "modified+approved")
except ApplyError as e:
status_line = f"apply failed: {e}"
elif key == ord("r"):
+66
View File
@@ -0,0 +1,66 @@
"""resume: re-launch a bottle by its identity.
Reads ~/.claude-bottle/state/<identity>/metadata.json to recover the
(agent_name, cwd, copy_cwd) the bottle was originally started with,
then runs the same launch core as `start` but pinned to the
recorded identity so the new bottle picks up any per-bottle Dockerfile
(from capability-block apply) and transcript snapshot under the same
state dir.
Use case: an agent calls capability-block, the dashboard approves
and tears down the bottle, the operator runs
./cli.py resume <identity>
to bring up the replacement with the new capabilities baked in.
"""
from __future__ import annotations
import argparse
from ..backend import BottleSpec
from ..backend.docker.bottle_state import read_metadata
from ..log import die
from ..manifest import Manifest
from ._common import PROG, USER_CWD
from .start import _launch_bottle
def cmd_resume(argv: list[str]) -> int:
    """Entry point for `cli.py resume <identity>`.

    Parses the command line, loads the launch metadata recorded for the
    identity (calling `die` when there is none), checks the recorded
    agent against the manifest, and hands a spec pinned to the recorded
    identity to the launch core shared with `start`. Returns that
    core's exit status.
    """
    parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True)
    for flag in ("--dry-run", "--remote-control"):
        parser.add_argument(flag, action="store_true")
    parser.add_argument(
        "--format",
        choices=("text", "json"),
        default="text",
        help="preflight output format; --format=json requires --dry-run",
    )
    parser.add_argument(
        "identity",
        help="bottle identity from a prior `start` (see its session-end output)",
    )
    opts = parser.parse_args(argv)

    recorded = read_metadata(opts.identity)
    if recorded is None:
        die(
            f"no state recorded for identity {opts.identity!r}; "
            f"check ~/.claude-bottle/state/ or run `cli.py start` to create a new bottle"
        )

    manifest = Manifest.resolve(USER_CWD)
    manifest.require_agent(recorded.agent_name)

    # The recorded cwd is empty when the bottle was started without
    # --cwd; fall back to the directory resume is invoked from.
    launch_cwd = recorded.cwd if recorded.cwd else USER_CWD
    spec = BottleSpec(
        manifest=manifest,
        agent_name=recorded.agent_name,
        copy_cwd=recorded.copy_cwd,
        user_cwd=launch_cwd,
        identity=recorded.identity,
    )
    return _launch_bottle(
        spec,
        dry_run=opts.dry_run,
        output_format=opts.format,
        remote_control=opts.remote_control,
    )
+37 -5
View File
@@ -1,6 +1,10 @@
"""start: boot a sandboxed container for a named agent and attach an
interactive claude-code session. The container is torn down when the
session ends."""
session ends.
The launch core is shared with `cli.py resume <identity>`: see
_launch_bottle below.
"""
from __future__ import annotations
@@ -43,18 +47,35 @@ def cmd_start(argv: list[str]) -> int:
copy_cwd=args.cwd,
user_cwd=USER_CWD,
)
return _launch_bottle(
spec,
dry_run=dry_run,
output_format=args.format,
remote_control=args.remote_control,
)
def _launch_bottle(
spec: BottleSpec,
*,
dry_run: bool,
output_format: str,
remote_control: bool,
) -> int:
"""Shared launch core for `start` and `resume`. Builds the plan,
prints / dry-runs / prompts as appropriate, brings the bottle up,
attaches claude, and prints the resume hint on session end."""
stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
try:
backend = get_bottle_backend()
plan = backend.prepare(spec, stage_dir=stage_dir)
if args.format == "json":
json.dump(plan.to_dict(remote_control=args.remote_control), sys.stdout, indent=2)
if output_format == "json":
json.dump(plan.to_dict(remote_control=remote_control), sys.stdout, indent=2)
sys.stdout.write("\n")
return 0
plan.print(remote_control=args.remote_control)
plan.print(remote_control=remote_control)
if dry_run:
info("dry-run requested; not starting container.")
@@ -67,16 +88,27 @@ def cmd_start(argv: list[str]) -> int:
info("aborted by user")
return 0
identity = _identity_from_plan(plan)
with backend.launch(plan) as bottle:
info(
"attaching interactive claude session "
"(Ctrl-D or 'exit' to leave; container will be removed)"
)
claude_args = ["--dangerously-skip-permissions"]
if args.remote_control:
if remote_control:
claude_args.append("--remote-control")
bottle.exec_claude(claude_args, tty=True)
info(f"session ended; container {bottle.name} will be removed")
if identity:
info(f"to resume this bottle: ./cli.py resume {identity}")
return 0
finally:
shutil.rmtree(stage_dir, ignore_errors=True)
def _identity_from_plan(plan: object) -> str:
"""Backend-specific: the docker plan exposes the identity as
`.slug`. Other backends in the future would expose their own
identity attribute; for now we duck-type to keep this layer
backend-agnostic."""
return getattr(plan, "slug", "")
@@ -0,0 +1,70 @@
# PRD 0016: capability block remediation
- **Status:** Draft
- **Author:** didericis
- **Created:** 2026-05-25
- **Parent:** PRD 0012
- **Depends on:** PRD 0013
## Summary
Wires the **capability block** path (PRD 0012 *Stuck categories*) end-to-end. On operator approval of a `capability-block` proposal, the rebuild orchestrator tears down the existing bottle, builds from the new Dockerfile, and starts a replacement bottle on the same branch via the state-preservation helper. The replacement agent picks up where the original left off, now with the missing capability. Heaviest of the three remediation PRDs because the orchestrator and state-preservation helper are non-trivial.
## Problem
See PRD 0012. This PRD specifically addresses: with 0013 in place, the operator can approve a `capability-block` proposal but nothing happens — the bottle is not rebuilt, the agent stays stuck. This PRD closes the loop. Unlike 0014 and 0015, the remediation requires container teardown + rebuild + state hand-off, so the design surface is larger.
## Goals / Success Criteria
A real capability block recovers end-to-end: the agent's invocation of a tool / command / skill fails (not found, permission denied), the agent calls `capability-block` with a proposed Dockerfile and justification, the operator approves in the TUI, the orchestrator tears down the bottle and starts a replacement built from the new Dockerfile, the replacement agent inherits the working tree and best-effort transcript and continues on the same branch.
## Non-goals
- Live mutation of the running container (re-stated from PRD 0012 non-goals).
- Forking into multiple parallel rebuilt bottles. One-for-one replacement only.
- cred-proxy or pipelock handling (covered by 0014 and 0015).
## Scope
### In scope
- A rebuild orchestrator that, on operator approval, tears down the existing bottle, builds from the approved Dockerfile, and starts a replacement on the same branch.
- A state-preservation helper that handles the hand-off across the rebuild: working tree push is mandatory; transcript / reasoning context is best-effort.
- `capability-block` approval handler in the MCP sidecar (replacing the 0013 no-op): on approval, hand off to the orchestrator.
- Bottle lifecycle script changes for orchestrated teardown + rebuild (distinct from a fresh-spawn).
- Bottle manifest schema changes: record originating manifest version / change history per agent run so the dashboard can show "what changed" rather than "what is."
- A per-agent-run record that maps a running bottle back to its PR / branch, so the orchestrator knows which branch to resume on.
### Out of scope
- Rolling back a rebuild that the replacement agent regrets. The audit trail (git history + bottle rebuild record) shows what changed; a follow-up `capability-block` proposal can revert.
## Proposed Design
### New services / components
- **Rebuild orchestrator.** On approval, snapshots state via the state-preservation helper, tears down the existing bottle, builds from the new Dockerfile, and starts a fresh bottle on the same branch.
- **State-preservation helper.** Mandatory: ensures the working tree is pushed before teardown. Best-effort: carries forward the agent's transcript / reasoning context — including the approved `capability-block` proposal — into the replacement container so the new agent starts warm rather than cold.
### Existing code touched
- **MCP sidecar** (PRD 0013) — the `capability-block` approval handler stops being a no-op; on approval, hands off to the rebuild orchestrator.
- **Bottle lifecycle scripts** — extended for orchestrated teardown + rebuild with state hand-off, distinct from a fresh-spawn.
- **Bottle manifest schema** — records the originating manifest version / change history per agent run.
- **`cli.py`** — gains the rebuild path.
### Data model changes
- A per-agent-run record sufficient to map a running bottle back to its PR / branch.
## Open questions
- **`capability-block` return semantics.** The current agent is torn down on approval, so the tool's return value never reaches it. Options: (a) fire-and-forget, the tool returns immediately with "queued" and the agent halts; (b) block the tool, let the rebuild orchestrator's teardown kill the connection, the replacement agent gets the approval record via state-preservation; (c) the tool blocks, returns "approved" right before teardown, the agent has milliseconds to log it. (b) seems cleanest but is worth confirming during implementation.
- **Best-effort transcript preservation.** Mount the agent's state directory, snapshot on teardown, remount in the replacement? How much fidelity is "good enough" for the new agent to pick up?
- **Bottle → PR/branch mapping.** Recorded at bottle-spawn time, derived from the working tree, or specified in the manifest?
- **Rejection semantics.** Does the agent receive a tool reply explaining the rejection, or does the bottle just stay torn down?
## References
- PRD 0012 — stuck-agent recovery flow overview.
- PRD 0013 — supervise plane foundation (prerequisite).
+217
View File
@@ -0,0 +1,217 @@
"""Integration: drive `apply_capability_change` against a real
container that mimics the agent's name + filesystem layout (PRD 0016).
The real `cli.py start <agent>` flow is too heavy for an integration
test (it builds the agent image, brings up all the sidecars, attaches
an interactive claude session). Instead, this test stages the
minimum the orchestrator interacts with:
- A lightweight `alpine:latest sleep infinity` container named
`claude-bottle-<slug>` (matches the agent container name pattern)
on the per-bottle internal network.
- A marker file under `/home/node/.claude/` so we can assert the
transcript snapshot path actually transferred bytes.
Then `apply_capability_change` runs and we verify:
- Per-bottle Dockerfile written.
- Containers + networks removed.
- Transcript snapshot dir on the host has the marker file.
docker exec / cp / rm work across the docker socket boundary, so
this test runs in DinD too — no act_runner skip needed.
"""
from __future__ import annotations
import os
import shutil
import subprocess
import tempfile
import time
import unittest
from pathlib import Path
from claude_bottle import supervise
from claude_bottle.backend.docker import bottle_state, capability_apply
from claude_bottle.backend.docker.capability_apply import apply_capability_change
from claude_bottle.backend.docker.network import (
network_create_egress,
network_create_internal,
network_remove,
)
from tests._docker import skip_unless_docker
ALPINE_IMAGE = "alpine:latest"
@skip_unless_docker()
class TestCapabilityApply(unittest.TestCase):
    """Integration tests for `apply_capability_change` against a fake bottle.

    Each test stages lightweight alpine containers that carry the names
    used here for a bottle's agent (`claude-bottle-<slug>`) and supervise
    sidecar (`claude-bottle-supervise-<slug>`), attached to a per-bottle
    internal network, then runs the orchestrator and inspects what is
    left behind on the host and in docker.
    """

    @classmethod
    def setUpClass(cls):
        """Pull the alpine image once; skip the whole class if that fails."""
        r = subprocess.run(
            ["docker", "pull", ALPINE_IMAGE],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
        )
        if r.returncode != 0:
            raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}")

    def setUp(self):
        """Pick a unique slug and redirect the state root to a temp dir."""
        self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}"
        self.agent_name = f"claude-bottle-{self.slug}"
        self.sidecar_names: list[str] = []
        self.internal_net = ""
        self.egress_net = ""
        # Fake home so tests don't touch ~/.claude-bottle/.
        self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.")
        self._original_root = supervise.claude_bottle_root

        def fake_root() -> Path:
            return Path(self._tmp.name) / ".claude-bottle"

        supervise.claude_bottle_root = fake_root  # type: ignore[assignment]

    def tearDown(self):
        """Restore the real state root and remove any leftover docker objects."""
        supervise.claude_bottle_root = self._original_root  # type: ignore[assignment]
        for name in [self.agent_name, *self.sidecar_names]:
            subprocess.run(
                ["docker", "rm", "-f", name],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
            )
        for n in (self.internal_net, self.egress_net):
            if n:
                network_remove(n)
        self._tmp.cleanup()

    def _wait_for_marker(self, timeout: float = 10.0) -> None:
        """Block until the agent container has written the transcript marker.

        `docker run -d` returns as soon as the container starts, which can
        be before its shell has run `mkdir` / `echo`. Without this wait a
        fast snapshot could copy an empty or missing directory and make
        the transcript test flaky.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            r = subprocess.run(
                [
                    "docker", "exec", self.agent_name,
                    "grep", "-q", "transcript-marker",
                    "/home/node/.claude/sessions.json",
                ],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
            )
            if r.returncode == 0:
                return
            time.sleep(0.1)
        self.fail(
            f"marker file never appeared in {self.agent_name} within {timeout}s"
        )

    def _bring_up_fake_bottle(self) -> None:
        """Create the per-bottle networks plus a fake agent and sidecar."""
        self.internal_net = network_create_internal(self.slug)
        self.egress_net = network_create_egress(self.slug)
        # Agent container with the canonical name.
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", self.agent_name,
                "--network", self.internal_net,
                ALPINE_IMAGE,
                "sh", "-c",
                "mkdir -p /home/node/.claude && "
                "echo 'transcript-marker' > /home/node/.claude/sessions.json && "
                "sleep 3600",
            ],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)
        self._wait_for_marker()
        # Also start a fake supervise sidecar so teardown has something
        # extra to clean up (mirrors a real bottle's container set).
        sidecar = f"claude-bottle-supervise-{self.slug}"
        # Register the name before starting it so tearDown removes the
        # container even when the start below fails part-way.
        if sidecar not in self.sidecar_names:
            self.sidecar_names.append(sidecar)
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", sidecar,
                "--network", self.internal_net,
                ALPINE_IMAGE, "sleep", "3600",
            ],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)

    def _containers_named_like(self) -> list[str]:
        """Return names of all running/stopped containers for this bottle.

        The filter uses the slug rather than the agent name: the sidecar
        is called `claude-bottle-supervise-<slug>`, which does not contain
        `claude-bottle-<slug>`, so filtering on the agent name would never
        report the sidecar.
        """
        r = subprocess.run(
            [
                "docker", "ps", "-a",
                "--filter", f"name={self.slug}",
                "--format", "{{.Names}}",
            ],
            capture_output=True, text=True, check=False,
        )
        return [line for line in (r.stdout or "").splitlines() if line]

    def _networks_named_like(self) -> list[str]:
        """Return names of all docker networks whose name contains the slug."""
        r = subprocess.run(
            [
                "docker", "network", "ls",
                "--filter", f"name={self.slug}",
                "--format", "{{.Name}}",
            ],
            capture_output=True, text=True, check=False,
        )
        return [line for line in (r.stdout or "").splitlines() if line]

    def test_apply_writes_dockerfile_and_tears_down(self):
        self._bring_up_fake_bottle()
        running = self._containers_named_like()
        self.assertIn(self.agent_name, running)
        for sidecar in self.sidecar_names:
            self.assertIn(sidecar, running)
        new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n"
        before, after = apply_capability_change(self.slug, new_dockerfile)
        # Before is the repo Dockerfile (no prior per-bottle override);
        # after is what we passed in.
        self.assertIn("FROM ", before)
        self.assertEqual(new_dockerfile, after)
        # Per-bottle Dockerfile written on the host.
        self.assertEqual(
            new_dockerfile,
            bottle_state.per_bottle_dockerfile(self.slug),
        )
        # Agent + sidecars gone.
        self.assertEqual([], self._containers_named_like())
        # Networks removed (matching the slug substring).
        nets = self._networks_named_like()
        self.assertEqual([], nets)
        # Mark them as already cleaned so tearDown is idempotent.
        self.internal_net = ""
        self.egress_net = ""
        self.sidecar_names = []

    def test_transcript_snapshot_captured(self):
        self._bring_up_fake_bottle()
        apply_capability_change(self.slug, "FROM x\n")
        snap = bottle_state.transcript_snapshot_dir(self.slug)
        self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing")
        # docker cp <container>:/home/node/.claude <dst> produces
        # <dst>/.claude/sessions.json (it preserves the source dir name
        # inside the destination if the destination already exists).
        # Walk the snapshot looking for the marker contents.
        marker_found = any(
            "transcript-marker" in path.read_text()
            for path in snap.rglob("sessions.json")
        )
        self.assertTrue(marker_found, f"marker not found under {snap}")
        # Cleaned up by apply already.
        self.internal_net = ""
        self.egress_net = ""
        self.sidecar_names = []

    def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self):
        # First change: before is repo's Dockerfile.
        self._bring_up_fake_bottle()
        first_before, _ = apply_capability_change(self.slug, "FROM v1\n")
        self.assertIn("FROM ", first_before)
        # Second change: before is "FROM v1\n" (the per-bottle override
        # from the first change), proving the state persists across
        # rebuilds.
        self._bring_up_fake_bottle()
        second_before, second_after = apply_capability_change(self.slug, "FROM v2\n")
        self.assertEqual("FROM v1\n", second_before)
        self.assertEqual("FROM v2\n", second_after)
        self.internal_net = ""
        self.egress_net = ""
        self.sidecar_names = []

    def test_teardown_idempotent_when_nothing_running(self):
        # No bottle ever brought up — teardown still doesn't raise.
        apply_capability_change(self.slug, "FROM x\n")
        self.assertEqual(
            "FROM x\n",
            bottle_state.per_bottle_dockerfile(self.slug),
        )
# Run this module's tests with the unittest runner when executed as a script.
if __name__ == "__main__":
unittest.main()
+166
View File
@@ -0,0 +1,166 @@
"""Unit: per-bottle state helpers (PRD 0016 Phase 1) + identity +
launch metadata."""
import re
import tempfile
import unittest
from pathlib import Path
from claude_bottle import supervise
from claude_bottle.backend.docker import bottle_state
from claude_bottle.backend.docker.bottle_state import (
BottleMetadata,
read_metadata,
write_metadata,
)
class _FakeHomeMixin:
    """Mixin that points `supervise.claude_bottle_root` at a temp directory.

    Call `_setup_fake_home()` from `setUp` and `_teardown_fake_home()` from
    `tearDown`; the original root function is put back on teardown.
    """

    def _setup_fake_home(self):
        """Create the temp dir and swap in a root function that lives under it."""
        self._tmp = tempfile.TemporaryDirectory(prefix="bottle-state-test.")
        real_root = supervise.claude_bottle_root

        def _put_back() -> None:
            setattr(supervise, "claude_bottle_root", real_root)

        supervise.claude_bottle_root = (  # type: ignore[assignment]
            lambda: Path(self._tmp.name) / ".claude-bottle"
        )
        self._restore = _put_back

    def _teardown_fake_home(self):
        """Reinstate the real root function, then delete the temp dir."""
        self._restore()
        self._tmp.cleanup()
class TestPerBottleDockerfile(_FakeHomeMixin, unittest.TestCase):
    """Per-bottle Dockerfile, image tag, and transcript path helpers."""

    def setUp(self):
        self._setup_fake_home()

    def tearDown(self):
        self._teardown_fake_home()

    def test_returns_none_when_absent(self):
        stored = bottle_state.per_bottle_dockerfile("dev")
        self.assertIsNone(stored)

    def test_write_then_read_roundtrip(self):
        content = "FROM python:3.13\nRUN apk add ripgrep\n"
        bottle_state.write_per_bottle_dockerfile("dev", content)
        self.assertEqual(content, bottle_state.per_bottle_dockerfile("dev"))

    def test_isolated_per_slug(self):
        # Write both first, then read both back, so a shared file would
        # show up as the second write clobbering the first.
        expected = {"dev": "FROM dev\n", "api": "FROM api\n"}
        for slug, content in expected.items():
            bottle_state.write_per_bottle_dockerfile(slug, content)
        for slug, content in expected.items():
            self.assertEqual(content, bottle_state.per_bottle_dockerfile(slug))

    def test_dockerfile_path_under_state_dir(self):
        expected_tail = "/.claude-bottle/state/dev/Dockerfile"
        location = str(bottle_state.per_bottle_dockerfile_path("dev"))
        self.assertTrue(location.endswith(expected_tail))

    def test_image_tag_unique_per_slug(self):
        dev_tag = bottle_state.per_bottle_image_tag("dev")
        api_tag = bottle_state.per_bottle_image_tag("api")
        self.assertEqual("claude-bottle-rebuilt-dev:latest", dev_tag)
        self.assertNotEqual(dev_tag, api_tag)

    def test_transcript_dir_under_state_dir(self):
        expected_tail = "/.claude-bottle/state/dev/transcript"
        location = str(bottle_state.transcript_snapshot_dir("dev"))
        self.assertTrue(location.endswith(expected_tail))
class TestBottleIdentity(unittest.TestCase):
    """`bottle_identity(agent_name)` — PRD 0016 follow-up.

    The tests below pin the shape `<slugified-agent>-<5 chars of [a-z0-9]>`
    and that two calls for the same agent differ, which is what lets
    several instances of one agent run side by side without container
    name collisions. Continuing an existing bottle goes through the
    recorded identity (`cli.py resume <identity>`), not this function.
    """

    def test_format_is_slug_dash_5_alnum(self):
        prefix = "dev-"
        identity = bottle_state.bottle_identity("dev")
        self.assertTrue(identity.startswith(prefix))
        suffix = identity[len(prefix):]
        self.assertEqual(5, len(suffix))
        self.assertIsNotNone(
            re.fullmatch(r"[a-z0-9]+", suffix),
            f"suffix {suffix!r} must be lowercase base36",
        )

    def test_two_calls_yield_different_identities(self):
        # 36**5 is roughly 60M possible suffixes, so two draws colliding
        # by chance is extremely unlikely; a failure here almost
        # certainly means a regression rather than bad luck.
        first = bottle_state.bottle_identity("dev")
        second = bottle_state.bottle_identity("dev")
        self.assertNotEqual(first, second)

    def test_different_agents_get_different_prefixes(self):
        dev_identity = bottle_state.bottle_identity("dev")
        api_identity = bottle_state.bottle_identity("api")
        self.assertTrue(dev_identity.startswith("dev-"))
        self.assertTrue(api_identity.startswith("api-"))

    def test_agent_name_slugified(self):
        # Mixed case and spaces in the agent name are normalised in the prefix.
        self.assertTrue(
            bottle_state.bottle_identity("My Agent").startswith("my-agent-")
        )
class TestBottleMetadata(_FakeHomeMixin, unittest.TestCase):
    """Round-trip and on-disk location of per-bottle launch metadata."""

    def setUp(self):
        self._setup_fake_home()

    def tearDown(self):
        self._teardown_fake_home()

    def test_read_missing_returns_none(self):
        missing = read_metadata("does-not-exist")
        self.assertIsNone(missing)

    def test_write_then_read_roundtrip(self):
        original = BottleMetadata(
            identity="dev-a4f8c",
            agent_name="dev",
            cwd="/proj/A",
            copy_cwd=True,
            started_at="2026-05-25T12:00:00+00:00",
        )
        write_metadata(original)
        self.assertEqual(original, read_metadata("dev-a4f8c"))

    def test_metadata_lives_under_state_dir(self):
        written_to = write_metadata(
            BottleMetadata(
                identity="dev-x",
                agent_name="dev",
                cwd="",
                copy_cwd=False,
                started_at="t",
            )
        )
        expected_tail = "/.claude-bottle/state/dev-x/metadata.json"
        self.assertTrue(str(written_to).endswith(expected_tail))

    def test_overwriting_metadata_updates_timestamp(self):
        # `resume` re-writes metadata with a fresh started_at;
        # everything else stays the same.
        for stamp in ("t1", "t2"):
            write_metadata(BottleMetadata(
                identity="dev-y", agent_name="dev",
                cwd="/proj/A", copy_cwd=True, started_at=stamp,
            ))
        loaded = read_metadata("dev-y")
        assert loaded is not None
        self.assertEqual("t2", loaded.started_at)
# Run this module's tests with the unittest runner when executed as a script.
if __name__ == "__main__":
unittest.main()
+123
View File
@@ -0,0 +1,123 @@
"""Unit: capability_apply helpers (PRD 0016 Phase 2).
docker cp / exec / rm / network rm paths are covered by the
integration test in Phase 4. Here we cover:
- fetch_current_dockerfile fallback chain (per-bottle → repo)
- apply_capability_change writes the per-bottle Dockerfile and
returns the correct (before, after).
- apply_capability_change rejects empty input.
"""
import tempfile
import unittest
from pathlib import Path
from claude_bottle import supervise
from claude_bottle.backend.docker import bottle_state, capability_apply
from claude_bottle.backend.docker.capability_apply import (
CapabilityApplyError,
apply_capability_change,
fetch_current_dockerfile,
)
class _FakeHomeMixin:
    """Mixin that points `supervise.claude_bottle_root` at a temp directory.

    Call `_setup_fake_home()` from `setUp` and `_teardown_fake_home()` from
    `tearDown`; the original root function is put back on teardown.
    """

    def _setup_fake_home(self):
        """Create the temp dir and swap in a root function that lives under it."""
        self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-test.")
        real_root = supervise.claude_bottle_root

        def _put_back() -> None:
            setattr(supervise, "claude_bottle_root", real_root)

        supervise.claude_bottle_root = (  # type: ignore[assignment]
            lambda: Path(self._tmp.name) / ".claude-bottle"
        )
        self._restore = _put_back

    def _teardown_fake_home(self):
        """Reinstate the real root function, then delete the temp dir."""
        self._restore()
        self._tmp.cleanup()
class TestFetchCurrentDockerfile(_FakeHomeMixin, unittest.TestCase):
    """`fetch_current_dockerfile`: per-bottle override first, repo file otherwise."""

    def setUp(self):
        self._setup_fake_home()

    def tearDown(self):
        self._teardown_fake_home()

    def test_returns_per_bottle_dockerfile_when_present(self):
        override = "FROM rebuilt\n"
        bottle_state.write_per_bottle_dockerfile("dev", override)
        self.assertEqual(override, fetch_current_dockerfile("dev"))

    def test_falls_back_to_repo_dockerfile_when_no_override(self):
        # Nothing has been written for this slug, so the content must
        # come from the repo's own Dockerfile; we only check that it
        # looks like a Dockerfile (has a FROM line).
        fallback = fetch_current_dockerfile("dev-no-override")
        self.assertIn("FROM ", fallback)
class TestApplyCapabilityChange(_FakeHomeMixin, unittest.TestCase):
    """`apply_capability_change` with its docker-backed helpers stubbed out.

    The three private helpers on `capability_apply` are replaced by
    recorders, so these tests check the orchestrator's own work (the
    Dockerfile write, the returned pair, the helper call order) without
    needing docker.
    """

    def setUp(self):
        self._setup_fake_home()
        self._calls: list[str] = []
        # Keep the real helpers so tearDown can put them back.
        self._orig_snapshot = capability_apply._snapshot_transcript
        self._orig_push = capability_apply._push_working_tree
        self._orig_teardown = capability_apply._teardown_bottle

        def recorder(label):
            # Build a stand-in that logs "<label>:<slug>" on each call.
            def _record(slug):
                self._calls.append(f"{label}:{slug}")
            return _record

        capability_apply._snapshot_transcript = recorder("snapshot")  # type: ignore[assignment]
        capability_apply._push_working_tree = recorder("push")  # type: ignore[assignment]
        capability_apply._teardown_bottle = recorder("teardown")  # type: ignore[assignment]

    def tearDown(self):
        capability_apply._snapshot_transcript = self._orig_snapshot  # type: ignore[assignment]
        capability_apply._push_working_tree = self._orig_push  # type: ignore[assignment]
        capability_apply._teardown_bottle = self._orig_teardown  # type: ignore[assignment]
        self._teardown_fake_home()

    def test_writes_per_bottle_dockerfile_and_returns_before_after(self):
        old_content = "FROM old\n"
        new_content = "FROM new\nRUN apk add ripgrep\n"
        bottle_state.write_per_bottle_dockerfile("dev", old_content)
        before, after = apply_capability_change("dev", new_content)
        self.assertEqual(old_content, before)
        self.assertEqual(new_content, after)
        self.assertEqual(new_content, bottle_state.per_bottle_dockerfile("dev"))

    def test_calls_snapshot_push_teardown_in_order(self):
        apply_capability_change("dev", "FROM new\n")
        # What is asserted: snapshot, then push, then teardown, each
        # exactly once. Snapshot and push need the agent container to
        # still exist (they docker exec / cp from it), so teardown has
        # to come last. Ordering relative to the Dockerfile write is
        # not observable through these stubs.
        expected_sequence = ["snapshot:dev", "push:dev", "teardown:dev"]
        self.assertEqual(expected_sequence, self._calls)

    def test_first_change_falls_back_to_repo_dockerfile_for_before(self):
        # With no per-bottle override on disk, the "before" side of the
        # diff is the repo's Dockerfile.
        before, after = apply_capability_change("dev-fresh", "FROM new\n")
        self.assertIn("FROM ", before)
        self.assertEqual("FROM new\n", after)

    def test_empty_dockerfile_rejected(self):
        # Whitespace-only content counts as empty.
        whitespace_only = " \n\t\n"
        with self.assertRaises(CapabilityApplyError):
            apply_capability_change("dev", whitespace_only)
# Run this module's tests with the unittest runner when executed as a script.
if __name__ == "__main__":
unittest.main()
+75
View File
@@ -16,6 +16,7 @@ from datetime import datetime, timezone
from pathlib import Path
from claude_bottle import supervise
from claude_bottle.backend.docker.capability_apply import CapabilityApplyError
from claude_bottle.backend.docker.cred_proxy_apply import CredProxyApplyError
from claude_bottle.backend.docker.pipelock_apply import PipelockApplyError
from claude_bottle.cli import dashboard
@@ -118,6 +119,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
self._setup_fake_home()
self._original_apply_routes = dashboard.apply_routes_change
self._original_apply_allowlist = dashboard.apply_allowlist_change
self._original_apply_capability = dashboard.apply_capability_change
# Default stubs: succeed with deterministic before/after so the
# audit log shows a non-empty diff.
dashboard.apply_routes_change = lambda slug, content: (
@@ -126,10 +128,14 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
dashboard.apply_allowlist_change = lambda slug, content: (
"old.example\n", content,
)
dashboard.apply_capability_change = lambda slug, content: (
"FROM old\n", content,
)
def tearDown(self):
dashboard.apply_routes_change = self._original_apply_routes
dashboard.apply_allowlist_change = self._original_apply_allowlist
dashboard.apply_capability_change = self._original_apply_capability
self._teardown_fake_home()
def _enqueue(self, tool: str = TOOL_CRED_PROXY_BLOCK):
@@ -333,6 +339,75 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
self.assertIn("+new.example", entries[0].diff)
class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
    """PRD 0016 Phase 3: how `dashboard.approve()` handles capability-block.

    Approving a capability-block proposal must call
    `apply_capability_change`, archive the proposal itself (the sidecar
    that would normally do so is gone by then), and write no audit entry
    (capability-block has none per PRD 0013).
    """

    def setUp(self):
        self._setup_fake_home()
        self._original = dashboard.apply_capability_change

    def tearDown(self):
        dashboard.apply_capability_change = self._original
        self._teardown_fake_home()

    def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"):
        """Write a pending capability-block proposal for slug "dev" and wrap it."""
        proposal = Proposal.new(
            bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
            proposed_file=proposed,
            justification="need ripgrep",
            current_file_hash=sha256_hex(proposed),
            now=FIXED,
        )
        queue = supervise.queue_dir_for_slug("dev")
        queue.mkdir(parents=True, exist_ok=True)
        supervise.write_proposal(queue, proposal)
        return dashboard.QueuedProposal(proposal=proposal, queue_dir=queue)

    @staticmethod
    def _succeed(slug, content):
        # Stand-in apply that reports a fixed "before" and echoes "after".
        return ("FROM old\n", content)

    def test_capability_block_calls_apply_with_proposed_file(self):
        seen = []

        def recording_apply(slug, content):
            seen.append((slug, content))
            return ("FROM old\n", content)

        dashboard.apply_capability_change = recording_apply
        queued = self._enqueue_capability("FROM bookworm\n")
        dashboard.approve(queued)
        self.assertEqual([("dev", "FROM bookworm\n")], seen)

    def test_apply_failure_blocks_response_and_keeps_pending(self):
        def failing_apply(slug, content):
            raise CapabilityApplyError("teardown failed")

        dashboard.apply_capability_change = failing_apply
        queued = self._enqueue_capability()
        with self.assertRaises(CapabilityApplyError):
            dashboard.approve(queued)
        # The proposal must still be listed as pending after the failure.
        still_pending = [
            p.id for p in supervise.list_pending_proposals(queued.queue_dir)
        ]
        self.assertEqual([queued.proposal.id], still_pending)

    def test_no_audit_log_for_capability(self):
        dashboard.apply_capability_change = self._succeed
        queued = self._enqueue_capability()
        dashboard.approve(queued)
        # capability-block has no audit log per PRD 0013 — its record
        # lives in the per-bottle Dockerfile + transcript state.
        for component in ("cred-proxy", "pipelock"):
            self.assertEqual([], read_audit_entries(component, "dev"))

    def test_proposal_archived_after_apply(self):
        dashboard.apply_capability_change = self._succeed
        queued = self._enqueue_capability()
        dashboard.approve(queued)
        # Sidecar would normally archive after delivering the response,
        # but it's gone by then. The dashboard archives so
        # discover_pending stops surfacing the resolved proposal.
        self.assertEqual([], supervise.list_pending_proposals(queued.queue_dir))
        archived = list((queued.queue_dir / "processed").glob("*.json"))
        self.assertEqual(2, len(archived))
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
on a pending proposal)."""