Merge pull request 'PRD 0016: capability block remediation' (#22) from prd-0016-capability-block into main
This commit was merged in pull request #22.
This commit is contained in:
@@ -53,6 +53,11 @@ class BottleSpec:
|
||||
agent_name: str
|
||||
copy_cwd: bool
|
||||
user_cwd: str
|
||||
# PRD 0016 follow-up: when set, the backend's prepare step uses
|
||||
# this identity instead of minting a fresh one — the resume path
|
||||
# (`cli.py resume <identity>`) sets this to continue an existing
|
||||
# bottle's state. Empty string for a fresh `start`.
|
||||
identity: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
@@ -44,6 +44,11 @@ class DockerBottlePlan(BottlePlan):
|
||||
image: str
|
||||
derived_image: str # "" -> no derived image
|
||||
runtime_image: str # image actually launched (derived or base)
|
||||
# Absolute path to the Dockerfile that builds `image`. Empty means
|
||||
# use the repo's default Dockerfile. Populated to a per-bottle
|
||||
# state file (~/.claude-bottle/state/<slug>/Dockerfile) after a
|
||||
# capability-block remediation (PRD 0016).
|
||||
dockerfile_path: str
|
||||
env_file: Path # docker --env-file: NAME=VALUE literals
|
||||
# name -> value for vars forwarded into the docker-run child process
|
||||
# via subprocess env (so values never land on argv or in a file).
|
||||
@@ -89,6 +94,11 @@ class DockerBottlePlan(BottlePlan):
|
||||
print(file=sys.stderr)
|
||||
info(f"agent : {spec.agent_name}")
|
||||
info(f"image : {self.image}")
|
||||
if self.dockerfile_path:
|
||||
info(
|
||||
f"dockerfile : {self.dockerfile_path} "
|
||||
f"(per-bottle override from PRD 0016 capability rebuild)"
|
||||
)
|
||||
if self.derived_image:
|
||||
info(
|
||||
f"cwd : {spec.user_cwd} -> /home/node/workspace "
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Per-bottle persistent state (PRD 0016).
|
||||
|
||||
Holds the per-bottle Dockerfile override that capability-block
|
||||
remediation writes, the transcript snapshot the state-preservation
|
||||
helper saves before teardown, and the launch metadata that lets
|
||||
`cli.py resume <identity>` reconstruct a bottle's spec. State
|
||||
lives at:
|
||||
|
||||
~/.claude-bottle/state/<identity>/
|
||||
metadata.json — agent_name + cwd + started_at (for resume)
|
||||
Dockerfile — per-bottle override (absent → use repo's)
|
||||
transcript/ — last snapshotted agent state (best-effort)
|
||||
|
||||
When the per-bottle Dockerfile is present, the launch step builds
|
||||
the agent image with a per-bottle tag (claude-bottle-rebuilt-<id>)
|
||||
from this file rather than the repo's. The build context is still
|
||||
the repo root so the Dockerfile can COPY claude_bottle source files
|
||||
the same way the original does.
|
||||
|
||||
Identity model:
|
||||
- Every `cli.py start <agent>` mints a fresh identity via
|
||||
`bottle_identity(agent_name)`: slug-prefix for readability plus a
|
||||
5-char random suffix for parallel-safe uniqueness. The metadata
|
||||
written at launch time pins (agent_name, cwd) to that identity.
|
||||
- `cli.py resume <identity>` reads the metadata and re-launches a
|
||||
bottle pinned to the same identity, picking up any per-bottle
|
||||
Dockerfile and transcript snapshot.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
import secrets
|
||||
import string
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ... import supervise as _supervise
|
||||
from . import util as docker_mod
|
||||
|
||||
|
||||
# Directory layout: ~/.claude-bottle/state/<identity>/...
|
||||
_STATE_SUBDIR = "state"
|
||||
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
|
||||
_TRANSCRIPT_SUBDIR = "transcript"
|
||||
_METADATA_NAME = "metadata.json"
|
||||
|
||||
# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human
|
||||
# operators starting bottles by hand; collision-free in practice.
|
||||
_RANDOM_SUFFIX_LEN = 5
|
||||
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
|
||||
|
||||
|
||||
def bottle_identity(agent_name: str) -> str:
|
||||
"""Mint a fresh per-launch bottle identity. The slug-prefix is
|
||||
`slugify(agent_name)` for readability; the suffix is 5 random
|
||||
base36 chars so two simultaneous `start <agent>` invocations
|
||||
don't collide on container/network names.
|
||||
|
||||
Every call produces a different identity (non-deterministic).
|
||||
To continue an existing bottle's state, use the recorded
|
||||
identity from BottleMetadata via `cli.py resume <identity>`,
|
||||
not this function."""
|
||||
slug = docker_mod.slugify(agent_name)
|
||||
suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN))
|
||||
return f"{slug}-{suffix}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BottleMetadata:
|
||||
"""Persistent record of how a bottle was launched, written at
|
||||
start time and read by `cli.py resume`. Lives at
|
||||
~/.claude-bottle/state/<identity>/metadata.json."""
|
||||
|
||||
identity: str
|
||||
agent_name: str
|
||||
cwd: str # empty string when --cwd was not passed
|
||||
copy_cwd: bool
|
||||
started_at: str # ISO 8601 UTC
|
||||
|
||||
|
||||
def metadata_path(identity: str) -> Path:
|
||||
return bottle_state_dir(identity) / _METADATA_NAME
|
||||
|
||||
|
||||
def write_metadata(metadata: BottleMetadata) -> Path:
|
||||
"""Persist `metadata` to ~/.claude-bottle/state/<identity>/metadata.json.
|
||||
Mode 0o644 — no secrets, just (agent_name, cwd, timestamp)."""
|
||||
path = metadata_path(metadata.identity)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n")
|
||||
path.chmod(0o644)
|
||||
return path
|
||||
|
||||
|
||||
def read_metadata(identity: str) -> BottleMetadata | None:
|
||||
"""Return the metadata for `identity`, or None if no state has
|
||||
been recorded for it. Used by `cli.py resume` to reconstruct
|
||||
the launch spec."""
|
||||
path = metadata_path(identity)
|
||||
if not path.is_file():
|
||||
return None
|
||||
raw = json.loads(path.read_text())
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
return BottleMetadata(
|
||||
identity=str(raw.get("identity", identity)),
|
||||
agent_name=str(raw.get("agent_name", "")),
|
||||
cwd=str(raw.get("cwd", "")),
|
||||
copy_cwd=bool(raw.get("copy_cwd", False)),
|
||||
started_at=str(raw.get("started_at", "")),
|
||||
)
|
||||
|
||||
|
||||
def bottle_state_dir(identity: str) -> Path:
|
||||
"""Per-bottle state directory on the host. Created lazily by the
|
||||
write helpers; readers tolerate its absence."""
|
||||
return _supervise.claude_bottle_root() / _STATE_SUBDIR / identity
|
||||
|
||||
|
||||
def per_bottle_dockerfile_path(identity: str) -> Path:
|
||||
return bottle_state_dir(identity) / _PER_BOTTLE_DOCKERFILE_NAME
|
||||
|
||||
|
||||
def per_bottle_dockerfile(identity: str) -> str | None:
|
||||
"""Return the per-bottle Dockerfile content if present, else
|
||||
None. None means: use the repo's Dockerfile (the original
|
||||
pre-capability-block behavior)."""
|
||||
p = per_bottle_dockerfile_path(identity)
|
||||
if p.is_file():
|
||||
return p.read_text()
|
||||
return None
|
||||
|
||||
|
||||
def write_per_bottle_dockerfile(identity: str, content: str) -> Path:
|
||||
p = per_bottle_dockerfile_path(identity)
|
||||
p.parent.mkdir(parents=True, exist_ok=True)
|
||||
p.write_text(content)
|
||||
p.chmod(0o644)
|
||||
return p
|
||||
|
||||
|
||||
def per_bottle_image_tag(identity: str) -> str:
|
||||
"""Image tag for a rebuilt bottle. Distinct from the base
|
||||
claude-bottle:latest so per-bottle rebuilds don't collide in
|
||||
the docker image cache."""
|
||||
return f"claude-bottle-rebuilt-{identity}:latest"
|
||||
|
||||
|
||||
def transcript_snapshot_dir(identity: str) -> Path:
|
||||
"""Where capability_apply stashes the agent's transcript before
|
||||
teardown, so the next `cli.py start <agent>` can offer to
|
||||
resume from it."""
|
||||
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BottleMetadata",
|
||||
"bottle_identity",
|
||||
"bottle_state_dir",
|
||||
"metadata_path",
|
||||
"per_bottle_dockerfile",
|
||||
"per_bottle_dockerfile_path",
|
||||
"per_bottle_image_tag",
|
||||
"read_metadata",
|
||||
"transcript_snapshot_dir",
|
||||
"write_metadata",
|
||||
"write_per_bottle_dockerfile",
|
||||
]
|
||||
@@ -0,0 +1,210 @@
|
||||
"""capability_apply — host-side orchestrator for capability-block
|
||||
remediation (PRD 0016).
|
||||
|
||||
On approval of a capability-block proposal, the dashboard calls
|
||||
apply_capability_change(slug, new_dockerfile) which:
|
||||
|
||||
1. Snapshots the agent's transcript dir to
|
||||
~/.claude-bottle/state/<slug>/transcript/ (best-effort).
|
||||
2. Pushes the agent's working tree via `git push` (best-effort —
|
||||
no upstream / no commits / no git repo all skip with a log).
|
||||
3. Writes the new Dockerfile to
|
||||
~/.claude-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
|
||||
state). The next `cli.py start <agent>` picks it up.
|
||||
4. Force-removes the agent container + all sidecars + the
|
||||
per-bottle networks. Idempotent — missing resources are not
|
||||
errors.
|
||||
|
||||
Returns (before, after) Dockerfile contents so the dashboard can
|
||||
record / render the diff. (capability-block has no audit log per
|
||||
PRD 0013 — the per-bottle Dockerfile state is its own record.)
|
||||
|
||||
This is "fire-and-forget" from the agent's perspective: by the time
|
||||
the dashboard writes the response file the supervise sidecar is
|
||||
gone, so the agent's tool call connection drops without ever
|
||||
receiving the response. The replacement agent (next manual
|
||||
`cli.py start`) sees the new Dockerfile and starts from there.
|
||||
v1 does not auto-relaunch — see PRD 0016's capability-block return
|
||||
semantics open question.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...log import info, warn
|
||||
from .bottle_state import (
|
||||
per_bottle_dockerfile,
|
||||
per_bottle_dockerfile_path,
|
||||
transcript_snapshot_dir,
|
||||
write_per_bottle_dockerfile,
|
||||
)
|
||||
|
||||
|
||||
# Agent home inside the container (per the repo Dockerfile's
|
||||
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
|
||||
# dir + the workspace dir for git push.
|
||||
_AGENT_HOME_IN_CONTAINER = "/home/node"
|
||||
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
|
||||
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
|
||||
|
||||
# Per-bottle resource name patterns (mirroring prepare.py /
|
||||
# the various sidecar modules). The agent container's name is the
|
||||
# slug with no infix; sidecars carry an infix like cred-proxy.
|
||||
def _agent_container_name(slug: str) -> str:
|
||||
return f"claude-bottle-{slug}"
|
||||
|
||||
|
||||
def _per_bottle_container_names(slug: str) -> list[str]:
|
||||
"""All container names that belong to this bottle. Missing
|
||||
containers are silently skipped by the teardown helper, so it's
|
||||
fine to include names that don't exist for a given bottle."""
|
||||
return [
|
||||
_agent_container_name(slug),
|
||||
f"claude-bottle-cred-proxy-{slug}",
|
||||
f"claude-bottle-pipelock-{slug}",
|
||||
f"claude-bottle-git-gate-{slug}",
|
||||
f"claude-bottle-supervise-{slug}",
|
||||
]
|
||||
|
||||
|
||||
def _per_bottle_network_names(slug: str) -> list[str]:
|
||||
return [
|
||||
f"claude-bottle-net-{slug}",
|
||||
f"claude-bottle-egress-{slug}",
|
||||
]
|
||||
|
||||
|
||||
class CapabilityApplyError(RuntimeError):
|
||||
"""Raised when the apply fails in a way that should keep the
|
||||
proposal pending (so the operator can retry). Best-effort
|
||||
failures (transcript snapshot, git push) do not raise — they
|
||||
just log and proceed."""
|
||||
|
||||
|
||||
# --- Public helpers --------------------------------------------------------
|
||||
|
||||
|
||||
def fetch_current_dockerfile(slug: str) -> str:
|
||||
"""Return the Dockerfile content the next `cli.py start <agent>`
|
||||
would use for this bottle. If a per-bottle override exists, that
|
||||
one; otherwise the repo's Dockerfile.
|
||||
|
||||
Used by the operator-edit verb to show the current source of
|
||||
truth, and by apply_capability_change for the before-diff."""
|
||||
override = per_bottle_dockerfile(slug)
|
||||
if override is not None:
|
||||
return override
|
||||
repo_dockerfile = _repo_dockerfile_path()
|
||||
if repo_dockerfile.is_file():
|
||||
return repo_dockerfile.read_text()
|
||||
raise CapabilityApplyError(
|
||||
f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at "
|
||||
f"{repo_dockerfile}"
|
||||
)
|
||||
|
||||
|
||||
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
|
||||
"""End-to-end capability-block remediation. See module docstring
|
||||
for the sequence. Returns (before, after) Dockerfile content."""
|
||||
if not new_dockerfile.strip():
|
||||
raise CapabilityApplyError("proposed Dockerfile is empty")
|
||||
before = fetch_current_dockerfile(slug)
|
||||
|
||||
_snapshot_transcript(slug)
|
||||
_push_working_tree(slug)
|
||||
write_per_bottle_dockerfile(slug, new_dockerfile)
|
||||
_teardown_bottle(slug)
|
||||
|
||||
return before, new_dockerfile
|
||||
|
||||
|
||||
# --- Internals -------------------------------------------------------------
|
||||
|
||||
|
||||
def _repo_dockerfile_path() -> Path:
|
||||
"""Path to the repo's Dockerfile (one dir above this module's
|
||||
package root). Resolved at call time so the path is correct
|
||||
regardless of where this module is imported from."""
|
||||
# claude_bottle/backend/docker/capability_apply.py -> repo root
|
||||
return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile"
|
||||
|
||||
|
||||
def _snapshot_transcript(slug: str) -> None:
|
||||
"""`docker cp` /home/node/.claude out of the agent container into
|
||||
~/.claude-bottle/state/<slug>/transcript/. Best-effort: missing
|
||||
container, missing dir, or cp error all log a warning and return.
|
||||
The transcript is what `claude --resume` reads to pick up where
|
||||
the agent left off."""
|
||||
container = _agent_container_name(slug)
|
||||
dest = transcript_snapshot_dir(slug)
|
||||
if dest.exists():
|
||||
# Remove any prior snapshot so the new one is a clean copy.
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
r = subprocess.run(
|
||||
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"capability-apply: transcript snapshot skipped "
|
||||
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
|
||||
)
|
||||
return
|
||||
info(f"capability-apply: transcript snapshotted to {dest}")
|
||||
|
||||
|
||||
def _push_working_tree(slug: str) -> None:
|
||||
"""`docker exec <agent> git push` from /home/node/workspace.
|
||||
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
|
||||
network all log a warning and return. The replacement bottle
|
||||
will pick up whatever's actually upstream."""
|
||||
container = _agent_container_name(slug)
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "exec", container, "sh", "-c",
|
||||
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
|
||||
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
|
||||
f"git push origin HEAD 2>&1 || true",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"capability-apply: git push skipped "
|
||||
f"({(r.stderr or '').strip() or 'docker exec failed'})"
|
||||
)
|
||||
return
|
||||
output = (r.stdout or "").strip()
|
||||
if output:
|
||||
info(f"capability-apply: git push: {output}")
|
||||
else:
|
||||
info("capability-apply: git push ran (no output — likely not a git workspace)")
|
||||
|
||||
|
||||
def _teardown_bottle(slug: str) -> None:
|
||||
"""Force-remove all per-bottle docker resources. Idempotent —
|
||||
`docker rm -f` / `docker network rm` silently ignore missing
|
||||
names, so this can be called even mid-rebuild."""
|
||||
info(f"capability-apply: tearing down bottle {slug}")
|
||||
for name in _per_bottle_container_names(slug):
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
for net in _per_bottle_network_names(slug):
|
||||
subprocess.run(
|
||||
["docker", "network", "rm", net],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CapabilityApplyError",
|
||||
"apply_capability_change",
|
||||
"fetch_current_dockerfile",
|
||||
]
|
||||
@@ -66,7 +66,10 @@ def launch(
|
||||
pass
|
||||
|
||||
try:
|
||||
docker_mod.build_image(plan.image, _REPO_DIR)
|
||||
docker_mod.build_image(
|
||||
plan.image, _REPO_DIR,
|
||||
dockerfile=plan.dockerfile_path,
|
||||
)
|
||||
if plan.derived_image:
|
||||
docker_mod.build_image_with_cwd(
|
||||
plan.derived_image, plan.image, plan.spec.user_cwd
|
||||
|
||||
@@ -11,6 +11,7 @@ via the base class's `prepare` template before this is called.
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ... import pipelock
|
||||
@@ -26,6 +27,14 @@ from .cred_proxy import (
|
||||
cred_proxy_url,
|
||||
)
|
||||
from .git_gate import DockerGitGate, git_gate_container_name
|
||||
from .bottle_state import (
|
||||
BottleMetadata,
|
||||
bottle_identity,
|
||||
per_bottle_dockerfile,
|
||||
per_bottle_dockerfile_path,
|
||||
per_bottle_image_tag,
|
||||
write_metadata,
|
||||
)
|
||||
from .pipelock import DockerPipelockProxy, pipelock_container_name
|
||||
from .supervise import DockerSupervise, supervise_container_name
|
||||
|
||||
@@ -48,9 +57,34 @@ def resolve_plan(
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
|
||||
slug = docker_mod.slugify(spec.agent_name)
|
||||
# PRD 0016 follow-up: identity, not bare slug. A fresh `start`
|
||||
# mints a random-suffixed identity (so parallel runs of the same
|
||||
# agent in the same cwd don't collide on container/network
|
||||
# names); a `resume` passes the recorded identity in via
|
||||
# spec.identity to continue an existing bottle's state.
|
||||
slug = spec.identity or bottle_identity(spec.agent_name)
|
||||
# Record the launch metadata so `cli.py resume <identity>` can
|
||||
# reconstruct the spec. Idempotent — re-writes on resume with a
|
||||
# refreshed started_at.
|
||||
write_metadata(BottleMetadata(
|
||||
identity=slug,
|
||||
agent_name=spec.agent_name,
|
||||
cwd=spec.user_cwd if spec.copy_cwd else "",
|
||||
copy_cwd=spec.copy_cwd,
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
))
|
||||
|
||||
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest")
|
||||
# PRD 0016 capability-block: if a per-bottle Dockerfile has been
|
||||
# written (via apply_capability_change), the base image becomes
|
||||
# per_bottle_image_tag(slug) built from that file. --cwd still
|
||||
# layers a derived image on top.
|
||||
dockerfile_path = ""
|
||||
if per_bottle_dockerfile(slug) is not None:
|
||||
image_default = per_bottle_image_tag(slug)
|
||||
dockerfile_path = str(per_bottle_dockerfile_path(slug))
|
||||
else:
|
||||
image_default = "claude-bottle:latest"
|
||||
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", image_default)
|
||||
derived_image = ""
|
||||
runtime_image = image
|
||||
if spec.copy_cwd:
|
||||
@@ -184,6 +218,7 @@ def resolve_plan(
|
||||
image=image,
|
||||
derived_image=derived_image,
|
||||
runtime_image=runtime_image,
|
||||
dockerfile_path=dockerfile_path,
|
||||
env_file=env_file,
|
||||
forwarded_env=forwarded_env,
|
||||
prompt_file=prompt_file,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Main CLI dispatcher.
|
||||
|
||||
Commands: cleanup, dashboard, edit, info, init, list, start
|
||||
Commands: cleanup, dashboard, edit, info, init, list, resume, start
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -15,6 +15,7 @@ from .dashboard import cmd_dashboard
|
||||
from .edit import cmd_edit
|
||||
from .info import cmd_info
|
||||
from .init import cmd_init
|
||||
from .resume import cmd_resume
|
||||
from .start import cmd_start
|
||||
|
||||
cmd_list = _list_mod.cmd_list
|
||||
@@ -26,6 +27,7 @@ COMMANDS = {
|
||||
"info": cmd_info,
|
||||
"init": cmd_init,
|
||||
"list": cmd_list,
|
||||
"resume": cmd_resume,
|
||||
"start": cmd_start,
|
||||
}
|
||||
|
||||
@@ -39,6 +41,7 @@ def usage() -> None:
|
||||
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
|
||||
sys.stderr.write(" init interactively create a new agent and add it to claude-bottle.json\n")
|
||||
sys.stderr.write(" list list available agents or active containers\n")
|
||||
sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n")
|
||||
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
|
||||
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
|
||||
|
||||
|
||||
@@ -22,6 +22,10 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from .. import supervise as _supervise
|
||||
from ..backend.docker.capability_apply import (
|
||||
CapabilityApplyError,
|
||||
apply_capability_change,
|
||||
)
|
||||
from ..backend.docker.cred_proxy_apply import (
|
||||
CredProxyApplyError,
|
||||
apply_routes_change,
|
||||
@@ -45,6 +49,7 @@ from ..supervise import (
|
||||
TOOL_CAPABILITY_BLOCK,
|
||||
TOOL_CRED_PROXY_BLOCK,
|
||||
TOOL_PIPELOCK_BLOCK,
|
||||
archive_proposal,
|
||||
list_pending_proposals,
|
||||
render_diff,
|
||||
write_audit_entry,
|
||||
@@ -56,7 +61,7 @@ from ._common import PROG
|
||||
# Errors any remediation engine may raise. Caught by the TUI key
|
||||
# handlers and surfaced in the status line so a failed apply keeps
|
||||
# the proposal pending rather than crashing curses.
|
||||
ApplyError = (CredProxyApplyError, PipelockApplyError)
|
||||
ApplyError = (CredProxyApplyError, PipelockApplyError, CapabilityApplyError)
|
||||
|
||||
|
||||
# --- Discovery -------------------------------------------------------------
|
||||
@@ -107,6 +112,16 @@ def discover_pipelock_slugs() -> list[str]:
|
||||
return _discover_sidecar_slugs("claude-bottle-pipelock-")
|
||||
|
||||
|
||||
def _approval_status(qp: QueuedProposal, verb: str) -> str:
|
||||
"""Status-line text after a successful approval. For capability-
|
||||
block, append the `resume <identity>` hint so the operator can
|
||||
bring the rebuilt bottle back up with one copy-paste."""
|
||||
base = f"{verb} {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
||||
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||
return f"{base}; resume: ./cli.py resume {qp.proposal.bottle_slug}"
|
||||
return base
|
||||
|
||||
|
||||
def discover_pending() -> list[QueuedProposal]:
|
||||
"""Walk ~/.claude-bottle/queue/* and collect pending proposals
|
||||
from every bottle's queue. Sorted by arrival time across the
|
||||
@@ -155,9 +170,10 @@ def approve(
|
||||
diff_before, diff_after = apply_allowlist_change(
|
||||
qp.proposal.bottle_slug, file_to_apply,
|
||||
)
|
||||
# capability-block remediation lands in PRD 0016; until then
|
||||
# it stays a no-op approval and the audit (none for capability)
|
||||
# is skipped.
|
||||
elif qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||
diff_before, diff_after = apply_capability_change(
|
||||
qp.proposal.bottle_slug, file_to_apply,
|
||||
)
|
||||
|
||||
response = Response(
|
||||
proposal_id=qp.proposal.id,
|
||||
@@ -170,6 +186,12 @@ def approve(
|
||||
qp, action=status, notes=notes,
|
||||
diff_before=diff_before, diff_after=diff_after,
|
||||
)
|
||||
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||
# The supervise sidecar was torn down by apply_capability_change,
|
||||
# so it can't archive its own proposal+response. Archive here so
|
||||
# dashboard.discover_pending stops surfacing the resolved
|
||||
# proposal forever.
|
||||
archive_proposal(qp.queue_dir, qp.proposal.id)
|
||||
|
||||
|
||||
def reject(qp: QueuedProposal, *, reason: str) -> None:
|
||||
@@ -359,7 +381,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
|
||||
elif key == ord("a"):
|
||||
try:
|
||||
approve(qp)
|
||||
status_line = f"approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
||||
status_line = _approval_status(qp, "approved")
|
||||
except ApplyError as e:
|
||||
status_line = f"apply failed: {e}"
|
||||
elif key == ord("m"):
|
||||
@@ -369,7 +391,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
|
||||
else:
|
||||
try:
|
||||
approve(qp, final_file=edited, notes="operator modified before approving")
|
||||
status_line = f"modified+approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
||||
status_line = _approval_status(qp, "modified+approved")
|
||||
except ApplyError as e:
|
||||
status_line = f"apply failed: {e}"
|
||||
elif key == ord("r"):
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
"""resume: re-launch a bottle by its identity.
|
||||
|
||||
Reads ~/.claude-bottle/state/<identity>/metadata.json to recover the
|
||||
(agent_name, cwd, copy_cwd) the bottle was originally started with,
|
||||
then runs the same launch core as `start` — but pinned to the
|
||||
recorded identity so the new bottle picks up any per-bottle Dockerfile
|
||||
(from capability-block apply) and transcript snapshot under the same
|
||||
state dir.
|
||||
|
||||
Use case: an agent calls capability-block, the dashboard approves
|
||||
and tears down the bottle, the operator runs
|
||||
./cli.py resume <identity>
|
||||
to bring up the replacement with the new capabilities baked in.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
|
||||
from ..backend import BottleSpec
|
||||
from ..backend.docker.bottle_state import read_metadata
|
||||
from ..log import die
|
||||
from ..manifest import Manifest
|
||||
from ._common import PROG, USER_CWD
|
||||
from .start import _launch_bottle
|
||||
|
||||
|
||||
def cmd_resume(argv: list[str]) -> int:
|
||||
parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True)
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--remote-control", action="store_true")
|
||||
parser.add_argument(
|
||||
"--format",
|
||||
choices=("text", "json"),
|
||||
default="text",
|
||||
help="preflight output format; --format=json requires --dry-run",
|
||||
)
|
||||
parser.add_argument(
|
||||
"identity",
|
||||
help="bottle identity from a prior `start` (see its session-end output)",
|
||||
)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
metadata = read_metadata(args.identity)
|
||||
if metadata is None:
|
||||
die(
|
||||
f"no state recorded for identity {args.identity!r}; "
|
||||
f"check ~/.claude-bottle/state/ or run `cli.py start` to create a new bottle"
|
||||
)
|
||||
|
||||
manifest = Manifest.resolve(USER_CWD)
|
||||
manifest.require_agent(metadata.agent_name)
|
||||
|
||||
spec = BottleSpec(
|
||||
manifest=manifest,
|
||||
agent_name=metadata.agent_name,
|
||||
copy_cwd=metadata.copy_cwd,
|
||||
user_cwd=metadata.cwd or USER_CWD,
|
||||
identity=metadata.identity,
|
||||
)
|
||||
return _launch_bottle(
|
||||
spec,
|
||||
dry_run=args.dry_run,
|
||||
output_format=args.format,
|
||||
remote_control=args.remote_control,
|
||||
)
|
||||
@@ -1,6 +1,10 @@
|
||||
"""start: boot a sandboxed container for a named agent and attach an
|
||||
interactive claude-code session. The container is torn down when the
|
||||
session ends."""
|
||||
session ends.
|
||||
|
||||
The launch core is shared with `cli.py resume <identity>`: see
|
||||
_launch_bottle below.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -43,18 +47,35 @@ def cmd_start(argv: list[str]) -> int:
|
||||
copy_cwd=args.cwd,
|
||||
user_cwd=USER_CWD,
|
||||
)
|
||||
return _launch_bottle(
|
||||
spec,
|
||||
dry_run=dry_run,
|
||||
output_format=args.format,
|
||||
remote_control=args.remote_control,
|
||||
)
|
||||
|
||||
|
||||
def _launch_bottle(
|
||||
spec: BottleSpec,
|
||||
*,
|
||||
dry_run: bool,
|
||||
output_format: str,
|
||||
remote_control: bool,
|
||||
) -> int:
|
||||
"""Shared launch core for `start` and `resume`. Builds the plan,
|
||||
prints / dry-runs / prompts as appropriate, brings the bottle up,
|
||||
attaches claude, and prints the resume hint on session end."""
|
||||
stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
|
||||
try:
|
||||
backend = get_bottle_backend()
|
||||
plan = backend.prepare(spec, stage_dir=stage_dir)
|
||||
|
||||
if args.format == "json":
|
||||
json.dump(plan.to_dict(remote_control=args.remote_control), sys.stdout, indent=2)
|
||||
if output_format == "json":
|
||||
json.dump(plan.to_dict(remote_control=remote_control), sys.stdout, indent=2)
|
||||
sys.stdout.write("\n")
|
||||
return 0
|
||||
|
||||
plan.print(remote_control=args.remote_control)
|
||||
plan.print(remote_control=remote_control)
|
||||
|
||||
if dry_run:
|
||||
info("dry-run requested; not starting container.")
|
||||
@@ -67,16 +88,27 @@ def cmd_start(argv: list[str]) -> int:
|
||||
info("aborted by user")
|
||||
return 0
|
||||
|
||||
identity = _identity_from_plan(plan)
|
||||
with backend.launch(plan) as bottle:
|
||||
info(
|
||||
"attaching interactive claude session "
|
||||
"(Ctrl-D or 'exit' to leave; container will be removed)"
|
||||
)
|
||||
claude_args = ["--dangerously-skip-permissions"]
|
||||
if args.remote_control:
|
||||
if remote_control:
|
||||
claude_args.append("--remote-control")
|
||||
bottle.exec_claude(claude_args, tty=True)
|
||||
info(f"session ended; container {bottle.name} will be removed")
|
||||
if identity:
|
||||
info(f"to resume this bottle: ./cli.py resume {identity}")
|
||||
return 0
|
||||
finally:
|
||||
shutil.rmtree(stage_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def _identity_from_plan(plan: object) -> str:
|
||||
"""Backend-specific: the docker plan exposes the identity as
|
||||
`.slug`. Other backends in the future would expose their own
|
||||
identity attribute; for now we duck-type to keep this layer
|
||||
backend-agnostic."""
|
||||
return getattr(plan, "slug", "")
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
# PRD 0016: capability block remediation
|
||||
|
||||
- **Status:** Draft
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-25
|
||||
- **Parent:** PRD 0012
|
||||
- **Depends on:** PRD 0013
|
||||
|
||||
## Summary
|
||||
|
||||
Wires the **capability block** path (PRD 0012 *Stuck categories*) end-to-end. On operator approval of a `capability-block` proposal, the rebuild orchestrator tears down the existing bottle, builds from the new Dockerfile, and starts a replacement bottle on the same branch via the state-preservation helper. The replacement agent picks up where the original left off, now with the missing capability. Heaviest of the three remediation PRDs because the orchestrator and state-preservation helper are non-trivial.
|
||||
|
||||
## Problem
|
||||
|
||||
See PRD 0012. This PRD specifically addresses: with 0013 in place, the operator can approve a `capability-block` proposal but nothing happens — the bottle is not rebuilt, the agent stays stuck. This PRD closes the loop. Unlike 0014 and 0015, the remediation requires container teardown + rebuild + state hand-off, so the design surface is larger.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
A real capability block recovers end-to-end: the agent's invocation of a tool / command / skill fails (not found, permission denied), the agent calls `capability-block` with a proposed Dockerfile and justification, the operator approves in the TUI, the orchestrator tears down the bottle and starts a replacement built from the new Dockerfile, the replacement agent inherits the working tree and best-effort transcript and continues on the same branch.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Live mutation of the running container (re-stated from PRD 0012 non-goals).
|
||||
- Forking into multiple parallel rebuilt bottles. One-for-one replacement only.
|
||||
- cred-proxy or pipelock handling (covered by 0014 and 0015).
|
||||
|
||||
## Scope
|
||||
|
||||
### In scope
|
||||
|
||||
- A rebuild orchestrator that, on operator approval, tears down the existing bottle, builds from the approved Dockerfile, and starts a replacement on the same branch.
|
||||
- A state-preservation helper that handles the hand-off across the rebuild: working tree push is mandatory; transcript / reasoning context is best-effort.
|
||||
- `capability-block` approval handler in the MCP sidecar (replacing the 0013 no-op): on approval, hand off to the orchestrator.
|
||||
- Bottle lifecycle script changes for orchestrated teardown + rebuild (distinct from a fresh-spawn).
|
||||
- Bottle manifest schema changes: record originating manifest version / change history per agent run so the dashboard can show "what changed" rather than "what is."
|
||||
- A per-agent-run record that maps a running bottle back to its PR / branch, so the orchestrator knows which branch to resume on.
|
||||
|
||||
### Out of scope
|
||||
|
||||
- Rolling back a rebuild that the replacement agent regrets. The audit trail (git history + bottle rebuild record) shows what changed; a follow-up `capability-block` proposal can revert.
|
||||
|
||||
## Proposed Design
|
||||
|
||||
### New services / components
|
||||
|
||||
- **Rebuild orchestrator.** On approval, tears down the existing bottle, builds from the new Dockerfile, snapshots state via the state-preservation helper, and starts a fresh bottle on the same branch.
|
||||
- **State-preservation helper.** Mandatory: ensures the working tree is pushed before teardown. Best-effort: carries forward the agent's transcript / reasoning context — including the approved `capability-block` proposal — into the replacement container so the new agent starts warm rather than cold.
|
||||
|
||||
### Existing code touched
|
||||
|
||||
- **MCP sidecar** (PRD 0013) — the `capability-block` approval handler stops being a no-op; on approval, hands off to the rebuild orchestrator.
|
||||
- **Bottle lifecycle scripts** — extended for orchestrated teardown + rebuild with state hand-off, distinct from a fresh-spawn.
|
||||
- **Bottle manifest schema** — records the originating manifest version / change history per agent run.
|
||||
- **`cli.py`** — gains the rebuild path.
|
||||
|
||||
### Data model changes
|
||||
|
||||
- A per-agent-run record sufficient to map a running bottle back to its PR / branch.
|
||||
|
||||
## Open questions
|
||||
|
||||
- **`capability-block` return semantics.** The current agent is torn down on approval, so the tool's return value never reaches it. Options: (a) fire-and-forget, the tool returns immediately with "queued" and the agent halts; (b) block the tool, let the rebuild orchestrator's teardown kill the connection, the replacement agent gets the approval record via state-preservation; (c) the tool blocks, returns "approved" right before teardown, the agent has milliseconds to log it. (b) seems cleanest but is worth confirming during implementation.
|
||||
- **Best-effort transcript preservation.** Mount the agent's state directory, snapshot on teardown, remount in the replacement? How much fidelity is "good enough" for the new agent to pick up?
|
||||
- **Bottle → PR/branch mapping.** Recorded at bottle-spawn time, derived from the working tree, or specified in the manifest?
|
||||
- **Rejection semantics.** Does the agent receive a tool reply explaining the rejection, or does the bottle just stay torn down?
|
||||
|
||||
## References
|
||||
|
||||
- PRD 0012 — stuck-agent recovery flow overview.
|
||||
- PRD 0013 — supervise plane foundation (prerequisite).
|
||||
@@ -0,0 +1,217 @@
|
||||
"""Integration: drive `apply_capability_change` against a real
|
||||
container that mimics the agent's name + filesystem layout (PRD 0016).
|
||||
|
||||
The real `cli.py start <agent>` flow is too heavy for an integration
|
||||
test (it builds the agent image, brings up all the sidecars, attaches
|
||||
an interactive claude session). Instead, this test stages the
|
||||
minimum the orchestrator interacts with:
|
||||
|
||||
- A lightweight `alpine:latest sleep infinity` container named
|
||||
`claude-bottle-<slug>` (matches the agent container name pattern)
|
||||
on the per-bottle internal network.
|
||||
- A marker file under `/home/node/.claude/` so we can assert the
|
||||
transcript snapshot path actually transferred bytes.
|
||||
|
||||
Then `apply_capability_change` runs and we verify:
|
||||
- Per-bottle Dockerfile written.
|
||||
- Containers + networks removed.
|
||||
- Transcript snapshot dir on the host has the marker file.
|
||||
|
||||
docker exec / cp / rm work across the docker socket boundary, so
|
||||
this test runs in DinD too — no act_runner skip needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker import bottle_state, capability_apply
|
||||
from claude_bottle.backend.docker.capability_apply import apply_capability_change
|
||||
from claude_bottle.backend.docker.network import (
|
||||
network_create_egress,
|
||||
network_create_internal,
|
||||
network_remove,
|
||||
)
|
||||
from tests._docker import skip_unless_docker
|
||||
|
||||
|
||||
ALPINE_IMAGE = "alpine:latest"
|
||||
|
||||
|
||||
@skip_unless_docker()
|
||||
class TestCapabilityApply(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
r = subprocess.run(
|
||||
["docker", "pull", ALPINE_IMAGE],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}")
|
||||
|
||||
def setUp(self):
|
||||
self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}"
|
||||
self.agent_name = f"claude-bottle-{self.slug}"
|
||||
self.sidecar_names: list[str] = []
|
||||
self.internal_net = ""
|
||||
self.egress_net = ""
|
||||
# Fake home so tests don't touch ~/.claude-bottle/.
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.")
|
||||
self._original_root = supervise.claude_bottle_root
|
||||
|
||||
def fake_root() -> Path:
|
||||
return Path(self._tmp.name) / ".claude-bottle"
|
||||
|
||||
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||
|
||||
def tearDown(self):
|
||||
supervise.claude_bottle_root = self._original_root # type: ignore[assignment]
|
||||
for name in [self.agent_name, *self.sidecar_names]:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
for n in (self.internal_net, self.egress_net):
|
||||
if n:
|
||||
network_remove(n)
|
||||
self._tmp.cleanup()
|
||||
|
||||
def _bring_up_fake_bottle(self) -> None:
|
||||
self.internal_net = network_create_internal(self.slug)
|
||||
self.egress_net = network_create_egress(self.slug)
|
||||
# Agent container with the canonical name.
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "run", "-d",
|
||||
"--name", self.agent_name,
|
||||
"--network", self.internal_net,
|
||||
ALPINE_IMAGE,
|
||||
"sh", "-c",
|
||||
"mkdir -p /home/node/.claude && "
|
||||
"echo 'transcript-marker' > /home/node/.claude/sessions.json && "
|
||||
"sleep 3600",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
self.assertEqual(0, r.returncode, r.stderr)
|
||||
# Also start a fake supervise sidecar so teardown has something
|
||||
# extra to clean up (mirrors a real bottle's container set).
|
||||
sidecar = f"claude-bottle-supervise-{self.slug}"
|
||||
subprocess.run(
|
||||
[
|
||||
"docker", "run", "-d",
|
||||
"--name", sidecar,
|
||||
"--network", self.internal_net,
|
||||
ALPINE_IMAGE, "sleep", "3600",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
self.sidecar_names.append(sidecar)
|
||||
|
||||
def _containers_named_like(self) -> list[str]:
|
||||
"""All running/stopped containers whose names start with
|
||||
the bottle's slug — both agent + sidecars."""
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "ps", "-a",
|
||||
"--filter", f"name={self.agent_name}",
|
||||
"--format", "{{.Names}}",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return [line for line in (r.stdout or "").splitlines() if line]
|
||||
|
||||
def _networks_named_like(self) -> list[str]:
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "network", "ls",
|
||||
"--filter", f"name={self.slug}",
|
||||
"--format", "{{.Name}}",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return [line for line in (r.stdout or "").splitlines() if line]
|
||||
|
||||
def test_apply_writes_dockerfile_and_tears_down(self):
|
||||
self._bring_up_fake_bottle()
|
||||
self.assertIn(self.agent_name, self._containers_named_like())
|
||||
|
||||
new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n"
|
||||
before, after = apply_capability_change(self.slug, new_dockerfile)
|
||||
|
||||
# Before is the repo Dockerfile (no prior per-bottle override);
|
||||
# after is what we passed in.
|
||||
self.assertIn("FROM ", before)
|
||||
self.assertEqual(new_dockerfile, after)
|
||||
|
||||
# Per-bottle Dockerfile written on the host.
|
||||
self.assertEqual(
|
||||
new_dockerfile,
|
||||
bottle_state.per_bottle_dockerfile(self.slug),
|
||||
)
|
||||
|
||||
# Agent + sidecars gone.
|
||||
self.assertEqual([], self._containers_named_like())
|
||||
# Networks removed (matching the slug substring).
|
||||
nets = self._networks_named_like()
|
||||
self.assertEqual([], nets)
|
||||
# Mark them as already cleaned so tearDown is idempotent.
|
||||
self.internal_net = ""
|
||||
self.egress_net = ""
|
||||
self.sidecar_names = []
|
||||
|
||||
def test_transcript_snapshot_captured(self):
|
||||
self._bring_up_fake_bottle()
|
||||
apply_capability_change(self.slug, "FROM x\n")
|
||||
snap = bottle_state.transcript_snapshot_dir(self.slug)
|
||||
self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing")
|
||||
# docker cp <container>:/home/node/.claude <dst> produces
|
||||
# <dst>/.claude/sessions.json (it preserves the source dir name
|
||||
# inside the destination if the destination already exists).
|
||||
# Walk the snapshot looking for the marker contents.
|
||||
marker_found = False
|
||||
for path in snap.rglob("sessions.json"):
|
||||
if "transcript-marker" in path.read_text():
|
||||
marker_found = True
|
||||
break
|
||||
self.assertTrue(marker_found, f"marker not found under {snap}")
|
||||
# Cleaned up by apply already.
|
||||
self.internal_net = ""
|
||||
self.egress_net = ""
|
||||
self.sidecar_names = []
|
||||
|
||||
def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self):
|
||||
# First change: before is repo's Dockerfile.
|
||||
self._bring_up_fake_bottle()
|
||||
first_before, _ = apply_capability_change(self.slug, "FROM v1\n")
|
||||
self.assertIn("FROM ", first_before)
|
||||
|
||||
# Second change: before is "FROM v1\n" (the per-bottle override
|
||||
# from the first change), proving the state persists across
|
||||
# rebuilds.
|
||||
self._bring_up_fake_bottle()
|
||||
second_before, second_after = apply_capability_change(self.slug, "FROM v2\n")
|
||||
self.assertEqual("FROM v1\n", second_before)
|
||||
self.assertEqual("FROM v2\n", second_after)
|
||||
self.internal_net = ""
|
||||
self.egress_net = ""
|
||||
self.sidecar_names = []
|
||||
|
||||
def test_teardown_idempotent_when_nothing_running(self):
|
||||
# No bottle ever brought up — teardown still doesn't raise.
|
||||
apply_capability_change(self.slug, "FROM x\n")
|
||||
self.assertEqual(
|
||||
"FROM x\n",
|
||||
bottle_state.per_bottle_dockerfile(self.slug),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,166 @@
|
||||
"""Unit: per-bottle state helpers (PRD 0016 Phase 1) + identity +
|
||||
launch metadata."""
|
||||
|
||||
import re
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker import bottle_state
|
||||
from claude_bottle.backend.docker.bottle_state import (
|
||||
BottleMetadata,
|
||||
read_metadata,
|
||||
write_metadata,
|
||||
)
|
||||
|
||||
|
||||
class _FakeHomeMixin:
|
||||
def _setup_fake_home(self):
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="bottle-state-test.")
|
||||
original = supervise.claude_bottle_root
|
||||
|
||||
def fake_root() -> Path:
|
||||
return Path(self._tmp.name) / ".claude-bottle"
|
||||
|
||||
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||
self._restore = lambda: setattr(supervise, "claude_bottle_root", original)
|
||||
|
||||
def _teardown_fake_home(self):
|
||||
self._restore()
|
||||
self._tmp.cleanup()
|
||||
|
||||
|
||||
class TestPerBottleDockerfile(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
|
||||
def tearDown(self):
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_returns_none_when_absent(self):
|
||||
self.assertIsNone(bottle_state.per_bottle_dockerfile("dev"))
|
||||
|
||||
def test_write_then_read_roundtrip(self):
|
||||
bottle_state.write_per_bottle_dockerfile(
|
||||
"dev", "FROM python:3.13\nRUN apk add ripgrep\n",
|
||||
)
|
||||
self.assertEqual(
|
||||
"FROM python:3.13\nRUN apk add ripgrep\n",
|
||||
bottle_state.per_bottle_dockerfile("dev"),
|
||||
)
|
||||
|
||||
def test_isolated_per_slug(self):
|
||||
bottle_state.write_per_bottle_dockerfile("dev", "FROM dev\n")
|
||||
bottle_state.write_per_bottle_dockerfile("api", "FROM api\n")
|
||||
self.assertEqual("FROM dev\n", bottle_state.per_bottle_dockerfile("dev"))
|
||||
self.assertEqual("FROM api\n", bottle_state.per_bottle_dockerfile("api"))
|
||||
|
||||
def test_dockerfile_path_under_state_dir(self):
|
||||
path = bottle_state.per_bottle_dockerfile_path("dev")
|
||||
self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/Dockerfile"))
|
||||
|
||||
def test_image_tag_unique_per_slug(self):
|
||||
self.assertEqual(
|
||||
"claude-bottle-rebuilt-dev:latest",
|
||||
bottle_state.per_bottle_image_tag("dev"),
|
||||
)
|
||||
self.assertNotEqual(
|
||||
bottle_state.per_bottle_image_tag("dev"),
|
||||
bottle_state.per_bottle_image_tag("api"),
|
||||
)
|
||||
|
||||
def test_transcript_dir_under_state_dir(self):
|
||||
path = bottle_state.transcript_snapshot_dir("dev")
|
||||
self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/transcript"))
|
||||
|
||||
|
||||
class TestBottleIdentity(unittest.TestCase):
|
||||
"""bottle_identity(agent_name) — PRD 0016 follow-up.
|
||||
|
||||
Every call mints a fresh identity with a random 5-char suffix
|
||||
so multiple instances of the same agent can run in parallel
|
||||
without container name collisions. The slug-prefix is for
|
||||
readability; the suffix is for uniqueness. To continue an
|
||||
existing bottle, use the recorded identity via
|
||||
`cli.py resume <identity>`, not this function."""
|
||||
|
||||
def test_format_is_slug_dash_5_alnum(self):
|
||||
identity = bottle_state.bottle_identity("dev")
|
||||
self.assertTrue(identity.startswith("dev-"))
|
||||
suffix = identity[len("dev-"):]
|
||||
self.assertEqual(5, len(suffix))
|
||||
self.assertTrue(
|
||||
re.fullmatch(r"[a-z0-9]+", suffix),
|
||||
f"suffix {suffix!r} must be lowercase base36",
|
||||
)
|
||||
|
||||
def test_two_calls_yield_different_identities(self):
|
||||
# 5-char base36 gives ~60M combinations; collision in two
|
||||
# calls is astronomically unlikely. If this ever flakes it's
|
||||
# almost certainly a regression, not a bad-luck collision.
|
||||
a = bottle_state.bottle_identity("dev")
|
||||
b = bottle_state.bottle_identity("dev")
|
||||
self.assertNotEqual(a, b)
|
||||
|
||||
def test_different_agents_get_different_prefixes(self):
|
||||
a = bottle_state.bottle_identity("dev")
|
||||
b = bottle_state.bottle_identity("api")
|
||||
self.assertTrue(a.startswith("dev-"))
|
||||
self.assertTrue(b.startswith("api-"))
|
||||
|
||||
def test_agent_name_slugified(self):
|
||||
identity = bottle_state.bottle_identity("My Agent")
|
||||
self.assertTrue(identity.startswith("my-agent-"))
|
||||
|
||||
|
||||
class TestBottleMetadata(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
|
||||
def tearDown(self):
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_read_missing_returns_none(self):
|
||||
self.assertIsNone(read_metadata("does-not-exist"))
|
||||
|
||||
def test_write_then_read_roundtrip(self):
|
||||
meta = BottleMetadata(
|
||||
identity="dev-a4f8c",
|
||||
agent_name="dev",
|
||||
cwd="/proj/A",
|
||||
copy_cwd=True,
|
||||
started_at="2026-05-25T12:00:00+00:00",
|
||||
)
|
||||
write_metadata(meta)
|
||||
loaded = read_metadata("dev-a4f8c")
|
||||
self.assertEqual(meta, loaded)
|
||||
|
||||
def test_metadata_lives_under_state_dir(self):
|
||||
meta = BottleMetadata(
|
||||
identity="dev-x", agent_name="dev",
|
||||
cwd="", copy_cwd=False, started_at="t",
|
||||
)
|
||||
path = write_metadata(meta)
|
||||
self.assertTrue(
|
||||
str(path).endswith("/.claude-bottle/state/dev-x/metadata.json"),
|
||||
)
|
||||
|
||||
def test_overwriting_metadata_updates_timestamp(self):
|
||||
# `resume` re-writes metadata with a fresh started_at;
|
||||
# everything else stays the same.
|
||||
write_metadata(BottleMetadata(
|
||||
identity="dev-y", agent_name="dev",
|
||||
cwd="/proj/A", copy_cwd=True, started_at="t1",
|
||||
))
|
||||
write_metadata(BottleMetadata(
|
||||
identity="dev-y", agent_name="dev",
|
||||
cwd="/proj/A", copy_cwd=True, started_at="t2",
|
||||
))
|
||||
loaded = read_metadata("dev-y")
|
||||
assert loaded is not None
|
||||
self.assertEqual("t2", loaded.started_at)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,123 @@
|
||||
"""Unit: capability_apply helpers (PRD 0016 Phase 2).
|
||||
|
||||
docker cp / exec / rm / network rm paths are covered by the
|
||||
integration test in Phase 4. Here we cover:
|
||||
- fetch_current_dockerfile fallback chain (per-bottle → repo)
|
||||
- apply_capability_change writes the per-bottle Dockerfile and
|
||||
returns the correct (before, after).
|
||||
- apply_capability_change rejects empty input.
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker import bottle_state, capability_apply
|
||||
from claude_bottle.backend.docker.capability_apply import (
|
||||
CapabilityApplyError,
|
||||
apply_capability_change,
|
||||
fetch_current_dockerfile,
|
||||
)
|
||||
|
||||
|
||||
class _FakeHomeMixin:
|
||||
def _setup_fake_home(self):
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-test.")
|
||||
original = supervise.claude_bottle_root
|
||||
|
||||
def fake_root() -> Path:
|
||||
return Path(self._tmp.name) / ".claude-bottle"
|
||||
|
||||
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||
self._restore = lambda: setattr(supervise, "claude_bottle_root", original)
|
||||
|
||||
def _teardown_fake_home(self):
|
||||
self._restore()
|
||||
self._tmp.cleanup()
|
||||
|
||||
|
||||
class TestFetchCurrentDockerfile(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
|
||||
def tearDown(self):
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_returns_per_bottle_dockerfile_when_present(self):
|
||||
bottle_state.write_per_bottle_dockerfile("dev", "FROM rebuilt\n")
|
||||
self.assertEqual("FROM rebuilt\n", fetch_current_dockerfile("dev"))
|
||||
|
||||
def test_falls_back_to_repo_dockerfile_when_no_override(self):
|
||||
# The repo's Dockerfile actually exists; the test just checks
|
||||
# we get its content (non-empty) when no per-bottle override
|
||||
# is set.
|
||||
content = fetch_current_dockerfile("dev-no-override")
|
||||
self.assertIn("FROM ", content)
|
||||
|
||||
|
||||
class TestApplyCapabilityChange(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
# Stub out the docker-dependent helpers. The orchestrator's
|
||||
# job is to sequence write + snapshot + push + teardown; we
|
||||
# validate that sequence here, not the docker primitives.
|
||||
self._calls: list[str] = []
|
||||
self._orig_snapshot = capability_apply._snapshot_transcript
|
||||
self._orig_push = capability_apply._push_working_tree
|
||||
self._orig_teardown = capability_apply._teardown_bottle
|
||||
|
||||
def stub_snapshot(slug):
|
||||
self._calls.append(f"snapshot:{slug}")
|
||||
|
||||
def stub_push(slug):
|
||||
self._calls.append(f"push:{slug}")
|
||||
|
||||
def stub_teardown(slug):
|
||||
self._calls.append(f"teardown:{slug}")
|
||||
|
||||
capability_apply._snapshot_transcript = stub_snapshot # type: ignore[assignment]
|
||||
capability_apply._push_working_tree = stub_push # type: ignore[assignment]
|
||||
capability_apply._teardown_bottle = stub_teardown # type: ignore[assignment]
|
||||
|
||||
def tearDown(self):
|
||||
capability_apply._snapshot_transcript = self._orig_snapshot # type: ignore[assignment]
|
||||
capability_apply._push_working_tree = self._orig_push # type: ignore[assignment]
|
||||
capability_apply._teardown_bottle = self._orig_teardown # type: ignore[assignment]
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_writes_per_bottle_dockerfile_and_returns_before_after(self):
|
||||
bottle_state.write_per_bottle_dockerfile("dev", "FROM old\n")
|
||||
before, after = apply_capability_change("dev", "FROM new\nRUN apk add ripgrep\n")
|
||||
self.assertEqual("FROM old\n", before)
|
||||
self.assertEqual("FROM new\nRUN apk add ripgrep\n", after)
|
||||
self.assertEqual(
|
||||
"FROM new\nRUN apk add ripgrep\n",
|
||||
bottle_state.per_bottle_dockerfile("dev"),
|
||||
)
|
||||
|
||||
def test_calls_snapshot_push_teardown_in_order(self):
|
||||
apply_capability_change("dev", "FROM new\n")
|
||||
# Snapshot + push must happen BEFORE write_per_bottle_dockerfile
|
||||
# (so they capture pre-rebuild state) and BEFORE teardown (so
|
||||
# the agent container still exists to docker exec / cp from).
|
||||
# Teardown must be last.
|
||||
self.assertEqual(
|
||||
["snapshot:dev", "push:dev", "teardown:dev"],
|
||||
self._calls,
|
||||
)
|
||||
|
||||
def test_first_change_falls_back_to_repo_dockerfile_for_before(self):
|
||||
# No per-bottle override yet — before-diff comes from the
|
||||
# repo's Dockerfile.
|
||||
before, after = apply_capability_change("dev-fresh", "FROM new\n")
|
||||
self.assertIn("FROM ", before)
|
||||
self.assertEqual("FROM new\n", after)
|
||||
|
||||
def test_empty_dockerfile_rejected(self):
|
||||
with self.assertRaises(CapabilityApplyError):
|
||||
apply_capability_change("dev", " \n\t\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -16,6 +16,7 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker.capability_apply import CapabilityApplyError
|
||||
from claude_bottle.backend.docker.cred_proxy_apply import CredProxyApplyError
|
||||
from claude_bottle.backend.docker.pipelock_apply import PipelockApplyError
|
||||
from claude_bottle.cli import dashboard
|
||||
@@ -118,6 +119,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
self._setup_fake_home()
|
||||
self._original_apply_routes = dashboard.apply_routes_change
|
||||
self._original_apply_allowlist = dashboard.apply_allowlist_change
|
||||
self._original_apply_capability = dashboard.apply_capability_change
|
||||
# Default stubs: succeed with deterministic before/after so the
|
||||
# audit log shows a non-empty diff.
|
||||
dashboard.apply_routes_change = lambda slug, content: (
|
||||
@@ -126,10 +128,14 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||
"old.example\n", content,
|
||||
)
|
||||
dashboard.apply_capability_change = lambda slug, content: (
|
||||
"FROM old\n", content,
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_routes_change = self._original_apply_routes
|
||||
dashboard.apply_allowlist_change = self._original_apply_allowlist
|
||||
dashboard.apply_capability_change = self._original_apply_capability
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue(self, tool: str = TOOL_CRED_PROXY_BLOCK):
|
||||
@@ -333,6 +339,75 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertIn("+new.example", entries[0].diff)
|
||||
|
||||
|
||||
class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
"""PRD 0016 Phase 3: approve() on a capability-block proposal
|
||||
calls apply_capability_change, archives the proposal afterward
|
||||
(sidecar is gone so it can't archive itself), and writes no
|
||||
audit entry (capability-block has none per PRD 0013)."""
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original = dashboard.apply_capability_change
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_capability_change = self._original
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"):
|
||||
p = Proposal.new(
|
||||
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
|
||||
proposed_file=proposed,
|
||||
justification="need ripgrep",
|
||||
current_file_hash=sha256_hex(proposed),
|
||||
now=FIXED,
|
||||
)
|
||||
qdir = supervise.queue_dir_for_slug("dev")
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def test_capability_block_calls_apply_with_proposed_file(self):
|
||||
calls = []
|
||||
dashboard.apply_capability_change = lambda slug, content: (
|
||||
calls.append((slug, content)) or ("FROM old\n", content)
|
||||
)
|
||||
qp = self._enqueue_capability("FROM bookworm\n")
|
||||
dashboard.approve(qp)
|
||||
self.assertEqual([("dev", "FROM bookworm\n")], calls)
|
||||
|
||||
def test_apply_failure_blocks_response_and_keeps_pending(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
CapabilityApplyError("teardown failed")
|
||||
)
|
||||
qp = self._enqueue_capability()
|
||||
with self.assertRaises(CapabilityApplyError):
|
||||
dashboard.approve(qp)
|
||||
self.assertEqual(
|
||||
[qp.proposal.id],
|
||||
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
|
||||
)
|
||||
|
||||
def test_no_audit_log_for_capability(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
qp = self._enqueue_capability()
|
||||
dashboard.approve(qp)
|
||||
# capability-block has no audit log per PRD 0013 — its record
|
||||
# lives in the per-bottle Dockerfile + transcript state.
|
||||
self.assertEqual([], read_audit_entries("cred-proxy", "dev"))
|
||||
self.assertEqual([], read_audit_entries("pipelock", "dev"))
|
||||
|
||||
def test_proposal_archived_after_apply(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
qp = self._enqueue_capability()
|
||||
dashboard.approve(qp)
|
||||
# Sidecar would normally archive after delivering the response,
|
||||
# but it's gone by then. The dashboard archives so
|
||||
# discover_pending stops surfacing the resolved proposal.
|
||||
self.assertEqual([], supervise.list_pending_proposals(qp.queue_dir))
|
||||
processed = list((qp.queue_dir / "processed").glob("*.json"))
|
||||
self.assertEqual(2, len(processed))
|
||||
|
||||
|
||||
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
|
||||
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
|
||||
on a pending proposal)."""
|
||||
|
||||
Reference in New Issue
Block a user