Merge pull request 'PRD 0016: capability block remediation' (#22) from prd-0016-capability-block into main
This commit was merged in pull request #22.
This commit is contained in:
@@ -53,6 +53,11 @@ class BottleSpec:
|
|||||||
agent_name: str
|
agent_name: str
|
||||||
copy_cwd: bool
|
copy_cwd: bool
|
||||||
user_cwd: str
|
user_cwd: str
|
||||||
|
# PRD 0016 follow-up: when set, the backend's prepare step uses
|
||||||
|
# this identity instead of minting a fresh one — the resume path
|
||||||
|
# (`cli.py resume <identity>`) sets this to continue an existing
|
||||||
|
# bottle's state. Empty string for a fresh `start`.
|
||||||
|
identity: str = ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|||||||
@@ -44,6 +44,11 @@ class DockerBottlePlan(BottlePlan):
|
|||||||
image: str
|
image: str
|
||||||
derived_image: str # "" -> no derived image
|
derived_image: str # "" -> no derived image
|
||||||
runtime_image: str # image actually launched (derived or base)
|
runtime_image: str # image actually launched (derived or base)
|
||||||
|
# Absolute path to the Dockerfile that builds `image`. Empty means
|
||||||
|
# use the repo's default Dockerfile. Populated to a per-bottle
|
||||||
|
# state file (~/.claude-bottle/state/<slug>/Dockerfile) after a
|
||||||
|
# capability-block remediation (PRD 0016).
|
||||||
|
dockerfile_path: str
|
||||||
env_file: Path # docker --env-file: NAME=VALUE literals
|
env_file: Path # docker --env-file: NAME=VALUE literals
|
||||||
# name -> value for vars forwarded into the docker-run child process
|
# name -> value for vars forwarded into the docker-run child process
|
||||||
# via subprocess env (so values never land on argv or in a file).
|
# via subprocess env (so values never land on argv or in a file).
|
||||||
@@ -89,6 +94,11 @@ class DockerBottlePlan(BottlePlan):
|
|||||||
print(file=sys.stderr)
|
print(file=sys.stderr)
|
||||||
info(f"agent : {spec.agent_name}")
|
info(f"agent : {spec.agent_name}")
|
||||||
info(f"image : {self.image}")
|
info(f"image : {self.image}")
|
||||||
|
if self.dockerfile_path:
|
||||||
|
info(
|
||||||
|
f"dockerfile : {self.dockerfile_path} "
|
||||||
|
f"(per-bottle override from PRD 0016 capability rebuild)"
|
||||||
|
)
|
||||||
if self.derived_image:
|
if self.derived_image:
|
||||||
info(
|
info(
|
||||||
f"cwd : {spec.user_cwd} -> /home/node/workspace "
|
f"cwd : {spec.user_cwd} -> /home/node/workspace "
|
||||||
|
|||||||
@@ -0,0 +1,170 @@
|
|||||||
|
"""Per-bottle persistent state (PRD 0016).
|
||||||
|
|
||||||
|
Holds the per-bottle Dockerfile override that capability-block
|
||||||
|
remediation writes, the transcript snapshot the state-preservation
|
||||||
|
helper saves before teardown, and the launch metadata that lets
|
||||||
|
`cli.py resume <identity>` reconstruct a bottle's spec. State
|
||||||
|
lives at:
|
||||||
|
|
||||||
|
~/.claude-bottle/state/<identity>/
|
||||||
|
metadata.json — agent_name + cwd + started_at (for resume)
|
||||||
|
Dockerfile — per-bottle override (absent → use repo's)
|
||||||
|
transcript/ — last snapshotted agent state (best-effort)
|
||||||
|
|
||||||
|
When the per-bottle Dockerfile is present, the launch step builds
|
||||||
|
the agent image with a per-bottle tag (claude-bottle-rebuilt-<id>)
|
||||||
|
from this file rather than the repo's. The build context is still
|
||||||
|
the repo root so the Dockerfile can COPY claude_bottle source files
|
||||||
|
the same way the original does.
|
||||||
|
|
||||||
|
Identity model:
|
||||||
|
- Every `cli.py start <agent>` mints a fresh identity via
|
||||||
|
`bottle_identity(agent_name)`: slug-prefix for readability plus a
|
||||||
|
5-char random suffix for parallel-safe uniqueness. The metadata
|
||||||
|
written at launch time pins (agent_name, cwd) to that identity.
|
||||||
|
- `cli.py resume <identity>` reads the metadata and re-launches a
|
||||||
|
bottle pinned to the same identity, picking up any per-bottle
|
||||||
|
Dockerfile and transcript snapshot.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import dataclasses
|
||||||
|
import json
|
||||||
|
import secrets
|
||||||
|
import string
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ... import supervise as _supervise
|
||||||
|
from . import util as docker_mod
|
||||||
|
|
||||||
|
|
||||||
|
# Directory layout: ~/.claude-bottle/state/<identity>/...
|
||||||
|
_STATE_SUBDIR = "state"
|
||||||
|
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
|
||||||
|
_TRANSCRIPT_SUBDIR = "transcript"
|
||||||
|
_METADATA_NAME = "metadata.json"
|
||||||
|
|
||||||
|
# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human
|
||||||
|
# operators starting bottles by hand; collision-free in practice.
|
||||||
|
_RANDOM_SUFFIX_LEN = 5
|
||||||
|
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
|
||||||
|
|
||||||
|
|
||||||
|
def bottle_identity(agent_name: str) -> str:
|
||||||
|
"""Mint a fresh per-launch bottle identity. The slug-prefix is
|
||||||
|
`slugify(agent_name)` for readability; the suffix is 5 random
|
||||||
|
base36 chars so two simultaneous `start <agent>` invocations
|
||||||
|
don't collide on container/network names.
|
||||||
|
|
||||||
|
Every call produces a different identity (non-deterministic).
|
||||||
|
To continue an existing bottle's state, use the recorded
|
||||||
|
identity from BottleMetadata via `cli.py resume <identity>`,
|
||||||
|
not this function."""
|
||||||
|
slug = docker_mod.slugify(agent_name)
|
||||||
|
suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN))
|
||||||
|
return f"{slug}-{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class BottleMetadata:
|
||||||
|
"""Persistent record of how a bottle was launched, written at
|
||||||
|
start time and read by `cli.py resume`. Lives at
|
||||||
|
~/.claude-bottle/state/<identity>/metadata.json."""
|
||||||
|
|
||||||
|
identity: str
|
||||||
|
agent_name: str
|
||||||
|
cwd: str # empty string when --cwd was not passed
|
||||||
|
copy_cwd: bool
|
||||||
|
started_at: str # ISO 8601 UTC
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_path(identity: str) -> Path:
|
||||||
|
return bottle_state_dir(identity) / _METADATA_NAME
|
||||||
|
|
||||||
|
|
||||||
|
def write_metadata(metadata: BottleMetadata) -> Path:
|
||||||
|
"""Persist `metadata` to ~/.claude-bottle/state/<identity>/metadata.json.
|
||||||
|
Mode 0o644 — no secrets, just (agent_name, cwd, timestamp)."""
|
||||||
|
path = metadata_path(metadata.identity)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n")
|
||||||
|
path.chmod(0o644)
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def read_metadata(identity: str) -> BottleMetadata | None:
|
||||||
|
"""Return the metadata for `identity`, or None if no state has
|
||||||
|
been recorded for it. Used by `cli.py resume` to reconstruct
|
||||||
|
the launch spec."""
|
||||||
|
path = metadata_path(identity)
|
||||||
|
if not path.is_file():
|
||||||
|
return None
|
||||||
|
raw = json.loads(path.read_text())
|
||||||
|
if not isinstance(raw, dict):
|
||||||
|
return None
|
||||||
|
return BottleMetadata(
|
||||||
|
identity=str(raw.get("identity", identity)),
|
||||||
|
agent_name=str(raw.get("agent_name", "")),
|
||||||
|
cwd=str(raw.get("cwd", "")),
|
||||||
|
copy_cwd=bool(raw.get("copy_cwd", False)),
|
||||||
|
started_at=str(raw.get("started_at", "")),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def bottle_state_dir(identity: str) -> Path:
|
||||||
|
"""Per-bottle state directory on the host. Created lazily by the
|
||||||
|
write helpers; readers tolerate its absence."""
|
||||||
|
return _supervise.claude_bottle_root() / _STATE_SUBDIR / identity
|
||||||
|
|
||||||
|
|
||||||
|
def per_bottle_dockerfile_path(identity: str) -> Path:
|
||||||
|
return bottle_state_dir(identity) / _PER_BOTTLE_DOCKERFILE_NAME
|
||||||
|
|
||||||
|
|
||||||
|
def per_bottle_dockerfile(identity: str) -> str | None:
|
||||||
|
"""Return the per-bottle Dockerfile content if present, else
|
||||||
|
None. None means: use the repo's Dockerfile (the original
|
||||||
|
pre-capability-block behavior)."""
|
||||||
|
p = per_bottle_dockerfile_path(identity)
|
||||||
|
if p.is_file():
|
||||||
|
return p.read_text()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def write_per_bottle_dockerfile(identity: str, content: str) -> Path:
|
||||||
|
p = per_bottle_dockerfile_path(identity)
|
||||||
|
p.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
p.write_text(content)
|
||||||
|
p.chmod(0o644)
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
def per_bottle_image_tag(identity: str) -> str:
|
||||||
|
"""Image tag for a rebuilt bottle. Distinct from the base
|
||||||
|
claude-bottle:latest so per-bottle rebuilds don't collide in
|
||||||
|
the docker image cache."""
|
||||||
|
return f"claude-bottle-rebuilt-{identity}:latest"
|
||||||
|
|
||||||
|
|
||||||
|
def transcript_snapshot_dir(identity: str) -> Path:
|
||||||
|
"""Where capability_apply stashes the agent's transcript before
|
||||||
|
teardown, so the next `cli.py start <agent>` can offer to
|
||||||
|
resume from it."""
|
||||||
|
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BottleMetadata",
|
||||||
|
"bottle_identity",
|
||||||
|
"bottle_state_dir",
|
||||||
|
"metadata_path",
|
||||||
|
"per_bottle_dockerfile",
|
||||||
|
"per_bottle_dockerfile_path",
|
||||||
|
"per_bottle_image_tag",
|
||||||
|
"read_metadata",
|
||||||
|
"transcript_snapshot_dir",
|
||||||
|
"write_metadata",
|
||||||
|
"write_per_bottle_dockerfile",
|
||||||
|
]
|
||||||
@@ -0,0 +1,210 @@
|
|||||||
|
"""capability_apply — host-side orchestrator for capability-block
|
||||||
|
remediation (PRD 0016).
|
||||||
|
|
||||||
|
On approval of a capability-block proposal, the dashboard calls
|
||||||
|
apply_capability_change(slug, new_dockerfile) which:
|
||||||
|
|
||||||
|
1. Snapshots the agent's transcript dir to
|
||||||
|
~/.claude-bottle/state/<slug>/transcript/ (best-effort).
|
||||||
|
2. Pushes the agent's working tree via `git push` (best-effort —
|
||||||
|
no upstream / no commits / no git repo all skip with a log).
|
||||||
|
3. Writes the new Dockerfile to
|
||||||
|
~/.claude-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
|
||||||
|
state). The next `cli.py start <agent>` picks it up.
|
||||||
|
4. Force-removes the agent container + all sidecars + the
|
||||||
|
per-bottle networks. Idempotent — missing resources are not
|
||||||
|
errors.
|
||||||
|
|
||||||
|
Returns (before, after) Dockerfile contents so the dashboard can
|
||||||
|
record / render the diff. (capability-block has no audit log per
|
||||||
|
PRD 0013 — the per-bottle Dockerfile state is its own record.)
|
||||||
|
|
||||||
|
This is "fire-and-forget" from the agent's perspective: by the time
|
||||||
|
the dashboard writes the response file the supervise sidecar is
|
||||||
|
gone, so the agent's tool call connection drops without ever
|
||||||
|
receiving the response. The replacement agent (next manual
|
||||||
|
`cli.py start`) sees the new Dockerfile and starts from there.
|
||||||
|
v1 does not auto-relaunch — see PRD 0016's capability-block return
|
||||||
|
semantics open question.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ...log import info, warn
|
||||||
|
from .bottle_state import (
|
||||||
|
per_bottle_dockerfile,
|
||||||
|
per_bottle_dockerfile_path,
|
||||||
|
transcript_snapshot_dir,
|
||||||
|
write_per_bottle_dockerfile,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Agent home inside the container (per the repo Dockerfile's
|
||||||
|
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
|
||||||
|
# dir + the workspace dir for git push.
|
||||||
|
_AGENT_HOME_IN_CONTAINER = "/home/node"
|
||||||
|
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
|
||||||
|
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
|
||||||
|
|
||||||
|
# Per-bottle resource name patterns (mirroring prepare.py /
|
||||||
|
# the various sidecar modules). The agent container's name is the
|
||||||
|
# slug with no infix; sidecars carry an infix like cred-proxy.
|
||||||
|
def _agent_container_name(slug: str) -> str:
|
||||||
|
return f"claude-bottle-{slug}"
|
||||||
|
|
||||||
|
|
||||||
|
def _per_bottle_container_names(slug: str) -> list[str]:
|
||||||
|
"""All container names that belong to this bottle. Missing
|
||||||
|
containers are silently skipped by the teardown helper, so it's
|
||||||
|
fine to include names that don't exist for a given bottle."""
|
||||||
|
return [
|
||||||
|
_agent_container_name(slug),
|
||||||
|
f"claude-bottle-cred-proxy-{slug}",
|
||||||
|
f"claude-bottle-pipelock-{slug}",
|
||||||
|
f"claude-bottle-git-gate-{slug}",
|
||||||
|
f"claude-bottle-supervise-{slug}",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _per_bottle_network_names(slug: str) -> list[str]:
|
||||||
|
return [
|
||||||
|
f"claude-bottle-net-{slug}",
|
||||||
|
f"claude-bottle-egress-{slug}",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityApplyError(RuntimeError):
|
||||||
|
"""Raised when the apply fails in a way that should keep the
|
||||||
|
proposal pending (so the operator can retry). Best-effort
|
||||||
|
failures (transcript snapshot, git push) do not raise — they
|
||||||
|
just log and proceed."""
|
||||||
|
|
||||||
|
|
||||||
|
# --- Public helpers --------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_current_dockerfile(slug: str) -> str:
|
||||||
|
"""Return the Dockerfile content the next `cli.py start <agent>`
|
||||||
|
would use for this bottle. If a per-bottle override exists, that
|
||||||
|
one; otherwise the repo's Dockerfile.
|
||||||
|
|
||||||
|
Used by the operator-edit verb to show the current source of
|
||||||
|
truth, and by apply_capability_change for the before-diff."""
|
||||||
|
override = per_bottle_dockerfile(slug)
|
||||||
|
if override is not None:
|
||||||
|
return override
|
||||||
|
repo_dockerfile = _repo_dockerfile_path()
|
||||||
|
if repo_dockerfile.is_file():
|
||||||
|
return repo_dockerfile.read_text()
|
||||||
|
raise CapabilityApplyError(
|
||||||
|
f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at "
|
||||||
|
f"{repo_dockerfile}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
|
||||||
|
"""End-to-end capability-block remediation. See module docstring
|
||||||
|
for the sequence. Returns (before, after) Dockerfile content."""
|
||||||
|
if not new_dockerfile.strip():
|
||||||
|
raise CapabilityApplyError("proposed Dockerfile is empty")
|
||||||
|
before = fetch_current_dockerfile(slug)
|
||||||
|
|
||||||
|
_snapshot_transcript(slug)
|
||||||
|
_push_working_tree(slug)
|
||||||
|
write_per_bottle_dockerfile(slug, new_dockerfile)
|
||||||
|
_teardown_bottle(slug)
|
||||||
|
|
||||||
|
return before, new_dockerfile
|
||||||
|
|
||||||
|
|
||||||
|
# --- Internals -------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _repo_dockerfile_path() -> Path:
|
||||||
|
"""Path to the repo's Dockerfile (one dir above this module's
|
||||||
|
package root). Resolved at call time so the path is correct
|
||||||
|
regardless of where this module is imported from."""
|
||||||
|
# claude_bottle/backend/docker/capability_apply.py -> repo root
|
||||||
|
return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile"
|
||||||
|
|
||||||
|
|
||||||
|
def _snapshot_transcript(slug: str) -> None:
|
||||||
|
"""`docker cp` /home/node/.claude out of the agent container into
|
||||||
|
~/.claude-bottle/state/<slug>/transcript/. Best-effort: missing
|
||||||
|
container, missing dir, or cp error all log a warning and return.
|
||||||
|
The transcript is what `claude --resume` reads to pick up where
|
||||||
|
the agent left off."""
|
||||||
|
container = _agent_container_name(slug)
|
||||||
|
dest = transcript_snapshot_dir(slug)
|
||||||
|
if dest.exists():
|
||||||
|
# Remove any prior snapshot so the new one is a clean copy.
|
||||||
|
shutil.rmtree(dest, ignore_errors=True)
|
||||||
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
r = subprocess.run(
|
||||||
|
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
if r.returncode != 0:
|
||||||
|
warn(
|
||||||
|
f"capability-apply: transcript snapshot skipped "
|
||||||
|
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
info(f"capability-apply: transcript snapshotted to {dest}")
|
||||||
|
|
||||||
|
|
||||||
|
def _push_working_tree(slug: str) -> None:
|
||||||
|
"""`docker exec <agent> git push` from /home/node/workspace.
|
||||||
|
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
|
||||||
|
network all log a warning and return. The replacement bottle
|
||||||
|
will pick up whatever's actually upstream."""
|
||||||
|
container = _agent_container_name(slug)
|
||||||
|
r = subprocess.run(
|
||||||
|
[
|
||||||
|
"docker", "exec", container, "sh", "-c",
|
||||||
|
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
|
||||||
|
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
|
||||||
|
f"git push origin HEAD 2>&1 || true",
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
if r.returncode != 0:
|
||||||
|
warn(
|
||||||
|
f"capability-apply: git push skipped "
|
||||||
|
f"({(r.stderr or '').strip() or 'docker exec failed'})"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
output = (r.stdout or "").strip()
|
||||||
|
if output:
|
||||||
|
info(f"capability-apply: git push: {output}")
|
||||||
|
else:
|
||||||
|
info("capability-apply: git push ran (no output — likely not a git workspace)")
|
||||||
|
|
||||||
|
|
||||||
|
def _teardown_bottle(slug: str) -> None:
|
||||||
|
"""Force-remove all per-bottle docker resources. Idempotent —
|
||||||
|
`docker rm -f` / `docker network rm` silently ignore missing
|
||||||
|
names, so this can be called even mid-rebuild."""
|
||||||
|
info(f"capability-apply: tearing down bottle {slug}")
|
||||||
|
for name in _per_bottle_container_names(slug):
|
||||||
|
subprocess.run(
|
||||||
|
["docker", "rm", "-f", name],
|
||||||
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
|
)
|
||||||
|
for net in _per_bottle_network_names(slug):
|
||||||
|
subprocess.run(
|
||||||
|
["docker", "network", "rm", net],
|
||||||
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"CapabilityApplyError",
|
||||||
|
"apply_capability_change",
|
||||||
|
"fetch_current_dockerfile",
|
||||||
|
]
|
||||||
@@ -66,7 +66,10 @@ def launch(
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
docker_mod.build_image(plan.image, _REPO_DIR)
|
docker_mod.build_image(
|
||||||
|
plan.image, _REPO_DIR,
|
||||||
|
dockerfile=plan.dockerfile_path,
|
||||||
|
)
|
||||||
if plan.derived_image:
|
if plan.derived_image:
|
||||||
docker_mod.build_image_with_cwd(
|
docker_mod.build_image_with_cwd(
|
||||||
plan.derived_image, plan.image, plan.spec.user_cwd
|
plan.derived_image, plan.image, plan.spec.user_cwd
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ via the base class's `prepare` template before this is called.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from ... import pipelock
|
from ... import pipelock
|
||||||
@@ -26,6 +27,14 @@ from .cred_proxy import (
|
|||||||
cred_proxy_url,
|
cred_proxy_url,
|
||||||
)
|
)
|
||||||
from .git_gate import DockerGitGate, git_gate_container_name
|
from .git_gate import DockerGitGate, git_gate_container_name
|
||||||
|
from .bottle_state import (
|
||||||
|
BottleMetadata,
|
||||||
|
bottle_identity,
|
||||||
|
per_bottle_dockerfile,
|
||||||
|
per_bottle_dockerfile_path,
|
||||||
|
per_bottle_image_tag,
|
||||||
|
write_metadata,
|
||||||
|
)
|
||||||
from .pipelock import DockerPipelockProxy, pipelock_container_name
|
from .pipelock import DockerPipelockProxy, pipelock_container_name
|
||||||
from .supervise import DockerSupervise, supervise_container_name
|
from .supervise import DockerSupervise, supervise_container_name
|
||||||
|
|
||||||
@@ -48,9 +57,34 @@ def resolve_plan(
|
|||||||
agent = manifest.agents[spec.agent_name]
|
agent = manifest.agents[spec.agent_name]
|
||||||
bottle = manifest.bottle_for(spec.agent_name)
|
bottle = manifest.bottle_for(spec.agent_name)
|
||||||
|
|
||||||
slug = docker_mod.slugify(spec.agent_name)
|
# PRD 0016 follow-up: identity, not bare slug. A fresh `start`
|
||||||
|
# mints a random-suffixed identity (so parallel runs of the same
|
||||||
|
# agent in the same cwd don't collide on container/network
|
||||||
|
# names); a `resume` passes the recorded identity in via
|
||||||
|
# spec.identity to continue an existing bottle's state.
|
||||||
|
slug = spec.identity or bottle_identity(spec.agent_name)
|
||||||
|
# Record the launch metadata so `cli.py resume <identity>` can
|
||||||
|
# reconstruct the spec. Idempotent — re-writes on resume with a
|
||||||
|
# refreshed started_at.
|
||||||
|
write_metadata(BottleMetadata(
|
||||||
|
identity=slug,
|
||||||
|
agent_name=spec.agent_name,
|
||||||
|
cwd=spec.user_cwd if spec.copy_cwd else "",
|
||||||
|
copy_cwd=spec.copy_cwd,
|
||||||
|
started_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
))
|
||||||
|
|
||||||
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest")
|
# PRD 0016 capability-block: if a per-bottle Dockerfile has been
|
||||||
|
# written (via apply_capability_change), the base image becomes
|
||||||
|
# per_bottle_image_tag(slug) built from that file. --cwd still
|
||||||
|
# layers a derived image on top.
|
||||||
|
dockerfile_path = ""
|
||||||
|
if per_bottle_dockerfile(slug) is not None:
|
||||||
|
image_default = per_bottle_image_tag(slug)
|
||||||
|
dockerfile_path = str(per_bottle_dockerfile_path(slug))
|
||||||
|
else:
|
||||||
|
image_default = "claude-bottle:latest"
|
||||||
|
image = os.environ.get("CLAUDE_BOTTLE_IMAGE", image_default)
|
||||||
derived_image = ""
|
derived_image = ""
|
||||||
runtime_image = image
|
runtime_image = image
|
||||||
if spec.copy_cwd:
|
if spec.copy_cwd:
|
||||||
@@ -184,6 +218,7 @@ def resolve_plan(
|
|||||||
image=image,
|
image=image,
|
||||||
derived_image=derived_image,
|
derived_image=derived_image,
|
||||||
runtime_image=runtime_image,
|
runtime_image=runtime_image,
|
||||||
|
dockerfile_path=dockerfile_path,
|
||||||
env_file=env_file,
|
env_file=env_file,
|
||||||
forwarded_env=forwarded_env,
|
forwarded_env=forwarded_env,
|
||||||
prompt_file=prompt_file,
|
prompt_file=prompt_file,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Main CLI dispatcher.
|
"""Main CLI dispatcher.
|
||||||
|
|
||||||
Commands: cleanup, dashboard, edit, info, init, list, start
|
Commands: cleanup, dashboard, edit, info, init, list, resume, start
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -15,6 +15,7 @@ from .dashboard import cmd_dashboard
|
|||||||
from .edit import cmd_edit
|
from .edit import cmd_edit
|
||||||
from .info import cmd_info
|
from .info import cmd_info
|
||||||
from .init import cmd_init
|
from .init import cmd_init
|
||||||
|
from .resume import cmd_resume
|
||||||
from .start import cmd_start
|
from .start import cmd_start
|
||||||
|
|
||||||
cmd_list = _list_mod.cmd_list
|
cmd_list = _list_mod.cmd_list
|
||||||
@@ -26,6 +27,7 @@ COMMANDS = {
|
|||||||
"info": cmd_info,
|
"info": cmd_info,
|
||||||
"init": cmd_init,
|
"init": cmd_init,
|
||||||
"list": cmd_list,
|
"list": cmd_list,
|
||||||
|
"resume": cmd_resume,
|
||||||
"start": cmd_start,
|
"start": cmd_start,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,6 +41,7 @@ def usage() -> None:
|
|||||||
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
|
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
|
||||||
sys.stderr.write(" init interactively create a new agent and add it to claude-bottle.json\n")
|
sys.stderr.write(" init interactively create a new agent and add it to claude-bottle.json\n")
|
||||||
sys.stderr.write(" list list available agents or active containers\n")
|
sys.stderr.write(" list list available agents or active containers\n")
|
||||||
|
sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n")
|
||||||
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
|
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
|
||||||
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
|
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,10 @@ from datetime import datetime, timezone
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .. import supervise as _supervise
|
from .. import supervise as _supervise
|
||||||
|
from ..backend.docker.capability_apply import (
|
||||||
|
CapabilityApplyError,
|
||||||
|
apply_capability_change,
|
||||||
|
)
|
||||||
from ..backend.docker.cred_proxy_apply import (
|
from ..backend.docker.cred_proxy_apply import (
|
||||||
CredProxyApplyError,
|
CredProxyApplyError,
|
||||||
apply_routes_change,
|
apply_routes_change,
|
||||||
@@ -45,6 +49,7 @@ from ..supervise import (
|
|||||||
TOOL_CAPABILITY_BLOCK,
|
TOOL_CAPABILITY_BLOCK,
|
||||||
TOOL_CRED_PROXY_BLOCK,
|
TOOL_CRED_PROXY_BLOCK,
|
||||||
TOOL_PIPELOCK_BLOCK,
|
TOOL_PIPELOCK_BLOCK,
|
||||||
|
archive_proposal,
|
||||||
list_pending_proposals,
|
list_pending_proposals,
|
||||||
render_diff,
|
render_diff,
|
||||||
write_audit_entry,
|
write_audit_entry,
|
||||||
@@ -56,7 +61,7 @@ from ._common import PROG
|
|||||||
# Errors any remediation engine may raise. Caught by the TUI key
|
# Errors any remediation engine may raise. Caught by the TUI key
|
||||||
# handlers and surfaced in the status line so a failed apply keeps
|
# handlers and surfaced in the status line so a failed apply keeps
|
||||||
# the proposal pending rather than crashing curses.
|
# the proposal pending rather than crashing curses.
|
||||||
ApplyError = (CredProxyApplyError, PipelockApplyError)
|
ApplyError = (CredProxyApplyError, PipelockApplyError, CapabilityApplyError)
|
||||||
|
|
||||||
|
|
||||||
# --- Discovery -------------------------------------------------------------
|
# --- Discovery -------------------------------------------------------------
|
||||||
@@ -107,6 +112,16 @@ def discover_pipelock_slugs() -> list[str]:
|
|||||||
return _discover_sidecar_slugs("claude-bottle-pipelock-")
|
return _discover_sidecar_slugs("claude-bottle-pipelock-")
|
||||||
|
|
||||||
|
|
||||||
|
def _approval_status(qp: QueuedProposal, verb: str) -> str:
|
||||||
|
"""Status-line text after a successful approval. For capability-
|
||||||
|
block, append the `resume <identity>` hint so the operator can
|
||||||
|
bring the rebuilt bottle back up with one copy-paste."""
|
||||||
|
base = f"{verb} {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
||||||
|
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||||
|
return f"{base}; resume: ./cli.py resume {qp.proposal.bottle_slug}"
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
def discover_pending() -> list[QueuedProposal]:
|
def discover_pending() -> list[QueuedProposal]:
|
||||||
"""Walk ~/.claude-bottle/queue/* and collect pending proposals
|
"""Walk ~/.claude-bottle/queue/* and collect pending proposals
|
||||||
from every bottle's queue. Sorted by arrival time across the
|
from every bottle's queue. Sorted by arrival time across the
|
||||||
@@ -155,9 +170,10 @@ def approve(
|
|||||||
diff_before, diff_after = apply_allowlist_change(
|
diff_before, diff_after = apply_allowlist_change(
|
||||||
qp.proposal.bottle_slug, file_to_apply,
|
qp.proposal.bottle_slug, file_to_apply,
|
||||||
)
|
)
|
||||||
# capability-block remediation lands in PRD 0016; until then
|
elif qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||||
# it stays a no-op approval and the audit (none for capability)
|
diff_before, diff_after = apply_capability_change(
|
||||||
# is skipped.
|
qp.proposal.bottle_slug, file_to_apply,
|
||||||
|
)
|
||||||
|
|
||||||
response = Response(
|
response = Response(
|
||||||
proposal_id=qp.proposal.id,
|
proposal_id=qp.proposal.id,
|
||||||
@@ -170,6 +186,12 @@ def approve(
|
|||||||
qp, action=status, notes=notes,
|
qp, action=status, notes=notes,
|
||||||
diff_before=diff_before, diff_after=diff_after,
|
diff_before=diff_before, diff_after=diff_after,
|
||||||
)
|
)
|
||||||
|
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
|
||||||
|
# The supervise sidecar was torn down by apply_capability_change,
|
||||||
|
# so it can't archive its own proposal+response. Archive here so
|
||||||
|
# dashboard.discover_pending stops surfacing the resolved
|
||||||
|
# proposal forever.
|
||||||
|
archive_proposal(qp.queue_dir, qp.proposal.id)
|
||||||
|
|
||||||
|
|
||||||
def reject(qp: QueuedProposal, *, reason: str) -> None:
|
def reject(qp: QueuedProposal, *, reason: str) -> None:
|
||||||
@@ -359,7 +381,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
|
|||||||
elif key == ord("a"):
|
elif key == ord("a"):
|
||||||
try:
|
try:
|
||||||
approve(qp)
|
approve(qp)
|
||||||
status_line = f"approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
status_line = _approval_status(qp, "approved")
|
||||||
except ApplyError as e:
|
except ApplyError as e:
|
||||||
status_line = f"apply failed: {e}"
|
status_line = f"apply failed: {e}"
|
||||||
elif key == ord("m"):
|
elif key == ord("m"):
|
||||||
@@ -369,7 +391,7 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
approve(qp, final_file=edited, notes="operator modified before approving")
|
approve(qp, final_file=edited, notes="operator modified before approving")
|
||||||
status_line = f"modified+approved {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
|
status_line = _approval_status(qp, "modified+approved")
|
||||||
except ApplyError as e:
|
except ApplyError as e:
|
||||||
status_line = f"apply failed: {e}"
|
status_line = f"apply failed: {e}"
|
||||||
elif key == ord("r"):
|
elif key == ord("r"):
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
"""resume: re-launch a bottle by its identity.
|
||||||
|
|
||||||
|
Reads ~/.claude-bottle/state/<identity>/metadata.json to recover the
|
||||||
|
(agent_name, cwd, copy_cwd) the bottle was originally started with,
|
||||||
|
then runs the same launch core as `start` — but pinned to the
|
||||||
|
recorded identity so the new bottle picks up any per-bottle Dockerfile
|
||||||
|
(from capability-block apply) and transcript snapshot under the same
|
||||||
|
state dir.
|
||||||
|
|
||||||
|
Use case: an agent calls capability-block, the dashboard approves
|
||||||
|
and tears down the bottle, the operator runs
|
||||||
|
./cli.py resume <identity>
|
||||||
|
to bring up the replacement with the new capabilities baked in.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
from ..backend import BottleSpec
|
||||||
|
from ..backend.docker.bottle_state import read_metadata
|
||||||
|
from ..log import die
|
||||||
|
from ..manifest import Manifest
|
||||||
|
from ._common import PROG, USER_CWD
|
||||||
|
from .start import _launch_bottle
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_resume(argv: list[str]) -> int:
|
||||||
|
parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True)
|
||||||
|
parser.add_argument("--dry-run", action="store_true")
|
||||||
|
parser.add_argument("--remote-control", action="store_true")
|
||||||
|
parser.add_argument(
|
||||||
|
"--format",
|
||||||
|
choices=("text", "json"),
|
||||||
|
default="text",
|
||||||
|
help="preflight output format; --format=json requires --dry-run",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"identity",
|
||||||
|
help="bottle identity from a prior `start` (see its session-end output)",
|
||||||
|
)
|
||||||
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
|
metadata = read_metadata(args.identity)
|
||||||
|
if metadata is None:
|
||||||
|
die(
|
||||||
|
f"no state recorded for identity {args.identity!r}; "
|
||||||
|
f"check ~/.claude-bottle/state/ or run `cli.py start` to create a new bottle"
|
||||||
|
)
|
||||||
|
|
||||||
|
manifest = Manifest.resolve(USER_CWD)
|
||||||
|
manifest.require_agent(metadata.agent_name)
|
||||||
|
|
||||||
|
spec = BottleSpec(
|
||||||
|
manifest=manifest,
|
||||||
|
agent_name=metadata.agent_name,
|
||||||
|
copy_cwd=metadata.copy_cwd,
|
||||||
|
user_cwd=metadata.cwd or USER_CWD,
|
||||||
|
identity=metadata.identity,
|
||||||
|
)
|
||||||
|
return _launch_bottle(
|
||||||
|
spec,
|
||||||
|
dry_run=args.dry_run,
|
||||||
|
output_format=args.format,
|
||||||
|
remote_control=args.remote_control,
|
||||||
|
)
|
||||||
@@ -1,6 +1,10 @@
|
|||||||
"""start: boot a sandboxed container for a named agent and attach an
|
"""start: boot a sandboxed container for a named agent and attach an
|
||||||
interactive claude-code session. The container is torn down when the
|
interactive claude-code session. The container is torn down when the
|
||||||
session ends."""
|
session ends.
|
||||||
|
|
||||||
|
The launch core is shared with `cli.py resume <identity>`: see
|
||||||
|
_launch_bottle below.
|
||||||
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -43,18 +47,35 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
copy_cwd=args.cwd,
|
copy_cwd=args.cwd,
|
||||||
user_cwd=USER_CWD,
|
user_cwd=USER_CWD,
|
||||||
)
|
)
|
||||||
|
return _launch_bottle(
|
||||||
|
spec,
|
||||||
|
dry_run=dry_run,
|
||||||
|
output_format=args.format,
|
||||||
|
remote_control=args.remote_control,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _launch_bottle(
|
||||||
|
spec: BottleSpec,
|
||||||
|
*,
|
||||||
|
dry_run: bool,
|
||||||
|
output_format: str,
|
||||||
|
remote_control: bool,
|
||||||
|
) -> int:
|
||||||
|
"""Shared launch core for `start` and `resume`. Builds the plan,
|
||||||
|
prints / dry-runs / prompts as appropriate, brings the bottle up,
|
||||||
|
attaches claude, and prints the resume hint on session end."""
|
||||||
stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
|
stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
|
||||||
try:
|
try:
|
||||||
backend = get_bottle_backend()
|
backend = get_bottle_backend()
|
||||||
plan = backend.prepare(spec, stage_dir=stage_dir)
|
plan = backend.prepare(spec, stage_dir=stage_dir)
|
||||||
|
|
||||||
if args.format == "json":
|
if output_format == "json":
|
||||||
json.dump(plan.to_dict(remote_control=args.remote_control), sys.stdout, indent=2)
|
json.dump(plan.to_dict(remote_control=remote_control), sys.stdout, indent=2)
|
||||||
sys.stdout.write("\n")
|
sys.stdout.write("\n")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
plan.print(remote_control=args.remote_control)
|
plan.print(remote_control=remote_control)
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
info("dry-run requested; not starting container.")
|
info("dry-run requested; not starting container.")
|
||||||
@@ -67,16 +88,27 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
info("aborted by user")
|
info("aborted by user")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
identity = _identity_from_plan(plan)
|
||||||
with backend.launch(plan) as bottle:
|
with backend.launch(plan) as bottle:
|
||||||
info(
|
info(
|
||||||
"attaching interactive claude session "
|
"attaching interactive claude session "
|
||||||
"(Ctrl-D or 'exit' to leave; container will be removed)"
|
"(Ctrl-D or 'exit' to leave; container will be removed)"
|
||||||
)
|
)
|
||||||
claude_args = ["--dangerously-skip-permissions"]
|
claude_args = ["--dangerously-skip-permissions"]
|
||||||
if args.remote_control:
|
if remote_control:
|
||||||
claude_args.append("--remote-control")
|
claude_args.append("--remote-control")
|
||||||
bottle.exec_claude(claude_args, tty=True)
|
bottle.exec_claude(claude_args, tty=True)
|
||||||
info(f"session ended; container {bottle.name} will be removed")
|
info(f"session ended; container {bottle.name} will be removed")
|
||||||
|
if identity:
|
||||||
|
info(f"to resume this bottle: ./cli.py resume {identity}")
|
||||||
return 0
|
return 0
|
||||||
finally:
|
finally:
|
||||||
shutil.rmtree(stage_dir, ignore_errors=True)
|
shutil.rmtree(stage_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _identity_from_plan(plan: object) -> str:
|
||||||
|
"""Backend-specific: the docker plan exposes the identity as
|
||||||
|
`.slug`. Other backends in the future would expose their own
|
||||||
|
identity attribute; for now we duck-type to keep this layer
|
||||||
|
backend-agnostic."""
|
||||||
|
return getattr(plan, "slug", "")
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
# PRD 0016: capability block remediation
|
||||||
|
|
||||||
|
- **Status:** Draft
|
||||||
|
- **Author:** didericis
|
||||||
|
- **Created:** 2026-05-25
|
||||||
|
- **Parent:** PRD 0012
|
||||||
|
- **Depends on:** PRD 0013
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Wires the **capability block** path (PRD 0012 *Stuck categories*) end-to-end. On operator approval of a `capability-block` proposal, the rebuild orchestrator tears down the existing bottle, builds from the new Dockerfile, and starts a replacement bottle on the same branch via the state-preservation helper. The replacement agent picks up where the original left off, now with the missing capability. Heaviest of the three remediation PRDs because the orchestrator and state-preservation helper are non-trivial.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
See PRD 0012. This PRD specifically addresses: with 0013 in place, the operator can approve a `capability-block` proposal but nothing happens — the bottle is not rebuilt, the agent stays stuck. This PRD closes the loop. Unlike 0014 and 0015, the remediation requires container teardown + rebuild + state hand-off, so the design surface is larger.
|
||||||
|
|
||||||
|
## Goals / Success Criteria
|
||||||
|
|
||||||
|
A real capability block recovers end-to-end: the agent's invocation of a tool / command / skill fails (not found, permission denied), the agent calls `capability-block` with a proposed Dockerfile and justification, the operator approves in the TUI, the orchestrator tears down the bottle and starts a replacement built from the new Dockerfile, the replacement agent inherits the working tree and best-effort transcript and continues on the same branch.
|
||||||
|
|
||||||
|
## Non-goals
|
||||||
|
|
||||||
|
- Live mutation of the running container (re-stated from PRD 0012 non-goals).
|
||||||
|
- Forking into multiple parallel rebuilt bottles. One-for-one replacement only.
|
||||||
|
- cred-proxy or pipelock handling (covered by 0014 and 0015).
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
### In scope
|
||||||
|
|
||||||
|
- A rebuild orchestrator that, on operator approval, tears down the existing bottle, builds from the approved Dockerfile, and starts a replacement on the same branch.
|
||||||
|
- A state-preservation helper that handles the hand-off across the rebuild: working tree push is mandatory; transcript / reasoning context is best-effort.
|
||||||
|
- `capability-block` approval handler in the MCP sidecar (replacing the 0013 no-op): on approval, hand off to the orchestrator.
|
||||||
|
- Bottle lifecycle script changes for orchestrated teardown + rebuild (distinct from a fresh-spawn).
|
||||||
|
- Bottle manifest schema changes: record originating manifest version / change history per agent run so the dashboard can show "what changed" rather than "what is."
|
||||||
|
- A per-agent-run record that maps a running bottle back to its PR / branch, so the orchestrator knows which branch to resume on.
|
||||||
|
|
||||||
|
### Out of scope
|
||||||
|
|
||||||
|
- Rolling back a rebuild that the replacement agent regrets. The audit trail (git history + bottle rebuild record) shows what changed; a follow-up `capability-block` proposal can revert.
|
||||||
|
|
||||||
|
## Proposed Design
|
||||||
|
|
||||||
|
### New services / components
|
||||||
|
|
||||||
|
- **Rebuild orchestrator.** On approval, tears down the existing bottle, builds from the new Dockerfile, snapshots state via the state-preservation helper, and starts a fresh bottle on the same branch.
|
||||||
|
- **State-preservation helper.** Mandatory: ensures the working tree is pushed before teardown. Best-effort: carries forward the agent's transcript / reasoning context — including the approved `capability-block` proposal — into the replacement container so the new agent starts warm rather than cold.
|
||||||
|
|
||||||
|
### Existing code touched
|
||||||
|
|
||||||
|
- **MCP sidecar** (PRD 0013) — the `capability-block` approval handler stops being a no-op; on approval, hands off to the rebuild orchestrator.
|
||||||
|
- **Bottle lifecycle scripts** — extended for orchestrated teardown + rebuild with state hand-off, distinct from a fresh-spawn.
|
||||||
|
- **Bottle manifest schema** — records the originating manifest version / change history per agent run.
|
||||||
|
- **`cli.py`** — gains the rebuild path.
|
||||||
|
|
||||||
|
### Data model changes
|
||||||
|
|
||||||
|
- A per-agent-run record sufficient to map a running bottle back to its PR / branch.
|
||||||
|
|
||||||
|
## Open questions
|
||||||
|
|
||||||
|
- **`capability-block` return semantics.** The current agent is torn down on approval, so the tool's return value never reaches it. Options: (a) fire-and-forget, the tool returns immediately with "queued" and the agent halts; (b) block the tool, let the rebuild orchestrator's teardown kill the connection, the replacement agent gets the approval record via state-preservation; (c) the tool blocks, returns "approved" right before teardown, the agent has milliseconds to log it. (b) seems cleanest but is worth confirming during implementation.
|
||||||
|
- **Best-effort transcript preservation.** Mount the agent's state directory, snapshot on teardown, remount in the replacement? How much fidelity is "good enough" for the new agent to pick up?
|
||||||
|
- **Bottle → PR/branch mapping.** Recorded at bottle-spawn time, derived from the working tree, or specified in the manifest?
|
||||||
|
- **Rejection semantics.** Does the agent receive a tool reply explaining the rejection, or does the bottle just stay torn down?
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- PRD 0012 — stuck-agent recovery flow overview.
|
||||||
|
- PRD 0013 — supervise plane foundation (prerequisite).
|
||||||
@@ -0,0 +1,217 @@
|
|||||||
|
"""Integration: drive `apply_capability_change` against a real
|
||||||
|
container that mimics the agent's name + filesystem layout (PRD 0016).
|
||||||
|
|
||||||
|
The real `cli.py start <agent>` flow is too heavy for an integration
|
||||||
|
test (it builds the agent image, brings up all the sidecars, attaches
|
||||||
|
an interactive claude session). Instead, this test stages the
|
||||||
|
minimum the orchestrator interacts with:
|
||||||
|
|
||||||
|
- A lightweight `alpine:latest sleep infinity` container named
|
||||||
|
`claude-bottle-<slug>` (matches the agent container name pattern)
|
||||||
|
on the per-bottle internal network.
|
||||||
|
- A marker file under `/home/node/.claude/` so we can assert the
|
||||||
|
transcript snapshot path actually transferred bytes.
|
||||||
|
|
||||||
|
Then `apply_capability_change` runs and we verify:
|
||||||
|
- Per-bottle Dockerfile written.
|
||||||
|
- Containers + networks removed.
|
||||||
|
- Transcript snapshot dir on the host has the marker file.
|
||||||
|
|
||||||
|
docker exec / cp / rm work across the docker socket boundary, so
|
||||||
|
this test runs in DinD too — no act_runner skip needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from claude_bottle import supervise
|
||||||
|
from claude_bottle.backend.docker import bottle_state, capability_apply
|
||||||
|
from claude_bottle.backend.docker.capability_apply import apply_capability_change
|
||||||
|
from claude_bottle.backend.docker.network import (
|
||||||
|
network_create_egress,
|
||||||
|
network_create_internal,
|
||||||
|
network_remove,
|
||||||
|
)
|
||||||
|
from tests._docker import skip_unless_docker
|
||||||
|
|
||||||
|
|
||||||
|
ALPINE_IMAGE = "alpine:latest"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_unless_docker()
|
||||||
|
class TestCapabilityApply(unittest.TestCase):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
r = subprocess.run(
|
||||||
|
["docker", "pull", ALPINE_IMAGE],
|
||||||
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
|
)
|
||||||
|
if r.returncode != 0:
|
||||||
|
raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}")
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}"
|
||||||
|
self.agent_name = f"claude-bottle-{self.slug}"
|
||||||
|
self.sidecar_names: list[str] = []
|
||||||
|
self.internal_net = ""
|
||||||
|
self.egress_net = ""
|
||||||
|
# Fake home so tests don't touch ~/.claude-bottle/.
|
||||||
|
self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.")
|
||||||
|
self._original_root = supervise.claude_bottle_root
|
||||||
|
|
||||||
|
def fake_root() -> Path:
|
||||||
|
return Path(self._tmp.name) / ".claude-bottle"
|
||||||
|
|
||||||
|
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
supervise.claude_bottle_root = self._original_root # type: ignore[assignment]
|
||||||
|
for name in [self.agent_name, *self.sidecar_names]:
|
||||||
|
subprocess.run(
|
||||||
|
["docker", "rm", "-f", name],
|
||||||
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
|
)
|
||||||
|
for n in (self.internal_net, self.egress_net):
|
||||||
|
if n:
|
||||||
|
network_remove(n)
|
||||||
|
self._tmp.cleanup()
|
||||||
|
|
||||||
|
def _bring_up_fake_bottle(self) -> None:
|
||||||
|
self.internal_net = network_create_internal(self.slug)
|
||||||
|
self.egress_net = network_create_egress(self.slug)
|
||||||
|
# Agent container with the canonical name.
|
||||||
|
r = subprocess.run(
|
||||||
|
[
|
||||||
|
"docker", "run", "-d",
|
||||||
|
"--name", self.agent_name,
|
||||||
|
"--network", self.internal_net,
|
||||||
|
ALPINE_IMAGE,
|
||||||
|
"sh", "-c",
|
||||||
|
"mkdir -p /home/node/.claude && "
|
||||||
|
"echo 'transcript-marker' > /home/node/.claude/sessions.json && "
|
||||||
|
"sleep 3600",
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
self.assertEqual(0, r.returncode, r.stderr)
|
||||||
|
# Also start a fake supervise sidecar so teardown has something
|
||||||
|
# extra to clean up (mirrors a real bottle's container set).
|
||||||
|
sidecar = f"claude-bottle-supervise-{self.slug}"
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
"docker", "run", "-d",
|
||||||
|
"--name", sidecar,
|
||||||
|
"--network", self.internal_net,
|
||||||
|
ALPINE_IMAGE, "sleep", "3600",
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
self.sidecar_names.append(sidecar)
|
||||||
|
|
||||||
|
def _containers_named_like(self) -> list[str]:
|
||||||
|
"""All running/stopped containers whose names start with
|
||||||
|
the bottle's slug — both agent + sidecars."""
|
||||||
|
r = subprocess.run(
|
||||||
|
[
|
||||||
|
"docker", "ps", "-a",
|
||||||
|
"--filter", f"name={self.agent_name}",
|
||||||
|
"--format", "{{.Names}}",
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
return [line for line in (r.stdout or "").splitlines() if line]
|
||||||
|
|
||||||
|
def _networks_named_like(self) -> list[str]:
|
||||||
|
r = subprocess.run(
|
||||||
|
[
|
||||||
|
"docker", "network", "ls",
|
||||||
|
"--filter", f"name={self.slug}",
|
||||||
|
"--format", "{{.Name}}",
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
return [line for line in (r.stdout or "").splitlines() if line]
|
||||||
|
|
||||||
|
def test_apply_writes_dockerfile_and_tears_down(self):
|
||||||
|
self._bring_up_fake_bottle()
|
||||||
|
self.assertIn(self.agent_name, self._containers_named_like())
|
||||||
|
|
||||||
|
new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n"
|
||||||
|
before, after = apply_capability_change(self.slug, new_dockerfile)
|
||||||
|
|
||||||
|
# Before is the repo Dockerfile (no prior per-bottle override);
|
||||||
|
# after is what we passed in.
|
||||||
|
self.assertIn("FROM ", before)
|
||||||
|
self.assertEqual(new_dockerfile, after)
|
||||||
|
|
||||||
|
# Per-bottle Dockerfile written on the host.
|
||||||
|
self.assertEqual(
|
||||||
|
new_dockerfile,
|
||||||
|
bottle_state.per_bottle_dockerfile(self.slug),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Agent + sidecars gone.
|
||||||
|
self.assertEqual([], self._containers_named_like())
|
||||||
|
# Networks removed (matching the slug substring).
|
||||||
|
nets = self._networks_named_like()
|
||||||
|
self.assertEqual([], nets)
|
||||||
|
# Mark them as already cleaned so tearDown is idempotent.
|
||||||
|
self.internal_net = ""
|
||||||
|
self.egress_net = ""
|
||||||
|
self.sidecar_names = []
|
||||||
|
|
||||||
|
def test_transcript_snapshot_captured(self):
|
||||||
|
self._bring_up_fake_bottle()
|
||||||
|
apply_capability_change(self.slug, "FROM x\n")
|
||||||
|
snap = bottle_state.transcript_snapshot_dir(self.slug)
|
||||||
|
self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing")
|
||||||
|
# docker cp <container>:/home/node/.claude <dst> produces
|
||||||
|
# <dst>/.claude/sessions.json (it preserves the source dir name
|
||||||
|
# inside the destination if the destination already exists).
|
||||||
|
# Walk the snapshot looking for the marker contents.
|
||||||
|
marker_found = False
|
||||||
|
for path in snap.rglob("sessions.json"):
|
||||||
|
if "transcript-marker" in path.read_text():
|
||||||
|
marker_found = True
|
||||||
|
break
|
||||||
|
self.assertTrue(marker_found, f"marker not found under {snap}")
|
||||||
|
# Cleaned up by apply already.
|
||||||
|
self.internal_net = ""
|
||||||
|
self.egress_net = ""
|
||||||
|
self.sidecar_names = []
|
||||||
|
|
||||||
|
def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self):
|
||||||
|
# First change: before is repo's Dockerfile.
|
||||||
|
self._bring_up_fake_bottle()
|
||||||
|
first_before, _ = apply_capability_change(self.slug, "FROM v1\n")
|
||||||
|
self.assertIn("FROM ", first_before)
|
||||||
|
|
||||||
|
# Second change: before is "FROM v1\n" (the per-bottle override
|
||||||
|
# from the first change), proving the state persists across
|
||||||
|
# rebuilds.
|
||||||
|
self._bring_up_fake_bottle()
|
||||||
|
second_before, second_after = apply_capability_change(self.slug, "FROM v2\n")
|
||||||
|
self.assertEqual("FROM v1\n", second_before)
|
||||||
|
self.assertEqual("FROM v2\n", second_after)
|
||||||
|
self.internal_net = ""
|
||||||
|
self.egress_net = ""
|
||||||
|
self.sidecar_names = []
|
||||||
|
|
||||||
|
def test_teardown_idempotent_when_nothing_running(self):
|
||||||
|
# No bottle ever brought up — teardown still doesn't raise.
|
||||||
|
apply_capability_change(self.slug, "FROM x\n")
|
||||||
|
self.assertEqual(
|
||||||
|
"FROM x\n",
|
||||||
|
bottle_state.per_bottle_dockerfile(self.slug),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
"""Unit: per-bottle state helpers (PRD 0016 Phase 1) + identity +
|
||||||
|
launch metadata."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from claude_bottle import supervise
|
||||||
|
from claude_bottle.backend.docker import bottle_state
|
||||||
|
from claude_bottle.backend.docker.bottle_state import (
|
||||||
|
BottleMetadata,
|
||||||
|
read_metadata,
|
||||||
|
write_metadata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeHomeMixin:
|
||||||
|
def _setup_fake_home(self):
|
||||||
|
self._tmp = tempfile.TemporaryDirectory(prefix="bottle-state-test.")
|
||||||
|
original = supervise.claude_bottle_root
|
||||||
|
|
||||||
|
def fake_root() -> Path:
|
||||||
|
return Path(self._tmp.name) / ".claude-bottle"
|
||||||
|
|
||||||
|
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||||
|
self._restore = lambda: setattr(supervise, "claude_bottle_root", original)
|
||||||
|
|
||||||
|
def _teardown_fake_home(self):
|
||||||
|
self._restore()
|
||||||
|
self._tmp.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPerBottleDockerfile(_FakeHomeMixin, unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self._setup_fake_home()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self._teardown_fake_home()
|
||||||
|
|
||||||
|
def test_returns_none_when_absent(self):
|
||||||
|
self.assertIsNone(bottle_state.per_bottle_dockerfile("dev"))
|
||||||
|
|
||||||
|
def test_write_then_read_roundtrip(self):
|
||||||
|
bottle_state.write_per_bottle_dockerfile(
|
||||||
|
"dev", "FROM python:3.13\nRUN apk add ripgrep\n",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
"FROM python:3.13\nRUN apk add ripgrep\n",
|
||||||
|
bottle_state.per_bottle_dockerfile("dev"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_isolated_per_slug(self):
|
||||||
|
bottle_state.write_per_bottle_dockerfile("dev", "FROM dev\n")
|
||||||
|
bottle_state.write_per_bottle_dockerfile("api", "FROM api\n")
|
||||||
|
self.assertEqual("FROM dev\n", bottle_state.per_bottle_dockerfile("dev"))
|
||||||
|
self.assertEqual("FROM api\n", bottle_state.per_bottle_dockerfile("api"))
|
||||||
|
|
||||||
|
def test_dockerfile_path_under_state_dir(self):
|
||||||
|
path = bottle_state.per_bottle_dockerfile_path("dev")
|
||||||
|
self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/Dockerfile"))
|
||||||
|
|
||||||
|
def test_image_tag_unique_per_slug(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"claude-bottle-rebuilt-dev:latest",
|
||||||
|
bottle_state.per_bottle_image_tag("dev"),
|
||||||
|
)
|
||||||
|
self.assertNotEqual(
|
||||||
|
bottle_state.per_bottle_image_tag("dev"),
|
||||||
|
bottle_state.per_bottle_image_tag("api"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_transcript_dir_under_state_dir(self):
|
||||||
|
path = bottle_state.transcript_snapshot_dir("dev")
|
||||||
|
self.assertTrue(str(path).endswith("/.claude-bottle/state/dev/transcript"))
|
||||||
|
|
||||||
|
|
||||||
|
class TestBottleIdentity(unittest.TestCase):
|
||||||
|
"""bottle_identity(agent_name) — PRD 0016 follow-up.
|
||||||
|
|
||||||
|
Every call mints a fresh identity with a random 5-char suffix
|
||||||
|
so multiple instances of the same agent can run in parallel
|
||||||
|
without container name collisions. The slug-prefix is for
|
||||||
|
readability; the suffix is for uniqueness. To continue an
|
||||||
|
existing bottle, use the recorded identity via
|
||||||
|
`cli.py resume <identity>`, not this function."""
|
||||||
|
|
||||||
|
def test_format_is_slug_dash_5_alnum(self):
|
||||||
|
identity = bottle_state.bottle_identity("dev")
|
||||||
|
self.assertTrue(identity.startswith("dev-"))
|
||||||
|
suffix = identity[len("dev-"):]
|
||||||
|
self.assertEqual(5, len(suffix))
|
||||||
|
self.assertTrue(
|
||||||
|
re.fullmatch(r"[a-z0-9]+", suffix),
|
||||||
|
f"suffix {suffix!r} must be lowercase base36",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_two_calls_yield_different_identities(self):
|
||||||
|
# 5-char base36 gives ~60M combinations; collision in two
|
||||||
|
# calls is astronomically unlikely. If this ever flakes it's
|
||||||
|
# almost certainly a regression, not a bad-luck collision.
|
||||||
|
a = bottle_state.bottle_identity("dev")
|
||||||
|
b = bottle_state.bottle_identity("dev")
|
||||||
|
self.assertNotEqual(a, b)
|
||||||
|
|
||||||
|
def test_different_agents_get_different_prefixes(self):
|
||||||
|
a = bottle_state.bottle_identity("dev")
|
||||||
|
b = bottle_state.bottle_identity("api")
|
||||||
|
self.assertTrue(a.startswith("dev-"))
|
||||||
|
self.assertTrue(b.startswith("api-"))
|
||||||
|
|
||||||
|
def test_agent_name_slugified(self):
|
||||||
|
identity = bottle_state.bottle_identity("My Agent")
|
||||||
|
self.assertTrue(identity.startswith("my-agent-"))
|
||||||
|
|
||||||
|
|
||||||
|
class TestBottleMetadata(_FakeHomeMixin, unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self._setup_fake_home()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self._teardown_fake_home()
|
||||||
|
|
||||||
|
def test_read_missing_returns_none(self):
|
||||||
|
self.assertIsNone(read_metadata("does-not-exist"))
|
||||||
|
|
||||||
|
def test_write_then_read_roundtrip(self):
|
||||||
|
meta = BottleMetadata(
|
||||||
|
identity="dev-a4f8c",
|
||||||
|
agent_name="dev",
|
||||||
|
cwd="/proj/A",
|
||||||
|
copy_cwd=True,
|
||||||
|
started_at="2026-05-25T12:00:00+00:00",
|
||||||
|
)
|
||||||
|
write_metadata(meta)
|
||||||
|
loaded = read_metadata("dev-a4f8c")
|
||||||
|
self.assertEqual(meta, loaded)
|
||||||
|
|
||||||
|
def test_metadata_lives_under_state_dir(self):
|
||||||
|
meta = BottleMetadata(
|
||||||
|
identity="dev-x", agent_name="dev",
|
||||||
|
cwd="", copy_cwd=False, started_at="t",
|
||||||
|
)
|
||||||
|
path = write_metadata(meta)
|
||||||
|
self.assertTrue(
|
||||||
|
str(path).endswith("/.claude-bottle/state/dev-x/metadata.json"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_overwriting_metadata_updates_timestamp(self):
|
||||||
|
# `resume` re-writes metadata with a fresh started_at;
|
||||||
|
# everything else stays the same.
|
||||||
|
write_metadata(BottleMetadata(
|
||||||
|
identity="dev-y", agent_name="dev",
|
||||||
|
cwd="/proj/A", copy_cwd=True, started_at="t1",
|
||||||
|
))
|
||||||
|
write_metadata(BottleMetadata(
|
||||||
|
identity="dev-y", agent_name="dev",
|
||||||
|
cwd="/proj/A", copy_cwd=True, started_at="t2",
|
||||||
|
))
|
||||||
|
loaded = read_metadata("dev-y")
|
||||||
|
assert loaded is not None
|
||||||
|
self.assertEqual("t2", loaded.started_at)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
"""Unit: capability_apply helpers (PRD 0016 Phase 2).
|
||||||
|
|
||||||
|
docker cp / exec / rm / network rm paths are covered by the
|
||||||
|
integration test in Phase 4. Here we cover:
|
||||||
|
- fetch_current_dockerfile fallback chain (per-bottle → repo)
|
||||||
|
- apply_capability_change writes the per-bottle Dockerfile and
|
||||||
|
returns the correct (before, after).
|
||||||
|
- apply_capability_change rejects empty input.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from claude_bottle import supervise
|
||||||
|
from claude_bottle.backend.docker import bottle_state, capability_apply
|
||||||
|
from claude_bottle.backend.docker.capability_apply import (
|
||||||
|
CapabilityApplyError,
|
||||||
|
apply_capability_change,
|
||||||
|
fetch_current_dockerfile,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeHomeMixin:
|
||||||
|
def _setup_fake_home(self):
|
||||||
|
self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-test.")
|
||||||
|
original = supervise.claude_bottle_root
|
||||||
|
|
||||||
|
def fake_root() -> Path:
|
||||||
|
return Path(self._tmp.name) / ".claude-bottle"
|
||||||
|
|
||||||
|
supervise.claude_bottle_root = fake_root # type: ignore[assignment]
|
||||||
|
self._restore = lambda: setattr(supervise, "claude_bottle_root", original)
|
||||||
|
|
||||||
|
def _teardown_fake_home(self):
|
||||||
|
self._restore()
|
||||||
|
self._tmp.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
class TestFetchCurrentDockerfile(_FakeHomeMixin, unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self._setup_fake_home()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self._teardown_fake_home()
|
||||||
|
|
||||||
|
def test_returns_per_bottle_dockerfile_when_present(self):
|
||||||
|
bottle_state.write_per_bottle_dockerfile("dev", "FROM rebuilt\n")
|
||||||
|
self.assertEqual("FROM rebuilt\n", fetch_current_dockerfile("dev"))
|
||||||
|
|
||||||
|
def test_falls_back_to_repo_dockerfile_when_no_override(self):
|
||||||
|
# The repo's Dockerfile actually exists; the test just checks
|
||||||
|
# we get its content (non-empty) when no per-bottle override
|
||||||
|
# is set.
|
||||||
|
content = fetch_current_dockerfile("dev-no-override")
|
||||||
|
self.assertIn("FROM ", content)
|
||||||
|
|
||||||
|
|
||||||
|
class TestApplyCapabilityChange(_FakeHomeMixin, unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self._setup_fake_home()
|
||||||
|
# Stub out the docker-dependent helpers. The orchestrator's
|
||||||
|
# job is to sequence write + snapshot + push + teardown; we
|
||||||
|
# validate that sequence here, not the docker primitives.
|
||||||
|
self._calls: list[str] = []
|
||||||
|
self._orig_snapshot = capability_apply._snapshot_transcript
|
||||||
|
self._orig_push = capability_apply._push_working_tree
|
||||||
|
self._orig_teardown = capability_apply._teardown_bottle
|
||||||
|
|
||||||
|
def stub_snapshot(slug):
|
||||||
|
self._calls.append(f"snapshot:{slug}")
|
||||||
|
|
||||||
|
def stub_push(slug):
|
||||||
|
self._calls.append(f"push:{slug}")
|
||||||
|
|
||||||
|
def stub_teardown(slug):
|
||||||
|
self._calls.append(f"teardown:{slug}")
|
||||||
|
|
||||||
|
capability_apply._snapshot_transcript = stub_snapshot # type: ignore[assignment]
|
||||||
|
capability_apply._push_working_tree = stub_push # type: ignore[assignment]
|
||||||
|
capability_apply._teardown_bottle = stub_teardown # type: ignore[assignment]
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
capability_apply._snapshot_transcript = self._orig_snapshot # type: ignore[assignment]
|
||||||
|
capability_apply._push_working_tree = self._orig_push # type: ignore[assignment]
|
||||||
|
capability_apply._teardown_bottle = self._orig_teardown # type: ignore[assignment]
|
||||||
|
self._teardown_fake_home()
|
||||||
|
|
||||||
|
def test_writes_per_bottle_dockerfile_and_returns_before_after(self):
|
||||||
|
bottle_state.write_per_bottle_dockerfile("dev", "FROM old\n")
|
||||||
|
before, after = apply_capability_change("dev", "FROM new\nRUN apk add ripgrep\n")
|
||||||
|
self.assertEqual("FROM old\n", before)
|
||||||
|
self.assertEqual("FROM new\nRUN apk add ripgrep\n", after)
|
||||||
|
self.assertEqual(
|
||||||
|
"FROM new\nRUN apk add ripgrep\n",
|
||||||
|
bottle_state.per_bottle_dockerfile("dev"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_calls_snapshot_push_teardown_in_order(self):
|
||||||
|
apply_capability_change("dev", "FROM new\n")
|
||||||
|
# Snapshot + push must happen BEFORE write_per_bottle_dockerfile
|
||||||
|
# (so they capture pre-rebuild state) and BEFORE teardown (so
|
||||||
|
# the agent container still exists to docker exec / cp from).
|
||||||
|
# Teardown must be last.
|
||||||
|
self.assertEqual(
|
||||||
|
["snapshot:dev", "push:dev", "teardown:dev"],
|
||||||
|
self._calls,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_first_change_falls_back_to_repo_dockerfile_for_before(self):
|
||||||
|
# No per-bottle override yet — before-diff comes from the
|
||||||
|
# repo's Dockerfile.
|
||||||
|
before, after = apply_capability_change("dev-fresh", "FROM new\n")
|
||||||
|
self.assertIn("FROM ", before)
|
||||||
|
self.assertEqual("FROM new\n", after)
|
||||||
|
|
||||||
|
def test_empty_dockerfile_rejected(self):
|
||||||
|
with self.assertRaises(CapabilityApplyError):
|
||||||
|
apply_capability_change("dev", " \n\t\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -16,6 +16,7 @@ from datetime import datetime, timezone
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from claude_bottle import supervise
|
from claude_bottle import supervise
|
||||||
|
from claude_bottle.backend.docker.capability_apply import CapabilityApplyError
|
||||||
from claude_bottle.backend.docker.cred_proxy_apply import CredProxyApplyError
|
from claude_bottle.backend.docker.cred_proxy_apply import CredProxyApplyError
|
||||||
from claude_bottle.backend.docker.pipelock_apply import PipelockApplyError
|
from claude_bottle.backend.docker.pipelock_apply import PipelockApplyError
|
||||||
from claude_bottle.cli import dashboard
|
from claude_bottle.cli import dashboard
|
||||||
@@ -118,6 +119,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
|||||||
self._setup_fake_home()
|
self._setup_fake_home()
|
||||||
self._original_apply_routes = dashboard.apply_routes_change
|
self._original_apply_routes = dashboard.apply_routes_change
|
||||||
self._original_apply_allowlist = dashboard.apply_allowlist_change
|
self._original_apply_allowlist = dashboard.apply_allowlist_change
|
||||||
|
self._original_apply_capability = dashboard.apply_capability_change
|
||||||
# Default stubs: succeed with deterministic before/after so the
|
# Default stubs: succeed with deterministic before/after so the
|
||||||
# audit log shows a non-empty diff.
|
# audit log shows a non-empty diff.
|
||||||
dashboard.apply_routes_change = lambda slug, content: (
|
dashboard.apply_routes_change = lambda slug, content: (
|
||||||
@@ -126,10 +128,14 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
|||||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||||
"old.example\n", content,
|
"old.example\n", content,
|
||||||
)
|
)
|
||||||
|
dashboard.apply_capability_change = lambda slug, content: (
|
||||||
|
"FROM old\n", content,
|
||||||
|
)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
dashboard.apply_routes_change = self._original_apply_routes
|
dashboard.apply_routes_change = self._original_apply_routes
|
||||||
dashboard.apply_allowlist_change = self._original_apply_allowlist
|
dashboard.apply_allowlist_change = self._original_apply_allowlist
|
||||||
|
dashboard.apply_capability_change = self._original_apply_capability
|
||||||
self._teardown_fake_home()
|
self._teardown_fake_home()
|
||||||
|
|
||||||
def _enqueue(self, tool: str = TOOL_CRED_PROXY_BLOCK):
|
def _enqueue(self, tool: str = TOOL_CRED_PROXY_BLOCK):
|
||||||
@@ -333,6 +339,75 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
|||||||
self.assertIn("+new.example", entries[0].diff)
|
self.assertIn("+new.example", entries[0].diff)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||||
|
"""PRD 0016 Phase 3: approve() on a capability-block proposal
|
||||||
|
calls apply_capability_change, archives the proposal afterward
|
||||||
|
(sidecar is gone so it can't archive itself), and writes no
|
||||||
|
audit entry (capability-block has none per PRD 0013)."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self._setup_fake_home()
|
||||||
|
self._original = dashboard.apply_capability_change
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
dashboard.apply_capability_change = self._original
|
||||||
|
self._teardown_fake_home()
|
||||||
|
|
||||||
|
def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"):
|
||||||
|
p = Proposal.new(
|
||||||
|
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
|
||||||
|
proposed_file=proposed,
|
||||||
|
justification="need ripgrep",
|
||||||
|
current_file_hash=sha256_hex(proposed),
|
||||||
|
now=FIXED,
|
||||||
|
)
|
||||||
|
qdir = supervise.queue_dir_for_slug("dev")
|
||||||
|
qdir.mkdir(parents=True, exist_ok=True)
|
||||||
|
supervise.write_proposal(qdir, p)
|
||||||
|
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||||
|
|
||||||
|
def test_capability_block_calls_apply_with_proposed_file(self):
|
||||||
|
calls = []
|
||||||
|
dashboard.apply_capability_change = lambda slug, content: (
|
||||||
|
calls.append((slug, content)) or ("FROM old\n", content)
|
||||||
|
)
|
||||||
|
qp = self._enqueue_capability("FROM bookworm\n")
|
||||||
|
dashboard.approve(qp)
|
||||||
|
self.assertEqual([("dev", "FROM bookworm\n")], calls)
|
||||||
|
|
||||||
|
def test_apply_failure_blocks_response_and_keeps_pending(self):
|
||||||
|
dashboard.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
|
||||||
|
CapabilityApplyError("teardown failed")
|
||||||
|
)
|
||||||
|
qp = self._enqueue_capability()
|
||||||
|
with self.assertRaises(CapabilityApplyError):
|
||||||
|
dashboard.approve(qp)
|
||||||
|
self.assertEqual(
|
||||||
|
[qp.proposal.id],
|
||||||
|
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_no_audit_log_for_capability(self):
|
||||||
|
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||||
|
qp = self._enqueue_capability()
|
||||||
|
dashboard.approve(qp)
|
||||||
|
# capability-block has no audit log per PRD 0013 — its record
|
||||||
|
# lives in the per-bottle Dockerfile + transcript state.
|
||||||
|
self.assertEqual([], read_audit_entries("cred-proxy", "dev"))
|
||||||
|
self.assertEqual([], read_audit_entries("pipelock", "dev"))
|
||||||
|
|
||||||
|
def test_proposal_archived_after_apply(self):
|
||||||
|
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||||
|
qp = self._enqueue_capability()
|
||||||
|
dashboard.approve(qp)
|
||||||
|
# Sidecar would normally archive after delivering the response,
|
||||||
|
# but it's gone by then. The dashboard archives so
|
||||||
|
# discover_pending stops surfacing the resolved proposal.
|
||||||
|
self.assertEqual([], supervise.list_pending_proposals(qp.queue_dir))
|
||||||
|
processed = list((qp.queue_dir / "processed").glob("*.json"))
|
||||||
|
self.assertEqual(2, len(processed))
|
||||||
|
|
||||||
|
|
||||||
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
|
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
|
||||||
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
|
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
|
||||||
on a pending proposal)."""
|
on a pending proposal)."""
|
||||||
|
|||||||
Reference in New Issue
Block a user