feat(bottle): random-suffix identity + cli.py resume <identity>
test / unit (pull_request) Successful in 18s
test / integration (pull_request) Successful in 1m30s

Replaces the cwd-hash identity with a random 5-char base36 suffix
per launch, so two simultaneous `start <agent>` invocations against
the same cwd no longer collide on container names. Each launch is
its own bottle.

State carries metadata: every prepare step writes
~/.claude-bottle/state/<identity>/metadata.json with the
(agent_name, cwd, copy_cwd, started_at) the bottle was launched
with. The new `cli.py resume <identity>` reads this metadata and
re-launches a bottle pinned to the same identity — picking up the
per-bottle Dockerfile (from a prior capability-block apply) and
the transcript snapshot under the same state dir.

- bottle_state.py: bottle_identity(agent_name) drops the cwd param
  and gains a random suffix; BottleMetadata dataclass +
  read/write/metadata_path helpers.
- BottleSpec gains an optional identity field — resume sets it to
  pin the identity; start leaves it empty so prepare mints fresh.
- prepare.py: writes metadata at launch time; uses spec.identity if
  provided (resume) else bottle_identity(agent_name) (fresh start).
- start.py: extracted _launch_bottle from cmd_start so resume can
  share the launch core; prints `./cli.py resume <identity>` hint
  at session end.
- cli/resume.py (new): reads metadata, reconstructs BottleSpec
  with the recorded identity + cwd, delegates to _launch_bottle.
  Errors clearly when no state exists for the given identity.
- cli/__init__.py: registers `resume` in COMMANDS + usage.
- dashboard.py: capability-block approval status line now appends
  the `resume <identity>` hint so the operator can copy-paste the
  rebuild command without leaving the TUI.

Closes the rebuild loop in PRD 0016: agent calls capability-block →
operator approves → bottle torn down with state preserved → status
line shows resume command → operator runs it → replacement bottle
boots with the new Dockerfile and prior transcript.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 06:09:45 -04:00
parent e996f72532
commit 4032e04a9c
8 changed files with 311 additions and 76 deletions
+87 -27
View File
@@ -1,23 +1,39 @@
"""Per-bottle persistent state (PRD 0016).
Holds the per-bottle Dockerfile override that capability-block
remediation writes, plus the transcript snapshot the
state-preservation helper saves before teardown. State lives at:
remediation writes, the transcript snapshot the state-preservation
helper saves before teardown, and the launch metadata that lets
`cli.py resume <identity>` reconstruct a bottle's spec. State
lives at:
~/.claude-bottle/state/<slug>/
~/.claude-bottle/state/<identity>/
metadata.json — agent_name + cwd + started_at (for resume)
Dockerfile — per-bottle override (absent → use repo's)
transcript/ — last snapshotted agent state (best-effort)
When the per-bottle Dockerfile is present, the launch step builds
the agent image with a per-bottle tag (claude-bottle-rebuilt-<slug>)
the agent image with a per-bottle tag (claude-bottle-rebuilt-<id>)
from this file rather than the repo's. The build context is still
the repo root so the Dockerfile can COPY claude_bottle source files
the same way the original does.
Identity model:
- Every `cli.py start <agent>` mints a fresh identity via
`bottle_identity(agent_name)`: slug-prefix for readability plus a
5-char random suffix for parallel-safe uniqueness. The metadata
written at launch time pins (agent_name, cwd) to that identity.
- `cli.py resume <identity>` reads the metadata and re-launches a
bottle pinned to the same identity, picking up any per-bottle
Dockerfile and transcript snapshot.
"""
from __future__ import annotations
import hashlib
import dataclasses
import json
import secrets
import string
from dataclasses import dataclass
from pathlib import Path
from ... import supervise as _supervise
@@ -28,33 +44,73 @@ from . import util as docker_mod
_STATE_SUBDIR = "state"
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
_TRANSCRIPT_SUBDIR = "transcript"
_METADATA_NAME = "metadata.json"
# How many hex chars of the cwd hash to fold into the identity. 12
# hex chars = 48 bits of entropy; the cost of a collision is two
# unrelated cwds sharing the same state — annoying but not security-
# relevant. 12 keeps the identity short enough to stay readable in
# container names and `ls` output.
_CWD_HASH_LEN = 12
# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human
# operators starting bottles by hand; collision-free in practice.
_RANDOM_SUFFIX_LEN = 5
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
def bottle_identity(agent_name: str, cwd: Path | None) -> str:
"""Stable, unique identifier for a bottle. Used as the key for
every persistent and runtime resource: container names, network
names, queue dir, audit log, per-bottle Dockerfile state.
def bottle_identity(agent_name: str) -> str:
"""Mint a fresh per-launch bottle identity. The slug-prefix is
`slugify(agent_name)` for readability; the suffix is 5 random
base36 chars so two simultaneous `start <agent>` invocations
don't collide on container/network names.
Without --cwd, the identity is just `slugify(agent_name)` — the
same value the codebase used to compute as `slug`. With --cwd
the identity is `slugify(agent_name)-<sha256(resolved-cwd)[:N]>`
so the same agent against different projects gets distinct
state. Same agent against the same cwd is stable across launches.
`cwd` should be the path the agent will see, *resolved* by the
caller, or None when no cwd was passed (no --cwd flag)."""
Every call produces a different identity (non-deterministic).
To continue an existing bottle's state, use the recorded
identity from BottleMetadata via `cli.py resume <identity>`,
not this function."""
slug = docker_mod.slugify(agent_name)
if cwd is None:
return slug
h = hashlib.sha256(str(cwd).encode("utf-8")).hexdigest()
return f"{slug}-{h[:_CWD_HASH_LEN]}"
suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN))
return f"{slug}-{suffix}"
@dataclass(frozen=True)
class BottleMetadata:
"""Persistent record of how a bottle was launched, written at
start time and read by `cli.py resume`. Lives at
~/.claude-bottle/state/<identity>/metadata.json."""
identity: str
agent_name: str
cwd: str # empty string when --cwd was not passed
copy_cwd: bool
started_at: str # ISO 8601 UTC
def metadata_path(identity: str) -> Path:
return bottle_state_dir(identity) / _METADATA_NAME
def write_metadata(metadata: BottleMetadata) -> Path:
"""Persist `metadata` to ~/.claude-bottle/state/<identity>/metadata.json.
Mode 0o644 — no secrets, just (agent_name, cwd, timestamp)."""
path = metadata_path(metadata.identity)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n")
path.chmod(0o644)
return path
def read_metadata(identity: str) -> BottleMetadata | None:
"""Return the metadata for `identity`, or None if no state has
been recorded for it. Used by `cli.py resume` to reconstruct
the launch spec."""
path = metadata_path(identity)
if not path.is_file():
return None
raw = json.loads(path.read_text())
if not isinstance(raw, dict):
return None
return BottleMetadata(
identity=str(raw.get("identity", identity)),
agent_name=str(raw.get("agent_name", "")),
cwd=str(raw.get("cwd", "")),
copy_cwd=bool(raw.get("copy_cwd", False)),
started_at=str(raw.get("started_at", "")),
)
def bottle_state_dir(identity: str) -> Path:
@@ -100,11 +156,15 @@ def transcript_snapshot_dir(identity: str) -> Path:
__all__ = [
"BottleMetadata",
"bottle_identity",
"bottle_state_dir",
"metadata_path",
"per_bottle_dockerfile",
"per_bottle_dockerfile_path",
"per_bottle_image_tag",
"read_metadata",
"transcript_snapshot_dir",
"write_metadata",
"write_per_bottle_dockerfile",
]