Compare commits

..

3 Commits

Author SHA1 Message Date
didericis-claude 36a512bb4a refactor: drop redundant single-parent fast path in _resolve_one_bottle
test / unit (pull_request) Successful in 34s
test / integration (pull_request) Successful in 17s
_fold_parents with one name returns after the first resolve; the
single-element branch was a verbatim copy of the general path.
2026-06-25 04:42:21 -04:00
didericis-claude 766ad17aab fix: resolve pyright reportUnnecessaryIsInstance in _resolve_one_bottle
Validate list entries against object-typed raw_list before narrowing to
list[str], so the isinstance(pname, str) check is not redundant.
2026-06-25 04:42:21 -04:00
didericis-claude 7484927252 feat: support multiple parents in bottle extends:
Allow extends: to accept a list of bottle names in addition to a plain
string. Parents are resolved independently and folded left-to-right
into a single combined parent before the child is merged on top, so
orthogonal concerns (base env, networking, agent provider) can live in
separate bottles without forcing a linear chain.

Merge rules for the parent fold: env dict-merge with later winning on
collision; git-gate.user per-field overlay; git-gate.repos union by
name with later winning per-field on same name; egress.routes
concatenated; all scalar fields (supervise, agent_provider, egress.log)
use last-wins. The existing child-wins-over-all-parents rule is
unchanged. Cycle detection, diamond deduplication, and missing/invalid
parent errors all work across multi-parent graphs.

Closes #268
2026-06-25 04:42:21 -04:00
21 changed files with 461 additions and 125 deletions
+1 -1
View File
@@ -6,7 +6,7 @@
[![test](https://gitea.dideric.is/didericis/bot-bottle/actions/workflows/test.yml/badge.svg?branch=main)](https://gitea.dideric.is/didericis/bot-bottle/actions?workflow=test.yml)
[![pylint](https://img.shields.io/badge/pylint-9.93%2F10-brightgreen)](https://github.com/PyCQA/pylint)
[![pyright](https://img.shields.io/badge/pyright-1%20errors-brightgreen)](https://github.com/microsoft/pyright)
[![pyright](https://img.shields.io/badge/pyright-0%20errors-brightgreen)](https://github.com/microsoft/pyright)
**Problem:** Developer wants to run a coding agent without supervision, but they don't want a prompt injected or misbehaving agent wrecking their environment or exfiltrating sensitive data.
@@ -0,0 +1,211 @@
"""capability_apply — host-side orchestrator for capability-block
remediation (PRD 0016).
On approval of a capability-block proposal, the dashboard calls
apply_capability_change(slug, new_dockerfile) which:
1. Snapshots the agent's transcript dir to
~/.bot-bottle/state/<slug>/transcript/ (best-effort).
2. Pushes the agent's working tree via `git push` (best-effort —
no upstream / no commits / no git repo all skip with a log).
3. Writes the new Dockerfile to
~/.bot-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
state). The next `cli.py start <agent>` picks it up.
4. Force-removes the agent container + all sidecars + the
per-bottle networks. Idempotent — missing resources are not
errors.
Returns (before, after) Dockerfile contents so the dashboard can
record / render the diff. (capability-block has no audit log per
PRD 0013 — the per-bottle Dockerfile state is its own record.)
This is "fire-and-forget" from the agent's perspective: by the time
the dashboard writes the response file the supervise sidecar is
gone, so the agent's tool call connection drops without ever
receiving the response. The replacement agent (next manual
`cli.py start`) sees the new Dockerfile and starts from there.
v1 does not auto-relaunch — see PRD 0016's capability-block return
semantics open question.
"""
from __future__ import annotations
import shutil
import subprocess
from ...agent_provider import get_provider
from ...log import info, warn
from ...bottle_state import (
mark_preserved,
per_bottle_dockerfile,
transcript_snapshot_dir,
write_per_bottle_dockerfile,
)
from .sidecar_bundle import sidecar_bundle_container_name
# Agent home inside the container (per the repo Dockerfile's
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
# dir + the workspace dir for git push.
_AGENT_HOME_IN_CONTAINER = "/home/node"
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
# Per-bottle resource name patterns (mirroring prepare.py).
def _agent_container_name(slug: str) -> str:
return f"bot-bottle-{slug}"
def _per_bottle_container_names(slug: str) -> list[str]:
"""All container names that belong to this bottle. Missing
containers are silently skipped by the teardown helper, so it's
fine to include names that don't exist for a given bottle."""
return [
_agent_container_name(slug),
sidecar_bundle_container_name(slug),
]
def _per_bottle_network_names(slug: str) -> list[str]:
return [
f"bot-bottle-net-{slug}",
f"bot-bottle-egress-{slug}",
]
class CapabilityApplyError(RuntimeError):
"""Raised when the apply fails in a way that should keep the
proposal pending (so the operator can retry). Best-effort
failures (transcript snapshot, git push) do not raise — they
just log and proceed."""
# --- Public helpers --------------------------------------------------------
def fetch_current_dockerfile(slug: str) -> str:
"""Return the Dockerfile content the next `cli.py start <agent>`
would use for this bottle. If a per-bottle override exists, that
one; otherwise the repo's Dockerfile.
Used by the operator-edit verb to show the current source of
truth, and by apply_capability_change for the before-diff."""
override = per_bottle_dockerfile(slug)
if override is not None:
return override
repo_dockerfile = get_provider("claude").dockerfile
if repo_dockerfile.is_file():
return repo_dockerfile.read_text()
raise CapabilityApplyError(
f"no per-bottle Dockerfile for {slug} and no provider Dockerfile at "
f"{repo_dockerfile}"
)
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
"""End-to-end capability-block remediation. See module docstring
for the sequence. Returns (before, after) Dockerfile content."""
if not new_dockerfile.strip():
raise CapabilityApplyError("proposed Dockerfile is empty")
before = fetch_current_dockerfile(slug)
snapshot_transcript(slug)
_push_working_tree(slug)
write_per_bottle_dockerfile(slug, new_dockerfile)
# Set the preserve marker BEFORE teardown so cli.py's session-end
# cleanup sees it and keeps the state dir intact for the
# operator's `cli.py resume <identity>`. Without the marker the
# state dir would be deleted as part of normal session end.
mark_preserved(slug)
_teardown_bottle(slug)
return before, new_dockerfile
# --- Internals -------------------------------------------------------------
def snapshot_transcript(slug: str) -> None:
"""`docker cp` /home/node/.claude out of the agent container into
~/.bot-bottle/state/<slug>/transcript/. Best-effort: missing
container, missing dir, or cp error all log a warning and return.
The transcript is what `claude --resume` reads to pick up where
the agent left off.
Called from two places:
- capability-apply, before tearing the bottle down.
- cli.py's session-end path, before the launch context closes,
so a crash or normal exit also leaves a transcript on disk
(deleted along with the state dir on clean exit, kept on
crash or capability-block per the preserve marker)."""
container = _agent_container_name(slug)
dest = transcript_snapshot_dir(slug)
if dest.exists():
# Remove any prior snapshot so the new one is a clean copy.
shutil.rmtree(dest, ignore_errors=True)
dest.parent.mkdir(parents=True, exist_ok=True)
r = subprocess.run(
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"transcript snapshot skipped "
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
)
return
info(f"transcript snapshotted to {dest}")
def _push_working_tree(slug: str) -> None:
"""`docker exec <agent> git push` from /home/node/workspace.
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
network all log a warning and return. The replacement bottle
will pick up whatever's actually upstream."""
container = _agent_container_name(slug)
r = subprocess.run(
[
"docker", "exec", container, "sh", "-c",
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
f"git push origin HEAD 2>&1 || true",
],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"capability-apply: git push skipped "
f"({(r.stderr or '').strip() or 'docker exec failed'})"
)
return
output = (r.stdout or "").strip()
if output:
info(f"capability-apply: git push: {output}")
else:
info("capability-apply: git push ran (no output — likely not a git workspace)")
def _teardown_bottle(slug: str) -> None:
"""Force-remove all per-bottle docker resources. Idempotent —
`docker rm -f` / `docker network rm` silently ignore missing
names, so this can be called even mid-rebuild."""
info(f"capability-apply: tearing down bottle {slug}")
for name in _per_bottle_container_names(slug):
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
for net in _per_bottle_network_names(slug):
subprocess.run(
["docker", "network", "rm", net],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
__all__ = [
"CapabilityApplyError",
"apply_capability_change",
"fetch_current_dockerfile",
"snapshot_transcript",
]
+10
View File
@@ -34,6 +34,7 @@ from ...egress import (
from ...git_gate import GIT_GATE_HOSTNAME
from ...log import die, warn
from ...supervise import (
CURRENT_CONFIG_DIR_IN_AGENT,
QUEUE_DIR_IN_CONTAINER,
SUPERVISE_HOSTNAME,
SUPERVISE_PORT,
@@ -232,6 +233,15 @@ def _agent_service(plan: DockerBottlePlan) -> dict[str, Any]:
if plan.use_runsc:
service["runtime"] = "runsc"
volumes: list[dict[str, Any]] = []
if plan.supervise_plan is not None:
volumes.append(_bind(
plan.supervise_plan.current_config_dir,
CURRENT_CONFIG_DIR_IN_AGENT,
))
if volumes:
service["volumes"] = volumes
# The init supervisor inside the bundle owns intra-bundle
# daemon ordering, so the agent only waits for the bundle
# container itself.
+16 -10
View File
@@ -1,7 +1,8 @@
"""Per-bottle persistent state.
"""Per-bottle persistent state (PRD 0016).
Holds optional per-bottle Dockerfile overrides, the transcript snapshot
the state-preservation helper saves before teardown, and the launch metadata that lets
Holds the per-bottle Dockerfile override that capability-block
remediation writes, the transcript snapshot the state-preservation
helper saves before teardown, and the launch metadata that lets
`cli.py resume <identity>` reconstruct a bottle's spec. State
lives at:
@@ -60,7 +61,7 @@ _METADATA_NAME = "metadata.json"
_LIVE_CONFIG_SUBDIR = "live-config"
LIVE_CONFIG_ROUTES_NAME = "routes.yaml"
LIVE_CONFIG_ALLOWLIST_NAME = "allowlist"
# Empty marker file. Session preservation writes it before teardown so
# Empty marker file. capability_apply writes it before teardown so
# cli.py's session-end cleanup knows to preserve the state dir for
# `cli.py resume <identity>`. Absent = clean up.
_PRESERVE_MARKER = ".preserve"
@@ -163,7 +164,8 @@ def per_bottle_dockerfile_path(identity: str) -> Path:
def per_bottle_dockerfile(identity: str) -> str | None:
"""Return the per-bottle Dockerfile content if present, else
None. None means: use the provider or manifest Dockerfile."""
None. None means: use the repo's Dockerfile (the original
pre-capability-block behavior)."""
p = per_bottle_dockerfile_path(identity)
if p.is_file():
return p.read_text()
@@ -247,7 +249,9 @@ def write_live_config(
def transcript_snapshot_dir(identity: str) -> Path:
"""Where agent session snapshots are kept for resume flows."""
"""Where capability_apply stashes the agent's transcript before
teardown, so the next `cli.py start <agent>` can offer to
resume from it."""
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
@@ -274,7 +278,8 @@ def git_gate_state_dir(identity: str) -> Path:
def supervise_state_dir(identity: str) -> Path:
"""State subdir reserved for supervise sidecar bind-mount sources.
"""State subdir for the supervise sidecar's current-config dir
(bind-mounted into the agent at /etc/bot-bottle/current-config).
The queue dir is intentionally NOT under here — it lives at
~/.bot-bottle/queue/<slug>/ alongside the audit logs, so it
survives state-dir cleanup."""
@@ -296,8 +301,9 @@ def preserve_marker_path(identity: str) -> Path:
def mark_preserved(identity: str) -> Path:
"""Mark this bottle's state for preservation across session
teardown so cli.py's session-end cleanup leaves the state dir
intact for a subsequent `cli.py resume`."""
teardown. Written by capability_apply.apply_capability_change so
cli.py's session-end cleanup leaves the state dir intact for a
subsequent `cli.py resume`."""
path = preserve_marker_path(identity)
path.parent.mkdir(parents=True, exist_ok=True)
path.touch()
@@ -310,7 +316,7 @@ def is_preserved(identity: str) -> bool:
def clear_preserve_marker(identity: str) -> None:
"""Idempotent removal. Called at fresh launch (start or resume)
so a marker left from a prior preserved session doesn't keep
so a marker left from a prior capability-block doesn't keep
state alive past the next normal session-end."""
try:
preserve_marker_path(identity).unlink()
+3 -2
View File
@@ -13,8 +13,9 @@ dirs are shared layout, so docker is the single owner of that
bucket.
State dirs with `.preserve` are intentionally never touched — they
hold preserved sessions the operator may want to `resume`. Manual
`rm -rf ~/.bot-bottle/state/<identity>` is the path for those.
hold capability-block rebuilds or crash snapshots the operator may
want to `resume`. Manual `rm -rf ~/.bot-bottle/state/<identity>`
is the path for those.
"""
from __future__ import annotations
+5 -4
View File
@@ -4,12 +4,13 @@ Reads ~/.bot-bottle/state/<identity>/metadata.json to recover the
(agent_name, cwd, copy_cwd) the bottle was originally started with,
then runs the same launch core as `start` — but pinned to the
recorded identity so the new bottle picks up any per-bottle Dockerfile
override and transcript snapshot under the same state dir.
(from capability-block apply) and transcript snapshot under the same
state dir.
Use case: an interrupted or preserved bottle needs to be relaunched;
the operator runs
Use case: an agent calls capability-block, the dashboard approves
and tears down the bottle, the operator runs
./cli.py resume <identity>
to bring up the replacement from the recorded state.
to bring up the replacement with the new capabilities baked in.
"""
from __future__ import annotations
+7 -2
View File
@@ -31,6 +31,7 @@ from ..bottle_state import (
is_preserved,
mark_preserved,
)
# from ..backend.docker.capability_apply import snapshot_transcript
from ..log import info
from ..manifest import ManifestIndex
from ._common import PROG, USER_CWD, read_tty_line
@@ -256,8 +257,12 @@ def _launch_bottle(
)
# While the container is still alive: always snapshot the
# transcript and — if the agent exited non-zero — mark
# the state for preservation. This picks up crashes /
# Ctrl-Cs / OOM kills before cleanup removes the state dir.
# the state for preservation. Capability-block already
# did both before triggering teardown from the dashboard;
# this picks up crashes / Ctrl-Cs / OOM kills the same
# way. snapshot_transcript is best-effort so the
# capability-block path's prior snapshot isn't clobbered
# when the container is already gone.
if agent_provider_template == "claude":
capture_claude_session_state(identity, exit_code)
return 0
+29 -3
View File
@@ -2,8 +2,9 @@
act on them (approve / modify / reject).
Curses-based TUI; modify-then-approve shells out to $EDITOR. The
Egress proposals are queued for operator review as full routes.yaml
updates.
approval handler wires to PRD 0016 (capability-block), which rebuilds
the bottle Dockerfile. Egress proposals are queued for operator review
as full routes.yaml updates.
"""
from __future__ import annotations
@@ -21,6 +22,10 @@ from pathlib import Path
from .. import supervise as _supervise
from ..bottle_state import read_metadata
# from ..backend.docker.capability_apply import (
# CapabilityApplyError,
# apply_capability_change,
# )
from ..backend.docker.egress_apply import (
EgressApplyError,
applicator as _docker_applicator,
@@ -33,6 +38,10 @@ from ..backend.smolmachines.egress_apply import (
)
from ..log import Die, error, info
class CapabilityApplyError(RuntimeError):
"""Placeholder while capability_apply is disabled."""
from ..supervise import (
COMPONENT_FOR_TOOL,
AuditEntry,
@@ -41,6 +50,7 @@ from ..supervise import (
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_CAPABILITY_BLOCK,
TOOL_EGRESS_ALLOW,
TOOL_EGRESS_BLOCK,
TOOL_GITLEAKS_ALLOW,
@@ -73,7 +83,7 @@ class QueuedProposal:
# Errors any remediation engine may raise. Caught by the TUI key
# handlers and surfaced in the status line so a failed apply keeps
# the proposal pending rather than crashing curses.
ApplyError = (EgressApplyError,)
ApplyError = (CapabilityApplyError, EgressApplyError)
def apply_routes_change(slug: str, content: str) -> tuple[str, str]:
@@ -133,6 +143,8 @@ def _detail_lines(
def _suffix_for_tool(tool: str) -> str:
if tool == TOOL_CAPABILITY_BLOCK:
return ".dockerfile"
if tool in (TOOL_EGRESS_ALLOW, TOOL_EGRESS_BLOCK):
return ".yaml"
if tool in (TOOL_GITLEAKS_ALLOW, TOOL_EGRESS_TOKEN_ALLOW):
@@ -154,6 +166,17 @@ def approve(
file_to_apply = final_file if final_file is not None else qp.proposal.proposed_file
diff_before, diff_after = "", ""
# if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
# _meta = read_metadata(qp.proposal.bottle_slug)
# if _meta is not None and not _meta.compose_project:
# raise CapabilityApplyError(
# "capability-block remediation is not supported for smolmachines "
# "bottles. Reject this proposal or handle the capability change "
# "manually, then restart the bottle."
# )
# diff_before, diff_after = apply_capability_change(
# qp.proposal.bottle_slug, file_to_apply,
# )
if qp.proposal.tool in (TOOL_EGRESS_ALLOW, TOOL_EGRESS_BLOCK):
diff_before, diff_after = apply_routes_change(
qp.proposal.bottle_slug,
@@ -171,6 +194,9 @@ def approve(
qp, action=status, notes=notes,
diff_before=diff_before, diff_after=diff_after,
)
if qp.proposal.tool == TOOL_CAPABILITY_BLOCK:
archive_proposal(qp.queue_dir, qp.proposal.id)
def reject(qp: QueuedProposal, *, reason: str) -> None:
"""Write a rejection response and an audit entry."""
+4 -2
View File
@@ -113,8 +113,10 @@ class ManifestBottle:
egress: ManifestEgressConfig = field(default_factory=ManifestEgressConfig)
# Per-bottle stuck-recovery sidecar (PRD 0013). When true (the
# default, issue #249), the launch step brings up a supervise
# sidecar that exposes egress MCP tools to the agent. Set
# `supervise: false` to skip the sidecar.
# sidecar that exposes MCP tools to the agent (egress-block,
# capability-block) plus mounts the current-config dir read-only
# into the agent at /etc/bot-bottle/current-config. Set
# `supervise: false` to skip the sidecar and mount.
supervise: bool = True
@classmethod
+42 -10
View File
@@ -2,10 +2,11 @@
The supervise plane is the per-bottle MCP sidecar plus its host-side
queue/audit support. The sidecar (bot_bottle.supervise_server)
sits on the bottle's internal network and exposes MCP tools the agent
calls when it needs an operator-reviewed egress change:
sits on the bottle's internal network and exposes three MCP tools the
agent calls when it hits a stuck-recovery category:
* egress-block / allow agent proposes a new routes.yaml
* capability-block agent proposes a new agent Dockerfile
Each tool call: the agent passes the full proposed file plus a
justification text. The sidecar validates the proposal syntactically,
@@ -47,6 +48,7 @@ from pathlib import Path
SUPERVISE_HOSTNAME = "supervise"
SUPERVISE_PORT = 9100
TOOL_CAPABILITY_BLOCK = "capability-block"
TOOL_EGRESS_BLOCK = "egress-block"
TOOL_EGRESS_ALLOW = "egress-allow"
TOOL_GITLEAKS_ALLOW = "gitleaks-allow"
@@ -56,6 +58,7 @@ TOOL_EGRESS_TOKEN_ALLOW = "egress-token-allow"
TOOL_LIST_EGRESS_ROUTES = "list-egress-routes"
TOOLS: tuple[str, ...] = (
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_EGRESS_BLOCK,
TOOL_GITLEAKS_ALLOW,
TOOL_EGRESS_TOKEN_ALLOW,
@@ -72,6 +75,10 @@ TOOLS: tuple[str, ...] = (
EGRESS_FORWARD_PROXY = "http://127.0.0.1:9099"
EGRESS_INTROSPECT_URL = "http://_egress.local/allowlist"
# capability-block has no on-disk config the operator edits in place
# (the Dockerfile is rebuilt, not patched), so it has no audit log
# here — those changes are captured by git history + the rebuild record
# laid down in PRD 0016.
COMPONENT_FOR_TOOL: dict[str, str] = {
TOOL_EGRESS_ALLOW: "egress",
TOOL_EGRESS_BLOCK: "egress",
@@ -87,6 +94,8 @@ STATUSES: tuple[str, ...] = (STATUS_APPROVED, STATUS_MODIFIED, STATUS_REJECTED)
ACTION_OPERATOR_EDIT = "operator-edit"
QUEUE_DIR_IN_CONTAINER = "/run/supervise/queue"
CURRENT_CONFIG_DIR_IN_AGENT = "/etc/bot-bottle/current-config"
DEFAULT_POLL_INTERVAL_SEC = 0.5
@@ -429,39 +438,59 @@ def sha256_hex(content: str) -> str:
# --- Sidecar plan + abstract lifecycle -------------------------------------
# Filename of the staged Dockerfile inside the agent's read-only
# current-config mount. The capability-block tool's description
# points the agent at this exact path so it can read the current
# Dockerfile and propose modifications.
#
# routes.yaml + allowlist used to live here too; PRD 0017 chunk 3
# moved them behind the `list-egress-routes` MCP tool (live state
# from egress's introspection endpoint) so the agent always sees
# current data rather than a launch-time snapshot.
CURRENT_CONFIG_DOCKERFILE = "Dockerfile"
@dataclass(frozen=True)
class SupervisePlan:
"""Output of Supervise.prepare; consumed by .start.
`queue_dir` is the host directory bind-mounted into the sidecar
at /run/supervise/queue. `internal_network` is empty at prepare
time; the backend's launch step fills it via dataclasses.replace
before calling .start."""
at /run/supervise/queue. `current_config_dir` is the host
directory bind-mounted (read-only) into the *agent* container
at /etc/bot-bottle/current-config currently holds only the
Dockerfile snapshot (routes.yaml + allowlist moved to the
`list-egress-routes` MCP tool). `internal_network` is
empty at prepare time; the backend's launch step fills it via
dataclasses.replace before calling .start."""
slug: str
queue_dir: Path
current_config_dir: Path
internal_network: str = ""
class Supervise(ABC):
"""Per-bottle supervise sidecar. Encapsulates the host-side
prepare (queue dir staging); the sidecar's start/stop lifecycle
is backend-specific."""
prepare (queue dir + current-config staging); the sidecar's
start/stop lifecycle is backend-specific."""
def prepare(
self,
slug: str,
stage_dir: Path,
) -> SupervisePlan:
"""Stage the per-bottle queue dir on the host. Returns the
plan; `internal_network` must be set by the launch step before
"""Stage the per-bottle queue dir on the host and the
current-config dir under `stage_dir`. Returns the plan;
`internal_network` must be set by the launch step before
.start runs."""
del stage_dir
queue_dir = queue_dir_for_slug(slug)
queue_dir.mkdir(parents=True, exist_ok=True)
current_config_dir = stage_dir / "current-config"
current_config_dir.mkdir(parents=True, exist_ok=True)
return SupervisePlan(
slug=slug,
queue_dir=queue_dir,
current_config_dir=current_config_dir,
)
# --- Helpers ---------------------------------------------------------------
@@ -512,6 +541,8 @@ __all__ = [
"ACTION_OPERATOR_EDIT",
"AuditEntry",
"COMPONENT_FOR_TOOL",
"CURRENT_CONFIG_DIR_IN_AGENT",
"CURRENT_CONFIG_DOCKERFILE",
"DEFAULT_POLL_INTERVAL_SEC",
"Proposal",
"QUEUE_DIR_IN_CONTAINER",
@@ -527,6 +558,7 @@ __all__ = [
"TOOLS",
"EGRESS_FORWARD_PROXY",
"EGRESS_INTROSPECT_URL",
"TOOL_CAPABILITY_BLOCK",
"TOOL_EGRESS_ALLOW",
"TOOL_EGRESS_BLOCK",
"TOOL_GITLEAKS_ALLOW",
+40 -6
View File
@@ -1,8 +1,8 @@
"""Supervise sidecar HTTP server (PRD 0013).
Per-bottle MCP server exposing tools the agent calls to propose egress
config changes when stuck. The tools are `egress-allow`,
`egress-block`, and `list-egress-routes`.
Per-bottle MCP server exposing tools the agent calls to propose config
changes when stuck. The tools are `allow`, `egress-block`,
`capability-block`, and `list-egress-routes`.
Each queued tool call:
@@ -253,6 +253,34 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
"required": ["routes_yaml", "justification"],
},
},
{
"name": _sv.TOOL_CAPABILITY_BLOCK,
"description": (
"Call when the bottle is missing a tool, skill, permission, "
"or env var you need — something that lives in the agent "
"Dockerfile rather than in the egress routes. "
"Read the current Dockerfile from "
"/etc/bot-bottle/current-config/Dockerfile, compose a "
"modified version, and pass the full new file plus a "
"justification. On approval the supervisor rebuilds the "
"bottle from the new Dockerfile and starts a replacement on "
"the same branch (wired in PRD 0016; v1 acknowledges only)."
),
"inputSchema": {
"type": "object",
"properties": {
"dockerfile": {
"type": "string",
"description": "Full proposed Dockerfile content.",
},
"justification": {
"type": "string",
"description": "Why this capability is needed.",
},
},
"required": ["dockerfile", "justification"],
},
},
]
@@ -260,6 +288,7 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
# payload (stored in Proposal.proposed_file).
PROPOSED_FILE_FIELD: dict[str, str] = {
_sv.TOOL_EGRESS_ALLOW: "routes_yaml",
_sv.TOOL_CAPABILITY_BLOCK: "dockerfile",
_sv.TOOL_EGRESS_BLOCK: "routes_yaml",
}
@@ -273,7 +302,11 @@ def validate_proposed_file(tool: str, content: str) -> None:
enter the queue."""
if not content.strip():
raise _RpcClientError(ERR_INVALID_PARAMS, f"{tool}: proposed file is empty")
if tool in (_sv.TOOL_EGRESS_ALLOW, _sv.TOOL_EGRESS_BLOCK):
if tool == _sv.TOOL_CAPABILITY_BLOCK:
# Dockerfiles are too varied to validate syntactically beyond
# non-empty. The operator reads the diff in the TUI.
pass
elif tool in (_sv.TOOL_EGRESS_ALLOW, _sv.TOOL_EGRESS_BLOCK):
try:
config = load_config(content)
except ValueError as e:
@@ -454,8 +487,9 @@ def format_pending_response_text(timeout_seconds: float) -> str:
# --- HTTP transport --------------------------------------------------------
# Max request body the server accepts. 1 MB is well above any realistic
# routes.yaml proposal.
# Max request body the server accepts. Generous because Dockerfile
# proposals can be a few KB; routes.json is small. 1 MB is well above
# any realistic config file.
MAX_BODY_BYTES = 1 * 1024 * 1024
+2 -2
View File
@@ -115,8 +115,8 @@ class TestBottleIdentity(unittest.TestCase):
class TestPreserveMarker(_FakeHomeMixin, unittest.TestCase):
"""The .preserve marker tells cli.py's session-end cleanup to keep
the state dir instead of removing it."""
"""The .preserve marker is how capability_apply tells cli.py's
session-end cleanup to keep the state dir instead of removing it."""
def setUp(self):
self._setup_fake_home()
+2 -2
View File
@@ -29,8 +29,8 @@ class _FakeHomeMixin:
class TestCaptureSessionState(_FakeHomeMixin, unittest.TestCase):
# capture_claude_session_state handles the preserve marker for
# non-zero agent exits.
# snapshot_transcript is commented out (capability_apply is disabled);
# capture_claude_session_state now only handles the preserve marker.
def setUp(self):
self._setup_fake_home()
+11 -3
View File
@@ -108,6 +108,7 @@ def _supervise_plan() -> SupervisePlan:
return SupervisePlan(
slug=SLUG,
queue_dir=STATE / "supervise" / "queue",
current_config_dir=STATE / "supervise" / "current-config",
internal_network=f"bot-bottle-net-{SLUG}",
)
@@ -270,11 +271,18 @@ class TestAgentAlwaysPresent(unittest.TestCase):
s = bottle_plan_to_compose(_plan(**kwargs))["services"]["agent"]
self.assertEqual(["sidecars"], s["depends_on"])
def test_agent_has_no_current_config_mount_with_supervise(self):
def test_agent_current_config_mount_only_with_supervise(self):
with_sv = bottle_plan_to_compose(_plan(supervise=True))["services"]["agent"]
self.assertNotIn("volumes", with_sv)
self.assertTrue(any(
v["target"] == "/etc/bot-bottle/current-config"
for v in with_sv.get("volumes", [])
))
without_sv = bottle_plan_to_compose(_plan(supervise=False))["services"]["agent"]
self.assertNotIn("volumes", without_sv)
# Either no volumes key at all, or no current-config target.
self.assertFalse(any(
v["target"] == "/etc/bot-bottle/current-config"
for v in without_sv.get("volumes", [])
))
class TestSidecarBundleShape(unittest.TestCase):
@@ -75,6 +75,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return DockerBottlePlan(
spec=spec,
@@ -78,6 +78,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return DockerBottlePlan(
spec=spec,
+2 -2
View File
@@ -65,8 +65,8 @@ class TestOrphanStateDirs(_FakeHomeMixin, unittest.TestCase):
)
def test_preserve_marker_skips_dir(self):
# Preserve marker means the user explicitly wanted this dir
# kept for `resume`.
# Preserve marker = capability-block or crash auto-preserve;
# the user explicitly wanted this dir kept for `resume`.
bottle_state.write_per_bottle_dockerfile("kept-ccc", "FROM x\n")
bottle_state.mark_preserved("kept-ccc")
self.assertEqual(
@@ -130,6 +130,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return SmolmachinesBottlePlan(
spec=spec,
+18 -13
View File
@@ -16,7 +16,7 @@ from bot_bottle.supervise import (
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_GITLEAKS_ALLOW,
archive_proposal,
audit_log_path,
@@ -37,9 +37,9 @@ FIXED_TS = datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc)
def _proposal(
tool: str = TOOL_EGRESS_ALLOW,
proposed: str = "routes:\n - host: example.com\n",
justification: str = "need egress",
tool: str = TOOL_CAPABILITY_BLOCK,
proposed: str = "FROM python:3.13\n",
justification: str = "need a capability",
) -> Proposal:
return Proposal.new(
bottle_slug="dev",
@@ -57,7 +57,7 @@ class TestProposalRoundtrip(unittest.TestCase):
self.assertTrue(p.id)
self.assertEqual("2026-05-25T12:00:00+00:00", p.arrival_timestamp)
self.assertEqual("dev", p.bottle_slug)
self.assertEqual(TOOL_EGRESS_ALLOW, p.tool)
self.assertEqual(TOOL_CAPABILITY_BLOCK, p.tool)
def test_to_from_dict_roundtrip(self):
p = _proposal()
@@ -142,14 +142,14 @@ class TestQueueIO(unittest.TestCase):
def test_list_pending_sorted_by_arrival(self):
# Fabricate two with explicit timestamps.
a = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: early.example.com\n", justification="early",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="early",
current_file_hash="x",
now=datetime(2026, 5, 25, 10, 0, 0, tzinfo=timezone.utc),
)
b = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: late.example.com\n", justification="late",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="late",
current_file_hash="x",
now=datetime(2026, 5, 25, 14, 0, 0, tzinfo=timezone.utc),
)
@@ -319,6 +319,7 @@ class TestToolConstants(unittest.TestCase):
self.assertEqual(
(
supervise.TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
supervise.TOOL_EGRESS_BLOCK,
TOOL_GITLEAKS_ALLOW,
supervise.TOOL_EGRESS_TOKEN_ALLOW,
@@ -377,16 +378,20 @@ class TestSupervisePrepare(unittest.TestCase):
supervise.bot_bottle_root = fake_root # type: ignore[assignment]
return lambda: setattr(supervise, "bot_bottle_root", original)
def test_prepare_creates_queue(self):
def test_prepare_creates_queue_and_current_config(self):
plan = _StubSupervise().prepare("dev", self.stage_dir)
self.assertTrue(plan.queue_dir.is_dir())
self.assertTrue(plan.current_config_dir.is_dir())
self.assertEqual("dev", plan.slug)
self.assertEqual("", plan.internal_network)
def test_prepare_does_not_create_current_config_dir(self):
def test_prepare_writes_no_files_to_current_config(self):
# dockerfile_content is no longer accepted by prepare.
# routes.yaml + allowlist live behind the
# `list-egress-routes` MCP tool (PRD 0017 chunk 3).
plan = _StubSupervise().prepare("dev", self.stage_dir)
self.assertFalse((self.stage_dir / "current-config").exists())
self.assertFalse(hasattr(plan, "current_config_dir"))
files = sorted(p.name for p in plan.current_config_dir.iterdir())
self.assertEqual([], files)
if __name__ == "__main__":
+30 -24
View File
@@ -18,7 +18,7 @@ from bot_bottle.supervise import (
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_GITLEAKS_ALLOW,
TOOL_EGRESS_TOKEN_ALLOW,
read_audit_entries,
@@ -30,8 +30,9 @@ from bot_bottle.supervise import (
FIXED = datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc)
def _proposal(slug: str = "dev", tool: str = TOOL_EGRESS_ALLOW) -> Proposal:
def _proposal(slug: str = "dev", tool: str = TOOL_CAPABILITY_BLOCK) -> Proposal:
payloads = {
TOOL_CAPABILITY_BLOCK: "FROM python:3.13\n",
supervise.TOOL_EGRESS_ALLOW: "routes:\n - host: example.com\n",
supervise.TOOL_EGRESS_BLOCK: "routes:\n - host: example.com\n",
TOOL_GITLEAKS_ALLOW: "file: tests/test_fixture.py\nline: 3\n",
@@ -85,14 +86,14 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
def test_sorted_by_arrival_across_bottles(self):
early = Proposal.new(
bottle_slug="api", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: early.example.com\n", justification="early",
bottle_slug="api", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="early",
current_file_hash="h",
now=datetime(2026, 5, 25, 10, 0, 0, tzinfo=timezone.utc),
)
late = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: late.example.com\n", justification="late",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="late",
current_file_hash="h",
now=datetime(2026, 5, 25, 14, 0, 0, tzinfo=timezone.utc),
)
@@ -121,7 +122,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def tearDown(self):
self._teardown_fake_home()
def _enqueue(self, tool: str = TOOL_EGRESS_ALLOW):
def _enqueue(self, tool: str = TOOL_CAPABILITY_BLOCK):
p = _proposal(tool=tool)
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
@@ -130,29 +131,19 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_approve_writes_response(self):
qp = self._enqueue()
with patch(
"bot_bottle.cli.supervise.apply_routes_change",
return_value=("routes: []\n", "routes:\n - host: example.com\n"),
):
supervise_cli.approve(qp)
resp = read_response(qp.queue_dir, qp.proposal.id)
supervise_cli.approve(qp)
# capability-block is archived on approve, so the response file
# moves to processed/ before the caller can read it.
resp = read_response(qp.queue_dir / "processed", qp.proposal.id)
self.assertEqual(STATUS_APPROVED, resp.status)
self.assertIsNone(resp.final_file)
def test_approve_with_final_file_marks_modified(self):
qp = self._enqueue()
with patch(
"bot_bottle.cli.supervise.apply_routes_change",
return_value=("routes: []\n", "routes:\n - host: edited.example.com\n"),
):
supervise_cli.approve(
qp,
final_file="routes:\n - host: edited.example.com\n",
notes="tweaked",
)
resp = read_response(qp.queue_dir, qp.proposal.id)
supervise_cli.approve(qp, final_file="FROM bookworm\n", notes="tweaked")
resp = read_response(qp.queue_dir / "processed", qp.proposal.id)
self.assertEqual(STATUS_MODIFIED, resp.status)
self.assertEqual("routes:\n - host: edited.example.com\n", resp.final_file)
self.assertEqual("FROM bookworm\n", resp.final_file)
self.assertEqual("tweaked", resp.notes)
def test_reject_writes_rejection(self):
@@ -162,6 +153,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
self.assertEqual(STATUS_REJECTED, resp.status)
self.assertEqual("nope", resp.notes)
def test_no_audit_log_for_capability_block(self):
qp = self._enqueue(tool=TOOL_CAPABILITY_BLOCK)
supervise_cli.approve(qp)
self.assertEqual([], read_audit_entries("egress", "dev"))
def test_approve_egress_block_writes_audit_log(self):
qp = self._enqueue(tool=supervise.TOOL_EGRESS_BLOCK)
with patch(
@@ -236,6 +232,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
self.assertEqual(".txt", supervise_cli._suffix_for_tool(TOOL_EGRESS_TOKEN_ALLOW))
# class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
# # DISABLED — capability_apply functionality is currently commented out.
# pass
class TestEditInEditor(unittest.TestCase):
def test_runs_editor_returns_edited_content(self):
original_editor = os.environ.get("EDITOR")
@@ -280,5 +281,10 @@ class TestEditInEditor(unittest.TestCase):
os.environ["EDITOR"] = original_editor
# class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
# # DISABLED — capability_apply functionality is currently commented out.
# pass
if __name__ == "__main__":
unittest.main()
+25 -39
View File
@@ -50,15 +50,15 @@ from bot_bottle.supervise_server import (
class TestValidation(unittest.TestCase):
def test_capability_block_accepts_anything_nonempty(self):
validate_proposed_file(
_sv.TOOL_CAPABILITY_BLOCK,
"FROM python:3.13\nRUN apk add git\n",
)
def test_empty_proposed_file_rejected_for_tools_with_file_field(self):
with self.assertRaises(_RpcError):
validate_proposed_file(_sv.TOOL_EGRESS_ALLOW, " \n\t")
def test_capability_block_rejected_as_unknown_tool(self):
with self.assertRaises(_RpcError) as cm:
validate_proposed_file("capability-block", "FROM python:3.13\n")
self.assertEqual(ERR_INVALID_PARAMS, cm.exception.code)
self.assertIn("unknown tool", cm.exception.message)
validate_proposed_file(_sv.TOOL_CAPABILITY_BLOCK, " \n\t")
def test_egress_routes_yaml_is_validated(self):
validate_proposed_file(
@@ -127,9 +127,9 @@ class TestRpcInternalErrorOnIoFailure(unittest.TestCase):
with self.assertRaises(_RpcInternalError) as cm:
handle_tools_call(
{
"name": _sv.TOOL_EGRESS_ALLOW,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"dockerfile": "FROM python:3.13\n",
"justification": "x",
},
},
@@ -219,6 +219,7 @@ class TestHandleToolsList(unittest.TestCase):
self.assertEqual(
sorted([
_sv.TOOL_EGRESS_ALLOW,
_sv.TOOL_CAPABILITY_BLOCK,
_sv.TOOL_EGRESS_BLOCK,
_sv.TOOL_LIST_EGRESS_ROUTES,
]),
@@ -294,10 +295,10 @@ class TestHandleToolsCall(unittest.TestCase):
try:
result = handle_tools_call(
{
"name": _sv.TOOL_EGRESS_BLOCK,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"justification": "need example.com",
"dockerfile": "FROM python:3.13\n",
"justification": "need git",
},
},
self.config,
@@ -334,9 +335,9 @@ class TestHandleToolsCall(unittest.TestCase):
try:
result = handle_tools_call(
{
"name": _sv.TOOL_EGRESS_ALLOW,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"dockerfile": "FROM python:3.13\n",
"justification": "needed for tests",
},
},
@@ -358,35 +359,20 @@ class TestHandleToolsCall(unittest.TestCase):
with self.assertRaises(_RpcError):
handle_tools_call(
{
"name": _sv.TOOL_EGRESS_ALLOW,
"arguments": {"routes_yaml": "routes:\n - host: example.com\n"},
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {"dockerfile": "FROM python:3.13\n"},
},
self.config,
)
def test_capability_block_call_raises_unknown_tool(self):
with self.assertRaises(_RpcError) as cm:
handle_tools_call(
{
"name": "capability-block",
"arguments": {
"dockerfile": "FROM python:3.13\n",
"justification": "need git",
},
},
self.config,
)
self.assertEqual(ERR_INVALID_PARAMS, cm.exception.code)
self.assertIn("unknown tool", cm.exception.message)
def test_archives_proposal_after_response(self):
responder = self._respond_when_proposal_appears(_sv.STATUS_APPROVED)
try:
handle_tools_call(
{
"name": _sv.TOOL_EGRESS_ALLOW,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"dockerfile": "FROM python:3.13\n",
"justification": "x",
},
},
@@ -408,10 +394,10 @@ class TestHandleToolsCall(unittest.TestCase):
)
result = handle_tools_call(
{
"name": _sv.TOOL_EGRESS_ALLOW,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"justification": "need egress",
"dockerfile": "FROM python:3.13\n",
"justification": "need a capability",
},
},
config,
@@ -535,7 +521,7 @@ class TestHttpEndToEnd(unittest.TestCase):
self.assertEqual("2.0", result["jsonrpc"])
self.assertEqual(1, result["id"])
names = [t["name"] for t in result["result"]["tools"]] # type: ignore[index]
self.assertNotIn("capability-block", names)
self.assertIn(_sv.TOOL_CAPABILITY_BLOCK, names)
self.assertIn(_sv.TOOL_EGRESS_ALLOW, names)
self.assertIn(_sv.TOOL_EGRESS_BLOCK, names)
@@ -555,9 +541,9 @@ class TestHttpEndToEnd(unittest.TestCase):
"id": 99,
"method": "tools/call",
"params": {
"name": _sv.TOOL_EGRESS_ALLOW,
"name": _sv.TOOL_CAPABILITY_BLOCK,
"arguments": {
"routes_yaml": "routes:\n - host: example.com\n",
"dockerfile": "FROM python:3.13\n",
"justification": "x",
},
},