Compare commits

..

6 Commits

Author SHA1 Message Date
didericis-claude 88f58bf4c0 merge: update tests/unit/test_supervise_server.py from issue-277-coverage-ci
lint / lint (push) Failing after 1m46s
test / unit (pull_request) Failing after 36s
test / integration (pull_request) Successful in 16s
2026-06-25 14:31:36 -04:00
didericis-claude ca0dc72b89 merge: update bot_bottle/cli/supervise.py from issue-277-coverage-ci
test / unit (pull_request) Successful in 41s
test / integration (pull_request) Successful in 16s
lint / lint (push) Successful in 1m46s
2026-06-25 14:31:32 -04:00
didericis-claude 2fc99ea098 merge: update .gitignore from issue-277-coverage-ci
test / unit (pull_request) Successful in 40s
test / integration (pull_request) Successful in 15s
2026-06-25 14:31:29 -04:00
didericis-claude 9a9235f2af merge: update .coveragerc from issue-277-coverage-ci 2026-06-25 14:31:27 -04:00
didericis 42f79283f0 test: fix integration coverage failures
lint / lint (push) Successful in 1m50s
test / unit (pull_request) Successful in 40s
test / integration (pull_request) Successful in 18s
2026-06-25 04:39:43 -04:00
didericis d6b9d7af3e ci: add coverage.py reporting 2026-06-25 04:08:21 -04:00
45 changed files with 568 additions and 2646 deletions
+2 -15
View File
@@ -8,7 +8,6 @@ on:
- '**.py'
- '.pylintrc'
- 'pyrightconfig.json'
- '.coveragerc'
workflow_dispatch:
jobs:
@@ -46,19 +45,10 @@ jobs:
echo "errors=$ERRORS" >> $GITHUB_OUTPUT
echo "Pyright errors: $ERRORS"
- name: Run coverage and extract percentage
id: coverage
run: |
python -m coverage run -m unittest discover -t . -s tests/unit > /dev/null 2>&1 || true
PERCENT=$(python -m coverage report 2>/dev/null | grep '^TOTAL' | grep -oP '\d+(?=%)' | tail -1)
echo "percent=$PERCENT" >> $GITHUB_OUTPUT
echo "Coverage: $PERCENT%"
- name: Update badges in README
run: |
PYLINT_SCORE="${{ steps.pylint.outputs.score }}"
PYRIGHT_ERRORS="${{ steps.pyright.outputs.errors }}"
COVERAGE_PERCENT="${{ steps.coverage.outputs.percent }}"
PYLINT_SCORE_ENCODED=$(echo "$PYLINT_SCORE" | sed 's|/|%2F|g')
@@ -68,12 +58,9 @@ jobs:
if [ -n "$PYRIGHT_ERRORS" ]; then
sed -i "s|/badge/pyright-[^)]*|/badge/pyright-${PYRIGHT_ERRORS}%20errors-brightgreen|" README.md
fi
if [ -n "$COVERAGE_PERCENT" ]; then
sed -i "s|/badge/coverage-[^)]*|/badge/coverage-${COVERAGE_PERCENT}%25-brightgreen|" README.md
fi
echo "Updated badges:"
grep -E "pylint|pyright|coverage" README.md | head -3
grep -E "pylint|pyright" README.md | head -2
- name: Commit and push badge updates
run: |
@@ -86,7 +73,7 @@ jobs:
else
echo "Badge changes detected, committing..."
git add README.md
MSG="chore: update quality badges"$'\n\n'"- Pylint: ${{ steps.pylint.outputs.score }}"$'\n'"- Pyright: ${{ steps.pyright.outputs.errors }} errors"$'\n'"- Coverage: ${{ steps.coverage.outputs.percent }}%"$'\n\n'"[skip ci]"
MSG="chore: update quality badges"$'\n\n'"- Pylint: ${{ steps.pylint.outputs.score }}"$'\n'"- Pyright: ${{ steps.pyright.outputs.errors }} errors"$'\n\n'"[skip ci]"
git commit -m "$MSG"
git push
fi
-1
View File
@@ -7,7 +7,6 @@
[![test](https://gitea.dideric.is/didericis/bot-bottle/actions/workflows/test.yml/badge.svg?branch=main)](https://gitea.dideric.is/didericis/bot-bottle/actions?workflow=test.yml)
[![pylint](https://img.shields.io/badge/pylint-9.93%2F10-brightgreen)](https://github.com/PyCQA/pylint)
[![pyright](https://img.shields.io/badge/pyright-0%20errors-brightgreen)](https://github.com/microsoft/pyright)
[![coverage](https://img.shields.io/badge/coverage-79%25-brightgreen)](https://coverage.readthedocs.io/)
**Problem:** Developer wants to run a coding agent without supervision, but they don't want a prompt injected or misbehaving agent wrecking their environment or exfiltrating sensitive data.
+3 -13
View File
@@ -72,9 +72,6 @@ class BottleSpec:
identity: str = ""
label: str = ""
color: str = ""
# Ordered bottle names selected at launch (issue #269). When non-empty
# they are merged in order and replace the agent's `bottle:` field.
bottle_names: tuple[str, ...] = ()
@dataclass(frozen=True)
@@ -132,11 +129,7 @@ class BottlePlan(ABC):
info(f"provider : {self.agent_provision.template}")
print_multi("env ", env_names)
print_multi("skills ", list(agent.skills))
effective_bottles = (
list(spec.bottle_names) if spec.bottle_names
else ([agent.bottle] if agent.bottle else [])
)
print_multi("bottle ", effective_bottles)
info(f"bottle : {agent.bottle}")
identity = manifest.git_identity_summary()
if identity:
@@ -370,7 +363,7 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
Returns the loaded Manifest for the selected agent. Subclasses with
additional preconditions should override and call
`super()._validate(spec)` first."""
manifest = spec.manifest.load_for_agent(spec.agent_name, spec.bottle_names)
manifest = spec.manifest.load_for_agent(spec.agent_name)
self._validate_skills(manifest.agent.skills)
self._validate_agent_provider_dockerfile(spec, manifest)
return manifest
@@ -396,12 +389,9 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
if not path.is_absolute():
path = Path(spec.user_cwd) / path
if not path.is_file():
effective = (
", ".join(spec.bottle_names) if spec.bottle_names else manifest.agent.bottle
)
die(
f"agent_provider.dockerfile for bottle "
f"'{effective}' not found: {path}"
f"'{manifest.agent.bottle}' not found: {path}"
)
@abstractmethod
@@ -0,0 +1,211 @@
"""capability_apply — host-side orchestrator for capability-block
remediation (PRD 0016).
On approval of a capability-block proposal, the dashboard calls
apply_capability_change(slug, new_dockerfile) which:
1. Snapshots the agent's transcript dir to
~/.bot-bottle/state/<slug>/transcript/ (best-effort).
2. Pushes the agent's working tree via `git push` (best-effort —
no upstream / no commits / no git repo all skip with a log).
3. Writes the new Dockerfile to
~/.bot-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
state). The next `cli.py start <agent>` picks it up.
4. Force-removes the agent container + all sidecars + the
per-bottle networks. Idempotent — missing resources are not
errors.
Returns (before, after) Dockerfile contents so the dashboard can
record / render the diff. (capability-block has no audit log per
PRD 0013 — the per-bottle Dockerfile state is its own record.)
This is "fire-and-forget" from the agent's perspective: by the time
the dashboard writes the response file the supervise sidecar is
gone, so the agent's tool call connection drops without ever
receiving the response. The replacement agent (next manual
`cli.py start`) sees the new Dockerfile and starts from there.
v1 does not auto-relaunch — see PRD 0016's capability-block return
semantics open question.
"""
from __future__ import annotations
import shutil
import subprocess
from ...agent_provider import get_provider
from ...log import info, warn
from ...bottle_state import (
mark_preserved,
per_bottle_dockerfile,
transcript_snapshot_dir,
write_per_bottle_dockerfile,
)
from .sidecar_bundle import sidecar_bundle_container_name
# Agent home inside the container (per the repo Dockerfile's
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
# dir + the workspace dir for git push.
_AGENT_HOME_IN_CONTAINER = "/home/node"
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
# Per-bottle resource name patterns (mirroring prepare.py).
def _agent_container_name(slug: str) -> str:
return f"bot-bottle-{slug}"
def _per_bottle_container_names(slug: str) -> list[str]:
"""All container names that belong to this bottle. Missing
containers are silently skipped by the teardown helper, so it's
fine to include names that don't exist for a given bottle."""
return [
_agent_container_name(slug),
sidecar_bundle_container_name(slug),
]
def _per_bottle_network_names(slug: str) -> list[str]:
return [
f"bot-bottle-net-{slug}",
f"bot-bottle-egress-{slug}",
]
class CapabilityApplyError(RuntimeError):
"""Raised when the apply fails in a way that should keep the
proposal pending (so the operator can retry). Best-effort
failures (transcript snapshot, git push) do not raise — they
just log and proceed."""
# --- Public helpers --------------------------------------------------------
def fetch_current_dockerfile(slug: str) -> str:
"""Return the Dockerfile content the next `cli.py start <agent>`
would use for this bottle. If a per-bottle override exists, that
one; otherwise the repo's Dockerfile.
Used by the operator-edit verb to show the current source of
truth, and by apply_capability_change for the before-diff."""
override = per_bottle_dockerfile(slug)
if override is not None:
return override
repo_dockerfile = get_provider("claude").dockerfile
if repo_dockerfile.is_file():
return repo_dockerfile.read_text()
raise CapabilityApplyError(
f"no per-bottle Dockerfile for {slug} and no provider Dockerfile at "
f"{repo_dockerfile}"
)
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
"""End-to-end capability-block remediation. See module docstring
for the sequence. Returns (before, after) Dockerfile content."""
if not new_dockerfile.strip():
raise CapabilityApplyError("proposed Dockerfile is empty")
before = fetch_current_dockerfile(slug)
snapshot_transcript(slug)
_push_working_tree(slug)
write_per_bottle_dockerfile(slug, new_dockerfile)
# Set the preserve marker BEFORE teardown so cli.py's session-end
# cleanup sees it and keeps the state dir intact for the
# operator's `cli.py resume <identity>`. Without the marker the
# state dir would be deleted as part of normal session end.
mark_preserved(slug)
_teardown_bottle(slug)
return before, new_dockerfile
# --- Internals -------------------------------------------------------------
def snapshot_transcript(slug: str) -> None:
"""`docker cp` /home/node/.claude out of the agent container into
~/.bot-bottle/state/<slug>/transcript/. Best-effort: missing
container, missing dir, or cp error all log a warning and return.
The transcript is what `claude --resume` reads to pick up where
the agent left off.
Called from two places:
- capability-apply, before tearing the bottle down.
- cli.py's session-end path, before the launch context closes,
so a crash or normal exit also leaves a transcript on disk
(deleted along with the state dir on clean exit, kept on
crash or capability-block per the preserve marker)."""
container = _agent_container_name(slug)
dest = transcript_snapshot_dir(slug)
if dest.exists():
# Remove any prior snapshot so the new one is a clean copy.
shutil.rmtree(dest, ignore_errors=True)
dest.parent.mkdir(parents=True, exist_ok=True)
r = subprocess.run(
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"transcript snapshot skipped "
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
)
return
info(f"transcript snapshotted to {dest}")
def _push_working_tree(slug: str) -> None:
"""`docker exec <agent> git push` from /home/node/workspace.
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
network all log a warning and return. The replacement bottle
will pick up whatever's actually upstream."""
container = _agent_container_name(slug)
r = subprocess.run(
[
"docker", "exec", container, "sh", "-c",
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
f"git push origin HEAD 2>&1 || true",
],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"capability-apply: git push skipped "
f"({(r.stderr or '').strip() or 'docker exec failed'})"
)
return
output = (r.stdout or "").strip()
if output:
info(f"capability-apply: git push: {output}")
else:
info("capability-apply: git push ran (no output — likely not a git workspace)")
def _teardown_bottle(slug: str) -> None:
"""Force-remove all per-bottle docker resources. Idempotent —
`docker rm -f` / `docker network rm` silently ignore missing
names, so this can be called even mid-rebuild."""
info(f"capability-apply: tearing down bottle {slug}")
for name in _per_bottle_container_names(slug):
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
for net in _per_bottle_network_names(slug):
subprocess.run(
["docker", "network", "rm", net],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
__all__ = [
"CapabilityApplyError",
"apply_capability_change",
"fetch_current_dockerfile",
"snapshot_transcript",
]
+10
View File
@@ -34,6 +34,7 @@ from ...egress import (
from ...git_gate import GIT_GATE_HOSTNAME
from ...log import die, warn
from ...supervise import (
CURRENT_CONFIG_DIR_IN_AGENT,
QUEUE_DIR_IN_CONTAINER,
SUPERVISE_HOSTNAME,
SUPERVISE_PORT,
@@ -232,6 +233,15 @@ def _agent_service(plan: DockerBottlePlan) -> dict[str, Any]:
if plan.use_runsc:
service["runtime"] = "runsc"
volumes: list[dict[str, Any]] = []
if plan.supervise_plan is not None:
volumes.append(_bind(
plan.supervise_plan.current_config_dir,
CURRENT_CONFIG_DIR_IN_AGENT,
))
if volumes:
service["volumes"] = volumes
# The init supervisor inside the bundle owns intra-bundle
# daemon ordering, so the agent only waits for the bundle
# container itself.
-1
View File
@@ -63,7 +63,6 @@ def write_launch_metadata(
backend=backend,
label=spec.label,
color=spec.color,
bottle_names=spec.bottle_names,
))
+16 -19
View File
@@ -1,7 +1,8 @@
"""Per-bottle persistent state.
"""Per-bottle persistent state (PRD 0016).
Holds optional per-bottle Dockerfile overrides, the transcript snapshot
the state-preservation helper saves before teardown, and the launch metadata that lets
Holds the per-bottle Dockerfile override that capability-block
remediation writes, the transcript snapshot the state-preservation
helper saves before teardown, and the launch metadata that lets
`cli.py resume <identity>` reconstruct a bottle's spec. State
lives at:
@@ -60,7 +61,7 @@ _METADATA_NAME = "metadata.json"
_LIVE_CONFIG_SUBDIR = "live-config"
LIVE_CONFIG_ROUTES_NAME = "routes.yaml"
LIVE_CONFIG_ALLOWLIST_NAME = "allowlist"
# Empty marker file. Session preservation writes it before teardown so
# Empty marker file. capability_apply writes it before teardown so
# cli.py's session-end cleanup knows to preserve the state dir for
# `cli.py resume <identity>`. Absent = clean up.
_PRESERVE_MARKER = ".preserve"
@@ -111,10 +112,6 @@ class BottleMetadata:
backend: str = ""
label: str = ""
color: str = ""
# Ordered bottle names selected at launch (issue #269). Empty tuple
# for state dirs written before this change; resume falls back to
# the agent's `bottle:` field in that case.
bottle_names: tuple[str, ...] = ()
def metadata_path(identity: str) -> Path:
@@ -142,10 +139,6 @@ def read_metadata(identity: str) -> BottleMetadata | None:
if not isinstance(raw, dict):
return None
raw_typed = cast(dict[str, object], raw)
raw_bottle_names = raw_typed.get("bottle_names", [])
bottle_names: tuple[str, ...] = ()
if isinstance(raw_bottle_names, list):
bottle_names = tuple(str(n) for n in raw_bottle_names if isinstance(n, str))
return BottleMetadata(
identity=str(raw_typed.get("identity", identity)),
agent_name=str(raw_typed.get("agent_name", "")),
@@ -156,7 +149,6 @@ def read_metadata(identity: str) -> BottleMetadata | None:
backend=str(raw_typed.get("backend", "")),
label=str(raw_typed.get("label", "")),
color=str(raw_typed.get("color", "")),
bottle_names=bottle_names,
)
@@ -172,7 +164,8 @@ def per_bottle_dockerfile_path(identity: str) -> Path:
def per_bottle_dockerfile(identity: str) -> str | None:
"""Return the per-bottle Dockerfile content if present, else
None. None means: use the provider or manifest Dockerfile."""
None. None means: use the repo's Dockerfile (the original
pre-capability-block behavior)."""
p = per_bottle_dockerfile_path(identity)
if p.is_file():
return p.read_text()
@@ -256,7 +249,9 @@ def write_live_config(
def transcript_snapshot_dir(identity: str) -> Path:
"""Where agent session snapshots are kept for resume flows."""
"""Where capability_apply stashes the agent's transcript before
teardown, so the next `cli.py start <agent>` can offer to
resume from it."""
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
@@ -283,7 +278,8 @@ def git_gate_state_dir(identity: str) -> Path:
def supervise_state_dir(identity: str) -> Path:
"""State subdir reserved for supervise sidecar bind-mount sources.
"""State subdir for the supervise sidecar's current-config dir
(bind-mounted into the agent at /etc/bot-bottle/current-config).
The queue dir is intentionally NOT under here — it lives at
~/.bot-bottle/queue/<slug>/ alongside the audit logs, so it
survives state-dir cleanup."""
@@ -305,8 +301,9 @@ def preserve_marker_path(identity: str) -> Path:
def mark_preserved(identity: str) -> Path:
"""Mark this bottle's state for preservation across session
teardown so cli.py's session-end cleanup leaves the state dir
intact for a subsequent `cli.py resume`."""
teardown. Written by capability_apply.apply_capability_change so
cli.py's session-end cleanup leaves the state dir intact for a
subsequent `cli.py resume`."""
path = preserve_marker_path(identity)
path.parent.mkdir(parents=True, exist_ok=True)
path.touch()
@@ -319,7 +316,7 @@ def is_preserved(identity: str) -> bool:
def clear_preserve_marker(identity: str) -> None:
"""Idempotent removal. Called at fresh launch (start or resume)
so a marker left from a prior preserved session doesn't keep
so a marker left from a prior capability-block doesn't keep
state alive past the next normal session-end."""
try:
preserve_marker_path(identity).unlink()
+3 -2
View File
@@ -13,8 +13,9 @@ dirs are shared layout, so docker is the single owner of that
bucket.
State dirs with `.preserve` are intentionally never touched — they
hold preserved sessions the operator may want to `resume`. Manual
`rm -rf ~/.bot-bottle/state/<identity>` is the path for those.
hold capability-block rebuilds or crash snapshots the operator may
want to `resume`. Manual `rm -rf ~/.bot-bottle/state/<identity>`
is the path for those.
"""
from __future__ import annotations
+5 -5
View File
@@ -4,12 +4,13 @@ Reads ~/.bot-bottle/state/<identity>/metadata.json to recover the
(agent_name, cwd, copy_cwd) the bottle was originally started with,
then runs the same launch core as `start` — but pinned to the
recorded identity so the new bottle picks up any per-bottle Dockerfile
override and transcript snapshot under the same state dir.
(from capability-block apply) and transcript snapshot under the same
state dir.
Use case: an interrupted or preserved bottle needs to be relaunched;
the operator runs
Use case: an agent calls capability-block, the dashboard approves
and tears down the bottle, the operator runs
./cli.py resume <identity>
to bring up the replacement from the recorded state.
to bring up the replacement with the new capabilities baked in.
"""
from __future__ import annotations
@@ -49,7 +50,6 @@ def cmd_resume(argv: list[str]) -> int:
copy_cwd=metadata.copy_cwd,
user_cwd=metadata.cwd or USER_CWD,
identity=metadata.identity,
bottle_names=tuple(metadata.bottle_names),
)
backend_name = metadata.backend or None
return _launch_bottle(
+9 -156
View File
@@ -31,8 +31,9 @@ from ..bottle_state import (
is_preserved,
mark_preserved,
)
# from ..backend.docker.capability_apply import snapshot_transcript
from ..log import info
from ..manifest import Manifest, ManifestIndex
from ..manifest import ManifestIndex
from ._common import PROG, USER_CWD, read_tty_line
from . import tui
@@ -73,23 +74,6 @@ def cmd_start(argv: list[str]) -> int:
backend_name: str | None = args.backend
# Bottle multiselect: always show after agent selection so operators
# can compose bottles at launch time without editing agent manifests.
available_bottles = manifest.all_bottle_names
lineage_map = _bottle_lineage(manifest)
display_labels = [lineage_map.get(n, n) for n in available_bottles]
label_to_name = {lineage_map.get(n, n): n for n in available_bottles}
initial_bottle = _peek_agent_bottle(manifest, agent_name)
initial_labels = [lineage_map.get(initial_bottle, initial_bottle)] if initial_bottle else []
selected_labels = tui.filter_multiselect(
display_labels,
title="Select bottles",
initial=initial_labels,
)
if selected_labels is None:
return 0
bottle_names = tuple(label_to_name.get(lbl, lbl) for lbl in selected_labels)
label, color = tui.name_color_modal(default_label=agent_name)
label, color = _resolve_unique_label(label, color)
@@ -100,7 +84,6 @@ def cmd_start(argv: list[str]) -> int:
user_cwd=USER_CWD,
label=label,
color=color,
bottle_names=bottle_names,
)
return _launch_bottle(
spec,
@@ -207,38 +190,6 @@ def _identity_from_plan(plan: object) -> str:
return getattr(plan, "slug", "")
def _peek_agent_bottle(manifest: ManifestIndex, agent_name: str) -> str:
"""Return the `bottle:` value from the named agent's frontmatter without
fully parsing the agent file, or "" when absent or unreadable.
Used to pre-populate the bottle multiselect with the agent's default
bottle so operators who haven't removed `bottle:` from their manifests
don't need to re-select it every time."""
if manifest.home_md is None:
# Eager mode (from_json_obj): agent is pre-parsed.
if agent_name in manifest.agents:
return manifest.agents[agent_name].bottle
return ""
from ..manifest_loader import scan_agent_names
from ..yaml_subset import YamlSubsetError, parse_frontmatter
home_agents = scan_agent_names(manifest.home_md / "agents")
cwd_agents: dict[str, Path] = {}
if manifest.cwd_md is not None:
cwd_agents = scan_agent_names(manifest.cwd_md / "agents")
merged = {**home_agents, **cwd_agents}
path = merged.get(agent_name)
if path is None:
return ""
try:
fm, _ = parse_frontmatter(path.read_text())
bottle = fm.get("bottle", "")
return str(bottle) if isinstance(bottle, str) else ""
except (OSError, YamlSubsetError):
return ""
def _resolve_unique_label(label: str, color: str) -> tuple[str, str]:
"""Re-prompt with a disclaimer until the label's slug is not already
in use among running bottles. Passes through unchanged when no
@@ -265,112 +216,10 @@ def _text_prompt_yes() -> bool:
def _text_render_preflight():
def _render(plan: DockerBottlePlan) -> None:
print(file=sys.stderr)
print(_manifest_to_yaml(plan.manifest), file=sys.stderr)
plan.print()
return _render
def _bottle_lineage(manifest: ManifestIndex) -> dict[str, str]:
"""Return {bottle_name: lineage_label} for bottles that have an extends chain.
Bottles without a parent are omitted (the caller falls back to the bare name).
Labels show the chain root-first: e.g. 'dev -> bot-bottle-dev -> claude-dev'."""
if manifest.home_md is None:
return {}
bottles_dir = manifest.home_md / "bottles"
if not bottles_dir.is_dir():
return {}
from ..yaml_subset import YamlSubsetError, parse_frontmatter
extends_of: dict[str, str] = {}
for path in bottles_dir.glob("*.md"):
try:
fm, _ = parse_frontmatter(path.read_text())
parent = fm.get("extends", "")
if isinstance(parent, str) and parent:
extends_of[path.stem] = parent
except (OSError, YamlSubsetError):
pass
labels: dict[str, str] = {}
for name in extends_of:
chain = [name]
seen = {name}
cur = name
while cur in extends_of:
par = extends_of[cur]
if par in seen:
break
chain.append(par)
seen.add(par)
cur = par
labels[name] = " -> ".join(reversed(chain))
return labels
def _manifest_to_yaml(manifest: Manifest) -> str:
"""Serialize the resolved Manifest to a YAML string for preflight display."""
lines: list[str] = []
agent = manifest.agent
lines.append("agent:")
if agent.skills:
lines.append(" skills:")
for s in agent.skills:
lines.append(f" - {s}")
if not agent.git_user.is_empty():
lines.append(" git-gate:")
lines.append(" user:")
if agent.git_user.name:
lines.append(f" name: {agent.git_user.name}")
if agent.git_user.email:
lines.append(f" email: {agent.git_user.email}")
bottle = manifest.bottle
lines.append("bottle:")
if bottle.agent_provider.template != "claude" or bottle.agent_provider.dockerfile:
lines.append(" agent_provider:")
lines.append(f" template: {bottle.agent_provider.template}")
if bottle.agent_provider.dockerfile:
lines.append(f" dockerfile: {bottle.agent_provider.dockerfile}")
if bottle.env:
lines.append(" env:")
for k, v in sorted(bottle.env.items()):
lines.append(f" {k}: {v}")
has_git_gate = not bottle.git_user.is_empty() or bottle.git
if has_git_gate:
lines.append(" git-gate:")
if not bottle.git_user.is_empty():
lines.append(" user:")
if bottle.git_user.name:
lines.append(f" name: {bottle.git_user.name}")
if bottle.git_user.email:
lines.append(f" email: {bottle.git_user.email}")
if bottle.git:
lines.append(" repos:")
for entry in bottle.git:
lines.append(f" {entry.Name}:")
lines.append(f" url: {entry.Upstream}")
if bottle.egress.routes:
lines.append(" egress:")
lines.append(" routes:")
for r in bottle.egress.routes:
lines.append(f" - host: {r.Host}")
if r.AuthScheme:
lines.append(f" auth:")
lines.append(f" scheme: {r.AuthScheme}")
lines.append(f" supervise: {'true' if bottle.supervise else 'false'}")
return "\n".join(lines)
def _launch_bottle(
spec: BottleSpec,
*,
@@ -408,8 +257,12 @@ def _launch_bottle(
)
# While the container is still alive: always snapshot the
# transcript and — if the agent exited non-zero — mark
# the state for preservation. This picks up crashes /
# Ctrl-Cs / OOM kills before cleanup removes the state dir.
# the state for preservation. Capability-block already
# did both before triggering teardown from the dashboard;
# this picks up crashes / Ctrl-Cs / OOM kills the same
# way. snapshot_transcript is best-effort so the
# capability-block path's prior snapshot isn't clobbered
# when the container is already gone.
if agent_provider_template == "claude":
capture_claude_session_state(identity, exit_code)
return 0
-292
View File
@@ -17,43 +17,6 @@ import sys
from typing import Any, Optional
def filter_multiselect(
items: list[str],
*,
title: str = "",
initial: Optional[list[str]] = None,
tty_path: str = "/dev/tty",
) -> Optional[list[str]]:
"""Render a multi-select picker over *items*.
Returns the ordered list of selected items, or ``None`` if the user
cancelled (Esc / ``q`` / Ctrl-C / Ctrl-D with no items).
Press Space to toggle the item under the cursor.
Press Enter to confirm the current selection.
Press Ctrl-D to confirm the current selection (returns even if empty).
Press Esc/q to cancel (returns None).
*initial* pre-populates the selection in insertion order. Items
added are appended; removed items leave the remaining order unchanged.
"""
if not items:
return []
try:
tty_fd = open(tty_path, "r+b", buffering=0)
except OSError:
return None
try:
fd_dup = os.dup(tty_fd.fileno())
return _run_multiselect(
items, title=title, initial=list(initial or []), tty_fd=fd_dup
)
finally:
tty_fd.close()
def filter_select(
items: list[str],
*,
@@ -258,261 +221,6 @@ def _addstr_safe(screen: Any, row: int, col: int, text: str, attr: int = curses.
pass
# ---------------------------------------------------------------------------
# filter_multiselect internals
# ---------------------------------------------------------------------------
_KEY_SPACE = 32
def _run_multiselect(
items: list[str], *, title: str, initial: list[str], tty_fd: int
) -> Optional[list[str]]:
"""Drive a curses multi-select session on *tty_fd*."""
os.environ.setdefault("TERM", "xterm-256color")
orig_stdin = sys.__stdin__
orig_stdout = sys.__stdout__
try:
import io
tty_text = io.TextIOWrapper(io.FileIO(tty_fd, mode='r+'), write_through=True)
sys.__stdin__ = tty_text # type: ignore[assignment]
sys.__stdout__ = tty_text # type: ignore[assignment]
screen = curses.initscr()
curses.noecho()
curses.cbreak()
screen.keypad(True)
try:
result = _multiselect_loop(screen, items, title=title, initial=initial)
finally:
screen.keypad(False)
curses.nocbreak()
curses.echo()
curses.endwin()
except Exception: # noqa: W0718
return None
finally:
sys.__stdin__ = orig_stdin # type: ignore[assignment]
sys.__stdout__ = orig_stdout # type: ignore[assignment]
return result
def _multiselect_loop(
screen: Any, items: list[str], *, title: str, initial: list[str]
) -> Optional[list[str]]:
query = ""
cursor = 0
selected: list[str] = [s for s in initial if s in items]
# focus = "filter": navigate + toggle items in the filterable list
# focus = "order": navigate + reorder items in the selected list
focus = "filter"
order_cursor = 0
while True:
filtered = _filter_items(items, query)
if not filtered:
cursor = 0
elif cursor >= len(filtered):
cursor = len(filtered) - 1
if not selected:
order_cursor = 0
if focus == "order":
focus = "filter"
elif order_cursor >= len(selected):
order_cursor = len(selected) - 1
try:
_render_multiselect(
screen, filtered, cursor,
query=query, title=title, selected=selected,
focus=focus, order_cursor=order_cursor,
)
except curses.error:
return None
try:
key = screen.getch()
except KeyboardInterrupt:
return None
if key in (_KEY_ESC, _KEY_CTRL_C, ord("q")):
return None
if key == _KEY_CTRL_D:
return list(selected)
# Tab toggles between filter and order focus.
if key == ord("\t"):
if focus == "filter" and selected:
focus = "order"
order_cursor = 0
else:
focus = "filter"
continue
if focus == "filter":
if key in (curses.KEY_ENTER, _KEY_ENTER_ALT, ord("\r")):
return list(selected)
elif key == _KEY_SPACE:
if filtered:
item = filtered[cursor]
if item in selected:
selected.remove(item)
else:
selected.append(item)
elif key in (curses.KEY_UP, ord("k")):
if cursor > 0:
cursor -= 1
elif key in (curses.KEY_DOWN, ord("j")):
if cursor < len(filtered) - 1:
cursor += 1
elif key in (curses.KEY_BACKSPACE, _KEY_BACKSPACE_WIN, 127):
query = query[:-1]
new_filtered = _filter_items(items, query)
if cursor >= len(new_filtered):
cursor = max(0, len(new_filtered) - 1)
elif 32 <= key <= 126 and key != _KEY_SPACE:
query += chr(key)
cursor = 0
else: # focus == "order"
if key in (curses.KEY_UP, ord("k")):
if order_cursor > 0:
order_cursor -= 1
elif key in (curses.KEY_DOWN, ord("j")):
if order_cursor < len(selected) - 1:
order_cursor += 1
elif key == ord("K"):
# Move selected item up (earlier in order).
if order_cursor > 0:
i = order_cursor
selected[i - 1], selected[i] = selected[i], selected[i - 1]
order_cursor -= 1
elif key == ord("J"):
# Move selected item down (later in order).
if order_cursor < len(selected) - 1:
i = order_cursor
selected[i], selected[i + 1] = selected[i + 1], selected[i]
order_cursor += 1
elif key in (curses.KEY_ENTER, _KEY_ENTER_ALT, ord("\r"), _KEY_SPACE):
# Remove item from selection while in order mode.
del selected[order_cursor]
if order_cursor >= len(selected) and order_cursor > 0:
order_cursor -= 1
def _render_multiselect(
screen: Any,
filtered: list[str],
cursor: int,
*,
query: str,
title: str,
selected: list[str],
focus: str = "filter",
order_cursor: int = 0,
) -> None:
screen.erase()
rows, cols = screen.getmaxyx()
min_rows = 7
if rows < min_rows:
raise curses.error("terminal too small")
sep = "" * min(cols - 1, 40)
row = 0
if title and row < rows - 1:
_addstr_safe(screen, row, 0, title[:cols - 1], curses.A_BOLD)
row += 1
# Filter line — dim when focus is on the order panel.
filter_label = f"Filter: {query}"
filter_hint = " [Tab: reorder]" if focus == "filter" and selected else ""
filter_attr = curses.A_DIM if focus == "order" else curses.A_NORMAL
if row < rows - 1:
_addstr_safe(screen, row, 0, (filter_label + filter_hint)[:cols - 1], filter_attr)
row += 1
if row < rows - 1:
_addstr_safe(screen, row, 0, sep)
row += 1
# Compute how many rows the bottom order panel needs.
# Cap the visible selected list to keep the filter list legible.
order_rows = min(len(selected), max(1, (rows - row) // 3)) if selected else 0
# Bottom reserved: sep + order_rows + sep + help = order_rows + 3
bottom_reserved = order_rows + 3
list_start = row
list_rows = rows - list_start - bottom_reserved
if list_rows < 1:
list_rows = 1
selected_set = set(selected)
filter_dim = focus == "order"
scroll = max(0, cursor - list_rows + 1)
visible = filtered[scroll: scroll + list_rows]
for idx, item in enumerate(visible):
abs_idx = scroll + idx
mark = "[*]" if item in selected_set else "[ ]"
prefix = "> " if (abs_idx == cursor and focus == "filter") else " "
line = (prefix + mark + " " + item)[:cols - 1]
item_attr = curses.A_DIM if filter_dim else (
curses.A_REVERSE if abs_idx == cursor else curses.A_NORMAL
)
if row < rows - bottom_reserved:
_addstr_safe(screen, row, 0, line, item_attr)
row += 1
# Separator before the order panel.
if row < rows - (order_rows + 2):
_addstr_safe(screen, row, 0, sep)
row += 1
# Order panel.
order_scroll = max(0, order_cursor - order_rows + 1)
order_visible = selected[order_scroll: order_scroll + order_rows]
for idx, item in enumerate(order_visible):
abs_idx = order_scroll + idx
is_active = focus == "order" and abs_idx == order_cursor
prefix = "> " if is_active else " "
line = (prefix + item)[:cols - 1]
attr = curses.A_REVERSE if is_active else curses.A_NORMAL
if row < rows - 2:
_addstr_safe(screen, row, 0, line, attr)
row += 1
if row < rows - 1:
_addstr_safe(screen, row, 0, sep)
row += 1
if focus == "filter":
help_line = "[↑↓/jk] move [Space] toggle [Enter] confirm [Tab] reorder [Esc/q] cancel"
else:
help_line = "[↑↓/jk] cursor [K/J] reorder [Space/Enter] remove [Tab] back [Ctrl-D] done"
if row < rows:
_addstr_safe(screen, min(rows - 1, row), 0, help_line[:cols - 1])
screen.refresh()
# ---------------------------------------------------------------------------
# name_color_modal — two-step label + color picker
# ---------------------------------------------------------------------------
+1 -1
View File
@@ -21,7 +21,7 @@ FROM node:22-slim
# to it) works against egress's bumped TLS without the agent needing
# local DNS.
RUN apt-get update \
&& apt-get install -y --no-install-recommends git ca-certificates curl ripgrep \
&& apt-get install -y --no-install-recommends git ca-certificates curl \
&& rm -rf /var/lib/apt/lists/*
# App-specific deps. Python isn't required by claude-code itself
+1 -1
View File
@@ -6,7 +6,7 @@
FROM node:22-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends git ca-certificates curl procps ripgrep \
&& apt-get install -y --no-install-recommends git ca-certificates curl procps \
&& rm -rf /var/lib/apt/lists/*
# App-specific deps. Python isn't required by codex itself
@@ -21,11 +21,6 @@ from pathlib import Path
from ...deploy_key_provisioner import DeployKeyCollisionError, DeployKeyProvisioner
# Timeout for ssh-keygen and Gitea API HTTP calls. A hung Gitea instance at
# prepare time would stall bottle launch indefinitely without this bound.
_API_TIMEOUT_SECS = 30
_KEYGEN_TIMEOUT_SECS = 10
class GiteaDeployKeyProvisioner(DeployKeyProvisioner):
"""Manages deploy keys on a Gitea instance."""
@@ -51,7 +46,6 @@ class GiteaDeployKeyProvisioner(DeployKeyProvisioner):
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
timeout=_KEYGEN_TIMEOUT_SECS,
)
private_key = key_path.read_bytes()
public_key = key_path.with_suffix(".pub").read_text().strip()
@@ -73,7 +67,7 @@ class GiteaDeployKeyProvisioner(DeployKeyProvisioner):
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS) as resp:
with urllib.request.urlopen(req) as resp:
body = json.loads(resp.read())
except urllib.error.HTTPError as exc:
_body = _read_error_body(exc)
@@ -104,7 +98,7 @@ class GiteaDeployKeyProvisioner(DeployKeyProvisioner):
method="DELETE",
)
try:
with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS):
with urllib.request.urlopen(req):
pass
except urllib.error.HTTPError as exc:
if exc.code == 404:
+10 -21
View File
@@ -210,17 +210,6 @@ def egress_token_env_map(
return out
def _yaml_str_escape(s: str) -> str:
"""Escape a string for use inside a YAML double-quoted scalar."""
return (
s.replace("\\", "\\\\")
.replace('"', '\\"')
.replace("\n", "\\n")
.replace("\r", "\\r")
.replace("\t", "\\t")
)
def _route_to_yaml_fields(r: Route) -> dict[str, object]:
fields: dict[str, object] = {"host": r.host}
if r.auth_scheme and r.token_env:
@@ -283,12 +272,12 @@ def _render_match_entry(entry: dict[str, object]) -> list[str]:
for pd in entry["paths"]: # type: ignore[union-attr]
pd_dict: dict[str, str] = pd # type: ignore[assignment]
if "type" in pd_dict:
lines.append(f' - type: "{_yaml_str_escape(pd_dict["type"])}"')
lines.append(f' value: "{_yaml_str_escape(pd_dict["value"])}"')
lines.append(f' - type: "{pd_dict["type"]}"')
lines.append(f' value: "{pd_dict["value"]}"')
else:
lines.append(f' - value: "{_yaml_str_escape(pd_dict["value"])}"')
lines.append(f' - value: "{pd_dict["value"]}"')
if "methods" in entry:
methods_str = ", ".join(f'"{_yaml_str_escape(m)}"' for m in entry["methods"]) # type: ignore[union-attr]
methods_str = ", ".join(f'"{m}"' for m in entry["methods"]) # type: ignore[union-attr]
prefix = " - " if first_key else " "
lines.append(f'{prefix}methods: [{methods_str}]')
first_key = False
@@ -298,8 +287,8 @@ def _render_match_entry(entry: dict[str, object]) -> list[str]:
first_key = False
for hd in entry["headers"]: # type: ignore[union-attr]
hd_dict: dict[str, str] = hd # type: ignore[assignment]
lines.append(f' - name: "{_yaml_str_escape(hd_dict["name"])}"')
lines.append(f' value: "{_yaml_str_escape(hd_dict["value"])}"')
lines.append(f' - name: "{hd_dict["name"]}"')
lines.append(f' value: "{hd_dict["value"]}"')
if first_key:
lines.append(" - {}")
return lines
@@ -319,10 +308,10 @@ def egress_render_routes(
return "\n".join(lines) + "\n"
for r in routes:
f = _route_to_yaml_fields(r)
lines.append(f' - host: "{_yaml_str_escape(str(f["host"]))}"')
lines.append(f' - host: "{f["host"]}"')
if "auth_scheme" in f:
lines.append(f' auth_scheme: "{_yaml_str_escape(str(f["auth_scheme"]))}"')
lines.append(f' token_env: "{_yaml_str_escape(str(f["token_env"]))}"')
lines.append(f' auth_scheme: "{f["auth_scheme"]}"')
lines.append(f' token_env: "{f["token_env"]}"')
if "matches" in f:
lines.append(" matches:")
for entry in f["matches"]: # type: ignore[union-attr]
@@ -342,7 +331,7 @@ def egress_render_routes(
items_str = ", ".join(f'"{x}"' for x in dv)
lines.append(f" {dk}: [{items_str}]")
elif isinstance(dv, str):
lines.append(f' {dk}: "{_yaml_str_escape(dv)}"')
lines.append(f' {dk}: "{dv}"')
return "\n".join(lines) + "\n"
+6 -17
View File
@@ -43,10 +43,10 @@ from .manifest import ManifestBottle, ManifestGitEntry
# Short network alias for git-gate inside the sidecar bundle. The
# agent's `.gitconfig` insteadOf rewrites resolve through this name.
GIT_GATE_HOSTNAME = "git-gate"
# Shared timeout (seconds) for all git-gate subprocess and CGI calls:
# git daemon (--timeout/--init-timeout), the access-hook subprocess in
# git_http_backend, and the git http-backend CGI subprocess.
GIT_GATE_TIMEOUT_SECS = 15
# Bound half-open git client sessions. If an agent/tool runner is
# interrupted during push, git daemon should reap the receive-pack
# child instead of keeping the gate wedged indefinitely.
GIT_GATE_DAEMON_TIMEOUT_SECS = 15
@dataclass(frozen=True)
@@ -112,15 +112,6 @@ def git_gate_upstreams_for_bottle(bottle: ManifestBottle) -> tuple[GitGateUpstre
)
def _gitconfig_validate_value(field: str, value: str) -> None:
"""Raise ValueError if value contains characters that break gitconfig line syntax."""
if "\n" in value or "\r" in value:
raise ValueError(
f"git-gate: {field} contains a newline, which would inject "
f"arbitrary gitconfig keys; rejecting manifest entry"
)
def git_gate_render_gitconfig(
entries: tuple[ManifestGitEntry, ...], gate_host: str, *, scheme: str = "git",
) -> str:
@@ -145,7 +136,6 @@ def git_gate_render_gitconfig(
"# fetch-from-upstream-before-every-upload-pack via access-hook).\n",
]
for entry in entries:
_gitconfig_validate_value(f"repos[{entry.Name!r}].url", entry.Upstream)
out.append(f'[url "{scheme}://{gate_host}/{entry.Name}.git"]\n')
out.append(f"\tinsteadOf = {entry.Upstream}\n")
if entry.RemoteKey and entry.RemoteKey != entry.UpstreamHost:
@@ -158,7 +148,6 @@ def git_gate_render_gitconfig(
f"ssh://{entry.UpstreamUser}@{entry.RemoteKey}{port}/"
f"{entry.UpstreamPath}"
)
_gitconfig_validate_value(f"repos[{entry.Name!r}].url (resolved alias)", alias)
out.append(f"\tinsteadOf = {alias}\n")
return "".join(out)
@@ -228,8 +217,8 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
"",
"exec git daemon \\",
" --reuseaddr \\",
f" --timeout={GIT_GATE_TIMEOUT_SECS} \\",
f" --init-timeout={GIT_GATE_TIMEOUT_SECS} \\",
f" --timeout={GIT_GATE_DAEMON_TIMEOUT_SECS} \\",
f" --init-timeout={GIT_GATE_DAEMON_TIMEOUT_SECS} \\",
" --base-path=/git \\",
" --export-all \\",
" --enable=receive-pack \\",
+1 -11
View File
@@ -16,8 +16,6 @@ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import urlsplit
from .git_gate import GIT_GATE_TIMEOUT_SECS
DEFAULT_PORT = 9420
@@ -49,7 +47,6 @@ class GitHttpHandler(BaseHTTPRequestHandler):
[hook_path, "upload-pack", str(repo_dir), peer, peer],
capture_output=True,
check=False,
timeout=GIT_GATE_TIMEOUT_SECS,
)
if hook.returncode != 0:
detail = (hook.stderr or hook.stdout).decode(
@@ -113,7 +110,6 @@ class GitHttpHandler(BaseHTTPRequestHandler):
env=env,
capture_output=True,
check=False,
timeout=GIT_GATE_TIMEOUT_SECS,
)
self._write_cgi_response(proc.stdout)
@@ -152,13 +148,7 @@ class GitHttpHandler(BaseHTTPRequestHandler):
key, _, value = line.decode("latin1").partition(":")
value = value.strip()
if key.lower() == "status":
try:
status = int(value.split()[0])
except (ValueError, IndexError):
self.log_message(
"malformed CGI Status header %r; using 500", value,
)
status = 500
status = int(value.split()[0])
else:
headers.append((key, value))
self.send_response(status)
+18 -105
View File
@@ -113,8 +113,10 @@ class ManifestBottle:
egress: ManifestEgressConfig = field(default_factory=ManifestEgressConfig)
# Per-bottle stuck-recovery sidecar (PRD 0013). When true (the
# default, issue #249), the launch step brings up a supervise
# sidecar that exposes egress MCP tools to the agent. Set
# `supervise: false` to skip the sidecar.
# sidecar that exposes MCP tools to the agent (egress-block,
# capability-block) plus mounts the current-config dir read-only
# into the agent at /etc/bot-bottle/current-config. Set
# `supervise: false` to skip the sidecar and mount.
supervise: bool = True
@classmethod
@@ -213,65 +215,6 @@ def _merge_git_user(
)
def _resolve_effective_bottle_eager(
agent_name: str,
agent: "ManifestAgent",
bottle_names: "tuple[str, ...]",
bottles: "Mapping[str, ManifestBottle]",
) -> "ManifestBottle":
"""Return the effective ManifestBottle for the eager (from_json_obj) path.
When bottle_names is non-empty they are merged in order. When empty, falls
back to agent.bottle. Raises ManifestError when neither is set."""
from .manifest_extends import merge_bottles_runtime
if bottle_names:
resolved: list[ManifestBottle] = []
for bn in bottle_names:
if bn not in bottles:
available = ", ".join(sorted(bottles.keys())) or "(none)"
raise ManifestError(
f"bottle '{bn}' not defined. Available: {available}"
)
resolved.append(bottles[bn])
return merge_bottles_runtime(resolved)
if not agent.bottle:
raise ManifestError(
f"agent '{agent_name}' has no 'bottle' field and no bottles were "
f"selected at launch. Select at least one bottle or add "
f"'bottle: <name>' to the agent manifest."
)
return bottles[agent.bottle]
def _resolve_effective_bottle_lazy(
agent_name: str,
agent_bottle: str,
bottle_names: "tuple[str, ...]",
bottles_dir: "Path",
) -> "ManifestBottle":
"""Return the effective ManifestBottle for the lazy (from_md_dirs) path.
When bottle_names is non-empty they are resolved from disk and merged in
order. When empty, falls back to agent_bottle. Raises ManifestError when
neither is set."""
from .manifest_extends import merge_bottles_runtime
from .manifest_loader import load_bottle_chain_from_dir
if bottle_names:
resolved = [load_bottle_chain_from_dir(bn, bottles_dir) for bn in bottle_names]
return merge_bottles_runtime(resolved)
if not agent_bottle:
raise ManifestError(
f"agent '{agent_name}' has no 'bottle' field and no bottles were "
f"selected at launch. Select at least one bottle or add "
f"'bottle: <name>' to the agent manifest."
)
return load_bottle_chain_from_dir(agent_bottle, bottles_dir)
@dataclass(frozen=True)
class Manifest:
"""Single-agent/bottle value type. Returned by ManifestIndex.load_for_agent().
@@ -417,18 +360,6 @@ class ManifestIndex:
}
return cls(bottles=bottles, agents=agents)
@property
def all_bottle_names(self) -> list[str]:
"""Sorted list of all discoverable bottle names.
In names-only mode (from resolve/from_md_dirs) this scans bottle
filenames without reading their content. In eager mode (from
from_json_obj) it returns the pre-parsed bottles' names."""
if self.home_md is not None:
from .manifest_loader import scan_bottle_names
return scan_bottle_names(self.home_md / "bottles")
return sorted(self.bottles.keys())
@property
def all_agent_names(self) -> list[str]:
"""Sorted list of all discoverable agent names.
@@ -445,18 +376,9 @@ class ManifestIndex:
return sorted(home_names | cwd_names)
return sorted(self.agents.keys())
def load_for_agent(
self,
agent_name: str,
bottle_names: "tuple[str, ...] | None" = None,
) -> "Manifest":
def load_for_agent(self, agent_name: str) -> "Manifest":
"""Parse the named agent and its bottle; return a single-value Manifest.
`bottle_names` is an ordered list of bottles selected at launch time.
When non-empty they are resolved and merged in order (index 0 = base;
later entries override). When empty or None, falls back to the agent's
own `bottle:` field. Raises ManifestError when neither is set.
In lazy mode (from resolve/from_md_dirs) the agent file and its
bottle chain are read from disk for the first time here. In eager
mode (from_json_obj) the data is already parsed; this just filters
@@ -467,8 +389,6 @@ class ManifestIndex:
Always raises ManifestError if the agent is unknown or invalid.
Backends call this at preflight inside _validate."""
effective_bottle_names: tuple[str, ...] = bottle_names or ()
if self.home_md is None:
# Eager manifest (from_json_obj): data already parsed; filter to
# the one requested agent and its bottle so the returned Manifest
@@ -479,14 +399,12 @@ class ManifestIndex:
f"agent '{agent_name}' not defined. Available: {available}"
)
agent = self.agents[agent_name]
raw_bottle = _resolve_effective_bottle_eager(
agent_name, agent, effective_bottle_names, self.bottles
)
raw_bottle = self.bottles[agent.bottle]
merged = _merge_git_user(agent.git_user, raw_bottle.git_user)
bottle = raw_bottle if merged == raw_bottle.git_user else replace(raw_bottle, git_user=merged)
return Manifest(agent=agent, bottle=bottle)
from .manifest_loader import scan_agent_names
from .manifest_loader import load_bottle_chain_from_dir, scan_agent_names
from .manifest_schema import validate_agent_frontmatter_keys
from .yaml_subset import YamlSubsetError, parse_frontmatter
@@ -513,31 +431,26 @@ class ManifestIndex:
validate_agent_frontmatter_keys(agent_path, fm.keys())
# Determine the effective bottle name(s).
agent_bottle = fm.get("bottle") or ""
bottle_name = fm.get("bottle")
if not isinstance(bottle_name, str) or not bottle_name:
raise ManifestError(
f"agent '{agent_name}' must declare a 'bottle' field "
f"naming a defined bottle"
)
# Load the bottle chain (may raise ManifestError).
bottles_dir = self.home_md / "bottles"
raw_bottle = _resolve_effective_bottle_lazy(
agent_name, str(agent_bottle), effective_bottle_names, bottles_dir
)
effective_bottle_name = (
effective_bottle_names[-1] if effective_bottle_names
else str(agent_bottle)
)
raw_bottle = load_bottle_chain_from_dir(bottle_name, bottles_dir)
# Build and validate the full ManifestAgent.
agent_dict: dict[str, object] = {
"bottle": bottle_name,
"skills": fm.get("skills", []),
"prompt": body.strip(),
}
if agent_bottle:
agent_dict["bottle"] = agent_bottle
if "git-gate" in fm:
agent_dict["git-gate"] = fm["git-gate"]
# Pass the effective bottle name as the known-bottles set so agents
# that have bottle: set are validated; agents without bottle: pass {}
# since bottle_names were already resolved above.
known = {effective_bottle_name} if effective_bottle_name else set()
agent = ManifestAgent.from_dict(agent_name, agent_dict, known)
agent = ManifestAgent.from_dict(agent_name, agent_dict, {bottle_name})
merged_user = _merge_git_user(agent.git_user, raw_bottle.git_user)
bottle = raw_bottle if merged_user == raw_bottle.git_user else replace(raw_bottle, git_user=merged_user)
+13 -16
View File
@@ -109,8 +109,7 @@ class ManifestAgentProvider:
@dataclass(frozen=True)
class ManifestAgent:
# Optional: when empty the operator selects bottles at launch time.
bottle: str = ""
bottle: str
skills: tuple[str, ...] = ()
prompt: str = ""
# Per-agent git identity (issue #94). Overlays the referenced
@@ -130,20 +129,18 @@ class ManifestAgent:
f"allowed keys are {allowed}."
)
bottle_raw = d.get("bottle")
bottle = ""
if bottle_raw is not None:
if not isinstance(bottle_raw, str) or not bottle_raw:
raise ManifestError(
f"agent '{name}' bottle must be a non-empty string when declared"
)
if bottle_raw not in bottle_names:
available = ", ".join(sorted(bottle_names)) or "(none defined)"
raise ManifestError(
f"agent '{name}' references bottle '{bottle_raw}', which is not defined. "
f"Available: {available}"
)
bottle = bottle_raw
bottle = d.get("bottle")
if not isinstance(bottle, str) or not bottle:
raise ManifestError(
f"agent '{name}' must declare a 'bottle' field naming a "
f"defined bottle"
)
if bottle not in bottle_names:
available = ", ".join(sorted(bottle_names)) or "(none defined)"
raise ManifestError(
f"agent '{name}' references bottle '{bottle}', which is not defined. "
f"Available: {available}"
)
skills: tuple[str, ...] = ()
skills_raw = d.get("skills")
+18 -162
View File
@@ -9,58 +9,6 @@ if TYPE_CHECKING:
from .manifest_egress import ManifestEgressConfig
def merge_bottles_runtime(bottles: "list[ManifestBottle]") -> "ManifestBottle":
"""Merge an ordered list of pre-resolved ManifestBottle objects.
Index 0 is the base; each subsequent entry is applied on top using
the same field-merge rules as the file-based extends machinery:
env: dict merge, later wins; git_user: per-field overlay, later
wins on non-empty; git (repos): union by name, later wins; egress
routes: concatenate; agent_provider, supervise: later replaces.
"""
if not bottles:
raise ValueError("merge_bottles_runtime requires at least one bottle")
result = bottles[0]
for override in bottles[1:]:
result = _merge_two_bottles_runtime(result, override)
return result
def _merge_two_bottles_runtime(base: "ManifestBottle", override: "ManifestBottle") -> "ManifestBottle":
from .manifest import ManifestBottle, ManifestGitUser
from .manifest_egress import ManifestEgressConfig
merged_env = {**base.env, **override.env}
merged_git_user = ManifestGitUser(
name=override.git_user.name or base.git_user.name,
email=override.git_user.email or base.git_user.email,
)
# git repos: union keyed by Name, override wins per-name.
base_repos_by_name = {entry.Name: entry for entry in base.git}
override_repos_by_name = {entry.Name: entry for entry in override.git}
merged_repos_names = list(base_repos_by_name) + [
n for n in override_repos_by_name if n not in base_repos_by_name
]
merged_git = tuple(
override_repos_by_name.get(n, base_repos_by_name[n])
for n in merged_repos_names
)
merged_routes = base.egress.routes + override.egress.routes
merged_egress = ManifestEgressConfig(routes=merged_routes, Log=override.egress.Log)
return ManifestBottle(
env=merged_env,
agent_provider=override.agent_provider,
git=merged_git,
git_user=merged_git_user,
egress=merged_egress,
supervise=override.supervise,
)
def resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, ManifestBottle]:
"""Apply `extends:` chains and return resolved ManifestBottle objects."""
cache: dict[str, ManifestBottle] = {}
@@ -101,125 +49,33 @@ def _resolve_one_bottle(
repos_cache[name] = _resolve_repos_raw({}, child_raw)
return bottle
# Normalize to list, accepting both str and list[str].
raw_list: list[object]
if isinstance(parent_name_raw, str):
raw_list = [parent_name_raw]
elif isinstance(parent_name_raw, list):
raw_list = parent_name_raw
else:
if not isinstance(parent_name_raw, str):
raise ManifestError(
f"bottle '{name}' extends must be a string or list of strings "
f"bottle '{name}' extends must be a string "
f"(was {type(parent_name_raw).__name__})"
)
# Validate each entry before resolving any of them.
parent_names: list[str] = []
for i, pname in enumerate(raw_list):
if not isinstance(pname, str):
raise ManifestError(
f"bottle '{name}' extends[{i}] must be a string "
f"(was {type(pname).__name__})"
)
parent_names.append(pname)
if pname == name:
raise ManifestError(
f"bottle '{name}' extends itself; remove the self-reference"
)
if pname not in raws:
avail = ", ".join(sorted(raws.keys())) or "(none)"
raise ManifestError(
f"bottle '{name}' extends '{pname}' which is not "
f"defined. Available bottles: {avail}"
)
combined_parent, combined_repos_raw = _fold_parents(
parent_names, raws, cache, repos_cache, seen + (name,)
parent_name: str = parent_name_raw
if parent_name == name:
raise ManifestError(
f"bottle '{name}' extends itself; remove the "
f"self-reference"
)
if parent_name not in raws:
avail = ", ".join(sorted(raws.keys())) or "(none)"
raise ManifestError(
f"bottle '{name}' extends '{parent_name}' which is not "
f"defined. Available bottles: {avail}"
)
parent = _resolve_one_bottle(
parent_name, raws, cache, repos_cache, seen + (name,)
)
merged_repos_raw = _resolve_repos_raw(combined_repos_raw, child_raw)
bottle = _merge_bottles(combined_parent, child_raw, merged_repos_raw, name)
merged_repos_raw = _resolve_repos_raw(repos_cache[parent_name], child_raw)
bottle = _merge_bottles(parent, child_raw, merged_repos_raw, name)
cache[name] = bottle
repos_cache[name] = merged_repos_raw
return bottle
def _fold_parents(
parent_names: list[str],
raws: dict[str, dict[str, object]],
cache: dict[str, ManifestBottle],
repos_cache: dict[str, dict[str, object]],
seen: tuple[str, ...],
) -> tuple[ManifestBottle, dict[str, object]]:
"""Resolve each parent and fold them left-to-right.
Later parents win over earlier ones on conflict. The `seen` tuple
carries the current bottle's name so cycle detection works across
every parent edge in the multi-parent graph."""
first = parent_names[0]
effective = _resolve_one_bottle(first, raws, cache, repos_cache, seen)
effective_repos_raw = repos_cache[first]
for pname in parent_names[1:]:
later = _resolve_one_bottle(pname, raws, cache, repos_cache, seen)
later_repos_raw = repos_cache[pname]
effective, effective_repos_raw = _fold_two_bottles(
effective, effective_repos_raw, later, later_repos_raw
)
return effective, effective_repos_raw
def _fold_two_bottles(
earlier: ManifestBottle,
earlier_repos_raw: dict[str, object],
later: ManifestBottle,
later_repos_raw: dict[str, object],
) -> tuple[ManifestBottle, dict[str, object]]:
"""Combine two resolved parent bottles; later wins over earlier."""
from .manifest import ManifestBottle, ManifestGitUser
from .manifest_egress import ManifestEgressConfig
from .manifest_git import parse_git_gate_config
from .manifest_util import as_json_object
merged_env = {**earlier.env, **later.env}
merged_git_user = ManifestGitUser(
name=later.git_user.name or earlier.git_user.name,
email=later.git_user.email or earlier.git_user.email,
)
# Repos: union by name; for same-name entries, later wins per-field.
# Unlike _resolve_repos_raw, an empty later_repos_raw means "no repos
# declared" — it does NOT clear the earlier parent's repos.
names = list(earlier_repos_raw) + [
n for n in later_repos_raw if n not in earlier_repos_raw
]
merged_repos_raw: dict[str, object] = {
n: {
**as_json_object(earlier_repos_raw.get(n, {}), "earlier parent repo"),
**as_json_object(later_repos_raw.get(n, {}), "later parent repo"),
}
for n in names
}
if merged_repos_raw:
merged_git, _ = parse_git_gate_config("_fold", {"repos": merged_repos_raw})
else:
merged_git = ()
# Egress: routes concatenate; scalar fields use last-wins.
merged_egress = ManifestEgressConfig(
routes=earlier.egress.routes + later.egress.routes,
Log=later.egress.Log,
)
return ManifestBottle(
env=merged_env,
agent_provider=later.agent_provider,
git=merged_git,
git_user=merged_git_user,
egress=merged_egress,
supervise=later.supervise,
), merged_repos_raw
def _merge_bottles(
parent: ManifestBottle,
child_raw: dict[str, object],
-21
View File
@@ -32,25 +32,6 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None:
)
def scan_bottle_names(bottles_dir: Path) -> list[str]:
"""Scan `<bottles_dir>/*.md` for valid filenames and return sorted bottle names.
No file content is read. Invalid filenames are skipped with a warning."""
result: list[str] = []
if not bottles_dir.is_dir():
return result
for path in sorted(bottles_dir.glob("*.md")):
name = entity_name_from_path(path)
if name is None:
warn(
f"skipping {path}: filename must match "
f"[a-z][a-z0-9-]*.md (got {path.name!r})"
)
continue
result.append(name)
return result
def scan_agent_names(agents_dir: Path) -> dict[str, Path]:
"""Scan `<agents_dir>/*.md` for valid filenames and return `{name: path}`.
@@ -106,7 +87,5 @@ def load_bottle_chain_from_dir(
parent = fm.get("extends")
if isinstance(parent, str):
to_load.append(parent)
elif isinstance(parent, list):
to_load.extend(p for p in parent if isinstance(p, str))
return resolve_bottles(raws)[bottle_name]
+2 -2
View File
@@ -18,8 +18,8 @@ _FILENAME_RX = re.compile(r"^[a-z][a-z0-9-]*$")
BOTTLE_KEYS = frozenset(
{"env", "extends", "agent_provider", "git-gate", "egress", "supervise"}
)
AGENT_KEYS_REQUIRED: frozenset[str] = frozenset()
AGENT_KEYS_OPTIONAL = frozenset({"bottle", "skills", "git-gate"})
AGENT_KEYS_REQUIRED = frozenset({"bottle"})
AGENT_KEYS_OPTIONAL = frozenset({"skills", "git-gate"})
# Claude Code subagent fields bot-bottle ignores at launch but does
# not reject. This lets the same file double as
+42 -10
View File
@@ -2,10 +2,11 @@
The supervise plane is the per-bottle MCP sidecar plus its host-side
queue/audit support. The sidecar (bot_bottle.supervise_server)
sits on the bottle's internal network and exposes MCP tools the agent
calls when it needs an operator-reviewed egress change:
sits on the bottle's internal network and exposes three MCP tools the
agent calls when it hits a stuck-recovery category:
* egress-block / allow agent proposes a new routes.yaml
* capability-block agent proposes a new agent Dockerfile
Each tool call: the agent passes the full proposed file plus a
justification text. The sidecar validates the proposal syntactically,
@@ -47,6 +48,7 @@ from pathlib import Path
SUPERVISE_HOSTNAME = "supervise"
SUPERVISE_PORT = 9100
TOOL_CAPABILITY_BLOCK = "capability-block"
TOOL_EGRESS_BLOCK = "egress-block"
TOOL_EGRESS_ALLOW = "egress-allow"
TOOL_GITLEAKS_ALLOW = "gitleaks-allow"
@@ -56,6 +58,7 @@ TOOL_EGRESS_TOKEN_ALLOW = "egress-token-allow"
TOOL_LIST_EGRESS_ROUTES = "list-egress-routes"
TOOLS: tuple[str, ...] = (
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_EGRESS_BLOCK,
TOOL_GITLEAKS_ALLOW,
TOOL_EGRESS_TOKEN_ALLOW,
@@ -72,6 +75,10 @@ TOOLS: tuple[str, ...] = (
EGRESS_FORWARD_PROXY = "http://127.0.0.1:9099"
EGRESS_INTROSPECT_URL = "http://_egress.local/allowlist"
# capability-block has no on-disk config the operator edits in place
# (the Dockerfile is rebuilt, not patched), so it has no audit log
# here — those changes are captured by git history + the rebuild record
# laid down in PRD 0016.
COMPONENT_FOR_TOOL: dict[str, str] = {
TOOL_EGRESS_ALLOW: "egress",
TOOL_EGRESS_BLOCK: "egress",
@@ -87,6 +94,8 @@ STATUSES: tuple[str, ...] = (STATUS_APPROVED, STATUS_MODIFIED, STATUS_REJECTED)
ACTION_OPERATOR_EDIT = "operator-edit"
QUEUE_DIR_IN_CONTAINER = "/run/supervise/queue"
CURRENT_CONFIG_DIR_IN_AGENT = "/etc/bot-bottle/current-config"
DEFAULT_POLL_INTERVAL_SEC = 0.5
@@ -429,39 +438,59 @@ def sha256_hex(content: str) -> str:
# --- Sidecar plan + abstract lifecycle -------------------------------------
# Filename of the staged Dockerfile inside the agent's read-only
# current-config mount. The capability-block tool's description
# points the agent at this exact path so it can read the current
# Dockerfile and propose modifications.
#
# routes.yaml + allowlist used to live here too; PRD 0017 chunk 3
# moved them behind the `list-egress-routes` MCP tool (live state
# from egress's introspection endpoint) so the agent always sees
# current data rather than a launch-time snapshot.
CURRENT_CONFIG_DOCKERFILE = "Dockerfile"
@dataclass(frozen=True)
class SupervisePlan:
"""Output of Supervise.prepare; consumed by .start.
`queue_dir` is the host directory bind-mounted into the sidecar
at /run/supervise/queue. `internal_network` is empty at prepare
time; the backend's launch step fills it via dataclasses.replace
before calling .start."""
at /run/supervise/queue. `current_config_dir` is the host
directory bind-mounted (read-only) into the *agent* container
at /etc/bot-bottle/current-config currently holds only the
Dockerfile snapshot (routes.yaml + allowlist moved to the
`list-egress-routes` MCP tool). `internal_network` is
empty at prepare time; the backend's launch step fills it via
dataclasses.replace before calling .start."""
slug: str
queue_dir: Path
current_config_dir: Path
internal_network: str = ""
class Supervise(ABC):
"""Per-bottle supervise sidecar. Encapsulates the host-side
prepare (queue dir staging); the sidecar's start/stop lifecycle
is backend-specific."""
prepare (queue dir + current-config staging); the sidecar's
start/stop lifecycle is backend-specific."""
def prepare(
self,
slug: str,
stage_dir: Path,
) -> SupervisePlan:
"""Stage the per-bottle queue dir on the host. Returns the
plan; `internal_network` must be set by the launch step before
"""Stage the per-bottle queue dir on the host and the
current-config dir under `stage_dir`. Returns the plan;
`internal_network` must be set by the launch step before
.start runs."""
del stage_dir
queue_dir = queue_dir_for_slug(slug)
queue_dir.mkdir(parents=True, exist_ok=True)
current_config_dir = stage_dir / "current-config"
current_config_dir.mkdir(parents=True, exist_ok=True)
return SupervisePlan(
slug=slug,
queue_dir=queue_dir,
current_config_dir=current_config_dir,
)
# --- Helpers ---------------------------------------------------------------
@@ -512,6 +541,8 @@ __all__ = [
"ACTION_OPERATOR_EDIT",
"AuditEntry",
"COMPONENT_FOR_TOOL",
"CURRENT_CONFIG_DIR_IN_AGENT",
"CURRENT_CONFIG_DOCKERFILE",
"DEFAULT_POLL_INTERVAL_SEC",
"Proposal",
"QUEUE_DIR_IN_CONTAINER",
@@ -527,6 +558,7 @@ __all__ = [
"TOOLS",
"EGRESS_FORWARD_PROXY",
"EGRESS_INTROSPECT_URL",
"TOOL_CAPABILITY_BLOCK",
"TOOL_EGRESS_ALLOW",
"TOOL_EGRESS_BLOCK",
"TOOL_GITLEAKS_ALLOW",
+61 -52
View File
@@ -1,8 +1,8 @@
"""Supervise sidecar HTTP server (PRD 0013).
Per-bottle MCP server exposing tools the agent calls to propose egress
config changes when stuck. The tools are `egress-allow`,
`egress-block`, and `list-egress-routes`.
Per-bottle MCP server exposing tools the agent calls to propose config
changes when stuck. The tools are `allow`, `egress-block`,
`capability-block`, and `list-egress-routes`.
Each queued tool call:
@@ -90,19 +90,19 @@ def parse_jsonrpc(body: bytes) -> JsonRpcRequest:
try:
raw = json.loads(body)
except json.JSONDecodeError as e:
raise _RpcClientError(ERR_PARSE, f"parse error: {e}") from e
raise _RpcError(ERR_PARSE, f"parse error: {e}") from e
if not isinstance(raw, dict):
raise _RpcClientError(ERR_INVALID_REQUEST, "request must be a JSON object")
raise _RpcError(ERR_INVALID_REQUEST, "request must be a JSON object")
if raw.get("jsonrpc") != JSONRPC_VERSION:
raise _RpcClientError(ERR_INVALID_REQUEST, "jsonrpc field must be '2.0'")
raise _RpcError(ERR_INVALID_REQUEST, "jsonrpc field must be '2.0'")
method = raw.get("method")
if not isinstance(method, str):
raise _RpcClientError(ERR_INVALID_REQUEST, "method must be a string")
raise _RpcError(ERR_INVALID_REQUEST, "method must be a string")
params = raw.get("params", {})
if params is None:
params = {}
if not isinstance(params, dict):
raise _RpcClientError(ERR_INVALID_PARAMS, "params must be an object")
raise _RpcError(ERR_INVALID_PARAMS, "params must be an object")
rpc_id = raw.get("id", _NO_ID)
is_notification = rpc_id is _NO_ID
return JsonRpcRequest(
@@ -117,23 +117,12 @@ _NO_ID = object()
class _RpcError(Exception):
"""Base class for all typed RPC errors that surface as JSON-RPC error responses."""
def __init__(self, code: int, message: str):
super().__init__(message)
self.code = code
self.message = message
class _RpcClientError(_RpcError):
"""Caller sent a bad request; returned verbatim, no server-side logging."""
class _RpcInternalError(_RpcError):
"""Server-side fault; logged at ERROR with cause, always returns ERR_INTERNAL."""
def __init__(self, message: str) -> None:
super().__init__(ERR_INTERNAL, message)
def jsonrpc_result(request_id: object, result: object) -> bytes:
payload = {"jsonrpc": JSONRPC_VERSION, "id": request_id, "result": result}
return (json.dumps(payload) + "\n").encode("utf-8")
@@ -253,6 +242,34 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
"required": ["routes_yaml", "justification"],
},
},
{
"name": _sv.TOOL_CAPABILITY_BLOCK,
"description": (
"Call when the bottle is missing a tool, skill, permission, "
"or env var you need — something that lives in the agent "
"Dockerfile rather than in the egress routes. "
"Read the current Dockerfile from "
"/etc/bot-bottle/current-config/Dockerfile, compose a "
"modified version, and pass the full new file plus a "
"justification. On approval the supervisor rebuilds the "
"bottle from the new Dockerfile and starts a replacement on "
"the same branch (wired in PRD 0016; v1 acknowledges only)."
),
"inputSchema": {
"type": "object",
"properties": {
"dockerfile": {
"type": "string",
"description": "Full proposed Dockerfile content.",
},
"justification": {
"type": "string",
"description": "Why this capability is needed.",
},
},
"required": ["dockerfile", "justification"],
},
},
]
@@ -260,6 +277,7 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
# payload (stored in Proposal.proposed_file).
PROPOSED_FILE_FIELD: dict[str, str] = {
_sv.TOOL_EGRESS_ALLOW: "routes_yaml",
_sv.TOOL_CAPABILITY_BLOCK: "dockerfile",
_sv.TOOL_EGRESS_BLOCK: "routes_yaml",
}
@@ -272,22 +290,26 @@ def validate_proposed_file(tool: str, content: str) -> None:
catches obvious paste-errors / wrong-tool selections before they
enter the queue."""
if not content.strip():
raise _RpcClientError(ERR_INVALID_PARAMS, f"{tool}: proposed file is empty")
if tool in (_sv.TOOL_EGRESS_ALLOW, _sv.TOOL_EGRESS_BLOCK):
raise _RpcError(ERR_INVALID_PARAMS, f"{tool}: proposed file is empty")
if tool == _sv.TOOL_CAPABILITY_BLOCK:
# Dockerfiles are too varied to validate syntactically beyond
# non-empty. The operator reads the diff in the TUI.
pass
elif tool in (_sv.TOOL_EGRESS_ALLOW, _sv.TOOL_EGRESS_BLOCK):
try:
config = load_config(content)
except ValueError as e:
raise _RpcClientError(
raise _RpcError(
ERR_INVALID_PARAMS,
f"{tool}: proposed routes.yaml is not valid: {e}",
) from e
if config.log != LOG_OFF:
raise _RpcClientError(
raise _RpcError(
ERR_INVALID_PARAMS,
f"{tool}: proposed routes.yaml must not change egress logging",
)
else:
raise _RpcClientError(ERR_INVALID_PARAMS, f"unknown tool {tool!r}")
raise _RpcError(ERR_INVALID_PARAMS, f"unknown tool {tool!r}")
# --- MCP handlers ----------------------------------------------------------
@@ -360,17 +382,17 @@ def handle_tools_call(
doesn't need operator approval."""
name = params.get("name")
if not isinstance(name, str):
raise _RpcClientError(ERR_INVALID_PARAMS, "tools/call missing 'name'")
raise _RpcError(ERR_INVALID_PARAMS, "tools/call missing 'name'")
if name == _sv.TOOL_LIST_EGRESS_ROUTES:
return handle_list_egress_routes(typing.cast(dict[str, object], params.get("arguments", {})), config)
args_raw = params.get("arguments", {})
if not isinstance(args_raw, dict):
raise _RpcClientError(ERR_INVALID_PARAMS, "tools/call 'arguments' must be an object")
raise _RpcError(ERR_INVALID_PARAMS, "tools/call 'arguments' must be an object")
justification = args_raw.get("justification")
if not isinstance(justification, str) or not justification.strip():
raise _RpcClientError(
raise _RpcError(
ERR_INVALID_PARAMS,
f"{name}: 'justification' is required and must be a non-empty string",
)
@@ -379,13 +401,13 @@ def handle_tools_call(
file_field = PROPOSED_FILE_FIELD[name]
proposed_file = args_raw.get(file_field)
if not isinstance(proposed_file, str):
raise _RpcClientError(
raise _RpcError(
ERR_INVALID_PARAMS,
f"{name}: '{file_field}' is required and must be a string",
)
validate_proposed_file(name, proposed_file)
else:
raise _RpcClientError(ERR_INVALID_PARAMS, f"unknown tool {name!r}")
raise _RpcError(ERR_INVALID_PARAMS, f"unknown tool {name!r}")
proposal = _sv.Proposal.new(
bottle_slug=config.bottle_slug,
@@ -394,10 +416,7 @@ def handle_tools_call(
justification=justification,
current_file_hash=_sv.sha256_hex(proposed_file),
)
try:
_sv.write_proposal(config.queue_dir, proposal)
except OSError as e:
raise _RpcInternalError(f"failed to write proposal to queue: {e}") from e
_sv.write_proposal(config.queue_dir, proposal)
sys.stderr.write(
f"supervise: queued proposal {proposal.id} ({name}) "
f"for bottle {config.bottle_slug}; waiting for operator...\n"
@@ -417,10 +436,7 @@ def handle_tools_call(
"content": [{"type": "text", "text": text}],
"isError": False,
}
try:
_sv.archive_proposal(config.queue_dir, proposal.id)
except OSError as e:
raise _RpcInternalError(f"failed to archive proposal: {e}") from e
_sv.archive_proposal(config.queue_dir, proposal.id)
text = format_response_text(response)
return {
@@ -454,8 +470,9 @@ def format_pending_response_text(timeout_seconds: float) -> str:
# --- HTTP transport --------------------------------------------------------
# Max request body the server accepts. 1 MB is well above any realistic
# routes.yaml proposal.
# Max request body the server accepts. Generous because Dockerfile
# proposals can be a few KB; routes.json is small. 1 MB is well above
# any realistic config file.
MAX_BODY_BYTES = 1 * 1024 * 1024
@@ -495,7 +512,7 @@ class MCPHandler(http.server.BaseHTTPRequestHandler):
try:
req = parse_jsonrpc(body)
except _RpcClientError as e:
except _RpcError as e:
self._write_jsonrpc(jsonrpc_error(None, e.code, e.message))
return
@@ -503,19 +520,11 @@ class MCPHandler(http.server.BaseHTTPRequestHandler):
try:
result = self._dispatch(req, config)
except _RpcClientError as e:
except _RpcError as e:
self._write_jsonrpc(jsonrpc_error(req.id, e.code, e.message))
return
except _RpcInternalError as e:
cause = e.__cause__
detail = f": {cause}" if cause else ""
sys.stderr.write(f"supervise: internal error: {e.message}{detail}\n")
sys.stderr.flush()
self._write_jsonrpc(jsonrpc_error(req.id, ERR_INTERNAL, "internal error"))
return
except Exception as e: # noqa: W0718 — unexpected errors
sys.stderr.write(f"supervise: unexpected error: {type(e).__name__}: {e}\n")
sys.stderr.flush()
except Exception as e: # noqa: W0718 — catch-all for RPC dispatch errors
sys.stderr.write(f"supervise: internal error: {e}\n")
self._write_jsonrpc(jsonrpc_error(req.id, ERR_INTERNAL, "internal error"))
return
@@ -534,7 +543,7 @@ class MCPHandler(http.server.BaseHTTPRequestHandler):
return handle_tools_list(req.params)
if method == "tools/call":
return handle_tools_call(req.params, config)
raise _RpcClientError(ERR_METHOD_NOT_FOUND, f"method not found: {method}")
raise _RpcError(ERR_METHOD_NOT_FOUND, f"method not found: {method}")
def _write_jsonrpc(self, body: bytes) -> None:
self.send_response(200)
-166
View File
@@ -1,166 +0,0 @@
# PRD 0065: Multi-parent `extends:` for bottles
- **Status:** Active
- **Author:** didericis
- **Created:** 2026-06-25
- **Issue:** #268
- **Extends:** PRD 0025 (`0025-bottle-extends.md`)
## Summary
Allow a bottle's `extends:` field to accept either a single bottle name (existing
behavior) or a list of bottle names (new). Multiple parents are resolved
independently and folded left-to-right into a single effective parent before the
child is merged on top. This lets orthogonal concerns (base env, networking/egress,
agent provider) live in separate bottles and be composed without forcing them into a
linear chain.
## Problem
PRD 0025 shipped single-parent `extends:` and listed "No multi-parent inheritance"
as a non-goal. In practice, users want to compose multiple orthogonal bottles — a
base environment, a networking profile, and an agent-provider override — without
creating a three-level linear chain that couples unrelated parents to each other.
The linear chain workaround has two problems:
1. **Ordering constraint.** `networking extends base` works, but then
`agent extends networking` can't also pick up `base` without going through
`networking`, coupling two unrelated concerns.
2. **Quadratic duplication.** N orthogonal bottles require O(N²) chain variants
(one chain per permutation of applied concerns).
Multi-parent `extends:` removes both constraints: each orthogonal concern stays in
its own bottle, and the child bottle is the only place that names the combination.
## Goals / Success Criteria
- `extends:` accepts a list of strings in addition to a plain string.
- Backward compat: existing single-string `extends:` is unchanged.
- Parents are resolved left-to-right; later entries win on conflict.
- Child wins over all parents (unchanged from PRD 0025).
- Cycle detection covers multi-parent graphs, not just linear chains.
- Diamond inheritance: a shared ancestor is resolved once (via the existing cache).
- Invalid list entries (non-string, undefined bottle, self-reference) die at parse
with clear messages.
- `manifest_loader.py`'s `load_bottle_chain_from_dir` enqueues all parents from a
list `extends:` so the resolver sees every bottle in the graph.
## Non-goals
- No change to the agent-vs-bottle trust boundary (PRD 0025 "Alternatives
considered" option 2 stays rejected).
- No MRO / C3 linearization. Left-to-right fold is sufficient for the expected use
cases.
- No preflight display of per-field provenance across multiple parents (same open
question as PRD 0025; remains a follow-up).
## Design
### Schema
`extends:` now accepts either form:
```yaml
# single parent (unchanged)
extends: base
# multiple parents (new)
extends: [base, networking]
```
Both forms are normalized to a list internally. A list with one element behaves
identically to the string form.
### Merge rules for multi-parent fold
Parents are folded pairwise left-to-right before the child merge. For each step in
the fold, the "earlier" bottle is the running accumulator and the "later" bottle is
the next parent. Rules per field:
| Field | Fold rule |
|--------------------|--------------------------------------------------------------|
| `env` | dict merge; later wins on key collision |
| `git-gate.user` | per-field overlay; later's non-empty fields win |
| `git-gate.repos` | union by name; for same-name entries, later wins per-field |
| `egress.routes` | concatenate (earlier first, later appended) |
| `egress.log` | later wins (last-wins) |
| `agent_provider` | later wins (last-wins) |
| `supervise` | later wins (last-wins) |
After the fold, the combined parent is merged against the child using the existing
PRD 0025 rules (child always wins). The child's `egress.routes` appends to the
combined parent's concatenated routes; `validate_egress_routes` runs once on the
final merged set and catches duplicate hosts.
### Algorithm
```
extends: [p1, p2, p3]
fold:
combined = resolve(p1)
combined = fold_two(combined, resolve(p2))
combined = fold_two(combined, resolve(p3))
merge:
result = _merge_bottles(combined, child_raw, name)
```
`fold_two(earlier, later)` applies the rules in the table above. Cycle detection
(the `seen` tuple) is passed to each parent resolution call unchanged — if any
parent's chain circles back to the current bottle, it is caught. The `cache` dict
ensures a shared ancestor is only resolved once across all parents.
### Error cases
| Condition | Error message shape |
|----------------------------------------|------------------------------------------------------------------|
| `extends` is not a string or list | `extends must be a string or list of strings (was <type>)` |
| A list entry is not a string | `extends[<i>] must be a string (was <type>)` |
| A list entry names an undefined bottle | `extends '<name>' which is not defined. Available bottles: ...` |
| A list entry is the bottle itself | `extends itself; remove the self-reference` |
| Cycle through any parent edge | `is in an extends cycle: <chain>` |
## Implementation
### `bot_bottle/manifest_extends.py`
- `_resolve_one_bottle`: accept `str | list[str]` for `extends`; normalize to list;
validate each entry; for a single-entry list fall through to the existing
single-parent path; for multiple entries call `_fold_parents` then
`_merge_bottles`.
- `_fold_parents(parent_names, raws, cache, repos_cache, seen)`: resolve each
parent and fold pairwise left-to-right; return `(effective_bottle,
effective_repos_raw)`.
- `_fold_two_bottles(earlier, earlier_repos_raw, later, later_repos_raw)`: apply
the fold rules above; return `(folded_bottle, folded_repos_raw)`.
### `bot_bottle/manifest_loader.py`
- `load_bottle_chain_from_dir`: when `extends` is a list, enqueue all parent names
for loading (previously only `isinstance(parent, str)` was handled).
### `tests/unit/test_manifest_extends.py`
- `TestExtendsErrors.test_non_string_extends_dies`: update to use an integer
`extends` value (a list is now valid).
- New class `TestExtendsMultiParent` covering all cases listed in the issue.
## Testing strategy
Unit tests via `ManifestIndex.from_json_obj` (same resolver surface used by all
paths). No integration test changes needed — downstream code consumes the already-
merged bottle and is unchanged.
Test cases:
- Two-parent list: env union, egress routes concat, git repos union
- Last-parent-wins on scalar (supervise, agent_provider)
- Child wins over all parents on conflict
- Diamond: two parents share an ancestor; ancestor resolved once
- Single-element list: identical to string form
- Non-string extends value → ManifestError
- Non-string list entry → ManifestError
- Undefined bottle in list → ManifestError
- Self-reference in list → ManifestError
- Cycle through multi-parent edge → ManifestError
@@ -1,216 +0,0 @@
# PRD 0066: Separate agent and bottle selection
- **Status:** Active
- **Author:** claude
- **Created:** 2026-06-25
- **Issue:** #269
## Summary
Agents and bottles are two separate concerns: agents carry a system prompt and
skills; bottles carry infrastructure configuration (egress, git-gate, env,
agent provider). Today an agent's manifest file hard-codes a single `bottle:`
reference, which prevents the same agent prompt from being reused across
projects that need different bottle configurations. This PRD decouples them: at
launch time, after choosing the agent, the operator picks an ordered list of
bottles via a multi-select picker. The selected bottles are merged in order
(later entries override earlier ones) to produce the effective bottle for the
session.
## Problem
The current `bottle: <name>` field on an agent manifest file binds the agent
permanently to one bottle. To use the same system prompt with a different bottle
(e.g. `claude-implementer` at home vs. at a client site that needs a different
egress policy), the operator must duplicate the agent file and change the
`bottle:` field. Duplicate agent files drift out of sync.
## Goals / Success Criteria
1. `bottle:` in an agent's frontmatter becomes optional. Existing manifests with
`bottle:` continue to work unchanged (backward compat).
2. After selecting an agent (via the existing single-select picker), a new
multi-select bottle picker appears showing all available bottles.
3. The multi-select picker pre-populates with the agent's `bottle:` value when
present.
4. Confirming with one or more bottles selected uses those bottles, merged in
selection order, as the effective bottle for the session.
5. Confirming with an empty selection falls back to the agent's `bottle:` field.
If neither is set, a ManifestError is raised pointing the operator at the fix.
6. The ordered bottle list is stored in launch metadata so `./cli.py resume`
uses the same bottles.
7. The preflight summary (`y/N` screen) shows the effective bottle name(s).
8. The multi-select picker supports incremental filtering, Space/Enter to toggle
selection, an ordered "Selected: ..." summary line, Ctrl-D to confirm, and
Esc/q to cancel the whole start operation.
9. Unit tests cover: multi-select widget (filter, toggle, confirm, cancel),
the `cmd_start` bottle-picker step, and the manifest `load_for_agent`
runtime-bottle-merge path.
## Non-goals
- Reordering the selection list from within the picker (order = insertion order;
drag-and-drop is out of scope).
- Storing bottle selection history / MRU.
- Changes to `./cli.py edit`, `./cli.py list`, or `./cli.py info`.
- Removing the `bottle:` key from the agent schema (it stays, now optional).
## Design
### `bot_bottle/cli/tui.py``filter_multiselect`
```python
def filter_multiselect(
items: list[str],
*,
title: str = "",
initial: list[str] | None = None,
tty_path: str = "/dev/tty",
) -> list[str] | None:
"""Multi-select variant of filter_select.
Returns the ordered list of selected items, or None on cancel.
Press Space/Enter to toggle the item under the cursor.
Press Ctrl-D to confirm. Press Esc/q to cancel.
"""
```
Layout:
```
Select bottles
Filter: _
─────────────────────────────────────────
> [*] claude
[ ] dev
[ ] codex
─────────────────────────────────────────
Selected (in order): claude
─────────────────────────────────────────
[↑↓/jk] move [Space] toggle [Ctrl-D] done [Esc] cancel
```
`initial` pre-populates the ordered selection. `None` means no pre-selection.
Items added are appended in insertion order; items removed leave the remaining
order unchanged.
### `bot_bottle/manifest_schema.py` — optional `bottle:`
`bottle` moves from `AGENT_KEYS_REQUIRED` to `AGENT_KEYS_OPTIONAL`.
### `bot_bottle/manifest_agent.py` — optional `bottle:`
`ManifestAgent.bottle` changes from `str` (required) to `str = ""`.
`from_dict` no longer requires the key to be present; the bottle-exists
validation is skipped when the key is absent.
### `bot_bottle/manifest_loader.py``scan_bottle_names`
```python
def scan_bottle_names(bottles_dir: Path) -> list[str]:
"""Scan <bottles_dir>/*.md and return sorted bottle names."""
```
### `bot_bottle/manifest.py``ManifestIndex` changes
**`all_bottle_names` property** — analogous to `all_agent_names`; scans
`home_md / "bottles"` in lazy mode, returns `sorted(self.bottles.keys())` in
eager mode.
**`load_for_agent(agent_name, bottle_names: tuple[str, ...] = ())`** — new
`bottle_names` parameter. When non-empty, the listed bottles are resolved and
merged in order (index 0 is the base; each subsequent bottle is applied on top
using the same field-merge rules as `extends:`). The result replaces the bottle
that `agent.bottle` would have provided. When empty, falls back to `agent.bottle`.
Raises ManifestError if neither `bottle_names` nor `agent.bottle` is set.
### `bot_bottle/manifest_extends.py``merge_bottles_runtime`
```python
def merge_bottles_runtime(bottles: list[ManifestBottle]) -> ManifestBottle:
"""Merge an ordered list of pre-resolved ManifestBottle objects.
Index 0 is the base; each subsequent entry overrides the previous using
the same rules as the file-based extends machinery:
- env: dict merge, later wins
- git_user: per-field overlay, later wins on non-empty
- git (repos): union by name, later wins per-name
- egress.routes: concatenate
- agent_provider, supervise: later bottle's value replaces earlier
"""
```
This function operates on already-parsed `ManifestBottle` objects, so it does
not need to touch the raw-dict path.
### `bot_bottle/backend/__init__.py``BottleSpec` + `_validate`
`BottleSpec` gains `bottle_names: tuple[str, ...] = ()`.
`BottleBackend._validate` passes `spec.bottle_names` to `load_for_agent`:
```python
manifest = spec.manifest.load_for_agent(spec.agent_name, spec.bottle_names)
```
The preflight print updates `info(f"bottle: {agent.bottle}")` to display the
effective bottle name(s). When `spec.bottle_names` is non-empty those are
shown; when empty and `agent.bottle` is set, the agent's `bottle:` is shown.
### `bot_bottle/bottle_state.py` — persist bottle names
`BottleMetadata` gains `bottle_names: tuple[str, ...] = ()`. `read_metadata`
reads this from JSON (default `()`). `write_launch_metadata` passes
`spec.bottle_names` through.
### `bot_bottle/cli/start.py` — bottle multiselect step
After agent selection, before the name/color modal:
```python
available_bottle_names = manifest.all_bottle_names
# Peek at agent's bottle default for pre-population
initial_bottle = _peek_agent_bottle(manifest, agent_name)
initial = [initial_bottle] if initial_bottle else []
bottle_names_list = tui.filter_multiselect(
available_bottle_names,
title="Select bottles",
initial=initial,
)
if bottle_names_list is None:
return 0 # user cancelled
bottle_names = tuple(bottle_names_list)
```
`_peek_agent_bottle` reads the agent file's frontmatter without full parsing,
returning the `bottle:` value or `""` when absent.
`BottleSpec` is built with `bottle_names=bottle_names`.
### `bot_bottle/cli/resume.py` — bottle names from metadata
```python
spec = BottleSpec(
...
bottle_names=tuple(metadata.bottle_names),
)
```
## Implementation chunks
1. **Schema + model**`manifest_schema.py`, `manifest_agent.py` (optional
`bottle:`), `manifest_loader.py` (`scan_bottle_names`), `manifest.py`
(`all_bottle_names`, `load_for_agent` signature), `manifest_extends.py`
(`merge_bottles_runtime`), `bottle_state.py` (`bottle_names` field),
`resolve_common.py` (thread through).
2. **Backend**`BottleSpec.bottle_names`, `_validate`, preflight print.
3. **TUI**`filter_multiselect` in `tui.py` + unit tests.
4. **CLI wiring**`start.py` bottle picker step, `resume.py` metadata load.
5. **Tests**`test_cli_start_selector.py` bottle-picker cases,
`test_manifest_agent.py` optional-bottle cases, new
`test_manifest_bottle_merge.py` for `merge_bottles_runtime`.
## Open questions
None.
-247
View File
@@ -1,247 +0,0 @@
# PRD prd-new: Egress control plane — metering, budgets, and forced cutoff
- **Status:** Draft
- **Author:** didericis
- **Created:** 2026-06-25
- **Issue:** #251
## Summary
Add an **out-of-band egress enforcement & observability plane**: meter every
agent's token usage at the egress proxy, decrement budgets without the agent's
cooperation, and forcibly cut a bottle's egress when a budget is exhausted —
either automatically or on command from a host-level dashboard. The trigger
(usage threshold) and the action (route-drop / freeze / kill) both live in the
egress plane and run with no agent in the loop. This is distinct from the
supervise sidecar (PRD 0013), which is agent-initiated and therefore cannot
enforce a cost cutoff on a runaway agent. State (usage ledger, budgets, audit)
moves into a host-level SQLite database behind a thin repository API, the first
SQL store in an otherwise flat-file repo.
## Problem
bot-bottle can't currently do two things the cost-overrun case demands:
1. **Forced egress shutdown on limit.** When an agent crosses a token
threshold, kill its egress automatically — no human in the loop.
2. **Remote (host-level) management.** Drive agents from a single surface:
see usage, cut egress, stop bottles, to prevent cost overruns.
The existing supervise sidecar (PRD 0013) is **entirely agent-initiated**: every
action begins with the agent voluntarily calling an MCP tool and an operator
approving it. A runaway or expensive agent — exactly the cost-overrun case —
will never call `egress-block` on itself. Supervision is therefore a
**collaborative recovery** mechanism, not an **enforcement** mechanism; making
it mandatory (#249) would not deliver forced cost-cutoff.
The requirement forces a distinction the current design blurs:
- **Plane A — enforcement / observability (this PRD).** System → infrastructure.
Meter usage, cut egress on threshold or command, account for cost.
Out-of-band; independent of the agent. **Unconditional** — an enforcement
plane you can opt out of isn't enforcement.
- **Plane B — agent-facing recovery (the existing supervise sidecar).**
Agent → operator, approval-gated. Useful interactively; meaningless for a
headless agent with no operator watching its queue. Remains optional.
This PRD builds Plane A. It reframes the "always-on control" invariant of #249
as "the egress control plane is always present" — a more defensible property
than "every agent runs the agent-facing supervisor." Unsupervised
(headless/CI/ephemeral) agents stay first-class: still subject to the mandatory
meter + kill switch, they simply lack the agent-facing proposal tools they
couldn't use anyway.
## Goals / Success Criteria
- The egress proxy meters every request to a metered API host (e.g.
`api.anthropic.com`) and records authoritative token usage per bottle and per
agent provider, with no agent cooperation.
- A budget can be set at four scopes with deterministic precedence
(**agent → bottle → parent bottle → global host budget**); the
most-specific applicable budget governs.
- When usage crosses a budget, the bottle's configured **cutoff policy**
(`cutoff` | `freeze` | `kill`) fires automatically, executed host-side on the
egress plane — never via the supervise queue.
- An operator can, from a single **host-level TUI dashboard**, see live per-bottle
usage against budget and command a cutoff/stop on demand.
- Host budgets, default cutoff policy, and per-provider limits are declared in a
new host-level `~/.bot-bottle/settings.yml`, parseable by `yaml_subset.py`.
- All usage, budget state, and enforcement actions persist in a host-level
SQLite DB behind a thin repository API, so the store can later be swapped for
a cross-host cloud service.
## Non-goals
- **Remote control / cross-host control plane.** Web + mobile remote control,
cross-host budgets, and the authn/transport they require are explicitly
deferred. v1 is a **host-only TUI** with no remote surface.
- **Dollar-denominated budgets.** Budgets are token counts keyed by agent
provider, not currency. Price tables are out of scope.
- **Migrating existing flat-file state into SQLite.** Resume `metadata.json`,
transcripts, Dockerfile overrides, the supervise queue, and audit logs stay on
the filesystem. Only the *new* metering/budget/enforcement ledger is SQL.
- **Making the supervise sidecar (Plane B) mandatory.** Out of scope here; this
PRD is the answer to "what should be unconditional" (Plane A), leaving #249's
Plane-B question open.
- **Per-request hard pre-send blocking as the primary mechanism.** The gate is
budget-crossing detected at/after metering; a pre-flight estimator (below) is a
refinement, not the core enforcement path.
## Design
### Two measurements: gate vs. account
There are two distinct needs, and they want different signals:
- **Account (authoritative).** Decrement the real budget from the API
**response**, which already carries authoritative usage (Anthropic
`input_tokens` / `output_tokens`, OpenAI `usage`). The egress addon already
has a `response(flow)` hook (`bot_bottle/egress_addon.py:460`), so the real
number is available with no extra network call. **Caveat:** agent traffic is
mostly streaming SSE, so the response path must tail the stream for the final
usage event rather than parse a single JSON body — scoped explicitly as work.
- **Gate (estimate).** To block *before* sending, only the request is available,
so an estimator / provider `count_tokens` endpoint is the only option.
Calling `count_tokens` for accounting would be both less accurate *and* an extra
metered egress call per request, so accounting uses response `usage` and the
estimator is reserved for the optional pre-flight gate.
### `count_tokens` on agent providers
Add an abstract `count_tokens(request) -> int` to the `AgentProvider`
abstraction (`bot_bottle/agent_provider.py`):
- **Default** is a good-enough stdlib estimator. Prefer stdlib only; a small
pip dependency *for the sidecar* is acceptable for the fallback if stdlib
proves too inaccurate (this does not relax the package's stdlib-first stance —
it would be a sidecar-only dep, like the bundle already carries).
- **Built-in `claude`** uses Anthropic's token-counting endpoint;
**built-in `codex`** uses OpenAI's. These are exact for the gate but cost a
metered call, so they are gate-only; accounting still comes from the response.
### Budgets and precedence
Budgets are token counts keyed by **agent provider name** (the same names
bottles already use). Four scopes, most-specific wins:
```
agent → bottle → parent bottle → global (host)
```
The global host budget is the highest-priority feature to ship (the cross-host
control plane will eventually consume it); per-agent and per-bottle budgets
override it for finer control. A budget can also be supplied **at bottle
launch** (`--budget` or equivalent), overriding the settings.yml defaults for
that run. Enforcement evaluates the effective budget as the
nearest-defined scope at decrement time.
### `~/.bot-bottle/settings.yml`
New **host-level** settings file (the `~/.bot-bottle/` root, *not* the per-repo
`.bot-bottle/` — host budgets must not be committed per-repo). Parsed by
`yaml_subset.py`, so it must stay within that bounded subset (flat mappings,
scalars; no anchors, no multi-line block scalars). Shape:
```yaml
budget:
claude: 5000000 # token budget keyed by agent provider
codex: 2000000
shutdown: cutoff # default cutoff policy: cutoff | freeze | kill
```
### Forced cutoff and cutoff policy
On budget exhaustion (or an operator command), the configured per-bottle cutoff
policy fires. The three policies map onto primitives that already exist:
- **`cutoff`** (default) — drop the bottle's `routes.yaml` to empty and reload
(or isolate the bottle from the egress network); the agent/bottle keeps
running but can no longer reach metered hosts. This is the route-drop already
available on the egress plane (`bot_bottle/backend/egress_apply.py`).
- **`freeze`** — commit/snapshot state, then kill the agent/bottle; resumable
later via `bot_bottle/backend/freeze.py`.
- **`kill`** — tear the bottle down without saving state (backend teardown).
The trigger lives in the metering path and the action in the egress/backend
plane; **neither touches the supervise proposal queue** (design constraint from
#251).
### Host-level SQLite store
**Decision: introduce SQLite now, narrowly.**
- **The dependency objection doesn't apply.** `sqlite3` is in the Python stdlib,
so it does not break the AGENTS.md stdlib-first / no-runtime-pip stance — same
category as the hand-rolled `yaml_subset.py`, except the stdlib already ships
the whole engine.
- **It fits the problem.** A *global* token budget decremented concurrently by N
egress sidecars (today `~/.bot-bottle/` already has `state/`, `audit/`,
`queue/` written by parallel bottles) is a read-modify-write race. Over JSON
that means hand-rolled file locking; SQLite gives atomic transactions + WAL for
free. The per-agent/per-bottle precedence rollup plus "sum across all bottles"
is a `GROUP BY`, not an N-directory rescan.
- **It rehearses the cloud swap.** "Wrap operations in an API so we can swap to a
cloud service" maps directly onto a thin repository/DAO over SQLite → Postgres
later. A JSON-file store is a worse rehearsal than SQL.
**Costs (real but bounded):** a new paradigm in a flat-file repo needs a
`schema_version` table + idempotent startup migrations; SQLite serializes
writers, so WAL mode + `busy_timeout` are required (a non-issue at a handful of
bottles); test fixtures need temp DBs.
**Scope of the store:** one DB at `~/.bot-bottle/bot-bottle.db` behind a thin
repository API. Only the **new** metering/budget/enforcement-audit ledger lives
there. Existing per-bottle blobs (resume `metadata.json`, transcripts,
Dockerfile overrides, supervise queue) stay on the filesystem — migrating them
now is churn for no benefit and they lack the concurrency/aggregation problem.
### Host-level controller + dashboard
A single **host-level controller** owns the meter, budget evaluation, and the
cutoff actions across all bottles (cf. `bot_bottle/cli/supervise.py`'s
cross-bottle view), rather than a per-bottle daemon. v1 ships one host-level
**TUI dashboard** that reads live usage-vs-budget from the SQLite store and
offers on-demand cutoff/stop. The existing supervisor UI should eventually fold
into this same dashboard; this PRD lays the host-level surface it will move to.
## Implementation chunks
Ordered, individually mergeable:
1. **SQLite repository foundation.** `~/.bot-bottle/bot-bottle.db`, schema +
`schema_version` migrations, WAL + `busy_timeout`, thin repository API,
temp-DB test fixtures. No behavior wired yet.
2. **Metering at the egress proxy.** Parse authoritative response `usage`
(including SSE final-usage tailing) in the egress addon `response` hook;
write per-bottle / per-provider usage rows to the ledger.
3. **`settings.yml` + budget model.** Host-level `~/.bot-bottle/settings.yml`
parsed by `yaml_subset.py`; budget precedence (agent → bottle → parent →
global) and the `--budget` launch flag.
4. **Forced cutoff + cutoff policy.** Wire the threshold trigger to the
`cutoff` / `freeze` / `kill` primitives on the egress/backend plane; record
enforcement actions to the audit ledger.
5. **Host-level TUI dashboard.** Live usage-vs-budget view + on-demand
cutoff/stop, reading the store.
6. **`count_tokens` pre-flight gate (optional refinement).** Abstract method +
stdlib estimator default; Anthropic/OpenAI endpoints for built-in
claude/codex; optional pre-send block.
## Open questions
- **SSE usage tailing robustness.** Buffering streamed responses to extract the
final usage event without breaking the agent's own stream consumption — how
much of the body must the addon hold, and what's the failure mode if the
stream is interrupted mid-flight?
- **Crossing mid-request.** A single response can push usage past budget only
*after* it's already been delivered. Is post-hoc cutoff (next request blocked)
sufficient, or is a pre-flight estimator gate (chunk 6) required for v1?
- **Provider name ↔ metered host mapping.** How does the proxy attribute a
flow to an agent-provider budget key — by destination host, by bottle
identity, or both?
- **Parent-bottle budget semantics.** For `bottle extends` (PRD 0025 / 0065)
chains, does "parent bottle" mean the manifest parent, the launching bottle,
or the full ancestry summed?
- **Dashboard ↔ controller transport (even host-only).** In-process, a local
socket, or polling the SQLite store directly? Picks the seam the future remote
control plane will extend.
+2 -2
View File
@@ -115,8 +115,8 @@ class TestBottleIdentity(unittest.TestCase):
class TestPreserveMarker(_FakeHomeMixin, unittest.TestCase):
"""The .preserve marker tells cli.py's session-end cleanup to keep
the state dir instead of removing it."""
"""The .preserve marker is how capability_apply tells cli.py's
session-end cleanup to keep the state dir instead of removing it."""
def setUp(self):
self._setup_fake_home()
+61 -224
View File
@@ -1,8 +1,7 @@
"""Unit: cmd_start selector dispatch (PRD 0051, issue #269).
"""Unit: cmd_start selector dispatch (PRD 0051).
Tests that cmd_start calls filter_select only when the agent name is
absent, shows the bottle multiselect after agent selection, and skips
pickers when both are explicitly set.
absent, skips it when the agent is explicit, and returns 0 on cancel.
All actual launch work is stubbed so no container is created.
"""
@@ -11,7 +10,6 @@ from __future__ import annotations
import os
import unittest
from collections.abc import Mapping, Sequence
from unittest.mock import MagicMock, patch
import bot_bottle.cli.start as start_mod
@@ -19,16 +17,10 @@ import bot_bottle.cli.tui as tui_mod
from bot_bottle.backend import ActiveAgent
def _make_manifest(
agent_names: list[str],
bottle_names: list[str] | None = None,
agent_bottle: str = "",
):
def _make_manifest(agent_names: list[str]):
manifest = MagicMock()
manifest.agents = {name: MagicMock(bottle=agent_bottle) for name in agent_names}
manifest.agents = {name: MagicMock() for name in agent_names}
manifest.all_agent_names = sorted(agent_names)
manifest.all_bottle_names = sorted(bottle_names or [])
manifest.home_md = None # eager mode so _peek_agent_bottle uses agents dict
return manifest
@@ -36,27 +28,27 @@ class TestCmdStartSelector(unittest.TestCase):
"""Drive cmd_start with a minimal set of stubs."""
def setUp(self):
self._manifest = _make_manifest(["researcher", "implementer"], ["claude", "dev"])
# Stub Manifest.resolve so no on-disk manifest is needed.
self._manifest = _make_manifest(["researcher", "implementer"])
self._resolve_patch = patch(
"bot_bottle.cli.start.ManifestIndex.resolve",
return_value=self._manifest,
)
self._resolve_patch.start()
# Stub _launch_bottle so no real container work happens.
self._launch_patch = patch(
"bot_bottle.cli.start._launch_bottle",
return_value=0,
)
self._launch_mock = self._launch_patch.start()
# Stub filter_select (agent picker) and filter_multiselect (bottle picker).
self._agent_picker_patch = patch.object(tui_mod, "filter_select")
self._agent_picker_mock = self._agent_picker_patch.start()
self._bottle_picker_patch = patch.object(tui_mod, "filter_multiselect")
self._bottle_picker_mock = self._bottle_picker_patch.start()
self._bottle_picker_mock.return_value = ["claude"] # default: one bottle selected
# Stub filter_select to avoid opening /dev/tty.
self._tui_patch = patch.object(tui_mod, "filter_select")
self._tui_mock = self._tui_patch.start()
# Ensure BOT_BOTTLE_BACKEND is absent so omitted --backend
# flows through to the resolver default.
self._env_patch = patch.dict(os.environ, {}, clear=False)
self._env_patch.start()
os.environ.pop("BOT_BOTTLE_BACKEND", None)
@@ -64,108 +56,50 @@ class TestCmdStartSelector(unittest.TestCase):
def tearDown(self):
self._resolve_patch.stop()
self._launch_patch.stop()
self._agent_picker_patch.stop()
self._bottle_picker_patch.stop()
self._tui_patch.stop()
self._env_patch.stop()
# ------------------------------------------------------------------
# Agent explicit — agent picker skipped; bottle picker always shown
# Both explicit — no picker shown
# ------------------------------------------------------------------
def test_explicit_agent_skips_agent_picker(self):
def test_both_explicit_skips_picker(self):
self._tui_mock.return_value = "researcher"
rc = start_mod.cmd_start(["--backend=docker", "researcher"])
self.assertEqual(0, rc)
self._agent_picker_mock.assert_not_called()
self._bottle_picker_mock.assert_called_once()
self._tui_mock.assert_not_called()
self._launch_mock.assert_called_once()
def test_explicit_agent_bottle_picker_shows_available_bottles(self):
start_mod.cmd_start(["researcher"])
call_kwargs = self._bottle_picker_mock.call_args
self.assertEqual(["claude", "dev"], call_kwargs[0][0])
self.assertIn("bottle", call_kwargs[1]["title"].lower())
# ------------------------------------------------------------------
# Agent absent → agent picker fires; bottle picker always follows
# ------------------------------------------------------------------
def test_agent_absent_shows_agent_picker(self):
self._agent_picker_mock.return_value = "researcher"
rc = start_mod.cmd_start(["--backend=docker"])
self.assertEqual(0, rc)
self._agent_picker_mock.assert_called_once()
call_kwargs = self._agent_picker_mock.call_args
self.assertEqual(["implementer", "researcher"], call_kwargs[0][0])
self.assertIn("agent", call_kwargs[1]["title"].lower())
# Bottle picker must also fire after agent selection.
self._bottle_picker_mock.assert_called_once()
def test_agent_picker_cancel_skips_bottle_picker(self):
self._agent_picker_mock.return_value = None
rc = start_mod.cmd_start(["--backend=docker"])
self.assertEqual(0, rc)
self._bottle_picker_mock.assert_not_called()
self._launch_mock.assert_not_called()
def test_bottle_picker_cancel_returns_0(self):
self._bottle_picker_mock.return_value = None
rc = start_mod.cmd_start(["researcher"])
self.assertEqual(0, rc)
self._launch_mock.assert_not_called()
# ------------------------------------------------------------------
# Bottle selection is forwarded to BottleSpec
# ------------------------------------------------------------------
def test_selected_bottles_forwarded_to_spec(self):
self._bottle_picker_mock.return_value = ["claude", "dev"]
start_mod.cmd_start(["researcher"])
self._launch_mock.assert_called_once()
spec = self._launch_mock.call_args[0][0]
self.assertEqual(("claude", "dev"), spec.bottle_names)
def test_empty_bottle_selection_forwarded(self):
self._bottle_picker_mock.return_value = []
start_mod.cmd_start(["researcher"])
self._launch_mock.assert_called_once()
spec = self._launch_mock.call_args[0][0]
self.assertEqual((), spec.bottle_names)
# ------------------------------------------------------------------
# Agent default bottle pre-populates the picker
# ------------------------------------------------------------------
def test_agent_bottle_prepopulates_bottle_picker(self):
manifest = _make_manifest(
["implementer"], ["claude", "dev"], agent_bottle="claude"
)
with patch(
"bot_bottle.cli.start.ManifestIndex.resolve", return_value=manifest
):
start_mod.cmd_start(["implementer"])
call_kwargs = self._bottle_picker_mock.call_args
self.assertEqual(["claude"], call_kwargs[1]["initial"])
def test_no_agent_bottle_empty_initial(self):
manifest = _make_manifest(["researcher"], ["claude", "dev"], agent_bottle="")
with patch(
"bot_bottle.cli.start.ManifestIndex.resolve", return_value=manifest
):
start_mod.cmd_start(["researcher"])
call_kwargs = self._bottle_picker_mock.call_args
self.assertEqual([], call_kwargs[1]["initial"])
# ------------------------------------------------------------------
# Backend wiring
# ------------------------------------------------------------------
def test_explicit_backend_forwarded(self):
start_mod.cmd_start(["--backend=docker", "researcher"])
_, kwargs = self._launch_mock.call_args
self.assertEqual("docker", kwargs["backend_name"])
def test_absent_backend_uses_default(self):
start_mod.cmd_start(["researcher"])
# ------------------------------------------------------------------
# Agent absent → agent picker fires; backend explicit
# ------------------------------------------------------------------
def test_agent_absent_shows_agent_picker(self):
self._tui_mock.return_value = "researcher"
rc = start_mod.cmd_start(["--backend=docker"])
self.assertEqual(0, rc)
self._tui_mock.assert_called_once()
call_kwargs = self._tui_mock.call_args
self.assertEqual(["implementer", "researcher"], call_kwargs[0][0])
self.assertIn("agent", call_kwargs[1]["title"].lower())
def test_agent_picker_cancel_returns_0(self):
self._tui_mock.return_value = None
rc = start_mod.cmd_start(["--backend=docker"])
self.assertEqual(0, rc)
self._launch_mock.assert_not_called()
# ------------------------------------------------------------------
# Agent explicit, backend absent → no picker
# ------------------------------------------------------------------
def test_backend_absent_uses_default_without_picker(self):
rc = start_mod.cmd_start(["researcher"])
self.assertEqual(0, rc)
self._tui_mock.assert_not_called()
self._launch_mock.assert_called_once()
_, kwargs = self._launch_mock.call_args
self.assertIsNone(kwargs["backend_name"])
@@ -176,21 +110,28 @@ class TestCmdStartSelector(unittest.TestCase):
finally:
os.environ.pop("BOT_BOTTLE_BACKEND", None)
self.assertEqual(0, rc)
self._tui_mock.assert_not_called()
def test_both_absent_shows_agent_picker_then_bottle_picker(self):
self._agent_picker_mock.return_value = "researcher"
# ------------------------------------------------------------------
# Both absent → only agent picker
# ------------------------------------------------------------------
def test_both_absent_shows_only_agent_picker(self):
self._tui_mock.return_value = "researcher"
rc = start_mod.cmd_start([])
self.assertEqual(0, rc)
self._agent_picker_mock.assert_called_once()
self._bottle_picker_mock.assert_called_once()
self._tui_mock.assert_called_once()
title = self._tui_mock.call_args[1]["title"].lower()
self.assertIn("agent", title)
self._launch_mock.assert_called_once()
_, kwargs = self._launch_mock.call_args
self.assertIsNone(kwargs["backend_name"])
def test_both_absent_agent_cancel_skips_bottle_and_launch(self):
self._agent_picker_mock.return_value = None
def test_both_absent_agent_cancel_skips_backend_picker(self):
self._tui_mock.side_effect = [None]
rc = start_mod.cmd_start([])
self.assertEqual(0, rc)
self._agent_picker_mock.assert_called_once()
self._bottle_picker_mock.assert_not_called()
self.assertEqual(1, self._tui_mock.call_count)
self._launch_mock.assert_not_called()
@@ -208,13 +149,11 @@ class TestCmdStartLabelCollision(unittest.TestCase):
"""cmd_start re-prompts when the label's slug is already running."""
def setUp(self):
self._manifest = _make_manifest(["researcher"], ["claude"])
self._manifest = _make_manifest(["researcher"])
patch("bot_bottle.cli.start.ManifestIndex.resolve", return_value=self._manifest).start()
self._launch_mock = patch(
"bot_bottle.cli.start._launch_bottle", return_value=0,
).start()
# Stub the bottle picker to always return a selection.
patch.object(tui_mod, "filter_multiselect", return_value=["claude"]).start()
self.addCleanup(patch.stopall)
def test_no_collision_proceeds_without_reprompt(self):
@@ -254,107 +193,5 @@ class TestCmdStartLabelCollision(unittest.TestCase):
self.assertIn("already in use", second_call_kwargs.get("disclaimer", ""))
class TestBottleLineage(unittest.TestCase):
"""Unit tests for _bottle_lineage."""
def test_returns_empty_in_eager_mode(self):
manifest = _make_manifest(["agent"], ["base", "dev"])
# home_md is None in eager mode → no file reads, returns {}
result = start_mod._bottle_lineage(manifest)
self.assertEqual({}, result)
def test_reads_extends_chain_from_files(self):
import tempfile
from pathlib import Path
with tempfile.TemporaryDirectory() as tmp:
bottles_dir = Path(tmp) / "bottles"
bottles_dir.mkdir()
(bottles_dir / "base.md").write_text("---\n{}\n---\n")
(bottles_dir / "mid.md").write_text("---\nextends: base\n---\n")
(bottles_dir / "leaf.md").write_text("---\nextends: mid\n---\n")
manifest = MagicMock()
manifest.home_md = Path(tmp)
result = start_mod._bottle_lineage(manifest)
self.assertNotIn("base", result) # no parent → not in map
self.assertEqual("base -> mid", result["mid"])
self.assertEqual("base -> mid -> leaf", result["leaf"])
def test_cycle_protection(self):
import tempfile
from pathlib import Path
with tempfile.TemporaryDirectory() as tmp:
bottles_dir = Path(tmp) / "bottles"
bottles_dir.mkdir()
(bottles_dir / "a.md").write_text("---\nextends: b\n---\n")
(bottles_dir / "b.md").write_text("---\nextends: a\n---\n")
manifest = MagicMock()
manifest.home_md = Path(tmp)
result = start_mod._bottle_lineage(manifest)
# Cycle must not hang; each should get a two-element chain.
for name in ("a", "b"):
self.assertIn(name, result)
self.assertIn("->", result[name])
class TestManifestToYaml(unittest.TestCase):
"""Unit tests for _manifest_to_yaml."""
def _make_manifest_obj(
self,
*,
skills: Sequence[str] = (),
env: Mapping[str, str] | None = None,
supervise: bool = True,
agent_provider_template: str = "claude",
):
from bot_bottle.manifest import Manifest, ManifestBottle
from bot_bottle.manifest_agent import ManifestAgent, ManifestAgentProvider
agent = ManifestAgent(skills=tuple(skills))
bottle = ManifestBottle(
env=env or {},
supervise=supervise,
agent_provider=ManifestAgentProvider(template=agent_provider_template),
)
return Manifest(agent=agent, bottle=bottle)
def test_includes_agent_section(self):
m = self._make_manifest_obj(skills=["researcher"])
yaml = start_mod._manifest_to_yaml(m)
self.assertIn("agent:", yaml)
self.assertIn("- researcher", yaml)
def test_includes_bottle_section(self):
m = self._make_manifest_obj(env={"FOO": "bar"})
yaml = start_mod._manifest_to_yaml(m)
self.assertIn("bottle:", yaml)
self.assertIn("FOO: bar", yaml)
def test_supervise_rendered(self):
m_true = self._make_manifest_obj(supervise=True)
m_false = self._make_manifest_obj(supervise=False)
self.assertIn("supervise: true", start_mod._manifest_to_yaml(m_true))
self.assertIn("supervise: false", start_mod._manifest_to_yaml(m_false))
def test_non_claude_provider_shown(self):
m = self._make_manifest_obj(agent_provider_template="codex")
yaml = start_mod._manifest_to_yaml(m)
self.assertIn("agent_provider:", yaml)
self.assertIn("template: codex", yaml)
def test_default_claude_provider_omitted(self):
m = self._make_manifest_obj(agent_provider_template="claude")
yaml = start_mod._manifest_to_yaml(m)
self.assertNotIn("agent_provider:", yaml)
if __name__ == "__main__":
unittest.main()
+2 -2
View File
@@ -29,8 +29,8 @@ class _FakeHomeMixin:
class TestCaptureSessionState(_FakeHomeMixin, unittest.TestCase):
# capture_claude_session_state handles the preserve marker for
# non-zero agent exits.
# snapshot_transcript is commented out (capability_apply is disabled);
# capture_claude_session_state now only handles the preserve marker.
def setUp(self):
self._setup_fake_home()
+2 -128
View File
@@ -1,4 +1,4 @@
"""Unit tests for bot_bottle.cli.tui — filter_select and filter_multiselect.
"""Unit tests for bot_bottle.cli.tui — filter_select internals.
We test the pure-Python logic (_filter_items, cursor movement, confirm,
cancel) by exercising the internal helpers directly, without spinning up
@@ -8,15 +8,8 @@ a real curses session (which requires a TTY).
from __future__ import annotations
import unittest
from typing import Any, Optional
from bot_bottle.cli.tui import _filter_items, _multiselect_loop, filter_multiselect, filter_select
_KEY_SPACE = 32
_KEY_ENTER = 10
_KEY_ESC = 27
_KEY_CTRL_D = 4
from bot_bottle.cli.tui import _filter_items, filter_select
class TestFilterItems(unittest.TestCase):
@@ -53,124 +46,5 @@ class TestFilterSelectEmptyItems(unittest.TestCase):
self.assertIsNone(result)
class TestFilterMultiselectEmptyItems(unittest.TestCase):
def test_returns_empty_list_for_empty_items(self):
# No TTY needed — short-circuits before opening tty.
result = filter_multiselect([], title="Select", tty_path="/dev/null")
self.assertEqual([], result)
def test_returns_none_when_tty_unavailable(self):
result = filter_multiselect(["a", "b"], tty_path="/nonexistent/tty")
self.assertIsNone(result)
class TestMultiselectLoopReordering(unittest.TestCase):
"""Exercise _multiselect_loop key handling without a real curses terminal.
We drive the loop via a fake screen that feeds a pre-recorded key sequence
and records what was drawn we only need the return value, so the fake
screen's getch() raises StopIteration after the key list is exhausted, and
the loop is expected to return before that via Ctrl-D.
"""
def _run(self, keys: list[int], items: list[str], initial: list[str]) -> Optional[list[str]]:
"""Run _multiselect_loop with a synthetic screen feeding `keys`."""
key_iter = iter(keys)
class FakeScreen:
def erase(self) -> None: pass
def getmaxyx(self) -> tuple[int, int]: return (40, 80)
def refresh(self) -> None: pass
def getch(self) -> int: return next(key_iter)
def addstr(self, *a: Any) -> None: pass
def keypad(self, *a: Any) -> None: pass
return _multiselect_loop(FakeScreen(), items, title="", initial=initial) # type: ignore[arg-type]
def test_ctrl_d_confirms_initial_selection(self):
result = self._run([_KEY_CTRL_D], ["a", "b", "c"], ["a", "b"])
self.assertEqual(["a", "b"], result)
def test_esc_cancels(self):
result = self._run([_KEY_ESC], ["a", "b"], ["a"])
self.assertIsNone(result)
def test_tab_then_K_moves_item_up(self):
# Start: selected = ["a", "b", "c"]
# Tab → order mode (order_cursor=0 on "a")
# ↓ → order_cursor=1 (on "b")
# K → swap b and a → ["b", "a", "c"], order_cursor=0
# Ctrl-D → confirm
DOWN = ord("j")
result = self._run(
[ord("\t"), DOWN, ord("K"), _KEY_CTRL_D],
["a", "b", "c"],
["a", "b", "c"],
)
self.assertEqual(["b", "a", "c"], result)
def test_tab_then_J_moves_item_down(self):
# selected = ["a", "b", "c"], focus order, cursor=0
# J → swap a and b → ["b", "a", "c"], cursor=1
# Ctrl-D → confirm
result = self._run(
[ord("\t"), ord("J"), _KEY_CTRL_D],
["a", "b", "c"],
["a", "b", "c"],
)
self.assertEqual(["b", "a", "c"], result)
def test_K_at_top_is_no_op(self):
# cursor already at 0, K should not change order
result = self._run(
[ord("\t"), ord("K"), _KEY_CTRL_D],
["a", "b"],
["a", "b"],
)
self.assertEqual(["a", "b"], result)
def test_J_at_bottom_is_no_op(self):
DOWN = ord("j")
result = self._run(
[ord("\t"), DOWN, ord("J"), _KEY_CTRL_D],
["a", "b"],
["a", "b"],
)
self.assertEqual(["a", "b"], result)
def test_tab_back_to_filter_then_confirm(self):
# Tab → order, Tab → filter, Ctrl-D confirms unchanged
result = self._run(
[ord("\t"), ord("\t"), _KEY_CTRL_D],
["a", "b"],
["a", "b"],
)
self.assertEqual(["a", "b"], result)
def test_space_toggles_item_on(self):
# Space on an unselected item selects it; Ctrl-D confirms.
result = self._run([_KEY_SPACE, _KEY_CTRL_D], ["a", "b"], [])
self.assertEqual(["a"], result)
def test_space_toggles_item_off(self):
# Space on a selected item deselects it; Ctrl-D confirms empty.
result = self._run([_KEY_SPACE, _KEY_CTRL_D], ["a", "b"], ["a"])
self.assertEqual([], result)
def test_enter_confirms_without_toggle(self):
# Enter immediately confirms the current selection without toggling.
result = self._run([_KEY_ENTER], ["a", "b"], ["a"])
self.assertEqual(["a"], result)
def test_enter_confirms_empty_selection(self):
result = self._run([_KEY_ENTER], ["a", "b"], [])
self.assertEqual([], result)
def test_space_then_enter_confirms(self):
# Space selects "a", Enter confirms.
result = self._run([_KEY_SPACE, _KEY_ENTER], ["a", "b"], [])
self.assertEqual(["a"], result)
if __name__ == "__main__":
unittest.main()
+11 -3
View File
@@ -108,6 +108,7 @@ def _supervise_plan() -> SupervisePlan:
return SupervisePlan(
slug=SLUG,
queue_dir=STATE / "supervise" / "queue",
current_config_dir=STATE / "supervise" / "current-config",
internal_network=f"bot-bottle-net-{SLUG}",
)
@@ -270,11 +271,18 @@ class TestAgentAlwaysPresent(unittest.TestCase):
s = bottle_plan_to_compose(_plan(**kwargs))["services"]["agent"]
self.assertEqual(["sidecars"], s["depends_on"])
def test_agent_has_no_current_config_mount_with_supervise(self):
def test_agent_current_config_mount_only_with_supervise(self):
with_sv = bottle_plan_to_compose(_plan(supervise=True))["services"]["agent"]
self.assertNotIn("volumes", with_sv)
self.assertTrue(any(
v["target"] == "/etc/bot-bottle/current-config"
for v in with_sv.get("volumes", [])
))
without_sv = bottle_plan_to_compose(_plan(supervise=False))["services"]["agent"]
self.assertNotIn("volumes", without_sv)
# Either no volumes key at all, or no current-config target.
self.assertFalse(any(
v["target"] == "/etc/bot-bottle/current-config"
for v in without_sv.get("volumes", [])
))
class TestSidecarBundleShape(unittest.TestCase):
@@ -75,6 +75,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return DockerBottlePlan(
spec=spec,
@@ -78,6 +78,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return DockerBottlePlan(
spec=spec,
@@ -10,8 +10,6 @@ from unittest.mock import MagicMock, patch
from bot_bottle.contrib.gitea.deploy_key_provisioner import (
GiteaDeployKeyProvisioner,
_API_TIMEOUT_SECS,
_KEYGEN_TIMEOUT_SECS,
_split_owner_repo,
)
from bot_bottle.deploy_key_provisioner import DeployKeyCollisionError
@@ -85,25 +83,6 @@ class TestCreate(unittest.TestCase):
self.assertEqual(str(fake_key_id), key_id)
self.assertEqual(fake_private, private_bytes)
def test_create_passes_timeout_to_ssh_keygen_and_urlopen(self):
provisioner = _provisioner()
with patch(
"bot_bottle.contrib.gitea.deploy_key_provisioner.subprocess.run"
) as mock_run, patch(
"bot_bottle.contrib.gitea.deploy_key_provisioner.urllib.request.urlopen"
) as mock_urlopen, patch(
"bot_bottle.contrib.gitea.deploy_key_provisioner.Path.read_bytes",
return_value=b"PRIVATE",
), patch(
"bot_bottle.contrib.gitea.deploy_key_provisioner.Path.read_text",
return_value="ssh-ed25519 AAAA\n",
):
mock_urlopen.return_value = _urlopen_response({"id": 1})
provisioner.create("owner/repo", "title")
self.assertEqual(_KEYGEN_TIMEOUT_SECS, mock_run.call_args.kwargs.get("timeout"))
self.assertEqual(_API_TIMEOUT_SECS, mock_urlopen.call_args.kwargs.get("timeout"))
def test_create_raises_on_http_error(self):
provisioner = _provisioner()
with patch(
@@ -160,16 +139,6 @@ class TestDelete(unittest.TestCase):
self.assertIn("/api/v1/repos/didericis/bot-bottle/keys/99", req.full_url)
self.assertEqual("DELETE", req.get_method())
def test_delete_passes_timeout_to_urlopen(self):
provisioner = _provisioner()
with patch(
"bot_bottle.contrib.gitea.deploy_key_provisioner.urllib.request.urlopen"
) as mock_urlopen:
mock_urlopen.return_value = _urlopen_response({})
provisioner.delete("owner/repo", "7")
self.assertEqual(_API_TIMEOUT_SECS, mock_urlopen.call_args.kwargs.get("timeout"))
def test_delete_tolerates_404(self):
provisioner = _provisioner()
with patch(
+2 -2
View File
@@ -65,8 +65,8 @@ class TestOrphanStateDirs(_FakeHomeMixin, unittest.TestCase):
)
def test_preserve_marker_skips_dir(self):
# Preserve marker means the user explicitly wanted this dir
# kept for `resume`.
# Preserve marker = capability-block or crash auto-preserve;
# the user explicitly wanted this dir kept for `resume`.
bottle_state.write_per_bottle_dockerfile("kept-ccc", "FROM x\n")
bottle_state.mark_preserved("kept-ccc")
self.assertEqual(
-71
View File
@@ -10,7 +10,6 @@ from bot_bottle.egress import (
Egress,
EgressPlan,
EgressRoute,
_yaml_str_escape,
egress_agent_env_entries,
egress_manifest_routes,
egress_render_routes,
@@ -420,76 +419,6 @@ class TestRenderRoutes(unittest.TestCase):
self.assertEqual(LOG_BLOCKS, cfg.log)
class TestYamlStrEscape(unittest.TestCase):
"""_yaml_str_escape produces safe YAML double-quoted scalar content."""
def test_plain_string_unchanged(self):
self.assertEqual("api.example.com", _yaml_str_escape("api.example.com"))
def test_double_quote_escaped(self):
self.assertEqual('\\"', _yaml_str_escape('"'))
def test_backslash_escaped(self):
self.assertEqual("\\\\", _yaml_str_escape("\\"))
def test_newline_escaped(self):
self.assertEqual("\\n", _yaml_str_escape("\n"))
def test_carriage_return_escaped(self):
self.assertEqual("\\r", _yaml_str_escape("\r"))
def test_tab_escaped(self):
self.assertEqual("\\t", _yaml_str_escape("\t"))
def test_combined(self):
self.assertEqual('\\"\\n\\\\', _yaml_str_escape('"\n\\'))
class TestRenderRoutesEscaping(unittest.TestCase):
"""Stray quotes/newlines in manifest strings do not corrupt routes.yaml."""
@staticmethod
def _parsed(routes) -> list[dict]: # type: ignore
return parse_yaml_subset(egress_render_routes(routes))["routes"] # type: ignore
def test_host_with_double_quote_round_trips(self):
routes = (EgressRoute(host='bad"host.example'),)
parsed = self._parsed(routes)
self.assertEqual('bad"host.example', parsed[0]["host"])
def test_host_with_newline_round_trips(self):
routes = (EgressRoute(host="host\nextra.example"),)
parsed = self._parsed(routes)
self.assertEqual("host\nextra.example", parsed[0]["host"])
def test_auth_scheme_with_double_quote_round_trips(self):
routes = (EgressRoute(
host="api.example",
auth_scheme='Bear"er',
token_env="EGRESS_TOKEN_0",
),)
parsed = self._parsed(routes)
self.assertEqual('Bear"er', parsed[0]["auth_scheme"])
def test_path_value_with_double_quote_round_trips(self):
from bot_bottle.egress_addon_core import PathMatch, MatchEntry
routes = (EgressRoute(
host="api.example",
matches=(MatchEntry(paths=(PathMatch(type="prefix", value='/v1/"quoted"/'),)),),
),)
parsed = self._parsed(routes)
self.assertEqual('/v1/"quoted"/', parsed[0]["matches"][0]["paths"][0]["value"])
def test_header_value_with_double_quote_round_trips(self):
from bot_bottle.egress_addon_core import HeaderMatch, MatchEntry
routes = (EgressRoute(
host="api.example",
matches=(MatchEntry(headers=(HeaderMatch(name="x-h", value='val"ue'),)),),
),)
parsed = self._parsed(routes)
self.assertEqual('val"ue', parsed[0]["matches"][0]["headers"][0]["value"])
class TestResolveTokenValues(unittest.TestCase):
def test_reads_host_env(self):
out = egress_resolve_token_values(
-65
View File
@@ -4,7 +4,6 @@ import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from bot_bottle.git_gate import (
GitGate,
@@ -14,8 +13,6 @@ from bot_bottle.git_gate import (
git_gate_render_access_hook,
git_gate_render_entrypoint,
git_gate_render_hook,
revoke_git_gate_provisioned_keys,
_resolve_identity_file,
git_gate_upstreams_for_bottle,
)
from bot_bottle.manifest import ManifestIndex
@@ -331,68 +328,6 @@ class TestPrepare(unittest.TestCase):
self.assertIn("exec git daemon", content)
class TestDynamicKeyProvisioning(unittest.TestCase):
def setUp(self):
self.stage = Path(tempfile.mkdtemp())
def tearDown(self):
import shutil
shutil.rmtree(self.stage, ignore_errors=True)
def _gitea_manifest(self):
return ManifestIndex.from_json_obj({
"bottles": {
"dev": {
"git-gate": {
"repos": {
"repo": {
"url": "ssh://git@gitea.example.com/org/repo.git",
"key": {
"provider": "gitea",
"forge_token_env": "GITEA_TOKEN",
},
"host_key": "ssh-ed25519 AAAA...",
},
},
}
}
},
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
})
def test_resolve_identity_file_static_uses_entry_path(self):
entry = fixture_with_git().bottles["dev"].git[0]
self.assertEqual(entry.IdentityFile, _resolve_identity_file(entry, "demo", self.stage))
def test_resolve_identity_file_gitea_provisions_key(self):
entry = self._gitea_manifest().bottles["dev"].git[0]
with patch("bot_bottle.git_gate._provision_dynamic_key", return_value="/tmp/provisioned-key") as mock_provision:
self.assertEqual("/tmp/provisioned-key", _resolve_identity_file(entry, "demo", self.stage))
mock_provision.assert_called_once()
def test_revoke_skips_non_gitea_and_missing_id_file(self):
revoke_git_gate_provisioned_keys(fixture_with_git().bottles["dev"], self.stage)
def test_revoke_calls_delete_for_gitea_entry(self):
bottle = self._gitea_manifest().bottles["dev"]
(self.stage / "repo-deploy-key-id").write_text("123\n")
with patch.dict("os.environ", {"GITEA_TOKEN": "token"}), patch(
"bot_bottle.deploy_key_provisioner.get_provisioner"
) as mock_get_provisioner:
provisioner = mock_get_provisioner.return_value
revoke_git_gate_provisioned_keys(bottle, self.stage)
mock_get_provisioner.assert_called_once()
provisioner.delete.assert_called_once_with("org/repo", "123")
def test_revoke_missing_token_raises(self):
bottle = self._gitea_manifest().bottles["dev"]
(self.stage / "repo-deploy-key-id").write_text("123\n")
with patch.dict("os.environ", {}, clear=True), self.assertRaises(RuntimeError) as cm:
revoke_git_gate_provisioned_keys(bottle, self.stage)
self.assertIn("env var is not set", str(cm.exception))
class TestShellEscaping(unittest.TestCase):
"""Regression tests: all three render functions must produce syntactically
valid sh code even when names and upstream URLs contain shell-special
-107
View File
@@ -9,7 +9,6 @@ import urllib.request
from pathlib import Path
from unittest import mock
from bot_bottle.git_gate import GIT_GATE_TIMEOUT_SECS
from bot_bottle.git_http_backend import GitHttpHandler, MAX_BODY_BYTES
@@ -151,61 +150,6 @@ class TestGitHttpBackend(unittest.TestCase):
)
self.assertEqual("git/test", env["HTTP_USER_AGENT"])
def test_subprocess_calls_include_timeout(self):
"""Both subprocess.run calls (access-hook and git http-backend) must
pass timeout= so a hung upstream cannot wedge the sidecar."""
from http.server import ThreadingHTTPServer
with tempfile.TemporaryDirectory() as tmp:
root = Path(tmp)
(root / "repo.git").mkdir()
old_root = os.environ.get("GIT_PROJECT_ROOT")
os.environ["GIT_PROJECT_ROOT"] = str(root)
self.addCleanup(self._restore_env, old_root)
old_hook = os.environ.get("GIT_GATE_ACCESS_HOOK")
hook = root / "access-hook"
hook.write_text("#!/bin/sh\nexit 0\n")
hook.chmod(0o700)
os.environ["GIT_GATE_ACCESS_HOOK"] = str(hook)
self.addCleanup(self._restore_hook, old_hook)
server = ThreadingHTTPServer(("127.0.0.1", 0), GitHttpHandler)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
self.addCleanup(server.shutdown)
self.addCleanup(server.server_close)
backend_response = (
b"Status: 200 OK\r\n"
b"Content-Type: application/x-git-upload-pack-result\r\n"
b"\r\n"
b"0000"
)
calls = [
subprocess.CompletedProcess(["hook"], 0, b"", b""),
subprocess.CompletedProcess(["git"], 0, backend_response, b""),
]
with mock.patch(
"bot_bottle.git_http_backend.subprocess.run",
side_effect=calls,
) as run:
req = urllib.request.Request(
f"http://127.0.0.1:{server.server_port}"
"/repo.git/git-upload-pack",
data=b"",
method="POST",
)
with urllib.request.urlopen(req, timeout=5):
pass
for call in run.call_args_list:
self.assertEqual(
GIT_GATE_TIMEOUT_SECS,
call.kwargs.get("timeout"),
f"subprocess.run call missing timeout: {call}",
)
def test_access_hook_denial_is_logged_to_stdout(self):
"""When the access-hook exits non-zero we still return 403 to the
client, but the hook's stderr must also appear on the handler's
@@ -312,57 +256,6 @@ class TestGitHttpBackend(unittest.TestCase):
os.environ["GIT_GATE_ACCESS_HOOK"] = value
class TestMalformedStatusHeader(unittest.TestCase):
"""Malformed CGI Status: headers must not propagate as unhandled exceptions;
the handler should fall back to HTTP 500."""
def setUp(self):
from http.server import ThreadingHTTPServer
import tempfile
self._tmp = tempfile.mkdtemp()
os.environ["GIT_PROJECT_ROOT"] = self._tmp
self._server = ThreadingHTTPServer(("127.0.0.1", 0), GitHttpHandler)
self._thread = threading.Thread(
target=self._server.serve_forever, daemon=True,
)
self._thread.start()
self._port = self._server.server_port
def tearDown(self):
self._server.shutdown()
self._server.server_close()
os.environ.pop("GIT_PROJECT_ROOT", None)
import shutil
shutil.rmtree(self._tmp, ignore_errors=True)
def _get_with_backend_response(self, cgi_response: bytes) -> int:
with mock.patch(
"bot_bottle.git_http_backend.subprocess.run",
return_value=mock.Mock(returncode=0, stdout=cgi_response),
):
req = urllib.request.Request(
f"http://127.0.0.1:{self._port}/repo.git/info/refs",
method="GET",
)
try:
with urllib.request.urlopen(req, timeout=3) as resp:
return resp.status
except urllib.error.HTTPError as e: # type: ignore
return e.code
def test_empty_status_value_returns_500(self):
status = self._get_with_backend_response(
b"Status: \r\nContent-Type: text/plain\r\n\r\n"
)
self.assertEqual(500, status)
def test_non_numeric_status_returns_500(self):
status = self._get_with_backend_response(
b"Status: bad\r\nContent-Type: text/plain\r\n\r\n"
)
self.assertEqual(500, status)
class TestContentLengthBounds(unittest.TestCase):
"""PRD 0041: malformed or oversized Content-Length is rejected before
git http-backend is invoked."""
-200
View File
@@ -1,200 +0,0 @@
"""Unit: runtime bottle composition (issue #269).
Tests for merge_bottles_runtime and ManifestIndex.load_for_agent with
the new bottle_names parameter.
"""
from __future__ import annotations
import os
import shutil
import tempfile
import textwrap
import unittest
from pathlib import Path
from bot_bottle.manifest import ManifestBottle, ManifestError, ManifestIndex
from bot_bottle.manifest_extends import merge_bottles_runtime
def _index(bottles: dict[str, object], agents: dict[str, object]) -> ManifestIndex:
return ManifestIndex.from_json_obj({"bottles": bottles, "agents": agents})
def _bottle(**kwargs: object) -> ManifestBottle:
return ManifestBottle.from_dict("test", kwargs)
class TestMergeBottlesRuntime(unittest.TestCase):
def test_single_bottle_returns_as_is(self):
b = _bottle(env={"FOO": "1"})
result = merge_bottles_runtime([b])
self.assertEqual({"FOO": "1"}, dict(result.env))
def test_env_later_wins(self):
base = _bottle(env={"FOO": "base", "ONLY_BASE": "x"})
override = _bottle(env={"FOO": "override", "ONLY_OVERRIDE": "y"})
result = merge_bottles_runtime([base, override])
self.assertEqual("override", result.env["FOO"])
self.assertEqual("x", result.env["ONLY_BASE"])
self.assertEqual("y", result.env["ONLY_OVERRIDE"])
def test_egress_routes_concatenated(self):
from bot_bottle.manifest_egress import ManifestEgressConfig, ManifestEgressRoute
r1 = ManifestEgressRoute(Host="api.a.com")
r2 = ManifestEgressRoute(Host="api.b.com")
base = ManifestBottle(egress=ManifestEgressConfig(routes=(r1,)))
override = ManifestBottle(egress=ManifestEgressConfig(routes=(r2,)))
result = merge_bottles_runtime([base, override])
hosts = [r.Host for r in result.egress.routes]
self.assertIn("api.a.com", hosts)
self.assertIn("api.b.com", hosts)
def test_supervise_later_wins(self):
base = _bottle(supervise=True)
override = _bottle(supervise=False)
result = merge_bottles_runtime([base, override])
self.assertFalse(result.supervise)
def test_three_bottles_merged_left_to_right(self):
b1 = _bottle(env={"A": "1", "B": "1", "C": "1"})
b2 = _bottle(env={"B": "2", "C": "2"})
b3 = _bottle(env={"C": "3"})
result = merge_bottles_runtime([b1, b2, b3])
self.assertEqual("1", result.env["A"])
self.assertEqual("2", result.env["B"])
self.assertEqual("3", result.env["C"])
def test_empty_list_raises(self):
with self.assertRaises(ValueError):
merge_bottles_runtime([])
class TestLoadForAgentWithBottleNames(unittest.TestCase):
def test_bottle_names_override_agent_bottle(self):
idx = _index(
bottles={
"base": {"env": {"X": "base"}},
"override": {"env": {"X": "override"}},
},
agents={"impl": {"bottle": "base", "skills": [], "prompt": ""}},
)
m = idx.load_for_agent("impl", ("override",))
self.assertEqual("override", m.bottle.env["X"])
def test_bottle_names_merged_in_order(self):
idx = _index(
bottles={
"a": {"env": {"X": "a", "A": "only-a"}},
"b": {"env": {"X": "b", "B": "only-b"}},
},
agents={"impl": {"bottle": "a", "skills": [], "prompt": ""}},
)
m = idx.load_for_agent("impl", ("a", "b"))
self.assertEqual("b", m.bottle.env["X"])
self.assertEqual("only-a", m.bottle.env["A"])
self.assertEqual("only-b", m.bottle.env["B"])
def test_empty_bottle_names_uses_agent_bottle(self):
idx = _index(
bottles={"base": {"env": {"X": "base"}}},
agents={"impl": {"bottle": "base", "skills": [], "prompt": ""}},
)
m = idx.load_for_agent("impl", ())
self.assertEqual("base", m.bottle.env["X"])
def test_no_bottle_and_no_bottle_names_raises(self):
idx = _index(
bottles={"base": {}},
agents={"impl": {"skills": [], "prompt": ""}},
)
with self.assertRaises(ManifestError) as ctx:
idx.load_for_agent("impl", ())
self.assertIn("no 'bottle' field", str(ctx.exception))
def test_unknown_bottle_name_raises(self):
idx = _index(
bottles={"base": {}},
agents={"impl": {"bottle": "base", "skills": [], "prompt": ""}},
)
with self.assertRaises(ManifestError) as ctx:
idx.load_for_agent("impl", ("nonexistent",))
self.assertIn("nonexistent", str(ctx.exception))
def test_agent_without_bottle_works_with_bottle_names(self):
idx = _index(
bottles={"base": {"env": {"X": "base"}}},
agents={"impl": {"skills": [], "prompt": ""}},
)
m = idx.load_for_agent("impl", ("base",))
self.assertEqual("base", m.bottle.env["X"])
class TestAllBottleNames(unittest.TestCase):
def test_eager_mode_returns_bottle_names(self):
idx = _index(
bottles={"alpha": {}, "beta": {}, "gamma": {}},
agents={"impl": {"bottle": "alpha", "skills": [], "prompt": ""}},
)
self.assertEqual(["alpha", "beta", "gamma"], idx.all_bottle_names)
def test_lazy_mode_scans_files(self):
home = Path(tempfile.mkdtemp(prefix="cb-home-"))
orig_home = os.environ.get("HOME")
os.environ["HOME"] = str(home)
try:
bottles_dir = home / ".bot-bottle" / "bottles"
agents_dir = home / ".bot-bottle" / "agents"
bottles_dir.mkdir(parents=True)
agents_dir.mkdir(parents=True)
(bottles_dir / "claude.md").write_text("---\n---\n")
(bottles_dir / "dev.md").write_text("---\n---\n")
(agents_dir / "impl.md").write_text("---\nbottle: claude\n---\n")
idx = ManifestIndex.resolve(str(home))
self.assertEqual(["claude", "dev"], idx.all_bottle_names)
finally:
if orig_home is None:
os.environ.pop("HOME", None)
else:
os.environ["HOME"] = orig_home
shutil.rmtree(home, ignore_errors=True)
class TestAgentOptionalBottleMd(unittest.TestCase):
"""Agent file without bottle: works when bottle_names are provided at launch."""
def setUp(self) -> None:
self.home = Path(tempfile.mkdtemp(prefix="cb-home-"))
self._orig_home = os.environ.get("HOME")
os.environ["HOME"] = str(self.home)
def tearDown(self) -> None:
if self._orig_home is None:
os.environ.pop("HOME", None)
else:
os.environ["HOME"] = self._orig_home
shutil.rmtree(self.home, ignore_errors=True)
def _write(self, rel: str, text: str) -> None:
p = self.home / ".bot-bottle" / rel
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(textwrap.dedent(text).lstrip("\n"))
def test_agent_without_bottle_resolves_with_bottle_names(self):
self._write("bottles/dev.md", "---\nenv:\n X: dev\n---\n")
self._write("agents/impl.md", "---\n---\nimpl agent.\n")
idx = ManifestIndex.resolve(str(self.home))
m = idx.load_for_agent("impl", ("dev",))
self.assertEqual("dev", m.bottle.env["X"])
def test_agent_without_bottle_fails_without_bottle_names(self):
self._write("bottles/dev.md", "---\n---\n")
self._write("agents/impl.md", "---\n---\nimpl agent.\n")
idx = ManifestIndex.resolve(str(self.home))
with self.assertRaises(ManifestError) as ctx:
idx.load_for_agent("impl", ())
self.assertIn("no 'bottle' field", str(ctx.exception))
if __name__ == "__main__":
unittest.main()
+3 -176
View File
@@ -423,182 +423,9 @@ class TestExtendsErrors(unittest.TestCase):
)
self.assertIn("extends cycle", msg)
def test_non_string_non_list_extends_dies(self):
msg = _error_message(_build, child={"extends": 123})
self.assertIn("extends must be a string or list of strings", msg)
def test_list_entry_non_string_dies(self):
msg = _error_message(_build, child={"extends": [123]})
self.assertIn("extends[0] must be a string", msg)
class TestExtendsMultiParent(unittest.TestCase):
"""extends: [p1, p2, ...] — multi-parent composition (issue #268)."""
_GIT_A = {"url": "ssh://git@host-a/a.git", "key": {"provider": "static", "path": "/k"}}
_GIT_B = {"url": "ssh://git@host-b/b.git", "key": {"provider": "static", "path": "/k"}}
def test_single_element_list_same_as_string(self):
m = _build(
base={"env": {"X": "1"}},
child={"extends": ["base"]},
)
self.assertEqual({"X": "1"}, dict(m.bottles["child"].env))
def test_two_parents_env_union(self):
m = _build(
p1={"env": {"A": "1"}},
p2={"env": {"B": "2"}},
child={"extends": ["p1", "p2"]},
)
self.assertEqual({"A": "1", "B": "2"}, dict(m.bottles["child"].env))
def test_two_parents_env_last_wins_on_collision(self):
m = _build(
p1={"env": {"X": "from-p1"}},
p2={"env": {"X": "from-p2"}},
child={"extends": ["p1", "p2"]},
)
self.assertEqual("from-p2", m.bottles["child"].env["X"])
def test_child_wins_over_all_parents(self):
m = _build(
p1={"env": {"X": "from-p1"}},
p2={"env": {"X": "from-p2"}},
child={"extends": ["p1", "p2"], "env": {"X": "from-child"}},
)
self.assertEqual("from-child", m.bottles["child"].env["X"])
def test_two_parents_supervise_last_wins(self):
m = _build(
p1={"supervise": False},
p2={"supervise": True},
child={"extends": ["p1", "p2"]},
)
self.assertTrue(m.bottles["child"].supervise)
def test_child_supervise_overrides_all_parents(self):
m = _build(
p1={"supervise": True},
p2={"supervise": True},
child={"extends": ["p1", "p2"], "supervise": False},
)
self.assertFalse(m.bottles["child"].supervise)
def test_two_parents_egress_routes_concatenated(self):
m = _build(
p1={"egress": {"routes": [{"host": "a.example.com"}]}},
p2={"egress": {"routes": [{"host": "b.example.com"}]}},
child={"extends": ["p1", "p2"]},
)
hosts = [r.Host for r in m.bottles["child"].egress.routes]
self.assertEqual(["a.example.com", "b.example.com"], hosts)
def test_child_egress_appends_after_combined_parents(self):
m = _build(
p1={"egress": {"routes": [{"host": "a.example.com"}]}},
p2={"egress": {"routes": [{"host": "b.example.com"}]}},
child={
"extends": ["p1", "p2"],
"egress": {"routes": [{"host": "c.example.com"}]},
},
)
hosts = [r.Host for r in m.bottles["child"].egress.routes]
self.assertEqual(["a.example.com", "b.example.com", "c.example.com"], hosts)
def test_two_parents_git_repos_union(self):
m = _build(
p1={"git-gate": {"repos": {"a": self._GIT_A}}},
p2={"git-gate": {"repos": {"b": self._GIT_B}}},
child={"extends": ["p1", "p2"]},
)
names = {e.Name for e in m.bottles["child"].git}
self.assertEqual({"a", "b"}, names)
def test_two_parents_git_same_name_later_wins_per_field(self):
# Both parents declare the same repo name. p2's `key` wins; p1's
# `host_key` is preserved because p2 doesn't override it.
p1_entry = {
"url": "ssh://git@host-a/repo.git",
"host_key": "ecdsa AAAA",
"key": {"provider": "static", "path": "/k1"},
}
p2_entry = {
"url": "ssh://git@host-a/repo.git", # required, same url
"key": {"provider": "gitea", "forge_token_env": "TOK"},
}
m = _build(
p1={"git-gate": {"repos": {"repo": p1_entry}}},
p2={"git-gate": {"repos": {"repo": p2_entry}}},
child={"extends": ["p1", "p2"]},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
e = entries[0]
self.assertEqual("ssh://git@host-a/repo.git", e.Upstream)
self.assertEqual("ecdsa AAAA", e.KnownHostKey)
self.assertEqual("gitea", e.Key.provider)
def test_p1_repos_preserved_when_p2_has_none(self):
m = _build(
p1={"git-gate": {"repos": {"a": self._GIT_A}}},
p2={"env": {"X": "1"}},
child={"extends": ["p1", "p2"]},
)
names = [e.Name for e in m.bottles["child"].git]
self.assertEqual(["a"], names)
def test_diamond_shared_ancestor_resolved_once(self):
# a <- b, a <- c; child extends [b, c]
# `a` must be resolved once and cached.
m = _build(
a={"env": {"FROM_A": "1"}, "supervise": False},
b={"extends": "a", "env": {"FROM_B": "1"}},
c={"extends": "a", "env": {"FROM_C": "1"}},
child={"extends": ["b", "c"]},
)
child = m.bottles["child"]
self.assertEqual("1", child.env["FROM_A"])
self.assertEqual("1", child.env["FROM_B"])
self.assertEqual("1", child.env["FROM_C"])
# supervise=False from `a` threads through both b and c; c is the
# later parent so its effective supervise (False) wins.
self.assertFalse(child.supervise)
def test_three_parents_env_fold_order(self):
m = _build(
p1={"env": {"X": "1", "A": "a"}},
p2={"env": {"X": "2", "B": "b"}},
p3={"env": {"X": "3", "C": "c"}},
child={"extends": ["p1", "p2", "p3"]},
)
env = dict(m.bottles["child"].env)
self.assertEqual("3", env["X"])
self.assertEqual("a", env["A"])
self.assertEqual("b", env["B"])
self.assertEqual("c", env["C"])
def test_undefined_bottle_in_list_dies(self):
msg = _error_message(
_build,
base={"env": {}},
child={"extends": ["base", "ghost"]},
)
self.assertIn("extends 'ghost'", msg)
self.assertIn("not defined", msg)
def test_self_reference_in_list_dies(self):
msg = _error_message(_build, child={"extends": ["child"]})
self.assertIn("extends itself", msg)
def test_cycle_through_multi_parent_edge_dies(self):
msg = _error_message(
_build,
a={"extends": ["b", "c"]},
b={},
c={"extends": "a"},
)
self.assertIn("extends cycle", msg)
def test_non_string_extends_dies(self):
msg = _error_message(_build, child={"extends": ["base"]})
self.assertIn("extends must be a string", msg)
class TestExtendsAvailableInBottleKeys(unittest.TestCase):
-38
View File
@@ -8,7 +8,6 @@ import unittest
from bot_bottle.git_gate import (
GIT_GATE_HOSTNAME,
_gitconfig_validate_value,
git_gate_render_gitconfig,
)
from bot_bottle.manifest import ManifestIndex
@@ -91,42 +90,5 @@ class TestGitGateGitconfigRender(unittest.TestCase):
self.assertNotIn("gitea.dideric.is", out)
class TestGitconfigValidateValue(unittest.TestCase):
"""_gitconfig_validate_value rejects values that would inject gitconfig keys."""
def test_normal_url_passes(self):
_gitconfig_validate_value("url", "ssh://git@github.com/owner/repo.git")
def test_newline_in_url_raises(self):
with self.assertRaises(ValueError):
_gitconfig_validate_value("url", "ssh://git@github.com/owner/\nrepo.git")
def test_carriage_return_in_url_raises(self):
with self.assertRaises(ValueError):
_gitconfig_validate_value("url", "ssh://git@github.com/\rrepo.git")
def test_error_message_names_field(self):
with self.assertRaises(ValueError, msg="error should name the field") as ctx:
_gitconfig_validate_value("repos['bad'].url", "ssh://host/\npath")
self.assertIn("repos['bad'].url", str(ctx.exception))
class TestGitconfigRenderRejectsNewlineInUpstream(unittest.TestCase):
"""git_gate_render_gitconfig raises on Upstream values with newlines."""
def test_newline_in_upstream_raises(self):
m = ManifestIndex.from_json_obj({
"bottles": {"dev": {"git-gate": {"repos": {
"evil": {
"url": "ssh://git@github.com/owner/\nfake-key = injected\nrepo.git",
"key": {"provider": "static", "path": "/dev/null"},
},
}}}},
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
})
with self.assertRaises(ValueError):
git_gate_render_gitconfig(m.bottles["dev"].git, GIT_GATE_HOSTNAME)
if __name__ == "__main__":
unittest.main()
@@ -130,6 +130,7 @@ def _plan(
supervise_plan = SupervisePlan(
slug="demo-abc12",
queue_dir=Path("/tmp/queue"),
current_config_dir=Path("/tmp/current-config"),
)
return SmolmachinesBottlePlan(
spec=spec,
+18 -13
View File
@@ -16,7 +16,7 @@ from bot_bottle.supervise import (
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_GITLEAKS_ALLOW,
archive_proposal,
audit_log_path,
@@ -37,9 +37,9 @@ FIXED_TS = datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc)
def _proposal(
tool: str = TOOL_EGRESS_ALLOW,
proposed: str = "routes:\n - host: example.com\n",
justification: str = "need egress",
tool: str = TOOL_CAPABILITY_BLOCK,
proposed: str = "FROM python:3.13\n",
justification: str = "need a capability",
) -> Proposal:
return Proposal.new(
bottle_slug="dev",
@@ -57,7 +57,7 @@ class TestProposalRoundtrip(unittest.TestCase):
self.assertTrue(p.id)
self.assertEqual("2026-05-25T12:00:00+00:00", p.arrival_timestamp)
self.assertEqual("dev", p.bottle_slug)
self.assertEqual(TOOL_EGRESS_ALLOW, p.tool)
self.assertEqual(TOOL_CAPABILITY_BLOCK, p.tool)
def test_to_from_dict_roundtrip(self):
p = _proposal()
@@ -142,14 +142,14 @@ class TestQueueIO(unittest.TestCase):
def test_list_pending_sorted_by_arrival(self):
# Fabricate two with explicit timestamps.
a = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: early.example.com\n", justification="early",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="early",
current_file_hash="x",
now=datetime(2026, 5, 25, 10, 0, 0, tzinfo=timezone.utc),
)
b = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: late.example.com\n", justification="late",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="late",
current_file_hash="x",
now=datetime(2026, 5, 25, 14, 0, 0, tzinfo=timezone.utc),
)
@@ -319,6 +319,7 @@ class TestToolConstants(unittest.TestCase):
self.assertEqual(
(
supervise.TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
supervise.TOOL_EGRESS_BLOCK,
TOOL_GITLEAKS_ALLOW,
supervise.TOOL_EGRESS_TOKEN_ALLOW,
@@ -377,16 +378,20 @@ class TestSupervisePrepare(unittest.TestCase):
supervise.bot_bottle_root = fake_root # type: ignore[assignment]
return lambda: setattr(supervise, "bot_bottle_root", original)
def test_prepare_creates_queue(self):
def test_prepare_creates_queue_and_current_config(self):
plan = _StubSupervise().prepare("dev", self.stage_dir)
self.assertTrue(plan.queue_dir.is_dir())
self.assertTrue(plan.current_config_dir.is_dir())
self.assertEqual("dev", plan.slug)
self.assertEqual("", plan.internal_network)
def test_prepare_does_not_create_current_config_dir(self):
def test_prepare_writes_no_files_to_current_config(self):
# dockerfile_content is no longer accepted by prepare.
# routes.yaml + allowlist live behind the
# `list-egress-routes` MCP tool (PRD 0017 chunk 3).
plan = _StubSupervise().prepare("dev", self.stage_dir)
self.assertFalse((self.stage_dir / "current-config").exists())
self.assertFalse(hasattr(plan, "current_config_dir"))
files = sorted(p.name for p in plan.current_config_dir.iterdir())
self.assertEqual([], files)
if __name__ == "__main__":
+30 -24
View File
@@ -18,7 +18,7 @@ from bot_bottle.supervise import (
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_EGRESS_ALLOW,
TOOL_CAPABILITY_BLOCK,
TOOL_GITLEAKS_ALLOW,
TOOL_EGRESS_TOKEN_ALLOW,
read_audit_entries,
@@ -30,8 +30,9 @@ from bot_bottle.supervise import (
FIXED = datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc)
def _proposal(slug: str = "dev", tool: str = TOOL_EGRESS_ALLOW) -> Proposal:
def _proposal(slug: str = "dev", tool: str = TOOL_CAPABILITY_BLOCK) -> Proposal:
payloads = {
TOOL_CAPABILITY_BLOCK: "FROM python:3.13\n",
supervise.TOOL_EGRESS_ALLOW: "routes:\n - host: example.com\n",
supervise.TOOL_EGRESS_BLOCK: "routes:\n - host: example.com\n",
TOOL_GITLEAKS_ALLOW: "file: tests/test_fixture.py\nline: 3\n",
@@ -85,14 +86,14 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
def test_sorted_by_arrival_across_bottles(self):
early = Proposal.new(
bottle_slug="api", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: early.example.com\n", justification="early",
bottle_slug="api", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="early",
current_file_hash="h",
now=datetime(2026, 5, 25, 10, 0, 0, tzinfo=timezone.utc),
)
late = Proposal.new(
bottle_slug="dev", tool=TOOL_EGRESS_ALLOW,
proposed_file="routes:\n - host: late.example.com\n", justification="late",
bottle_slug="dev", tool=TOOL_CAPABILITY_BLOCK,
proposed_file="FROM python:3.13\n", justification="late",
current_file_hash="h",
now=datetime(2026, 5, 25, 14, 0, 0, tzinfo=timezone.utc),
)
@@ -121,7 +122,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def tearDown(self):
self._teardown_fake_home()
def _enqueue(self, tool: str = TOOL_EGRESS_ALLOW):
def _enqueue(self, tool: str = TOOL_CAPABILITY_BLOCK):
p = _proposal(tool=tool)
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
@@ -130,29 +131,19 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_approve_writes_response(self):
qp = self._enqueue()
with patch(
"bot_bottle.cli.supervise.apply_routes_change",
return_value=("routes: []\n", "routes:\n - host: example.com\n"),
):
supervise_cli.approve(qp)
resp = read_response(qp.queue_dir, qp.proposal.id)
supervise_cli.approve(qp)
# capability-block is archived on approve, so the response file
# moves to processed/ before the caller can read it.
resp = read_response(qp.queue_dir / "processed", qp.proposal.id)
self.assertEqual(STATUS_APPROVED, resp.status)
self.assertIsNone(resp.final_file)
def test_approve_with_final_file_marks_modified(self):
qp = self._enqueue()
with patch(
"bot_bottle.cli.supervise.apply_routes_change",
return_value=("routes: []\n", "routes:\n - host: edited.example.com\n"),
):
supervise_cli.approve(
qp,
final_file="routes:\n - host: edited.example.com\n",
notes="tweaked",
)
resp = read_response(qp.queue_dir, qp.proposal.id)
supervise_cli.approve(qp, final_file="FROM bookworm\n", notes="tweaked")
resp = read_response(qp.queue_dir / "processed", qp.proposal.id)
self.assertEqual(STATUS_MODIFIED, resp.status)
self.assertEqual("routes:\n - host: edited.example.com\n", resp.final_file)
self.assertEqual("FROM bookworm\n", resp.final_file)
self.assertEqual("tweaked", resp.notes)
def test_reject_writes_rejection(self):
@@ -162,6 +153,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
self.assertEqual(STATUS_REJECTED, resp.status)
self.assertEqual("nope", resp.notes)
def test_no_audit_log_for_capability_block(self):
qp = self._enqueue(tool=TOOL_CAPABILITY_BLOCK)
supervise_cli.approve(qp)
self.assertEqual([], read_audit_entries("egress", "dev"))
def test_approve_egress_block_writes_audit_log(self):
qp = self._enqueue(tool=supervise.TOOL_EGRESS_BLOCK)
with patch(
@@ -236,6 +232,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
self.assertEqual(".txt", supervise_cli._suffix_for_tool(TOOL_EGRESS_TOKEN_ALLOW))
# class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
# # DISABLED — capability_apply functionality is currently commented out.
# pass
class TestEditInEditor(unittest.TestCase):
def test_runs_editor_returns_edited_content(self):
original_editor = os.environ.get("EDITOR")
@@ -280,5 +281,10 @@ class TestEditInEditor(unittest.TestCase):
os.environ["EDITOR"] = original_editor
# class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
# # DISABLED — capability_apply functionality is currently commented out.
# pass
if __name__ == "__main__":
unittest.main()