PRD 0049: strip dashboard to supervisor tui #176

Merged
didericis merged 12 commits from prd-0049-strip-dashboard-to-supervisor-tui into main 2026-06-03 13:48:32 -04:00
17 changed files with 1053 additions and 3047 deletions
+5 -5
View File
@@ -1,6 +1,6 @@
"""Main CLI dispatcher.
Commands: cleanup, dashboard, edit, info, init, list, resume, start
Commands: cleanup, edit, info, init, list, resume, start, supervise
"""
from __future__ import annotations
@@ -12,24 +12,24 @@ from ..manifest import ManifestError
from ._common import PROG
from . import list as _list_mod
from .cleanup import cmd_cleanup
from .dashboard import cmd_dashboard
from .edit import cmd_edit
from .info import cmd_info
from .init import cmd_init
from .resume import cmd_resume
from .start import cmd_start
from .supervise import cmd_supervise
cmd_list = _list_mod.cmd_list
COMMANDS = {
"cleanup": cmd_cleanup,
"dashboard": cmd_dashboard,
"edit": cmd_edit,
"info": cmd_info,
"init": cmd_init,
"list": cmd_list,
"resume": cmd_resume,
"start": cmd_start,
"supervise": cmd_supervise,
}
@@ -37,13 +37,13 @@ def usage() -> None:
sys.stderr.write(f"usage: {PROG} <command> [args...]\n\n")
sys.stderr.write("Commands:\n")
sys.stderr.write(" cleanup stop and remove all active bot-bottle containers\n")
sys.stderr.write(" dashboard view + approve/modify/reject pending supervise proposals (PRD 0013)\n")
sys.stderr.write(" edit open an agent in vim for editing\n")
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
sys.stderr.write(" init interactively create a new agent and add it to bot-bottle.json\n")
sys.stderr.write(" list list available agents or active containers\n")
sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n")
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n")
sys.stderr.write(" supervise view + approve/modify/reject pending supervise proposals (PRD 0013)\n\n")
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
File diff suppressed because it is too large Load Diff
-421
View File
@@ -1,421 +0,0 @@
"""dashboard_model: state/model layer for the dashboard TUI.
Data structures, discovery queries, pure state helpers, and derived
values extracted from dashboard.py so they can be tested in isolation
and navigated without wading through curses rendering code.
"""
from __future__ import annotations
import os
import shlex
from dataclasses import dataclass
from pathlib import Path
from .. import supervise as _supervise
from ..agent_provider import runtime_for
from ..backend import ActiveAgent, enumerate_active_agents
from ..backend.docker.capability_apply import CapabilityApplyError
from ..backend.docker.egress_apply import EgressApplyError
from ..backend.docker.pipelock_apply import PipelockApplyError
from ..manifest import Manifest
from ..supervise import (
TOOL_CAPABILITY_BLOCK,
TOOL_PIPELOCK_BLOCK,
Proposal,
list_pending_proposals,
)
# --- Constants ---------------------------------------------------------------
# Refresh period in milliseconds. NOTE(review): the consumer is not in this
# module — presumably the curses poll interval; confirm against the caller.
_REFRESH_INTERVAL_MS = 1000
# Highlight window, in seconds, that `_is_recent` compares a proposal's
# first-seen age against.
_NEW_PROPOSAL_HIGHLIGHT_SEC = 5.0
# Focus identifiers for the two panes. The selection helpers in this module
# compare their `focus` argument against PANE_AGENTS.
PANE_PROPOSALS = "proposals"
PANE_AGENTS = "agents"
# --- Data structures ---------------------------------------------------------
@dataclass(frozen=True)
class QueuedProposal:
    """Pairs a pending `Proposal` with the queue directory it was
    read from. Frozen, so an instance cannot be mutated once built."""

    # The pending proposal itself.
    proposal: Proposal
    # Queue directory the proposal was discovered in.
    queue_dir: Path
# The three per-tool apply exception classes grouped into one tuple, so a
# single `except ApplyError` clause can catch any of them.
ApplyError = (EgressApplyError, PipelockApplyError, CapabilityApplyError)
# --- Discovery ---------------------------------------------------------------
def discover_active_agents() -> list[ActiveAgent]:
    """Return whatever the shared `enumerate_active_agents` helper
    reports as currently active. This is a thin pass-through: no
    filtering or sorting happens here, so any caller of the shared
    helper sees the same list."""
    active = enumerate_active_agents()
    return active
def discover_pending() -> list[QueuedProposal]:
    """Collect the pending proposals from every sub-directory of
    `<bot_bottle_root>/queue`, each paired with the directory it came
    from. The combined list is ordered by `arrival_timestamp`; an
    absent queue root yields an empty list."""
    root = _supervise.bot_bottle_root() / "queue"
    if not root.is_dir():
        return []
    found = [
        QueuedProposal(proposal=item, queue_dir=bottle_dir)
        for bottle_dir in sorted(root.iterdir())
        if bottle_dir.is_dir()
        for item in list_pending_proposals(bottle_dir)
    ]
    # sorted() is stable, so ties keep the directory walk order.
    return sorted(found, key=lambda entry: entry.proposal.arrival_timestamp)
# --- Derived values ----------------------------------------------------------
def _approval_status(qp: QueuedProposal, verb: str) -> str:
    """Build the status-line text shown after a successful approval:
    `<verb> <tool> for [<slug>]`. Capability-block proposals also get
    a copy-pasteable `./cli.py resume <slug>` hint appended."""
    proposal = qp.proposal
    summary = f"{verb} {proposal.tool} for [{proposal.bottle_slug}]"
    if proposal.tool != TOOL_CAPABILITY_BLOCK:
        return summary
    return f"{summary}; resume: ./cli.py resume {proposal.bottle_slug}"
def _is_recent(
proposal_id: str,
first_seen: dict[str, float] | None,
now: float | None,
) -> bool:
"""True if `proposal_id` was first seen within the highlight
window. Both `first_seen` and `now` may be None (rendered as
not-recent) so the helper is safe in cold-start paths."""
if first_seen is None or now is None:
return False
started = first_seen.get(proposal_id)
if started is None:
return False
return (now - started) < _NEW_PROPOSAL_HIGHLIGHT_SEC
def _selection_status(
    focus: str, agents: list[ActiveAgent], selected_agent: int,
) -> str:
    """Idle status-line text describing the agents-pane selection.
    Empty unless the agents pane has focus; otherwise names the
    selected agent's slug, or says why nothing is selected."""
    if focus != PANE_AGENTS:
        return ""
    if not agents:
        return "[no active agents]"
    in_range = 0 <= selected_agent < len(agents)
    return (
        f"[selected: {agents[selected_agent].slug}]"
        if in_range
        else "[no agent selected]"
    )
def _selected_agent(
    focus: str, agents: list[ActiveAgent], selected_agent: int,
) -> ActiveAgent | None:
    """Return the agent at `selected_agent`, or None when the agents
    pane is not focused, the list is empty, or the index falls
    outside the list."""
    has_selection = (
        focus == PANE_AGENTS
        and bool(agents)
        and 0 <= selected_agent < len(agents)
    )
    return agents[selected_agent] if has_selection else None
# --- Picker helpers ----------------------------------------------------------
def _filter_agents(query: str, names: list[str]) -> list[str]:
"""Case-insensitive substring filter for the picker. Pure
function — no curses, easy to unit-test."""
if not query:
return list(names)
q = query.lower()
return [n for n in names if q in n.lower()]
def _running_counts(
bottles: dict, agents_now: list[ActiveAgent],
) -> dict[str, int]:
"""Per-agent running count: dashboard-owned + externally-
discovered, summed by agent_name. The picker shows this so the
operator knows whether picking an agent starts a fresh bottle
or a Nth one."""
counts: dict[str, int] = {}
for a in agents_now:
counts[a.agent_name] = counts.get(a.agent_name, 0) + 1
return counts
# --- Agent-row rendering helpers ---------------------------------------------
def _format_agent_row(a: ActiveAgent, maxw: int) -> str:
"""One-line agent row: ` [<backend>] <slug> <agent_name> started
<HH:MM:SS> [<sidecars>]`. The `agent` service is filtered out of
the displayed list — it's always present for an active bottle,
so listing it carries no information; the sidecars are the
differentiator.
The `[docker]` / `[smolmachines]` prefix lets the operator tell
which backend a bottle came from (issue #77). Truncated to
`maxw` because the renderer's addnstr only enforces width if
we hand it a properly-sized string."""
started = (
a.started_at.split("T", 1)[1][:8]
if "T" in a.started_at else (a.started_at or "?")
)
sidecars = tuple(s for s in a.services if s != "agent")
services = ",".join(sidecars) if sidecars else "(starting)"
backend_tag = f"[{a.backend_name}]" if a.backend_name else ""
line = (
f" {backend_tag} {a.slug} {a.agent_name} "
f"started {started} [{services}]"
)
if len(line) > maxw:
return line[: max(0, maxw - 1)] + ""
return line
# --- Detail-view helpers -----------------------------------------------------
def _detail_lines(
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> list[tuple[str, int]]:
    """Build the detail-view body as (text, curses-attr) rows.
    Header fields come first, then the justification (one row per
    line, each prefixed with a space), then the payload under a
    per-tool heading. Every one of those rows carries attr 0.
    For pipelock-block proposals whose payload parses to a hostname,
    a blank row and a row holding just that hostname, tagged with
    `green_attr`, are appended."""
    proposal = qp.proposal
    justification_rows = proposal.justification.splitlines() or [""]
    payload_rows = proposal.proposed_file.splitlines() or [""]
    rows: list[tuple[str, int]] = [
        (f"bottle: {proposal.bottle_slug}", 0),
        (f"tool: {proposal.tool}", 0),
        (f"id: {proposal.id}", 0),
        (f"arrived: {proposal.arrival_timestamp}", 0),
        (f"queue: {qp.queue_dir}", 0),
        ("", 0),
        ("justification:", 0),
        *((" " + text, 0) for text in justification_rows),
        ("", 0),
        (_proposed_payload_label(proposal.tool) + ":", 0),
        *((text, 0) for text in payload_rows),
    ]
    if proposal.tool == TOOL_PIPELOCK_BLOCK:
        host = _failed_url_host(proposal.proposed_file)
        if host:
            rows += [("", 0), (host, green_attr)]
    return rows
def _failed_url_host(url: str) -> str:
"""Best-effort hostname extraction from a pipelock-block proposal's
failed_url payload. Returns empty string on unparseable input —
callers handle empty as "nothing to highlight"."""
import urllib.parse
try:
return urllib.parse.urlsplit(url.strip()).hostname or ""
except ValueError:
return ""
def _proposed_payload_label(tool: str) -> str:
    """Heading used for a proposal's payload in the detail view:
    `failed URL` for pipelock-block, `proposed file` for every other
    tool."""
    return "failed URL" if tool == TOOL_PIPELOCK_BLOCK else "proposed file"
def _suffix_for_tool(tool: str) -> str:
    """File suffix matching a tool's payload: `.dockerfile` for
    capability-block, `.txt` for anything else."""
    return ".dockerfile" if tool == TOOL_CAPABILITY_BLOCK else ".txt"
# --- Bottle/agent resolution -------------------------------------------------
def _bottle_for_slug(
    slug: str,
    bottles: dict,
    manifest: Manifest | None,
) -> tuple["object", str]:
    """Return `(bottle_handle, prompt_path_hint)` for a re-attach.
    A slug present in `bottles` yields its stored handle and an empty
    hint. Otherwise a handle is synthesized from the persisted
    metadata: a `SmolmachinesBottle` when the metadata's backend is
    `smolmachines`, a `DockerBottle` (with a no-op teardown) in every
    other case, including missing metadata.
    The prompt path is only set when both metadata and manifest are
    available and the manifest's agent has a prompt; it lives under
    `$BOT_BOTTLE_CONTAINER_HOME` (default `/home/node`). The returned
    hint is that path, or the empty string when it was not set."""
    from ..backend.docker.bottle import DockerBottle
    from ..backend.docker.bottle_state import read_metadata
    from ..backend.smolmachines.bottle import SmolmachinesBottle

    if slug in bottles:
        _, owned_handle, _ = bottles[slug]
        return owned_handle, ""

    name = f"bot-bottle-{slug}"
    hint: str | None = None
    meta = read_metadata(slug)
    if meta is not None and manifest is not None:
        spec = manifest.agents.get(meta.agent_name)
        if spec is not None and spec.prompt:
            home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
            hint = f"{home}/.bot-bottle-prompt.txt"

    backend_name = "" if meta is None else meta.backend
    handle: object
    if backend_name == "smolmachines":
        handle = SmolmachinesBottle(name, prompt_path=hint)
    else:
        handle = DockerBottle(
            container=name,
            teardown=lambda: None,
            prompt_path_in_container=hint,
        )
    return handle, (hint or "")
def _pick_next_after_stop(
agents_before: list[ActiveAgent],
selected_index: int,
stopped_slug: str,
) -> tuple[int, ActiveAgent] | None:
"""After stopping `stopped_slug` from the agents list, choose
the agent that should take focus next. The agent below the
stopped row (which slides up to fill its index) is the
natural pick; if the stopped agent was last, the row above
instead. Returns (new_index, agent) or None if no agents
remain. Pure — easy to unit-test."""
new_agents = [a for a in agents_before if a.slug != stopped_slug]
if not new_agents:
return None
new_index = min(max(selected_index, 0), len(new_agents) - 1)
return new_index, new_agents[new_index]
# --- tmux argv builders ------------------------------------------------------
def _in_tmux() -> bool:
"""True when the dashboard is running inside a tmux session.
Tmux sets `$TMUX` to the path of its server socket."""
return bool(os.environ.get("TMUX"))
def _agent_runtime_args(
    *, resume: bool, remote_control: bool = False, agent_provider_template: str = "claude",
) -> list[str]:
    """Assemble the agent argument list for the given provider
    template. It always starts with the runtime's bypass args; the
    remote-control args and then the resume args follow when their
    flags are set. A fresh list is returned on every call."""
    runtime = runtime_for(agent_provider_template)
    argv = [*runtime.bypass_args]
    if remote_control:
        argv += runtime.remote_control_args
    if resume:
        argv += runtime.resume_args
    return argv
def _build_resume_argv_with_fallback(
    bottle, *, remote_control: bool = False, agent_provider_template: str = "claude",
) -> list[str]:
    """Build the backend-exec argv for resuming an agent session.
    For any provider other than `claude` this is simply
    `bottle.agent_argv` applied to the resume args — no fallback.
    For `claude` the non-resume argv is built first, then split at
    the agent command token (`bottle.agent_command` when the bottle
    has one, the runtime's command otherwise). Everything before the
    token is kept as the exec-framing prefix; the shell-quoted tail
    becomes `<agent> <resume args> || <agent>` run through `sh -c`,
    so a failed resume falls through to a fresh session.
    Raises ValueError (from `list.index`) if the command token is not
    present in the argv the bottle produced."""
    if agent_provider_template != "claude":
        resume_runtime_args = _agent_runtime_args(
            resume=True,
            remote_control=remote_control,
            agent_provider_template=agent_provider_template,
        )
        return bottle.agent_argv(resume_runtime_args)

    fresh_runtime_args = _agent_runtime_args(
        resume=False,
        remote_control=remote_control,
        agent_provider_template=agent_provider_template,
    )
    framed = bottle.agent_argv(fresh_runtime_args)
    # The provider command token marks where exec framing ends and the
    # agent invocation begins.
    marker = getattr(
        bottle, "agent_command", runtime_for(agent_provider_template).command,
    )
    split_at = framed.index(marker)
    framing, tail = framed[:split_at], framed[split_at:]
    fresh_cmd = " ".join(map(shlex.quote, tail))
    resume_flags = " ".join(
        map(shlex.quote, runtime_for(agent_provider_template).resume_args)
    )
    script = f"{fresh_cmd} {resume_flags} || {fresh_cmd}"
    return [*framing, "sh", "-c", script]
def _build_split_pane_argv(agent_argv: list[str]) -> list[str]:
"""Pure helper: wrap a backend-exec argv with `tmux split-window
-h -P -F '#{pane_id}'`. The `-P -F` combo tells tmux to print
the new pane's id on stdout so we can track it for later
`respawn-pane` calls."""
return [
"tmux", "split-window", "-h",
"-P", "-F", "#{pane_id}",
*agent_argv,
]
def _build_respawn_pane_argv(pane_id: str, agent_argv: list[str]) -> list[str]:
"""Pure helper: wrap a backend-exec argv with `tmux respawn-pane
-k -t <pane_id>`. `-k` kills the existing process in the pane
before respawning."""
return ["tmux", "respawn-pane", "-k", "-t", pane_id, *agent_argv]
+11 -25
View File
@@ -2,10 +2,8 @@
interactive claude-code session. The container is torn down when the
session ends.
The launch core is shared with `cli.py resume <identity>` and (PRD
0020 chunk 1+) the dashboard's in-process start flow: see the
public helpers `prepare_with_preflight`, `attach_agent`, and the
private orchestrator `_launch_bottle`.
The launch core is shared with `cli.py resume <identity>` through
the private orchestrator `_launch_bottle`.
"""
from __future__ import annotations
@@ -71,7 +69,7 @@ def cmd_start(argv: list[str]) -> int:
)
# --- Public helpers shared with the dashboard (PRD 0020) -----------------
# --- Launch helpers ------------------------------------------------------
def prepare_with_preflight(
@@ -84,14 +82,11 @@ def prepare_with_preflight(
backend_name: str | None = None,
) -> tuple[DockerBottlePlan | None, str]:
"""Run `backend.prepare`, render the preflight summary via the
injected callable, prompt y/N via the injected callable. The CLI
binds these to stderr/stdin; the dashboard binds them to a
curses modal.
injected callable, prompt y/N via the injected callable.
`backend_name` selects which backend prepares the plan
(`None` → `$BOT_BOTTLE_BACKEND` → `docker`). Dashboard
passes the value from its new-agent backend-picker modal; the
CLI passes whatever `--backend` resolved to.
(`None` → `$BOT_BOTTLE_BACKEND` → `docker`). The CLI passes
whatever `--backend` resolved to.
Returns `(plan, identity)`. `plan` is None on dry-run or
operator-N, but `identity` is set as soon as `backend.prepare`
@@ -122,16 +117,10 @@ def attach_agent(
agent process's exit code.
`resume=True` adds `--continue` so claude picks up its most
recent session non-interactively (no session-picker prompt)
the right shape for the dashboard's Enter re-attach (PRD 0020
chunk 3), where a bottle typically has exactly one session.
First-attach paths (`./cli.py start`, the dashboard's new-agent
flow) leave it False.
recent session non-interactively (no session-picker prompt).
First-attach paths (`./cli.py start`) leave it False.
Used as the inner step of `./cli.py start` (one-shot) and by the
dashboard, which calls it from inside a `curses.endwin → … →
stdscr.refresh()` handoff so the curses surface gets out of the
terminal's way while the agent has it."""
Used as the inner step of `./cli.py start`."""
runtime = runtime_for(agent_provider_template)
info(
f"attaching interactive {agent_provider_template} session "
@@ -148,8 +137,7 @@ def attach_agent(
def capture_claude_session_state(identity: str, exit_code: int) -> None:
"""Inside the launch context, while the container is still
alive: snapshot the transcript and mark for preservation if
claude crashed. Public for the dashboard's death-handling path
(PRD 0020 open question 3)."""
claude crashed."""
# FIXME: this captures Claude-specific session state. A follow-up
# spike should explore freezing provider-neutral container state
# instead of relying on each agent's transcript layout.
@@ -162,9 +150,7 @@ def capture_claude_session_state(identity: str, exit_code: int) -> None:
def settle_state(identity: str) -> None:
"""Post-teardown housekeeping: print the resume hint if the
state was preserved, otherwise reap the per-bottle state dir.
Public so the dashboard's explicit-stop path calls the same
settlement the CLI uses on context exit."""
state was preserved, otherwise reap the per-bottle state dir."""
if not identity:
return
if is_preserved(identity):
+577
View File
@@ -0,0 +1,577 @@
"""supervise: list pending supervise proposals across all bottles and
act on them (approve / modify / reject).
Curses-based TUI; modify-then-approve shells out to $EDITOR. The
approval handlers wire to the per-tool remediation engines:
PRD 0014 (egress, retargeted from cred-proxy in PRD 0017
chunk 3) writes routes.yaml + SIGHUPs egress; PRD 0015
(pipelock) writes the allowlist + restarts pipelock; PRD 0016
(capability) rebuilds the bottle Dockerfile.
"""
from __future__ import annotations
import argparse
import curses
import os
import subprocess
import sys
import tempfile
import traceback
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from .. import supervise as _supervise
from ..backend.docker.bottle_state import read_metadata
from ..backend.docker.capability_apply import (
CapabilityApplyError,
apply_capability_change,
)
from ..backend.docker.egress_apply import EgressApplyError, add_route
from ..backend.docker.pipelock_apply import (
PipelockApplyError,
apply_allowlist_change,
fetch_current_allowlist,
parse_allowlist_content,
render_allowlist_content,
)
from ..log import Die, error, info
from ..supervise import (
COMPONENT_FOR_TOOL,
AuditEntry,
Proposal,
Response,
STATUS_APPROVED,
STATUS_MODIFIED,
STATUS_REJECTED,
TOOL_CAPABILITY_BLOCK,
TOOL_EGRESS_BLOCK,
TOOL_PIPELOCK_BLOCK,
archive_proposal,
list_pending_proposals,
render_diff,
write_audit_entry,
write_response,
)
from ._common import PROG
# Milliseconds handed to `stdscr.timeout()` by `_main_loop`: `getch` waits at
# most this long before the loop re-reads the queue and redraws.
_REFRESH_INTERVAL_MS = 1000
@dataclass(frozen=True)
class QueuedProposal:
    """Pairs a pending `Proposal` with the queue directory it was
    read from. Frozen, so an instance cannot be mutated once built."""

    # The pending proposal itself.
    proposal: Proposal
    # Queue directory the proposal was discovered in; responses for it
    # are written back to this directory.
    queue_dir: Path
# Errors any remediation engine may raise. Caught by the TUI key
# handlers and surfaced in the status line so a failed apply keeps
# the proposal pending rather than crashing curses. Kept as a tuple of
# classes so it can be used directly in `except ApplyError`.
ApplyError = (EgressApplyError, PipelockApplyError, CapabilityApplyError)
def discover_pending() -> list[QueuedProposal]:
    """Gather pending proposals from every sub-directory of
    `<bot_bottle_root>/queue`, ordered by arrival timestamp. Returns
    an empty list when the queue root does not exist."""
    root = _supervise.bot_bottle_root() / "queue"
    if not root.is_dir():
        return []
    bottle_dirs = [entry for entry in sorted(root.iterdir()) if entry.is_dir()]
    found = [
        QueuedProposal(proposal=item, queue_dir=bottle_dir)
        for bottle_dir in bottle_dirs
        for item in list_pending_proposals(bottle_dir)
    ]
    # Stable sort: equal timestamps keep the directory walk order.
    found.sort(key=lambda entry: entry.proposal.arrival_timestamp)
    return found
def _approval_status(qp: QueuedProposal, verb: str) -> str:
    """Status-line text after a successful approval: `<verb> <tool>
    for [<slug>]`, plus a `./cli.py resume <slug>` hint when the
    tool is capability-block."""
    proposal = qp.proposal
    parts = [f"{verb} {proposal.tool} for [{proposal.bottle_slug}]"]
    if proposal.tool == TOOL_CAPABILITY_BLOCK:
        parts.append(f"resume: ./cli.py resume {proposal.bottle_slug}")
    return "; ".join(parts)
def _detail_lines(
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> list[tuple[str, int]]:
    """Return the detail-view body as (text, curses-attr) tuples.
    Header fields come first, then the justification (one row per
    line, each prefixed with a space), then the payload under a
    per-tool heading; all with attr 0. For pipelock-block proposals
    whose payload parses to a hostname, a blank row and a row holding
    that hostname, tagged with `green_attr`, are appended."""
    proposal = qp.proposal

    def plain(texts):
        # Attach the default attribute to each text row.
        return [(text, 0) for text in texts]

    rows: list[tuple[str, int]] = plain([
        f"bottle: {proposal.bottle_slug}",
        f"tool: {proposal.tool}",
        f"id: {proposal.id}",
        f"arrived: {proposal.arrival_timestamp}",
        f"queue: {qp.queue_dir}",
        "",
        "justification:",
    ])
    rows += plain(" " + text for text in proposal.justification.splitlines() or [""])
    rows += plain(["", _proposed_payload_label(proposal.tool) + ":"])
    rows += plain(proposal.proposed_file.splitlines() or [""])
    if proposal.tool == TOOL_PIPELOCK_BLOCK:
        host = _failed_url_host(proposal.proposed_file)
        if host:
            rows.append(("", 0))
            rows.append((host, green_attr))
    return rows
def _failed_url_host(url: str) -> str:
"""Best-effort hostname extraction from a pipelock-block proposal."""
import urllib.parse
try:
return urllib.parse.urlsplit(url.strip()).hostname or ""
except ValueError:
return ""
def _proposed_payload_label(tool: str) -> str:
    """Heading for a proposal's payload: `failed URL` for
    pipelock-block, `proposed file` for every other tool."""
    return "failed URL" if tool == TOOL_PIPELOCK_BLOCK else "proposed file"
def _suffix_for_tool(tool: str) -> str:
    """Temp-file suffix for editing a tool's payload: `.dockerfile`
    for capability-block, `.txt` otherwise."""
    return ".dockerfile" if tool == TOOL_CAPABILITY_BLOCK else ".txt"
# --- Operator actions ------------------------------------------------------
def approve(
    qp: QueuedProposal,
    *,
    notes: str = "",
    final_file: str | None = None,
) -> None:
    """Apply the proposal, write the waiting response, and audit it.
    `final_file`, when given, replaces the proposed payload and the
    response is recorded as modified rather than approved.
    The per-tool apply step runs first, so if it raises nothing is
    written and the proposal stays pending. Capability-block
    proposals are archived after the response and audit entry are
    written.
    Raises CapabilityApplyError for a capability-block proposal whose
    bottle metadata exists but has no compose project; the apply
    helpers may raise their own errors as well."""
    proposal = qp.proposal
    tool, slug = proposal.tool, proposal.bottle_slug
    modified = final_file is not None
    status = STATUS_MODIFIED if modified else STATUS_APPROVED
    payload = final_file if modified else proposal.proposed_file

    before, after = "", ""
    if tool == TOOL_EGRESS_BLOCK:
        before, after = add_route(slug, payload)
    elif tool == TOOL_PIPELOCK_BLOCK:
        before, after = _apply_pipelock_url(slug, payload)
    elif tool == TOOL_CAPABILITY_BLOCK:
        meta = read_metadata(slug)
        if meta is not None and not meta.compose_project:
            raise CapabilityApplyError(
                "capability-block remediation is not supported for smolmachines "
                "bottles. Reject this proposal or handle the capability change "
                "manually, then restart the bottle."
            )
        before, after = apply_capability_change(slug, payload)

    write_response(
        qp.queue_dir,
        Response(
            proposal_id=proposal.id,
            status=status,
            notes=notes,
            final_file=final_file,
        ),
    )
    _write_audit(
        qp, action=status, notes=notes,
        diff_before=before, diff_after=after,
    )
    if tool == TOOL_CAPABILITY_BLOCK:
        archive_proposal(qp.queue_dir, proposal.id)
def reject(qp: QueuedProposal, *, reason: str) -> None:
    """Record a rejection: write a rejected response carrying
    `reason` as its notes into the proposal's queue dir, then write
    the matching audit entry with empty before/after diffs."""
    rejection = Response(
        proposal_id=qp.proposal.id,
        status=STATUS_REJECTED,
        notes=reason,
        final_file=None,
    )
    write_response(qp.queue_dir, rejection)
    _write_audit(
        qp,
        action=STATUS_REJECTED,
        notes=reason,
        diff_before="",
        diff_after="",
    )
def _apply_pipelock_url(slug: str, failed_url: str) -> tuple[str, str]:
    """Merge a pipelock-block failed URL's host into the allowlist.
    The host is appended to the bottle's current allowlist only when
    it is not already present; the re-rendered allowlist is then
    applied and the result of `apply_allowlist_change` is returned.
    Raises PipelockApplyError when no host can be extracted from
    `failed_url`, including when the URL is malformed enough that
    parsing itself fails."""
    import urllib.parse
    try:
        host = urllib.parse.urlsplit(failed_url.strip()).hostname or ""
    except ValueError:
        # urlsplit rejects some malformed URLs outright. Treat that the
        # same as "no host" so callers catching ApplyError can report it
        # instead of the ValueError escaping them.
        host = ""
    if not host:
        raise PipelockApplyError(
            f"proposed failed_url has no extractable host: {failed_url!r}"
        )
    current = fetch_current_allowlist(slug)
    hosts = parse_allowlist_content(current)
    if host not in hosts:
        hosts.append(host)
    return apply_allowlist_change(slug, render_allowlist_content(hosts))
def _write_audit(
    qp: QueuedProposal,
    *,
    action: str,
    notes: str,
    diff_before: str,
    diff_after: str,
) -> None:
    """Append an audit entry for an operator decision. The component
    is looked up from the proposal's tool in `COMPONENT_FOR_TOOL`;
    tools with no entry there are skipped without writing anything.
    The entry is stamped with the current UTC time and carries the
    rendered before/after diff."""
    proposal = qp.proposal
    component = COMPONENT_FOR_TOOL.get(proposal.tool)
    if component is not None:
        entry = AuditEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            bottle_slug=proposal.bottle_slug,
            component=component,
            operator_action=action,
            operator_notes=notes,
            justification=proposal.justification,
            diff=render_diff(diff_before, diff_after, label=component),
        )
        write_audit_entry(entry)
# --- $EDITOR integration --------------------------------------------------
def edit_in_editor(content: str, *, suffix: str = ".tmp") -> str | None:
    """Open `content` in $EDITOR and return edited content, if changed.
    `content` is written to a temp file (named `supervise-modify.*`
    with the given `suffix`), the editor is run on it, and the file
    is read back. Returns the edited text, or None when it is
    identical to `content`. The temp file is removed in every case.
    `$EDITOR` is split shell-style, so values that carry arguments
    (`code -w`, `emacs -nw`) work; an unset or blank value falls back
    to `vim`. The editor's exit status is not checked."""
    import shlex

    raw_editor = os.environ.get("EDITOR", "")
    try:
        editor_cmd = shlex.split(raw_editor)
    except ValueError:
        # Unbalanced quotes: fall back to treating the value as one token.
        editor_cmd = [raw_editor]
    if not editor_cmd:
        editor_cmd = ["vim"]

    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=suffix, delete=False, prefix="supervise-modify.",
    )
    path = handle.name
    try:
        # Write inside the try so a failed write still unlinks the file.
        with handle:
            handle.write(content)
        subprocess.run([*editor_cmd, path], check=False)
        with open(path) as f:
            edited = f.read()
        return edited if edited != content else None
    finally:
        try:
            os.unlink(path)
        except OSError:
            pass
# --- TUI -------------------------------------------------------------------
def cmd_supervise(argv: list[str]) -> int:
    """Entry point for the `supervise` command; returns the process
    exit code. With `--once` the pending proposals are listed and the
    command exits; otherwise the curses TUI runs until the operator
    quits.
    Exit codes: 0 on a normal quit, 130 on Ctrl-C, the `Die` code
    (or 1 when it is not an int) on a fatal error, and 1 on any other
    exception, whose traceback is written to a crash log first."""
    parser = argparse.ArgumentParser(prog=f"{PROG} supervise", add_help=True)
    parser.add_argument(
        "--once", action="store_true",
        help="list pending proposals once and exit (no TUI)",
    )
    if parser.parse_args(argv).once:
        return _list_once()
    try:
        curses.wrapper(_main_loop)
    except KeyboardInterrupt:
        return 130
    except Die as fatal:
        error(
            fatal.message
            or "supervise exited on a fatal error (no detail captured)."
        )
        return fatal.code if isinstance(fatal.code, int) else 1
    except Exception as crash:
        log_path = _write_crash_log(crash)
        error(f"supervise crashed: {type(crash).__name__}: {crash}")
        error(f"full traceback written to {log_path}")
        return 1
    else:
        return 0
def _write_crash_log(exc: BaseException) -> Path:
    """Append `exc`'s formatted traceback, under a UTC-stamped
    header, to `logs/supervise-crash.log` below the bot-bottle root
    and return that path. If the log directory or file cannot be
    written (OSError), the entry goes to a fresh temp file instead
    and that file's path is returned."""
    when = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    trace_text = "".join(
        traceback.format_exception(type(exc), exc, exc.__traceback__)
    )
    record = f"=== supervise crash {when} ===\n{trace_text}\n"
    try:
        logs = _supervise.bot_bottle_root() / "logs"
        logs.mkdir(parents=True, exist_ok=True)
        target = logs / "supervise-crash.log"
        with target.open("a", encoding="utf-8") as sink:
            sink.write(record)
    except OSError:
        fd, fallback = tempfile.mkstemp(
            prefix="bot-bottle-supervise-crash-", suffix=".log",
        )
        with os.fdopen(fd, "w", encoding="utf-8") as sink:
            sink.write(record)
        return Path(fallback)
    return target
def _list_once() -> int:
    """Print every pending proposal to stdout — a summary line
    followed by its justification — and return 0. When nothing is
    pending, an info message is logged instead."""
    queue = discover_pending()
    if not queue:
        info("no pending proposals")
        return 0
    for entry in queue:
        prop = entry.proposal
        summary = " ".join(
            [prop.arrival_timestamp, f"[{prop.bottle_slug}]", prop.tool, prop.id]
        )
        sys.stdout.write(f"{summary}\n")
        sys.stdout.write(f" {prop.justification}\n")
    return 0
def _try_init_green() -> int:
"""Initialise a green color pair and return its attr, or 0."""
try:
curses.start_color()
curses.use_default_colors()
curses.init_pair(1, curses.COLOR_GREEN, -1)
return curses.color_pair(1)
except curses.error:
return 0
def _main_loop(stdscr: "curses._CursesWindow") -> None:
    """Run the list view until the operator quits (`q`, Esc, or
    Ctrl-C during a key read).
    Each pass re-reads the pending queue, clamps the selection,
    redraws, and waits up to `_REFRESH_INTERVAL_MS` for a key. When
    proposals appear that were not in the previous pass, the terminal
    beeps and the selection jumps to the first new one. The very
    first pass is exempt, so a backlog present at startup stays
    silent.
    Keys: j/k or arrows move, Enter opens the detail view, `a`
    approves, `m` edits then approves, `r` rejects with a reason.
    Apply failures are shown in the status line and leave the
    proposal pending."""
    curses.curs_set(0)
    stdscr.timeout(_REFRESH_INTERVAL_MS)
    green_attr = _try_init_green()
    selected = 0
    status_line = ""
    seen_ids: set[str] = set()
    # Tracked explicitly: `seen_ids` is also empty whenever the queue has
    # drained, so it cannot double as the "startup" signal.
    first_pass = True
    while True:
        pending = discover_pending()
        if selected >= len(pending):
            selected = max(0, len(pending) - 1)
        live_ids = {qp.proposal.id for qp in pending}
        newly_arrived = live_ids - seen_ids
        if newly_arrived and not first_pass:
            try:
                curses.beep()
            except curses.error:
                pass
            for i, qp in enumerate(pending):
                if qp.proposal.id in newly_arrived:
                    selected = i
                    break
        seen_ids = live_ids
        first_pass = False
        _render(
            stdscr, pending, selected, status_line,
            green_attr=green_attr,
        )
        try:
            key = stdscr.getch()
        except KeyboardInterrupt:
            return
        if key == -1:
            # Timeout tick: nothing pressed, just refresh.
            continue
        status_line = ""
        if key in (ord("q"), 27):
            return
        if not pending:
            continue
        qp = pending[selected]
        if key in (curses.KEY_DOWN, ord("j")):
            selected = min(selected + 1, len(pending) - 1)
        elif key in (curses.KEY_UP, ord("k")):
            selected = max(selected - 1, 0)
        elif key in (curses.KEY_ENTER, 10, 13):
            _detail_view(stdscr, qp, green_attr=green_attr)
        elif key == ord("a"):
            try:
                approve(qp)
                status_line = _approval_status(qp, "approved")
            except ApplyError as e:
                status_line = f"apply failed: {e}"
        elif key == ord("m"):
            edited = _modify(stdscr, qp)
            if edited is None:
                status_line = "modify aborted (no change)"
            else:
                try:
                    approve(qp, final_file=edited, notes="operator modified before approving")
                    status_line = _approval_status(qp, "modified+approved")
                except ApplyError as e:
                    status_line = f"apply failed: {e}"
        elif key == ord("r"):
            reason = _prompt(stdscr, "reject reason: ")
            if reason:
                reject(qp, reason=reason)
                status_line = f"rejected {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
            else:
                status_line = "reject aborted (empty reason)"
def _render(
    stdscr: "curses._CursesWindow",
    pending: list[QueuedProposal],
    selected: int,
    status_line: str,
    *,
    green_attr: int = 0,
) -> None:
    """Redraw the list view: a bold header with the pending count, a
    rule, one summary row per proposal (the selected one reversed)
    followed by its justification when it has one, then the status
    line, a rule, and the key-hint footer at the bottom. Rows stop
    before the three bottom lines. `green_attr` is accepted but not
    used by this view."""
    stdscr.erase()
    height, width = stdscr.getmaxyx()
    stdscr.addnstr(
        0, 0,
        f"bot-bottle supervise ({len(pending)} pending)",
        width - 1, curses.A_BOLD,
    )
    stdscr.hline(1, 0, curses.ACS_HLINE, width)
    y = 2
    # The last three lines hold the status line, a rule, and the footer.
    list_limit = height - 3
    if pending:
        for index, item in enumerate(pending):
            if y >= list_limit:
                break
            prop = item.proposal
            stamp = prop.arrival_timestamp
            if "T" in stamp:
                # Keep only the time-of-day part.
                stamp = stamp.split("T", 1)[1][:8]
            is_current = index == selected
            marker = "> " if is_current else " "
            summary = (
                f"{marker}{stamp} "
                f"[{prop.bottle_slug}] {prop.tool:<18} {prop.id[:8]} "
                f"{_proposed_payload_label(prop.tool)}"
            )
            stdscr.addnstr(
                y, 0, summary, width - 1,
                curses.A_REVERSE if is_current else curses.A_NORMAL,
            )
            y += 1
            if y >= list_limit:
                break
            if prop.justification:
                stdscr.addnstr(
                    y, 4, prop.justification[: max(0, width - 5)], width - 5,
                )
                y += 1
    else:
        stdscr.addnstr(
            y, 2,
            "no pending proposals; agents will queue here when they call a "
            "supervise tool",
            width - 4,
        )
    stdscr.hline(height - 2, 0, curses.ACS_HLINE, width)
    stdscr.addnstr(
        height - 1, 0,
        "[j/k] move [Enter] view [a] approve [m] modify [r] reject [q] quit",
        width - 1, curses.A_DIM,
    )
    if status_line:
        stdscr.addnstr(height - 3, 0, status_line, width - 1, curses.A_BOLD)
    stdscr.refresh()
def _detail_notice(stdscr: "curses._CursesWindow", message: str) -> None:
    """Show ``message`` on the bottom row and wait for a key press."""
    h, w = stdscr.getmaxyx()
    stdscr.move(h - 1, 0)
    stdscr.clrtoeol()
    # Flatten whitespace: a newline written on the last row would make
    # curses try to advance past the bottom of the window.
    flat = " ".join(message.split())
    stdscr.addnstr(h - 1, 0, f"{flat} [press any key]", w - 1, curses.A_BOLD)
    stdscr.refresh()
    # getch() returns -1 when an input timeout elapses with no key; keep
    # waiting so the notice is not dismissed by a timeout.
    # NOTE(review): assumes the window is not in zero-delay (nodelay) mode,
    # where this loop would spin — confirm against the caller's setup.
    while stdscr.getch() == -1:
        pass


def _detail_view(
    stdscr: "curses._CursesWindow",
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> None:
    """Render the full proposal in a scrollable view until the operator leaves.

    Keys: ``j``/Down and ``k``/Up scroll one line, ``g``/``G`` jump to the
    first/last line, ``q``/Esc return without acting. ``a`` approves,
    ``m`` opens the proposed file for editing and approves the edited
    content, ``r`` prompts for a reason and rejects. The view returns once
    an approve or reject was attempted; an aborted modify (``_modify``
    returned ``None``) or an empty reject reason keeps the view open.

    An ``ApplyError`` raised by ``approve`` is shown on the bottom row and
    must be acknowledged with a key press before the view returns, so a
    failed apply is never dropped silently.

    Args:
        stdscr: curses window to draw on.
        qp: the queued proposal being inspected.
        green_attr: forwarded to ``_detail_lines``.
    """
    lines = _detail_lines(qp, green_attr=green_attr)
    last_offset = max(0, len(lines) - 1)
    offset = 0
    while True:
        stdscr.erase()
        h, w = stdscr.getmaxyx()
        # The bottom row is reserved for the key hints.
        for i, (text, attr) in enumerate(lines[offset:offset + h - 1]):
            stdscr.addnstr(i, 0, text, w - 1, attr)
        stdscr.addnstr(
            h - 1, 0,
            "[j/k] scroll [g/G] top/bottom [a] approve [m] modify [r] reject [q] back",
            w - 1, curses.A_DIM,
        )
        stdscr.refresh()
        key = stdscr.getch()
        if key in (ord("q"), 27):  # 27 is the Escape key code
            return
        if key in (curses.KEY_DOWN, ord("j")):
            offset = min(offset + 1, last_offset)
        elif key in (curses.KEY_UP, ord("k")):
            offset = max(offset - 1, 0)
        elif key == ord("g"):
            offset = 0
        elif key == ord("G"):
            offset = last_offset
        elif key == ord("a"):
            try:
                approve(qp)
            except ApplyError as e:
                _detail_notice(stdscr, f"apply failed: {e}")
            return
        elif key == ord("m"):
            edited = _modify(stdscr, qp)
            if edited is not None:
                try:
                    approve(qp, final_file=edited, notes="operator modified before approving")
                except ApplyError as e:
                    _detail_notice(stdscr, f"apply failed: {e}")
                return
        elif key == ord("r"):
            reason = _prompt(stdscr, "reject reason: ")
            if reason:
                reject(qp, reason=reason)
                return
def _modify(stdscr: "curses._CursesWindow", qp: QueuedProposal) -> str | None:
    """Leave curses mode, let the operator edit the proposed file, return the result.

    The return value is whatever ``edit_in_editor`` returns. The screen is
    refreshed afterwards whether or not the editor call raised.
    """
    proposal = qp.proposal
    tool_suffix = _suffix_for_tool(proposal.tool)
    # Give the terminal back before the editor takes it over.
    curses.endwin()
    try:
        return edit_in_editor(proposal.proposed_file, suffix=tool_suffix)
    finally:
        # Repaint so curses reclaims the terminal even on failure.
        stdscr.refresh()
def _prompt(stdscr: "curses._CursesWindow", label: str) -> str:
    """Read one line of input on the second-to-last screen row.

    The row is cleared, ``label`` is written at column 0, and up to 200
    bytes are read with echo enabled right after the label.

    Args:
        stdscr: curses window to draw on and read from.
        label: prompt text shown before the input field.

    Returns:
        The entered text decoded as UTF-8 (undecodable bytes replaced),
        with surrounding whitespace stripped; empty when nothing was typed.

    Echo is switched off and the cursor hidden again on every exit path,
    including when drawing the prompt itself fails.
    """
    h, _ = stdscr.getmaxyx()
    try:
        try:
            curses.curs_set(1)
        except curses.error:
            # Terminal cannot change cursor visibility; the prompt still works.
            pass
        stdscr.move(h - 2, 0)
        stdscr.clrtoeol()
        stdscr.addstr(h - 2, 0, label)
        stdscr.refresh()
        curses.echo()
        raw = stdscr.getstr(h - 2, len(label), 200)
    finally:
        curses.noecho()
        try:
            curses.curs_set(0)
        except curses.error:
            pass
    return raw.decode("utf-8", errors="replace").strip()
__all__ = [
"QueuedProposal",
"approve",
"cmd_supervise",
"discover_pending",
"edit_in_editor",
"reject",
]
+2 -2
View File
@@ -12,8 +12,8 @@ agent calls when it hits a stuck-recovery category:
Each tool call: the agent passes the full proposed file plus a
justification text. The sidecar validates the proposal syntactically,
writes it to the host's per-bottle queue dir, and holds the tool-call
connection open. The operator's TUI dashboard
(bot_bottle.cli.dashboard) sees the proposal, accepts
connection open. The operator's supervise TUI
(bot_bottle.cli.supervise) sees the proposal, accepts
approve / modify / reject, and writes a response file alongside the
proposal. The sidecar sees the response and returns `{status, notes}`
to the agent.
+1 -1
View File
@@ -1,6 +1,6 @@
# PRD 0019: Active agents in the dashboard, agent-scoped edit verbs
- **Status:** Active
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
- **Author:** didericis
- **Created:** 2026-05-26
@@ -1,6 +1,6 @@
# PRD 0020: Start and attach to agents from inside the dashboard
- **Status:** Active
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
- **Author:** didericis
- **Created:** 2026-05-26
+1 -1
View File
@@ -1,6 +1,6 @@
# PRD 0021: Dashboard as left tmux pane, selected agent as right pane
- **Status:** Active
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
- **Author:** didericis
- **Created:** 2026-05-26
@@ -0,0 +1,343 @@
- **Status:** Active
- **Author:** didericis
- **Created:** 2026-06-03
- **Issue:** #174
## Summary
The `./cli.py dashboard` command has grown from its PRD 0013 roots
(triage supervise proposals) into a parallel-agent control surface
(PRDs 0019/0020/0021): an active-agents pane, agent picker + start,
re-attach, per-bottle stop, tmux split-pane handoff, operator-
initiated `routes`/`pipelock` edits. Each chunk is reasonable on its
own; together they make the dashboard the largest CLI file in the
repo and the thing most likely to break on a rough edge (curses /
tmux / docker-exec / metadata-discovery interactions).
This PRD reverses that scope creep. The dashboard is reduced to the
**supervise-plane triage TUI** it was in PRDs 0013–0016: list pending
proposals, approve / modify / reject each one, write audit entries,
deliver the response that unblocks the agent's tool call. Everything
that's about *starting / re-entering / stopping* bottles, or about
*operator-initiated* config edits, comes out. The command is renamed
`./cli.py supervise` so the name matches what it does after the cut.
Future agent-management UX is explicitly punted: if and when a
control surface for parallel agents resurfaces, the working
assumption (per the issue) is that a web GUI — usable from mobile
— is a better second pass than another round of curses iteration.
That decision is not in this PRD's scope; this PRD only removes the
half-built local-curses path so we stop maintaining it.
## Problem
Three concrete pains, all downstream of the dashboard's growth:
1. **Surface area vs. polish.** `dashboard.py` is ~1740 lines;
`dashboard_model.py` adds another ~420. The interactions among
curses, modals, tmux split-pane, docker-exec handoff, agent
provider templates, metadata-driven re-attach, and
ExitStack-free bottle ownership are intricate enough that
shipping the next polish increment costs more than it returns.
2. **No clear ownership of "starts and stops bottles".** Today
that responsibility is split: `./cli.py start` owns one-shot
sessions; the dashboard owns multi-session bottles it started
itself; `./cli.py cleanup` owns everything else. The dashboard
tracking its own `bottles: dict[str, (cm, bottle, identity)]`
that doesn't survive a quit is a confusing third lane.
3. **Wrong target shape for a "manage many agents" UI.** The
parallel-agent experience the dashboard reaches for is mobile-
meaningful — checking in on agents from a phone is the high-
value case — and curses inside an SSH session is the wrong
tool for that. Continuing to polish a local-only TUI delays
the right next investment.
The triage half of the dashboard isn't suffering from any of these.
Pending proposals are a small, well-scoped, real workload, and the
PRD 0013–0016 surface for handling them is the right shape. The
problem is everything that got bolted onto that core after.
## Goals / Success Criteria
1. The supervise TUI starts up, lists pending proposals across all
running bottles, and supports approve / modify / reject + the
`--once` non-interactive mode — exactly as PRDs 0013–0016
specified, minus everything 0019/0020/0021 added.
2. The CLI subcommand is renamed `supervise` (was `dashboard`). The
old name is not aliased — this PRD is intentionally a
compat/breaking change (the issue carries the
`Compat/Breaking` label).
3. `dashboard.py` shrinks to a single proposal-triage curses loop:
no agents pane, no Tab pane switching, no agent picker, no
start / re-attach / stop verbs, no tmux split-pane, no
`e`/`p` operator-edit verbs, no per-process `bottles` dict.
4. `dashboard_model.py` is collapsed into whatever
`supervise.py` (CLI) needs; the model module is removed if it
has no purpose after the cut.
5. The proposal-side apply paths in `bot_bottle/backend/docker/
egress_apply.py`, `pipelock_apply.py`, and `capability_apply.py`
are unchanged — they are still called by the approve path.
6. The supervise-sidecar / proposal-queue protocol (PRD 0013) is
unchanged: the agent's experience is identical.
7. The previously-active PRDs that this one undoes are marked
`Superseded by PRD 0049`:
- PRD 0019 — active-agents pane + agent-scoped edit verbs
- PRD 0020 — start / re-attach / stop from the dashboard
- PRD 0021 — tmux split-pane
## Non-goals
- **A web GUI for managing agents.** The issue floats this as a
second pass; this PRD does not design or commit to it. The cut
is "remove the path we no longer want to invest in", not
"build the replacement".
- **A separate CLI for operator-initiated routes / pipelock
edits.** Today those edits live as `e` / `p` keys inside the
dashboard. After this PRD they don't exist anywhere — operators
who need ad-hoc edits use the same path the agents do (call the
supervise tool from inside the bottle) or hand-edit the host-
side files and restart the sidecar. Adding a `./cli.py routes
edit <slug>` verb is a follow-up if the loss bites.
- **Removing `./cli.py start` or changing its semantics.** Start
remains the one-shot launch path. PRD 0020's bottle-outlives-
process model is removed; the only path to a long-running
bottle is `./cli.py start` (foreground) plus `cli.py cleanup`
for teardown.
- **Removing the supervise-sidecar protocol or any of the three
block-remediation engines.** PRDs 0013–0016 stay Active. The
agent's view of the world doesn't change.
- **Renaming `dashboard` anywhere other than the CLI entry
point.** The dashboard-related docs (PRDs, decision records,
research notes) keep their historical references — they
describe the state of the world at the time they were written,
and the Status: Superseded line is the marker that the world
has moved on.
- **Migrating the proposal-queue file layout.** The queue still
lives at `~/.bot-bottle/queue/<slug>/`; the audit log still
lives at `~/.bot-bottle/audit/<component>-<slug>.log`. The CLI
surface changes; the on-disk surface does not.
## Scope
### In scope
- **Rename the subcommand.** `./cli.py dashboard` becomes
`./cli.py supervise`. The module moves from `bot_bottle/cli/
dashboard.py` to `bot_bottle/cli/supervise.py`. The dispatcher
in `bot_bottle/cli/__init__.py` and the help text both update.
- **Strip the curses loop to proposal-only.** The remaining
surface is: list pending proposals (with the new-arrival bell
from PRD 0013), Enter for detail view,
`a`/`m`/`r` for approve / modify / reject, `q` to quit. No
agents pane, no Tab, no agent picker, no `n`/`x`/`e`/`p`, no
tmux dispatch, no `bottles` dict on the main loop.
- **Drop unused helpers.** `_picker_modal`, `_preflight_modal`,
`_backend_picker_modal`, `_new_agent_flow`, `_attach_to_bottle`,
`_attach_in_tmux`, `_attach_via_handoff`, `_tmux_*`,
`_ensure_right_pane`, `_redirect_stderr_to_file`,
`_route_op_to_right_pane`, `_stop_bottle_flow`,
`_operator_edit_*_flow`, `operator_edit_routes`,
`operator_edit_allowlist`, and their imports come out.
- **Collapse the model module.** `dashboard_model.py`'s
proposal-side helpers (`QueuedProposal`, `discover_pending`,
`_approval_status`, `_detail_lines`,
`_failed_url_host`, `_proposed_payload_label`,
`_suffix_for_tool`, `_REFRESH_INTERVAL_MS`) move back into
`supervise.py` (CLI) or into `bot_bottle/supervise.py`
(the daemon-side module) — wherever they fit. The agents /
picker / tmux helpers in that module (`PANE_*`,
`_filter_agents`, `_running_counts`, `_format_agent_row`,
`_selection_status`, `_selected_agent`, `_bottle_for_slug`,
`_pick_next_after_stop`, `_agent_runtime_args`,
`_build_resume_argv_with_fallback`, `_build_split_pane_argv`,
`_build_respawn_pane_argv`, `_in_tmux`,
`discover_active_agents`) are deleted.
- **Mark superseded PRDs.** The Status line on PRDs 0019, 0020,
and 0021 changes to `Superseded by [PRD 0049](0049-strip-
dashboard-to-supervisor-tui.md)`.
- **Test cleanup.** Any test that targets a removed surface (the
agent picker, the tmux split helpers, the start-from-dashboard
flow, the operator-edit flows, `discover_active_agents`)
comes out. Tests covering proposal triage stay.
- **Help / usage strings.** `bot_bottle/cli/__init__.py`'s usage
block updates the command name and one-liner.
### Out of scope
- Any new feature in the supervise TUI. The cut is purely
subtractive (except for the rename).
- Behavior changes in `./cli.py start`, `cli.py cleanup`,
`cli.py resume`, `cli.py list`, `cli.py info`, `cli.py edit`,
`cli.py init` — unchanged.
- Changes to the supervise sidecar (`supervise_server.py`,
`supervise.py` daemon module). The wire protocol stays.
- Changes to the routes / pipelock / capability apply engines.
- Migration helpers, deprecation warnings, or a transitional
`dashboard` alias for `supervise`. The label on the issue says
Compat/Breaking; the rename is a hard cutover.
## Proposed design
### Final shape of the TUI
After this PRD the `./cli.py supervise` curses surface is:
```
bot-bottle supervise (3 pending)
─────────────────────────────────────────────────────────
> 03:14:22 [implementer-cy7a6] egress-block abc123… add
github.com/foo
03:13:55 [researcher-9xqs1] pipelock-block def456… allow
registry.npmjs.org
03:13:10 [implementer-cy7a6] capability-block ghi789… install
ripgrep
─────────────────────────────────────────────────────────
[j/k] move [Enter] view [a] approve [m] modify [r] reject [q] quit
```
- One pane. No Tab. `j` / `k` / arrows move through the queue.
- Enter opens the existing detail view (justification +
proposed-file body + the green pipelock host-extraction hint).
`a` / `m` / `r` work from both the list view and the detail
view, same as today.
- `q` / Esc quits. There are no dashboard-owned bottles, so no
per-process teardown decision — `q` just exits.
- The new-arrival bell stays, because it is a real win for the
operator's "I was typing at claude and a proposal landed" case.
No tmux-specific focus management remains.
### Code organisation
After the cut, the CLI module looks roughly like:
```
bot_bottle/cli/supervise.py
- cmd_supervise(argv)
- _list_once() # --once mode
- _main_loop(stdscr) # proposal-only
- _render(stdscr, pending, ...)
- _detail_view(stdscr, qp, ...)
- _modify(stdscr, qp)
- _prompt(stdscr, label)
- _write_crash_log(exc)
- approve(qp, *, notes, final_file)
- reject(qp, *, reason)
- QueuedProposal, discover_pending
- _detail_lines, _approval_status,
_failed_url_host,
_proposed_payload_label,
_suffix_for_tool
```
`dashboard_model.py` has no purpose once the agents / picker /
tmux helpers are gone, so it is removed and the surviving
proposal-side helpers move into `supervise.py` directly. The
PRD-0013 refactor that split model out (`refactor: extract
dashboard state/model layer into dashboard_model.py`) was
load-bearing for the bigger dashboard surface; with the surface
shrunk back, the split is no longer justified.
### Removed PRDs: how to mark them
The three superseded PRDs keep their bodies intact. Only the
Status line at the top changes:
```
- **Status:** Superseded by [PRD
0049](0049-strip-dashboard-to-supervisor-tui.md)
```
The PRD's own Goals / Success Criteria are left as the historical
record of what the feature shipped — readers tracing back from the
code or the git log land in a PRD that explains what once was, with
a clear pointer forward. No PRD body is rewritten.
### Tests to keep, tests to remove
Keep:
- `tests/cli/test_dashboard*.py` cases that exercise
`discover_pending`, `approve`, `reject`, `_detail_lines`,
`_approval_status`, `_failed_url_host`,
`_proposed_payload_label`, `_suffix_for_tool`,
`_modify` / `edit_in_editor`.
- `tests/cli/test_dashboard_once.py` (or equivalent) — the
`--once` listing mode.
Remove:
- Any test of `_picker_modal`, `_preflight_modal`,
`_backend_picker_modal`, `_new_agent_flow`, `_attach_*`,
`_tmux_*`, `_route_op_to_right_pane`,
`_redirect_stderr_to_file`, `_stop_bottle_flow`,
`_operator_edit_*`, `_filter_agents`, `_running_counts`,
`_format_agent_row`, `_selection_status`,
`_selected_agent`, `_bottle_for_slug`,
`_pick_next_after_stop`, `_agent_runtime_args`,
`_build_*_argv`, `discover_active_agents`.
- The test files that exist solely to cover those (e.g.,
`test_dashboard_picker.py`, `test_dashboard_tmux.py`,
`test_dashboard_attach.py`, `test_dashboard_agents.py` —
whichever of these exist after the file walk).
Files are renamed `test_supervise_*.py` to mirror the module
rename. The rename is mechanical; no test logic changes.
## Implementation chunks
Sized for a single PR each.
1. **Strip + rename in one cut.** Move `bot_bottle/cli/
dashboard.py` to `bot_bottle/cli/supervise.py`, delete the
removed helpers, delete `dashboard_model.py`, inline the
surviving helpers, update the dispatcher + usage in
`bot_bottle/cli/__init__.py`, rename tests to match, mark
PRDs 0019/0020/0021 as superseded. One commit per logical
piece inside the PR (rename, strip, supersede notes,
tests).
2. **Activate PRD 0049.** Flip this PRD's Status line from
Draft to Active in the same PR as chunk 1 once the
implementation lands. (The repo convention is that a PRD's
shipping commit is also the Status flip — see the recent
`docs(prd): activate PRD 0048…` commit shape.)
The PR closes issue #174.
## Open questions
1. **`e` / `p` operator-initiated edits — gone for good or
moved to a separate CLI verb?** The PRD removes them with no
replacement. The simplest replacement is `./cli.py routes
edit <slug>` and `./cli.py pipelock edit <slug>`, sharing
the existing `apply_routes_change` / `apply_allowlist_change`
engines. If the loss is felt within the first parallel
run after this lands, that follow-up is a small PR. Leaving
it for a separate PRD so this one stays subtractive.
2. **`--once` output shape.** The text listing today emits one
proposal per line. Worth keeping exactly as-is for
scripting consumers; this PRD does not change it. Flagging
only because the rename could tempt a tweak.
3. **Audit-log entry shape for an unprompted edit applied via
a future `routes edit` CLI verb.** Today's
`operator_edit_routes` writes an `ACTION_OPERATOR_EDIT`
audit entry. With those flows removed the constant has no
callers inside this PRD's scope. Keep the constant exported
from `supervise.py` (it's already an `__all__` member) so a
follow-up CLI verb can re-use the same audit shape without
re-introducing dead code first.
## References
- Issue
[#174](https://gitea.dideric.is/didericis/bot-bottle/issues/174)
— the request: "strip the dashboard down into just a TUI for
managing agent requests for new egress routes and new
capabilities."
- PRD 0013 — supervise plane foundation (the floor this PRD
reverts the dashboard to).
- PRDs 0014 / 0015 / 0016 — block-remediation engines that the
supervise TUI continues to drive on approve.
- PRDs 0019 / 0020 / 0021 — the bolted-on capabilities this PRD
removes.
-46
View File
@@ -277,51 +277,5 @@ class TestBottleMetadataBackend(_FakeHomeMixin, unittest.TestCase):
self.assertEqual("", loaded.backend)
class TestBottleForSlugBackend(_FakeHomeMixin, unittest.TestCase):
    """PRD 0040: `_bottle_for_slug` builds the bottle type named in metadata."""

    def setUp(self):
        self._setup_fake_home()

    def tearDown(self):
        self._teardown_fake_home()

    @staticmethod
    def _record(identity, compose_project, backend):
        # Persist a minimal metadata record for one bottle identity.
        write_metadata(BottleMetadata(
            identity=identity,
            agent_name="dev",
            cwd="",
            copy_cwd=False,
            started_at="2026-06-02T00:00:00+00:00",
            compose_project=compose_project,
            backend=backend,
        ))

    @staticmethod
    def _resolve(slug):
        # Imported lazily, as each test originally did.
        from bot_bottle.cli.dashboard import _bottle_for_slug
        resolved, _ = _bottle_for_slug(slug, {}, None)
        return resolved

    def test_docker_metadata_returns_docker_bottle(self):
        from bot_bottle.backend.docker.bottle import DockerBottle
        self._record("dev-d1", "bot-bottle-dev-d1", "docker")
        self.assertIsInstance(self._resolve("dev-d1"), DockerBottle)

    def test_smolmachines_metadata_returns_smolmachines_bottle(self):
        from bot_bottle.backend.smolmachines.bottle import SmolmachinesBottle
        self._record("dev-s1", "", "smolmachines")
        self.assertIsInstance(self._resolve("dev-s1"), SmolmachinesBottle)

    def test_no_metadata_defaults_to_docker_bottle(self):
        from bot_bottle.backend.docker.bottle import DockerBottle
        # Nothing recorded for this slug.
        self.assertIsInstance(self._resolve("unknown-slug"), DockerBottle)
if __name__ == "__main__":
unittest.main()
-492
View File
@@ -1,492 +0,0 @@
"""Unit: dashboard's row-formatting + selection helpers (PRD 0019)."""
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from unittest import mock
from bot_bottle import supervise
from bot_bottle.cli import dashboard
class _FakeHomeMixin:
    """Point `supervise.bot_bottle_root` at a throwaway directory for a test."""

    def _setup_fake_home(self) -> None:
        sandbox = tempfile.TemporaryDirectory(prefix="dashboard-aa-test.")
        self._tmp = sandbox
        real_root = supervise.bot_bottle_root

        # Resolved on each call, so it always follows the current temp dir.
        supervise.bot_bottle_root = (  # type: ignore[assignment]
            lambda: Path(self._tmp.name) / ".bot-bottle"
        )

        def restore() -> None:
            supervise.bot_bottle_root = real_root

        self._restore_home = restore

    def _teardown_fake_home(self) -> None:
        # Put the real root back before deleting the temp dir.
        self._restore_home()
        self._tmp.cleanup()
class TestFormatAgentRow(unittest.TestCase):
    """Row text built by `_format_agent_row` for the agents pane (PRD 0019 chunk 2)."""

    def _agent(self, **overrides) -> dashboard.ActiveAgent:
        fields = {
            "backend_name": "docker",
            "slug": "dev-abc12",
            "agent_name": "implementer",
            "started_at": "2026-05-26T02:55:01+00:00",
            "services": ("egress", "git-gate", "pipelock", "supervise"),
            **overrides,
        }
        return dashboard.ActiveAgent(**fields)

    def _row(self, width: int, **overrides) -> str:
        return dashboard._format_agent_row(self._agent(**overrides), width)

    def test_renders_slug_name_time_services(self):
        row = self._row(200)
        for fragment in (
            "dev-abc12",
            "implementer",
            "02:55:01",
            "egress,git-gate,pipelock,supervise",
        ):
            self.assertIn(fragment, row)

    def test_starting_label_when_no_services(self):
        # Race window: the compose project is up but `docker ps` has not
        # reported its containers yet.
        self.assertIn("(starting)", self._row(200, services=()))

    def test_filters_agent_service_from_display(self):
        # `agent` is always present on an active bottle, so listing it is
        # noise; only the sidecars should appear.
        row = self._row(200, services=("agent", "pipelock", "supervise"))
        self.assertIn("[pipelock,supervise]", row)
        for leaked in ("agent,", ",agent"):
            self.assertNotIn(leaked, row)

    def test_only_agent_service_shows_starting(self):
        # Sidecars still warming up: only `agent` is running.
        self.assertIn("(starting)", self._row(200, services=("agent",)))

    def test_question_mark_when_no_started_at(self):
        self.assertIn("started ?", self._row(200, started_at=""))

    def test_truncates_to_maxw(self):
        row = self._row(30)
        self.assertLessEqual(len(row), 30)
        # NOTE(review): endswith("") is always true, so this asserts nothing;
        # the truncation marker character appears to be missing — confirm.
        self.assertTrue(row.endswith(""))
class TestSelectionStatus(unittest.TestCase):
    """Idle status-line text from `_selection_status` (PRD 0019 chunk 3).

    Blank while the proposals pane has focus; with the agents pane
    focused it names the selected agent or shows a placeholder.
    """

    def _agent(self, slug: str) -> dashboard.ActiveAgent:
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name="x",
            started_at="",
            services=(),
        )

    def test_empty_when_proposals_focused(self):
        roster = [self._agent("a-1")]
        text = dashboard._selection_status(dashboard.PANE_PROPOSALS, roster, 0)
        self.assertEqual("", text)

    def test_no_agents_message_when_agents_pane_empty(self):
        text = dashboard._selection_status(dashboard.PANE_AGENTS, [], 0)
        self.assertEqual("[no active agents]", text)

    def test_shows_selected_slug(self):
        roster = [self._agent(slug) for slug in ("a-1", "b-2", "c-3")]
        text = dashboard._selection_status(dashboard.PANE_AGENTS, roster, 1)
        self.assertEqual("[selected: b-2]", text)

    def test_out_of_bounds_falls_back_to_no_selection(self):
        roster = [self._agent("only")]
        text = dashboard._selection_status(dashboard.PANE_AGENTS, roster, 99)
        self.assertEqual("[no agent selected]", text)
class TestFilterAgents(unittest.TestCase):
    """`_filter_agents`, the picker's name filter (PRD 0020 chunk 2).

    No curses involved, so the substring and case-insensitivity rules
    are checked directly.
    """

    NAMES = ["implementer", "researcher", "triage-bot", "ImplDeluxe"]

    def _filtered(self, query: str) -> list:
        return dashboard._filter_agents(query, self.NAMES)

    def test_empty_query_returns_all(self):
        self.assertEqual(self.NAMES, self._filtered(""))

    def test_substring_match(self):
        self.assertEqual(["implementer", "ImplDeluxe"], self._filtered("impl"))

    def test_case_insensitive(self):
        self.assertEqual(["implementer", "ImplDeluxe"], self._filtered("IMPL"))

    def test_no_match_returns_empty(self):
        self.assertEqual([], self._filtered("zzz"))

    def test_preserves_input_order(self):
        # The picker draws names in manifest order, so filtering must
        # never re-sort them.
        unsorted_names = ["beta", "alpha", "echo"]
        kept = dashboard._filter_agents("e", unsorted_names)
        self.assertEqual(["beta", "echo"], kept)
class TestDashboardManifestLoading(unittest.TestCase):
    """`_new_agent_flow` behaviour when the manifest defines no agents."""

    def test_new_agent_flow_empty_manifest_has_no_picker_entries(self):
        empty_manifest = dashboard.Manifest.from_json_obj(
            {"bottles": {}, "agents": {}},
        )
        picker_patch = mock.patch(
            "bot_bottle.cli.dashboard._picker_modal", return_value=None,
        )
        with picker_patch as picker:
            status = dashboard._new_agent_flow(
                None, empty_manifest, {}, [], tmux_state=None,  # type: ignore[arg-type]
            )
        picker.assert_called_once()
        # Second positional argument is the list of names offered.
        offered_names = picker.call_args.args[1]
        self.assertEqual([], offered_names)
        self.assertIn("no agents configured", status)
class TestRunningCounts(unittest.TestCase):
    """`_running_counts`: per-agent-name tally of active bottles.

    The picker uses it to show `(N running)` before the operator picks.
    """

    def _agent(self, agent_name: str) -> dashboard.ActiveAgent:
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=f"{agent_name}-abc",
            agent_name=agent_name,
            started_at="",
            services=(),
        )

    def _counts_for(self, *agent_names: str) -> dict:
        active = [self._agent(name) for name in agent_names]
        return dashboard._running_counts({}, active)

    def test_empty_when_no_active_agents(self):
        self.assertEqual({}, self._counts_for())

    def test_one_per_unique_agent_name(self):
        self.assertEqual(
            {"a": 1, "b": 1, "c": 1},
            self._counts_for("a", "b", "c"),
        )

    def test_counts_collisions(self):
        tally = self._counts_for("implementer", "implementer", "researcher")
        self.assertEqual({"implementer": 2, "researcher": 1}, tally)
class TestSelectedAgent(unittest.TestCase):
    """`_selected_agent`: which agent, if any, the e/p key handlers target."""

    def _agent(self, slug: str, services: tuple[str, ...] = ()) -> dashboard.ActiveAgent:
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name="x",
            started_at="",
            services=services,
        )

    def test_none_when_proposals_focused(self):
        roster = [self._agent("a-1")]
        picked = dashboard._selected_agent(dashboard.PANE_PROPOSALS, roster, 0)
        self.assertIsNone(picked)

    def test_none_when_no_agents(self):
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, [], 0)
        self.assertIsNone(picked)

    def test_returns_indexed_agent_when_in_range(self):
        roster = [self._agent("a-1"), self._agent("b-2")]
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, roster, 1)
        self.assertIsNotNone(picked)
        assert picked is not None  # narrows the type for the checker
        self.assertEqual("b-2", picked.slug)

    def test_none_when_index_out_of_range(self):
        roster = [self._agent("only")]
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, roster, 99)
        self.assertIsNone(picked)
class TestBottleForSlug(unittest.TestCase):
    """Re-attach target resolution via `_bottle_for_slug` (PRD 0020 chunk 3).

    A slug found in the dashboard's `bottles` dict yields the stored
    handle; an unknown slug yields a synthesized bottle named after
    the slug.
    """

    def test_owned_bottle_returns_held_handle(self):
        held = object()
        owned = {"dev-abc": (None, held, "dev-abc")}
        resolved, _ = dashboard._bottle_for_slug("dev-abc", owned, None)
        self.assertIs(held, resolved)

    def test_unowned_synthesizes_docker_bottle(self):
        resolved, _ = dashboard._bottle_for_slug("dev-xyz", {}, None)
        # The synthesized bottle carries the slug-derived container name.
        self.assertEqual("bot-bottle-dev-xyz", resolved.name)

    def test_unowned_without_manifest_omits_prompt_path(self):
        _, prompt_hint = dashboard._bottle_for_slug("dev-xyz", {}, None)
        self.assertEqual("", prompt_hint)
class TestPickNextAfterStop(unittest.TestCase):
    """`_pick_next_after_stop`: where focus lands after `x` stops a bottle.

    Focus moves to the agent that fills the stopped row, or to the new
    last row when the stopped agent was last.
    """

    def _agent(self, slug: str) -> dashboard.ActiveAgent:
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name=slug,
            started_at="",
            services=(),
        )

    def _roster(self, *slugs: str) -> list:
        return [self._agent(slug) for slug in slugs]

    def test_empty_list_returns_none(self):
        self.assertIsNone(dashboard._pick_next_after_stop([], 0, "anything"))

    def test_only_agent_being_stopped_returns_none(self):
        # Nothing is left to focus once the sole agent stops.
        outcome = dashboard._pick_next_after_stop(self._roster("only"), 0, "only")
        self.assertIsNone(outcome)

    def test_middle_row_slides_up_to_same_index(self):
        # Cursor on "b" (index 1); once "b" stops, "c" occupies index 1.
        outcome = dashboard._pick_next_after_stop(self._roster("a", "b", "c"), 1, "b")
        self.assertEqual((1, self._agent("c")), outcome)

    def test_last_row_wraps_to_new_last(self):
        # Cursor on "c" (index 2); the remaining two-agent list has no
        # index 2, so focus falls back to the new last row (1).
        outcome = dashboard._pick_next_after_stop(self._roster("a", "b", "c"), 2, "c")
        self.assertEqual((1, self._agent("b")), outcome)

    def test_first_row(self):
        outcome = dashboard._pick_next_after_stop(self._roster("a", "b"), 0, "a")
        self.assertEqual((0, self._agent("b")), outcome)

    def test_clamps_negative_selection(self):
        # A stale negative index must not crash.
        outcome = dashboard._pick_next_after_stop(self._roster("a", "b"), -1, "a")
        self.assertEqual((0, self._agent("b")), outcome)
class TestTmuxPaneArgvBuilders(unittest.TestCase):
    """Argv shape of the tmux split/respawn builders (PRD 0021 chunk 2).

    Only the produced argument lists are checked, so no tmux binary
    is needed.
    """

    DOCKER_ARGV = [
        "docker", "exec", "-it",
        "bot-bottle-dev-abc",
        "claude", "--dangerously-skip-permissions", "--continue",
    ]

    def test_split_pane_argv_horizontal_with_pane_id_capture(self):
        expected = ["tmux", "split-window", "-h", "-P", "-F", "#{pane_id}"]
        expected.extend(self.DOCKER_ARGV)
        built = dashboard._build_split_pane_argv(self.DOCKER_ARGV)
        self.assertEqual(expected, built)

    def test_respawn_pane_argv_kills_existing_process(self):
        expected = ["tmux", "respawn-pane", "-k", "-t", "%12"]
        expected.extend(self.DOCKER_ARGV)
        built = dashboard._build_respawn_pane_argv("%12", self.DOCKER_ARGV)
        self.assertEqual(expected, built)

    def test_respawn_pane_argv_threads_pane_id_unmodified(self):
        # Pane ids contain `%`; they must reach `-t` without quoting or
        # substitution.
        pane_id = "%abc.123"
        built = dashboard._build_respawn_pane_argv(pane_id, ["sh"])
        self.assertIn("%abc.123", built)
class TestResumeArgvWithFallback(unittest.TestCase):
    """`_build_resume_argv_with_fallback`: the `--continue || plain` shell wrap.

    The fallback matters for an agent that was started but never typed
    at: `--continue` alone has no session to resume.
    """

    def _bottle(self, prompt_path: str | None = None):
        from bot_bottle.backend.docker.bottle import DockerBottle
        return DockerBottle(
            container="bot-bottle-dev-abc",
            teardown=lambda: None,
            prompt_path_in_container=prompt_path,
        )

    def _argv(self, prompt_path: str | None = None) -> list:
        return dashboard._build_resume_argv_with_fallback(self._bottle(prompt_path))

    def test_wraps_in_sh_c_with_or_fallback(self):
        built = self._argv()
        # Shape: docker exec ... sh -c '<cmd> --continue || <cmd>'.
        prefix, shell_script = built[:6], built[6]
        self.assertEqual(
            ["docker", "exec", "-it", "bot-bottle-dev-abc", "sh", "-c"],
            prefix,
        )
        for token in ("--continue", "||"):
            self.assertIn(token, shell_script)
        # One mention of claude on each side of the `||`.
        self.assertEqual(2, shell_script.count("claude"))

    def test_inner_args_quoted_safely(self):
        # A path containing a space must arrive single-quoted.
        shell_script = self._argv("/home/with space/.prompt")[-1]
        self.assertIn("'/home/with space/.prompt'", shell_script)

    def test_includes_skip_permissions(self):
        self.assertIn("--dangerously-skip-permissions", self._argv()[-1])

    def test_includes_prompt_file_flag_when_set(self):
        shell_script = self._argv("/home/node/.bot-bottle-prompt.txt")[-1]
        self.assertIn("--append-system-prompt-file", shell_script)
        self.assertIn("/home/node/.bot-bottle-prompt.txt", shell_script)
class TestClaudeRuntimeArgs(unittest.TestCase):
    """Pins the runtime args handed to `bottle.agent_argv` on every
    attach, so the tmux path and the foreground path keep building the
    same agent invocation."""

    def test_default_skip_permissions_only(self):
        runtime_args = dashboard._agent_runtime_args(resume=False)
        self.assertEqual(["--dangerously-skip-permissions"], runtime_args)

    def test_resume_appends_continue(self):
        runtime_args = dashboard._agent_runtime_args(resume=True)
        self.assertEqual(
            ["--dangerously-skip-permissions", "--continue"],
            runtime_args,
        )

    def test_remote_control(self):
        runtime_args = dashboard._agent_runtime_args(
            resume=False,
            remote_control=True,
        )
        self.assertIn("--remote-control", runtime_args)
class TestStopBottleFlow(unittest.TestCase):
    """Explicit per-bottle stop (PRD 0020 chunk 4).

    Only the non-owned path is exercised here: it is the one that is
    safe to test without curses + docker. The owned path drives
    `cm.__exit__` against a real launch context and belongs in
    integration tests."""

    def test_non_owned_returns_cleanup_hint(self):
        # Passing stdscr=None is deliberate: the non-owned branch
        # returns before any curses call is made.
        status = dashboard._stop_bottle_flow(
            stdscr=None,  # type: ignore[arg-type]
            bottles={},
            slug="ghost-zzz",
        )
        for fragment in ("not dashboard-owned", "./cli.py cleanup"):
            self.assertIn(fragment, status)

    def test_non_owned_does_not_touch_tmux_state(self):
        # PRD 0021: stopping an unknown slug must leave the right-pane
        # occupant tracking alone, even if the slugs happened to match
        # (defensive -- a non-owned bottle can't reach the right pane
        # through the dashboard's normal flow anyway).
        tmux_state = {"pane_id": "%5", "slug": "live-bbb"}
        dashboard._stop_bottle_flow(
            stdscr=None,  # type: ignore[arg-type]
            bottles={},
            slug="ghost-zzz",
            tmux_state=tmux_state,
        )
        untouched = {"pane_id": "%5", "slug": "live-bbb"}
        self.assertEqual(untouched, tmux_state)
class TestOperatorEditFlowGuards(_FakeHomeMixin, unittest.TestCase):
    """Chunk-4 contract: the edit flow refuses when the selected agent
    is not running the sidecar the edit needs.

    With the discover-and-prompt scaffolding gone, this gate lives in
    the edit flow itself rather than in the key handler."""

    def setUp(self) -> None:
        self._setup_fake_home()

    def tearDown(self) -> None:
        self._teardown_fake_home()

    def _agent(self, services: tuple[str, ...]) -> dashboard.ActiveAgent:
        # Fixed identity; only the running-services tuple varies per test.
        agent = dashboard.ActiveAgent(
            backend_name="docker",
            slug="dev-abc12",
            agent_name="impl",
            started_at="",
            services=services,
        )
        return agent

    def test_routes_edit_refuses_without_egress(self):
        # A bottle with no bottle.egress.routes has no egress sidecar.
        agent_without_egress = self._agent(("pipelock", "supervise"))
        status = dashboard._operator_edit_flow(
            stdscr=None,  # type: ignore[arg-type]
            agent=agent_without_egress,
            required_service="egress",
            label="routes",
            fetch=lambda _: "x",
            apply=lambda _slug, _content: None,
            suffix=".yaml",
        )
        for fragment in ("no running egress sidecar", "dev-abc12"):
            self.assertIn(fragment, status)

    def test_pipelock_edit_refuses_when_pipelock_missing(self):
        # Belt-and-braces: pipelock should always be present, but there
        # is a real race window between `compose up` and the
        # `docker ps` update.
        agent_without_services = self._agent(())
        status = dashboard._operator_edit_flow(
            stdscr=None,  # type: ignore[arg-type]
            agent=agent_without_services,
            required_service="pipelock",
            label="pipelock",
            fetch=lambda _: "x",
            apply=lambda _slug, _content: None,
            suffix=".txt",
        )
        self.assertIn("no running pipelock sidecar", status)
if __name__ == "__main__":
unittest.main()
-39
View File
@@ -1,39 +0,0 @@
"""Unit: dashboard's new-proposal highlight window.
The curses rendering itself is exercised manually; this isolates
the pure decision `is the proposal still in its post-arrival
highlight window?`"""
import unittest
from bot_bottle.cli import dashboard
class TestIsRecent(unittest.TestCase):
    """Checks `dashboard._is_recent` against a first-seen map and a
    supplied `now`."""

    def test_just_seen_is_recent(self):
        first_seen = {"p1": 100.0}
        self.assertTrue(dashboard._is_recent("p1", first_seen, now=100.5))

    def test_seen_within_window(self):
        # 4.9s after arrival, inside the default 5s window.
        first_seen = {"p1": 100.0}
        self.assertTrue(dashboard._is_recent("p1", first_seen, now=104.9))

    def test_seen_past_window_is_not_recent(self):
        first_seen = {"p1": 100.0}
        self.assertFalse(dashboard._is_recent("p1", first_seen, now=106.0))

    def test_unknown_proposal_is_not_recent(self):
        first_seen = {"p1": 100.0}
        self.assertFalse(dashboard._is_recent("p2", first_seen, now=100.5))

    def test_none_args_safe_default(self):
        # A missing map, a missing clock, or both must all read as
        # "not recent".
        cases = (
            (None, None),
            ({"p1": 100.0}, None),
            (None, 100.5),
        )
        for first_seen, now in cases:
            self.assertFalse(dashboard._is_recent("p1", first_seen, now))
if __name__ == "__main__":
unittest.main()
-94
View File
@@ -1,94 +0,0 @@
"""Unit: dashboard_model — state/model layer extracted from dashboard.py.
Tests for functions that were previously buried in the 2103-line
dashboard.py and had no coverage: _approval_status,
_proposed_payload_label, and _suffix_for_tool."""
import unittest
from pathlib import Path
from bot_bottle.cli.dashboard_model import (
QueuedProposal,
_approval_status,
_proposed_payload_label,
_suffix_for_tool,
)
from bot_bottle.supervise import (
Proposal,
TOOL_CAPABILITY_BLOCK,
TOOL_EGRESS_BLOCK,
TOOL_PIPELOCK_BLOCK,
sha256_hex,
)
from datetime import datetime, timezone
def _qp(tool: str, slug: str = "dev") -> QueuedProposal:
    """Build a QueuedProposal fixture for *tool* on bottle *slug*.

    The payload, justification, timestamp, and queue directory are
    fixed; only the tool and slug vary between callers."""
    body = "x"
    created_at = datetime(2026, 6, 1, 0, 0, 0, tzinfo=timezone.utc)
    proposal = Proposal.new(
        bottle_slug=slug,
        tool=tool,
        proposed_file=body,
        justification="test",
        current_file_hash=sha256_hex(body),
        now=created_at,
    )
    return QueuedProposal(proposal=proposal, queue_dir=Path("/tmp/q"))
class TestApprovalStatus(unittest.TestCase):
    """Status-line text produced by `_approval_status` for each tool."""

    def test_egress_block_base_message(self):
        status = _approval_status(_qp(TOOL_EGRESS_BLOCK, slug="my-bot"), "approved")
        self.assertEqual("approved egress-block for [my-bot]", status)

    def test_modified_verb(self):
        status = _approval_status(
            _qp(TOOL_PIPELOCK_BLOCK, slug="dev"),
            "modified+approved",
        )
        self.assertEqual("modified+approved pipelock-block for [dev]", status)

    def test_capability_block_appends_resume_hint(self):
        status = _approval_status(_qp(TOOL_CAPABILITY_BLOCK, slug="alpha"), "approved")
        for fragment in (
            "resume: ./cli.py resume alpha",
            "approved capability-block for [alpha]",
        ):
            self.assertIn(fragment, status)

    def test_egress_block_has_no_resume_hint(self):
        status = _approval_status(_qp(TOOL_EGRESS_BLOCK), "approved")
        self.assertNotIn("resume", status)

    def test_pipelock_block_has_no_resume_hint(self):
        status = _approval_status(_qp(TOOL_PIPELOCK_BLOCK), "approved")
        self.assertNotIn("resume", status)
class TestProposedPayloadLabel(unittest.TestCase):
    """Label shown for a proposal's payload, keyed by tool name."""

    def test_pipelock_returns_failed_url(self):
        label = _proposed_payload_label(TOOL_PIPELOCK_BLOCK)
        self.assertEqual("failed URL", label)

    def test_egress_returns_proposed_file(self):
        label = _proposed_payload_label(TOOL_EGRESS_BLOCK)
        self.assertEqual("proposed file", label)

    def test_capability_returns_proposed_file(self):
        label = _proposed_payload_label(TOOL_CAPABILITY_BLOCK)
        self.assertEqual("proposed file", label)

    def test_unknown_tool_returns_proposed_file(self):
        label = _proposed_payload_label("unknown-tool")
        self.assertEqual("proposed file", label)
class TestSuffixForTool(unittest.TestCase):
    """File suffix returned by `_suffix_for_tool` for each tool name."""

    def test_capability_block_returns_dockerfile_suffix(self):
        suffix = _suffix_for_tool(TOOL_CAPABILITY_BLOCK)
        self.assertEqual(".dockerfile", suffix)

    def test_egress_block_returns_txt(self):
        suffix = _suffix_for_tool(TOOL_EGRESS_BLOCK)
        self.assertEqual(".txt", suffix)

    def test_pipelock_block_returns_txt(self):
        suffix = _suffix_for_tool(TOOL_PIPELOCK_BLOCK)
        self.assertEqual(".txt", suffix)

    def test_unknown_tool_returns_txt(self):
        suffix = _suffix_for_tool("whatever")
        self.assertEqual(".txt", suffix)
if __name__ == "__main__":
unittest.main()
@@ -1,12 +1,12 @@
"""Unit: dashboard headless paths (PRD 0013 phase 4, PRD 0014).
"""Unit: supervise headless paths (PRD 0013 phase 4, PRD 0014).
The curses TUI itself isn't exercised here — these tests cover the
discovery + approve/reject + audit-write paths that the TUI's key
handlers call into.
apply_routes_change is stubbed at the dashboard module level so the
tests don't need a running cred-proxy sidecar; the real docker
exec/cp/SIGHUP plumbing is covered by the integration test.
add_route is stubbed at the supervise CLI module level so the tests
don't need a running egress sidecar; the real docker exec/cp/SIGHUP
plumbing is covered by the integration test.
"""
import os
@@ -19,7 +19,7 @@ from bot_bottle import supervise
from bot_bottle.backend.docker.capability_apply import CapabilityApplyError
from bot_bottle.backend.docker.egress_apply import EgressApplyError
from bot_bottle.backend.docker.pipelock_apply import PipelockApplyError
from bot_bottle.cli import dashboard
from bot_bottle.cli import supervise as supervise_cli
from bot_bottle.supervise import (
Proposal,
STATUS_APPROVED,
@@ -61,7 +61,7 @@ class _FakeHomeMixin:
"""Patch supervise.bot_bottle_root to a temp dir for the test."""
def _setup_fake_home(self):
self._tmp = tempfile.TemporaryDirectory(prefix="dashboard-test.")
self._tmp = tempfile.TemporaryDirectory(prefix="supervise-test.")
original = supervise.bot_bottle_root
def fake_root() -> Path:
@@ -83,14 +83,14 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
self._teardown_fake_home()
def test_empty_when_no_queues(self):
self.assertEqual([], dashboard.discover_pending())
self.assertEqual([], supervise_cli.discover_pending())
def test_walks_all_slug_subdirs(self):
for slug in ("dev", "api"):
qdir = supervise.queue_dir_for_slug(slug)
qdir.mkdir(parents=True)
supervise.write_proposal(qdir, _proposal(slug=slug))
pending = dashboard.discover_pending()
pending = supervise_cli.discover_pending()
self.assertEqual({"dev", "api"}, {qp.proposal.bottle_slug for qp in pending})
def test_sorted_by_arrival_across_bottles(self):
@@ -110,7 +110,7 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
qdir = supervise.queue_dir_for_slug(p.bottle_slug)
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
pending = dashboard.discover_pending()
pending = supervise_cli.discover_pending()
self.assertEqual([early.id, late.id], [qp.proposal.id for qp in pending])
def test_excludes_already_responded(self):
@@ -121,34 +121,34 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
supervise.write_response(qdir, supervise.Response(
proposal_id=p.id, status=STATUS_APPROVED, notes="",
))
self.assertEqual([], dashboard.discover_pending())
self.assertEqual([], supervise_cli.discover_pending())
class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def setUp(self):
self._setup_fake_home()
self._original_add_route = dashboard.add_route
self._original_apply_allowlist = dashboard.apply_allowlist_change
self._original_fetch_allowlist = dashboard.fetch_current_allowlist
self._original_apply_capability = dashboard.apply_capability_change
self._original_add_route = supervise_cli.add_route
self._original_apply_allowlist = supervise_cli.apply_allowlist_change
self._original_fetch_allowlist = supervise_cli.fetch_current_allowlist
self._original_apply_capability = supervise_cli.apply_capability_change
# Default stubs: succeed with deterministic before/after so the
# audit log shows a non-empty diff.
dashboard.add_route = lambda slug, content: (
supervise_cli.add_route = lambda slug, content: (
'{"routes": []}\n', '{"routes": [{"host": "x"}]}\n',
)
dashboard.apply_allowlist_change = lambda slug, content: (
supervise_cli.apply_allowlist_change = lambda slug, content: (
"old.example\n", content,
)
dashboard.fetch_current_allowlist = lambda slug: "old.example\n"
dashboard.apply_capability_change = lambda slug, content: (
supervise_cli.fetch_current_allowlist = lambda slug: "old.example\n"
supervise_cli.apply_capability_change = lambda slug, content: (
"FROM old\n", content,
)
def tearDown(self):
dashboard.add_route = self._original_add_route
dashboard.apply_allowlist_change = self._original_apply_allowlist
dashboard.fetch_current_allowlist = self._original_fetch_allowlist
dashboard.apply_capability_change = self._original_apply_capability
supervise_cli.add_route = self._original_add_route
supervise_cli.apply_allowlist_change = self._original_apply_allowlist
supervise_cli.fetch_current_allowlist = self._original_fetch_allowlist
supervise_cli.apply_capability_change = self._original_apply_capability
self._teardown_fake_home()
def _enqueue(self, tool: str = TOOL_EGRESS_BLOCK):
@@ -156,11 +156,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
def test_approve_writes_response_and_audit(self):
qp = self._enqueue()
dashboard.approve(qp)
supervise_cli.approve(qp)
resp = read_response(qp.queue_dir, qp.proposal.id)
self.assertEqual(STATUS_APPROVED, resp.status)
self.assertIsNone(resp.final_file)
@@ -170,7 +170,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_approve_with_final_file_marks_modified(self):
qp = self._enqueue()
dashboard.approve(qp, final_file='{"routes": [{"path": "/x/"}]}\n', notes="tweaked")
supervise_cli.approve(qp, final_file='{"routes": [{"path": "/x/"}]}\n', notes="tweaked")
resp = read_response(qp.queue_dir, qp.proposal.id)
self.assertEqual(STATUS_MODIFIED, resp.status)
self.assertEqual('{"routes": [{"path": "/x/"}]}\n', resp.final_file)
@@ -180,7 +180,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_reject_writes_rejection(self):
qp = self._enqueue()
dashboard.reject(qp, reason="nope")
supervise_cli.reject(qp, reason="nope")
resp = read_response(qp.queue_dir, qp.proposal.id)
self.assertEqual(STATUS_REJECTED, resp.status)
self.assertEqual("nope", resp.notes)
@@ -190,7 +190,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_capability_block_skips_audit_log(self):
qp = self._enqueue(tool=TOOL_CAPABILITY_BLOCK)
dashboard.approve(qp)
supervise_cli.approve(qp)
# No audit log for capability-block (per PRD 0013 / 0016).
# egress and pipelock logs both empty.
self.assertEqual([], read_audit_entries("egress", "dev"))
@@ -198,7 +198,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
def test_pipelock_audit_distinct_from_egress(self):
qp = self._enqueue(tool=TOOL_PIPELOCK_BLOCK)
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertEqual(1, len(read_audit_entries("pipelock", "dev")))
self.assertEqual(0, len(read_audit_entries("egress", "dev")))
@@ -210,10 +210,10 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
def setUp(self):
self._setup_fake_home()
self._original_add_route = dashboard.add_route
self._original_add_route = supervise_cli.add_route
def tearDown(self):
dashboard.add_route = self._original_add_route
supervise_cli.add_route = self._original_add_route
self._teardown_fake_home()
def _enqueue_egress(self, proposed: str = '{"host": "x.example"}\n'):
@@ -227,17 +227,17 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
def test_egress_block_calls_add_route_with_proposed_json(self):
calls = []
dashboard.add_route = lambda slug, content: (
supervise_cli.add_route = lambda slug, content: (
calls.append((slug, content)) or ("before", "after")
)
qp = self._enqueue_egress(
proposed='{"host": "new.example", "path_allowlist": ["/x/"]}\n'
)
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertEqual(1, len(calls))
slug, content = calls[0]
self.assertEqual("dev", slug)
@@ -250,11 +250,11 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
def test_modify_passes_final_file_to_add_route(self):
calls = []
dashboard.add_route = lambda slug, content: (
supervise_cli.add_route = lambda slug, content: (
calls.append(content) or ("before", "after")
)
qp = self._enqueue_egress()
dashboard.approve(
supervise_cli.approve(
qp,
final_file='{"host": "edited.example"}\n',
notes="tweaked",
@@ -262,12 +262,12 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
self.assertEqual(['{"host": "edited.example"}\n'], calls)
def test_apply_failure_blocks_response_and_audit(self):
dashboard.add_route = lambda slug, content: (_ for _ in ()).throw(
supervise_cli.add_route = lambda slug, content: (_ for _ in ()).throw(
EgressApplyError("docker exec failed")
)
qp = self._enqueue_egress()
with self.assertRaises(EgressApplyError):
dashboard.approve(qp)
supervise_cli.approve(qp)
# No response file (proposal stays pending).
self.assertEqual(
[qp.proposal.id],
@@ -277,25 +277,20 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
self.assertEqual([], read_audit_entries("egress", "dev"))
def test_real_diff_lands_in_audit(self):
dashboard.add_route = lambda slug, content: (
supervise_cli.add_route = lambda slug, content: (
'{"routes": []}\n', # before
'{"routes": [{"host": "new.example"}]}\n', # after
)
qp = self._enqueue_egress(proposed='{"host": "new.example"}\n')
dashboard.approve(qp)
supervise_cli.approve(qp)
entries = read_audit_entries("egress", "dev")
self.assertEqual(1, len(entries))
self.assertIn('+{"routes": [{"host": "new.example"}]}', entries[0].diff)
self.assertIn('-{"routes": []}', entries[0].diff)
def test_reject_does_not_call_apply(self):
called = []
dashboard.apply_routes_change = lambda slug, content: (
called.append(True) or ("", content)
)
qp = self._enqueue_egress()
dashboard.reject(qp, reason="no thanks")
self.assertEqual([], called)
supervise_cli.reject(qp, reason="no thanks")
# Reject still writes a response + audit entry with empty diff.
resp = read_response(qp.queue_dir, qp.proposal.id)
self.assertEqual(STATUS_REJECTED, resp.status)
@@ -306,18 +301,18 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
"""PRD 0015 Phase 2 + PR #25 follow-up: approve() on a
pipelock-block proposal carries the failed URL; the dashboard
pipelock-block proposal carries the failed URL; the supervise TUI
extracts the host, merges it into the running allowlist, and
calls apply_allowlist_change with the merged content."""
def setUp(self):
self._setup_fake_home()
self._original_apply = dashboard.apply_allowlist_change
self._original_fetch = dashboard.fetch_current_allowlist
self._original_apply = supervise_cli.apply_allowlist_change
self._original_fetch = supervise_cli.fetch_current_allowlist
def tearDown(self):
dashboard.apply_allowlist_change = self._original_apply
dashboard.fetch_current_allowlist = self._original_fetch
supervise_cli.apply_allowlist_change = self._original_apply
supervise_cli.fetch_current_allowlist = self._original_fetch
self._teardown_fake_home()
def _enqueue_pipelock(self, failed_url: str = "https://api.github.com/repos/foo/bar"):
@@ -331,17 +326,17 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
def test_url_host_merged_into_current_allowlist(self):
dashboard.fetch_current_allowlist = lambda slug: "existing.example\n"
supervise_cli.fetch_current_allowlist = lambda slug: "existing.example\n"
applied = []
dashboard.apply_allowlist_change = lambda slug, content: (
supervise_cli.apply_allowlist_change = lambda slug, content: (
applied.append((slug, content))
or ("existing.example\n", content)
)
qp = self._enqueue_pipelock("https://api.github.com/repos/foo/bar")
dashboard.approve(qp)
supervise_cli.approve(qp)
# apply_allowlist_change was called with the merged content:
# existing host + the URL's host (no path, since pipelock is
# hostname-only).
@@ -353,27 +348,27 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
self.assertNotIn("/repos/foo/bar", content) # path stripped
def test_host_already_in_allowlist_is_idempotent(self):
dashboard.fetch_current_allowlist = lambda slug: "api.github.com\n"
supervise_cli.fetch_current_allowlist = lambda slug: "api.github.com\n"
applied = []
dashboard.apply_allowlist_change = lambda slug, content: (
supervise_cli.apply_allowlist_change = lambda slug, content: (
applied.append(content)
or ("api.github.com\n", content)
)
qp = self._enqueue_pipelock("https://api.github.com/some/path")
dashboard.approve(qp)
supervise_cli.approve(qp)
# Still applied, but the content is unchanged from current —
# before/after diff is empty.
self.assertEqual(1, len(applied))
self.assertEqual("api.github.com\n", applied[0])
def test_apply_failure_blocks_response_and_audit(self):
dashboard.fetch_current_allowlist = lambda slug: "existing.example\n"
dashboard.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
supervise_cli.fetch_current_allowlist = lambda slug: "existing.example\n"
supervise_cli.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
PipelockApplyError("docker exec failed")
)
qp = self._enqueue_pipelock()
with self.assertRaises(PipelockApplyError):
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertEqual(
[qp.proposal.id],
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
@@ -381,12 +376,12 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
self.assertEqual([], read_audit_entries("pipelock", "dev"))
def test_url_without_host_raises(self):
dashboard.fetch_current_allowlist = lambda slug: ""
supervise_cli.fetch_current_allowlist = lambda slug: ""
# supervise_server's validator would catch this; if a broken
# URL ever makes it through, the dashboard surfaces it too.
# URL ever makes it through, the supervise TUI surfaces it too.
qp = self._enqueue_pipelock("https:///nohost")
with self.assertRaises(PipelockApplyError):
dashboard.approve(qp)
supervise_cli.approve(qp)
class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
@@ -397,10 +392,10 @@ class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
def setUp(self):
self._setup_fake_home()
self._original = dashboard.apply_capability_change
self._original = supervise_cli.apply_capability_change
def tearDown(self):
dashboard.apply_capability_change = self._original
supervise_cli.apply_capability_change = self._original
self._teardown_fake_home()
def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"):
@@ -414,112 +409,50 @@ class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
qdir = supervise.queue_dir_for_slug("dev")
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
def test_capability_block_calls_apply_with_proposed_file(self):
calls = []
dashboard.apply_capability_change = lambda slug, content: (
supervise_cli.apply_capability_change = lambda slug, content: (
calls.append((slug, content)) or ("FROM old\n", content)
)
qp = self._enqueue_capability("FROM bookworm\n")
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertEqual([("dev", "FROM bookworm\n")], calls)
def test_apply_failure_blocks_response_and_keeps_pending(self):
dashboard.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
supervise_cli.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
CapabilityApplyError("teardown failed")
)
qp = self._enqueue_capability()
with self.assertRaises(CapabilityApplyError):
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertEqual(
[qp.proposal.id],
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
)
def test_no_audit_log_for_capability(self):
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
supervise_cli.apply_capability_change = lambda slug, content: ("FROM old\n", content)
qp = self._enqueue_capability()
dashboard.approve(qp)
supervise_cli.approve(qp)
# capability-block has no audit log per PRD 0013 — its record
# lives in the per-bottle Dockerfile + transcript state.
self.assertEqual([], read_audit_entries("egress", "dev"))
self.assertEqual([], read_audit_entries("pipelock", "dev"))
def test_proposal_archived_after_apply(self):
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
supervise_cli.apply_capability_change = lambda slug, content: ("FROM old\n", content)
qp = self._enqueue_capability()
dashboard.approve(qp)
supervise_cli.approve(qp)
# Sidecar would normally archive after delivering the response,
# but it's gone by then. The dashboard archives so
# but it's gone by then. The supervise TUI archives so
# discover_pending stops surfacing the resolved proposal.
self.assertEqual([], supervise.list_pending_proposals(qp.queue_dir))
processed = list((qp.queue_dir / "processed").glob("*.json"))
self.assertEqual(2, len(processed))
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
on a pending proposal)."""
def setUp(self):
self._setup_fake_home()
self._original_apply = dashboard.apply_routes_change
def tearDown(self):
dashboard.apply_routes_change = self._original_apply
self._teardown_fake_home()
def test_writes_audit_with_operator_edit_action(self):
dashboard.apply_routes_change = lambda slug, content: (
'{"routes": []}\n', content,
)
dashboard.operator_edit_routes("dev", '{"routes": [{"path": "/x/"}]}\n')
entries = read_audit_entries("egress", "dev")
self.assertEqual(1, len(entries))
self.assertEqual(supervise.ACTION_OPERATOR_EDIT, entries[0].operator_action)
self.assertEqual("", entries[0].justification)
self.assertIn("+", entries[0].diff)
def test_failure_does_not_write_audit(self):
dashboard.apply_routes_change = lambda slug, content: (_ for _ in ()).throw(
EgressApplyError("nope")
)
with self.assertRaises(EgressApplyError):
dashboard.operator_edit_routes("dev", '{"routes": []}\n')
self.assertEqual([], read_audit_entries("egress", "dev"))
class TestOperatorEditAllowlist(_FakeHomeMixin, unittest.TestCase):
"""PRD 0015 Phase 3: operator-initiated pipelock allowlist edit."""
def setUp(self):
self._setup_fake_home()
self._original = dashboard.apply_allowlist_change
def tearDown(self):
dashboard.apply_allowlist_change = self._original
self._teardown_fake_home()
def test_writes_audit_with_operator_edit_action(self):
dashboard.apply_allowlist_change = lambda slug, content: (
"old.example\n", content,
)
dashboard.operator_edit_allowlist("dev", "old.example\nnew.example\n")
entries = read_audit_entries("pipelock", "dev")
self.assertEqual(1, len(entries))
self.assertEqual(supervise.ACTION_OPERATOR_EDIT, entries[0].operator_action)
self.assertIn("+new.example", entries[0].diff)
def test_failure_does_not_write_audit(self):
dashboard.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
PipelockApplyError("nope")
)
with self.assertRaises(PipelockApplyError):
dashboard.operator_edit_allowlist("dev", "x.example\n")
self.assertEqual([], read_audit_entries("pipelock", "dev"))
class TestEditInEditor(unittest.TestCase):
def test_runs_editor_returns_edited_content(self):
# Fake "editor" is /bin/sh -c 'cat <<EOF > $1 ... EOF'
@@ -544,7 +477,7 @@ class TestEditInEditor(unittest.TestCase):
os.chmod(editor_script, 0o755)
os.environ["EDITOR"] = editor_script
try:
result = dashboard.edit_in_editor("original")
result = supervise_cli.edit_in_editor("original")
self.assertEqual("edited", result)
finally:
os.unlink(editor_script)
@@ -566,7 +499,7 @@ class TestEditInEditor(unittest.TestCase):
os.chmod(editor_script, 0o755)
os.environ["EDITOR"] = editor_script
try:
result = dashboard.edit_in_editor("original")
result = supervise_cli.edit_in_editor("original")
self.assertIsNone(result)
finally:
os.unlink(editor_script)
@@ -583,19 +516,19 @@ class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
def setUp(self):
self._setup_fake_home()
self._original_apply_capability = dashboard.apply_capability_change
dashboard.apply_capability_change = lambda slug, content: ("", content)
self._original_apply_capability = supervise_cli.apply_capability_change
supervise_cli.apply_capability_change = lambda slug, content: ("", content)
def tearDown(self):
dashboard.apply_capability_change = self._original_apply_capability
supervise_cli.apply_capability_change = self._original_apply_capability
self._teardown_fake_home()
def _enqueue_capability(self, slug: str = "dev") -> "dashboard.QueuedProposal":
def _enqueue_capability(self, slug: str = "dev") -> "supervise_cli.QueuedProposal":
p = _proposal(slug=slug, tool=TOOL_CAPABILITY_BLOCK)
qdir = supervise.queue_dir_for_slug(slug)
qdir.mkdir(parents=True, exist_ok=True)
supervise.write_proposal(qdir, p)
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
def _write_metadata(self, slug: str, compose_project: str) -> None:
from bot_bottle.backend.docker.bottle_state import BottleMetadata, write_metadata
@@ -612,18 +545,18 @@ class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
self._write_metadata("dev", compose_project="")
qp = self._enqueue_capability("dev")
with self.assertRaises(CapabilityApplyError) as ctx:
dashboard.approve(qp)
supervise_cli.approve(qp)
self.assertIn("smolmachines", str(ctx.exception))
def test_docker_bottle_calls_apply_capability_change(self):
self._write_metadata("dev", compose_project="bot-bottle-dev")
qp = self._enqueue_capability("dev")
dashboard.approve(qp) # must not raise
supervise_cli.approve(qp) # must not raise
def test_no_metadata_falls_through_to_docker_path(self):
# No metadata at all → assume Docker (backward-compatible).
qp = self._enqueue_capability("dev")
dashboard.approve(qp) # must not raise
supervise_cli.approve(qp) # must not raise
if __name__ == "__main__":
@@ -1,6 +1,6 @@
"""Unit: dashboard launch/crash failure logging (issue #100).
"""Unit: supervise launch/crash failure logging (issue #100).
The dashboard runs under curses, so anything written to stderr while the
The supervise TUI runs under curses, so anything written to stderr while the
TUI owns the terminal is wiped when the terminal is restored. These
tests lock the recovery paths: a config error (`Die`) is re-surfaced
after the wrapper returns, and an unexpected crash is persisted to a
@@ -17,7 +17,7 @@ from pathlib import Path
from unittest import mock
from bot_bottle import supervise
from bot_bottle.cli import dashboard
from bot_bottle.cli import supervise as supervise_cli
from bot_bottle.log import Die, die
@@ -44,7 +44,7 @@ class _FakeHomeMixin:
~/.bot-bottle."""
def _setup_fake_home(self):
self._tmp = tempfile.TemporaryDirectory(prefix="dash-crash-test.")
self._tmp = tempfile.TemporaryDirectory(prefix="supervise-crash-test.")
self._orig_root = supervise.bot_bottle_root
self._root = Path(self._tmp.name) / ".bot-bottle"
supervise.bot_bottle_root = lambda: self._root # type: ignore[assignment]
@@ -54,7 +54,7 @@ class _FakeHomeMixin:
self._tmp.cleanup()
class TestCmdDashboardErrorPaths(_FakeHomeMixin, unittest.TestCase):
class TestCmdSuperviseErrorPaths(_FakeHomeMixin, unittest.TestCase):
def setUp(self):
self._setup_fake_home()
@@ -63,42 +63,42 @@ class TestCmdDashboardErrorPaths(_FakeHomeMixin, unittest.TestCase):
def test_keyboard_interrupt_returns_130(self):
with mock.patch.object(
dashboard.curses, "wrapper", side_effect=KeyboardInterrupt
supervise_cli.curses, "wrapper", side_effect=KeyboardInterrupt
):
self.assertEqual(130, dashboard.cmd_dashboard([]))
self.assertEqual(130, supervise_cli.cmd_supervise([]))
def test_die_resurfaces_message_after_curses(self):
buf = io.StringIO()
with mock.patch.object(
dashboard.curses, "wrapper",
supervise_cli.curses, "wrapper",
side_effect=Die(1, "manifest parse error at line 3"),
):
with contextlib.redirect_stderr(buf):
rc = dashboard.cmd_dashboard([])
rc = supervise_cli.cmd_supervise([])
self.assertEqual(1, rc)
self.assertIn("manifest parse error at line 3", buf.getvalue())
def test_die_without_message_has_fallback(self):
buf = io.StringIO()
with mock.patch.object(dashboard.curses, "wrapper", side_effect=Die(1)):
with mock.patch.object(supervise_cli.curses, "wrapper", side_effect=Die(1)):
with contextlib.redirect_stderr(buf):
rc = dashboard.cmd_dashboard([])
rc = supervise_cli.cmd_supervise([])
self.assertEqual(1, rc)
self.assertIn("fatal error", buf.getvalue())
def test_unexpected_exception_writes_crash_log(self):
buf = io.StringIO()
with mock.patch.object(
dashboard.curses, "wrapper",
supervise_cli.curses, "wrapper",
side_effect=ValueError("kaboom in render"),
):
with contextlib.redirect_stderr(buf):
rc = dashboard.cmd_dashboard([])
rc = supervise_cli.cmd_supervise([])
self.assertEqual(1, rc)
out = buf.getvalue()
self.assertIn("dashboard crashed: ValueError: kaboom in render", out)
self.assertIn("supervise crashed: ValueError: kaboom in render", out)
self.assertIn("full traceback written to", out)
log_path = self._root / "logs" / "dashboard-crash.log"
log_path = self._root / "logs" / "supervise-crash.log"
self.assertTrue(log_path.exists())
content = log_path.read_text()
self.assertIn("kaboom in render", content)
@@ -116,10 +116,10 @@ class TestWriteCrashLog(_FakeHomeMixin, unittest.TestCase):
try:
raise RuntimeError("explode")
except RuntimeError as e:
path = dashboard._write_crash_log(e)
self.assertEqual(self._root / "logs" / "dashboard-crash.log", path)
path = supervise_cli._write_crash_log(e)
self.assertEqual(self._root / "logs" / "supervise-crash.log", path)
text = path.read_text()
self.assertIn("=== dashboard crash", text)
self.assertIn("=== supervise crash", text)
self.assertIn("RuntimeError: explode", text)
def test_falls_back_to_tempfile_when_home_unwritable(self):
@@ -131,7 +131,7 @@ class TestWriteCrashLog(_FakeHomeMixin, unittest.TestCase):
try:
raise RuntimeError("explode2")
except RuntimeError as e:
path = dashboard._write_crash_log(e)
path = supervise_cli._write_crash_log(e)
self.assertTrue(path.exists())
self.assertIn("explode2", path.read_text())
@@ -1,4 +1,4 @@
"""Unit: dashboard's detail-view line builder.
"""Unit: supervise's detail-view line builder.
_detail_lines returns (text, attr) tuples. Most are plain; for
pipelock-block proposals it appends a "→ would allow host: <host>"
@@ -8,7 +8,7 @@ which hostname will land in pipelock's allowlist on approval."""
import unittest
from bot_bottle import supervise
from bot_bottle.cli import dashboard
from bot_bottle.cli import supervise as supervise_cli
from bot_bottle.supervise import (
Proposal,
TOOL_CAPABILITY_BLOCK,
@@ -18,7 +18,7 @@ from bot_bottle.supervise import (
)
def _qp(tool: str, payload: str) -> dashboard.QueuedProposal:
def _qp(tool: str, payload: str) -> supervise_cli.QueuedProposal:
from datetime import datetime, timezone
from pathlib import Path
p = Proposal.new(
@@ -29,14 +29,14 @@ def _qp(tool: str, payload: str) -> dashboard.QueuedProposal:
current_file_hash=sha256_hex(payload),
now=datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc),
)
return dashboard.QueuedProposal(proposal=p, queue_dir=Path("/tmp/q"))
return supervise_cli.QueuedProposal(proposal=p, queue_dir=Path("/tmp/q"))
class TestPipelockHostHighlight(unittest.TestCase):
GREEN = 0xDEADBEEF # arbitrary sentinel; _detail_lines passes through
def test_appends_green_host_line_for_pipelock_block(self):
lines = dashboard._detail_lines(
lines = supervise_cli._detail_lines(
_qp(TOOL_PIPELOCK_BLOCK, "https://api.github.com/repos/foo/bar"),
green_attr=self.GREEN,
)
@@ -47,14 +47,14 @@ class TestPipelockHostHighlight(unittest.TestCase):
self.assertEqual(["api.github.com"], green_lines)
def test_no_green_lines_for_egress_block(self):
lines = dashboard._detail_lines(
lines = supervise_cli._detail_lines(
_qp(TOOL_EGRESS_BLOCK, '{"routes": []}'),
green_attr=self.GREEN,
)
self.assertEqual([], [t for t, a in lines if a == self.GREEN])
def test_no_green_lines_for_capability_block(self):
lines = dashboard._detail_lines(
lines = supervise_cli._detail_lines(
_qp(TOOL_CAPABILITY_BLOCK, "FROM python:3.13\n"),
green_attr=self.GREEN,
)
@@ -63,8 +63,8 @@ class TestPipelockHostHighlight(unittest.TestCase):
def test_skips_host_line_when_url_unparseable(self):
# Shouldn't happen in production — supervise_server validates
# the URL before queuing — but if a malformed payload ever
# reaches the dashboard, don't render a misleading host line.
lines = dashboard._detail_lines(
# reaches the supervise TUI, don't render a misleading host line.
lines = supervise_cli._detail_lines(
_qp(TOOL_PIPELOCK_BLOCK, "garbage-not-a-url"),
green_attr=self.GREEN,
)
@@ -73,7 +73,7 @@ class TestPipelockHostHighlight(unittest.TestCase):
def test_no_green_attr_passed_still_renders_host(self):
# Even without color support (green_attr=0), the host line
# is still present — it just won't be coloured.
lines = dashboard._detail_lines(
lines = supervise_cli._detail_lines(
_qp(TOOL_PIPELOCK_BLOCK, "https://api.github.com/x"),
green_attr=0,
)
@@ -86,14 +86,14 @@ class TestFailedUrlHost(unittest.TestCase):
def test_extracts_hostname(self):
self.assertEqual(
"api.github.com",
dashboard._failed_url_host("https://api.github.com/repos/foo"),
supervise_cli._failed_url_host("https://api.github.com/repos/foo"),
)
def test_returns_empty_for_unparseable(self):
self.assertEqual("", dashboard._failed_url_host("not a url"))
self.assertEqual("", supervise_cli._failed_url_host("not a url"))
def test_returns_empty_for_url_without_host(self):
self.assertEqual("", dashboard._failed_url_host("https:///nohost"))
self.assertEqual("", supervise_cli._failed_url_host("https:///nohost"))
if __name__ == "__main__":