PRD 0049: strip dashboard to supervisor tui #176
@@ -1,6 +1,6 @@
|
||||
"""Main CLI dispatcher.
|
||||
|
||||
Commands: cleanup, dashboard, edit, info, init, list, resume, start
|
||||
Commands: cleanup, edit, info, init, list, resume, start, supervise
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,24 +12,24 @@ from ..manifest import ManifestError
|
||||
from ._common import PROG
|
||||
from . import list as _list_mod
|
||||
from .cleanup import cmd_cleanup
|
||||
from .dashboard import cmd_dashboard
|
||||
from .edit import cmd_edit
|
||||
from .info import cmd_info
|
||||
from .init import cmd_init
|
||||
from .resume import cmd_resume
|
||||
from .start import cmd_start
|
||||
from .supervise import cmd_supervise
|
||||
|
||||
cmd_list = _list_mod.cmd_list
|
||||
|
||||
COMMANDS = {
|
||||
"cleanup": cmd_cleanup,
|
||||
"dashboard": cmd_dashboard,
|
||||
"edit": cmd_edit,
|
||||
"info": cmd_info,
|
||||
"init": cmd_init,
|
||||
"list": cmd_list,
|
||||
"resume": cmd_resume,
|
||||
"start": cmd_start,
|
||||
"supervise": cmd_supervise,
|
||||
}
|
||||
|
||||
|
||||
@@ -37,13 +37,13 @@ def usage() -> None:
|
||||
sys.stderr.write(f"usage: {PROG} <command> [args...]\n\n")
|
||||
sys.stderr.write("Commands:\n")
|
||||
sys.stderr.write(" cleanup stop and remove all active bot-bottle containers\n")
|
||||
sys.stderr.write(" dashboard view + approve/modify/reject pending supervise proposals (PRD 0013)\n")
|
||||
sys.stderr.write(" edit open an agent in vim for editing\n")
|
||||
sys.stderr.write(" info print env, skills, and prompt details for a named agent\n")
|
||||
sys.stderr.write(" init interactively create a new agent and add it to bot-bottle.json\n")
|
||||
sys.stderr.write(" list list available agents or active containers\n")
|
||||
sys.stderr.write(" resume re-launch a bottle by its identity (continues state from PRD 0016)\n")
|
||||
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n\n")
|
||||
sys.stderr.write(" start boot a container for a named agent and attach an interactive session\n")
|
||||
sys.stderr.write(" supervise view + approve/modify/reject pending supervise proposals (PRD 0013)\n\n")
|
||||
sys.stderr.write(f"Run '{PROG} <command> --help' for command-specific usage.\n")
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,421 +0,0 @@
|
||||
"""dashboard_model: state/model layer for the dashboard TUI.
|
||||
|
||||
Data structures, discovery queries, pure state helpers, and derived
|
||||
values extracted from dashboard.py so they can be tested in isolation
|
||||
and navigated without wading through curses rendering code.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .. import supervise as _supervise
|
||||
from ..agent_provider import runtime_for
|
||||
from ..backend import ActiveAgent, enumerate_active_agents
|
||||
from ..backend.docker.capability_apply import CapabilityApplyError
|
||||
from ..backend.docker.egress_apply import EgressApplyError
|
||||
from ..backend.docker.pipelock_apply import PipelockApplyError
|
||||
from ..manifest import Manifest
|
||||
from ..supervise import (
|
||||
TOOL_CAPABILITY_BLOCK,
|
||||
TOOL_PIPELOCK_BLOCK,
|
||||
Proposal,
|
||||
list_pending_proposals,
|
||||
)
|
||||
|
||||
|
||||
# --- Constants ---------------------------------------------------------------
|
||||
|
||||
|
||||
_REFRESH_INTERVAL_MS = 1000
|
||||
|
||||
_NEW_PROPOSAL_HIGHLIGHT_SEC = 5.0
|
||||
|
||||
PANE_PROPOSALS = "proposals"
|
||||
PANE_AGENTS = "agents"
|
||||
|
||||
|
||||
# --- Data structures ---------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueuedProposal:
    """Immutable pairing of a pending proposal with its queue directory."""

    # The pending proposal itself.
    proposal: Proposal
    # Queue directory the proposal was discovered in.
    queue_dir: Path
|
||||
|
||||
|
||||
ApplyError = (EgressApplyError, PipelockApplyError, CapabilityApplyError)
|
||||
|
||||
|
||||
# --- Discovery ---------------------------------------------------------------
|
||||
|
||||
|
||||
def discover_active_agents() -> list[ActiveAgent]:
    """Return the currently active agents.

    Thin wrapper over the shared `enumerate_active_agents` helper, so
    every caller that goes through that helper sees the same data as
    this module.
    """
    active = enumerate_active_agents()
    return active
|
||||
|
||||
|
||||
def discover_pending() -> list[QueuedProposal]:
    """Collect pending proposals from every directory under `<root>/queue`.

    Each sub-directory of the queue root is scanned with
    `list_pending_proposals`; non-directory entries are skipped. The
    combined result is ordered by `arrival_timestamp` so the caller
    sees one FIFO across all queues. Returns `[]` when the queue root
    does not exist.
    """
    root = _supervise.bot_bottle_root() / "queue"
    if not root.is_dir():
        return []
    found = [
        QueuedProposal(proposal=item, queue_dir=bottle_dir)
        for bottle_dir in sorted(root.iterdir())
        if bottle_dir.is_dir()
        for item in list_pending_proposals(bottle_dir)
    ]
    # sorted() is stable, so ties keep their per-directory discovery order.
    return sorted(found, key=lambda entry: entry.proposal.arrival_timestamp)
|
||||
|
||||
|
||||
# --- Derived values ----------------------------------------------------------
|
||||
|
||||
|
||||
def _approval_status(qp: QueuedProposal, verb: str) -> str:
    """Build the status-line text shown after a successful approval.

    For capability-block proposals the message additionally carries a
    `./cli.py resume <slug>` hint the operator can copy-paste.
    """
    proposal = qp.proposal
    message = f"{verb} {proposal.tool} for [{proposal.bottle_slug}]"
    if proposal.tool != TOOL_CAPABILITY_BLOCK:
        return message
    return f"{message}; resume: ./cli.py resume {proposal.bottle_slug}"
|
||||
|
||||
|
||||
def _is_recent(
    proposal_id: str,
    first_seen: dict[str, float] | None,
    now: float | None,
) -> bool:
    """Report whether `proposal_id` is still inside the highlight window.

    `first_seen` maps proposal ids to the time they were first observed.
    A missing map, a missing `now`, or an id absent from the map all
    count as "not recent".
    """
    if first_seen is not None and now is not None:
        seen_at = first_seen.get(proposal_id)
        if seen_at is not None:
            return (now - seen_at) < _NEW_PROPOSAL_HIGHLIGHT_SEC
    return False
|
||||
|
||||
|
||||
def _selection_status(
    focus: str, agents: list[ActiveAgent], selected_agent: int,
) -> str:
    """Idle-state status text describing the agents-pane selection.

    Empty unless the agents pane has focus; otherwise names the selected
    agent's slug, or reports that there are no agents / no valid
    selection.
    """
    if focus != PANE_AGENTS:
        return ""
    if not agents:
        return "[no active agents]"
    in_range = 0 <= selected_agent < len(agents)
    if not in_range:
        return "[no agent selected]"
    return f"[selected: {agents[selected_agent].slug}]"
|
||||
|
||||
|
||||
def _selected_agent(
    focus: str, agents: list[ActiveAgent], selected_agent: int,
) -> ActiveAgent | None:
    """Return the agent at `selected_agent`, or None when nothing is selectable.

    None is returned when the agents pane is not focused, the list is
    empty, or the index falls outside the list.
    """
    selectable = (
        focus == PANE_AGENTS
        and bool(agents)
        and 0 <= selected_agent < len(agents)
    )
    return agents[selected_agent] if selectable else None
|
||||
|
||||
|
||||
# --- Picker helpers ----------------------------------------------------------
|
||||
|
||||
|
||||
def _filter_agents(query: str, names: list[str]) -> list[str]:
|
||||
"""Case-insensitive substring filter for the picker. Pure
|
||||
function — no curses, easy to unit-test."""
|
||||
if not query:
|
||||
return list(names)
|
||||
q = query.lower()
|
||||
return [n for n in names if q in n.lower()]
|
||||
|
||||
|
||||
def _running_counts(
|
||||
bottles: dict, agents_now: list[ActiveAgent],
|
||||
) -> dict[str, int]:
|
||||
"""Per-agent running count: dashboard-owned + externally-
|
||||
discovered, summed by agent_name. The picker shows this so the
|
||||
operator knows whether picking an agent starts a fresh bottle
|
||||
or a Nth one."""
|
||||
counts: dict[str, int] = {}
|
||||
for a in agents_now:
|
||||
counts[a.agent_name] = counts.get(a.agent_name, 0) + 1
|
||||
return counts
|
||||
|
||||
|
||||
# --- Agent-row rendering helpers ---------------------------------------------
|
||||
|
||||
|
||||
def _format_agent_row(a: ActiveAgent, maxw: int) -> str:
|
||||
"""One-line agent row: ` [<backend>] <slug> <agent_name> started
|
||||
<HH:MM:SS> [<sidecars>]`. The `agent` service is filtered out of
|
||||
the displayed list — it's always present for an active bottle,
|
||||
so listing it carries no information; the sidecars are the
|
||||
differentiator.
|
||||
|
||||
The `[docker]` / `[smolmachines]` prefix lets the operator tell
|
||||
which backend a bottle came from (issue #77). Truncated to
|
||||
`maxw` because the renderer's addnstr only enforces width if
|
||||
we hand it a properly-sized string."""
|
||||
started = (
|
||||
a.started_at.split("T", 1)[1][:8]
|
||||
if "T" in a.started_at else (a.started_at or "?")
|
||||
)
|
||||
sidecars = tuple(s for s in a.services if s != "agent")
|
||||
services = ",".join(sidecars) if sidecars else "(starting)"
|
||||
backend_tag = f"[{a.backend_name}]" if a.backend_name else ""
|
||||
line = (
|
||||
f" {backend_tag} {a.slug} {a.agent_name} "
|
||||
f"started {started} [{services}]"
|
||||
)
|
||||
if len(line) > maxw:
|
||||
return line[: max(0, maxw - 1)] + "…"
|
||||
return line
|
||||
|
||||
|
||||
# --- Detail-view helpers -----------------------------------------------------
|
||||
|
||||
|
||||
def _detail_lines(
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> list[tuple[str, int]]:
    """Build the detail-view body as `(text, curses_attr)` rows.

    Rows are plain (attr 0): a metadata header, the justification
    (indented by one space), and the proposed payload. For
    pipelock-block proposals whose payload yields a hostname, a blank
    row and then the hostname itself are appended, the latter tagged
    with `green_attr`.
    """
    p = qp.proposal
    header = (
        f"bottle: {p.bottle_slug}",
        f"tool: {p.tool}",
        f"id: {p.id}",
        f"arrived: {p.arrival_timestamp}",
        f"queue: {qp.queue_dir}",
        "",
        "justification:",
    )
    rows: list[tuple[str, int]] = [(text, 0) for text in header]
    # `or [""]` keeps one (empty) row even when the text is empty.
    for text in p.justification.splitlines() or [""]:
        rows.append((" " + text, 0))
    rows.append(("", 0))
    rows.append((_proposed_payload_label(p.tool) + ":", 0))
    for text in p.proposed_file.splitlines() or [""]:
        rows.append((text, 0))
    if p.tool != TOOL_PIPELOCK_BLOCK:
        return rows
    host = _failed_url_host(p.proposed_file)
    if host:
        rows += [("", 0), (host, green_attr)]
    return rows
|
||||
|
||||
|
||||
def _failed_url_host(url: str) -> str:
|
||||
"""Best-effort hostname extraction from a pipelock-block proposal's
|
||||
failed_url payload. Returns empty string on unparseable input —
|
||||
callers handle empty as "nothing to highlight"."""
|
||||
import urllib.parse
|
||||
try:
|
||||
return urllib.parse.urlsplit(url.strip()).hostname or ""
|
||||
except ValueError:
|
||||
return ""
|
||||
|
||||
|
||||
def _proposed_payload_label(tool: str) -> str:
    """Heading for the payload section of the detail view.

    Pipelock-block proposals are labelled "failed URL"; every other
    tool gets "proposed file".
    """
    return "failed URL" if tool == TOOL_PIPELOCK_BLOCK else "proposed file"
|
||||
|
||||
|
||||
def _suffix_for_tool(tool: str) -> str:
    """File suffix for `tool`: ".dockerfile" for capability-block, else ".txt"."""
    return ".dockerfile" if tool == TOOL_CAPABILITY_BLOCK else ".txt"
|
||||
|
||||
|
||||
# --- Bottle/agent resolution -------------------------------------------------
|
||||
|
||||
|
||||
def _bottle_for_slug(
    slug: str,
    bottles: dict,
    manifest: Manifest | None,
) -> tuple["object", str]:
    """Return `(bottle_handle, prompt_path_hint)` for re-attaching to `slug`.

    A slug present in `bottles` returns its stored handle with an empty
    hint. Otherwise a handle is synthesized from `read_metadata(slug)`:
    a `SmolmachinesBottle` when the metadata's backend is
    "smolmachines", a `DockerBottle` in every other case (including
    missing metadata). The hint is the in-container prompt path when the
    manifest lists a prompt for the bottle's agent, else "".
    """
    from ..backend.docker.bottle import DockerBottle
    from ..backend.docker.bottle_state import read_metadata
    from ..backend.smolmachines.bottle import SmolmachinesBottle

    if slug in bottles:
        _ctx, owned, _ident = bottles[slug]
        return owned, ""

    name = f"bot-bottle-{slug}"
    hint: str | None = None
    meta = read_metadata(slug)
    if meta is not None and manifest is not None:
        agent = manifest.agents.get(meta.agent_name)
        if agent is not None and agent.prompt:
            home = os.environ.get(
                "BOT_BOTTLE_CONTAINER_HOME", "/home/node",
            )
            hint = f"{home}/.bot-bottle-prompt.txt"

    backend = "" if meta is None else meta.backend
    if backend == "smolmachines":
        handle: object = SmolmachinesBottle(name, prompt_path=hint)
    else:
        handle = DockerBottle(
            container=name,
            # Synthesized handle: the no-op teardown does nothing.
            teardown=lambda: None,
            prompt_path_in_container=hint,
        )
    return handle, (hint or "")
|
||||
|
||||
|
||||
def _pick_next_after_stop(
|
||||
agents_before: list[ActiveAgent],
|
||||
selected_index: int,
|
||||
stopped_slug: str,
|
||||
) -> tuple[int, ActiveAgent] | None:
|
||||
"""After stopping `stopped_slug` from the agents list, choose
|
||||
the agent that should take focus next. The agent below the
|
||||
stopped row (which slides up to fill its index) is the
|
||||
natural pick; if the stopped agent was last, the row above
|
||||
instead. Returns (new_index, agent) or None if no agents
|
||||
remain. Pure — easy to unit-test."""
|
||||
new_agents = [a for a in agents_before if a.slug != stopped_slug]
|
||||
if not new_agents:
|
||||
return None
|
||||
new_index = min(max(selected_index, 0), len(new_agents) - 1)
|
||||
return new_index, new_agents[new_index]
|
||||
|
||||
|
||||
# --- tmux argv builders ------------------------------------------------------
|
||||
|
||||
|
||||
def _in_tmux() -> bool:
|
||||
"""True when the dashboard is running inside a tmux session.
|
||||
Tmux sets `$TMUX` to the path of its server socket."""
|
||||
return bool(os.environ.get("TMUX"))
|
||||
|
||||
|
||||
def _agent_runtime_args(
    *, resume: bool, remote_control: bool = False, agent_provider_template: str = "claude",
) -> list[str]:
    """Assemble the agent argument list for the given provider.

    Starts from the runtime's `bypass_args`, then appends its
    `remote_control_args` and `resume_args` when the matching flag is
    set, in that order. The result is a fresh list.
    """
    runtime = runtime_for(agent_provider_template)
    argv = [*runtime.bypass_args]
    if remote_control:
        argv += runtime.remote_control_args
    if resume:
        argv += runtime.resume_args
    return argv
|
||||
|
||||
|
||||
def _build_resume_argv_with_fallback(
    bottle, *, remote_control: bool = False, agent_provider_template: str = "claude",
) -> list[str]:
    """Build the exec argv used to resume an agent session.

    For any provider other than "claude" this is simply
    `bottle.agent_argv(...)` with the resume arguments included.

    For "claude" the argv is built without the resume arguments and then
    split at the agent command token: everything before it (the
    backend's exec framing) is kept as the prefix, and the agent command
    plus its arguments are shell-quoted into
    `sh -c "<agent> <resume-args> || <agent>"`. If the resuming
    invocation exits non-zero, the plain invocation runs instead.

    The command token is `bottle.agent_command` when the bottle has that
    attribute, else the runtime's `command`; `list.index` raises
    ValueError if the token is absent from the argv.
    """
    is_claude = agent_provider_template == "claude"
    runtime_args = _agent_runtime_args(
        # Claude gets its resume args via the shell wrapper below instead.
        resume=not is_claude,
        remote_control=remote_control,
        agent_provider_template=agent_provider_template,
    )
    exec_argv = bottle.agent_argv(runtime_args)
    if not is_claude:
        return exec_argv

    default_command = runtime_for(agent_provider_template).command
    marker = getattr(bottle, "agent_command", default_command)
    split_at = exec_argv.index(marker)
    framing, tail = exec_argv[:split_at], exec_argv[split_at:]
    quoted_tail = " ".join(map(shlex.quote, tail))
    quoted_resume = " ".join(
        map(shlex.quote, runtime_for(agent_provider_template).resume_args)
    )
    return [
        *framing,
        "sh", "-c",
        f"{quoted_tail} {quoted_resume} || {quoted_tail}",
    ]
|
||||
|
||||
|
||||
def _build_split_pane_argv(agent_argv: list[str]) -> list[str]:
|
||||
"""Pure helper: wrap a backend-exec argv with `tmux split-window
|
||||
-h -P -F '#{pane_id}'`. The `-P -F` combo tells tmux to print
|
||||
the new pane's id on stdout so we can track it for later
|
||||
`respawn-pane` calls."""
|
||||
return [
|
||||
"tmux", "split-window", "-h",
|
||||
"-P", "-F", "#{pane_id}",
|
||||
*agent_argv,
|
||||
]
|
||||
|
||||
|
||||
def _build_respawn_pane_argv(pane_id: str, agent_argv: list[str]) -> list[str]:
|
||||
"""Pure helper: wrap a backend-exec argv with `tmux respawn-pane
|
||||
-k -t <pane_id>`. `-k` kills the existing process in the pane
|
||||
before respawning."""
|
||||
return ["tmux", "respawn-pane", "-k", "-t", pane_id, *agent_argv]
|
||||
+11
-25
@@ -2,10 +2,8 @@
|
||||
interactive claude-code session. The container is torn down when the
|
||||
session ends.
|
||||
|
||||
The launch core is shared with `cli.py resume <identity>` and (PRD
|
||||
0020 chunk 1+) the dashboard's in-process start flow: see the
|
||||
public helpers `prepare_with_preflight`, `attach_agent`, and the
|
||||
private orchestrator `_launch_bottle`.
|
||||
The launch core is shared with `cli.py resume <identity>` through
|
||||
the private orchestrator `_launch_bottle`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -71,7 +69,7 @@ def cmd_start(argv: list[str]) -> int:
|
||||
)
|
||||
|
||||
|
||||
# --- Public helpers shared with the dashboard (PRD 0020) -----------------
|
||||
# --- Launch helpers ------------------------------------------------------
|
||||
|
||||
|
||||
def prepare_with_preflight(
|
||||
@@ -84,14 +82,11 @@ def prepare_with_preflight(
|
||||
backend_name: str | None = None,
|
||||
) -> tuple[DockerBottlePlan | None, str]:
|
||||
"""Run `backend.prepare`, render the preflight summary via the
|
||||
injected callable, prompt y/N via the injected callable. The CLI
|
||||
binds these to stderr/stdin; the dashboard binds them to a
|
||||
curses modal.
|
||||
injected callable, prompt y/N via the injected callable.
|
||||
|
||||
`backend_name` selects which backend prepares the plan
|
||||
(`None` → `$BOT_BOTTLE_BACKEND` → `docker`). Dashboard
|
||||
passes the value from its new-agent backend-picker modal; the
|
||||
CLI passes whatever `--backend` resolved to.
|
||||
(`None` → `$BOT_BOTTLE_BACKEND` → `docker`). The CLI passes
|
||||
whatever `--backend` resolved to.
|
||||
|
||||
Returns `(plan, identity)`. `plan` is None on dry-run or
|
||||
operator-N, but `identity` is set as soon as `backend.prepare`
|
||||
@@ -122,16 +117,10 @@ def attach_agent(
|
||||
agent process's exit code.
|
||||
|
||||
`resume=True` adds `--continue` so claude picks up its most
|
||||
recent session non-interactively (no session-picker prompt) —
|
||||
the right shape for the dashboard's Enter re-attach (PRD 0020
|
||||
chunk 3), where a bottle typically has exactly one session.
|
||||
First-attach paths (`./cli.py start`, the dashboard's new-agent
|
||||
flow) leave it False.
|
||||
recent session non-interactively (no session-picker prompt).
|
||||
First-attach paths (`./cli.py start`) leave it False.
|
||||
|
||||
Used as the inner step of `./cli.py start` (one-shot) and by the
|
||||
dashboard, which calls it from inside a `curses.endwin → … →
|
||||
stdscr.refresh()` handoff so the curses surface gets out of the
|
||||
terminal's way while the agent has it."""
|
||||
Used as the inner step of `./cli.py start`."""
|
||||
runtime = runtime_for(agent_provider_template)
|
||||
info(
|
||||
f"attaching interactive {agent_provider_template} session "
|
||||
@@ -148,8 +137,7 @@ def attach_agent(
|
||||
def capture_claude_session_state(identity: str, exit_code: int) -> None:
|
||||
"""Inside the launch context, while the container is still
|
||||
alive: snapshot the transcript and mark for preservation if
|
||||
claude crashed. Public for the dashboard's death-handling path
|
||||
(PRD 0020 open question 3)."""
|
||||
claude crashed."""
|
||||
# FIXME: this captures Claude-specific session state. A follow-up
|
||||
# spike should explore freezing provider-neutral container state
|
||||
# instead of relying on each agent's transcript layout.
|
||||
@@ -162,9 +150,7 @@ def capture_claude_session_state(identity: str, exit_code: int) -> None:
|
||||
|
||||
def settle_state(identity: str) -> None:
|
||||
"""Post-teardown housekeeping: print the resume hint if the
|
||||
state was preserved, otherwise reap the per-bottle state dir.
|
||||
Public so the dashboard's explicit-stop path calls the same
|
||||
settlement the CLI uses on context exit."""
|
||||
state was preserved, otherwise reap the per-bottle state dir."""
|
||||
if not identity:
|
||||
return
|
||||
if is_preserved(identity):
|
||||
|
||||
@@ -0,0 +1,577 @@
|
||||
"""supervise: list pending supervise proposals across all bottles and
|
||||
act on them (approve / modify / reject).
|
||||
|
||||
Curses-based TUI; modify-then-approve shells out to $EDITOR. The
|
||||
approval handlers wire to the per-tool remediation engines:
|
||||
PRD 0014 (egress, retargeted from cred-proxy in PRD 0017
|
||||
chunk 3) writes routes.yaml + SIGHUPs egress; PRD 0015
|
||||
(pipelock) writes the allowlist + restarts pipelock; PRD 0016
|
||||
(capability) rebuilds the bottle Dockerfile.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import curses
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from .. import supervise as _supervise
|
||||
from ..backend.docker.bottle_state import read_metadata
|
||||
from ..backend.docker.capability_apply import (
|
||||
CapabilityApplyError,
|
||||
apply_capability_change,
|
||||
)
|
||||
from ..backend.docker.egress_apply import EgressApplyError, add_route
|
||||
from ..backend.docker.pipelock_apply import (
|
||||
PipelockApplyError,
|
||||
apply_allowlist_change,
|
||||
fetch_current_allowlist,
|
||||
parse_allowlist_content,
|
||||
render_allowlist_content,
|
||||
)
|
||||
from ..log import Die, error, info
|
||||
from ..supervise import (
|
||||
COMPONENT_FOR_TOOL,
|
||||
AuditEntry,
|
||||
Proposal,
|
||||
Response,
|
||||
STATUS_APPROVED,
|
||||
STATUS_MODIFIED,
|
||||
STATUS_REJECTED,
|
||||
TOOL_CAPABILITY_BLOCK,
|
||||
TOOL_EGRESS_BLOCK,
|
||||
TOOL_PIPELOCK_BLOCK,
|
||||
archive_proposal,
|
||||
list_pending_proposals,
|
||||
render_diff,
|
||||
write_audit_entry,
|
||||
write_response,
|
||||
)
|
||||
from ._common import PROG
|
||||
|
||||
|
||||
_REFRESH_INTERVAL_MS = 1000
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueuedProposal:
    """Immutable record tying a pending proposal to the queue dir holding it."""

    # The pending proposal.
    proposal: Proposal
    # Directory the proposal was found in; responses are written back here.
    queue_dir: Path
|
||||
|
||||
|
||||
# Errors any remediation engine may raise. Caught by the TUI key
|
||||
# handlers and surfaced in the status line so a failed apply keeps
|
||||
# the proposal pending rather than crashing curses.
|
||||
ApplyError = (EgressApplyError, PipelockApplyError, CapabilityApplyError)
|
||||
|
||||
|
||||
def discover_pending() -> list[QueuedProposal]:
    """Gather pending proposals from each directory under `<root>/queue`.

    Non-directory entries are ignored. The result is ordered by
    `arrival_timestamp` across all queues; a missing queue root yields
    an empty list.
    """
    queue_root = _supervise.bot_bottle_root() / "queue"
    if not queue_root.is_dir():
        return []
    collected: list[QueuedProposal] = []
    for entry in sorted(queue_root.iterdir()):
        if entry.is_dir():
            collected.extend(
                QueuedProposal(proposal=pending, queue_dir=entry)
                for pending in list_pending_proposals(entry)
            )
    collected.sort(key=lambda item: item.proposal.arrival_timestamp)
    return collected
|
||||
|
||||
|
||||
def _approval_status(qp: QueuedProposal, verb: str) -> str:
    """Status-line text after a successful approval.

    Capability-block approvals also include a `./cli.py resume <slug>`
    hint.
    """
    tool = qp.proposal.tool
    slug = qp.proposal.bottle_slug
    summary = f"{verb} {tool} for [{slug}]"
    if tool == TOOL_CAPABILITY_BLOCK:
        summary = f"{summary}; resume: ./cli.py resume {slug}"
    return summary
|
||||
|
||||
|
||||
def _detail_lines(
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> list[tuple[str, int]]:
    """Return the detail-view body as `(text, curses_attr)` tuples.

    All rows use attr 0 except the final hostname row that is appended
    for pipelock-block proposals with an extractable host; that row
    carries `green_attr`.
    """
    p = qp.proposal

    def plain(texts):
        # Wrap each text as an unstyled row.
        return [(text, 0) for text in texts]

    rows: list[tuple[str, int]] = plain([
        f"bottle: {p.bottle_slug}",
        f"tool: {p.tool}",
        f"id: {p.id}",
        f"arrived: {p.arrival_timestamp}",
        f"queue: {qp.queue_dir}",
        "",
        "justification:",
    ])
    # Empty text still produces a single blank row via `or [""]`.
    rows += plain(" " + text for text in p.justification.splitlines() or [""])
    rows += plain(["", _proposed_payload_label(p.tool) + ":"])
    rows += plain(p.proposed_file.splitlines() or [""])
    if p.tool == TOOL_PIPELOCK_BLOCK:
        host = _failed_url_host(p.proposed_file)
        if host:
            rows.append(("", 0))
            rows.append((host, green_attr))
    return rows
|
||||
|
||||
|
||||
def _failed_url_host(url: str) -> str:
|
||||
"""Best-effort hostname extraction from a pipelock-block proposal."""
|
||||
import urllib.parse
|
||||
|
||||
try:
|
||||
return urllib.parse.urlsplit(url.strip()).hostname or ""
|
||||
except ValueError:
|
||||
return ""
|
||||
|
||||
|
||||
def _proposed_payload_label(tool: str) -> str:
    """Detail-view heading for the payload: "failed URL" for pipelock-block,
    "proposed file" for everything else."""
    if tool != TOOL_PIPELOCK_BLOCK:
        return "proposed file"
    return "failed URL"
|
||||
|
||||
|
||||
def _suffix_for_tool(tool: str) -> str:
    """File suffix for `tool`: ".dockerfile" for capability-block, ".txt" otherwise."""
    if tool != TOOL_CAPABILITY_BLOCK:
        return ".txt"
    return ".dockerfile"
|
||||
|
||||
|
||||
# --- Operator actions ------------------------------------------------------
|
||||
|
||||
|
||||
def approve(
    qp: QueuedProposal,
    *,
    notes: str = "",
    final_file: str | None = None,
) -> None:
    """Apply a proposal, then write its response and audit entry.

    When `final_file` is given it is applied instead of the proposal's
    own payload and the response status is "modified"; otherwise the
    status is "approved". The remediation runs first, so an error raised
    by it leaves no response written. Capability-block proposals are
    archived after the response and audit entry are written.

    Raises:
        CapabilityApplyError: for a capability-block proposal whose
            bottle metadata exists but has no `compose_project`.
        Whatever the per-tool apply helper raises.
    """
    proposal = qp.proposal
    tool = proposal.tool
    slug = proposal.bottle_slug
    modified = final_file is not None
    status = STATUS_MODIFIED if modified else STATUS_APPROVED
    file_to_apply = final_file if modified else proposal.proposed_file

    diff_before, diff_after = "", ""
    if tool == TOOL_EGRESS_BLOCK:
        diff_before, diff_after = add_route(slug, file_to_apply)
    elif tool == TOOL_PIPELOCK_BLOCK:
        diff_before, diff_after = _apply_pipelock_url(slug, file_to_apply)
    elif tool == TOOL_CAPABILITY_BLOCK:
        meta = read_metadata(slug)
        # Metadata without a compose project is treated as a
        # smolmachines bottle, which this remediation cannot rebuild.
        if meta is not None and not meta.compose_project:
            raise CapabilityApplyError(
                "capability-block remediation is not supported for smolmachines "
                "bottles. Reject this proposal or handle the capability change "
                "manually, then restart the bottle."
            )
        diff_before, diff_after = apply_capability_change(slug, file_to_apply)

    write_response(
        qp.queue_dir,
        Response(
            proposal_id=proposal.id,
            status=status,
            notes=notes,
            final_file=final_file,
        ),
    )
    _write_audit(
        qp, action=status, notes=notes,
        diff_before=diff_before, diff_after=diff_after,
    )
    if tool == TOOL_CAPABILITY_BLOCK:
        archive_proposal(qp.queue_dir, proposal.id)
|
||||
|
||||
|
||||
def reject(qp: QueuedProposal, *, reason: str) -> None:
    """Record a rejection: write the response, then the audit entry.

    `reason` is stored as the notes on both; no file is applied and the
    audit diff is empty.
    """
    rejection = Response(
        proposal_id=qp.proposal.id,
        status=STATUS_REJECTED,
        notes=reason,
        final_file=None,
    )
    write_response(qp.queue_dir, rejection)
    _write_audit(
        qp,
        action=STATUS_REJECTED,
        notes=reason,
        diff_before="",
        diff_after="",
    )
|
||||
|
||||
|
||||
def _apply_pipelock_url(slug: str, failed_url: str) -> tuple[str, str]:
    """Merge the host of a pipelock-block failed URL into the allowlist.

    The current allowlist for `slug` is fetched and parsed, the URL's
    host is appended if it is not already listed, and the rendered
    result is applied.

    Args:
        slug: Bottle whose allowlist is updated.
        failed_url: The URL from the proposal; surrounding whitespace is
            ignored.

    Returns:
        Whatever `apply_allowlist_change` returns (the caller unpacks it
        as the before/after pair for the audit diff).

    Raises:
        PipelockApplyError: when the URL cannot be parsed or has no
            host.
    """
    import urllib.parse

    try:
        host = urllib.parse.urlsplit(failed_url.strip()).hostname or ""
    except ValueError as err:
        # urlsplit rejects some malformed input (e.g. an unbalanced
        # IPv6 bracket). Re-raise as an apply error so callers that
        # catch the apply-error types handle it like any failed apply.
        raise PipelockApplyError(
            f"proposed failed_url is not a parseable URL: {failed_url!r}"
        ) from err
    if not host:
        raise PipelockApplyError(
            f"proposed failed_url has no extractable host: {failed_url!r}"
        )
    current = fetch_current_allowlist(slug)
    hosts = parse_allowlist_content(current)
    if host not in hosts:
        hosts.append(host)
    return apply_allowlist_change(slug, render_allowlist_content(hosts))
|
||||
|
||||
|
||||
def _write_audit(
    qp: QueuedProposal,
    *,
    action: str,
    notes: str,
    diff_before: str,
    diff_after: str,
) -> None:
    """Append an audit entry for an operator decision on `qp`.

    Tools with no entry in `COMPONENT_FOR_TOOL` are skipped silently.
    The entry is stamped with the current UTC time and carries a diff
    rendered from `diff_before` / `diff_after`.
    """
    proposal = qp.proposal
    component = COMPONENT_FOR_TOOL.get(proposal.tool)
    if component is not None:
        entry = AuditEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            bottle_slug=proposal.bottle_slug,
            component=component,
            operator_action=action,
            operator_notes=notes,
            justification=proposal.justification,
            diff=render_diff(diff_before, diff_after, label=component),
        )
        write_audit_entry(entry)
|
||||
|
||||
|
||||
# --- $EDITOR integration --------------------------------------------------
|
||||
|
||||
|
||||
def edit_in_editor(content: str, *, suffix: str = ".tmp") -> str | None:
    """Open `content` in `$EDITOR` and return the edited text if it changed.

    The text is written to a temporary file, the editor is run on it,
    and the file is read back. The temporary file is always removed
    afterwards.

    Args:
        content: Initial text shown in the editor.
        suffix: Suffix for the temporary file, so editors can pick a
            matching syntax mode.

    Returns:
        The edited text, or None when it is identical to `content`.

    Raises:
        OSError: when the editor cannot be started or the temporary file
            cannot be written or read.
    """
    import shlex

    # $EDITOR commonly carries arguments ("code -w", "emacs -nw"), so
    # split it shell-style instead of passing it as one program name.
    # An unset or empty value falls back to vim.
    raw_editor = os.environ.get("EDITOR", "")
    try:
        editor_argv = shlex.split(raw_editor)
    except ValueError:
        # Unbalanced quoting: treat the whole value as the program name.
        editor_argv = [raw_editor]
    if not editor_argv:
        editor_argv = ["vim"]

    # newline="" on both the write and the read disables newline
    # translation, so untouched content (including CRLF text) compares
    # equal after the round trip.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=suffix, delete=False, prefix="supervise-modify.",
        encoding="utf-8", newline="",
    )
    path = handle.name
    try:
        # The write happens inside the try so a failed write still
        # removes the temporary file.
        with handle:
            handle.write(content)
        subprocess.run([*editor_argv, path], check=False)
        with open(path, encoding="utf-8", newline="") as f:
            edited = f.read()
        return edited if edited != content else None
    finally:
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup; a leftover temp file is not fatal.
            pass
|
||||
|
||||
|
||||
# --- TUI -------------------------------------------------------------------
|
||||
|
||||
|
||||
def cmd_supervise(argv: list[str]) -> int:
    """Entry point for the `supervise` subcommand.

    With ``--once`` the pending proposals are listed and the command exits;
    otherwise the curses TUI runs under `curses.wrapper`.

    Returns:
        0 on a normal exit, 130 on Ctrl-C, the `Die` exit code (or 1 when
        it is not an int) on a fatal error, and 1 after an unexpected
        exception, whose traceback is written to a crash log.
    """
    cli = argparse.ArgumentParser(prog=f"{PROG} supervise", add_help=True)
    cli.add_argument(
        "--once",
        action="store_true",
        help="list pending proposals once and exit (no TUI)",
    )
    options = cli.parse_args(argv)
    if options.once:
        return _list_once()

    try:
        curses.wrapper(_main_loop)
    except KeyboardInterrupt:
        return 130
    except Die as fatal:
        error(
            fatal.message
            or "supervise exited on a fatal error (no detail captured)."
        )
        if isinstance(fatal.code, int):
            return fatal.code
        return 1
    except Exception as crash:
        # Top-level boundary: persist the traceback, then report briefly.
        crash_log = _write_crash_log(crash)
        error(f"supervise crashed: {type(crash).__name__}: {crash}")
        error(f"full traceback written to {crash_log}")
        return 1
    else:
        return 0
|
||||
|
||||
|
||||
def _write_crash_log(exc: BaseException) -> Path:
    """Persist `exc`'s traceback and return the path it was written to.

    The entry is appended to ``logs/supervise-crash.log`` under
    `_supervise.bot_bottle_root()`. If that location cannot be created or
    written (OSError), the entry goes to a fresh temporary file instead.
    """
    when = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    trace_text = "".join(
        traceback.format_exception(type(exc), exc, exc.__traceback__)
    )
    record = f"=== supervise crash {when} ===\n{trace_text}\n"
    try:
        target = _supervise.bot_bottle_root() / "logs" / "supervise-crash.log"
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("a", encoding="utf-8") as log:
            log.write(record)
    except OSError:
        # Stable location unavailable: fall back to a temp file so the
        # traceback is never lost.
        handle, fallback = tempfile.mkstemp(
            prefix="bot-bottle-supervise-crash-", suffix=".log",
        )
        with os.fdopen(handle, "w", encoding="utf-8") as log:
            log.write(record)
        return Path(fallback)
    return target
|
||||
|
||||
|
||||
def _list_once() -> int:
    """Print every pending proposal to stdout once (``--once`` mode).

    Each proposal produces a header line (timestamp, bottle slug, tool, id)
    followed by an indented line carrying its justification. Always
    returns 0.
    """
    queue = discover_pending()
    if not queue:
        info("no pending proposals")
        return 0
    out = sys.stdout
    for queued in queue:
        p = queued.proposal
        header = (
            f"{p.arrival_timestamp}  [{p.bottle_slug}]  {p.tool}  {p.id}\n"
        )
        out.write(header)
        out.write(f"    {p.justification}\n")
    return 0
|
||||
|
||||
|
||||
def _try_init_green() -> int:
    """Return the attribute for a green-on-default color pair, or 0.

    Any `curses.error` raised while setting up colors yields 0, so callers
    can pass the result straight through as a (possibly no-op) attribute.
    """
    pair_number = 1
    try:
        curses.start_color()
        curses.use_default_colors()
        # -1 selects the terminal's default background.
        curses.init_pair(pair_number, curses.COLOR_GREEN, -1)
        green = curses.color_pair(pair_number)
    except curses.error:
        green = 0
    return green
|
||||
|
||||
|
||||
def _main_loop(stdscr: "curses._CursesWindow") -> None:
    """Run the proposal-triage TUI until the operator quits.

    Each iteration re-reads the queue with `discover_pending()`, redraws
    with `_render`, then waits up to `_REFRESH_INTERVAL_MS` for a key.

    Keys: q / Esc quit; j / k / arrows move the selection; Enter opens the
    detail view; a approves; m edits the proposed file and approves the
    result; r prompts for a reason and rejects.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Terminals that cannot hide the cursor raise here; it is cosmetic.
        pass
    stdscr.timeout(_REFRESH_INTERVAL_MS)
    green_attr = _try_init_green()
    selected = 0
    status_line = ""
    seen_ids: set[str] = set()
    # Track "first poll" explicitly. Testing `seen_ids` for emptiness would
    # also silence the bell whenever the queue had been empty, which is
    # exactly when a new arrival most needs announcing.
    first_poll = True

    while True:
        pending = discover_pending()
        if selected >= len(pending):
            selected = max(0, len(pending) - 1)

        live_ids = {qp.proposal.id for qp in pending}
        newly_arrived = live_ids - seen_ids
        if not first_poll and newly_arrived:
            try:
                curses.beep()
            except curses.error:
                pass
            # Jump the selection to the first newly arrived proposal.
            for i, qp in enumerate(pending):
                if qp.proposal.id in newly_arrived:
                    selected = i
                    break
        seen_ids = live_ids
        first_poll = False

        _render(
            stdscr, pending, selected, status_line,
            green_attr=green_attr,
        )

        try:
            key = stdscr.getch()
        except KeyboardInterrupt:
            return

        if key == -1:
            # Timeout tick with no input: loop to refresh the queue.
            continue

        status_line = ""

        if key in (ord("q"), 27):
            return

        if not pending:
            continue
        qp = pending[selected]

        if key in (curses.KEY_DOWN, ord("j")):
            selected = min(selected + 1, len(pending) - 1)
        elif key in (curses.KEY_UP, ord("k")):
            selected = max(selected - 1, 0)
        elif key in (curses.KEY_ENTER, 10, 13):
            _detail_view(stdscr, qp, green_attr=green_attr)
        elif key == ord("a"):
            try:
                approve(qp)
                status_line = _approval_status(qp, "approved")
            except ApplyError as e:
                status_line = f"apply failed: {e}"
        elif key == ord("m"):
            edited = _modify(stdscr, qp)
            if edited is None:
                status_line = "modify aborted (no change)"
            else:
                try:
                    approve(qp, final_file=edited, notes="operator modified before approving")
                    status_line = _approval_status(qp, "modified+approved")
                except ApplyError as e:
                    status_line = f"apply failed: {e}"
        elif key == ord("r"):
            reason = _prompt(stdscr, "reject reason: ")
            if reason:
                reject(qp, reason=reason)
                status_line = f"rejected {qp.proposal.tool} for [{qp.proposal.bottle_slug}]"
            else:
                status_line = "reject aborted (empty reason)"
|
||||
|
||||
|
||||
def _render(
    stdscr: "curses._CursesWindow",
    pending: list[QueuedProposal],
    selected: int,
    status_line: str,
    *,
    green_attr: int = 0,
) -> None:
    """Draw the proposal list screen.

    Layout: a bold header and rule at the top, up to two rows per proposal
    (summary line, then indented justification), an optional bold status
    line at row ``h - 3``, and a rule plus key-hint footer at the bottom.

    Args:
        stdscr: Window to draw on.
        pending: Proposals to list, in display order.
        selected: Index of the highlighted proposal.
        status_line: Message for the status row; empty draws nothing.
        green_attr: Accepted for signature parity; not used by this screen.
    """
    stdscr.erase()
    h, w = stdscr.getmaxyx()
    if h < 4 or w < 6:
        # Too small for the fixed rows and column offsets below; drawing
        # would raise curses.error. Show a blank screen until it is resized.
        stdscr.refresh()
        return
    header = f"bot-bottle supervise  ({len(pending)} pending)"
    stdscr.addnstr(0, 0, header, w - 1, curses.A_BOLD)
    stdscr.hline(1, 0, curses.ACS_HLINE, w)

    row = 2
    if not pending:
        stdscr.addnstr(
            row, 2,
            "no pending proposals; agents will queue here when they call a "
            "supervise tool",
            w - 4,
        )
    else:
        for i, qp in enumerate(pending):
            if row >= h - 3:
                break
            p = qp.proposal
            # Show only HH:MM:SS when the timestamp looks like ISO 8601.
            ts_short = (
                p.arrival_timestamp.split("T", 1)[1][:8]
                if "T" in p.arrival_timestamp else p.arrival_timestamp
            )
            cursor = "> " if i == selected else "  "
            line = (
                f"{cursor}{ts_short}  "
                f"[{p.bottle_slug}]  {p.tool:<18}  {p.id[:8]}  "
                f"{_proposed_payload_label(p.tool)}"
            )
            attr = curses.A_REVERSE if i == selected else curses.A_NORMAL
            stdscr.addnstr(row, 0, line, w - 1, attr)
            row += 1
            if row >= h - 3:
                break
            if p.justification:
                # Collapse newlines and whitespace runs so a multi-line
                # justification cannot spill onto the following rows.
                one_line = " ".join(p.justification.split())
                stdscr.addnstr(row, 4, one_line, w - 5)
                row += 1

    footer = "[j/k] move  [Enter] view  [a] approve  [m] modify  [r] reject  [q] quit"
    stdscr.hline(h - 2, 0, curses.ACS_HLINE, w)
    stdscr.addnstr(h - 1, 0, footer, w - 1, curses.A_DIM)
    if status_line:
        stdscr.addnstr(h - 3, 0, status_line, w - 1, curses.A_BOLD)
    stdscr.refresh()
|
||||
|
||||
|
||||
def _detail_view(
    stdscr: "curses._CursesWindow",
    qp: QueuedProposal,
    *,
    green_attr: int = 0,
) -> None:
    """Render the full proposal in a scrollable view until the operator leaves.

    Keys: j / k / arrows scroll; g / G jump to top / bottom; a approves;
    m edits then approves; r prompts for a reason and rejects; q / Esc
    return to the list. A successful approve, an aborted modify, and any
    reject prompt also return to the list.

    If applying an approval fails, the view stays open and shows
    ``apply failed: …`` above the footer instead of silently returning.
    """
    lines = _detail_lines(qp, green_attr=green_attr)
    offset = 0
    notice = ""  # last apply failure; cleared on the next real key press
    while True:
        stdscr.erase()
        h, w = stdscr.getmaxyx()
        show_notice = bool(notice) and h >= 2
        # The notice takes the row just above the footer when shown.
        body_rows = max(0, h - 2 if show_notice else h - 1)
        for i, (text, attr) in enumerate(lines[offset:offset + body_rows]):
            stdscr.addnstr(i, 0, text, w - 1, attr)
        if show_notice:
            stdscr.addnstr(h - 2, 0, notice, w - 1, curses.A_BOLD)
        stdscr.addnstr(
            h - 1, 0,
            "[j/k] scroll  [g/G] top/bottom  [a] approve  [m] modify  [r] reject  [q] back",
            w - 1, curses.A_DIM,
        )
        stdscr.refresh()
        key = stdscr.getch()
        if key == -1:
            # No input this tick; redraw and keep any notice visible.
            continue
        notice = ""
        if key in (ord("q"), 27):
            return
        if key in (curses.KEY_DOWN, ord("j")):
            offset = min(offset + 1, max(0, len(lines) - 1))
        elif key in (curses.KEY_UP, ord("k")):
            offset = max(offset - 1, 0)
        elif key == ord("g"):
            offset = 0
        elif key == ord("G"):
            offset = max(0, len(lines) - 1)
        elif key == ord("a"):
            try:
                approve(qp)
            except ApplyError as e:
                notice = f"apply failed: {e}"
                continue
            return
        elif key == ord("m"):
            edited = _modify(stdscr, qp)
            if edited is not None:
                try:
                    approve(qp, final_file=edited, notes="operator modified before approving")
                except ApplyError as e:
                    notice = f"apply failed: {e}"
                    continue
            return
        elif key == ord("r"):
            reason = _prompt(stdscr, "reject reason: ")
            if reason:
                reject(qp, reason=reason)
            return
|
||||
|
||||
|
||||
def _modify(stdscr: "curses._CursesWindow", qp: QueuedProposal) -> str | None:
    """Leave curses mode, edit the proposed file in $EDITOR, then redraw.

    Returns whatever `edit_in_editor` returns for the proposal's
    `proposed_file`, using the filename suffix chosen by
    `_suffix_for_tool` for the proposal's tool.
    """
    proposal = qp.proposal
    tool_suffix = _suffix_for_tool(proposal.tool)
    curses.endwin()
    try:
        return edit_in_editor(proposal.proposed_file, suffix=tool_suffix)
    finally:
        # Refresh after endwin() so the TUI is drawn again, even on error.
        stdscr.refresh()
|
||||
|
||||
|
||||
def _prompt(stdscr: "curses._CursesWindow", label: str) -> str:
    """Read one line of input on the second-to-last screen row.

    Args:
        stdscr: Window to prompt on.
        label: Text shown before the input field.

    Returns:
        The entered text (at most 200 bytes), decoded as UTF-8 with
        replacement of invalid bytes and stripped of surrounding whitespace.
    """

    def _cursor(visibility: int) -> None:
        # curs_set raises on terminals without cursor-visibility support.
        try:
            curses.curs_set(visibility)
        except curses.error:
            pass

    _cursor(1)
    h, _ = stdscr.getmaxyx()
    stdscr.move(h - 2, 0)
    stdscr.clrtoeol()
    stdscr.addstr(h - 2, 0, label)
    stdscr.refresh()
    # The callers run stdscr with a read timeout for periodic refresh. Under
    # a timeout, getstr() gives up as soon as one keystroke wait expires and
    # the typed text is lost, so block for the duration of the prompt.
    stdscr.timeout(-1)
    curses.echo()
    try:
        raw = stdscr.getstr(h - 2, len(label), 200)
    finally:
        curses.noecho()
        # Restore the polling timeout the TUI loops rely on.
        stdscr.timeout(_REFRESH_INTERVAL_MS)
        _cursor(0)
    return raw.decode("utf-8", errors="replace").strip()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"QueuedProposal",
|
||||
"approve",
|
||||
"cmd_supervise",
|
||||
"discover_pending",
|
||||
"edit_in_editor",
|
||||
"reject",
|
||||
]
|
||||
@@ -12,8 +12,8 @@ agent calls when it hits a stuck-recovery category:
|
||||
Each tool call: the agent passes the full proposed file plus a
|
||||
justification text. The sidecar validates the proposal syntactically,
|
||||
writes it to the host's per-bottle queue dir, and holds the tool-call
|
||||
connection open. The operator's TUI dashboard
|
||||
(bot_bottle.cli.dashboard) sees the proposal, accepts
|
||||
connection open. The operator's supervise TUI
|
||||
(bot_bottle.cli.supervise) sees the proposal, accepts
|
||||
approve / modify / reject, and writes a response file alongside the
|
||||
proposal. The sidecar sees the response and returns `{status, notes}`
|
||||
to the agent.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# PRD 0019: Active agents in the dashboard, agent-scoped edit verbs
|
||||
|
||||
- **Status:** Active
|
||||
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-26
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# PRD 0020: Start and attach to agents from inside the dashboard
|
||||
|
||||
- **Status:** Active
|
||||
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-26
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# PRD 0021: Dashboard as left tmux pane, selected agent as right pane
|
||||
|
||||
- **Status:** Active
|
||||
- **Status:** Superseded by [PRD 0049](0049-strip-dashboard-to-supervisor-tui.md)
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-26
|
||||
|
||||
|
||||
@@ -0,0 +1,343 @@
|
||||
|
||||
- **Status:** Active
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-06-03
|
||||
- **Issue:** #174
|
||||
|
||||
## Summary
|
||||
|
||||
The `./cli.py dashboard` command has grown from its PRD 0013 roots
|
||||
(triage supervise proposals) into a parallel-agent control surface
|
||||
(PRDs 0019/0020/0021): an active-agents pane, agent picker + start,
|
||||
re-attach, per-bottle stop, tmux split-pane handoff, operator-
|
||||
initiated `routes`/`pipelock` edits. Each chunk is reasonable on its
|
||||
own; together they make the dashboard the largest CLI file in the
|
||||
repo and the thing most likely to break on a rough edge (curses /
|
||||
tmux / docker-exec / metadata-discovery interactions).
|
||||
|
||||
This PRD reverses that scope creep. The dashboard is reduced to the
|
||||
**supervise-plane triage TUI** it was in PRDs 0013–0016: list pending
|
||||
proposals, approve / modify / reject each one, write audit entries,
|
||||
deliver the response that unblocks the agent's tool call. Everything
|
||||
that's about *starting / re-entering / stopping* bottles, or about
|
||||
*operator-initiated* config edits, comes out. The command is renamed
|
||||
`./cli.py supervise` so the name matches what it does after the cut.
|
||||
|
||||
Future agent-management UX is explicitly punted: if and when a
|
||||
control surface for parallel agents resurfaces, the working
|
||||
assumption (per the issue) is that a web GUI — usable from mobile
|
||||
— is a better second pass than another round of curses iteration.
|
||||
That decision is not in this PRD's scope; this PRD only removes the
|
||||
half-built local-curses path so we stop maintaining it.
|
||||
|
||||
## Problem
|
||||
|
||||
Three concrete pains, all downstream of the dashboard's growth:
|
||||
|
||||
1. **Surface area vs. polish.** `dashboard.py` is ~1740 lines;
|
||||
`dashboard_model.py` adds another ~420. The interactions among
|
||||
curses, modals, tmux split-pane, docker-exec handoff, agent
|
||||
provider templates, metadata-driven re-attach, and
|
||||
ExitStack-free bottle ownership are intricate enough that
|
||||
shipping the next polish increment costs more than it returns.
|
||||
2. **No clear ownership of "starts and stops bottles".** Today
|
||||
that responsibility is split: `./cli.py start` owns one-shot
|
||||
sessions; the dashboard owns multi-session bottles it started
|
||||
itself; `./cli.py cleanup` owns everything else. The dashboard
|
||||
tracking its own `bottles: dict[str, (cm, bottle, identity)]`
|
||||
that doesn't survive a quit is a confusing third lane.
|
||||
3. **Wrong target shape for a "manage many agents" UI.** The
|
||||
parallel-agent experience the dashboard reaches for is mobile-
|
||||
meaningful — checking in on agents from a phone is the high-
|
||||
value case — and curses inside an SSH session is the wrong
|
||||
tool for that. Continuing to polish a local-only TUI delays
|
||||
the right next investment.
|
||||
|
||||
The triage half of the dashboard isn't suffering from any of these.
|
||||
Pending proposals are a small, well-scoped, real workload, and the
|
||||
PRD 0013–0016 surface for handling them is the right shape. The
|
||||
problem is everything that got bolted onto that core after.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
1. The supervise TUI starts up, lists pending proposals across all
|
||||
running bottles, and supports approve / modify / reject + the
|
||||
`--once` non-interactive mode — exactly as PRDs 0013–0016
|
||||
specified, minus everything 0019/0020/0021 added.
|
||||
2. The CLI subcommand is renamed `supervise` (was `dashboard`). The
|
||||
old name is not aliased — this PRD is intentionally a
|
||||
compat/breaking change (the issue carries the
|
||||
`Compat/Breaking` label).
|
||||
3. `dashboard.py` (renamed to `supervise.py`) shrinks to a single proposal-triage curses loop:
|
||||
no agents pane, no Tab pane switching, no agent picker, no
|
||||
start / re-attach / stop verbs, no tmux split-pane, no
|
||||
`e`/`p` operator-edit verbs, no per-process `bottles` dict.
|
||||
4. `dashboard_model.py` is collapsed into whatever
|
||||
`supervise.py` (CLI) needs; the model module is removed if it
|
||||
has no purpose after the cut.
|
||||
5. The proposal-side apply paths in `bot_bottle/backend/docker/
|
||||
egress_apply.py`, `pipelock_apply.py`, and `capability_apply.py`
|
||||
are unchanged — they are still called by the approve path.
|
||||
6. The supervise-sidecar / proposal-queue protocol (PRD 0013) is
|
||||
unchanged: the agent's experience is identical.
|
||||
7. The previously-active PRDs that this one undoes are marked
|
||||
`Superseded by PRD 0049`:
|
||||
- PRD 0019 — active-agents pane + agent-scoped edit verbs
|
||||
- PRD 0020 — start / re-attach / stop from the dashboard
|
||||
- PRD 0021 — tmux split-pane
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **A web GUI for managing agents.** The issue floats this as a
|
||||
second pass; this PRD does not design or commit to it. The cut
|
||||
is "remove the path we no longer want to invest in", not
|
||||
"build the replacement".
|
||||
- **A separate CLI for operator-initiated routes / pipelock
|
||||
edits.** Today those edits live as `e` / `p` keys inside the
|
||||
dashboard. After this PRD they don't exist anywhere — operators
|
||||
who need ad-hoc edits use the same path the agents do (call the
|
||||
supervise tool from inside the bottle) or hand-edit the host-
|
||||
side files and restart the sidecar. Adding a `./cli.py routes
|
||||
edit <slug>` verb is a follow-up if the loss bites.
|
||||
- **Removing `./cli.py start` or changing its semantics.** Start
|
||||
remains the one-shot launch path. PRD 0020's bottle-outlives-
|
||||
process model is removed; the only path to a long-running
|
||||
bottle is `./cli.py start` (foreground) plus `cli.py cleanup`
|
||||
for teardown.
|
||||
- **Removing the supervise-sidecar protocol or any of the three
|
||||
block-remediation engines.** PRDs 0013–0016 stay Active. The
|
||||
agent's view of the world doesn't change.
|
||||
- **Renaming `dashboard` anywhere other than the CLI entry
|
||||
point.** The dashboard-related docs (PRDs, decision records,
|
||||
research notes) keep their historical references — they
|
||||
describe the state of the world at the time they were written,
|
||||
and the Status: Superseded line is the marker that the world
|
||||
has moved on.
|
||||
- **Migrating the proposal-queue file layout.** The queue still
|
||||
lives at `~/.bot-bottle/queue/<slug>/`; the audit log still
|
||||
lives at `~/.bot-bottle/audit/<component>-<slug>.log`. The CLI
|
||||
surface changes; the on-disk surface does not.
|
||||
|
||||
## Scope
|
||||
|
||||
### In scope
|
||||
|
||||
- **Rename the subcommand.** `./cli.py dashboard` becomes
|
||||
`./cli.py supervise`. The module moves from `bot_bottle/cli/
|
||||
dashboard.py` to `bot_bottle/cli/supervise.py`. The dispatcher
|
||||
in `bot_bottle/cli/__init__.py` and the help text both update.
|
||||
- **Strip the curses loop to proposal-only.** The remaining
|
||||
surface is: list pending proposals (with the new-arrival bell
|
||||
from PRD 0013), Enter for detail view,
|
||||
`a`/`m`/`r` for approve / modify / reject, `q` to quit. No
|
||||
agents pane, no Tab, no agent picker, no `n`/`x`/`e`/`p`, no
|
||||
tmux dispatch, no `bottles` dict on the main loop.
|
||||
- **Drop unused helpers.** `_picker_modal`, `_preflight_modal`,
|
||||
`_backend_picker_modal`, `_new_agent_flow`, `_attach_to_bottle`,
|
||||
`_attach_in_tmux`, `_attach_via_handoff`, `_tmux_*`,
|
||||
`_ensure_right_pane`, `_redirect_stderr_to_file`,
|
||||
`_route_op_to_right_pane`, `_stop_bottle_flow`,
|
||||
`_operator_edit_*_flow`, `operator_edit_routes`,
|
||||
`operator_edit_allowlist`, and their imports come out.
|
||||
- **Collapse the model module.** `dashboard_model.py`'s
|
||||
proposal-side helpers (`QueuedProposal`, `discover_pending`,
|
||||
`_approval_status`, `_detail_lines`,
|
||||
`_failed_url_host`, `_proposed_payload_label`,
|
||||
`_suffix_for_tool`, `_REFRESH_INTERVAL_MS`) move back into
|
||||
`supervise.py` (CLI) or into `bot_bottle/supervise.py`
|
||||
(the daemon-side module) — wherever they fit. The agents /
|
||||
picker / tmux helpers in that module (`PANE_*`,
|
||||
`_filter_agents`, `_running_counts`, `_format_agent_row`,
|
||||
`_selection_status`, `_selected_agent`, `_bottle_for_slug`,
|
||||
`_pick_next_after_stop`, `_agent_runtime_args`,
|
||||
`_build_resume_argv_with_fallback`, `_build_split_pane_argv`,
|
||||
`_build_respawn_pane_argv`, `_in_tmux`,
|
||||
`discover_active_agents`) are deleted.
|
||||
- **Mark superseded PRDs.** The Status line on PRDs 0019, 0020,
|
||||
and 0021 changes to `Superseded by [PRD 0049](0049-strip-
|
||||
dashboard-to-supervisor-tui.md)`.
|
||||
- **Test cleanup.** Any test that targets a removed surface (the
|
||||
agent picker, the tmux split helpers, the start-from-dashboard
|
||||
flow, the operator-edit flows, `discover_active_agents`)
|
||||
comes out. Tests covering proposal triage stay.
|
||||
- **Help / usage strings.** `bot_bottle/cli/__init__.py`'s usage
|
||||
block updates the command name and one-liner.
|
||||
|
||||
### Out of scope
|
||||
|
||||
- Any new feature in the supervise TUI. The cut is purely
|
||||
subtractive (except for the rename).
|
||||
- Behavior changes in `./cli.py start`, `cli.py cleanup`,
|
||||
`cli.py resume`, `cli.py list`, `cli.py info`, `cli.py edit`,
|
||||
`cli.py init` — unchanged.
|
||||
- Changes to the supervise sidecar (`supervise_server.py`,
|
||||
`supervise.py` daemon module). The wire protocol stays.
|
||||
- Changes to the routes / pipelock / capability apply engines.
|
||||
- Migration helpers, deprecation warnings, or a transitional
|
||||
`dashboard` alias for `supervise`. The label on the issue says
|
||||
Compat/Breaking; the rename is a hard cutover.
|
||||
|
||||
## Proposed design
|
||||
|
||||
### Final shape of the TUI
|
||||
|
||||
After this PRD the `./cli.py supervise` curses surface is:
|
||||
|
||||
```
|
||||
bot-bottle supervise (3 pending)
|
||||
─────────────────────────────────────────────────────────
|
||||
> 03:14:22 [implementer-cy7a6] egress-block abc123… add
|
||||
github.com/foo
|
||||
03:13:55 [researcher-9xqs1] pipelock-block def456… allow
|
||||
registry.npmjs.org
|
||||
03:13:10 [implementer-cy7a6] capability-block ghi789… install
|
||||
ripgrep
|
||||
|
||||
─────────────────────────────────────────────────────────
|
||||
[j/k] move [Enter] view [a] approve [m] modify [r] reject [q] quit
|
||||
```
|
||||
|
||||
- One pane. No Tab. `j` / `k` / arrows move through the queue.
|
||||
- Enter opens the existing detail view (justification +
|
||||
proposed-file body + the green pipelock host-extraction hint).
|
||||
`a` / `m` / `r` work from both the list view and the detail
|
||||
view, same as today.
|
||||
- `q` / Esc quits. There are no dashboard-owned bottles, so no
|
||||
per-process teardown decision — `q` just exits.
|
||||
- The new-arrival bell stays, because it is a real win for the
|
||||
operator's "I was typing at claude and a proposal landed" case.
|
||||
No tmux-specific focus management remains.
|
||||
|
||||
### Code organisation
|
||||
|
||||
After the cut, the CLI module looks roughly like:
|
||||
|
||||
```
|
||||
bot_bottle/cli/supervise.py
|
||||
- cmd_supervise(argv)
|
||||
- _list_once() # --once mode
|
||||
- _main_loop(stdscr) # proposal-only
|
||||
- _render(stdscr, pending, ...)
|
||||
- _detail_view(stdscr, qp, ...)
|
||||
- _modify(stdscr, qp)
|
||||
- _prompt(stdscr, label)
|
||||
- _write_crash_log(exc)
|
||||
- approve(qp, *, notes, final_file)
|
||||
- reject(qp, *, reason)
|
||||
- QueuedProposal, discover_pending
|
||||
- _detail_lines, _approval_status,
|
||||
_failed_url_host,
|
||||
_proposed_payload_label,
|
||||
_suffix_for_tool
|
||||
```
|
||||
|
||||
`dashboard_model.py` has no purpose once the agents / picker /
|
||||
tmux helpers are gone, so it is removed and the surviving
|
||||
proposal-side helpers move into `supervise.py` directly. The
|
||||
PRD-0013 refactor that split model out (`refactor: extract
|
||||
dashboard state/model layer into dashboard_model.py`) was
|
||||
load-bearing for the bigger dashboard surface; with the surface
|
||||
shrunk back, the split is no longer justified.
|
||||
|
||||
### Removed PRDs: how to mark them
|
||||
|
||||
The three superseded PRDs keep their bodies intact. Only the
|
||||
Status line at the top changes:
|
||||
|
||||
```
|
||||
- **Status:** Superseded by [PRD
|
||||
0049](0049-strip-dashboard-to-supervisor-tui.md)
|
||||
```
|
||||
|
||||
Each superseded PRD's own Goals / Success Criteria are left as the historical
|
||||
record of what the feature shipped — readers tracing back from the
|
||||
code or the git log land in a PRD that explains what once was, with
|
||||
a clear pointer forward. No PRD body is rewritten.
|
||||
|
||||
### Tests to keep, tests to remove
|
||||
|
||||
Keep:
|
||||
- `tests/cli/test_dashboard*.py` cases that exercise
|
||||
`discover_pending`, `approve`, `reject`, `_detail_lines`,
|
||||
`_approval_status`, `_failed_url_host`,
|
||||
`_proposed_payload_label`, `_suffix_for_tool`,
|
||||
`_modify` / `edit_in_editor`.
|
||||
- `tests/cli/test_dashboard_once.py` (or equivalent) — the
|
||||
`--once` listing mode.
|
||||
|
||||
Remove:
|
||||
- Any test of `_picker_modal`, `_preflight_modal`,
|
||||
`_backend_picker_modal`, `_new_agent_flow`, `_attach_*`,
|
||||
`_tmux_*`, `_route_op_to_right_pane`,
|
||||
`_redirect_stderr_to_file`, `_stop_bottle_flow`,
|
||||
`_operator_edit_*`, `_filter_agents`, `_running_counts`,
|
||||
`_format_agent_row`, `_selection_status`,
|
||||
`_selected_agent`, `_bottle_for_slug`,
|
||||
`_pick_next_after_stop`, `_agent_runtime_args`,
|
||||
`_build_*_argv`, `discover_active_agents`.
|
||||
- The test files that exist solely to cover those (e.g.,
|
||||
`test_dashboard_picker.py`, `test_dashboard_tmux.py`,
|
||||
`test_dashboard_attach.py`, `test_dashboard_agents.py` —
|
||||
whichever of these exist after the file walk).
|
||||
|
||||
Files are renamed `test_supervise_*.py` to mirror the module
|
||||
rename. The rename is mechanical; no test logic changes.
|
||||
|
||||
## Implementation chunks
|
||||
|
||||
Sized for a single PR each.
|
||||
|
||||
1. **Strip + rename in one cut.** Move `bot_bottle/cli/
|
||||
dashboard.py` to `bot_bottle/cli/supervise.py`, delete the
|
||||
removed helpers, delete `dashboard_model.py`, inline the
|
||||
surviving helpers, update the dispatcher + usage in
|
||||
`bot_bottle/cli/__init__.py`, rename tests to match, mark
|
||||
PRDs 0019/0020/0021 as superseded. One commit per logical
|
||||
piece inside the PR (rename, strip, supersede notes,
|
||||
tests).
|
||||
2. **Activate PRD 0049.** Flip this PRD's Status line from
|
||||
Draft to Active in the same PR as chunk 1 once the
|
||||
implementation lands. (The repo convention is that a PRD's
|
||||
shipping commit is also the Status flip — see the recent
|
||||
`docs(prd): activate PRD 0048…` commit shape.)
|
||||
|
||||
The PR closes issue #174.
|
||||
|
||||
## Open questions
|
||||
|
||||
1. **`e` / `p` operator-initiated edits — gone for good or
|
||||
moved to a separate CLI verb?** The PRD removes them with no
|
||||
replacement. The simplest replacement is `./cli.py routes
|
||||
edit <slug>` and `./cli.py pipelock edit <slug>`, sharing
|
||||
the existing `apply_routes_change` / `apply_allowlist_change`
|
||||
engines. If the loss is felt within the first parallel
|
||||
run after this lands, that follow-up is a small PR. Leaving
|
||||
it for a separate PRD so this one stays subtractive.
|
||||
|
||||
2. **`--once` output shape.** The text listing today emits one
|
||||
proposal header per line, followed by its indented justification line. Worth keeping exactly as-is for
|
||||
scripting consumers; this PRD does not change it. Flagging
|
||||
only because the rename could tempt a tweak.
|
||||
|
||||
3. **Audit-log entry shape for an unprompted edit applied via
|
||||
a future `routes edit` CLI verb.** Today's
|
||||
`operator_edit_routes` writes an `ACTION_OPERATOR_EDIT`
|
||||
audit entry. With those flows removed the constant has no
|
||||
callers inside this PRD's scope. Keep the constant exported
|
||||
from `supervise.py` (it's already an `__all__` member) so a
|
||||
follow-up CLI verb can re-use the same audit shape without
|
||||
re-introducing dead code first.
|
||||
|
||||
## References
|
||||
|
||||
- Issue
|
||||
[#174](https://gitea.dideric.is/didericis/bot-bottle/issues/174)
|
||||
— the request: "strip the dashboard down into just a TUI for
|
||||
managing agent requests for new egress routes and new
|
||||
capabilities."
|
||||
- PRD 0013 — supervise plane foundation (the floor this PRD
|
||||
reverts the dashboard to).
|
||||
- PRDs 0014 / 0015 / 0016 — block-remediation engines that the
|
||||
supervise TUI continues to drive on approve.
|
||||
- PRDs 0019 / 0020 / 0021 — the bolted-on capabilities this PRD
|
||||
removes.
|
||||
@@ -277,51 +277,5 @@ class TestBottleMetadataBackend(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertEqual("", loaded.backend)
|
||||
|
||||
|
||||
class TestBottleForSlugBackend(_FakeHomeMixin, unittest.TestCase):
    """PRD 0040: _bottle_for_slug constructs the right bottle type."""

    def setUp(self):
        self._setup_fake_home()

    def tearDown(self):
        self._teardown_fake_home()

    @staticmethod
    def _record(identity, compose_project, backend):
        """Write bottle metadata for `identity` with the given backend."""
        write_metadata(BottleMetadata(
            identity=identity,
            agent_name="dev",
            cwd="",
            copy_cwd=False,
            started_at="2026-06-02T00:00:00+00:00",
            compose_project=compose_project,
            backend=backend,
        ))

    def test_docker_metadata_returns_docker_bottle(self):
        from bot_bottle.backend.docker.bottle import DockerBottle
        from bot_bottle.cli.dashboard import _bottle_for_slug

        self._record("dev-d1", "bot-bottle-dev-d1", "docker")
        built, _ = _bottle_for_slug("dev-d1", {}, None)
        self.assertIsInstance(built, DockerBottle)

    def test_smolmachines_metadata_returns_smolmachines_bottle(self):
        from bot_bottle.backend.smolmachines.bottle import SmolmachinesBottle
        from bot_bottle.cli.dashboard import _bottle_for_slug

        self._record("dev-s1", "", "smolmachines")
        built, _ = _bottle_for_slug("dev-s1", {}, None)
        self.assertIsInstance(built, SmolmachinesBottle)

    def test_no_metadata_defaults_to_docker_bottle(self):
        from bot_bottle.backend.docker.bottle import DockerBottle
        from bot_bottle.cli.dashboard import _bottle_for_slug

        # No metadata is written for this slug.
        built, _ = _bottle_for_slug("unknown-slug", {}, None)
        self.assertIsInstance(built, DockerBottle)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,492 +0,0 @@
|
||||
"""Unit: dashboard's row-formatting + selection helpers (PRD 0019)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from bot_bottle import supervise
|
||||
from bot_bottle.cli import dashboard
|
||||
|
||||
|
||||
class _FakeHomeMixin:
    """Test mixin that points `supervise.bot_bottle_root` at a temp dir."""

    def _setup_fake_home(self) -> None:
        """Create a temp dir and patch `supervise.bot_bottle_root` to use it."""
        self._tmp = tempfile.TemporaryDirectory(prefix="dashboard-aa-test.")
        real_root = supervise.bot_bottle_root

        def fake_root() -> Path:
            return Path(self._tmp.name) / ".bot-bottle"

        def restore() -> None:
            setattr(supervise, "bot_bottle_root", real_root)

        supervise.bot_bottle_root = fake_root  # type: ignore[assignment]
        self._restore_home = restore

    def _teardown_fake_home(self) -> None:
        """Undo the patch, then delete the temp dir."""
        self._restore_home()
        self._tmp.cleanup()
|
||||
|
||||
|
||||
class TestFormatAgentRow(unittest.TestCase):
    """One-line row formatting for the agents pane (PRD 0019 chunk 2)."""

    def _agent(self, **overrides) -> dashboard.ActiveAgent:
        """Build an ActiveAgent from baseline fields plus `overrides`."""
        fields = {
            "backend_name": "docker",
            "slug": "dev-abc12",
            "agent_name": "implementer",
            "started_at": "2026-05-26T02:55:01+00:00",
            "services": ("egress", "git-gate", "pipelock", "supervise"),
            **overrides,
        }
        return dashboard.ActiveAgent(**fields)

    def test_renders_slug_name_time_services(self):
        row = dashboard._format_agent_row(self._agent(), 200)
        for fragment in (
            "dev-abc12",
            "implementer",
            "02:55:01",
            "egress,git-gate,pipelock,supervise",
        ):
            self.assertIn(fragment, row)

    def test_starting_label_when_no_services(self):
        # Race window: compose project is up but containers haven't
        # been picked up by `docker ps` yet.
        agent = self._agent(services=())
        self.assertIn("(starting)", dashboard._format_agent_row(agent, 200))

    def test_filters_agent_service_from_display(self):
        # The `agent` service is always present for an active bottle;
        # listing it is noise. The row should show only the sidecars.
        agent = self._agent(services=("agent", "pipelock", "supervise"))
        row = dashboard._format_agent_row(agent, 200)
        self.assertIn("[pipelock,supervise]", row)
        for leaked in ("agent,", ",agent"):
            self.assertNotIn(leaked, row)

    def test_only_agent_service_shows_starting(self):
        # A bottle whose only running service is `agent` (sidecars
        # still warming up) renders as `(starting)`.
        agent = self._agent(services=("agent",))
        self.assertIn("(starting)", dashboard._format_agent_row(agent, 200))

    def test_question_mark_when_no_started_at(self):
        agent = self._agent(started_at="")
        self.assertIn("started ?", dashboard._format_agent_row(agent, 200))

    def test_truncates_to_maxw(self):
        limit = 30
        row = dashboard._format_agent_row(self._agent(), limit)
        self.assertLessEqual(len(row), limit)
        self.assertTrue(row.endswith("…"))
|
||||
|
||||
|
||||
class TestSelectionStatus(unittest.TestCase):
    """Idle status-line text driven by pane focus (PRD 0019 chunk 3).

    The text is empty while the proposals pane has focus. With the agents
    pane focused it names the selected agent, or shows a placeholder when
    there is nothing to select.
    """

    def _agent(self, slug: str) -> dashboard.ActiveAgent:
        """Return a minimal ActiveAgent that differs only by *slug*."""
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name="x",
            started_at="",
            services=(),
        )

    def test_empty_when_proposals_focused(self):
        roster = [self._agent("a-1")]
        text = dashboard._selection_status(dashboard.PANE_PROPOSALS, roster, 0)
        self.assertEqual("", text)

    def test_no_agents_message_when_agents_pane_empty(self):
        text = dashboard._selection_status(dashboard.PANE_AGENTS, [], 0)
        self.assertEqual("[no active agents]", text)

    def test_shows_selected_slug(self):
        roster = [self._agent(slug) for slug in ("a-1", "b-2", "c-3")]
        text = dashboard._selection_status(dashboard.PANE_AGENTS, roster, 1)
        self.assertEqual("[selected: b-2]", text)

    def test_out_of_bounds_falls_back_to_no_selection(self):
        roster = [self._agent("only")]
        text = dashboard._selection_status(dashboard.PANE_AGENTS, roster, 99)
        self.assertEqual("[no agent selected]", text)
|
||||
|
||||
|
||||
class TestFilterAgents(unittest.TestCase):
    """Picker filter as a pure function (PRD 0020 chunk 2).

    No curses involved, so the substring and case-insensitivity rules
    can be exercised directly.
    """

    NAMES = ["implementer", "researcher", "triage-bot", "ImplDeluxe"]

    def test_empty_query_returns_all(self):
        matched = dashboard._filter_agents("", self.NAMES)
        self.assertEqual(self.NAMES, matched)

    def test_substring_match(self):
        matched = dashboard._filter_agents("impl", self.NAMES)
        self.assertEqual(["implementer", "ImplDeluxe"], matched)

    def test_case_insensitive(self):
        matched = dashboard._filter_agents("IMPL", self.NAMES)
        self.assertEqual(["implementer", "ImplDeluxe"], matched)

    def test_no_match_returns_empty(self):
        matched = dashboard._filter_agents("zzz", self.NAMES)
        self.assertEqual([], matched)

    def test_preserves_input_order(self):
        # The filter must not re-sort: the picker draws names in the
        # order the manifest exposed them.
        unsorted_names = ["beta", "alpha", "echo"]
        matched = dashboard._filter_agents("e", unsorted_names)
        self.assertEqual(["beta", "echo"], matched)
|
||||
|
||||
|
||||
class TestDashboardManifestLoading(unittest.TestCase):
    """New-agent flow behaviour when the manifest declares no agents."""

    def test_new_agent_flow_empty_manifest_has_no_picker_entries(self):
        empty_manifest = dashboard.Manifest.from_json_obj(
            {"bottles": {}, "agents": {}},
        )
        # The picker modal is stubbed so no curses screen is needed.
        picker_patch = mock.patch(
            "bot_bottle.cli.dashboard._picker_modal", return_value=None,
        )
        with picker_patch as picker:
            status = dashboard._new_agent_flow(
                None, empty_manifest, {}, [], tmux_state=None,  # type: ignore[arg-type]
            )
        picker.assert_called_once()
        # Second positional argument to the picker is the entry list.
        picker_entries = picker.call_args.args[1]
        self.assertEqual([], picker_entries)
        self.assertIn("no agents configured", status)
|
||||
|
||||
|
||||
class TestRunningCounts(unittest.TestCase):
    """Per-agent running count shown in the picker as `(N running)`.

    The counts are derived from the list of active agents handed to
    `_running_counts`.
    """

    def _agent(self, agent_name: str) -> dashboard.ActiveAgent:
        """Return an ActiveAgent whose slug is derived from *agent_name*."""
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=f"{agent_name}-abc",
            agent_name=agent_name,
            started_at="",
            services=(),
        )

    def test_empty_when_no_active_agents(self):
        counts = dashboard._running_counts({}, [])
        self.assertEqual({}, counts)

    def test_one_per_unique_agent_name(self):
        active = [self._agent(name) for name in ("a", "b", "c")]
        counts = dashboard._running_counts({}, active)
        self.assertEqual({"a": 1, "b": 1, "c": 1}, counts)

    def test_counts_collisions(self):
        active = [
            self._agent(name)
            for name in ("implementer", "implementer", "researcher")
        ]
        counts = dashboard._running_counts({}, active)
        self.assertEqual({"implementer": 2, "researcher": 1}, counts)
|
||||
|
||||
|
||||
class TestSelectedAgent(unittest.TestCase):
    """`_selected_agent` tells chunk 4's e/p key handlers whether to fire
    and which agent to act on."""

    def _agent(self, slug: str, services: tuple[str, ...] = ()) -> dashboard.ActiveAgent:
        """Return a minimal ActiveAgent with the given slug and services."""
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name="x",
            started_at="",
            services=services,
        )

    def test_none_when_proposals_focused(self):
        roster = [self._agent("a-1")]
        picked = dashboard._selected_agent(dashboard.PANE_PROPOSALS, roster, 0)
        self.assertIsNone(picked)

    def test_none_when_no_agents(self):
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, [], 0)
        self.assertIsNone(picked)

    def test_returns_indexed_agent_when_in_range(self):
        roster = [self._agent("a-1"), self._agent("b-2")]
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, roster, 1)
        self.assertIsNotNone(picked)
        assert picked is not None  # for type checker
        self.assertEqual("b-2", picked.slug)

    def test_none_when_index_out_of_range(self):
        roster = [self._agent("only")]
        picked = dashboard._selected_agent(dashboard.PANE_AGENTS, roster, 99)
        self.assertIsNone(picked)
|
||||
|
||||
|
||||
class TestBottleForSlug(unittest.TestCase):
    """Re-attach target resolution (PRD 0020 chunk 3).

    A bottle the dashboard owns comes back as the stored handle. One it
    does not own gets a synthesized bottle named after the slug-derived
    container.
    """

    def test_owned_bottle_returns_held_handle(self):
        held = object()
        owned = {"dev-abc": (None, held, "dev-abc")}
        resolved, _ = dashboard._bottle_for_slug("dev-abc", owned, None)
        self.assertIs(held, resolved)

    def test_unowned_synthesizes_docker_bottle(self):
        resolved, _ = dashboard._bottle_for_slug("dev-xyz", {}, None)
        # The synthesized bottle carries the slug-derived container name.
        self.assertEqual("bot-bottle-dev-xyz", resolved.name)

    def test_unowned_without_manifest_omits_prompt_path(self):
        _, hint = dashboard._bottle_for_slug("dev-xyz", {}, None)
        self.assertEqual("", hint)
|
||||
|
||||
|
||||
class TestPickNextAfterStop(unittest.TestCase):
    """Focus hand-off after `x` stops a bottle.

    Focus moves to the agent that fills the stopped row, or to the new
    last row when the stopped agent was last. The helper is pure, so it
    is tested directly.
    """

    def _agent(self, slug: str) -> dashboard.ActiveAgent:
        """Return an ActiveAgent using *slug* as both slug and agent name."""
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug=slug,
            agent_name=slug,
            started_at="",
            services=(),
        )

    def test_empty_list_returns_none(self):
        nxt = dashboard._pick_next_after_stop([], 0, "anything")
        self.assertIsNone(nxt)

    def test_only_agent_being_stopped_returns_none(self):
        # Stopping the sole agent leaves nothing to focus.
        roster = [self._agent("only")]
        nxt = dashboard._pick_next_after_stop(roster, 0, "only")
        self.assertIsNone(nxt)

    def test_middle_row_slides_up_to_same_index(self):
        roster = [self._agent(slug) for slug in ("a", "b", "c")]
        # Cursor on "b" (index 1). Once "b" stops, "c" occupies index 1
        # and takes focus.
        nxt = dashboard._pick_next_after_stop(roster, 1, "b")
        self.assertEqual((1, self._agent("c")), nxt)

    def test_last_row_wraps_to_new_last(self):
        roster = [self._agent(slug) for slug in ("a", "b", "c")]
        # Cursor on "c" (index 2). Stopping it leaves two agents, so
        # index 2 is out of bounds and focus falls to the new last (1).
        nxt = dashboard._pick_next_after_stop(roster, 2, "c")
        self.assertEqual((1, self._agent("b")), nxt)

    def test_first_row(self):
        roster = [self._agent("a"), self._agent("b")]
        nxt = dashboard._pick_next_after_stop(roster, 0, "a")
        self.assertEqual((0, self._agent("b")), nxt)

    def test_clamps_negative_selection(self):
        # Defensive: a stale negative index must not crash.
        roster = [self._agent("a"), self._agent("b")]
        nxt = dashboard._pick_next_after_stop(roster, -1, "a")
        self.assertEqual((0, self._agent("b")), nxt)
|
||||
|
||||
|
||||
class TestTmuxPaneArgvBuilders(unittest.TestCase):
    """Argv builders for the tmux split-pane integration (PRD 0021 chunk 2).

    Running the subprocess depends on the environment. These tests pin
    only the shape of the wrapping argv, so a regression shows up in CI
    without a real tmux.
    """

    DOCKER_ARGV = [
        "docker", "exec", "-it",
        "bot-bottle-dev-abc",
        "claude", "--dangerously-skip-permissions", "--continue",
    ]

    def test_split_pane_argv_horizontal_with_pane_id_capture(self):
        built = dashboard._build_split_pane_argv(self.DOCKER_ARGV)
        tmux_prefix = ["tmux", "split-window", "-h", "-P", "-F", "#{pane_id}"]
        self.assertEqual([*tmux_prefix, *self.DOCKER_ARGV], built)

    def test_respawn_pane_argv_kills_existing_process(self):
        built = dashboard._build_respawn_pane_argv("%12", self.DOCKER_ARGV)
        tmux_prefix = ["tmux", "respawn-pane", "-k", "-t", "%12"]
        self.assertEqual([*tmux_prefix, *self.DOCKER_ARGV], built)

    def test_respawn_pane_argv_threads_pane_id_unmodified(self):
        # Pane ids contain `%`; they must reach `-t` verbatim, with no
        # quoting or substitution applied.
        pane_id = "%abc.123"
        built = dashboard._build_respawn_pane_argv(pane_id, ["sh"])
        self.assertIn("%abc.123", built)
|
||||
|
||||
|
||||
class TestResumeArgvWithFallback(unittest.TestCase):
    """The `claude --continue || claude` shell fallback used on tmux re-attach.

    Without the fallback, an agent that was started but never typed at
    crashes the pane on Enter, because --continue has no session to
    resume.
    """

    def _bottle(self, prompt_path: str | None = None):
        """Return a DockerBottle for a fixed container with a no-op teardown."""
        from bot_bottle.backend.docker.bottle import DockerBottle

        def noop_teardown():
            return None

        return DockerBottle(
            container="bot-bottle-dev-abc",
            teardown=noop_teardown,
            prompt_path_in_container=prompt_path,
        )

    def test_wraps_in_sh_c_with_or_fallback(self):
        argv = dashboard._build_resume_argv_with_fallback(self._bottle())
        # Shape: docker exec ... sh -c '<cmd> --continue || <cmd>'.
        exec_prefix = ["docker", "exec", "-it", "bot-bottle-dev-abc", "sh", "-c"]
        self.assertEqual(exec_prefix, argv[:6])
        shell_script = argv[6]
        for token in ("--continue", "||"):
            self.assertIn(token, shell_script)
        # One mention of claude per branch of the `||`.
        self.assertEqual(2, shell_script.count("claude"))

    def test_inner_args_quoted_safely(self):
        # A path containing a space would break naive concatenation.
        spaced = self._bottle("/home/with space/.prompt")
        shell_script = dashboard._build_resume_argv_with_fallback(spaced)[-1]
        # shlex.quote single-quotes any token that contains a space.
        self.assertIn("'/home/with space/.prompt'", shell_script)

    def test_includes_skip_permissions(self):
        shell_script = dashboard._build_resume_argv_with_fallback(self._bottle())[-1]
        self.assertIn("--dangerously-skip-permissions", shell_script)

    def test_includes_prompt_file_flag_when_set(self):
        prompted = self._bottle("/home/node/.bot-bottle-prompt.txt")
        shell_script = dashboard._build_resume_argv_with_fallback(prompted)[-1]
        self.assertIn("--append-system-prompt-file", shell_script)
        self.assertIn("/home/node/.bot-bottle-prompt.txt", shell_script)
|
||||
|
||||
|
||||
class TestClaudeRuntimeArgs(unittest.TestCase):
    """Runtime args handed to `bottle.agent_argv` on every attach.

    They are pinned here so the tmux and foreground paths build the same
    agent invocation.
    """

    def test_default_skip_permissions_only(self):
        args = dashboard._agent_runtime_args(resume=False)
        self.assertEqual(["--dangerously-skip-permissions"], args)

    def test_resume_appends_continue(self):
        args = dashboard._agent_runtime_args(resume=True)
        self.assertEqual(
            ["--dangerously-skip-permissions", "--continue"],
            args,
        )

    def test_remote_control(self):
        args = dashboard._agent_runtime_args(resume=False, remote_control=True)
        self.assertIn("--remote-control", args)
|
||||
|
||||
|
||||
class TestStopBottleFlow(unittest.TestCase):
    """Explicit per-bottle stop (PRD 0020 chunk 4).

    Only the non-owned path is covered: it needs neither curses nor
    docker. The owned path drives `cm.__exit__` on a real launch context
    and belongs in integration tests.
    """

    def test_non_owned_returns_cleanup_hint(self):
        # stdscr is deliberately None: the non-owned branch returns
        # before any curses call is made.
        message = dashboard._stop_bottle_flow(
            stdscr=None,  # type: ignore[arg-type]
            bottles={},
            slug="ghost-zzz",
        )
        for fragment in ("not dashboard-owned", "./cli.py cleanup"):
            self.assertIn(fragment, message)

    def test_non_owned_does_not_touch_tmux_state(self):
        # PRD 0021: stopping an unknown slug must never clear the
        # right-pane occupant tracking, even if the slugs happen to
        # match. (Defensive: a non-owned bottle cannot land in the right
        # pane through the dashboard's normal flow anyway.)
        tmux_state = {"pane_id": "%5", "slug": "live-bbb"}
        before = dict(tmux_state)
        dashboard._stop_bottle_flow(
            stdscr=None,  # type: ignore[arg-type]
            bottles={},
            slug="ghost-zzz",
            tmux_state=tmux_state,
        )
        self.assertEqual(before, tmux_state)
|
||||
|
||||
|
||||
class TestOperatorEditFlowGuards(_FakeHomeMixin, unittest.TestCase):
    """Chunk-4 contract for the operator edit flow.

    The flow refuses when the selected agent lacks the required running
    sidecar. With the discover-and-prompt scaffolding gone, that gate
    lives in the flow itself rather than in the key handler.
    """

    def setUp(self) -> None:
        self._setup_fake_home()

    def tearDown(self) -> None:
        self._teardown_fake_home()

    def _agent(self, services: tuple[str, ...]) -> dashboard.ActiveAgent:
        """Return a fixed-identity ActiveAgent running only *services*."""
        return dashboard.ActiveAgent(
            backend_name="docker",
            slug="dev-abc12",
            agent_name="impl",
            started_at="",
            services=services,
        )

    def test_routes_edit_refuses_without_egress(self):
        # A bottle without bottle.egress.routes has no egress sidecar.
        sidecars_without_egress = ("pipelock", "supervise")
        message = dashboard._operator_edit_flow(
            stdscr=None,  # type: ignore[arg-type]
            agent=self._agent(sidecars_without_egress),
            required_service="egress",
            label="routes",
            fetch=lambda _: "x",
            apply=lambda _slug, _content: None,
            suffix=".yaml",
        )
        for fragment in ("no running egress sidecar", "dev-abc12"):
            self.assertIn(fragment, message)

    def test_pipelock_edit_refuses_when_pipelock_missing(self):
        # Belt-and-braces: pipelock should always be present, but the
        # window between `compose up` and the `docker ps` update is real.
        message = dashboard._operator_edit_flow(
            stdscr=None,  # type: ignore[arg-type]
            agent=self._agent(()),
            required_service="pipelock",
            label="pipelock",
            fetch=lambda _: "x",
            apply=lambda _slug, _content: None,
            suffix=".txt",
        )
        self.assertIn("no running pipelock sidecar", message)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,39 +0,0 @@
|
||||
"""Unit: dashboard's new-proposal highlight window.
|
||||
|
||||
The curses rendering itself is exercised manually; this isolates
|
||||
the pure decision `is the proposal still in its post-arrival
|
||||
highlight window?`"""
|
||||
|
||||
import unittest
|
||||
|
||||
from bot_bottle.cli import dashboard
|
||||
|
||||
|
||||
class TestIsRecent(unittest.TestCase):
    """Decides whether a proposal is still inside its post-arrival
    highlight window."""

    def test_just_seen_is_recent(self):
        first_seen = {"p1": 100.0}
        self.assertTrue(dashboard._is_recent("p1", first_seen, now=100.5))

    def test_seen_within_window(self):
        # The default window is 5s.
        first_seen = {"p1": 100.0}
        self.assertTrue(dashboard._is_recent("p1", first_seen, now=104.9))

    def test_seen_past_window_is_not_recent(self):
        first_seen = {"p1": 100.0}
        self.assertFalse(dashboard._is_recent("p1", first_seen, now=106.0))

    def test_unknown_proposal_is_not_recent(self):
        first_seen = {"p1": 100.0}
        self.assertFalse(dashboard._is_recent("p2", first_seen, now=100.5))

    def test_none_args_safe_default(self):
        # A missing map and/or a missing clock reading must yield False.
        missing_arg_cases = (
            (None, None),
            ({"p1": 100.0}, None),
            (None, 100.5),
        )
        for first_seen, now in missing_arg_cases:
            self.assertFalse(dashboard._is_recent("p1", first_seen, now))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,94 +0,0 @@
|
||||
"""Unit: dashboard_model — state/model layer extracted from dashboard.py.
|
||||
|
||||
Tests for functions that were previously buried in the 2103-line
|
||||
dashboard.py and had no coverage: _approval_status,
|
||||
_proposed_payload_label, and _suffix_for_tool."""
|
||||
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from bot_bottle.cli.dashboard_model import (
|
||||
QueuedProposal,
|
||||
_approval_status,
|
||||
_proposed_payload_label,
|
||||
_suffix_for_tool,
|
||||
)
|
||||
from bot_bottle.supervise import (
|
||||
Proposal,
|
||||
TOOL_CAPABILITY_BLOCK,
|
||||
TOOL_EGRESS_BLOCK,
|
||||
TOOL_PIPELOCK_BLOCK,
|
||||
sha256_hex,
|
||||
)
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def _qp(tool: str, slug: str = "dev") -> QueuedProposal:
    """Build a QueuedProposal for *tool* on bottle *slug*.

    The payload, justification and timestamp are fixed, and the queue
    directory is a placeholder path.
    """
    body = "x"
    created_at = datetime(2026, 6, 1, 0, 0, 0, tzinfo=timezone.utc)
    proposal = Proposal.new(
        bottle_slug=slug,
        tool=tool,
        proposed_file=body,
        justification="test",
        current_file_hash=sha256_hex(body),
        now=created_at,
    )
    return QueuedProposal(proposal=proposal, queue_dir=Path("/tmp/q"))
|
||||
|
||||
|
||||
class TestApprovalStatus(unittest.TestCase):
    """Status-line message produced after a proposal is approved."""

    def test_egress_block_base_message(self):
        queued = _qp(TOOL_EGRESS_BLOCK, slug="my-bot")
        self.assertEqual(
            "approved egress-block for [my-bot]",
            _approval_status(queued, "approved"),
        )

    def test_modified_verb(self):
        queued = _qp(TOOL_PIPELOCK_BLOCK, slug="dev")
        self.assertEqual(
            "modified+approved pipelock-block for [dev]",
            _approval_status(queued, "modified+approved"),
        )

    def test_capability_block_appends_resume_hint(self):
        queued = _qp(TOOL_CAPABILITY_BLOCK, slug="alpha")
        message = _approval_status(queued, "approved")
        expected_fragments = (
            "resume: ./cli.py resume alpha",
            "approved capability-block for [alpha]",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, message)

    def test_egress_block_has_no_resume_hint(self):
        message = _approval_status(_qp(TOOL_EGRESS_BLOCK), "approved")
        self.assertNotIn("resume", message)

    def test_pipelock_block_has_no_resume_hint(self):
        message = _approval_status(_qp(TOOL_PIPELOCK_BLOCK), "approved")
        self.assertNotIn("resume", message)
|
||||
|
||||
|
||||
class TestProposedPayloadLabel(unittest.TestCase):
    """Label used for a proposal's payload, keyed by tool."""

    def test_pipelock_returns_failed_url(self):
        label = _proposed_payload_label(TOOL_PIPELOCK_BLOCK)
        self.assertEqual("failed URL", label)

    def test_egress_returns_proposed_file(self):
        label = _proposed_payload_label(TOOL_EGRESS_BLOCK)
        self.assertEqual("proposed file", label)

    def test_capability_returns_proposed_file(self):
        label = _proposed_payload_label(TOOL_CAPABILITY_BLOCK)
        self.assertEqual("proposed file", label)

    def test_unknown_tool_returns_proposed_file(self):
        label = _proposed_payload_label("unknown-tool")
        self.assertEqual("proposed file", label)
|
||||
|
||||
|
||||
class TestSuffixForTool(unittest.TestCase):
    """File suffix chosen for a proposal, keyed by tool."""

    def test_capability_block_returns_dockerfile_suffix(self):
        suffix = _suffix_for_tool(TOOL_CAPABILITY_BLOCK)
        self.assertEqual(".dockerfile", suffix)

    def test_egress_block_returns_txt(self):
        suffix = _suffix_for_tool(TOOL_EGRESS_BLOCK)
        self.assertEqual(".txt", suffix)

    def test_pipelock_block_returns_txt(self):
        suffix = _suffix_for_tool(TOOL_PIPELOCK_BLOCK)
        self.assertEqual(".txt", suffix)

    def test_unknown_tool_returns_txt(self):
        suffix = _suffix_for_tool("whatever")
        self.assertEqual(".txt", suffix)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,12 +1,12 @@
|
||||
"""Unit: dashboard headless paths (PRD 0013 phase 4, PRD 0014).
|
||||
"""Unit: supervise headless paths (PRD 0013 phase 4, PRD 0014).
|
||||
|
||||
The curses TUI itself isn't exercised here — these tests cover the
|
||||
discovery + approve/reject + audit-write paths that the TUI's key
|
||||
handlers call into.
|
||||
|
||||
apply_routes_change is stubbed at the dashboard module level so the
|
||||
tests don't need a running cred-proxy sidecar; the real docker
|
||||
exec/cp/SIGHUP plumbing is covered by the integration test.
|
||||
add_route is stubbed at the supervise CLI module level so the tests
|
||||
don't need a running egress sidecar; the real docker exec/cp/SIGHUP
|
||||
plumbing is covered by the integration test.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -19,7 +19,7 @@ from bot_bottle import supervise
|
||||
from bot_bottle.backend.docker.capability_apply import CapabilityApplyError
|
||||
from bot_bottle.backend.docker.egress_apply import EgressApplyError
|
||||
from bot_bottle.backend.docker.pipelock_apply import PipelockApplyError
|
||||
from bot_bottle.cli import dashboard
|
||||
from bot_bottle.cli import supervise as supervise_cli
|
||||
from bot_bottle.supervise import (
|
||||
Proposal,
|
||||
STATUS_APPROVED,
|
||||
@@ -61,7 +61,7 @@ class _FakeHomeMixin:
|
||||
"""Patch supervise.bot_bottle_root to a temp dir for the test."""
|
||||
|
||||
def _setup_fake_home(self):
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="dashboard-test.")
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="supervise-test.")
|
||||
original = supervise.bot_bottle_root
|
||||
|
||||
def fake_root() -> Path:
|
||||
@@ -83,14 +83,14 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_empty_when_no_queues(self):
|
||||
self.assertEqual([], dashboard.discover_pending())
|
||||
self.assertEqual([], supervise_cli.discover_pending())
|
||||
|
||||
def test_walks_all_slug_subdirs(self):
|
||||
for slug in ("dev", "api"):
|
||||
qdir = supervise.queue_dir_for_slug(slug)
|
||||
qdir.mkdir(parents=True)
|
||||
supervise.write_proposal(qdir, _proposal(slug=slug))
|
||||
pending = dashboard.discover_pending()
|
||||
pending = supervise_cli.discover_pending()
|
||||
self.assertEqual({"dev", "api"}, {qp.proposal.bottle_slug for qp in pending})
|
||||
|
||||
def test_sorted_by_arrival_across_bottles(self):
|
||||
@@ -110,7 +110,7 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
|
||||
qdir = supervise.queue_dir_for_slug(p.bottle_slug)
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
pending = dashboard.discover_pending()
|
||||
pending = supervise_cli.discover_pending()
|
||||
self.assertEqual([early.id, late.id], [qp.proposal.id for qp in pending])
|
||||
|
||||
def test_excludes_already_responded(self):
|
||||
@@ -121,34 +121,34 @@ class TestDiscoverPending(_FakeHomeMixin, unittest.TestCase):
|
||||
supervise.write_response(qdir, supervise.Response(
|
||||
proposal_id=p.id, status=STATUS_APPROVED, notes="",
|
||||
))
|
||||
self.assertEqual([], dashboard.discover_pending())
|
||||
self.assertEqual([], supervise_cli.discover_pending())
|
||||
|
||||
|
||||
class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original_add_route = dashboard.add_route
|
||||
self._original_apply_allowlist = dashboard.apply_allowlist_change
|
||||
self._original_fetch_allowlist = dashboard.fetch_current_allowlist
|
||||
self._original_apply_capability = dashboard.apply_capability_change
|
||||
self._original_add_route = supervise_cli.add_route
|
||||
self._original_apply_allowlist = supervise_cli.apply_allowlist_change
|
||||
self._original_fetch_allowlist = supervise_cli.fetch_current_allowlist
|
||||
self._original_apply_capability = supervise_cli.apply_capability_change
|
||||
# Default stubs: succeed with deterministic before/after so the
|
||||
# audit log shows a non-empty diff.
|
||||
dashboard.add_route = lambda slug, content: (
|
||||
supervise_cli.add_route = lambda slug, content: (
|
||||
'{"routes": []}\n', '{"routes": [{"host": "x"}]}\n',
|
||||
)
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||
supervise_cli.apply_allowlist_change = lambda slug, content: (
|
||||
"old.example\n", content,
|
||||
)
|
||||
dashboard.fetch_current_allowlist = lambda slug: "old.example\n"
|
||||
dashboard.apply_capability_change = lambda slug, content: (
|
||||
supervise_cli.fetch_current_allowlist = lambda slug: "old.example\n"
|
||||
supervise_cli.apply_capability_change = lambda slug, content: (
|
||||
"FROM old\n", content,
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.add_route = self._original_add_route
|
||||
dashboard.apply_allowlist_change = self._original_apply_allowlist
|
||||
dashboard.fetch_current_allowlist = self._original_fetch_allowlist
|
||||
dashboard.apply_capability_change = self._original_apply_capability
|
||||
supervise_cli.add_route = self._original_add_route
|
||||
supervise_cli.apply_allowlist_change = self._original_apply_allowlist
|
||||
supervise_cli.fetch_current_allowlist = self._original_fetch_allowlist
|
||||
supervise_cli.apply_capability_change = self._original_apply_capability
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue(self, tool: str = TOOL_EGRESS_BLOCK):
|
||||
@@ -156,11 +156,11 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
qdir = supervise.queue_dir_for_slug("dev")
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def test_approve_writes_response_and_audit(self):
|
||||
qp = self._enqueue()
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
resp = read_response(qp.queue_dir, qp.proposal.id)
|
||||
self.assertEqual(STATUS_APPROVED, resp.status)
|
||||
self.assertIsNone(resp.final_file)
|
||||
@@ -170,7 +170,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_approve_with_final_file_marks_modified(self):
|
||||
qp = self._enqueue()
|
||||
dashboard.approve(qp, final_file='{"routes": [{"path": "/x/"}]}\n', notes="tweaked")
|
||||
supervise_cli.approve(qp, final_file='{"routes": [{"path": "/x/"}]}\n', notes="tweaked")
|
||||
resp = read_response(qp.queue_dir, qp.proposal.id)
|
||||
self.assertEqual(STATUS_MODIFIED, resp.status)
|
||||
self.assertEqual('{"routes": [{"path": "/x/"}]}\n', resp.final_file)
|
||||
@@ -180,7 +180,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_reject_writes_rejection(self):
|
||||
qp = self._enqueue()
|
||||
dashboard.reject(qp, reason="nope")
|
||||
supervise_cli.reject(qp, reason="nope")
|
||||
resp = read_response(qp.queue_dir, qp.proposal.id)
|
||||
self.assertEqual(STATUS_REJECTED, resp.status)
|
||||
self.assertEqual("nope", resp.notes)
|
||||
@@ -190,7 +190,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_capability_block_skips_audit_log(self):
|
||||
qp = self._enqueue(tool=TOOL_CAPABILITY_BLOCK)
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# No audit log for capability-block (per PRD 0013 / 0016).
|
||||
# cred-proxy and pipelock logs both empty.
|
||||
self.assertEqual([], read_audit_entries("egress", "dev"))
|
||||
@@ -198,7 +198,7 @@ class TestApproveReject(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_pipelock_audit_distinct_from_egress(self):
|
||||
qp = self._enqueue(tool=TOOL_PIPELOCK_BLOCK)
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertEqual(1, len(read_audit_entries("pipelock", "dev")))
|
||||
self.assertEqual(0, len(read_audit_entries("egress", "dev")))
|
||||
|
||||
@@ -210,10 +210,10 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original_add_route = dashboard.add_route
|
||||
self._original_add_route = supervise_cli.add_route
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.add_route = self._original_add_route
|
||||
supervise_cli.add_route = self._original_add_route
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue_egress(self, proposed: str = '{"host": "x.example"}\n'):
|
||||
@@ -227,17 +227,17 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
qdir = supervise.queue_dir_for_slug("dev")
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def test_egress_block_calls_add_route_with_proposed_json(self):
|
||||
calls = []
|
||||
dashboard.add_route = lambda slug, content: (
|
||||
supervise_cli.add_route = lambda slug, content: (
|
||||
calls.append((slug, content)) or ("before", "after")
|
||||
)
|
||||
qp = self._enqueue_egress(
|
||||
proposed='{"host": "new.example", "path_allowlist": ["/x/"]}\n'
|
||||
)
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertEqual(1, len(calls))
|
||||
slug, content = calls[0]
|
||||
self.assertEqual("dev", slug)
|
||||
@@ -250,11 +250,11 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_modify_passes_final_file_to_add_route(self):
|
||||
calls = []
|
||||
dashboard.add_route = lambda slug, content: (
|
||||
supervise_cli.add_route = lambda slug, content: (
|
||||
calls.append(content) or ("before", "after")
|
||||
)
|
||||
qp = self._enqueue_egress()
|
||||
dashboard.approve(
|
||||
supervise_cli.approve(
|
||||
qp,
|
||||
final_file='{"host": "edited.example"}\n',
|
||||
notes="tweaked",
|
||||
@@ -262,12 +262,12 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertEqual(['{"host": "edited.example"}\n'], calls)
|
||||
|
||||
def test_apply_failure_blocks_response_and_audit(self):
|
||||
dashboard.add_route = lambda slug, content: (_ for _ in ()).throw(
|
||||
supervise_cli.add_route = lambda slug, content: (_ for _ in ()).throw(
|
||||
EgressApplyError("docker exec failed")
|
||||
)
|
||||
qp = self._enqueue_egress()
|
||||
with self.assertRaises(EgressApplyError):
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# No response file (proposal stays pending).
|
||||
self.assertEqual(
|
||||
[qp.proposal.id],
|
||||
@@ -277,25 +277,20 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertEqual([], read_audit_entries("egress", "dev"))
|
||||
|
||||
def test_real_diff_lands_in_audit(self):
|
||||
dashboard.add_route = lambda slug, content: (
|
||||
supervise_cli.add_route = lambda slug, content: (
|
||||
'{"routes": []}\n', # before
|
||||
'{"routes": [{"host": "new.example"}]}\n', # after
|
||||
)
|
||||
qp = self._enqueue_egress(proposed='{"host": "new.example"}\n')
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
entries = read_audit_entries("egress", "dev")
|
||||
self.assertEqual(1, len(entries))
|
||||
self.assertIn('+{"routes": [{"host": "new.example"}]}', entries[0].diff)
|
||||
self.assertIn('-{"routes": []}', entries[0].diff)
|
||||
|
||||
def test_reject_does_not_call_apply(self):
|
||||
called = []
|
||||
dashboard.apply_routes_change = lambda slug, content: (
|
||||
called.append(True) or ("", content)
|
||||
)
|
||||
qp = self._enqueue_egress()
|
||||
dashboard.reject(qp, reason="no thanks")
|
||||
self.assertEqual([], called)
|
||||
supervise_cli.reject(qp, reason="no thanks")
|
||||
# Reject still writes a response + audit entry with empty diff.
|
||||
resp = read_response(qp.queue_dir, qp.proposal.id)
|
||||
self.assertEqual(STATUS_REJECTED, resp.status)
|
||||
@@ -306,18 +301,18 @@ class TestEgressApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
"""PRD 0015 Phase 2 + PR #25 follow-up: approve() on a
|
||||
pipelock-block proposal carries the failed URL; the dashboard
|
||||
pipelock-block proposal carries the failed URL; the supervise TUI
|
||||
extracts the host, merges it into the running allowlist, and
|
||||
calls apply_allowlist_change with the merged content."""
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original_apply = dashboard.apply_allowlist_change
|
||||
self._original_fetch = dashboard.fetch_current_allowlist
|
||||
self._original_apply = supervise_cli.apply_allowlist_change
|
||||
self._original_fetch = supervise_cli.fetch_current_allowlist
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_allowlist_change = self._original_apply
|
||||
dashboard.fetch_current_allowlist = self._original_fetch
|
||||
supervise_cli.apply_allowlist_change = self._original_apply
|
||||
supervise_cli.fetch_current_allowlist = self._original_fetch
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue_pipelock(self, failed_url: str = "https://api.github.com/repos/foo/bar"):
|
||||
@@ -331,17 +326,17 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
qdir = supervise.queue_dir_for_slug("dev")
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def test_url_host_merged_into_current_allowlist(self):
|
||||
dashboard.fetch_current_allowlist = lambda slug: "existing.example\n"
|
||||
supervise_cli.fetch_current_allowlist = lambda slug: "existing.example\n"
|
||||
applied = []
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||
supervise_cli.apply_allowlist_change = lambda slug, content: (
|
||||
applied.append((slug, content))
|
||||
or ("existing.example\n", content)
|
||||
)
|
||||
qp = self._enqueue_pipelock("https://api.github.com/repos/foo/bar")
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# apply_allowlist_change was called with the merged content:
|
||||
# existing host + the URL's host (no path, since pipelock is
|
||||
# hostname-only).
|
||||
@@ -353,27 +348,27 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertNotIn("/repos/foo/bar", content) # path stripped
|
||||
|
||||
def test_host_already_in_allowlist_is_idempotent(self):
|
||||
dashboard.fetch_current_allowlist = lambda slug: "api.github.com\n"
|
||||
supervise_cli.fetch_current_allowlist = lambda slug: "api.github.com\n"
|
||||
applied = []
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||
supervise_cli.apply_allowlist_change = lambda slug, content: (
|
||||
applied.append(content)
|
||||
or ("api.github.com\n", content)
|
||||
)
|
||||
qp = self._enqueue_pipelock("https://api.github.com/some/path")
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# Still applied, but the content is unchanged from current —
|
||||
# before/after diff is empty.
|
||||
self.assertEqual(1, len(applied))
|
||||
self.assertEqual("api.github.com\n", applied[0])
|
||||
|
||||
def test_apply_failure_blocks_response_and_audit(self):
|
||||
dashboard.fetch_current_allowlist = lambda slug: "existing.example\n"
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
supervise_cli.fetch_current_allowlist = lambda slug: "existing.example\n"
|
||||
supervise_cli.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
PipelockApplyError("docker exec failed")
|
||||
)
|
||||
qp = self._enqueue_pipelock()
|
||||
with self.assertRaises(PipelockApplyError):
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertEqual(
|
||||
[qp.proposal.id],
|
||||
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
|
||||
@@ -381,12 +376,12 @@ class TestPipelockApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
self.assertEqual([], read_audit_entries("pipelock", "dev"))
|
||||
|
||||
def test_url_without_host_raises(self):
|
||||
dashboard.fetch_current_allowlist = lambda slug: ""
|
||||
supervise_cli.fetch_current_allowlist = lambda slug: ""
|
||||
# supervise_server's validator would catch this; if a broken
|
||||
# URL ever makes it through, the dashboard surfaces it too.
|
||||
# URL ever makes it through, the supervise TUI surfaces it too.
|
||||
qp = self._enqueue_pipelock("https:///nohost")
|
||||
with self.assertRaises(PipelockApplyError):
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
|
||||
|
||||
class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
@@ -397,10 +392,10 @@ class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original = dashboard.apply_capability_change
|
||||
self._original = supervise_cli.apply_capability_change
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_capability_change = self._original
|
||||
supervise_cli.apply_capability_change = self._original
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue_capability(self, proposed: str = "FROM python:3.13\nRUN apk add ripgrep\n"):
|
||||
@@ -414,112 +409,50 @@ class TestCapabilityApplyWiring(_FakeHomeMixin, unittest.TestCase):
|
||||
qdir = supervise.queue_dir_for_slug("dev")
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def test_capability_block_calls_apply_with_proposed_file(self):
|
||||
calls = []
|
||||
dashboard.apply_capability_change = lambda slug, content: (
|
||||
supervise_cli.apply_capability_change = lambda slug, content: (
|
||||
calls.append((slug, content)) or ("FROM old\n", content)
|
||||
)
|
||||
qp = self._enqueue_capability("FROM bookworm\n")
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertEqual([("dev", "FROM bookworm\n")], calls)
|
||||
|
||||
def test_apply_failure_blocks_response_and_keeps_pending(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
supervise_cli.apply_capability_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
CapabilityApplyError("teardown failed")
|
||||
)
|
||||
qp = self._enqueue_capability()
|
||||
with self.assertRaises(CapabilityApplyError):
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertEqual(
|
||||
[qp.proposal.id],
|
||||
[p.id for p in supervise.list_pending_proposals(qp.queue_dir)],
|
||||
)
|
||||
|
||||
def test_no_audit_log_for_capability(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
supervise_cli.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
qp = self._enqueue_capability()
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# capability-block has no audit log per PRD 0013 — its record
|
||||
# lives in the per-bottle Dockerfile + transcript state.
|
||||
self.assertEqual([], read_audit_entries("egress", "dev"))
|
||||
self.assertEqual([], read_audit_entries("pipelock", "dev"))
|
||||
|
||||
def test_proposal_archived_after_apply(self):
|
||||
dashboard.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
supervise_cli.apply_capability_change = lambda slug, content: ("FROM old\n", content)
|
||||
qp = self._enqueue_capability()
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
# Sidecar would normally archive after delivering the response,
|
||||
# but it's gone by then. The dashboard archives so
|
||||
# but it's gone by then. The supervise TUI archives so
|
||||
# discover_pending stops surfacing the resolved proposal.
|
||||
self.assertEqual([], supervise.list_pending_proposals(qp.queue_dir))
|
||||
processed = list((qp.queue_dir / "processed").glob("*.json"))
|
||||
self.assertEqual(2, len(processed))
|
||||
|
||||
|
||||
class TestOperatorEditRoutes(_FakeHomeMixin, unittest.TestCase):
|
||||
"""PRD 0014 Phase 4: operator-initiated routes edit (not gated
|
||||
on a pending proposal)."""
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original_apply = dashboard.apply_routes_change
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_routes_change = self._original_apply
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_writes_audit_with_operator_edit_action(self):
|
||||
dashboard.apply_routes_change = lambda slug, content: (
|
||||
'{"routes": []}\n', content,
|
||||
)
|
||||
dashboard.operator_edit_routes("dev", '{"routes": [{"path": "/x/"}]}\n')
|
||||
entries = read_audit_entries("egress", "dev")
|
||||
self.assertEqual(1, len(entries))
|
||||
self.assertEqual(supervise.ACTION_OPERATOR_EDIT, entries[0].operator_action)
|
||||
self.assertEqual("", entries[0].justification)
|
||||
self.assertIn("+", entries[0].diff)
|
||||
|
||||
def test_failure_does_not_write_audit(self):
|
||||
dashboard.apply_routes_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
EgressApplyError("nope")
|
||||
)
|
||||
with self.assertRaises(EgressApplyError):
|
||||
dashboard.operator_edit_routes("dev", '{"routes": []}\n')
|
||||
self.assertEqual([], read_audit_entries("egress", "dev"))
|
||||
|
||||
|
||||
class TestOperatorEditAllowlist(_FakeHomeMixin, unittest.TestCase):
|
||||
"""PRD 0015 Phase 3: operator-initiated pipelock allowlist edit."""
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original = dashboard.apply_allowlist_change
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_allowlist_change = self._original
|
||||
self._teardown_fake_home()
|
||||
|
||||
def test_writes_audit_with_operator_edit_action(self):
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (
|
||||
"old.example\n", content,
|
||||
)
|
||||
dashboard.operator_edit_allowlist("dev", "old.example\nnew.example\n")
|
||||
entries = read_audit_entries("pipelock", "dev")
|
||||
self.assertEqual(1, len(entries))
|
||||
self.assertEqual(supervise.ACTION_OPERATOR_EDIT, entries[0].operator_action)
|
||||
self.assertIn("+new.example", entries[0].diff)
|
||||
|
||||
def test_failure_does_not_write_audit(self):
|
||||
dashboard.apply_allowlist_change = lambda slug, content: (_ for _ in ()).throw(
|
||||
PipelockApplyError("nope")
|
||||
)
|
||||
with self.assertRaises(PipelockApplyError):
|
||||
dashboard.operator_edit_allowlist("dev", "x.example\n")
|
||||
self.assertEqual([], read_audit_entries("pipelock", "dev"))
|
||||
|
||||
|
||||
class TestEditInEditor(unittest.TestCase):
|
||||
def test_runs_editor_returns_edited_content(self):
|
||||
# Fake "editor" is /bin/sh -c 'cat <<EOF > $1 ... EOF'
|
||||
@@ -544,7 +477,7 @@ class TestEditInEditor(unittest.TestCase):
|
||||
os.chmod(editor_script, 0o755)
|
||||
os.environ["EDITOR"] = editor_script
|
||||
try:
|
||||
result = dashboard.edit_in_editor("original")
|
||||
result = supervise_cli.edit_in_editor("original")
|
||||
self.assertEqual("edited", result)
|
||||
finally:
|
||||
os.unlink(editor_script)
|
||||
@@ -566,7 +499,7 @@ class TestEditInEditor(unittest.TestCase):
|
||||
os.chmod(editor_script, 0o755)
|
||||
os.environ["EDITOR"] = editor_script
|
||||
try:
|
||||
result = dashboard.edit_in_editor("original")
|
||||
result = supervise_cli.edit_in_editor("original")
|
||||
self.assertIsNone(result)
|
||||
finally:
|
||||
os.unlink(editor_script)
|
||||
@@ -583,19 +516,19 @@ class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
self._original_apply_capability = dashboard.apply_capability_change
|
||||
dashboard.apply_capability_change = lambda slug, content: ("", content)
|
||||
self._original_apply_capability = supervise_cli.apply_capability_change
|
||||
supervise_cli.apply_capability_change = lambda slug, content: ("", content)
|
||||
|
||||
def tearDown(self):
|
||||
dashboard.apply_capability_change = self._original_apply_capability
|
||||
supervise_cli.apply_capability_change = self._original_apply_capability
|
||||
self._teardown_fake_home()
|
||||
|
||||
def _enqueue_capability(self, slug: str = "dev") -> "dashboard.QueuedProposal":
|
||||
def _enqueue_capability(self, slug: str = "dev") -> "supervise_cli.QueuedProposal":
|
||||
p = _proposal(slug=slug, tool=TOOL_CAPABILITY_BLOCK)
|
||||
qdir = supervise.queue_dir_for_slug(slug)
|
||||
qdir.mkdir(parents=True, exist_ok=True)
|
||||
supervise.write_proposal(qdir, p)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=qdir)
|
||||
|
||||
def _write_metadata(self, slug: str, compose_project: str) -> None:
|
||||
from bot_bottle.backend.docker.bottle_state import BottleMetadata, write_metadata
|
||||
@@ -612,18 +545,18 @@ class TestCapabilityBlockSmolmachinesGuard(_FakeHomeMixin, unittest.TestCase):
|
||||
self._write_metadata("dev", compose_project="")
|
||||
qp = self._enqueue_capability("dev")
|
||||
with self.assertRaises(CapabilityApplyError) as ctx:
|
||||
dashboard.approve(qp)
|
||||
supervise_cli.approve(qp)
|
||||
self.assertIn("smolmachines", str(ctx.exception))
|
||||
|
||||
def test_docker_bottle_calls_apply_capability_change(self):
|
||||
self._write_metadata("dev", compose_project="bot-bottle-dev")
|
||||
qp = self._enqueue_capability("dev")
|
||||
dashboard.approve(qp) # must not raise
|
||||
supervise_cli.approve(qp) # must not raise
|
||||
|
||||
def test_no_metadata_falls_through_to_docker_path(self):
|
||||
# No metadata at all → assume Docker (backward-compatible).
|
||||
qp = self._enqueue_capability("dev")
|
||||
dashboard.approve(qp) # must not raise
|
||||
supervise_cli.approve(qp) # must not raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
+19
-19
@@ -1,6 +1,6 @@
|
||||
"""Unit: dashboard launch/crash failure logging (issue #100).
|
||||
"""Unit: supervise launch/crash failure logging (issue #100).
|
||||
|
||||
The dashboard runs under curses, so anything written to stderr while the
|
||||
The supervise TUI runs under curses, so anything written to stderr while the
|
||||
TUI owns the terminal is wiped when the terminal is restored. These
|
||||
tests lock the recovery paths: a config error (`Die`) is re-surfaced
|
||||
after the wrapper returns, and an unexpected crash is persisted to a
|
||||
@@ -17,7 +17,7 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from bot_bottle import supervise
|
||||
from bot_bottle.cli import dashboard
|
||||
from bot_bottle.cli import supervise as supervise_cli
|
||||
from bot_bottle.log import Die, die
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ class _FakeHomeMixin:
|
||||
~/.bot-bottle."""
|
||||
|
||||
def _setup_fake_home(self):
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="dash-crash-test.")
|
||||
self._tmp = tempfile.TemporaryDirectory(prefix="supervise-crash-test.")
|
||||
self._orig_root = supervise.bot_bottle_root
|
||||
self._root = Path(self._tmp.name) / ".bot-bottle"
|
||||
supervise.bot_bottle_root = lambda: self._root # type: ignore[assignment]
|
||||
@@ -54,7 +54,7 @@ class _FakeHomeMixin:
|
||||
self._tmp.cleanup()
|
||||
|
||||
|
||||
class TestCmdDashboardErrorPaths(_FakeHomeMixin, unittest.TestCase):
|
||||
class TestCmdSuperviseErrorPaths(_FakeHomeMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self._setup_fake_home()
|
||||
|
||||
@@ -63,42 +63,42 @@ class TestCmdDashboardErrorPaths(_FakeHomeMixin, unittest.TestCase):
|
||||
|
||||
def test_keyboard_interrupt_returns_130(self):
|
||||
with mock.patch.object(
|
||||
dashboard.curses, "wrapper", side_effect=KeyboardInterrupt
|
||||
supervise_cli.curses, "wrapper", side_effect=KeyboardInterrupt
|
||||
):
|
||||
self.assertEqual(130, dashboard.cmd_dashboard([]))
|
||||
self.assertEqual(130, supervise_cli.cmd_supervise([]))
|
||||
|
||||
def test_die_resurfaces_message_after_curses(self):
|
||||
buf = io.StringIO()
|
||||
with mock.patch.object(
|
||||
dashboard.curses, "wrapper",
|
||||
supervise_cli.curses, "wrapper",
|
||||
side_effect=Die(1, "manifest parse error at line 3"),
|
||||
):
|
||||
with contextlib.redirect_stderr(buf):
|
||||
rc = dashboard.cmd_dashboard([])
|
||||
rc = supervise_cli.cmd_supervise([])
|
||||
self.assertEqual(1, rc)
|
||||
self.assertIn("manifest parse error at line 3", buf.getvalue())
|
||||
|
||||
def test_die_without_message_has_fallback(self):
|
||||
buf = io.StringIO()
|
||||
with mock.patch.object(dashboard.curses, "wrapper", side_effect=Die(1)):
|
||||
with mock.patch.object(supervise_cli.curses, "wrapper", side_effect=Die(1)):
|
||||
with contextlib.redirect_stderr(buf):
|
||||
rc = dashboard.cmd_dashboard([])
|
||||
rc = supervise_cli.cmd_supervise([])
|
||||
self.assertEqual(1, rc)
|
||||
self.assertIn("fatal error", buf.getvalue())
|
||||
|
||||
def test_unexpected_exception_writes_crash_log(self):
|
||||
buf = io.StringIO()
|
||||
with mock.patch.object(
|
||||
dashboard.curses, "wrapper",
|
||||
supervise_cli.curses, "wrapper",
|
||||
side_effect=ValueError("kaboom in render"),
|
||||
):
|
||||
with contextlib.redirect_stderr(buf):
|
||||
rc = dashboard.cmd_dashboard([])
|
||||
rc = supervise_cli.cmd_supervise([])
|
||||
self.assertEqual(1, rc)
|
||||
out = buf.getvalue()
|
||||
self.assertIn("dashboard crashed: ValueError: kaboom in render", out)
|
||||
self.assertIn("supervise crashed: ValueError: kaboom in render", out)
|
||||
self.assertIn("full traceback written to", out)
|
||||
log_path = self._root / "logs" / "dashboard-crash.log"
|
||||
log_path = self._root / "logs" / "supervise-crash.log"
|
||||
self.assertTrue(log_path.exists())
|
||||
content = log_path.read_text()
|
||||
self.assertIn("kaboom in render", content)
|
||||
@@ -116,10 +116,10 @@ class TestWriteCrashLog(_FakeHomeMixin, unittest.TestCase):
|
||||
try:
|
||||
raise RuntimeError("explode")
|
||||
except RuntimeError as e:
|
||||
path = dashboard._write_crash_log(e)
|
||||
self.assertEqual(self._root / "logs" / "dashboard-crash.log", path)
|
||||
path = supervise_cli._write_crash_log(e)
|
||||
self.assertEqual(self._root / "logs" / "supervise-crash.log", path)
|
||||
text = path.read_text()
|
||||
self.assertIn("=== dashboard crash", text)
|
||||
self.assertIn("=== supervise crash", text)
|
||||
self.assertIn("RuntimeError: explode", text)
|
||||
|
||||
def test_falls_back_to_tempfile_when_home_unwritable(self):
|
||||
@@ -131,7 +131,7 @@ class TestWriteCrashLog(_FakeHomeMixin, unittest.TestCase):
|
||||
try:
|
||||
raise RuntimeError("explode2")
|
||||
except RuntimeError as e:
|
||||
path = dashboard._write_crash_log(e)
|
||||
path = supervise_cli._write_crash_log(e)
|
||||
self.assertTrue(path.exists())
|
||||
self.assertIn("explode2", path.read_text())
|
||||
|
||||
+13
-13
@@ -1,4 +1,4 @@
|
||||
"""Unit: dashboard's detail-view line builder.
|
||||
"""Unit: supervise's detail-view line builder.
|
||||
|
||||
_detail_lines returns (text, attr) tuples. Most are plain; for
|
||||
pipelock-block proposals it appends a "→ would allow host: <host>"
|
||||
@@ -8,7 +8,7 @@ which hostname will land in pipelock's allowlist on approval."""
|
||||
import unittest
|
||||
|
||||
from bot_bottle import supervise
|
||||
from bot_bottle.cli import dashboard
|
||||
from bot_bottle.cli import supervise as supervise_cli
|
||||
from bot_bottle.supervise import (
|
||||
Proposal,
|
||||
TOOL_CAPABILITY_BLOCK,
|
||||
@@ -18,7 +18,7 @@ from bot_bottle.supervise import (
|
||||
)
|
||||
|
||||
|
||||
def _qp(tool: str, payload: str) -> dashboard.QueuedProposal:
|
||||
def _qp(tool: str, payload: str) -> supervise_cli.QueuedProposal:
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
p = Proposal.new(
|
||||
@@ -29,14 +29,14 @@ def _qp(tool: str, payload: str) -> dashboard.QueuedProposal:
|
||||
current_file_hash=sha256_hex(payload),
|
||||
now=datetime(2026, 5, 25, 12, 0, 0, tzinfo=timezone.utc),
|
||||
)
|
||||
return dashboard.QueuedProposal(proposal=p, queue_dir=Path("/tmp/q"))
|
||||
return supervise_cli.QueuedProposal(proposal=p, queue_dir=Path("/tmp/q"))
|
||||
|
||||
|
||||
class TestPipelockHostHighlight(unittest.TestCase):
|
||||
GREEN = 0xDEADBEEF # arbitrary sentinel; _detail_lines passes through
|
||||
|
||||
def test_appends_green_host_line_for_pipelock_block(self):
|
||||
lines = dashboard._detail_lines(
|
||||
lines = supervise_cli._detail_lines(
|
||||
_qp(TOOL_PIPELOCK_BLOCK, "https://api.github.com/repos/foo/bar"),
|
||||
green_attr=self.GREEN,
|
||||
)
|
||||
@@ -47,14 +47,14 @@ class TestPipelockHostHighlight(unittest.TestCase):
|
||||
self.assertEqual(["api.github.com"], green_lines)
|
||||
|
||||
def test_no_green_lines_for_egress_block(self):
|
||||
lines = dashboard._detail_lines(
|
||||
lines = supervise_cli._detail_lines(
|
||||
_qp(TOOL_EGRESS_BLOCK, '{"routes": []}'),
|
||||
green_attr=self.GREEN,
|
||||
)
|
||||
self.assertEqual([], [t for t, a in lines if a == self.GREEN])
|
||||
|
||||
def test_no_green_lines_for_capability_block(self):
|
||||
lines = dashboard._detail_lines(
|
||||
lines = supervise_cli._detail_lines(
|
||||
_qp(TOOL_CAPABILITY_BLOCK, "FROM python:3.13\n"),
|
||||
green_attr=self.GREEN,
|
||||
)
|
||||
@@ -63,8 +63,8 @@ class TestPipelockHostHighlight(unittest.TestCase):
|
||||
def test_skips_host_line_when_url_unparseable(self):
|
||||
# Shouldn't happen in production — supervise_server validates
|
||||
# the URL before queuing — but if a malformed payload ever
|
||||
# reaches the dashboard, don't render a misleading host line.
|
||||
lines = dashboard._detail_lines(
|
||||
# reaches the supervise TUI, don't render a misleading host line.
|
||||
lines = supervise_cli._detail_lines(
|
||||
_qp(TOOL_PIPELOCK_BLOCK, "garbage-not-a-url"),
|
||||
green_attr=self.GREEN,
|
||||
)
|
||||
@@ -73,7 +73,7 @@ class TestPipelockHostHighlight(unittest.TestCase):
|
||||
def test_no_green_attr_passed_still_renders_host(self):
|
||||
# Even without color support (green_attr=0), the host line
|
||||
# is still present — it just won't be coloured.
|
||||
lines = dashboard._detail_lines(
|
||||
lines = supervise_cli._detail_lines(
|
||||
_qp(TOOL_PIPELOCK_BLOCK, "https://api.github.com/x"),
|
||||
green_attr=0,
|
||||
)
|
||||
@@ -86,14 +86,14 @@ class TestFailedUrlHost(unittest.TestCase):
|
||||
def test_extracts_hostname(self):
|
||||
self.assertEqual(
|
||||
"api.github.com",
|
||||
dashboard._failed_url_host("https://api.github.com/repos/foo"),
|
||||
supervise_cli._failed_url_host("https://api.github.com/repos/foo"),
|
||||
)
|
||||
|
||||
def test_returns_empty_for_unparseable(self):
|
||||
self.assertEqual("", dashboard._failed_url_host("not a url"))
|
||||
self.assertEqual("", supervise_cli._failed_url_host("not a url"))
|
||||
|
||||
def test_returns_empty_for_url_without_host(self):
|
||||
self.assertEqual("", dashboard._failed_url_host("https:///nohost"))
|
||||
self.assertEqual("", supervise_cli._failed_url_host("https:///nohost"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
Reference in New Issue
Block a user