diff --git a/claude_bottle/cli/dashboard.py b/claude_bottle/cli/dashboard.py index 47e4e4d..d88c993 100644 --- a/claude_bottle/cli/dashboard.py +++ b/claude_bottle/cli/dashboard.py @@ -14,6 +14,7 @@ from __future__ import annotations import argparse import curses import os +import shutil import subprocess import sys import tempfile @@ -23,6 +24,7 @@ from datetime import datetime, timezone from pathlib import Path from .. import supervise as _supervise +from ..backend import BottleSpec, get_bottle_backend from ..backend.docker.capability_apply import ( CapabilityApplyError, apply_capability_change, @@ -46,6 +48,7 @@ from ..backend.docker.pipelock_apply import ( render_allowlist_content, ) from ..log import info +from ..manifest import Manifest from ..supervise import ( ACTION_OPERATOR_EDIT, COMPONENT_FOR_TOOL, @@ -64,7 +67,13 @@ from ..supervise import ( write_audit_entry, write_response, ) -from ._common import PROG +from ._common import PROG, USER_CWD +from .start import ( + attach_claude, + capture_session_state, + prepare_with_preflight, + settle_state, +) # Errors any remediation engine may raise. Caught by the TUI key @@ -392,6 +401,288 @@ def edit_in_editor(content: str, *, suffix: str = ".tmp") -> str | None: pass +# --- New-agent flow (PRD 0020 chunks 1+2) ---------------------------------- +# +# `n` opens a picker modal listing the manifest's agents (with a +# running-count next to each). Selecting one runs prepare → preflight +# (modal) → backend.launch().__enter__() → handoff (curses.endwin → +# claude → refresh). The returned (cm, bottle) lives in the main +# loop's `bottles` dict; chunks 3/4 wire Enter / `x` to act on it. + + +def _filter_agents(query: str, names: list[str]) -> list[str]: + """Case-insensitive substring filter for the picker. 
Pure + function — no curses, easy to unit-test.""" + if not query: + return list(names) + q = query.lower() + return [n for n in names if q in n.lower()] + + +def _picker_modal( + stdscr: "curses._CursesWindow", + names: list[str], + running_counts: dict[str, int], +) -> str | None: + """Modal agent picker. Type to filter; j/k or arrows to + navigate; Enter to confirm; Esc to abort (first press clears + filter if any, second press exits).""" + if not names: + return None + selected = 0 + query = "" + while True: + filtered = _filter_agents(query, names) + if not filtered: + selected = 0 + elif selected >= len(filtered): + selected = len(filtered) - 1 + elif selected < 0: + selected = 0 + + _draw_picker_modal(stdscr, names, filtered, selected, query, running_counts) + try: + key = stdscr.getch() + except KeyboardInterrupt: + return None + + if key == 27: # Esc + if query: + query = "" + selected = 0 + continue + return None + if key in (curses.KEY_ENTER, 10, 13): + if filtered: + return filtered[selected] + continue + if key in (curses.KEY_DOWN, ord("\x0e")): # KEY_DOWN, Ctrl-N + if filtered: + selected = min(selected + 1, len(filtered) - 1) + continue + if key in (curses.KEY_UP, ord("\x10")): # KEY_UP, Ctrl-P + if filtered: + selected = max(selected - 1, 0) + continue + if key in (curses.KEY_BACKSPACE, 127, 8): + query = query[:-1] + continue + # Printable character → append to filter + if 32 <= key < 127: + query += chr(key) + continue + # Anything else: ignore + + +def _draw_picker_modal( + stdscr: "curses._CursesWindow", + all_names: list[str], + filtered: list[str], + selected: int, + query: str, + running_counts: dict[str, int], +) -> None: + """Render the picker modal. 
Width fits the longest name plus + the `(N running)` suffix; height fits all filtered items plus + a header line, filter line, and border — capped at 80% of + screen height with a scrollable inner list if necessary.""" + h, w = stdscr.getmaxyx() + label_width = max( + (len(n) for n in all_names), default=10, + ) + suffix_width = len(" (99 running)") + inner_width = max(label_width + suffix_width, len("filter: ") + 20, 40) + box_w = min(inner_width + 4, max(20, w - 4)) + max_list_rows = max(3, int(h * 0.6)) + list_rows = min(len(filtered) if filtered else 1, max_list_rows) + box_h = list_rows + 5 # border (2) + title (1) + filter (1) + spacer (1) + box_h = min(box_h, max(7, h - 4)) + top = max(0, (h - box_h) // 2) + left = max(0, (w - box_w) // 2) + + win = curses.newwin(box_h, box_w, top, left) + win.erase() + win.box() + win.addnstr(0, 2, " start agent ", box_w - 4, curses.A_BOLD) + + win.addnstr(1, 2, f"filter: {query}", box_w - 4) + win.hline(2, 1, curses.ACS_HLINE, box_w - 2) + + list_start_row = 3 + visible_rows = box_h - list_start_row - 1 + if not filtered: + win.addnstr( + list_start_row, 2, + "(no agents match filter)", + box_w - 4, curses.A_DIM, + ) + else: + # Simple windowing around `selected`. + first = max(0, selected - visible_rows + 1) + if selected < first: + first = selected + for i, name in enumerate(filtered[first:first + visible_rows]): + row = list_start_row + i + count = running_counts.get(name, 0) + suffix = f" ({count} running)" if count else "" + line = f" {name}{suffix}" + attr = curses.A_REVERSE if (first + i) == selected else curses.A_NORMAL + win.addnstr(row, 1, line, box_w - 2, attr) + + win.addnstr( + box_h - 1, 2, + " Enter: start Esc: cancel type: filter ", + box_w - 4, curses.A_DIM, + ) + win.refresh() + + +def _preflight_modal( + stdscr: "curses._CursesWindow", + plan_text: str, +) -> bool: + """Modal preflight confirmation. 
`plan_text` is the multi-line + summary the renderer produced; we draw it in a centered box + with `[y/N]` at the bottom and capture the next keypress.""" + lines = plan_text.splitlines() or [""] + h, w = stdscr.getmaxyx() + inner_width = max( + max((len(line) for line in lines), default=10), + len("launch this agent? [y/N]"), + ) + box_w = min(inner_width + 4, max(20, w - 4)) + box_h = min(len(lines) + 5, max(7, h - 4)) + top = max(0, (h - box_h) // 2) + left = max(0, (w - box_w) // 2) + + win = curses.newwin(box_h, box_w, top, left) + win.erase() + win.box() + win.addnstr(0, 2, " launch agent ", box_w - 4, curses.A_BOLD) + for i, line in enumerate(lines[: box_h - 4]): + win.addnstr(1 + i, 2, line, box_w - 4) + win.addnstr( + box_h - 2, 2, + "launch this agent? [y/N]", + box_w - 4, curses.A_BOLD, + ) + win.addnstr( + box_h - 1, 2, + " y: launch N / Esc: abort ", + box_w - 4, curses.A_DIM, + ) + win.refresh() + + while True: + try: + key = stdscr.getch() + except KeyboardInterrupt: + return False + if key in (ord("y"), ord("Y")): + return True + if key in (ord("n"), ord("N"), 27, curses.KEY_ENTER, 10, 13): + return False + + +def _capture_preflight_text(plan) -> str: + """Capture `plan.print` output by temporarily redirecting + stderr. Plan rendering is stderr-bound (existing behavior the + CLI relies on); for the modal we want it as a string.""" + import io + import contextlib + buf = io.StringIO() + with contextlib.redirect_stderr(buf): + plan.print(remote_control=False) + return buf.getvalue().strip("\n") + + +def _running_counts( + bottles: dict, agents_now: list[ActiveAgent], +) -> dict[str, int]: + """Per-agent running count: dashboard-owned + externally- + discovered, summed by agent_name. 
The picker shows this so the + operator knows whether picking an agent starts a fresh bottle + or a Nth one.""" + counts: dict[str, int] = {} + for a in agents_now: + counts[a.agent_name] = counts.get(a.agent_name, 0) + 1 + return counts + + +def _new_agent_flow( + stdscr: "curses._CursesWindow", + manifest: Manifest, + bottles: dict, + agents_now: list[ActiveAgent], +) -> str: + """Open the picker, prepare + preflight (modal), launch + (enter the context manager but DON'T close it), handoff to + claude. Returns a status-line message for the dashboard footer. + The (cm, bottle) tuple lands in `bottles` keyed by slug; chunks + 3/4 use it for re-attach and explicit stop.""" + names = sorted(manifest.agents.keys()) + picked = _picker_modal(stdscr, names, _running_counts(bottles, agents_now)) + if picked is None: + return "agent start aborted" + + spec = BottleSpec( + manifest=manifest, + agent_name=picked, + copy_cwd=False, + user_cwd=USER_CWD, + ) + # Modal preflight + prompt. `prepare_with_preflight` calls + # render_preflight(plan) once, then prompt_yes() to decide. We + # split the two: render captures the text into a closure, the + # prompt draws the modal + reads y/N. + captured: dict[str, str] = {} + + def _render(plan) -> None: + captured["text"] = _capture_preflight_text(plan) + + def _prompt() -> bool: + return _preflight_modal(stdscr, captured.get("text", "")) + + stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage.")) + try: + plan, identity = prepare_with_preflight( + spec, + stage_dir=stage_dir, + render_preflight=_render, + prompt_yes=_prompt, + ) + if plan is None: + settle_state(identity) + return f"start of {picked!r} aborted at preflight" + + backend = get_bottle_backend() + # Launch step writes to stderr (image build, network create, + # compose up). Get out of curses' way for the duration so + # the lines render cleanly. The handoff stays endwin'd until + # claude exits, then we refresh. 
+ curses.endwin() + try: + cm = backend.launch(plan) + bottle = cm.__enter__() + except BaseException: + stdscr.refresh() + settle_state(identity) + raise + bottles[plan.slug] = (cm, bottle, identity) + + try: + exit_code = attach_claude(bottle, remote_control=False) + capture_session_state(identity, exit_code) + finally: + stdscr.refresh() + return f"[{plan.slug}] claude session ended (exit {exit_code})" + finally: + # stage_dir was the prepare scratch dir; after PRD 0018 + # chunk 2 it holds nothing the running bottle needs. Reap + # immediately regardless of which branch above ran. + shutil.rmtree(stage_dir, ignore_errors=True) + + # --- TUI ------------------------------------------------------------------- @@ -491,6 +782,21 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None: selected_agent = 0 focus = PANE_PROPOSALS status_line = "" + # PRD 0020: bottles spun up from inside this dashboard session. + # Each entry: slug -> (context-manager, Bottle handle, identity). + # We hold the context manager so chunk 4's `x` can call __exit__ + # on it; chunk 5 quit-cleanup intentionally does NOT iterate this + # dict (the user wants quit to leave bottles running). + bottles: dict[str, tuple] = {} + # Manifest is loaded lazily on first `n` so the dashboard + # doesn't fail to start in a directory with no manifest (e.g., + # when the operator is purely watching pre-existing bottles). + manifest_cache: list[Manifest | None] = [None] + + def _get_manifest() -> Manifest: + if manifest_cache[0] is None: + manifest_cache[0] = Manifest.resolve(USER_CWD) + return manifest_cache[0] while True: pending = discover_pending() if selected >= len(pending): @@ -535,6 +841,17 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None: if key == 9: # Tab focus = PANE_AGENTS if focus == PANE_PROPOSALS else PANE_PROPOSALS continue + if key == ord("n"): + # PRD 0020 chunk 2: open the picker, start + attach to + # the chosen agent, return to the dashboard with the + # bottle running. 
+ try: + manifest = _get_manifest() + except Exception as e: + status_line = f"manifest load failed: {e}" + continue + status_line = _new_agent_flow(stdscr, manifest, bottles, agents) + continue if key in (ord("e"), ord("p")): # PRD 0019 chunk 4: agent-scoped edits. Only fire when # the agents pane is focused on a real selection; @@ -697,7 +1014,7 @@ def _render( row += 1 footer = ( - "[Tab] switch pane [j/k] move [Enter] view " + "[n] new agent [Tab] switch pane [j/k] move [Enter] view " "[a/m/r] proposal [e/p] edit selected agent [q] quit" ) stdscr.hline(h - 2, 0, curses.ACS_HLINE, w) diff --git a/claude_bottle/cli/start.py b/claude_bottle/cli/start.py index 0a9901a..1647b18 100644 --- a/claude_bottle/cli/start.py +++ b/claude_bottle/cli/start.py @@ -2,8 +2,10 @@ interactive claude-code session. The container is torn down when the session ends. -The launch core is shared with `cli.py resume `: see -_launch_bottle below. +The launch core is shared with `cli.py resume ` and (PRD +0020 chunk 1+) the dashboard's in-process start flow: see the +public helpers `prepare_with_preflight`, `attach_claude`, and the +private orchestrator `_launch_bottle`. 
""" from __future__ import annotations @@ -14,8 +16,10 @@ import shutil import sys import tempfile from pathlib import Path +from typing import Callable -from ..backend import BottleSpec, get_bottle_backend +from ..backend import Bottle, BottleSpec, get_bottle_backend +from ..backend.docker.bottle_plan import DockerBottlePlan from ..backend.docker.bottle_state import ( cleanup_state, is_preserved, @@ -51,73 +55,67 @@ def cmd_start(argv: list[str]) -> int: ) -def _launch_bottle( +# --- Public helpers shared with the dashboard (PRD 0020) ----------------- + + +def prepare_with_preflight( spec: BottleSpec, *, - dry_run: bool, - remote_control: bool, + stage_dir: Path, + render_preflight: Callable[[DockerBottlePlan], None], + prompt_yes: Callable[[], bool], + dry_run: bool = False, +) -> tuple[DockerBottlePlan | None, str]: + """Run `backend.prepare`, render the preflight summary via the + injected callable, prompt y/N via the injected callable. The CLI + binds these to stderr/stdin; the dashboard binds them to a + curses modal. + + Returns `(plan, identity)`. `plan` is None on dry-run or + operator-N, but `identity` is set as soon as `backend.prepare` + returns so callers can reap the prepare-time state dir via + `settle_state(identity)` in their finally — exactly the existing + semantics.""" + backend = get_bottle_backend() + plan = backend.prepare(spec, stage_dir=stage_dir) + identity = _identity_from_plan(plan) + + render_preflight(plan) + + if dry_run: + info("dry-run requested; not starting container.") + return None, identity + if not prompt_yes(): + info("aborted by user") + return None, identity + return plan, identity + + +def attach_claude( + bottle: Bottle, *, remote_control: bool = False, ) -> int: - """Shared launch core for `start` and `resume`. 
Builds the plan, - prints / dry-runs / prompts as appropriate, brings the bottle up, - attaches claude, and prints the resume hint on session end.""" - stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage.")) - identity = "" - try: - backend = get_bottle_backend() - plan = backend.prepare(spec, stage_dir=stage_dir) - identity = _identity_from_plan(plan) + """Run claude inside `bottle` as an interactive session. Blocks + until the session ends; returns the claude process's exit code. - plan.print(remote_control=remote_control) - - if dry_run: - info("dry-run requested; not starting container.") - return 0 - - sys.stderr.write("claude-bottle: launch this agent? [y/N] ") - sys.stderr.flush() - reply = read_tty_line() - if reply not in ("y", "Y", "yes", "YES"): - info("aborted by user") - return 0 - - with backend.launch(plan) as bottle: - info( - "attaching interactive claude session " - "(Ctrl-D or 'exit' to leave; container will be removed)" - ) - claude_args = ["--dangerously-skip-permissions"] - if remote_control: - claude_args.append("--remote-control") - exit_code = bottle.exec_claude(claude_args, tty=True) - info( - f"session ended (exit {exit_code}); " - f"container {bottle.name} will be removed" - ) - # While the container is still alive: always snapshot the - # transcript and — if the agent exited non-zero — mark - # the state for preservation. Capability-block already - # did both before triggering teardown from the dashboard; - # this picks up crashes / Ctrl-Cs / OOM kills the same - # way. snapshot_transcript is best-effort so the - # capability-block path's prior snapshot isn't clobbered - # when the container is already gone. - _capture_session_state(identity, exit_code) - return 0 - finally: - # PRD 0018 chunk 2: prepare now writes the bottle's bind-mount - # sources under state//. 
If we never reached the - # launch context (dry-run, preflight-N, prepare exception), or - # we did but nothing requested preservation, reap them along - # with everything else. _settle_state subsumes the prior - # post-launch settlement and the new pre-launch cleanup. - _settle_state(identity) - shutil.rmtree(stage_dir, ignore_errors=True) + Used as the inner step of `./cli.py start` (one-shot) and by the + dashboard (PRD 0020), which calls it from inside a `curses.endwin + → … → stdscr.refresh()` handoff so the curses surface gets out + of the terminal's way while claude has it.""" + info( + "attaching interactive claude session " + "(Ctrl-D or 'exit' to leave; container will be removed)" + ) + claude_args = ["--dangerously-skip-permissions"] + if remote_control: + claude_args.append("--remote-control") + return bottle.exec_claude(claude_args, tty=True) -def _capture_session_state(identity: str, exit_code: int) -> None: +def capture_session_state(identity: str, exit_code: int) -> None: """Inside the launch context, while the container is still alive: snapshot the transcript and mark for preservation if - claude crashed. Pure-function-ish; tests stub the helpers.""" + claude crashed. Public for the dashboard's death-handling path + (PRD 0020 open question 3).""" if not identity: return snapshot_transcript(identity) @@ -125,7 +123,11 @@ def _capture_session_state(identity: str, exit_code: int) -> None: mark_preserved(identity) -def _settle_state(identity: str) -> None: +def settle_state(identity: str) -> None: + """Post-teardown housekeeping: print the resume hint if the + state was preserved, otherwise reap the per-bottle state dir. 
+ Public so the dashboard's explicit-stop path calls the same + settlement the CLI uses on context exit.""" if not identity: return if is_preserved(identity): @@ -140,3 +142,68 @@ def _identity_from_plan(plan: object) -> str: identity attribute; for now we duck-type to keep this layer backend-agnostic.""" return getattr(plan, "slug", "") + + +def _text_prompt_yes() -> bool: + """Default `prompt_yes` for CLI use: reads y/N from the + controlling tty via stderr prompt + tty-line read.""" + sys.stderr.write("claude-bottle: launch this agent? [y/N] ") + sys.stderr.flush() + reply = read_tty_line() + return reply in ("y", "Y", "yes", "YES") + + +def _text_render_preflight(*, remote_control: bool): + def _render(plan: DockerBottlePlan) -> None: + plan.print(remote_control=remote_control) + return _render + + +def _launch_bottle( + spec: BottleSpec, + *, + dry_run: bool, + remote_control: bool, +) -> int: + """Shared launch core for `start` and `resume`. Builds the plan, + prints / dry-runs / prompts as appropriate, brings the bottle up, + attaches claude, and prints the resume hint on session end.""" + stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage.")) + identity = "" + try: + plan, identity = prepare_with_preflight( + spec, + stage_dir=stage_dir, + render_preflight=_text_render_preflight(remote_control=remote_control), + prompt_yes=_text_prompt_yes, + dry_run=dry_run, + ) + if plan is None: + return 0 + + backend = get_bottle_backend() + with backend.launch(plan) as bottle: + exit_code = attach_claude(bottle, remote_control=remote_control) + info( + f"session ended (exit {exit_code}); " + f"container {bottle.name} will be removed" + ) + # While the container is still alive: always snapshot the + # transcript and — if the agent exited non-zero — mark + # the state for preservation. Capability-block already + # did both before triggering teardown from the dashboard; + # this picks up crashes / Ctrl-Cs / OOM kills the same + # way. 
snapshot_transcript is best-effort so the + # capability-block path's prior snapshot isn't clobbered + # when the container is already gone. + capture_session_state(identity, exit_code) + return 0 + finally: + # PRD 0018 chunk 2: prepare now writes the bottle's bind-mount + # sources under state//. If we never reached the + # launch context (dry-run, preflight-N, prepare exception), or + # we did but nothing requested preservation, reap them along + # with everything else. `settle_state` subsumes the prior + # post-launch settlement and the new pre-launch cleanup. + settle_state(identity) + shutil.rmtree(stage_dir, ignore_errors=True) diff --git a/docs/prds/0020-start-and-attach-from-dashboard.md b/docs/prds/0020-start-and-attach-from-dashboard.md new file mode 100644 index 0000000..235f201 --- /dev/null +++ b/docs/prds/0020-start-and-attach-from-dashboard.md @@ -0,0 +1,396 @@ +# PRD 0020: Start and attach to agents from inside the dashboard + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-26 + +## Summary + +Today the dashboard is read-only: it surfaces pending proposals +and active agents (PRD 0019) but can't *start* an agent or +*re-enter* one. The operator's path is split — they launch +agents from one terminal (`./cli.py start `), and watch +them from another (`./cli.py dashboard`). + +This PRD collapses that split. The dashboard becomes the +operator's single surface: pressing a key opens an agent picker, +selecting one runs the existing prepare → preflight → launch +flow inside a curses-friendly variant, and on yield drops to a +full-screen `docker exec -it … claude` session (the "handoff" +shape from `docs/research/claude-code-pane-in-dashboard.md`). +When the operator exits claude, the dashboard re-renders with +the now-running bottle visible in the agents pane. + +Crucially, the bottle's lifetime is decoupled from both the +claude session AND the dashboard process. Exit claude → back to +dashboard, bottle still running. 
Start another agent → two +bottles up at once. Quit the dashboard → bottles continue +running. Teardown is **always explicit**: the operator presses +`x` on an agent, or runs `./cli.py cleanup` later. + +## Problem + +Two real frictions today: + +1. **Two terminals for one workflow.** The dashboard is the + right shape to *watch* agents — proposals queue, status + updates, operator-edit verbs — but it's the wrong shape to + *start* them. Today you open a second terminal for that. In + parallel use (3–5 bottles), the operator has 5+ terminals + open and the dashboard's "active agents" pane is hopelessly + behind reality because they just spawned three in a row. + +2. **`./cli.py start` ties the bottle to a single claude + session.** The start command's `ExitStack` brings the bottle + up, runs claude, and tears down on Ctrl-D — fine for a one- + shot session, wrong for "let me bounce in and out of this + bottle a few times while triaging proposals." Today the only + way to re-enter a bottle after exiting claude is to start a + fresh one and lose all in-bottle state. + +The dashboard already discovers active bottles, scopes +operator-edit verbs to a selected agent (PRD 0019), and +captures full-merged logs per bottle (PRD 0018). It already +*wants* to be the primary surface. This PRD finishes that. + +## Goals / Success Criteria + +1. From inside `./cli.py dashboard`, pressing `n` (new) opens + an agent picker listing every agent defined in the manifest. + Selecting one runs `prepare → preflight → launch`. +2. The preflight Y/N summary renders cleanly — either as a + curses modal or via `curses.endwin() → text-mode prompt + → restore`, matching the existing editor-flow pattern. +3. On launch success, the dashboard performs a handoff (option + 1 from the research doc): `curses.endwin()` → `docker exec + -it claude-bottle- claude --dangerously-skip-permissions` + → on exit, `stdscr.refresh()` and re-render with the new + bottle in the agents pane. +4. 
The bottle's lifetime is managed from the dashboard, NOT tied + to any single claude session (see goal 7 for dashboard exit). Exiting claude (Ctrl-D, `/exit`) + returns to the dashboard with the bottle still running. The + operator can start more agents and re-enter previous ones. +5. Pressing Enter on a selected row in the agents pane re- + attaches to that agent's bottle via the same handoff — drops + to full-screen claude, returns on exit. +6. Pressing `x` (or similar — keybinding decided in design) + on a selected agent stops just that bottle (compose down + + state cleanup) without quitting the dashboard. +7. Quitting the dashboard (`q`) leaves every running bottle + running. Bottle teardown is always explicit (per-bottle `x` + or `./cli.py cleanup`). The next `./cli.py dashboard` + invocation re-discovers them via `list_active_slugs()` and + surfaces re-attach for any it can reconstruct context for + (see "Cross-dashboard re-attach" below). + +## Non-goals + +- **A pane that hosts the claude TUI alongside proposals.** The + embedded-emulator option from the research doc is out of + scope. The handoff (option 1) is the v1; option 2 is a + separate PRD if and when handoff is observably insufficient. +- **Adopting bottles started by an out-of-dashboard `./cli.py + start` invocation.** Those have their own ExitStack-owner and + the dashboard does not take ownership of them (it already lists them + today). Ownership, and therefore `x` teardown, only applies to bottles the *current + dashboard process* started; re-attach is covered by the next bullet. +- **Resurrecting an out-of-process bottle into a new dashboard + with full re-attach.** A bottle started by `./cli.py start` + in another terminal — or by a previous dashboard run, now + exited — appears in the agents pane (already does, PRD 0019) + and can be re-attached via `docker exec -it claude` because + the agent container is still running `sleep infinity`. That's + in scope. 
What's *out* is anything that requires the launch- + context object to drive teardown — e.g., the + ExitStack-tracked CA + state cleanup `_settle_state` performs + today. Cross-dashboard re-attach uses the existing + `./cli.py cleanup` for teardown, not an `x` keypress (see + "Cross-dashboard re-attach" below). +- **Multi-window UI.** Single curses window, two existing + panes (proposals + agents); the agent picker is a modal, not + a third pane. +- **Removing `./cli.py start`.** Stays as the script-friendly / + legacy entry point. The dashboard is the new default. + +## Scope + +### In scope + +- Manifest-driven agent picker (curses modal): list view with + j/k navigation + Enter to confirm, Esc to abort. +- Preflight rendering inside the dashboard's curses surface + (modal or drop-and-resume — picked in design). +- A new `_dashboard_start_flow` that wraps prepare + preflight + + launch and returns a `DockerBottle` handle the dashboard + retains alongside its `pending` and `agents` lists. +- A `bottles: dict[slug, DockerBottle]` map on the main loop + that holds every dashboard-launched handle. Nothing tears + them down on dashboard exit (see resolved question 5). +- `Enter` on an agents-pane row → re-attach handoff (docker + exec -it claude into the existing container). +- `x` (or similar) on an agents-pane row → explicit per-bottle + stop without quitting. +- `q` (existing quit key) → exit the dashboard without tearing down any + bottles (see resolved question 5). + +### Out of scope + +- Changes to `./cli.py start` itself. It keeps its current + shape; the dashboard reuses its internal pieces (backend. + prepare / backend.launch) without reaching through the CLI + layer. +- Changes to `backend.launch`'s context-manager contract; the + dashboard's bottle map just holds the context-manager-yielded + Bottle and calls `__exit__` on explicit stop only. +- New manifest fields. The picker reads what's already there. +- Adopting non-dashboard bottles into the dashboard's owned set. 
+ +## Proposed design + +### Bottle ownership + +Today's flow: + +``` +./cli.py start agent + └─ with backend.launch(plan) as bottle: ← bottle alive while inside `with` + bottle.exec_claude([...], tty=True) ← blocks until claude exits + # context exits → compose down → state cleanup +``` + +The proposed dashboard-driven flow: + +``` +./cli.py dashboard + └─ bottles: dict[str, tuple[ContextManager, DockerBottle]] = {} + + # operator presses `n`, picks agent + cm = backend.launch(plan) + bottle = cm.__enter__() ← enter but don't bind to a `with` + bottles[plan.slug] = (cm, bottle) + + # operator interacts via: + curses.endwin() + bottle.exec_claude([...], tty=True) ← blocks; returns on Ctrl-D + stdscr.refresh() + # bottle is STILL ALIVE — only the claude process exited + + # ... operator presses `x` on selected agent: + cm, _ = bottles.pop(slug) + cm.__exit__(None, None, None) ← tears down just that one + + # ... operator presses `q`: + return # bottles dict still populated; no teardown +``` + +Two shifts: + +1. Bottles outlive any single claude session — the dashboard + manages enter/exit per bottle, not per attach. Exit claude + → still in the dashboard with the bottle running. +2. Bottles outlive the dashboard process itself. Quitting the + dashboard does NOT close the context managers; the docker + compose project keeps running with the agent container in + `sleep infinity`. A subsequent dashboard invocation + re-discovers it via `docker compose ls` (PRD 0019's + `list_active_slugs`) and surfaces re-attach. + + The trade-off: state cleanup that today runs in + `_settle_state` (transcript snapshot, preserve-marker + evaluation, state-dir reap) doesn't fire on a quit-while- + running bottle. It DOES fire when the operator explicitly + stops via `x`, because that calls `cm.__exit__`. For + bottles a previous dashboard quit on, `./cli.py cleanup` + is the path — its compose-down + state-reap logic + already covers the case. 
+ +### Cross-dashboard re-attach + +When the dashboard discovers a bottle in `discover_active_agents` +that it didn't itself start (a previous-dashboard or external +`./cli.py start` bottle), Enter still attaches via `docker exec +-it … claude` — the agent container is running `sleep infinity` +exactly the same way regardless of who started it. The only +thing the current dashboard lacks for those bottles is the +launch-context object needed to drive a clean teardown via +`x`. + +For v1 we surface this honestly: pressing `x` on a non-owned +agent shows a status hint pointing at `./cli.py cleanup` (or +`./cli.py cleanup` targeted at the slug if we add that flag +later). The agent stays alive; the operator handles teardown +out-of-band. Enter (re-attach) works for both owned and +non-owned bottles. + +### Agent picker + +Pressing `n` opens a centered modal listing every agent name +from `spec.manifest.agents`. j/k navigates; Enter selects; Esc +aborts. Width is the longest name + bottle name + a column for +"already running?" so the operator can see at a glance whether +picking an agent starts a fresh one (different slug suffix) or +not. + +``` +┌─ start agent ───────────────────────────┐ +│ implementer dev (running) │ +│ > researcher dev │ +│ triage-bot sandbox │ +└─ Enter: start Esc: cancel ─────────────┘ +``` + +Starting an agent that already has a running bottle is allowed +— each `start` mints a fresh slug — but the picker surfaces the +already-running state so the operator doesn't accidentally +double-launch. + +### Preflight Y/N + +Two viable shapes: + +**Modal** — render the preflight summary lines (`agent / env / +skills / bottle / git gate / egress`) in a centered curses +modal with `[y/N]` at the bottom. Capture the next keypress. + +**Drop-and-resume** — `curses.endwin()`, print the preflight to +stderr, read y/N from stdin, restore curses. Matches the +editor-flow + handoff pattern; lower implementation cost. 
+ +Lean toward **modal** for the y/N because it doesn't flash the +terminal between dashboard frames. Drop-and-resume is acceptable +if modal proves fiddly. + +### Re-attach (Enter on agent) + +Same handoff pattern the new-agent flow uses. For an agent the +dashboard started this session, the dashboard holds the +`DockerBottle` handle in its `bottles` dict and calls +`bottle.exec_claude(...)`. For an agent it discovered via +`list_active_slugs` (previous-dashboard or external start), +the dashboard synthesizes a one-shot `DockerBottle` from the +slug — container name is `claude-bottle-`, no prompt +path because the agent's claude config already has `--append- +system-prompt-file` baked in from the original launch — +and runs the same exec. Either way, Enter drops to +full-screen claude; on exit the dashboard re-renders. + +### Explicit per-bottle stop + +`x` on a dashboard-owned agent: pop the `(cm, bottle)` from +the dict, call `cm.__exit__(None, None, None)` which drives +the existing compose-down + state-settle logic. Refresh the +agents pane. + +`x` on a non-owned agent (discovered via `list_active_slugs` +but not in `bottles` dict): no-op with status hint pointing +at `./cli.py cleanup` (the existing path that tears down +ANY claude-bottle compose project plus reaps state dirs). + +### Dashboard quit + +`q` returns the dashboard process to 0 without touching any +running bottles. The `bottles` dict goes out of scope but +because the context managers' `__exit__` is never invoked, +the `docker compose` project keeps running. The next dashboard +invocation discovers the bottles via `list_active_slugs` and +surfaces re-attach. + +This is a real departure from today's `./cli.py start` +semantics (which couples bottle lifetime to the process via +ExitStack). It's intentional: the dashboard is a watching + +acting surface, not a lifetime owner. + +## Implementation chunks + +Sized for one PR each. + +1. 
**Refactor `_launch_bottle` so the launch + exec_claude + pieces are separable.** Today's `cli/start.py` runs both + inside one function. Extract `prepare_with_preflight(spec, + *, render_preflight, prompt_yes)` and `attach_claude(bottle, + *, remote_control)`. The CLI's existing one-shot use binds + them as before; the dashboard binds them with curses-aware + render + prompt callables. No behavior change. +2. **Agent picker modal + new-agent flow.** New key `n` opens + the picker; `prepare_with_preflight` runs against the + selected agent; on Y, `backend.launch(plan)` is entered and + stored as `(cm, bottle)` in the dashboard's `bottles` dict; handoff invokes `attach_claude`. +3. **Re-attach via Enter on owned agents-pane row.** Looks up + the slug in the dashboard's `bottles` map; if present → + handoff; else → status-line hint pointing at `./cli.py + resume`. +4. **Explicit per-bottle stop (`x` keybinding).** Pop the + bottle's `(cm, bottle)` entry from the `bottles` dict, call `cm.__exit__(None, None, None)`, refresh. +5. **Quit (`q`).** Return without invoking any bottle's + `__exit__`, so running bottles survive the dashboard. Document the "exiting the dashboard + leaves every bottle running; teardown is always explicit" contract in `dashboard.py`'s + module docstring. + +## Resolved questions + +1. **Modal vs. drop-and-resume for preflight Y/N.** Resolved: + **modal.** Render the preflight lines centered in a curses + sub-window with `[y/N]` at the bottom; capture the next + keypress. If geometry proves fiddly during implementation + we'll fall back to drop-and-resume, but modal is the target. + +2. **Agent picker: text-filter typing.** Resolved: **yes, + include filter typing.** As the operator types, the list + filters to agents whose name matches (substring, + case-insensitive). Arrow keys (or Ctrl-N / Ctrl-P) navigate within the filtered + set, because j/k are now ordinary filter characters; Esc clears the filter on first press, exits the picker + on the second. + +3.
**Container-died-during-claude handling.** Keep the design + as drafted: transcript snapshot (`snapshot_transcript`) + + `mark_preserved` if exit code is non-zero + remove from + the `bottles` dict + status line `"claude session for + [slug] ended with exit N; preserved for resume"`. The + bottle's `cm.__exit__` would normally run on stop; here it + runs as part of the death-handling (the container is + already gone, but compose-down + state-settle still + sequence the network removal + state cleanup correctly). + +4. **Double-start of the same agent.** Allowed. The picker + surfaces a `(N running)` annotation next to any agent name + that already has live bottles in this dashboard's `bottles` + dict OR in `list_active_slugs()`, so the operator sees the + running-count before picking. Selecting an already-running + agent name mints a fresh slug for the new bottle as + normal. + +5. **Quit behavior.** Resolved: **`q` does NOT tear down any + bottles.** Dashboard exit is purely a UI exit; the + bottles dict goes out of scope without invoking `__exit__`, + so the `docker compose` projects keep running. Bottle + teardown is always explicit: per-bottle `x` (for + dashboard-owned), or `./cli.py cleanup` (for everything). + +## Open questions + +6. **Race between handoff and 1s refresh tick.** While the + dashboard's `stdscr.timeout` is set, a key press fires the + handoff and the dashboard sits in `docker exec` for minutes. + `discover_active_agents` / `discover_pending` don't poll + during that window — that's harmless on its own (the moment + we `stdscr.refresh()` after exec returns, the next loop + iter runs discovery and the panes reflect reality), but + it does mean: (a) proposals queued during the claude + session won't fire any operator notification until the + handoff ends, and (b) a bottle that died mid-claude won't + be detectable until the operator exits back to the + dashboard. 
Not blocking v1 — flagging as a known limitation + to revisit alongside the option-2 embedded-emulator path + from the research doc. + +## References + +- PRD 0018 — compose-per-instance lifecycle (the `backend. + launch` context-manager contract this PRD layers against) +- PRD 0019 — active-agents pane + selection model (the + agents-pane row the re-attach + stop verbs hook into) +- `docs/research/claude-code-pane-in-dashboard.md` — option 1 + (handoff) is what `attach_claude` implements here; options 2 + / 3 are out of scope for this PRD +- `claude_bottle/cli/start.py:_launch_bottle` — the function + chunk 1 extracts the prepare + attach pieces out of diff --git a/tests/unit/test_cli_start_settle.py b/tests/unit/test_cli_start_settle.py index f6fad89..8ae0d05 100644 --- a/tests/unit/test_cli_start_settle.py +++ b/tests/unit/test_cli_start_settle.py @@ -45,25 +45,25 @@ class TestCaptureSessionState(_FakeHomeMixin, unittest.TestCase): self._teardown_fake_home() def test_clean_exit_snapshots_but_does_not_mark(self): - start_mod._capture_session_state("dev-abc", exit_code=0) + start_mod.capture_session_state("dev-abc", exit_code=0) self.assertEqual(["dev-abc"], self._snap_calls) self.assertFalse(bottle_state.is_preserved("dev-abc")) def test_crash_snapshots_and_marks(self): - start_mod._capture_session_state("dev-abc", exit_code=137) + start_mod.capture_session_state("dev-abc", exit_code=137) self.assertEqual(["dev-abc"], self._snap_calls) self.assertTrue(bottle_state.is_preserved("dev-abc")) def test_ctrl_c_treated_as_crash(self): # SIGINT delivers exit 130; the operator may have Ctrl-C'd # because something went wrong, so we preserve. - start_mod._capture_session_state("dev-abc", exit_code=130) + start_mod.capture_session_state("dev-abc", exit_code=130) self.assertTrue(bottle_state.is_preserved("dev-abc")) def test_empty_identity_is_noop(self): # Backends without an identity field shouldn't crash this # path (the _identity_from_plan helper falls back to ""). 
- start_mod._capture_session_state("", exit_code=137) + start_mod.capture_session_state("", exit_code=137) self.assertEqual([], self._snap_calls) @@ -77,16 +77,16 @@ class TestSettleState(_FakeHomeMixin, unittest.TestCase): def test_preserved_state_survives(self): bottle_state.write_per_bottle_dockerfile("dev-abc", "FROM x\n") bottle_state.mark_preserved("dev-abc") - start_mod._settle_state("dev-abc") + start_mod.settle_state("dev-abc") self.assertTrue(bottle_state.bottle_state_dir("dev-abc").is_dir()) def test_unpreserved_state_is_cleaned(self): bottle_state.write_per_bottle_dockerfile("dev-abc", "FROM x\n") - start_mod._settle_state("dev-abc") + start_mod.settle_state("dev-abc") self.assertFalse(bottle_state.bottle_state_dir("dev-abc").exists()) def test_empty_identity_is_noop(self): - start_mod._settle_state("") # should not raise + start_mod.settle_state("") # should not raise if __name__ == "__main__": diff --git a/tests/unit/test_dashboard_active_agents.py b/tests/unit/test_dashboard_active_agents.py index 28e62ba..d221647 100644 --- a/tests/unit/test_dashboard_active_agents.py +++ b/tests/unit/test_dashboard_active_agents.py @@ -256,6 +256,73 @@ class TestSelectionStatus(unittest.TestCase): self.assertEqual("[no agent selected]", s) +class TestFilterAgents(unittest.TestCase): + """Pure-function picker filter (PRD 0020 chunk 2). 
Curses-free + so we can exercise the substring + case-insensitivity rules + directly.""" + + NAMES = ["implementer", "researcher", "triage-bot", "ImplDeluxe"] + + def test_empty_query_returns_all(self): + self.assertEqual(self.NAMES, dashboard._filter_agents("", self.NAMES)) + + def test_substring_match(self): + self.assertEqual( + ["implementer", "ImplDeluxe"], + dashboard._filter_agents("impl", self.NAMES), + ) + + def test_case_insensitive(self): + self.assertEqual( + ["implementer", "ImplDeluxe"], + dashboard._filter_agents("IMPL", self.NAMES), + ) + + def test_no_match_returns_empty(self): + self.assertEqual([], dashboard._filter_agents("zzz", self.NAMES)) + + def test_preserves_input_order(self): + # Filtering should never re-sort; the picker draws in the + # order the manifest exposed. + out = dashboard._filter_agents("e", ["beta", "alpha", "echo"]) + self.assertEqual(["beta", "echo"], out) + + +class TestRunningCounts(unittest.TestCase): + """Per-agent running-count surfaced in the picker so the + operator sees `(N running)` before picking. 
Counts come from + the dashboard's current `discover_active_agents` snapshot.""" + + def _agent(self, agent_name: str) -> dashboard.ActiveAgent: + return dashboard.ActiveAgent( + slug=f"{agent_name}-abc", + agent_name=agent_name, + started_at="", + services=(), + ) + + def test_empty_when_no_active_agents(self): + self.assertEqual({}, dashboard._running_counts({}, [])) + + def test_one_per_unique_agent_name(self): + agents = [self._agent("a"), self._agent("b"), self._agent("c")] + self.assertEqual( + {"a": 1, "b": 1, "c": 1}, + dashboard._running_counts({}, agents), + ) + + def test_counts_collisions(self): + agents = [ + self._agent("implementer"), + self._agent("implementer"), + self._agent("researcher"), + ] + self.assertEqual( + {"implementer": 2, "researcher": 1}, + dashboard._running_counts({}, agents), + ) + + class TestSelectedAgent(unittest.TestCase): """`_selected_agent` is what chunk 4's e/p key handlers use to decide whether to fire and which agent to target."""