Merge pull request 'docs(prd-0020): start + attach to agents from the dashboard' (#44) from dashboard-start-attach-agents into main

2026-05-26 03:27:01 -04:00
parent 8cd867f3d2 309ffaa4ab
commit 5f2b40e679
5 changed files with 919 additions and 72 deletions
@@ -14,6 +14,7 @@ from __future__ import annotations
 import argparse
 import curses
 import os
+import shutil
 import subprocess
 import sys
 import tempfile
@@ -23,6 +24,7 @@ from datetime import datetime, timezone
 from pathlib import Path

 from .. import supervise as _supervise
+from ..backend import BottleSpec, get_bottle_backend
 from ..backend.docker.capability_apply import (
    CapabilityApplyError,
    apply_capability_change,
@@ -46,6 +48,7 @@ from ..backend.docker.pipelock_apply import (
    render_allowlist_content,
 )
 from ..log import info
+from ..manifest import Manifest
 from ..supervise import (
    ACTION_OPERATOR_EDIT,
    COMPONENT_FOR_TOOL,
@@ -64,7 +67,13 @@ from ..supervise import (
    write_audit_entry,
    write_response,
 )
-from ._common import PROG
+from ._common import PROG, USER_CWD
+from .start import (
+    attach_claude,
+    capture_session_state,
+    prepare_with_preflight,
+    settle_state,
+)


 # Errors any remediation engine may raise. Caught by the TUI key
@@ -392,6 +401,288 @@ def edit_in_editor(content: str, *, suffix: str = ".tmp") -> str | None:
            pass


+# --- New-agent flow (PRD 0020 chunks 1+2) ----------------------------------
+#
+# `n` opens a picker modal listing the manifest's agents (with a
+# running-count next to each). Selecting one runs prepare → preflight
+# (modal) → backend.launch().__enter__() → handoff (curses.endwin →
+# claude → refresh). The resulting (cm, bottle, identity) tuple
+# lives in the main loop's `bottles` dict, keyed by slug; chunks
+# 3/4 wire Enter / `x` to act on it.
+
+
+def _filter_agents(query: str, names: list[str]) -> list[str]:
+    """Case-insensitive substring filter for the picker. Pure
+    function — no curses, easy to unit-test."""
+    if not query:
+        return list(names)
+    q = query.lower()
+    return [n for n in names if q in n.lower()]
+
+
+def _picker_modal(
+    stdscr: "curses._CursesWindow",
+    names: list[str],
+    running_counts: dict[str, int],
+) -> str | None:
+    """Modal agent picker. Type to filter; j/k or arrows to
+    navigate; Enter to confirm; Esc to abort (first press clears
+    filter if any, second press exits)."""
+    if not names:
+        return None
+    selected = 0
+    query = ""
+    while True:
+        filtered = _filter_agents(query, names)
+        if not filtered:
+            selected = 0
+        elif selected >= len(filtered):
+            selected = len(filtered) - 1
+        elif selected < 0:
+            selected = 0
+
+        _draw_picker_modal(stdscr, names, filtered, selected, query, running_counts)
+        try:
+            key = stdscr.getch()
+        except KeyboardInterrupt:
+            return None
+
+        if key == 27:  # Esc
+            if query:
+                query = ""
+                selected = 0
+                continue
+            return None
+        if key in (curses.KEY_ENTER, 10, 13):
+            if filtered:
+                return filtered[selected]
+            continue
+        if key in (curses.KEY_DOWN, ord("\x0e")):  # KEY_DOWN, Ctrl-N
+            if filtered:
+                selected = min(selected + 1, len(filtered) - 1)
+            continue
+        if key in (curses.KEY_UP, ord("\x10")):  # KEY_UP, Ctrl-P
+            if filtered:
+                selected = max(selected - 1, 0)
+            continue
+        if key in (curses.KEY_BACKSPACE, 127, 8):
+            query = query[:-1]
+            continue
+        # Printable character → append to filter
+        if 32 <= key < 127:
+            query += chr(key)
+            continue
+        # Anything else: ignore
+
+
+def _draw_picker_modal(
+    stdscr: "curses._CursesWindow",
+    all_names: list[str],
+    filtered: list[str],
+    selected: int,
+    query: str,
+    running_counts: dict[str, int],
+) -> None:
+    """Render the picker modal. Width fits the longest name plus
+    the `(N running)` suffix; height fits all filtered items plus
+    a header line, filter line, and border — capped at 80% of
+    screen height with a scrollable inner list if necessary."""
+    h, w = stdscr.getmaxyx()
+    label_width = max(
+        (len(n) for n in all_names), default=10,
+    )
+    suffix_width = len(" (99 running)")
+    inner_width = max(label_width + suffix_width, len("filter: ") + 20, 40)
+    box_w = min(inner_width + 4, max(20, w - 4))
+    max_list_rows = max(3, int(h * 0.6))
+    list_rows = min(len(filtered) if filtered else 1, max_list_rows)
+    box_h = list_rows + 5  # border (2) + title (1) + filter (1) + spacer (1)
+    box_h = min(box_h, max(7, h - 4))
+    top = max(0, (h - box_h) // 2)
+    left = max(0, (w - box_w) // 2)
+
+    win = curses.newwin(box_h, box_w, top, left)
+    win.erase()
+    win.box()
+    win.addnstr(0, 2, " start agent ", box_w - 4, curses.A_BOLD)
+
+    win.addnstr(1, 2, f"filter: {query}", box_w - 4)
+    win.hline(2, 1, curses.ACS_HLINE, box_w - 2)
+
+    list_start_row = 3
+    visible_rows = box_h - list_start_row - 1
+    if not filtered:
+        win.addnstr(
+            list_start_row, 2,
+            "(no agents match filter)",
+            box_w - 4, curses.A_DIM,
+        )
+    else:
+        # Simple windowing around `selected`.
+        first = max(0, selected - visible_rows + 1)
+        if selected < first:
+            first = selected
+        for i, name in enumerate(filtered[first:first + visible_rows]):
+            row = list_start_row + i
+            count = running_counts.get(name, 0)
+            suffix = f"  ({count} running)" if count else ""
+            line = f"  {name}{suffix}"
+            attr = curses.A_REVERSE if (first + i) == selected else curses.A_NORMAL
+            win.addnstr(row, 1, line, box_w - 2, attr)
+
+    win.addnstr(
+        box_h - 1, 2,
+        " Enter: start  Esc: cancel  type: filter ",
+        box_w - 4, curses.A_DIM,
+    )
+    win.refresh()
+
+
+def _preflight_modal(
+    stdscr: "curses._CursesWindow",
+    plan_text: str,
+) -> bool:
+    """Modal preflight confirmation.
+
+    `plan_text` is the multi-line summary the renderer produced; it
+    is drawn in a centered box with `[y/N]` at the bottom, then keys
+    are read from `stdscr` until one decides the outcome.
+
+    Returns True only for `y` / `Y`. `n`, `N`, Esc, Enter, and a
+    KeyboardInterrupt during the read all return False; any other
+    key is ignored and the loop keeps waiting."""
+    lines = plan_text.splitlines() or [""]
+    h, w = stdscr.getmaxyx()
+    inner_width = max(
+        max((len(line) for line in lines), default=10),
+        len("launch this agent? [y/N]"),
+    )
+    # Clamp the box to the screen; addnstr below clips anything wider.
+    box_w = min(inner_width + 4, max(20, w - 4))
+    box_h = min(len(lines) + 5, max(7, h - 4))
+    top = max(0, (h - box_h) // 2)
+    left = max(0, (w - box_w) // 2)
+
+    win = curses.newwin(box_h, box_w, top, left)
+    win.erase()
+    win.box()
+    win.addnstr(0, 2, " launch agent ", box_w - 4, curses.A_BOLD)
+    # Only the first box_h - 4 summary lines are drawn; a plan taller
+    # than the box is cut off rather than scrolled.
+    for i, line in enumerate(lines[: box_h - 4]):
+        win.addnstr(1 + i, 2, line, box_w - 4)
+    win.addnstr(
+        box_h - 2, 2,
+        "launch this agent? [y/N]",
+        box_w - 4, curses.A_BOLD,
+    )
+    # Key hints are drawn over the bottom border.
+    win.addnstr(
+        box_h - 1, 2,
+        " y: launch  N / Esc: abort ",
+        box_w - 4, curses.A_DIM,
+    )
+    win.refresh()
+
+    while True:
+        try:
+            key = stdscr.getch()
+        except KeyboardInterrupt:
+            return False
+        if key in (ord("y"), ord("Y")):
+            return True
+        # Enter is grouped with the "no" keys: the default answer is N.
+        if key in (ord("n"), ord("N"), 27, curses.KEY_ENTER, 10, 13):
+            return False
+
+
+def _capture_preflight_text(plan) -> str:
+    """Capture `plan.print` output by temporarily redirecting
+    stderr. Plan rendering is stderr-bound (existing behavior the
+    CLI relies on); for the modal we want it as a string."""
+    import io
+    import contextlib
+    buf = io.StringIO()
+    with contextlib.redirect_stderr(buf):
+        plan.print(remote_control=False)
+    return buf.getvalue().strip("\n")
+
+
+def _running_counts(
+    bottles: dict, agents_now: list[ActiveAgent],
+) -> dict[str, int]:
+    """Per-agent running count: dashboard-owned + externally-
+    discovered, summed by agent_name. The picker shows this so the
+    operator knows whether picking an agent starts a fresh bottle
+    or a Nth one."""
+    counts: dict[str, int] = {}
+    for a in agents_now:
+        counts[a.agent_name] = counts.get(a.agent_name, 0) + 1
+    return counts
+
+
+def _new_agent_flow(
+    stdscr: "curses._CursesWindow",
+    manifest: Manifest,
+    bottles: dict,
+    agents_now: list[ActiveAgent],
+) -> str:
+    """Pick an agent, prepare + preflight it (modal), launch it,
+    and hand the terminal to claude.
+
+    The launch context manager is entered but deliberately NOT
+    exited here; `(cm, bottle, identity)` is stored in `bottles`
+    under `plan.slug` so later key handlers (chunks 3/4: re-attach
+    and explicit stop) can act on it.
+
+    Returns a status-line message for the dashboard footer: one for
+    a cancelled picker, one for an aborted preflight, one reporting
+    claude's exit code.
+
+    Exceptions are not turned into status messages. A failure in
+    prepare/preflight propagates as-is; a failure while launching
+    is re-raised after the curses screen is refreshed and
+    `settle_state(identity)` has run. The staging directory is
+    removed on every path."""
+    names = sorted(manifest.agents.keys())
+    picked = _picker_modal(stdscr, names, _running_counts(bottles, agents_now))
+    if picked is None:
+        return "agent start aborted"
+
+    spec = BottleSpec(
+        manifest=manifest,
+        agent_name=picked,
+        copy_cwd=False,
+        user_cwd=USER_CWD,
+    )
+    # Modal preflight + prompt. `prepare_with_preflight` calls
+    # render_preflight(plan) once, then prompt_yes() to decide. We
+    # split the two: render captures the text into a closure, the
+    # prompt draws the modal + reads y/N.
+    captured: dict[str, str] = {}
+
+    def _render(plan) -> None:
+        captured["text"] = _capture_preflight_text(plan)
+
+    def _prompt() -> bool:
+        return _preflight_modal(stdscr, captured.get("text", ""))
+
+    stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
+    try:
+        plan, identity = prepare_with_preflight(
+            spec,
+            stage_dir=stage_dir,
+            render_preflight=_render,
+            prompt_yes=_prompt,
+        )
+        if plan is None:
+            # Operator answered N: settle the prepare-time state
+            # for this identity before returning to the dashboard.
+            settle_state(identity)
+            return f"start of {picked!r} aborted at preflight"
+
+        backend = get_bottle_backend()
+        # Launch step writes to stderr (image build, network create,
+        # compose up). Get out of curses' way for the duration so
+        # the lines render cleanly. The handoff stays endwin'd until
+        # claude exits, then we refresh.
+        curses.endwin()
+        try:
+            cm = backend.launch(plan)
+            bottle = cm.__enter__()
+        except BaseException:
+            # Launch failed (or was interrupted): restore the curses
+            # screen, settle state, and let the caller see the error.
+            stdscr.refresh()
+            settle_state(identity)
+            raise
+        # Registered before attaching, so the entry exists even if
+        # the attach step below raises.
+        bottles[plan.slug] = (cm, bottle, identity)
+
+        try:
+            exit_code = attach_claude(bottle, remote_control=False)
+            capture_session_state(identity, exit_code)
+        finally:
+            # Back from the handoff: repaint the dashboard whether
+            # or not the attach succeeded.
+            stdscr.refresh()
+        return f"[{plan.slug}] claude session ended (exit {exit_code})"
+    finally:
+        # stage_dir was the prepare scratch dir; after PRD 0018
+        # chunk 2 it holds nothing the running bottle needs. Reap
+        # immediately regardless of which branch above ran.
+        shutil.rmtree(stage_dir, ignore_errors=True)
+
+
 # --- TUI -------------------------------------------------------------------


@@ -491,6 +782,21 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
    selected_agent = 0
    focus = PANE_PROPOSALS
    status_line = ""
+    # PRD 0020: bottles spun up from inside this dashboard session.
+    # Each entry: slug -> (context-manager, Bottle handle, identity).
+    # We hold the context manager so chunk 4's `x` can call __exit__
+    # on it; chunk 5 quit-cleanup intentionally does NOT iterate this
+    # dict (the user wants quit to leave bottles running).
+    bottles: dict[str, tuple] = {}
+    # Manifest is loaded lazily on first `n` so the dashboard
+    # doesn't fail to start in a directory with no manifest (e.g.,
+    # when the operator is purely watching pre-existing bottles).
+    manifest_cache: list[Manifest | None] = [None]
+
+    def _get_manifest() -> Manifest:
+        if manifest_cache[0] is None:
+            manifest_cache[0] = Manifest.resolve(USER_CWD)
+        return manifest_cache[0]
    while True:
        pending = discover_pending()
        if selected >= len(pending):
@@ -535,6 +841,17 @@ def _main_loop(stdscr: "curses._CursesWindow") -> None:
        if key == 9:  # Tab
            focus = PANE_AGENTS if focus == PANE_PROPOSALS else PANE_PROPOSALS
            continue
+        if key == ord("n"):
+            # PRD 0020 chunk 2: open the picker, start + attach to
+            # the chosen agent, return to the dashboard with the
+            # bottle running.
+            try:
+                manifest = _get_manifest()
+            except Exception as e:
+                status_line = f"manifest load failed: {e}"
+                continue
+            status_line = _new_agent_flow(stdscr, manifest, bottles, agents)
+            continue
        if key in (ord("e"), ord("p")):
            # PRD 0019 chunk 4: agent-scoped edits. Only fire when
            # the agents pane is focused on a real selection;
@@ -697,7 +1014,7 @@ def _render(
            row += 1

    footer = (
-        "[Tab] switch pane  [j/k] move  [Enter] view  "
+        "[n] new agent  [Tab] switch pane  [j/k] move  [Enter] view  "
        "[a/m/r] proposal  [e/p] edit selected agent  [q] quit"
    )
    stdscr.hline(h - 2, 0, curses.ACS_HLINE, w)
@@ -2,8 +2,10 @@
 interactive claude-code session. The container is torn down when the
 session ends.

-The launch core is shared with `cli.py resume <identity>`: see
-_launch_bottle below.
+The launch core is shared with `cli.py resume <identity>` and (PRD
+0020 chunk 1+) the dashboard's in-process start flow: see the
+public helpers `prepare_with_preflight`, `attach_claude`, and the
+private orchestrator `_launch_bottle`.
 """

 from __future__ import annotations
@@ -14,8 +16,10 @@ import shutil
 import sys
 import tempfile
 from pathlib import Path
+from typing import Callable

-from ..backend import BottleSpec, get_bottle_backend
+from ..backend import Bottle, BottleSpec, get_bottle_backend
+from ..backend.docker.bottle_plan import DockerBottlePlan
 from ..backend.docker.bottle_state import (
    cleanup_state,
    is_preserved,
@@ -51,73 +55,67 @@ def cmd_start(argv: list[str]) -> int:
    )


-def _launch_bottle(
+# --- Public helpers shared with the dashboard (PRD 0020) -----------------
+
+
+def prepare_with_preflight(
    spec: BottleSpec,
    *,
-    dry_run: bool,
-    remote_control: bool,
+    stage_dir: Path,
+    render_preflight: Callable[[DockerBottlePlan], None],
+    prompt_yes: Callable[[], bool],
+    dry_run: bool = False,
+) -> tuple[DockerBottlePlan | None, str]:
+    """Run `backend.prepare`, render the preflight summary via the
+    injected callable, prompt y/N via the injected callable. The CLI
+    binds these to stderr/stdin; the dashboard binds them to a
+    curses modal.
+
+    Returns `(plan, identity)`. `plan` is None on dry-run or
+    operator-N, but `identity` is set as soon as `backend.prepare`
+    returns so callers can reap the prepare-time state dir via
+    `settle_state(identity)` in their finally — exactly the existing
+    semantics."""
+    backend = get_bottle_backend()
+    plan = backend.prepare(spec, stage_dir=stage_dir)
+    identity = _identity_from_plan(plan)
+
+    render_preflight(plan)
+
+    if dry_run:
+        info("dry-run requested; not starting container.")
+        return None, identity
+    if not prompt_yes():
+        info("aborted by user")
+        return None, identity
+    return plan, identity
+
+
+def attach_claude(
+    bottle: Bottle, *, remote_control: bool = False,
 ) -> int:
-    """Shared launch core for `start` and `resume`. Builds the plan,
-    prints / dry-runs / prompts as appropriate, brings the bottle up,
-    attaches claude, and prints the resume hint on session end."""
-    stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
-    identity = ""
-    try:
-        backend = get_bottle_backend()
-        plan = backend.prepare(spec, stage_dir=stage_dir)
-        identity = _identity_from_plan(plan)
+    """Run claude inside `bottle` as an interactive session. Blocks
+    until the session ends; returns the claude process's exit code.

-        plan.print(remote_control=remote_control)
-
-        if dry_run:
-            info("dry-run requested; not starting container.")
-            return 0
-
-        sys.stderr.write("claude-bottle: launch this agent? [y/N] ")
-        sys.stderr.flush()
-        reply = read_tty_line()
-        if reply not in ("y", "Y", "yes", "YES"):
-            info("aborted by user")
-            return 0
-
-        with backend.launch(plan) as bottle:
-            info(
-                "attaching interactive claude session "
-                "(Ctrl-D or 'exit' to leave; container will be removed)"
-            )
-            claude_args = ["--dangerously-skip-permissions"]
-            if remote_control:
-                claude_args.append("--remote-control")
-            exit_code = bottle.exec_claude(claude_args, tty=True)
-            info(
-                f"session ended (exit {exit_code}); "
-                f"container {bottle.name} will be removed"
-            )
-            # While the container is still alive: always snapshot the
-            # transcript and — if the agent exited non-zero — mark
-            # the state for preservation. Capability-block already
-            # did both before triggering teardown from the dashboard;
-            # this picks up crashes / Ctrl-Cs / OOM kills the same
-            # way. snapshot_transcript is best-effort so the
-            # capability-block path's prior snapshot isn't clobbered
-            # when the container is already gone.
-            _capture_session_state(identity, exit_code)
-        return 0
-    finally:
-        # PRD 0018 chunk 2: prepare now writes the bottle's bind-mount
-        # sources under state/<slug>/. If we never reached the
-        # launch context (dry-run, preflight-N, prepare exception), or
-        # we did but nothing requested preservation, reap them along
-        # with everything else. _settle_state subsumes the prior
-        # post-launch settlement and the new pre-launch cleanup.
-        _settle_state(identity)
-        shutil.rmtree(stage_dir, ignore_errors=True)
+    Used as the inner step of `./cli.py start` (one-shot) and by the
+    dashboard (PRD 0020), which calls it from inside a `curses.endwin
+    → … → stdscr.refresh()` handoff so the curses surface gets out
+    of the terminal's way while claude has it."""
+    info(
+        "attaching interactive claude session "
+        "(Ctrl-D or 'exit' to leave; container will be removed)"
+    )
+    claude_args = ["--dangerously-skip-permissions"]
+    if remote_control:
+        claude_args.append("--remote-control")
+    return bottle.exec_claude(claude_args, tty=True)


-def _capture_session_state(identity: str, exit_code: int) -> None:
+def capture_session_state(identity: str, exit_code: int) -> None:
    """Inside the launch context, while the container is still
    alive: snapshot the transcript and mark for preservation if
-    claude crashed. Pure-function-ish; tests stub the helpers."""
+    claude crashed. Public for the dashboard's death-handling path
+    (PRD 0020 open question 3)."""
    if not identity:
        return
    snapshot_transcript(identity)
@@ -125,7 +123,11 @@ def _capture_session_state(identity: str, exit_code: int) -> None:
        mark_preserved(identity)


-def _settle_state(identity: str) -> None:
+def settle_state(identity: str) -> None:
+    """Post-teardown housekeeping: print the resume hint if the
+    state was preserved, otherwise reap the per-bottle state dir.
+    Public so the dashboard's explicit-stop path calls the same
+    settlement the CLI uses on context exit."""
    if not identity:
        return
    if is_preserved(identity):
@@ -140,3 +142,68 @@ def _identity_from_plan(plan: object) -> str:
    identity attribute; for now we duck-type to keep this layer
    backend-agnostic."""
    return getattr(plan, "slug", "")
+
+
+def _text_prompt_yes() -> bool:
+    """Default `prompt_yes` for CLI use: reads y/N from the
+    controlling tty via stderr prompt + tty-line read."""
+    sys.stderr.write("claude-bottle: launch this agent? [y/N] ")
+    sys.stderr.flush()
+    reply = read_tty_line()
+    return reply in ("y", "Y", "yes", "YES")
+
+
+def _text_render_preflight(*, remote_control: bool):
+    def _render(plan: DockerBottlePlan) -> None:
+        plan.print(remote_control=remote_control)
+    return _render
+
+
+def _launch_bottle(
+    spec: BottleSpec,
+    *,
+    dry_run: bool,
+    remote_control: bool,
+) -> int:
+    """Shared launch core for `start` and `resume`. Builds the plan,
+    prints / dry-runs / prompts as appropriate, brings the bottle up,
+    attaches claude, and prints the resume hint on session end.
+
+    Returns 0 on every non-raising path: dry-run, operator-N, and a
+    completed session alike. Claude's own exit code is logged and
+    passed to `capture_session_state`, not returned."""
+    stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage."))
+    # Stays "" if prepare_with_preflight raises, in which case the
+    # settle_state call in `finally` is a no-op.
+    identity = ""
+    try:
+        plan, identity = prepare_with_preflight(
+            spec,
+            stage_dir=stage_dir,
+            render_preflight=_text_render_preflight(remote_control=remote_control),
+            prompt_yes=_text_prompt_yes,
+            dry_run=dry_run,
+        )
+        if plan is None:
+            # Dry-run or operator-N; `finally` still settles state.
+            return 0
+
+        backend = get_bottle_backend()
+        with backend.launch(plan) as bottle:
+            exit_code = attach_claude(bottle, remote_control=remote_control)
+            info(
+                f"session ended (exit {exit_code}); "
+                f"container {bottle.name} will be removed"
+            )
+            # While the container is still alive: always snapshot the
+            # transcript and — if the agent exited non-zero — mark
+            # the state for preservation. Capability-block already
+            # did both before triggering teardown from the dashboard;
+            # this picks up crashes / Ctrl-Cs / OOM kills the same
+            # way. snapshot_transcript is best-effort so the
+            # capability-block path's prior snapshot isn't clobbered
+            # when the container is already gone.
+            capture_session_state(identity, exit_code)
+        return 0
+    finally:
+        # PRD 0018 chunk 2: prepare now writes the bottle's bind-mount
+        # sources under state/<slug>/. If we never reached the
+        # launch context (dry-run, preflight-N, prepare exception), or
+        # we did but nothing requested preservation, reap them along
+        # with everything else. `settle_state` subsumes the prior
+        # post-launch settlement and the new pre-launch cleanup.
+        settle_state(identity)
+        shutil.rmtree(stage_dir, ignore_errors=True)
@@ -0,0 +1,396 @@
+# PRD 0020: Start and attach to agents from inside the dashboard
+
+- **Status:** Draft
+- **Author:** didericis
+- **Created:** 2026-05-26
+
+## Summary
+
+Today the dashboard is read-only: it surfaces pending proposals
+and active agents (PRD 0019) but can't *start* an agent or
+*re-enter* one. The operator's path is split — they launch
+agents from one terminal (`./cli.py start <name>`), and watch
+them from another (`./cli.py dashboard`).
+
+This PRD collapses that split. The dashboard becomes the
+operator's single surface: pressing a key opens an agent picker,
+selecting one runs the existing prepare → preflight → launch
+flow inside a curses-friendly variant, and on yield drops to a
+full-screen `docker exec -it … claude` session (the "handoff"
+shape from `docs/research/claude-code-pane-in-dashboard.md`).
+When the operator exits claude, the dashboard re-renders with
+the now-running bottle visible in the agents pane.
+
+Crucially, the bottle's lifetime is decoupled from both the
+claude session AND the dashboard process. Exit claude → back to
+dashboard, bottle still running. Start another agent → two
+bottles up at once. Quit the dashboard → bottles continue
+running. Teardown is **always explicit**: the operator presses
+`x` on an agent, or runs `./cli.py cleanup` later.
+
+## Problem
+
+Two real frictions today:
+
+1. **Two terminals for one workflow.** The dashboard is the
+   right shape to *watch* agents — proposals queue, status
+   updates, operator-edit verbs — but it's the wrong shape to
+   *start* them. Today you open a second terminal for that. In
+   parallel use (3–5 bottles), the operator has 5+ terminals
+   open and the dashboard's "active agents" pane is hopelessly
+   behind reality because they just spawned three in a row.
+
+2. **`./cli.py start` ties the bottle to a single claude
+   session.** The start command's `ExitStack` brings the bottle
+   up, runs claude, and tears down on Ctrl-D — fine for a one-
+   shot session, wrong for "let me bounce in and out of this
+   bottle a few times while triaging proposals." Today the only
+   way to re-enter a bottle after exiting claude is to start a
+   fresh one and lose all in-bottle state.
+
+The dashboard already discovers active bottles, scopes
+operator-edit verbs to a selected agent (PRD 0019), and
+captures full-merged logs per bottle (PRD 0018). It already
+*wants* to be the primary surface. This PRD finishes that.
+
+## Goals / Success Criteria
+
+1. From inside `./cli.py dashboard`, pressing `n` (new) opens
+   an agent picker listing every agent defined in the manifest.
+   Selecting one runs `prepare → preflight → launch`.
+2. The preflight Y/N summary renders cleanly — either as a
+   curses modal or via `curses.endwin() → text-mode prompt
+   → restore`, matching the existing editor-flow pattern.
+3. On launch success, the dashboard performs a handoff (option
+   1 from the research doc): `curses.endwin()` → `docker exec
+   -it claude-bottle-<slug> claude --dangerously-skip-permissions`
+   → on exit, `stdscr.refresh()` and re-render with the new
+   bottle in the agents pane.
+4. The bottle's lifetime is managed from the dashboard and is NOT
+   tied to any single claude session (nor, per goal 7, to the
+   dashboard process itself). Exiting claude (Ctrl-D, `/exit`)
+   returns to the dashboard with the bottle still running. The
+   operator can start more agents and re-enter previous ones.
+5. Pressing Enter on a selected row in the agents pane re-
+   attaches to that agent's bottle via the same handoff — drops
+   to full-screen claude, returns on exit.
+6. Pressing `x` (or similar — keybinding decided in design)
+   on a selected agent stops just that bottle (compose down +
+   state cleanup) without quitting the dashboard.
+7. Quitting the dashboard (`q`) leaves every running bottle
+   running. Bottle teardown is always explicit (per-bottle `x`
+   or `./cli.py cleanup`). The next `./cli.py dashboard`
+   invocation re-discovers them via `list_active_slugs()` and
+   surfaces re-attach for any it can reconstruct context for
+   (see "Cross-dashboard re-attach" below).
+
+## Non-goals
+
+- **A pane that hosts the claude TUI alongside proposals.** The
+  embedded-emulator option from the research doc is out of
+  scope. The handoff (option 1) is the v1; option 2 is a
+  separate PRD if and when handoff is observably insufficient.
+- **Adopting bottles started by an out-of-dashboard `./cli.py
+  start` invocation.** Those have their own ExitStack-owner and
+  the dashboard never takes over their teardown (it already
+  shows them today). Enter re-attach still works for them (see
+  the next bullet); only teardown via `x` is limited to bottles
+  the *current dashboard process* started.
+- **Resurrecting an out-of-process bottle into a new dashboard
+  with full re-attach.** A bottle started by `./cli.py start`
+  in another terminal — or by a previous dashboard run, now
+  exited — appears in the agents pane (already does, PRD 0019)
+  and can be re-attached via `docker exec -it claude` because
+  the agent container is still running `sleep infinity`. That's
+  in scope. What's *out* is anything that requires the launch-
+  context object to drive teardown — e.g., the
+  ExitStack-tracked CA + state cleanup `_settle_state` performs
+  today. Cross-dashboard re-attach uses the existing
+  `./cli.py cleanup` for teardown, not an `x` keypress (see
+  open questions).
+- **Multi-window UI.** Single curses window, two existing
+  panes (proposals + agents); the agent picker is a modal, not
+  a third pane.
+- **Removing `./cli.py start`.** Stays as the script-friendly /
+  legacy entry point. The dashboard is the new default.
+
+## Scope
+
+### In scope
+
+- Manifest-driven agent picker (curses modal): list view with
+  j/k navigation + Enter to confirm, Esc to abort.
+- Preflight rendering inside the dashboard's curses surface
+  (modal or drop-and-resume — picked in design).
+- A new `_dashboard_start_flow` that wraps prepare + preflight
+  + launch and returns a `DockerBottle` handle the dashboard
+  retains alongside its `pending` and `agents` lists.
+- A `bottles: dict[slug, (context manager, DockerBottle)]` map
+  on the main loop that holds every dashboard-launched handle
+  so `x` can tear a single bottle down. The map is NOT torn
+  down on dashboard exit (goal 7).
+- `Enter` on an agents-pane row → re-attach handoff (docker
+  exec -it claude into the existing container).
+- `x` (or similar) on an agents-pane row → explicit per-bottle
+  stop without quitting.
+- `q` (existing quit key) → quit without tearing down any
+  dashboard-launched bottle; they keep running (goal 7).
+
+### Out of scope
+
+- Changes to `./cli.py start` itself. It keeps its current
+  shape; the dashboard reuses its internal pieces (backend.
+  prepare / backend.launch) without reaching through the CLI
+  layer.
+- Changes to `backend.launch`'s context-manager contract; the
+  dashboard's bottle map just holds the context manager and the
+  Bottle it yielded, and calls `__exit__` on explicit stop
+  (`x`) only — never on quit.
+- New manifest fields. The picker reads what's already there.
+- Adopting non-dashboard bottles into the dashboard's owned set.
+
+## Proposed design
+
+### Bottle ownership
+
+Today's flow:
+
+```
+./cli.py start agent
+  └─ with backend.launch(plan) as bottle:        ← bottle alive while inside `with`
+       bottle.exec_claude([...], tty=True)       ← blocks until claude exits
+     # context exits → compose down → state cleanup
+```
+
+The proposed dashboard-driven flow:
+
+```
+./cli.py dashboard
+  └─ bottles: dict[str, tuple[ContextManager, DockerBottle]] = {}
+
+     # operator presses `n`, picks agent
+     cm = backend.launch(plan)
+     bottle = cm.__enter__()                     ← enter but don't bind to a `with`
+     bottles[plan.slug] = (cm, bottle)
+
+     # operator interacts via:
+     curses.endwin()
+     bottle.exec_claude([...], tty=True)         ← blocks; returns on Ctrl-D
+     stdscr.refresh()
+     # bottle is STILL ALIVE — only the claude process exited
+
+     # ... operator presses `x` on selected agent:
+     cm, _ = bottles.pop(slug)
+     cm.__exit__(None, None, None)               ← tears down just that one
+
+     # ... operator presses `q`:
+     return  # bottles dict still populated; no teardown
+```
+
+Two shifts:
+
+1. Bottles outlive any single claude session — the dashboard
+   manages enter/exit per bottle, not per attach. Exit claude
+   → still in the dashboard with the bottle running.
+2. Bottles outlive the dashboard process itself. Quitting the
+   dashboard does NOT close the context managers; the docker
+   compose project keeps running with the agent container in
+   `sleep infinity`. A subsequent dashboard invocation
+   re-discovers it via `docker compose ls` (PRD 0019's
+   `list_active_slugs`) and surfaces re-attach.
+
+   The trade-off: state cleanup that today runs in
+   `_settle_state` (transcript snapshot, preserve-marker
+   evaluation, state-dir reap) doesn't fire on a quit-while-
+   running bottle. It DOES fire when the operator explicitly
+   stops via `x`, because that calls `cm.__exit__`. For
+   bottles a previous dashboard quit on, `./cli.py cleanup`
+   is the path — its compose-down + state-reap logic
+   already covers the case.
+
+### Cross-dashboard re-attach
+
+When the dashboard discovers a bottle in `discover_active_agents`
+that it didn't itself start (a previous-dashboard or external
+`./cli.py start` bottle), Enter still attaches via `docker exec
+-it … claude` — the agent container is running `sleep infinity`
+exactly the same way regardless of who started it. The only
+thing the current dashboard lacks for those bottles is the
+launch-context object needed to drive a clean teardown via
+`x`.
+
+For v1 we surface this honestly: pressing `x` on a non-owned
+agent shows a status hint pointing at `./cli.py cleanup` (or
+a slug-targeted variant of `./cli.py cleanup`, if we add that
+flag later). The agent stays alive; the operator handles teardown
+out-of-band. Enter (re-attach) works for both owned and
+non-owned bottles.
+
+### Agent picker
+
+Pressing `n` opens a centered modal listing every agent name
+from `spec.manifest.agents`. j/k navigates; Enter selects; Esc
+aborts. Width is the longest name + bottle name + a column for
+"already running?" so the operator can see at a glance whether
+an agent already has a live bottle. Picking it still starts a
+fresh one (different slug suffix).
+
+```
+┌─ start agent ───────────────────────────┐
+│   implementer       dev      (running)  │
+│ > researcher        dev                 │
+│   triage-bot        sandbox             │
+└─ Enter: start  Esc: cancel ─────────────┘
+```
+
+Starting an agent that already has a running bottle is allowed
+— each `start` mints a fresh slug — but the picker surfaces the
+already-running state so the operator doesn't accidentally
+double-launch.
+
+### Preflight Y/N
+
+Two viable shapes:
+
+**Modal** — render the preflight summary lines (`agent / env /
+skills / bottle / git gate / egress`) in a centered curses
+modal with `[y/N]` at the bottom. Capture the next keypress.
+
+**Drop-and-resume** — `curses.endwin()`, print the preflight to
+stderr, read y/N from stdin, restore curses. Matches the
+editor-flow + handoff pattern; lower implementation cost.
+
+Lean toward **modal** for the y/N because it doesn't flash the
+terminal between dashboard frames. Drop-and-resume is acceptable
+if modal proves fiddly.
+
+### Re-attach (Enter on agent)
+
+Same handoff pattern the new-agent flow uses. For an agent the
+dashboard started this session, the dashboard holds the
+`DockerBottle` handle in its `bottles` dict and calls
+`bottle.exec_claude(...)`. For an agent it discovered via
+`list_active_slugs` (previous-dashboard or external start),
+the dashboard synthesizes a one-shot `DockerBottle` from the
+slug — container name is `claude-bottle-<slug>`, no prompt
+path because the agent's claude config already has
+`--append-system-prompt-file` baked in from the original
+launch — and runs the same exec. Either way, Enter drops to
+full-screen claude; on exit the dashboard re-renders.
+
+### Explicit per-bottle stop
+
+`x` on a dashboard-owned agent: pop the `(cm, bottle)` from
+the dict, call `cm.__exit__(None, None, None)` which drives
+the existing compose-down + state-settle logic. Refresh the
+agents pane.
+
+`x` on a non-owned agent (discovered via `list_active_slugs`
+but not in `bottles` dict): no-op with status hint pointing
+at `./cli.py cleanup` (the existing path that tears down
+ANY claude-bottle compose project plus reaps state dirs).
+
+### Dashboard quit
+
+`q` exits the dashboard process with status 0 without touching
+any running bottles. The `bottles` dict goes out of scope, but
+because the context managers' `__exit__` is never invoked,
+the `docker compose` project keeps running. The next dashboard
+invocation discovers the bottles via `list_active_slugs` and
+surfaces re-attach.
+
+This is a real departure from today's `./cli.py start`
+semantics (which couples bottle lifetime to the process via
+ExitStack). It's intentional: the dashboard is a watching +
+acting surface, not a lifetime owner.
+
+## Implementation chunks
+
+Sized for one PR each.
+
+1. **Refactor `_launch_bottle` so the launch + exec_claude
+   pieces are separable.** Today's `cli/start.py` runs both
+   inside one function. Extract `prepare_with_preflight(spec,
+   *, render_preflight, prompt_yes)` and `attach_claude(bottle,
+   *, remote_control)`. The CLI's existing one-shot use binds
+   them as before; the dashboard binds them with curses-aware
+   render + prompt callables. No behavior change.
+2. **Agent picker modal + new-agent flow.** New key `n` opens
+   the picker; `prepare_with_preflight` runs against the
+   selected agent; on Y, `backend.launch(plan)` is entered and
+   its `(cm, bottle)` pair is stored in the dashboard's
+   `bottles` dict; handoff invokes `attach_claude`.
+3. **Re-attach via Enter on owned agents-pane row.** Looks up
+   the slug in the dashboard's `bottles` map; if present →
+   handoff; else → status-line hint pointing at `./cli.py
+   resume`.
+4. **Explicit per-bottle stop (`x` keybinding).** Pop the
+   bottle's `(cm, bottle)` entry from the `bottles` dict, call
+   `cm.__exit__(None, None, None)`, refresh.
+5. **Quit (`q`) leaves bottles running.** The normal return
+   path must NOT close any launch context managers. Document
+   the "exiting the dashboard does not tear down the bottles it
+   started; use `x` or `./cli.py cleanup`" contract in
+   `dashboard.py`'s module docstring.
+
+## Resolved questions
+
+1. **Modal vs. drop-and-resume for preflight Y/N.** Resolved:
+   **modal.** Render the preflight lines centered in a curses
+   sub-window with `[y/N]` at the bottom; capture the next
+   keypress. If geometry proves fiddly during implementation
+   we'll fall back to drop-and-resume, but modal is the target.
+
+2. **Agent picker: text-filter typing.** Resolved: **yes,
+   include filter typing.** As the operator types, the list
+   filters to agents whose name matches (substring,
+   case-insensitive). j/k still navigates within the filtered
+   set; Esc clears the filter on first press, exits the picker
+   on the second.
+
+3. **Container-died-during-claude handling.** Keep the design
+   as drafted: transcript snapshot (`snapshot_transcript`) +
+   `mark_preserved` if exit code is non-zero + remove from
+   the `bottles` dict + status line `"claude session for
+   [slug] ended with exit N; preserved for resume"`. The
+   bottle's `cm.__exit__` would normally run on stop; here it
+   runs as part of the death-handling (the container is
+   already gone, but compose-down + state-settle still
+   sequence the network removal + state cleanup correctly).
+
+4. **Double-start of the same agent.** Allowed. The picker
+   surfaces a `(N running)` annotation next to any agent name
+   that already has live bottles in this dashboard's `bottles`
+   dict OR in `list_active_slugs()`, so the operator sees the
+   running-count before picking. Selecting an already-running
+   agent name mints a fresh slug for the new bottle as
+   normal.
+
+5. **Quit behavior.** Resolved: **`q` does NOT tear down any
+   bottles.** Dashboard exit is purely a UI exit; the
+   bottles dict goes out of scope without invoking `__exit__`,
+   so the `docker compose` projects keep running. Bottle
+   teardown is always explicit: per-bottle `x` (for
+   dashboard-owned), or `./cli.py cleanup` (for everything).
+
+## Open questions
+
+6. **Race between handoff and 1s refresh tick.** While the
+   dashboard's `stdscr.timeout` is set, a key press fires the
+   handoff and the dashboard sits in `docker exec` for minutes.
+   `discover_active_agents` / `discover_pending` don't poll
+   during that window — that's harmless on its own (the moment
+   we `stdscr.refresh()` after exec returns, the next loop
+   iter runs discovery and the panes reflect reality), but
+   it does mean: (a) proposals queued during the claude
+   session won't fire any operator notification until the
+   handoff ends, and (b) a bottle that died mid-claude won't
+   be detectable until the operator exits back to the
+   dashboard. Not blocking v1 — flagging as a known limitation
+   to revisit alongside the option-2 embedded-emulator path
+   from the research doc.
+
+## References
+
+- PRD 0018 — compose-per-instance lifecycle (the
+  `backend.launch` context-manager contract this PRD layers
+  against)
+- PRD 0019 — active-agents pane + selection model (the
+  agents-pane row the re-attach + stop verbs hook into)
+- `docs/research/claude-code-pane-in-dashboard.md` — option 1
+  (handoff) is what `attach_claude` implements here; options 2
+  / 3 are out of scope for this PRD
+- `claude_bottle/cli/start.py:_launch_bottle` — the function
+  chunk 1 extracts the prepare + attach pieces out of
@@ -45,25 +45,25 @@ class TestCaptureSessionState(_FakeHomeMixin, unittest.TestCase):
        self._teardown_fake_home()

    def test_clean_exit_snapshots_but_does_not_mark(self):
-        start_mod._capture_session_state("dev-abc", exit_code=0)
+        start_mod.capture_session_state("dev-abc", exit_code=0)
        self.assertEqual(["dev-abc"], self._snap_calls)
        self.assertFalse(bottle_state.is_preserved("dev-abc"))

    def test_crash_snapshots_and_marks(self):
-        start_mod._capture_session_state("dev-abc", exit_code=137)
+        start_mod.capture_session_state("dev-abc", exit_code=137)
        self.assertEqual(["dev-abc"], self._snap_calls)
        self.assertTrue(bottle_state.is_preserved("dev-abc"))

    def test_ctrl_c_treated_as_crash(self):
        # SIGINT delivers exit 130; the operator may have Ctrl-C'd
        # because something went wrong, so we preserve.
-        start_mod._capture_session_state("dev-abc", exit_code=130)
+        start_mod.capture_session_state("dev-abc", exit_code=130)
        self.assertTrue(bottle_state.is_preserved("dev-abc"))

    def test_empty_identity_is_noop(self):
        # Backends without an identity field shouldn't crash this
        # path (the _identity_from_plan helper falls back to "").
-        start_mod._capture_session_state("", exit_code=137)
+        start_mod.capture_session_state("", exit_code=137)
        self.assertEqual([], self._snap_calls)


@@ -77,16 +77,16 @@ class TestSettleState(_FakeHomeMixin, unittest.TestCase):
    def test_preserved_state_survives(self):
        bottle_state.write_per_bottle_dockerfile("dev-abc", "FROM x\n")
        bottle_state.mark_preserved("dev-abc")
-        start_mod._settle_state("dev-abc")
+        start_mod.settle_state("dev-abc")
        self.assertTrue(bottle_state.bottle_state_dir("dev-abc").is_dir())

    def test_unpreserved_state_is_cleaned(self):
        bottle_state.write_per_bottle_dockerfile("dev-abc", "FROM x\n")
-        start_mod._settle_state("dev-abc")
+        start_mod.settle_state("dev-abc")
        self.assertFalse(bottle_state.bottle_state_dir("dev-abc").exists())

    def test_empty_identity_is_noop(self):
-        start_mod._settle_state("")  # should not raise
+        start_mod.settle_state("")  # should not raise


 if __name__ == "__main__":
@@ -256,6 +256,73 @@ class TestSelectionStatus(unittest.TestCase):
        self.assertEqual("[no agent selected]", s)


+class TestFilterAgents(unittest.TestCase):
+    """Pure-function picker filter (PRD 0020 chunk 2). Curses-free
+    so we can exercise the substring + case-insensitivity rules
+    directly."""
+
+    NAMES = ["implementer", "researcher", "triage-bot", "ImplDeluxe"]
+
+    def test_empty_query_returns_all(self):
+        self.assertEqual(self.NAMES, dashboard._filter_agents("", self.NAMES))
+
+    def test_substring_match(self):
+        self.assertEqual(
+            ["implementer", "ImplDeluxe"],
+            dashboard._filter_agents("impl", self.NAMES),
+        )
+
+    def test_case_insensitive(self):
+        self.assertEqual(
+            ["implementer", "ImplDeluxe"],
+            dashboard._filter_agents("IMPL", self.NAMES),
+        )
+
+    def test_no_match_returns_empty(self):
+        self.assertEqual([], dashboard._filter_agents("zzz", self.NAMES))
+
+    def test_preserves_input_order(self):
+        # Filtering should never re-sort; the picker draws in the
+        # order the manifest exposed.
+        out = dashboard._filter_agents("e", ["beta", "alpha", "echo"])
+        self.assertEqual(["beta", "echo"], out)
+
+
+class TestRunningCounts(unittest.TestCase):
+    """Per-agent running-count surfaced in the picker so the
+    operator sees `(N running)` before picking. Counts come from
+    the dashboard's current `discover_active_agents` snapshot."""
+
+    def _agent(self, agent_name: str) -> dashboard.ActiveAgent:
+        return dashboard.ActiveAgent(
+            slug=f"{agent_name}-abc",
+            agent_name=agent_name,
+            started_at="",
+            services=(),
+        )
+
+    def test_empty_when_no_active_agents(self):
+        self.assertEqual({}, dashboard._running_counts({}, []))
+
+    def test_one_per_unique_agent_name(self):
+        agents = [self._agent("a"), self._agent("b"), self._agent("c")]
+        self.assertEqual(
+            {"a": 1, "b": 1, "c": 1},
+            dashboard._running_counts({}, agents),
+        )
+
+    def test_counts_collisions(self):
+        agents = [
+            self._agent("implementer"),
+            self._agent("implementer"),
+            self._agent("researcher"),
+        ]
+        self.assertEqual(
+            {"implementer": 2, "researcher": 1},
+            dashboard._running_counts({}, agents),
+        )
+
+
 class TestSelectedAgent(unittest.TestCase):
    """`_selected_agent` is what chunk 4's e/p key handlers use to
    decide whether to fire and which agent to target."""