Merge pull request 'docs(prd-0019): active agents in dashboard + agent-scoped edit verbs' (#38) from dashboard-active-agents into main

2026-05-26 01:14:15 -04:00
parent 6babfcc656 6e4a9f606f
commit 8636982e80
3 changed files with 490 additions and 0 deletions
@@ -27,8 +27,10 @@ from ..backend.docker.capability_apply import (
    CapabilityApplyError,
    apply_capability_change,
 )
+from ..backend.docker.bottle_state import read_metadata
 from ..backend.docker.compose import (
    COMPOSE_PROJECT_PREFIX,
+    compose_project_name,
    list_active_slugs,
 )
 from ..backend.docker.egress_apply import (
@@ -119,6 +121,79 @@ def _discover_active_with_service(service: str) -> list[str]:
    return sorted(set(out))


+@dataclass(frozen=True)
+class ActiveAgent:
+    """One running bottle, as the agents pane displays it (PRD
+    0019).
+
+    Instances are immutable (frozen dataclass). `services` is the
+    set of sidecar service names currently up for this bottle,
+    used to gate which edit verbs apply (no `egress` → `routes
+    edit` is meaningless)."""
+
+    # Bottle slug identifying the bottle this row describes.
+    slug: str
+    # Agent name taken from metadata.json; "?" when metadata is missing.
+    agent_name: str           # from metadata.json; "?" if missing
+    # Start timestamp taken from metadata.json; empty string when missing.
+    started_at: str           # ISO 8601 from metadata.json; "" if missing
+    # Running service names, stored as a sorted tuple.
+    services: tuple[str, ...]  # alphabetical, e.g. ("egress", "pipelock", "supervise")
+
+
+def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
+    """Parse `docker ps` output formatted as
+    `<project-label>\\t<service-label>` (one line per container)
+    into a `{project: {service, ...}}` mapping. Pure function for
+    testing — the docker invocation is in the caller."""
+    out: dict[str, set[str]] = {}
+    for line in stdout.splitlines():
+        project, _, service = line.partition("\t")
+        if not project or not service:
+            continue
+        out.setdefault(project, set()).add(service)
+    return out
+
+
+def _query_services_by_project() -> dict[str, set[str]]:
+    """One `docker ps` call → `{project: {service, ...}}`. PRD
+    0019 open question #1 picked this shape over per-bottle
+    `compose ps` calls — for hosts with N bottles, this is one
+    subprocess instead of N per refresh tick."""
+    try:
+        r = subprocess.run(
+            [
+                "docker", "ps",
+                "--filter", "label=com.docker.compose.project",
+                "--format",
+                '{{.Label "com.docker.compose.project"}}'
+                "\t"
+                '{{.Label "com.docker.compose.service"}}',
+            ],
+            capture_output=True, text=True, check=False,
+        )
+    except FileNotFoundError:
+        return {}
+    if r.returncode != 0:
+        return {}
+    return _parse_services_by_project(r.stdout or "")
+
+
+def discover_active_agents() -> list[ActiveAgent]:
+    """All currently-running claude-bottle compose projects with
+    their metadata + service set. Returns [] when docker isn't
+    reachable. PRD 0019."""
+    slugs = list_active_slugs()
+    if not slugs:
+        return []
+    services_by_project = _query_services_by_project()
+    out: list[ActiveAgent] = []
+    for slug in slugs:
+        project = compose_project_name(slug)
+        services = services_by_project.get(project, set())
+        metadata = read_metadata(slug)
+        out.append(ActiveAgent(
+            slug=slug,
+            agent_name=metadata.agent_name if metadata else "?",
+            started_at=metadata.started_at if metadata else "",
+            services=tuple(sorted(services)),
+        ))
+    return out
+
+
 def discover_egress_slugs() -> list[str]:
    """Slugs of bottles with a running egress sidecar. Used by
    the operator-initiated `routes edit` verb."""
@@ -0,0 +1,239 @@
+# PRD 0019: Active agents in the dashboard, agent-scoped edit verbs
+
+- **Status:** Draft
+- **Author:** didericis
+- **Created:** 2026-05-26
+
+## Summary
+
+The dashboard today is proposal-centric: it lists every pending
+supervise tool call across every running bottle and lets the
+operator approve / modify / reject from one place. The operator-
+initiated `routes edit` (`e`) and `pipelock edit` (`p`) verbs are
+*global* — they discover every running sidecar of that kind and
+prompt for which bottle to edit if more than one is up.
+
+This PRD adds a first-class "active agents" view to the dashboard
+and reshapes the edit verbs to be **agent-scoped**: the operator
+picks an agent, then `e` / `p` (and any future per-agent verbs)
+target that agent without a separate prompt.
+
+After this PRD the dashboard answers two questions in one screen:
+
+  1. What's queued for me to act on? (existing proposals view)
+  2. What's currently running, and what would I act on if I
+     wanted to push a config edit without an agent prompt?
+
+## Problem
+
+Two rough edges in the current dashboard:
+
+1. **No visibility into what's actually running.** The dashboard
+   shows only pending proposals. If no agent has called a tool,
+   the screen reads "no pending proposals" — even when five
+   bottles are quietly working. The operator has to run
+   `docker compose ls` (or `./cli.py cleanup -n` to see the
+   y/N preview) to find out what's actually live.
+
+2. **`e` / `p` re-discover-and-disambiguate every invocation.**
+   Today each press of `e` runs `discover_egress_slugs()`, finds
+   the running egress sidecars, and prompts if there's more than
+   one. The prompt interrupts the keyboard flow — and once the
+   operator picks a bottle, there's no carry-over to the next
+   edit. Editing pipelock for the same bottle right after is
+   another prompt.
+
+The proposal-centric design is fine for the "agent triggered a
+remediation" case but flips the relationship the wrong way for
+the "operator wants to make an unprompted change" case.
+
+## Goals / Success Criteria
+
+1. The dashboard's main screen shows two lists: pending proposals
+   (above) and active agents (below) — both visible at once, no
+   tab / mode switch.
+2. Each active-agent row shows enough for the operator to
+   recognize the bottle at a glance: identity (slug),
+   agent_name (from metadata.json), started_at, and which
+   sidecars are up.
+3. The operator can select an agent row with `j` / `k` /
+   arrow keys (the same nav keys already in use for proposals),
+   with a clear keystroke that swaps the active list (e.g.,
+   `Tab` toggles which list `j` / `k` moves through).
+4. Pressing `e` (routes edit) or `p` (pipelock edit) with an
+   agent selected targets that agent. No disambiguation prompt;
+   no global discover.
+5. Pressing `e` / `p` with NO agent selected is a no-op (status
+   line surfaces "no agent selected"). The global discover-
+   and-prompt path is removed — selection in the agents pane
+   is now the only way to scope an edit.
+6. The active-agents list refreshes on the same ~1s tick as the
+   proposals list so an agent starting / stopping is reflected
+   without operator action.
+
+## Non-goals
+
+- **Per-agent proposal filtering.** The proposals list stays
+  global across bottles. Filtering ("show me only this agent's
+  proposals") might be a follow-up but isn't this PRD.
+- **Agent lifecycle from the dashboard.** Starting / stopping
+  agents stays in `./cli.py start` / `./cli.py cleanup`. The
+  dashboard reads state; it doesn't change it.
+- **Preserved-but-not-running bottles.** The active-agents list
+  is strictly "what's running now" (cross-referenced from
+  `docker compose ls`). Preserved state dirs without a live
+  project don't appear — `./cli.py resume <identity>` is the
+  path for those.
+- **A separate per-agent detail view.** The agent rows are
+  one-line summaries. Pressing Enter on a proposal still drops
+  into proposal-detail; we don't add an analogous agent-detail
+  screen in v1.
+- **Replacing the existing `--once` mode.** `dashboard --once`
+  stays a proposal-only listing. No active-agents output there
+  (different consumers — `--once` is for scripts; the agents
+  view is for the interactive TUI).
+
+## Scope
+
+### In scope
+
+- A new "active agents" pane in the curses TUI, rendered below
+  the proposals pane.
+- A discovery helper that returns `(slug, agent_name,
+  started_at, services_up)` per active compose project. Reads
+  agent_name + started_at from each project's `metadata.json`,
+  cross-references `docker compose ls` for the live list.
+- Tab-toggle selection state: which pane the cursor is in. `j`
+  / `k` / arrow keys move within that pane.
+- Rewire `_operator_edit_routes_flow` and
+  `_operator_edit_allowlist_flow` to require a slug from the
+  caller. The discover-and-prompt scaffolding (no-arg
+  discover + single-bottle shortcut + multi-bottle prompt)
+  comes out. The dashboard's key handlers pass the agents-pane
+  selection in directly, or no-op if nothing is selected.
+- Status-line indicator showing which agent is selected (or
+  "no agent selected" when in the proposals pane).
+- Tests for the new discovery helper.
+
+### Out of scope
+
+- Changes to proposal handling (`a` / `m` / `r` / Enter all
+  unchanged).
+- Changes to the queue-dir / supervise sidecar protocol.
+- New CLI surface beyond what's in `./cli.py dashboard`.
+- Touching the manifest, compose renderer, launch lifecycle.
+
+## Proposed design
+
+### Layout
+
+```
+claude-bottle dashboard  (3 pending, 2 active)
+─────────────────────────────────────────────────────────
+proposals:
+  03:14:22  [implementer-cy7a6]  egress-block         abc123…
+  03:13:55  [researcher-9xqs1]   pipelock-block       def456…
+  03:13:10  [implementer-cy7a6]  capability-block     ghi789…
+
+active agents:
+> implementer-cy7a6  implementer   started 02:55:01  [pipelock,egress,git-gate,supervise]
+  researcher-9xqs1   researcher    started 02:58:14  [pipelock,supervise]
+
+[selected: implementer-cy7a6]  q quit  Tab switch  j/k nav  e routes  p pipelock  a/m/r/Enter
+```
+
+- One screen, two lists. Header counts both totals.
+- A `>` cursor and reverse-video highlight mark the currently
+  selected row in the active pane.
+- Status footer carries `[selected: <slug>]` (or `[no agent
+  selected]`) so it's always clear what `e` / `p` will target.
+
+### Selection model
+
+- `Tab` (or Shift-Tab) toggles which pane `j` / `k` /
+  arrow keys move through.
+- Each pane keeps its own selection index. Switching panes
+  doesn't lose the position in the other.
+- `e` / `p`:
+  - An agent is selected (cursor in the agents pane on a row)
+    → use that agent's slug.
+  - Otherwise → no-op with a status-line "no agent selected".
+    The pre-PRD global discover-and-prompt code paths come out
+    of `_operator_edit_routes_flow` and
+    `_operator_edit_allowlist_flow`.
+
+### Active-agent discovery
+
+A new helper `discover_active_agents()` in dashboard.py
+returns a list of `ActiveAgent(slug, agent_name, started_at,
+services)`:
+
+  1. `list_active_slugs()` (already in
+     `backend/docker/compose.py`) → list of slugs.
+  2. For each slug: read `state/<slug>/metadata.json` →
+     `agent_name`, `started_at`.
+  3. For each slug: `docker compose -p <project> ps --format
+     json` → set of running service names.
+
+Step 3 is the per-bottle part and could be slow on hosts with
+many bottles; see open question 1 below for alternatives.
+
+### Implementation chunks
+
+Sized small.
+
+1. **Discovery helper + dataclass.** Pure-ish: takes the
+   `list_active_slugs()` result as an injected input, reads
+   metadata, and queries compose ps. Unit-test with mocked
+   subprocess. No UI yet.
+2. **Render the agents pane.** Wire `discover_active_agents`
+   into `_main_loop`'s tick, render below proposals, no
+   selection model yet (cursor stays in proposals).
+3. **Selection state + Tab toggle.** Add the `which_pane`
+   variable, route `j/k/arrow` based on it, status footer.
+4. **Agent-scoped `e` / `p`.** Pass selected slug into the
+   edit flows when the agents pane is focused; when the
+   proposals pane is focused, no-op with a "no agent
+   selected" status line (the global discover-and-prompt
+   path is removed, per goal 5).
+
+## Open questions
+
+1. **`compose ps` per bottle: too slow?** On a host with
+   10+ active bottles, calling `docker compose -p <X> ps` per
+   project on every 1s tick is 10+ subprocess calls per
+   second. Options: (a) cache the services list and refresh
+   on a slower cadence (e.g., every 5s); (b) skip the
+   per-bottle services column and just show the slug + agent
+   name; (c) one `docker ps --filter label=...` call that
+   buckets containers by `com.docker.compose.project` label.
+   Probably (c) — one call, no per-bottle fanout.
+
+2. **What if `metadata.json` is missing or stale?** For a
+   bottle started by pre-chunk-3 code (no `compose_project`
+   field), or a state dir written by a tool we don't know
+   about, the metadata read can fail. Render with
+   `agent_name = ?` rather than dropping the row.
+
+3. **Selection persistence across refresh ticks.** If the
+   currently-selected agent is no longer running (it exited
+   between ticks), the selection should fall back to the
+   previous row, not jump to the top. Mirrors the existing
+   proposals-list behavior.
+
+4. **Color / highlight for the selected agent.** The proposals
+   pane uses green for newly-arrived. Agents could use a
+   different attribute (e.g., reverse video for selection,
+   no color for the row itself). Aesthetic decision; pick
+   something readable in the standard 8-color palette.
+
+5. **Selecting a proposal cross-selects its agent?** Possible
+   UX: highlighting a proposal in the proposals pane could
+   auto-move the agents-pane cursor to that proposal's
+   bottle. Cute, but probably confusing — the explicit Tab
+   toggle is clearer. Out of v1.
+
+## References
+
+- PRD 0013 — supervise sidecar (proposals + queue)
+- PRD 0014 / 0015 / 0016 — the apply flows the edit verbs
+  drive
+- PRD 0018 — compose-per-instance; `list_active_slugs` +
+  metadata.json source-of-truth
@@ -0,0 +1,176 @@
+"""Unit: dashboard.discover_active_agents (PRD 0019 chunk 1).
+
+The full discover function fans out to `docker compose ls`, `docker
+ps`, and per-bottle metadata.json reads — too much for a unit test.
+Tests split into:
+
+  - Parser tests for `_parse_services_by_project`: pure function, no
+    I/O, deterministic on its input string.
+  - Integration-shaped tests that monkeypatch the slug list +
+    services map and read metadata from a fake home, then assert
+    the assembled `ActiveAgent` shape.
+
+The actual `docker ps` invocation is exercised by manual probing
+during development and the (real-docker) integration tests; here
+we lock down the shape contract so a regression surfaces in unit CI.
+"""
+
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from claude_bottle import supervise
+from claude_bottle.backend.docker import bottle_state
+from claude_bottle.cli import dashboard
+
+
+class TestParseServicesByProject(unittest.TestCase):
+    def test_empty_input(self):
+        self.assertEqual({}, dashboard._parse_services_by_project(""))
+
+    def test_one_container(self):
+        out = dashboard._parse_services_by_project(
+            "claude-bottle-dev-abc\tegress\n"
+        )
+        self.assertEqual({"claude-bottle-dev-abc": {"egress"}}, out)
+
+    def test_multiple_services_per_project(self):
+        out = dashboard._parse_services_by_project(
+            "claude-bottle-dev-abc\tegress\n"
+            "claude-bottle-dev-abc\tpipelock\n"
+            "claude-bottle-dev-abc\tsupervise\n"
+        )
+        self.assertEqual(
+            {"claude-bottle-dev-abc": {"egress", "pipelock", "supervise"}},
+            out,
+        )
+
+    def test_multiple_projects(self):
+        out = dashboard._parse_services_by_project(
+            "proj-a\tegress\n"
+            "proj-b\tpipelock\n"
+            "proj-a\tsupervise\n"
+        )
+        self.assertEqual(
+            {"proj-a": {"egress", "supervise"}, "proj-b": {"pipelock"}},
+            out,
+        )
+
+    def test_skips_lines_missing_either_field(self):
+        # Defends against unlabeled containers slipping into the
+        # output (the filter should prevent it, but be robust).
+        out = dashboard._parse_services_by_project(
+            "claude-bottle-dev-abc\tegress\n"
+            "no-tab-here\n"
+            "\tmissing-project\n"
+            "missing-service\t\n"
+        )
+        self.assertEqual({"claude-bottle-dev-abc": {"egress"}}, out)
+
+
+class _FakeHomeMixin:
+    def _setup_fake_home(self) -> None:
+        self._tmp = tempfile.TemporaryDirectory(prefix="dashboard-aa-test.")
+        original = supervise.claude_bottle_root
+
+        def fake_root() -> Path:
+            return Path(self._tmp.name) / ".claude-bottle"
+
+        supervise.claude_bottle_root = fake_root  # type: ignore[assignment]
+        self._restore_home = lambda: setattr(supervise, "claude_bottle_root", original)
+
+    def _teardown_fake_home(self) -> None:
+        self._restore_home()
+        self._tmp.cleanup()
+
+
+class TestDiscoverActiveAgents(_FakeHomeMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        self._setup_fake_home()
+        self._orig_slugs = dashboard.list_active_slugs
+        self._orig_services = dashboard._query_services_by_project
+
+    def tearDown(self) -> None:
+        dashboard.list_active_slugs = self._orig_slugs
+        dashboard._query_services_by_project = self._orig_services
+        self._teardown_fake_home()
+
+    def _stub(self, slugs: list[str], services_by_project: dict[str, set[str]]) -> None:
+        dashboard.list_active_slugs = lambda: slugs
+        dashboard._query_services_by_project = lambda: services_by_project
+
+    def test_no_active_slugs_returns_empty(self):
+        self._stub([], {})
+        self.assertEqual([], dashboard.discover_active_agents())
+
+    def test_assembles_from_metadata_and_services(self):
+        bottle_state.write_metadata(bottle_state.BottleMetadata(
+            identity="dev-abc",
+            agent_name="implementer",
+            cwd="",
+            copy_cwd=False,
+            started_at="2026-05-26T03:00:00+00:00",
+            compose_project="claude-bottle-dev-abc",
+        ))
+        self._stub(
+            ["dev-abc"],
+            {"claude-bottle-dev-abc": {"pipelock", "egress", "supervise"}},
+        )
+        agents = dashboard.discover_active_agents()
+        self.assertEqual(1, len(agents))
+        a = agents[0]
+        self.assertEqual("dev-abc", a.slug)
+        self.assertEqual("implementer", a.agent_name)
+        self.assertEqual("2026-05-26T03:00:00+00:00", a.started_at)
+        self.assertEqual(("egress", "pipelock", "supervise"), a.services)
+
+    def test_missing_metadata_renders_question_mark(self):
+        # State dir doesn't exist for this slug — agent_name falls
+        # back to "?" rather than dropping the row.
+        self._stub(["mystery-zzz"], {"claude-bottle-mystery-zzz": {"pipelock"}})
+        agents = dashboard.discover_active_agents()
+        self.assertEqual(1, len(agents))
+        self.assertEqual("?", agents[0].agent_name)
+        self.assertEqual("", agents[0].started_at)
+        self.assertEqual(("pipelock",), agents[0].services)
+
+    def test_no_services_for_project_yields_empty_tuple(self):
+        # Race window between `compose up` returning and the actual
+        # containers being listed in `docker ps` — render the row
+        # but with no services.
+        bottle_state.write_metadata(bottle_state.BottleMetadata(
+            identity="warming-up",
+            agent_name="researcher",
+            cwd="",
+            copy_cwd=False,
+            started_at="2026-05-26T03:05:00+00:00",
+            compose_project="claude-bottle-warming-up",
+        ))
+        self._stub(["warming-up"], {})
+        agents = dashboard.discover_active_agents()
+        self.assertEqual((), agents[0].services)
+
+    def test_preserves_slug_order(self):
+        for slug in ("z-1", "a-1", "m-1"):
+            bottle_state.write_metadata(bottle_state.BottleMetadata(
+                identity=slug,
+                agent_name=slug.split("-")[0],
+                cwd="",
+                copy_cwd=False,
+                started_at="t",
+                compose_project=f"claude-bottle-{slug}",
+            ))
+        # list_active_slugs returns sorted; preserve that order in
+        # the output.
+        self._stub(["a-1", "m-1", "z-1"], {})
+        agents = dashboard.discover_active_agents()
+        self.assertEqual(
+            ["a-1", "m-1", "z-1"],
+            [a.slug for a in agents],
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()