Merge pull request 'docs(prd-0019): active agents in dashboard + agent-scoped edit verbs' (#38) from dashboard-active-agents into main
This commit was merged in pull request #38.
This commit is contained in:
@@ -27,8 +27,10 @@ from ..backend.docker.capability_apply import (
|
||||
CapabilityApplyError,
|
||||
apply_capability_change,
|
||||
)
|
||||
from ..backend.docker.bottle_state import read_metadata
|
||||
from ..backend.docker.compose import (
|
||||
COMPOSE_PROJECT_PREFIX,
|
||||
compose_project_name,
|
||||
list_active_slugs,
|
||||
)
|
||||
from ..backend.docker.egress_apply import (
|
||||
@@ -119,6 +121,79 @@ def _discover_active_with_service(service: str) -> list[str]:
|
||||
return sorted(set(out))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ActiveAgent:
    """Immutable summary of one running bottle for the agents pane (PRD 0019).

    The `services` tuple names the sidecar services currently up for
    the bottle. It gates which edit verbs apply: without an `egress`
    service, `routes edit` is meaningless.
    """

    # Slug identifying the bottle.
    slug: str
    # Taken from metadata.json; "?" if missing.
    agent_name: str
    # ISO 8601 timestamp from metadata.json; "" if missing.
    started_at: str
    # Alphabetical service names, e.g. ("egress", "pipelock", "supervise").
    services: tuple[str, ...]
|
||||
|
||||
|
||||
def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
    """Bucket `docker ps` rows into a `{project: {service, ...}}` mapping.

    Each input line is expected to look like
    `<project-label>\\t<service-label>` (one line per container).
    Lines where either side of the first tab is empty, or that have
    no tab at all, are ignored. This is a pure function so it can be
    tested without docker; the caller owns the subprocess call.
    """
    grouped: dict[str, set[str]] = {}
    for row in stdout.splitlines():
        # Split on the first tab only; anything after it is the service.
        project, _sep, service = row.partition("\t")
        if project and service:
            if project not in grouped:
                grouped[project] = set()
            grouped[project].add(service)
    return grouped
|
||||
|
||||
|
||||
def _query_services_by_project() -> dict[str, set[str]]:
    """Run one `docker ps` call and return `{project: {service, ...}}`.

    PRD 0019 open question #1 picked this shape over per-bottle
    `compose ps` calls: for hosts with N bottles this is one
    subprocess instead of N per refresh tick.

    Returns an empty mapping when `docker` cannot be launched (missing,
    not executable, or any other OS-level failure) or when it exits
    non-zero, so callers can treat the result as best-effort.
    """
    # Go-template row: project label, a literal tab, then service label.
    row_format = (
        '{{.Label "com.docker.compose.project"}}'
        "\t"
        '{{.Label "com.docker.compose.service"}}'
    )
    try:
        r = subprocess.run(
            [
                "docker", "ps",
                "--filter", "label=com.docker.compose.project",
                "--format", row_format,
            ],
            capture_output=True, text=True, check=False,
        )
    except OSError:
        # FileNotFoundError (no docker on PATH) is the common case, but a
        # non-executable binary raises PermissionError; both mean "docker
        # isn't usable", so neither should crash the caller.
        return {}
    if r.returncode != 0:
        return {}
    return _parse_services_by_project(r.stdout or "")
|
||||
|
||||
|
||||
def discover_active_agents() -> list[ActiveAgent]:
    """Describe every currently-running claude-bottle compose project (PRD 0019).

    Combines the slugs from `list_active_slugs()` with each bottle's
    metadata and the services map from one `docker ps` query. Output
    order follows the slug list. Returns [] when there are no active
    slugs (per the original note, this covers docker being unreachable).
    """
    active = list_active_slugs()
    if not active:
        return []

    running = _query_services_by_project()
    agents: list[ActiveAgent] = []
    for slug in active:
        # A project absent from the map still gets a row, with no services.
        up = running.get(compose_project_name(slug), set())
        meta = read_metadata(slug)
        if meta:
            name, started = meta.agent_name, meta.started_at
        else:
            # Missing metadata: keep the row and show placeholders.
            name, started = "?", ""
        agents.append(
            ActiveAgent(
                slug=slug,
                agent_name=name,
                started_at=started,
                services=tuple(sorted(up)),
            )
        )
    return agents
|
||||
|
||||
|
||||
def discover_egress_slugs() -> list[str]:
|
||||
"""Slugs of bottles with a running egress sidecar. Used by
|
||||
the operator-initiated `routes edit` verb."""
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
# PRD 0019: Active agents in the dashboard, agent-scoped edit verbs
|
||||
|
||||
- **Status:** Draft
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-26
|
||||
|
||||
## Summary
|
||||
|
||||
The dashboard today is proposal-centric: it lists every pending
|
||||
supervise tool call across every running bottle and lets the
|
||||
operator approve / modify / reject from one place. The operator-
|
||||
initiated `routes edit` (`e`) and `pipelock edit` (`p`) verbs are
|
||||
*global* — they discover every running sidecar of that kind and
|
||||
prompt for which bottle to edit if more than one is up.
|
||||
|
||||
This PRD adds a first-class "active agents" view to the dashboard
|
||||
and reshapes the edit verbs to be **agent-scoped**: the operator
|
||||
picks an agent, then `e` / `p` (and any future per-agent verbs)
|
||||
target that agent without a separate prompt.
|
||||
|
||||
After this PRD the dashboard answers two questions in one screen:
|
||||
|
||||
1. What's queued for me to act on? (existing proposals view)
|
||||
2. What's currently running, and what would I act on if I
|
||||
wanted to push a config edit without an agent prompt?
|
||||
|
||||
## Problem
|
||||
|
||||
Two rough edges in the current dashboard:
|
||||
|
||||
1. **No visibility into what's actually running.** The dashboard
|
||||
shows only pending proposals. If no agent has called a tool,
|
||||
the screen reads "no pending proposals" — even when five
|
||||
bottles are quietly working. The operator has to `docker
|
||||
compose ls` (or `./cli.py cleanup -n` to see the y/N preview)
|
||||
to find out what's actually live.
|
||||
|
||||
2. **`e` / `p` re-discover-and-disambiguate every invocation.**
|
||||
Today each press of `e` runs `discover_egress_slugs()`, finds
|
||||
the running egress sidecars, and prompts if there's more than
|
||||
one. The prompt interrupts the keyboard flow — and once the
|
||||
operator picks a bottle, there's no carry-over to the next
|
||||
edit. Editing pipelock for the same bottle right after is
|
||||
another prompt.
|
||||
|
||||
The proposal-centric design is fine for the "agent triggered a
|
||||
remediation" case but flips the relationship the wrong way for
|
||||
the "operator wants to make an unprompted change" case.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
1. The dashboard's main screen shows two lists: pending proposals
|
||||
(above) and active agents (below) — both visible at once, no
|
||||
tab / mode switch.
|
||||
2. Each active-agent row shows enough for the operator to
|
||||
recognize the bottle at a glance: identity (slug),
|
||||
agent_name (from metadata.json), started_at, and which
|
||||
sidecars are up.
|
||||
3. The operator can select an agent row with `j` / `k` /
|
||||
arrow keys (the same nav keys already in use for proposals),
|
||||
with a clear keystroke that swaps the active list (e.g.,
|
||||
`Tab` toggles which list `j` / `k` moves through).
|
||||
4. Pressing `e` (routes edit) or `p` (pipelock edit) with an
|
||||
agent selected targets that agent. No disambiguation prompt;
|
||||
no global discover.
|
||||
5. Pressing `e` / `p` with NO agent selected is a no-op (status
|
||||
line surfaces "no agent selected"). The global discover-
|
||||
and-prompt path comes out — selection in the agents pane is
|
||||
now the only way to scope an edit.
|
||||
6. The active-agents list refreshes on the same ~1s tick as the
|
||||
proposals list so an agent starting / stopping is reflected
|
||||
without operator action.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **Per-agent proposal filtering.** The proposals list stays
|
||||
global across bottles. Filtering ("show me only this agent's
|
||||
proposals") might be a follow-up but isn't this PRD.
|
||||
- **Agent lifecycle from the dashboard.** Starting / stopping
|
||||
agents stays in `./cli.py start` / `./cli.py cleanup`. The
|
||||
dashboard reads state; it doesn't change it.
|
||||
- **Preserved-but-not-running bottles.** The active-agents list
|
||||
is strictly "what's running now" (cross-referenced from
|
||||
`docker compose ls`). Preserved state dirs without a live
|
||||
project don't appear — `./cli.py resume <identity>` is the
|
||||
path for those.
|
||||
- **A separate per-agent detail view.** The agent rows are
|
||||
one-line summaries. Pressing Enter on a proposal still drops
|
||||
into proposal-detail; we don't add an analogous agent-detail
|
||||
screen in v1.
|
||||
- **Replacing the existing `--once` mode.** `dashboard --once`
|
||||
stays a proposal-only listing. No active-agents output there
|
||||
(different consumers — `--once` is for scripts; the agents
|
||||
view is for the interactive TUI).
|
||||
|
||||
## Scope
|
||||
|
||||
### In scope
|
||||
|
||||
- A new "active agents" pane in the curses TUI, rendered below
|
||||
the proposals pane.
|
||||
- A discovery helper that returns `(slug, agent_name,
|
||||
started_at, services_up)` per active compose project. Reads
|
||||
agent_name + started_at from each project's `metadata.json`,
|
||||
cross-references `docker compose ls` for the live list.
|
||||
- Tab-toggle selection state: which pane the cursor is in. `j`
|
||||
/ `k` / arrow keys move within that pane.
|
||||
- Rewire `_operator_edit_routes_flow` and
|
||||
`_operator_edit_allowlist_flow` to require a slug from the
|
||||
caller. The discover-and-prompt scaffolding (no-arg
|
||||
discover + single-bottle shortcut + multi-bottle prompt)
|
||||
comes out. The dashboard's key handlers pass the agents-pane
|
||||
selection in directly, or no-op if nothing is selected.
|
||||
- Status-line indicator showing which agent is selected (or
|
||||
"no agent selected" when in the proposals pane).
|
||||
- Tests for the new discovery helper.
|
||||
|
||||
### Out of scope
|
||||
|
||||
- Changes to proposal handling (`a` / `m` / `r` / Enter all
|
||||
unchanged).
|
||||
- Changes to the queue-dir / supervise sidecar protocol.
|
||||
- New CLI surface beyond what's in `./cli.py dashboard`.
|
||||
- Touching the manifest, compose renderer, launch lifecycle.
|
||||
|
||||
## Proposed design
|
||||
|
||||
### Layout
|
||||
|
||||
```
|
||||
claude-bottle dashboard (3 pending, 2 active)
|
||||
─────────────────────────────────────────────────────────
|
||||
proposals:
|
||||
03:14:22 [implementer-cy7a6] egress-block abc123…
|
||||
03:13:55 [researcher-9xqs1] pipelock-block def456…
|
||||
03:13:10 [implementer-cy7a6] capability-block ghi789…
|
||||
|
||||
active agents:
|
||||
> implementer-cy7a6 implementer started 02:55:01 [pipelock,egress,git-gate,supervise]
|
||||
researcher-9xqs1 researcher started 02:58:14 [pipelock,supervise]
|
||||
|
||||
[selected: implementer-cy7a6] q quit Tab switch j/k nav e routes p pipelock a/m/r/Enter
|
||||
```
|
||||
|
||||
- One screen, two lists. Header counts both totals.
|
||||
- A `>` cursor and reverse-video highlight mark the currently
|
||||
selected row in the active pane.
|
||||
- Status footer carries `[selected: <slug>]` (or `[no agent
|
||||
selected]`) so it's always clear what `e` / `p` will target.
|
||||
|
||||
### Selection model
|
||||
|
||||
- `Tab` (or Shift-Tab) toggles which pane `j` / `k` /
|
||||
arrow keys move through.
|
||||
- Each pane keeps its own selection index. Switching panes
|
||||
doesn't lose the position in the other.
|
||||
- `e` / `p`:
|
||||
- An agent is selected (cursor in the agents pane on a row)
|
||||
→ use that agent's slug.
|
||||
- Otherwise → no-op with a status-line "no agent selected".
|
||||
The pre-PRD global discover-and-prompt code paths come out
|
||||
of `_operator_edit_routes_flow` and
|
||||
`_operator_edit_allowlist_flow`.
|
||||
|
||||
### Active-agent discovery
|
||||
|
||||
A new helper `discover_active_agents()` in dashboard.py
|
||||
returns a list of `ActiveAgent(slug, agent_name, started_at,
|
||||
services)`:
|
||||
|
||||
1. `list_active_slugs()` (already in
|
||||
`backend/docker/compose.py`) → list of slugs.
|
||||
2. For each slug: read `state/<slug>/metadata.json` →
|
||||
`agent_name`, `started_at`.
|
||||
3. For each slug: `docker compose -p <project> ps --format
|
||||
json` → set of running service names.
|
||||
|
||||
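Step 3 is the part that's per-bottle and could be slow on
hosts with many bottles. See open question 1 below, which leans
toward a single `docker ps` call bucketed by compose project
label instead of one `compose ps` call per bottle.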
|
||||
|
||||
### Implementation chunks
|
||||
|
||||
Sized small.
|
||||
|
||||
1. **Discovery helper + dataclass.** Pure-ish: takes
|
||||
`list_active_slugs()` as injected, reads metadata + queries
|
||||
compose ps. Unit-test with mocked subprocess. No UI yet.
|
||||
2. **Render the agents pane.** Wire `discover_active_agents`
|
||||
into `_main_loop`'s tick, render below proposals, no
|
||||
selection model yet (cursor stays in proposals).
|
||||
3. **Selection state + Tab toggle.** Add the `which_pane`
|
||||
variable, route `j/k/arrow` based on it, status footer.
|
||||
4. **Agent-scoped `e` / `p`.** Pass the selected slug into the
edit flows when the agents pane is focused; when the proposals
pane is focused, no-op with a "no agent selected" status line
(the global discover-and-prompt path comes out, per Goal 5).
|
||||
|
||||
## Open questions
|
||||
|
||||
1. **`compose ps` per bottle: too slow?** On a host with
|
||||
10+ active bottles, calling `docker compose -p <X> ps` per
|
||||
project on every 1s tick is 10+ subprocess calls per
|
||||
second. Options: (a) cache the services list and refresh
|
||||
on a slower cadence (e.g., every 5s); (b) skip the
|
||||
per-bottle services column and just show the slug + agent
|
||||
name; (c) one `docker ps --filter label=...` call that
|
||||
buckets containers by `com.docker.compose.project` label.
|
||||
Probably (c) — one call, no per-bottle fanout.
|
||||
|
||||
2. **What if `metadata.json` is missing or stale?** For a
|
||||
bottle started by pre-chunk-3 code (no `compose_project`
|
||||
field), or a state dir written by a tool we don't know
|
||||
about, the metadata read can fail. Render with
|
||||
`agent_name = ?` rather than dropping the row.
|
||||
|
||||
3. **Selection persistence across refresh ticks.** If the
|
||||
currently-selected agent is no longer running (it exited
|
||||
between ticks), the selection should fall back to the
|
||||
previous row, not jump to the top. Mirrors the existing
|
||||
proposals-list behavior.
|
||||
|
||||
4. **Color / highlight for the selected agent.** The proposals
|
||||
pane uses green for newly-arrived. Agents could use a
|
||||
different attribute (e.g., reverse video for selection,
|
||||
no color for the row itself). Aesthetic decision; pick
|
||||
something readable in the standard 8-color palette.
|
||||
|
||||
5. **Selecting a proposal cross-selects its agent?** Possible
|
||||
UX: highlighting a proposal in the proposals pane could
|
||||
auto-move the agents-pane cursor to that proposal's
|
||||
bottle. Cute, but probably confusing — the explicit Tab
|
||||
toggle is clearer. Out of v1.
|
||||
|
||||
## References
|
||||
|
||||
- PRD 0013 — supervise sidecar (proposals + queue)
|
||||
- PRD 0014 / 0015 / 0016 — the apply flows the edit verbs
|
||||
drive
|
||||
- PRD 0018 — compose-per-instance; `list_active_slugs` +
|
||||
metadata.json source-of-truth
|
||||
@@ -0,0 +1,176 @@
|
||||
"""Unit: dashboard.discover_active_agents (PRD 0019 chunk 1).
|
||||
|
||||
The full discover function fans out to `docker compose ls`, `docker
|
||||
ps`, and per-bottle metadata.json reads — too much for a unit test.
|
||||
Tests split into:
|
||||
|
||||
- Parser tests for `_parse_services_by_project`: pure function, no
|
||||
I/O, deterministic on its input string.
|
||||
- Integration-shaped tests that monkeypatch the slug list +
|
||||
services map and read metadata from a fake home, then assert
|
||||
the assembled `ActiveAgent` shape.
|
||||
|
||||
The actual `docker ps` invocation is exercised by manual probing
|
||||
during development and the (real-docker) integration tests; here
|
||||
we lock down the shape contract so a regression surfaces in unit CI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker import bottle_state
|
||||
from claude_bottle.cli import dashboard
|
||||
|
||||
|
||||
class TestParseServicesByProject(unittest.TestCase):
    """Parser-only checks for `dashboard._parse_services_by_project`."""

    @staticmethod
    def _parse(*rows: str):
        # Each row becomes one newline-terminated line of fake `docker ps` output.
        return dashboard._parse_services_by_project(
            "".join(row + "\n" for row in rows)
        )

    def test_empty_input(self):
        self.assertEqual({}, self._parse())

    def test_one_container(self):
        parsed = self._parse("claude-bottle-dev-abc\tegress")
        self.assertEqual({"claude-bottle-dev-abc": {"egress"}}, parsed)

    def test_multiple_services_per_project(self):
        parsed = self._parse(
            "claude-bottle-dev-abc\tegress",
            "claude-bottle-dev-abc\tpipelock",
            "claude-bottle-dev-abc\tsupervise",
        )
        expected = {"claude-bottle-dev-abc": {"egress", "pipelock", "supervise"}}
        self.assertEqual(expected, parsed)

    def test_multiple_projects(self):
        parsed = self._parse(
            "proj-a\tegress",
            "proj-b\tpipelock",
            "proj-a\tsupervise",
        )
        expected = {"proj-a": {"egress", "supervise"}, "proj-b": {"pipelock"}}
        self.assertEqual(expected, parsed)

    def test_skips_lines_missing_either_field(self):
        # The docker filter should keep unlabeled containers out of the
        # output, but the parser is expected to tolerate them anyway.
        parsed = self._parse(
            "claude-bottle-dev-abc\tegress",
            "no-tab-here",
            "\tmissing-project",
            "missing-service\t",
        )
        self.assertEqual({"claude-bottle-dev-abc": {"egress"}}, parsed)
|
||||
|
||||
|
||||
class _FakeHomeMixin:
    """Mixin that redirects `supervise.claude_bottle_root` into a temp dir."""

    def _setup_fake_home(self) -> None:
        """Create the temp dir and swap in a root function pointing at it."""
        self._tmp = tempfile.TemporaryDirectory(prefix="dashboard-aa-test.")
        real_root = supervise.claude_bottle_root

        def _restore() -> None:
            # Put the real function back on the module.
            supervise.claude_bottle_root = real_root

        def fake_root() -> Path:
            return Path(self._tmp.name) / ".claude-bottle"

        self._restore_home = _restore
        supervise.claude_bottle_root = fake_root  # type: ignore[assignment]

    def _teardown_fake_home(self) -> None:
        """Undo the patch first, then delete the temp dir."""
        self._restore_home()
        self._tmp.cleanup()
|
||||
|
||||
|
||||
class TestDiscoverActiveAgents(_FakeHomeMixin, unittest.TestCase):
    """Shape checks for `dashboard.discover_active_agents` with stubbed inputs."""

    def setUp(self) -> None:
        self._setup_fake_home()
        # Remember the real callables so tearDown can put them back.
        self._orig_slugs = dashboard.list_active_slugs
        self._orig_services = dashboard._query_services_by_project

    def tearDown(self) -> None:
        dashboard.list_active_slugs = self._orig_slugs
        dashboard._query_services_by_project = self._orig_services
        self._teardown_fake_home()

    def _stub(self, slugs: list[str], services_by_project: dict[str, set[str]]) -> None:
        """Replace the slug list and the services query with canned values."""
        dashboard.list_active_slugs = lambda: slugs
        dashboard._query_services_by_project = lambda: services_by_project

    @staticmethod
    def _write_meta(identity: str, agent_name: str, started_at: str) -> None:
        """Write a metadata record for `identity` into the fake home."""
        record = bottle_state.BottleMetadata(
            identity=identity,
            agent_name=agent_name,
            cwd="",
            copy_cwd=False,
            started_at=started_at,
            compose_project=f"claude-bottle-{identity}",
        )
        bottle_state.write_metadata(record)

    def test_no_active_slugs_returns_empty(self):
        self._stub([], {})
        self.assertEqual([], dashboard.discover_active_agents())

    def test_assembles_from_metadata_and_services(self):
        self._write_meta("dev-abc", "implementer", "2026-05-26T03:00:00+00:00")
        self._stub(
            ["dev-abc"],
            {"claude-bottle-dev-abc": {"pipelock", "egress", "supervise"}},
        )
        found = dashboard.discover_active_agents()
        self.assertEqual(1, len(found))
        agent = found[0]
        self.assertEqual("dev-abc", agent.slug)
        self.assertEqual("implementer", agent.agent_name)
        self.assertEqual("2026-05-26T03:00:00+00:00", agent.started_at)
        self.assertEqual(("egress", "pipelock", "supervise"), agent.services)

    def test_missing_metadata_renders_question_mark(self):
        # No state dir exists for this slug: the row is kept and
        # agent_name falls back to "?".
        self._stub(["mystery-zzz"], {"claude-bottle-mystery-zzz": {"pipelock"}})
        found = dashboard.discover_active_agents()
        self.assertEqual(1, len(found))
        agent = found[0]
        self.assertEqual("?", agent.agent_name)
        self.assertEqual("", agent.started_at)
        self.assertEqual(("pipelock",), agent.services)

    def test_no_services_for_project_yields_empty_tuple(self):
        # Race window between `compose up` returning and the containers
        # showing up in `docker ps`: the row renders with no services.
        self._write_meta("warming-up", "researcher", "2026-05-26T03:05:00+00:00")
        self._stub(["warming-up"], {})
        found = dashboard.discover_active_agents()
        self.assertEqual((), found[0].services)

    def test_preserves_slug_order(self):
        for slug in ("z-1", "a-1", "m-1"):
            self._write_meta(slug, slug.split("-")[0], "t")
        # list_active_slugs returns sorted output; the result must keep
        # that order.
        ordered = ["a-1", "m-1", "z-1"]
        self._stub(["a-1", "m-1", "z-1"], {})
        found = dashboard.discover_active_agents()
        self.assertEqual(ordered, [agent.slug for agent in found])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user