refactor(forge): address PR #318 review — PR/Issue split, sqlite state, drop footer
Addresses the five review comments on PR #318: - Split PullRequest from Issue and add a dedicated read_pr method on Forge/ScopedForge/GiteaForge (a PR carries merge state an issue does not); is_pr_open now derives from read_pr. - Replace the JSON-file forge state with a thin swappable CRUD interface (ForgeStateStore) backed by SQLite (SqliteForgeStateStore) at ~/.bot-bottle/bot-bottle.db. - Remove the provenance footer (provenance.py + its test): a mutable, unsigned PR comment is not an audit record. - Reword the PRD: provenance is exposed via an API, not surfaced in the PR; document the Issue/PullRequest split and the SQLite store. pyright clean (whole repo), pylint 10/10, 38 forge/resume unit tests pass; no remaining refs to the removed provenance module or old JSON state API. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01WL77TgFxKbs3cidGMG9dz7
This commit is contained in:
@@ -20,7 +20,7 @@ import urllib.error
|
||||
import urllib.request
|
||||
from typing import Any
|
||||
|
||||
from ..forge.base import Comment, Forge, Issue
|
||||
from ..forge.base import Comment, Forge, Issue, PullRequest
|
||||
|
||||
# Bound every Gitea call: a hung instance must not stall the sidecar.
|
||||
_API_TIMEOUT_SECS = 30
|
||||
@@ -124,6 +124,16 @@ class GiteaForge(Forge):
|
||||
state=str(raw.get("state", "")),
|
||||
)
|
||||
|
||||
def read_pr(self, number: int) -> PullRequest:
|
||||
raw = self._client.get_pull(number)
|
||||
return PullRequest(
|
||||
number=int(raw.get("number", number)),
|
||||
title=str(raw.get("title", "")),
|
||||
body=str(raw.get("body", "") or ""),
|
||||
state=str(raw.get("state", "")),
|
||||
merged=bool(raw.get("merged", False)),
|
||||
)
|
||||
|
||||
def read_comments(self, number: int) -> list[Comment]:
|
||||
return [
|
||||
Comment(
|
||||
@@ -154,7 +164,7 @@ class GiteaForge(Forge):
|
||||
return None
|
||||
|
||||
def is_pr_open(self, number: int) -> bool:
|
||||
return self._client.get_pull(number).get("state") == "open"
|
||||
return self.read_pr(number).state == "open"
|
||||
|
||||
|
||||
def _read_error_body(exc: urllib.error.HTTPError) -> str:
|
||||
|
||||
@@ -2,26 +2,25 @@
|
||||
|
||||
The orchestrator tracks one record per forge-targeted issue so it can
|
||||
map an incoming webhook back to the bottle handling it, drive the
|
||||
freeze / rehydrate loop, and run the watchdog. State lives on disk and
|
||||
survives orchestrator restarts:
|
||||
freeze / rehydrate loop, and run the watchdog.
|
||||
|
||||
~/.bot-bottle/forge/<owner>/<repo>/issue-<n>.json
|
||||
|
||||
Writes are atomic (`os.replace`) so a crash mid-write never leaves a
|
||||
truncated record.
|
||||
State is stored in a local SQLite database in `~/.bot-bottle/`. Access
|
||||
goes through the thin `ForgeStateStore` CRUD interface so the backing
|
||||
store (location or engine) can be swapped without touching callers;
|
||||
`SqliteForgeStateStore` is the first implementation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import json
|
||||
import os
|
||||
from dataclasses import asdict, dataclass, field, fields
|
||||
from typing import Any
|
||||
import sqlite3
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from ...supervise import bot_bottle_root
|
||||
|
||||
_FORGE_SUBDIR = "forge"
|
||||
_DB_FILENAME = "bot-bottle.db"
|
||||
|
||||
# Lifecycle: a bottle is launched (running), frozen on the done signal,
|
||||
# and destroyed when the PR closes.
|
||||
@@ -46,60 +45,127 @@ class ForgeState:
|
||||
status: str = STATUS_RUNNING
|
||||
last_checkin_at: str = ""
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(asdict(self), indent=2, sort_keys=True)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any]) -> "ForgeState":
|
||||
# Tolerate unknown keys (forward-compat) by filtering to fields.
|
||||
known = {f.name for f in fields(cls)}
|
||||
return cls(**{k: v for k, v in data.items() if k in known})
|
||||
class ForgeStateStore(abc.ABC):
|
||||
"""Thin CRUD surface over forge state. Implementations back it with a
|
||||
concrete store; callers depend only on this interface so the storage
|
||||
location/engine is swappable."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def upsert(self, state: ForgeState) -> None:
|
||||
"""Insert or replace the record keyed by (owner, repo, issue)."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
|
||||
"""Fetch one record, or None when absent."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def delete(self, owner: str, repo: str, issue_number: int) -> None:
|
||||
"""Remove a record. Missing is success (idempotent)."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def all(self) -> list[ForgeState]:
|
||||
"""Every record, for the status table and the watchdog sweep."""
|
||||
|
||||
|
||||
def _forge_root() -> Path:
|
||||
return bot_bottle_root() / _FORGE_SUBDIR
|
||||
def default_db_path() -> Path:
|
||||
return bot_bottle_root() / _DB_FILENAME
|
||||
|
||||
|
||||
def forge_state_path(owner: str, repo: str, issue_number: int) -> Path:
|
||||
return _forge_root() / owner / repo / f"issue-{issue_number}.json"
|
||||
class SqliteForgeStateStore(ForgeStateStore):
|
||||
"""SQLite-backed `ForgeStateStore`. The database lives at
|
||||
`~/.bot-bottle/bot-bottle.db` by default; pass `db_path` to point at
|
||||
a different location (tests, alternate homes)."""
|
||||
|
||||
def __init__(self, db_path: Path | None = None) -> None:
|
||||
self._db_path = db_path or default_db_path()
|
||||
self._db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS forge_state (
|
||||
owner TEXT NOT NULL,
|
||||
repo TEXT NOT NULL,
|
||||
issue_number INTEGER NOT NULL,
|
||||
slug TEXT NOT NULL,
|
||||
agent_name TEXT NOT NULL,
|
||||
bottle_names TEXT NOT NULL,
|
||||
backend_name TEXT NOT NULL,
|
||||
agent_git_user TEXT NOT NULL,
|
||||
pr_number INTEGER,
|
||||
status TEXT NOT NULL,
|
||||
last_checkin_at TEXT NOT NULL,
|
||||
PRIMARY KEY (owner, repo, issue_number)
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(self._db_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
def upsert(self, state: ForgeState) -> None:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT OR REPLACE INTO forge_state (
|
||||
owner, repo, issue_number, slug, agent_name,
|
||||
bottle_names, backend_name, agent_git_user,
|
||||
pr_number, status, last_checkin_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
state.owner,
|
||||
state.repo,
|
||||
state.issue_number,
|
||||
state.slug,
|
||||
state.agent_name,
|
||||
json.dumps(state.bottle_names),
|
||||
state.backend_name,
|
||||
state.agent_git_user,
|
||||
state.pr_number,
|
||||
state.status,
|
||||
state.last_checkin_at,
|
||||
),
|
||||
)
|
||||
|
||||
def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
|
||||
with self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM forge_state "
|
||||
"WHERE owner = ? AND repo = ? AND issue_number = ?",
|
||||
(owner, repo, issue_number),
|
||||
).fetchone()
|
||||
return _row_to_state(row) if row is not None else None
|
||||
|
||||
def delete(self, owner: str, repo: str, issue_number: int) -> None:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"DELETE FROM forge_state "
|
||||
"WHERE owner = ? AND repo = ? AND issue_number = ?",
|
||||
(owner, repo, issue_number),
|
||||
)
|
||||
|
||||
def all(self) -> list[ForgeState]:
|
||||
with self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM forge_state ORDER BY owner, repo, issue_number"
|
||||
).fetchall()
|
||||
return [_row_to_state(row) for row in rows]
|
||||
|
||||
|
||||
def write_forge_state(state: ForgeState) -> None:
|
||||
"""Persist `state` atomically. Creates parent dirs as needed."""
|
||||
path = forge_state_path(state.owner, state.repo, state.issue_number)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(".json.tmp")
|
||||
tmp.write_text(state.to_json())
|
||||
os.replace(tmp, path)
|
||||
|
||||
|
||||
def read_forge_state(owner: str, repo: str, issue_number: int) -> ForgeState | None:
|
||||
"""Load state for one issue, or None when no record exists."""
|
||||
path = forge_state_path(owner, repo, issue_number)
|
||||
try:
|
||||
data = json.loads(path.read_text())
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
return ForgeState.from_dict(data)
|
||||
|
||||
|
||||
def delete_forge_state(owner: str, repo: str, issue_number: int) -> None:
|
||||
"""Remove an issue's record. Missing file is success (idempotent)."""
|
||||
path = forge_state_path(owner, repo, issue_number)
|
||||
path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def all_forge_states() -> list[ForgeState]:
|
||||
"""Every persisted record, for the orchestrate-status table and the
|
||||
watchdog sweep. Unreadable files are skipped rather than aborting the
|
||||
whole listing."""
|
||||
root = _forge_root()
|
||||
if not root.is_dir():
|
||||
return []
|
||||
states: list[ForgeState] = []
|
||||
for path in sorted(root.glob("*/*/issue-*.json")):
|
||||
try:
|
||||
states.append(ForgeState.from_dict(json.loads(path.read_text())))
|
||||
except (OSError, ValueError, TypeError):
|
||||
continue
|
||||
return states
|
||||
def _row_to_state(row: sqlite3.Row) -> ForgeState:
|
||||
return ForgeState(
|
||||
owner=row["owner"],
|
||||
repo=row["repo"],
|
||||
issue_number=row["issue_number"],
|
||||
slug=row["slug"],
|
||||
agent_name=row["agent_name"],
|
||||
bottle_names=json.loads(row["bottle_names"]),
|
||||
backend_name=row["backend_name"],
|
||||
agent_git_user=row["agent_git_user"],
|
||||
pr_number=row["pr_number"],
|
||||
status=row["status"],
|
||||
last_checkin_at=row["last_checkin_at"],
|
||||
)
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
"""Provenance footer (PRD forge-native-integration, chunk 5).
|
||||
|
||||
Every orchestrator-posted comment ends with this footer — non-optional
|
||||
and not configurable off. It renders the run's audit trail (agent,
|
||||
bottle, timing, exit, gitleaks, done-signal source, egress) as a
|
||||
collapsed markdown block the reviewer sees at the moment of the merge
|
||||
decision.
|
||||
|
||||
The function is pure: the orchestrator, which holds the run context,
|
||||
supplies the values. In particular `egress_routes` is the pre-rendered
|
||||
list of allowed-route lines the orchestrator computed from the run's
|
||||
resolved egress policy — this module does not parse backend-specific
|
||||
egress state. (The PRD sketch named an `egress_log_path`; passing the
|
||||
already-rendered lines keeps the footer builder pure and fully testable
|
||||
and leaves egress-state parsing where the data lives.)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def _parse(ts: str) -> datetime | None:
|
||||
try:
|
||||
return datetime.fromisoformat(ts)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def _format_duration(started_at: str, finished_at: str) -> str:
|
||||
start = _parse(started_at)
|
||||
end = _parse(finished_at)
|
||||
if start is None or end is None:
|
||||
return "unknown"
|
||||
secs = int((end - start).total_seconds())
|
||||
if secs < 0:
|
||||
return "unknown"
|
||||
if secs < 60:
|
||||
return f"{secs}s"
|
||||
return f"{secs // 60}m {secs % 60}s"
|
||||
|
||||
|
||||
def build_provenance_footer(
|
||||
slug: str,
|
||||
*,
|
||||
agent_name: str,
|
||||
bottle_names: tuple[str, ...],
|
||||
started_at: str,
|
||||
finished_at: str,
|
||||
exit_code: int,
|
||||
watchdog_fired: bool = False,
|
||||
gitleaks_clean: bool | None = None,
|
||||
egress_routes: list[str] | None = None,
|
||||
) -> str:
|
||||
"""Return a markdown string for appending to a Gitea comment body.
|
||||
|
||||
`watchdog_fired=True` marks runs where the agent did not signal
|
||||
completion, so reviewers know the audit trail may be incomplete.
|
||||
`gitleaks_clean=None` renders the gitleaks row as "not run".
|
||||
`egress_routes` is omitted entirely when None/empty.
|
||||
"""
|
||||
bottle_label = ", ".join(f"`{b}`" for b in bottle_names) if bottle_names else "—"
|
||||
exit_cell = f"{exit_code} {'✓' if exit_code == 0 else '✗'}"
|
||||
|
||||
if gitleaks_clean is None:
|
||||
gitleaks_cell = "— not run"
|
||||
elif gitleaks_clean:
|
||||
gitleaks_cell = "✓ no secrets detected"
|
||||
else:
|
||||
gitleaks_cell = "✗ secrets detected"
|
||||
|
||||
if watchdog_fired:
|
||||
done_cell = "watchdog — agent did not signal"
|
||||
else:
|
||||
done_cell = "sidecar `signal_done`"
|
||||
|
||||
lines = [
|
||||
"<details><summary>🔬 Run provenance</summary>",
|
||||
"",
|
||||
"| Field | Value |",
|
||||
"|---|---|",
|
||||
f"| agent | `{agent_name}` |",
|
||||
f"| bottle | {bottle_label} |",
|
||||
f"| slug | `{slug}` |",
|
||||
f"| started | {started_at} |",
|
||||
f"| duration | {_format_duration(started_at, finished_at)} |",
|
||||
f"| exit | {exit_cell} |",
|
||||
f"| gitleaks | {gitleaks_cell} |",
|
||||
f"| done signal | {done_cell} |",
|
||||
]
|
||||
|
||||
if egress_routes:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
f"**Egress** (deny-by-default; {len(egress_routes)} "
|
||||
f"route{'s' if len(egress_routes) != 1 else ''} allowed)"
|
||||
)
|
||||
for route in egress_routes:
|
||||
lines.append(f"- {route}")
|
||||
|
||||
lines.append("")
|
||||
lines.append("</details>")
|
||||
return "\n".join(lines)
|
||||
Reference in New Issue
Block a user