refactor(forge): address PR #318 review — PR/Issue split, sqlite state, drop footer
lint / lint (push) Successful in 1m59s
test / unit (pull_request) Successful in 54s
test / integration (pull_request) Successful in 19s
test / coverage (pull_request) Successful in 1m4s

Addresses the five review comments on PR #318:

- Split PullRequest from Issue and add a dedicated read_pr method on
  Forge/ScopedForge/GiteaForge (a PR carries merge state an issue does
  not); is_pr_open now derives from read_pr.
- Replace the JSON-file forge state with a thin swappable CRUD interface
  (ForgeStateStore) backed by SQLite (SqliteForgeStateStore) at
  ~/.bot-bottle/bot-bottle.db.
- Remove the provenance footer (provenance.py + its test): a mutable,
  unsigned PR comment is not an audit record.
- Reword the PRD: provenance is exposed via an API, not surfaced in the
  PR; document the Issue/PullRequest split and the SQLite store.

pyright clean (whole repo), pylint 10/10, 38 forge/resume unit tests pass;
no remaining refs to the removed provenance module or old JSON state API.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WL77TgFxKbs3cidGMG9dz7
This commit is contained in:
2026-07-01 08:37:25 -04:00
parent f211ece6bf
commit 42004d37fd
9 changed files with 332 additions and 432 deletions
+12 -2
View File
@@ -20,7 +20,7 @@ import urllib.error
import urllib.request
from typing import Any
from ..forge.base import Comment, Forge, Issue
from ..forge.base import Comment, Forge, Issue, PullRequest
# Bound every Gitea call: a hung instance must not stall the sidecar.
_API_TIMEOUT_SECS = 30
@@ -124,6 +124,16 @@ class GiteaForge(Forge):
state=str(raw.get("state", "")),
)
def read_pr(self, number: int) -> PullRequest:
raw = self._client.get_pull(number)
return PullRequest(
number=int(raw.get("number", number)),
title=str(raw.get("title", "")),
body=str(raw.get("body", "") or ""),
state=str(raw.get("state", "")),
merged=bool(raw.get("merged", False)),
)
def read_comments(self, number: int) -> list[Comment]:
return [
Comment(
@@ -154,7 +164,7 @@ class GiteaForge(Forge):
return None
def is_pr_open(self, number: int) -> bool:
return self._client.get_pull(number).get("state") == "open"
return self.read_pr(number).state == "open"
def _read_error_body(exc: urllib.error.HTTPError) -> str:
+126 -60
View File
@@ -2,26 +2,25 @@
The orchestrator tracks one record per forge-targeted issue so it can
map an incoming webhook back to the bottle handling it, drive the
freeze / rehydrate loop, and run the watchdog. State lives on disk and
survives orchestrator restarts:
freeze / rehydrate loop, and run the watchdog.
~/.bot-bottle/forge/<owner>/<repo>/issue-<n>.json
Writes are atomic (`os.replace`) so a crash mid-write never leaves a
truncated record.
State is stored in a local SQLite database in `~/.bot-bottle/`. Access
goes through the thin `ForgeStateStore` CRUD interface so the backing
store (location or engine) can be swapped without touching callers;
`SqliteForgeStateStore` is the first implementation.
"""
from __future__ import annotations
import abc
import json
import os
from dataclasses import asdict, dataclass, field, fields
from typing import Any
import sqlite3
from dataclasses import dataclass, field
from pathlib import Path
from ...supervise import bot_bottle_root
_FORGE_SUBDIR = "forge"
_DB_FILENAME = "bot-bottle.db"
# Lifecycle: a bottle is launched (running), frozen on the done signal,
# and destroyed when the PR closes.
@@ -46,60 +45,127 @@ class ForgeState:
status: str = STATUS_RUNNING
last_checkin_at: str = ""
def to_json(self) -> str:
return json.dumps(asdict(self), indent=2, sort_keys=True)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ForgeState":
# Tolerate unknown keys (forward-compat) by filtering to fields.
known = {f.name for f in fields(cls)}
return cls(**{k: v for k, v in data.items() if k in known})
class ForgeStateStore(abc.ABC):
"""Thin CRUD surface over forge state. Implementations back it with a
concrete store; callers depend only on this interface so the storage
location/engine is swappable."""
@abc.abstractmethod
def upsert(self, state: ForgeState) -> None:
"""Insert or replace the record keyed by (owner, repo, issue)."""
@abc.abstractmethod
def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
"""Fetch one record, or None when absent."""
@abc.abstractmethod
def delete(self, owner: str, repo: str, issue_number: int) -> None:
"""Remove a record. Missing is success (idempotent)."""
@abc.abstractmethod
def all(self) -> list[ForgeState]:
"""Every record, for the status table and the watchdog sweep."""
def _forge_root() -> Path:
return bot_bottle_root() / _FORGE_SUBDIR
def default_db_path() -> Path:
return bot_bottle_root() / _DB_FILENAME
def forge_state_path(owner: str, repo: str, issue_number: int) -> Path:
return _forge_root() / owner / repo / f"issue-{issue_number}.json"
class SqliteForgeStateStore(ForgeStateStore):
"""SQLite-backed `ForgeStateStore`. The database lives at
`~/.bot-bottle/bot-bottle.db` by default; pass `db_path` to point at
a different location (tests, alternate homes)."""
def __init__(self, db_path: Path | None = None) -> None:
self._db_path = db_path or default_db_path()
self._db_path.parent.mkdir(parents=True, exist_ok=True)
with self._connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS forge_state (
owner TEXT NOT NULL,
repo TEXT NOT NULL,
issue_number INTEGER NOT NULL,
slug TEXT NOT NULL,
agent_name TEXT NOT NULL,
bottle_names TEXT NOT NULL,
backend_name TEXT NOT NULL,
agent_git_user TEXT NOT NULL,
pr_number INTEGER,
status TEXT NOT NULL,
last_checkin_at TEXT NOT NULL,
PRIMARY KEY (owner, repo, issue_number)
)
"""
)
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(self._db_path)
conn.row_factory = sqlite3.Row
return conn
def upsert(self, state: ForgeState) -> None:
with self._connect() as conn:
conn.execute(
"""
INSERT OR REPLACE INTO forge_state (
owner, repo, issue_number, slug, agent_name,
bottle_names, backend_name, agent_git_user,
pr_number, status, last_checkin_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
state.owner,
state.repo,
state.issue_number,
state.slug,
state.agent_name,
json.dumps(state.bottle_names),
state.backend_name,
state.agent_git_user,
state.pr_number,
state.status,
state.last_checkin_at,
),
)
def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
with self._connect() as conn:
row = conn.execute(
"SELECT * FROM forge_state "
"WHERE owner = ? AND repo = ? AND issue_number = ?",
(owner, repo, issue_number),
).fetchone()
return _row_to_state(row) if row is not None else None
def delete(self, owner: str, repo: str, issue_number: int) -> None:
with self._connect() as conn:
conn.execute(
"DELETE FROM forge_state "
"WHERE owner = ? AND repo = ? AND issue_number = ?",
(owner, repo, issue_number),
)
def all(self) -> list[ForgeState]:
with self._connect() as conn:
rows = conn.execute(
"SELECT * FROM forge_state ORDER BY owner, repo, issue_number"
).fetchall()
return [_row_to_state(row) for row in rows]
def write_forge_state(state: ForgeState) -> None:
"""Persist `state` atomically. Creates parent dirs as needed."""
path = forge_state_path(state.owner, state.repo, state.issue_number)
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".json.tmp")
tmp.write_text(state.to_json())
os.replace(tmp, path)
def read_forge_state(owner: str, repo: str, issue_number: int) -> ForgeState | None:
"""Load state for one issue, or None when no record exists."""
path = forge_state_path(owner, repo, issue_number)
try:
data = json.loads(path.read_text())
except FileNotFoundError:
return None
return ForgeState.from_dict(data)
def delete_forge_state(owner: str, repo: str, issue_number: int) -> None:
"""Remove an issue's record. Missing file is success (idempotent)."""
path = forge_state_path(owner, repo, issue_number)
path.unlink(missing_ok=True)
def all_forge_states() -> list[ForgeState]:
"""Every persisted record, for the orchestrate-status table and the
watchdog sweep. Unreadable files are skipped rather than aborting the
whole listing."""
root = _forge_root()
if not root.is_dir():
return []
states: list[ForgeState] = []
for path in sorted(root.glob("*/*/issue-*.json")):
try:
states.append(ForgeState.from_dict(json.loads(path.read_text())))
except (OSError, ValueError, TypeError):
continue
return states
def _row_to_state(row: sqlite3.Row) -> ForgeState:
return ForgeState(
owner=row["owner"],
repo=row["repo"],
issue_number=row["issue_number"],
slug=row["slug"],
agent_name=row["agent_name"],
bottle_names=json.loads(row["bottle_names"]),
backend_name=row["backend_name"],
agent_git_user=row["agent_git_user"],
pr_number=row["pr_number"],
status=row["status"],
last_checkin_at=row["last_checkin_at"],
)
-103
View File
@@ -1,103 +0,0 @@
"""Provenance footer (PRD forge-native-integration, chunk 5).
Every orchestrator-posted comment ends with this footer — non-optional
and not configurable off. It renders the run's audit trail (agent,
bottle, timing, exit, gitleaks, done-signal source, egress) as a
collapsed markdown block the reviewer sees at the moment of the merge
decision.
The function is pure: the orchestrator, which holds the run context,
supplies the values. In particular `egress_routes` is the pre-rendered
list of allowed-route lines the orchestrator computed from the run's
resolved egress policy — this module does not parse backend-specific
egress state. (The PRD sketch named an `egress_log_path`; passing the
already-rendered lines keeps the footer builder pure and fully testable
and leaves egress-state parsing where the data lives.)
"""
from __future__ import annotations
from datetime import datetime
def _parse(ts: str) -> datetime | None:
try:
return datetime.fromisoformat(ts)
except (ValueError, TypeError):
return None
def _format_duration(started_at: str, finished_at: str) -> str:
start = _parse(started_at)
end = _parse(finished_at)
if start is None or end is None:
return "unknown"
secs = int((end - start).total_seconds())
if secs < 0:
return "unknown"
if secs < 60:
return f"{secs}s"
return f"{secs // 60}m {secs % 60}s"
def build_provenance_footer(
slug: str,
*,
agent_name: str,
bottle_names: tuple[str, ...],
started_at: str,
finished_at: str,
exit_code: int,
watchdog_fired: bool = False,
gitleaks_clean: bool | None = None,
egress_routes: list[str] | None = None,
) -> str:
"""Return a markdown string for appending to a Gitea comment body.
`watchdog_fired=True` marks runs where the agent did not signal
completion, so reviewers know the audit trail may be incomplete.
`gitleaks_clean=None` renders the gitleaks row as "not run".
`egress_routes` is omitted entirely when None/empty.
"""
bottle_label = ", ".join(f"`{b}`" for b in bottle_names) if bottle_names else ""
exit_cell = f"{exit_code} {'' if exit_code == 0 else ''}"
if gitleaks_clean is None:
gitleaks_cell = "— not run"
elif gitleaks_clean:
gitleaks_cell = "✓ no secrets detected"
else:
gitleaks_cell = "✗ secrets detected"
if watchdog_fired:
done_cell = "watchdog — agent did not signal"
else:
done_cell = "sidecar `signal_done`"
lines = [
"<details><summary>🔬 Run provenance</summary>",
"",
"| Field | Value |",
"|---|---|",
f"| agent | `{agent_name}` |",
f"| bottle | {bottle_label} |",
f"| slug | `{slug}` |",
f"| started | {started_at} |",
f"| duration | {_format_duration(started_at, finished_at)} |",
f"| exit | {exit_cell} |",
f"| gitleaks | {gitleaks_cell} |",
f"| done signal | {done_cell} |",
]
if egress_routes:
lines.append("")
lines.append(
f"**Egress** (deny-by-default; {len(egress_routes)} "
f"route{'s' if len(egress_routes) != 1 else ''} allowed)"
)
for route in egress_routes:
lines.append(f"- {route}")
lines.append("")
lines.append("</details>")
return "\n".join(lines)