feat(forge): forge library layer for native integration (PRD chunks 1-3, 5)
lint / lint (push) Failing after 2m9s
test / unit (pull_request) Successful in 58s
test / integration (pull_request) Successful in 21s
test / coverage (pull_request) Successful in 1m23s

Implements the bot-bottle side of the forge-native PRD that is
self-contained in this repo (the forge sidecar and orchestrate command
belong to the separate bot-bottle-orchestrator, a PRD non-goal):

- contrib/forge/base.py: Forge ABC + ScopedForge enforcing the
  read-anywhere / write-scoped model (writes rejected outside the
  assigned issue/PRs via ForgeScopeError).
- contrib/gitea/client.py: GiteaClient (stdlib-only HTTP, mirrors the
  deploy-key provisioner) + GiteaForge. Token held by the caller (the
  sidecar), not injected by cred-proxy.
- contrib/gitea/forge_state.py: ForgeState dataclass + atomic
  read/write/delete/all under ~/.bot-bottle/forge/<owner>/<repo>/.
- contrib/gitea/provenance.py: build_provenance_footer — collapsed
  markdown audit footer; watchdog/gitleaks/egress rendering.
- cli/resume.py: `resume --headless --prompt` reusing the shipped
  assume_yes + headless_prompt launch core (the new half of chunk 1).

47 new unit tests; pylint 9.98/10, pyright clean. Forge sidecar (chunk
4), orchestrate command (chunk 6), and forge_env plumbing are deferred:
their only consumer is the separate orchestrator and they are untestable
in isolation here.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WL77TgFxKbs3cidGMG9dz7
This commit is contained in:
2026-06-30 19:39:49 -04:00
parent 738990b2df
commit a229a22d54
11 changed files with 1025 additions and 0 deletions
+164
View File
@@ -0,0 +1,164 @@
"""Gitea HTTP client + `GiteaForge` (PRD forge-native-integration, chunk 3).
`GiteaClient` is the thin stdlib-only HTTP transport (mirrors
`deploy_key_provisioner.py`: `urllib.request`, bounded timeouts,
structured error bodies). `GiteaForge` adapts it to the provider-agnostic
`Forge` surface.
Unlike the option-2 design, the token is held here (the sidecar process
owns it) and passed to the client directly — there is no agent-side
cred-proxy route, because the agent never makes forge calls. The HTTP
client is the one piece shared with `GiteaDeployKeyProvisioner`; the two
are deliberately *not* unified behind a common abstract base (see the
deferral note in the PRD).
"""
from __future__ import annotations
import json
import urllib.error
import urllib.request
from typing import Any
from ..forge.base import Comment, Forge, Issue
# Bound every Gitea call: a hung instance must not stall the sidecar.
_API_TIMEOUT_SECS = 30
class GiteaClient:
"""Thin authenticated HTTP client for one repo's Gitea API.
`api_url` is the API base *including* `/api/v1` (matching the
`FORGE_GITEA_API` env var), e.g. `https://gitea.example.com/api/v1`.
"""
def __init__(self, *, api_url: str, owner: str, repo: str, token: str) -> None:
self._api_url = api_url.rstrip("/")
self._owner = owner
self._repo = repo
self._token = token
# --- low-level request -------------------------------------------------
def _request(
self, method: str, path: str, *, body: dict[str, Any] | None = None
) -> tuple[int, Any]:
"""Issue an authenticated request. Returns `(status, parsed_json)`;
parsed_json is None when the response has no body. Raises
`RuntimeError` on any non-2xx except where callers special-case
the HTTPError themselves (membership 404)."""
url = f"{self._api_url}{path}"
data = json.dumps(body).encode() if body is not None else None
headers = {"Authorization": f"token {self._token}"}
if data is not None:
headers["Content-Type"] = "application/json"
req = urllib.request.Request(url, data=data, headers=headers, method=method)
with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS) as resp:
raw = resp.read()
parsed = json.loads(raw) if raw else None
return resp.status, parsed
def _repo_path(self, suffix: str) -> str:
return f"/repos/{self._owner}/{self._repo}{suffix}"
# --- operations --------------------------------------------------------
def is_org_member(self, org: str, username: str) -> bool:
"""GET /orgs/{org}/members/{username}: 2xx → member, 404 → not.
Other errors propagate so a misconfigured token fails loudly."""
url = f"{self._api_url}/orgs/{org}/members/{username}"
req = urllib.request.Request(
url, headers={"Authorization": f"token {self._token}"}, method="GET"
)
try:
with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS):
return True
except urllib.error.HTTPError as exc:
if exc.code == 404:
return False
raise RuntimeError(
f"org membership check failed for {org}/{username}: "
f"HTTP {exc.code}{_read_error_body(exc)}"
) from exc
def get_issue(self, number: int) -> dict[str, Any]:
_status, body = self._request("GET", self._repo_path(f"/issues/{number}"))
return body or {}
def get_comments(self, number: int) -> list[dict[str, Any]]:
_status, body = self._request(
"GET", self._repo_path(f"/issues/{number}/comments")
)
return body or []
def post_comment(self, number: int, body: str) -> None:
self._request(
"POST",
self._repo_path(f"/issues/{number}/comments"),
body={"body": body},
)
def patch_issue_body(self, number: int, body: str) -> None:
self._request(
"PATCH", self._repo_path(f"/issues/{number}"), body={"body": body}
)
def get_pull(self, number: int) -> dict[str, Any]:
_status, body = self._request("GET", self._repo_path(f"/pulls/{number}"))
return body or {}
class GiteaForge(Forge):
"""`Forge` over a `GiteaClient`."""
def __init__(self, client: GiteaClient) -> None:
self._client = client
def read_issue(self, number: int) -> Issue:
raw = self._client.get_issue(number)
return Issue(
number=int(raw.get("number", number)),
title=str(raw.get("title", "")),
body=str(raw.get("body", "") or ""),
state=str(raw.get("state", "")),
)
def read_comments(self, number: int) -> list[Comment]:
return [
Comment(
id=int(c.get("id", 0)),
user=str((c.get("user") or {}).get("login", "")),
body=str(c.get("body", "") or ""),
)
for c in self._client.get_comments(number)
]
def post_comment(self, number: int, body: str) -> None:
self._client.post_comment(number, body)
def update_description(self, number: int, body: str) -> None:
self._client.patch_issue_body(number, body)
def is_org_member(self, org: str, username: str) -> bool:
return self._client.is_org_member(org, username)
def get_pr_for_issue(self, number: int) -> int | None:
"""Gitea models a PR as an issue with the same number, exposing a
`pull_request` object on the issue. When the queried number is
itself a PR, return it; otherwise None. (The orchestrator tracks
the issue→PR mapping in forge state for the cross-number case.)"""
raw = self._client.get_issue(number)
if raw.get("pull_request"):
return int(raw.get("number", number))
return None
def is_pr_open(self, number: int) -> bool:
return self._client.get_pull(number).get("state") == "open"
def _read_error_body(exc: urllib.error.HTTPError) -> str:
try:
return exc.read().decode("utf-8", errors="replace")
except Exception: # pylint: disable=broad-exception-caught
return ""
+105
View File
@@ -0,0 +1,105 @@
"""Forge state persistence (PRD forge-native-integration, chunk 2).
The orchestrator tracks one record per forge-targeted issue so it can
map an incoming webhook back to the bottle handling it, drive the
freeze / rehydrate loop, and run the watchdog. State lives on disk and
survives orchestrator restarts:
~/.bot-bottle/forge/<owner>/<repo>/issue-<n>.json
Writes are atomic (`os.replace`) so a crash mid-write never leaves a
truncated record.
"""
from __future__ import annotations
import json
import os
from dataclasses import asdict, dataclass, field, fields
from typing import Any
from pathlib import Path
from ...supervise import bot_bottle_root
_FORGE_SUBDIR = "forge"
# Lifecycle: a bottle is launched (running), frozen on the done signal,
# and destroyed when the PR closes.
STATUS_RUNNING = "running"
STATUS_FROZEN = "frozen"
STATUS_DESTROYED = "destroyed"
@dataclass
class ForgeState:
"""One forge-targeted issue's bottle lifecycle record."""
owner: str
repo: str
issue_number: int
slug: str
agent_name: str
bottle_names: list[str] = field(default_factory=list)
backend_name: str = ""
agent_git_user: str = ""
pr_number: int | None = None
status: str = STATUS_RUNNING
last_checkin_at: str = ""
def to_json(self) -> str:
return json.dumps(asdict(self), indent=2, sort_keys=True)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ForgeState":
# Tolerate unknown keys (forward-compat) by filtering to fields.
known = {f.name for f in fields(cls)}
return cls(**{k: v for k, v in data.items() if k in known})
def _forge_root() -> Path:
return bot_bottle_root() / _FORGE_SUBDIR
def forge_state_path(owner: str, repo: str, issue_number: int) -> Path:
return _forge_root() / owner / repo / f"issue-{issue_number}.json"
def write_forge_state(state: ForgeState) -> None:
"""Persist `state` atomically. Creates parent dirs as needed."""
path = forge_state_path(state.owner, state.repo, state.issue_number)
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".json.tmp")
tmp.write_text(state.to_json())
os.replace(tmp, path)
def read_forge_state(owner: str, repo: str, issue_number: int) -> ForgeState | None:
"""Load state for one issue, or None when no record exists."""
path = forge_state_path(owner, repo, issue_number)
try:
data = json.loads(path.read_text())
except FileNotFoundError:
return None
return ForgeState.from_dict(data)
def delete_forge_state(owner: str, repo: str, issue_number: int) -> None:
"""Remove an issue's record. Missing file is success (idempotent)."""
path = forge_state_path(owner, repo, issue_number)
path.unlink(missing_ok=True)
def all_forge_states() -> list[ForgeState]:
"""Every persisted record, for the orchestrate-status table and the
watchdog sweep. Unreadable files are skipped rather than aborting the
whole listing."""
root = _forge_root()
if not root.is_dir():
return []
states: list[ForgeState] = []
for path in sorted(root.glob("*/*/issue-*.json")):
try:
states.append(ForgeState.from_dict(json.loads(path.read_text())))
except (OSError, ValueError, TypeError):
continue
return states
+103
View File
@@ -0,0 +1,103 @@
"""Provenance footer (PRD forge-native-integration, chunk 5).
Every orchestrator-posted comment ends with this footer — non-optional
and not configurable off. It renders the run's audit trail (agent,
bottle, timing, exit, gitleaks, done-signal source, egress) as a
collapsed markdown block the reviewer sees at the moment of the merge
decision.
The function is pure: the orchestrator, which holds the run context,
supplies the values. In particular `egress_routes` is the pre-rendered
list of allowed-route lines the orchestrator computed from the run's
resolved egress policy — this module does not parse backend-specific
egress state. (The PRD sketch named an `egress_log_path`; passing the
already-rendered lines keeps the footer builder pure and fully testable
and leaves egress-state parsing where the data lives.)
"""
from __future__ import annotations
from datetime import datetime
def _parse(ts: str) -> datetime | None:
try:
return datetime.fromisoformat(ts)
except (ValueError, TypeError):
return None
def _format_duration(started_at: str, finished_at: str) -> str:
start = _parse(started_at)
end = _parse(finished_at)
if start is None or end is None:
return "unknown"
secs = int((end - start).total_seconds())
if secs < 0:
return "unknown"
if secs < 60:
return f"{secs}s"
return f"{secs // 60}m {secs % 60}s"
def build_provenance_footer(
slug: str,
*,
agent_name: str,
bottle_names: tuple[str, ...],
started_at: str,
finished_at: str,
exit_code: int,
watchdog_fired: bool = False,
gitleaks_clean: bool | None = None,
egress_routes: list[str] | None = None,
) -> str:
"""Return a markdown string for appending to a Gitea comment body.
`watchdog_fired=True` marks runs where the agent did not signal
completion, so reviewers know the audit trail may be incomplete.
`gitleaks_clean=None` renders the gitleaks row as "not run".
`egress_routes` is omitted entirely when None/empty.
"""
bottle_label = ", ".join(f"`{b}`" for b in bottle_names) if bottle_names else ""
exit_cell = f"{exit_code} {'' if exit_code == 0 else ''}"
if gitleaks_clean is None:
gitleaks_cell = "— not run"
elif gitleaks_clean:
gitleaks_cell = "✓ no secrets detected"
else:
gitleaks_cell = "✗ secrets detected"
if watchdog_fired:
done_cell = "watchdog — agent did not signal"
else:
done_cell = "sidecar `signal_done`"
lines = [
"<details><summary>🔬 Run provenance</summary>",
"",
"| Field | Value |",
"|---|---|",
f"| agent | `{agent_name}` |",
f"| bottle | {bottle_label} |",
f"| slug | `{slug}` |",
f"| started | {started_at} |",
f"| duration | {_format_duration(started_at, finished_at)} |",
f"| exit | {exit_cell} |",
f"| gitleaks | {gitleaks_cell} |",
f"| done signal | {done_cell} |",
]
if egress_routes:
lines.append("")
lines.append(
f"**Egress** (deny-by-default; {len(egress_routes)} "
f"route{'s' if len(egress_routes) != 1 else ''} allowed)"
)
for route in egress_routes:
lines.append(f"- {route}")
lines.append("")
lines.append("</details>")
return "\n".join(lines)