Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 898b6350bc | |||
| d2081839c9 | |||
| 23015f7fd8 |
@@ -209,15 +209,6 @@ class AgentProvider(ABC):
|
|||||||
the supervise sidecar is reachable. No-op when
|
the supervise sidecar is reachable. No-op when
|
||||||
`plan.supervise_plan is None`."""
|
`plan.supervise_plan is None`."""
|
||||||
|
|
||||||
@abstractmethod
def headless_prompt(self, prompt: str) -> list[str]:
    """Return the agent CLI args that deliver `prompt` as the task for a
    non-interactive (headless) session.

    The shared launch core calls this whenever a headless prompt is
    supplied, which covers both
    ``./cli.py start <agent> --headless --prompt ...`` and
    ``./cli.py resume <identity> --headless --prompt ...``. The returned
    args are appended after the provision's ``startup_args`` when the
    agent is attached.

    NOTE(review): the previous docstring also stated that these args
    follow the provider's ``bypass_args``; that ordering is applied
    outside the code visible here -- confirm against the attach path.

    Args:
        prompt: The task text to hand to the agent; never empty when
            called from the launch core.

    Returns:
        The extra command-line arguments, in order, for the agent CLI.
    """
|
|
||||||
|
|
||||||
def provision_ca(self, bottle: "Bottle", plan: "BottlePlan") -> None:
|
def provision_ca(self, bottle: "Bottle", plan: "BottlePlan") -> None:
|
||||||
"""Install the egress MITM CA into the agent's trust store.
|
"""Install the egress MITM CA into the agent's trust store.
|
||||||
|
|
||||||
|
|||||||
@@ -27,34 +27,12 @@ from .start import _launch_bottle
|
|||||||
def cmd_resume(argv: list[str]) -> int:
|
def cmd_resume(argv: list[str]) -> int:
|
||||||
parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True)
|
parser = argparse.ArgumentParser(prog=f"{PROG} resume", add_help=True)
|
||||||
parser.add_argument("--dry-run", action="store_true")
|
parser.add_argument("--dry-run", action="store_true")
|
||||||
parser.add_argument(
|
|
||||||
"--headless",
|
|
||||||
action="store_true",
|
|
||||||
help=(
|
|
||||||
"non-interactive rehydrate: deliver --prompt to the agent and "
|
|
||||||
"skip the y/N preflight. For orchestrators / the freeze-rehydrate "
|
|
||||||
"loop."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--prompt",
|
|
||||||
default=None,
|
|
||||||
help="follow-up prompt delivered to the agent (required with --headless)",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"identity",
|
"identity",
|
||||||
help="bottle identity from a prior `start` (see its session-end output)",
|
help="bottle identity from a prior `start` (see its session-end output)",
|
||||||
)
|
)
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
if args.prompt and not args.headless:
|
|
||||||
die("--prompt is only valid with --headless")
|
|
||||||
if args.headless and not args.prompt:
|
|
||||||
die(
|
|
||||||
"--headless requires --prompt: "
|
|
||||||
"./cli.py resume <identity> --headless --prompt 'Address the review'"
|
|
||||||
)
|
|
||||||
|
|
||||||
metadata = read_metadata(args.identity)
|
metadata = read_metadata(args.identity)
|
||||||
if metadata is None:
|
if metadata is None:
|
||||||
die(
|
die(
|
||||||
@@ -78,6 +56,4 @@ def cmd_resume(argv: list[str]) -> int:
|
|||||||
spec,
|
spec,
|
||||||
dry_run=args.dry_run,
|
dry_run=args.dry_run,
|
||||||
backend_name=backend_name,
|
backend_name=backend_name,
|
||||||
assume_yes=args.headless,
|
|
||||||
headless_prompt_text=args.prompt or "",
|
|
||||||
)
|
)
|
||||||
|
|||||||
+7
-142
@@ -2,11 +2,6 @@
|
|||||||
interactive claude-code session. The container is torn down when the
|
interactive claude-code session. The container is torn down when the
|
||||||
session ends.
|
session ends.
|
||||||
|
|
||||||
`--headless` selects a non-interactive launch (agent/bottles/label from
|
|
||||||
flags, no TUI selectors, no y/N prompt) for orchestrators,
|
|
||||||
CI, and webhook dispatch. The agent still execs on the inherited
|
|
||||||
stdio/PTY, so an orchestrator that allocates the PTY drives the session.
|
|
||||||
|
|
||||||
The launch core is shared with `cli.py resume <identity>` through
|
The launch core is shared with `cli.py resume <identity>` through
|
||||||
the private orchestrator `_launch_bottle`.
|
the private orchestrator `_launch_bottle`.
|
||||||
"""
|
"""
|
||||||
@@ -21,7 +16,7 @@ import tempfile
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from ..agent_provider import get_provider, runtime_for
|
from ..agent_provider import runtime_for
|
||||||
from ..backend import (
|
from ..backend import (
|
||||||
Bottle,
|
Bottle,
|
||||||
BottleSpec,
|
BottleSpec,
|
||||||
@@ -36,7 +31,7 @@ from ..bottle_state import (
|
|||||||
is_preserved,
|
is_preserved,
|
||||||
mark_preserved,
|
mark_preserved,
|
||||||
)
|
)
|
||||||
from ..log import info, die
|
from ..log import info
|
||||||
from ..manifest import Manifest, ManifestIndex
|
from ..manifest import Manifest, ManifestIndex
|
||||||
from ._common import PROG, USER_CWD, read_tty_line
|
from ._common import PROG, USER_CWD, read_tty_line
|
||||||
from . import tui
|
from . import tui
|
||||||
@@ -55,39 +50,6 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
"or host auto-selection). Overrides the env var when set."
|
"or host auto-selection). Overrides the env var when set."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--headless",
|
|
||||||
action="store_true",
|
|
||||||
help=(
|
|
||||||
"non-interactive launch: take agent/bottles/label from flags, "
|
|
||||||
"skip all prompts. For orchestrators, CI, and webhooks."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--bottle",
|
|
||||||
action="append",
|
|
||||||
default=None,
|
|
||||||
metavar="NAME",
|
|
||||||
help=(
|
|
||||||
"bottle to compose, repeatable (order = merge order). In "
|
|
||||||
"--headless, defaults to the agent's own bottle when omitted."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--label",
|
|
||||||
default=None,
|
|
||||||
help="bottle label / terminal title (--headless default: agent name)",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--color",
|
|
||||||
default=None,
|
|
||||||
help="bottle color, one of the 16 ANSI color names (--headless default: none)",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--prompt",
|
|
||||||
default=None,
|
|
||||||
help="initial task prompt delivered to the agent (required with --headless)",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"name",
|
"name",
|
||||||
nargs="?",
|
nargs="?",
|
||||||
@@ -99,12 +61,6 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
dry_run = args.dry_run or os.environ.get("BOT_BOTTLE_DRY_RUN") == "1"
|
dry_run = args.dry_run or os.environ.get("BOT_BOTTLE_DRY_RUN") == "1"
|
||||||
|
|
||||||
manifest = ManifestIndex.resolve(USER_CWD)
|
manifest = ManifestIndex.resolve(USER_CWD)
|
||||||
backend_name: str | None = args.backend
|
|
||||||
|
|
||||||
if args.headless:
|
|
||||||
return _start_headless(
|
|
||||||
manifest, args, dry_run=dry_run, backend_name=backend_name
|
|
||||||
)
|
|
||||||
|
|
||||||
agent_name: str | None = args.name
|
agent_name: str | None = args.name
|
||||||
if agent_name is None:
|
if agent_name is None:
|
||||||
@@ -115,6 +71,8 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
if agent_name is None:
|
if agent_name is None:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
backend_name: str | None = args.backend
|
||||||
|
|
||||||
# Bottle multiselect: always show after agent selection so operators
|
# Bottle multiselect: always show after agent selection so operators
|
||||||
# can compose bottles at launch time without editing agent manifests.
|
# can compose bottles at launch time without editing agent manifests.
|
||||||
available_bottles = manifest.all_bottle_names
|
available_bottles = manifest.all_bottle_names
|
||||||
@@ -151,83 +109,6 @@ def cmd_start(argv: list[str]) -> int:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# --- Headless launch -----------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def _start_headless(
    manifest: ManifestIndex,
    args: argparse.Namespace,
    *,
    dry_run: bool,
    backend_name: str | None,
) -> int:
    """Launch a bottle with no interactive prompts (orchestrators, CI,
    webhooks).

    Agent, bottles, label and color come from the parsed flags, falling
    back to manifest defaults, rather than from the TUI selectors. The
    launch itself goes through the same `_launch_bottle` core as the
    interactive path, with the y/N confirmation pre-answered and the
    prompt forwarded as the headless task.

    Args:
        manifest: Resolved manifest index used to validate the agent and
            look up its default bottle.
        args: Parsed `start` arguments (`name`, `prompt`, `bottle`,
            `label`, `color`, `cwd` are read).
        dry_run: Forwarded unchanged to `_launch_bottle`.
        backend_name: Forwarded unchanged to `_launch_bottle`.

    Returns:
        The exit status produced by `_launch_bottle`.
    """
    agent = args.name
    if not agent:
        die("--headless requires an agent name: ./cli.py start <agent> --headless")
    # Validate the agent up front; an unknown name raises ManifestError.
    manifest.require_agent(agent)

    task = args.prompt
    if not task:
        die(
            "--headless requires --prompt: "
            "./cli.py start <agent> --headless --prompt 'Do the thing'"
        )

    if not args.bottle:
        # No explicit --bottle flags: fall back to the agent's own bottle.
        fallback = _peek_agent_bottle(manifest, agent)
        if not fallback:
            die(
                f"--headless: agent '{agent}' has no default bottle; "
                f"pass one or more --bottle NAME"
            )
        selected: tuple[str, ...] = (fallback,)
    else:
        # Flag order is preserved because it determines merge order.
        selected = tuple(args.bottle)

    unique_label = _uniquify_label_headless(args.label or agent)

    launch_spec = BottleSpec(
        manifest=manifest,
        agent_name=agent,
        copy_cwd=args.cwd,
        user_cwd=USER_CWD,
        label=unique_label,
        color=args.color or "",
        bottle_names=selected,
    )
    return _launch_bottle(
        launch_spec,
        dry_run=dry_run,
        backend_name=backend_name,
        assume_yes=True,
        headless_prompt_text=task,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def _uniquify_label_headless(label: str) -> str:
    """Pick a label whose slug does not clash with a running bottle.

    Non-interactive counterpart of `_resolve_unique_label`. When the
    requested label's slug is free it is returned untouched; otherwise
    `-2`, `-3`, ... are appended until a free slug is found and the
    substitution is logged. Headless callers launch many bottles without
    supervision, so choosing a free name is preferred over failing.

    Args:
        label: The requested bottle label.

    Returns:
        `label` itself, or `label` with the first free numeric suffix.
    """
    taken = set()
    for agent in enumerate_active_agents():
        taken.add(agent.slug)

    if docker_mod.slugify(label) not in taken:
        return label

    suffix = 2
    candidate = f"{label}-{suffix}"
    while docker_mod.slugify(candidate) in taken:
        suffix += 1
        candidate = f"{label}-{suffix}"

    info(f"label '{label}' already in use; using '{candidate}'")
    return candidate
|
|
||||||
|
|
||||||
|
|
||||||
# --- Launch helpers ------------------------------------------------------
|
# --- Launch helpers ------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@@ -495,19 +376,10 @@ def _launch_bottle(
|
|||||||
*,
|
*,
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
backend_name: str | None = None,
|
backend_name: str | None = None,
|
||||||
assume_yes: bool = False,
|
|
||||||
headless_prompt_text: str = "",
|
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Shared launch core for `start` and `resume`. Builds the plan,
|
"""Shared launch core for `start` and `resume`. Builds the plan,
|
||||||
prints / dry-runs / prompts as appropriate, brings the bottle up,
|
prints / dry-runs / prompts as appropriate, brings the bottle up,
|
||||||
attaches claude, and prints the resume hint on session end.
|
attaches claude, and prints the resume hint on session end."""
|
||||||
|
|
||||||
`assume_yes` skips the interactive y/N confirmation (headless /
|
|
||||||
orchestrator launches), where there is no human at the prompt.
|
|
||||||
|
|
||||||
`headless_prompt_text` is passed to the provider's `headless_prompt`
|
|
||||||
method and the resulting args are appended to startup_args so the
|
|
||||||
agent receives the initial task without interactive input."""
|
|
||||||
stage_dir = Path(tempfile.mkdtemp(prefix="bot-bottle-stage."))
|
stage_dir = Path(tempfile.mkdtemp(prefix="bot-bottle-stage."))
|
||||||
identity = ""
|
identity = ""
|
||||||
try:
|
try:
|
||||||
@@ -515,7 +387,7 @@ def _launch_bottle(
|
|||||||
spec,
|
spec,
|
||||||
stage_dir=stage_dir,
|
stage_dir=stage_dir,
|
||||||
render_preflight=_text_render_preflight(),
|
render_preflight=_text_render_preflight(),
|
||||||
prompt_yes=(lambda: True) if assume_yes else _text_prompt_yes,
|
prompt_yes=_text_prompt_yes,
|
||||||
dry_run=dry_run,
|
dry_run=dry_run,
|
||||||
backend_name=backend_name,
|
backend_name=backend_name,
|
||||||
)
|
)
|
||||||
@@ -525,17 +397,10 @@ def _launch_bottle(
|
|||||||
backend = get_bottle_backend(backend_name)
|
backend = get_bottle_backend(backend_name)
|
||||||
with backend.launch(plan) as bottle:
|
with backend.launch(plan) as bottle:
|
||||||
agent_provider_template = getattr(plan, "agent_provider_template", "claude")
|
agent_provider_template = getattr(plan, "agent_provider_template", "claude")
|
||||||
extra_args: tuple[str, ...] = ()
|
|
||||||
if headless_prompt_text:
|
|
||||||
extra_args = tuple(
|
|
||||||
get_provider(agent_provider_template).headless_prompt(
|
|
||||||
headless_prompt_text
|
|
||||||
)
|
|
||||||
)
|
|
||||||
exit_code = attach_agent(
|
exit_code = attach_agent(
|
||||||
bottle,
|
bottle,
|
||||||
agent_provider_template=agent_provider_template,
|
agent_provider_template=agent_provider_template,
|
||||||
startup_args=plan.agent_provision.startup_args + extra_args,
|
startup_args=plan.agent_provision.startup_args,
|
||||||
)
|
)
|
||||||
info(
|
info(
|
||||||
f"session ended (exit {exit_code}); "
|
f"session ended (exit {exit_code}); "
|
||||||
|
|||||||
@@ -313,9 +313,6 @@ class ClaudeAgentProvider(AgentProvider):
|
|||||||
f"claude mcp add --scope user --transport http supervise {supervise_url}"
|
f"claude mcp add --scope user --transport http supervise {supervise_url}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def headless_prompt(self, prompt: str) -> list[str]:
    """Deliver `prompt` through the ``-p`` flag.

    Returns the two-element argument list ``["-p", prompt]``.
    """
    cli_args = ["-p"]
    cli_args.append(prompt)
    return cli_args
|
|
||||||
|
|
||||||
|
|
||||||
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
||||||
result = bottle.exec(script, user="root")
|
result = bottle.exec(script, user="root")
|
||||||
|
|||||||
@@ -279,9 +279,6 @@ class CodexAgentProvider(AgentProvider):
|
|||||||
f"codex mcp add supervise --url {shlex.quote(supervise_url)}"
|
f"codex mcp add supervise --url {shlex.quote(supervise_url)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def headless_prompt(self, prompt: str) -> list[str]:
    """Deliver `prompt` as a single bare argument.

    Returns the one-element argument list ``[prompt]``.
    """
    cli_args = []
    cli_args.append(prompt)
    return cli_args
|
|
||||||
|
|
||||||
|
|
||||||
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
||||||
result = bottle.exec(script, user="root")
|
result = bottle.exec(script, user="root")
|
||||||
|
|||||||
@@ -1,165 +0,0 @@
|
|||||||
"""Forge abstraction (PRD forge-native-integration, chunk 3).
|
|
||||||
|
|
||||||
The `Forge` abstract class is the provider-agnostic surface a forge
|
|
||||||
sidecar dispatches to: read issues/comments, post comments, edit
|
|
||||||
descriptions, and the membership / PR lookups the orchestrator needs.
|
|
||||||
Each forge (Gitea first) implements it; the sidecar protocol and the
|
|
||||||
agent prompt stay forge-agnostic.
|
|
||||||
|
|
||||||
`signal_done` is deliberately *not* a `Forge` method — completion is a
|
|
||||||
sidecar concept relayed to the orchestrator over a queue dir, not a
|
|
||||||
forge API operation.
|
|
||||||
|
|
||||||
`ScopedForge` enforces the PRD's **read-anywhere / write-scoped** model:
|
|
||||||
reads pass through to any issue/PR for context; writes are rejected
|
|
||||||
unless the target is the assigned issue or one of its PRs. This bounds
|
|
||||||
the blast radius of a prompt-injected agent below repo-wide API-key
|
|
||||||
permissions.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import abc
|
|
||||||
from collections.abc import Iterable
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class Issue:
    """Immutable snapshot of a forge issue.

    Pull requests are represented separately by `PullRequest`.
    """

    # Issue number within the repository.
    number: int
    # Issue title.
    title: str
    # Issue description text.
    body: str
    # Either "open" or "closed".
    state: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class PullRequest:
    """Immutable snapshot of a forge pull request.

    Deliberately a separate type from `Issue`, even though some forges
    send PRs as issues on the wire: a PR additionally carries merge
    state, and it is fetched through `read_pr` rather than `read_issue`.
    """

    # Pull request number within the repository.
    number: int
    # Pull request title.
    title: str
    # Pull request description text.
    body: str
    # Either "open" or "closed".
    state: str
    # True once the pull request has been merged.
    merged: bool
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class Comment:
    """Immutable snapshot of one comment on an issue or pull request."""

    # Forge-assigned comment identifier.
    id: int
    # Login of the comment author.
    user: str
    # Comment text.
    body: str
|
|
||||||
|
|
||||||
|
|
||||||
class ForgeScopeError(PermissionError):
    """Signals a write aimed at an issue/PR outside the assigned scope.

    `ScopedForge` raises it before the write reaches the wrapped forge.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class Forge(abc.ABC):
    """Interface for forge operations, independent of the provider.

    A concrete implementation wraps one provider's HTTP client and maps
    its responses onto `Issue`, `PullRequest` and `Comment`. Methods
    marked read-anywhere may target any number; methods marked
    write-scoped are the ones `ScopedForge` restricts.
    """

    @abc.abstractmethod
    def read_issue(self, number: int) -> Issue:
        """Fetch issue `number` (read-anywhere)."""

    @abc.abstractmethod
    def read_pr(self, number: int) -> PullRequest:
        """Fetch pull request `number` with its merge state (read-anywhere)."""

    @abc.abstractmethod
    def read_comments(self, number: int) -> list[Comment]:
        """Fetch the comments on thread `number` (read-anywhere)."""

    @abc.abstractmethod
    def post_comment(self, number: int, body: str) -> None:
        """Add a comment to issue or PR `number` (write-scoped)."""

    @abc.abstractmethod
    def update_description(self, number: int, body: str) -> None:
        """Overwrite the body of issue or PR `number` (write-scoped)."""

    @abc.abstractmethod
    def is_org_member(self, org: str, username: str) -> bool:
        """Report whether `username` belongs to `org`."""

    @abc.abstractmethod
    def get_pr_for_issue(self, number: int) -> int | None:
        """Return the PR number linked to issue `number`, or None."""

    @abc.abstractmethod
    def is_pr_open(self, number: int) -> bool:
        """Report whether PR `number` is still open."""
|
|
||||||
|
|
||||||
|
|
||||||
class ScopedForge(Forge):
    """Wrap a concrete `Forge` so reads go anywhere but writes are scoped.

    `post_comment` and `update_description` raise `ForgeScopeError`
    unless the target number is the assigned issue or one of the
    assigned PRs. All other methods are forwarded untouched, so reads,
    membership checks and PR lookups may target any number for context.

    The set of writable numbers is decided once, in the constructor.
    When a new PR is discovered (`get_pr_for_issue`) the sidecar builds
    a fresh `ScopedForge` that includes it; instances never widen their
    own scope.
    """

    def __init__(
        self,
        inner: Forge,
        *,
        assigned_issue: int,
        assigned_prs: Iterable[int] = (),
    ) -> None:
        """Wrap `inner`, allowing writes to `assigned_issue` and
        `assigned_prs` only."""
        self._inner = inner
        self._assigned_issue = assigned_issue
        allowed = set(assigned_prs)
        allowed.add(assigned_issue)
        self._writable = allowed

    @property
    def writable(self) -> frozenset[int]:
        """The numbers this wrapper accepts writes for, as a frozen copy."""
        return frozenset(self._writable)

    def _check_write(self, number: int) -> None:
        """Raise `ForgeScopeError` when `number` is not writable."""
        if number in self._writable:
            return
        # List the permitted targets in a stable order for the message.
        allowed = ", ".join(str(n) for n in sorted(self._writable))
        raise ForgeScopeError(
            f"write to #{number} denied: out of assigned scope "
            f"(writable: {allowed})"
        )

    # --- write-scoped: verify the target, then forward ---------------------

    def post_comment(self, number: int, body: str) -> None:
        self._check_write(number)
        self._inner.post_comment(number, body)

    def update_description(self, number: int, body: str) -> None:
        self._check_write(number)
        self._inner.update_description(number, body)

    # --- read-anywhere: forwarded without checks ---------------------------

    def read_issue(self, number: int) -> Issue:
        return self._inner.read_issue(number)

    def read_pr(self, number: int) -> PullRequest:
        return self._inner.read_pr(number)

    def read_comments(self, number: int) -> list[Comment]:
        return self._inner.read_comments(number)

    def is_org_member(self, org: str, username: str) -> bool:
        return self._inner.is_org_member(org, username)

    def get_pr_for_issue(self, number: int) -> int | None:
        return self._inner.get_pr_for_issue(number)

    def is_pr_open(self, number: int) -> bool:
        return self._inner.is_pr_open(number)
|
|
||||||
@@ -1,174 +0,0 @@
|
|||||||
"""Gitea HTTP client + `GiteaForge` (PRD forge-native-integration, chunk 3).
|
|
||||||
|
|
||||||
`GiteaClient` is the thin stdlib-only HTTP transport (mirrors
|
|
||||||
`deploy_key_provisioner.py`: `urllib.request`, bounded timeouts,
|
|
||||||
structured error bodies). `GiteaForge` adapts it to the provider-agnostic
|
|
||||||
`Forge` surface.
|
|
||||||
|
|
||||||
Unlike the option-2 design, the token is held here (the sidecar process
|
|
||||||
owns it) and passed to the client directly — there is no agent-side
|
|
||||||
cred-proxy route, because the agent never makes forge calls. The HTTP
|
|
||||||
client is the one piece shared with `GiteaDeployKeyProvisioner`; the two
|
|
||||||
are deliberately *not* unified behind a common abstract base (see the
|
|
||||||
deferral note in the PRD).
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import urllib.error
|
|
||||||
import urllib.request
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from ..forge.base import Comment, Forge, Issue, PullRequest
|
|
||||||
|
|
||||||
# Bound every Gitea call: a hung instance must not stall the sidecar.
|
|
||||||
_API_TIMEOUT_SECS = 30
|
|
||||||
|
|
||||||
|
|
||||||
class GiteaClient:
    """Thin authenticated HTTP client for one repo's Gitea API.

    `api_url` is the API base *including* `/api/v1` (matching the
    `FORGE_GITEA_API` env var), e.g. `https://gitea.example.com/api/v1`;
    a trailing slash is stripped. Every request sends the token in an
    `Authorization: token ...` header and is bounded by
    `_API_TIMEOUT_SECS`.

    Names interpolated into URL paths (owner, repo, org, username) are
    percent-encoded per segment, so a value containing `/`, `?`, `#` or
    whitespace cannot redirect the request to a different endpoint.
    """

    def __init__(self, *, api_url: str, owner: str, repo: str, token: str) -> None:
        self._api_url = api_url.rstrip("/")
        self._owner = owner
        self._repo = repo
        self._token = token

    # --- URL helpers -------------------------------------------------------

    @staticmethod
    def _quote(segment: object) -> str:
        """Percent-encode one URL path segment, including any `/` in it."""
        # Local import: the module only imports urllib.error / urllib.request.
        from urllib.parse import quote

        return quote(str(segment), safe="")

    def _repo_path(self, suffix: str) -> str:
        """Return the API path for this repo followed by `suffix`."""
        return f"/repos/{self._quote(self._owner)}/{self._quote(self._repo)}{suffix}"

    def _member_path(self, org: str, username: str) -> str:
        """Return the API path of the org-membership endpoint."""
        return f"/orgs/{self._quote(org)}/members/{self._quote(username)}"

    # --- low-level request -------------------------------------------------

    def _request(
        self, method: str, path: str, *, body: dict[str, Any] | None = None
    ) -> tuple[int, Any]:
        """Issue an authenticated request against `path`.

        Args:
            method: HTTP method name.
            path: Path relative to the API base, starting with `/`.
            body: Optional JSON-serialisable payload; when given it is
                sent with `Content-Type: application/json`.

        Returns:
            `(status, parsed_json)`; `parsed_json` is None when the
            response has no body.

        Raises:
            urllib.error.HTTPError: For HTTP error responses. `urlopen`
                raises it and it is intentionally left untranslated
                here, so callers that special-case a status (such as the
                membership 404) can inspect `exc.code`.
            urllib.error.URLError: When the server cannot be reached.
            ValueError: When a non-empty response body is not valid JSON.
        """
        url = f"{self._api_url}{path}"
        data = json.dumps(body).encode() if body is not None else None
        headers = {"Authorization": f"token {self._token}"}
        if data is not None:
            headers["Content-Type"] = "application/json"
        req = urllib.request.Request(url, data=data, headers=headers, method=method)
        with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS) as resp:
            status = resp.status
            raw = resp.read()
        parsed = json.loads(raw) if raw else None
        return status, parsed

    # --- operations --------------------------------------------------------

    def is_org_member(self, org: str, username: str) -> bool:
        """GET /orgs/{org}/members/{username}: 2xx -> member, 404 -> not.

        Any other HTTP error is re-raised as `RuntimeError` (with the
        response body attached) so a misconfigured token fails loudly.
        """
        url = f"{self._api_url}{self._member_path(org, username)}"
        req = urllib.request.Request(
            url, headers={"Authorization": f"token {self._token}"}, method="GET"
        )
        try:
            with urllib.request.urlopen(req, timeout=_API_TIMEOUT_SECS):
                return True
        except urllib.error.HTTPError as exc:
            if exc.code == 404:
                return False
            raise RuntimeError(
                f"org membership check failed for {org}/{username}: "
                f"HTTP {exc.code} — {_read_error_body(exc)}"
            ) from exc

    def get_issue(self, number: int) -> dict[str, Any]:
        """Return the raw issue object, or `{}` for an empty response."""
        _status, body = self._request("GET", self._repo_path(f"/issues/{number}"))
        return body or {}

    def get_comments(self, number: int) -> list[dict[str, Any]]:
        """Return the raw comment objects, or `[]` for an empty response."""
        _status, body = self._request(
            "GET", self._repo_path(f"/issues/{number}/comments")
        )
        return body or []

    def post_comment(self, number: int, body: str) -> None:
        """POST a new comment with text `body` on issue/PR `number`."""
        self._request(
            "POST",
            self._repo_path(f"/issues/{number}/comments"),
            body={"body": body},
        )

    def patch_issue_body(self, number: int, body: str) -> None:
        """PATCH issue/PR `number`, replacing its body with `body`."""
        self._request(
            "PATCH", self._repo_path(f"/issues/{number}"), body={"body": body}
        )

    def get_pull(self, number: int) -> dict[str, Any]:
        """Return the raw pull-request object, or `{}` for an empty response."""
        _status, body = self._request("GET", self._repo_path(f"/pulls/{number}"))
        return body or {}
|
|
||||||
|
|
||||||
|
|
||||||
class GiteaForge(Forge):
    """Adapter that exposes a `GiteaClient` through the `Forge` interface.

    Raw JSON objects from the client are converted into `Issue`,
    `PullRequest` and `Comment`; missing keys fall back to neutral
    defaults rather than raising.
    """

    def __init__(self, client: GiteaClient) -> None:
        self._client = client

    def read_issue(self, number: int) -> Issue:
        payload = self._client.get_issue(number)
        issue_number = int(payload.get("number", number))
        title = str(payload.get("title", ""))
        # A null body in the payload is normalised to an empty string.
        text = str(payload.get("body", "") or "")
        state = str(payload.get("state", ""))
        return Issue(number=issue_number, title=title, body=text, state=state)

    def read_pr(self, number: int) -> PullRequest:
        payload = self._client.get_pull(number)
        pr_number = int(payload.get("number", number))
        title = str(payload.get("title", ""))
        text = str(payload.get("body", "") or "")
        state = str(payload.get("state", ""))
        merged = bool(payload.get("merged", False))
        return PullRequest(
            number=pr_number, title=title, body=text, state=state, merged=merged
        )

    def read_comments(self, number: int) -> list[Comment]:
        comments: list[Comment] = []
        for entry in self._client.get_comments(number):
            comment_id = int(entry.get("id", 0))
            # The author may be absent or null; treat both as "no login".
            author = str((entry.get("user") or {}).get("login", ""))
            text = str(entry.get("body", "") or "")
            comments.append(Comment(id=comment_id, user=author, body=text))
        return comments

    def post_comment(self, number: int, body: str) -> None:
        self._client.post_comment(number, body)

    def update_description(self, number: int, body: str) -> None:
        self._client.patch_issue_body(number, body)

    def is_org_member(self, org: str, username: str) -> bool:
        return self._client.is_org_member(org, username)

    def get_pr_for_issue(self, number: int) -> int | None:
        """Return `number` itself when it names a PR, otherwise None.

        Gitea models a PR as an issue with the same number and marks it
        with a `pull_request` object on the issue payload. Mapping an
        issue to a *different* PR number is not done here; the
        orchestrator tracks that in forge state.
        """
        payload = self._client.get_issue(number)
        if not payload.get("pull_request"):
            return None
        return int(payload.get("number", number))

    def is_pr_open(self, number: int) -> bool:
        pull = self.read_pr(number)
        return pull.state == "open"
|
|
||||||
|
|
||||||
|
|
||||||
def _read_error_body(exc: urllib.error.HTTPError) -> str:
|
|
||||||
try:
|
|
||||||
return exc.read().decode("utf-8", errors="replace")
|
|
||||||
except Exception: # pylint: disable=broad-exception-caught
|
|
||||||
return ""
|
|
||||||
@@ -1,171 +0,0 @@
|
|||||||
"""Forge state persistence (PRD forge-native-integration, chunk 2).
|
|
||||||
|
|
||||||
The orchestrator tracks one record per forge-targeted issue so it can
|
|
||||||
map an incoming webhook back to the bottle handling it, drive the
|
|
||||||
freeze / rehydrate loop, and run the watchdog.
|
|
||||||
|
|
||||||
State is stored in a local SQLite database in `~/.bot-bottle/`. Access
|
|
||||||
goes through the thin `ForgeStateStore` CRUD interface so the backing
|
|
||||||
store (location or engine) can be swapped without touching callers;
|
|
||||||
`SqliteForgeStateStore` is the first implementation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import abc
|
|
||||||
import json
|
|
||||||
import sqlite3
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from ...supervise import bot_bottle_root
|
|
||||||
|
|
||||||
_DB_FILENAME = "bot-bottle.db"
|
|
||||||
|
|
||||||
# Bottle lifecycle states: "running" after launch, "frozen" once the done
# signal arrives, "destroyed" when the PR closes.
STATUS_RUNNING = "running"
STATUS_FROZEN = "frozen"
STATUS_DESTROYED = "destroyed"


@dataclass
class ForgeState:
    """Lifecycle record for the bottle handling one forge-targeted issue.

    Records are keyed by `(owner, repo, issue_number)`; the first five
    fields are required, the rest default to empty / unset values.
    """

    # --- required: identity of the issue and of the bottle -----------------
    owner: str
    repo: str
    issue_number: int
    slug: str
    agent_name: str

    # --- optional: launch details and progress -----------------------------
    # A fresh list per instance (never shared between records).
    bottle_names: list[str] = field(default_factory=list)
    backend_name: str = ""
    agent_git_user: str = ""
    # None until a PR is associated with the issue.
    pr_number: int | None = None
    # One of the STATUS_* constants; new records start as running.
    status: str = STATUS_RUNNING
    last_checkin_at: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
class ForgeStateStore(abc.ABC):
    """Minimal CRUD interface over forge state records.

    Callers program against this interface only, which keeps the storage
    engine and location replaceable; concrete subclasses supply the
    actual persistence.
    """

    @abc.abstractmethod
    def upsert(self, state: ForgeState) -> None:
        """Store `state`, replacing any record with the same
        (owner, repo, issue) key."""

    @abc.abstractmethod
    def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
        """Return the record for the given key, or None if there is none."""

    @abc.abstractmethod
    def delete(self, owner: str, repo: str, issue_number: int) -> None:
        """Delete the record for the given key; deleting a missing record
        succeeds (idempotent)."""

    @abc.abstractmethod
    def all(self) -> list[ForgeState]:
        """Return every record (used by the status table and the
        watchdog sweep)."""
|
|
||||||
|
|
||||||
|
|
||||||
def default_db_path() -> Path:
    """Return the default database path: `_DB_FILENAME` under `bot_bottle_root()`."""
    root = bot_bottle_root()
    return root / _DB_FILENAME
|
|
||||||
|
|
||||||
|
|
||||||
class SqliteForgeStateStore(ForgeStateStore):
    """SQLite-backed `ForgeStateStore`.

    The database lives at `~/.bot-bottle/bot-bottle.db` by default; pass
    `db_path` to point at a different location (tests, alternate homes).

    Each operation opens its own connection, commits (or rolls back on
    error), and closes the connection before returning, so no database
    handle outlives a call.
    """

    def __init__(self, db_path: Path | None = None) -> None:
        """Prepare the database file and make sure the schema exists.

        Args:
            db_path: Database file to use. Falls back to
                `default_db_path()` when omitted.
        """
        self._db_path = db_path or default_db_path()
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._run(
            """
            CREATE TABLE IF NOT EXISTS forge_state (
                owner           TEXT    NOT NULL,
                repo            TEXT    NOT NULL,
                issue_number    INTEGER NOT NULL,
                slug            TEXT    NOT NULL,
                agent_name      TEXT    NOT NULL,
                bottle_names    TEXT    NOT NULL,
                backend_name    TEXT    NOT NULL,
                agent_git_user  TEXT    NOT NULL,
                pr_number       INTEGER,
                status          TEXT    NOT NULL,
                last_checkin_at TEXT    NOT NULL,
                PRIMARY KEY (owner, repo, issue_number)
            )
            """
        )

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection whose rows can be read by column name."""
        conn = sqlite3.connect(self._db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _run(self, sql: str, params: tuple = ()) -> list[sqlite3.Row]:
        """Execute one statement in its own transaction and return its rows.

        Args:
            sql: The statement to execute.
            params: Values bound to the statement's `?` placeholders.

        Returns:
            All result rows; an empty list for statements that return none.
        """
        conn = self._connect()
        try:
            # `with conn` commits on success and rolls back on error, but
            # it does NOT close the connection -- hence the outer finally.
            with conn:
                return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def upsert(self, state: ForgeState) -> None:
        """Insert `state`, replacing any row with the same
        (owner, repo, issue_number) key."""
        self._run(
            """
            INSERT OR REPLACE INTO forge_state (
                owner, repo, issue_number, slug, agent_name,
                bottle_names, backend_name, agent_git_user,
                pr_number, status, last_checkin_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                state.owner,
                state.repo,
                state.issue_number,
                state.slug,
                state.agent_name,
                # The list is stored as JSON text in a single column.
                json.dumps(state.bottle_names),
                state.backend_name,
                state.agent_git_user,
                state.pr_number,
                state.status,
                state.last_checkin_at,
            ),
        )

    def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None:
        """Return the record for the given key, or None when absent."""
        rows = self._run(
            "SELECT * FROM forge_state "
            "WHERE owner = ? AND repo = ? AND issue_number = ?",
            (owner, repo, issue_number),
        )
        # The primary key guarantees at most one matching row.
        return _row_to_state(rows[0]) if rows else None

    def delete(self, owner: str, repo: str, issue_number: int) -> None:
        """Remove the record for the given key; a missing row is not an error."""
        self._run(
            "DELETE FROM forge_state "
            "WHERE owner = ? AND repo = ? AND issue_number = ?",
            (owner, repo, issue_number),
        )

    def all(self) -> list[ForgeState]:
        """Return every record, ordered by owner, repo, then issue number."""
        rows = self._run(
            "SELECT * FROM forge_state ORDER BY owner, repo, issue_number"
        )
        return [_row_to_state(row) for row in rows]
|
|
||||||
|
|
||||||
|
|
||||||
def _row_to_state(row: sqlite3.Row) -> ForgeState:
    """Build a `ForgeState` from one `forge_state` table row.

    Columns map one-to-one onto the fields of the same name, except
    `bottle_names`, which is JSON text and is decoded with `json.loads`.
    """
    passthrough = (
        "owner",
        "repo",
        "issue_number",
        "slug",
        "agent_name",
        "backend_name",
        "agent_git_user",
        "pr_number",
        "status",
        "last_checkin_at",
    )
    values = {column: row[column] for column in passthrough}
    values["bottle_names"] = json.loads(row["bottle_names"])
    return ForgeState(**values)
|
|
||||||
@@ -315,9 +315,6 @@ class PiAgentProvider(AgentProvider):
|
|||||||
) -> None:
|
) -> None:
|
||||||
del plan, bottle, supervise_url
|
del plan, bottle, supervise_url
|
||||||
|
|
||||||
def headless_prompt(self, prompt: str) -> list[str]:
    """Return the CLI args that hand `prompt` to the agent via `-p`."""
    args = ["-p"]
    args.append(prompt)
    return args
|
|
||||||
|
|
||||||
|
|
||||||
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
def _exec(bottle: "Bottle", script: str, error: str) -> None:
|
||||||
result = bottle.exec(script, user="root")
|
result = bottle.exec(script, user="root")
|
||||||
|
|||||||
@@ -1,239 +0,0 @@
|
|||||||
# PRD prd-new: Forge native integration
|
|
||||||
|
|
||||||
- **Status:** Draft
|
|
||||||
- **Author:** claude
|
|
||||||
- **Created:** 2026-06-29
|
|
||||||
- **Issue:** #317
|
|
||||||
|
|
||||||
## Summary
|
|
||||||
|
|
||||||
Add a webhook-driven orchestration layer that lets Gitea issues and PR comments drive bot-bottle sessions end-to-end with no operator in the loop for the happy path. An issue assigned to a member of the configured agent org and labelled with an agent name triggers a headless bottle launch; the bottle processes the issue, opens a PR, and interacts with the forge through a **forge sidecar** — the agent never touches the Gitea API or its credentials directly. The agent calls `signal_done(status, summary)` on the sidecar when a work unit is complete; the sidecar relays that to the orchestrator over a queue dir (the same pattern as the supervise sidecar), so completion is an unambiguous in-band signal rather than a comment the orchestrator has to parse. The orchestrator freezes the bottle. Subsequent PR comments rehydrate the frozen bottle. The bottle is destroyed when the PR closes.
|
|
||||||
|
|
||||||
The forge sidecar is backed by a `Forge` abstract class with per-provider implementations (Gitea first), so the agent's prompts and the sidecar protocol stay forge-agnostic. The sidecar logs forge operations semantically ("read PR description", "posted comment", "signalled done"), giving richer provenance than post-hoc egress-byte parsing, and enforces a **read-anywhere / write-scoped** permission model: the agent may read for context but may only write to the issue and PRs it was assigned.
|
|
||||||
|
|
||||||
Run provenance is exposed through a **provenance API** (the sidecar's structured operation log plus the run's metadata), not posted back into the forge. We do not surface a provenance footer in the PR — the audit record lives behind the API where it can be retained and queried, rather than as an editable comment.
|
|
||||||
|
|
||||||
The separation of concerns across the two layers: bot-bottle owns the headless launch primitives, the forge sidecar + `Forge` abstraction, and forge state. `bot-bottle-orchestrator` (separate binary) owns the webhook listener, bottle lifecycle loop, and monitoring dashboard; it calls into bot-bottle via `./cli.py orchestrate`, a thin wrapper command. This PRD covers bot-bottle's side of that contract.
|
|
||||||
|
|
||||||
## Problem
|
|
||||||
|
|
||||||
Today an operator must open the TUI, select an agent and bottle, confirm the preflight, and type prompts interactively. This blocks "issue → PR" automation and produces no durable audit record of what the agent did. The security model already provides the right isolation and egress controls, and `start --headless` (#315) already gives `bot-bottle-orchestrator` a non-interactive launch path. The missing pieces are a headless `resume` counterpart for rehydrating frozen bottles, a forge-interaction surface the agent uses to read context, post comments, and signal completion, and a provenance trail that makes the audit story of every run legible through the provenance API rather than through the PR itself.
|
|
||||||
|
|
||||||
That forge-interaction surface could be built two ways: (2) give the agent the Gitea API directly with cred-proxy injecting the token, or (3) put a forge sidecar between the agent and the forge. This PRD takes **option 3**. The deciding factors: a sidecar `signal_done` call is an unambiguous completion signal where comment-parsing is a correctness risk that surfaces in production; the sidecar produces a semantic audit trail rather than HTTP bytes, which is load-bearing for provenance (the stated product priority); and the sidecar can enforce scope tighter than repo-wide API-key permissions, reducing blast radius for a prompt-injected agent. The costs — a second sidecar process per forge run, a new failure mode if it crashes, and per-forge implementation cost — are accepted as the price of those properties.
|
|
||||||
|
|
||||||
## Goals / Success Criteria
|
|
||||||
|
|
||||||
1. Headless launch already exists: `./cli.py start <agent> --headless --prompt` (#315) runs non-interactively with no TUI selectors or y/N preflight. This PRD builds on it rather than re-introducing it. The remaining gap is a matching headless `resume` path (`./cli.py resume --headless`), since rehydrating a frozen bottle for a new prompt is required by the freeze / rehydrate loop and `resume` has no non-interactive entry point today.
|
|
||||||
2. An issue assigned to a member of the configured org (`FORGE_ORG`, default `bot-bottle`) and labelled `bot-bottle:<agent-name>` is the trigger convention. Org membership is verified via the Gitea API at event time.
|
|
||||||
3. Forge-targeted bottles run a **forge sidecar** that exposes a small, forge-agnostic API (comment/issue/PR CRUD plus `signal_done`) over the same queue-dir + HTTP/JSON-RPC machinery as the supervise sidecar. The agent calls the sidecar; it never sees the forge token or forge-specific endpoints.
|
|
||||||
4. The sidecar is backed by a `Forge` abstract class. Gitea is the first concrete implementation; adding a forge means a new subclass, not changes to the agent prompt or sidecar protocol. The sidecar enforces a read-anywhere / write-scoped model: writes are limited to the assigned issue and its PRs; reads are unrestricted for context.
|
|
||||||
5. The agent calls `signal_done(status, summary)` on the sidecar when a work unit is complete; the sidecar relays it to the orchestrator over a queue dir. This is the done signal — no comment parsing. A watchdog timeout (configurable, default 30 min) causes the orchestrator to treat the run as done-without-self-report if the agent stalls or exits without signalling.
|
|
||||||
6. Run provenance (agent name, bottle name(s), slug, timing, exit code, gitleaks result, egress summary, and the sidecar's semantic operation log) is available through a provenance API. It is **not** surfaced as a PR footer or any other forge comment.
|
|
||||||
7. Forge state (issue → slug, status) is persisted in a local SQLite database under `~/.bot-bottle/` and survives orchestrator restarts.
|
|
||||||
8. `./cli.py orchestrate status` lists active forge-managed bottles and their issue/PR URLs.
|
|
||||||
9. Unit tests cover: label parsing, org-membership check path, forge state store CRUD (SQLite), headless launch arg construction, forge env var injection, sidecar request dispatch through the `Forge` abstraction, write-scope enforcement (reject writes outside the assigned issue/PRs), and `signal_done` queue relay.
|
|
||||||
|
|
||||||
## Non-goals
|
|
||||||
|
|
||||||
- Webhook signature verification (HMAC-SHA256). Added as a follow-up.
|
|
||||||
- The `bot-bottle-orchestrator` binary itself — this PRD covers bot-bottle's side of the interface only. The orchestrator is a separate project.
|
|
||||||
- GitHub or GitLab support.
|
|
||||||
- Multiple simultaneous forge bottles per issue.
|
|
||||||
- Automatic retry on agent error exit.
|
|
||||||
- Bottle destruction on issue close (PR close only; issue close is ambiguous).
|
|
||||||
- Concurrent multi-issue handling (one blocking run per orchestrator process).
|
|
||||||
- A monitoring dashboard (orchestrator-side concern).
|
|
||||||
- Folding `DeployKeyProvisioner` into the `Forge` abstraction. Deploy-key provisioning runs at bottle-provision time on the host; the forge sidecar runs inside the bottle at agent time. The two have different lifecycles and actors, so coupling them into one class is deferred to a follow-up. This PRD only shares the Gitea HTTP client between them.
|
|
||||||
|
|
||||||
## Design
|
|
||||||
|
|
||||||
### Targeting convention
|
|
||||||
|
|
||||||
An issue is forge-targeted when **both** hold:
|
|
||||||
|
|
||||||
- At least one assignee is a member of the Gitea org named by `FORGE_ORG` (default `bot-bottle`). Checked via `GET /api/v1/orgs/{org}/members/{user}`.
|
|
||||||
- At least one label has the prefix `bot-bottle:`. The suffix names the agent manifest, e.g. `bot-bottle:implementer` → agent `implementer`.
|
|
||||||
|
|
||||||
`FORGE_ORG` is read at orchestrate-command startup. It is not embedded in manifests or state files; the orchestrator stamps its value into log output for auditability.
|
|
||||||
|
|
||||||
An optional label `bot-bottle-bottle:<name>` overrides bottle selection. When absent the agent's default bottle is used.
|
|
||||||
|
|
||||||
### `./cli.py orchestrate` — the thin wrapper
|
|
||||||
|
|
||||||
```
|
|
||||||
./cli.py orchestrate start --agent AGENT [--bottle BOTTLE ...] --prompt PROMPT
|
|
||||||
[--label LABEL] [--backend BACKEND]
|
|
||||||
./cli.py orchestrate resume --slug SLUG --prompt PROMPT [--backend BACKEND]
|
|
||||||
./cli.py orchestrate status
|
|
||||||
```
|
|
||||||
|
|
||||||
`orchestrate start` is a thin shim over the already-shipped `start --headless` (#315): it forwards agent / bottle / label / prompt and adds the forge-specific wiring (`forge_env`, sidecar launch). It does not re-implement headless launch. The caller (`bot-bottle-orchestrator`) manages freeze, state, and the forge sidecar's done signal around it.
|
|
||||||
|
|
||||||
`orchestrate resume` is the shim over the new `resume --headless` (below).
|
|
||||||
|
|
||||||
`orchestrate status` prints the forge state table.
|
|
||||||
|
|
||||||
### Headless primitives — what exists vs. what's new
|
|
||||||
|
|
||||||
Headless **start** already shipped in #315 and this PRD reuses it as-is:
|
|
||||||
|
|
||||||
- `./cli.py start <agent> --headless --prompt TEXT` — no TUI selectors, no y/N preflight. Internally `_start_headless()` calls the shared `_launch_bottle()` with `assume_yes=True` and `headless_prompt_text=prompt`.
|
|
||||||
- The prompt is delivered through `AgentProvider.headless_prompt(prompt)` — claude `-p`, codex positional, pi `-p`. The orchestrator does **not** hand-roll agent args; it relies on this provider abstraction. (An earlier draft proposed `start_headless` / `attach_agent_headless` helpers that constructed `--no-interactive`/`-p` directly — those are dropped as redundant with, and divergent from, what #315 merged.)
|
|
||||||
|
|
||||||
Two additions are needed on top of #315:
|
|
||||||
|
|
||||||
**1. A `forge_env` hook on the headless launch path.** The orchestrator needs to pass forge context + token through to the forge sidecar launched alongside the agent. This is a parameter threaded into `_launch_bottle` (the same core `start --headless` already uses), not a parallel launch function. The agent process itself does not receive the token.
|
|
||||||
|
|
||||||
**2. `resume --headless`** — new in `bot_bottle/cli/resume.py`, mirroring the `--headless` flag on `start`:
|
|
||||||
|
|
||||||
```
|
|
||||||
./cli.py resume <slug> --headless --prompt TEXT
|
|
||||||
```
|
|
||||||
|
|
||||||
It rehydrates a frozen bottle and runs one headless prompt via the same `assume_yes` + `headless_prompt` path, returning the agent's exit code. `resume` has no non-interactive entry point today, so this is genuinely new work rather than a rename of an existing helper.
|
|
||||||
|
|
||||||
### Forge sidecar
|
|
||||||
|
|
||||||
Forge-targeted bottles run a forge sidecar alongside the agent, mirroring the supervise sidecar: a per-bottle process that exposes an HTTP/JSON-RPC endpoint over a Unix socket and relays events to the orchestrator through a queue dir. The agent calls the sidecar; the sidecar holds the forge token and makes the actual forge API calls. The agent never receives the credential and never sees a forge-specific endpoint — swapping Gitea for another forge does not change the agent prompt or the sidecar protocol.
|
|
||||||
|
|
||||||
The sidecar is configured at launch from the forge context (owner, repo, issue, PR) and the token, supplied by the orchestrator — not baked into the agent manifest. Because the sidecar owns the token, forge traffic does not need a cred-proxy egress route on the agent; the agent's egress policy is unchanged by forge targeting.
|
|
||||||
|
|
||||||
**Sidecar protocol** (forge-agnostic; each method maps to a `Forge` call):
|
|
||||||
|
|
||||||
| Method | Scope | Purpose |
|
|
||||||
|---|---|---|
|
|
||||||
| `read_issue(number)` | read-anywhere | Read an issue body for context |
|
|
||||||
| `read_pr(number)` | read-anywhere | Read a PR (incl. merge state) for context |
|
|
||||||
| `read_comments(number)` | read-anywhere | Read a thread for context |
|
|
||||||
| `post_comment(number, body)` | write-scoped | Post to the assigned issue/PR |
|
|
||||||
| `update_description(number, body)` | write-scoped | Edit the assigned issue/PR body |
|
|
||||||
| `signal_done(status, summary)` | — | Relay completion to the orchestrator |
|
|
||||||
|
|
||||||
Issues and PRs are distinct domain objects (`Issue` vs `PullRequest`) read through distinct methods; a PR carries merge state an issue does not.
|
|
||||||
|
|
||||||
**Scope enforcement** is read-anywhere / write-scoped: read methods accept any issue/PR number for context; write methods are rejected unless the target is the assigned issue or one of its PRs. This is tighter than Gitea's repo-wide API-key permissions and bounds the blast radius of a prompt-injected agent. Rejections are logged semantically (operation, target, reason) so the audit trail records attempted out-of-scope writes, not just allowed ones.
|
|
||||||
|
|
||||||
**Semantic audit**: every sidecar call is logged as a structured operation ("read PR #318 description", "posted comment to #317", "signalled done: success") rather than as opaque HTTP bytes. This log feeds provenance directly, with no post-hoc egress-log parsing.
|
|
||||||
|
|
||||||
### `Forge` abstraction — `bot_bottle/contrib/forge/`
|
|
||||||
|
|
||||||
The sidecar dispatches to a `Forge` abstract class. Each provider implements the operations behind the sidecar protocol:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Forge(abc.ABC):
|
|
||||||
@abc.abstractmethod
|
|
||||||
def read_issue(self, number: int) -> Issue: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def read_pr(self, number: int) -> PullRequest: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def read_comments(self, number: int) -> list[Comment]: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def post_comment(self, number: int, body: str) -> None: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def update_description(self, number: int, body: str) -> None: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def is_org_member(self, org: str, username: str) -> bool: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def get_pr_for_issue(self, number: int) -> int | None: ...
|
|
||||||
@abc.abstractmethod
|
|
||||||
def is_pr_open(self, number: int) -> bool: ...
|
|
||||||
```
|
|
||||||
|
|
||||||
`Issue` and `PullRequest` are separate frozen dataclasses — a PR adds `merged`. `ScopedForge` wraps a concrete `Forge` to enforce the read-anywhere / write-scoped model (`post_comment` / `update_description` raise `ForgeScopeError` outside the assigned issue and PRs).
|
|
||||||
|
|
||||||
`GiteaForge` is the first and only concrete implementation in this PRD. It wraps the Gitea HTTP client (below). Adding GitHub or GitLab later is a new subclass; the sidecar, protocol, and agent prompt are untouched.
|
|
||||||
|
|
||||||
> **Deferred:** `DeployKeyProvisioner` is *not* folded into `Forge` here. Deploy-key provisioning runs on the host at provision time; the sidecar runs in the bottle at agent time. They have different lifecycles and actors, so a shared abstract base would couple two unrelated auth contexts. For now they only share the Gitea HTTP client; a later PRD can revisit unification.
|
|
||||||
|
|
||||||
### Forge env vars
|
|
||||||
|
|
||||||
The orchestrator passes forge context to the **sidecar** (not the agent) at launch. The agent does not need owner/repo/issue env vars to construct API calls, since it only names issue/PR numbers to the sidecar:
|
|
||||||
|
|
||||||
| Var | Example | Purpose |
|
|
||||||
|---|---|---|
|
|
||||||
| `FORGE_GITEA_API` | `https://gitea.dideric.is/api/v1` | Base URL the sidecar calls |
|
|
||||||
| `FORGE_OWNER` | `didericis` | Repo owner |
|
|
||||||
| `FORGE_REPO` | `bot-bottle` | Repo name |
|
|
||||||
| `FORGE_ISSUE_NUMBER` | `317` | Assigned issue (defines write scope) |
|
|
||||||
| `FORGE_PR_NUMBER` | `318` | Assigned PR (empty until PR exists) |
|
|
||||||
|
|
||||||
The agent's forge-specific prompt instructs it to call `signal_done` on the sidecar when a work unit is complete, and to use the sidecar for any comment/description writes. The instruction is forge-agnostic and is part of the forge prompt overlay, not the base agent manifest, so non-forge runs are unaffected.
|
|
||||||
|
|
||||||
### Done signal and watchdog
|
|
||||||
|
|
||||||
The agent calls `signal_done(status, summary)` on the sidecar when it finishes a work unit. The sidecar writes the event to its queue dir; the orchestrator reads it and:
|
|
||||||
|
|
||||||
1. Reads the forge state for `(owner, repo, issue_number)`.
|
|
||||||
2. If `status == "running"`, treats the event as the done signal: freezes the bottle and sets `status = "frozen"`. Provenance is recorded via the provenance API — no comment is posted to the forge.
|
|
||||||
|
|
||||||
Because completion is an explicit `signal_done` call, the orchestrator does not parse comment text to detect "done", and intermediate comments the agent posts mid-run cannot be mistaken for completion.
|
|
||||||
|
|
||||||
**Watchdog**: the orchestrator tracks `last_checkin_at` in forge state, updated on each sidecar event. A background thread wakes every minute. If `now - last_checkin_at > FORGE_WATCHDOG_TIMEOUT` (default 30 min, configurable via env) and `status == "running"`, the orchestrator treats the run as done-without-self-report and freezes the bottle, flagging the run as incomplete in the provenance record.
|
|
||||||
|
|
||||||
**Sidecar-death failure mode**: if the forge sidecar crashes mid-run the agent loses forge access while the bottle is otherwise healthy. The orchestrator detects a dead sidecar (socket/queue gone) the same way it detects a stalled agent and falls back to the watchdog path.
|
|
||||||
|
|
||||||
### Forge state — `bot_bottle/contrib/gitea/forge_state.py`
|
|
||||||
|
|
||||||
State is stored in a local SQLite database at `~/.bot-bottle/bot-bottle.db`. Access goes through a thin CRUD interface, `ForgeStateStore`, so the storage location/engine can be swapped without touching callers. `SqliteForgeStateStore` is the first implementation.
|
|
||||||
|
|
||||||
The `forge_state` table is keyed by `(owner, repo, issue_number)` and carries: `slug`, `agent_name`, `bottle_names` (JSON), `backend_name`, `agent_git_user`, `pr_number` (nullable), `status`, `last_checkin_at`.
|
|
||||||
|
|
||||||
`status`: `"running"` | `"frozen"` | `"destroyed"`.
|
|
||||||
|
|
||||||
Store interface:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class ForgeStateStore(abc.ABC):
|
|
||||||
def upsert(self, state: ForgeState) -> None: ...
|
|
||||||
def get(self, owner: str, repo: str, issue_number: int) -> ForgeState | None: ...
|
|
||||||
def delete(self, owner: str, repo: str, issue_number: int) -> None: ...
|
|
||||||
def all(self) -> list[ForgeState]: ...
|
|
||||||
|
|
||||||
class SqliteForgeStateStore(ForgeStateStore):
|
|
||||||
def __init__(self, db_path: Path | None = None) -> None: ...
|
|
||||||
```
|
|
||||||
|
|
||||||
`upsert` uses `INSERT OR REPLACE` so a re-run for the same issue overwrites in place. The schema is created on first open.
|
|
||||||
|
|
||||||
### Provenance API
|
|
||||||
|
|
||||||
Run provenance — agent, bottle(s), slug, timing, exit code, gitleaks result, egress summary, watchdog-fired flag, and the sidecar's semantic operation log — is exposed through a **provenance API**, not posted into the forge. There is no provenance footer or run-summary comment.
|
|
||||||
|
|
||||||
The rationale (per the monetization positioning): a PR comment is mutable by any maintainer, unsigned, and per-PR, so it is worthless as an audit record and invites false trust. The authoritative record therefore lives behind the API, where it can be retained, queried, and (eventually) signed. Whether any projection of it ever appears in the forge is a separate, out-of-scope decision; this PR does not build one.
|
|
||||||
|
|
||||||
The API surface itself (schema, transport, signing, retention) is **out of scope for this PRD** and belongs with the orchestrator / control-plane work. bot-bottle here only produces the raw material: the sidecar's semantic operation log and the run metadata the orchestrator collects.
|
|
||||||
|
|
||||||
### Gitea HTTP client — `bot_bottle/contrib/gitea/client.py`
|
|
||||||
|
|
||||||
`GiteaForge` (and the existing `GiteaDeployKeyProvisioner`) share one thin HTTP client. Unlike the option-2 design, the token is held by the sidecar process and passed to the client directly — there is no agent-side cred-proxy route to inject it, because the agent never makes forge calls.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class GiteaClient:
|
|
||||||
def __init__(self, *, api_url: str, owner: str, repo: str, token: str) -> None: ...
|
|
||||||
def is_org_member(self, org: str, username: str) -> bool: ...
|
|
||||||
def get_issue(self, number: int) -> dict: ...
|
|
||||||
def get_comments(self, number: int) -> list[dict]: ...
|
|
||||||
def post_comment(self, number: int, body: str) -> None: ...
|
|
||||||
def patch_issue_body(self, number: int, body: str) -> None: ...
|
|
||||||
def get_pull(self, number: int) -> dict: ...
|
|
||||||
```
|
|
||||||
|
|
||||||
`GiteaForge` adapts this client to the `Forge` surface (mapping raw JSON to `Issue` / `PullRequest` / `Comment`). Sharing only the HTTP client (not an abstract base) is the deliberate boundary between the sidecar and the deploy-key provisioner — see the deferral note under the `Forge` abstraction.
|
|
||||||
|
|
||||||
### Implementation chunks
|
|
||||||
|
|
||||||
1. **Headless additions on top of #315** — thread a `forge_env` parameter into the existing `_launch_bottle` core (the one `start --headless` already uses); add a `--headless` path to `cli/resume.py` reusing `assume_yes` + `headless_prompt`. No new `start_headless`/`attach_agent_headless` helpers. Tests: `forge_env` reaches the sidecar/`guest_env`; `resume --headless` skips the TUI and y/N preflight and returns the agent exit code.
|
|
||||||
|
|
||||||
2. **Forge state** — `contrib/gitea/forge_state.py`: `ForgeState` dataclass, `ForgeStateStore` CRUD interface, `SqliteForgeStateStore`. Tests: round-trip, missing → None, `INSERT OR REPLACE` upsert, delete idempotent, `all()` ordering, persistence across store instances.
|
|
||||||
|
|
||||||
3. **`Forge` abstraction + Gitea client** — `contrib/forge/base.py` (`Forge` ABC, `ScopedForge`, `Issue` / `PullRequest` / `Comment`) and `contrib/gitea/client.py` + `GiteaForge`: `is_org_member`, `read_issue`, `read_pr`, `read_comments`, `post_comment`, `update_description`, `get_pr_for_issue`, `is_pr_open`. Tests: mock `urllib.request.urlopen`, assert payloads and 404-as-false for membership; `ScopedForge` write-scope enforcement.
|
|
||||||
|
|
||||||
4. **Forge sidecar** — sidecar process exposing the protocol over a Unix socket, queue-dir relay, write-scope enforcement, semantic op log, `signal_done`. Reuses the supervise sidecar bundle machinery. Tests: dispatch each method to the `Forge`, reject out-of-scope writes, `signal_done` writes a queue event, scope-rejection is logged.
|
|
||||||
|
|
||||||
5. **`./cli.py orchestrate`** — `cli/orchestrate.py` with `start`, `resume`, `status` subcommands wired into `cli.py`; `start` launches the forge sidecar alongside the agent for forge-targeted runs. Tests: arg parsing, `start` delegates to `start --headless`, `resume` delegates to `resume --headless`.
|
|
||||||
|
|
||||||
## Provenance
|
|
||||||
|
|
||||||
Run provenance is captured (sidecar semantic operation log + run metadata) and exposed through a provenance API. It is deliberately **not** surfaced in the forge — no footer, no run-summary comment. A mutable, unsigned PR comment is not an audit record; the authoritative record lives behind the API where it can be retained and signed. The `watchdog_fired` flag marks runs where the agent did not self-report completion so consumers of the API know the record may be incomplete.
|
|
||||||
|
|
||||||
The provenance API's schema, transport, signing, and retention are out of scope for this PRD (control-plane work); bot-bottle here produces the raw material only.
|
|
||||||
@@ -0,0 +1,490 @@
|
|||||||
|
# Monetization & competitive positioning
|
||||||
|
|
||||||
|
Where, if anywhere, bot-bottle has a paid wedge — given a 2026
|
||||||
|
competitive field that has largely commoditized "sandbox a coding
|
||||||
|
agent." Folds together the agent-provider-agnostic framing, the Fly
|
||||||
|
remote-backend idea, the supervisor/egress-audit play, and the
|
||||||
|
solo-dev/Linux brand instinct, then asks the only question that
|
||||||
|
matters: is there a viable path to revenue that the competition does
|
||||||
|
not already foreclose?
|
||||||
|
|
||||||
|
Companion to
|
||||||
|
[`agent-sandbox-landscape.md`](agent-sandbox-landscape.md) (the
|
||||||
|
isolation-tech survey),
|
||||||
|
[`built-in-supervisor-design.md`](built-in-supervisor-design.md) (the
|
||||||
|
supervise surface this would extend), and
|
||||||
|
[`secret-minimization-over-dlp.md`](secret-minimization-over-dlp.md)
|
||||||
|
(why custody, not detection, is the real moat).
|
||||||
|
|
||||||
|
Market data current as of June 2026.
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
**Verdict: a path exists, but it is narrow, and it is not the path the
|
||||||
|
project is currently shaped for.** Every individual property bot-bottle
|
||||||
|
leans on — isolation, BYO-image, egress filtering, OSS, self-hosting —
|
||||||
|
is matched by some competitor, and several are now *free* from the agent
|
||||||
|
vendors themselves. There is exactly one defensible position left: the
|
||||||
|
**bundle** that no single competitor occupies —
|
||||||
|
|
||||||
|
> uniform egress audit + secret custody + policy, across *heterogeneous
|
||||||
|
> coding agents you don't trust*, on your infra or a managed pool.
|
||||||
|
|
||||||
|
Monetization is viable **only** if the product is sold as cross-vendor
|
||||||
|
**fleet governance + egress audit for teams**, not as solo-dev agent
|
||||||
|
safety (which the labs give away free). The solo-dev/Linux/anti-corporate
|
||||||
|
energy is real and worth using — but as a *distribution and trust*
|
||||||
|
engine that drives bottom-up adoption into teams, never as the revenue
|
||||||
|
positioning itself. Get those two wires crossed and the business dies:
|
||||||
|
you'd be courting the lowest-willingness-to-pay audience on earth while
|
||||||
|
repelling the only buyer who pays.
|
||||||
|
|
||||||
|
Net: **viable, conditional, and unforgiving of positioning error.** Do
|
||||||
|
Phase 1 (self-hostable egress-audit dashboard) regardless — it's
|
||||||
|
low-risk and it's the demo that makes everything else legible. Gate the
|
||||||
|
go/no-go on whether 5–10 teams confirm they'd pay for cross-vendor
|
||||||
|
egress audit *before* building the hosted tier.
|
||||||
|
|
||||||
|
## The two axes of "agnostic"
|
||||||
|
|
||||||
|
bot-bottle differentiates on two orthogonal axes, and conflating them
|
||||||
|
muddies the pitch:
|
||||||
|
|
||||||
|
1. **Agent-provider agnostic** — run Claude Code, Codex, Aider, a local
|
||||||
|
model, behind one control layer. Already real in the code
|
||||||
|
(`agent_provider.py`, Claude/Codex templates, BYO Dockerfile). This
|
||||||
|
is the axis the labs *structurally cannot* match — Anthropic only
|
||||||
|
runs Claude, OpenAI only their models. Durable.
|
||||||
|
2. **Compute backend** — local (docker / Apple Container / smolmachines)
|
||||||
|
today; a remote **Fly** backend would add a managed pool. This is the
|
||||||
|
axis that makes "fleet" literal for orgs and opens metered billing.
|
||||||
|
Fly is a strong first remote backend because it also subsumes remote
|
||||||
|
spin-up (Machines API) and the tunnel problem (6PN/WireGuard) — but
|
||||||
|
"provider-agnostic compute" should be *earned* after backend #2, not
|
||||||
|
designed up front (premature generalization trap).
|
||||||
|
|
||||||
|
## Competitive field, by capability
|
||||||
|
|
||||||
|
The field doesn't have one competitor; it has a different set on each
|
||||||
|
capability bot-bottle touches. Five dimensions:
|
||||||
|
|
||||||
|
| Capability | Who has it | bot-bottle's standing |
|
||||||
|
| :-- | :-- | :-- |
|
||||||
|
| **Isolation / sandbox** | Anthropic & OpenAI **native, free**; OSS devcontainer wrappers; E2B/Modal/Daytona/Northflank | Commoditized. Not a wedge. |
|
||||||
|
| **Arbitrary BYO Docker image** | Sandbox PaaS (E2B/Modal/Daytona/Northflank) yes; **managed agents: ~none** (Codex = fixed `codex-universal` + setup scripts; Copilot "not supported"; Devin/Jules constrained) | Wedge **vs. managed agents** (structural: it's their infra). Table stakes vs. PaaS. |
|
||||||
|
| **Egress audit + alerts** | LLM-observability tools (Braintrust/Langfuse/Phoenix/Helicone/Datadog) — but on *model calls*, wrong layer. Network-egress security (DeepInspect, AI gateways) — right layer, but decoupled from the agent, not cross-vendor. Sandbox PaaS = gateway/filter, not an audit surface. | **~Nobody in bot-bottle's exact shape** (per-agent egress, tied to the sandbox, with DLP context, cross-vendor). This is the wedge. |
|
||||||
|
| **OSS / self-hosting** | Managed agents: ~none. Sandbox PaaS: ~half (E2B OSS+self-host; Northflank BYOC; Modal closed; **Daytona leaving OSS**). Devcontainer wrappers: ~all. Observability: several. | Real wedge **vs. managed agents only**. Table stakes vs. PaaS, zero differentiation vs. wrappers. |
|
||||||
|
| **Cross-vendor uniformity** | Nobody — the labs won't, PaaS is agent-neutral infra not agent-aware control, wrappers are single-tool | Wedge. The connective tissue of the whole position. |
|
||||||
|
|
||||||
|
The pattern: **isolation and OSS/self-host are commodity; BYO-image and
|
||||||
|
cross-vendor are wedges only against the managed agents; egress-audit in
|
||||||
|
the integrated form is the one thing genuinely unoccupied.**
|
||||||
|
|
||||||
|
## Where bot-bottle is alone vs. where it's table stakes
|
||||||
|
|
||||||
|
- **Alone (the moat):** egress audit + secret custody + policy, *tied to
|
||||||
|
the agent sandbox*, *with DLP context* (which secret, which host,
|
||||||
|
which agent/task), *uniform across vendors*. No competitor bundles
|
||||||
|
these. An enterprise *could* bolt DeepInspect-style egress monitoring
|
||||||
|
onto a sandbox, so the defensibility is the **integration and
|
||||||
|
per-agent context**, not "we can see egress."
|
||||||
|
- **Table stakes (do not lead with these):** "we sandbox agents" (free
|
||||||
|
from the labs), "we're open source" (E2B is; the wrapper crowd all
|
||||||
|
is), "we self-host" (Northflank BYOC, E2B, every wrapper).
|
||||||
|
|
||||||
|
## The two existential competitive facts
|
||||||
|
|
||||||
|
1. **The agent vendors ship good-enough sandboxing for free.** Claude
|
||||||
|
Code now has Seatbelt/bubblewrap + a network proxy natively; Codex
|
||||||
|
has its own sandbox + approvals. This compresses the *single-vendor,
|
||||||
|
single-dev* market to ~zero willingness-to-pay. It is *why* the
|
||||||
|
product must be cross-vendor fleet governance, not local agent
|
||||||
|
safety.
|
||||||
|
2. **Northflank is converging from the infra side.** It already ships
|
||||||
|
dedicated egress gateways + proxy-based secret injection + BYOC.
|
||||||
|
It is the nearest thing to bot-bottle's differentiator as a managed
|
||||||
|
platform — but infra-first and agent-neutral, not agent-aware,
|
||||||
|
cross-vendor, or audit-first. Watch it.
|
||||||
|
|
||||||
|
## Monetization path (sequenced)
|
||||||
|
|
||||||
|
Open-core: **give away the sandbox, charge for the control plane.**
|
||||||
|
|
||||||
|
- **Phase 0 — validate (1–2 wks, parallel).** Ask 5–10 teams running 2+
|
||||||
|
agents: would you pay for one egress-audit + policy plane across
|
||||||
|
Claude *and* Codex? Gate the rest on a yes.
|
||||||
|
- **Phase 1 — the wedge (self-hostable, OSS).** Multi-bottle egress
|
||||||
|
dashboard + web approval queue + exportable audit log, built over the
|
||||||
|
existing `supervise_server.py` JSON-RPC and the egress event levels
|
||||||
|
(`LOG_BLOCKS` / `LOG_FULL`). Low risk, half-built, and the 30-second
|
||||||
|
demo that sells everything. The compliance hook (75% of enterprises
|
||||||
|
rank auditability #1) lives here.
|
||||||
|
- **Phase 2 — the paywall (hosted team tier).** Multi-tenant supervisor:
|
||||||
|
SSO/RBAC, audit retention, alerting, **centralized policy push**
|
||||||
|
(define egress allowlist + DLP once, enforce across all agents —
|
||||||
|
the moat made concrete). Gate on team/compliance features, *never* on
|
||||||
|
the core security.
|
||||||
|
- **Phase 3 — Fly remote backend.** Managed agent pool → "fleet" becomes
|
||||||
|
literal; metered (agent-hours) billing; subsumes remote spin-up +
|
||||||
|
tunnel.
|
||||||
|
- **Phase 4 — deepen.** Second agent provider done deeply (lean
|
||||||
|
open-source/open-weight for rug-pull resistance); egress anomaly
|
||||||
|
detection (the DLP stream becomes a product); SOC2/audit-export for
|
||||||
|
larger buyers.
|
||||||
|
|
||||||
|
**Do not build first:** the p2p mobile app (least monetizable, 6PN
|
||||||
|
gives the tunnel for free), a generic multi-cloud abstraction (premature),
|
||||||
|
or the hosted SaaS before Phase 0.
|
||||||
|
|
||||||
|
## Brand vs. revenue: the solo-dev / Linux instinct
|
||||||
|
|
||||||
|
The instinct to court Linux/hacker/solo-dev users and stay "not too
|
||||||
|
corporate" is **right for distribution, dangerous as strategy.**
|
||||||
|
|
||||||
|
- **Right:** it's how OSS infra gets discovered and trusted (HN, stars,
|
||||||
|
word-of-mouth, security-circle vouching); authenticity is a real moat
|
||||||
|
vs. the corporate players *because the architecture sincerely embodies
|
||||||
|
it* (local-first, `$HOME` trust boundary, no phone-home); and it fits
|
||||||
|
the founder.
|
||||||
|
- **Dangerous:** that audience is the lowest-WTP cohort that exists
|
||||||
|
(self-hosts the free thing, forks rather than pays), and "not too
|
||||||
|
corporate" reads to a VP of Eng as "not enterprise-ready." Building an
|
||||||
|
anti-SaaS brand and then shipping a paid tier invites the sell-out /
|
||||||
|
rug-pull backlash — which **Daytona just triggered** going closed.
|
||||||
|
|
||||||
|
**Resolution — be Tailscale, not a manifesto.** Use the developer-first,
|
||||||
|
respects-you energy as the *funnel*; sell *through* the solo advocate,
|
||||||
|
bottom-up, into the team that pays. Two guardrails:
|
||||||
|
|
||||||
|
1. "Anti-corporate" must not mean "anti-team-features." SSO/RBAC/audit
|
||||||
|
retention *are* the monetization; build them in a developer-respecting
|
||||||
|
way (Tailscale has SSO and is still beloved). Tone is the brand; team
|
||||||
|
features are the product.
|
||||||
|
2. Set the open-core social contract publicly **on day one** — core
|
||||||
|
sandbox open and self-hostable forever; hosted control plane is how
|
||||||
|
the lights stay on. The communities that don't revolt are the ones
|
||||||
|
told the deal upfront.
|
||||||
|
|
||||||
|
Concrete: the README frames the Docker/**Linux** backend as "legacy."
|
||||||
|
If courting the Linux crowd, make the Linux path (Docker+gVisor,
|
||||||
|
libkrun/smolmachines) first-class in the docs, not the fallback.
|
||||||
|
|
||||||
|
## Individuals, mobile, and the Pi-ecosystem reality check
|
||||||
|
|
||||||
|
"Individual devs won't pay" (above) is too blunt and needs refining.
|
||||||
|
The accurate claim: individuals won't pay for **safety-as-insurance**
|
||||||
|
(abstract risk reduction the labs give away free), but they *do* pay for
|
||||||
|
**capability/convenience felt daily** — Claude Pro, Cursor, Tailscale
|
||||||
|
Personal. "Drive my self-hosted agent from my phone" is capability, not
|
||||||
|
insurance, so it has a real (low-priced, high-churn) WTP profile. The
|
||||||
|
self-hoster/Linux crowd specifically pays for **sovereignty/control**,
|
||||||
|
just not for enterprise insurance. So an individual "sovereign remote
|
||||||
|
agent access" tier is *not* unreasonable in principle.
|
||||||
|
|
||||||
|
**But the market has already run that experiment, in public, for free.**
|
||||||
|
The Pi ecosystem (pi.dev) has commoditized every convenience layer an
|
||||||
|
individual product would charge for:
|
||||||
|
|
||||||
|
| Capability | Already free/OSS | bot-bottle differentiates? |
|
||||||
|
| :-- | :-- | :-- |
|
||||||
|
| Remote control from mobile | remote-pi, Paseo, TelePi | ❌ commoditized |
|
||||||
|
| Multi-agent orchestration from mobile | Paseo, pi-agent-dashboard | ❌ commoditized |
|
||||||
|
| **Launch** new agents from mobile | Paseo (`paseo run`) | ❌ commoditized |
|
||||||
|
| Launch into a **sandboxed, egress-audited** env | nobody | ✅ the moat |
|
||||||
|
|
||||||
|
Paseo (`getpaseo/paseo`, on the App Store) does the full thing an
|
||||||
|
individual remote-control tier would charge for — launch *and* attach
|
||||||
|
agents on a laptop/VM/dev-server, driven from mobile over an E2E relay —
|
||||||
|
free and open source. It *orchestrates* agents; it does **not** sandbox them, run
|
||||||
|
an egress chokepoint, DLP-scan, or audit. None of the Pi-ecosystem tools
|
||||||
|
do. So the residue, yet again, is **isolation + governance**, not
|
||||||
|
remote/launch convenience.
|
||||||
|
|
||||||
|
Two takeaways:
|
||||||
|
1. **Don't compete on orchestration/launch/remote UX** — it's a solved,
|
||||||
|
free, fast-moving, App-Store-shipping space around Pi. You won't win
|
||||||
|
it and it isn't the moat.
|
||||||
|
2. **Be the safe runtime orchestrators launch *into*.** Launch-from-mobile
|
||||||
|
is table stakes; *launch-into-a-sealed-egress-audited-bottle* is the
|
||||||
|
differentiator. bot-bottle is the sandbox an orchestrator like Paseo
|
||||||
|
would target, or that you wrap thin orchestration around — never the
|
||||||
|
orchestrator itself.
|
||||||
|
|
||||||
|
Capability layers commoditize fast: every individual/mobile angle
|
||||||
|
probed in this analysis collapsed back to the same cross-vendor +
|
||||||
|
sandbox + egress-audit + custody bundle. Mobile remote belongs as a
|
||||||
|
*funnel delighter* on top of the team product, not a standalone paid
|
||||||
|
line.
|
||||||
|
|
||||||
|
## Forge-native orchestration as the delivery vehicle
|
||||||
|
|
||||||
|
The strongest concrete *product shape* for the moat is not a bespoke
|
||||||
|
dashboard and not a Paseo competitor — it is **the git forge as the
|
||||||
|
orchestrator, with bot-bottle as the safe runtime it launches into.**
|
||||||
|
The forge already provides, for free, everything an orchestrator would
|
||||||
|
otherwise have to build: identity (agent/bot users, signed commits),
|
||||||
|
state (issues, labels, PRs/MRs, comments), triggers (webhooks, CI,
|
||||||
|
comment commands), review (diffs, approvals, status checks), audit
|
||||||
|
(commits/comments/reviews), and permissions (repo access, protected
|
||||||
|
branches, token scopes). bot-bottle supplies the one thing the forge
|
||||||
|
doesn't: **least-privilege, secret-isolated, audited execution of
|
||||||
|
untrusted agents.** Same moat (custody + audit + policy), better
|
||||||
|
vehicle — and it lands the product where teams already live, so it
|
||||||
|
avoids building an agent dashboard before one is needed.
|
||||||
|
|
||||||
|
The flow is essentially free to assemble:
|
||||||
|
|
||||||
|
```
|
||||||
|
issue/PR/MR event → webhook → policy/router → assign agent user +
|
||||||
|
branch/worktree → run agent in an isolated bottle (no ambient secrets)
|
||||||
|
→ commit as agent identity → open PR/MR → CI + human review + merge
|
||||||
|
```
|
||||||
|
|
||||||
|
**Crowding (why this is less saturated than it looks):**
|
||||||
|
|
||||||
|
| Layer | How crowded |
|
||||||
|
| :-- | :-- |
|
||||||
|
| Generic multi-agent orchestrators (worktree/TUI/dashboard) | very — 50–100+ |
|
||||||
|
| Forge-native issue/PR/MR orchestration | moderate — ~10–30 serious |
|
||||||
|
| Self-hostable, least-privilege, audited, forge-portable | **single digits** |
|
||||||
|
|
||||||
|
The deeper you go toward *untrusted-agent safety + auditability +
|
||||||
|
self-hostable + forge-portable*, the emptier it gets.
|
||||||
|
|
||||||
|
**The GitHub/GitLab first-party trap → lead with Gitea + sovereignty.**
|
||||||
|
GitHub (Agentic Workflows, Copilot coding agent) and GitLab (Duo Agent
|
||||||
|
Platform) are the forge *vendors* building native issue-to-PR agent
|
||||||
|
orchestration with native identity/permissions/audit. On their turf you
|
||||||
|
lose the integration-depth battle the same way single-vendor agent
|
||||||
|
safety loses to Anthropic/OpenAI — the same "incumbent ships it free,
|
||||||
|
deeper" dynamic, one layer up. So the durable opening is **Gitea +
|
||||||
|
self-hosted** (no first-party agent platform exists — the open Gitea
|
||||||
|
feature request for an AI code agent confirms the vacuum) plus
|
||||||
|
**cross-forge *untrusted-agent* safety**, which no forge vendor will
|
||||||
|
build because they want you running *their* agent, not arbitrary ones
|
||||||
|
under uniform least-privilege across competitors' forges. Cross-vendor
|
||||||
|
neutrality, applied to forges.
|
||||||
|
|
||||||
|
**Buyer reconciliation.** The least-crowded opening (self-hosted Gitea)
|
||||||
|
overlaps the lowest-WTP crowd (indie self-hosters), while the paying
|
||||||
|
teams sit on GitHub/GitLab where first-party competition is fiercest.
|
||||||
|
The intersection that resolves it: **orgs running self-hosted forges for
|
||||||
|
sovereignty/compliance reasons** (regulated, air-gapped, security-
|
||||||
|
conscious, on-prem). They have budget, they run self-hosted GitLab/Gitea,
|
||||||
|
*and* shipping code to a cloud agent vendor is a non-starter — so "run
|
||||||
|
untrusted agents sandboxed, least-privilege, fully audited, inside our
|
||||||
|
forge, on our infra" is a procurement checkbox, not a nicety. That is
|
||||||
|
where "least-crowded" finally meets "has money."
|
||||||
|
|
||||||
|
**Separate moat-hard-parts from cost-hard-parts.** The orchestration
|
||||||
|
"hard parts" are two different things, and conflating them oversells the
|
||||||
|
fit:
|
||||||
|
|
||||||
|
| Moat (your differentiated strength) | Undifferentiated cost (everyone faces) |
|
||||||
|
| :-- | :-- |
|
||||||
|
| permission isolation | idempotency / dedupe / run ledger |
|
||||||
|
| secret handling under malicious prompts | concurrency, locks, cancellation |
|
||||||
|
| run provenance | queueing / scheduling / cleanup |
|
||||||
|
| policy language | merge-conflict handling (~27% agent-PR conflict rate) |
|
||||||
|
|
||||||
|
The right column is generic distributed-systems plumbing that wins you
|
||||||
|
nothing — and merge-conflict resolution especially is a *different
|
||||||
|
competency* from sandbox/custody. Keep it thin in the MVP; do not build a
|
||||||
|
policy DSL + durable ledger + conflict resolver before one org pays.
|
||||||
|
|
||||||
|
**The killer feature: run provenance on every agent PR.** A check/comment
|
||||||
|
answering — which agent, which model, which prompt, which base commit,
|
||||||
|
which policy, which tools, which network egress, which test results —
|
||||||
|
attached at the moment a human reviews. It renders the (invisible)
|
||||||
|
custody + egress-audit work as a PR artifact the buyer sees at the exact
|
||||||
|
trust-decision point. No forge vendor's first-party agent will show you
|
||||||
|
"here is everything the untrusted agent could reach." Build this first.
|
||||||
|
|
||||||
|
**MVP** (`@bot-bottle fix this`): create an isolated worktree/bottle →
|
||||||
|
check out the issue branch → run the selected harness as a named agent
|
||||||
|
user → deny ambient secrets by default → record prompt/model/tools/policy
|
||||||
|
→ commit with bot identity → open PR/MR → attach the run-provenance
|
||||||
|
footer (log + tests + permission/egress summary) → require human merge.
|
||||||
|
The security model *is* the product. This rides the headless launch
|
||||||
|
primitive directly: webhook → `start --headless` into an isolated bottle
|
||||||
|
→ commit as agent identity → PR with provenance.
|
||||||
|
|
||||||
|
Open-core line, refined in the next section: the trigger *convention*
|
||||||
|
(label/assignee) stays open so anyone can adopt it, but the
|
||||||
|
**orchestrator that receives webhooks and governs lifecycle is the paid
|
||||||
|
control plane**; the runtime — and a signed-provenance emission API —
|
||||||
|
stay free.
|
||||||
|
|
||||||
|
## The open/paid boundary, refined: orchestrator as the paid control plane
|
||||||
|
|
||||||
|
The forge-native shape sharpens the open-core line past the rough
|
||||||
|
"trigger free, execution paid" cut above. Working it through four
|
||||||
|
constraints — value capture, provenance integrity, the sovereignty
|
||||||
|
buyer, and what the forge *structurally cannot do* — yields a precise
|
||||||
|
boundary.
|
||||||
|
|
||||||
|
**The orchestrator is the control plane, and the control plane is the
|
||||||
|
paid product.** With the forge supplying identity / state / triggers /
|
||||||
|
review, bot-bottle's orchestrator (`bot-bottle-orchestrator`, already
|
||||||
|
specced as a separate binary in the forge-native PRD) is where webhooks
|
||||||
|
land and bottle lifecycle + governance live. That binary can stay
|
||||||
|
**closed/private from day one** without breaking the open-core contract:
|
||||||
|
the runtime stays OSS; the control plane is how the lights stay on. This
|
||||||
|
is "give away the sandbox, charge for the control plane" made literal —
|
||||||
|
the orchestrator *is* the control plane.
|
||||||
|
|
||||||
|
**Charge for the moat, not the webhook.** Holding webhooks and managing
|
||||||
|
bottle lifecycle is commodity — the forge vendors build it first-party,
|
||||||
|
and it's the "undifferentiated cost" column above (idempotency, queueing,
|
||||||
|
dispatch). If the pitch is "we catch the webhook," they out-build it
|
||||||
|
free. The paid value is the two things the forge *cannot* do:
|
||||||
|
|
||||||
|
1. **See inside the run** — which model / prompt / policy / tools / egress
|
||||||
|
produced the diff, whether a secret nearly left. Runtime-level data
|
||||||
|
only the bottle holds.
|
||||||
|
2. **Aggregate and enforce across runs** — retain / search / export every
|
||||||
|
run across every repo; push one egress/DLP/capability policy
|
||||||
|
fleet-wide and detect drift.
|
||||||
|
|
||||||
|
The explainable heuristic: **anything legible within a single run on a
|
||||||
|
single node is free; anything requiring cross-run aggregation, central
|
||||||
|
enforcement, or identity/fleet management is paid.** That is also the
|
||||||
|
individual-vs-team line — individuals live in single runs, teams need the
|
||||||
|
aggregate.
|
||||||
|
|
||||||
|
**Provenance: emit free (signed), sell the product.** The forge is the
|
||||||
|
wrong system of record for provenance — a markdown footer is mutable by
|
||||||
|
any maintainer, unsigned, per-PR, with no aggregation, so a maintainer
|
||||||
|
could simply edit it. The authoritative record therefore lives in the
|
||||||
|
(paid) control plane. The *runtime* emits **signed** provenance through a
|
||||||
|
**free API** — tamper-evident offline (edit it and the signature breaks;
|
||||||
|
verify with no server), so on-prem teams can route it into their own
|
||||||
|
SIEM. What's paid is the *product* over that stream: retention, search,
|
||||||
|
cross-run, export, policy. Whether a copy also lands in the PR footer is
|
||||||
|
an optional, off-by-default marketing dial — one consumer of the free
|
||||||
|
API, not a free provenance surface, and never the audit record. The
|
||||||
|
mutability "bug" becomes a paid feature: the control plane flags *"PR
|
||||||
|
footer edited / doesn't match the signed run."* (Prometheus model:
|
||||||
|
`/metrics` is free to scrape; managed retention + dashboards are the
|
||||||
|
business.)
|
||||||
|
|
||||||
|
**On-prem priority: self-hosted runners over self-hosted provenance.**
|
||||||
|
The sovereignty buyer's *hard structural constraint* is where the agent
|
||||||
|
**executes** against private code, secrets, and network — that's the
|
||||||
|
runner, and it cannot leave the perimeter. Audit metadata is softer; many
|
||||||
|
regulated orgs ship logs to SaaS while keeping the workload inside. So:
|
||||||
|
|
||||||
|
- Self-hosted **runner** = baseline, always, for that buyer.
|
||||||
|
- Self-hosted **provenance store** = premium tier for the strictest subset
|
||||||
|
(air-gapped, hard data-residency) — and largely covered by the free
|
||||||
|
emission API → their own SIEM, so it may never need to be a product you
|
||||||
|
build.
|
||||||
|
- Precision so you don't trip your own free tier: a single self-hosted
|
||||||
|
runner *is the OSS runtime on their box* — free. What's paid is the
|
||||||
|
**fleet control plane**: enrolling/managing many runners, central
|
||||||
|
policy push, dispatch/identity/quota, health/scaling. You don't sell
|
||||||
|
"a runner," you sell **running a governed fleet**.
|
||||||
|
|
||||||
|
**Resulting tiers:**
|
||||||
|
|
||||||
|
| Layer | What it is | Open/Paid | Deployment |
|
||||||
|
| :-- | :-- | :-- | :-- |
|
||||||
|
| **Runtime** | isolation + ephemeral bottles, cred-proxy, supervise, `start --headless`, signed-provenance emission API | Free / OSS | Always self-host |
|
||||||
|
| **Single runner** | the OSS runtime on a box | Free / OSS | Self-host |
|
||||||
|
| **Control plane** | cross-run audit retention/search/export, central policy push, SSO/RBAC dispatch, fleet management of runners, alerting | **Paid** | Hosted *or* self-host-licensed — same code |
|
||||||
|
| **Capacity** | managed Fly runner pool, metered (agent-hours) | **Paid add-on** | Hosted only |
|
||||||
|
|
||||||
|
Fly stays a **capacity/convenience line, not the moat** — it monetizes
|
||||||
|
even solo hackers (capability, not insurance), but a managed runner pool
|
||||||
|
is reselling compute against Fly/E2B/Northflank on price. It's a bundle
|
||||||
|
attached to the governance, never the thing defended. Self-host is *not*
|
||||||
|
a separate product: on-prem buyers get the same closed control plane,
|
||||||
|
licensed, pointed at their own runners.
|
||||||
|
|
||||||
|
## Risks to the thesis
|
||||||
|
|
||||||
|
- **Lab encroachment.** If Anthropic/OpenAI add cross-agent governance
|
||||||
|
or open their managed egress logs, the wedge narrows. Mitigate by
|
||||||
|
going deep on cross-vendor + custody + audit *now*, while they're
|
||||||
|
single-vendor.
|
||||||
|
- **Rug-pull dependency.** You run the labs' agents; they can restrict
|
||||||
|
their agent to their own sandbox via ToS/tech. Hedge toward
|
||||||
|
open-source/open-weight agents for durability.
|
||||||
|
- **Northflank (or E2B) ships agent-aware audit.** Plausible from the
|
||||||
|
infra side. Your defense is agent-awareness + the supervise approval
|
||||||
|
loop + cross-vendor, not raw egress visibility.
|
||||||
|
- **WTP may simply not be there.** The honest failure mode: teams like
|
||||||
|
the audit but won't pay because "we already sandbox in CI." Phase 0
|
||||||
|
exists to find this out cheaply before building Phase 2/3.
|
||||||
|
- **Forge-vendor encroachment (forge-native path).** GitHub Agentic
|
||||||
|
Workflows / Copilot and GitLab Duo are first-party and deepening.
|
||||||
|
Defense: aim at self-hosted Gitea + sovereignty buyers where no
|
||||||
|
first-party agent platform exists, and at cross-forge untrusted-agent
|
||||||
|
neutrality the vendors won't build. Don't fight them GitHub-native.
|
||||||
|
- **Orchestration-reliability scope creep.** The forge-native build
|
||||||
|
drags in idempotency, queueing, concurrency, and merge-conflict
|
||||||
|
handling — undifferentiated plumbing that isn't the moat. Keep it thin
|
||||||
|
until a paying org forces it.
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
|
||||||
|
Build Phase 1 now — it's low-risk, half-built, and the proof artifact.
|
||||||
|
Run Phase 0 in parallel. Treat a clear yes from 5–10 teams as the
|
||||||
|
green light for the hosted tier; treat a soft maybe as a signal to stay
|
||||||
|
an excellent OSS tool with a tip-jar/support model rather than a
|
||||||
|
venture-shaped SaaS. The technology is not the risk — the codebase is
|
||||||
|
exemplary and the architecture already supports the pivot. The risk is
|
||||||
|
**positioning discipline**: sell cross-vendor fleet governance to teams,
|
||||||
|
use the indie brand as the funnel, and never let the anti-corporate
|
||||||
|
aesthetic veto the features that pay.
|
||||||
|
|
||||||
|
## Sources
|
||||||
|
|
||||||
|
- Anthropic — Claude Code sandboxing:
|
||||||
|
https://www.anthropic.com/engineering/claude-code-sandboxing
|
||||||
|
- OpenAI Codex — cloud environments:
|
||||||
|
https://developers.openai.com/codex/cloud/environments ;
|
||||||
|
custom-image feature request:
|
||||||
|
https://community.openai.com/t/feature-request-custom-docker-images/1265333
|
||||||
|
- GitHub Copilot — custom container image (not supported), discussion
|
||||||
|
#194105: https://github.com/orgs/community/discussions/194105
|
||||||
|
- DeepInspect — AI egress monitoring:
|
||||||
|
https://www.deepinspect.ai/blog/ai-egress-monitoring
|
||||||
|
- Braintrust — AI agent observability/alerting:
|
||||||
|
https://www.braintrust.dev/articles/best-ai-agent-observability-tools-2026
|
||||||
|
- E2B (OSS, Apache-2.0): https://github.com/e2b-dev/e2b ;
|
||||||
|
infra/self-host: https://github.com/e2b-dev/infra
|
||||||
|
- Daytona going closed source:
|
||||||
|
https://www.daytona.io/dotfiles/updates/daytona-is-going-closed-source
|
||||||
|
- Northflank — BYOC / egress gateways:
|
||||||
|
https://northflank.com/blog/what-is-byoc-in-cloud-computing ;
|
||||||
|
https://northflank.com/blog/self-hostable-alternatives-to-e2b-for-ai-agents
|
||||||
|
- Modal Sandboxes: https://modal.com/products/sandboxes
|
||||||
|
- AI agent orchestration / enterprise governance (75% cite
|
||||||
|
auditability):
|
||||||
|
https://viston.tech/ai-agent-orchestration-in-2026-moving-from-pilots-to-enterprise-wide-execution/
|
||||||
|
- Pi harness (provider-agnostic CLI): https://pi.dev/packages/remote-pi ;
|
||||||
|
https://github.com/earendil-works/pi
|
||||||
|
- Paseo (launch + attach agents from desktop/mobile, OSS):
|
||||||
|
https://github.com/getpaseo/paseo ;
|
||||||
|
https://apps.apple.com/us/app/paseo-remote-coding-agents/id6758887924
|
||||||
|
- pi-agent-dashboard (mobile-first remote control via mDNS/zrok):
|
||||||
|
https://github.com/BlackBeltTechnology/pi-agent-dashboard
|
||||||
|
- TelePi (Telegram remote control for Pi):
|
||||||
|
https://futurelab.studio/blog/telepi-telegram-remote-control-for-pi/
|
||||||
|
- Forge-native landscape (provided via conversation, not independently
|
||||||
|
re-verified):
|
||||||
|
- awesome-agent-orchestrators (50+ generic orchestrators):
|
||||||
|
https://github.com/andyrewlee/awesome-agent-orchestrators
|
||||||
|
- GitHub Agentic Workflows (first-party repo automation):
|
||||||
|
https://github.blog/ai-and-ml/automate-repository-tasks-with-github-agentic-workflows/
|
||||||
|
- GitLab Duo Agent Platform GA:
|
||||||
|
https://ir.gitlab.com/news/news-details/2026/GitLab-Announces-the-General-Availability-of-GitLab-Duo-Agent-Platform/default.aspx
|
||||||
|
- ai-review (cross-forge review incl. Gitea):
|
||||||
|
https://github.com/Nikita-Filonov/ai-review
|
||||||
|
- Gitea feature request — AI code agent (the vacuum):
|
||||||
|
https://github.com/go-gitea/gitea/issues/34527
|
||||||
|
- Phoenix — safe GitHub issue resolution (label-based webhook state
|
||||||
|
machine): https://arxiv.org/abs/2606.20243
|
||||||
|
- AgenticFlict — ~27% merge-conflict rate in agent PRs:
|
||||||
|
https://arxiv.org/abs/2604.03551
|
||||||
@@ -1,75 +0,0 @@
|
|||||||
"""Unit: `cli.py resume --headless` non-interactive rehydrate path.
|
|
||||||
|
|
||||||
The freeze / rehydrate loop needs a non-interactive `resume`: deliver a
|
|
||||||
follow-up prompt and skip the y/N preflight, reusing the same launch
|
|
||||||
core (`assume_yes` + `headless_prompt_text`) as `start --headless`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import bot_bottle.cli.resume as resume_mod
|
|
||||||
from bot_bottle.log import Die
|
|
||||||
|
|
||||||
|
|
||||||
def _metadata():
|
|
||||||
md = MagicMock()
|
|
||||||
md.agent_name = "implementer"
|
|
||||||
md.copy_cwd = False
|
|
||||||
md.cwd = "/repo"
|
|
||||||
md.identity = "implementer-abc12"
|
|
||||||
md.bottle_names = ["claude"]
|
|
||||||
md.backend = "docker"
|
|
||||||
return md
|
|
||||||
|
|
||||||
|
|
||||||
class ResumeHeadlessTest(unittest.TestCase):
    """Check how `cmd_resume` maps `--headless` / `--prompt` onto the launch call.

    `_launch_bottle`, `read_metadata` and `ManifestIndex.resolve` are all
    patched on the resume module, so the tests only observe the keyword
    arguments handed to the (stubbed) launch function.
    """

    def setUp(self) -> None:
        # Stub the launch core; it reports success (exit code 0).
        launch_patcher = patch.object(resume_mod, "_launch_bottle", return_value=0)
        self._launch = launch_patcher.start()

        # Metadata lookup always yields the canned record.
        metadata_patcher = patch.object(
            resume_mod, "read_metadata", return_value=_metadata()
        )
        metadata_patcher.start()

        # Manifest resolution yields a mock whose `require_agent` returns None.
        fake_manifest = MagicMock()
        fake_manifest.require_agent = MagicMock(return_value=None)
        manifest_patcher = patch.object(
            resume_mod.ManifestIndex, "resolve", return_value=fake_manifest
        )
        manifest_patcher.start()

        # Undo every patch started above once the test finishes.
        self.addCleanup(patch.stopall)

    def _launch_kwargs(self) -> dict[str, Any]:
        """Assert the launch stub ran exactly once and return its kwargs."""
        self._launch.assert_called_once()
        recorded_call = self._launch.call_args
        return dict(recorded_call.kwargs)

    def test_headless_passes_assume_yes_and_prompt(self):
        argv = ["implementer-abc12", "--headless", "--prompt", "Address the review"]
        rc = resume_mod.cmd_resume(argv)
        self.assertEqual(0, rc)
        launch_kwargs = self._launch_kwargs()
        self.assertTrue(launch_kwargs["assume_yes"])
        self.assertEqual("Address the review", launch_kwargs["headless_prompt_text"])

    def test_interactive_resume_unchanged(self):
        argv = ["implementer-abc12"]
        resume_mod.cmd_resume(argv)
        launch_kwargs = self._launch_kwargs()
        self.assertFalse(launch_kwargs["assume_yes"])
        self.assertEqual("", launch_kwargs["headless_prompt_text"])

    def test_headless_without_prompt_errors(self):
        argv = ["implementer-abc12", "--headless"]
        with self.assertRaises(Die):
            resume_mod.cmd_resume(argv)
        # The failure must happen before any launch is attempted.
        self._launch.assert_not_called()

    def test_prompt_without_headless_errors(self):
        argv = ["implementer-abc12", "--prompt", "hi"]
        with self.assertRaises(Die):
            resume_mod.cmd_resume(argv)
        # The failure must happen before any launch is attempted.
        self._launch.assert_not_called()
|
|
||||||
|
|
||||||
|
|
||||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
"""Unit: `cli.py start --headless` non-interactive launch path.
|
|
||||||
|
|
||||||
Headless is the keystone for orchestrators, CI, and webhook
|
|
||||||
dispatch: agent/bottles/label come from flags + manifest defaults, no
|
|
||||||
TUI selectors fire, and the preflight y/N is auto-confirmed
|
|
||||||
(`assume_yes=True`). All actual launch work is stubbed so no container
|
|
||||||
is created.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os
|
|
||||||
import unittest
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import bot_bottle.cli.start as start_mod
|
|
||||||
import bot_bottle.cli.tui as tui_mod
|
|
||||||
from bot_bottle.backend import ActiveAgent
|
|
||||||
from bot_bottle.log import Die
|
|
||||||
from bot_bottle.manifest import ManifestError
|
|
||||||
|
|
||||||
|
|
||||||
def _make_manifest(
|
|
||||||
agent_names: list[str],
|
|
||||||
bottle_names: list[str] | None = None,
|
|
||||||
agent_bottle: str = "",
|
|
||||||
):
|
|
||||||
manifest = MagicMock()
|
|
||||||
manifest.agents = {name: MagicMock(bottle=agent_bottle) for name in agent_names}
|
|
||||||
manifest.all_agent_names = sorted(agent_names)
|
|
||||||
manifest.all_bottle_names = sorted(bottle_names or [])
|
|
||||||
manifest.home_md = None # eager mode so _peek_agent_bottle uses agents dict
|
|
||||||
manifest.require_agent = MagicMock(return_value=None)
|
|
||||||
return manifest
|
|
||||||
|
|
||||||
|
|
||||||
def _active_agent(slug: str) -> ActiveAgent:
    """Return an `ActiveAgent` for `slug` with otherwise fixed field values."""
    fixed_fields = {
        "backend_name": "docker",
        "agent_name": "demo",
        "started_at": "2026-01-01T00:00:00+00:00",
        "services": (),
    }
    return ActiveAgent(slug=slug, **fixed_fields)
|
|
||||||
|
|
||||||
|
|
||||||
class TestCmdStartHeadless(unittest.TestCase):
|
|
||||||
"""Drive `cmd_start --headless` with launch + TUI stubbed out."""
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self._manifest = _make_manifest(
|
|
||||||
["researcher", "implementer"], ["claude", "dev"], agent_bottle="claude"
|
|
||||||
)
|
|
||||||
patch(
|
|
||||||
"bot_bottle.cli.start.ManifestIndex.resolve",
|
|
||||||
return_value=self._manifest,
|
|
||||||
).start()
|
|
||||||
self._launch_mock = patch(
|
|
||||||
"bot_bottle.cli.start._launch_bottle", return_value=0
|
|
||||||
).start()
|
|
||||||
# No bottles running by default → no label collision.
|
|
||||||
patch(
|
|
||||||
"bot_bottle.cli.start.enumerate_active_agents", return_value=[]
|
|
||||||
).start()
|
|
||||||
# If any TUI picker fires in headless mode, that's a bug.
|
|
||||||
self._agent_picker = patch.object(tui_mod, "filter_select").start()
|
|
||||||
self._bottle_picker = patch.object(tui_mod, "filter_multiselect").start()
|
|
||||||
self._modal = patch.object(tui_mod, "name_color_modal").start()
|
|
||||||
patch.dict(os.environ, {}, clear=False).start()
|
|
||||||
os.environ.pop("BOT_BOTTLE_BACKEND", None)
|
|
||||||
self.addCleanup(patch.stopall)
|
|
||||||
|
|
||||||
def _spec(self):
|
|
||||||
self._launch_mock.assert_called_once()
|
|
||||||
return self._launch_mock.call_args[0][0]
|
|
||||||
|
|
||||||
# -- no TUI in headless --------------------------------------------
|
|
||||||
|
|
||||||
def test_headless_fires_no_pickers(self):
|
|
||||||
rc = start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertEqual(0, rc)
|
|
||||||
self._agent_picker.assert_not_called()
|
|
||||||
self._bottle_picker.assert_not_called()
|
|
||||||
self._modal.assert_not_called()
|
|
||||||
|
|
||||||
def test_headless_assume_yes_forwarded(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertTrue(self._launch_mock.call_args[1]["assume_yes"])
|
|
||||||
|
|
||||||
# -- prompt --------------------------------------------------------
|
|
||||||
|
|
||||||
def test_headless_without_prompt_dies(self):
|
|
||||||
with self.assertRaises(Die):
|
|
||||||
start_mod.cmd_start(["--headless", "researcher", "--bottle", "claude"])
|
|
||||||
self._launch_mock.assert_not_called()
|
|
||||||
|
|
||||||
def test_headless_prompt_forwarded_to_launch(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude",
|
|
||||||
"--prompt", "Implement issue #42"]
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
"Implement issue #42",
|
|
||||||
self._launch_mock.call_args[1]["headless_prompt_text"],
|
|
||||||
)
|
|
||||||
|
|
||||||
# -- bottle resolution ---------------------------------------------
|
|
||||||
|
|
||||||
def test_explicit_bottles_forwarded_in_order(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "dev", "--bottle", "claude",
|
|
||||||
"--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertEqual(("dev", "claude"), self._spec().bottle_names)
|
|
||||||
|
|
||||||
def test_omitted_bottle_falls_back_to_agent_default(self):
|
|
||||||
start_mod.cmd_start(["--headless", "implementer", "--prompt", "Do it"])
|
|
||||||
self.assertEqual(("claude",), self._spec().bottle_names)
|
|
||||||
|
|
||||||
def test_no_bottle_and_no_default_dies(self):
|
|
||||||
manifest = _make_manifest(["researcher"], ["claude"], agent_bottle="")
|
|
||||||
with patch(
|
|
||||||
"bot_bottle.cli.start.ManifestIndex.resolve", return_value=manifest
|
|
||||||
):
|
|
||||||
with self.assertRaises(Die):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self._launch_mock.assert_not_called()
|
|
||||||
|
|
||||||
# -- agent resolution ----------------------------------------------
|
|
||||||
|
|
||||||
def test_missing_agent_name_dies(self):
|
|
||||||
with self.assertRaises(Die):
|
|
||||||
start_mod.cmd_start(["--headless"])
|
|
||||||
self._launch_mock.assert_not_called()
|
|
||||||
|
|
||||||
def test_unknown_agent_raises_manifest_error(self):
|
|
||||||
self._manifest.require_agent.side_effect = ManifestError("agent 'x' not defined")
|
|
||||||
with self.assertRaises(ManifestError):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "x", "--bottle", "claude", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self._launch_mock.assert_not_called()
|
|
||||||
|
|
||||||
# -- label / color -------------------------------------------------
|
|
||||||
|
|
||||||
def test_label_defaults_to_agent_name(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertEqual("researcher", self._spec().label)
|
|
||||||
|
|
||||||
def test_explicit_label_and_color_forwarded(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude",
|
|
||||||
"--label", "nightly", "--color", "green", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
spec = self._spec()
|
|
||||||
self.assertEqual("nightly", spec.label)
|
|
||||||
self.assertEqual("green", spec.color)
|
|
||||||
|
|
||||||
def test_label_collision_uniquifies(self):
|
|
||||||
with patch(
|
|
||||||
"bot_bottle.cli.start.enumerate_active_agents",
|
|
||||||
return_value=[_active_agent("researcher")],
|
|
||||||
):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "researcher", "--bottle", "claude", "--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertEqual("researcher-2", self._spec().label)
|
|
||||||
|
|
||||||
# -- backend wiring ------------------------------------------------
|
|
||||||
|
|
||||||
def test_backend_flag_forwarded(self):
|
|
||||||
start_mod.cmd_start(
|
|
||||||
["--headless", "--backend=docker", "researcher", "--bottle", "claude",
|
|
||||||
"--prompt", "Do it"]
|
|
||||||
)
|
|
||||||
self.assertEqual("docker", self._launch_mock.call_args[1]["backend_name"])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@@ -343,14 +343,5 @@ class TestClaudeSuperviseMcp(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestClaudeHeadlessPrompt(unittest.TestCase):
|
|
||||||
def test_returns_p_flag_and_prompt(self):
|
|
||||||
self.assertEqual(["-p", "Do the task"], ClaudeAgentProvider().headless_prompt("Do the task"))
|
|
||||||
|
|
||||||
def test_preserves_prompt_text_verbatim(self):
|
|
||||||
text = "Fix issue #42: the widget breaks on empty input"
|
|
||||||
self.assertEqual(["-p", text], ClaudeAgentProvider().headless_prompt(text))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -314,14 +314,5 @@ class TestCodexSuperviseMcp(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestCodexHeadlessPrompt(unittest.TestCase):
|
|
||||||
def test_returns_prompt_as_positional_arg(self):
|
|
||||||
self.assertEqual(["Do the task"], CodexAgentProvider().headless_prompt("Do the task"))
|
|
||||||
|
|
||||||
def test_preserves_prompt_text_verbatim(self):
|
|
||||||
text = "Fix issue #42: the widget breaks on empty input"
|
|
||||||
self.assertEqual([text], CodexAgentProvider().headless_prompt(text))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -1,107 +0,0 @@
|
|||||||
"""Unit: Forge abstraction + ScopedForge (PRD forge-native-integration)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from bot_bottle.contrib.forge.base import (
|
|
||||||
Comment,
|
|
||||||
Forge,
|
|
||||||
ForgeScopeError,
|
|
||||||
Issue,
|
|
||||||
PullRequest,
|
|
||||||
ScopedForge,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class _RecordingForge(Forge):
|
|
||||||
"""In-memory fake that records writes."""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.comments: list[tuple[int, str]] = []
|
|
||||||
self.descriptions: list[tuple[int, str]] = []
|
|
||||||
|
|
||||||
def read_issue(self, number: int) -> Issue:
|
|
||||||
return Issue(number=number, title="t", body="b", state="open")
|
|
||||||
|
|
||||||
def read_pr(self, number: int) -> PullRequest:
|
|
||||||
return PullRequest(
|
|
||||||
number=number, title="pr", body="b", state="open", merged=False
|
|
||||||
)
|
|
||||||
|
|
||||||
def read_comments(self, number: int) -> list[Comment]:
|
|
||||||
return [Comment(id=1, user="alice", body="hi")]
|
|
||||||
|
|
||||||
def post_comment(self, number: int, body: str) -> None:
|
|
||||||
self.comments.append((number, body))
|
|
||||||
|
|
||||||
def update_description(self, number: int, body: str) -> None:
|
|
||||||
self.descriptions.append((number, body))
|
|
||||||
|
|
||||||
def is_org_member(self, org: str, username: str) -> bool:
|
|
||||||
return username == "member"
|
|
||||||
|
|
||||||
def get_pr_for_issue(self, number: int) -> int | None:
|
|
||||||
return 99 if number == 17 else None
|
|
||||||
|
|
||||||
def is_pr_open(self, number: int) -> bool:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
class TestScopedForgeReads(unittest.TestCase):
|
|
||||||
def setUp(self) -> None:
|
|
||||||
self.inner = _RecordingForge()
|
|
||||||
self.scoped = ScopedForge(self.inner, assigned_issue=17, assigned_prs=[42])
|
|
||||||
|
|
||||||
def test_reads_pass_through_to_any_number(self):
|
|
||||||
# A number well outside the writable scope still reads fine.
|
|
||||||
self.assertEqual(123, self.scoped.read_issue(123).number)
|
|
||||||
self.assertEqual("alice", self.scoped.read_comments(500)[0].user)
|
|
||||||
|
|
||||||
def test_read_pr_passes_through(self):
|
|
||||||
pr = self.scoped.read_pr(999)
|
|
||||||
self.assertIsInstance(pr, PullRequest)
|
|
||||||
self.assertEqual(999, pr.number)
|
|
||||||
self.assertFalse(pr.merged)
|
|
||||||
|
|
||||||
def test_membership_and_pr_lookups_delegate(self):
|
|
||||||
self.assertTrue(self.scoped.is_org_member("bot-bottle", "member"))
|
|
||||||
self.assertFalse(self.scoped.is_org_member("bot-bottle", "stranger"))
|
|
||||||
self.assertEqual(99, self.scoped.get_pr_for_issue(17))
|
|
||||||
self.assertTrue(self.scoped.is_pr_open(8000))
|
|
||||||
|
|
||||||
|
|
||||||
class TestScopedForgeWrites(unittest.TestCase):
|
|
||||||
def setUp(self) -> None:
|
|
||||||
self.inner = _RecordingForge()
|
|
||||||
self.scoped = ScopedForge(self.inner, assigned_issue=17, assigned_prs=[42])
|
|
||||||
|
|
||||||
def test_writable_set_is_issue_plus_prs(self):
|
|
||||||
self.assertEqual(frozenset({17, 42}), self.scoped.writable)
|
|
||||||
|
|
||||||
def test_write_to_assigned_issue_allowed(self):
|
|
||||||
self.scoped.post_comment(17, "done")
|
|
||||||
self.assertEqual([(17, "done")], self.inner.comments)
|
|
||||||
|
|
||||||
def test_write_to_assigned_pr_allowed(self):
|
|
||||||
self.scoped.update_description(42, "new body")
|
|
||||||
self.assertEqual([(42, "new body")], self.inner.descriptions)
|
|
||||||
|
|
||||||
def test_comment_outside_scope_rejected(self):
|
|
||||||
with self.assertRaises(ForgeScopeError) as ctx:
|
|
||||||
self.scoped.post_comment(500, "spam")
|
|
||||||
self.assertIn("500", str(ctx.exception))
|
|
||||||
self.assertEqual([], self.inner.comments)
|
|
||||||
|
|
||||||
def test_description_outside_scope_rejected(self):
|
|
||||||
with self.assertRaises(ForgeScopeError):
|
|
||||||
self.scoped.update_description(500, "tamper")
|
|
||||||
self.assertEqual([], self.inner.descriptions)
|
|
||||||
|
|
||||||
def test_scope_error_is_permission_error(self):
|
|
||||||
# Sidecars can catch the stdlib base type.
|
|
||||||
self.assertIn(PermissionError, ForgeScopeError.__mro__)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@@ -1,145 +0,0 @@
|
|||||||
"""Unit: GiteaClient + GiteaForge (PRD forge-native-integration)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import unittest
|
|
||||||
import urllib.error
|
|
||||||
from io import BytesIO
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
from bot_bottle.contrib.gitea.client import GiteaClient, GiteaForge
|
|
||||||
|
|
||||||
|
|
||||||
def _client() -> GiteaClient:
|
|
||||||
return GiteaClient(
|
|
||||||
api_url="https://gitea.example.com/api/v1",
|
|
||||||
owner="didericis",
|
|
||||||
repo="bot-bottle",
|
|
||||||
token="test-token",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _resp(body: object, status: int = 200) -> MagicMock:
|
|
||||||
resp = MagicMock()
|
|
||||||
resp.read.return_value = json.dumps(body).encode() if body is not None else b""
|
|
||||||
resp.status = status
|
|
||||||
resp.__enter__ = lambda s: s # type: ignore
|
|
||||||
resp.__exit__ = MagicMock(return_value=False)
|
|
||||||
return resp
|
|
||||||
|
|
||||||
|
|
||||||
def _http_error(code: int, body: str = "") -> urllib.error.HTTPError:
|
|
||||||
return urllib.error.HTTPError(
|
|
||||||
url="http://x", code=code, msg="err", hdrs=None, # type: ignore[arg-type]
|
|
||||||
fp=BytesIO(body.encode()),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
_URLOPEN = "bot_bottle.contrib.gitea.client.urllib.request.urlopen"
|
|
||||||
|
|
||||||
|
|
||||||
class TestOrgMembership(unittest.TestCase):
|
|
||||||
def test_member_returns_true_on_2xx(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp(None, 204)) as m:
|
|
||||||
self.assertTrue(_client().is_org_member("bot-bottle", "alice"))
|
|
||||||
req = m.call_args.args[0]
|
|
||||||
self.assertIn("/orgs/bot-bottle/members/alice", req.full_url)
|
|
||||||
|
|
||||||
def test_nonmember_returns_false_on_404(self):
|
|
||||||
with patch(_URLOPEN, side_effect=_http_error(404)):
|
|
||||||
self.assertFalse(_client().is_org_member("bot-bottle", "stranger"))
|
|
||||||
|
|
||||||
def test_other_http_error_raises(self):
|
|
||||||
with patch(_URLOPEN, side_effect=_http_error(403, "forbidden")):
|
|
||||||
with self.assertRaises(RuntimeError) as ctx:
|
|
||||||
_client().is_org_member("bot-bottle", "alice")
|
|
||||||
self.assertIn("403", str(ctx.exception))
|
|
||||||
|
|
||||||
|
|
||||||
class TestForgeReads(unittest.TestCase):
|
|
||||||
def test_read_issue_maps_fields(self):
|
|
||||||
raw = {"number": 17, "title": "Bug", "body": "broken", "state": "open"}
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)) as m:
|
|
||||||
issue = GiteaForge(_client()).read_issue(17)
|
|
||||||
self.assertEqual((17, "Bug", "broken", "open"),
|
|
||||||
(issue.number, issue.title, issue.body, issue.state))
|
|
||||||
self.assertIn("/repos/didericis/bot-bottle/issues/17",
|
|
||||||
m.call_args.args[0].full_url)
|
|
||||||
|
|
||||||
def test_read_issue_tolerates_null_body(self):
|
|
||||||
raw = {"number": 17, "title": "T", "body": None, "state": "open"}
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)):
|
|
||||||
self.assertEqual("", GiteaForge(_client()).read_issue(17).body)
|
|
||||||
|
|
||||||
def test_read_comments_maps_user_login(self):
|
|
||||||
raw = [
|
|
||||||
{"id": 1, "user": {"login": "alice"}, "body": "hi"},
|
|
||||||
{"id": 2, "user": {"login": "bob"}, "body": "yo"},
|
|
||||||
]
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)):
|
|
||||||
comments = GiteaForge(_client()).read_comments(17)
|
|
||||||
self.assertEqual(["alice", "bob"], [c.user for c in comments])
|
|
||||||
self.assertEqual([1, 2], [c.id for c in comments])
|
|
||||||
|
|
||||||
|
|
||||||
class TestForgeWrites(unittest.TestCase):
|
|
||||||
def test_post_comment_payload_and_url(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp(None, 201)) as m:
|
|
||||||
GiteaForge(_client()).post_comment(17, "done ✓")
|
|
||||||
req = m.call_args.args[0]
|
|
||||||
self.assertEqual("POST", req.method)
|
|
||||||
self.assertIn("/repos/didericis/bot-bottle/issues/17/comments", req.full_url)
|
|
||||||
self.assertEqual("done ✓", json.loads(req.data)["body"])
|
|
||||||
|
|
||||||
def test_update_description_patches_issue(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp(None, 200)) as m:
|
|
||||||
GiteaForge(_client()).update_description(17, "edited")
|
|
||||||
req = m.call_args.args[0]
|
|
||||||
self.assertEqual("PATCH", req.method)
|
|
||||||
self.assertTrue(req.full_url.endswith("/issues/17"))
|
|
||||||
self.assertEqual("edited", json.loads(req.data)["body"])
|
|
||||||
|
|
||||||
def test_auth_header_sent(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp(None, 201)) as m:
|
|
||||||
GiteaForge(_client()).post_comment(17, "x")
|
|
||||||
self.assertEqual("token test-token",
|
|
||||||
m.call_args.args[0].headers["Authorization"])
|
|
||||||
|
|
||||||
|
|
||||||
class TestPRHelpers(unittest.TestCase):
|
|
||||||
def test_get_pr_for_issue_returns_number_when_issue_is_pr(self):
|
|
||||||
raw = {"number": 18, "pull_request": {"merged": False}}
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)):
|
|
||||||
self.assertEqual(18, GiteaForge(_client()).get_pr_for_issue(18))
|
|
||||||
|
|
||||||
def test_get_pr_for_issue_none_for_plain_issue(self):
|
|
||||||
raw = {"number": 17, "pull_request": None}
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)):
|
|
||||||
self.assertIsNone(GiteaForge(_client()).get_pr_for_issue(17))
|
|
||||||
|
|
||||||
def test_is_pr_open_true_when_state_open(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp({"state": "open"})):
|
|
||||||
self.assertTrue(GiteaForge(_client()).is_pr_open(18))
|
|
||||||
|
|
||||||
def test_is_pr_open_false_when_closed(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp({"state": "closed"})):
|
|
||||||
self.assertFalse(GiteaForge(_client()).is_pr_open(18))
|
|
||||||
|
|
||||||
def test_read_pr_maps_fields_including_merged(self):
|
|
||||||
raw = {"number": 18, "title": "Fix", "body": "patch",
|
|
||||||
"state": "closed", "merged": True}
|
|
||||||
with patch(_URLOPEN, return_value=_resp(raw)) as m:
|
|
||||||
pr = GiteaForge(_client()).read_pr(18)
|
|
||||||
self.assertEqual((18, "Fix", "patch", "closed", True),
|
|
||||||
(pr.number, pr.title, pr.body, pr.state, pr.merged))
|
|
||||||
self.assertIn("/repos/didericis/bot-bottle/pulls/18",
|
|
||||||
m.call_args.args[0].full_url)
|
|
||||||
|
|
||||||
def test_read_pr_merged_defaults_false(self):
|
|
||||||
with patch(_URLOPEN, return_value=_resp({"number": 18, "state": "open"})):
|
|
||||||
self.assertFalse(GiteaForge(_client()).read_pr(18).merged)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
"""Unit: SQLite forge state store (PRD forge-native-integration)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import tempfile
|
|
||||||
import unittest
|
|
||||||
from dataclasses import replace
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from bot_bottle.contrib.gitea.forge_state import (
|
|
||||||
STATUS_FROZEN,
|
|
||||||
STATUS_RUNNING,
|
|
||||||
ForgeState,
|
|
||||||
SqliteForgeStateStore,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _state(**over: object) -> ForgeState:
|
|
||||||
base = ForgeState(
|
|
||||||
owner="didericis",
|
|
||||||
repo="bot-bottle",
|
|
||||||
issue_number=17,
|
|
||||||
slug="implementer-abc12",
|
|
||||||
agent_name="implementer",
|
|
||||||
bottle_names=["claude"],
|
|
||||||
backend_name="docker",
|
|
||||||
agent_git_user="didericis-claude",
|
|
||||||
pr_number=42,
|
|
||||||
status=STATUS_FROZEN,
|
|
||||||
last_checkin_at="2026-06-29T12:04:12-04:00",
|
|
||||||
)
|
|
||||||
return replace(base, **over)
|
|
||||||
|
|
||||||
|
|
||||||
class ForgeStateStoreTest(unittest.TestCase):
|
|
||||||
def setUp(self) -> None:
|
|
||||||
tmp = Path(self.enterContext(tempfile.TemporaryDirectory())) # pylint: disable=consider-using-with
|
|
||||||
self.store = SqliteForgeStateStore(tmp / "sub" / "bot-bottle.db")
|
|
||||||
|
|
||||||
def test_round_trip(self):
|
|
||||||
self.store.upsert(_state())
|
|
||||||
self.assertEqual(_state(), self.store.get("didericis", "bot-bottle", 17))
|
|
||||||
|
|
||||||
def test_missing_returns_none(self):
|
|
||||||
self.assertIsNone(self.store.get("nobody", "nope", 1))
|
|
||||||
|
|
||||||
def test_creates_db_parent_dirs(self):
|
|
||||||
# setUp pointed at a non-existent 'sub/' dir; init must create it.
|
|
||||||
self.assertIsNone(self.store.get("x", "y", 1)) # no raise
|
|
||||||
|
|
||||||
def test_upsert_replaces(self):
|
|
||||||
self.store.upsert(_state(status=STATUS_RUNNING))
|
|
||||||
self.store.upsert(_state(status=STATUS_FROZEN))
|
|
||||||
got = self.store.get("didericis", "bot-bottle", 17)
|
|
||||||
assert got is not None
|
|
||||||
self.assertEqual(STATUS_FROZEN, got.status)
|
|
||||||
# Still one row, not two.
|
|
||||||
self.assertEqual(1, len(self.store.all()))
|
|
||||||
|
|
||||||
def test_delete_is_idempotent(self):
|
|
||||||
self.store.upsert(_state())
|
|
||||||
self.store.delete("didericis", "bot-bottle", 17)
|
|
||||||
self.store.delete("didericis", "bot-bottle", 17) # no raise
|
|
||||||
self.assertIsNone(self.store.get("didericis", "bot-bottle", 17))
|
|
||||||
|
|
||||||
def test_all_lists_across_repos_sorted(self):
|
|
||||||
self.store.upsert(_state(issue_number=18, slug="other"))
|
|
||||||
self.store.upsert(_state(issue_number=17))
|
|
||||||
self.store.upsert(_state(owner="acme", repo="widget", issue_number=3))
|
|
||||||
states = self.store.all()
|
|
||||||
self.assertEqual(3, len(states))
|
|
||||||
self.assertEqual(
|
|
||||||
[("acme", 3), ("didericis", 17), ("didericis", 18)],
|
|
||||||
[(s.owner, s.issue_number) for s in states],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_all_empty(self):
|
|
||||||
self.assertEqual([], self.store.all())
|
|
||||||
|
|
||||||
def test_bottle_names_list_preserved(self):
|
|
||||||
self.store.upsert(_state(bottle_names=["claude", "dev"]))
|
|
||||||
got = self.store.get("didericis", "bot-bottle", 17)
|
|
||||||
assert got is not None
|
|
||||||
self.assertEqual(["claude", "dev"], got.bottle_names)
|
|
||||||
|
|
||||||
def test_pr_number_nullable(self):
|
|
||||||
self.store.upsert(_state(pr_number=None))
|
|
||||||
got = self.store.get("didericis", "bot-bottle", 17)
|
|
||||||
assert got is not None
|
|
||||||
self.assertIsNone(got.pr_number)
|
|
||||||
|
|
||||||
def test_persists_across_store_instances(self):
|
|
||||||
self.store.upsert(_state())
|
|
||||||
reopened = SqliteForgeStateStore(self.store._db_path) # pylint: disable=protected-access
|
|
||||||
self.assertEqual(_state(), reopened.get("didericis", "bot-bottle", 17))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@@ -223,14 +223,5 @@ class TestPiDockerfile(unittest.TestCase):
|
|||||||
self.assertIn("chmod 1777 /tmp /var/tmp", dockerfile)
|
self.assertIn("chmod 1777 /tmp /var/tmp", dockerfile)
|
||||||
|
|
||||||
|
|
||||||
class TestPiHeadlessPrompt(unittest.TestCase):
|
|
||||||
def test_returns_p_flag_and_prompt(self):
|
|
||||||
self.assertEqual(["-p", "Do the task"], PiAgentProvider().headless_prompt("Do the task"))
|
|
||||||
|
|
||||||
def test_preserves_prompt_text_verbatim(self):
|
|
||||||
text = "Fix issue #42: the widget breaks on empty input"
|
|
||||||
self.assertEqual(["-p", text], PiAgentProvider().headless_prompt(text))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ class _Provider(AgentProvider):
|
|||||||
def provision_prompt(self, plan, bottle): ... # type: ignore[override]
|
def provision_prompt(self, plan, bottle): ... # type: ignore[override]
|
||||||
def provision(self, plan, bottle): ... # type: ignore[override]
|
def provision(self, plan, bottle): ... # type: ignore[override]
|
||||||
def provision_supervise_mcp(self, plan, bottle, supervise_url): ... # type: ignore[override]
|
def provision_supervise_mcp(self, plan, bottle, supervise_url): ... # type: ignore[override]
|
||||||
def headless_prompt(self, prompt): return [] # type: ignore[override]
|
|
||||||
|
|
||||||
|
|
||||||
_PROVIDER = _Provider()
|
_PROVIDER = _Provider()
|
||||||
|
|||||||
@@ -49,7 +49,6 @@ class _Provider(AgentProvider):
|
|||||||
def provision_prompt(self, plan, bottle): ... # type: ignore[override]
|
def provision_prompt(self, plan, bottle): ... # type: ignore[override]
|
||||||
def provision(self, plan, bottle): ... # type: ignore[override]
|
def provision(self, plan, bottle): ... # type: ignore[override]
|
||||||
def provision_supervise_mcp(self, plan, bottle, supervise_url): ... # type: ignore[override]
|
def provision_supervise_mcp(self, plan, bottle, supervise_url): ... # type: ignore[override]
|
||||||
def headless_prompt(self, prompt): return [] # type: ignore[override]
|
|
||||||
|
|
||||||
|
|
||||||
_PROVIDER = _Provider()
|
_PROVIDER = _Provider()
|
||||||
|
|||||||
Reference in New Issue
Block a user