0efc07ba67
Closes #178. The backend provision functions now receive a Bottle handle with exec / cp_in methods instead of a raw target string. Provisioner modules use bottle.exec and bottle.cp_in in place of inlined subprocess.run(["docker", "exec"/"cp", ...]) and direct _smolvm.machine_cp / machine_exec calls. This decouples the provisioners from backend-specific runtime primitives so future refactors (e.g. the supervise rework) can swap the bottle's exec implementation without touching every provisioner. Each launch.py constructs the Bottle handle before calling provision so it can be passed in; provision_prompt's return value is wired back onto the bottle's prompt path attribute after the fact.
505 lines
20 KiB
Python
505 lines
20 KiB
Python
"""Per-backend bottle factories.
|
|
|
|
A bottle is a running, isolated environment with claude inside. Each
backend exposes five methods:

  prepare(spec, stage_dir=...) -> BottlePlan
      Resolves names, validates host-side prerequisites, and writes
      scratch files. No remote/runtime resources are created yet.
      Safe to call before the y/N preflight.

  launch(plan) -> ContextManager[Bottle]
      Brings up the container (or VM, or remote machine), provisions
      it, yields a Bottle handle, and tears everything down on exit.

  prepare_cleanup() -> BottleCleanupPlan
      Enumerates orphaned resources left behind by previous bottles
      (containers, networks, ...). Idempotent; no side effects.

  cleanup(plan) -> None
      Actually removes everything described by the cleanup plan.

  enumerate_active() -> Sequence[ActiveAgent]
      Returns every currently-running bottle on this backend, with
      enough metadata for callers (CLI `list active`, dashboard
      agents pane) to render a row.

The backend is selected by `--backend` on `start` or by the
BOT_BOTTLE_BACKEND environment variable (default "docker"). Per
PRD 0003 the manifest does not carry a backend field; the host picks.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
from abc import ABC, abstractmethod
|
|
from contextlib import AbstractContextManager
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any, Generic, Sequence, TypeVar
|
|
|
|
from ..agent_provider import AgentProvisionPlan
|
|
from ..egress import EgressPlan
|
|
from ..git_gate import GitGatePlan
|
|
from ..log import die, info
|
|
from ..manifest import GitEntry, Manifest
|
|
from ..supervise import SupervisePlan
|
|
from ..util import expand_tilde
|
|
from ..workspace import WorkspacePlan
|
|
from .print_util import print_multi, visible_agent_env_names
|
|
from .util import host_skill_dir
|
|
|
|
|
|
@dataclass(frozen=True)
class BottleSpec:
    """Backend-agnostic launch intent, as supplied by the CLI.

    A backend's prepare step reads this spec and derives its own
    backend-specific plan from it. Anything that has to be resolved
    (image names, container name, scratch paths, runsc availability)
    is stored on that plan, never on the spec.
    """

    manifest: Manifest
    agent_name: str
    copy_cwd: bool
    user_cwd: str
    # PRD 0016 follow-up. A non-empty value tells the backend's prepare
    # step to reuse this identity rather than mint a new one; the resume
    # path (`cli.py resume <identity>`) sets it to continue an existing
    # bottle's state. A fresh `start` leaves it as the empty string.
    identity: str = ""
|
|
|
|
|
|
@dataclass(frozen=True)
class BottlePlan(ABC):
    """Common result of a backend's prepare step.

    Backend-specific subclasses (e.g. DockerBottlePlan) extend this
    with their own resolved fields.
    """

    spec: BottleSpec
    stage_dir: Path
    git_gate_plan: GitGatePlan
    egress_plan: EgressPlan
    supervise_plan: SupervisePlan | None
    agent_provision: AgentProvisionPlan
    workspace_plan: WorkspacePlan

    def print(self, *, remote_control: bool) -> None:
        """Write the y/N preflight summary to stderr.

        `remote_control` is accepted but not used by this rendering.
        Inside this method `print(...)` resolves to the builtin, not to
        this method, because class attributes are not in function scope.
        """
        # NOTE(review): the label strings below are reproduced exactly as
        # they appear in the reviewed copy; confirm their column padding
        # against the repository before merging.
        del remote_control
        agent_name = self.spec.agent_name
        manifest = self.spec.manifest
        agent = manifest.agents[agent_name]
        bottle = manifest.bottle_for(agent_name)
        provision = self.agent_provision

        # Union of the bottle's env names and the provider's guest env
        # names, sorted, then filtered through the provider's hidden list.
        merged_env = set(bottle.env.keys()) | set(provision.guest_env.keys())
        env_names = visible_agent_env_names(
            sorted(merged_env),
            hidden_env_names=provision.hidden_env_names,
        )

        print(file=sys.stderr)
        info(f"agent : {agent_name}")
        info(f"provider : {provision.template}")
        print_multi("env ", env_names)
        print_multi("skills ", list(agent.skills))
        info(f"bottle : {agent.bottle}")

        identity = manifest.git_identity_summary(agent_name)
        if identity:
            info(f" git identity : {identity}")

        git_lines = []
        for upstream in self.git_gate_plan.upstreams:
            git_lines.append(
                f"{upstream.name} → {upstream.upstream_host}:{upstream.upstream_port}"
            )
        if git_lines:
            print_multi(" git gate ", git_lines)

        routes = self.egress_plan.routes
        if routes:
            egress_lines = [
                f"{route.host}"
                + (f" [auth:{route.auth_scheme}]" if route.auth_scheme else "")
                for route in routes
            ]
            print_multi(" egress ", egress_lines)
        print(file=sys.stderr)
|
|
|
|
|
|
@dataclass(frozen=True)
class BottleCleanupPlan(ABC):
    """Common result of a backend's prepare_cleanup step.

    Backend-specific subclasses (e.g. DockerBottleCleanupPlan) hold the
    lists of resources to remove and must implement both `print` and
    `empty`.
    """

    @abstractmethod
    def print(self) -> None:
        """Write the cleanup y/N summary to stderr."""

    @property
    @abstractmethod
    def empty(self) -> bool:
        """Whether the plan contains nothing to remove.

        The CLI checks this so it can skip the y/N prompt entirely when
        there is no work to do.
        """
|
|
|
|
|
|
@dataclass(frozen=True)
class ExecResult:
    """Outcome of a `Bottle.exec` call: exit status plus captured streams.

    The shape is deliberately backend-neutral. The Docker
    implementation fills it from a `subprocess.CompletedProcess`, and a
    future fly/smolmachines backend could fill it from any source that
    yields a return code and captured output.
    """

    returncode: int
    stdout: str
    stderr: str
|
|
|
|
|
|
@dataclass(frozen=True)
class ActiveAgent:
    """A single currently-running agent, in the form the CLI's
    `list active` and the dashboard agents pane display it.

    "Agent" is the project's consistent term for what runs inside a
    bottle: the bottle is the container, the agent is its occupant.

    All fields are backend-neutral on purpose. `services` names the
    sidecar daemons that are currently up for this bottle (`pipelock`,
    `egress`, `git-gate`, `supervise`); the dashboard consults it to
    gate edit verbs. `backend_name` is the corresponding key in
    `_BACKENDS` (`docker` / `smolmachines`); the active-list rendering
    uses it to disambiguate rows and the dashboard uses it on its
    re-attach path.
    """

    backend_name: str
    slug: str
    agent_name: str  # taken from metadata.json; "?" when missing
    started_at: str  # ISO 8601 string from metadata.json; "" when missing
    services: tuple[str, ...]  # in alphabetical order
|
|
|
|
|
|
class Bottle(ABC):
    """Handle to a running bottle, yielded by a backend's launch step.

    Summary of the interface:

    * `agent_argv` builds the host-side argv that starts the agent.
    * `exec_agent` runs the selected agent CLI inside the bottle and
      blocks until the session ends.
    * `exec` runs a POSIX shell script inside the bottle and returns
      the captured result.
    * `cp_in` copies a host path into the bottle.
    * `close` is an idempotent alias for context-manager teardown.
    """

    name: str

    @abstractmethod
    def agent_argv(self, argv: list[str], *, tty: bool = True) -> list[str]:
        """Build the host-side argv that runs the selected agent inside
        the bottle.

        `exec_agent` uses it for foreground handoffs. The dashboard's
        tmux `respawn-pane` flow uses it as well, because that flow
        needs the argv up front: it spawns claude in a tmux pane rather
        than as a child of the current process.

        Implementations transparently inject
        `--append-system-prompt-file` when the bottle was launched with
        a provisioned prompt path.
        """

    @abstractmethod
    def exec_agent(self, argv: list[str], *, tty: bool = True) -> int:
        """Run the selected agent CLI inside the bottle and block until
        the session ends.

        NOTE(review): the int result is presumably the agent's exit
        status; confirm against the concrete backends.
        """

    @abstractmethod
    def exec(self, script: str, *, user: str = "node") -> ExecResult:
        """Run `script` as a POSIX shell script inside the bottle and
        return the captured stdout/stderr/returncode.

        The script runs as `user`, which defaults to `node` to match
        the agent image's USER directive. The child inherits the
        bottle's environment, including the HTTPS_PROXY setting that
        points at the bottle's proxy sidecar. A non-zero exit does not
        raise; callers check `returncode` themselves.

        Pass `user="root"` for shell-outs that need privileged file
        writes or package installs. Provisioning calls that need root
        bypass `Bottle.exec` and use the backend-specific raw
        machine-exec helper, but the tests have a legitimate use case
        for arbitrary-user runs.

        NOTE(review): this docstring previously named the proxy sidecar
        as pipelock, while other docstrings in this module describe
        egress as its replacement since PRD 0017; confirm which is
        current.
        """

    @abstractmethod
    def cp_in(self, host_path: str, container_path: str) -> None:
        """Copy `host_path` from the host into the bottle at
        `container_path`."""

    @abstractmethod
    def close(self) -> None:
        """Tear the bottle down; an idempotent alias for the
        context-manager teardown."""
|
|
|
|
|
|
|
|
|
|
# Type parameters for BottleBackend. Bounding them to the base plan
# classes lets each backend bind its own concrete plan / cleanup-plan
# subclass while still being usable through the base interface.
PlanT = TypeVar("PlanT", bound=BottlePlan)
CleanupT = TypeVar("CleanupT", bound=BottleCleanupPlan)
|
|
|
|
|
|
class BottleBackend(ABC, Generic[PlanT, CleanupT]):
    """Abstract base class for the selectable bottle backends.

    Concrete subclasses (e.g. DockerBottleBackend) supply their own
    prepare/launch implementations. The class is generic over the
    backend's concrete plan and cleanup-plan types, so subclass methods
    receive the narrow type without isinstance boilerplate.
    """

    name: str

    def prepare(self, spec: BottleSpec, *, stage_dir: Path) -> PlanT:
        """Validate `spec`, then resolve it into a backend plan.

        This is a template method: the cross-backend host-side checks
        in `_validate` always run first, and only then is the subclass's
        `_resolve_plan` asked for the backend-specific resolution
        (names, scratch files, etc.). Keeping the validation call here
        means a future backend cannot skip it by accident. No
        remote/runtime resources are created.
        """
        self._validate(spec)
        plan = self._resolve_plan(spec, stage_dir=stage_dir)
        return plan

    def _validate(self, spec: BottleSpec) -> None:
        """Run the pre-launch checks shared by every backend.

        Confirms that the agent exists, that its named skills are
        present on the host, and that every git IdentityFile resolves;
        it then checks the agent provider's dockerfile, if one is
        configured. Subclasses with extra preconditions should override
        this and call `super()._validate(spec)` first.
        """
        agent_name = spec.agent_name
        manifest = spec.manifest
        manifest.require_agent(agent_name)
        # Look both objects up before running any check so the lookup
        # order matches the order of the checks that follow.
        agent = manifest.agents[agent_name]
        bottle = manifest.bottle_for(agent_name)
        self._validate_skills(agent.skills)
        self._validate_git_entries(bottle.git)
        self._validate_agent_provider_dockerfile(spec)

    def _validate_skills(self, skills: Sequence[str]) -> None:
        """Die unless every named skill is a directory on the host.

        Skills live under the host's `~/.claude/skills/`. The check is
        purely host-side, so this default covers every backend.
        """
        for skill in skills:
            skill_dir = host_skill_dir(skill)
            if os.path.isdir(skill_dir):
                continue
            die(
                f"skill '{skill}' not found on host at {skill_dir}. "
                "Create it under ~/.claude/skills/, then re-run."
            )

    def _validate_git_entries(self, entries: Sequence[GitEntry]) -> None:
        """Die unless every entry's IdentityFile exists on the host.

        A leading `~` is expanded before the check. The git-gate copies
        the key in at start time to authenticate the upstream push
        (PRD 0008). Manifest validation already enforces the shape of
        each entry; only presence is checked here.
        """
        for git_entry in entries:
            key_path = expand_tilde(git_entry.IdentityFile)
            if os.path.isfile(key_path):
                continue
            die(
                f"git upstream key file not found for '{git_entry.Name}': {key_path}"
            )

    def _validate_agent_provider_dockerfile(self, spec: BottleSpec) -> None:
        """Die if the bottle names an agent-provider dockerfile that is
        not a file on the host.

        Does nothing when no dockerfile is configured. A leading `~` is
        expanded, and a relative path is resolved against
        `spec.user_cwd`.
        """
        manifest = spec.manifest
        configured = manifest.bottle_for(spec.agent_name).agent_provider.dockerfile
        if not configured:
            return
        candidate = Path(expand_tilde(configured))
        if not candidate.is_absolute():
            candidate = Path(spec.user_cwd) / candidate
        if candidate.is_file():
            return
        bottle_name = manifest.agents[spec.agent_name].bottle
        die(
            f"agent_provider.dockerfile for bottle '{bottle_name}' "
            f"not found: {candidate}"
        )

    @abstractmethod
    def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> PlanT:
        """Resolve the backend-specific plan: image/container names,
        env-file, prompt-file, proxy plan, runtime detection.

        `prepare` calls this once `_validate` has succeeded.
        """

    @abstractmethod
    def launch(self, plan: PlanT) -> AbstractContextManager[Bottle]:
        """Build/run the bottle and yield a handle; tear down on exit."""

    def provision(self, plan: PlanT, bottle: Bottle) -> str | None:
        """Copy host-side files (CA cert, prompt, skills, .git) into the
        running bottle.

        `launch` calls this once the container / machine is up. The
        return value is the in-container prompt path when a prompt was
        provisioned and None otherwise; the Bottle handle uses it to
        decide whether to add provider-specific prompt args to the
        agent's argv.

        Default orchestration: ca → prompt → provider auth → skills →
        workspace → git → supervise. The CA install goes first so the
        agent's trust store is rebuilt before anything inside the agent
        makes a TLS call. Subclasses typically leave this method alone
        and implement the sub-methods below.

        PRD 0017: cred-proxy's agent-side dotfile rewrites (~/.npmrc,
        ~/.gitconfig insteadOf, tea config) are gone. Egress-proxy is
        on the agent's HTTP_PROXY path, so every tool that respects
        HTTPS_PROXY (claude-code, git over HTTPS, npm, curl) is
        intercepted without per-tool reconfiguration.
        """
        self.provision_ca(plan, bottle)
        # Only the prompt step produces a value the caller needs.
        prompt_path = self.provision_prompt(plan, bottle)
        later_steps = (
            self.provision_provider_auth,
            self.provision_skills,
            self.provision_workspace,
            self.provision_git,
            self.provision_supervise,
        )
        for step in later_steps:
            step(plan, bottle)
        return prompt_path

    def provision_ca(self, plan: PlanT, bottle: Bottle) -> None:
        """Install the per-bottle CA into the agent's trust store.

        This is what lets the agent trust the bumped CONNECT cert that
        egress (pipelock before PRD 0017) presents. The default is a
        no-op so that backends without TLS interception (every backend
        except Docker today) are not forced to implement it. The Docker
        backend overrides it to docker-cp the cert in and run
        `update-ca-certificates`.
        """

    def provision_provider_auth(self, plan: PlanT, bottle: Bottle) -> None:
        """Install non-secret provider auth marker files into the agent
        home, for providers that need them to select the right auth
        mode.

        The default is a no-op.
        """

    @abstractmethod
    def provision_prompt(self, plan: PlanT, bottle: Bottle) -> str | None:
        """Copy the prompt file into the running bottle.

        Returns the in-container path iff the agent has a non-empty
        prompt. Callers use the return value to decide whether to add
        provider-specific prompt args to the agent's argv.
        """

    @abstractmethod
    def provision_skills(self, plan: PlanT, bottle: Bottle) -> None:
        """Copy the agent's named skills from the host into the running
        bottle. No-op when the agent has no skills."""

    def provision_workspace(self, plan: PlanT, bottle: Bottle) -> None:
        """Copy the operator workspace into the running bottle when the
        backend cannot bake it into the agent image.

        The default is a no-op, for backends like Docker that handle
        this before launch.
        """

    @abstractmethod
    def provision_git(self, plan: PlanT, bottle: Bottle) -> None:
        """Copy the host's cwd `.git` directory into the running bottle
        if the user requested --cwd. No-op otherwise."""

    def provision_supervise(self, plan: PlanT, bottle: Bottle) -> None:
        """Write the in-bottle Claude Code MCP config so the agent
        discovers the per-bottle supervise sidecar (PRD 0013).

        No-op when bottle.supervise is False or when the backend does
        not support the supervise sidecar yet. The Docker backend
        overrides it.
        """

    @abstractmethod
    def prepare_cleanup(self) -> CleanupT:
        """Enumerate orphaned resources from previous bottles.

        Has no side effects, so it is safe to call before the y/N.
        """

    @abstractmethod
    def cleanup(self, plan: CleanupT) -> None:
        """Remove everything described by the cleanup plan."""

    @abstractmethod
    def enumerate_active(self) -> Sequence[ActiveAgent]:
        """Return every currently-running agent on this backend; empty
        when there are none.

        How this is done is backend-specific: docker queries `docker
        compose ls`; smolmachines queries `smolvm machine ls --json`
        and cross-references its bundle container.
        """

    @classmethod
    @abstractmethod
    def is_available(cls) -> bool:
        """Report whether this backend's runtime prerequisites are met
        on the current host.

        Docker needs `docker` on PATH; smolmachines needs `smolvm` on
        PATH. The cross-backend `enumerate_active_agents` /
        `cmd_cleanup` use this to skip backends the operator has not
        installed, so a docker-only host does not fail when
        `cli.py list active` walks past smolmachines.
        """
|
|
|
|
|
|
# Import concrete backend classes AFTER the base types are defined, so
|
|
# each backend module can pull BottleSpec / BottlePlan / BottleBackend
|
|
# via `from . import ...` without hitting a partially-initialized module.
|
|
from .docker import DockerBottleBackend # noqa: E402
|
|
from .smolmachines import SmolmachinesBottleBackend # noqa: E402
|
|
|
|
|
|
# Registry of backend instances, keyed by the name that
# `get_bottle_backend` looks up.
#
# The dict is heterogeneous: each value is a BottleBackend specialized
# over its own plan type. Concrete plan types are erased here because
# the registry is selected at runtime and the CLI only needs the
# unparameterized methods (prepare → plan → launch(plan), cleanup, etc.).
_BACKENDS: dict[str, BottleBackend[Any, Any]] = {
    "docker": DockerBottleBackend(),
    "smolmachines": SmolmachinesBottleBackend(),
}
|
|
|
|
|
|
def get_bottle_backend(
    name: str | None = None,
) -> BottleBackend[Any, Any]:
    """Look up the bottle backend to use.

    The backend name is taken from the first of these that is set and
    non-empty:

      1. the `name` argument (CLI `--backend=<name>` passes through here)
      2. the BOT_BOTTLE_BACKEND environment variable
      3. the default, `docker`

    If the chosen name is not a registered backend, dies with a message
    listing the known backends.
    """
    chosen = name
    if not chosen:
        chosen = os.environ.get("BOT_BOTTLE_BACKEND")
    if not chosen:
        chosen = "docker"

    if chosen in _BACKENDS:
        return _BACKENDS[chosen]

    known = ", ".join(sorted(_BACKENDS))
    die(f"unknown backend {chosen!r}; known backends: {known}")
    # Reached only if `die` returns; the lookup then raises KeyError.
    return _BACKENDS[chosen]
|
|
|
|
|
|
def known_backend_names() -> tuple[str, ...]:
    """Return every key of `_BACKENDS` as a sorted tuple.

    argparse uses it for the `--backend` choices, and the dashboard
    uses it for its backend picker.
    """
    names = list(_BACKENDS)
    names.sort()
    return tuple(names)
|
|
|
|
|
|
def has_backend(name: str) -> bool:
    """Report whether the named backend can run on the current host.

    The answer comes from the backend's own `is_available()`.
    Cross-backend callers (list, cleanup) use this to skip unavailable
    backends, so a docker-only host does not fail when the smolmachines
    backend is not installed, and vice versa.

    An unknown name yields False, so callers can pass arbitrary input
    without validating it separately.
    """
    backend = _BACKENDS.get(name)
    if backend is None:
        return False
    return backend.is_available()
|
|
|
|
|
|
def enumerate_active_agents() -> list[ActiveAgent]:
    """Collect the currently-running agents from every available
    backend.

    CLI `list active` and the dashboard's agents pane call this so that
    neither needs to know which backends exist. A backend whose
    `is_available()` reports False is skipped.

    The result is ordered by `(started_at, slug)`, which keeps the list
    stable across dashboard refresh ticks: agents do not shift position
    while the operator navigates with arrow keys. ISO 8601 timestamps
    sort lexicographically in chronological order, and `slug` is the
    deterministic tiebreaker. Agents with missing metadata
    (`started_at == ""`) sort first.
    """
    agents = [
        agent
        for backend_name in known_backend_names()
        if has_backend(backend_name)
        for agent in _BACKENDS[backend_name].enumerate_active()
    ]
    return sorted(agents, key=lambda agent: (agent.started_at, agent.slug))
|
|
|
|
|
|
# Public API of this module: the base types first, then the registry
# helper functions.
__all__ = [
    "ActiveAgent",
    "Bottle",
    "BottleBackend",
    "BottleCleanupPlan",
    "BottlePlan",
    "BottleSpec",
    "ExecResult",
    "enumerate_active_agents",
    "get_bottle_backend",
    "has_backend",
    "known_backend_names",
]
|