refactor!: rename project to bot-bottle
Assisted-by: Codex
This commit is contained in:
@@ -0,0 +1,439 @@
|
||||
"""Per-backend bottle factories.
|
||||
|
||||
A bottle is a running, isolated environment with claude inside. Each
|
||||
backend exposes five methods:
|
||||
|
||||
prepare(spec, stage_dir=...) -> BottlePlan
|
||||
Resolves names, validates host-side prerequisites, and writes
|
||||
scratch files. No remote/runtime resources are created yet.
|
||||
Safe to call before the y/N preflight.
|
||||
|
||||
launch(plan) -> ContextManager[Bottle]
|
||||
Brings up the container (or VM, or remote machine), provisions
|
||||
it, yields a Bottle handle, and tears everything down on exit.
|
||||
|
||||
prepare_cleanup() -> BottleCleanupPlan
|
||||
Enumerates orphaned resources left behind by previous bottles
|
||||
(containers, networks, ...). Idempotent; no side effects.
|
||||
|
||||
cleanup(plan) -> None
|
||||
Actually removes everything described by the cleanup plan.
|
||||
|
||||
enumerate_active() -> Sequence[ActiveAgent]
|
||||
Return every currently-running bottle on this backend, with
|
||||
enough metadata for callers (CLI `list active`, dashboard
|
||||
agents pane) to render a row.
|
||||
|
||||
Selection is driven by `--backend` on `start` or
|
||||
BOT_BOTTLE_BACKEND (env var; default "docker"). Per PRD 0003 the
|
||||
manifest does not carry a backend field; the host picks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import AbstractContextManager
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Generic, Sequence, TypeVar
|
||||
|
||||
from ..log import die
|
||||
from ..manifest import GitEntry, Manifest
|
||||
from ..util import expand_tilde
|
||||
from .util import host_skill_dir
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BottleSpec:
|
||||
"""CLI-supplied intent. Backend-agnostic — each backend's prepare
|
||||
step consumes it and produces its own backend-specific plan.
|
||||
Resolved values (image names, container name, scratch paths, runsc
|
||||
availability) live on the plan, not the spec."""
|
||||
|
||||
manifest: Manifest
|
||||
agent_name: str
|
||||
copy_cwd: bool
|
||||
user_cwd: str
|
||||
# PRD 0016 follow-up: when set, the backend's prepare step uses
|
||||
# this identity instead of minting a fresh one — the resume path
|
||||
# (`cli.py resume <identity>`) sets this to continue an existing
|
||||
# bottle's state. Empty string for a fresh `start`.
|
||||
identity: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BottlePlan(ABC):
|
||||
"""Base output of a backend's prepare step. Concrete subclasses
|
||||
(e.g. DockerBottlePlan) add backend-specific resolved fields and
|
||||
implement `print`."""
|
||||
|
||||
spec: BottleSpec
|
||||
stage_dir: Path
|
||||
|
||||
@abstractmethod
|
||||
def print(self, *, remote_control: bool) -> None:
|
||||
"""Render the y/N preflight summary to stderr."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BottleCleanupPlan(ABC):
|
||||
"""Base output of a backend's prepare_cleanup step. Concrete
|
||||
subclasses (e.g. DockerBottleCleanupPlan) carry backend-specific
|
||||
lists of resources to be removed and implement `print` + `empty`."""
|
||||
|
||||
@abstractmethod
|
||||
def print(self) -> None:
|
||||
"""Render the cleanup y/N summary to stderr."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def empty(self) -> bool:
|
||||
"""True iff there is nothing to clean up; the CLI uses this to
|
||||
short-circuit before showing the y/N."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ExecResult:
|
||||
"""Captured result of `Bottle.exec`. Backend-neutral: the Docker
|
||||
impl populates it from a `subprocess.CompletedProcess`, but a
|
||||
future fly/smolmachines backend could populate it from any source
|
||||
that produces a returncode + captured streams."""
|
||||
|
||||
returncode: int
|
||||
stdout: str
|
||||
stderr: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ActiveAgent:
|
||||
"""One currently-running agent, as the CLI `list active` and
|
||||
dashboard agents pane render it. ("Agent" is the project's
|
||||
consistent name for the thing running inside a bottle — the
|
||||
bottle is the container, the agent is what runs in it.)
|
||||
|
||||
Fields are deliberately backend-neutral. `services` is the set
|
||||
of sidecar daemons currently up for this bottle (`pipelock`,
|
||||
`egress`, `git-gate`, `supervise`); the dashboard uses it to
|
||||
gate edit verbs. `backend_name` is the matching key in
|
||||
`_BACKENDS` (`docker` / `smolmachines`) — used by the active-
|
||||
list rendering to disambiguate and by the dashboard's
|
||||
re-attach path."""
|
||||
|
||||
backend_name: str
|
||||
slug: str
|
||||
agent_name: str # from metadata.json; "?" if missing
|
||||
started_at: str # ISO 8601 from metadata.json; "" if missing
|
||||
services: tuple[str, ...] # alphabetical
|
||||
|
||||
|
||||
class Bottle(ABC):
|
||||
"""Handle to a running bottle. Yielded by a backend's launch step.
|
||||
|
||||
`exec_claude` runs `claude` inside the bottle and blocks until the
|
||||
session ends. `exec` runs a POSIX shell script inside the bottle
|
||||
and returns the captured result. `cp_in` copies a host path into
|
||||
the bottle. `close` is an idempotent alias for context-manager
|
||||
teardown.
|
||||
"""
|
||||
|
||||
name: str
|
||||
|
||||
@abstractmethod
|
||||
def claude_argv(
|
||||
self, argv: list[str], *, tty: bool = True,
|
||||
) -> list[str]:
|
||||
"""Return the host-side argv that runs `claude <argv>`
|
||||
inside the bottle. Used by `exec_claude` for foreground
|
||||
handoffs and by the dashboard's tmux `respawn-pane` flow,
|
||||
which needs the argv up front (it spawns claude in a tmux
|
||||
pane rather than as a child of the current process).
|
||||
|
||||
Implementations transparently inject
|
||||
`--append-system-prompt-file` when the bottle was launched
|
||||
with a provisioned prompt path."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: ...
|
||||
|
||||
@abstractmethod
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
"""Run `script` as a POSIX shell script inside the bottle as
|
||||
`user` (default `node`, matching the agent image's USER
|
||||
directive) and return the captured stdout/stderr/returncode.
|
||||
The bottle's environment (including HTTPS_PROXY pointing at
|
||||
the pipelock sidecar) is inherited by the child. Non-zero
|
||||
exit does not raise — callers inspect `returncode`
|
||||
themselves.
|
||||
|
||||
Pass `user="root"` for shell-outs that need privileged file
|
||||
writes / package install — provisioning calls that need root
|
||||
bypass `Bottle.exec` and use the backend-specific raw
|
||||
machine-exec helper, but the tests have a legitimate use
|
||||
case for arbitrary-user runs."""
|
||||
|
||||
@abstractmethod
|
||||
def cp_in(self, host_path: str, container_path: str) -> None: ...
|
||||
|
||||
@abstractmethod
|
||||
def close(self) -> None: ...
|
||||
|
||||
|
||||
|
||||
|
||||
PlanT = TypeVar("PlanT", bound=BottlePlan)
|
||||
CleanupT = TypeVar("CleanupT", bound=BottleCleanupPlan)
|
||||
|
||||
|
||||
class BottleBackend(ABC, Generic[PlanT, CleanupT]):
|
||||
"""Abstract base for selectable bottle backends. Concrete subclasses
|
||||
(e.g. DockerBottleBackend) own their own prepare/launch impls.
|
||||
Parameterized over the backend's concrete plan + cleanup-plan types
|
||||
so subclass methods get the narrow type without isinstance
|
||||
boilerplate."""
|
||||
|
||||
name: str
|
||||
|
||||
def prepare(self, spec: BottleSpec, *, stage_dir: Path) -> PlanT:
|
||||
"""Template method: run cross-backend host-side validation, then
|
||||
delegate to the subclass's `_resolve_plan` for the
|
||||
backend-specific resolution (names, scratch files, etc.). The
|
||||
validation step is enforced here so a future backend cannot
|
||||
accidentally skip it. No remote/runtime resources are created."""
|
||||
self._validate(spec)
|
||||
return self._resolve_plan(spec, stage_dir=stage_dir)
|
||||
|
||||
def _validate(self, spec: BottleSpec) -> None:
|
||||
"""Cross-backend pre-launch checks. Confirms the agent exists,
|
||||
the named skills are present on the host, and every git
|
||||
IdentityFile resolves. Subclasses with additional preconditions
|
||||
should override and call `super()._validate(spec)` first."""
|
||||
manifest = spec.manifest
|
||||
manifest.require_agent(spec.agent_name)
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
self._validate_skills(agent.skills)
|
||||
self._validate_git_entries(bottle.git)
|
||||
self._validate_agent_provider_dockerfile(spec)
|
||||
|
||||
def _validate_skills(self, skills: Sequence[str]) -> None:
|
||||
"""Each named skill must be a directory under the host's
|
||||
`~/.claude/skills/`. The check is purely host-side, so the
|
||||
default impl covers every backend."""
|
||||
for name in skills:
|
||||
path = host_skill_dir(name)
|
||||
if not os.path.isdir(path):
|
||||
die(
|
||||
f"skill '{name}' not found on host at {path}. "
|
||||
f"Create it under ~/.claude/skills/, then re-run."
|
||||
)
|
||||
|
||||
def _validate_git_entries(self, entries: Sequence[GitEntry]) -> None:
|
||||
"""Each entry's IdentityFile must exist on the host (after
|
||||
expanding leading ~) — the git-gate copies it in at start time
|
||||
to authenticate the upstream push (PRD 0008). Shape is already
|
||||
enforced by Manifest validation; this only checks presence."""
|
||||
for entry in entries:
|
||||
key = expand_tilde(entry.IdentityFile)
|
||||
if not os.path.isfile(key):
|
||||
die(f"git upstream key file not found for '{entry.Name}': {key}")
|
||||
|
||||
def _validate_agent_provider_dockerfile(self, spec: BottleSpec) -> None:
|
||||
bottle = spec.manifest.bottle_for(spec.agent_name)
|
||||
dockerfile = bottle.agent_provider.dockerfile
|
||||
if not dockerfile:
|
||||
return
|
||||
path = Path(expand_tilde(dockerfile))
|
||||
if not path.is_absolute():
|
||||
path = Path(spec.user_cwd) / path
|
||||
if not path.is_file():
|
||||
die(
|
||||
f"agent_provider.dockerfile for bottle "
|
||||
f"'{spec.manifest.agents[spec.agent_name].bottle}' not found: {path}"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> PlanT:
|
||||
"""Backend-specific plan resolution: image/container names,
|
||||
env-file, prompt-file, proxy plan, runtime detection. Called by
|
||||
`prepare` after `_validate` succeeds."""
|
||||
|
||||
@abstractmethod
|
||||
def launch(self, plan: PlanT) -> AbstractContextManager[Bottle]:
|
||||
"""Build/run the bottle and yield a handle; tear down on exit."""
|
||||
|
||||
def provision(self, plan: PlanT, target: str) -> str | None:
|
||||
"""Copy host-side files (CA cert, prompt, skills, .git) into
|
||||
the running bottle. Called from `launch` after the container
|
||||
/ machine is up. `target` identifies the running instance in
|
||||
backend-specific terms (Docker: resolved container name; fly:
|
||||
machine id). Returns the in-container prompt path if a prompt
|
||||
was provisioned, else None — the Bottle handle uses it to
|
||||
decide whether to add --append-system-prompt-file to claude's
|
||||
argv.
|
||||
|
||||
Default orchestration: ca → prompt → skills → git →
|
||||
supervise. CA install runs first so the agent's trust store
|
||||
is rebuilt before anything inside the agent makes a TLS call.
|
||||
Subclasses typically don't override this; they implement the
|
||||
sub-methods below.
|
||||
|
||||
PRD 0017: cred-proxy's agent-side dotfile rewrites (~/.npmrc,
|
||||
~/.gitconfig insteadOf, tea config) are gone. Egress-proxy is
|
||||
on the agent's HTTP_PROXY path so every tool that respects
|
||||
HTTPS_PROXY (claude-code, git over HTTPS, npm, curl) is
|
||||
intercepted without per-tool reconfiguration."""
|
||||
self.provision_ca(plan, target)
|
||||
prompt_path = self.provision_prompt(plan, target)
|
||||
self.provision_skills(plan, target)
|
||||
self.provision_git(plan, target)
|
||||
self.provision_supervise(plan, target)
|
||||
return prompt_path
|
||||
|
||||
def provision_ca(self, plan: PlanT, target: str) -> None:
|
||||
"""Install the per-bottle CA into the agent's trust store so
|
||||
the agent trusts the bumped CONNECT cert egress (was
|
||||
pipelock, pre-PRD-0017) presents. Default impl is a no-op so
|
||||
backends that don't yet support TLS interception (every backend
|
||||
except Docker today) aren't forced to implement it. The Docker
|
||||
backend overrides to docker-cp the cert in and run
|
||||
`update-ca-certificates`."""
|
||||
|
||||
@abstractmethod
|
||||
def provision_prompt(self, plan: PlanT, target: str) -> str | None:
|
||||
"""Copy the prompt file into the running bottle. Returns the
|
||||
in-container path iff the agent has a non-empty prompt;
|
||||
callers use the return value to decide whether to add
|
||||
--append-system-prompt-file to claude's argv."""
|
||||
|
||||
@abstractmethod
|
||||
def provision_skills(self, plan: PlanT, target: str) -> None:
|
||||
"""Copy the agent's named skills from the host into the
|
||||
running bottle. No-op when the agent has no skills."""
|
||||
|
||||
@abstractmethod
|
||||
def provision_git(self, plan: PlanT, target: str) -> None:
|
||||
"""Copy the host's cwd `.git` directory into the running
|
||||
bottle if the user requested --cwd. No-op otherwise."""
|
||||
|
||||
def provision_supervise(self, plan: PlanT, target: str) -> None:
|
||||
"""Write the in-bottle Claude Code MCP config so the agent
|
||||
discovers the per-bottle supervise sidecar (PRD 0013).
|
||||
No-op when bottle.supervise is False or the backend doesn't
|
||||
support the supervise sidecar yet. The Docker backend
|
||||
overrides."""
|
||||
|
||||
@abstractmethod
|
||||
def prepare_cleanup(self) -> CleanupT:
|
||||
"""Enumerate orphaned resources from previous bottles. No side
|
||||
effects; safe to call before the y/N."""
|
||||
|
||||
@abstractmethod
|
||||
def cleanup(self, plan: CleanupT) -> None:
|
||||
"""Remove everything described by the cleanup plan."""
|
||||
|
||||
@abstractmethod
|
||||
def enumerate_active(self) -> Sequence[ActiveAgent]:
|
||||
"""Return every currently-running agent on this backend.
|
||||
Empty when none. Backend-specific: docker queries `docker
|
||||
compose ls`; smolmachines queries `smolvm machine ls --json`
|
||||
+ cross-references its bundle container."""
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def is_available(cls) -> bool:
|
||||
"""Whether this backend's runtime prerequisites are satisfied
|
||||
on the current host. Docker → `docker` on PATH; smolmachines
|
||||
→ `smolvm` on PATH. Used by the cross-backend
|
||||
`enumerate_active_agents` / `cmd_cleanup` to skip backends
|
||||
the operator hasn't installed, so a docker-only host
|
||||
doesn't fail when `cli.py list active` walks past
|
||||
smolmachines."""
|
||||
|
||||
|
||||
# Import concrete backend classes AFTER the base types are defined, so
|
||||
# each backend module can pull BottleSpec / BottlePlan / BottleBackend
|
||||
# via `from . import ...` without hitting a partially-initialized module.
|
||||
from .docker import DockerBottleBackend # noqa: E402
|
||||
from .smolmachines import SmolmachinesBottleBackend # noqa: E402
|
||||
|
||||
|
||||
# The dict is heterogeneous: each value is a BottleBackend specialized
|
||||
# over its own plan type. Concrete plan types are erased here because
|
||||
# the registry is selected at runtime and the CLI only needs the
|
||||
# unparameterized methods (prepare → plan → launch(plan), cleanup, etc.).
|
||||
_BACKENDS: dict[str, BottleBackend[Any, Any]] = {
|
||||
"docker": DockerBottleBackend(),
|
||||
"smolmachines": SmolmachinesBottleBackend(),
|
||||
}
|
||||
|
||||
|
||||
def get_bottle_backend(
|
||||
name: str | None = None,
|
||||
) -> BottleBackend[Any, Any]:
|
||||
"""Resolve the bottle backend.
|
||||
|
||||
`name` precedence:
|
||||
1. explicit arg (CLI `--backend=<name>` passes through here)
|
||||
2. BOT_BOTTLE_BACKEND env var
|
||||
3. default `docker`
|
||||
|
||||
Dies with a pointer at the known backends if the chosen name
|
||||
isn't implemented."""
|
||||
resolved = name or os.environ.get("BOT_BOTTLE_BACKEND") or "docker"
|
||||
if resolved not in _BACKENDS:
|
||||
known = ", ".join(sorted(_BACKENDS))
|
||||
die(f"unknown backend {resolved!r}; known backends: {known}")
|
||||
return _BACKENDS[resolved]
|
||||
|
||||
|
||||
def known_backend_names() -> tuple[str, ...]:
|
||||
"""Sorted tuple of all backend keys in `_BACKENDS`. Used by
|
||||
argparse (`--backend` choices) and the dashboard's backend
|
||||
picker."""
|
||||
return tuple(sorted(_BACKENDS))
|
||||
|
||||
|
||||
def has_backend(name: str) -> bool:
|
||||
"""Whether the named backend's runtime prerequisites are
|
||||
available on the current host. Cross-backend callers (list,
|
||||
cleanup) skip unavailable backends so a docker-only host
|
||||
doesn't fail when the smolmachines backend isn't installed,
|
||||
and vice versa.
|
||||
|
||||
Returns False for unknown names so callers can pass
|
||||
arbitrary input without separate validation."""
|
||||
if name not in _BACKENDS:
|
||||
return False
|
||||
return _BACKENDS[name].is_available()
|
||||
|
||||
|
||||
def enumerate_active_agents() -> list[ActiveAgent]:
|
||||
"""All currently-running agents, across every available
|
||||
backend. Used by CLI `list active` and the dashboard's agents
|
||||
pane so neither has to know which backends exist. Skips
|
||||
backends whose `is_available()` reports False. Ordered by
|
||||
backend name, then by whatever each backend's
|
||||
`enumerate_active` returns."""
|
||||
out: list[ActiveAgent] = []
|
||||
for name in known_backend_names():
|
||||
if not has_backend(name):
|
||||
continue
|
||||
out.extend(_BACKENDS[name].enumerate_active())
|
||||
return out
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ActiveAgent",
|
||||
"Bottle",
|
||||
"BottleBackend",
|
||||
"BottleCleanupPlan",
|
||||
"BottlePlan",
|
||||
"BottleSpec",
|
||||
"ExecResult",
|
||||
"enumerate_active_agents",
|
||||
"get_bottle_backend",
|
||||
"has_backend",
|
||||
"known_backend_names",
|
||||
]
|
||||
@@ -0,0 +1,33 @@
|
||||
"""Docker bottle backend.
|
||||
|
||||
The bulk of the implementation lives in sibling modules:
|
||||
|
||||
- util: thin Docker subprocess wrappers
|
||||
- network: Docker network plumbing
|
||||
- pipelock: DockerPipelockProxy lifecycle
|
||||
- bottle_plan: DockerBottlePlan
|
||||
- bottle_cleanup_plan: DockerBottleCleanupPlan
|
||||
- bottle: DockerBottle handle
|
||||
- prepare: host-side resolution into a DockerBottlePlan
|
||||
- launch: bring-up + teardown context manager
|
||||
- cleanup: orphan enumeration, removal, active listing
|
||||
- backend: DockerBottleBackend façade wiring the above
|
||||
|
||||
This file only re-exports the public names so
|
||||
`from bot_bottle.backend.docker import DockerBottleBackend` keeps
|
||||
working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .backend import DockerBottleBackend
|
||||
from .bottle import DockerBottle
|
||||
from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
|
||||
__all__ = [
|
||||
"DockerBottle",
|
||||
"DockerBottleBackend",
|
||||
"DockerBottleCleanupPlan",
|
||||
"DockerBottlePlan",
|
||||
]
|
||||
@@ -0,0 +1,81 @@
|
||||
"""DockerBottleBackend — the Docker implementation of BottleBackend.
|
||||
|
||||
This module is a thin façade. The real work lives in four siblings:
|
||||
|
||||
- prepare.py — host-side resolution into a DockerBottlePlan
|
||||
- launch.py — bring-up + teardown context manager
|
||||
- cleanup.py — orphan enumeration + removal
|
||||
- enumerate.py — active-agent listing
|
||||
|
||||
The base class's `prepare` template runs cross-backend host-side
|
||||
validation before calling `_resolve_plan` here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Generator, Sequence
|
||||
|
||||
from .. import ActiveAgent, BottleBackend, BottleSpec
|
||||
from . import cleanup as _cleanup
|
||||
from . import enumerate as _enumerate
|
||||
from . import launch as _launch
|
||||
from . import prepare as _prepare
|
||||
from .bottle import DockerBottle
|
||||
from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .provision import ca as _ca
|
||||
from .provision import git as _git
|
||||
from .provision import prompt as _prompt
|
||||
from .provision import skills as _skills
|
||||
from .provision import supervise as _supervise_prov
|
||||
|
||||
|
||||
class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanupPlan"]):
|
||||
"""Docker backend implementation. Selected by BOT_BOTTLE_BACKEND
|
||||
(default)."""
|
||||
|
||||
name = "docker"
|
||||
|
||||
@classmethod
|
||||
def is_available(cls) -> bool:
|
||||
"""`docker` on PATH is sufficient; we don't probe `docker info`
|
||||
eagerly because the cross-backend enumerator runs this on
|
||||
every `list active` and we'd pay a subprocess per call. A
|
||||
broken daemon will surface its own error during prepare /
|
||||
launch."""
|
||||
return shutil.which("docker") is not None
|
||||
|
||||
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
|
||||
return _prepare.resolve_plan(spec, stage_dir=stage_dir)
|
||||
|
||||
@contextmanager
|
||||
def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]:
|
||||
with _launch.launch(plan, provision=self.provision) as bottle:
|
||||
yield bottle
|
||||
|
||||
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
|
||||
_ca.provision_ca(plan, target)
|
||||
|
||||
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
|
||||
return _prompt.provision_prompt(plan, target)
|
||||
|
||||
def provision_skills(self, plan: DockerBottlePlan, target: str) -> None:
|
||||
_skills.provision_skills(plan, target)
|
||||
|
||||
def provision_git(self, plan: DockerBottlePlan, target: str) -> None:
|
||||
_git.provision_git(plan, target)
|
||||
|
||||
def provision_supervise(self, plan: DockerBottlePlan, target: str) -> None:
|
||||
_supervise_prov.provision_supervise(plan, target)
|
||||
|
||||
def prepare_cleanup(self) -> DockerBottleCleanupPlan:
|
||||
return _cleanup.prepare_cleanup()
|
||||
|
||||
def cleanup(self, plan: DockerBottleCleanupPlan) -> None:
|
||||
_cleanup.cleanup(plan)
|
||||
|
||||
def enumerate_active(self) -> Sequence[ActiveAgent]:
|
||||
return _enumerate.enumerate_active()
|
||||
@@ -0,0 +1,84 @@
|
||||
"""DockerBottle — concrete Bottle handle yielded by DockerBottleBackend."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import Callable
|
||||
|
||||
from ...agent_provider import prompt_args
|
||||
from .. import Bottle, ExecResult
|
||||
|
||||
|
||||
class DockerBottle(Bottle):
|
||||
"""Concrete Bottle for Docker."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
container: str,
|
||||
teardown: Callable[[], None],
|
||||
prompt_path_in_container: str | None,
|
||||
*,
|
||||
agent_command: str = "claude",
|
||||
agent_prompt_mode: str = "claude_append_file",
|
||||
):
|
||||
self.name = container
|
||||
self._teardown = teardown
|
||||
self._prompt_path = prompt_path_in_container
|
||||
self._agent_command = agent_command
|
||||
self._agent_prompt_mode = agent_prompt_mode
|
||||
self.agent_command = agent_command
|
||||
self.agent_provider_template = (
|
||||
"codex" if agent_command == "codex" else "claude"
|
||||
)
|
||||
self._closed = False
|
||||
|
||||
def claude_argv(
|
||||
self, argv: list[str], *, tty: bool = True,
|
||||
) -> list[str]:
|
||||
full_argv = list(argv)
|
||||
full_argv.extend(
|
||||
prompt_args(self._agent_prompt_mode, self._prompt_path, argv=full_argv)
|
||||
)
|
||||
cmd = ["docker", "exec"]
|
||||
if tty:
|
||||
cmd.append("-it")
|
||||
cmd.extend([self.name, self._agent_command, *full_argv])
|
||||
return cmd
|
||||
|
||||
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
|
||||
return subprocess.run(
|
||||
self.claude_argv(argv, tty=tty), check=False,
|
||||
).returncode
|
||||
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
# Pipe via stdin to `sh -s` so the caller never has to worry
|
||||
# about quoting; the script source lands inside the container
|
||||
# without crossing argv. `-u <user>` overrides the image's
|
||||
# default USER — defaults to `node` which is already the
|
||||
# image's USER, so the explicit flag is a no-op there but
|
||||
# keeps the cross-backend contract uniform.
|
||||
result = subprocess.run(
|
||||
["docker", "exec", "-u", user, "-i", self.name, "sh", "-s"],
|
||||
input=script,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
return ExecResult(
|
||||
returncode=result.returncode,
|
||||
stdout=result.stdout,
|
||||
stderr=result.stderr,
|
||||
)
|
||||
|
||||
def cp_in(self, host_path: str, container_path: str) -> None:
|
||||
subprocess.run(
|
||||
["docker", "cp", host_path, f"{self.name}:{container_path}"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def close(self) -> None:
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
self._teardown()
|
||||
@@ -0,0 +1,59 @@
|
||||
"""DockerBottleCleanupPlan — concrete subclass of BottleCleanupPlan.
|
||||
|
||||
PRD 0018 chunk 4: cleanup is centered on compose projects. `docker
|
||||
compose ls` is the source of truth for what's running; the plan
|
||||
carries the projects to `compose down`, plus three fallback buckets
|
||||
for legacy / orphan resources:
|
||||
|
||||
- stray_containers: pre-compose `bot-bottle-*` containers not
|
||||
attached to any compose project. Cleared via `docker rm -f`.
|
||||
- stray_networks: same idea for networks. Cleared via
|
||||
`docker network rm`.
|
||||
- orphan_state_dirs: per-bottle state dirs under
|
||||
~/.bot-bottle/state/ that have no live compose project AND
|
||||
no `.preserve` marker. Reaped via `shutil.rmtree`.
|
||||
|
||||
Compose-managed networks are removed by `compose down --volumes`,
|
||||
so they don't appear in stray_networks for a normal project — only
|
||||
truly leftover ones.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...log import info
|
||||
from .. import BottleCleanupPlan
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DockerBottleCleanupPlan(BottleCleanupPlan):
|
||||
"""Resources DockerBottleBackend.cleanup will remove. Produced by
|
||||
`prepare_cleanup`; sorted so the y/N output is stable."""
|
||||
|
||||
projects: tuple[str, ...]
|
||||
stray_containers: tuple[str, ...]
|
||||
stray_networks: tuple[str, ...]
|
||||
orphan_state_dirs: tuple[str, ...]
|
||||
|
||||
@property
|
||||
def empty(self) -> bool:
|
||||
return (
|
||||
not self.projects
|
||||
and not self.stray_containers
|
||||
and not self.stray_networks
|
||||
and not self.orphan_state_dirs
|
||||
)
|
||||
|
||||
def print(self) -> None:
|
||||
print(file=sys.stderr)
|
||||
for name in self.projects:
|
||||
info(f"compose project: {name}")
|
||||
for name in self.stray_containers:
|
||||
info(f"stray container: {name}")
|
||||
for name in self.stray_networks:
|
||||
info(f"stray network: {name}")
|
||||
for name in self.orphan_state_dirs:
|
||||
info(f"orphan state: {name}")
|
||||
print(file=sys.stderr)
|
||||
@@ -0,0 +1,97 @@
|
||||
"""DockerBottlePlan — concrete subclass of BottlePlan.
|
||||
|
||||
Carries the Docker-specific resolved fields produced by
|
||||
DockerBottleBackend.prepare. The launch step consumes it without
|
||||
further resolution; show_plan-style rendering is the `print` method.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from ...egress import EgressPlan
|
||||
from ...git_gate import GitGatePlan
|
||||
from ...log import info
|
||||
from ...pipelock import PipelockProxyPlan
|
||||
from ...supervise import SupervisePlan
|
||||
from .. import BottlePlan
|
||||
from ..print_util import print_multi
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DockerBottlePlan(BottlePlan):
|
||||
"""Docker-specific resolved fields produced by
|
||||
DockerBottleBackend.prepare. Inherits `spec` and `stage_dir` from
|
||||
BottlePlan."""
|
||||
|
||||
slug: str
|
||||
container_name: str
|
||||
container_name_pinned: bool
|
||||
image: str
|
||||
derived_image: str # "" -> no derived image
|
||||
runtime_image: str # image actually launched (derived or base)
|
||||
# Absolute path to the Dockerfile that builds `image`. Empty means
|
||||
# use the repo's default Dockerfile. Populated to a per-bottle
|
||||
# state file (~/.bot-bottle/state/<slug>/Dockerfile) after a
|
||||
# capability-block remediation (PRD 0016).
|
||||
dockerfile_path: str
|
||||
env_file: Path # docker --env-file: NAME=VALUE literals
|
||||
# name -> value for vars forwarded into the docker-run child process
|
||||
# via subprocess env (so values never land on argv or in a file).
|
||||
# repr=False keeps secret/interpolated/OAuth values out of any
|
||||
# accidental log of the plan dataclass.
|
||||
forwarded_env: dict[str, str] = field(repr=False)
|
||||
prompt_file: Path
|
||||
proxy_plan: PipelockProxyPlan
|
||||
git_gate_plan: GitGatePlan
|
||||
egress_plan: EgressPlan
|
||||
# None when bottle.supervise is False. PRD 0013 supervise sidecar
|
||||
# is opt-in via the manifest's bottle.supervise field.
|
||||
supervise_plan: SupervisePlan | None
|
||||
use_runsc: bool
|
||||
agent_command: str = "claude"
|
||||
agent_prompt_mode: str = "claude_append_file"
|
||||
agent_provider_template: str = "claude"
|
||||
|
||||
def print(self, *, remote_control: bool) -> None:
|
||||
"""Render the y/N preflight summary to stderr — compact form
|
||||
intended to fit on screen without scrolling. The full
|
||||
structured shape (image, container, runtime, etc.) lives on
|
||||
this dataclass for tooling that wants to introspect it."""
|
||||
del remote_control # not surfaced in the compact summary
|
||||
spec = self.spec
|
||||
manifest = spec.manifest
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
# The agent sees the union of literal env names (rendered into
|
||||
# --env-file) and forwarded env names (`-e NAME` with the
|
||||
# value arriving via subprocess env). The forwarded set holds
|
||||
# the OAuth token (CLAUDE_CODE_OAUTH_TOKEN) and any host-env
|
||||
# interpolations from the manifest; egress holds
|
||||
# upstream tokens in its own environ, so no token forwarding
|
||||
# from the agent to the proxy is needed.
|
||||
env_names = sorted(set(bottle.env.keys()) | set(self.forwarded_env.keys()))
|
||||
|
||||
print(file=sys.stderr)
|
||||
info(f"agent : {spec.agent_name}")
|
||||
info(f"provider : {self.agent_provider_template}")
|
||||
print_multi("env ", env_names)
|
||||
print_multi("skills ", list(agent.skills))
|
||||
info(f"bottle : {agent.bottle}")
|
||||
|
||||
git_lines = [
|
||||
f"{u.upstream_host}:{u.upstream_port}"
|
||||
for u in self.git_gate_plan.upstreams
|
||||
]
|
||||
if git_lines:
|
||||
print_multi(" git gate ", git_lines)
|
||||
|
||||
if self.egress_plan.routes:
|
||||
egress_lines = []
|
||||
for r in self.egress_plan.routes:
|
||||
auth = f" [auth:{r.auth_scheme}]" if r.auth_scheme else ""
|
||||
egress_lines.append(f"{r.host}{auth}")
|
||||
print_multi(" egress ", egress_lines)
|
||||
print(file=sys.stderr)
|
||||
@@ -0,0 +1,328 @@
|
||||
"""Per-bottle persistent state (PRD 0016).
|
||||
|
||||
Holds the per-bottle Dockerfile override that capability-block
|
||||
remediation writes, the transcript snapshot the state-preservation
|
||||
helper saves before teardown, and the launch metadata that lets
|
||||
`cli.py resume <identity>` reconstruct a bottle's spec. State
|
||||
lives at:
|
||||
|
||||
~/.bot-bottle/state/<identity>/
|
||||
metadata.json — agent_name + cwd + started_at (for resume)
|
||||
Dockerfile — per-bottle override (absent → use repo's)
|
||||
transcript/ — last snapshotted agent state (best-effort)
|
||||
|
||||
When the per-bottle Dockerfile is present, the launch step builds
|
||||
the agent image with a per-bottle tag (bot-bottle-rebuilt-<id>)
|
||||
from this file rather than the repo's. The build context is still
|
||||
the repo root so the Dockerfile can COPY bot_bottle source files
|
||||
the same way the original does.
|
||||
|
||||
Identity model:
|
||||
- Every `cli.py start <agent>` mints a fresh identity via
|
||||
`bottle_identity(agent_name)`: slug-prefix for readability plus a
|
||||
5-char random suffix for parallel-safe uniqueness. The metadata
|
||||
written at launch time pins (agent_name, cwd) to that identity.
|
||||
- `cli.py resume <identity>` reads the metadata and re-launches a
|
||||
bottle pinned to the same identity, picking up any per-bottle
|
||||
Dockerfile and transcript snapshot.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
import secrets
|
||||
import string
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ... import supervise as _supervise
|
||||
from . import util as docker_mod
|
||||
|
||||
|
||||
# Directory layout: ~/.bot-bottle/state/<identity>/...
|
||||
_STATE_SUBDIR = "state"
|
||||
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
|
||||
_TRANSCRIPT_SUBDIR = "transcript"
|
||||
# Per-sidecar scratch subdirs. PRD 0018 chunk 2: bind-mount sources
|
||||
# live here so chunk 3's `docker compose up` can find them at stable
|
||||
# paths. Each sidecar's `prepare()` writes config + CAs into its own
|
||||
# subdir; the launch step is unchanged today (still `docker cp`).
|
||||
_PIPELOCK_SUBDIR = "pipelock"
|
||||
_EGRESS_SUBDIR = "egress"
|
||||
_GIT_GATE_SUBDIR = "git-gate"
|
||||
_SUPERVISE_SUBDIR = "supervise"
|
||||
_AGENT_SUBDIR = "agent"
|
||||
_METADATA_NAME = "metadata.json"
|
||||
# Live-config dir bind-mounted into the supervise sidecar (read-only).
|
||||
# Host's apply paths keep these files fresh so supervise's
|
||||
# `list-pipelock-allowlist` / `list-egress-routes` MCP tools
|
||||
# return the current state — not a snapshot from launch time.
|
||||
_LIVE_CONFIG_SUBDIR = "live-config"
|
||||
LIVE_CONFIG_ROUTES_NAME = "routes.yaml"
|
||||
LIVE_CONFIG_ALLOWLIST_NAME = "allowlist"
|
||||
# Empty marker file. capability_apply writes it before teardown so
|
||||
# cli.py's session-end cleanup knows to preserve the state dir for
|
||||
# `cli.py resume <identity>`. Absent = clean up.
|
||||
_PRESERVE_MARKER = ".preserve"
|
||||
|
||||
# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human
|
||||
# operators starting bottles by hand; collision-free in practice.
|
||||
_RANDOM_SUFFIX_LEN = 5
|
||||
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
|
||||
|
||||
|
||||
def bottle_identity(agent_name: str) -> str:
|
||||
"""Mint a fresh per-launch bottle identity. The slug-prefix is
|
||||
`slugify(agent_name)` for readability; the suffix is 5 random
|
||||
base36 chars so two simultaneous `start <agent>` invocations
|
||||
don't collide on container/network names.
|
||||
|
||||
Every call produces a different identity (non-deterministic).
|
||||
To continue an existing bottle's state, use the recorded
|
||||
identity from BottleMetadata via `cli.py resume <identity>`,
|
||||
not this function."""
|
||||
slug = docker_mod.slugify(agent_name)
|
||||
suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN))
|
||||
return f"{slug}-{suffix}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BottleMetadata:
|
||||
"""Persistent record of how a bottle was launched, written at
|
||||
start time and read by `cli.py resume`. Lives at
|
||||
~/.bot-bottle/state/<identity>/metadata.json."""
|
||||
|
||||
identity: str
|
||||
agent_name: str
|
||||
cwd: str # empty string when --cwd was not passed
|
||||
copy_cwd: bool
|
||||
started_at: str # ISO 8601 UTC
|
||||
# PRD 0018 chunk 3: derivable from identity via
|
||||
# `compose_project_name(identity)`, but persisted explicitly so
|
||||
# dashboard / cleanup / resume tooling can read it without
|
||||
# importing the compose module. Empty string for state dirs
|
||||
# written before chunk 3 (resume / inspect should fall back to
|
||||
# deriving from identity in that case).
|
||||
compose_project: str = ""
|
||||
|
||||
|
||||
def metadata_path(identity: str) -> Path:
|
||||
return bottle_state_dir(identity) / _METADATA_NAME
|
||||
|
||||
|
||||
def write_metadata(metadata: BottleMetadata) -> Path:
|
||||
"""Persist `metadata` to ~/.bot-bottle/state/<identity>/metadata.json.
|
||||
Mode 0o644 — no secrets, just (agent_name, cwd, timestamp)."""
|
||||
path = metadata_path(metadata.identity)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n")
|
||||
path.chmod(0o644)
|
||||
return path
|
||||
|
||||
|
||||
def read_metadata(identity: str) -> BottleMetadata | None:
|
||||
"""Return the metadata for `identity`, or None if no state has
|
||||
been recorded for it. Used by `cli.py resume` to reconstruct
|
||||
the launch spec."""
|
||||
path = metadata_path(identity)
|
||||
if not path.is_file():
|
||||
return None
|
||||
raw = json.loads(path.read_text())
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
return BottleMetadata(
|
||||
identity=str(raw.get("identity", identity)),
|
||||
agent_name=str(raw.get("agent_name", "")),
|
||||
cwd=str(raw.get("cwd", "")),
|
||||
copy_cwd=bool(raw.get("copy_cwd", False)),
|
||||
started_at=str(raw.get("started_at", "")),
|
||||
compose_project=str(raw.get("compose_project", "")),
|
||||
)
|
||||
|
||||
|
||||
def bottle_state_dir(identity: str) -> Path:
|
||||
"""Per-bottle state directory on the host. Created lazily by the
|
||||
write helpers; readers tolerate its absence."""
|
||||
return _supervise.bot_bottle_root() / _STATE_SUBDIR / identity
|
||||
|
||||
|
||||
def per_bottle_dockerfile_path(identity: str) -> Path:
|
||||
return bottle_state_dir(identity) / _PER_BOTTLE_DOCKERFILE_NAME
|
||||
|
||||
|
||||
def per_bottle_dockerfile(identity: str) -> str | None:
|
||||
"""Return the per-bottle Dockerfile content if present, else
|
||||
None. None means: use the repo's Dockerfile (the original
|
||||
pre-capability-block behavior)."""
|
||||
p = per_bottle_dockerfile_path(identity)
|
||||
if p.is_file():
|
||||
return p.read_text()
|
||||
return None
|
||||
|
||||
|
||||
def write_per_bottle_dockerfile(identity: str, content: str) -> Path:
|
||||
p = per_bottle_dockerfile_path(identity)
|
||||
p.parent.mkdir(parents=True, exist_ok=True)
|
||||
p.write_text(content)
|
||||
p.chmod(0o644)
|
||||
return p
|
||||
|
||||
|
||||
def per_bottle_image_tag(identity: str) -> str:
|
||||
"""Image tag for a rebuilt bottle. Distinct from the base
|
||||
bot-bottle-claude:latest so per-bottle rebuilds don't collide in
|
||||
the docker image cache."""
|
||||
return f"bot-bottle-rebuilt-{identity}:latest"
|
||||
|
||||
|
||||
def live_config_dir(identity: str) -> Path:
|
||||
"""Per-bottle live-config dir. Bind-mounted read-only into the
|
||||
supervise sidecar; the host's apply paths refresh the files on
|
||||
every operator approval so the agent's `list-*` MCP tools always
|
||||
return current state."""
|
||||
return bottle_state_dir(identity) / _LIVE_CONFIG_SUBDIR
|
||||
|
||||
|
||||
def live_routes_path(identity: str) -> Path:
|
||||
return live_config_dir(identity) / LIVE_CONFIG_ROUTES_NAME
|
||||
|
||||
|
||||
def live_allowlist_path(identity: str) -> Path:
|
||||
return live_config_dir(identity) / LIVE_CONFIG_ALLOWLIST_NAME
|
||||
|
||||
|
||||
def write_live_config(
|
||||
identity: str, *, routes: str = "", allowlist: str = "",
|
||||
) -> Path:
|
||||
"""Initialise (or refresh) the live-config dir. Empty-string args
|
||||
leave the existing file alone (caller passes only what it knows).
|
||||
Returns the live-config dir path."""
|
||||
d = live_config_dir(identity)
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
if routes:
|
||||
p = live_routes_path(identity)
|
||||
p.write_text(routes)
|
||||
p.chmod(0o644)
|
||||
if allowlist:
|
||||
p = live_allowlist_path(identity)
|
||||
p.write_text(allowlist)
|
||||
p.chmod(0o644)
|
||||
return d
|
||||
|
||||
|
||||
def transcript_snapshot_dir(identity: str) -> Path:
|
||||
"""Where capability_apply stashes the agent's transcript before
|
||||
teardown, so the next `cli.py start <agent>` can offer to
|
||||
resume from it."""
|
||||
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
|
||||
|
||||
|
||||
# --- Per-sidecar scratch subdirs (PRD 0018 chunk 2) ------------------------
|
||||
#
|
||||
# Each sidecar gets its own subdir under the bottle's state dir for
|
||||
# bind-mount sources (config, CAs, hooks, etc.). Prepare-time writes
|
||||
# land here; the state dir's normal cleanup (`cleanup_state`) reaps
|
||||
# them along with everything else when the bottle session ends and
|
||||
# nothing requested preservation.
|
||||
|
||||
|
||||
def pipelock_state_dir(identity: str) -> Path:
|
||||
"""State subdir for the pipelock sidecar: pipelock.yaml + the
|
||||
per-bottle CA cert/key. Bind-mount source from chunk 3 onward."""
|
||||
return bottle_state_dir(identity) / _PIPELOCK_SUBDIR
|
||||
|
||||
|
||||
def egress_state_dir(identity: str) -> Path:
|
||||
"""State subdir for the egress sidecar: routes.yaml + the
|
||||
per-bottle mitmproxy CA. Bind-mount source from chunk 3 onward."""
|
||||
return bottle_state_dir(identity) / _EGRESS_SUBDIR
|
||||
|
||||
|
||||
def git_gate_state_dir(identity: str) -> Path:
|
||||
"""State subdir for the git-gate sidecar: entrypoint + hooks +
|
||||
per-upstream known_hosts. Bind-mount source from chunk 3
|
||||
onward."""
|
||||
return bottle_state_dir(identity) / _GIT_GATE_SUBDIR
|
||||
|
||||
|
||||
def supervise_state_dir(identity: str) -> Path:
|
||||
"""State subdir for the supervise sidecar's current-config dir
|
||||
(bind-mounted into the agent at /etc/bot-bottle/current-config).
|
||||
The queue dir is intentionally NOT under here — it lives at
|
||||
~/.bot-bottle/queue/<slug>/ alongside the audit logs, so it
|
||||
survives state-dir cleanup."""
|
||||
return bottle_state_dir(identity) / _SUPERVISE_SUBDIR
|
||||
|
||||
|
||||
def agent_state_dir(identity: str) -> Path:
|
||||
"""State subdir for the agent's prepare-time scratch files: the
|
||||
env file (docker --env-file source) and the prompt file."""
|
||||
return bottle_state_dir(identity) / _AGENT_SUBDIR
|
||||
|
||||
|
||||
# --- Preserve-on-close marker ----------------------------------------------
|
||||
|
||||
|
||||
def preserve_marker_path(identity: str) -> Path:
|
||||
return bottle_state_dir(identity) / _PRESERVE_MARKER
|
||||
|
||||
|
||||
def mark_preserved(identity: str) -> Path:
|
||||
"""Mark this bottle's state for preservation across session
|
||||
teardown. Written by capability_apply.apply_capability_change so
|
||||
cli.py's session-end cleanup leaves the state dir intact for a
|
||||
subsequent `cli.py resume`."""
|
||||
path = preserve_marker_path(identity)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.touch()
|
||||
return path
|
||||
|
||||
|
||||
def is_preserved(identity: str) -> bool:
|
||||
return preserve_marker_path(identity).exists()
|
||||
|
||||
|
||||
def clear_preserve_marker(identity: str) -> None:
|
||||
"""Idempotent removal. Called at fresh launch (start or resume)
|
||||
so a marker left from a prior capability-block doesn't keep
|
||||
state alive past the next normal session-end."""
|
||||
try:
|
||||
preserve_marker_path(identity).unlink()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
def cleanup_state(identity: str) -> None:
|
||||
"""Remove the per-bottle state dir entirely. Called by cli.py
|
||||
when a bottle session ends and is_preserved(identity) is False.
|
||||
Idempotent — missing dir is success."""
|
||||
import shutil
|
||||
state_dir = bottle_state_dir(identity)
|
||||
if state_dir.is_dir():
|
||||
shutil.rmtree(state_dir, ignore_errors=True)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BottleMetadata",
|
||||
"agent_state_dir",
|
||||
"bottle_identity",
|
||||
"bottle_state_dir",
|
||||
"cleanup_state",
|
||||
"clear_preserve_marker",
|
||||
"egress_state_dir",
|
||||
"git_gate_state_dir",
|
||||
"is_preserved",
|
||||
"mark_preserved",
|
||||
"metadata_path",
|
||||
"per_bottle_dockerfile",
|
||||
"per_bottle_dockerfile_path",
|
||||
"per_bottle_image_tag",
|
||||
"pipelock_state_dir",
|
||||
"preserve_marker_path",
|
||||
"read_metadata",
|
||||
"supervise_state_dir",
|
||||
"transcript_snapshot_dir",
|
||||
"write_metadata",
|
||||
"write_per_bottle_dockerfile",
|
||||
]
|
||||
@@ -0,0 +1,220 @@
|
||||
"""capability_apply — host-side orchestrator for capability-block
|
||||
remediation (PRD 0016).
|
||||
|
||||
On approval of a capability-block proposal, the dashboard calls
|
||||
apply_capability_change(slug, new_dockerfile) which:
|
||||
|
||||
1. Snapshots the agent's transcript dir to
|
||||
~/.bot-bottle/state/<slug>/transcript/ (best-effort).
|
||||
2. Pushes the agent's working tree via `git push` (best-effort —
|
||||
no upstream / no commits / no git repo all skip with a log).
|
||||
3. Writes the new Dockerfile to
|
||||
~/.bot-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
|
||||
state). The next `cli.py start <agent>` picks it up.
|
||||
4. Force-removes the agent container + all sidecars + the
|
||||
per-bottle networks. Idempotent — missing resources are not
|
||||
errors.
|
||||
|
||||
Returns (before, after) Dockerfile contents so the dashboard can
|
||||
record / render the diff. (capability-block has no audit log per
|
||||
PRD 0013 — the per-bottle Dockerfile state is its own record.)
|
||||
|
||||
This is "fire-and-forget" from the agent's perspective: by the time
|
||||
the dashboard writes the response file the supervise sidecar is
|
||||
gone, so the agent's tool call connection drops without ever
|
||||
receiving the response. The replacement agent (next manual
|
||||
`cli.py start`) sees the new Dockerfile and starts from there.
|
||||
v1 does not auto-relaunch — see PRD 0016's capability-block return
|
||||
semantics open question.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...log import info, warn
|
||||
from .bottle_state import (
|
||||
mark_preserved,
|
||||
per_bottle_dockerfile,
|
||||
per_bottle_dockerfile_path,
|
||||
transcript_snapshot_dir,
|
||||
write_per_bottle_dockerfile,
|
||||
)
|
||||
from .sidecar_bundle import sidecar_bundle_container_name
|
||||
|
||||
|
||||
# Agent home inside the container (per the repo Dockerfile's
|
||||
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
|
||||
# dir + the workspace dir for git push.
|
||||
_AGENT_HOME_IN_CONTAINER = "/home/node"
|
||||
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
|
||||
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
|
||||
|
||||
# Per-bottle resource name patterns (mirroring prepare.py).
|
||||
def _agent_container_name(slug: str) -> str:
|
||||
return f"bot-bottle-{slug}"
|
||||
|
||||
|
||||
def _per_bottle_container_names(slug: str) -> list[str]:
|
||||
"""All container names that belong to this bottle. Missing
|
||||
containers are silently skipped by the teardown helper, so it's
|
||||
fine to include names that don't exist for a given bottle."""
|
||||
return [
|
||||
_agent_container_name(slug),
|
||||
sidecar_bundle_container_name(slug),
|
||||
]
|
||||
|
||||
|
||||
def _per_bottle_network_names(slug: str) -> list[str]:
|
||||
return [
|
||||
f"bot-bottle-net-{slug}",
|
||||
f"bot-bottle-egress-{slug}",
|
||||
]
|
||||
|
||||
|
||||
class CapabilityApplyError(RuntimeError):
|
||||
"""Raised when the apply fails in a way that should keep the
|
||||
proposal pending (so the operator can retry). Best-effort
|
||||
failures (transcript snapshot, git push) do not raise — they
|
||||
just log and proceed."""
|
||||
|
||||
|
||||
# --- Public helpers --------------------------------------------------------
|
||||
|
||||
|
||||
def fetch_current_dockerfile(slug: str) -> str:
|
||||
"""Return the Dockerfile content the next `cli.py start <agent>`
|
||||
would use for this bottle. If a per-bottle override exists, that
|
||||
one; otherwise the repo's Dockerfile.
|
||||
|
||||
Used by the operator-edit verb to show the current source of
|
||||
truth, and by apply_capability_change for the before-diff."""
|
||||
override = per_bottle_dockerfile(slug)
|
||||
if override is not None:
|
||||
return override
|
||||
repo_dockerfile = _repo_dockerfile_path()
|
||||
if repo_dockerfile.is_file():
|
||||
return repo_dockerfile.read_text()
|
||||
raise CapabilityApplyError(
|
||||
f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at "
|
||||
f"{repo_dockerfile}"
|
||||
)
|
||||
|
||||
|
||||
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
|
||||
"""End-to-end capability-block remediation. See module docstring
|
||||
for the sequence. Returns (before, after) Dockerfile content."""
|
||||
if not new_dockerfile.strip():
|
||||
raise CapabilityApplyError("proposed Dockerfile is empty")
|
||||
before = fetch_current_dockerfile(slug)
|
||||
|
||||
snapshot_transcript(slug)
|
||||
_push_working_tree(slug)
|
||||
write_per_bottle_dockerfile(slug, new_dockerfile)
|
||||
# Set the preserve marker BEFORE teardown so cli.py's session-end
|
||||
# cleanup sees it and keeps the state dir intact for the
|
||||
# operator's `cli.py resume <identity>`. Without the marker the
|
||||
# state dir would be deleted as part of normal session end.
|
||||
mark_preserved(slug)
|
||||
_teardown_bottle(slug)
|
||||
|
||||
return before, new_dockerfile
|
||||
|
||||
|
||||
# --- Internals -------------------------------------------------------------
|
||||
|
||||
|
||||
def _repo_dockerfile_path() -> Path:
|
||||
"""Path to the repo's Claude Dockerfile (one dir above this module's
|
||||
package root). Resolved at call time so the path is correct
|
||||
regardless of where this module is imported from."""
|
||||
# bot_bottle/backend/docker/capability_apply.py -> repo root
|
||||
return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile.claude"
|
||||
|
||||
|
||||
def snapshot_transcript(slug: str) -> None:
|
||||
"""`docker cp` /home/node/.claude out of the agent container into
|
||||
~/.bot-bottle/state/<slug>/transcript/. Best-effort: missing
|
||||
container, missing dir, or cp error all log a warning and return.
|
||||
The transcript is what `claude --resume` reads to pick up where
|
||||
the agent left off.
|
||||
|
||||
Called from two places:
|
||||
- capability-apply, before tearing the bottle down.
|
||||
- cli.py's session-end path, before the launch context closes,
|
||||
so a crash or normal exit also leaves a transcript on disk
|
||||
(deleted along with the state dir on clean exit, kept on
|
||||
crash or capability-block per the preserve marker)."""
|
||||
container = _agent_container_name(slug)
|
||||
dest = transcript_snapshot_dir(slug)
|
||||
if dest.exists():
|
||||
# Remove any prior snapshot so the new one is a clean copy.
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
r = subprocess.run(
|
||||
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"transcript snapshot skipped "
|
||||
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
|
||||
)
|
||||
return
|
||||
info(f"transcript snapshotted to {dest}")
|
||||
|
||||
|
||||
def _push_working_tree(slug: str) -> None:
|
||||
"""`docker exec <agent> git push` from /home/node/workspace.
|
||||
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
|
||||
network all log a warning and return. The replacement bottle
|
||||
will pick up whatever's actually upstream."""
|
||||
container = _agent_container_name(slug)
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "exec", container, "sh", "-c",
|
||||
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
|
||||
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
|
||||
f"git push origin HEAD 2>&1 || true",
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"capability-apply: git push skipped "
|
||||
f"({(r.stderr or '').strip() or 'docker exec failed'})"
|
||||
)
|
||||
return
|
||||
output = (r.stdout or "").strip()
|
||||
if output:
|
||||
info(f"capability-apply: git push: {output}")
|
||||
else:
|
||||
info("capability-apply: git push ran (no output — likely not a git workspace)")
|
||||
|
||||
|
||||
def _teardown_bottle(slug: str) -> None:
|
||||
"""Force-remove all per-bottle docker resources. Idempotent —
|
||||
`docker rm -f` / `docker network rm` silently ignore missing
|
||||
names, so this can be called even mid-rebuild."""
|
||||
info(f"capability-apply: tearing down bottle {slug}")
|
||||
for name in _per_bottle_container_names(slug):
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
for net in _per_bottle_network_names(slug):
|
||||
subprocess.run(
|
||||
["docker", "network", "rm", net],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CapabilityApplyError",
|
||||
"apply_capability_change",
|
||||
"fetch_current_dockerfile",
|
||||
"snapshot_transcript",
|
||||
]
|
||||
@@ -0,0 +1,180 @@
|
||||
"""Cleanup for the Docker bottle backend.
|
||||
|
||||
PRD 0018 chunk 4: cleanup is centered on `docker compose ls`.
|
||||
Pre-compose code paths could leave bare containers / networks
|
||||
without a compose project; those still show up via the prefix
|
||||
scan, just as a fallback bucket alongside the project list.
|
||||
|
||||
`prepare_cleanup` enumerates:
|
||||
|
||||
- Live compose projects whose name starts with `bot-bottle-`.
|
||||
- `bot-bottle-*` containers that aren't part of any compose
|
||||
project (legacy orphans).
|
||||
- `bot-bottle-*` networks that aren't tied to a compose
|
||||
project (legacy orphans; compose-managed networks come down
|
||||
with `compose down --volumes` and don't appear here).
|
||||
- State dirs under ~/.bot-bottle/state/<identity>/ with no
|
||||
live compose project AND no `.preserve` marker.
|
||||
|
||||
`cleanup` removes everything in the plan.
|
||||
|
||||
Active-agent enumeration lives in `backend/docker/enumerate.py`
|
||||
(mirror of `backend/smolmachines/enumerate.py`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from ... import supervise as _supervise
|
||||
from ...log import info, warn
|
||||
from . import util as docker_mod
|
||||
from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
||||
from .bottle_state import bottle_state_dir, is_preserved
|
||||
from .compose import COMPOSE_PROJECT_PREFIX, list_compose_projects
|
||||
|
||||
|
||||
def _list_prefixed_containers() -> list[str]:
|
||||
"""All bot-bottle-prefixed containers, running or stopped."""
|
||||
result = subprocess.run(
|
||||
["docker", "ps", "-a",
|
||||
"--filter", f"name=^{COMPOSE_PROJECT_PREFIX}",
|
||||
"--format", "{{.Names}}\t{{.Label \"com.docker.compose.project\"}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(f"docker ps failed: {result.stderr.strip()}")
|
||||
return []
|
||||
out: list[str] = []
|
||||
for line in (result.stdout or "").splitlines():
|
||||
if not line:
|
||||
continue
|
||||
name, _, project = line.partition("\t")
|
||||
# Stray = no compose label. Compose-managed containers carry
|
||||
# `com.docker.compose.project=<name>`; we'll reap those via
|
||||
# `compose down`, not via container rm.
|
||||
if not project:
|
||||
out.append(name)
|
||||
return sorted(set(out))
|
||||
|
||||
|
||||
def _list_prefixed_networks() -> list[str]:
|
||||
"""All bot-bottle-prefixed networks not currently attached
|
||||
to a compose project. Compose-managed networks have a
|
||||
`com.docker.compose.project` label; bare ones (from pre-compose
|
||||
code paths) don't."""
|
||||
result = subprocess.run(
|
||||
["docker", "network", "ls",
|
||||
"--filter", f"name={COMPOSE_PROJECT_PREFIX}",
|
||||
"--format", "{{.Name}}\t{{.Label \"com.docker.compose.project\"}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(f"docker network ls failed: {result.stderr.strip()}")
|
||||
return []
|
||||
out: list[str] = []
|
||||
for line in (result.stdout or "").splitlines():
|
||||
if not line:
|
||||
continue
|
||||
name, _, project = line.partition("\t")
|
||||
if not project:
|
||||
out.append(name)
|
||||
return sorted(set(out))
|
||||
|
||||
|
||||
def _list_orphan_state_dirs(
|
||||
live_projects: set[str], protected_identities: set[str],
|
||||
) -> list[str]:
|
||||
"""State identities whose compose project isn't running and
|
||||
that don't have a `.preserve` marker. `.preserve` means the
|
||||
user (or an auto-preserve-on-crash) wants the state kept for
|
||||
`resume`.
|
||||
|
||||
`protected_identities` is the set of slugs that are live in
|
||||
ANY backend — used so this docker-side check doesn't reap a
|
||||
running smolmachines bottle's state dir (the layout is shared
|
||||
across both backends)."""
|
||||
state_root = _supervise.bot_bottle_root() / "state"
|
||||
if not state_root.is_dir():
|
||||
return []
|
||||
orphans: list[str] = []
|
||||
for child in sorted(state_root.iterdir()):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
identity = child.name
|
||||
project = f"{COMPOSE_PROJECT_PREFIX}{identity}"
|
||||
if project in live_projects:
|
||||
continue
|
||||
if identity in protected_identities:
|
||||
continue
|
||||
if is_preserved(identity):
|
||||
continue
|
||||
orphans.append(identity)
|
||||
return orphans
|
||||
|
||||
|
||||
def prepare_cleanup() -> DockerBottleCleanupPlan:
|
||||
"""Enumerate everything cleanup will touch. No removals.
|
||||
|
||||
Pulls the union of live identities across backends via
|
||||
`enumerate_active_agents()` so the orphan-state-dir bucket
|
||||
doesn't include slugs whose smolmachines VM is still up."""
|
||||
docker_mod.require_docker()
|
||||
projects = list_compose_projects()
|
||||
project_set = set(projects)
|
||||
# Late import to avoid a circular at module-load time —
|
||||
# the backend package's __init__ imports this module.
|
||||
from .. import enumerate_active_agents
|
||||
protected = {a.slug for a in enumerate_active_agents()}
|
||||
return DockerBottleCleanupPlan(
|
||||
projects=tuple(projects),
|
||||
stray_containers=tuple(_list_prefixed_containers()),
|
||||
stray_networks=tuple(_list_prefixed_networks()),
|
||||
orphan_state_dirs=tuple(
|
||||
_list_orphan_state_dirs(project_set, protected),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def cleanup(plan: DockerBottleCleanupPlan) -> None:
|
||||
"""Remove everything in the plan. Projects first (whose `compose
|
||||
down` reaps their containers + networks atomically), then stray
|
||||
legacy resources, then orphan state dirs."""
|
||||
for project in plan.projects:
|
||||
info(f"docker compose down ({project})")
|
||||
result = subprocess.run(
|
||||
["docker", "compose", "-p", project, "down", "--volumes"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(
|
||||
f"compose down failed for {project}: "
|
||||
f"{result.stderr.strip()}"
|
||||
)
|
||||
|
||||
for name in plan.stray_containers:
|
||||
info(f"removing stray container {name}")
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
for name in plan.stray_networks:
|
||||
info(f"removing stray network {name}")
|
||||
subprocess.run(
|
||||
["docker", "network", "rm", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
for identity in plan.orphan_state_dirs:
|
||||
path = bottle_state_dir(identity)
|
||||
info(f"removing orphan state dir {path}")
|
||||
try:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
except OSError as e:
|
||||
warn(f"failed to remove {path}: {e}")
|
||||
@@ -0,0 +1,528 @@
|
||||
"""Compose-spec rendering for a Docker bottle (PRD 0018, chunk 1).
|
||||
|
||||
`bottle_plan_to_compose(plan)` returns a Compose v2 spec dict
|
||||
describing the per-bottle container topology — one project per
|
||||
bottle instance, services for the agent + every applicable sidecar,
|
||||
two networks, no named volumes.
|
||||
|
||||
Pure function. No I/O, no subprocess. Expects every launch-time
|
||||
field (network names, CA host paths, etc.) on the plan's inner
|
||||
plans to be populated; chunks 2+3 own that ordering. Chunk 1 just
|
||||
encodes the translation so it can be unit-tested in isolation.
|
||||
|
||||
Conditional services follow the plan content (matches the
|
||||
SDK-call branching in `launch.py` today):
|
||||
|
||||
- pipelock + agent: always.
|
||||
- git-gate: iff plan.git_gate_plan.upstreams.
|
||||
- egress: iff plan.egress_plan.routes.
|
||||
- supervise: iff plan.supervise_plan is not None.
|
||||
|
||||
Naming:
|
||||
|
||||
- Compose project: `bot-bottle-<slug>`.
|
||||
- Service names (inside the file): `agent`, `pipelock`,
|
||||
`egress`, `git-gate`, `supervise`.
|
||||
- `container_name:` matches today's pattern
|
||||
(`bot-bottle-<service>-<slug>`) so dashboard/cleanup discovery
|
||||
via the prefix scan keeps working through the transition.
|
||||
- Network aliases preserve the current dial-by-shortname pattern
|
||||
for `egress` / `supervise`, and add the long container-name as
|
||||
an internal-network alias for `pipelock` / `git-gate` so any
|
||||
caller still referencing the long name resolves.
|
||||
|
||||
Sidecars that are built (egress, git-gate, supervise) get a
|
||||
compose `build:` block pointing at the repo Dockerfile; the
|
||||
`image:` tag is set explicitly so cached images on the daemon
|
||||
aren't rebuilt on every up.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ...egress import (
|
||||
EGRESS_HOSTNAME,
|
||||
EGRESS_ROUTES_IN_CONTAINER,
|
||||
)
|
||||
from ...git_gate import GIT_GATE_HOSTNAME, git_gate_aggregate_extra_hosts
|
||||
from ...log import die, warn
|
||||
from ...pipelock import PIPELOCK_HOSTNAME
|
||||
from ...supervise import (
|
||||
CURRENT_CONFIG_DIR_IN_AGENT,
|
||||
QUEUE_DIR_IN_CONTAINER,
|
||||
SUPERVISE_HOSTNAME,
|
||||
SUPERVISE_PORT,
|
||||
)
|
||||
from ...util import expand_tilde
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .egress import (
|
||||
EGRESS_CA_IN_CONTAINER,
|
||||
EGRESS_PIPELOCK_CA_IN_CONTAINER,
|
||||
)
|
||||
from .git_gate import (
|
||||
GIT_GATE_ACCESS_HOOK_IN_CONTAINER,
|
||||
GIT_GATE_CREDS_DIR_IN_CONTAINER,
|
||||
GIT_GATE_ENTRYPOINT_IN_CONTAINER,
|
||||
GIT_GATE_HOOK_IN_CONTAINER,
|
||||
)
|
||||
from .pipelock import (
|
||||
PIPELOCK_CA_CERT_IN_CONTAINER,
|
||||
PIPELOCK_CA_KEY_IN_CONTAINER,
|
||||
PIPELOCK_PORT,
|
||||
)
|
||||
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
|
||||
from .sidecar_bundle import (
|
||||
SIDECAR_BUNDLE_DOCKERFILE,
|
||||
SIDECAR_BUNDLE_IMAGE,
|
||||
sidecar_bundle_container_name,
|
||||
)
|
||||
|
||||
|
||||
# Repo root, used as the build context for the bundle Dockerfile.
|
||||
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
|
||||
|
||||
|
||||
def bottle_plan_to_compose(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
"""Render a Compose v2 spec dict from a fully-resolved
|
||||
DockerBottlePlan.
|
||||
|
||||
The plan must have its inner plans (`proxy_plan`,
|
||||
`git_gate_plan`, `egress_plan`, `supervise_plan`) populated
|
||||
with launch-time fields — network names, CA host paths,
|
||||
pipelock_proxy_url. The renderer doesn't validate; callers
|
||||
feed it a fully-resolved plan or get an incomplete compose
|
||||
spec back.
|
||||
"""
|
||||
project = f"bot-bottle-{plan.slug}"
|
||||
services: dict[str, Any] = {
|
||||
"sidecars": _sidecar_bundle_service(plan),
|
||||
"agent": _agent_service(plan),
|
||||
}
|
||||
return {
|
||||
"name": project,
|
||||
"services": services,
|
||||
"networks": _networks(plan),
|
||||
}
|
||||
|
||||
|
||||
def _networks(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
"""Compose-managed networks with explicit `name:` matching the
|
||||
existing slug-suffixed convention. Compose creates them on `up`
|
||||
and destroys them on `down`. The internal one is `--internal`
|
||||
(no default gateway); the egress one is a normal user-defined
|
||||
bridge."""
|
||||
return {
|
||||
"internal": {
|
||||
"name": plan.proxy_plan.internal_network,
|
||||
"internal": True,
|
||||
},
|
||||
"egress": {
|
||||
"name": plan.proxy_plan.egress_network,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _bind(host: str | Path, target: str, *, read_only: bool = True) -> dict[str, Any]:
|
||||
"""One bind-mount entry in the long-form `volumes:` shape.
|
||||
Long form is preferred over `host:target:ro` strings because
|
||||
it's easier to inspect in tests and survives whitespace in
|
||||
host paths."""
|
||||
return {
|
||||
"type": "bind",
|
||||
"source": str(host),
|
||||
"target": target,
|
||||
"read_only": read_only,
|
||||
}
|
||||
|
||||
|
||||
def _sidecar_bundle_service(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
"""The `sidecars` service: one container per bottle, bundle
|
||||
image, all four daemons under a Python init supervisor.
|
||||
|
||||
Mechanics:
|
||||
|
||||
- Daemon subset narrows via `BOT_BOTTLE_SIDECAR_DAEMONS`
|
||||
env. pipelock is always present; egress / git-gate /
|
||||
supervise are conditional on the plan.
|
||||
- Volumes are the union of the four daemons' bind-mounts,
|
||||
preserving the same in-container paths so each daemon
|
||||
finds its config / hooks / CA where it expects.
|
||||
- Environment is the union of *daemon-private* env vars
|
||||
(EGRESS_UPSTREAM_PROXY, SUPERVISE_BOTTLE_SLUG, etc).
|
||||
HTTPS_PROXY is NOT propagated here — see the comment in
|
||||
egress_entrypoint.sh; setting it at the container level
|
||||
would route git-gate's git fetches through pipelock,
|
||||
which is wrong.
|
||||
- Network aliases register every legacy short/long
|
||||
hostname (pipelock, egress, git-gate, supervise plus
|
||||
their `bot-bottle-<service>-<slug>` long forms) so
|
||||
the agent's HTTPS_PROXY URL and any other inter-service
|
||||
reference resolves to the bundle.
|
||||
"""
|
||||
daemons: list[str] = ["egress", "pipelock"]
|
||||
if plan.git_gate_plan.upstreams:
|
||||
daemons.append("git-gate")
|
||||
if plan.supervise_plan is not None:
|
||||
daemons.append("supervise")
|
||||
|
||||
env: list[str] = [f"BOT_BOTTLE_SIDECAR_DAEMONS={','.join(daemons)}"]
|
||||
volumes: list[dict[str, Any]] = []
|
||||
|
||||
# --- pipelock ----------------------------------------------------
|
||||
pp = plan.proxy_plan
|
||||
volumes += [
|
||||
_bind(pp.yaml_path, "/etc/pipelock.yaml"),
|
||||
_bind(pp.ca_cert_host_path, PIPELOCK_CA_CERT_IN_CONTAINER),
|
||||
_bind(pp.ca_key_host_path, PIPELOCK_CA_KEY_IN_CONTAINER),
|
||||
]
|
||||
|
||||
# --- egress (always part of the bundle; the EGRESS_UPSTREAM_*
|
||||
# env vars + ca bind-mounts are needed iff routes exist; when
|
||||
# the bottle has no routes the egress daemon falls back to its
|
||||
# `regular@9099` mode and is unused) -----------------------------
|
||||
ep = plan.egress_plan
|
||||
if ep.routes:
|
||||
env.append(f"EGRESS_UPSTREAM_PROXY={ep.pipelock_proxy_url}")
|
||||
env.append(f"EGRESS_UPSTREAM_CA={EGRESS_PIPELOCK_CA_IN_CONTAINER}")
|
||||
volumes += [
|
||||
_bind(ep.routes_path, EGRESS_ROUTES_IN_CONTAINER),
|
||||
_bind(ep.mitmproxy_ca_host_path, EGRESS_CA_IN_CONTAINER),
|
||||
_bind(ep.pipelock_ca_host_path, EGRESS_PIPELOCK_CA_IN_CONTAINER),
|
||||
]
|
||||
for token_env in sorted(ep.token_env_map.keys()):
|
||||
env.append(token_env)
|
||||
|
||||
# --- git-gate ----------------------------------------------------
|
||||
extra_hosts: list[str] = []
|
||||
gp = plan.git_gate_plan
|
||||
if gp.upstreams:
|
||||
volumes += [
|
||||
_bind(gp.entrypoint_script, GIT_GATE_ENTRYPOINT_IN_CONTAINER),
|
||||
_bind(gp.hook_script, GIT_GATE_HOOK_IN_CONTAINER),
|
||||
_bind(gp.access_hook_script, GIT_GATE_ACCESS_HOOK_IN_CONTAINER),
|
||||
]
|
||||
for u in gp.upstreams:
|
||||
keypath = expand_tilde(u.identity_file)
|
||||
volumes.append(_bind(
|
||||
keypath,
|
||||
f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-key",
|
||||
))
|
||||
extra_map = git_gate_aggregate_extra_hosts(gp.upstreams)
|
||||
extra_hosts = [f"{host}:{ip}" for host, ip in sorted(extra_map.items())]
|
||||
|
||||
# --- supervise ---------------------------------------------------
|
||||
sp = plan.supervise_plan
|
||||
if sp is not None:
|
||||
env += [
|
||||
f"SUPERVISE_BOTTLE_SLUG={plan.slug}",
|
||||
f"SUPERVISE_QUEUE_DIR={QUEUE_DIR_IN_CONTAINER}",
|
||||
f"SUPERVISE_PORT={SUPERVISE_PORT}",
|
||||
]
|
||||
volumes.append({
|
||||
"type": "bind",
|
||||
"source": str(sp.queue_dir),
|
||||
"target": QUEUE_DIR_IN_CONTAINER,
|
||||
"read_only": False,
|
||||
})
|
||||
|
||||
# Internal-network aliases: the agent reaches each daemon through
|
||||
# its short name (pipelock / egress / git-gate / supervise) which
|
||||
# the bundle answers as if it were the daemon itself.
|
||||
internal_aliases = [
|
||||
PIPELOCK_HOSTNAME,
|
||||
EGRESS_HOSTNAME,
|
||||
]
|
||||
if gp.upstreams:
|
||||
internal_aliases.append(GIT_GATE_HOSTNAME)
|
||||
if sp is not None:
|
||||
internal_aliases.append(SUPERVISE_HOSTNAME)
|
||||
|
||||
service: dict[str, Any] = {
|
||||
"image": SIDECAR_BUNDLE_IMAGE,
|
||||
"build": {
|
||||
"context": _REPO_DIR,
|
||||
"dockerfile": SIDECAR_BUNDLE_DOCKERFILE,
|
||||
},
|
||||
"container_name": sidecar_bundle_container_name(plan.slug),
|
||||
"networks": {
|
||||
"internal": {"aliases": internal_aliases},
|
||||
"egress": None,
|
||||
},
|
||||
"environment": env,
|
||||
"volumes": volumes,
|
||||
}
|
||||
if extra_hosts:
|
||||
service["extra_hosts"] = extra_hosts
|
||||
return service
|
||||
|
||||
|
||||
def _agent_service(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
"""Agent container. Runs `sleep infinity`; claude is `docker
|
||||
exec -it`'d into it later. No TTY at the container level —
|
||||
interactivity is per-exec. HTTP_PROXY/HTTPS_PROXY point at the
|
||||
egress short-alias when an egress is declared, otherwise
|
||||
straight at pipelock's container name. CA trust trio matches
|
||||
the existing launch.py wiring."""
|
||||
proxy_url = _agent_proxy_url(plan)
|
||||
no_proxy = _agent_no_proxy(plan)
|
||||
env: list[str] = [
|
||||
f"HTTPS_PROXY={proxy_url}",
|
||||
f"HTTP_PROXY={proxy_url}",
|
||||
f"https_proxy={proxy_url}",
|
||||
f"http_proxy={proxy_url}",
|
||||
f"NO_PROXY={no_proxy}",
|
||||
f"no_proxy={no_proxy}",
|
||||
f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
|
||||
f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
|
||||
f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
|
||||
]
|
||||
# Forwarded vars (OAuth token, manifest host-interpolations):
|
||||
# bare name → inherits from compose-up process env, value
|
||||
# never lands on argv or in the compose file.
|
||||
for name in sorted(plan.forwarded_env.keys()):
|
||||
env.append(name)
|
||||
|
||||
service: dict[str, Any] = {
|
||||
"image": plan.runtime_image,
|
||||
"container_name": plan.container_name,
|
||||
"command": ["sleep", "infinity"],
|
||||
"networks": {"internal": None},
|
||||
"environment": env,
|
||||
}
|
||||
if plan.use_runsc:
|
||||
service["runtime"] = "runsc"
|
||||
if plan.env_file and plan.env_file.exists() and plan.env_file.stat().st_size > 0:
|
||||
service["env_file"] = [str(plan.env_file)]
|
||||
|
||||
volumes: list[dict[str, Any]] = []
|
||||
if plan.supervise_plan is not None:
|
||||
volumes.append(_bind(
|
||||
plan.supervise_plan.current_config_dir,
|
||||
CURRENT_CONFIG_DIR_IN_AGENT,
|
||||
))
|
||||
if volumes:
|
||||
service["volumes"] = volumes
|
||||
|
||||
# The init supervisor inside the bundle owns intra-bundle
|
||||
# daemon ordering, so the agent only waits for the bundle
|
||||
# container itself.
|
||||
service["depends_on"] = ["sidecars"]
|
||||
|
||||
return service
|
||||
|
||||
|
||||
def _agent_proxy_url(plan: DockerBottlePlan) -> str:
|
||||
"""Pick the agent's HTTP_PROXY. With egress declared, the agent
|
||||
goes through egress (which in turn HTTPS_PROXYs to pipelock on
|
||||
its outbound leg). Without egress, the agent talks straight to
|
||||
pipelock."""
|
||||
if plan.egress_plan.routes:
|
||||
from .egress import EGRESS_PORT
|
||||
return f"http://{EGRESS_HOSTNAME}:{EGRESS_PORT}"
|
||||
return f"http://{PIPELOCK_HOSTNAME}:{PIPELOCK_PORT}"
|
||||
|
||||
|
||||
def _agent_no_proxy(plan: DockerBottlePlan) -> str:
|
||||
"""NO_PROXY for the agent. Matches the launch.py rules:
|
||||
loopback always, supervise hostname when the supervise sidecar
|
||||
is up (the MCP long-poll pattern needs to bypass pipelock's
|
||||
idle timeout)."""
|
||||
hosts = ["localhost", "127.0.0.1"]
|
||||
if plan.supervise_plan is not None:
|
||||
hosts.append(SUPERVISE_HOSTNAME)
|
||||
return ",".join(hosts)
|
||||
|
||||
|
||||
# --- Lifecycle helpers (PRD 0018 chunk 3) ----------------------------------
|
||||
#
|
||||
# The renderer above is pure. The helpers below own the I/O side:
|
||||
# serialize the spec to disk, drive `docker compose up`, dump the
|
||||
# merged log file on teardown, and `docker compose down` to clean up
|
||||
# (networks are pre-created externally so `down` leaves them alone;
|
||||
# the launch step removes them in its own teardown step).
|
||||
|
||||
|
||||
COMPOSE_FILE_NAME = "docker-compose.yml"
|
||||
COMPOSE_LOG_NAME = "compose.log"
|
||||
|
||||
|
||||
COMPOSE_PROJECT_PREFIX = "bot-bottle-"
|
||||
|
||||
|
||||
def compose_project_name(slug: str) -> str:
|
||||
"""Stable mapping from slug → compose project. Matches the
|
||||
`name:` field the renderer emits, so `docker compose ls`
|
||||
enumeration and direct CLI invocations agree on the project
|
||||
identifier."""
|
||||
return f"{COMPOSE_PROJECT_PREFIX}{slug}"
|
||||
|
||||
|
||||
def slug_from_compose_project(project: str) -> str:
|
||||
"""Inverse of `compose_project_name`: strip the prefix to get
|
||||
the underlying slug. Returns empty string if the project name
|
||||
doesn't start with the expected prefix."""
|
||||
if not project.startswith(COMPOSE_PROJECT_PREFIX):
|
||||
return ""
|
||||
return project[len(COMPOSE_PROJECT_PREFIX):]
|
||||
|
||||
|
||||
def list_compose_projects(*, include_stopped: bool = True) -> list[str]:
|
||||
"""All compose project names starting with `bot-bottle-`.
|
||||
`include_stopped=True` (default) runs `docker compose ls --all`
|
||||
so exited projects appear too; pass False to get only projects
|
||||
with at least one running container.
|
||||
|
||||
Returns [] on docker daemon errors or malformed output rather
|
||||
than raising — callers should treat the empty list as "no
|
||||
projects discoverable", not "no projects exist"."""
|
||||
argv = ["docker", "compose", "ls", "--format", "json"]
|
||||
if include_stopped:
|
||||
argv.insert(3, "--all")
|
||||
try:
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True, check=False,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
# docker binary not on PATH — same shape as a daemon-down
|
||||
# error from the caller's POV: no projects discoverable.
|
||||
return []
|
||||
if result.returncode != 0:
|
||||
warn(f"docker compose ls failed: {result.stderr.strip()}")
|
||||
return []
|
||||
try:
|
||||
projects = json.loads(result.stdout or "[]")
|
||||
except json.JSONDecodeError as e:
|
||||
warn(f"docker compose ls returned malformed JSON: {e}")
|
||||
return []
|
||||
names: list[str] = []
|
||||
for p in projects:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
name = str(p.get("Name", ""))
|
||||
if name.startswith(COMPOSE_PROJECT_PREFIX):
|
||||
names.append(name)
|
||||
return sorted(set(names))
|
||||
|
||||
|
||||
def list_active_slugs(*, include_stopped: bool = False) -> list[str]:
|
||||
"""Slugs (project name minus prefix) of currently-running
|
||||
bottles. Used by the dashboard's operator-edit verbs to choose
|
||||
a bottle to apply a config edit to."""
|
||||
return sorted(
|
||||
slug for slug in (
|
||||
slug_from_compose_project(p)
|
||||
for p in list_compose_projects(include_stopped=include_stopped)
|
||||
) if slug
|
||||
)
|
||||
|
||||
|
||||
def compose_file_path(state_dir: Path) -> Path:
|
||||
return state_dir / COMPOSE_FILE_NAME
|
||||
|
||||
|
||||
def compose_log_path(state_dir: Path) -> Path:
|
||||
return state_dir / COMPOSE_LOG_NAME
|
||||
|
||||
|
||||
def write_compose_file(spec: dict[str, Any], path: Path) -> Path:
|
||||
"""Serialize the compose dict to disk. JSON content with a
|
||||
`.yml` filename — JSON is a strict subset of YAML 1.2 for the
|
||||
constructs the renderer uses (mappings, lists, strings, bools,
|
||||
nulls), and `docker compose -f file.yml` parses it as YAML.
|
||||
Avoids a yaml dependency while keeping the file `cat`-readable.
|
||||
"""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(spec, indent=2, sort_keys=False) + "\n")
|
||||
path.chmod(0o644)
|
||||
return path
|
||||
|
||||
|
||||
def _compose_argv(project: str, compose_file: Path, *cmd: str) -> list[str]:
|
||||
return [
|
||||
"docker", "compose",
|
||||
"-p", project,
|
||||
"-f", str(compose_file),
|
||||
*cmd,
|
||||
]
|
||||
|
||||
|
||||
def compose_up(
|
||||
project: str,
|
||||
compose_file: Path,
|
||||
*,
|
||||
env: dict[str, str] | None = None,
|
||||
) -> None:
|
||||
"""`docker compose up -d` for the project. Env-inheritance is
|
||||
via `env=` on the subprocess — every `environment: [NAME]` (bare
|
||||
name) entry in the compose file resolves to whatever value
|
||||
`NAME` has in `env` at exec time. Secrets never land on argv or
|
||||
in the compose file."""
|
||||
argv = _compose_argv(project, compose_file, "up", "-d")
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True, env=env, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
sys.stderr.write(result.stderr)
|
||||
die(f"docker compose up failed for project {project}")
|
||||
|
||||
|
||||
def compose_dump_logs(project: str, compose_file: Path, output: Path) -> None:
|
||||
"""Write the merged stdout/stderr of every service to `output`
|
||||
using `docker compose logs --no-color --timestamps`. Best-effort
|
||||
— failures here shouldn't block teardown. The interleaved single
|
||||
file is what the user reads post-mortem; per-service tail still
|
||||
works through `docker compose logs -f <service>` while the
|
||||
project is up."""
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
argv = _compose_argv(project, compose_file, "logs", "--no-color", "--timestamps")
|
||||
try:
|
||||
with open(output, "wb") as f:
|
||||
subprocess.run(
|
||||
argv,
|
||||
stdout=f,
|
||||
stderr=subprocess.STDOUT,
|
||||
check=False,
|
||||
)
|
||||
output.chmod(0o644)
|
||||
except OSError as e:
|
||||
warn(f"failed to write compose log to {output}: {e}")
|
||||
|
||||
|
||||
def compose_down(project: str, compose_file: Path) -> None:
|
||||
"""`docker compose down` for the project. External networks are
|
||||
intentionally NOT removed by compose (`external: true` on the
|
||||
networks block); the launch step's own teardown removes them
|
||||
via `network_remove` so the per-bottle ephemeral subnet doesn't
|
||||
accumulate."""
|
||||
argv = _compose_argv(project, compose_file, "down")
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(
|
||||
f"docker compose down failed for project {project}: "
|
||||
f"{result.stderr.strip()}"
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"COMPOSE_FILE_NAME",
|
||||
"COMPOSE_LOG_NAME",
|
||||
"COMPOSE_PROJECT_PREFIX",
|
||||
"bottle_plan_to_compose",
|
||||
"compose_down",
|
||||
"compose_dump_logs",
|
||||
"compose_file_path",
|
||||
"compose_log_path",
|
||||
"compose_project_name",
|
||||
"compose_up",
|
||||
"list_active_slugs",
|
||||
"list_compose_projects",
|
||||
"slug_from_compose_project",
|
||||
"write_compose_file",
|
||||
]
|
||||
@@ -0,0 +1,123 @@
|
||||
"""Docker-side egress helpers: port pin, in-container CA paths,
|
||||
container naming, and the host-side mitmproxy CA mint. The
|
||||
prepare-time routes-yaml rendering itself lives on the
|
||||
platform-neutral `Egress` ABC — backends instantiate it directly.
|
||||
|
||||
The per-container `.start()` / `.stop()` lifecycle was removed in
|
||||
PRD 0024 chunk 3; the sidecar bundle (PRD 0024) runs egress
|
||||
under its python init supervisor."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...log import die
|
||||
|
||||
|
||||
# Listening port the egress daemon binds inside the bundle. The
|
||||
# agent's HTTP_PROXY env var resolves to `http://egress:<port>`,
|
||||
# and the bundle's network aliases route `egress` to itself.
|
||||
EGRESS_PORT = int(os.environ.get("BOT_BOTTLE_EGRESS_PORT", "9099"))
|
||||
|
||||
# In-container path for mitmproxy's CA. The format is a single PEM
|
||||
# file holding BOTH the cert and the private key, concatenated. The
|
||||
# upstream-trust CA (pipelock's, so egress trusts the upstream
|
||||
# leg) is a separate file because pipelock keeps a different CA on
|
||||
# its end.
|
||||
EGRESS_CA_IN_CONTAINER = "/home/mitmproxy/.mitmproxy/mitmproxy-ca.pem"
|
||||
EGRESS_PIPELOCK_CA_IN_CONTAINER = (
|
||||
"/home/mitmproxy/.mitmproxy/pipelock-ca.pem"
|
||||
)
|
||||
|
||||
|
||||
def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
|
||||
"""Mint the per-bottle egress MITM CA via host `openssl req`.
|
||||
|
||||
Returns `(mitmproxy_pem, cert_only_pem)`:
|
||||
- `mitmproxy_pem` is the single-PEM concat (cert + key)
|
||||
mitmproxy reads from `~/.mitmproxy/mitmproxy-ca.pem`.
|
||||
- `cert_only_pem` is the cert alone — installed into the agent's
|
||||
trust store by `provision_ca` so the agent trusts the bumped
|
||||
CONNECT cert egress presents.
|
||||
|
||||
Why openssl req (not the pipelock binary's `tls init`):
|
||||
pipelock's CA generator stamps a non-standard `Subject Key
|
||||
Identifier` on the CA (random rather than SHA-1 of the pubkey).
|
||||
mitmproxy computes the `Authority Key Identifier` on each leaf
|
||||
it mints as SHA-1(issuer's pubkey). openssl's chain validator
|
||||
uses the leaf's AKI to find the issuer cert by SKI; pipelock's
|
||||
SKI doesn't match → openssl reports "unable to get local issuer
|
||||
certificate" even though the CA is right there in the trust
|
||||
store. openssl req's `subjectKeyIdentifier=hash` extension uses
|
||||
SHA-1(pubkey), matching mitmproxy's computation.
|
||||
|
||||
Both files live under `<stage_dir>/egress-ca/` (mode 644 —
|
||||
`docker cp` preserves the mode into the container, where the
|
||||
mitmproxy user (uid 1000) reads them; the host stage_dir is
|
||||
mode 700 so the private key isn't world-exposed)."""
|
||||
work = stage_dir / "egress-ca"
|
||||
work.mkdir(exist_ok=True)
|
||||
key_path = work / "ca-key.pem"
|
||||
cert_path = work / "ca.pem"
|
||||
cnf_path = work / "ca.cnf"
|
||||
|
||||
# RSA-2048 — broad mitmproxy compatibility (its default leaf-cert
|
||||
# config matches RSA CAs without surprise), and openssl req's
|
||||
# default behavior here is exactly what we want.
|
||||
keygen = subprocess.run(
|
||||
["openssl", "genrsa", "-out", str(key_path), "2048"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if keygen.returncode != 0:
|
||||
die(f"egress ca keygen failed: {keygen.stderr.strip()}")
|
||||
# Standalone private key — never docker-cp'd, never bind-mounted
|
||||
# (mitmproxy reads the cert+key concat below). Lock to owner-
|
||||
# only so it doesn't sit at the default umask on disk.
|
||||
key_path.chmod(0o600)
|
||||
|
||||
# `subjectKeyIdentifier=hash` makes openssl compute the SKI as
|
||||
# SHA-1(pubkey), matching how mitmproxy computes the AKI on the
|
||||
# leaves it later mints. Without this, chain validation breaks
|
||||
# despite the CA being present in the trust store.
|
||||
cnf_path.write_text(
|
||||
"[req]\n"
|
||||
"distinguished_name = req_dn\n"
|
||||
"prompt = no\n"
|
||||
"x509_extensions = v3_ca\n"
|
||||
"\n"
|
||||
"[req_dn]\n"
|
||||
"O = bot-bottle\n"
|
||||
"CN = bot-bottle egress CA\n"
|
||||
"\n"
|
||||
"[v3_ca]\n"
|
||||
"basicConstraints = critical, CA:TRUE\n"
|
||||
"keyUsage = critical, keyCertSign, cRLSign\n"
|
||||
"subjectKeyIdentifier = hash\n"
|
||||
)
|
||||
cnf_path.chmod(0o644)
|
||||
|
||||
req = subprocess.run(
|
||||
["openssl", "req", "-x509", "-new", "-nodes",
|
||||
"-key", str(key_path),
|
||||
"-sha256", "-days", "365",
|
||||
"-config", str(cnf_path),
|
||||
"-out", str(cert_path)],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if req.returncode != 0:
|
||||
die(f"egress ca cert generation failed: {req.stderr.strip()}")
|
||||
|
||||
cert_path.chmod(0o644)
|
||||
# mitmproxy reads cert + key from a single concatenated PEM file.
|
||||
# This file IS bind-mounted into the egress container (chunk 3+),
|
||||
# where mitmproxy runs as uid 1000 — so the host file has to be
|
||||
# world-readable for the container's user to read it through the
|
||||
# mount. Owner-only mode on the parent dir (state/<slug>/, under
|
||||
# ~/.bot-bottle which inherits ~'s 0o700) is what actually
|
||||
# restricts who can reach this file on the host.
|
||||
mitm = work / "mitmproxy-ca.pem"
|
||||
mitm.write_bytes(cert_path.read_bytes() + key_path.read_bytes())
|
||||
mitm.chmod(0o644)
|
||||
return (mitm, cert_path)
|
||||
@@ -0,0 +1,343 @@
|
||||
"""Host-side helper to apply a routes.yaml change to a running
|
||||
egress sidecar (PRD 0014 retargeted by PRD 0017 chunk 3).
|
||||
|
||||
Used by the supervise dashboard when the operator approves an
|
||||
egress-block proposal (or runs the operator-initiated
|
||||
`routes edit <bottle>` verb). Fetches the current routes.yaml via
|
||||
`docker exec cat`, validates the new content, writes it into the
|
||||
sidecar via `docker cp`, then `docker kill --signal HUP` to make
|
||||
the addon reload without dropping connections.
|
||||
|
||||
Also mirrors the new route hosts into pipelock's hostname allowlist
|
||||
so the downstream leg lets them through — egress enforces
|
||||
the path-aware allowlist on the agent leg, pipelock enforces the
|
||||
hostname allowlist + DLP body scan on the upstream leg, and a
|
||||
host added to one must be in the other or the request 403s
|
||||
somewhere along the chain.
|
||||
|
||||
Raises EgressApplyError on any failure — the dashboard
|
||||
surfaces the message and keeps the proposal pending so the
|
||||
operator can retry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...egress import EGRESS_ROUTES_IN_CONTAINER
|
||||
from ...egress_addon_core import load_routes
|
||||
from ...yaml_subset import YamlSubsetError, parse_yaml_subset
|
||||
from .bottle_state import egress_state_dir
|
||||
from .sidecar_bundle import sidecar_bundle_container_name
|
||||
from .pipelock_apply import (
|
||||
PipelockApplyError,
|
||||
apply_allowlist_change,
|
||||
fetch_current_allowlist,
|
||||
parse_allowlist_content,
|
||||
render_allowlist_content,
|
||||
)
|
||||
|
||||
|
||||
def _render_routes_payload(routes_list: list[dict[str, object]]) -> str:
|
||||
"""Render a list-of-dicts routes payload as YAML matching the
|
||||
shape `egress_render_routes` produces. The apply path
|
||||
round-trips current routes.yaml through this so the file the
|
||||
sidecar sees stays in the YAML format the addon expects."""
|
||||
if not routes_list:
|
||||
return "routes: []\n"
|
||||
lines: list[str] = ["routes:"]
|
||||
for entry in routes_list:
|
||||
host = str(entry.get("host", ""))
|
||||
lines.append(f' - host: "{host}"')
|
||||
auth_scheme = entry.get("auth_scheme")
|
||||
token_env = entry.get("token_env")
|
||||
if auth_scheme and token_env:
|
||||
lines.append(f' auth_scheme: "{auth_scheme}"')
|
||||
lines.append(f' token_env: "{token_env}"')
|
||||
paths = entry.get("path_allowlist") or []
|
||||
if paths:
|
||||
lines.append(" path_allowlist:")
|
||||
for p in paths:
|
||||
lines.append(f' - "{p}"')
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def _egress_routes_host_path(slug: str) -> Path:
|
||||
"""The bind-mount source for the egress sidecar's routes.yaml.
|
||||
Must match what egress.prepare wrote at chunk-2 paths."""
|
||||
return egress_state_dir(slug) / "egress_routes.yaml"
|
||||
|
||||
|
||||
class EgressApplyError(RuntimeError):
|
||||
"""Raised when fetch / apply fails. Caller renders to the
|
||||
operator; does not crash the dashboard."""
|
||||
|
||||
|
||||
def fetch_current_routes(slug: str) -> str:
|
||||
"""Read the live routes.yaml from the running egress sidecar
|
||||
for `slug`. Returns the file content as a string. Raises
|
||||
EgressApplyError if the sidecar isn't reachable or the read
|
||||
fails."""
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container, "cat", EGRESS_ROUTES_IN_CONTAINER],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
raise EgressApplyError(
|
||||
f"could not read routes.yaml from {container}: "
|
||||
f"{(r.stderr or '').strip() or 'container not running?'}"
|
||||
)
|
||||
return r.stdout
|
||||
|
||||
|
||||
def validate_routes_content(content: str) -> None:
|
||||
"""Syntactic check before SIGHUP — the addon's reload also
|
||||
validates, but failing here keeps the old routes live and gives
|
||||
the operator a clearer error than the addon's stderr line."""
|
||||
try:
|
||||
load_routes(content)
|
||||
except ValueError as e:
|
||||
raise EgressApplyError(
|
||||
f"proposed routes.yaml is not valid: {e}"
|
||||
) from e
|
||||
|
||||
|
||||
def _hosts_in_routes(content: str) -> list[str]:
|
||||
"""Extract the host list from a routes.yaml content string.
|
||||
Uses the addon's own parser so any host the addon will match on
|
||||
also lands in pipelock's allowlist. Returns sorted+deduped."""
|
||||
try:
|
||||
routes = load_routes(content)
|
||||
except ValueError as e:
|
||||
raise EgressApplyError(
|
||||
f"proposed routes.yaml is not valid: {e}"
|
||||
) from e
|
||||
return sorted({r.host for r in routes if r.host})
|
||||
|
||||
|
||||
# Pipelock's allowlist parser accepts only literal hostnames:
|
||||
# `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals,
|
||||
# stray characters) is silently dropped from the mirror so the
|
||||
# pipelock apply doesn't fail parse before the new yaml is even
|
||||
# written. The dropped hosts stay on egress's route table —
|
||||
# but the addon does exact-host match only, so they'll never
|
||||
# match anything either. (Wildcard host matching was removed —
|
||||
# see `match_route` in egress_addon_core for the rationale.)
|
||||
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")
|
||||
|
||||
|
||||
def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
|
||||
"""Drop any host pipelock's allowlist parser would reject.
|
||||
Order preserved."""
|
||||
return [h for h in hosts if _PIPELOCK_HOST_RE.match(h)]
|
||||
|
||||
|
||||
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
|
||||
"""Ensure every pipelock-compatible `hosts` entry is on
|
||||
pipelock's allowlist. Fetches pipelock's current allowlist,
|
||||
merges, re-applies. Hosts pipelock can't represent (wildcards,
|
||||
etc.) are silently skipped — they stay live on egress
|
||||
but aren't enforced at pipelock. No-op if every host is already
|
||||
present (apply still restarts pipelock if any host is new).
|
||||
Raises EgressApplyError on pipelock failures so the
|
||||
caller's diff/audit reflects the half-state."""
|
||||
safe_hosts = _pipelock_safe_hosts(hosts)
|
||||
try:
|
||||
current = fetch_current_allowlist(slug)
|
||||
existing = parse_allowlist_content(current)
|
||||
merged = sorted(set(existing) | set(safe_hosts))
|
||||
if merged == sorted(existing):
|
||||
return # nothing to add
|
||||
apply_allowlist_change(slug, render_allowlist_content(merged))
|
||||
except PipelockApplyError as e:
|
||||
# Mirror runs BEFORE the egress write, so egress
|
||||
# is unchanged on this failure path. Report it as a
|
||||
# pipelock-side problem so the operator looks in the right
|
||||
# place; their `pipelock edit` flow can repair manually.
|
||||
raise EgressApplyError(
|
||||
f"pipelock allowlist mirror failed (egress NOT "
|
||||
f"updated): {e}. Fix pipelock's allowlist manually with "
|
||||
f"`pipelock edit <bottle>` then retry the proposal."
|
||||
) from e
|
||||
|
||||
|
||||
def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
|
||||
"""Apply `new_content` to the egress sidecar for `slug`:
|
||||
1. Fetch current routes.yaml (for the before-diff).
|
||||
2. Validate the new content via the addon's own parser.
|
||||
3. Mirror the route hosts onto pipelock's allowlist (so the
|
||||
downstream hostname gate lets them through).
|
||||
4. Write to a temp file, `docker cp` into the egress
|
||||
sidecar.
|
||||
5. `docker kill --signal HUP` so the addon reloads.
|
||||
|
||||
Order matters: pipelock first, then egress. If the
|
||||
pipelock step fails, egress hasn't been touched and the
|
||||
old routes stay live. If the egress step fails after
|
||||
pipelock succeeded, pipelock has the host in its allowlist but
|
||||
egress doesn't enforce it yet — harmless extra-permissive
|
||||
state at pipelock, and a re-approval will land the egress
|
||||
side.
|
||||
|
||||
Returns (before, after) where `after` == `new_content`. Raises
|
||||
EgressApplyError on any step."""
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
before = fetch_current_routes(slug)
|
||||
validate_routes_content(new_content)
|
||||
|
||||
# Pipelock mirror first — if it fails, egress stays intact
|
||||
# and the operator gets a clear error about the half-state.
|
||||
_mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content))
|
||||
|
||||
# routes.yaml is bind-mounted into the egress container as a
|
||||
# SINGLE FILE. Docker single-file bind mounts pin the source
|
||||
# inode at mount time; write-temp-then-rename swaps the inode
|
||||
# on the host, which leaves the container's mount pointing at
|
||||
# the now-orphaned old inode (so the SIGHUP'd reload re-reads
|
||||
# unchanged content). Write in-place instead. Lose file-level
|
||||
# atomicity, but the apply path issues SIGHUP only AFTER the
|
||||
# write returns, and the addon's `load_routes` raises
|
||||
# `ValueError` on a partial read and keeps the previous
|
||||
# in-memory routes — so a SIGHUP that hypothetically raced an
|
||||
# in-flight write is non-disruptive.
|
||||
target = _egress_routes_host_path(slug)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_text(new_content)
|
||||
# mitmproxy in the container reads through the bind mount as
|
||||
# uid 1000; the host file has to be world-readable for that
|
||||
# read to succeed (parent dir at 0o700 still restricts who
|
||||
# can reach the file on the host). Routes content is not
|
||||
# secret — tokens live in the container's environ — so 0o644
|
||||
# is the right trade-off.
|
||||
target.chmod(0o644)
|
||||
sig = subprocess.run(
|
||||
["docker", "kill", "--signal", "HUP", container],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if sig.returncode != 0:
|
||||
raise EgressApplyError(
|
||||
f"failed to SIGHUP {container}: "
|
||||
f"{(sig.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
return before, new_content
|
||||
|
||||
|
||||
def _merge_single_route(
|
||||
current_yaml: str, new_route: dict[str, object],
|
||||
) -> str:
|
||||
"""Merge a single proposed route into the current routes.yaml
|
||||
content, returning the merged YAML string.
|
||||
|
||||
Behavior:
|
||||
- If `new_route['host']` is NOT in the current routes →
|
||||
append the route.
|
||||
- If the host IS already present → union the path_allowlist
|
||||
entries (proposed ∪ existing). The existing `auth_scheme`
|
||||
and `token_env` are preserved — agent-proposed auth changes
|
||||
on an existing host are ignored, matching the tool's
|
||||
documented semantics.
|
||||
|
||||
Round-trips the file through `yaml_subset` (the same parser
|
||||
the addon uses), so the merged output is in the YAML format
|
||||
the sidecar reads. Token VALUES never appear here; the routes
|
||||
file carries only env-var slot NAMES."""
|
||||
try:
|
||||
cfg = parse_yaml_subset(current_yaml)
|
||||
except YamlSubsetError as e:
|
||||
raise EgressApplyError(
|
||||
f"current routes.yaml is not valid YAML: {e}"
|
||||
) from e
|
||||
routes = cfg.get("routes")
|
||||
if not isinstance(routes, list):
|
||||
raise EgressApplyError(
|
||||
"current routes.yaml: 'routes' is not a list"
|
||||
)
|
||||
|
||||
new_host = str(new_route.get("host", "")).lower()
|
||||
if not new_host:
|
||||
raise EgressApplyError(
|
||||
"proposed route is missing 'host'"
|
||||
)
|
||||
|
||||
proposed_paths = list(new_route.get("path_allowlist") or [])
|
||||
|
||||
# Look for an existing entry with the same host (case-insensitive).
|
||||
for entry in routes:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if str(entry.get("host", "")).lower() == new_host:
|
||||
# Merge path_allowlist: union proposed + existing, ordered
|
||||
# by first-seen so existing paths stay in original order.
|
||||
existing_paths: list[str] = list(entry.get("path_allowlist") or [])
|
||||
seen = {p: None for p in existing_paths}
|
||||
for p in proposed_paths:
|
||||
seen.setdefault(p, None)
|
||||
merged_paths = list(seen.keys())
|
||||
if merged_paths:
|
||||
entry["path_allowlist"] = merged_paths
|
||||
# Preserve existing auth — tool description says agent-
|
||||
# proposed auth on an existing host is ignored.
|
||||
break
|
||||
else:
|
||||
# Host not present; build a new route entry from the
|
||||
# proposed fields. Need to assign a token_env slot if
|
||||
# `auth` was proposed (otherwise the addon's parser rejects
|
||||
# a half-set auth pair). Slots: count existing slots, pick
|
||||
# the next free index.
|
||||
entry = {"host": new_route["host"]}
|
||||
if proposed_paths:
|
||||
entry["path_allowlist"] = proposed_paths
|
||||
auth = new_route.get("auth")
|
||||
if isinstance(auth, dict) and auth.get("scheme") and auth.get("token_ref"):
|
||||
existing_slots = sorted({
|
||||
str(r.get("token_env"))
|
||||
for r in routes
|
||||
if isinstance(r, dict) and r.get("token_env")
|
||||
})
|
||||
next_idx = len(existing_slots)
|
||||
entry["auth_scheme"] = str(auth["scheme"])
|
||||
entry["token_env"] = f"EGRESS_TOKEN_{next_idx}"
|
||||
# NOTE: the addon reads token VALUES from its container's
|
||||
# environ keyed by token_env. A newly-added auth route at
|
||||
# runtime points at a slot that has no env value → the
|
||||
# addon will 403 with "token env unset" until the operator
|
||||
# arranges for the value to land in the container's env.
|
||||
# Recording this here so the operator-facing diff carries
|
||||
# the slot name they'll need to provision.
|
||||
routes.append(entry)
|
||||
|
||||
return _render_routes_payload(routes)
|
||||
|
||||
|
||||
def add_route(slug: str, proposed_route_json: str) -> tuple[str, str]:
|
||||
"""Apply a single-route addition to the egress. Parses the
|
||||
agent's proposed route, fetches the current routes file, merges,
|
||||
and applies via `apply_routes_change`. Returns (before, after)
|
||||
full-file content for the audit log."""
|
||||
try:
|
||||
proposed = json.loads(proposed_route_json)
|
||||
except json.JSONDecodeError as e:
|
||||
raise EgressApplyError(
|
||||
f"proposed route is not valid JSON: {e}"
|
||||
) from e
|
||||
if not isinstance(proposed, dict):
|
||||
raise EgressApplyError(
|
||||
"proposed route must be a JSON object"
|
||||
)
|
||||
current = fetch_current_routes(slug)
|
||||
merged = _merge_single_route(current, proposed)
|
||||
return apply_routes_change(slug, merged)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EgressApplyError",
|
||||
"add_route",
|
||||
"apply_routes_change",
|
||||
"fetch_current_routes",
|
||||
"validate_routes_content",
|
||||
]
|
||||
@@ -0,0 +1,80 @@
|
||||
"""Active-agent enumeration for the docker backend.
|
||||
|
||||
Mirrors `backend/smolmachines/enumerate.py`: returns
|
||||
`ActiveAgent` records the CLI `list active` command and the
|
||||
dashboard agents pane consume. Empty when docker isn't reachable
|
||||
— gated by `has_backend('docker')` at the cross-backend caller
|
||||
so this module trusts that docker is available when called.
|
||||
|
||||
The parser (`_parse_services_by_project`) is exposed for direct
|
||||
unit testing; the docker `docker ps` invocation is in
|
||||
`_query_services_by_project`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from .. import ActiveAgent
|
||||
from .bottle_state import read_metadata
|
||||
from .compose import compose_project_name, list_active_slugs
|
||||
|
||||
|
||||
def enumerate_active() -> list[ActiveAgent]:
|
||||
"""All currently-running docker-backed agents. Caller is
|
||||
responsible for gating on `has_backend('docker')` if it
|
||||
matters; if docker is missing the `docker ps` call below
|
||||
returns an empty list silently."""
|
||||
slugs = list_active_slugs(include_stopped=False)
|
||||
if not slugs:
|
||||
return []
|
||||
services_by_project = _query_services_by_project()
|
||||
out: list[ActiveAgent] = []
|
||||
for slug in slugs:
|
||||
project = compose_project_name(slug)
|
||||
services = services_by_project.get(project, set())
|
||||
metadata = read_metadata(slug)
|
||||
out.append(ActiveAgent(
|
||||
backend_name="docker",
|
||||
slug=slug,
|
||||
agent_name=metadata.agent_name if metadata else "?",
|
||||
started_at=metadata.started_at if metadata else "",
|
||||
services=tuple(sorted(services)),
|
||||
))
|
||||
return out
|
||||
|
||||
|
||||
def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
|
||||
"""Parse `docker ps` output formatted as
|
||||
`<project-label>\\t<service-label>` (one line per container)
|
||||
into a `{project: {service, ...}}` mapping. Pure function for
|
||||
testing — the docker invocation is in `_query_services_by_project`."""
|
||||
out: dict[str, set[str]] = {}
|
||||
for line in stdout.splitlines():
|
||||
project, _, service = line.partition("\t")
|
||||
if not project or not service:
|
||||
continue
|
||||
out.setdefault(project, set()).add(service)
|
||||
return out
|
||||
|
||||
|
||||
def _query_services_by_project() -> dict[str, set[str]]:
|
||||
"""One `docker ps` call → `{project: {service, ...}}`. Used
|
||||
by the CLI's `list active` and the dashboard's agents pane —
|
||||
one subprocess per refresh tick, not one per bottle."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "ps",
|
||||
"--filter", "label=com.docker.compose.project",
|
||||
"--format",
|
||||
'{{.Label "com.docker.compose.project"}}'
|
||||
"\t"
|
||||
'{{.Label "com.docker.compose.service"}}',
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
if r.returncode != 0:
|
||||
return {}
|
||||
return _parse_services_by_project(r.stdout or "")
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Docker-side git-gate constants: in-container paths the renderer's
|
||||
bind-mounts target + the listening port. The prepare-time entrypoint
|
||||
/ hook render lives on the platform-neutral `GitGate` ABC — backends
|
||||
instantiate it directly. The git-gate daemon's container lifecycle
|
||||
is owned by the sidecar bundle (PRD 0024)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh"
|
||||
GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive"
|
||||
GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook"
|
||||
GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds"
|
||||
|
||||
# git daemon's default listening port.
|
||||
GIT_GATE_PORT = 9418
|
||||
@@ -0,0 +1,218 @@
|
||||
"""Launch step for the Docker bottle backend.
|
||||
|
||||
PRD 0018 chunk 3: each instance is one `docker compose` project.
|
||||
|
||||
The flow is:
|
||||
|
||||
1. Build the agent's base + derived image (compose builds the
|
||||
sidecar images via the `build:` directive on first up).
|
||||
2. Pre-create the per-bottle networks. We do this outside compose
|
||||
so we can inspect the assigned internal CIDR and embed it in
|
||||
pipelock's yaml (compose's `external: true` lets the compose
|
||||
file reference these pre-existing networks).
|
||||
3. Mint the per-bottle CAs (chunk 2 writes them under
|
||||
state/<slug>/{pipelock,egress}/).
|
||||
4. Re-render pipelock yaml with the now-known internal CIDR so
|
||||
the SSRF allowlist exempts the bottle's own subnet.
|
||||
5. Populate the inner plans with launch-time fields so the
|
||||
renderer can read network names, CA paths, pipelock URL.
|
||||
6. Render the compose spec, write it to
|
||||
state/<slug>/docker-compose.yml, write metadata.json.
|
||||
7. `docker compose up -d` (token + OAuth values flow into the
|
||||
compose subprocess env so `environment: [NAME]` bare-name
|
||||
entries inherit without rendering values into the file).
|
||||
8. Provision (CA install, prompt copy, skills, git, supervise
|
||||
config) — unchanged, uses `docker exec`.
|
||||
9. Yield a DockerBottle handle. `exec_claude` runs claude via
|
||||
`docker exec -it` exactly like the pre-compose world.
|
||||
|
||||
Teardown (ExitStack callbacks fire in reverse):
|
||||
- Dump `docker compose logs --no-color --timestamps` to
|
||||
state/<slug>/compose.log (best-effort).
|
||||
- `docker compose down` removes the project's containers (not the
|
||||
external networks).
|
||||
- `network_remove` deletes the two networks we pre-created.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Callable, Generator
|
||||
|
||||
from ...egress import egress_resolve_token_values
|
||||
from ...log import info
|
||||
from . import network as network_mod
|
||||
from . import util as docker_mod
|
||||
from .bottle import DockerBottle
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .bottle_state import (
|
||||
bottle_state_dir,
|
||||
egress_state_dir,
|
||||
pipelock_state_dir,
|
||||
)
|
||||
from .compose import (
|
||||
bottle_plan_to_compose,
|
||||
compose_down,
|
||||
compose_dump_logs,
|
||||
compose_file_path,
|
||||
compose_log_path,
|
||||
compose_project_name,
|
||||
compose_up,
|
||||
write_compose_file,
|
||||
)
|
||||
from .egress import egress_tls_init
|
||||
from .pipelock import (
|
||||
BUNDLE_LOCAL_PIPELOCK_URL,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
|
||||
|
||||
# Where the repo root lives, for `docker build` context. Computed once.
|
||||
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def launch(
|
||||
plan: DockerBottlePlan,
|
||||
*,
|
||||
provision: Callable[[DockerBottlePlan, str], str | None],
|
||||
) -> Generator[DockerBottle, None, None]:
|
||||
"""Build, launch, and provision a Docker bottle via compose.
|
||||
Teardown on exit."""
|
||||
stack = ExitStack()
|
||||
|
||||
def teardown() -> None:
|
||||
try:
|
||||
stack.close()
|
||||
except BaseException:
|
||||
# Teardown must not raise; swallow so the caller's
|
||||
# __exit__ path can still propagate the original error.
|
||||
pass
|
||||
|
||||
try:
|
||||
# Step 1: agent image build. Sidecar images get built lazily by
|
||||
# `docker compose up` via the renderer's `build:` directives.
|
||||
docker_mod.build_image(
|
||||
plan.image, _REPO_DIR,
|
||||
dockerfile=plan.dockerfile_path,
|
||||
)
|
||||
if plan.derived_image:
|
||||
docker_mod.build_image_with_cwd(
|
||||
plan.derived_image, plan.image, plan.spec.user_cwd
|
||||
)
|
||||
|
||||
# Networks: compose-managed. The names are derived
|
||||
# deterministically from the slug so the renderer can put
|
||||
# them on the services and `compose up` creates them with
|
||||
# those names. The empirical spike confirmed pipelock's
|
||||
# SSRF guard only checks proxied-request destinations, not
|
||||
# source IPs — so the bottle's own internal CIDR doesn't
|
||||
# need to be in `ssrf.ip_allowlist`. Pre-create + CIDR
|
||||
# introspection are gone; compose owns the network
|
||||
# lifecycle.
|
||||
internal_network = network_mod.network_name_for_slug(plan.slug)
|
||||
egress_network = network_mod.network_egress_name_for_slug(plan.slug)
|
||||
|
||||
# Mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
|
||||
ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
|
||||
egress_ca_host, egress_ca_cert_only = egress_tls_init(
|
||||
egress_state_dir(plan.slug),
|
||||
)
|
||||
|
||||
# Populate launch-time fields on every inner plan so the
|
||||
# renderer reads concrete network names, CA paths, and
|
||||
# pipelock URL.
|
||||
proxy_plan = dataclasses.replace(
|
||||
plan.proxy_plan,
|
||||
internal_network=internal_network,
|
||||
internal_network_cidr="",
|
||||
egress_network=egress_network,
|
||||
ca_cert_host_path=ca_cert_host,
|
||||
ca_key_host_path=ca_key_host,
|
||||
)
|
||||
git_gate_plan = plan.git_gate_plan
|
||||
if git_gate_plan.upstreams:
|
||||
git_gate_plan = dataclasses.replace(
|
||||
git_gate_plan,
|
||||
internal_network=internal_network,
|
||||
egress_network=egress_network,
|
||||
)
|
||||
egress_plan = plan.egress_plan
|
||||
if egress_plan.routes:
|
||||
egress_plan = dataclasses.replace(
|
||||
egress_plan,
|
||||
internal_network=internal_network,
|
||||
egress_network=egress_network,
|
||||
mitmproxy_ca_host_path=egress_ca_host,
|
||||
mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
|
||||
pipelock_ca_host_path=ca_cert_host,
|
||||
pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
|
||||
)
|
||||
supervise_plan = plan.supervise_plan
|
||||
if supervise_plan is not None:
|
||||
supervise_plan = dataclasses.replace(
|
||||
supervise_plan,
|
||||
internal_network=internal_network,
|
||||
)
|
||||
plan = dataclasses.replace(
|
||||
plan,
|
||||
proxy_plan=proxy_plan,
|
||||
git_gate_plan=git_gate_plan,
|
||||
egress_plan=egress_plan,
|
||||
supervise_plan=supervise_plan,
|
||||
)
|
||||
|
||||
# Step 6: render + write the compose file. metadata.json
|
||||
# was written at prepare time and already carries
|
||||
# compose_project; nothing to update here.
|
||||
state_dir = bottle_state_dir(plan.slug)
|
||||
spec = bottle_plan_to_compose(plan)
|
||||
compose_file = write_compose_file(spec, compose_file_path(state_dir))
|
||||
project = compose_project_name(plan.slug)
|
||||
|
||||
# Step 7: compose up. Token values + the OAuth placeholder
|
||||
# flow through subprocess env; the compose file holds only
|
||||
# bare names for the secret-carrying entries.
|
||||
token_values: dict[str, str] = {}
|
||||
if plan.egress_plan.routes:
|
||||
token_values = egress_resolve_token_values(
|
||||
plan.egress_plan.token_env_map, dict(os.environ),
|
||||
)
|
||||
compose_env: dict[str, str] = {
|
||||
**os.environ,
|
||||
**plan.forwarded_env,
|
||||
**token_values,
|
||||
}
|
||||
info(
|
||||
f"docker compose up -d (project {project}, "
|
||||
f"{len(spec['services'])} services)"
|
||||
)
|
||||
compose_up(project, compose_file, env=compose_env)
|
||||
|
||||
# Register teardown in reverse order: log dump first, then
|
||||
# `compose down`. Networks come down last via callbacks
|
||||
# registered in step 2.
|
||||
stack.callback(compose_down, project, compose_file)
|
||||
stack.callback(
|
||||
compose_dump_logs, project, compose_file, compose_log_path(state_dir),
|
||||
)
|
||||
|
||||
# Step 8: provision. Unchanged — uses `docker exec` against
|
||||
# the agent container by its known name.
|
||||
prompt_path = provision(plan, plan.container_name)
|
||||
|
||||
# Step 9: yield. exec_claude continues to use `docker exec -it`
|
||||
# — the agent runs `sleep infinity` per the renderer's
|
||||
# service spec.
|
||||
yield DockerBottle(
|
||||
plan.container_name,
|
||||
teardown,
|
||||
prompt_path,
|
||||
agent_command=plan.agent_command,
|
||||
agent_prompt_mode=plan.agent_prompt_mode,
|
||||
)
|
||||
finally:
|
||||
teardown()
|
||||
@@ -0,0 +1,133 @@
|
||||
"""Docker network plumbing for the per-agent egress topology.
|
||||
|
||||
The agent container sits on a Docker `--internal` network (no default
|
||||
gateway). Pipelock straddles that network and a per-agent user-defined
|
||||
bridge for upstream egress. We deliberately do NOT use Docker's legacy
|
||||
`bridge` network because only user-defined bridges run Docker's
|
||||
embedded DNS resolver, which pipelock needs to resolve api.anthropic.com
|
||||
and similar upstream hostnames.
|
||||
|
||||
Naming: bot-bottle-net-<slug> (internal),
|
||||
bot-bottle-egress-<slug> (egress). Numeric suffix on conflict
|
||||
(-2, -3, ..., capped at 100).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from ...log import die, info, warn
|
||||
|
||||
|
||||
def network_name_for_slug(slug: str) -> str:
|
||||
return f"bot-bottle-net-{slug}"
|
||||
|
||||
|
||||
def network_egress_name_for_slug(slug: str) -> str:
|
||||
return f"bot-bottle-egress-{slug}"
|
||||
|
||||
|
||||
def network_exists(name: str) -> bool:
|
||||
"""Uses `docker network inspect`, not `docker network ls -f name=...`,
|
||||
because the latter does substring matching."""
|
||||
return (
|
||||
subprocess.run(
|
||||
["docker", "network", "inspect", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode
|
||||
== 0
|
||||
)
|
||||
|
||||
|
||||
def _network_create_with_prefix(base: str, internal: bool) -> str:
|
||||
"""Create a per-agent Docker network whose name is <base> (with
|
||||
-2, -3, ... appended on conflict, capped at 100). Returns the
|
||||
resolved name."""
|
||||
name = base
|
||||
suffix = 2
|
||||
while network_exists(name):
|
||||
name = f"{base}-{suffix}"
|
||||
suffix += 1
|
||||
if suffix > 100:
|
||||
die(
|
||||
f"could not find a free network name after {base}-99; "
|
||||
f"clean up old networks with 'docker network rm <name>'"
|
||||
)
|
||||
|
||||
kind = "internal" if internal else "bridge (egress)"
|
||||
args = ["docker", "network", "create"]
|
||||
if internal:
|
||||
args.append("--internal")
|
||||
args.append(name)
|
||||
info(f"creating {kind} network {name}")
|
||||
result = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=False)
|
||||
if result.returncode != 0:
|
||||
flag = " --internal" if internal else ""
|
||||
die(f"docker network create{flag} {name} failed")
|
||||
return name
|
||||
|
||||
|
||||
def network_create_internal(slug: str) -> str:
|
||||
"""Create a Docker `--internal` network for the agent. Returns the
|
||||
resolved name."""
|
||||
return _network_create_with_prefix(network_name_for_slug(slug), internal=True)
|
||||
|
||||
|
||||
def network_create_egress(slug: str) -> str:
|
||||
"""Create a per-agent user-defined bridge (NOT the legacy `bridge`)
|
||||
so the pipelock sidecar has working DNS for upstream hostnames."""
|
||||
return _network_create_with_prefix(network_egress_name_for_slug(slug), internal=False)
|
||||
|
||||
|
||||
def network_inspect_cidr(name: str) -> str:
|
||||
"""Return the IPv4 CIDR Docker assigned to a user-defined network.
|
||||
|
||||
Used by pipelock's SSRF guard exception: the bottle's internal
|
||||
network sits in RFC1918 space, so pipelock's `internal:` list
|
||||
would block any agent request whose destination resolves there
|
||||
— including the cred-proxy sidecar's address. Adding the
|
||||
network's CIDR to pipelock's `ssrf.ip_allowlist` lets traffic
|
||||
targeted at the bottle's own sidecars through while pipelock
|
||||
still body-scans and api_allowlist-gates as usual."""
|
||||
result = subprocess.run(
|
||||
["docker", "network", "inspect",
|
||||
"--format", "{{range .IPAM.Config}}{{.Subnet}}{{end}}", name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(f"docker network inspect {name} failed: {result.stderr.strip()}")
|
||||
cidr = result.stdout.strip()
|
||||
if not cidr:
|
||||
die(f"network {name!r} has no IPAM subnet configured")
|
||||
return cidr
|
||||
|
||||
|
||||
def network_attach(network: str, container: str) -> None:
|
||||
result = subprocess.run(
|
||||
["docker", "network", "connect", network, container],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(f"docker network connect {network} {container} failed")
|
||||
|
||||
|
||||
def network_remove(name: str) -> bool:
|
||||
"""Idempotent: a missing network is treated as success so this can
|
||||
be called from a teardown trap. Returns True if removal succeeded
|
||||
(or the network was already gone)."""
|
||||
if not network_exists(name):
|
||||
return True
|
||||
result = subprocess.run(
|
||||
["docker", "network", "rm", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(f"failed to remove network {name}; clean up with 'docker network rm {name}'")
|
||||
return False
|
||||
return True
|
||||
@@ -0,0 +1,81 @@
|
||||
"""Docker-side pipelock helpers: image pin, container naming, and
|
||||
the one-shot `pipelock tls init` host-side CA mint. The
|
||||
prepare-time YAML rendering itself lives on the platform-neutral
|
||||
`PipelockProxy` ABC — backends instantiate it directly.
|
||||
|
||||
The per-container `.start()` / `.stop()` lifecycle was deleted in
|
||||
PRD 0024 chunk 3; compose-up owns the container lifecycle (PRD
|
||||
0018) and the bundle path (PRD 0024) collapses pipelock + egress
|
||||
+ git-gate + supervise into one container."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ...log import die
|
||||
# Re-exported for the compose renderer + smolmachines launch step
|
||||
# (they used to import these from this module before they moved to
|
||||
# the platform-neutral pipelock module).
|
||||
from ...pipelock import ( # noqa: F401
|
||||
PIPELOCK_CA_CERT_IN_CONTAINER,
|
||||
PIPELOCK_CA_KEY_IN_CONTAINER,
|
||||
)
|
||||
|
||||
|
||||
# Pipelock image, pinned by digest. The digest is the multi-arch image
|
||||
# index for ghcr.io/luckypipewrench/pipelock:2.3.0.
|
||||
PIPELOCK_IMAGE = os.environ.get(
|
||||
"BOT_BOTTLE_PIPELOCK_IMAGE",
|
||||
"ghcr.io/luckypipewrench/pipelock@sha256:3b1a39417b98406ddc5dc2d8fcb42865ddc0c68a43d355db55f0f8cb06bc6de9",
|
||||
)
|
||||
|
||||
# Listening port for pipelock's forward proxy.
|
||||
PIPELOCK_PORT = os.environ.get("BOT_BOTTLE_PIPELOCK_PORT", "8888")
|
||||
|
||||
|
||||
# The URL egress dials for its upstream HTTPS_PROXY. egress and
|
||||
# pipelock share the same container's network namespace inside the
|
||||
# sidecar bundle, so loopback reaches pipelock directly — no docker
|
||||
# DNS aliases involved.
|
||||
BUNDLE_LOCAL_PIPELOCK_URL = f"http://127.0.0.1:{PIPELOCK_PORT}"
|
||||
|
||||
|
||||
def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
|
||||
"""Generate a fresh per-bottle CA via a one-shot pipelock container.
|
||||
|
||||
Runs `pipelock tls init` against a host-mounted scratch dir, leaving
|
||||
`ca.pem` (public cert, mode 600) and `ca-key.pem` (private key, mode
|
||||
600) under `<stage_dir>/pipelock-ca/`. Returns the two host paths.
|
||||
|
||||
The image is pinned (same digest the running sidecar uses) so the
|
||||
generated CA matches what the sidecar expects. Output is owned by
|
||||
whatever UID the one-shot ran as; the compose renderer's
|
||||
bind-mounts pin the files in place at runtime, so ownership
|
||||
inside the running sidecar (root in pipelock's distroless image)
|
||||
is independent."""
|
||||
work = stage_dir / "pipelock-ca"
|
||||
work.mkdir(exist_ok=True)
|
||||
result = subprocess.run(
|
||||
["docker", "run", "--rm",
|
||||
"-v", f"{work}:/h",
|
||||
"-e", "PIPELOCK_HOME=/h",
|
||||
PIPELOCK_IMAGE, "tls", "init"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(f"pipelock tls init failed: {result.stderr.strip()}")
|
||||
cert = work / "ca.pem"
|
||||
key = work / "ca-key.pem"
|
||||
if not cert.is_file() or not key.is_file():
|
||||
die(f"pipelock tls init did not produce ca files in {work}")
|
||||
# Explicit perms in case a future pipelock release changes
|
||||
# defaults. Pipelock runs as root in its distroless image and
|
||||
# bind-mounts work with 0o600 (root reads everything); the key
|
||||
# has no reason to be readable to anyone else on the host.
|
||||
key.chmod(0o600)
|
||||
cert.chmod(0o644)
|
||||
return (cert, key)
|
||||
@@ -0,0 +1,200 @@
|
||||
"""pipelock_apply — host-side helper to apply an api_allowlist
|
||||
change to a running pipelock sidecar (PRD 0015).
|
||||
|
||||
Used by the supervise dashboard when the operator approves a
|
||||
pipelock-block proposal (or runs the operator-initiated `pipelock
|
||||
edit <bottle>` verb). Fetches the current pipelock.yaml via `docker
|
||||
exec`, parses it, swaps the api_allowlist with the proposed hosts,
|
||||
re-renders, writes back via the bind-mount path, then signals the
|
||||
bundle supervisor to restart the pipelock daemon (`docker kill
|
||||
--signal USR1`) so
|
||||
pipelock picks up the new config.
|
||||
|
||||
v1 uses restart, not SIGHUP — pipelock has no in-process reload
|
||||
hook and adding one is the "SIGHUP reload for pipelock" open
|
||||
question in PRD 0015. Restart drops in-flight outbound calls; the
|
||||
agent's HTTP client retries pick up against the restarted proxy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from ...pipelock import pipelock_render_yaml
|
||||
from ...yaml_subset import YamlSubsetError, parse_yaml_subset
|
||||
from .bottle_state import pipelock_state_dir
|
||||
from .sidecar_bundle import sidecar_bundle_container_name
|
||||
|
||||
|
||||
def _pipelock_yaml_host_path(slug: str) -> Path:
|
||||
"""The bind-mount source for the pipelock sidecar's
|
||||
pipelock.yaml — matches what pipelock.prepare wrote at chunk-2
|
||||
paths."""
|
||||
return pipelock_state_dir(slug) / "pipelock.yaml"
|
||||
|
||||
|
||||
PIPELOCK_YAML_IN_CONTAINER = "/etc/pipelock.yaml"
|
||||
|
||||
# Allowlist proposals are one-hostname-per-line. Blank lines and
|
||||
# `#`-prefixed comments are ignored. The character set matches the
|
||||
# supervise sidecar's syntactic check on the agent's pipelock-block
|
||||
# proposal (alphanumerics + dot/dash/underscore).
|
||||
_HOST_OK = re.compile(r"^[A-Za-z0-9_.-]+$")
|
||||
|
||||
|
||||
class PipelockApplyError(RuntimeError):
|
||||
"""Raised when fetch / parse / apply fails. The dashboard renders
|
||||
the message and keeps the proposal pending — never crashes."""
|
||||
|
||||
|
||||
def parse_allowlist_content(content: str) -> list[str]:
|
||||
"""One hostname per line. Blanks and `#` comments are ignored.
|
||||
Raises PipelockApplyError if a line has a disallowed character."""
|
||||
hosts: list[str] = []
|
||||
for i, raw_line in enumerate(content.splitlines(), start=1):
|
||||
line = raw_line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
if not _HOST_OK.match(line):
|
||||
raise PipelockApplyError(
|
||||
f"allowlist line {i}: {line!r} has disallowed characters"
|
||||
)
|
||||
hosts.append(line)
|
||||
return hosts
|
||||
|
||||
|
||||
def render_allowlist_content(hosts: list[str]) -> str:
|
||||
"""Hosts → one-per-line string (the operator-facing format)."""
|
||||
if not hosts:
|
||||
return ""
|
||||
return "\n".join(hosts) + "\n"
|
||||
|
||||
|
||||
def fetch_current_yaml(slug: str) -> str:
|
||||
"""Read the live /etc/pipelock.yaml from the sidecar bundle.
|
||||
|
||||
Uses `docker cp` because pipelock inside the bundle is the
|
||||
distroless pipelock binary with no shell, and `docker cp` is a
|
||||
daemon-API tarball copy that works regardless of what's
|
||||
available inside the container.
|
||||
|
||||
Raises PipelockApplyError if the read fails."""
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
fd, tmp_path = tempfile.mkstemp(prefix="cb-pipelock-fetch.", suffix=".yaml")
|
||||
os.close(fd)
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "cp",
|
||||
f"{container}:{PIPELOCK_YAML_IN_CONTAINER}", tmp_path,
|
||||
],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
raise PipelockApplyError(
|
||||
f"could not fetch pipelock.yaml from {container}: "
|
||||
f"{(r.stderr or '').strip() or 'container not running?'}"
|
||||
)
|
||||
return Path(tmp_path).read_text()
|
||||
finally:
|
||||
try:
|
||||
Path(tmp_path).unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def fetch_current_allowlist(slug: str) -> str:
|
||||
"""Fetch the live yaml, extract api_allowlist, render as one-per-
|
||||
line — the operator-facing format for the TUI / agent's
|
||||
current-config mount."""
|
||||
yaml = fetch_current_yaml(slug)
|
||||
try:
|
||||
cfg = parse_yaml_subset(yaml)
|
||||
except YamlSubsetError as e:
|
||||
raise PipelockApplyError(f"running pipelock yaml: {e}") from e
|
||||
hosts = cfg.get("api_allowlist", [])
|
||||
if not isinstance(hosts, list):
|
||||
raise PipelockApplyError(
|
||||
"running pipelock yaml: api_allowlist is not a list"
|
||||
)
|
||||
return render_allowlist_content([str(h) for h in hosts])
|
||||
|
||||
|
||||
def apply_allowlist_change(
|
||||
slug: str, new_allowlist_content: str,
|
||||
) -> tuple[str, str]:
|
||||
"""Apply `new_allowlist_content` to the sidecar bundle:
|
||||
1. Parse the proposed hosts (one per line).
|
||||
2. Fetch + parse current pipelock.yaml.
|
||||
3. Replace api_allowlist with the proposed hosts; re-render.
|
||||
4. Write the new yaml to the bind-mount source.
|
||||
5. `docker kill --signal USR1 <bundle>` so the supervisor
|
||||
restarts the pipelock daemon in place (leaving egress,
|
||||
git-gate, and supervise running). Pipelock has no
|
||||
in-process reload; the supervisor's per-daemon restart
|
||||
keeps the agent's MCP socket alive — a whole-bundle
|
||||
`docker restart` would bounce supervise too.
|
||||
|
||||
Returns (before, after) where both are one-per-line allowlist
|
||||
strings (operator-facing format). Raises PipelockApplyError on
|
||||
any failure; the sidecar's existing config stays in place until
|
||||
the host write succeeds, and the SIGUSR1 is what makes it
|
||||
live."""
|
||||
new_hosts = parse_allowlist_content(new_allowlist_content)
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
current_yaml = fetch_current_yaml(slug)
|
||||
try:
|
||||
cfg = parse_yaml_subset(current_yaml)
|
||||
except YamlSubsetError as e:
|
||||
raise PipelockApplyError(f"running pipelock yaml: {e}") from e
|
||||
current_hosts = cfg.get("api_allowlist", [])
|
||||
if not isinstance(current_hosts, list):
|
||||
raise PipelockApplyError(
|
||||
"running pipelock yaml: api_allowlist is not a list"
|
||||
)
|
||||
|
||||
before = render_allowlist_content([str(h) for h in current_hosts])
|
||||
after = render_allowlist_content(new_hosts)
|
||||
|
||||
cfg["api_allowlist"] = new_hosts
|
||||
rendered = pipelock_render_yaml(cfg)
|
||||
|
||||
# pipelock.yaml is bind-mounted into the container as a SINGLE
|
||||
# FILE — same Docker single-file inode issue as egress_apply:
|
||||
# write-temp-then-rename swaps the host inode and leaves the
|
||||
# container's mount pointing at the orphaned old one. Write
|
||||
# in-place. The SIGUSR1 below makes the new content live
|
||||
# (pipelock has no in-process reload, so the supervisor
|
||||
# restarts the pipelock daemon in response).
|
||||
target = _pipelock_yaml_host_path(slug)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_text(rendered)
|
||||
# pipelock runs as root in its distroless image — any mode is
|
||||
# fine — but 0o600 matches what prepare wrote.
|
||||
target.chmod(0o600)
|
||||
restart = subprocess.run(
|
||||
["docker", "kill", "--signal", "USR1", container],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if restart.returncode != 0:
|
||||
raise PipelockApplyError(
|
||||
f"failed to signal {container} for pipelock restart: "
|
||||
f"{(restart.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
return before, after
|
||||
|
||||
|
||||
__all__ = [
|
||||
"PIPELOCK_YAML_IN_CONTAINER",
|
||||
"PipelockApplyError",
|
||||
"apply_allowlist_change",
|
||||
"fetch_current_allowlist",
|
||||
"fetch_current_yaml",
|
||||
"parse_allowlist_content",
|
||||
"render_allowlist_content",
|
||||
]
|
||||
@@ -0,0 +1,272 @@
|
||||
"""Prepare step for the Docker bottle backend.
|
||||
|
||||
`resolve_plan` does all host-side resolution (image and container
|
||||
names, env-file, prompt-file, proxy plan, runtime detection) and
|
||||
returns a frozen DockerBottlePlan. No Docker resources are created;
|
||||
the only side effects are scratch files under `stage_dir` and a probe
|
||||
of `docker info`. Cross-backend host-side validation has already run
|
||||
via the base class's `prepare` template before this is called.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ...agent_provider import runtime_for
|
||||
from ...egress import Egress
|
||||
from ...env import ResolvedEnv, resolve_env
|
||||
from ...git_gate import GitGate
|
||||
from ...log import die
|
||||
from ...pipelock import PipelockProxy
|
||||
from ...supervise import Supervise
|
||||
from .. import BottleSpec
|
||||
from . import util as docker_mod
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .bottle_state import (
|
||||
BottleMetadata,
|
||||
agent_state_dir,
|
||||
bottle_identity,
|
||||
clear_preserve_marker,
|
||||
egress_state_dir,
|
||||
git_gate_state_dir,
|
||||
per_bottle_dockerfile,
|
||||
per_bottle_dockerfile_path,
|
||||
per_bottle_image_tag,
|
||||
pipelock_state_dir,
|
||||
supervise_state_dir,
|
||||
write_metadata,
|
||||
)
|
||||
from .sidecar_bundle import sidecar_bundle_container_name
|
||||
|
||||
|
||||
def resolve_plan(
|
||||
spec: BottleSpec,
|
||||
*,
|
||||
stage_dir: Path,
|
||||
) -> DockerBottlePlan:
|
||||
"""Resolve Docker-specific names and write scratch files. Trusts
|
||||
that the agent and its skills/git-gate keys are present —
|
||||
validation already ran in the base class."""
|
||||
docker_mod.require_docker()
|
||||
|
||||
proxy = PipelockProxy()
|
||||
git_gate = GitGate()
|
||||
egress = Egress()
|
||||
supervise = Supervise()
|
||||
|
||||
manifest = spec.manifest
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
provider = bottle.agent_provider
|
||||
provider_runtime = runtime_for(provider.template)
|
||||
|
||||
# PRD 0016 follow-up: identity, not bare slug. A fresh `start`
|
||||
# mints a random-suffixed identity (so parallel runs of the same
|
||||
# agent in the same cwd don't collide on container/network
|
||||
# names); a `resume` passes the recorded identity in via
|
||||
# spec.identity to continue an existing bottle's state.
|
||||
slug = spec.identity or bottle_identity(spec.agent_name)
|
||||
# Record the launch metadata so `cli.py resume <identity>` can
|
||||
# reconstruct the spec. Idempotent — re-writes on resume with a
|
||||
# refreshed started_at.
|
||||
write_metadata(BottleMetadata(
|
||||
identity=slug,
|
||||
agent_name=spec.agent_name,
|
||||
cwd=spec.user_cwd if spec.copy_cwd else "",
|
||||
copy_cwd=spec.copy_cwd,
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
compose_project=f"bot-bottle-{slug}",
|
||||
))
|
||||
# Clear any leftover preserve marker from a prior capability-block
|
||||
# so this fresh launch can be cleaned up at session-end unless
|
||||
# the agent triggers another capability-block.
|
||||
clear_preserve_marker(slug)
|
||||
|
||||
# PRD 0016 capability-block: if a per-bottle Dockerfile has been
|
||||
# written (via apply_capability_change), the base image becomes
|
||||
# per_bottle_image_tag(slug) built from that file. --cwd still
|
||||
# layers a derived image on top.
|
||||
dockerfile_path = ""
|
||||
if per_bottle_dockerfile(slug) is not None:
|
||||
image_default = per_bottle_image_tag(slug)
|
||||
dockerfile_path = str(per_bottle_dockerfile_path(slug))
|
||||
elif provider.dockerfile:
|
||||
image_default = f"bot-bottle-{provider.template}:{slug}"
|
||||
dockerfile_path = _resolve_manifest_dockerfile(provider.dockerfile, spec)
|
||||
elif provider_runtime.dockerfile:
|
||||
image_default = provider_runtime.image
|
||||
dockerfile_path = provider_runtime.dockerfile
|
||||
else:
|
||||
image_default = provider_runtime.image
|
||||
image = os.environ.get("BOT_BOTTLE_IMAGE", image_default)
|
||||
derived_image = ""
|
||||
runtime_image = image
|
||||
if spec.copy_cwd:
|
||||
derived_image = os.environ.get(
|
||||
"BOT_BOTTLE_DERIVED_IMAGE", f"bot-bottle-cwd:{slug}"
|
||||
)
|
||||
runtime_image = derived_image
|
||||
|
||||
default_container = f"bot-bottle-{slug}"
|
||||
pinned_container = os.environ.get("BOT_BOTTLE_CONTAINER", "")
|
||||
container_name_pinned = bool(pinned_container)
|
||||
if container_name_pinned:
|
||||
container_name = pinned_container
|
||||
if docker_mod.container_exists(container_name):
|
||||
die(
|
||||
f"container '{container_name}' already exists "
|
||||
f"(pinned via BOT_BOTTLE_CONTAINER). "
|
||||
f"Remove it with 'docker rm -f {container_name}' or unset the override."
|
||||
)
|
||||
else:
|
||||
container_name = ""
|
||||
for candidate in docker_mod.container_name_candidates(default_container):
|
||||
if not docker_mod.container_exists(candidate):
|
||||
container_name = candidate
|
||||
break
|
||||
if not container_name:
|
||||
die(
|
||||
f"could not find a free container name after "
|
||||
f"{default_container}-{docker_mod.MAX_CONTAINER_SUFFIX}; "
|
||||
f"clean up old containers with 'docker rm -f <name>'"
|
||||
)
|
||||
|
||||
# Probe the sidecar-bundle container name for an orphan from a
|
||||
# previous run. Otherwise a stale bundle surfaces as a
|
||||
# docker-create conflict deep inside launch() with no actionable
|
||||
# hint; failing fast here points at the cleanup command.
|
||||
bundle_name = sidecar_bundle_container_name(slug)
|
||||
if docker_mod.container_exists(bundle_name):
|
||||
die(
|
||||
f"sidecar bundle container '{bundle_name}' already exists. "
|
||||
f"This is an orphan from a previous run; clean it up with "
|
||||
f"'./cli.py cleanup' (or 'docker rm -f {bundle_name}') and "
|
||||
f"retry."
|
||||
)
|
||||
|
||||
# PRD 0018 chunk 2: prepare-time scratch files live under
|
||||
# ~/.bot-bottle/state/<slug>/<service>/ so chunk 3's compose
|
||||
# bind-mounts can point at stable paths. The state subdirs are
|
||||
# cleaned up by start.py's session-end teardown unless something
|
||||
# explicitly preserves the state dir (capability-block, crash).
|
||||
agent_dir = agent_state_dir(slug)
|
||||
agent_dir.mkdir(parents=True, exist_ok=True)
|
||||
env_file = agent_dir / "agent.env"
|
||||
prompt_file = agent_dir / "prompt.txt"
|
||||
prompt_file.write_text("")
|
||||
prompt_file.chmod(0o600)
|
||||
|
||||
pipelock_dir = pipelock_state_dir(slug)
|
||||
pipelock_dir.mkdir(parents=True, exist_ok=True)
|
||||
proxy_plan = proxy.prepare(bottle, slug, pipelock_dir)
|
||||
|
||||
git_gate_dir = git_gate_state_dir(slug)
|
||||
git_gate_dir.mkdir(parents=True, exist_ok=True)
|
||||
git_gate_plan = git_gate.prepare(bottle, slug, git_gate_dir)
|
||||
|
||||
egress_dir = egress_state_dir(slug)
|
||||
egress_dir.mkdir(parents=True, exist_ok=True)
|
||||
egress_plan = egress.prepare(bottle, slug, egress_dir)
|
||||
|
||||
supervise_plan = None
|
||||
if bottle.supervise:
|
||||
# Current Dockerfile for the agent image. Read from the repo
|
||||
# root; for `--cwd` derived images the base Dockerfile is what
|
||||
# the agent should propose changes against (the derived layer
|
||||
# is just a workspace copy).
|
||||
# (routes.yaml + pipelock allowlist used to land here too but
|
||||
# PRD 0017 chunk 3 moved them behind the
|
||||
# `list-egress-routes` MCP tool so the agent gets live
|
||||
# state rather than a launch-time snapshot.)
|
||||
supervise_dockerfile_path = (
|
||||
Path(dockerfile_path)
|
||||
if dockerfile_path
|
||||
else Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile.claude"
|
||||
)
|
||||
dockerfile_content = (
|
||||
supervise_dockerfile_path.read_text()
|
||||
if supervise_dockerfile_path.is_file()
|
||||
else ""
|
||||
)
|
||||
supervise_dir = supervise_state_dir(slug)
|
||||
supervise_dir.mkdir(parents=True, exist_ok=True)
|
||||
supervise_plan = supervise.prepare(
|
||||
slug, supervise_dir,
|
||||
dockerfile_content=dockerfile_content,
|
||||
)
|
||||
resolved = resolve_env(manifest, spec.agent_name)
|
||||
# Everything that should reach the bottle by-name (so its value
|
||||
# never lands on argv or in env_file) goes into one dict. Nothing
|
||||
# mutates the host os.environ.
|
||||
forwarded_env: dict[str, str] = dict(resolved.forwarded)
|
||||
# When the bottle declares an egress route with the
|
||||
# `claude_code_oauth` role marker, claude-code's outbound
|
||||
# Authorization gets stripped + re-injected by egress. The
|
||||
# agent's environ still needs *something* claude-code recognises
|
||||
# as a credential or it refuses to start; ship a non-secret
|
||||
# placeholder. The placeholder isn't any real token value, so
|
||||
# leaking it would tell an attacker only that egress is in
|
||||
# front. Manifest validation enforces singleton on this role.
|
||||
has_provider_auth = any(
|
||||
provider_runtime.auth_role in r.roles for r in egress_plan.routes
|
||||
)
|
||||
if has_provider_auth:
|
||||
forwarded_env[provider_runtime.placeholder_env] = "egress-placeholder"
|
||||
if provider.template == "claude" and has_provider_auth:
|
||||
# Belt-and-braces: turn off telemetry endpoints (statsig,
|
||||
# error reporting) that egress can't gate by auth.
|
||||
forwarded_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
|
||||
forwarded_env.setdefault("DISABLE_ERROR_REPORTING", "1")
|
||||
_write_env_file(resolved, env_file)
|
||||
prompt_file.write_text(agent.prompt)
|
||||
|
||||
use_runsc = docker_mod.runsc_available()
|
||||
|
||||
return DockerBottlePlan(
|
||||
spec=spec,
|
||||
stage_dir=stage_dir,
|
||||
slug=slug,
|
||||
container_name=container_name,
|
||||
container_name_pinned=container_name_pinned,
|
||||
image=image,
|
||||
derived_image=derived_image,
|
||||
runtime_image=runtime_image,
|
||||
dockerfile_path=dockerfile_path,
|
||||
env_file=env_file,
|
||||
forwarded_env=forwarded_env,
|
||||
prompt_file=prompt_file,
|
||||
proxy_plan=proxy_plan,
|
||||
git_gate_plan=git_gate_plan,
|
||||
egress_plan=egress_plan,
|
||||
supervise_plan=supervise_plan,
|
||||
use_runsc=use_runsc,
|
||||
agent_command=provider_runtime.command,
|
||||
agent_prompt_mode=provider_runtime.prompt_mode,
|
||||
agent_provider_template=provider.template,
|
||||
)
|
||||
|
||||
|
||||
def _write_env_file(resolved: ResolvedEnv, env_file: Path) -> None:
|
||||
"""Serialize the literal portion of a ResolvedEnv into docker's
|
||||
`--env-file` syntax (NAME=VALUE per line, mode 600 since the file
|
||||
may carry verbatim values from the manifest). Forwarded names ride
|
||||
on the plan as a structured tuple instead."""
|
||||
env_lines: list[str] = []
|
||||
for name, value in resolved.literals.items():
|
||||
if "\n" in value:
|
||||
die(
|
||||
f"env entry {name} (literal) contains a newline; "
|
||||
f"docker --env-file cannot represent multi-line values."
|
||||
)
|
||||
env_lines.append(f"{name}={value}")
|
||||
env_file.write_text("\n".join(env_lines) + ("\n" if env_lines else ""))
|
||||
env_file.chmod(0o600)
|
||||
|
||||
|
||||
def _resolve_manifest_dockerfile(path_value: str, spec: BottleSpec) -> str:
|
||||
path = Path(os.path.expanduser(path_value))
|
||||
if not path.is_absolute():
|
||||
path = Path(spec.user_cwd) / path
|
||||
return str(path)
|
||||
@@ -0,0 +1,8 @@
|
||||
"""Per-provisioner modules for the Docker backend.
|
||||
|
||||
Each module exports one top-level function:
|
||||
provision_<thing>(plan: DockerBottlePlan, target: str) -> ...
|
||||
|
||||
`DockerBottleBackend.provision_*` methods delegate to these. The
|
||||
abstract `BottleBackend.provision_*` surface is unchanged; this
|
||||
subpackage exists only to keep `backend.py` from being a god-file."""
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Install the per-bottle MITM CA into the agent container's trust
|
||||
store.
|
||||
|
||||
Post-PRD-0017 the CA depends on the agent's HTTP_PROXY target:
|
||||
|
||||
- Bottle declares `egress.routes[]` → agent's HTTP_PROXY
|
||||
points at egress; the cert the agent must trust is the
|
||||
one egress mints leaf certs with (the egress CA).
|
||||
- No egress routes → agent's HTTP_PROXY points straight at
|
||||
pipelock; the cert the agent must trust is pipelock's CA (the
|
||||
pre-cutover behavior).
|
||||
|
||||
By the time this provisioner runs, the corresponding `tls_init`
|
||||
helper has generated the chosen CA under `plan.stage_dir`, and the
|
||||
sidecar (pipelock or egress) is up referencing the
|
||||
in-container CA paths.
|
||||
|
||||
Cert lands on Debian's standard source path
|
||||
(`/usr/local/share/ca-certificates/`); `update-ca-certificates`
|
||||
rebuilds `/etc/ssl/certs/ca-certificates.crt`, which is what curl,
|
||||
Python `ssl`, and OpenSSL-based tools all read by default. The env
|
||||
trio set on the agent's `docker run` covers Node
|
||||
(`NODE_EXTRA_CA_CERTS`) and Python `requests` /
|
||||
`SSL_CERT_FILE`-honoring libraries that don't load the system
|
||||
bundle.
|
||||
|
||||
The fingerprint is computed via stdlib (`ssl.PEM_cert_to_DER_cert`
|
||||
+ `hashlib.sha256`) and logged once to stderr. The private key
|
||||
stays on the host (under `stage_dir`) until teardown wipes the
|
||||
stage dir; nothing in the agent ever sees it."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import ssl
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ....log import info
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
# Debian-family path for sources that `update-ca-certificates` reads.
|
||||
# Bundle path is what the command rebuilds and what every standard
|
||||
# TLS consumer in the image reads.
|
||||
AGENT_CA_PATH = "/usr/local/share/ca-certificates/bot-bottle-mitm-ca.crt"
|
||||
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
|
||||
|
||||
|
||||
def _select_ca_cert(plan: DockerBottlePlan) -> tuple[Path, str]:
|
||||
"""Pick the CA cert (and a short label for the log line) that
|
||||
matches the proxy the agent's HTTP_PROXY points at. Egress-proxy
|
||||
wins when the bottle declares any routes (it sits in front of
|
||||
pipelock); else pipelock."""
|
||||
if plan.egress_plan.routes:
|
||||
cert = plan.egress_plan.mitmproxy_ca_cert_only_host_path
|
||||
if cert == Path() or not cert.is_file():
|
||||
from ....log import die
|
||||
die(
|
||||
f"egress CA cert missing at {cert or '(empty)'}; "
|
||||
f"launch must have called egress_tls_init and "
|
||||
f"re-bound the plan before provision"
|
||||
)
|
||||
return cert, "egress"
|
||||
cert = plan.proxy_plan.ca_cert_host_path
|
||||
if not cert or not cert.is_file():
|
||||
from ....log import die
|
||||
die(
|
||||
f"pipelock CA cert missing at {cert or '(empty)'}; "
|
||||
f"launch must have called pipelock_tls_init and re-bound "
|
||||
f"the plan before provision"
|
||||
)
|
||||
return cert, "pipelock"
|
||||
|
||||
|
||||
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Copy the agent-facing CA cert into the agent, rebuild the
|
||||
trust bundle, emit a one-line fingerprint log. Called from
|
||||
`BottleBackend.provision` after the agent container is up."""
|
||||
container = target
|
||||
cert_host_path, label = _select_ca_cert(plan)
|
||||
|
||||
subprocess.run(
|
||||
["docker", "cp", str(cert_host_path), f"{container}:{AGENT_CA_PATH}"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "0", container, "chmod", "644", AGENT_CA_PATH],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "0", container, "update-ca-certificates"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
# Stdlib SHA-256 of the cert's DER bytes — the standard
|
||||
# fingerprint form. Never the private key.
|
||||
der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text())
|
||||
fingerprint = hashlib.sha256(der).hexdigest()
|
||||
info(f"{label} ca fingerprint: sha256:{fingerprint[:32]}...")
|
||||
@@ -0,0 +1,121 @@
|
||||
"""Git provisioning inside a running Docker bottle.
|
||||
|
||||
Three concerns, all about git in the agent:
|
||||
|
||||
1. If --cwd was passed AND the host cwd has a .git, copy that .git
|
||||
into /home/node/workspace/.git so the agent operates on the
|
||||
user's repo.
|
||||
2. If the bottle declares `git` entries (PRD 0008), write a
|
||||
~/.gitconfig with insteadOf rules so every git operation
|
||||
against a declared upstream (push, fetch, clone, pull,
|
||||
ls-remote) transparently hits the per-agent git-gate. The
|
||||
gate mirrors the upstream in both directions, so URL
|
||||
rewriting is symmetric.
|
||||
3. If the bottle declares `git.user` (issue #86), set
|
||||
`git config --global user.{name,email}` inside the bottle so
|
||||
the agent's commits are attributed to that identity.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ....git_gate import GIT_GATE_HOSTNAME, git_gate_render_gitconfig
|
||||
from ....log import info
|
||||
from .. import util as docker_mod
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_git(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Set up git inside the bottle. Runs all three subcases; each
|
||||
no-ops when its condition isn't met."""
|
||||
_provision_cwd_git(plan, target)
|
||||
_provision_git_gate_config(plan, target)
|
||||
_provision_git_user(plan, target)
|
||||
|
||||
|
||||
def _provision_cwd_git(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""If --cwd was set and the host cwd has a .git directory, copy
|
||||
it into /home/node/workspace/.git and fix ownership. No-op
|
||||
otherwise."""
|
||||
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
|
||||
return
|
||||
container = target
|
||||
info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
|
||||
subprocess.run(
|
||||
["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
[
|
||||
"docker", "exec", "-u", "0", container,
|
||||
"chown", "-R", "node:node", "/home/node/workspace/.git",
|
||||
],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
|
||||
def _provision_git_gate_config(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Write ~/.gitconfig in the bottle with the git-gate
|
||||
insteadOf rules. No-op when the bottle has no `git` entries."""
|
||||
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
|
||||
if not bottle.git:
|
||||
return
|
||||
container = target
|
||||
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
|
||||
container_gitconfig = f"{container_home}/.gitconfig"
|
||||
|
||||
content = git_gate_render_gitconfig(bottle.git, GIT_GATE_HOSTNAME)
|
||||
config_file = plan.stage_dir / "agent_gitconfig"
|
||||
config_file.write_text(content)
|
||||
config_file.chmod(0o600)
|
||||
|
||||
info(f"writing {container_gitconfig} with {len(bottle.git)} insteadOf rule(s)")
|
||||
subprocess.run(
|
||||
["docker", "cp", str(config_file), f"{container}:{container_gitconfig}"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
docker_mod.docker_exec_root(container, ["chown", "node:node", container_gitconfig])
|
||||
docker_mod.docker_exec_root(container, ["chmod", "644", container_gitconfig])
|
||||
|
||||
|
||||
def _provision_git_user(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Apply `git config --global user.{name,email}` inside the
|
||||
bottle so the agent's commits are attributed to the operator-
|
||||
chosen identity instead of the agent image's default
|
||||
(which is no user — git would refuse to commit at all
|
||||
until the agent ran its own `git config`).
|
||||
|
||||
Runs as the `node` user so `--global` lands in
|
||||
`/home/node/.gitconfig` (matching the existing
|
||||
`_provision_git_gate_config` write location). No-op when the
|
||||
bottle didn't declare `git.user`.
|
||||
|
||||
Each field set independently — name-only or email-only
|
||||
configs only run the `git config` line for the field
|
||||
present."""
|
||||
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
|
||||
gu = bottle.git_user
|
||||
if gu.is_empty():
|
||||
return
|
||||
if gu.name:
|
||||
info(f"git config --global user.name = {gu.name!r}")
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "node", target,
|
||||
"git", "config", "--global", "user.name", gu.name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
if gu.email:
|
||||
info(f"git config --global user.email = {gu.email!r}")
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "node", target,
|
||||
"git", "config", "--global", "user.email", gu.email],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Copy the agent prompt into a running Docker bottle.
|
||||
|
||||
The prompt file is always copied (so the in-container path always
|
||||
exists) but `--append-system-prompt-file` only fires when the agent
|
||||
actually has a prompt — the return value signals which case."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None:
|
||||
"""Copy the prompt file into the container, fix ownership/mode.
|
||||
Returns the in-container path if the agent has a non-empty
|
||||
prompt (drives --append-system-prompt-file), else None. The
|
||||
file is copied either way so the path always exists."""
|
||||
container = target
|
||||
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
|
||||
in_container_prompt_path = f"{container_home}/.bot-bottle-prompt.txt"
|
||||
|
||||
subprocess.run(
|
||||
["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
# `docker cp` preserves host UID; re-own/mode as root so node
|
||||
# can read its own mode-600 prompt regardless of host UID.
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
agent = plan.spec.manifest.agents[plan.spec.agent_name]
|
||||
return in_container_prompt_path if agent.prompt else None
|
||||
@@ -0,0 +1,62 @@
|
||||
"""Copy host-side skill directories into a running Docker bottle.
|
||||
|
||||
Skills are validated on the host before launch by the base class's
|
||||
`BottleBackend._validate_skills` (called from `prepare`); this module
|
||||
assumes that validation has already run. A skill disappearing between
|
||||
validation and copy still dies loudly rather than silently producing
|
||||
a partial container."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from ....log import die, info
|
||||
from ...util import host_skill_dir
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_skills(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Copy each of the agent's named skills from the host's
|
||||
~/.claude/skills/<name>/ into the container's equivalent path.
|
||||
For each skill: ensure parent dir, wipe any prior copy, then
|
||||
`docker cp <host>/. <container>:<dst>/` so the contents are
|
||||
copied into a freshly-created destination dir. No-op when the
|
||||
agent has no skills."""
|
||||
agent = plan.spec.manifest.agents[plan.spec.agent_name]
|
||||
if not agent.skills:
|
||||
return
|
||||
|
||||
container = target
|
||||
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
|
||||
skills_dir = os.environ.get(
|
||||
"BOT_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills"
|
||||
)
|
||||
|
||||
subprocess.run(
|
||||
["docker", "exec", container, "mkdir", "-p", skills_dir],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
for n in agent.skills:
|
||||
src = host_skill_dir(n)
|
||||
if not os.path.isdir(src):
|
||||
die(f"skill '{n}' disappeared from host between validation and copy at {src}.")
|
||||
dst = f"{skills_dir}/{n}"
|
||||
info(f"copying skill {n} into {container}:{dst}")
|
||||
subprocess.run(
|
||||
["docker", "exec", container, "rm", "-rf", dst],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "exec", container, "mkdir", "-p", dst],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "cp", f"{src}/.", f"{container}:{dst}/"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Supervise sidecar provisioning inside a running Docker bottle
|
||||
(PRD 0013).
|
||||
|
||||
Registers the per-bottle supervise sidecar as an HTTP MCP server in
|
||||
the agent's claude-code config so the agent discovers the three
|
||||
stuck-recovery MCP tools (cred-proxy-block, pipelock-block,
|
||||
capability-block) at startup.
|
||||
|
||||
Uses `claude mcp add` rather than writing JSON directly. claude-code
|
||||
owns the on-disk config format (`~/.claude.json` `mcpServers` shape,
|
||||
field names, scope semantics) and changes it between versions; the
|
||||
official command handles whatever the installed version expects.
|
||||
|
||||
No-op when bottle.supervise is False — bottles that haven't opted
|
||||
into the supervise sidecar shouldn't get an MCP entry pointing at a
|
||||
sidecar that isn't running.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from ....log import info, warn
|
||||
from ....supervise import SUPERVISE_HOSTNAME, SUPERVISE_PORT
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
_SUPERVISE_MCP_NAME = "supervise"
|
||||
|
||||
|
||||
def supervise_mcp_url() -> str:
|
||||
return f"http://{SUPERVISE_HOSTNAME}:{SUPERVISE_PORT}/"
|
||||
|
||||
|
||||
def provision_supervise(plan: DockerBottlePlan, target: str) -> None:
|
||||
"""Run `claude mcp add` inside the agent container to register
|
||||
the supervise sidecar in claude-code's user config. No-op when
|
||||
bottle.supervise is False.
|
||||
|
||||
Failure is logged but not fatal: the bottle still works (you
|
||||
just can't call supervise tools from the agent until the entry
|
||||
is added manually). The operator sees the warning at launch."""
|
||||
if plan.supervise_plan is None:
|
||||
return
|
||||
url = supervise_mcp_url()
|
||||
argv = [
|
||||
"docker", "exec", "-u", "node", target,
|
||||
"claude", "mcp", "add",
|
||||
"--scope", "user",
|
||||
"--transport", "http",
|
||||
_SUPERVISE_MCP_NAME,
|
||||
url,
|
||||
]
|
||||
info(f"registering supervise MCP server in agent claude config → {url}")
|
||||
r = subprocess.run(argv, capture_output=True, text=True, check=False)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"`claude mcp add supervise` failed (exit {r.returncode}): "
|
||||
f"{(r.stderr or r.stdout or '').strip()}. Inside the bottle, "
|
||||
f"register manually with: "
|
||||
f"claude mcp add --scope user --transport http supervise {url}"
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["provision_supervise", "supervise_mcp_url"]
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Sidecar bundle constants + helpers for the Docker backend
|
||||
(PRD 0024).
|
||||
|
||||
The bundle image (built by Dockerfile.sidecars, PRD 0024 chunk 1)
|
||||
runs pipelock + egress + git-gate + supervise as one container
|
||||
per bottle under a small Python init supervisor. As of chunk 5
|
||||
the bundle is the only shape — the legacy four-sidecar topology
|
||||
and its `BOT_BOTTLE_SIDECAR_BUNDLE` feature flag are gone."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
|
||||
# Bundle image. Defaults to a built-locally tag (built from the
|
||||
# repo's Dockerfile.sidecars via compose `build:`). Operators
|
||||
# pinning to a published digest can override via env, matching
|
||||
# the existing `BOT_BOTTLE_PIPELOCK_IMAGE` shape.
|
||||
SIDECAR_BUNDLE_IMAGE = os.environ.get(
|
||||
"BOT_BOTTLE_SIDECAR_IMAGE",
|
||||
"bot-bottle-sidecars:latest",
|
||||
)
|
||||
|
||||
SIDECAR_BUNDLE_DOCKERFILE = "Dockerfile.sidecars"
|
||||
|
||||
|
||||
def sidecar_bundle_container_name(slug: str) -> str:
|
||||
"""`bot-bottle-sidecars-<slug>`. Same prefix scheme as the
|
||||
per-sidecar containers it replaces, so the dashboard's
|
||||
discovery-by-prefix logic keeps working."""
|
||||
return f"bot-bottle-sidecars-{slug}"
|
||||
@@ -0,0 +1,186 @@
|
||||
"""Docker host-side primitives used by DockerBottleBackend: probing
|
||||
for docker on PATH, slugifying agent names, checking image/container
|
||||
existence, and building images."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import Iterable, Iterator
|
||||
|
||||
from ...log import die, info
|
||||
|
||||
|
||||
# Cap on the suffix the container-name conflict logic will try before
|
||||
# giving up: base, base-2, ..., base-MAX_CONTAINER_SUFFIX.
|
||||
MAX_CONTAINER_SUFFIX = 100
|
||||
|
||||
|
||||
def container_name_candidates(base: str) -> Iterator[str]:
|
||||
"""Yield `base`, then `base-2`, `base-3`, ... up to
|
||||
`base-MAX_CONTAINER_SUFFIX`. Both the prepare-time probe and the
|
||||
launch-time race retry walk this sequence."""
|
||||
yield base
|
||||
for suffix in range(2, MAX_CONTAINER_SUFFIX + 1):
|
||||
yield f"{base}-{suffix}"
|
||||
|
||||
|
||||
def runsc_available() -> bool:
|
||||
"""Return True if the Docker daemon has the gVisor (`runsc`) runtime
|
||||
registered. Called once per prepare; the result lives on the plan."""
|
||||
r = subprocess.run(
|
||||
["docker", "info", "--format", "{{json .Runtimes}}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
return r.returncode == 0 and "runsc" in r.stdout
|
||||
|
||||
|
||||
def require_docker() -> None:
|
||||
"""Fail with an install pointer if `docker` is not on PATH."""
|
||||
if shutil.which("docker") is None:
|
||||
info("Docker is required but was not found on PATH.")
|
||||
info("macOS: install Docker Desktop https://docs.docker.com/desktop/install/mac-install/")
|
||||
info("Linux: install Docker Engine https://docs.docker.com/engine/install/")
|
||||
die("docker not found")
|
||||
|
||||
|
||||
def image_exists(ref: str) -> bool:
|
||||
return _silent_run(["docker", "image", "inspect", ref]) == 0
|
||||
|
||||
|
||||
def container_exists(name: str) -> bool:
|
||||
"""Returns True if a container (running or stopped) with the given
|
||||
name exists. Uses `docker ps -a -q -f name=^<name>$` so substring
|
||||
matches don't false-positive."""
|
||||
result = subprocess.run(
|
||||
["docker", "ps", "-a", "-q", "-f", f"name=^{name}$"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
return bool(result.stdout.strip())
|
||||
|
||||
|
||||
def force_remove_container(name: str) -> None:
|
||||
"""`docker rm -f` the named container if it exists. No-op if it
|
||||
doesn't — and the rm itself is best-effort (errors swallowed) so
|
||||
this is safe to register as a teardown callback."""
|
||||
if container_exists(name):
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
def docker_exec_root(container: str, argv: list[str]) -> None:
|
||||
"""Run `docker exec -u 0` in the named container, check=True. Used
|
||||
by SSH provisioning to chown/chmod files that need root."""
|
||||
subprocess.run(
|
||||
["docker", "exec", "-u", "0", container, *argv],
|
||||
stdout=subprocess.DEVNULL,
|
||||
check=True,
|
||||
)
|
||||
|
||||
|
||||
_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
|
||||
|
||||
def slugify(name: str) -> str:
|
||||
"""Lowercase, non-alnum runs → '-', trimmed. Dies on empty result."""
|
||||
if not name:
|
||||
die("slugify: missing name")
|
||||
slug = _SLUG_RE.sub("-", name.lower()).strip("-")
|
||||
if not slug:
|
||||
die(f"name '{name}' produced an empty slug; use alphanumeric characters")
|
||||
return slug
|
||||
|
||||
|
||||
def build_image(ref: str, context: str, *, dockerfile: str = "") -> None:
|
||||
"""Invokes `docker build` every call. Layer cache makes no-change
|
||||
rebuilds cheap; running every time means Dockerfile edits land
|
||||
without manual `docker rmi`.
|
||||
|
||||
`dockerfile` is an optional path (relative to `context`, or
|
||||
absolute) for callers that need to build from a non-default
|
||||
Dockerfile in the same context — e.g. `Dockerfile.git-gate`."""
|
||||
info(f"building image {ref} from {context} (layer cache keeps repeat builds fast)")
|
||||
args = ["docker", "build", "-t", ref]
|
||||
if dockerfile:
|
||||
args.extend(["-f", dockerfile])
|
||||
args.append(context)
|
||||
subprocess.run(args, check=True)
|
||||
|
||||
|
||||
_TRUST_DIALOG_NODE_SCRIPT = (
|
||||
'const fs=require("fs"),p=process.env.HOME+"/.claude.json",'
|
||||
'c=JSON.parse(fs.readFileSync(p,"utf8"));'
|
||||
'c.projects=c.projects||{};'
|
||||
'c.projects[process.env.HOME+"/workspace"]={hasTrustDialogAccepted:true};'
|
||||
'fs.writeFileSync(p,JSON.stringify(c,null,2));'
|
||||
)
|
||||
|
||||
|
||||
def build_image_with_cwd(derived: str, base: str, cwd: str) -> None:
|
||||
"""Build a thin derived image that copies <cwd> into
|
||||
/home/node/workspace and adds a trust-dialog entry for it."""
|
||||
import os
|
||||
|
||||
if not os.path.isdir(cwd):
|
||||
die(f"cwd not found at {cwd}")
|
||||
info(f"building image {derived} from {base} with {cwd} -> /home/node/workspace")
|
||||
dockerfile = (
|
||||
f"FROM {base}\n"
|
||||
f"COPY --chown=node:node . /home/node/workspace\n"
|
||||
f"RUN node -e '{_TRUST_DIALOG_NODE_SCRIPT}'\n"
|
||||
f"WORKDIR /home/node/workspace\n"
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "build", "-t", derived, "-f", "-", cwd],
|
||||
input=dockerfile,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
|
||||
|
||||
def image_id(ref: str) -> str:
|
||||
"""Return the content-addressed image ID (e.g.
|
||||
`sha256:abcd...`) for `ref`. The smolmachines backend keys its
|
||||
`.smolmachine` artifact cache on this, so a Dockerfile change
|
||||
that produces a new image automatically invalidates the cache."""
|
||||
r = subprocess.run(
|
||||
["docker", "image", "inspect", "--format", "{{.Id}}", ref],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
die(
|
||||
f"docker image inspect for {ref!r} failed: "
|
||||
f"{(r.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
return r.stdout.strip()
|
||||
|
||||
|
||||
def save(ref: str, output: str) -> None:
|
||||
"""`docker save REF -o OUTPUT`. Writes a tarball of the image
|
||||
layers + manifest to the host path. Used by smolmachines
|
||||
prepare to hand the agent image to a containerized crane that
|
||||
pushes it to the ephemeral registry — bypassing the docker
|
||||
daemon's `docker push` (which on Docker Desktop can't reach a
|
||||
host-loopback registry and refuses plain-HTTP pushes to
|
||||
non-loopback hosts)."""
|
||||
subprocess.run(["docker", "save", ref, "-o", output], check=True)
|
||||
|
||||
|
||||
def _silent_run(cmd: Iterable[str]) -> int:
|
||||
return subprocess.run(
|
||||
list(cmd),
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode
|
||||
@@ -0,0 +1,28 @@
|
||||
"""Shared print helpers for BottlePlan.print implementations.
|
||||
|
||||
Lifts the multi-value label printer out of DockerBottlePlan so the
|
||||
smolmachines backend (and any future backend) renders the same
|
||||
two-column scannable preflight without duplicating the indent
|
||||
math."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Sequence
|
||||
|
||||
from ..log import info
|
||||
|
||||
|
||||
def print_multi(label: str, values: Sequence[str]) -> None:
|
||||
"""Print `label: <value>` with continuation lines indented to
|
||||
align under the first value. Empty `values` renders `(none)`.
|
||||
|
||||
Used by every backend's `BottlePlan.print` for env / skills /
|
||||
git / egress — one item per line keeps the preflight summary
|
||||
scannable when an agent has many of any of these."""
|
||||
if not values:
|
||||
info(f"{label}: (none)")
|
||||
return
|
||||
info(f"{label}: {values[0]}")
|
||||
indent = " " * (len(label) + 2)
|
||||
for v in values[1:]:
|
||||
info(f"{indent}{v}")
|
||||
@@ -0,0 +1,15 @@
|
||||
"""smolmachines bottle backend (PRD 0023).
|
||||
|
||||
Selectable via `BOT_BOTTLE_BACKEND=smolmachines`. Runs each
|
||||
bottle inside a per-agent microVM (libkrun / Hypervisor.framework
|
||||
on macOS) with a userspace gvproxy gateway as the egress
|
||||
primitive. The sidecar bundle (PRD 0024) runs as a host-side
|
||||
docker container reached only through gvproxy's port-forward list.
|
||||
|
||||
Chunk 1 (this commit) ships the backend skeleton + Smolfile +
|
||||
gvproxy renderers + preflight check. VM lifecycle, sidecar
|
||||
bringup, and provisioning land in later chunks."""
|
||||
|
||||
from .backend import SmolmachinesBottleBackend # noqa: F401
|
||||
|
||||
__all__ = ["SmolmachinesBottleBackend"]
|
||||
@@ -0,0 +1,86 @@
|
||||
"""SmolmachinesBottleBackend — the smolmachines implementation of
|
||||
BottleBackend (PRD 0023)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Generator, Sequence
|
||||
|
||||
from .. import ActiveAgent, BottleBackend, BottleSpec
|
||||
from . import cleanup as _cleanup
|
||||
from . import enumerate as _enumerate
|
||||
from . import launch as _launch
|
||||
from . import prepare as _prepare
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle import SmolmachinesBottle
|
||||
from .bottle_cleanup_plan import SmolmachinesBottleCleanupPlan
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
from .provision import ca as _ca
|
||||
from .provision import git as _git
|
||||
from .provision import prompt as _prompt
|
||||
from .provision import skills as _skills
|
||||
from .provision import supervise as _supervise
|
||||
|
||||
|
||||
class SmolmachinesBottleBackend(
|
||||
BottleBackend["SmolmachinesBottlePlan", "SmolmachinesBottleCleanupPlan"]
|
||||
):
|
||||
"""smolmachines backend. Selected by
|
||||
`BOT_BOTTLE_BACKEND=smolmachines`."""
|
||||
|
||||
name = "smolmachines"
|
||||
|
||||
@classmethod
|
||||
def is_available(cls) -> bool:
|
||||
"""`smolvm` on PATH. The backend additionally needs macOS
|
||||
for libkrun + TSI, but `enumerate_active` / `cleanup` are
|
||||
host-shell ops that gracefully no-op on Linux too — the
|
||||
runtime check happens at `prepare`."""
|
||||
return _smolvm.is_available()
|
||||
|
||||
def _resolve_plan(
|
||||
self, spec: BottleSpec, *, stage_dir: Path
|
||||
) -> SmolmachinesBottlePlan:
|
||||
return _prepare.resolve_plan(spec, stage_dir=stage_dir)
|
||||
|
||||
@contextmanager
|
||||
def launch(
|
||||
self, plan: SmolmachinesBottlePlan
|
||||
) -> Generator[SmolmachinesBottle, None, None]:
|
||||
with _launch.launch(plan, provision=self.provision) as bottle:
|
||||
yield bottle
|
||||
|
||||
def provision_ca(
|
||||
self, plan: SmolmachinesBottlePlan, target: str
|
||||
) -> None:
|
||||
_ca.provision_ca(plan, target)
|
||||
|
||||
def provision_prompt(
|
||||
self, plan: SmolmachinesBottlePlan, target: str
|
||||
) -> str | None:
|
||||
return _prompt.provision_prompt(plan, target)
|
||||
|
||||
def provision_skills(
|
||||
self, plan: SmolmachinesBottlePlan, target: str
|
||||
) -> None:
|
||||
_skills.provision_skills(plan, target)
|
||||
|
||||
def provision_git(
|
||||
self, plan: SmolmachinesBottlePlan, target: str
|
||||
) -> None:
|
||||
_git.provision_git(plan, target)
|
||||
|
||||
def provision_supervise(
|
||||
self, plan: SmolmachinesBottlePlan, target: str
|
||||
) -> None:
|
||||
_supervise.provision_supervise(plan, target)
|
||||
|
||||
def prepare_cleanup(self) -> SmolmachinesBottleCleanupPlan:
|
||||
return _cleanup.prepare_cleanup()
|
||||
|
||||
def cleanup(self, plan: SmolmachinesBottleCleanupPlan) -> None:
|
||||
_cleanup.cleanup(plan)
|
||||
|
||||
def enumerate_active(self) -> Sequence[ActiveAgent]:
|
||||
return _enumerate.enumerate_active()
|
||||
@@ -0,0 +1,179 @@
|
||||
"""SmolmachinesBottle — running-instance handle (PRD 0023 chunk 2d).
|
||||
|
||||
Routes `exec_claude` / `exec` / `cp_in` through `smolvm machine
|
||||
exec` / `smolvm machine cp`. The handle is yielded by `launch`
|
||||
and torn down via the surrounding ExitStack on context exit;
|
||||
`close` is a no-op idempotent alias so the BottleBackend ABC's
|
||||
context-manager contract is satisfied.
|
||||
|
||||
User context: `smolvm machine exec` runs commands as root in the
|
||||
VM, but the agent image's USER is `node` and claude-code refuses
|
||||
to run as root with `--dangerously-skip-permissions`. Both
|
||||
`exec_claude` and `exec` switch to the requested user (default
|
||||
`node`) via `runuser -u <user> --` and set `HOME` / `USER`
|
||||
through `smolvm -e` — avoiding `runuser -l`'s login-shell wiring
|
||||
(PAM session setup, /etc/profile sourcing) which can hang on a
|
||||
minimal Debian VM with no PAM session config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Mapping
|
||||
|
||||
from ...agent_provider import prompt_args
|
||||
from .. import Bottle, ExecResult
|
||||
from . import pty_resize as _pty_resize
|
||||
from . import smolvm as _smolvm
|
||||
|
||||
|
||||
# Absolute path to the pty_resize wrapper. Invoke as
|
||||
# `python <path>` rather than `python -m <dotted-path>` so the
|
||||
# wrapper runs regardless of cwd / sys.path — it has no
|
||||
# bot_bottle.* imports, so it's self-contained.
|
||||
_PTY_RESIZE_SCRIPT = _pty_resize.__file__
|
||||
|
||||
|
||||
# Per-user env the agent image's USER (node) expects. claude
|
||||
# reads ~/.claude.json + writes session state under ~/.claude/;
|
||||
# bare `runuser -u` inherits root's HOME=/root, which claude
|
||||
# can't write to. Set HOME / USER explicitly through smolvm -e
|
||||
# so the child process sees them.
|
||||
_HOME_FOR = {
|
||||
"node": "/home/node",
|
||||
"root": "/root",
|
||||
}
|
||||
|
||||
|
||||
def _env_flags_for(user: str) -> list[str]:
|
||||
home = _HOME_FOR.get(user, f"/home/{user}")
|
||||
return ["-e", f"HOME={home}", "-e", f"USER={user}"]
|
||||
|
||||
|
||||
def _guest_env_flags(env: Mapping[str, str]) -> list[str]:
|
||||
"""Render `{K: V}` into a flat `-e K=V` argv slice for
|
||||
`smolvm machine exec`. `smolvm machine create -e` set env
|
||||
on PID 1 but it doesn't propagate to fresh exec process
|
||||
trees, so we have to re-pass them every call."""
|
||||
out: list[str] = []
|
||||
for k, v in env.items():
|
||||
out += ["-e", f"{k}={v}"]
|
||||
return out
|
||||
|
||||
|
||||
class SmolmachinesBottle(Bottle):
|
||||
"""Handle returned by `SmolmachinesBottleBackend.launch`. The
|
||||
underlying VM lifecycle (create / start / stop / delete) lives
|
||||
on the launch ExitStack — this class only routes runtime
|
||||
operations to the right `smolvm machine ...` subcommand."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
machine_name: str,
|
||||
*,
|
||||
prompt_path: str | None = None,
|
||||
guest_env: Mapping[str, str] | None = None,
|
||||
agent_command: str = "claude",
|
||||
agent_prompt_mode: str = "claude_append_file",
|
||||
) -> None:
|
||||
self.name = machine_name
|
||||
# In-VM path to the agent's prompt file. None when the
|
||||
# agent declared no prompt (file still exists; we just
|
||||
# don't pass --append-system-prompt-file).
|
||||
self._prompt_path = prompt_path
|
||||
# Env vars the agent process needs (HTTPS_PROXY,
|
||||
# CLAUDE_CODE_OAUTH_TOKEN, manifest-declared bottle env, …).
|
||||
# Forwarded on every `smolvm machine exec` via `-e K=V`
|
||||
# because exec doesn't inherit from machine_create's env.
|
||||
self._guest_env = dict(guest_env or {})
|
||||
self._agent_command = agent_command
|
||||
self._agent_prompt_mode = agent_prompt_mode
|
||||
self.agent_command = agent_command
|
||||
self.agent_provider_template = (
|
||||
"codex" if agent_command == "codex" else "claude"
|
||||
)
|
||||
|
||||
def claude_argv(
|
||||
self, argv: list[str], *, tty: bool = True,
|
||||
) -> list[str]:
|
||||
flags = ["smolvm", "machine", "exec", "--name", self.name]
|
||||
if tty:
|
||||
flags += ["-i", "-t"]
|
||||
flags += _env_flags_for("node")
|
||||
flags += _guest_env_flags(self._guest_env)
|
||||
claude_tail = [self._agent_command]
|
||||
provider_prompt_args = prompt_args(
|
||||
self._agent_prompt_mode, self._prompt_path, argv=argv,
|
||||
)
|
||||
if self._agent_prompt_mode == "codex_read_prompt_file":
|
||||
claude_tail += argv
|
||||
claude_tail += provider_prompt_args
|
||||
else:
|
||||
claude_tail += provider_prompt_args
|
||||
claude_tail += argv
|
||||
flags += ["--", "runuser", "-u", "node", "--", *claude_tail]
|
||||
if not tty:
|
||||
# No PTY allocated — no SIGWINCH to forward, no resize
|
||||
# bridge needed. Skip the wrapper so non-interactive
|
||||
# exec paths (e.g., provisioning shell-outs that
|
||||
# happen to go through this method) stay light.
|
||||
return flags
|
||||
return [
|
||||
sys.executable, _PTY_RESIZE_SCRIPT,
|
||||
self.name, "--", *flags,
|
||||
]
|
||||
|
||||
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
|
||||
"""Run `claude` interactively inside the VM as the `node`
|
||||
user. Inherits the operator's terminal (stdin / stdout /
|
||||
stderr) so the session feels native. Blocks until claude
|
||||
exits; returns the in-VM exit code.
|
||||
|
||||
We bypass the captured-output `machine_exec` helper here
|
||||
because that one wraps stdout/stderr in pipes — fine for
|
||||
scripted exec, wrong for an interactive shell. Drop down
|
||||
to `subprocess.run` with the TTY inherited.
|
||||
|
||||
UID switches via `runuser -u node --` (not `-l`) so we
|
||||
avoid login-shell wiring. HOME / USER come from `smolvm
|
||||
-e` instead, which sets them on the process env."""
|
||||
return subprocess.run(
|
||||
self.claude_argv(argv, tty=tty), check=False,
|
||||
).returncode
|
||||
|
||||
def exec(self, script: str, *, user: str = "node") -> ExecResult:
|
||||
"""Run a POSIX shell script as `user` (default `node`) and
|
||||
capture the result. Matches the docker backend's `exec`,
|
||||
which defaults to the image's USER (also node) — so test
|
||||
helpers / provision shell-outs run with the same identity
|
||||
on both backends. Pass `user="root"` for tests that need
|
||||
root.
|
||||
|
||||
`runuser -u <user> -- /bin/sh -c <script>` switches UID
|
||||
without invoking a login shell; HOME / USER are set via
|
||||
`smolvm -e` (see `_env_flags_for`)."""
|
||||
argv = (
|
||||
_env_flags_for(user)
|
||||
+ _guest_env_flags(self._guest_env)
|
||||
+ ["--", "runuser", "-u", user, "--", "/bin/sh", "-c", script]
|
||||
)
|
||||
# _smolvm.machine_exec expects argv (the bit after `--`);
|
||||
# the -e flags go before, so call smolvm directly.
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "exec", "--name", self.name] + argv,
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return ExecResult(
|
||||
returncode=r.returncode,
|
||||
stdout=r.stdout or "",
|
||||
stderr=r.stderr or "",
|
||||
)
|
||||
|
||||
def cp_in(self, host_path: str, container_path: str) -> None:
|
||||
"""Copy a host path into the guest at `container_path`."""
|
||||
_smolvm.machine_cp(host_path, f"{self.name}:{container_path}")
|
||||
|
||||
def close(self) -> None:
|
||||
# Real teardown lives on the launch ExitStack; this is just
|
||||
# the idempotent alias the BottleBackend ABC expects.
|
||||
pass
|
||||
@@ -0,0 +1,55 @@
|
||||
"""SmolmachinesBottleCleanupPlan — concrete BottleCleanupPlan (issue #77).
|
||||
|
||||
Tracks the resources `SmolmachinesBottleBackend.cleanup` will
|
||||
remove:
|
||||
|
||||
- machines: smolvm machines whose name starts with
|
||||
`bot-bottle-` (running or stopped). Stopped +
|
||||
deleted via `smolvm machine stop` + `machine delete -f`.
|
||||
- bundles: docker containers `bot-bottle-sidecars-<slug>`
|
||||
left over from a smolmachines bottle (the bundle's
|
||||
port-forwards stay published on lo0 aliases until
|
||||
the container is gone). Removed via `docker rm -f`.
|
||||
- networks: docker networks `bot-bottle-bundle-<slug>`
|
||||
attached to the bundles. Removed via
|
||||
`docker network rm`.
|
||||
|
||||
Smolmachines state dirs live under the same `~/.bot-bottle/state/`
|
||||
path the docker backend uses; the docker backend's
|
||||
`prepare_cleanup` already enumerates orphan state dirs and is the
|
||||
single source of truth for that bucket (consults
|
||||
`enumerate_active_bottles()` so it doesn't reap a live
|
||||
smolmachines bottle's dir)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...log import info
|
||||
from .. import BottleCleanupPlan
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SmolmachinesBottleCleanupPlan(BottleCleanupPlan):
|
||||
"""Resources SmolmachinesBottleBackend.cleanup will remove.
|
||||
Produced by `prepare_cleanup`; sorted so the y/N output is
|
||||
stable."""
|
||||
|
||||
machines: tuple[str, ...] = ()
|
||||
bundles: tuple[str, ...] = ()
|
||||
networks: tuple[str, ...] = ()
|
||||
|
||||
@property
|
||||
def empty(self) -> bool:
|
||||
return not self.machines and not self.bundles and not self.networks
|
||||
|
||||
def print(self) -> None:
|
||||
print(file=sys.stderr)
|
||||
for name in self.machines:
|
||||
info(f"smolvm machine: {name}")
|
||||
for name in self.bundles:
|
||||
info(f"bundle container:{name}")
|
||||
for name in self.networks:
|
||||
info(f"bundle network: {name}")
|
||||
print(file=sys.stderr)
|
||||
@@ -0,0 +1,128 @@
|
||||
"""SmolmachinesBottlePlan — concrete BottlePlan for the smolmachines
|
||||
backend (PRD 0023).
|
||||
|
||||
Slug + bundle docker subnet / gateway / pinned IP + smolvm
|
||||
machine name + agent `.smolmachine` artifact + per-bottle guest
|
||||
env. Provisioning fields (CA cert path, prompt path, etc.) land
|
||||
in chunk 4."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ...egress import EgressPlan
|
||||
from ...git_gate import GitGatePlan
|
||||
from ...log import info
|
||||
from ...pipelock import PipelockProxyPlan
|
||||
from ...supervise import SupervisePlan
|
||||
from .. import BottlePlan
|
||||
from ..print_util import print_multi
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SmolmachinesBottlePlan(BottlePlan):
|
||||
"""Resolved fields the launch step needs to bring up the bottle.
|
||||
|
||||
Inherits `spec` and `stage_dir` from BottlePlan."""
|
||||
|
||||
slug: str
|
||||
# Per-bottle docker subnet for the sidecar bundle container.
|
||||
# The bundle runs at `bundle_ip` (always `.2`); the gateway is
|
||||
# at `.1`. smolvm's TSI allowlist is set to `bundle_ip/32`.
|
||||
bundle_subnet: str
|
||||
bundle_gateway: str
|
||||
bundle_ip: str
|
||||
# smolvm machine name + agent image source. machine_create
|
||||
# boots from a packed `.smolmachine` artifact (pre-baked at
|
||||
# prepare time via `smolvm pack create`); using `--from`
|
||||
# instead of `--image` avoids the registry-pull race we hit
|
||||
# when machine_start tried to fetch on-demand and the libkrun
|
||||
# agent's network attempt got refused by macOS.
|
||||
#
|
||||
# Chunk 2d ships with a public placeholder image (alpine)
|
||||
# since bot-bottle-claude:latest lives in the operator's local
|
||||
# docker daemon and smolvm's crane backend can't read from
|
||||
# there; chunk 4 resolves the agent-image-conversion gap
|
||||
# (push to a registry first, or smolvm grows a docker-daemon
|
||||
# transport).
|
||||
machine_name: str
|
||||
# Agent image ref (docker tag). `launch` runs the
|
||||
# build → save → registry push → smolvm pack pipeline against
|
||||
# this and feeds the resulting `.smolmachine` artifact to
|
||||
# `machine_create --from`. The pipeline runs at launch time
|
||||
# (not prepare time) so the docker build output doesn't garble
|
||||
# the dashboard's preflight modal.
|
||||
agent_image_ref: str
|
||||
# In-guest env vars (HTTPS_PROXY etc) — IP-literal URLs since
|
||||
# the guest has no DNS resolver inside the TSI allowlist.
|
||||
# Passed to `smolvm machine create` as `-e K=V` flags.
|
||||
# Smolfile-rendering is gone (smolvm 0.8.0's
|
||||
# `--smolfile` is mutually exclusive with `--from`, and
|
||||
# `--from` is the path that avoids the registry-pull race).
|
||||
guest_env: dict[str, str]
|
||||
# Path to the agent's prompt file on the host. Always written
|
||||
# (mode 0o600) so the in-VM path always exists; the file is
|
||||
# empty when the agent has no prompt — claude-code reads it
|
||||
# via --append-system-prompt-file only when non-empty.
|
||||
prompt_file: Path
|
||||
# Inner Plans for the four bundle daemons. The same shape the
|
||||
# docker backend uses — same `.prepare()` calls produced
|
||||
# them — but our launch step doesn't populate the
|
||||
# docker-specific network fields (internal_network,
|
||||
# egress_network) because the smolmachines bundle isn't on
|
||||
# docker's `--internal` + egress bridge topology; it's on a
|
||||
# per-bottle bridge with a pinned IP. The unused fields stay
|
||||
# at their dataclass defaults.
|
||||
proxy_plan: PipelockProxyPlan
|
||||
git_gate_plan: GitGatePlan
|
||||
egress_plan: EgressPlan
|
||||
# None when bottle.supervise is False, matching the docker
|
||||
# backend's convention.
|
||||
supervise_plan: SupervisePlan | None
|
||||
# Agent-side endpoints. On Docker Desktop the docker bridge
|
||||
# IPs aren't reachable from the smolvm guest (TSI uses macOS
|
||||
# networking; docker container IPs live in the daemon's VM),
|
||||
# so the agent dials the bundle via host loopback +
|
||||
# docker-published random ports. Empty at prepare time;
|
||||
# launch populates these after bundle bringup via
|
||||
# `dataclasses.replace`. Format: a `host:port` for git-gate
|
||||
# (insteadOf URL prefix) + full URLs for proxy / supervise.
|
||||
agent_proxy_url: str = ""
|
||||
agent_git_gate_host: str = ""
|
||||
agent_supervise_url: str = ""
|
||||
agent_command: str = "claude"
|
||||
agent_prompt_mode: str = "claude_append_file"
|
||||
agent_provider_template: str = "claude"
|
||||
agent_dockerfile_path: str = ""
|
||||
|
||||
def print(self, *, remote_control: bool) -> None:
|
||||
"""Compact y/N preflight. Same shape as the Docker
|
||||
backend's so operators see one format across backends."""
|
||||
del remote_control # not surfaced in the compact summary
|
||||
spec = self.spec
|
||||
manifest = spec.manifest
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
|
||||
env_names = sorted(bottle.env.keys())
|
||||
upstreams = [
|
||||
f"{g.Name} → {g.Upstream}" for g in bottle.git
|
||||
]
|
||||
# Use the resolved egress_plan (lowercase `host` on the
|
||||
# plan-level EgressRoute) rather than `bottle.egress.routes`,
|
||||
# which is the manifest's capitalized-attr form.
|
||||
routes = [r.host for r in self.egress_plan.routes]
|
||||
|
||||
print(file=sys.stderr)
|
||||
info(f"agent : {spec.agent_name}")
|
||||
info(f"provider : {self.agent_provider_template}")
|
||||
print_multi("env ", env_names)
|
||||
print_multi("skills ", list(agent.skills))
|
||||
info(f"bottle : {agent.bottle}")
|
||||
if upstreams:
|
||||
print_multi(" git gate ", upstreams)
|
||||
if routes:
|
||||
print_multi(" egress ", routes)
|
||||
print(file=sys.stderr)
|
||||
@@ -0,0 +1,159 @@
|
||||
"""Cleanup + active-listing for the smolmachines backend (issue #77).
|
||||
|
||||
`prepare_cleanup` enumerates leftover smolmachines resources:
|
||||
|
||||
- smolvm machines (`smolvm machine ls --json`) whose name starts
|
||||
with `bot-bottle-`.
|
||||
- bundle docker containers (`bot-bottle-sidecars-<slug>`).
|
||||
- bundle docker networks (`bot-bottle-bundle-<slug>`).
|
||||
|
||||
State dirs live under `~/.bot-bottle/state/<identity>/` —
|
||||
shared layout with the docker backend, which has the single
|
||||
orphan-state-dir enumerator (it already consults
|
||||
`enumerate_active_agents()` so a live smolmachines bottle's dir
|
||||
is preserved).
|
||||
|
||||
`cleanup` removes everything in the plan: stop + delete each VM,
|
||||
force-rm each container, rm each network. Each step is
|
||||
best-effort — a failure on one resource doesn't block the others."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from ...log import info, warn
|
||||
from . import sidecar_bundle as _bundle
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle_cleanup_plan import SmolmachinesBottleCleanupPlan
|
||||
|
||||
|
||||
# Both names start with the same prefix the launcher uses.
|
||||
_VM_PREFIX = "bot-bottle-"
|
||||
_BUNDLE_PREFIX = _bundle.bundle_container_name("") # `bot-bottle-sidecars-`
|
||||
_NETWORK_PREFIX = _bundle.bundle_network_name("") # `bot-bottle-bundle-`
|
||||
|
||||
|
||||
def prepare_cleanup() -> SmolmachinesBottleCleanupPlan:
|
||||
"""Enumerate every smolmachines-owned resource on the host.
|
||||
No side effects. Returns an empty plan when smolvm isn't on
|
||||
PATH (no machines to reap) — `cleanup` is a no-op in that
|
||||
case too."""
|
||||
machines = _list_bot_bottle_machines()
|
||||
bundles = _list_bundle_containers()
|
||||
networks = _list_bundle_networks()
|
||||
return SmolmachinesBottleCleanupPlan(
|
||||
machines=tuple(sorted(machines)),
|
||||
bundles=tuple(sorted(bundles)),
|
||||
networks=tuple(sorted(networks)),
|
||||
)
|
||||
|
||||
|
||||
def cleanup(plan: SmolmachinesBottleCleanupPlan) -> None:
|
||||
"""Remove everything in the plan. Order matters: stop VMs
|
||||
first (they hold ports on lo0 aliases via libkrun), then the
|
||||
bundle containers (which hold the host port-forwards), then
|
||||
the networks (which docker won't reap until the containers
|
||||
are gone)."""
|
||||
for name in plan.machines:
|
||||
info(f"stopping smolvm machine {name}")
|
||||
subprocess.run(
|
||||
["smolvm", "machine", "stop", "--name", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
info(f"deleting smolvm machine {name}")
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "delete", "-f", name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"smolvm machine delete -f {name} failed: "
|
||||
f"{(r.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
for name in plan.bundles:
|
||||
info(f"removing bundle container {name}")
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
for name in plan.networks:
|
||||
info(f"removing bundle network {name}")
|
||||
r = subprocess.run(
|
||||
["docker", "network", "rm", name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0 and "no such network" not in (r.stderr or "").lower():
|
||||
warn(
|
||||
f"docker network rm {name} failed: "
|
||||
f"{(r.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
|
||||
def _list_bot_bottle_machines() -> list[str]:
|
||||
"""All smolvm machines named `bot-bottle-*`, regardless of
|
||||
state (running / stopped / created). Empty when smolvm isn't
|
||||
installed."""
|
||||
if not _smolvm.is_available():
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["smolvm", "machine", "ls", "--json"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
try:
|
||||
machines = json.loads(r.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
return [
|
||||
m["name"] for m in machines
|
||||
if isinstance(m, dict)
|
||||
and m.get("name", "").startswith(_VM_PREFIX)
|
||||
]
|
||||
|
||||
|
||||
def _list_bundle_containers() -> list[str]:
|
||||
"""All docker containers named `bot-bottle-sidecars-*`,
|
||||
running or stopped. Empty when docker isn't installed."""
|
||||
# Late import: `backend/__init__` imports this module
|
||||
# transitively via the smolmachines backend.
|
||||
from .. import has_backend
|
||||
if not has_backend("docker"):
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["docker", "ps", "-a",
|
||||
"--filter", f"name=^{_BUNDLE_PREFIX}",
|
||||
"--format", "{{.Names}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
return [
|
||||
line for line in (r.stdout or "").splitlines()
|
||||
if line and line.startswith(_BUNDLE_PREFIX)
|
||||
]
|
||||
|
||||
|
||||
def _list_bundle_networks() -> list[str]:
|
||||
"""All docker networks named `bot-bottle-bundle-*`. Empty
|
||||
when docker isn't installed."""
|
||||
from .. import has_backend
|
||||
if not has_backend("docker"):
|
||||
return []
|
||||
r = subprocess.run(
|
||||
["docker", "network", "ls",
|
||||
"--filter", f"name={_NETWORK_PREFIX}",
|
||||
"--format", "{{.Name}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
return [
|
||||
line for line in (r.stdout or "").splitlines()
|
||||
if line and line.startswith(_NETWORK_PREFIX)
|
||||
]
|
||||
@@ -0,0 +1,121 @@
|
||||
"""Active-agent enumeration for the smolmachines backend (PRD
|
||||
0023 chunk 4 follow-up + issue #77).
|
||||
|
||||
Returns a list of `ActiveAgent` records — same shape the docker
|
||||
backend produces — so CLI `list active` and the dashboard agents
|
||||
pane render both backends through one code path.
|
||||
|
||||
A smolmachines agent is "active" when its smolvm guest is
|
||||
running. We cross-reference against the per-bottle sidecar
|
||||
bundle container to populate the `services` field (which daemons
|
||||
are up in the bundle); without a bundle we still surface the VM
|
||||
so the operator can see + clean it up.
|
||||
|
||||
The cross-backend caller gates on `has_backend("smolmachines")`
|
||||
and `has_backend("docker")`, so this module assumes both are
|
||||
available when called. Both subprocess calls below still
|
||||
tolerate "command not on PATH" defensively, but the gate is the
|
||||
intended access pattern."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from .. import ActiveAgent
|
||||
from ..docker.bottle_state import read_metadata
|
||||
from . import sidecar_bundle as _bundle
|
||||
|
||||
|
||||
# Smolvm VM names produced by prepare are `bot-bottle-<slug>`,
|
||||
# matching the bundle container name pattern. We use the prefix
|
||||
# both as a filter and to strip back to the slug.
|
||||
_VM_NAME_PREFIX = "bot-bottle-"
|
||||
|
||||
|
||||
def enumerate_active() -> list[ActiveAgent]:
|
||||
"""All currently-running smolmachines-backed agents. Empty
|
||||
list when no matching VMs are running. Caller is responsible
|
||||
for gating on `has_backend('smolmachines')` if needed; if
|
||||
smolvm is missing the `smolvm machine ls` call below returns
|
||||
nothing silently."""
|
||||
result = subprocess.run(
|
||||
["smolvm", "machine", "ls", "--json"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return []
|
||||
try:
|
||||
machines = json.loads(result.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
services_by_slug = _query_bundle_services()
|
||||
out: list[ActiveAgent] = []
|
||||
for m in machines:
|
||||
name = m.get("name") or ""
|
||||
state = m.get("state") or ""
|
||||
if state != "running" or not name.startswith(_VM_NAME_PREFIX):
|
||||
continue
|
||||
slug = name[len(_VM_NAME_PREFIX):]
|
||||
metadata = read_metadata(slug)
|
||||
out.append(ActiveAgent(
|
||||
backend_name="smolmachines",
|
||||
slug=slug,
|
||||
agent_name=metadata.agent_name if metadata else "?",
|
||||
started_at=metadata.started_at if metadata else "",
|
||||
services=services_by_slug.get(slug, ()),
|
||||
))
|
||||
return out
|
||||
|
||||
|
||||
def _query_bundle_services() -> dict[str, tuple[str, ...]]:
|
||||
"""`{slug: ('egress', 'pipelock', ...)}` from each running
|
||||
bundle container's `BOT_BOTTLE_SIDECAR_DAEMONS` env var.
|
||||
Smolmachines bundles all run the PRD-0024 image with the
|
||||
same daemon set declared via env, so one inspect per bundle
|
||||
gets us the picture without exec'ing into the container.
|
||||
|
||||
Returns an empty mapping when the docker backend isn't
|
||||
available — the bundle services field on each ActiveAgent
|
||||
just shows up empty, matching the docker backend's "starting"
|
||||
state."""
|
||||
# Late import: `has_backend` lives on the backend package's
|
||||
# __init__, which imports this module transitively. Pulling
|
||||
# the name in at call time sidesteps the cycle.
|
||||
from .. import has_backend
|
||||
if not has_backend("docker"):
|
||||
return {}
|
||||
ps = subprocess.run(
|
||||
["docker", "ps",
|
||||
"--filter", "name=" + _bundle.bundle_container_name(""),
|
||||
"--format", "{{.Names}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if ps.returncode != 0:
|
||||
return {}
|
||||
out: dict[str, tuple[str, ...]] = {}
|
||||
for line in (ps.stdout or "").splitlines():
|
||||
name = line.strip()
|
||||
if not name:
|
||||
continue
|
||||
slug = name.removeprefix(_bundle.bundle_container_name(""))
|
||||
if not slug:
|
||||
continue
|
||||
inspect = subprocess.run(
|
||||
["docker", "inspect", name, "--format", "{{json .Config.Env}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if inspect.returncode != 0:
|
||||
continue
|
||||
try:
|
||||
env_list = json.loads(inspect.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
for entry in env_list:
|
||||
key, _, value = entry.partition("=")
|
||||
if key == "BOT_BOTTLE_SIDECAR_DAEMONS":
|
||||
out[slug] = tuple(sorted(
|
||||
d for d in value.split(",") if d
|
||||
))
|
||||
break
|
||||
return out
|
||||
@@ -0,0 +1,468 @@
|
||||
"""End-to-end launch flow for the smolmachines backend
|
||||
(PRD 0023 chunks 2d + 4b).
|
||||
|
||||
Brings up the per-bottle docker bridge + sidecar bundle (with
|
||||
real daemons + their config files), creates + starts the smolvm
|
||||
guest pointed at the bundle's pinned IP via TSI's
|
||||
`--allow-cidr <bundle-ip>/32` allowlist, yields a
|
||||
`SmolmachinesBottle` handle, tears everything down on context
|
||||
exit.
|
||||
|
||||
The bundle's daemons consume the inner Plans the docker backend
|
||||
already produces: pipelock reads its yaml + CA from the
|
||||
PipelockProxyPlan; egress reads routes + CAs from the EgressPlan
|
||||
+ EGRESS_UPSTREAM_PROXY pointing at `127.0.0.1:8888` (bundle
|
||||
local), since the agent dials pipelock first (not egress) on the
|
||||
smolmachines path. Git-gate + supervise plumb through the same
|
||||
plans the docker backend uses, minus the docker-network fields
|
||||
that don't apply here."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import time
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Callable, Generator
|
||||
|
||||
from ...egress import EGRESS_ROUTES_IN_CONTAINER, egress_resolve_token_values
|
||||
from ...pipelock import (
|
||||
PIPELOCK_CA_CERT_IN_CONTAINER,
|
||||
PIPELOCK_CA_KEY_IN_CONTAINER,
|
||||
)
|
||||
from ...supervise import QUEUE_DIR_IN_CONTAINER, SUPERVISE_PORT
|
||||
from ...util import expand_tilde
|
||||
from ..docker import util as docker_mod
|
||||
from ..docker.egress import (
|
||||
EGRESS_CA_IN_CONTAINER,
|
||||
EGRESS_PIPELOCK_CA_IN_CONTAINER,
|
||||
EGRESS_PORT as _EGRESS_PORT,
|
||||
egress_tls_init,
|
||||
)
|
||||
from ..docker.git_gate import (
|
||||
GIT_GATE_ACCESS_HOOK_IN_CONTAINER,
|
||||
GIT_GATE_CREDS_DIR_IN_CONTAINER,
|
||||
GIT_GATE_ENTRYPOINT_IN_CONTAINER,
|
||||
GIT_GATE_HOOK_IN_CONTAINER,
|
||||
GIT_GATE_PORT as _GIT_GATE_PORT,
|
||||
)
|
||||
from ..docker.pipelock import (
|
||||
BUNDLE_LOCAL_PIPELOCK_URL,
|
||||
PIPELOCK_PORT as _PIPELOCK_PORT_STR,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
from . import loopback_alias as _loopback
|
||||
from . import sidecar_bundle as _bundle
|
||||
from . import smolvm as _smolvm
|
||||
from .bottle import SmolmachinesBottle
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
from .local_registry import crane_push_tarball, ephemeral_registry
|
||||
|
||||
|
||||
# Repo root, used as the `docker build` context for the agent image.
|
||||
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
|
||||
|
||||
|
||||
# Per-host cache for `smolvm pack create` outputs. Keyed by the
|
||||
# docker image ID so a Dockerfile change automatically invalidates
|
||||
# the cache. `pack create` is idempotent on the smolvm side but
|
||||
# takes several seconds even on a no-op rebuild.
|
||||
_SMOLMACHINE_CACHE_DIR = Path.home() / ".cache" / "bot-bottle" / "smolmachines"
|
||||
|
||||
|
||||
# Container-internal listening ports for each bundle daemon. The
|
||||
# bundle publishes each one on a random host loopback port (see
|
||||
# `_bundle.start_bundle`), and `_bundle.bundle_host_port` looks
|
||||
# them up post-start. Pipelock's port is an env-overridable string
|
||||
# in docker.pipelock; coerce to int here.
|
||||
_PIPELOCK_PORT = int(_PIPELOCK_PORT_STR)
|
||||
_SUPERVISE_PORT = SUPERVISE_PORT
|
||||
|
||||
|
||||
@contextmanager
|
||||
def launch(
|
||||
plan: SmolmachinesBottlePlan,
|
||||
*,
|
||||
provision: Callable[[SmolmachinesBottlePlan, str], str | None],
|
||||
) -> Generator[SmolmachinesBottle, None, None]:
|
||||
"""Build + run the bottle and yield a handle; tear everything
|
||||
down on exit. Errors during bringup unwind any partial state
|
||||
via the ExitStack."""
|
||||
stack = ExitStack()
|
||||
try:
|
||||
# 1. Reserve a loopback alias for this bottle. macOS only
|
||||
# routes 127.0.0.1 by default; the per-bottle alias is
|
||||
# what bundles the docker port-publishes and TSI allowlist
|
||||
# against, so this bottle can't reach other bottles' (or
|
||||
# other host services') ports on the loopback. Lazy
|
||||
# sudo-driven on first use per boot. No-op on Linux.
|
||||
_loopback.ensure_pool()
|
||||
loopback_ip = _loopback.allocate(plan.slug)
|
||||
|
||||
# 2. Per-bottle docker bridge.
|
||||
network = _bundle.bundle_network_name(plan.slug)
|
||||
_bundle.create_bundle_network(network, plan.bundle_subnet, plan.bundle_gateway)
|
||||
stack.callback(_bundle.remove_bundle_network, network)
|
||||
|
||||
# 2. Mint per-bottle CAs and update the inner Plans with
|
||||
# their launch-time paths. pipelock always runs in the
|
||||
# bundle; egress's CA is only minted when the bottle
|
||||
# declares routes (otherwise egress runs idle without
|
||||
# MITM and the CA files would be unused).
|
||||
ca_cert_host, ca_key_host = pipelock_tls_init(plan.proxy_plan.yaml_path.parent)
|
||||
proxy_plan = dataclasses.replace(
|
||||
plan.proxy_plan,
|
||||
ca_cert_host_path=ca_cert_host,
|
||||
ca_key_host_path=ca_key_host,
|
||||
)
|
||||
egress_plan = plan.egress_plan
|
||||
if egress_plan.routes:
|
||||
egress_ca_host, egress_ca_cert_only = egress_tls_init(
|
||||
plan.egress_plan.routes_path.parent,
|
||||
)
|
||||
egress_plan = dataclasses.replace(
|
||||
egress_plan,
|
||||
mitmproxy_ca_host_path=egress_ca_host,
|
||||
mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
|
||||
pipelock_ca_host_path=ca_cert_host,
|
||||
# On smolmachines, egress's upstream is pipelock
|
||||
# on the bundle's localhost — they're in the same
|
||||
# container's network namespace.
|
||||
pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
|
||||
)
|
||||
plan = dataclasses.replace(
|
||||
plan, proxy_plan=proxy_plan, egress_plan=egress_plan,
|
||||
)
|
||||
|
||||
# 3. Build the BundleLaunchSpec from the (now-resolved)
|
||||
# inner Plans: daemon subset, env, bind-mounts, and the
|
||||
# loopback alias to bind published ports against. The
|
||||
# spec's ports_to_publish list expands depending on which
|
||||
# daemons the agent needs to reach from the smolvm guest.
|
||||
bundle_spec = _bundle_launch_spec(plan, network, loopback_ip)
|
||||
token_env = _resolve_token_env(plan, os.environ)
|
||||
_bundle.start_bundle(bundle_spec, env={**os.environ, **token_env})
|
||||
stack.callback(_bundle.stop_bundle, plan.slug)
|
||||
|
||||
# 4. Discover the host-side ports docker assigned for the
|
||||
# bundle's published container ports, and bind the
|
||||
# agent's URLs to `<loopback_ip>:<host port>`. Docker
|
||||
# container IPs (192.168.x.x in the daemon's bridge)
|
||||
# aren't reachable from the smolvm guest on macOS — TSI
|
||||
# uses macOS networking, and macOS sees the daemon's
|
||||
# bridge via the published-port loopback forward only.
|
||||
#
|
||||
# Proxy hop order matches the docker backend: when the
|
||||
# bottle declares egress routes, the agent's first hop is
|
||||
# egress (for token injection), then pipelock. Without
|
||||
# routes, the agent dials pipelock directly. Whichever
|
||||
# one is "agent-facing" is the daemon whose port we
|
||||
# publish on host loopback; the other stays bundle-
|
||||
# internal as the upstream proxy.
|
||||
if plan.egress_plan.routes:
|
||||
agent_facing_port = _EGRESS_PORT
|
||||
else:
|
||||
agent_facing_port = _PIPELOCK_PORT
|
||||
agent_facing_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, agent_facing_port, host_ip=loopback_ip,
|
||||
)
|
||||
agent_proxy_url = f"http://{loopback_ip}:{agent_facing_host_port}"
|
||||
agent_git_gate_host = ""
|
||||
if plan.git_gate_plan.upstreams:
|
||||
git_gate_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _GIT_GATE_PORT, host_ip=loopback_ip,
|
||||
)
|
||||
agent_git_gate_host = f"{loopback_ip}:{git_gate_host_port}"
|
||||
agent_supervise_url = ""
|
||||
if plan.supervise_plan is not None:
|
||||
supervise_host_port = _bundle.bundle_host_port(
|
||||
plan.slug, _SUPERVISE_PORT, host_ip=loopback_ip,
|
||||
)
|
||||
agent_supervise_url = f"http://{loopback_ip}:{supervise_host_port}/"
|
||||
|
||||
# Stamp the URLs onto the plan + guest_env. provision_git
|
||||
# and provision_supervise read the plan fields; the agent
|
||||
# reads guest_env on every exec_claude.
|
||||
#
|
||||
# NO_PROXY has to include the per-bottle loopback alias —
|
||||
# otherwise claude's HTTPS_PROXY catches direct calls to
|
||||
# the supervise URL (`http://<alias>:<port>/`) and proxies
|
||||
# them through egress, which has no route for the alias
|
||||
# and rejects with "Failed to connect". The git-gate URL
|
||||
# uses git://, not affected by HTTP_PROXY, so the alias
|
||||
# only has to be in NO_PROXY for the MCP / supervise
|
||||
# path. Append rather than overwrite so prepare.py's
|
||||
# `localhost,127.0.0.1` baseline stays in place.
|
||||
existing_no_proxy = plan.guest_env.get("NO_PROXY", "localhost,127.0.0.1")
|
||||
guest_env = {
|
||||
**plan.guest_env,
|
||||
"HTTPS_PROXY": agent_proxy_url,
|
||||
"HTTP_PROXY": agent_proxy_url,
|
||||
"NO_PROXY": f"{existing_no_proxy},{loopback_ip}",
|
||||
}
|
||||
if agent_git_gate_host:
|
||||
guest_env["GIT_GATE_URL"] = f"git://{agent_git_gate_host}"
|
||||
if agent_supervise_url:
|
||||
guest_env["MCP_SUPERVISE_URL"] = agent_supervise_url
|
||||
plan = dataclasses.replace(
|
||||
plan,
|
||||
guest_env=guest_env,
|
||||
agent_proxy_url=agent_proxy_url,
|
||||
agent_git_gate_host=agent_git_gate_host,
|
||||
agent_supervise_url=agent_supervise_url,
|
||||
)
|
||||
|
||||
# 5. Build the agent image and pack it into a
|
||||
# `.smolmachine` artifact (or hit the per-Dockerfile-digest
|
||||
# cache). Runs here, not in prepare, so the docker-build
|
||||
# output doesn't garble the dashboard's preflight modal:
|
||||
# both the curses-endwin path and the tmux pane-routing
|
||||
# path redirect stderr around `launch` already.
|
||||
agent_from_path = _ensure_smolmachine(
|
||||
plan.agent_image_ref,
|
||||
dockerfile=plan.agent_dockerfile_path,
|
||||
)
|
||||
|
||||
# smolvm VM. --from carries the pre-packed .smolmachine
|
||||
# artifact; --allow-cidr + -e carry the per-bottle TSI
|
||||
# allowlist + env. The allowlist is the per-bottle
|
||||
# loopback alias — narrowing it to one /32 keeps the
|
||||
# agent from reaching other host loopback services or
|
||||
# other bottles' published ports. Smolfile isn't usable
|
||||
# here — smolvm 0.8.0 makes `--from` and `--smolfile`
|
||||
# mutually exclusive.
|
||||
_smolvm.machine_create(
|
||||
plan.machine_name,
|
||||
from_path=agent_from_path,
|
||||
allow_cidrs=[f"{loopback_ip}/32"],
|
||||
env=plan.guest_env,
|
||||
)
|
||||
stack.callback(_smolvm.machine_delete, plan.machine_name)
|
||||
# Workaround smolvm 0.8.0: `--allow-cidr` is silently
|
||||
# dropped when combined with `--from`. Patch the persisted
|
||||
# state DB to set the allowlist before start so the booted
|
||||
# VM's TSI actually enforces. See loopback_alias's module
|
||||
# docstring for the investigation that led here.
|
||||
_loopback.force_allowlist(plan.machine_name, [f"{loopback_ip}/32"])
|
||||
_smolvm.machine_start(plan.machine_name)
|
||||
stack.callback(_smolvm.machine_stop, plan.machine_name)
|
||||
|
||||
# 6. Repair filesystem ownership + perms that smolvm's
|
||||
# pack process remapped to the host invoker's uid (501
|
||||
# on macOS) rather than preserving the image's expected
|
||||
# ownership.
|
||||
#
|
||||
# - /home/node → node:node so the node user can write
|
||||
# its own dotfiles (claude appendFileSync on
|
||||
# ~/.claude.json otherwise bails with ENOENT/EPERM
|
||||
# and the TUI hangs without surfacing the error).
|
||||
# - /tmp + /var/tmp → root:root mode 1777 so non-root
|
||||
# processes can create their per-uid scratch dirs
|
||||
# (claude-code creates /tmp/claude-<uid>/ as soon as
|
||||
# it spawns a Bash tool call).
|
||||
#
|
||||
# All folded into one sh -c so we only pay one
|
||||
# machine_exec round trip — back-to-back exec calls
|
||||
# right after machine_start hit a SIGKILL race in
|
||||
# libkrun's exec channel (see provision_ca for the
|
||||
# other half of this same workaround).
|
||||
_smolvm.machine_exec(plan.machine_name, [
|
||||
"sh", "-c",
|
||||
"chown -R node:node /home/node && "
|
||||
"chown root:root /tmp /var/tmp && "
|
||||
"chmod 1777 /tmp /var/tmp",
|
||||
])
|
||||
|
||||
# Wait briefly for the VM to settle. Back-to-back smolvm
|
||||
# machine_exec calls immediately after machine_start
|
||||
# occasionally SIGKILL the in-VM child at ~100ms (looks
|
||||
# like a VM warm-up race in libkrun's exec channel).
|
||||
# 1.5s is empirically enough to dodge it; provisioning
|
||||
# already takes seconds so the wait is amortized.
|
||||
time.sleep(1.5)
|
||||
|
||||
# 7. Provision (CA / prompt / skills / git / supervise).
|
||||
prompt_path = provision(plan, plan.machine_name)
|
||||
|
||||
yield SmolmachinesBottle(
|
||||
plan.machine_name,
|
||||
prompt_path=prompt_path,
|
||||
guest_env=plan.guest_env,
|
||||
agent_command=plan.agent_command,
|
||||
agent_prompt_mode=plan.agent_prompt_mode,
|
||||
)
|
||||
finally:
|
||||
stack.close()
|
||||
|
||||
|
||||
def _bundle_launch_spec(
|
||||
plan: SmolmachinesBottlePlan, network: str, loopback_ip: str,
|
||||
) -> _bundle.BundleLaunchSpec:
|
||||
"""Build a BundleLaunchSpec from the resolved inner Plans.
|
||||
|
||||
Daemons in the CSV:
|
||||
- egress + pipelock are always present (pipelock is the
|
||||
agent's first hop; egress is its upstream).
|
||||
- git-gate is conditional on plan.git_gate_plan.upstreams.
|
||||
- supervise is conditional on plan.supervise_plan.
|
||||
|
||||
Env + volumes are the union of the four daemons' needs, with
|
||||
daemon-private values only (HTTPS_PROXY is scoped to the
|
||||
egress process by egress_entrypoint.sh — see PRD 0024's bundle
|
||||
bind-address PR)."""
|
||||
daemons: list[str] = ["egress", "pipelock"]
|
||||
env: list[str] = []
|
||||
volumes: list[tuple[str, str, bool]] = []
|
||||
|
||||
# In this Docker-Desktop-compatible topology, whichever daemon
|
||||
# is "agent-facing" gets its port published on the host
|
||||
# loopback (see `_ensure_smolmachine`'s discovery loop) and the
|
||||
# other stays bundle-internal. The bundle is NOT reachable by
|
||||
# bridge IP from the smolvm guest, so the
|
||||
# PRD-0023-chunk-3 EGRESS_LISTEN_HOST=127.0.0.1 mitigation
|
||||
# isn't needed: the agent can only dial whatever daemon's
|
||||
# host port we publish, period.
|
||||
|
||||
# --- pipelock ---------------------------------------------
|
||||
pp = plan.proxy_plan
|
||||
volumes += [
|
||||
(str(pp.yaml_path), "/etc/pipelock.yaml", True),
|
||||
(str(pp.ca_cert_host_path), PIPELOCK_CA_CERT_IN_CONTAINER, True),
|
||||
(str(pp.ca_key_host_path), PIPELOCK_CA_KEY_IN_CONTAINER, True),
|
||||
]
|
||||
|
||||
# --- egress -----------------------------------------------
|
||||
ep = plan.egress_plan
|
||||
if ep.routes:
|
||||
env.append(f"EGRESS_UPSTREAM_PROXY={ep.pipelock_proxy_url}")
|
||||
env.append(f"EGRESS_UPSTREAM_CA={EGRESS_PIPELOCK_CA_IN_CONTAINER}")
|
||||
volumes += [
|
||||
(str(ep.routes_path), EGRESS_ROUTES_IN_CONTAINER, True),
|
||||
(str(ep.mitmproxy_ca_host_path), EGRESS_CA_IN_CONTAINER, True),
|
||||
(str(ep.pipelock_ca_host_path), EGRESS_PIPELOCK_CA_IN_CONTAINER, True),
|
||||
]
|
||||
# Bare-name entries for upstream-token slots. Their values
|
||||
# come from the docker-run subprocess env (inherited from
|
||||
# the operator's shell), never landing on argv.
|
||||
for token_env in sorted(ep.token_env_map.keys()):
|
||||
env.append(token_env)
|
||||
|
||||
# --- git-gate ---------------------------------------------
|
||||
extra_hosts: list[str] = []
|
||||
gp = plan.git_gate_plan
|
||||
if gp.upstreams:
|
||||
daemons.append("git-gate")
|
||||
volumes += [
|
||||
(str(gp.entrypoint_script), GIT_GATE_ENTRYPOINT_IN_CONTAINER, True),
|
||||
(str(gp.hook_script), GIT_GATE_HOOK_IN_CONTAINER, True),
|
||||
(str(gp.access_hook_script), GIT_GATE_ACCESS_HOOK_IN_CONTAINER, True),
|
||||
]
|
||||
for u in gp.upstreams:
|
||||
keypath = expand_tilde(u.identity_file)
|
||||
volumes.append((
|
||||
keypath,
|
||||
f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-key",
|
||||
True,
|
||||
))
|
||||
|
||||
# --- supervise --------------------------------------------
|
||||
sp = plan.supervise_plan
|
||||
if sp is not None:
|
||||
daemons.append("supervise")
|
||||
env += [
|
||||
f"SUPERVISE_BOTTLE_SLUG={plan.slug}",
|
||||
f"SUPERVISE_QUEUE_DIR={QUEUE_DIR_IN_CONTAINER}",
|
||||
f"SUPERVISE_PORT={SUPERVISE_PORT}",
|
||||
]
|
||||
volumes.append((str(sp.queue_dir), QUEUE_DIR_IN_CONTAINER, False))
|
||||
|
||||
# Container ports the agent reaches from the smolvm guest —
|
||||
# published on host loopback so the guest can dial via TSI +
|
||||
# macOS networking. The HTTP/HTTPS chokepoint is whichever
|
||||
# daemon's port we publish: egress when routes are declared
|
||||
# (token injection first, then forwards to bundle-internal
|
||||
# pipelock), pipelock otherwise.
|
||||
if ep.routes:
|
||||
ports_to_publish: list[int] = [_EGRESS_PORT]
|
||||
else:
|
||||
ports_to_publish = [_PIPELOCK_PORT]
|
||||
if gp.upstreams:
|
||||
ports_to_publish.append(_GIT_GATE_PORT)
|
||||
if sp is not None:
|
||||
ports_to_publish.append(_SUPERVISE_PORT)
|
||||
|
||||
return _bundle.BundleLaunchSpec(
|
||||
slug=plan.slug,
|
||||
network_name=network,
|
||||
subnet=plan.bundle_subnet,
|
||||
gateway=plan.bundle_gateway,
|
||||
bundle_ip=plan.bundle_ip,
|
||||
daemons_csv=",".join(daemons),
|
||||
environment=tuple(env),
|
||||
volumes=tuple(volumes),
|
||||
ports_to_publish=tuple(ports_to_publish),
|
||||
publish_host_ip=loopback_ip,
|
||||
)
|
||||
|
||||
|
||||
def _resolve_token_env(
|
||||
plan: SmolmachinesBottlePlan, host_env: object
|
||||
) -> dict[str, str]:
|
||||
"""Resolve the egress token env-var values from the host's
|
||||
environ so they reach the bundle's process env via docker's
|
||||
`-e NAME` inheritance. Empty when no routes declare auth."""
|
||||
ep = plan.egress_plan
|
||||
if not ep.routes:
|
||||
return {}
|
||||
return egress_resolve_token_values(ep.token_env_map, dict(host_env))
|
||||
|
||||
|
||||
def _ensure_smolmachine(image_ref: str, *, dockerfile: str = "") -> Path:
|
||||
"""Build the agent docker image and convert it into a
|
||||
`.smolmachine` artifact, caching the result under
|
||||
`~/.cache/bot-bottle/smolmachines/` keyed by the docker image
|
||||
ID (so a Dockerfile change automatically invalidates the cache).
|
||||
|
||||
Returns the `.smolmachine.smolmachine` sidecar path — that's
|
||||
the file `machine create --from` consumes (pack create produces
|
||||
a launcher binary at `.smolmachine` plus the sidecar alongside
|
||||
it; the sidecar is the actual artifact).
|
||||
|
||||
Conversion path: `docker build` (the existing layer cache
|
||||
makes no-change rebuilds cheap) → `docker save` to a tarball
|
||||
→ spin up an ephemeral registry on a private docker network →
|
||||
`crane push --insecure` from a one-shot container on the same
|
||||
network → `smolvm pack create --image localhost:<host port>/...`
|
||||
→ tear down the registry + network. The crane push detour
|
||||
sidesteps the Docker-Desktop daemon's HTTPS preference for
|
||||
non-loopback registries — see the `local_registry` module
|
||||
docstring for the gory details.
|
||||
|
||||
Each pack-create costs several seconds even on a hot cache,
|
||||
so we skip the whole pipeline when the cached sidecar is
|
||||
already on disk for this image ID."""
|
||||
_SMOLMACHINE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
docker_mod.build_image(image_ref, _REPO_DIR, dockerfile=dockerfile)
|
||||
# `sha256:abcd...` -> `abcd...` first 16 chars: short enough to
|
||||
# keep filenames manageable, long enough to make collisions
|
||||
# astronomically unlikely.
|
||||
digest = docker_mod.image_id(image_ref).split(":", 1)[-1][:16]
|
||||
binary = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine"
|
||||
sidecar = _SMOLMACHINE_CACHE_DIR / f"{digest}.smolmachine.smolmachine"
|
||||
if sidecar.is_file():
|
||||
return sidecar
|
||||
tarball = _SMOLMACHINE_CACHE_DIR / f"{digest}.image.tar"
|
||||
docker_mod.save(image_ref, str(tarball))
|
||||
try:
|
||||
with ephemeral_registry() as handle:
|
||||
push_ref = f"{handle.push_endpoint}/bot-bottle:{digest}"
|
||||
pack_ref = f"{handle.pull_endpoint}/bot-bottle:{digest}"
|
||||
crane_push_tarball(handle, str(tarball), push_ref)
|
||||
_smolvm.pack_create(pack_ref, binary)
|
||||
finally:
|
||||
# Tarball is ~500MB-1GB for the agent image; reclaim once
|
||||
# the smolmachine artifact exists. The artifact itself is
|
||||
# the long-lived cache entry.
|
||||
tarball.unlink(missing_ok=True)
|
||||
return sidecar
|
||||
@@ -0,0 +1,234 @@
|
||||
"""Ephemeral local OCI registry for the smolmachines agent-image
|
||||
conversion path (PRD 0023 chunk 4c).
|
||||
|
||||
`smolvm pack create --image <ref>` only accepts OCI registry refs
|
||||
— it can't read the local docker daemon's image cache, an OCI
|
||||
layout directory, or a `docker save` tarball. To convert the
|
||||
agent's Dockerfile-built image into a `.smolmachine` artifact we
|
||||
spin up a short-lived `registry:2.8.3` container alongside a
|
||||
`crane` helper container on a private docker network, push via
|
||||
`crane push --insecure <tarball> <registry-container>:5000/...`,
|
||||
and let smolvm pull from the registry's published host port. The
|
||||
network + both containers are torn down after the pack completes.
|
||||
|
||||
Why this two-container dance instead of plain `docker push`:
|
||||
- Docker Desktop's daemon runs in its own Linux VM, so its
|
||||
`localhost` is not the host's loopback. A registry bound to
|
||||
the host's 127.0.0.1 is unreachable from the daemon side.
|
||||
- `host.docker.internal` is reachable from the daemon but isn't
|
||||
in Docker's default insecure-registries CIDRs (only `::1/128`
|
||||
and `127.0.0.0/8` are), so `docker push` to it tries HTTPS,
|
||||
hits a plain-HTTP registry, and dies with
|
||||
`http: server gave HTTP response to HTTPS client`. Adding
|
||||
`host.docker.internal` to daemon.json works but is a one-time
|
||||
manual step the user has to do in Docker Desktop's UI.
|
||||
- Going through a docker network sidesteps the host-vs-daemon
|
||||
loopback mismatch (crane and registry containers see each
|
||||
other on the network) AND the HTTPS preference (crane has an
|
||||
`--insecure` flag that forces plain HTTP).
|
||||
|
||||
The registry is also published on a random host port so smolvm
|
||||
— a host process — can pull from `localhost:<port>` via Docker's
|
||||
port-forward. smolvm's bundled crane auto-falls-back to HTTP for
|
||||
localhost addresses, so no insecure-registries config is needed
|
||||
on that side either."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterator
|
||||
|
||||
from ...log import die
|
||||
|
||||
|
||||
# registry:2.8.3, pinned by digest. Same env-override pattern as the
|
||||
# pipelock image pin in bot_bottle/backend/docker/pipelock.py.
|
||||
REGISTRY_IMAGE = os.environ.get(
|
||||
"BOT_BOTTLE_REGISTRY_IMAGE",
|
||||
"registry@sha256:a3d8aaa63ed8681a604f1dea0aa03f100d5895b6a58ace528858a7b332415373",
|
||||
)
|
||||
|
||||
|
||||
# gcr.io/go-containerregistry/crane:latest, pinned by digest. ~10MB,
|
||||
# stable upstream from Google; we only invoke `crane push --insecure`
|
||||
# against a localhost-equivalent registry, so the trust surface is
|
||||
# narrow.
|
||||
CRANE_IMAGE = os.environ.get(
|
||||
"BOT_BOTTLE_CRANE_IMAGE",
|
||||
"gcr.io/go-containerregistry/crane@sha256:0ae17ecb34315aa7cbff28f6eddee3b7adae0b2f90101260d990804db1eb0084",
|
||||
)
|
||||
|
||||
|
||||
# Internal port the registry binds to inside its container — fixed
|
||||
# by the registry:2 image. The host-side mapping is random.
|
||||
_REGISTRY_CONTAINER_PORT = "5000"
|
||||
|
||||
|
||||
# How long to wait for the registry's HTTP layer to bind before
|
||||
# giving up. Two seconds is empirically enough; 10s leaves headroom
|
||||
# for slow CI runners without making the failure mode chatty.
|
||||
_READY_TIMEOUT_S = 10.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryHandle:
|
||||
"""Everything callers need to push to + pull from the ephemeral
|
||||
registry.
|
||||
|
||||
`network` is the per-session docker network — a `crane push`
|
||||
container has to join it to reach the registry by name.
|
||||
`push_endpoint` is the `<host>:<port>` form to embed in image
|
||||
refs given to the crane push container (resolves via docker
|
||||
network DNS). `pull_endpoint` is the `<host>:<port>` form a
|
||||
host process (smolvm) uses; the registry's host port mapping
|
||||
backs this."""
|
||||
|
||||
network: str
|
||||
push_endpoint: str
|
||||
pull_endpoint: str
|
||||
|
||||
|
||||
@contextmanager
|
||||
def ephemeral_registry() -> Iterator[RegistryHandle]:
|
||||
"""Bring up a per-session docker network + a `registry:2.8.3`
|
||||
container on it (published on a random host port), yield a
|
||||
`RegistryHandle`, force-remove both on exit.
|
||||
|
||||
The container is started with `--rm` so a clean exit cleans up
|
||||
on its own; the `finally` block force-removes on abnormal exit
|
||||
(the calling process crashes between yield and close)."""
|
||||
session_id = uuid.uuid4().hex[:12]
|
||||
network = f"bot-bottle-registry-net-{session_id}"
|
||||
registry_name = f"bot-bottle-registry-{session_id}"
|
||||
|
||||
subprocess.run(
|
||||
["docker", "network", "create", network],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
try:
|
||||
subprocess.run(
|
||||
[
|
||||
"docker", "run", "-d", "--rm",
|
||||
"--name", registry_name,
|
||||
"--network", network,
|
||||
# `-p :5000` (no IP prefix) binds the container's
|
||||
# port 5000 on a random host port across all
|
||||
# interfaces. The host side reaches the registry
|
||||
# via this port — smolvm's `pack create` pulls from
|
||||
# `localhost:<port>` and the docker port-forward
|
||||
# routes there.
|
||||
"-p", _REGISTRY_CONTAINER_PORT,
|
||||
REGISTRY_IMAGE,
|
||||
],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
try:
|
||||
port = _host_port(registry_name)
|
||||
_wait_ready(port)
|
||||
yield RegistryHandle(
|
||||
network=network,
|
||||
push_endpoint=f"{registry_name}:{_REGISTRY_CONTAINER_PORT}",
|
||||
pull_endpoint=f"localhost:{port}",
|
||||
)
|
||||
finally:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", registry_name],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
finally:
|
||||
subprocess.run(
|
||||
["docker", "network", "rm", network],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
|
||||
def crane_push_tarball(handle: RegistryHandle, tarball_path: str, ref: str) -> None:
|
||||
"""Run `crane push --insecure <tarball> <ref>` inside a one-shot
|
||||
container on the registry's docker network. `ref` should
|
||||
reference the registry by `handle.push_endpoint` so the crane
|
||||
container resolves it via docker network DNS.
|
||||
|
||||
Doesn't go through `docker push` to avoid the Docker-Desktop
|
||||
daemon's HTTPS preference for non-loopback hostnames — crane's
|
||||
`--insecure` flag forces plain HTTP, which is what the
|
||||
registry container speaks."""
|
||||
r = subprocess.run(
|
||||
[
|
||||
"docker", "run", "--rm",
|
||||
"--network", handle.network,
|
||||
"-v", f"{tarball_path}:/img.tar:ro",
|
||||
CRANE_IMAGE,
|
||||
"push", "--insecure", "/img.tar", ref,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
die(
|
||||
f"crane push of {tarball_path!r} to {ref!r} failed: "
|
||||
f"{(r.stderr or r.stdout or '').strip() or '<no output>'}"
|
||||
)
|
||||
|
||||
|
||||
def _host_port(name: str) -> int:
|
||||
"""Resolve the host-side port docker mapped to the registry's
|
||||
container port. `docker port <name> 5000/tcp` returns one or
|
||||
more `host:port` lines (one per address family) — we take the
|
||||
first."""
|
||||
r = subprocess.run(
|
||||
["docker", "port", name, f"{_REGISTRY_CONTAINER_PORT}/tcp"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
die(
|
||||
f"docker port {name} {_REGISTRY_CONTAINER_PORT}/tcp failed: "
|
||||
f"{(r.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
# `0.0.0.0:54321\n[::]:54321\n` — split on the last colon to
|
||||
# handle either IPv4 or IPv6 host syntax.
|
||||
line = (r.stdout or "").splitlines()[0].strip()
|
||||
_, _, port_str = line.rpartition(":")
|
||||
try:
|
||||
return int(port_str)
|
||||
except ValueError:
|
||||
die(f"unexpected `docker port` output: {line!r}")
|
||||
return -1 # unreachable; die() never returns
|
||||
|
||||
|
||||
def _wait_ready(port: int) -> None:
|
||||
"""Block until the registry's HTTP layer accepts a TCP
|
||||
connection on `127.0.0.1:<port>`, or `_READY_TIMEOUT_S`
|
||||
elapses.
|
||||
|
||||
A successful TCP connect is sufficient — registry:2.8.3 binds
|
||||
after it's ready to serve `/v2/` requests, so the push that
|
||||
follows will land on a working server. We probe loopback
|
||||
specifically (not via the docker network) because this helper
|
||||
runs on the host."""
|
||||
deadline = time.monotonic() + _READY_TIMEOUT_S
|
||||
last_err: Exception | None = None
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
with socket.create_connection(("127.0.0.1", port), timeout=0.5):
|
||||
return
|
||||
except OSError as e:
|
||||
last_err = e
|
||||
time.sleep(0.1)
|
||||
die(
|
||||
f"local registry on 127.0.0.1:{port} did not accept "
|
||||
f"connections within {_READY_TIMEOUT_S:.0f}s "
|
||||
f"(last error: {last_err})"
|
||||
)
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Per-bottle loopback alias allocation + TSI allowlist
|
||||
enforcement (PRD 0023, follow-up to PR #74).
|
||||
|
||||
After the pivot to host-loopback port-forwards, the smolmachines
|
||||
TSI allowlist was `127.0.0.1/32` — which meant the agent VM could
|
||||
reach **any** service bound to macOS's loopback, not just the
|
||||
bundle's published ports. Real downgrade from the docker
|
||||
backend's `--internal` network isolation.
|
||||
|
||||
This module narrows the allowlist by allocating each bottle a
|
||||
unique loopback alias (`127.0.0.16` .. `127.0.0.31`). The
|
||||
bundle's port-forwards bind to that alias, and the alias's /32
|
||||
is what TSI allows.
|
||||
|
||||
**Smolvm 0.8.0 quirk + workaround.** `smolvm machine create
|
||||
--from <smolmachine> --net --allow-cidr X/32` silently drops the
|
||||
flag — verified empirically that the agent process's allowlist
|
||||
ends up `null` in smolvm's persistent state DB (`~/Library/
|
||||
Application Support/smolvm/server/smolvm.db`, `vms` table,
|
||||
`data` BLOB), and the booted VM reaches all of `127.0.0.0/8`
|
||||
regardless of what we passed. Workaround: after machine_create,
|
||||
open the SQLite DB and patch the row's `allowed_cidrs` field
|
||||
directly. Smolvm reads the DB at machine_start, so the patched
|
||||
value takes effect on boot. Tested: enforcement is real — the
|
||||
guest's connect to a non-allowlisted IP fails with `Permission
|
||||
denied`. Other paths we tried (machine update, stop-edit-
|
||||
agent.config.json-restart, --smolfile, --image localhost:N/...)
|
||||
were dead ends.
|
||||
|
||||
macOS only configures `127.0.0.1` on `lo0` by default; the
|
||||
additional aliases require `sudo ifconfig lo0 alias`. We lazily
|
||||
sudo-add the missing pool on first use per boot — the aliases
|
||||
persist on `lo0` until reboot, so subsequent launches don't
|
||||
prompt.
|
||||
|
||||
Linux native daemons share the host's network namespace; the
|
||||
whole `127.0.0.0/8` is reachable by default and aliases are
|
||||
unnecessary. The pool logic detects native-Linux and skips sudo
|
||||
entirely; the DB patch is also gated on macOS.
|
||||
|
||||
Allocation is coordinated by inspecting running bundle
|
||||
containers' published host IPs — each bottle's bundle owns the
|
||||
alias appearing in its port bindings. The lowest-numbered free
|
||||
alias gets handed to a new bottle."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from ...log import die, info
|
||||
|
||||
|
||||
# smolvm's persistent VM state on macOS — a SQLite DB whose `vms`
|
||||
# table holds one JSON BLOB per machine. The Linux path is
|
||||
# different, but smolmachines is macOS-only in v1 (PRD 0023) so
|
||||
# we hard-code this. If the file moves under us we'll see a
|
||||
# clear FileNotFoundError; not worth defensive cross-platform
|
||||
# detection until the backend actually needs Linux.
|
||||
_SMOLVM_DB_PATH = (
|
||||
Path.home()
|
||||
/ "Library"
|
||||
/ "Application Support"
|
||||
/ "smolvm"
|
||||
/ "server"
|
||||
/ "smolvm.db"
|
||||
)
|
||||
|
||||
|
||||
# Sixteen aliases by default. Tunable for hosts that want more
|
||||
# concurrent bottles (each bottle reserves one alias for its
|
||||
# bundle bringup). The range is chosen to avoid the reserved
|
||||
# 127.0.0.1/2/3 ports (1 is the default, 2 is sometimes used by
|
||||
# CUPS, 3 by other macOS services) and stay well clear of
|
||||
# 127.0.0.53 (systemd-resolved) and 127.0.0.54 (libvirt).
|
||||
_POOL_START = 16
|
||||
_POOL_END = 31 # inclusive
|
||||
|
||||
|
||||
# Loopback aliases pool: 127.0.0.<start>..127.0.0.<end>.
|
||||
def _pool_addresses() -> list[str]:
|
||||
return [f"127.0.0.{i}" for i in range(_POOL_START, _POOL_END + 1)]
|
||||
|
||||
|
||||
def _is_macos() -> bool:
|
||||
return platform.system() == "Darwin"
|
||||
|
||||
|
||||
def ensure_pool() -> None:
|
||||
"""Make sure each address in the pool is up on `lo0`. Lazily
|
||||
runs `sudo ifconfig lo0 alias <ip>/32 up` for missing entries
|
||||
(sudo prompts once, then the aliases persist on lo0 until
|
||||
reboot). No-op on non-macOS hosts."""
|
||||
if not _is_macos():
|
||||
return
|
||||
missing = [ip for ip in _pool_addresses() if not _alias_present(ip)]
|
||||
if not missing:
|
||||
return
|
||||
info(
|
||||
f"smolmachines needs {len(missing)} loopback alias(es) on lo0 "
|
||||
f"({', '.join(missing[:3])}{', ...' if len(missing) > 3 else ''}) "
|
||||
f"to scope per-bottle TSI allowlists. sudo will prompt once; "
|
||||
f"aliases persist until reboot."
|
||||
)
|
||||
for ip in missing:
|
||||
result = subprocess.run(
|
||||
["sudo", "-p", "bot-bottle (loopback alias): ",
|
||||
"ifconfig", "lo0", "alias", f"{ip}/32", "up"],
|
||||
check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(
|
||||
f"sudo ifconfig lo0 alias {ip} failed (exit "
|
||||
f"{result.returncode}). Re-run with sudo available, "
|
||||
f"or add manually: sudo ifconfig lo0 alias {ip}/32 up"
|
||||
)
|
||||
|
||||
|
||||
def force_allowlist(machine_name: str, allowed_cidrs: list[str]) -> None:
|
||||
"""Patch smolvm's persistent VM-state DB to set the machine's
|
||||
`allowed_cidrs` to the given list. Workaround for smolvm
|
||||
0.8.0's silent-drop of `--allow-cidr` when used with `--from`.
|
||||
|
||||
Must run AFTER `smolvm machine create` (the row has to
|
||||
exist) and BEFORE `smolvm machine start` (smolvm reads the
|
||||
row on start; in-flight VMs don't pick up changes). Once
|
||||
smolvm honors the CLI flag upstream this whole function is
|
||||
redundant — flag-respecting create + remove this call from
|
||||
launch.
|
||||
|
||||
No-op on non-macOS — the DB path differs and the Linux
|
||||
smolmachines code path isn't exercised in v1."""
|
||||
if not _is_macos():
|
||||
return
|
||||
if not _SMOLVM_DB_PATH.is_file():
|
||||
die(
|
||||
f"smolvm state DB not found at {_SMOLVM_DB_PATH}. "
|
||||
f"smolvm 0.8.0 expected? `smolvm --version` to check."
|
||||
)
|
||||
con = sqlite3.connect(str(_SMOLVM_DB_PATH))
|
||||
try:
|
||||
cur = con.cursor()
|
||||
row = cur.execute(
|
||||
"SELECT data FROM vms WHERE name = ?", (machine_name,),
|
||||
).fetchone()
|
||||
if row is None:
|
||||
die(
|
||||
f"smolvm DB has no row for machine {machine_name!r} — "
|
||||
f"machine_create must run before force_allowlist."
|
||||
)
|
||||
cfg = json.loads(row[0])
|
||||
cfg["allowed_cidrs"] = list(allowed_cidrs)
|
||||
# Write as BLOB (the column type smolvm uses) — passing a
|
||||
# plain str makes sqlite store it as Text and smolvm then
|
||||
# fails to read it.
|
||||
cur.execute(
|
||||
"UPDATE vms SET data = ? WHERE name = ?",
|
||||
(sqlite3.Binary(json.dumps(cfg).encode()), machine_name),
|
||||
)
|
||||
con.commit()
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def allocate(slug: str) -> str:
|
||||
"""Pick the lowest-numbered alias from the pool not already
|
||||
in use by a running smolmachines bundle. Bails when the pool
|
||||
is exhausted — the caller should report the limit to the
|
||||
operator. `slug` is logged for traceability; not otherwise
|
||||
used (no on-disk reservation, allocation is purely
|
||||
docker-state-driven).
|
||||
|
||||
On non-macOS the whole `127.0.0.0/8` is loopback by default;
|
||||
`127.0.0.1` is fine to share and we skip the alias dance.
|
||||
This still returns a deterministic address so launch.py's
|
||||
callers don't have to branch on platform."""
|
||||
if not _is_macos():
|
||||
return "127.0.0.1"
|
||||
in_use = _aliases_in_use()
|
||||
for ip in _pool_addresses():
|
||||
if ip not in in_use:
|
||||
return ip
|
||||
die(
|
||||
f"smolmachines loopback alias pool exhausted "
|
||||
f"({_POOL_END - _POOL_START + 1} aliases, all in use). "
|
||||
f"Stop a running bottle (`smolvm machine ls --json`) or "
|
||||
f"raise _POOL_END in loopback_alias.py."
|
||||
)
|
||||
return "" # unreachable; die() never returns
|
||||
|
||||
|
||||
def _alias_present(ip: str) -> bool:
|
||||
"""True iff `ifconfig lo0` shows `<ip>` as an inet address.
|
||||
Exact-match — `127.0.0.1` shouldn't match `127.0.0.16`."""
|
||||
result = subprocess.run(
|
||||
["/sbin/ifconfig", "lo0"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return False
|
||||
pattern = re.compile(rf"\binet {re.escape(ip)}\b")
|
||||
return bool(pattern.search(result.stdout or ""))
|
||||
|
||||
|
||||
def _aliases_in_use() -> set[str]:
|
||||
"""Aliases already bound by another smolmachines bundle's
|
||||
published-port mappings. We inspect every container whose
|
||||
name matches the smolmachines bundle prefix and pull the
|
||||
`HostIp` out of its port bindings."""
|
||||
result = subprocess.run(
|
||||
["docker", "ps", "--format", "{{.Names}}",
|
||||
"--filter", "name=bot-bottle-sidecars-"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return set()
|
||||
names = [n.strip() for n in (result.stdout or "").splitlines() if n.strip()]
|
||||
in_use: set[str] = set()
|
||||
for name in names:
|
||||
in_use.update(_host_ips_for_container(name))
|
||||
return in_use
|
||||
|
||||
|
||||
def _host_ips_for_container(name: str) -> Iterable[str]:
|
||||
"""Yield the `HostIp` values across all port bindings on
|
||||
container `name`. A bundle binds three or four ports and
|
||||
they all share the same HostIp, so callers can take any."""
|
||||
result = subprocess.run(
|
||||
["docker", "inspect", name,
|
||||
"--format", "{{json .HostConfig.PortBindings}}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return ()
|
||||
try:
|
||||
bindings = json.loads(result.stdout or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return ()
|
||||
seen: set[str] = set()
|
||||
for _port, mappings in (bindings or {}).items():
|
||||
for m in mappings or []:
|
||||
host_ip = m.get("HostIp") or ""
|
||||
if host_ip:
|
||||
seen.add(host_ip)
|
||||
return seen
|
||||
|
||||
|
||||
__all__ = ["allocate", "ensure_pool", "force_allowlist"]
|
||||
@@ -0,0 +1,192 @@
|
||||
"""smolmachines `_resolve_plan` (PRD 0023 chunks 2d + 4c).
|
||||
|
||||
Resolves the per-bottle docker subnet + bundle IP and assembles
|
||||
the guest env. The agent's docker image build → smolmachine
|
||||
pack pipeline runs in `launch.launch`, not here, so the
|
||||
dashboard's preflight modal isn't garbled by docker-build output
|
||||
before the operator has confirmed.
|
||||
|
||||
No VM bringup — that's `launch.launch`'s job."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ...agent_provider import runtime_for
|
||||
from ...backend import BottleSpec
|
||||
from ...backend.docker.bottle_state import (
|
||||
BottleMetadata,
|
||||
agent_state_dir,
|
||||
bottle_identity,
|
||||
egress_state_dir,
|
||||
git_gate_state_dir,
|
||||
pipelock_state_dir,
|
||||
supervise_state_dir,
|
||||
write_metadata,
|
||||
)
|
||||
from ...egress import Egress
|
||||
from ...git_gate import GitGate
|
||||
from ...pipelock import PipelockProxy
|
||||
from ...supervise import Supervise
|
||||
from .bottle_plan import SmolmachinesBottlePlan
|
||||
from .util import smolmachines_bundle_subnet, smolmachines_preflight
|
||||
|
||||
|
||||
# Gateway ports the bundle exposes inside its container — pipelock
|
||||
# HTTPS proxy, git-gate's git-daemon, supervise's MCP. The agent
|
||||
# inside the smolvm guest dials these on the bundle's pinned IP.
|
||||
_BUNDLE_PIPELOCK_PORT = 8888
|
||||
_BUNDLE_GIT_GATE_PORT = 9418
|
||||
_BUNDLE_SUPERVISE_PORT = 9100
|
||||
|
||||
|
||||
def resolve_plan(
|
||||
spec: BottleSpec, *, stage_dir: Path
|
||||
) -> SmolmachinesBottlePlan:
|
||||
"""Materialize the smolmachines plan. The bundle's docker
|
||||
subnet + pinned IP are derived from the slug; the agent's
|
||||
`.smolmachine` artifact is built (or cache-hit) here so
|
||||
launch's `machine create --from` boots without a registry
|
||||
pull. Per-bottle guest env + the TSI allow_cidrs land on the
|
||||
plan for launch to pass straight through to
|
||||
`machine create` flags."""
|
||||
smolmachines_preflight()
|
||||
|
||||
manifest = spec.manifest
|
||||
bottle = manifest.bottle_for(spec.agent_name)
|
||||
provider = bottle.agent_provider
|
||||
provider_runtime = runtime_for(provider.template)
|
||||
|
||||
slug = spec.identity or bottle_identity(spec.agent_name)
|
||||
|
||||
# Record minimal metadata so `cli.py resume` can recover the
|
||||
# slug. Same schema as the docker backend.
|
||||
write_metadata(BottleMetadata(
|
||||
identity=slug,
|
||||
agent_name=spec.agent_name,
|
||||
cwd=spec.user_cwd if spec.copy_cwd else "",
|
||||
copy_cwd=spec.copy_cwd,
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
# No compose project for smolmachines bottles; chunk 4
|
||||
# will give dashboard discovery a backend-specific path.
|
||||
compose_project="",
|
||||
))
|
||||
|
||||
subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug)
|
||||
|
||||
# Agent's env: the prepare-time view doesn't yet know the
|
||||
# host loopback ports the bundle's daemons get published on
|
||||
# (those come from docker AFTER `docker run` returns), so
|
||||
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are
|
||||
# populated in launch.py and stamped onto guest_env there.
|
||||
# What we set here is the part that doesn't depend on
|
||||
# bundle bringup — bottle.env literals, the empty-NO_PROXY
|
||||
# safe default, and the TLS trust env trio
|
||||
# (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE)
|
||||
# pointing at Debian's update-ca-certificates output bundle.
|
||||
guest_env: dict[str, str] = {
|
||||
**bottle.env,
|
||||
"NO_PROXY": "localhost,127.0.0.1",
|
||||
"NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt",
|
||||
"SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt",
|
||||
"REQUESTS_CA_BUNDLE": "/etc/ssl/certs/ca-certificates.crt",
|
||||
}
|
||||
|
||||
# Inner Plans for the four bundle daemons. The ABCs are
|
||||
# platform-neutral — `.prepare()` writes config files + returns
|
||||
# a Plan dataclass with no backend-specific assumptions. State
|
||||
# dirs are still keyed by slug under the docker backend's
|
||||
# bottle_state layout (shared on-host convention; not a docker
|
||||
# dependency).
|
||||
pipelock_dir = pipelock_state_dir(slug)
|
||||
pipelock_dir.mkdir(parents=True, exist_ok=True)
|
||||
proxy_plan = PipelockProxy().prepare(bottle, slug, pipelock_dir)
|
||||
|
||||
git_gate_dir = git_gate_state_dir(slug)
|
||||
git_gate_dir.mkdir(parents=True, exist_ok=True)
|
||||
git_gate_plan = GitGate().prepare(bottle, slug, git_gate_dir)
|
||||
|
||||
egress_dir = egress_state_dir(slug)
|
||||
egress_dir.mkdir(parents=True, exist_ok=True)
|
||||
egress_plan = Egress().prepare(bottle, slug, egress_dir)
|
||||
|
||||
# Claude-code refuses to start without *something* it
|
||||
# recognises as a credential. When the bottle has an egress
|
||||
# route carrying the `claude_code_oauth` role marker, egress
|
||||
# strips + re-injects the real Authorization header on the
|
||||
# outbound leg using a token held in egress's own environ — so
|
||||
# the agent gets a non-secret placeholder here (matches the
|
||||
# docker backend's forwarded_env logic in
|
||||
# bot_bottle/backend/docker/prepare.py).
|
||||
has_provider_auth = any(
|
||||
provider_runtime.auth_role in r.roles for r in egress_plan.routes
|
||||
)
|
||||
if has_provider_auth:
|
||||
guest_env[provider_runtime.placeholder_env] = "egress-placeholder"
|
||||
if provider.template == "claude" and has_provider_auth:
|
||||
guest_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
|
||||
guest_env.setdefault("DISABLE_ERROR_REPORTING", "1")
|
||||
|
||||
supervise_plan = None
|
||||
if bottle.supervise:
|
||||
supervise_dir = supervise_state_dir(slug)
|
||||
supervise_dir.mkdir(parents=True, exist_ok=True)
|
||||
supervise_plan = Supervise().prepare(slug, supervise_dir)
|
||||
|
||||
# Prompt file is always written (mode 0o600) so the in-VM
|
||||
# path always exists. Content is the agent's `prompt`
|
||||
# field (markdown body) — empty for agents with no prompt.
|
||||
# claude-code reads it via --append-system-prompt-file only
|
||||
# when non-empty, but the file must exist either way to
|
||||
# match the docker backend's contract.
|
||||
agent_dir = agent_state_dir(slug)
|
||||
agent_dir.mkdir(parents=True, exist_ok=True)
|
||||
prompt_file = agent_dir / "prompt.txt"
|
||||
agent = manifest.agents[spec.agent_name]
|
||||
prompt_file.write_text(agent.prompt or "")
|
||||
prompt_file.chmod(0o600)
|
||||
|
||||
machine_name = f"bot-bottle-{slug}"
|
||||
# Stash the agent image ref — `launch.launch` runs the
|
||||
# build → pack pipeline at bringup. Honors BOT_BOTTLE_IMAGE
|
||||
# to match the docker backend's `resolve_plan` default.
|
||||
agent_dockerfile_path = ""
|
||||
if provider.dockerfile:
|
||||
agent_dockerfile_path = _resolve_manifest_dockerfile(provider.dockerfile, spec)
|
||||
image_default = f"bot-bottle-{provider.template}:{slug}"
|
||||
elif provider_runtime.dockerfile:
|
||||
agent_dockerfile_path = provider_runtime.dockerfile
|
||||
image_default = provider_runtime.image
|
||||
else:
|
||||
image_default = provider_runtime.image
|
||||
agent_image_ref = os.environ.get("BOT_BOTTLE_IMAGE", image_default)
|
||||
|
||||
return SmolmachinesBottlePlan(
|
||||
spec=spec,
|
||||
stage_dir=stage_dir,
|
||||
slug=slug,
|
||||
bundle_subnet=subnet,
|
||||
bundle_gateway=gateway,
|
||||
bundle_ip=bundle_ip,
|
||||
machine_name=machine_name,
|
||||
agent_image_ref=agent_image_ref,
|
||||
guest_env=guest_env,
|
||||
prompt_file=prompt_file,
|
||||
proxy_plan=proxy_plan,
|
||||
git_gate_plan=git_gate_plan,
|
||||
egress_plan=egress_plan,
|
||||
supervise_plan=supervise_plan,
|
||||
agent_command=provider_runtime.command,
|
||||
agent_prompt_mode=provider_runtime.prompt_mode,
|
||||
agent_provider_template=provider.template,
|
||||
agent_dockerfile_path=agent_dockerfile_path,
|
||||
)
|
||||
|
||||
|
||||
def _resolve_manifest_dockerfile(path_value: str, spec: BottleSpec) -> str:
|
||||
path = Path(os.path.expanduser(path_value))
|
||||
if not path.is_absolute():
|
||||
path = Path(spec.user_cwd) / path
|
||||
return str(path)
|
||||
@@ -0,0 +1,14 @@
|
||||
"""Provisioning helpers for the smolmachines backend (PRD 0023
|
||||
chunk 4).
|
||||
|
||||
Each method maps onto one of `BottleBackend`'s `provision_*`
|
||||
overrides. They run after the VM is up + the bundle is reachable
|
||||
and copy host-side state (prompt, skills, .git, CA cert,
|
||||
supervise MCP config) into the guest via `smolvm machine cp` /
|
||||
`smolvm machine exec`.
|
||||
|
||||
Chunk 4a ships `provision_prompt` and `provision_skills` — the
|
||||
two that don't depend on agent-image tooling (claude-code,
|
||||
update-ca-certificates) beyond `cp` and `mkdir`. provision_ca /
|
||||
provision_git / provision_supervise land once the agent-image
|
||||
gap is solved."""
|
||||
@@ -0,0 +1,104 @@
|
||||
"""Install the per-bottle MITM CA into the smolmachines guest's
|
||||
trust store (PRD 0023 chunk 4d).
|
||||
|
||||
Mirrors `backend.docker.provision.ca`: select the right CA (egress
|
||||
when the bottle has routes, else pipelock), `smolvm machine cp` it
|
||||
to Debian's `/usr/local/share/ca-certificates/` path,
|
||||
`update-ca-certificates` to rebuild the trust bundle, and log the
|
||||
fingerprint once. The selected cert depends on the agent's
|
||||
HTTP_PROXY target — same logic as the docker backend, since the
|
||||
agent dials the same daemons through the same bundle.
|
||||
|
||||
`smolvm machine exec` runs commands as root in the VM (no `-u`
|
||||
flag exists; the VM init is root), so we don't need the explicit
|
||||
`-u 0` the docker backend uses on its `docker exec` calls."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import ssl
|
||||
from pathlib import Path
|
||||
|
||||
from ....log import die, info
|
||||
from ...docker.provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
def _select_ca_cert(plan: SmolmachinesBottlePlan) -> tuple[Path, str]:
|
||||
"""Pick the CA cert (and a short label for the log line) that
|
||||
matches the proxy the agent's HTTP_PROXY points at. Egress-proxy
|
||||
wins when the bottle declares any routes; else pipelock.
|
||||
|
||||
The launch step minted both CAs (pipelock always; egress when
|
||||
routes are declared) and stored their host paths back into the
|
||||
inner Plans via `dataclasses.replace`. If those paths are empty
|
||||
here something has gone wrong in launch's bringup."""
|
||||
if plan.egress_plan.routes:
|
||||
cert = plan.egress_plan.mitmproxy_ca_cert_only_host_path
|
||||
if cert == Path() or not cert.is_file():
|
||||
die(
|
||||
f"egress CA cert missing at {cert or '(empty)'}; "
|
||||
f"launch must have called egress_tls_init and "
|
||||
f"re-bound the plan before provision"
|
||||
)
|
||||
return cert, "egress"
|
||||
cert = plan.proxy_plan.ca_cert_host_path
|
||||
if not cert or not cert.is_file():
|
||||
die(
|
||||
f"pipelock CA cert missing at {cert or '(empty)'}; "
|
||||
f"launch must have called pipelock_tls_init and re-bound "
|
||||
f"the plan before provision"
|
||||
)
|
||||
return cert, "pipelock"
|
||||
|
||||
|
||||
def provision_ca(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Copy the agent-facing CA cert into the guest, rebuild the
|
||||
trust bundle, emit a one-line fingerprint log. Called from
|
||||
`BottleBackend.provision` after the smolvm guest is up."""
|
||||
cert_host_path, label = _select_ca_cert(plan)
|
||||
|
||||
_smolvm.machine_cp(str(cert_host_path), f"{target}:{AGENT_CA_PATH}")
|
||||
# Mode 0644 — readable to non-root tools in the guest.
|
||||
# update-ca-certificates rebuilds the bundle at AGENT_CA_BUNDLE,
|
||||
# which is what curl / Python ssl / OpenSSL-based tools read by
|
||||
# default. The env trio (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE /
|
||||
# REQUESTS_CA_BUNDLE) on the guest_env covers Node + Python
|
||||
# `requests` / libraries that don't load the system bundle.
|
||||
#
|
||||
# chown + chmod + update-ca-certificates run in one
|
||||
# `sh -c` so we only pay one machine_exec round trip; the
|
||||
# `&&` chaining surfaces the first failure as the return
|
||||
# code.
|
||||
r = _smolvm.machine_exec(target, [
|
||||
"sh", "-c",
|
||||
f"chown root:root {AGENT_CA_PATH} && "
|
||||
f"chmod 644 {AGENT_CA_PATH} && "
|
||||
f"update-ca-certificates",
|
||||
])
|
||||
if r.returncode != 0 or "1 added" not in (r.stdout or ""):
|
||||
# update-ca-certificates not adding our cert is fatal —
|
||||
# claude-code's TLS handshake against the egress-MITM'd
|
||||
# api.anthropic.com would fail downstream. Bail early
|
||||
# with what we can see (output is captured by smolvm so
|
||||
# we can surface it).
|
||||
die(
|
||||
f"update-ca-certificates didn't add the agent CA "
|
||||
f"(exit {r.returncode}): "
|
||||
f"stdout={(r.stdout or '').strip()!r} "
|
||||
f"stderr={(r.stderr or '').strip()!r}"
|
||||
)
|
||||
|
||||
# Stdlib SHA-256 of the cert's DER bytes — the standard
|
||||
# fingerprint form. Never the private key.
|
||||
der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text())
|
||||
fingerprint = hashlib.sha256(der).hexdigest()
|
||||
info(f"{label} ca fingerprint: sha256:{fingerprint[:32]}...")
|
||||
|
||||
|
||||
# Re-exported for the launch/provision_ca caller + tests. The path
|
||||
# constants come from the docker module because they're tied to
|
||||
# Debian's `update-ca-certificates` layout — same in both backends
|
||||
# since both guest images are Debian-family.
|
||||
__all__ = ["AGENT_CA_BUNDLE", "AGENT_CA_PATH", "provision_ca"]
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Git provisioning inside a running smolmachines bottle
|
||||
(PRD 0023 chunk 4d).
|
||||
|
||||
Three concerns, all about git in the agent:
|
||||
|
||||
1. If --cwd was passed AND the host cwd has a .git, copy that
|
||||
.git into /home/node/workspace/.git so the agent operates on
|
||||
the user's repo.
|
||||
2. If the bottle declares `git` entries (PRD 0008), write a
|
||||
~/.gitconfig with insteadOf rules so every git operation
|
||||
against a declared upstream transparently hits the per-bottle
|
||||
git-gate. The gate mirrors the upstream in both directions,
|
||||
so URL rewriting is symmetric.
|
||||
3. If the bottle declares `git.user` (issue #86), set
|
||||
`git config --global user.{name,email}` inside the guest so
|
||||
the agent's commits are attributed to that identity.
|
||||
|
||||
Differs from `backend.docker.provision.git` in one address detail:
|
||||
the TSI-allowlisted guest can only reach the bundle's pinned IP
|
||||
(no DNS resolver in the /32 allowlist), so the insteadOf URLs
|
||||
are `git://<bundle_ip>:<port>/<name>.git` rather than the
|
||||
docker backend's `git://git-gate/<name>.git`. The render itself
|
||||
is the shared `git_gate_render_gitconfig` on the platform-neutral
|
||||
git_gate module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from ....git_gate import git_gate_render_gitconfig
|
||||
from ....log import info
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
# `node` is the agent user from the repo Dockerfile. Override via
|
||||
# BOT_BOTTLE_GUEST_HOME mirrors the docker backend's
|
||||
# BOT_BOTTLE_CONTAINER_HOME knob — same purpose, different
|
||||
# transport.
|
||||
_DEFAULT_GUEST_HOME = "/home/node"
|
||||
|
||||
|
||||
def _guest_home() -> str:
|
||||
return os.environ.get("BOT_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
|
||||
|
||||
|
||||
def provision_git(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Set up git inside the guest. Runs all three subcases; each
|
||||
no-ops when its condition isn't met."""
|
||||
_provision_cwd_git(plan, target)
|
||||
_provision_git_gate_config(plan, target)
|
||||
_provision_git_user(plan, target)
|
||||
|
||||
|
||||
def _provision_cwd_git(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""If --cwd was set and the host cwd has a .git directory, copy
|
||||
it into <guest_home>/workspace/.git and fix ownership. No-op
|
||||
otherwise."""
|
||||
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
|
||||
return
|
||||
guest_workspace_git = f"{_guest_home()}/workspace/.git"
|
||||
info(f"copying {plan.spec.user_cwd}/.git -> {target}:{guest_workspace_git}")
|
||||
# mkdir -p the workspace dir so `machine cp` lands the .git
|
||||
# directly there even on first-time bottles.
|
||||
_smolvm.machine_exec(target, ["mkdir", "-p", f"{_guest_home()}/workspace"])
|
||||
_smolvm.machine_cp(
|
||||
f"{plan.spec.user_cwd}/.git", f"{target}:{guest_workspace_git}",
|
||||
)
|
||||
# `machine cp` lands files as root; the agent runs as node so
|
||||
# the workspace tree must be chowned over.
|
||||
_smolvm.machine_exec(
|
||||
target, ["chown", "-R", "node:node", guest_workspace_git],
|
||||
)
|
||||
|
||||
|
||||
def _provision_git_gate_config(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Write ~/.gitconfig in the guest with the git-gate insteadOf
|
||||
rules. No-op when the bottle has no `git` entries."""
|
||||
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
|
||||
if not bottle.git:
|
||||
return
|
||||
|
||||
# `127.0.0.1:<host port>` form: the bundle's git-gate port
|
||||
# is published on host loopback at launch time so the
|
||||
# smolvm guest (which can only reach macOS networking via
|
||||
# TSI, not the docker bridge IP) can dial it. launch.py
|
||||
# populates `plan.agent_git_gate_host` after bundle bringup.
|
||||
content = git_gate_render_gitconfig(bottle.git, plan.agent_git_gate_host)
|
||||
|
||||
guest_gitconfig = f"{_guest_home()}/.gitconfig"
|
||||
# Stage the file under the plan's stage_dir so `machine cp`
|
||||
# has a stable host path. The plan's stage_dir is cleaned up
|
||||
# by start.py's session-end teardown.
|
||||
with tempfile.NamedTemporaryFile(
|
||||
"w", dir=str(plan.stage_dir), prefix="gitconfig.",
|
||||
delete=False,
|
||||
) as f:
|
||||
f.write(content)
|
||||
config_file = Path(f.name)
|
||||
os.chmod(config_file, 0o600)
|
||||
|
||||
info(f"writing {guest_gitconfig} with {len(bottle.git)} insteadOf rule(s)")
|
||||
_smolvm.machine_cp(str(config_file), f"{target}:{guest_gitconfig}")
|
||||
_smolvm.machine_exec(target, ["chown", "node:node", guest_gitconfig])
|
||||
_smolvm.machine_exec(target, ["chmod", "644", guest_gitconfig])
|
||||
|
||||
|
||||
def _provision_git_user(
|
||||
plan: SmolmachinesBottlePlan, target: str,
|
||||
) -> None:
|
||||
"""Apply `git config --global user.{name,email}` inside the
|
||||
guest as the node user so --global lands in the same
|
||||
`/home/node/.gitconfig` that `_provision_git_gate_config`
|
||||
writes to. No-op when the bottle didn't declare `git.user`.
|
||||
|
||||
Runs via `runuser -u node --`; HOME is forced via smolvm's
|
||||
`-e` flag because runuser (without -l) inherits root's
|
||||
HOME=/root, which would put --global in the wrong file."""
|
||||
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
|
||||
gu = bottle.git_user
|
||||
if gu.is_empty():
|
||||
return
|
||||
env = {"HOME": _guest_home(), "USER": "node"}
|
||||
if gu.name:
|
||||
info(f"git config --global user.name = {gu.name!r}")
|
||||
_smolvm.machine_exec(
|
||||
target,
|
||||
["runuser", "-u", "node", "--",
|
||||
"git", "config", "--global", "user.name", gu.name],
|
||||
env=env,
|
||||
)
|
||||
if gu.email:
|
||||
info(f"git config --global user.email = {gu.email!r}")
|
||||
_smolvm.machine_exec(
|
||||
target,
|
||||
["runuser", "-u", "node", "--",
|
||||
"git", "config", "--global", "user.email", gu.email],
|
||||
env=env,
|
||||
)
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Copy the agent prompt into a running smolmachines bottle.
|
||||
|
||||
The prompt file is always copied (so the in-guest path always
|
||||
exists) but `--append-system-prompt-file` only fires when the
|
||||
agent actually has a prompt — the return value signals which
|
||||
case, mirroring the docker backend's contract.
|
||||
|
||||
`smolvm machine cp` lands files as root inside the VM; the claude
|
||||
process runs as `node`, so we chown + chmod the prompt after the
|
||||
copy. Same flow as the docker backend's provision_prompt."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
# `node` is the agent user from the repo Dockerfile.
|
||||
# BOT_BOTTLE_GUEST_HOME mirrors the docker backend's
|
||||
# BOT_BOTTLE_CONTAINER_HOME knob.
|
||||
_DEFAULT_GUEST_HOME = "/home/node"
|
||||
|
||||
|
||||
def provision_prompt(plan: SmolmachinesBottlePlan, target: str) -> str | None:
|
||||
"""Copy the prompt file into the running smolvm guest, fix
|
||||
ownership/mode. Returns the in-guest path if the agent has a
|
||||
non-empty prompt (drives --append-system-prompt-file), else
|
||||
None. The file is copied either way so the path always
|
||||
exists — mirrors the docker backend's behavior."""
|
||||
guest_home = os.environ.get("BOT_BOTTLE_GUEST_HOME", _DEFAULT_GUEST_HOME)
|
||||
in_guest_prompt_path = f"{guest_home}/.bot-bottle-prompt.txt"
|
||||
|
||||
_smolvm.machine_cp(str(plan.prompt_file), f"{target}:{in_guest_prompt_path}")
|
||||
# machine cp lands as root, source's 0o600 mode is preserved —
|
||||
# node can't read its own prompt without these two.
|
||||
_smolvm.machine_exec(target, ["chown", "node:node", in_guest_prompt_path])
|
||||
_smolvm.machine_exec(target, ["chmod", "600", in_guest_prompt_path])
|
||||
|
||||
agent = plan.spec.manifest.agents[plan.spec.agent_name]
|
||||
return in_guest_prompt_path if agent.prompt else None
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Copy host-side skill directories into a running smolmachines
|
||||
bottle.
|
||||
|
||||
Skills are validated on the host before launch by
|
||||
`BottleBackend._validate_skills`; this module assumes that
|
||||
validation has already run. A skill that disappears between
|
||||
validation and copy still dies loudly rather than silently
|
||||
producing a partial guest."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from ....log import die, info
|
||||
from ...util import host_skill_dir
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
# In-guest path mirrors the docker backend's claude-skills
|
||||
# convention (~/.claude/skills/<name>/) under the node user's
|
||||
# home — same path as the real bot-bottle image's
|
||||
# /home/node/.claude/skills (pre-created in the Dockerfile).
|
||||
_DEFAULT_SKILLS_DIR = "/home/node/.claude/skills"
|
||||
|
||||
|
||||
def provision_skills(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Copy each of the agent's named skills from the host's
|
||||
~/.claude/skills/<name>/ into the guest's equivalent path.
|
||||
For each skill: `mkdir -p` the destination, `smolvm machine cp`
|
||||
the host source dir over, then chown the result to node:node so
|
||||
the agent can read it. No-op when the agent has no skills.
|
||||
|
||||
smolvm machine cp on a directory copies recursively (same
|
||||
semantics as `cp -r`); unlike docker cp's trailing-slash
|
||||
convention, smolvm doesn't need the `/.` suffix dance.
|
||||
|
||||
machine cp lands files as root inside the VM, so we chown each
|
||||
skill tree over to node:node after the copy — same pattern as
|
||||
the docker backend's provision_prompt."""
|
||||
agent = plan.spec.manifest.agents[plan.spec.agent_name]
|
||||
if not agent.skills:
|
||||
return
|
||||
|
||||
skills_dir = os.environ.get(
|
||||
"BOT_BOTTLE_GUEST_SKILLS_DIR", _DEFAULT_SKILLS_DIR,
|
||||
)
|
||||
|
||||
_smolvm.machine_exec(target, ["mkdir", "-p", skills_dir])
|
||||
|
||||
for name in agent.skills:
|
||||
src = host_skill_dir(name)
|
||||
if not os.path.isdir(src):
|
||||
die(
|
||||
f"skill {name!r} disappeared from host between "
|
||||
f"validation and copy at {src}."
|
||||
)
|
||||
dst = f"{skills_dir}/{name}"
|
||||
info(f"copying skill {name} into {target}:{dst}")
|
||||
# Wipe any prior copy so re-runs don't accumulate.
|
||||
_smolvm.machine_exec(target, ["rm", "-rf", dst])
|
||||
_smolvm.machine_cp(src, f"{target}:{dst}")
|
||||
_smolvm.machine_exec(target, ["chown", "-R", "node:node", dst])
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Supervise sidecar provisioning inside a running smolmachines
|
||||
bottle (PRD 0023 chunk 4d; PRD 0013 supervise plane).
|
||||
|
||||
Registers the per-bottle supervise sidecar as an HTTP MCP server
|
||||
in the agent's claude-code config so the agent discovers the
|
||||
stuck-recovery MCP tools (pipelock-block, capability-block) at
|
||||
startup.
|
||||
|
||||
Mirrors `backend.docker.provision.supervise` — same `claude mcp
|
||||
add` call, just dispatched via `smolvm machine exec` instead of
|
||||
`docker exec`, and against `<bundle_ip>:<port>` instead of the
|
||||
short `supervise` alias (no DNS in the TSI-allowlisted guest)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from ....log import info, warn
|
||||
from .. import smolvm as _smolvm
|
||||
from ..bottle_plan import SmolmachinesBottlePlan
|
||||
|
||||
|
||||
_SUPERVISE_MCP_NAME = "supervise"
|
||||
|
||||
|
||||
def provision_supervise(plan: SmolmachinesBottlePlan, target: str) -> None:
|
||||
"""Run `claude mcp add` inside the guest to register the
|
||||
supervise sidecar in claude-code's user config. No-op when
|
||||
bottle.supervise is False.
|
||||
|
||||
The URL is the agent-side endpoint launch.py populated after
|
||||
bundle bringup — `http://127.0.0.1:<host port>/` rather than
|
||||
the bundle's docker bridge IP, because that bridge isn't
|
||||
reachable from the smolvm guest on macOS.
|
||||
|
||||
Failure is logged but not fatal: the bottle still works (you
|
||||
just can't call supervise tools from the agent until the entry
|
||||
is added manually). The operator sees the warning at launch."""
|
||||
if plan.supervise_plan is None:
|
||||
return
|
||||
url = plan.agent_supervise_url
|
||||
info(f"registering supervise MCP server in agent claude config → {url}")
|
||||
# `claude mcp add --scope user` writes to ~/.claude.json. The
|
||||
# agent is the `node` user; smolvm machine_exec runs as root
|
||||
# by default, so we have to switch user explicitly and set
|
||||
# HOME so the config lands in /home/node/.claude.json (where
|
||||
# the agent's claude actually reads it from).
|
||||
r = _smolvm.machine_exec(
|
||||
target,
|
||||
[
|
||||
"runuser", "-u", "node", "--",
|
||||
"env", "HOME=/home/node",
|
||||
"claude", "mcp", "add",
|
||||
"--scope", "user",
|
||||
"--transport", "http",
|
||||
_SUPERVISE_MCP_NAME,
|
||||
url,
|
||||
],
|
||||
)
|
||||
if r.returncode != 0:
|
||||
warn(
|
||||
f"`claude mcp add supervise` failed (exit {r.returncode}): "
|
||||
f"{(r.stderr or r.stdout or '').strip()}. Inside the bottle, "
|
||||
f"register manually with: "
|
||||
f"claude mcp add --scope user --transport http supervise {url}"
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["provision_supervise"]
|
||||
@@ -0,0 +1,149 @@
|
||||
"""Host-side SIGWINCH → in-VM PTY resize bridge (issue #82).
|
||||
|
||||
smolvm 0.8.0 `machine exec -t` allocates an in-VM PTY but never
|
||||
forwards the host terminal's window size (TIOCSWINSZ) to it. The
|
||||
PTY's initial size is `0 0`, and any host-side resize during the
|
||||
session goes unnoticed — the in-VM claude TUI keeps rendering for
|
||||
whatever (typically tiny) box it last saw, ignoring the operator's
|
||||
tmux pane resize. `docker exec -it` does this forwarding
|
||||
automatically; smolvm doesn't.
|
||||
|
||||
This module wraps `smolvm machine exec` with a thin parent
|
||||
process that:
|
||||
|
||||
1. Spawns the original argv as a child (it gets the inherited
|
||||
TTY, so claude's stdin/stdout/stderr work unchanged).
|
||||
2. On startup + every host SIGWINCH, reads the host terminal
|
||||
size via TIOCGWINSZ on stdin (or stderr if stdin isn't a
|
||||
TTY — tmux respawn-pane gives us a TTY on stdout/stderr)
|
||||
and pushes it into the VM with a side-channel
|
||||
`smolvm machine exec -- sh -c 'for f in /dev/pts/*; do
|
||||
stty -F $f cols X rows Y; done'`. The kernel delivers
|
||||
SIGWINCH to the foreground process group on the slave end
|
||||
automatically, so claude picks up the new size without
|
||||
extra signalling.
|
||||
3. Waits on the child and exits with its returncode.
|
||||
|
||||
The dashboard's tmux pane respawn calls `bottle.claude_argv`
|
||||
which now prepends `[sys.executable, -m, ..., <machine>, --, ...]`
|
||||
to the smolvm argv. Foreground handoff (curses endwin →
|
||||
subprocess.run) goes through the same path so behavior is
|
||||
identical.
|
||||
|
||||
Removable once smolvm grows native SIGWINCH forwarding (upstream
|
||||
follow-up tracked separately)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fcntl
|
||||
import signal
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import termios
|
||||
import threading
|
||||
|
||||
|
||||
# How long to wait after the main exec starts before pushing the
|
||||
# initial size. Concurrent `smolvm machine exec` invocations race
|
||||
# libkrun's per-exec OCI config write during the main exec's
|
||||
# bringup window; the side-channel firing immediately corrupts
|
||||
# `config.json` and the main exec dies with SIGKILL (rc=137) or
|
||||
# libkrun's "parse error: trailing garbage" depending on
|
||||
# scheduling. Two seconds is well past the bringup window on a
|
||||
# warm VM, well under the operator's "this is unresponsive"
|
||||
# threshold, and short enough that claude's initial render
|
||||
# almost always fires after the size has been set.
|
||||
_STARTUP_SYNC_DELAY_SEC = 2.0
|
||||
|
||||
|
||||
def _read_winsize() -> tuple[int, int] | None:
|
||||
"""Return `(rows, cols)` from whichever of stdin / stdout /
|
||||
stderr is a TTY, or None if none are. Different invocation
|
||||
surfaces give us different TTYs:
|
||||
|
||||
- foreground handoff (curses endwin → subprocess.run): all
|
||||
three are the operator's terminal.
|
||||
- tmux respawn-pane: tmux sets all three to the pane's PTY.
|
||||
- non-TTY (someone piped stdin in tests): none are; the
|
||||
sync just no-ops, which is the right behavior."""
|
||||
for fd in (sys.stdin.fileno(), sys.stdout.fileno(), sys.stderr.fileno()):
|
||||
try:
|
||||
data = fcntl.ioctl(fd, termios.TIOCGWINSZ, b"\x00" * 8)
|
||||
except OSError:
|
||||
continue
|
||||
rows, cols, _, _ = struct.unpack("hhhh", data)
|
||||
if rows > 0 and cols > 0:
|
||||
return rows, cols
|
||||
return None
|
||||
|
||||
|
||||
def _push_size(machine: str, rows: int, cols: int) -> None:
|
||||
"""Side-channel `smolvm machine exec` that sets the size of
|
||||
every PTY in the VM. The shell `for` loop covers the case of
|
||||
multiple concurrent interactive sessions (rare but cheap to
|
||||
handle); `stty -F` returns silently on PTYs that don't apply.
|
||||
|
||||
Best-effort: swallow failures. A failed resize doesn't break
|
||||
the session — it just leaves the in-VM PTY at its old size.
|
||||
|
||||
`stdin=DEVNULL` is load-bearing: under tmux, inheriting the
|
||||
pane PTY here means two concurrent smolvm processes (this one
|
||||
and the agent session the wrapper is shepherding) share the
|
||||
PTY's foreground-process-group / input plumbing, and smolvm
|
||||
bails with an internal config-parse error or SIGKILL within
|
||||
~100ms of the side-channel firing. Outside tmux the same
|
||||
pattern survived, presumably because iTerm's PTY plumbing is
|
||||
more forgiving than tmux's, but the DEVNULL is the right
|
||||
default either way — the side-channel never needs stdin."""
|
||||
subprocess.run(
|
||||
["smolvm", "machine", "exec", "--name", machine, "--",
|
||||
"sh", "-c",
|
||||
f"for f in /dev/pts/*; do "
|
||||
f"stty -F \"$f\" cols {cols} rows {rows} 2>/dev/null; "
|
||||
f"done"],
|
||||
stdin=subprocess.DEVNULL,
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
|
||||
"""Entry point. `argv` shape: `<machine> -- <smolvm-argv...>`.
|
||||
|
||||
We don't use argparse — the `--` separator is the contract and
|
||||
everything past it is forwarded verbatim. Keeps the wrapper
|
||||
transparent for callers building argv programmatically."""
|
||||
if len(argv) < 3 or argv[1] != "--":
|
||||
sys.stderr.write(
|
||||
"usage: python -m bot_bottle.backend.smolmachines.pty_resize "
|
||||
"<machine> -- <smolvm-argv...>\n"
|
||||
)
|
||||
return 2
|
||||
machine = argv[0]
|
||||
inner = argv[2:]
|
||||
|
||||
def sync(*_args) -> None:
|
||||
size = _read_winsize()
|
||||
if size is None:
|
||||
return
|
||||
_push_size(machine, *size)
|
||||
|
||||
signal.signal(signal.SIGWINCH, sync)
|
||||
|
||||
proc = subprocess.Popen(inner)
|
||||
# Initial sync is deferred — see _STARTUP_SYNC_DELAY_SEC.
|
||||
# daemon=True so the timer doesn't block exit when the child
|
||||
# finishes before the delay elapses.
|
||||
timer = threading.Timer(_STARTUP_SYNC_DELAY_SEC, sync)
|
||||
timer.daemon = True
|
||||
timer.start()
|
||||
while True:
|
||||
try:
|
||||
return proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
proc.send_signal(signal.SIGINT)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv[1:]))
|
||||
@@ -0,0 +1,221 @@
|
||||
"""Per-bottle sidecar bundle bringup for the smolmachines backend
|
||||
(PRD 0023).
|
||||
|
||||
Two docker resources per bottle live here:
|
||||
|
||||
- **A dedicated bridge network**, subnet derived from the slug.
|
||||
The bundle container gets a pinned IP at `<subnet>.2` so the
|
||||
smolvm guest's TSI allowlist (`<bundle-ip>/32`) has a stable
|
||||
target. Without pinning, we'd have to inspect the container's
|
||||
assigned IP after start and feed it back into the Smolfile
|
||||
— a race we can sidestep with `--ip`.
|
||||
|
||||
- **The bundle container itself**, running the PRD 0024 bundle
|
||||
image (`bot-bottle-sidecars:latest` by default). Same
|
||||
image, same daemons, same daemon-private env / bind-mounts
|
||||
as the docker backend.
|
||||
|
||||
This module ships the lifecycle primitives only — create
|
||||
network, start bundle, stop bundle, remove network — wrapped
|
||||
around `subprocess.run(["docker", ...])`. Wiring them into the
|
||||
launch flow + populating the `BundleLaunchSpec` from the inner
|
||||
Plans (PipelockProxyPlan, EgressPlan, …) lands in chunk 2d."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from ...log import die, warn
|
||||
from ..docker.sidecar_bundle import SIDECAR_BUNDLE_IMAGE
|
||||
|
||||
|
||||
def bundle_network_name(slug: str) -> str:
|
||||
"""`bot-bottle-bundle-<slug>` — distinct from the docker
|
||||
backend's `bot-bottle-net-<slug>` so a smolmachines bottle
|
||||
and a docker bottle for the same agent don't collide on
|
||||
network name."""
|
||||
return f"bot-bottle-bundle-{slug}"
|
||||
|
||||
|
||||
def bundle_container_name(slug: str) -> str:
|
||||
"""`bot-bottle-sidecars-<slug>` — same name shape the docker
|
||||
backend uses for the bundle (PRD 0024 chunk 5). The dashboard's
|
||||
prefix-based discovery covers both backends with one filter."""
|
||||
return f"bot-bottle-sidecars-{slug}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BundleLaunchSpec:
|
||||
"""Everything `start_bundle` needs to bring up one bundle
|
||||
container. Populated by chunk-2d's launch flow from the inner
|
||||
Plans the prepare step already produces."""
|
||||
|
||||
slug: str
|
||||
network_name: str
|
||||
subnet: str
|
||||
gateway: str
|
||||
bundle_ip: str
|
||||
image: str = SIDECAR_BUNDLE_IMAGE
|
||||
# Daemon subset CSV for BOT_BOTTLE_SIDECAR_DAEMONS. The
|
||||
# supervisor inside the bundle reads it to skip
|
||||
# bottle-irrelevant daemons (e.g. supervise=False bottles).
|
||||
daemons_csv: str = "egress,pipelock"
|
||||
# Plain "KEY=VALUE" strings + "KEY" bare names (the bare-name
|
||||
# form inherits the value from the docker-run subprocess env,
|
||||
# matching the docker backend's compose-up secret-forwarding
|
||||
# pattern).
|
||||
environment: Sequence[str] = field(default_factory=tuple)
|
||||
# (host_path, container_path, read_only) bind mounts.
|
||||
volumes: Sequence[tuple[str, str, bool]] = field(default_factory=tuple)
|
||||
# Container ports to publish on `publish_host_ip`, random
|
||||
# host-side port per entry. The smolvm guest's TSI talks via
|
||||
# macOS networking, so docker container IPs (192.168.x.x in
|
||||
# the daemon's bridge) aren't directly reachable from the
|
||||
# guest — host-loopback port-forwards are. Egress's port
|
||||
# is bundle-internal and never published.
|
||||
ports_to_publish: Sequence[int] = field(default_factory=tuple)
|
||||
# Loopback IP to bind published ports against. Per-bottle
|
||||
# loopback aliases (`127.0.0.16` etc., added via sudo
|
||||
# ifconfig lo0 alias) narrow the TSI allowlist so a bottle
|
||||
# can't reach other bottles' (or other host services') ports
|
||||
# via 127.0.0.1.
|
||||
publish_host_ip: str = "127.0.0.1"
|
||||
|
||||
|
||||
def create_bundle_network(network_name: str, subnet: str, gateway: str) -> None:
|
||||
"""`docker network create` with an explicit subnet + gateway
|
||||
so the bundle's `--ip` lands on the address the Smolfile's
|
||||
TSI allowlist points at. Idempotent on the caller's side —
|
||||
`start_bundle` catches the "network exists" error and treats
|
||||
it as success (chunk-2d teardown is paired with each create).
|
||||
"""
|
||||
result = subprocess.run(
|
||||
["docker", "network", "create",
|
||||
"--subnet", subnet, "--gateway", gateway,
|
||||
network_name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
# Already-exists is fine on a resume path; everything else
|
||||
# is fatal — the bundle won't have an addressable network.
|
||||
if "already exists" in (result.stderr or "").lower():
|
||||
return
|
||||
die(
|
||||
f"docker network create {network_name} failed: "
|
||||
f"{(result.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
|
||||
def remove_bundle_network(network_name: str) -> None:
|
||||
"""Idempotent: a missing network returns success."""
|
||||
result = subprocess.run(
|
||||
["docker", "network", "rm", network_name],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return
|
||||
if "no such network" in (result.stderr or "").lower():
|
||||
return
|
||||
# Network with attached containers is the common non-fatal
|
||||
# case during a partial teardown — warn but don't die.
|
||||
warn(
|
||||
f"docker network rm {network_name} failed: "
|
||||
f"{(result.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
|
||||
def start_bundle(spec: BundleLaunchSpec, *,
|
||||
env: dict[str, str] | None = None) -> None:
|
||||
"""Bring the bundle container up on the per-bottle bridge with
|
||||
the pinned IP. Argv is built deterministically from `spec`;
|
||||
`env` is the host subprocess env (forwarded values for any
|
||||
bare-name entries in `spec.environment`)."""
|
||||
container = bundle_container_name(spec.slug)
|
||||
argv = [
|
||||
"docker", "run",
|
||||
"--name", container,
|
||||
"--detach",
|
||||
"--rm",
|
||||
"--network", spec.network_name,
|
||||
"--ip", spec.bundle_ip,
|
||||
"-e", f"BOT_BOTTLE_SIDECAR_DAEMONS={spec.daemons_csv}",
|
||||
]
|
||||
for entry in spec.environment:
|
||||
argv += ["-e", entry]
|
||||
for host_path, container_path, read_only in spec.volumes:
|
||||
suffix = ":ro" if read_only else ""
|
||||
argv += ["-v", f"{host_path}:{container_path}{suffix}"]
|
||||
# Loopback-only host port-forwards — the smolvm guest's TSI
|
||||
# uses macOS networking, and macOS loopback is the only host
|
||||
# surface that round-trips into Docker Desktop's daemon VM.
|
||||
# Binds to the per-bottle alias so TSI's IP-only allowlist
|
||||
# narrows reachability to this bottle's bundle only.
|
||||
for port in spec.ports_to_publish:
|
||||
argv += ["-p", f"{spec.publish_host_ip}::{port}"]
|
||||
argv.append(spec.image)
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True,
|
||||
env=dict(env) if env is not None else None, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(
|
||||
f"docker run for bundle {container} failed: "
|
||||
f"{(result.stderr or '').strip()}"
|
||||
)
|
||||
|
||||
|
||||
def bundle_host_port(
|
||||
slug: str, container_port: int, *, host_ip: str = "127.0.0.1",
|
||||
) -> int:
|
||||
"""`docker port <bundle> <container_port>/tcp` → the random
|
||||
host-side port docker assigned for the binding on `host_ip`.
|
||||
Called after `start_bundle` on each container port listed in
|
||||
`BundleLaunchSpec.ports_to_publish` so the launch step can
|
||||
build the agent's HTTPS_PROXY / GIT_GATE / SUPERVISE URLs in
|
||||
`<host_ip>:<host port>` form."""
|
||||
container = bundle_container_name(slug)
|
||||
result = subprocess.run(
|
||||
["docker", "port", container, f"{container_port}/tcp"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
die(
|
||||
f"docker port {container} {container_port}/tcp failed: "
|
||||
f"{(result.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
# Each line looks like `127.0.0.16:54321` — one per address
|
||||
# family / host IP. Match on the expected host_ip prefix so
|
||||
# bottles bound to per-bottle aliases pick the right line.
|
||||
for raw in (result.stdout or "").splitlines():
|
||||
line = raw.strip()
|
||||
if line.startswith(f"{host_ip}:"):
|
||||
_, _, port_str = line.rpartition(":")
|
||||
try:
|
||||
return int(port_str)
|
||||
except ValueError:
|
||||
die(f"unexpected `docker port` output: {line!r}")
|
||||
die(
|
||||
f"no port mapping on {host_ip} for {container} "
|
||||
f"{container_port}/tcp; got: {(result.stdout or '').strip()!r}"
|
||||
)
|
||||
return -1 # unreachable; die() never returns
|
||||
|
||||
|
||||
def stop_bundle(slug: str) -> None:
|
||||
"""Idempotent: a missing container returns success."""
|
||||
container = bundle_container_name(slug)
|
||||
result = subprocess.run(
|
||||
["docker", "rm", "-f", container],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return
|
||||
if "no such container" in (result.stderr or "").lower():
|
||||
return
|
||||
warn(
|
||||
f"docker rm -f {container} failed: "
|
||||
f"{(result.stderr or '').strip()}"
|
||||
)
|
||||
@@ -0,0 +1,217 @@
|
||||
"""Thin subprocess wrapper around the `smolvm` CLI (PRD 0023).
|
||||
|
||||
One thin Python function per smolvm subcommand the launch flow
|
||||
needs. Two design choices worth flagging:
|
||||
|
||||
- **No daemon, no SDK.** smolvm 0.8.0 ships a `smolvm serve`
|
||||
HTTP API as the long-term-clean integration target. The
|
||||
project's stdlib-first ethos + the lower-overhead CLI calls
|
||||
push v1 to shell out via `subprocess.run`. If a future
|
||||
smolvm release makes `serve` mandatory (or significantly
|
||||
faster), revisit.
|
||||
|
||||
- **Two return shapes.** `SmolvmRunResult` (returncode + stdout
|
||||
+ stderr captured) is returned by `machine_exec` because the
|
||||
caller cares about the in-VM command's exit status, and by
|
||||
test helpers that introspect output. The other calls
|
||||
(`machine_start`, `machine_stop`, `pack_create`, etc.) raise
|
||||
`SmolvmError` on non-zero exit — failure to start a VM is
|
||||
fatal to the launch flow, not something callers want to
|
||||
branch on.
|
||||
|
||||
The wrapper is unit-tested with `subprocess.run` mocked; the
|
||||
integration smoke test (chunk 2d) exercises against a real
|
||||
smolvm binary."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Mapping, Sequence
|
||||
|
||||
|
||||
_SMOLVM = "smolvm"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SmolvmRunResult:
|
||||
"""Captured result of an in-VM command. Mirrors the structure
|
||||
`Bottle.exec` returns so callers can hand it straight through."""
|
||||
returncode: int
|
||||
stdout: str
|
||||
stderr: str
|
||||
|
||||
|
||||
class SmolvmError(RuntimeError):
|
||||
"""Raised when a smolvm subprocess returns non-zero on a path
|
||||
where the caller has no useful branch to take (start failed,
|
||||
pack failed, etc.). Carries the captured stderr for the
|
||||
operator-facing log line."""
|
||||
|
||||
def __init__(self, argv: Sequence[str], result: subprocess.CompletedProcess):
|
||||
self.argv = list(argv)
|
||||
self.returncode = result.returncode
|
||||
self.stdout = result.stdout
|
||||
self.stderr = result.stderr
|
||||
cmd = " ".join(self.argv)
|
||||
super().__init__(
|
||||
f"{cmd!r} failed (exit {result.returncode}): "
|
||||
f"{(result.stderr or '').strip() or '<no stderr>'}"
|
||||
)
|
||||
|
||||
|
||||
def _smolvm(*args: str, env: Mapping[str, str] | None = None,
|
||||
check: bool = True) -> subprocess.CompletedProcess:
|
||||
"""One subprocess call into the smolvm CLI. `check=True`
|
||||
raises SmolvmError on non-zero; `check=False` returns the
|
||||
CompletedProcess for the caller to inspect."""
|
||||
argv = [_SMOLVM, *args]
|
||||
result = subprocess.run(
|
||||
argv,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=dict(env) if env is not None else None,
|
||||
check=False,
|
||||
)
|
||||
if check and result.returncode != 0:
|
||||
raise SmolvmError(argv, result)
|
||||
return result
|
||||
|
||||
|
||||
# --- Pack ----------------------------------------------------------------
|
||||
|
||||
|
||||
def pack_create(image: str, output: Path) -> None:
|
||||
"""`smolvm pack create --image <image> -o <output>`. Converts
|
||||
an OCI image into a self-contained `.smolmachine` artifact
|
||||
smolvm can boot via `machine create --from`. Idempotent on the
|
||||
smolvm side — re-running with the same image+output rebuilds
|
||||
from layer cache."""
|
||||
_smolvm("pack", "create", "--image", image, "-o", str(output))
|
||||
|
||||
|
||||
# --- Machine lifecycle ---------------------------------------------------
|
||||
|
||||
|
||||
def machine_create(
|
||||
name: str,
|
||||
*,
|
||||
image: str | None = None,
|
||||
from_path: Path | None = None,
|
||||
allow_cidrs: Sequence[str] = (),
|
||||
env: Mapping[str, str] | None = None,
|
||||
) -> None:
|
||||
"""`smolvm machine create NAME [--image IMG | --from PATH]
|
||||
[--allow-cidr CIDR ...] [-e K=V ...]`. NAME is positional
|
||||
(the CLI's exception to the `--name` pattern other
|
||||
subcommands use).
|
||||
|
||||
`image` (registry ref like `alpine:latest`) and `from_path`
|
||||
(a `.smolmachine` artifact) are mutually exclusive — one or
|
||||
the other tells smolvm what to boot. The wrapper doesn't
|
||||
enforce exclusivity; smolvm errors clearly enough.
|
||||
|
||||
`allow_cidrs` and `env` are passed as CLI flags instead of a
|
||||
Smolfile because `--from` and `--smolfile` are themselves
|
||||
mutually exclusive in smolvm 0.8.0 — and we want `--from`'s
|
||||
no-pull-at-start property. The flag form gives the same
|
||||
result without the Smolfile complication.
|
||||
|
||||
`--net` is sent explicitly when `allow_cidrs` is non-empty.
|
||||
smolvm 0.8.0's docs say `--allow-cidr` implies `--net`, but
|
||||
empirically the implication only fires when no `--from` is
|
||||
set — `--from PATH --allow-cidr X/32` silently produces a
|
||||
machine with `network: false` and no routes in the guest, so
|
||||
the agent can't reach the bundle's pinned IP."""
|
||||
args: list[str] = ["machine", "create"]
|
||||
if image is not None:
|
||||
args += ["--image", image]
|
||||
if from_path is not None:
|
||||
args += ["--from", str(from_path)]
|
||||
if allow_cidrs:
|
||||
args.append("--net")
|
||||
for cidr in allow_cidrs:
|
||||
args += ["--allow-cidr", cidr]
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
args += ["-e", f"{k}={v}"]
|
||||
args.append(name)
|
||||
_smolvm(*args)
|
||||
|
||||
|
||||
def machine_start(name: str) -> None:
|
||||
"""`smolvm machine start --name NAME`."""
|
||||
_smolvm("machine", "start", "--name", name)
|
||||
|
||||
|
||||
def machine_stop(name: str) -> None:
|
||||
"""`smolvm machine stop --name NAME`. Idempotent against
|
||||
already-stopped machines: smolvm prints a notice and exits 0
|
||||
in that case, so no special handling here."""
|
||||
_smolvm("machine", "stop", "--name", name)
|
||||
|
||||
|
||||
def machine_delete(name: str) -> None:
|
||||
"""`smolvm machine delete -f NAME`. NAME is positional. `-f`
|
||||
skips the interactive confirmation — required for
|
||||
non-interactive teardown."""
|
||||
_smolvm("machine", "delete", "-f", name)
|
||||
|
||||
|
||||
def machine_exec(
|
||||
name: str,
|
||||
argv: Sequence[str],
|
||||
*,
|
||||
env: Mapping[str, str] | None = None,
|
||||
workdir: str | None = None,
|
||||
timeout: str | None = None,
|
||||
) -> SmolvmRunResult:
|
||||
"""`smolvm machine exec --name NAME [-w DIR] [--timeout DUR]
|
||||
[-e K=V ...] -- ARGV...`. Returns the captured result rather
|
||||
than raising — callers (including `Bottle.exec`) care about
|
||||
the in-VM command's exit code, not just whether smolvm ran.
|
||||
|
||||
`env` here is in-VM env vars (`-e K=V`), not the host
|
||||
subprocess env — smolvm's own argv carries them through the
|
||||
VMM."""
|
||||
flags: list[str] = ["machine", "exec", "--name", name]
|
||||
if workdir is not None:
|
||||
flags += ["-w", workdir]
|
||||
if timeout is not None:
|
||||
flags += ["--timeout", timeout]
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
flags += ["-e", f"{k}={v}"]
|
||||
# `--` separator before the command. smolvm's CLI requires it
|
||||
# so its own flag parser doesn't grab argv items that look
|
||||
# like flags.
|
||||
flags.append("--")
|
||||
flags += list(argv)
|
||||
result = _smolvm(*flags, check=False)
|
||||
return SmolvmRunResult(
|
||||
returncode=result.returncode,
|
||||
stdout=result.stdout or "",
|
||||
stderr=result.stderr or "",
|
||||
)
|
||||
|
||||
|
||||
def machine_cp(src: str, dst: str) -> None:
|
||||
"""`smolvm machine cp SRC DST`. Path syntax: `machine:path` to
|
||||
reference a path inside the VM, bare path for the host. Both
|
||||
SRC and DST are positional; either side can be machine: or
|
||||
bare. Empty path is a no-op (returns immediately without
|
||||
invoking smolvm)."""
|
||||
if not src or not dst:
|
||||
return
|
||||
_smolvm("machine", "cp", src, dst)
|
||||
|
||||
|
||||
# --- Discovery -----------------------------------------------------------
|
||||
|
||||
|
||||
def is_available() -> bool:
|
||||
"""True iff `smolvm` is on PATH. Used by the integration test
|
||||
suite's skip-guards."""
|
||||
return shutil.which(_SMOLVM) is not None
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Slug / preflight / subnet helpers for the smolmachines backend
|
||||
(PRD 0023). Kept in its own module so the renderers can be
|
||||
unit-tested without importing the docker subprocess paths."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import shutil
|
||||
|
||||
from ...log import die
|
||||
|
||||
|
||||
def smolmachines_preflight() -> None:
|
||||
"""Ensure `smolvm` is on PATH before the launch flow runs.
|
||||
Called from `_resolve_plan`; gives the operator a clear
|
||||
install pointer rather than a cryptic FileNotFoundError
|
||||
later. `gvproxy` is no longer required — see the PRD's design
|
||||
pivot section."""
|
||||
if shutil.which("smolvm") is not None:
|
||||
return
|
||||
die(
|
||||
"BOT_BOTTLE_BACKEND=smolmachines requires `smolvm` on "
|
||||
"PATH. Install with: "
|
||||
"curl -sSL https://smolmachines.com/install.sh | sh"
|
||||
)
|
||||
|
||||
|
||||
def smolmachines_bundle_subnet(slug: str) -> tuple[str, str, str]:
|
||||
"""Derive a per-bottle docker subnet + gateway IP + bundle IP
|
||||
from the slug.
|
||||
|
||||
Returns `(subnet_cidr, gateway_ip, bundle_ip)`. The third
|
||||
octet comes from SHA-256 of the slug mod 254 (skipping 17 to
|
||||
avoid the docker-default bridge), so parallel bottles get
|
||||
distinct /24s and `resume` reuses the same /24. The bundle
|
||||
container always lands at `.2`; gateway is `.1`; the smolvm
|
||||
Smolfile's `allow_cidrs` is `<bundle_ip>/32`."""
|
||||
digest = hashlib.sha256(slug.encode("utf-8")).digest()
|
||||
octet = (digest[0] % 254) + 1
|
||||
# Skip the docker-default bridge to dodge the most common
|
||||
# collision (operators with `docker0` at 172.17.x.x or a
|
||||
# 192.168.17.x VPN client).
|
||||
if octet == 17:
|
||||
octet = 18
|
||||
subnet = f"192.168.{octet}.0/24"
|
||||
gateway = f"192.168.{octet}.1"
|
||||
bundle_ip = f"192.168.{octet}.2"
|
||||
return subnet, gateway, bundle_ip
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Cross-backend utility helpers — host-side primitives shared by
|
||||
every backend implementation. Backend-specific helpers live one level
|
||||
deeper (e.g. bot_bottle/backend/docker/util.py)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from ..log import die
|
||||
|
||||
|
||||
def host_skill_dir(name: str) -> str:
|
||||
"""Return the host-side path for a named skill:
|
||||
`$HOME/.claude/skills/<name>`. Dies if HOME is unset."""
|
||||
home = os.environ.get("HOME")
|
||||
if not home:
|
||||
die("HOME not set")
|
||||
return f"{home}/.claude/skills/{name}"
|
||||
Reference in New Issue
Block a user