refactor!: rename project to bot-bottle

Assisted-by: Codex
This commit is contained in:
2026-05-28 17:56:14 -04:00
parent 8875d8cc17
commit c08b09dc9f
200 changed files with 1271 additions and 1271 deletions
+33
View File
@@ -0,0 +1,33 @@
"""Docker bottle backend.
The bulk of the implementation lives in sibling modules:
- util: thin Docker subprocess wrappers
- network: Docker network plumbing
- pipelock: DockerPipelockProxy lifecycle
- bottle_plan: DockerBottlePlan
- bottle_cleanup_plan: DockerBottleCleanupPlan
- bottle: DockerBottle handle
- prepare: host-side resolution into a DockerBottlePlan
- launch: bring-up + teardown context manager
- cleanup: orphan enumeration, removal, active listing
- backend: DockerBottleBackend façade wiring the above
This file only re-exports the public names so
`from bot_bottle.backend.docker import DockerBottleBackend` keeps
working.
"""
from __future__ import annotations
from .backend import DockerBottleBackend
from .bottle import DockerBottle
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan
__all__ = [
"DockerBottle",
"DockerBottleBackend",
"DockerBottleCleanupPlan",
"DockerBottlePlan",
]
+81
View File
@@ -0,0 +1,81 @@
"""DockerBottleBackend — the Docker implementation of BottleBackend.
This module is a thin façade. The real work lives in four siblings:
- prepare.py — host-side resolution into a DockerBottlePlan
- launch.py — bring-up + teardown context manager
- cleanup.py — orphan enumeration + removal
- enumerate.py — active-agent listing
The base class's `prepare` template runs cross-backend host-side
validation before calling `_resolve_plan` here.
"""
from __future__ import annotations
import shutil
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Sequence
from .. import ActiveAgent, BottleBackend, BottleSpec
from . import cleanup as _cleanup
from . import enumerate as _enumerate
from . import launch as _launch
from . import prepare as _prepare
from .bottle import DockerBottle
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan
from .provision import ca as _ca
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
from .provision import supervise as _supervise_prov
class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanupPlan"]):
"""Docker backend implementation. Selected by BOT_BOTTLE_BACKEND
(default)."""
name = "docker"
@classmethod
def is_available(cls) -> bool:
"""`docker` on PATH is sufficient; we don't probe `docker info`
eagerly because the cross-backend enumerator runs this on
every `list active` and we'd pay a subprocess per call. A
broken daemon will surface its own error during prepare /
launch."""
return shutil.which("docker") is not None
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
return _prepare.resolve_plan(spec, stage_dir=stage_dir)
@contextmanager
def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]:
with _launch.launch(plan, provision=self.provision) as bottle:
yield bottle
def provision_ca(self, plan: DockerBottlePlan, target: str) -> None:
_ca.provision_ca(plan, target)
def provision_prompt(self, plan: DockerBottlePlan, target: str) -> str | None:
return _prompt.provision_prompt(plan, target)
def provision_skills(self, plan: DockerBottlePlan, target: str) -> None:
_skills.provision_skills(plan, target)
def provision_git(self, plan: DockerBottlePlan, target: str) -> None:
_git.provision_git(plan, target)
def provision_supervise(self, plan: DockerBottlePlan, target: str) -> None:
_supervise_prov.provision_supervise(plan, target)
def prepare_cleanup(self) -> DockerBottleCleanupPlan:
return _cleanup.prepare_cleanup()
def cleanup(self, plan: DockerBottleCleanupPlan) -> None:
_cleanup.cleanup(plan)
def enumerate_active(self) -> Sequence[ActiveAgent]:
return _enumerate.enumerate_active()
+84
View File
@@ -0,0 +1,84 @@
"""DockerBottle — concrete Bottle handle yielded by DockerBottleBackend."""
from __future__ import annotations
import subprocess
from typing import Callable
from ...agent_provider import prompt_args
from .. import Bottle, ExecResult
class DockerBottle(Bottle):
"""Concrete Bottle for Docker."""
def __init__(
self,
container: str,
teardown: Callable[[], None],
prompt_path_in_container: str | None,
*,
agent_command: str = "claude",
agent_prompt_mode: str = "claude_append_file",
):
self.name = container
self._teardown = teardown
self._prompt_path = prompt_path_in_container
self._agent_command = agent_command
self._agent_prompt_mode = agent_prompt_mode
self.agent_command = agent_command
self.agent_provider_template = (
"codex" if agent_command == "codex" else "claude"
)
self._closed = False
def claude_argv(
self, argv: list[str], *, tty: bool = True,
) -> list[str]:
full_argv = list(argv)
full_argv.extend(
prompt_args(self._agent_prompt_mode, self._prompt_path, argv=full_argv)
)
cmd = ["docker", "exec"]
if tty:
cmd.append("-it")
cmd.extend([self.name, self._agent_command, *full_argv])
return cmd
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
return subprocess.run(
self.claude_argv(argv, tty=tty), check=False,
).returncode
def exec(self, script: str, *, user: str = "node") -> ExecResult:
# Pipe via stdin to `sh -s` so the caller never has to worry
# about quoting; the script source lands inside the container
# without crossing argv. `-u <user>` overrides the image's
# default USER — defaults to `node` which is already the
# image's USER, so the explicit flag is a no-op there but
# keeps the cross-backend contract uniform.
result = subprocess.run(
["docker", "exec", "-u", user, "-i", self.name, "sh", "-s"],
input=script,
capture_output=True,
text=True,
check=False,
)
return ExecResult(
returncode=result.returncode,
stdout=result.stdout,
stderr=result.stderr,
)
def cp_in(self, host_path: str, container_path: str) -> None:
subprocess.run(
["docker", "cp", host_path, f"{self.name}:{container_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
def close(self) -> None:
if self._closed:
return
self._closed = True
self._teardown()
@@ -0,0 +1,59 @@
"""DockerBottleCleanupPlan — concrete subclass of BottleCleanupPlan.
PRD 0018 chunk 4: cleanup is centered on compose projects. `docker
compose ls` is the source of truth for what's running; the plan
carries the projects to `compose down`, plus three fallback buckets
for legacy / orphan resources:
- stray_containers: pre-compose `bot-bottle-*` containers not
attached to any compose project. Cleared via `docker rm -f`.
- stray_networks: same idea for networks. Cleared via
`docker network rm`.
- orphan_state_dirs: per-bottle state dirs under
~/.bot-bottle/state/ that have no live compose project AND
no `.preserve` marker. Reaped via `shutil.rmtree`.
Compose-managed networks are removed by `compose down --volumes`,
so they don't appear in stray_networks for a normal project — only
truly leftover ones.
"""
from __future__ import annotations
import sys
from dataclasses import dataclass
from ...log import info
from .. import BottleCleanupPlan
@dataclass(frozen=True)
class DockerBottleCleanupPlan(BottleCleanupPlan):
"""Resources DockerBottleBackend.cleanup will remove. Produced by
`prepare_cleanup`; sorted so the y/N output is stable."""
projects: tuple[str, ...]
stray_containers: tuple[str, ...]
stray_networks: tuple[str, ...]
orphan_state_dirs: tuple[str, ...]
@property
def empty(self) -> bool:
return (
not self.projects
and not self.stray_containers
and not self.stray_networks
and not self.orphan_state_dirs
)
def print(self) -> None:
print(file=sys.stderr)
for name in self.projects:
info(f"compose project: {name}")
for name in self.stray_containers:
info(f"stray container: {name}")
for name in self.stray_networks:
info(f"stray network: {name}")
for name in self.orphan_state_dirs:
info(f"orphan state: {name}")
print(file=sys.stderr)
+97
View File
@@ -0,0 +1,97 @@
"""DockerBottlePlan — concrete subclass of BottlePlan.
Carries the Docker-specific resolved fields produced by
DockerBottleBackend.prepare. The launch step consumes it without
further resolution; show_plan-style rendering is the `print` method.
"""
from __future__ import annotations
import sys
from dataclasses import dataclass, field
from pathlib import Path
from ...egress import EgressPlan
from ...git_gate import GitGatePlan
from ...log import info
from ...pipelock import PipelockProxyPlan
from ...supervise import SupervisePlan
from .. import BottlePlan
from ..print_util import print_multi
@dataclass(frozen=True)
class DockerBottlePlan(BottlePlan):
"""Docker-specific resolved fields produced by
DockerBottleBackend.prepare. Inherits `spec` and `stage_dir` from
BottlePlan."""
slug: str
container_name: str
container_name_pinned: bool
image: str
derived_image: str # "" -> no derived image
runtime_image: str # image actually launched (derived or base)
# Absolute path to the Dockerfile that builds `image`. Empty means
# use the repo's default Dockerfile. Populated to a per-bottle
# state file (~/.bot-bottle/state/<slug>/Dockerfile) after a
# capability-block remediation (PRD 0016).
dockerfile_path: str
env_file: Path # docker --env-file: NAME=VALUE literals
# name -> value for vars forwarded into the docker-run child process
# via subprocess env (so values never land on argv or in a file).
# repr=False keeps secret/interpolated/OAuth values out of any
# accidental log of the plan dataclass.
forwarded_env: dict[str, str] = field(repr=False)
prompt_file: Path
proxy_plan: PipelockProxyPlan
git_gate_plan: GitGatePlan
egress_plan: EgressPlan
# None when bottle.supervise is False. PRD 0013 supervise sidecar
# is opt-in via the manifest's bottle.supervise field.
supervise_plan: SupervisePlan | None
use_runsc: bool
agent_command: str = "claude"
agent_prompt_mode: str = "claude_append_file"
agent_provider_template: str = "claude"
def print(self, *, remote_control: bool) -> None:
"""Render the y/N preflight summary to stderr — compact form
intended to fit on screen without scrolling. The full
structured shape (image, container, runtime, etc.) lives on
this dataclass for tooling that wants to introspect it."""
del remote_control # not surfaced in the compact summary
spec = self.spec
manifest = spec.manifest
agent = manifest.agents[spec.agent_name]
bottle = manifest.bottle_for(spec.agent_name)
# The agent sees the union of literal env names (rendered into
# --env-file) and forwarded env names (`-e NAME` with the
# value arriving via subprocess env). The forwarded set holds
# the OAuth token (CLAUDE_CODE_OAUTH_TOKEN) and any host-env
# interpolations from the manifest; egress holds
# upstream tokens in its own environ, so no token forwarding
# from the agent to the proxy is needed.
env_names = sorted(set(bottle.env.keys()) | set(self.forwarded_env.keys()))
print(file=sys.stderr)
info(f"agent : {spec.agent_name}")
info(f"provider : {self.agent_provider_template}")
print_multi("env ", env_names)
print_multi("skills ", list(agent.skills))
info(f"bottle : {agent.bottle}")
git_lines = [
f"{u.upstream_host}:{u.upstream_port}"
for u in self.git_gate_plan.upstreams
]
if git_lines:
print_multi(" git gate ", git_lines)
if self.egress_plan.routes:
egress_lines = []
for r in self.egress_plan.routes:
auth = f" [auth:{r.auth_scheme}]" if r.auth_scheme else ""
egress_lines.append(f"{r.host}{auth}")
print_multi(" egress ", egress_lines)
print(file=sys.stderr)
+328
View File
@@ -0,0 +1,328 @@
"""Per-bottle persistent state (PRD 0016).
Holds the per-bottle Dockerfile override that capability-block
remediation writes, the transcript snapshot the state-preservation
helper saves before teardown, and the launch metadata that lets
`cli.py resume <identity>` reconstruct a bottle's spec. State
lives at:
~/.bot-bottle/state/<identity>/
metadata.json — agent_name + cwd + started_at (for resume)
Dockerfile — per-bottle override (absent → use repo's)
transcript/ — last snapshotted agent state (best-effort)
When the per-bottle Dockerfile is present, the launch step builds
the agent image with a per-bottle tag (bot-bottle-rebuilt-<id>)
from this file rather than the repo's. The build context is still
the repo root so the Dockerfile can COPY bot_bottle source files
the same way the original does.
Identity model:
- Every `cli.py start <agent>` mints a fresh identity via
`bottle_identity(agent_name)`: slug-prefix for readability plus a
5-char random suffix for parallel-safe uniqueness. The metadata
written at launch time pins (agent_name, cwd) to that identity.
- `cli.py resume <identity>` reads the metadata and re-launches a
bottle pinned to the same identity, picking up any per-bottle
Dockerfile and transcript snapshot.
"""
from __future__ import annotations
import dataclasses
import json
import secrets
import string
from dataclasses import dataclass
from pathlib import Path
from ... import supervise as _supervise
from . import util as docker_mod
# Directory layout: ~/.bot-bottle/state/<identity>/...
_STATE_SUBDIR = "state"
_PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile"
_TRANSCRIPT_SUBDIR = "transcript"
# Per-sidecar scratch subdirs. PRD 0018 chunk 2: bind-mount sources
# live here so chunk 3's `docker compose up` can find them at stable
# paths. Each sidecar's `prepare()` writes config + CAs into its own
# subdir; the launch step is unchanged today (still `docker cp`).
_PIPELOCK_SUBDIR = "pipelock"
_EGRESS_SUBDIR = "egress"
_GIT_GATE_SUBDIR = "git-gate"
_SUPERVISE_SUBDIR = "supervise"
_AGENT_SUBDIR = "agent"
_METADATA_NAME = "metadata.json"
# Live-config dir bind-mounted into the supervise sidecar (read-only).
# Host's apply paths keep these files fresh so supervise's
# `list-pipelock-allowlist` / `list-egress-routes` MCP tools
# return the current state — not a snapshot from launch time.
_LIVE_CONFIG_SUBDIR = "live-config"
LIVE_CONFIG_ROUTES_NAME = "routes.yaml"
LIVE_CONFIG_ALLOWLIST_NAME = "allowlist"
# Empty marker file. capability_apply writes it before teardown so
# cli.py's session-end cleanup knows to preserve the state dir for
# `cli.py resume <identity>`. Absent = clean up.
_PRESERVE_MARKER = ".preserve"
# 5 chars of base36 alphabet ≈ 60M combinations. Plenty for human
# operators starting bottles by hand; collision-free in practice.
_RANDOM_SUFFIX_LEN = 5
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits
def bottle_identity(agent_name: str) -> str:
"""Mint a fresh per-launch bottle identity. The slug-prefix is
`slugify(agent_name)` for readability; the suffix is 5 random
base36 chars so two simultaneous `start <agent>` invocations
don't collide on container/network names.
Every call produces a different identity (non-deterministic).
To continue an existing bottle's state, use the recorded
identity from BottleMetadata via `cli.py resume <identity>`,
not this function."""
slug = docker_mod.slugify(agent_name)
suffix = "".join(secrets.choice(_SUFFIX_ALPHABET) for _ in range(_RANDOM_SUFFIX_LEN))
return f"{slug}-{suffix}"
@dataclass(frozen=True)
class BottleMetadata:
"""Persistent record of how a bottle was launched, written at
start time and read by `cli.py resume`. Lives at
~/.bot-bottle/state/<identity>/metadata.json."""
identity: str
agent_name: str
cwd: str # empty string when --cwd was not passed
copy_cwd: bool
started_at: str # ISO 8601 UTC
# PRD 0018 chunk 3: derivable from identity via
# `compose_project_name(identity)`, but persisted explicitly so
# dashboard / cleanup / resume tooling can read it without
# importing the compose module. Empty string for state dirs
# written before chunk 3 (resume / inspect should fall back to
# deriving from identity in that case).
compose_project: str = ""
def metadata_path(identity: str) -> Path:
return bottle_state_dir(identity) / _METADATA_NAME
def write_metadata(metadata: BottleMetadata) -> Path:
"""Persist `metadata` to ~/.bot-bottle/state/<identity>/metadata.json.
Mode 0o644 — no secrets, just (agent_name, cwd, timestamp)."""
path = metadata_path(metadata.identity)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(dataclasses.asdict(metadata), indent=2) + "\n")
path.chmod(0o644)
return path
def read_metadata(identity: str) -> BottleMetadata | None:
"""Return the metadata for `identity`, or None if no state has
been recorded for it. Used by `cli.py resume` to reconstruct
the launch spec."""
path = metadata_path(identity)
if not path.is_file():
return None
raw = json.loads(path.read_text())
if not isinstance(raw, dict):
return None
return BottleMetadata(
identity=str(raw.get("identity", identity)),
agent_name=str(raw.get("agent_name", "")),
cwd=str(raw.get("cwd", "")),
copy_cwd=bool(raw.get("copy_cwd", False)),
started_at=str(raw.get("started_at", "")),
compose_project=str(raw.get("compose_project", "")),
)
def bottle_state_dir(identity: str) -> Path:
"""Per-bottle state directory on the host. Created lazily by the
write helpers; readers tolerate its absence."""
return _supervise.bot_bottle_root() / _STATE_SUBDIR / identity
def per_bottle_dockerfile_path(identity: str) -> Path:
return bottle_state_dir(identity) / _PER_BOTTLE_DOCKERFILE_NAME
def per_bottle_dockerfile(identity: str) -> str | None:
"""Return the per-bottle Dockerfile content if present, else
None. None means: use the repo's Dockerfile (the original
pre-capability-block behavior)."""
p = per_bottle_dockerfile_path(identity)
if p.is_file():
return p.read_text()
return None
def write_per_bottle_dockerfile(identity: str, content: str) -> Path:
p = per_bottle_dockerfile_path(identity)
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(content)
p.chmod(0o644)
return p
def per_bottle_image_tag(identity: str) -> str:
"""Image tag for a rebuilt bottle. Distinct from the base
bot-bottle-claude:latest so per-bottle rebuilds don't collide in
the docker image cache."""
return f"bot-bottle-rebuilt-{identity}:latest"
def live_config_dir(identity: str) -> Path:
"""Per-bottle live-config dir. Bind-mounted read-only into the
supervise sidecar; the host's apply paths refresh the files on
every operator approval so the agent's `list-*` MCP tools always
return current state."""
return bottle_state_dir(identity) / _LIVE_CONFIG_SUBDIR
def live_routes_path(identity: str) -> Path:
return live_config_dir(identity) / LIVE_CONFIG_ROUTES_NAME
def live_allowlist_path(identity: str) -> Path:
return live_config_dir(identity) / LIVE_CONFIG_ALLOWLIST_NAME
def write_live_config(
identity: str, *, routes: str = "", allowlist: str = "",
) -> Path:
"""Initialise (or refresh) the live-config dir. Empty-string args
leave the existing file alone (caller passes only what it knows).
Returns the live-config dir path."""
d = live_config_dir(identity)
d.mkdir(parents=True, exist_ok=True)
if routes:
p = live_routes_path(identity)
p.write_text(routes)
p.chmod(0o644)
if allowlist:
p = live_allowlist_path(identity)
p.write_text(allowlist)
p.chmod(0o644)
return d
def transcript_snapshot_dir(identity: str) -> Path:
"""Where capability_apply stashes the agent's transcript before
teardown, so the next `cli.py start <agent>` can offer to
resume from it."""
return bottle_state_dir(identity) / _TRANSCRIPT_SUBDIR
# --- Per-sidecar scratch subdirs (PRD 0018 chunk 2) ------------------------
#
# Each sidecar gets its own subdir under the bottle's state dir for
# bind-mount sources (config, CAs, hooks, etc.). Prepare-time writes
# land here; the state dir's normal cleanup (`cleanup_state`) reaps
# them along with everything else when the bottle session ends and
# nothing requested preservation.
def pipelock_state_dir(identity: str) -> Path:
"""State subdir for the pipelock sidecar: pipelock.yaml + the
per-bottle CA cert/key. Bind-mount source from chunk 3 onward."""
return bottle_state_dir(identity) / _PIPELOCK_SUBDIR
def egress_state_dir(identity: str) -> Path:
"""State subdir for the egress sidecar: routes.yaml + the
per-bottle mitmproxy CA. Bind-mount source from chunk 3 onward."""
return bottle_state_dir(identity) / _EGRESS_SUBDIR
def git_gate_state_dir(identity: str) -> Path:
"""State subdir for the git-gate sidecar: entrypoint + hooks +
per-upstream known_hosts. Bind-mount source from chunk 3
onward."""
return bottle_state_dir(identity) / _GIT_GATE_SUBDIR
def supervise_state_dir(identity: str) -> Path:
"""State subdir for the supervise sidecar's current-config dir
(bind-mounted into the agent at /etc/bot-bottle/current-config).
The queue dir is intentionally NOT under here — it lives at
~/.bot-bottle/queue/<slug>/ alongside the audit logs, so it
survives state-dir cleanup."""
return bottle_state_dir(identity) / _SUPERVISE_SUBDIR
def agent_state_dir(identity: str) -> Path:
"""State subdir for the agent's prepare-time scratch files: the
env file (docker --env-file source) and the prompt file."""
return bottle_state_dir(identity) / _AGENT_SUBDIR
# --- Preserve-on-close marker ----------------------------------------------
def preserve_marker_path(identity: str) -> Path:
return bottle_state_dir(identity) / _PRESERVE_MARKER
def mark_preserved(identity: str) -> Path:
"""Mark this bottle's state for preservation across session
teardown. Written by capability_apply.apply_capability_change so
cli.py's session-end cleanup leaves the state dir intact for a
subsequent `cli.py resume`."""
path = preserve_marker_path(identity)
path.parent.mkdir(parents=True, exist_ok=True)
path.touch()
return path
def is_preserved(identity: str) -> bool:
return preserve_marker_path(identity).exists()
def clear_preserve_marker(identity: str) -> None:
"""Idempotent removal. Called at fresh launch (start or resume)
so a marker left from a prior capability-block doesn't keep
state alive past the next normal session-end."""
try:
preserve_marker_path(identity).unlink()
except FileNotFoundError:
pass
def cleanup_state(identity: str) -> None:
"""Remove the per-bottle state dir entirely. Called by cli.py
when a bottle session ends and is_preserved(identity) is False.
Idempotent — missing dir is success."""
import shutil
state_dir = bottle_state_dir(identity)
if state_dir.is_dir():
shutil.rmtree(state_dir, ignore_errors=True)
__all__ = [
"BottleMetadata",
"agent_state_dir",
"bottle_identity",
"bottle_state_dir",
"cleanup_state",
"clear_preserve_marker",
"egress_state_dir",
"git_gate_state_dir",
"is_preserved",
"mark_preserved",
"metadata_path",
"per_bottle_dockerfile",
"per_bottle_dockerfile_path",
"per_bottle_image_tag",
"pipelock_state_dir",
"preserve_marker_path",
"read_metadata",
"supervise_state_dir",
"transcript_snapshot_dir",
"write_metadata",
"write_per_bottle_dockerfile",
]
@@ -0,0 +1,220 @@
"""capability_apply — host-side orchestrator for capability-block
remediation (PRD 0016).
On approval of a capability-block proposal, the dashboard calls
apply_capability_change(slug, new_dockerfile) which:
1. Snapshots the agent's transcript dir to
~/.bot-bottle/state/<slug>/transcript/ (best-effort).
2. Pushes the agent's working tree via `git push` (best-effort —
no upstream / no commits / no git repo all skip with a log).
3. Writes the new Dockerfile to
~/.bot-bottle/state/<slug>/Dockerfile (PRD 0016 Phase 1
state). The next `cli.py start <agent>` picks it up.
4. Force-removes the agent container + all sidecars + the
per-bottle networks. Idempotent — missing resources are not
errors.
Returns (before, after) Dockerfile contents so the dashboard can
record / render the diff. (capability-block has no audit log per
PRD 0013 — the per-bottle Dockerfile state is its own record.)
This is "fire-and-forget" from the agent's perspective: by the time
the dashboard writes the response file the supervise sidecar is
gone, so the agent's tool call connection drops without ever
receiving the response. The replacement agent (next manual
`cli.py start`) sees the new Dockerfile and starts from there.
v1 does not auto-relaunch — see PRD 0016's capability-block return
semantics open question.
"""
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
from ...log import info, warn
from .bottle_state import (
mark_preserved,
per_bottle_dockerfile,
per_bottle_dockerfile_path,
transcript_snapshot_dir,
write_per_bottle_dockerfile,
)
from .sidecar_bundle import sidecar_bundle_container_name
# Agent home inside the container (per the repo Dockerfile's
# `USER node` + `WORKDIR /home/node`). Used to locate the transcript
# dir + the workspace dir for git push.
_AGENT_HOME_IN_CONTAINER = "/home/node"
_AGENT_TRANSCRIPT_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/.claude"
_AGENT_WORKSPACE_IN_CONTAINER = f"{_AGENT_HOME_IN_CONTAINER}/workspace"
# Per-bottle resource name patterns (mirroring prepare.py).
def _agent_container_name(slug: str) -> str:
return f"bot-bottle-{slug}"
def _per_bottle_container_names(slug: str) -> list[str]:
"""All container names that belong to this bottle. Missing
containers are silently skipped by the teardown helper, so it's
fine to include names that don't exist for a given bottle."""
return [
_agent_container_name(slug),
sidecar_bundle_container_name(slug),
]
def _per_bottle_network_names(slug: str) -> list[str]:
return [
f"bot-bottle-net-{slug}",
f"bot-bottle-egress-{slug}",
]
class CapabilityApplyError(RuntimeError):
"""Raised when the apply fails in a way that should keep the
proposal pending (so the operator can retry). Best-effort
failures (transcript snapshot, git push) do not raise — they
just log and proceed."""
# --- Public helpers --------------------------------------------------------
def fetch_current_dockerfile(slug: str) -> str:
"""Return the Dockerfile content the next `cli.py start <agent>`
would use for this bottle. If a per-bottle override exists, that
one; otherwise the repo's Dockerfile.
Used by the operator-edit verb to show the current source of
truth, and by apply_capability_change for the before-diff."""
override = per_bottle_dockerfile(slug)
if override is not None:
return override
repo_dockerfile = _repo_dockerfile_path()
if repo_dockerfile.is_file():
return repo_dockerfile.read_text()
raise CapabilityApplyError(
f"no per-bottle Dockerfile for {slug} and no repo Dockerfile at "
f"{repo_dockerfile}"
)
def apply_capability_change(slug: str, new_dockerfile: str) -> tuple[str, str]:
"""End-to-end capability-block remediation. See module docstring
for the sequence. Returns (before, after) Dockerfile content."""
if not new_dockerfile.strip():
raise CapabilityApplyError("proposed Dockerfile is empty")
before = fetch_current_dockerfile(slug)
snapshot_transcript(slug)
_push_working_tree(slug)
write_per_bottle_dockerfile(slug, new_dockerfile)
# Set the preserve marker BEFORE teardown so cli.py's session-end
# cleanup sees it and keeps the state dir intact for the
# operator's `cli.py resume <identity>`. Without the marker the
# state dir would be deleted as part of normal session end.
mark_preserved(slug)
_teardown_bottle(slug)
return before, new_dockerfile
# --- Internals -------------------------------------------------------------
def _repo_dockerfile_path() -> Path:
"""Path to the repo's Claude Dockerfile (one dir above this module's
package root). Resolved at call time so the path is correct
regardless of where this module is imported from."""
# bot_bottle/backend/docker/capability_apply.py -> repo root
return Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile.claude"
def snapshot_transcript(slug: str) -> None:
"""`docker cp` /home/node/.claude out of the agent container into
~/.bot-bottle/state/<slug>/transcript/. Best-effort: missing
container, missing dir, or cp error all log a warning and return.
The transcript is what `claude --resume` reads to pick up where
the agent left off.
Called from two places:
- capability-apply, before tearing the bottle down.
- cli.py's session-end path, before the launch context closes,
so a crash or normal exit also leaves a transcript on disk
(deleted along with the state dir on clean exit, kept on
crash or capability-block per the preserve marker)."""
container = _agent_container_name(slug)
dest = transcript_snapshot_dir(slug)
if dest.exists():
# Remove any prior snapshot so the new one is a clean copy.
shutil.rmtree(dest, ignore_errors=True)
dest.parent.mkdir(parents=True, exist_ok=True)
r = subprocess.run(
["docker", "cp", f"{container}:{_AGENT_TRANSCRIPT_IN_CONTAINER}", str(dest)],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"transcript snapshot skipped "
f"({(r.stderr or '').strip() or 'no transcript dir in container?'})"
)
return
info(f"transcript snapshotted to {dest}")
def _push_working_tree(slug: str) -> None:
"""`docker exec <agent> git push` from /home/node/workspace.
Best-effort: not-a-git-repo, no upstream, nothing-to-push, no
network all log a warning and return. The replacement bottle
will pick up whatever's actually upstream."""
container = _agent_container_name(slug)
r = subprocess.run(
[
"docker", "exec", container, "sh", "-c",
f"cd {_AGENT_WORKSPACE_IN_CONTAINER} && "
f"git rev-parse --is-inside-work-tree >/dev/null 2>&1 && "
f"git push origin HEAD 2>&1 || true",
],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
warn(
f"capability-apply: git push skipped "
f"({(r.stderr or '').strip() or 'docker exec failed'})"
)
return
output = (r.stdout or "").strip()
if output:
info(f"capability-apply: git push: {output}")
else:
info("capability-apply: git push ran (no output — likely not a git workspace)")
def _teardown_bottle(slug: str) -> None:
"""Force-remove all per-bottle docker resources. Idempotent —
`docker rm -f` / `docker network rm` silently ignore missing
names, so this can be called even mid-rebuild."""
info(f"capability-apply: tearing down bottle {slug}")
for name in _per_bottle_container_names(slug):
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
for net in _per_bottle_network_names(slug):
subprocess.run(
["docker", "network", "rm", net],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
__all__ = [
"CapabilityApplyError",
"apply_capability_change",
"fetch_current_dockerfile",
"snapshot_transcript",
]
+180
View File
@@ -0,0 +1,180 @@
"""Cleanup for the Docker bottle backend.
PRD 0018 chunk 4: cleanup is centered on `docker compose ls`.
Pre-compose code paths could leave bare containers / networks
without a compose project; those still show up via the prefix
scan, just as a fallback bucket alongside the project list.
`prepare_cleanup` enumerates:
- Live compose projects whose name starts with `bot-bottle-`.
- `bot-bottle-*` containers that aren't part of any compose
project (legacy orphans).
- `bot-bottle-*` networks that aren't tied to a compose
project (legacy orphans; compose-managed networks come down
with `compose down --volumes` and don't appear here).
- State dirs under ~/.bot-bottle/state/<identity>/ with no
live compose project AND no `.preserve` marker.
`cleanup` removes everything in the plan.
Active-agent enumeration lives in `backend/docker/enumerate.py`
(mirror of `backend/smolmachines/enumerate.py`).
"""
from __future__ import annotations
import shutil
import subprocess
from ... import supervise as _supervise
from ...log import info, warn
from . import util as docker_mod
from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_state import bottle_state_dir, is_preserved
from .compose import COMPOSE_PROJECT_PREFIX, list_compose_projects
def _list_prefixed_containers() -> list[str]:
"""All bot-bottle-prefixed containers, running or stopped."""
result = subprocess.run(
["docker", "ps", "-a",
"--filter", f"name=^{COMPOSE_PROJECT_PREFIX}",
"--format", "{{.Names}}\t{{.Label \"com.docker.compose.project\"}}"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(f"docker ps failed: {result.stderr.strip()}")
return []
out: list[str] = []
for line in (result.stdout or "").splitlines():
if not line:
continue
name, _, project = line.partition("\t")
# Stray = no compose label. Compose-managed containers carry
# `com.docker.compose.project=<name>`; we'll reap those via
# `compose down`, not via container rm.
if not project:
out.append(name)
return sorted(set(out))
def _list_prefixed_networks() -> list[str]:
"""All bot-bottle-prefixed networks not currently attached
to a compose project. Compose-managed networks have a
`com.docker.compose.project` label; bare ones (from pre-compose
code paths) don't."""
result = subprocess.run(
["docker", "network", "ls",
"--filter", f"name={COMPOSE_PROJECT_PREFIX}",
"--format", "{{.Name}}\t{{.Label \"com.docker.compose.project\"}}"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(f"docker network ls failed: {result.stderr.strip()}")
return []
out: list[str] = []
for line in (result.stdout or "").splitlines():
if not line:
continue
name, _, project = line.partition("\t")
if not project:
out.append(name)
return sorted(set(out))
def _list_orphan_state_dirs(
live_projects: set[str], protected_identities: set[str],
) -> list[str]:
"""State identities whose compose project isn't running and
that don't have a `.preserve` marker. `.preserve` means the
user (or an auto-preserve-on-crash) wants the state kept for
`resume`.
`protected_identities` is the set of slugs that are live in
ANY backend — used so this docker-side check doesn't reap a
running smolmachines bottle's state dir (the layout is shared
across both backends)."""
state_root = _supervise.bot_bottle_root() / "state"
if not state_root.is_dir():
return []
orphans: list[str] = []
for child in sorted(state_root.iterdir()):
if not child.is_dir():
continue
identity = child.name
project = f"{COMPOSE_PROJECT_PREFIX}{identity}"
if project in live_projects:
continue
if identity in protected_identities:
continue
if is_preserved(identity):
continue
orphans.append(identity)
return orphans
def prepare_cleanup() -> DockerBottleCleanupPlan:
"""Enumerate everything cleanup will touch. No removals.
Pulls the union of live identities across backends via
`enumerate_active_agents()` so the orphan-state-dir bucket
doesn't include slugs whose smolmachines VM is still up."""
docker_mod.require_docker()
projects = list_compose_projects()
project_set = set(projects)
# Late import to avoid a circular at module-load time —
# the backend package's __init__ imports this module.
from .. import enumerate_active_agents
protected = {a.slug for a in enumerate_active_agents()}
return DockerBottleCleanupPlan(
projects=tuple(projects),
stray_containers=tuple(_list_prefixed_containers()),
stray_networks=tuple(_list_prefixed_networks()),
orphan_state_dirs=tuple(
_list_orphan_state_dirs(project_set, protected),
),
)
def cleanup(plan: DockerBottleCleanupPlan) -> None:
"""Remove everything in the plan. Projects first (whose `compose
down` reaps their containers + networks atomically), then stray
legacy resources, then orphan state dirs."""
for project in plan.projects:
info(f"docker compose down ({project})")
result = subprocess.run(
["docker", "compose", "-p", project, "down", "--volumes"],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(
f"compose down failed for {project}: "
f"{result.stderr.strip()}"
)
for name in plan.stray_containers:
info(f"removing stray container {name}")
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
for name in plan.stray_networks:
info(f"removing stray network {name}")
subprocess.run(
["docker", "network", "rm", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
for identity in plan.orphan_state_dirs:
path = bottle_state_dir(identity)
info(f"removing orphan state dir {path}")
try:
shutil.rmtree(path, ignore_errors=True)
except OSError as e:
warn(f"failed to remove {path}: {e}")
+528
View File
@@ -0,0 +1,528 @@
"""Compose-spec rendering for a Docker bottle (PRD 0018, chunk 1).
`bottle_plan_to_compose(plan)` returns a Compose v2 spec dict
describing the per-bottle container topology — one project per
bottle instance, services for the agent + every applicable sidecar,
two networks, no named volumes.
Pure function. No I/O, no subprocess. Expects every launch-time
field (network names, CA host paths, etc.) on the plan's inner
plans to be populated; chunks 2+3 own that ordering. Chunk 1 just
encodes the translation so it can be unit-tested in isolation.
Conditional services follow the plan content (matches the
SDK-call branching in `launch.py` today):
- pipelock + agent: always.
- git-gate: iff plan.git_gate_plan.upstreams.
- egress: iff plan.egress_plan.routes.
- supervise: iff plan.supervise_plan is not None.
Naming:
- Compose project: `bot-bottle-<slug>`.
- Service names (inside the file): `agent`, `pipelock`,
`egress`, `git-gate`, `supervise`.
- `container_name:` matches today's pattern
(`bot-bottle-<service>-<slug>`) so dashboard/cleanup discovery
via the prefix scan keeps working through the transition.
- Network aliases preserve the current dial-by-shortname pattern
for `egress` / `supervise`, and add the long container-name as
an internal-network alias for `pipelock` / `git-gate` so any
caller still referencing the long name resolves.
Sidecars that are built (egress, git-gate, supervise) get a
compose `build:` block pointing at the repo Dockerfile; the
`image:` tag is set explicitly so cached images on the daemon
aren't rebuilt on every up.
"""
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
from typing import Any
from ...egress import (
EGRESS_HOSTNAME,
EGRESS_ROUTES_IN_CONTAINER,
)
from ...git_gate import GIT_GATE_HOSTNAME, git_gate_aggregate_extra_hosts
from ...log import die, warn
from ...pipelock import PIPELOCK_HOSTNAME
from ...supervise import (
CURRENT_CONFIG_DIR_IN_AGENT,
QUEUE_DIR_IN_CONTAINER,
SUPERVISE_HOSTNAME,
SUPERVISE_PORT,
)
from ...util import expand_tilde
from .bottle_plan import DockerBottlePlan
from .egress import (
EGRESS_CA_IN_CONTAINER,
EGRESS_PIPELOCK_CA_IN_CONTAINER,
)
from .git_gate import (
GIT_GATE_ACCESS_HOOK_IN_CONTAINER,
GIT_GATE_CREDS_DIR_IN_CONTAINER,
GIT_GATE_ENTRYPOINT_IN_CONTAINER,
GIT_GATE_HOOK_IN_CONTAINER,
)
from .pipelock import (
PIPELOCK_CA_CERT_IN_CONTAINER,
PIPELOCK_CA_KEY_IN_CONTAINER,
PIPELOCK_PORT,
)
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
from .sidecar_bundle import (
SIDECAR_BUNDLE_DOCKERFILE,
SIDECAR_BUNDLE_IMAGE,
sidecar_bundle_container_name,
)
# Repo root, used as the build context for the bundle Dockerfile.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
def bottle_plan_to_compose(plan: DockerBottlePlan) -> dict[str, Any]:
"""Render a Compose v2 spec dict from a fully-resolved
DockerBottlePlan.
The plan must have its inner plans (`proxy_plan`,
`git_gate_plan`, `egress_plan`, `supervise_plan`) populated
with launch-time fields — network names, CA host paths,
pipelock_proxy_url. The renderer doesn't validate; callers
feed it a fully-resolved plan or get an incomplete compose
spec back.
"""
project = f"bot-bottle-{plan.slug}"
services: dict[str, Any] = {
"sidecars": _sidecar_bundle_service(plan),
"agent": _agent_service(plan),
}
return {
"name": project,
"services": services,
"networks": _networks(plan),
}
def _networks(plan: DockerBottlePlan) -> dict[str, Any]:
"""Compose-managed networks with explicit `name:` matching the
existing slug-suffixed convention. Compose creates them on `up`
and destroys them on `down`. The internal one is `--internal`
(no default gateway); the egress one is a normal user-defined
bridge."""
return {
"internal": {
"name": plan.proxy_plan.internal_network,
"internal": True,
},
"egress": {
"name": plan.proxy_plan.egress_network,
},
}
def _bind(host: str | Path, target: str, *, read_only: bool = True) -> dict[str, Any]:
"""One bind-mount entry in the long-form `volumes:` shape.
Long form is preferred over `host:target:ro` strings because
it's easier to inspect in tests and survives whitespace in
host paths."""
return {
"type": "bind",
"source": str(host),
"target": target,
"read_only": read_only,
}
def _sidecar_bundle_service(plan: DockerBottlePlan) -> dict[str, Any]:
"""The `sidecars` service: one container per bottle, bundle
image, all four daemons under a Python init supervisor.
Mechanics:
- Daemon subset narrows via `BOT_BOTTLE_SIDECAR_DAEMONS`
env. pipelock is always present; egress / git-gate /
supervise are conditional on the plan.
- Volumes are the union of the four daemons' bind-mounts,
preserving the same in-container paths so each daemon
finds its config / hooks / CA where it expects.
- Environment is the union of *daemon-private* env vars
(EGRESS_UPSTREAM_PROXY, SUPERVISE_BOTTLE_SLUG, etc).
HTTPS_PROXY is NOT propagated here — see the comment in
egress_entrypoint.sh; setting it at the container level
would route git-gate's git fetches through pipelock,
which is wrong.
- Network aliases register every legacy short/long
hostname (pipelock, egress, git-gate, supervise plus
their `bot-bottle-<service>-<slug>` long forms) so
the agent's HTTPS_PROXY URL and any other inter-service
reference resolves to the bundle.
"""
daemons: list[str] = ["egress", "pipelock"]
if plan.git_gate_plan.upstreams:
daemons.append("git-gate")
if plan.supervise_plan is not None:
daemons.append("supervise")
env: list[str] = [f"BOT_BOTTLE_SIDECAR_DAEMONS={','.join(daemons)}"]
volumes: list[dict[str, Any]] = []
# --- pipelock ----------------------------------------------------
pp = plan.proxy_plan
volumes += [
_bind(pp.yaml_path, "/etc/pipelock.yaml"),
_bind(pp.ca_cert_host_path, PIPELOCK_CA_CERT_IN_CONTAINER),
_bind(pp.ca_key_host_path, PIPELOCK_CA_KEY_IN_CONTAINER),
]
# --- egress (always part of the bundle; the EGRESS_UPSTREAM_*
# env vars + ca bind-mounts are needed iff routes exist; when
# the bottle has no routes the egress daemon falls back to its
# `regular@9099` mode and is unused) -----------------------------
ep = plan.egress_plan
if ep.routes:
env.append(f"EGRESS_UPSTREAM_PROXY={ep.pipelock_proxy_url}")
env.append(f"EGRESS_UPSTREAM_CA={EGRESS_PIPELOCK_CA_IN_CONTAINER}")
volumes += [
_bind(ep.routes_path, EGRESS_ROUTES_IN_CONTAINER),
_bind(ep.mitmproxy_ca_host_path, EGRESS_CA_IN_CONTAINER),
_bind(ep.pipelock_ca_host_path, EGRESS_PIPELOCK_CA_IN_CONTAINER),
]
for token_env in sorted(ep.token_env_map.keys()):
env.append(token_env)
# --- git-gate ----------------------------------------------------
extra_hosts: list[str] = []
gp = plan.git_gate_plan
if gp.upstreams:
volumes += [
_bind(gp.entrypoint_script, GIT_GATE_ENTRYPOINT_IN_CONTAINER),
_bind(gp.hook_script, GIT_GATE_HOOK_IN_CONTAINER),
_bind(gp.access_hook_script, GIT_GATE_ACCESS_HOOK_IN_CONTAINER),
]
for u in gp.upstreams:
keypath = expand_tilde(u.identity_file)
volumes.append(_bind(
keypath,
f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-key",
))
extra_map = git_gate_aggregate_extra_hosts(gp.upstreams)
extra_hosts = [f"{host}:{ip}" for host, ip in sorted(extra_map.items())]
# --- supervise ---------------------------------------------------
sp = plan.supervise_plan
if sp is not None:
env += [
f"SUPERVISE_BOTTLE_SLUG={plan.slug}",
f"SUPERVISE_QUEUE_DIR={QUEUE_DIR_IN_CONTAINER}",
f"SUPERVISE_PORT={SUPERVISE_PORT}",
]
volumes.append({
"type": "bind",
"source": str(sp.queue_dir),
"target": QUEUE_DIR_IN_CONTAINER,
"read_only": False,
})
# Internal-network aliases: the agent reaches each daemon through
# its short name (pipelock / egress / git-gate / supervise) which
# the bundle answers as if it were the daemon itself.
internal_aliases = [
PIPELOCK_HOSTNAME,
EGRESS_HOSTNAME,
]
if gp.upstreams:
internal_aliases.append(GIT_GATE_HOSTNAME)
if sp is not None:
internal_aliases.append(SUPERVISE_HOSTNAME)
service: dict[str, Any] = {
"image": SIDECAR_BUNDLE_IMAGE,
"build": {
"context": _REPO_DIR,
"dockerfile": SIDECAR_BUNDLE_DOCKERFILE,
},
"container_name": sidecar_bundle_container_name(plan.slug),
"networks": {
"internal": {"aliases": internal_aliases},
"egress": None,
},
"environment": env,
"volumes": volumes,
}
if extra_hosts:
service["extra_hosts"] = extra_hosts
return service
def _agent_service(plan: DockerBottlePlan) -> dict[str, Any]:
"""Agent container. Runs `sleep infinity`; claude is `docker
exec -it`'d into it later. No TTY at the container level —
interactivity is per-exec. HTTP_PROXY/HTTPS_PROXY point at the
egress short-alias when an egress is declared, otherwise
straight at pipelock's container name. CA trust trio matches
the existing launch.py wiring."""
proxy_url = _agent_proxy_url(plan)
no_proxy = _agent_no_proxy(plan)
env: list[str] = [
f"HTTPS_PROXY={proxy_url}",
f"HTTP_PROXY={proxy_url}",
f"https_proxy={proxy_url}",
f"http_proxy={proxy_url}",
f"NO_PROXY={no_proxy}",
f"no_proxy={no_proxy}",
f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
]
# Forwarded vars (OAuth token, manifest host-interpolations):
# bare name → inherits from compose-up process env, value
# never lands on argv or in the compose file.
for name in sorted(plan.forwarded_env.keys()):
env.append(name)
service: dict[str, Any] = {
"image": plan.runtime_image,
"container_name": plan.container_name,
"command": ["sleep", "infinity"],
"networks": {"internal": None},
"environment": env,
}
if plan.use_runsc:
service["runtime"] = "runsc"
if plan.env_file and plan.env_file.exists() and plan.env_file.stat().st_size > 0:
service["env_file"] = [str(plan.env_file)]
volumes: list[dict[str, Any]] = []
if plan.supervise_plan is not None:
volumes.append(_bind(
plan.supervise_plan.current_config_dir,
CURRENT_CONFIG_DIR_IN_AGENT,
))
if volumes:
service["volumes"] = volumes
# The init supervisor inside the bundle owns intra-bundle
# daemon ordering, so the agent only waits for the bundle
# container itself.
service["depends_on"] = ["sidecars"]
return service
def _agent_proxy_url(plan: DockerBottlePlan) -> str:
"""Pick the agent's HTTP_PROXY. With egress declared, the agent
goes through egress (which in turn HTTPS_PROXYs to pipelock on
its outbound leg). Without egress, the agent talks straight to
pipelock."""
if plan.egress_plan.routes:
from .egress import EGRESS_PORT
return f"http://{EGRESS_HOSTNAME}:{EGRESS_PORT}"
return f"http://{PIPELOCK_HOSTNAME}:{PIPELOCK_PORT}"
def _agent_no_proxy(plan: DockerBottlePlan) -> str:
"""NO_PROXY for the agent. Matches the launch.py rules:
loopback always, supervise hostname when the supervise sidecar
is up (the MCP long-poll pattern needs to bypass pipelock's
idle timeout)."""
hosts = ["localhost", "127.0.0.1"]
if plan.supervise_plan is not None:
hosts.append(SUPERVISE_HOSTNAME)
return ",".join(hosts)
# --- Lifecycle helpers (PRD 0018 chunk 3) ----------------------------------
#
# The renderer above is pure. The helpers below own the I/O side:
# serialize the spec to disk, drive `docker compose up`, dump the
# merged log file on teardown, and `docker compose down` to clean up
# (networks are pre-created externally so `down` leaves them alone;
# the launch step removes them in its own teardown step).
COMPOSE_FILE_NAME = "docker-compose.yml"
COMPOSE_LOG_NAME = "compose.log"
COMPOSE_PROJECT_PREFIX = "bot-bottle-"
def compose_project_name(slug: str) -> str:
"""Stable mapping from slug → compose project. Matches the
`name:` field the renderer emits, so `docker compose ls`
enumeration and direct CLI invocations agree on the project
identifier."""
return f"{COMPOSE_PROJECT_PREFIX}{slug}"
def slug_from_compose_project(project: str) -> str:
"""Inverse of `compose_project_name`: strip the prefix to get
the underlying slug. Returns empty string if the project name
doesn't start with the expected prefix."""
if not project.startswith(COMPOSE_PROJECT_PREFIX):
return ""
return project[len(COMPOSE_PROJECT_PREFIX):]
def list_compose_projects(*, include_stopped: bool = True) -> list[str]:
"""All compose project names starting with `bot-bottle-`.
`include_stopped=True` (default) runs `docker compose ls --all`
so exited projects appear too; pass False to get only projects
with at least one running container.
Returns [] on docker daemon errors or malformed output rather
than raising — callers should treat the empty list as "no
projects discoverable", not "no projects exist"."""
argv = ["docker", "compose", "ls", "--format", "json"]
if include_stopped:
argv.insert(3, "--all")
try:
result = subprocess.run(
argv, capture_output=True, text=True, check=False,
)
except FileNotFoundError:
# docker binary not on PATH — same shape as a daemon-down
# error from the caller's POV: no projects discoverable.
return []
if result.returncode != 0:
warn(f"docker compose ls failed: {result.stderr.strip()}")
return []
try:
projects = json.loads(result.stdout or "[]")
except json.JSONDecodeError as e:
warn(f"docker compose ls returned malformed JSON: {e}")
return []
names: list[str] = []
for p in projects:
if not isinstance(p, dict):
continue
name = str(p.get("Name", ""))
if name.startswith(COMPOSE_PROJECT_PREFIX):
names.append(name)
return sorted(set(names))
def list_active_slugs(*, include_stopped: bool = False) -> list[str]:
"""Slugs (project name minus prefix) of currently-running
bottles. Used by the dashboard's operator-edit verbs to choose
a bottle to apply a config edit to."""
return sorted(
slug for slug in (
slug_from_compose_project(p)
for p in list_compose_projects(include_stopped=include_stopped)
) if slug
)
def compose_file_path(state_dir: Path) -> Path:
return state_dir / COMPOSE_FILE_NAME
def compose_log_path(state_dir: Path) -> Path:
return state_dir / COMPOSE_LOG_NAME
def write_compose_file(spec: dict[str, Any], path: Path) -> Path:
"""Serialize the compose dict to disk. JSON content with a
`.yml` filename — JSON is a strict subset of YAML 1.2 for the
constructs the renderer uses (mappings, lists, strings, bools,
nulls), and `docker compose -f file.yml` parses it as YAML.
Avoids a yaml dependency while keeping the file `cat`-readable.
"""
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(spec, indent=2, sort_keys=False) + "\n")
path.chmod(0o644)
return path
def _compose_argv(project: str, compose_file: Path, *cmd: str) -> list[str]:
return [
"docker", "compose",
"-p", project,
"-f", str(compose_file),
*cmd,
]
def compose_up(
project: str,
compose_file: Path,
*,
env: dict[str, str] | None = None,
) -> None:
"""`docker compose up -d` for the project. Env-inheritance is
via `env=` on the subprocess — every `environment: [NAME]` (bare
name) entry in the compose file resolves to whatever value
`NAME` has in `env` at exec time. Secrets never land on argv or
in the compose file."""
argv = _compose_argv(project, compose_file, "up", "-d")
result = subprocess.run(
argv, capture_output=True, text=True, env=env, check=False,
)
if result.returncode != 0:
sys.stderr.write(result.stderr)
die(f"docker compose up failed for project {project}")
def compose_dump_logs(project: str, compose_file: Path, output: Path) -> None:
"""Write the merged stdout/stderr of every service to `output`
using `docker compose logs --no-color --timestamps`. Best-effort
— failures here shouldn't block teardown. The interleaved single
file is what the user reads post-mortem; per-service tail still
works through `docker compose logs -f <service>` while the
project is up."""
output.parent.mkdir(parents=True, exist_ok=True)
argv = _compose_argv(project, compose_file, "logs", "--no-color", "--timestamps")
try:
with open(output, "wb") as f:
subprocess.run(
argv,
stdout=f,
stderr=subprocess.STDOUT,
check=False,
)
output.chmod(0o644)
except OSError as e:
warn(f"failed to write compose log to {output}: {e}")
def compose_down(project: str, compose_file: Path) -> None:
"""`docker compose down` for the project. External networks are
intentionally NOT removed by compose (`external: true` on the
networks block); the launch step's own teardown removes them
via `network_remove` so the per-bottle ephemeral subnet doesn't
accumulate."""
argv = _compose_argv(project, compose_file, "down")
result = subprocess.run(
argv, capture_output=True, text=True, check=False,
)
if result.returncode != 0:
warn(
f"docker compose down failed for project {project}: "
f"{result.stderr.strip()}"
)
__all__ = [
"COMPOSE_FILE_NAME",
"COMPOSE_LOG_NAME",
"COMPOSE_PROJECT_PREFIX",
"bottle_plan_to_compose",
"compose_down",
"compose_dump_logs",
"compose_file_path",
"compose_log_path",
"compose_project_name",
"compose_up",
"list_active_slugs",
"list_compose_projects",
"slug_from_compose_project",
"write_compose_file",
]
+123
View File
@@ -0,0 +1,123 @@
"""Docker-side egress helpers: port pin, in-container CA paths,
container naming, and the host-side mitmproxy CA mint. The
prepare-time routes-yaml rendering itself lives on the
platform-neutral `Egress` ABC — backends instantiate it directly.
The per-container `.start()` / `.stop()` lifecycle was removed in
PRD 0024 chunk 3; the sidecar bundle (PRD 0024) runs egress
under its python init supervisor."""
from __future__ import annotations
import os
import subprocess
from pathlib import Path
from ...log import die
# Listening port the egress daemon binds inside the bundle. The
# agent's HTTP_PROXY env var resolves to `http://egress:<port>`,
# and the bundle's network aliases route `egress` to itself.
EGRESS_PORT = int(os.environ.get("BOT_BOTTLE_EGRESS_PORT", "9099"))
# In-container path for mitmproxy's CA. The format is a single PEM
# file holding BOTH the cert and the private key, concatenated. The
# upstream-trust CA (pipelock's, so egress trusts the upstream
# leg) is a separate file because pipelock keeps a different CA on
# its end.
EGRESS_CA_IN_CONTAINER = "/home/mitmproxy/.mitmproxy/mitmproxy-ca.pem"
EGRESS_PIPELOCK_CA_IN_CONTAINER = (
"/home/mitmproxy/.mitmproxy/pipelock-ca.pem"
)
def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
"""Mint the per-bottle egress MITM CA via host `openssl req`.
Returns `(mitmproxy_pem, cert_only_pem)`:
- `mitmproxy_pem` is the single-PEM concat (cert + key)
mitmproxy reads from `~/.mitmproxy/mitmproxy-ca.pem`.
- `cert_only_pem` is the cert alone — installed into the agent's
trust store by `provision_ca` so the agent trusts the bumped
CONNECT cert egress presents.
Why openssl req (not the pipelock binary's `tls init`):
pipelock's CA generator stamps a non-standard `Subject Key
Identifier` on the CA (random rather than SHA-1 of the pubkey).
mitmproxy computes the `Authority Key Identifier` on each leaf
it mints as SHA-1(issuer's pubkey). openssl's chain validator
uses the leaf's AKI to find the issuer cert by SKI; pipelock's
SKI doesn't match → openssl reports "unable to get local issuer
certificate" even though the CA is right there in the trust
store. openssl req's `subjectKeyIdentifier=hash` extension uses
SHA-1(pubkey), matching mitmproxy's computation.
Both files live under `<stage_dir>/egress-ca/` (mode 644 —
`docker cp` preserves the mode into the container, where the
mitmproxy user (uid 1000) reads them; the host stage_dir is
mode 700 so the private key isn't world-exposed)."""
work = stage_dir / "egress-ca"
work.mkdir(exist_ok=True)
key_path = work / "ca-key.pem"
cert_path = work / "ca.pem"
cnf_path = work / "ca.cnf"
# RSA-2048 — broad mitmproxy compatibility (its default leaf-cert
# config matches RSA CAs without surprise), and openssl req's
# default behavior here is exactly what we want.
keygen = subprocess.run(
["openssl", "genrsa", "-out", str(key_path), "2048"],
capture_output=True, text=True, check=False,
)
if keygen.returncode != 0:
die(f"egress ca keygen failed: {keygen.stderr.strip()}")
# Standalone private key — never docker-cp'd, never bind-mounted
# (mitmproxy reads the cert+key concat below). Lock to owner-
# only so it doesn't sit at the default umask on disk.
key_path.chmod(0o600)
# `subjectKeyIdentifier=hash` makes openssl compute the SKI as
# SHA-1(pubkey), matching how mitmproxy computes the AKI on the
# leaves it later mints. Without this, chain validation breaks
# despite the CA being present in the trust store.
cnf_path.write_text(
"[req]\n"
"distinguished_name = req_dn\n"
"prompt = no\n"
"x509_extensions = v3_ca\n"
"\n"
"[req_dn]\n"
"O = bot-bottle\n"
"CN = bot-bottle egress CA\n"
"\n"
"[v3_ca]\n"
"basicConstraints = critical, CA:TRUE\n"
"keyUsage = critical, keyCertSign, cRLSign\n"
"subjectKeyIdentifier = hash\n"
)
cnf_path.chmod(0o644)
req = subprocess.run(
["openssl", "req", "-x509", "-new", "-nodes",
"-key", str(key_path),
"-sha256", "-days", "365",
"-config", str(cnf_path),
"-out", str(cert_path)],
capture_output=True, text=True, check=False,
)
if req.returncode != 0:
die(f"egress ca cert generation failed: {req.stderr.strip()}")
cert_path.chmod(0o644)
# mitmproxy reads cert + key from a single concatenated PEM file.
# This file IS bind-mounted into the egress container (chunk 3+),
# where mitmproxy runs as uid 1000 — so the host file has to be
# world-readable for the container's user to read it through the
# mount. Owner-only mode on the parent dir (state/<slug>/, under
# ~/.bot-bottle which inherits ~'s 0o700) is what actually
# restricts who can reach this file on the host.
mitm = work / "mitmproxy-ca.pem"
mitm.write_bytes(cert_path.read_bytes() + key_path.read_bytes())
mitm.chmod(0o644)
return (mitm, cert_path)
+343
View File
@@ -0,0 +1,343 @@
"""Host-side helper to apply a routes.yaml change to a running
egress sidecar (PRD 0014 retargeted by PRD 0017 chunk 3).
Used by the supervise dashboard when the operator approves an
egress-block proposal (or runs the operator-initiated
`routes edit <bottle>` verb). Fetches the current routes.yaml via
`docker exec cat`, validates the new content, writes it into the
sidecar via `docker cp`, then `docker kill --signal HUP` to make
the addon reload without dropping connections.
Also mirrors the new route hosts into pipelock's hostname allowlist
so the downstream leg lets them through — egress enforces
the path-aware allowlist on the agent leg, pipelock enforces the
hostname allowlist + DLP body scan on the upstream leg, and a
host added to one must be in the other or the request 403s
somewhere along the chain.
Raises EgressApplyError on any failure — the dashboard
surfaces the message and keeps the proposal pending so the
operator can retry.
"""
from __future__ import annotations
import json
import re
import subprocess
from pathlib import Path
from ...egress import EGRESS_ROUTES_IN_CONTAINER
from ...egress_addon_core import load_routes
from ...yaml_subset import YamlSubsetError, parse_yaml_subset
from .bottle_state import egress_state_dir
from .sidecar_bundle import sidecar_bundle_container_name
from .pipelock_apply import (
PipelockApplyError,
apply_allowlist_change,
fetch_current_allowlist,
parse_allowlist_content,
render_allowlist_content,
)
def _render_routes_payload(routes_list: list[dict[str, object]]) -> str:
"""Render a list-of-dicts routes payload as YAML matching the
shape `egress_render_routes` produces. The apply path
round-trips current routes.yaml through this so the file the
sidecar sees stays in the YAML format the addon expects."""
if not routes_list:
return "routes: []\n"
lines: list[str] = ["routes:"]
for entry in routes_list:
host = str(entry.get("host", ""))
lines.append(f' - host: "{host}"')
auth_scheme = entry.get("auth_scheme")
token_env = entry.get("token_env")
if auth_scheme and token_env:
lines.append(f' auth_scheme: "{auth_scheme}"')
lines.append(f' token_env: "{token_env}"')
paths = entry.get("path_allowlist") or []
if paths:
lines.append(" path_allowlist:")
for p in paths:
lines.append(f' - "{p}"')
return "\n".join(lines) + "\n"
def _egress_routes_host_path(slug: str) -> Path:
"""The bind-mount source for the egress sidecar's routes.yaml.
Must match what egress.prepare wrote at chunk-2 paths."""
return egress_state_dir(slug) / "egress_routes.yaml"
class EgressApplyError(RuntimeError):
"""Raised when fetch / apply fails. Caller renders to the
operator; does not crash the dashboard."""
def fetch_current_routes(slug: str) -> str:
"""Read the live routes.yaml from the running egress sidecar
for `slug`. Returns the file content as a string. Raises
EgressApplyError if the sidecar isn't reachable or the read
fails."""
container = sidecar_bundle_container_name(slug)
r = subprocess.run(
["docker", "exec", container, "cat", EGRESS_ROUTES_IN_CONTAINER],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
raise EgressApplyError(
f"could not read routes.yaml from {container}: "
f"{(r.stderr or '').strip() or 'container not running?'}"
)
return r.stdout
def validate_routes_content(content: str) -> None:
"""Syntactic check before SIGHUP — the addon's reload also
validates, but failing here keeps the old routes live and gives
the operator a clearer error than the addon's stderr line."""
try:
load_routes(content)
except ValueError as e:
raise EgressApplyError(
f"proposed routes.yaml is not valid: {e}"
) from e
def _hosts_in_routes(content: str) -> list[str]:
"""Extract the host list from a routes.yaml content string.
Uses the addon's own parser so any host the addon will match on
also lands in pipelock's allowlist. Returns sorted+deduped."""
try:
routes = load_routes(content)
except ValueError as e:
raise EgressApplyError(
f"proposed routes.yaml is not valid: {e}"
) from e
return sorted({r.host for r in routes if r.host})
# Pipelock's allowlist parser accepts only literal hostnames:
# `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals,
# stray characters) is silently dropped from the mirror so the
# pipelock apply doesn't fail parse before the new yaml is even
# written. The dropped hosts stay on egress's route table —
# but the addon does exact-host match only, so they'll never
# match anything either. (Wildcard host matching was removed —
# see `match_route` in egress_addon_core for the rationale.)
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")
def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
"""Drop any host pipelock's allowlist parser would reject.
Order preserved."""
return [h for h in hosts if _PIPELOCK_HOST_RE.match(h)]
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
"""Ensure every pipelock-compatible `hosts` entry is on
pipelock's allowlist. Fetches pipelock's current allowlist,
merges, re-applies. Hosts pipelock can't represent (wildcards,
etc.) are silently skipped — they stay live on egress
but aren't enforced at pipelock. No-op if every host is already
present (apply still restarts pipelock if any host is new).
Raises EgressApplyError on pipelock failures so the
caller's diff/audit reflects the half-state."""
safe_hosts = _pipelock_safe_hosts(hosts)
try:
current = fetch_current_allowlist(slug)
existing = parse_allowlist_content(current)
merged = sorted(set(existing) | set(safe_hosts))
if merged == sorted(existing):
return # nothing to add
apply_allowlist_change(slug, render_allowlist_content(merged))
except PipelockApplyError as e:
# Mirror runs BEFORE the egress write, so egress
# is unchanged on this failure path. Report it as a
# pipelock-side problem so the operator looks in the right
# place; their `pipelock edit` flow can repair manually.
raise EgressApplyError(
f"pipelock allowlist mirror failed (egress NOT "
f"updated): {e}. Fix pipelock's allowlist manually with "
f"`pipelock edit <bottle>` then retry the proposal."
) from e
def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
"""Apply `new_content` to the egress sidecar for `slug`:
1. Fetch current routes.yaml (for the before-diff).
2. Validate the new content via the addon's own parser.
3. Mirror the route hosts onto pipelock's allowlist (so the
downstream hostname gate lets them through).
4. Write to a temp file, `docker cp` into the egress
sidecar.
5. `docker kill --signal HUP` so the addon reloads.
Order matters: pipelock first, then egress. If the
pipelock step fails, egress hasn't been touched and the
old routes stay live. If the egress step fails after
pipelock succeeded, pipelock has the host in its allowlist but
egress doesn't enforce it yet — harmless extra-permissive
state at pipelock, and a re-approval will land the egress
side.
Returns (before, after) where `after` == `new_content`. Raises
EgressApplyError on any step."""
container = sidecar_bundle_container_name(slug)
before = fetch_current_routes(slug)
validate_routes_content(new_content)
# Pipelock mirror first — if it fails, egress stays intact
# and the operator gets a clear error about the half-state.
_mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content))
# routes.yaml is bind-mounted into the egress container as a
# SINGLE FILE. Docker single-file bind mounts pin the source
# inode at mount time; write-temp-then-rename swaps the inode
# on the host, which leaves the container's mount pointing at
# the now-orphaned old inode (so the SIGHUP'd reload re-reads
# unchanged content). Write in-place instead. Lose file-level
# atomicity, but the apply path issues SIGHUP only AFTER the
# write returns, and the addon's `load_routes` raises
# `ValueError` on a partial read and keeps the previous
# in-memory routes — so a SIGHUP that hypothetically raced an
# in-flight write is non-disruptive.
target = _egress_routes_host_path(slug)
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(new_content)
# mitmproxy in the container reads through the bind mount as
# uid 1000; the host file has to be world-readable for that
# read to succeed (parent dir at 0o700 still restricts who
# can reach the file on the host). Routes content is not
# secret — tokens live in the container's environ — so 0o644
# is the right trade-off.
target.chmod(0o644)
sig = subprocess.run(
["docker", "kill", "--signal", "HUP", container],
capture_output=True, text=True, check=False,
)
if sig.returncode != 0:
raise EgressApplyError(
f"failed to SIGHUP {container}: "
f"{(sig.stderr or '').strip()}"
)
return before, new_content
def _merge_single_route(
current_yaml: str, new_route: dict[str, object],
) -> str:
"""Merge a single proposed route into the current routes.yaml
content, returning the merged YAML string.
Behavior:
- If `new_route['host']` is NOT in the current routes →
append the route.
- If the host IS already present → union the path_allowlist
entries (proposed existing). The existing `auth_scheme`
and `token_env` are preserved — agent-proposed auth changes
on an existing host are ignored, matching the tool's
documented semantics.
Round-trips the file through `yaml_subset` (the same parser
the addon uses), so the merged output is in the YAML format
the sidecar reads. Token VALUES never appear here; the routes
file carries only env-var slot NAMES."""
try:
cfg = parse_yaml_subset(current_yaml)
except YamlSubsetError as e:
raise EgressApplyError(
f"current routes.yaml is not valid YAML: {e}"
) from e
routes = cfg.get("routes")
if not isinstance(routes, list):
raise EgressApplyError(
"current routes.yaml: 'routes' is not a list"
)
new_host = str(new_route.get("host", "")).lower()
if not new_host:
raise EgressApplyError(
"proposed route is missing 'host'"
)
proposed_paths = list(new_route.get("path_allowlist") or [])
# Look for an existing entry with the same host (case-insensitive).
for entry in routes:
if not isinstance(entry, dict):
continue
if str(entry.get("host", "")).lower() == new_host:
# Merge path_allowlist: union proposed + existing, ordered
# by first-seen so existing paths stay in original order.
existing_paths: list[str] = list(entry.get("path_allowlist") or [])
seen = {p: None for p in existing_paths}
for p in proposed_paths:
seen.setdefault(p, None)
merged_paths = list(seen.keys())
if merged_paths:
entry["path_allowlist"] = merged_paths
# Preserve existing auth — tool description says agent-
# proposed auth on an existing host is ignored.
break
else:
# Host not present; build a new route entry from the
# proposed fields. Need to assign a token_env slot if
# `auth` was proposed (otherwise the addon's parser rejects
# a half-set auth pair). Slots: count existing slots, pick
# the next free index.
entry = {"host": new_route["host"]}
if proposed_paths:
entry["path_allowlist"] = proposed_paths
auth = new_route.get("auth")
if isinstance(auth, dict) and auth.get("scheme") and auth.get("token_ref"):
existing_slots = sorted({
str(r.get("token_env"))
for r in routes
if isinstance(r, dict) and r.get("token_env")
})
next_idx = len(existing_slots)
entry["auth_scheme"] = str(auth["scheme"])
entry["token_env"] = f"EGRESS_TOKEN_{next_idx}"
# NOTE: the addon reads token VALUES from its container's
# environ keyed by token_env. A newly-added auth route at
# runtime points at a slot that has no env value → the
# addon will 403 with "token env unset" until the operator
# arranges for the value to land in the container's env.
# Recording this here so the operator-facing diff carries
# the slot name they'll need to provision.
routes.append(entry)
return _render_routes_payload(routes)
def add_route(slug: str, proposed_route_json: str) -> tuple[str, str]:
"""Apply a single-route addition to the egress. Parses the
agent's proposed route, fetches the current routes file, merges,
and applies via `apply_routes_change`. Returns (before, after)
full-file content for the audit log."""
try:
proposed = json.loads(proposed_route_json)
except json.JSONDecodeError as e:
raise EgressApplyError(
f"proposed route is not valid JSON: {e}"
) from e
if not isinstance(proposed, dict):
raise EgressApplyError(
"proposed route must be a JSON object"
)
current = fetch_current_routes(slug)
merged = _merge_single_route(current, proposed)
return apply_routes_change(slug, merged)
__all__ = [
"EgressApplyError",
"add_route",
"apply_routes_change",
"fetch_current_routes",
"validate_routes_content",
]
+80
View File
@@ -0,0 +1,80 @@
"""Active-agent enumeration for the docker backend.
Mirrors `backend/smolmachines/enumerate.py`: returns
`ActiveAgent` records the CLI `list active` command and the
dashboard agents pane consume. Empty when docker isn't reachable
— gated by `has_backend('docker')` at the cross-backend caller
so this module trusts that docker is available when called.
The parser (`_parse_services_by_project`) is exposed for direct
unit testing; the docker `docker ps` invocation is in
`_query_services_by_project`."""
from __future__ import annotations
import subprocess
from .. import ActiveAgent
from .bottle_state import read_metadata
from .compose import compose_project_name, list_active_slugs
def enumerate_active() -> list[ActiveAgent]:
"""All currently-running docker-backed agents. Caller is
responsible for gating on `has_backend('docker')` if it
matters; if docker is missing the `docker ps` call below
returns an empty list silently."""
slugs = list_active_slugs(include_stopped=False)
if not slugs:
return []
services_by_project = _query_services_by_project()
out: list[ActiveAgent] = []
for slug in slugs:
project = compose_project_name(slug)
services = services_by_project.get(project, set())
metadata = read_metadata(slug)
out.append(ActiveAgent(
backend_name="docker",
slug=slug,
agent_name=metadata.agent_name if metadata else "?",
started_at=metadata.started_at if metadata else "",
services=tuple(sorted(services)),
))
return out
def _parse_services_by_project(stdout: str) -> dict[str, set[str]]:
"""Parse `docker ps` output formatted as
`<project-label>\\t<service-label>` (one line per container)
into a `{project: {service, ...}}` mapping. Pure function for
testing — the docker invocation is in `_query_services_by_project`."""
out: dict[str, set[str]] = {}
for line in stdout.splitlines():
project, _, service = line.partition("\t")
if not project or not service:
continue
out.setdefault(project, set()).add(service)
return out
def _query_services_by_project() -> dict[str, set[str]]:
"""One `docker ps` call → `{project: {service, ...}}`. Used
by the CLI's `list active` and the dashboard's agents pane —
one subprocess per refresh tick, not one per bottle."""
try:
r = subprocess.run(
[
"docker", "ps",
"--filter", "label=com.docker.compose.project",
"--format",
'{{.Label "com.docker.compose.project"}}'
"\t"
'{{.Label "com.docker.compose.service"}}',
],
capture_output=True, text=True, check=False,
)
except FileNotFoundError:
return {}
if r.returncode != 0:
return {}
return _parse_services_by_project(r.stdout or "")
+16
View File
@@ -0,0 +1,16 @@
"""Docker-side git-gate constants: in-container paths the renderer's
bind-mounts target + the listening port. The prepare-time entrypoint
/ hook render lives on the platform-neutral `GitGate` ABC backends
instantiate it directly. The git-gate daemon's container lifecycle
is owned by the sidecar bundle (PRD 0024)."""
from __future__ import annotations
GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh"
GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive"
GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook"
GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds"
# git daemon's default listening port.
GIT_GATE_PORT = 9418
+218
View File
@@ -0,0 +1,218 @@
"""Launch step for the Docker bottle backend.
PRD 0018 chunk 3: each instance is one `docker compose` project.
The flow is:
1. Build the agent's base + derived image (compose builds the
sidecar images via the `build:` directive on first up).
2. Pre-create the per-bottle networks. We do this outside compose
so we can inspect the assigned internal CIDR and embed it in
pipelock's yaml (compose's `external: true` lets the compose
file reference these pre-existing networks).
3. Mint the per-bottle CAs (chunk 2 writes them under
state/<slug>/{pipelock,egress}/).
4. Re-render pipelock yaml with the now-known internal CIDR so
the SSRF allowlist exempts the bottle's own subnet.
5. Populate the inner plans with launch-time fields so the
renderer can read network names, CA paths, pipelock URL.
6. Render the compose spec, write it to
state/<slug>/docker-compose.yml, write metadata.json.
7. `docker compose up -d` (token + OAuth values flow into the
compose subprocess env so `environment: [NAME]` bare-name
entries inherit without rendering values into the file).
8. Provision (CA install, prompt copy, skills, git, supervise
config) unchanged, uses `docker exec`.
9. Yield a DockerBottle handle. `exec_claude` runs claude via
`docker exec -it` exactly like the pre-compose world.
Teardown (ExitStack callbacks fire in reverse):
- Dump `docker compose logs --no-color --timestamps` to
state/<slug>/compose.log (best-effort).
- `docker compose down` removes the project's containers (not the
external networks).
- `network_remove` deletes the two networks we pre-created.
"""
from __future__ import annotations
import dataclasses
import os
from contextlib import ExitStack, contextmanager
from pathlib import Path
from typing import Callable, Generator
from ...egress import egress_resolve_token_values
from ...log import info
from . import network as network_mod
from . import util as docker_mod
from .bottle import DockerBottle
from .bottle_plan import DockerBottlePlan
from .bottle_state import (
bottle_state_dir,
egress_state_dir,
pipelock_state_dir,
)
from .compose import (
bottle_plan_to_compose,
compose_down,
compose_dump_logs,
compose_file_path,
compose_log_path,
compose_project_name,
compose_up,
write_compose_file,
)
from .egress import egress_tls_init
from .pipelock import (
BUNDLE_LOCAL_PIPELOCK_URL,
pipelock_tls_init,
)
# Where the repo root lives, for `docker build` context. Computed once.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
@contextmanager
def launch(
plan: DockerBottlePlan,
*,
provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]:
"""Build, launch, and provision a Docker bottle via compose.
Teardown on exit."""
stack = ExitStack()
def teardown() -> None:
try:
stack.close()
except BaseException:
# Teardown must not raise; swallow so the caller's
# __exit__ path can still propagate the original error.
pass
try:
# Step 1: agent image build. Sidecar images get built lazily by
# `docker compose up` via the renderer's `build:` directives.
docker_mod.build_image(
plan.image, _REPO_DIR,
dockerfile=plan.dockerfile_path,
)
if plan.derived_image:
docker_mod.build_image_with_cwd(
plan.derived_image, plan.image, plan.spec.user_cwd
)
# Networks: compose-managed. The names are derived
# deterministically from the slug so the renderer can put
# them on the services and `compose up` creates them with
# those names. The empirical spike confirmed pipelock's
# SSRF guard only checks proxied-request destinations, not
# source IPs — so the bottle's own internal CIDR doesn't
# need to be in `ssrf.ip_allowlist`. Pre-create + CIDR
# introspection are gone; compose owns the network
# lifecycle.
internal_network = network_mod.network_name_for_slug(plan.slug)
egress_network = network_mod.network_egress_name_for_slug(plan.slug)
# Mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
egress_ca_host, egress_ca_cert_only = egress_tls_init(
egress_state_dir(plan.slug),
)
# Populate launch-time fields on every inner plan so the
# renderer reads concrete network names, CA paths, and
# pipelock URL.
proxy_plan = dataclasses.replace(
plan.proxy_plan,
internal_network=internal_network,
internal_network_cidr="",
egress_network=egress_network,
ca_cert_host_path=ca_cert_host,
ca_key_host_path=ca_key_host,
)
git_gate_plan = plan.git_gate_plan
if git_gate_plan.upstreams:
git_gate_plan = dataclasses.replace(
git_gate_plan,
internal_network=internal_network,
egress_network=egress_network,
)
egress_plan = plan.egress_plan
if egress_plan.routes:
egress_plan = dataclasses.replace(
egress_plan,
internal_network=internal_network,
egress_network=egress_network,
mitmproxy_ca_host_path=egress_ca_host,
mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
pipelock_ca_host_path=ca_cert_host,
pipelock_proxy_url=BUNDLE_LOCAL_PIPELOCK_URL,
)
supervise_plan = plan.supervise_plan
if supervise_plan is not None:
supervise_plan = dataclasses.replace(
supervise_plan,
internal_network=internal_network,
)
plan = dataclasses.replace(
plan,
proxy_plan=proxy_plan,
git_gate_plan=git_gate_plan,
egress_plan=egress_plan,
supervise_plan=supervise_plan,
)
# Step 6: render + write the compose file. metadata.json
# was written at prepare time and already carries
# compose_project; nothing to update here.
state_dir = bottle_state_dir(plan.slug)
spec = bottle_plan_to_compose(plan)
compose_file = write_compose_file(spec, compose_file_path(state_dir))
project = compose_project_name(plan.slug)
# Step 7: compose up. Token values + the OAuth placeholder
# flow through subprocess env; the compose file holds only
# bare names for the secret-carrying entries.
token_values: dict[str, str] = {}
if plan.egress_plan.routes:
token_values = egress_resolve_token_values(
plan.egress_plan.token_env_map, dict(os.environ),
)
compose_env: dict[str, str] = {
**os.environ,
**plan.forwarded_env,
**token_values,
}
info(
f"docker compose up -d (project {project}, "
f"{len(spec['services'])} services)"
)
compose_up(project, compose_file, env=compose_env)
# Register teardown in reverse order: log dump first, then
# `compose down`. Networks come down last via callbacks
# registered in step 2.
stack.callback(compose_down, project, compose_file)
stack.callback(
compose_dump_logs, project, compose_file, compose_log_path(state_dir),
)
# Step 8: provision. Unchanged — uses `docker exec` against
# the agent container by its known name.
prompt_path = provision(plan, plan.container_name)
# Step 9: yield. exec_claude continues to use `docker exec -it`
# — the agent runs `sleep infinity` per the renderer's
# service spec.
yield DockerBottle(
plan.container_name,
teardown,
prompt_path,
agent_command=plan.agent_command,
agent_prompt_mode=plan.agent_prompt_mode,
)
finally:
teardown()
+133
View File
@@ -0,0 +1,133 @@
"""Docker network plumbing for the per-agent egress topology.
The agent container sits on a Docker `--internal` network (no default
gateway). Pipelock straddles that network and a per-agent user-defined
bridge for upstream egress. We deliberately do NOT use Docker's legacy
`bridge` network because only user-defined bridges run Docker's
embedded DNS resolver, which pipelock needs to resolve api.anthropic.com
and similar upstream hostnames.
Naming: bot-bottle-net-<slug> (internal),
bot-bottle-egress-<slug> (egress). Numeric suffix on conflict
(-2, -3, ..., capped at 100).
"""
from __future__ import annotations
import subprocess
from ...log import die, info, warn
def network_name_for_slug(slug: str) -> str:
return f"bot-bottle-net-{slug}"
def network_egress_name_for_slug(slug: str) -> str:
return f"bot-bottle-egress-{slug}"
def network_exists(name: str) -> bool:
"""Uses `docker network inspect`, not `docker network ls -f name=...`,
because the latter does substring matching."""
return (
subprocess.run(
["docker", "network", "inspect", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode
== 0
)
def _network_create_with_prefix(base: str, internal: bool) -> str:
"""Create a per-agent Docker network whose name is <base> (with
-2, -3, ... appended on conflict, capped at 100). Returns the
resolved name."""
name = base
suffix = 2
while network_exists(name):
name = f"{base}-{suffix}"
suffix += 1
if suffix > 100:
die(
f"could not find a free network name after {base}-99; "
f"clean up old networks with 'docker network rm <name>'"
)
kind = "internal" if internal else "bridge (egress)"
args = ["docker", "network", "create"]
if internal:
args.append("--internal")
args.append(name)
info(f"creating {kind} network {name}")
result = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=False)
if result.returncode != 0:
flag = " --internal" if internal else ""
die(f"docker network create{flag} {name} failed")
return name
def network_create_internal(slug: str) -> str:
"""Create a Docker `--internal` network for the agent. Returns the
resolved name."""
return _network_create_with_prefix(network_name_for_slug(slug), internal=True)
def network_create_egress(slug: str) -> str:
"""Create a per-agent user-defined bridge (NOT the legacy `bridge`)
so the pipelock sidecar has working DNS for upstream hostnames."""
return _network_create_with_prefix(network_egress_name_for_slug(slug), internal=False)
def network_inspect_cidr(name: str) -> str:
"""Return the IPv4 CIDR Docker assigned to a user-defined network.
Used by pipelock's SSRF guard exception: the bottle's internal
network sits in RFC1918 space, so pipelock's `internal:` list
would block any agent request whose destination resolves there
including the cred-proxy sidecar's address. Adding the
network's CIDR to pipelock's `ssrf.ip_allowlist` lets traffic
targeted at the bottle's own sidecars through while pipelock
still body-scans and api_allowlist-gates as usual."""
result = subprocess.run(
["docker", "network", "inspect",
"--format", "{{range .IPAM.Config}}{{.Subnet}}{{end}}", name],
capture_output=True, text=True, check=False,
)
if result.returncode != 0:
die(f"docker network inspect {name} failed: {result.stderr.strip()}")
cidr = result.stdout.strip()
if not cidr:
die(f"network {name!r} has no IPAM subnet configured")
return cidr
def network_attach(network: str, container: str) -> None:
result = subprocess.run(
["docker", "network", "connect", network, container],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
if result.returncode != 0:
die(f"docker network connect {network} {container} failed")
def network_remove(name: str) -> bool:
"""Idempotent: a missing network is treated as success so this can
be called from a teardown trap. Returns True if removal succeeded
(or the network was already gone)."""
if not network_exists(name):
return True
result = subprocess.run(
["docker", "network", "rm", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
if result.returncode != 0:
warn(f"failed to remove network {name}; clean up with 'docker network rm {name}'")
return False
return True
+81
View File
@@ -0,0 +1,81 @@
"""Docker-side pipelock helpers: image pin, container naming, and
the one-shot `pipelock tls init` host-side CA mint. The
prepare-time YAML rendering itself lives on the platform-neutral
`PipelockProxy` ABC backends instantiate it directly.
The per-container `.start()` / `.stop()` lifecycle was deleted in
PRD 0024 chunk 3; compose-up owns the container lifecycle (PRD
0018) and the bundle path (PRD 0024) collapses pipelock + egress
+ git-gate + supervise into one container."""
from __future__ import annotations
import os
import subprocess
from pathlib import Path
from ...log import die
# Re-exported for the compose renderer + smolmachines launch step
# (they used to import these from this module before they moved to
# the platform-neutral pipelock module).
from ...pipelock import ( # noqa: F401
PIPELOCK_CA_CERT_IN_CONTAINER,
PIPELOCK_CA_KEY_IN_CONTAINER,
)
# Pipelock image, pinned by digest. The digest is the multi-arch image
# index for ghcr.io/luckypipewrench/pipelock:2.3.0.
PIPELOCK_IMAGE = os.environ.get(
"BOT_BOTTLE_PIPELOCK_IMAGE",
"ghcr.io/luckypipewrench/pipelock@sha256:3b1a39417b98406ddc5dc2d8fcb42865ddc0c68a43d355db55f0f8cb06bc6de9",
)
# Listening port for pipelock's forward proxy.
PIPELOCK_PORT = os.environ.get("BOT_BOTTLE_PIPELOCK_PORT", "8888")
# The URL egress dials for its upstream HTTPS_PROXY. egress and
# pipelock share the same container's network namespace inside the
# sidecar bundle, so loopback reaches pipelock directly — no docker
# DNS aliases involved.
BUNDLE_LOCAL_PIPELOCK_URL = f"http://127.0.0.1:{PIPELOCK_PORT}"
def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
"""Generate a fresh per-bottle CA via a one-shot pipelock container.
Runs `pipelock tls init` against a host-mounted scratch dir, leaving
`ca.pem` (public cert, mode 600) and `ca-key.pem` (private key, mode
600) under `<stage_dir>/pipelock-ca/`. Returns the two host paths.
The image is pinned (same digest the running sidecar uses) so the
generated CA matches what the sidecar expects. Output is owned by
whatever UID the one-shot ran as; the compose renderer's
bind-mounts pin the files in place at runtime, so ownership
inside the running sidecar (root in pipelock's distroless image)
is independent."""
work = stage_dir / "pipelock-ca"
work.mkdir(exist_ok=True)
result = subprocess.run(
["docker", "run", "--rm",
"-v", f"{work}:/h",
"-e", "PIPELOCK_HOME=/h",
PIPELOCK_IMAGE, "tls", "init"],
capture_output=True,
text=True,
check=False,
)
if result.returncode != 0:
die(f"pipelock tls init failed: {result.stderr.strip()}")
cert = work / "ca.pem"
key = work / "ca-key.pem"
if not cert.is_file() or not key.is_file():
die(f"pipelock tls init did not produce ca files in {work}")
# Explicit perms in case a future pipelock release changes
# defaults. Pipelock runs as root in its distroless image and
# bind-mounts work with 0o600 (root reads everything); the key
# has no reason to be readable to anyone else on the host.
key.chmod(0o600)
cert.chmod(0o644)
return (cert, key)
+200
View File
@@ -0,0 +1,200 @@
"""pipelock_apply — host-side helper to apply an api_allowlist
change to a running pipelock sidecar (PRD 0015).
Used by the supervise dashboard when the operator approves a
pipelock-block proposal (or runs the operator-initiated `pipelock
edit <bottle>` verb). Fetches the current pipelock.yaml via `docker
exec`, parses it, swaps the api_allowlist with the proposed hosts,
re-renders, writes back via the bind-mount path, then signals the
bundle supervisor to restart the pipelock daemon (`docker kill
--signal USR1`) so
pipelock picks up the new config.
v1 uses restart, not SIGHUP pipelock has no in-process reload
hook and adding one is the "SIGHUP reload for pipelock" open
question in PRD 0015. Restart drops in-flight outbound calls; the
agent's HTTP client retries pick up against the restarted proxy.
"""
from __future__ import annotations
import os
import re
import subprocess
import tempfile
from pathlib import Path
from ...pipelock import pipelock_render_yaml
from ...yaml_subset import YamlSubsetError, parse_yaml_subset
from .bottle_state import pipelock_state_dir
from .sidecar_bundle import sidecar_bundle_container_name
def _pipelock_yaml_host_path(slug: str) -> Path:
"""The bind-mount source for the pipelock sidecar's
pipelock.yaml matches what pipelock.prepare wrote at chunk-2
paths."""
return pipelock_state_dir(slug) / "pipelock.yaml"
PIPELOCK_YAML_IN_CONTAINER = "/etc/pipelock.yaml"
# Allowlist proposals are one-hostname-per-line. Blank lines and
# `#`-prefixed comments are ignored. The character set matches the
# supervise sidecar's syntactic check on the agent's pipelock-block
# proposal (alphanumerics + dot/dash/underscore).
_HOST_OK = re.compile(r"^[A-Za-z0-9_.-]+$")
class PipelockApplyError(RuntimeError):
"""Raised when fetch / parse / apply fails. The dashboard renders
the message and keeps the proposal pending never crashes."""
def parse_allowlist_content(content: str) -> list[str]:
"""One hostname per line. Blanks and `#` comments are ignored.
Raises PipelockApplyError if a line has a disallowed character."""
hosts: list[str] = []
for i, raw_line in enumerate(content.splitlines(), start=1):
line = raw_line.strip()
if not line or line.startswith("#"):
continue
if not _HOST_OK.match(line):
raise PipelockApplyError(
f"allowlist line {i}: {line!r} has disallowed characters"
)
hosts.append(line)
return hosts
def render_allowlist_content(hosts: list[str]) -> str:
"""Hosts → one-per-line string (the operator-facing format)."""
if not hosts:
return ""
return "\n".join(hosts) + "\n"
def fetch_current_yaml(slug: str) -> str:
"""Read the live /etc/pipelock.yaml from the sidecar bundle.
Uses `docker cp` because pipelock inside the bundle is the
distroless pipelock binary with no shell, and `docker cp` is a
daemon-API tarball copy that works regardless of what's
available inside the container.
Raises PipelockApplyError if the read fails."""
container = sidecar_bundle_container_name(slug)
fd, tmp_path = tempfile.mkstemp(prefix="cb-pipelock-fetch.", suffix=".yaml")
os.close(fd)
try:
r = subprocess.run(
[
"docker", "cp",
f"{container}:{PIPELOCK_YAML_IN_CONTAINER}", tmp_path,
],
capture_output=True, text=True, check=False,
)
if r.returncode != 0:
raise PipelockApplyError(
f"could not fetch pipelock.yaml from {container}: "
f"{(r.stderr or '').strip() or 'container not running?'}"
)
return Path(tmp_path).read_text()
finally:
try:
Path(tmp_path).unlink()
except OSError:
pass
def fetch_current_allowlist(slug: str) -> str:
"""Fetch the live yaml, extract api_allowlist, render as one-per-
line the operator-facing format for the TUI / agent's
current-config mount."""
yaml = fetch_current_yaml(slug)
try:
cfg = parse_yaml_subset(yaml)
except YamlSubsetError as e:
raise PipelockApplyError(f"running pipelock yaml: {e}") from e
hosts = cfg.get("api_allowlist", [])
if not isinstance(hosts, list):
raise PipelockApplyError(
"running pipelock yaml: api_allowlist is not a list"
)
return render_allowlist_content([str(h) for h in hosts])
def apply_allowlist_change(
slug: str, new_allowlist_content: str,
) -> tuple[str, str]:
"""Apply `new_allowlist_content` to the sidecar bundle:
1. Parse the proposed hosts (one per line).
2. Fetch + parse current pipelock.yaml.
3. Replace api_allowlist with the proposed hosts; re-render.
4. Write the new yaml to the bind-mount source.
5. `docker kill --signal USR1 <bundle>` so the supervisor
restarts the pipelock daemon in place (leaving egress,
git-gate, and supervise running). Pipelock has no
in-process reload; the supervisor's per-daemon restart
keeps the agent's MCP socket alive — a whole-bundle
`docker restart` would bounce supervise too.
Returns (before, after) where both are one-per-line allowlist
strings (operator-facing format). Raises PipelockApplyError on
any failure; the sidecar's existing config stays in place until
the host write succeeds, and the SIGUSR1 is what makes it
live."""
new_hosts = parse_allowlist_content(new_allowlist_content)
container = sidecar_bundle_container_name(slug)
current_yaml = fetch_current_yaml(slug)
try:
cfg = parse_yaml_subset(current_yaml)
except YamlSubsetError as e:
raise PipelockApplyError(f"running pipelock yaml: {e}") from e
current_hosts = cfg.get("api_allowlist", [])
if not isinstance(current_hosts, list):
raise PipelockApplyError(
"running pipelock yaml: api_allowlist is not a list"
)
before = render_allowlist_content([str(h) for h in current_hosts])
after = render_allowlist_content(new_hosts)
cfg["api_allowlist"] = new_hosts
rendered = pipelock_render_yaml(cfg)
# pipelock.yaml is bind-mounted into the container as a SINGLE
# FILE — same Docker single-file inode issue as egress_apply:
# write-temp-then-rename swaps the host inode and leaves the
# container's mount pointing at the orphaned old one. Write
# in-place. The SIGUSR1 below makes the new content live
# (pipelock has no in-process reload, so the supervisor
# restarts the pipelock daemon in response).
target = _pipelock_yaml_host_path(slug)
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(rendered)
# pipelock runs as root in its distroless image — any mode is
# fine — but 0o600 matches what prepare wrote.
target.chmod(0o600)
restart = subprocess.run(
["docker", "kill", "--signal", "USR1", container],
capture_output=True, text=True, check=False,
)
if restart.returncode != 0:
raise PipelockApplyError(
f"failed to signal {container} for pipelock restart: "
f"{(restart.stderr or '').strip()}"
)
return before, after
__all__ = [
"PIPELOCK_YAML_IN_CONTAINER",
"PipelockApplyError",
"apply_allowlist_change",
"fetch_current_allowlist",
"fetch_current_yaml",
"parse_allowlist_content",
"render_allowlist_content",
]
+272
View File
@@ -0,0 +1,272 @@
"""Prepare step for the Docker bottle backend.
`resolve_plan` does all host-side resolution (image and container
names, env-file, prompt-file, proxy plan, runtime detection) and
returns a frozen DockerBottlePlan. No Docker resources are created;
the only side effects are scratch files under `stage_dir` and a probe
of `docker info`. Cross-backend host-side validation has already run
via the base class's `prepare` template before this is called.
"""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from ...agent_provider import runtime_for
from ...egress import Egress
from ...env import ResolvedEnv, resolve_env
from ...git_gate import GitGate
from ...log import die
from ...pipelock import PipelockProxy
from ...supervise import Supervise
from .. import BottleSpec
from . import util as docker_mod
from .bottle_plan import DockerBottlePlan
from .bottle_state import (
BottleMetadata,
agent_state_dir,
bottle_identity,
clear_preserve_marker,
egress_state_dir,
git_gate_state_dir,
per_bottle_dockerfile,
per_bottle_dockerfile_path,
per_bottle_image_tag,
pipelock_state_dir,
supervise_state_dir,
write_metadata,
)
from .sidecar_bundle import sidecar_bundle_container_name
def resolve_plan(
spec: BottleSpec,
*,
stage_dir: Path,
) -> DockerBottlePlan:
"""Resolve Docker-specific names and write scratch files. Trusts
that the agent and its skills/git-gate keys are present
validation already ran in the base class."""
docker_mod.require_docker()
proxy = PipelockProxy()
git_gate = GitGate()
egress = Egress()
supervise = Supervise()
manifest = spec.manifest
agent = manifest.agents[spec.agent_name]
bottle = manifest.bottle_for(spec.agent_name)
provider = bottle.agent_provider
provider_runtime = runtime_for(provider.template)
# PRD 0016 follow-up: identity, not bare slug. A fresh `start`
# mints a random-suffixed identity (so parallel runs of the same
# agent in the same cwd don't collide on container/network
# names); a `resume` passes the recorded identity in via
# spec.identity to continue an existing bottle's state.
slug = spec.identity or bottle_identity(spec.agent_name)
# Record the launch metadata so `cli.py resume <identity>` can
# reconstruct the spec. Idempotent — re-writes on resume with a
# refreshed started_at.
write_metadata(BottleMetadata(
identity=slug,
agent_name=spec.agent_name,
cwd=spec.user_cwd if spec.copy_cwd else "",
copy_cwd=spec.copy_cwd,
started_at=datetime.now(timezone.utc).isoformat(),
compose_project=f"bot-bottle-{slug}",
))
# Clear any leftover preserve marker from a prior capability-block
# so this fresh launch can be cleaned up at session-end unless
# the agent triggers another capability-block.
clear_preserve_marker(slug)
# PRD 0016 capability-block: if a per-bottle Dockerfile has been
# written (via apply_capability_change), the base image becomes
# per_bottle_image_tag(slug) built from that file. --cwd still
# layers a derived image on top.
dockerfile_path = ""
if per_bottle_dockerfile(slug) is not None:
image_default = per_bottle_image_tag(slug)
dockerfile_path = str(per_bottle_dockerfile_path(slug))
elif provider.dockerfile:
image_default = f"bot-bottle-{provider.template}:{slug}"
dockerfile_path = _resolve_manifest_dockerfile(provider.dockerfile, spec)
elif provider_runtime.dockerfile:
image_default = provider_runtime.image
dockerfile_path = provider_runtime.dockerfile
else:
image_default = provider_runtime.image
image = os.environ.get("BOT_BOTTLE_IMAGE", image_default)
derived_image = ""
runtime_image = image
if spec.copy_cwd:
derived_image = os.environ.get(
"BOT_BOTTLE_DERIVED_IMAGE", f"bot-bottle-cwd:{slug}"
)
runtime_image = derived_image
default_container = f"bot-bottle-{slug}"
pinned_container = os.environ.get("BOT_BOTTLE_CONTAINER", "")
container_name_pinned = bool(pinned_container)
if container_name_pinned:
container_name = pinned_container
if docker_mod.container_exists(container_name):
die(
f"container '{container_name}' already exists "
f"(pinned via BOT_BOTTLE_CONTAINER). "
f"Remove it with 'docker rm -f {container_name}' or unset the override."
)
else:
container_name = ""
for candidate in docker_mod.container_name_candidates(default_container):
if not docker_mod.container_exists(candidate):
container_name = candidate
break
if not container_name:
die(
f"could not find a free container name after "
f"{default_container}-{docker_mod.MAX_CONTAINER_SUFFIX}; "
f"clean up old containers with 'docker rm -f <name>'"
)
# Probe the sidecar-bundle container name for an orphan from a
# previous run. Otherwise a stale bundle surfaces as a
# docker-create conflict deep inside launch() with no actionable
# hint; failing fast here points at the cleanup command.
bundle_name = sidecar_bundle_container_name(slug)
if docker_mod.container_exists(bundle_name):
die(
f"sidecar bundle container '{bundle_name}' already exists. "
f"This is an orphan from a previous run; clean it up with "
f"'./cli.py cleanup' (or 'docker rm -f {bundle_name}') and "
f"retry."
)
# PRD 0018 chunk 2: prepare-time scratch files live under
# ~/.bot-bottle/state/<slug>/<service>/ so chunk 3's compose
# bind-mounts can point at stable paths. The state subdirs are
# cleaned up by start.py's session-end teardown unless something
# explicitly preserves the state dir (capability-block, crash).
agent_dir = agent_state_dir(slug)
agent_dir.mkdir(parents=True, exist_ok=True)
env_file = agent_dir / "agent.env"
prompt_file = agent_dir / "prompt.txt"
prompt_file.write_text("")
prompt_file.chmod(0o600)
pipelock_dir = pipelock_state_dir(slug)
pipelock_dir.mkdir(parents=True, exist_ok=True)
proxy_plan = proxy.prepare(bottle, slug, pipelock_dir)
git_gate_dir = git_gate_state_dir(slug)
git_gate_dir.mkdir(parents=True, exist_ok=True)
git_gate_plan = git_gate.prepare(bottle, slug, git_gate_dir)
egress_dir = egress_state_dir(slug)
egress_dir.mkdir(parents=True, exist_ok=True)
egress_plan = egress.prepare(bottle, slug, egress_dir)
supervise_plan = None
if bottle.supervise:
# Current Dockerfile for the agent image. Read from the repo
# root; for `--cwd` derived images the base Dockerfile is what
# the agent should propose changes against (the derived layer
# is just a workspace copy).
# (routes.yaml + pipelock allowlist used to land here too but
# PRD 0017 chunk 3 moved them behind the
# `list-egress-routes` MCP tool so the agent gets live
# state rather than a launch-time snapshot.)
supervise_dockerfile_path = (
Path(dockerfile_path)
if dockerfile_path
else Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile.claude"
)
dockerfile_content = (
supervise_dockerfile_path.read_text()
if supervise_dockerfile_path.is_file()
else ""
)
supervise_dir = supervise_state_dir(slug)
supervise_dir.mkdir(parents=True, exist_ok=True)
supervise_plan = supervise.prepare(
slug, supervise_dir,
dockerfile_content=dockerfile_content,
)
resolved = resolve_env(manifest, spec.agent_name)
# Everything that should reach the bottle by-name (so its value
# never lands on argv or in env_file) goes into one dict. Nothing
# mutates the host os.environ.
forwarded_env: dict[str, str] = dict(resolved.forwarded)
# When the bottle declares an egress route with the
# `claude_code_oauth` role marker, claude-code's outbound
# Authorization gets stripped + re-injected by egress. The
# agent's environ still needs *something* claude-code recognises
# as a credential or it refuses to start; ship a non-secret
# placeholder. The placeholder isn't any real token value, so
# leaking it would tell an attacker only that egress is in
# front. Manifest validation enforces singleton on this role.
has_provider_auth = any(
provider_runtime.auth_role in r.roles for r in egress_plan.routes
)
if has_provider_auth:
forwarded_env[provider_runtime.placeholder_env] = "egress-placeholder"
if provider.template == "claude" and has_provider_auth:
# Belt-and-braces: turn off telemetry endpoints (statsig,
# error reporting) that egress can't gate by auth.
forwarded_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1")
forwarded_env.setdefault("DISABLE_ERROR_REPORTING", "1")
_write_env_file(resolved, env_file)
prompt_file.write_text(agent.prompt)
use_runsc = docker_mod.runsc_available()
return DockerBottlePlan(
spec=spec,
stage_dir=stage_dir,
slug=slug,
container_name=container_name,
container_name_pinned=container_name_pinned,
image=image,
derived_image=derived_image,
runtime_image=runtime_image,
dockerfile_path=dockerfile_path,
env_file=env_file,
forwarded_env=forwarded_env,
prompt_file=prompt_file,
proxy_plan=proxy_plan,
git_gate_plan=git_gate_plan,
egress_plan=egress_plan,
supervise_plan=supervise_plan,
use_runsc=use_runsc,
agent_command=provider_runtime.command,
agent_prompt_mode=provider_runtime.prompt_mode,
agent_provider_template=provider.template,
)
def _write_env_file(resolved: ResolvedEnv, env_file: Path) -> None:
"""Serialize the literal portion of a ResolvedEnv into docker's
`--env-file` syntax (NAME=VALUE per line, mode 600 since the file
may carry verbatim values from the manifest). Forwarded names ride
on the plan as a structured tuple instead."""
env_lines: list[str] = []
for name, value in resolved.literals.items():
if "\n" in value:
die(
f"env entry {name} (literal) contains a newline; "
f"docker --env-file cannot represent multi-line values."
)
env_lines.append(f"{name}={value}")
env_file.write_text("\n".join(env_lines) + ("\n" if env_lines else ""))
env_file.chmod(0o600)
def _resolve_manifest_dockerfile(path_value: str, spec: BottleSpec) -> str:
path = Path(os.path.expanduser(path_value))
if not path.is_absolute():
path = Path(spec.user_cwd) / path
return str(path)
@@ -0,0 +1,8 @@
"""Per-provisioner modules for the Docker backend.
Each module exports one top-level function:
provision_<thing>(plan: DockerBottlePlan, target: str) -> ...
`DockerBottleBackend.provision_*` methods delegate to these. The
abstract `BottleBackend.provision_*` surface is unchanged; this
subpackage exists only to keep `backend.py` from being a god-file."""
+103
View File
@@ -0,0 +1,103 @@
"""Install the per-bottle MITM CA into the agent container's trust
store.
Post-PRD-0017 the CA depends on the agent's HTTP_PROXY target:
- Bottle declares `egress.routes[]` agent's HTTP_PROXY
points at egress; the cert the agent must trust is the
one egress mints leaf certs with (the egress CA).
- No egress routes agent's HTTP_PROXY points straight at
pipelock; the cert the agent must trust is pipelock's CA (the
pre-cutover behavior).
By the time this provisioner runs, the corresponding `tls_init`
helper has generated the chosen CA under `plan.stage_dir`, and the
sidecar (pipelock or egress) is up referencing the
in-container CA paths.
Cert lands on Debian's standard source path
(`/usr/local/share/ca-certificates/`); `update-ca-certificates`
rebuilds `/etc/ssl/certs/ca-certificates.crt`, which is what curl,
Python `ssl`, and OpenSSL-based tools all read by default. The env
trio set on the agent's `docker run` covers Node
(`NODE_EXTRA_CA_CERTS`) and Python `requests` /
`SSL_CERT_FILE`-honoring libraries that don't load the system
bundle.
The fingerprint is computed via stdlib (`ssl.PEM_cert_to_DER_cert`
+ `hashlib.sha256`) and logged once to stderr. The private key
stays on the host (under `stage_dir`) until teardown wipes the
stage dir; nothing in the agent ever sees it."""
from __future__ import annotations
import hashlib
import ssl
import subprocess
from pathlib import Path
from ....log import info
from ..bottle_plan import DockerBottlePlan
# Debian-family path for sources that `update-ca-certificates` reads.
# Bundle path is what the command rebuilds and what every standard
# TLS consumer in the image reads.
AGENT_CA_PATH = "/usr/local/share/ca-certificates/bot-bottle-mitm-ca.crt"
AGENT_CA_BUNDLE = "/etc/ssl/certs/ca-certificates.crt"
def _select_ca_cert(plan: DockerBottlePlan) -> tuple[Path, str]:
"""Pick the CA cert (and a short label for the log line) that
matches the proxy the agent's HTTP_PROXY points at. Egress-proxy
wins when the bottle declares any routes (it sits in front of
pipelock); else pipelock."""
if plan.egress_plan.routes:
cert = plan.egress_plan.mitmproxy_ca_cert_only_host_path
if cert == Path() or not cert.is_file():
from ....log import die
die(
f"egress CA cert missing at {cert or '(empty)'}; "
f"launch must have called egress_tls_init and "
f"re-bound the plan before provision"
)
return cert, "egress"
cert = plan.proxy_plan.ca_cert_host_path
if not cert or not cert.is_file():
from ....log import die
die(
f"pipelock CA cert missing at {cert or '(empty)'}; "
f"launch must have called pipelock_tls_init and re-bound "
f"the plan before provision"
)
return cert, "pipelock"
def provision_ca(plan: DockerBottlePlan, target: str) -> None:
"""Copy the agent-facing CA cert into the agent, rebuild the
trust bundle, emit a one-line fingerprint log. Called from
`BottleBackend.provision` after the agent container is up."""
container = target
cert_host_path, label = _select_ca_cert(plan)
subprocess.run(
["docker", "cp", str(cert_host_path), f"{container}:{AGENT_CA_PATH}"],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "chmod", "644", AGENT_CA_PATH],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "update-ca-certificates"],
stdout=subprocess.DEVNULL,
check=True,
)
# Stdlib SHA-256 of the cert's DER bytes — the standard
# fingerprint form. Never the private key.
der = ssl.PEM_cert_to_DER_cert(cert_host_path.read_text())
fingerprint = hashlib.sha256(der).hexdigest()
info(f"{label} ca fingerprint: sha256:{fingerprint[:32]}...")
+121
View File
@@ -0,0 +1,121 @@
"""Git provisioning inside a running Docker bottle.
Three concerns, all about git in the agent:
1. If --cwd was passed AND the host cwd has a .git, copy that .git
into /home/node/workspace/.git so the agent operates on the
user's repo.
2. If the bottle declares `git` entries (PRD 0008), write a
~/.gitconfig with insteadOf rules so every git operation
against a declared upstream (push, fetch, clone, pull,
ls-remote) transparently hits the per-agent git-gate. The
gate mirrors the upstream in both directions, so URL
rewriting is symmetric.
3. If the bottle declares `git.user` (issue #86), set
`git config --global user.{name,email}` inside the bottle so
the agent's commits are attributed to that identity.
"""
from __future__ import annotations
import os
import subprocess
from pathlib import Path
from ....git_gate import GIT_GATE_HOSTNAME, git_gate_render_gitconfig
from ....log import info
from .. import util as docker_mod
from ..bottle_plan import DockerBottlePlan
def provision_git(plan: DockerBottlePlan, target: str) -> None:
"""Set up git inside the bottle. Runs all three subcases; each
no-ops when its condition isn't met."""
_provision_cwd_git(plan, target)
_provision_git_gate_config(plan, target)
_provision_git_user(plan, target)
def _provision_cwd_git(plan: DockerBottlePlan, target: str) -> None:
"""If --cwd was set and the host cwd has a .git directory, copy
it into /home/node/workspace/.git and fix ownership. No-op
otherwise."""
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
return
container = target
info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
subprocess.run(
["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
[
"docker", "exec", "-u", "0", container,
"chown", "-R", "node:node", "/home/node/workspace/.git",
],
stdout=subprocess.DEVNULL,
check=True,
)
def _provision_git_gate_config(plan: DockerBottlePlan, target: str) -> None:
"""Write ~/.gitconfig in the bottle with the git-gate
insteadOf rules. No-op when the bottle has no `git` entries."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not bottle.git:
return
container = target
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
container_gitconfig = f"{container_home}/.gitconfig"
content = git_gate_render_gitconfig(bottle.git, GIT_GATE_HOSTNAME)
config_file = plan.stage_dir / "agent_gitconfig"
config_file.write_text(content)
config_file.chmod(0o600)
info(f"writing {container_gitconfig} with {len(bottle.git)} insteadOf rule(s)")
subprocess.run(
["docker", "cp", str(config_file), f"{container}:{container_gitconfig}"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "node:node", container_gitconfig])
docker_mod.docker_exec_root(container, ["chmod", "644", container_gitconfig])
def _provision_git_user(plan: DockerBottlePlan, target: str) -> None:
"""Apply `git config --global user.{name,email}` inside the
bottle so the agent's commits are attributed to the operator-
chosen identity instead of the agent image's default
(which is no user git would refuse to commit at all
until the agent ran its own `git config`).
Runs as the `node` user so `--global` lands in
`/home/node/.gitconfig` (matching the existing
`_provision_git_gate_config` write location). No-op when the
bottle didn't declare `git.user`.
Each field set independently name-only or email-only
configs only run the `git config` line for the field
present."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
gu = bottle.git_user
if gu.is_empty():
return
if gu.name:
info(f"git config --global user.name = {gu.name!r}")
subprocess.run(
["docker", "exec", "-u", "node", target,
"git", "config", "--global", "user.name", gu.name],
stdout=subprocess.DEVNULL,
check=True,
)
if gu.email:
info(f"git config --global user.email = {gu.email!r}")
subprocess.run(
["docker", "exec", "-u", "node", target,
"git", "config", "--global", "user.email", gu.email],
stdout=subprocess.DEVNULL,
check=True,
)
@@ -0,0 +1,43 @@
"""Copy the agent prompt into a running Docker bottle.
The prompt file is always copied (so the in-container path always
exists) but `--append-system-prompt-file` only fires when the agent
actually has a prompt the return value signals which case."""
from __future__ import annotations
import os
import subprocess
from ..bottle_plan import DockerBottlePlan
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None:
"""Copy the prompt file into the container, fix ownership/mode.
Returns the in-container path if the agent has a non-empty
prompt (drives --append-system-prompt-file), else None. The
file is copied either way so the path always exists."""
container = target
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
in_container_prompt_path = f"{container_home}/.bot-bottle-prompt.txt"
subprocess.run(
["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
# `docker cp` preserves host UID; re-own/mode as root so node
# can read its own mode-600 prompt regardless of host UID.
subprocess.run(
["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
agent = plan.spec.manifest.agents[plan.spec.agent_name]
return in_container_prompt_path if agent.prompt else None
@@ -0,0 +1,62 @@
"""Copy host-side skill directories into a running Docker bottle.
Skills are validated on the host before launch by the base class's
`BottleBackend._validate_skills` (called from `prepare`); this module
assumes that validation has already run. A skill disappearing between
validation and copy still dies loudly rather than silently producing
a partial container."""
from __future__ import annotations
import os
import subprocess
from ....log import die, info
from ...util import host_skill_dir
from ..bottle_plan import DockerBottlePlan
def provision_skills(plan: DockerBottlePlan, target: str) -> None:
"""Copy each of the agent's named skills from the host's
~/.claude/skills/<name>/ into the container's equivalent path.
For each skill: ensure parent dir, wipe any prior copy, then
`docker cp <host>/. <container>:<dst>/` so the contents are
copied into a freshly-created destination dir. No-op when the
agent has no skills."""
agent = plan.spec.manifest.agents[plan.spec.agent_name]
if not agent.skills:
return
container = target
container_home = os.environ.get("BOT_BOTTLE_CONTAINER_HOME", "/home/node")
skills_dir = os.environ.get(
"BOT_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills"
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", skills_dir],
stdout=subprocess.DEVNULL,
check=True,
)
for n in agent.skills:
src = host_skill_dir(n)
if not os.path.isdir(src):
die(f"skill '{n}' disappeared from host between validation and copy at {src}.")
dst = f"{skills_dir}/{n}"
info(f"copying skill {n} into {container}:{dst}")
subprocess.run(
["docker", "exec", container, "rm", "-rf", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "cp", f"{src}/.", f"{container}:{dst}/"],
stdout=subprocess.DEVNULL,
check=True,
)
@@ -0,0 +1,65 @@
"""Supervise sidecar provisioning inside a running Docker bottle
(PRD 0013).
Registers the per-bottle supervise sidecar as an HTTP MCP server in
the agent's claude-code config so the agent discovers the three
stuck-recovery MCP tools (cred-proxy-block, pipelock-block,
capability-block) at startup.
Uses `claude mcp add` rather than writing JSON directly. claude-code
owns the on-disk config format (`~/.claude.json` `mcpServers` shape,
field names, scope semantics) and changes it between versions; the
official command handles whatever the installed version expects.
No-op when bottle.supervise is False bottles that haven't opted
into the supervise sidecar shouldn't get an MCP entry pointing at a
sidecar that isn't running.
"""
from __future__ import annotations
import subprocess
from ....log import info, warn
from ....supervise import SUPERVISE_HOSTNAME, SUPERVISE_PORT
from ..bottle_plan import DockerBottlePlan
_SUPERVISE_MCP_NAME = "supervise"
def supervise_mcp_url() -> str:
return f"http://{SUPERVISE_HOSTNAME}:{SUPERVISE_PORT}/"
def provision_supervise(plan: DockerBottlePlan, target: str) -> None:
"""Run `claude mcp add` inside the agent container to register
the supervise sidecar in claude-code's user config. No-op when
bottle.supervise is False.
Failure is logged but not fatal: the bottle still works (you
just can't call supervise tools from the agent until the entry
is added manually). The operator sees the warning at launch."""
if plan.supervise_plan is None:
return
url = supervise_mcp_url()
argv = [
"docker", "exec", "-u", "node", target,
"claude", "mcp", "add",
"--scope", "user",
"--transport", "http",
_SUPERVISE_MCP_NAME,
url,
]
info(f"registering supervise MCP server in agent claude config → {url}")
r = subprocess.run(argv, capture_output=True, text=True, check=False)
if r.returncode != 0:
warn(
f"`claude mcp add supervise` failed (exit {r.returncode}): "
f"{(r.stderr or r.stdout or '').strip()}. Inside the bottle, "
f"register manually with: "
f"claude mcp add --scope user --transport http supervise {url}"
)
__all__ = ["provision_supervise", "supervise_mcp_url"]
@@ -0,0 +1,31 @@
"""Sidecar bundle constants + helpers for the Docker backend
(PRD 0024).
The bundle image (built by Dockerfile.sidecars, PRD 0024 chunk 1)
runs pipelock + egress + git-gate + supervise as one container
per bottle under a small Python init supervisor. As of chunk 5
the bundle is the only shape the legacy four-sidecar topology
and its `BOT_BOTTLE_SIDECAR_BUNDLE` feature flag are gone."""
from __future__ import annotations
import os
# Bundle image. Defaults to a built-locally tag (built from the
# repo's Dockerfile.sidecars via compose `build:`). Operators
# pinning to a published digest can override via env, matching
# the existing `BOT_BOTTLE_PIPELOCK_IMAGE` shape.
SIDECAR_BUNDLE_IMAGE = os.environ.get(
"BOT_BOTTLE_SIDECAR_IMAGE",
"bot-bottle-sidecars:latest",
)
SIDECAR_BUNDLE_DOCKERFILE = "Dockerfile.sidecars"
def sidecar_bundle_container_name(slug: str) -> str:
"""`bot-bottle-sidecars-<slug>`. Same prefix scheme as the
per-sidecar containers it replaces, so the dashboard's
discovery-by-prefix logic keeps working."""
return f"bot-bottle-sidecars-{slug}"
+186
View File
@@ -0,0 +1,186 @@
"""Docker host-side primitives used by DockerBottleBackend: probing
for docker on PATH, slugifying agent names, checking image/container
existence, and building images."""
from __future__ import annotations
import re
import shutil
import subprocess
from typing import Iterable, Iterator
from ...log import die, info
# Cap on the suffix the container-name conflict logic will try before
# giving up: base, base-2, ..., base-MAX_CONTAINER_SUFFIX.
MAX_CONTAINER_SUFFIX = 100
def container_name_candidates(base: str) -> Iterator[str]:
"""Yield `base`, then `base-2`, `base-3`, ... up to
`base-MAX_CONTAINER_SUFFIX`. Both the prepare-time probe and the
launch-time race retry walk this sequence."""
yield base
for suffix in range(2, MAX_CONTAINER_SUFFIX + 1):
yield f"{base}-{suffix}"
def runsc_available() -> bool:
"""Return True if the Docker daemon has the gVisor (`runsc`) runtime
registered. Called once per prepare; the result lives on the plan."""
r = subprocess.run(
["docker", "info", "--format", "{{json .Runtimes}}"],
capture_output=True,
text=True,
check=False,
)
return r.returncode == 0 and "runsc" in r.stdout
def require_docker() -> None:
"""Fail with an install pointer if `docker` is not on PATH."""
if shutil.which("docker") is None:
info("Docker is required but was not found on PATH.")
info("macOS: install Docker Desktop https://docs.docker.com/desktop/install/mac-install/")
info("Linux: install Docker Engine https://docs.docker.com/engine/install/")
die("docker not found")
def image_exists(ref: str) -> bool:
return _silent_run(["docker", "image", "inspect", ref]) == 0
def container_exists(name: str) -> bool:
"""Returns True if a container (running or stopped) with the given
name exists. Uses `docker ps -a -q -f name=^<name>$` so substring
matches don't false-positive."""
result = subprocess.run(
["docker", "ps", "-a", "-q", "-f", f"name=^{name}$"],
capture_output=True,
text=True,
check=True,
)
return bool(result.stdout.strip())
def force_remove_container(name: str) -> None:
"""`docker rm -f` the named container if it exists. No-op if it
doesn't — and the rm itself is best-effort (errors swallowed) so
this is safe to register as a teardown callback."""
if container_exists(name):
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
def docker_exec_root(container: str, argv: list[str]) -> None:
"""Run `docker exec -u 0` in the named container, check=True. Used
by SSH provisioning to chown/chmod files that need root."""
subprocess.run(
["docker", "exec", "-u", "0", container, *argv],
stdout=subprocess.DEVNULL,
check=True,
)
_SLUG_RE = re.compile(r"[^a-z0-9]+")
def slugify(name: str) -> str:
"""Lowercase, non-alnum runs → '-', trimmed. Dies on empty result."""
if not name:
die("slugify: missing name")
slug = _SLUG_RE.sub("-", name.lower()).strip("-")
if not slug:
die(f"name '{name}' produced an empty slug; use alphanumeric characters")
return slug
def build_image(ref: str, context: str, *, dockerfile: str = "") -> None:
"""Invokes `docker build` every call. Layer cache makes no-change
rebuilds cheap; running every time means Dockerfile edits land
without manual `docker rmi`.
`dockerfile` is an optional path (relative to `context`, or
absolute) for callers that need to build from a non-default
Dockerfile in the same context e.g. `Dockerfile.git-gate`."""
info(f"building image {ref} from {context} (layer cache keeps repeat builds fast)")
args = ["docker", "build", "-t", ref]
if dockerfile:
args.extend(["-f", dockerfile])
args.append(context)
subprocess.run(args, check=True)
_TRUST_DIALOG_NODE_SCRIPT = (
'const fs=require("fs"),p=process.env.HOME+"/.claude.json",'
'c=JSON.parse(fs.readFileSync(p,"utf8"));'
'c.projects=c.projects||{};'
'c.projects[process.env.HOME+"/workspace"]={hasTrustDialogAccepted:true};'
'fs.writeFileSync(p,JSON.stringify(c,null,2));'
)
def build_image_with_cwd(derived: str, base: str, cwd: str) -> None:
"""Build a thin derived image that copies <cwd> into
/home/node/workspace and adds a trust-dialog entry for it."""
import os
if not os.path.isdir(cwd):
die(f"cwd not found at {cwd}")
info(f"building image {derived} from {base} with {cwd} -> /home/node/workspace")
dockerfile = (
f"FROM {base}\n"
f"COPY --chown=node:node . /home/node/workspace\n"
f"RUN node -e '{_TRUST_DIALOG_NODE_SCRIPT}'\n"
f"WORKDIR /home/node/workspace\n"
)
subprocess.run(
["docker", "build", "-t", derived, "-f", "-", cwd],
input=dockerfile,
text=True,
check=True,
)
def image_id(ref: str) -> str:
"""Return the content-addressed image ID (e.g.
`sha256:abcd...`) for `ref`. The smolmachines backend keys its
`.smolmachine` artifact cache on this, so a Dockerfile change
that produces a new image automatically invalidates the cache."""
r = subprocess.run(
["docker", "image", "inspect", "--format", "{{.Id}}", ref],
capture_output=True,
text=True,
check=False,
)
if r.returncode != 0:
die(
f"docker image inspect for {ref!r} failed: "
f"{(r.stderr or '').strip() or '<no stderr>'}"
)
return r.stdout.strip()
def save(ref: str, output: str) -> None:
"""`docker save REF -o OUTPUT`. Writes a tarball of the image
layers + manifest to the host path. Used by smolmachines
prepare to hand the agent image to a containerized crane that
pushes it to the ephemeral registry bypassing the docker
daemon's `docker push` (which on Docker Desktop can't reach a
host-loopback registry and refuses plain-HTTP pushes to
non-loopback hosts)."""
subprocess.run(["docker", "save", ref, "-o", output], check=True)
def _silent_run(cmd: Iterable[str]) -> int:
return subprocess.run(
list(cmd),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode