refactor(bottles): split factory into prepare + launch phases
test / run tests/run_tests.py (pull_request) Successful in 15s

The Docker factory had absorbed live container ops but left the
host-side prep (image-name resolution, container-name collision
retry, pipelock yaml generation, env_resolve writes, host
validation) in cli/start.py. That kept ~half the Docker-specific
logic outside the abstraction.

Split the factory into two phases:

  prepare_docker_bottle(spec, stage_dir=...) -> DockerBottlePlan
      Resolves names, validates skills/SSH, writes scratch files.
      No Docker resources created yet.

  launch_docker_bottle(plan) -> ContextManager[Bottle]
      Builds image, creates networks, boots pipelock, runs the
      agent container, provisions files. Teardown on exit.

DockerBottleSpec shrinks to intent-only inputs (manifest, agent
name, --cwd flag, user_cwd, forward_oauth_token). The CLI no longer
references docker_mod, pipelock, skills, ssh, or env_resolve.

get_bottle_factory becomes get_bottle_platform returning a
BottlePlatform with .prepare and .launch — one selectable thing per
platform.

The Bottle handle now remembers the in-container prompt path and
appends --append-system-prompt-file to claude's argv whenever a
prompt was provisioned, so the CLI no longer needs to know the path.

cmd_start: ~148 lines down from 229. Tests pass; dry-run output
byte-identical.
This commit is contained in:
2026-05-10 22:36:26 -04:00
parent a284d85296
commit 4f16b3a9e1
3 changed files with 244 additions and 228 deletions
+35 -17
View File
@@ -1,27 +1,35 @@
"""Per-platform bottle factories.
A bottle is a running, isolated environment with claude inside. Each
platform exposes a factory (currently only Docker) that owns the
end-to-end lifecycle: image build, container/sidecar launch, file
provisioning, and teardown.
platform exposes two functions:
Selection is driven by the CLAUDE_BOTTLE_PLATFORM env var (default
"docker"). Per PRD 0003 the manifest does not carry a platform field;
the host environment picks.
prepare(spec, stage_dir=...) -> Plan
Resolves names, validates host-side prerequisites, and writes
scratch files. No remote/runtime resources are created yet.
Safe to call before the y/N preflight.
launch(plan) -> ContextManager[Bottle]
Brings up the container (or VM, or remote machine), provisions
it, yields a Bottle handle, and tears everything down on exit.
Selection is driven by CLAUDE_BOTTLE_PLATFORM (default "docker"). Per
PRD 0003 the manifest does not carry a platform field; the host
environment picks.
"""
from __future__ import annotations
import os
from contextlib import AbstractContextManager
from dataclasses import dataclass
from typing import Callable, Protocol
from ..log import die
from .docker import create_docker_bottle
from .docker import launch_docker_bottle, prepare_docker_bottle
class Bottle(Protocol):
"""Handle to a running bottle. Yielded by a factory's context manager.
"""Handle to a running bottle. Yielded by a platform's launch step.
`exec_claude` runs `claude` inside the bottle and blocks until the
session ends. `cp_in` copies a host path into the bottle. `close`
@@ -35,20 +43,30 @@ class Bottle(Protocol):
def close(self) -> None: ...
BottleFactory = Callable[..., AbstractContextManager[Bottle]]
@dataclass(frozen=True)
class BottlePlatform:
    """Bundles a platform's two-phase factory under one selectable name.

    The dataclass is frozen, so a platform entry cannot be reassigned
    field-by-field once it has been constructed.
    """

    # Platform identifier, e.g. "docker".
    name: str
    # Phase one. Invoked as prepare(spec, stage_dir=...) and returns a
    # platform-specific plan object; typed loosely as `object` because
    # each platform defines its own plan type.
    prepare: Callable[..., object]
    # Phase two. Invoked as launch(plan); the returned context manager
    # yields a Bottle handle.
    launch: Callable[..., AbstractContextManager[Bottle]]
_FACTORIES: dict[str, BottleFactory] = {
"docker": create_docker_bottle,
_PLATFORMS: dict[str, BottlePlatform] = {
"docker": BottlePlatform(
name="docker",
prepare=prepare_docker_bottle,
launch=launch_docker_bottle,
),
}
def get_bottle_factory() -> BottleFactory:
"""Resolve the bottle factory for the active platform. Dies with a
pointer at the known platforms if CLAUDE_BOTTLE_PLATFORM names an
def get_bottle_platform() -> BottlePlatform:
"""Resolve the bottle platform for the active environment. Dies with
a pointer at the known platforms if CLAUDE_BOTTLE_PLATFORM names an
unimplemented one."""
name = os.environ.get("CLAUDE_BOTTLE_PLATFORM", "docker")
if name not in _FACTORIES:
known = ", ".join(sorted(_FACTORIES))
if name not in _PLATFORMS:
known = ", ".join(sorted(_PLATFORMS))
die(f"unknown CLAUDE_BOTTLE_PLATFORM={name!r}; known platforms: {known}")
return _FACTORIES[name]
return _PLATFORMS[name]
+181 -91
View File
@@ -1,16 +1,18 @@
"""Docker bottle factory.
`create_docker_bottle` owns the end-to-end Docker lifecycle:
Two phases:
1. Probe whether gVisor (`runsc`) is registered with the daemon.
2. Build the base image (and a per-cwd derived image if --cwd).
3. Create the per-agent internal + egress networks.
4. Boot the pipelock sidecar on both networks.
5. Launch the agent container, with `--runtime=runsc` iff available.
6. Copy the prompt, skills, SSH keys, and (optionally) .git into the
running container.
7. Yield a `Bottle` handle for `exec_claude` / `cp_in`.
8. Tear everything down (container, sidecar, both networks) on exit.
prepare_docker_bottle(spec, stage_dir=...) -> DockerBottlePlan
Resolve names, validate host-side prerequisites, and write
scratch files (env_file, args_file, prompt, pipelock yaml) to
stage_dir. No Docker resources are created yet. Suitable to call
before the y/N preflight.
launch_docker_bottle(plan) -> ContextManager[Bottle]
Build the image, create networks, boot the pipelock sidecar,
launch the agent container (with `--runtime=runsc` iff the
daemon has gVisor registered), and copy prompt/skills/ssh/.git
into the running container. Teardown on exit.
The Bottle Protocol lives in `claude_bottle.bottles.__init__`.
"""
@@ -30,6 +32,7 @@ from .. import network as network_mod
from .. import pipelock
from .. import skills as skills_mod
from .. import ssh as ssh_mod
from ..env_resolve import env_resolve
from ..log import die, info
from ..manifest import Manifest
@@ -39,9 +42,7 @@ from ..manifest import Manifest
def runsc_available() -> bool:
"""Return True if the Docker daemon has the gVisor (`runsc`) runtime
registered. Called twice per `start`: once during the preflight to
render the runtime label, once inside the factory to set `--runtime`.
`docker info` is cheap; the duplication is not worth caching."""
registered. Called once per prepare; the result lives on the plan."""
r = subprocess.run(
["docker", "info", "--format", "{{json .Runtimes}}"],
capture_output=True,
@@ -50,58 +51,66 @@ def runsc_available() -> bool:
return r.returncode == 0 and "runsc" in r.stdout
def docker_runtime_label() -> str:
"""Human-readable label for the runtime that `create_docker_bottle`
would select right now. Shown in the y/N preflight."""
return "runsc (gVisor)" if runsc_available() else "runc (default)"
# --- Spec ------------------------------------------------------------------
# --- Spec + Plan -----------------------------------------------------------
@dataclass(frozen=True)
class DockerBottleSpec:
"""Host-side inputs assembled by the CLI before factory entry. Every
field is a value the factory consumes; nothing here is platform-
agnostic enough yet to lift into a shared spec (only Docker exists)."""
"""CLI-supplied inputs to the Docker factory. Small and intent-only;
everything else (image names, container name, scratch file paths,
runsc availability) is resolved by prepare_docker_bottle."""
agent_name: str
slug: str
manifest: Manifest
agent_name: str
copy_cwd: bool
user_cwd: str
forward_oauth_token: bool
@dataclass(frozen=True)
class DockerBottlePlan:
"""Output of prepare_docker_bottle. Frozen; the launch step consumes
it without further resolution. show_plan reads from it directly."""
spec: DockerBottleSpec
slug: str
container_name: str
container_name_pinned: bool
image: str
derived_image: str # "" -> no derived image
runtime_image: str # image to actually launch (derived or base)
user_cwd: str
copy_cwd_git: bool
derived_image: str # "" -> no derived image
runtime_image: str # image actually launched (derived or base)
stage_dir: Path
prompt_file: Path
env_file: Path
args_file: Path
prompt_file: Path
pipelock_yaml_path: Path
pipelock_yaml_filename: str
forward_oauth_token: bool
allowlist_summary: str
use_runsc: bool
# --- Bottle handle ---------------------------------------------------------
class _DockerBottle:
"""Concrete Bottle for Docker. Holds the resolved container name and
a teardown closure. Not exported — the factory yields it via the
Bottle Protocol."""
"""Concrete Bottle for Docker. Holds the container name plus the
in-container prompt path so exec_claude can transparently add
--append-system-prompt-file when a prompt was provisioned."""
def __init__(self, container: str, teardown):
def __init__(self, container: str, teardown, prompt_path_in_container: str | None):
self.name = container
self._teardown = teardown
self._prompt_path = prompt_path_in_container
self._closed = False
def exec_claude(self, argv: list[str], *, tty: bool = True) -> int:
full_argv = list(argv)
if self._prompt_path:
full_argv.extend(["--append-system-prompt-file", self._prompt_path])
cmd = ["docker", "exec"]
if tty:
cmd.append("-it")
cmd.extend([self.name, "claude", *argv])
cmd.extend([self.name, "claude", *full_argv])
return subprocess.run(cmd).returncode
def cp_in(self, host_path: str, container_path: str) -> None:
@@ -118,7 +127,98 @@ class _DockerBottle:
self._teardown()
# --- Factory ---------------------------------------------------------------
# --- Prepare ---------------------------------------------------------------
def _pick_container_name(default_container: str) -> tuple[str, bool]:
    """Choose the agent container name; return ``(name, pinned)``.

    If CLAUDE_BOTTLE_CONTAINER is set, that exact name is used and
    ``pinned`` is True; an existing container with that name is fatal
    because a user-pinned name is never silently changed. Otherwise the
    default name is probed, then ``<default>-2`` .. ``<default>-99``,
    and the first name with no existing container wins.
    """
    pinned_container = os.environ.get("CLAUDE_BOTTLE_CONTAINER", "")
    if pinned_container:
        if docker_mod.container_exists(pinned_container):
            die(
                f"container '{pinned_container}' already exists "
                f"(pinned via CLAUDE_BOTTLE_CONTAINER). "
                f"Remove it with 'docker rm -f {pinned_container}' or unset the override."
            )
        return pinned_container, True

    container_name = default_container
    suffix = 2
    while docker_mod.container_exists(container_name):
        container_name = f"{default_container}-{suffix}"
        suffix += 1
        # Give up once -99 has been probed and found taken.
        if suffix > 100:
            die(
                f"could not find a free container name after "
                f"{default_container}-99; clean up old containers with "
                f"'docker rm -f <name>'"
            )
    return container_name, False


def prepare_docker_bottle(spec: DockerBottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
    """Resolve names, validate, write scratch files. No Docker resources
    are created; the only side effects are host-side files under
    stage_dir and read-only probes of the Docker daemon.

    Args:
        spec: CLI-supplied intent (manifest, agent name, --cwd flag,
            user cwd, OAuth forwarding).
        stage_dir: Directory that receives agent.env, docker-args,
            prompt.txt and pipelock.yaml. It is not created here.

    Returns:
        A DockerBottlePlan carrying every resolved value the launch
        step needs.

    Environment overrides read here: CLAUDE_BOTTLE_IMAGE,
    CLAUDE_BOTTLE_DERIVED_IMAGE (only with --cwd) and
    CLAUDE_BOTTLE_CONTAINER.
    """
    docker_mod.require_docker()

    manifest = spec.manifest
    manifest.require_agent(spec.agent_name)
    agent = manifest.agents[spec.agent_name]
    bottle = manifest.bottle_for(spec.agent_name)
    bottle_name = agent.bottle

    slug = docker_mod.slugify(spec.agent_name)

    # The base image is always resolved; a derived image exists (and is
    # the one launched) only when the cwd is copied in.
    image = os.environ.get("CLAUDE_BOTTLE_IMAGE", "claude-bottle:latest")
    derived_image = ""
    runtime_image = image
    if spec.copy_cwd:
        derived_image = os.environ.get(
            "CLAUDE_BOTTLE_DERIVED_IMAGE", f"claude-bottle:cwd-{slug}"
        )
        runtime_image = derived_image

    container_name, container_name_pinned = _pick_container_name(
        f"claude-bottle-{slug}"
    )

    if agent.skills:
        skills_mod.skills_validate_all(list(agent.skills))
    if bottle.ssh:
        ssh_mod.ssh_validate_entries(bottle.ssh)

    env_file = stage_dir / "agent.env"
    args_file = stage_dir / "docker-args"
    prompt_file = stage_dir / "prompt.txt"
    pipelock_yaml_filename = "pipelock.yaml"
    pipelock_yaml = stage_dir / pipelock_yaml_filename

    # Create the env and prompt files empty and drop them to 0600 first,
    # so their real content is only ever written to an already
    # restricted file.
    env_file.write_text("")
    env_file.chmod(0o600)
    args_file.write_text("")
    prompt_file.write_text("")
    prompt_file.chmod(0o600)

    pipelock.pipelock_write_yaml(manifest, bottle_name, pipelock_yaml)
    env_resolve(manifest, spec.agent_name, env_file, args_file)
    # A missing prompt is written as an empty file rather than crashing
    # write_text; the provisioning step already treats a falsy prompt
    # as "no prompt".
    prompt_file.write_text(agent.prompt or "")

    allowlist_summary = pipelock.pipelock_allowlist_summary(manifest, bottle_name)
    use_runsc = runsc_available()

    return DockerBottlePlan(
        spec=spec,
        slug=slug,
        container_name=container_name,
        container_name_pinned=container_name_pinned,
        image=image,
        derived_image=derived_image,
        runtime_image=runtime_image,
        stage_dir=stage_dir,
        env_file=env_file,
        args_file=args_file,
        prompt_file=prompt_file,
        pipelock_yaml_path=pipelock_yaml,
        pipelock_yaml_filename=pipelock_yaml_filename,
        allowlist_summary=allowlist_summary,
        use_runsc=use_runsc,
    )
# --- Launch ----------------------------------------------------------------
# Where the repo root lives, for `docker build` context. Computed once.
@@ -126,10 +226,8 @@ _REPO_DIR = str(Path(__file__).resolve().parent.parent.parent)
@contextmanager
def create_docker_bottle(spec: DockerBottleSpec) -> Iterator[_DockerBottle]:
def launch_docker_bottle(plan: DockerBottlePlan) -> Iterator[_DockerBottle]:
"""Build, launch, and provision a Docker bottle. Teardown on exit."""
# Teardown bookkeeping. Each entry is populated as the matching
# resource comes up; teardown walks them in reverse, idempotently.
state: dict[str, str] = {
"container": "",
"pipelock": "",
@@ -147,7 +245,7 @@ def create_docker_bottle(spec: DockerBottleSpec) -> Iterator[_DockerBottle]:
)
state["container"] = ""
if state["pipelock"]:
pipelock.pipelock_stop(spec.slug)
pipelock.pipelock_stop(plan.slug)
state["pipelock"] = ""
if state["internal_network"]:
network_mod.network_remove(state["internal_network"])
@@ -161,28 +259,28 @@ def create_docker_bottle(spec: DockerBottleSpec) -> Iterator[_DockerBottle]:
pass
try:
use_runsc = runsc_available()
docker_mod.build_image(plan.image, _REPO_DIR)
if plan.derived_image:
docker_mod.build_image_with_cwd(
plan.derived_image, plan.image, plan.spec.user_cwd
)
docker_mod.build_image(spec.image, _REPO_DIR)
if spec.derived_image:
docker_mod.build_image_with_cwd(spec.derived_image, spec.image, spec.user_cwd)
state["internal_network"] = network_mod.network_create_internal(spec.slug)
state["egress_network"] = network_mod.network_create_egress(spec.slug)
state["internal_network"] = network_mod.network_create_internal(plan.slug)
state["egress_network"] = network_mod.network_create_egress(plan.slug)
state["pipelock"] = pipelock.pipelock_start(
spec.slug,
plan.slug,
state["internal_network"],
state["egress_network"],
spec.stage_dir,
spec.pipelock_yaml_filename,
plan.stage_dir,
plan.pipelock_yaml_filename,
)
container = _run_agent_container(spec, state["internal_network"], use_runsc)
container = _run_agent_container(plan, state["internal_network"])
state["container"] = container
_provision_container(spec, container)
prompt_path = _provision_container(plan, container)
bottle = _DockerBottle(container, teardown)
bottle = _DockerBottle(container, teardown, prompt_path)
yield bottle
finally:
teardown()
@@ -191,30 +289,26 @@ def create_docker_bottle(spec: DockerBottleSpec) -> Iterator[_DockerBottle]:
# --- Internals -------------------------------------------------------------
def _run_agent_container(
spec: DockerBottleSpec,
internal_network: str,
use_runsc: bool,
) -> str:
def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
"""Build the `docker run` argv and execute it, handling name-conflict
races by incrementing the suffix (unless the name was user-pinned).
Returns the resolved container name."""
proxy_url = pipelock.pipelock_proxy_url(spec.slug)
proxy_url = pipelock.pipelock_proxy_url(plan.slug)
docker_args: list[str] = [
"--rm", "-d",
"--name", spec.container_name,
"--name", plan.container_name,
"--network", internal_network,
"-e", f"HTTPS_PROXY={proxy_url}",
"-e", f"HTTP_PROXY={proxy_url}",
"-e", "NO_PROXY=localhost,127.0.0.1",
]
if use_runsc:
if plan.use_runsc:
docker_args.extend(["--runtime", "runsc"])
if spec.env_file.stat().st_size > 0:
docker_args.extend(["--env-file", str(spec.env_file)])
if plan.env_file.stat().st_size > 0:
docker_args.extend(["--env-file", str(plan.env_file)])
# ARGS_FILE pairs (-e, NAME) line-by-line.
args_lines = spec.args_file.read_text().splitlines()
args_lines = plan.args_file.read_text().splitlines()
i = 0
while i < len(args_lines):
flag = args_lines[i]
@@ -227,16 +321,16 @@ def _run_agent_container(
i += 1
docker_args.extend([flag, vname])
if spec.forward_oauth_token:
if plan.spec.forward_oauth_token:
os.environ["CLAUDE_CODE_OAUTH_TOKEN"] = os.environ["CLAUDE_BOTTLE_OAUTH_TOKEN"]
docker_args.extend(["-e", "CLAUDE_CODE_OAUTH_TOKEN"])
docker_args.extend([spec.runtime_image, "sleep", "infinity"])
docker_args.extend([plan.runtime_image, "sleep", "infinity"])
info(f"starting container {spec.container_name} from {spec.runtime_image}")
info(f"starting container {plan.container_name} from {plan.runtime_image}")
container = spec.container_name
base_name = spec.container_name
container = plan.container_name
base_name = plan.container_name
suffix = 2
while True:
run_result = subprocess.run(
@@ -247,7 +341,7 @@ def _run_agent_container(
if run_result.returncode == 0:
return container
err_text = run_result.stderr
if spec.container_name_pinned or "is already in use" not in err_text:
if plan.container_name_pinned or "is already in use" not in err_text:
sys.stderr.write(err_text + "\n")
die(f"docker run failed for container '{container}'")
if suffix > 100:
@@ -262,44 +356,45 @@ def _run_agent_container(
info(f"name conflict; retrying as {container}")
def _provision_container(spec: DockerBottleSpec, container: str) -> None:
def _provision_container(plan: DockerBottlePlan, container: str) -> str | None:
"""Copy prompt, skills, ssh keys, and (optionally) .git into the
running container, fixing up ownership/mode where the host UID
would otherwise leave files unreadable by the in-container `node`."""
running container. Returns the in-container prompt path if a prompt
was provisioned, else None — the Bottle handle uses it to decide
whether to add --append-system-prompt-file to claude's argv."""
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt"
in_container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt"
subprocess.run(
["docker", "cp", str(spec.prompt_file), f"{container}:{container_prompt_path}"],
["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
# `docker cp` preserves host UID; re-own/mode as root so node can
# read its own mode-600 prompt regardless of host UID.
subprocess.run(
["docker", "exec", "-u", "0", container, "chown", "node:node", container_prompt_path],
["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "chmod", "600", container_prompt_path],
["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
agent = spec.manifest.agents[spec.agent_name]
agent = plan.spec.manifest.agents[plan.spec.agent_name]
if agent.skills:
skills_mod.skills_copy_into(container, list(agent.skills))
bottle = spec.manifest.bottle_for(spec.agent_name)
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if bottle.ssh:
proxy_host_port = pipelock.pipelock_proxy_host_port(spec.slug)
ssh_mod.ssh_setup(container, spec.stage_dir, proxy_host_port, bottle.ssh)
proxy_host_port = pipelock.pipelock_proxy_host_port(plan.slug)
ssh_mod.ssh_setup(container, plan.stage_dir, proxy_host_port, bottle.ssh)
if spec.copy_cwd_git:
info(f"copying {spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
if plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir():
info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
subprocess.run(
["docker", "cp", f"{spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
stdout=subprocess.DEVNULL,
check=True,
)
@@ -312,9 +407,4 @@ def _provision_container(spec: DockerBottleSpec, container: str) -> None:
check=True,
)
def container_prompt_path() -> str:
"""The path inside the container where the prompt file lands. Used
by start.py to pass `--append-system-prompt-file` to claude."""
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
return f"{container_home}/.claude-bottle-prompt.txt"
return in_container_prompt_path if agent.prompt else None