cefdc8c6e9
PRD 0018 chunk 3. Each instance is now one `docker compose` project:
- launch.py renders the compose spec via chunk-1's
bottle_plan_to_compose, writes it to state/<slug>/docker-compose.yml,
`docker compose up -d`s, and (on teardown) dumps
`docker compose logs --no-color --timestamps` to
state/<slug>/compose.log before `docker compose down`.
- Networks are pre-created (`docker network create --internal` +
user-defined bridge) so pipelock yaml can know the internal CIDR
before compose-up. Compose references them with `external: true`;
the launch step's ExitStack still owns network removal.
- Agent still runs `sleep infinity`; claude reaches it via
`docker exec -it` exactly like before (per the PRD's resolved
TTY question).
- metadata.json grows a `compose_project` field so dashboard /
cleanup tooling can derive compose invocations without
re-deriving the slug.
Security follow-ups from chunk-2 review:
(b) CA private keys: pipelock + egress ca-key.pem land at 0o600
explicitly. The mitmproxy cert+key concat stays 0o644 because
the egress container's uid-1000 user reads it through the
bind mount; parent dir at 0o700 still restricts host-side
reach.
(c) Apply atomicity: egress_apply + pipelock_apply switch from
`docker cp` to host-side write-temp-then-rename on the
bind-mount source. POSIX rename is atomic on the same
filesystem, so a sidecar SIGHUP racing the apply can't see
a half-written routes.yaml / pipelock.yaml.
Per-sidecar Docker{Sidecar}.start/stop methods stay in place — the
integration test suite drives them directly to validate each image
in isolation, which is still useful. launch.py no longer calls
them; a follow-up chunk can prune if the integration tests move to
the compose lifecycle.
git-gate entrypoint's chmod 600 on the keyfile + known_hosts now
tolerates EROFS (`|| true`) — the host SSH key is already 0600
(SSH refuses to load otherwise), so the inside-container chmod
was already a no-op in the docker-cp path and now just needs to
not error on the read-only bind mount.
422 unit tests pass; supervise integration test passes; end-to-end
`./cli.py start implementer` brings up the project, attaches,
captures full merged logs on teardown, and reaps all containers +
networks.
246 lines
9.3 KiB
Python
246 lines
9.3 KiB
Python
"""Launch step for the Docker bottle backend.
|
|
|
|
PRD 0018 chunk 3: each instance is one `docker compose` project.
|
|
|
|
The flow is:
|
|
|
|
1. Build the agent's base + derived image (compose builds the
|
|
sidecar images via the `build:` directive on first up).
|
|
2. Pre-create the per-bottle networks. We do this outside compose
|
|
so we can inspect the assigned internal CIDR and embed it in
|
|
pipelock's yaml (compose's `external: true` lets the compose
|
|
file reference these pre-existing networks).
|
|
3. Mint the per-bottle CAs (chunk 2 writes them under
|
|
state/<slug>/{pipelock,egress}/).
|
|
4. Re-render pipelock yaml with the now-known internal CIDR so
|
|
the SSRF allowlist exempts the bottle's own subnet.
|
|
5. Populate the inner plans with launch-time fields so the
|
|
renderer can read network names, CA paths, pipelock URL.
|
|
6. Render the compose spec and write it to
state/<slug>/docker-compose.yml. (metadata.json was already
written at prepare time and carries compose_project, so launch
does not rewrite it.)
|
|
7. `docker compose up -d` (token + OAuth values flow into the
|
|
compose subprocess env so `environment: [NAME]` bare-name
|
|
entries inherit without rendering values into the file).
|
|
8. Provision (CA install, prompt copy, skills, git, supervise
|
|
config) — unchanged, uses `docker exec`.
|
|
9. Yield a DockerBottle handle. `exec_claude` runs claude via
|
|
`docker exec -it` exactly like the pre-compose world.
|
|
|
|
Teardown (ExitStack callbacks fire in reverse):
|
|
- Dump `docker compose logs --no-color --timestamps` to
|
|
state/<slug>/compose.log (best-effort).
|
|
- `docker compose down` removes the project's containers (not the
|
|
external networks).
|
|
- `network_remove` deletes the two networks we pre-created.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import dataclasses
|
|
import os
|
|
from contextlib import ExitStack, contextmanager
|
|
from pathlib import Path
|
|
from typing import Callable, Generator
|
|
|
|
from ...egress import egress_resolve_token_values
|
|
from ...log import info
|
|
from ...pipelock import pipelock_build_config, pipelock_render_yaml
|
|
from . import network as network_mod
|
|
from . import util as docker_mod
|
|
from .bottle import DockerBottle
|
|
from .bottle_plan import DockerBottlePlan
|
|
from .bottle_state import (
|
|
bottle_state_dir,
|
|
egress_state_dir,
|
|
pipelock_state_dir,
|
|
)
|
|
from .compose import (
|
|
bottle_plan_to_compose,
|
|
compose_down,
|
|
compose_dump_logs,
|
|
compose_file_path,
|
|
compose_log_path,
|
|
compose_project_name,
|
|
compose_up,
|
|
write_compose_file,
|
|
)
|
|
from .egress import (
|
|
DockerEgress,
|
|
egress_tls_init,
|
|
)
|
|
from .git_gate import DockerGitGate
|
|
from .pipelock import (
|
|
PIPELOCK_CA_CERT_IN_CONTAINER,
|
|
PIPELOCK_CA_KEY_IN_CONTAINER,
|
|
DockerPipelockProxy,
|
|
pipelock_proxy_url,
|
|
pipelock_tls_init,
|
|
)
|
|
from .supervise import DockerSupervise
|
|
|
|
|
|
# Repository root: four directory levels above this module's resolved path.
# Used as the `docker build` context; evaluated a single time at import.
_REPO_DIR = os.fspath(
    Path(__file__).resolve().parent.parent.parent.parent
)
|
|
|
|
|
|
@contextmanager
def launch(
    plan: DockerBottlePlan,
    *,
    proxy: DockerPipelockProxy,
    git_gate: DockerGitGate,
    egress: DockerEgress,
    supervise: DockerSupervise,
    provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]:
    """Build, launch, and provision a Docker bottle via compose.

    Context manager: yields a `DockerBottle` once the compose project
    is up and provisioned, and tears everything down on exit — also
    when any launch step raises.

    Args:
        plan: The bottle plan. A copy with launch-time fields (network
            names, CIDR, CA paths, pipelock URL) filled in is what gets
            rendered to compose and handed to `provision`.
        proxy, git_gate, egress, supervise: Vestigial from the
            pre-compose flow — kept for backwards-compat with
            backend.py's call site; the `start()`/`stop()` methods on
            those classes are no longer invoked (chunk 3 collapsed
            them into the compose service spec). They'll be removed
            entirely in a follow-up cleanup.
        provision: Called as `provision(plan, plan.container_name)`
            after `compose up`; its return value is passed to
            `DockerBottle` as the prompt path.

    Yields:
        `DockerBottle(plan.container_name, teardown, prompt_path)`.
        The handle carries the same `teardown` callable that runs on
        exit; calling it more than once is harmless because a closed
        ExitStack has no callbacks left to run.
    """
    del proxy, git_gate, egress, supervise  # not invoked in compose flow

    stack = ExitStack()

    def teardown() -> None:
        """Run every registered cleanup callback; never raise."""
        try:
            stack.close()
        except BaseException:
            # Teardown must not raise; swallow so the caller's
            # __exit__ path can still propagate the original error.
            pass

    try:
        # Step 1: agent image build. Sidecar images get built lazily by
        # `docker compose up` via the renderer's `build:` directives.
        docker_mod.build_image(
            plan.image, _REPO_DIR,
            dockerfile=plan.dockerfile_path,
        )
        if plan.derived_image:
            docker_mod.build_image_with_cwd(
                plan.derived_image, plan.image, plan.spec.user_cwd
            )

        # Step 2: pre-create networks so we know the internal CIDR
        # before pipelock yaml renders. Each removal callback is
        # registered right after its create so a later failure still
        # cleans up.
        internal_network = network_mod.network_create_internal(plan.slug)
        stack.callback(network_mod.network_remove, internal_network)

        egress_network = network_mod.network_create_egress(plan.slug)
        stack.callback(network_mod.network_remove, egress_network)

        internal_cidr = network_mod.network_inspect_cidr(internal_network)

        # Step 3: mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
        ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
        egress_ca_host, egress_ca_cert_only = egress_tls_init(
            egress_state_dir(plan.slug),
        )

        # Step 4: re-render pipelock yaml with the SSRF allowlist now
        # that we know the internal CIDR. Prepare wrote the yaml
        # without the ssrf block; overwrite the same path so the
        # bind-mount picks up the updated content.
        bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
        cfg = pipelock_build_config(
            bottle,
            ca_cert_path=PIPELOCK_CA_CERT_IN_CONTAINER,
            ca_key_path=PIPELOCK_CA_KEY_IN_CONTAINER,
            ssrf_ip_allowlist=(internal_cidr,),
        )
        plan.proxy_plan.yaml_path.write_text(pipelock_render_yaml(cfg))
        plan.proxy_plan.yaml_path.chmod(0o600)

        # Step 5: populate launch-time fields on every inner plan so
        # the renderer reads concrete network names, CA paths, and
        # pipelock URL. Match the field-by-field replacement the
        # pre-compose launch did, just rolled into one pass.
        proxy_plan = dataclasses.replace(
            plan.proxy_plan,
            internal_network=internal_network,
            internal_network_cidr=internal_cidr,
            egress_network=egress_network,
            ca_cert_host_path=ca_cert_host,
            ca_key_host_path=ca_key_host,
        )
        # The git-gate and egress plans are only touched when they
        # have upstreams / routes configured.
        git_gate_plan = plan.git_gate_plan
        if git_gate_plan.upstreams:
            git_gate_plan = dataclasses.replace(
                git_gate_plan,
                internal_network=internal_network,
                egress_network=egress_network,
            )
        egress_plan = plan.egress_plan
        if egress_plan.routes:
            egress_plan = dataclasses.replace(
                egress_plan,
                internal_network=internal_network,
                egress_network=egress_network,
                mitmproxy_ca_host_path=egress_ca_host,
                mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
                pipelock_ca_host_path=ca_cert_host,
                pipelock_proxy_url=pipelock_proxy_url(plan.slug),
            )
        supervise_plan = plan.supervise_plan
        if supervise_plan is not None:
            supervise_plan = dataclasses.replace(
                supervise_plan,
                internal_network=internal_network,
            )
        plan = dataclasses.replace(
            plan,
            proxy_plan=proxy_plan,
            git_gate_plan=git_gate_plan,
            egress_plan=egress_plan,
            supervise_plan=supervise_plan,
        )

        # Step 6: render + write the compose file. metadata.json
        # was written at prepare time and already carries
        # compose_project; nothing to update here.
        state_dir = bottle_state_dir(plan.slug)
        spec = bottle_plan_to_compose(plan)
        compose_file = write_compose_file(spec, compose_file_path(state_dir))
        project = compose_project_name(plan.slug)

        # Step 7: compose up. Token values + the OAuth placeholder
        # flow through subprocess env; the compose file holds only
        # bare names for the secret-carrying entries.
        token_values: dict[str, str] = {}
        if plan.egress_plan.routes:
            token_values = egress_resolve_token_values(
                plan.egress_plan.token_env_map, dict(os.environ),
            )
        # Later mappings win on key clashes: resolved token values
        # override forwarded env, which overrides the ambient env.
        compose_env: dict[str, str] = {
            **os.environ,
            **plan.forwarded_env,
            **token_values,
        }

        # Register the compose teardown BEFORE `compose up`. If `up`
        # raises after creating some of the project's containers, the
        # `finally` below must still run `compose down`; otherwise
        # those containers are leaked and the network removals
        # registered in step 2 are likely to fail against networks
        # that still have containers attached. ExitStack runs
        # callbacks last-in-first-out, so the order on teardown is:
        # log dump, then `compose down`, then network removal.
        stack.callback(compose_down, project, compose_file)
        stack.callback(
            compose_dump_logs, project, compose_file, compose_log_path(state_dir),
        )

        info(
            f"docker compose up -d (project {project}, "
            f"{len(spec['services'])} services)"
        )
        compose_up(project, compose_file, env=compose_env)

        # Step 8: provision. Unchanged — uses `docker exec` against
        # the agent container by its known name.
        prompt_path = provision(plan, plan.container_name)

        # Step 9: yield. exec_claude continues to use `docker exec -it`
        # — the agent runs `sleep infinity` per the renderer's
        # service spec.
        yield DockerBottle(plan.container_name, teardown, prompt_path)
    finally:
        teardown()
|