Merge pull request 'feat(launch): switch start to docker compose project per bottle' (#35) from chunk-3-compose-lifecycle into main
This commit was merged in pull request #35.
This commit is contained in:
@@ -98,6 +98,13 @@ class BottleMetadata:
|
||||
cwd: str # empty string when --cwd was not passed
|
||||
copy_cwd: bool
|
||||
started_at: str # ISO 8601 UTC
|
||||
# PRD 0018 chunk 3: derivable from identity via
|
||||
# `compose_project_name(identity)`, but persisted explicitly so
|
||||
# dashboard / cleanup / resume tooling can read it without
|
||||
# importing the compose module. Empty string for state dirs
|
||||
# written before chunk 3 (resume / inspect should fall back to
|
||||
# deriving from identity in that case).
|
||||
compose_project: str = ""
|
||||
|
||||
|
||||
def metadata_path(identity: str) -> Path:
|
||||
@@ -130,6 +137,7 @@ def read_metadata(identity: str) -> BottleMetadata | None:
|
||||
cwd=str(raw.get("cwd", "")),
|
||||
copy_cwd=bool(raw.get("copy_cwd", False)),
|
||||
started_at=str(raw.get("started_at", "")),
|
||||
compose_project=str(raw.get("compose_project", "")),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -39,6 +39,9 @@ aren't rebuilt on every up.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -46,6 +49,7 @@ from ...egress import (
|
||||
EGRESS_HOSTNAME,
|
||||
EGRESS_ROUTES_IN_CONTAINER,
|
||||
)
|
||||
from ...log import die, warn
|
||||
from ...git_gate import git_gate_aggregate_extra_hosts
|
||||
from ...supervise import (
|
||||
CURRENT_CONFIG_DIR_IN_AGENT,
|
||||
@@ -126,18 +130,21 @@ def bottle_plan_to_compose(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
|
||||
|
||||
def _networks(plan: DockerBottlePlan) -> dict[str, Any]:
|
||||
"""Two compose-managed networks with explicit `name:` matching
|
||||
the existing slug-suffixed convention. The internal one is
|
||||
`--internal` (no default gateway); the egress one is a normal
|
||||
user-defined bridge so the upstream-bound sidecars can resolve
|
||||
+ reach the outside world."""
|
||||
"""Both networks are `external: true` — chunk 3 pre-creates them
|
||||
via `docker network create` so pipelock's yaml can embed the
|
||||
internal-network CIDR in its SSRF allowlist before compose-up.
|
||||
Compose just references the pre-existing networks by name.
|
||||
Network lifecycle (create / remove) is owned by the compose-
|
||||
lifecycle helpers, not compose itself; `docker compose down`
|
||||
leaves external networks alone."""
|
||||
return {
|
||||
"internal": {
|
||||
"name": plan.proxy_plan.internal_network,
|
||||
"internal": True,
|
||||
"external": True,
|
||||
},
|
||||
"egress": {
|
||||
"name": plan.proxy_plan.egress_network,
|
||||
"external": True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -382,4 +389,125 @@ def _agent_no_proxy(plan: DockerBottlePlan) -> str:
|
||||
return ",".join(hosts)
|
||||
|
||||
|
||||
__all__ = ["bottle_plan_to_compose"]
|
||||
# --- Lifecycle helpers (PRD 0018 chunk 3) ----------------------------------
|
||||
#
|
||||
# The renderer above is pure. The helpers below own the I/O side:
|
||||
# serialize the spec to disk, drive `docker compose up`, dump the
|
||||
# merged log file on teardown, and `docker compose down` to clean up
|
||||
# (networks are pre-created externally so `down` leaves them alone;
|
||||
# the launch step removes them in its own teardown step).
|
||||
|
||||
|
||||
COMPOSE_FILE_NAME = "docker-compose.yml"
|
||||
COMPOSE_LOG_NAME = "compose.log"
|
||||
|
||||
|
||||
def compose_project_name(slug: str) -> str:
|
||||
"""Stable mapping from slug → compose project. Matches the
|
||||
`name:` field the renderer emits, so `docker compose ls`
|
||||
enumeration and direct CLI invocations agree on the project
|
||||
identifier."""
|
||||
return f"claude-bottle-{slug}"
|
||||
|
||||
|
||||
def compose_file_path(state_dir: Path) -> Path:
|
||||
return state_dir / COMPOSE_FILE_NAME
|
||||
|
||||
|
||||
def compose_log_path(state_dir: Path) -> Path:
|
||||
return state_dir / COMPOSE_LOG_NAME
|
||||
|
||||
|
||||
def write_compose_file(spec: dict[str, Any], path: Path) -> Path:
|
||||
"""Serialize the compose dict to disk. JSON content with a
|
||||
`.yml` filename — JSON is a strict subset of YAML 1.2 for the
|
||||
constructs the renderer uses (mappings, lists, strings, bools,
|
||||
nulls), and `docker compose -f file.yml` parses it as YAML.
|
||||
Avoids a yaml dependency while keeping the file `cat`-readable.
|
||||
"""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(spec, indent=2, sort_keys=False) + "\n")
|
||||
path.chmod(0o644)
|
||||
return path
|
||||
|
||||
|
||||
def _compose_argv(project: str, compose_file: Path, *cmd: str) -> list[str]:
|
||||
return [
|
||||
"docker", "compose",
|
||||
"-p", project,
|
||||
"-f", str(compose_file),
|
||||
*cmd,
|
||||
]
|
||||
|
||||
|
||||
def compose_up(
|
||||
project: str,
|
||||
compose_file: Path,
|
||||
*,
|
||||
env: dict[str, str] | None = None,
|
||||
) -> None:
|
||||
"""`docker compose up -d` for the project. Env-inheritance is
|
||||
via `env=` on the subprocess — every `environment: [NAME]` (bare
|
||||
name) entry in the compose file resolves to whatever value
|
||||
`NAME` has in `env` at exec time. Secrets never land on argv or
|
||||
in the compose file."""
|
||||
argv = _compose_argv(project, compose_file, "up", "-d")
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True, env=env, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
sys.stderr.write(result.stderr)
|
||||
die(f"docker compose up failed for project {project}")
|
||||
|
||||
|
||||
def compose_dump_logs(project: str, compose_file: Path, output: Path) -> None:
|
||||
"""Write the merged stdout/stderr of every service to `output`
|
||||
using `docker compose logs --no-color --timestamps`. Best-effort
|
||||
— failures here shouldn't block teardown. The interleaved single
|
||||
file is what the user reads post-mortem; per-service tail still
|
||||
works through `docker compose logs -f <service>` while the
|
||||
project is up."""
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
argv = _compose_argv(project, compose_file, "logs", "--no-color", "--timestamps")
|
||||
try:
|
||||
with open(output, "wb") as f:
|
||||
subprocess.run(
|
||||
argv,
|
||||
stdout=f,
|
||||
stderr=subprocess.STDOUT,
|
||||
check=False,
|
||||
)
|
||||
output.chmod(0o644)
|
||||
except OSError as e:
|
||||
warn(f"failed to write compose log to {output}: {e}")
|
||||
|
||||
|
||||
def compose_down(project: str, compose_file: Path) -> None:
|
||||
"""`docker compose down` for the project. External networks are
|
||||
intentionally NOT removed by compose (`external: true` on the
|
||||
networks block); the launch step's own teardown removes them
|
||||
via `network_remove` so the per-bottle ephemeral subnet doesn't
|
||||
accumulate."""
|
||||
argv = _compose_argv(project, compose_file, "down")
|
||||
result = subprocess.run(
|
||||
argv, capture_output=True, text=True, check=False,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
warn(
|
||||
f"docker compose down failed for project {project}: "
|
||||
f"{result.stderr.strip()}"
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"COMPOSE_FILE_NAME",
|
||||
"COMPOSE_LOG_NAME",
|
||||
"bottle_plan_to_compose",
|
||||
"compose_down",
|
||||
"compose_dump_logs",
|
||||
"compose_file_path",
|
||||
"compose_log_path",
|
||||
"compose_project_name",
|
||||
"compose_up",
|
||||
"write_compose_file",
|
||||
]
|
||||
|
||||
@@ -114,6 +114,10 @@ def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
|
||||
)
|
||||
if keygen.returncode != 0:
|
||||
die(f"egress ca keygen failed: {keygen.stderr.strip()}")
|
||||
# Standalone private key — never docker-cp'd, never bind-mounted
|
||||
# (mitmproxy reads the cert+key concat below). Lock to owner-
|
||||
# only so it doesn't sit at the default umask on disk.
|
||||
key_path.chmod(0o600)
|
||||
|
||||
# `subjectKeyIdentifier=hash` makes openssl compute the SKI as
|
||||
# SHA-1(pubkey), matching how mitmproxy computes the AKI on the
|
||||
@@ -149,6 +153,12 @@ def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
|
||||
|
||||
cert_path.chmod(0o644)
|
||||
# mitmproxy reads cert + key from a single concatenated PEM file.
|
||||
# This file IS bind-mounted into the egress container (chunk 3+),
|
||||
# where mitmproxy runs as uid 1000 — so the host file has to be
|
||||
# world-readable for the container's user to read it through the
|
||||
# mount. Owner-only mode on the parent dir (state/<slug>/, under
|
||||
# ~/.claude-bottle which inherits ~'s 0o700) is what actually
|
||||
# restricts who can reach this file on the host.
|
||||
mitm = work / "mitmproxy-ca.pem"
|
||||
mitm.write_bytes(cert_path.read_bytes() + key_path.read_bytes())
|
||||
mitm.chmod(0o644)
|
||||
|
||||
@@ -31,6 +31,7 @@ from pathlib import Path
|
||||
|
||||
from ...egress import EGRESS_ROUTES_IN_CONTAINER
|
||||
from ...egress_addon_core import load_routes
|
||||
from .bottle_state import egress_state_dir
|
||||
from .egress import egress_container_name
|
||||
from .pipelock_apply import (
|
||||
PipelockApplyError,
|
||||
@@ -41,6 +42,12 @@ from .pipelock_apply import (
|
||||
)
|
||||
|
||||
|
||||
def _egress_routes_host_path(slug: str) -> Path:
|
||||
"""The bind-mount source for the egress sidecar's routes.yaml.
|
||||
Must match what egress.prepare wrote at chunk-2 paths."""
|
||||
return egress_state_dir(slug) / "egress_routes.yaml"
|
||||
|
||||
|
||||
class EgressApplyError(RuntimeError):
|
||||
"""Raised when fetch / apply fails. Caller renders to the
|
||||
operator; does not crash the dashboard."""
|
||||
@@ -163,31 +170,29 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
|
||||
# and the operator gets a clear error about the half-state.
|
||||
_mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content))
|
||||
|
||||
fd, tmp_path = tempfile.mkstemp(prefix="cb-routes.", suffix=".yaml")
|
||||
# PRD 0018 chunk 3 + security item (c): routes.yaml is bind-
|
||||
# mounted into the egress container, so the write target is the
|
||||
# host path the sidecar reads through the mount. POSIX
|
||||
# rename-onto-self is atomic on the same filesystem, so a sidecar
|
||||
# SIGHUP racing the apply can never observe a half-written file —
|
||||
# it sees either the old bytes or the new ones.
|
||||
target = _egress_routes_host_path(slug)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_path_str = tempfile.mkstemp(
|
||||
prefix=".egress_routes.", suffix=".yaml.tmp", dir=str(target.parent),
|
||||
)
|
||||
tmp_path = Path(tmp_path_str)
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
f.write(new_content)
|
||||
# mkstemp creates the file with mode 0600. `docker cp`
|
||||
# preserves mode + host uid into the container, so without
|
||||
# chmod the file lands as 0600 owned by the host user's uid,
|
||||
# which inside the container is not mitmproxy (uid 1000) —
|
||||
# the addon's reload then fails with PermissionError on the
|
||||
# SIGHUP-triggered re-read and the old routes table stays in
|
||||
# memory. Bump to 0644 so mitmproxy can read it post-cp;
|
||||
# the host stage_dir doesn't apply to this tmp file but the
|
||||
# content isn't secret (no tokens — those live in the
|
||||
# container's environ), so 0644 in /tmp is fine.
|
||||
# mitmproxy in the container reads through the bind mount as
|
||||
# uid 1000; the host file has to be world-readable for that
|
||||
# read to succeed (parent dir at 0o700 still restricts who
|
||||
# can reach the file on the host). Routes content is not
|
||||
# secret — tokens live in the container's environ — so 0o644
|
||||
# is the right trade-off.
|
||||
os.chmod(tmp_path, 0o644)
|
||||
cp = subprocess.run(
|
||||
["docker", "cp", tmp_path,
|
||||
f"{container}:{EGRESS_ROUTES_IN_CONTAINER}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if cp.returncode != 0:
|
||||
raise EgressApplyError(
|
||||
f"failed to copy routes.yaml into {container}: "
|
||||
f"{(cp.stderr or '').strip()}"
|
||||
)
|
||||
os.replace(tmp_path, target)
|
||||
sig = subprocess.run(
|
||||
["docker", "kill", "--signal", "HUP", container],
|
||||
capture_output=True, text=True, check=False,
|
||||
@@ -197,11 +202,15 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
|
||||
f"failed to SIGHUP {container}: "
|
||||
f"{(sig.stderr or '').strip()}"
|
||||
)
|
||||
finally:
|
||||
except BaseException:
|
||||
# On any failure pre-rename, drop the tmp file. Post-rename
|
||||
# there's nothing to clean up — `os.replace` is atomic so
|
||||
# either the new file is in place or the old one still is.
|
||||
try:
|
||||
Path(tmp_path).unlink()
|
||||
tmp_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
return before, new_content
|
||||
|
||||
|
||||
@@ -1,34 +1,72 @@
|
||||
"""Launch step for the Docker bottle backend.
|
||||
|
||||
`launch` is a context manager: builds the image(s), creates the per-
|
||||
agent networks, brings up the pipelock sidecar, starts the agent
|
||||
container, then runs the provision step. Teardown is sequenced via an
|
||||
ExitStack so callbacks fire in reverse-order of registration even if
|
||||
something raises mid-bring-up.
|
||||
PRD 0018 chunk 3: each instance is one `docker compose` project.
|
||||
|
||||
The flow is:
|
||||
|
||||
1. Build the agent's base + derived image (compose builds the
|
||||
sidecar images via the `build:` directive on first up).
|
||||
2. Pre-create the per-bottle networks. We do this outside compose
|
||||
so we can inspect the assigned internal CIDR and embed it in
|
||||
pipelock's yaml (compose's `external: true` lets the compose
|
||||
file reference these pre-existing networks).
|
||||
3. Mint the per-bottle CAs (chunk 2 writes them under
|
||||
state/<slug>/{pipelock,egress}/).
|
||||
4. Re-render pipelock yaml with the now-known internal CIDR so
|
||||
the SSRF allowlist exempts the bottle's own subnet.
|
||||
5. Populate the inner plans with launch-time fields so the
|
||||
renderer can read network names, CA paths, pipelock URL.
|
||||
6. Render the compose spec, write it to
|
||||
state/<slug>/docker-compose.yml, write metadata.json.
|
||||
7. `docker compose up -d` (token + OAuth values flow into the
|
||||
compose subprocess env so `environment: [NAME]` bare-name
|
||||
entries inherit without rendering values into the file).
|
||||
8. Provision (CA install, prompt copy, skills, git, supervise
|
||||
config) — unchanged, uses `docker exec`.
|
||||
9. Yield a DockerBottle handle. `exec_claude` runs claude via
|
||||
`docker exec -it` exactly like the pre-compose world.
|
||||
|
||||
Teardown (ExitStack callbacks fire in reverse):
|
||||
- Dump `docker compose logs --no-color --timestamps` to
|
||||
state/<slug>/compose.log (best-effort).
|
||||
- `docker compose down` removes the project's containers (not the
|
||||
external networks).
|
||||
- `network_remove` deletes the two networks we pre-created.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Callable, Generator
|
||||
|
||||
from ...log import die, info
|
||||
from ...egress import egress_resolve_token_values
|
||||
from ...log import info
|
||||
from ...pipelock import pipelock_build_config, pipelock_render_yaml
|
||||
from ...supervise import CURRENT_CONFIG_DIR_IN_AGENT, SUPERVISE_HOSTNAME
|
||||
from . import network as network_mod
|
||||
from . import util as docker_mod
|
||||
from .bottle import DockerBottle
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .bottle_state import egress_state_dir, pipelock_state_dir
|
||||
from .bottle_state import (
|
||||
bottle_state_dir,
|
||||
egress_state_dir,
|
||||
pipelock_state_dir,
|
||||
)
|
||||
from .compose import (
|
||||
bottle_plan_to_compose,
|
||||
compose_down,
|
||||
compose_dump_logs,
|
||||
compose_file_path,
|
||||
compose_log_path,
|
||||
compose_project_name,
|
||||
compose_up,
|
||||
write_compose_file,
|
||||
)
|
||||
from .egress import (
|
||||
DockerEgress,
|
||||
egress_tls_init,
|
||||
egress_url,
|
||||
)
|
||||
from .git_gate import DockerGitGate
|
||||
from .pipelock import (
|
||||
@@ -38,7 +76,6 @@ from .pipelock import (
|
||||
pipelock_proxy_url,
|
||||
pipelock_tls_init,
|
||||
)
|
||||
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
|
||||
from .supervise import DockerSupervise
|
||||
|
||||
|
||||
@@ -56,10 +93,15 @@ def launch(
|
||||
supervise: DockerSupervise,
|
||||
provision: Callable[[DockerBottlePlan, str], str | None],
|
||||
) -> Generator[DockerBottle, None, None]:
|
||||
"""Build, launch, and provision a Docker bottle. Teardown on exit.
|
||||
"""Build, launch, and provision a Docker bottle via compose.
|
||||
Teardown on exit. The per-sidecar `proxy / git_gate / egress /
|
||||
supervise` parameters are vestigial from the pre-compose flow —
|
||||
kept for backwards-compat with backend.py's call site; the
|
||||
`start()`/`stop()` methods on those classes are no longer
|
||||
invoked (chunk 3 collapsed them into the compose service spec).
|
||||
They'll be removed entirely in a follow-up cleanup."""
|
||||
del proxy, git_gate, egress, supervise # not invoked in compose flow
|
||||
|
||||
`provision` is the backend's provision orchestrator (passed in so
|
||||
this module stays free of backend-class plumbing)."""
|
||||
stack = ExitStack()
|
||||
|
||||
def teardown() -> None:
|
||||
@@ -71,6 +113,8 @@ def launch(
|
||||
pass
|
||||
|
||||
try:
|
||||
# Step 1: agent image build. Sidecar images get built lazily by
|
||||
# `docker compose up` via the renderer's `build:` directives.
|
||||
docker_mod.build_image(
|
||||
plan.image, _REPO_DIR,
|
||||
dockerfile=plan.dockerfile_path,
|
||||
@@ -80,45 +124,26 @@ def launch(
|
||||
plan.derived_image, plan.image, plan.spec.user_cwd
|
||||
)
|
||||
|
||||
# Step 2: pre-create networks so we know the internal CIDR
|
||||
# before pipelock yaml renders.
|
||||
internal_network = network_mod.network_create_internal(plan.slug)
|
||||
stack.callback(network_mod.network_remove, internal_network)
|
||||
|
||||
egress_network = network_mod.network_create_egress(plan.slug)
|
||||
stack.callback(network_mod.network_remove, egress_network)
|
||||
|
||||
# Docker assigns a CIDR to the new internal network. Pipelock's
|
||||
# SSRF guard otherwise rejects any destination resolving into
|
||||
# RFC1918 space — which includes the sibling sidecars
|
||||
# (egress → pipelock on the upstream leg, etc.).
|
||||
# Allowlist the bottle's own internal subnet so internal
|
||||
# traffic passes through pipelock; api_allowlist + body-scanning
|
||||
# still apply.
|
||||
internal_cidr = network_mod.network_inspect_cidr(internal_network)
|
||||
|
||||
# Per-bottle ephemeral CAs (PRD 0006 + PRD 0017). Two
|
||||
# separate CAs:
|
||||
# - pipelock CA: signs MITM certs pipelock presents on the
|
||||
# egress → upstream leg.
|
||||
# - egress CA: signs MITM certs egress presents
|
||||
# to the agent on the agent → egress leg.
|
||||
# Both are minted by one-shot pipelock containers (pipelock's
|
||||
# `tls init` is a known-good RSA CA minter) under stage_dir;
|
||||
# the .start steps docker-cp the files in. Private keys never
|
||||
# leave the host stage dir, which start.py's outer finally
|
||||
# `shutil.rmtree`s after the sidecars are torn down.
|
||||
# PRD 0018 chunk 2: CAs live under the bottle's state subdirs
|
||||
# so chunk 3's compose bind-mounts have stable sources. The
|
||||
# subdirs were created by prepare; tls_init makes the
|
||||
# `pipelock-ca/` and `egress-ca/` children under them.
|
||||
# Step 3: mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
|
||||
ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
|
||||
egress_ca_host, egress_ca_cert_only = egress_tls_init(
|
||||
egress_state_dir(plan.slug),
|
||||
)
|
||||
|
||||
# Re-render the pipelock yaml with the SSRF allowlist now that
|
||||
# we know the internal CIDR. Prepare wrote the yaml without
|
||||
# the ssrf block (CIDR wasn't known yet); overwrite the same
|
||||
# path so .start docker-cp's the updated content.
|
||||
# Step 4: re-render pipelock yaml with the SSRF allowlist now
|
||||
# that we know the internal CIDR. Prepare wrote the yaml
|
||||
# without the ssrf block; overwrite the same path so the
|
||||
# bind-mount picks up the updated content.
|
||||
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
|
||||
cfg = pipelock_build_config(
|
||||
bottle,
|
||||
@@ -129,6 +154,10 @@ def launch(
|
||||
plan.proxy_plan.yaml_path.write_text(pipelock_render_yaml(cfg))
|
||||
plan.proxy_plan.yaml_path.chmod(0o600)
|
||||
|
||||
# Step 5: populate launch-time fields on every inner plan so
|
||||
# the renderer reads concrete network names, CA paths, and
|
||||
# pipelock URL. Match the field-by-field replacement the
|
||||
# pre-compose launch did, just rolled into one pass.
|
||||
proxy_plan = dataclasses.replace(
|
||||
plan.proxy_plan,
|
||||
internal_network=internal_network,
|
||||
@@ -137,40 +166,17 @@ def launch(
|
||||
ca_cert_host_path=ca_cert_host,
|
||||
ca_key_host_path=ca_key_host,
|
||||
)
|
||||
# Re-bind the outer plan so provision_ca (which runs later
|
||||
# from `provision(plan, container)`) can read the populated
|
||||
# CA paths off plan.proxy_plan.
|
||||
plan = dataclasses.replace(plan, proxy_plan=proxy_plan)
|
||||
pipelock_name = proxy.start(plan.proxy_plan)
|
||||
stack.callback(proxy.stop, pipelock_name)
|
||||
|
||||
# Git gate (PRD 0008). One sidecar per agent, only brought up
|
||||
# when the bottle has git entries. Same internal + egress
|
||||
# network attachment as the other sidecars; agent dials it as
|
||||
# `git://<container-name>/<name>.git` via the pushInsteadOf
|
||||
# rules provision_git writes into ~/.gitconfig.
|
||||
if plan.git_gate_plan.upstreams:
|
||||
git_gate_plan = plan.git_gate_plan
|
||||
if git_gate_plan.upstreams:
|
||||
git_gate_plan = dataclasses.replace(
|
||||
plan.git_gate_plan,
|
||||
git_gate_plan,
|
||||
internal_network=internal_network,
|
||||
egress_network=egress_network,
|
||||
)
|
||||
plan = dataclasses.replace(plan, git_gate_plan=git_gate_plan)
|
||||
git_gate_name = git_gate.start(plan.git_gate_plan)
|
||||
stack.callback(git_gate.stop, git_gate_name)
|
||||
|
||||
# Egress-proxy (PRD 0017). One sidecar per bottle when
|
||||
# bottle.egress.routes is non-empty. Must come up AFTER
|
||||
# pipelock — egress routes its outbound HTTPS through
|
||||
# pipelock (HTTPS_PROXY in environ + the pipelock CA in its
|
||||
# trust store) so the egress allowlist + body scanner sit on
|
||||
# the egress → upstream leg. Must come up BEFORE the
|
||||
# agent so DNS resolution for `egress` succeeds on the
|
||||
# agent's first call; tokens flow from the host env into the
|
||||
# sidecar's environ, not the agent's.
|
||||
if plan.egress_plan.routes:
|
||||
egress_plan = plan.egress_plan
|
||||
if egress_plan.routes:
|
||||
egress_plan = dataclasses.replace(
|
||||
plan.egress_plan,
|
||||
egress_plan,
|
||||
internal_network=internal_network,
|
||||
egress_network=egress_network,
|
||||
mitmproxy_ca_host_path=egress_ca_host,
|
||||
@@ -178,151 +184,62 @@ def launch(
|
||||
pipelock_ca_host_path=ca_cert_host,
|
||||
pipelock_proxy_url=pipelock_proxy_url(plan.slug),
|
||||
)
|
||||
plan = dataclasses.replace(plan, egress_plan=egress_plan)
|
||||
egress_name = egress.start(plan.egress_plan)
|
||||
stack.callback(egress.stop, egress_name)
|
||||
|
||||
# Supervise sidecar (PRD 0013). Opt-in via bottle.supervise.
|
||||
# Internal-network only — the sidecar makes no outbound calls.
|
||||
# Must come up BEFORE the agent so DNS resolution for
|
||||
# `supervise` succeeds on the agent's first tool call.
|
||||
if plan.supervise_plan is not None:
|
||||
supervise_plan = plan.supervise_plan
|
||||
if supervise_plan is not None:
|
||||
supervise_plan = dataclasses.replace(
|
||||
plan.supervise_plan,
|
||||
supervise_plan,
|
||||
internal_network=internal_network,
|
||||
)
|
||||
plan = dataclasses.replace(plan, supervise_plan=supervise_plan)
|
||||
supervise_name = supervise.start(plan.supervise_plan)
|
||||
stack.callback(supervise.stop, supervise_name)
|
||||
plan = dataclasses.replace(
|
||||
plan,
|
||||
proxy_plan=proxy_plan,
|
||||
git_gate_plan=git_gate_plan,
|
||||
egress_plan=egress_plan,
|
||||
supervise_plan=supervise_plan,
|
||||
)
|
||||
|
||||
container = _run_agent_container(plan, internal_network)
|
||||
stack.callback(docker_mod.force_remove_container, container)
|
||||
# Step 6: render + write the compose file. metadata.json
|
||||
# was written at prepare time and already carries
|
||||
# compose_project; nothing to update here.
|
||||
state_dir = bottle_state_dir(plan.slug)
|
||||
spec = bottle_plan_to_compose(plan)
|
||||
compose_file = write_compose_file(spec, compose_file_path(state_dir))
|
||||
project = compose_project_name(plan.slug)
|
||||
|
||||
prompt_path = provision(plan, container)
|
||||
# Step 7: compose up. Token values + the OAuth placeholder
|
||||
# flow through subprocess env; the compose file holds only
|
||||
# bare names for the secret-carrying entries.
|
||||
token_values: dict[str, str] = {}
|
||||
if plan.egress_plan.routes:
|
||||
token_values = egress_resolve_token_values(
|
||||
plan.egress_plan.token_env_map, dict(os.environ),
|
||||
)
|
||||
compose_env: dict[str, str] = {
|
||||
**os.environ,
|
||||
**plan.forwarded_env,
|
||||
**token_values,
|
||||
}
|
||||
info(
|
||||
f"docker compose up -d (project {project}, "
|
||||
f"{len(spec['services'])} services)"
|
||||
)
|
||||
compose_up(project, compose_file, env=compose_env)
|
||||
|
||||
yield DockerBottle(container, teardown, prompt_path)
|
||||
# Register teardown in reverse order: log dump first, then
|
||||
# `compose down`. Networks come down last via callbacks
|
||||
# registered in step 2.
|
||||
stack.callback(compose_down, project, compose_file)
|
||||
stack.callback(
|
||||
compose_dump_logs, project, compose_file, compose_log_path(state_dir),
|
||||
)
|
||||
|
||||
# Step 8: provision. Unchanged — uses `docker exec` against
|
||||
# the agent container by its known name.
|
||||
prompt_path = provision(plan, plan.container_name)
|
||||
|
||||
# Step 9: yield. exec_claude continues to use `docker exec -it`
|
||||
# — the agent runs `sleep infinity` per the renderer's
|
||||
# service spec.
|
||||
yield DockerBottle(plan.container_name, teardown, prompt_path)
|
||||
finally:
|
||||
teardown()
|
||||
|
||||
|
||||
def _agent_no_proxy(plan: DockerBottlePlan) -> str:
|
||||
"""NO_PROXY value for the agent container. Standard loopback +
|
||||
`supervise` when the supervise sidecar is enabled.
|
||||
|
||||
Supervise needs to bypass pipelock because the MCP tool-call
|
||||
pattern is long-poll: claude-code opens an HTTPS-style request to
|
||||
http://supervise:9100/, the sidecar holds it open until the
|
||||
operator approves (potentially minutes), then returns the
|
||||
response. Pipelock is a forward proxy with idle timeouts;
|
||||
pipelock cuts the long-polled connection well before the operator
|
||||
can act, and claude-code reports the tool as ✘ failed even
|
||||
though /mcp shows ✔ connected.
|
||||
|
||||
The supervise sidecar is on the bottle's internal network with
|
||||
the `supervise` network-alias, so the agent can dial it
|
||||
directly via docker DNS. Body-scanning the supervise traffic
|
||||
isn't critical — the operator reviews every proposal in the TUI."""
|
||||
hosts = ["localhost", "127.0.0.1"]
|
||||
if plan.supervise_plan is not None:
|
||||
hosts.append(SUPERVISE_HOSTNAME)
|
||||
return ",".join(hosts)
|
||||
|
||||
|
||||
def _agent_proxy_url(plan: DockerBottlePlan) -> str:
|
||||
"""Pick the proxy URL the agent's HTTP_PROXY env points at. PRD
|
||||
0017: when an egress is declared, the agent goes through
|
||||
egress (which in turn uses HTTPS_PROXY=pipelock on its
|
||||
outbound leg). Otherwise the agent talks straight to pipelock —
|
||||
keeps the network surface minimal for bottles that don't need
|
||||
path filtering or credential injection."""
|
||||
if plan.egress_plan.routes:
|
||||
return egress_url()
|
||||
return pipelock_proxy_url(plan.slug)
|
||||
|
||||
|
||||
def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
|
||||
"""Build the `docker run` argv and execute it, handling name-
|
||||
conflict races by incrementing the suffix (unless the name was
|
||||
user-pinned). Returns the resolved container name."""
|
||||
proxy_url = _agent_proxy_url(plan)
|
||||
no_proxy = _agent_no_proxy(plan)
|
||||
# Set BOTH cases of every *_PROXY var. libcurl's CVE-2016-5388
|
||||
# httpoxy mitigation makes it ignore uppercase `HTTP_PROXY` for
|
||||
# `http://` URLs and only honor lowercase `http_proxy`. Without
|
||||
# the lowercase var, plain-HTTP requests from the agent bypass
|
||||
# egress entirely (going direct, then failing with
|
||||
# "network unreachable" because the agent's bridge is
|
||||
# --internal). Lowercase HTTPS_PROXY isn't strictly needed but
|
||||
# we set it for symmetry — some tools check one or the other.
|
||||
docker_args: list[str] = [
|
||||
"--rm", "-d",
|
||||
"--name", plan.container_name,
|
||||
"--network", internal_network,
|
||||
"-e", f"HTTPS_PROXY={proxy_url}",
|
||||
"-e", f"HTTP_PROXY={proxy_url}",
|
||||
"-e", f"https_proxy={proxy_url}",
|
||||
"-e", f"http_proxy={proxy_url}",
|
||||
"-e", f"NO_PROXY={no_proxy}",
|
||||
"-e", f"no_proxy={no_proxy}",
|
||||
# CA trust trio for the agent process. Docker propagates
|
||||
# run-time env into `docker exec`, so `claude` sees these
|
||||
# without per-exec threading. NODE_EXTRA_CA_CERTS points at
|
||||
# the cert file (Node appends it to its bundled roots);
|
||||
# SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
|
||||
# bundle that `update-ca-certificates` rebuilds in
|
||||
# provision_ca.
|
||||
"-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
|
||||
"-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
|
||||
"-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
|
||||
]
|
||||
if plan.use_runsc:
|
||||
docker_args.extend(["--runtime", "runsc"])
|
||||
if plan.env_file.stat().st_size > 0:
|
||||
docker_args.extend(["--env-file", str(plan.env_file)])
|
||||
for name in plan.forwarded_env:
|
||||
docker_args.extend(["-e", name])
|
||||
|
||||
# PRD 0013: read-only current-config mount so the agent can read
|
||||
# routes.yaml / allowlist / Dockerfile before composing a
|
||||
# supervise tool-call proposal. Mounted from the per-bottle
|
||||
# stage_dir/current-config/ populated at prepare time.
|
||||
if plan.supervise_plan is not None:
|
||||
docker_args.extend([
|
||||
"-v",
|
||||
f"{plan.supervise_plan.current_config_dir}:{CURRENT_CONFIG_DIR_IN_AGENT}:ro",
|
||||
])
|
||||
|
||||
docker_args.extend([plan.runtime_image, "sleep", "infinity"])
|
||||
|
||||
info(f"starting container {plan.container_name} from {plan.runtime_image}")
|
||||
|
||||
# Inject forwarded values (secrets, interpolated host vars, the
|
||||
# renamed OAuth token) into the docker-run child's env so the
|
||||
# `-e NAME` flags above pick them up — without touching our own
|
||||
# os.environ or putting values on argv.
|
||||
child_env: dict[str, str] = {**os.environ, **plan.forwarded_env}
|
||||
|
||||
name_idx = docker_args.index("--name") + 1
|
||||
for candidate in docker_mod.container_name_candidates(plan.container_name):
|
||||
docker_args[name_idx] = candidate
|
||||
run_result = subprocess.run(
|
||||
["docker", "run", *docker_args],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=child_env,
|
||||
check=False,
|
||||
)
|
||||
if run_result.returncode == 0:
|
||||
return candidate
|
||||
err_text = run_result.stderr
|
||||
if plan.container_name_pinned or "is already in use" not in err_text:
|
||||
sys.stderr.write(err_text + "\n")
|
||||
die(f"docker run failed for container '{candidate}'")
|
||||
info(f"name conflict on {candidate}; retrying with next candidate")
|
||||
die(
|
||||
f"could not find a free container name after "
|
||||
f"{plan.container_name}-{docker_mod.MAX_CONTAINER_SUFFIX} retries; "
|
||||
f"clean up old containers"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -67,6 +67,12 @@ def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
|
||||
key = work / "ca-key.pem"
|
||||
if not cert.is_file() or not key.is_file():
|
||||
die(f"pipelock tls init did not produce ca files in {work}")
|
||||
# Explicit perms in case a future pipelock release changes
|
||||
# defaults. Pipelock runs as root in its distroless image and
|
||||
# bind-mounts work with 0o600 (root reads everything); the key
|
||||
# has no reason to be readable to anyone else on the host.
|
||||
key.chmod(0o600)
|
||||
cert.chmod(0o644)
|
||||
return (cert, key)
|
||||
|
||||
|
||||
|
||||
@@ -24,9 +24,17 @@ from pathlib import Path
|
||||
|
||||
from ...pipelock import pipelock_render_yaml
|
||||
from ...yaml_subset import parse_yaml_subset
|
||||
from .bottle_state import pipelock_state_dir
|
||||
from .pipelock import pipelock_container_name
|
||||
|
||||
|
||||
def _pipelock_yaml_host_path(slug: str) -> Path:
|
||||
"""The bind-mount source for the pipelock sidecar's
|
||||
pipelock.yaml — matches what pipelock.prepare wrote at chunk-2
|
||||
paths."""
|
||||
return pipelock_state_dir(slug) / "pipelock.yaml"
|
||||
|
||||
|
||||
PIPELOCK_YAML_IN_CONTAINER = "/etc/pipelock.yaml"
|
||||
|
||||
# Allowlist proposals are one-hostname-per-line. Blank lines and
|
||||
@@ -141,19 +149,26 @@ def apply_allowlist_change(
|
||||
cfg["api_allowlist"] = new_hosts
|
||||
rendered = pipelock_render_yaml(cfg)
|
||||
|
||||
fd, tmp_path = tempfile.mkstemp(prefix="cb-pipelock-yaml.", suffix=".yaml")
|
||||
# PRD 0018 chunk 3 + security item (c): pipelock.yaml is
|
||||
# bind-mounted into the container, so the write target is the
|
||||
# host path the sidecar reads. POSIX rename is atomic on the
|
||||
# same filesystem, which matters less here than for the
|
||||
# SIGHUP-reload egress case (pipelock fully restarts and
|
||||
# re-reads on boot), but the pattern is uniform across both
|
||||
# apply paths.
|
||||
target = _pipelock_yaml_host_path(slug)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_path_str = tempfile.mkstemp(
|
||||
prefix=".pipelock.", suffix=".yaml.tmp", dir=str(target.parent),
|
||||
)
|
||||
tmp_path = Path(tmp_path_str)
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
f.write(rendered)
|
||||
cp = subprocess.run(
|
||||
["docker", "cp", tmp_path, f"{container}:{PIPELOCK_YAML_IN_CONTAINER}"],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if cp.returncode != 0:
|
||||
raise PipelockApplyError(
|
||||
f"failed to copy pipelock.yaml into {container}: "
|
||||
f"{(cp.stderr or '').strip()}"
|
||||
)
|
||||
# pipelock runs as root in its distroless image — any mode
|
||||
# is fine — but 0o600 matches what prepare wrote.
|
||||
os.chmod(tmp_path, 0o600)
|
||||
os.replace(tmp_path, target)
|
||||
restart = subprocess.run(
|
||||
["docker", "restart", container],
|
||||
capture_output=True, text=True, check=False,
|
||||
@@ -163,11 +178,12 @@ def apply_allowlist_change(
|
||||
f"failed to restart {container}: "
|
||||
f"{(restart.stderr or '').strip()}"
|
||||
)
|
||||
finally:
|
||||
except BaseException:
|
||||
try:
|
||||
Path(tmp_path).unlink()
|
||||
tmp_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
return before, after
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ def resolve_plan(
|
||||
cwd=spec.user_cwd if spec.copy_cwd else "",
|
||||
copy_cwd=spec.copy_cwd,
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
compose_project=f"claude-bottle-{slug}",
|
||||
))
|
||||
# Clear any leftover preserve marker from a prior capability-block
|
||||
# so this fresh launch can be cleaned up at session-end unless
|
||||
|
||||
@@ -163,9 +163,15 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
|
||||
" keyfile=/git-gate/creds/${name}-key",
|
||||
" hostsfile=/git-gate/creds/${name}-known_hosts",
|
||||
"",
|
||||
" chmod 600 \"$keyfile\"",
|
||||
# `|| true`: PRD 0018 chunk 3+ bind-mounts these RO from the
|
||||
# host, so chmod-syscalls fail with EROFS. The files already
|
||||
# have the right perms on the host (SSH requires 0600 to load
|
||||
# the key in the first place), so the chmod is best-effort
|
||||
# cleanup for the legacy docker-cp path where the file
|
||||
# landed at the host's umask perms.
|
||||
" chmod 600 \"$keyfile\" 2>/dev/null || true",
|
||||
" if [ -f \"$hostsfile\" ]; then",
|
||||
" chmod 600 \"$hostsfile\"",
|
||||
" chmod 600 \"$hostsfile\" 2>/dev/null || true",
|
||||
" fi",
|
||||
"",
|
||||
" repo=/git/${name}.git",
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
"""Unit: agent NO_PROXY value builder (PR #25 follow-up).
|
||||
|
||||
claude-code's HTTP MCP client must bypass pipelock for the supervise
|
||||
sidecar — long-poll tool calls would hit pipelock's idle timeout
|
||||
otherwise. This test pins the rule: localhost always; supervise iff
|
||||
the supervise sidecar is in the plan."""
|
||||
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle.backend.docker.launch import _agent_no_proxy
|
||||
|
||||
|
||||
class _FakePlan:
|
||||
"""Just enough plan shape for the helper — no full DockerBottlePlan
|
||||
construction needed."""
|
||||
|
||||
def __init__(self, supervise_plan):
|
||||
self.supervise_plan = supervise_plan
|
||||
|
||||
|
||||
class _SentinelSupervisePlan:
|
||||
"""The helper only checks `supervise_plan is not None`; any object
|
||||
is fine."""
|
||||
|
||||
|
||||
class TestAgentNoProxy(unittest.TestCase):
|
||||
def test_loopback_only_when_no_supervise(self):
|
||||
self.assertEqual(
|
||||
"localhost,127.0.0.1",
|
||||
_agent_no_proxy(_FakePlan(supervise_plan=None)),
|
||||
)
|
||||
|
||||
def test_supervise_appended_when_enabled(self):
|
||||
self.assertEqual(
|
||||
"localhost,127.0.0.1,supervise",
|
||||
_agent_no_proxy(_FakePlan(supervise_plan=_SentinelSupervisePlan())),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -176,19 +176,20 @@ class TestProjectAndNetworks(unittest.TestCase):
|
||||
spec = bottle_plan_to_compose(_plan())
|
||||
self.assertEqual(f"claude-bottle-{SLUG}", spec["name"])
|
||||
|
||||
def test_internal_network_is_internal(self):
|
||||
def test_internal_network_marked_external(self):
|
||||
# Chunk 3 pre-creates networks with `docker network create
|
||||
# --internal` so pipelock can know the CIDR before compose-up.
|
||||
# Compose references the network by name with `external: true`.
|
||||
spec = bottle_plan_to_compose(_plan())
|
||||
net = spec["networks"]["internal"]
|
||||
self.assertEqual(f"claude-bottle-net-{SLUG}", net["name"])
|
||||
self.assertTrue(net["internal"])
|
||||
self.assertTrue(net["external"])
|
||||
|
||||
def test_egress_network_is_external_bridge(self):
|
||||
def test_egress_network_marked_external(self):
|
||||
spec = bottle_plan_to_compose(_plan())
|
||||
net = spec["networks"]["egress"]
|
||||
self.assertEqual(f"claude-bottle-egress-{SLUG}", net["name"])
|
||||
# No `internal:` key on the egress network — defaults to a
|
||||
# normal user-defined bridge.
|
||||
self.assertNotIn("internal", net)
|
||||
self.assertTrue(net["external"])
|
||||
|
||||
|
||||
class TestPipelockAlwaysPresent(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user