From cefdc8c6e9800f99f455aa0b1a6540471383ff23 Mon Sep 17 00:00:00 2001
From: didericis <eric@dideric.is>
Date: Mon, 25 May 2026 23:16:40 -0400
Subject: [PATCH] feat(launch): switch start to docker compose project per
 bottle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PRD 0018 chunk 3. Each instance is now one `docker compose` project:

  - launch.py renders the compose spec via chunk-1's
    bottle_plan_to_compose, writes it to state/<slug>/docker-compose.yml,
    `docker compose up -d`s, and (on teardown) dumps
    `docker compose logs --no-color --timestamps` to
    state/<slug>/compose.log before `docker compose down`.
  - Networks are pre-created (`docker network create --internal` +
    user-defined bridge) so pipelock yaml can know the internal CIDR
    before compose-up. Compose references them with `external: true`;
    the launch step's ExitStack still owns network removal.
  - Agent still runs `sleep infinity`; claude reaches it via
    `docker exec -it` exactly like before (per the PRD's resolved
    TTY question).
  - metadata.json grows a `compose_project` field so dashboard /
    cleanup tooling can derive compose invocations without
    re-deriving the slug.

Security follow-ups from chunk-2 review:

  (b) CA private keys: pipelock + egress ca-key.pem land at 0o600
      explicitly. The mitmproxy cert+key concat stays 0o644 because
      the egress container's uid-1000 user reads it through the
      bind mount; parent dir at 0o700 still restricts host-side
      reach.
  (c) Apply atomicity: egress_apply + pipelock_apply switch from
      `docker cp` to host-side write-temp-then-rename on the
      bind-mount source. POSIX rename is atomic on the same
      filesystem, so a sidecar SIGHUP racing the apply can't see
      a half-written routes.yaml / pipelock.yaml.

Per-sidecar Docker{Sidecar}.start/stop methods stay in place — the
integration test suite drives them directly to validate each image
in isolation, which is still useful. launch.py no longer calls
them; a follow-up chunk can prune if the integration tests move to
the compose lifecycle.

git-gate entrypoint's chmod 600 on the keyfile + known_hosts now
tolerates EROFS (`|| true`) — the host SSH key is already 0600
(SSH refuses to load otherwise), so the inside-container chmod
was already a no-op in the docker-cp path and now just needs to
not error on the read-only bind mount.

422 unit tests pass; supervise integration test passes; end-to-end
`./cli.py start implementer` brings up the project, attaches,
captures full merged logs on teardown, and reaps all containers +
networks.
---
 claude_bottle/backend/docker/bottle_state.py  |   8 +
 claude_bottle/backend/docker/compose.py       | 142 +++++++-
 claude_bottle/backend/docker/egress.py        |  10 +
 claude_bottle/backend/docker/egress_apply.py  |  55 +--
 claude_bottle/backend/docker/launch.py        | 337 +++++++-----------
 claude_bottle/backend/docker/pipelock.py      |   6 +
 .../backend/docker/pipelock_apply.py          |  40 ++-
 claude_bottle/backend/docker/prepare.py       |   1 +
 claude_bottle/git_gate.py                     |  10 +-
 tests/unit/test_agent_no_proxy.py             |  42 ---
 tests/unit/test_compose.py                    |  13 +-
 11 files changed, 362 insertions(+), 302 deletions(-)
 delete mode 100644 tests/unit/test_agent_no_proxy.py
diff --git a/claude_bottle/backend/docker/bottle_state.py b/claude_bottle/backend/docker/bottle_state.py
index 136c1cf..1ea2c6b 100644
--- a/claude_bottle/backend/docker/bottle_state.py
+++ b/claude_bottle/backend/docker/bottle_state.py
@@ -98,6 +98,13 @@ class BottleMetadata:
     cwd: str           # empty string when --cwd was not passed
     copy_cwd: bool
     started_at: str    # ISO 8601 UTC
+    # PRD 0018 chunk 3: derivable from identity via
+    # `compose_project_name(identity)`, but persisted explicitly so
+    # dashboard / cleanup / resume tooling can read it without
+    # importing the compose module. Empty string for state dirs
+    # written before chunk 3 (resume / inspect should fall back to
+    # deriving from identity in that case).
+    compose_project: str = ""
 
 
 def metadata_path(identity: str) -> Path:
@@ -130,6 +137,7 @@ def read_metadata(identity: str) -> BottleMetadata | None:
         cwd=str(raw.get("cwd", "")),
         copy_cwd=bool(raw.get("copy_cwd", False)),
         started_at=str(raw.get("started_at", "")),
+        compose_project=str(raw.get("compose_project", "")),
     )
 
 
diff --git a/claude_bottle/backend/docker/compose.py b/claude_bottle/backend/docker/compose.py
index 00fcbaf..432316c 100644
--- a/claude_bottle/backend/docker/compose.py
+++ b/claude_bottle/backend/docker/compose.py
@@ -39,6 +39,9 @@ aren't rebuilt on every up.
 
 from __future__ import annotations
 
+import json
+import subprocess
+import sys
 from pathlib import Path
 from typing import Any
 
@@ -46,6 +49,7 @@ from ...egress import (
     EGRESS_HOSTNAME,
     EGRESS_ROUTES_IN_CONTAINER,
 )
+from ...log import die, warn
 from ...git_gate import git_gate_aggregate_extra_hosts
 from ...supervise import (
     CURRENT_CONFIG_DIR_IN_AGENT,
@@ -126,18 +130,21 @@ def bottle_plan_to_compose(plan: DockerBottlePlan) -> dict[str, Any]:
 
 
 def _networks(plan: DockerBottlePlan) -> dict[str, Any]:
-    """Two compose-managed networks with explicit `name:` matching
-    the existing slug-suffixed convention. The internal one is
-    `--internal` (no default gateway); the egress one is a normal
-    user-defined bridge so the upstream-bound sidecars can resolve
-    + reach the outside world."""
+    """Both networks are `external: true` — chunk 3 pre-creates them
+    via `docker network create` so pipelock's yaml can embed the
+    internal-network CIDR in its SSRF allowlist before compose-up.
+    Compose just references the pre-existing networks by name.
+    Network lifecycle (create / remove) is owned by the compose-
+    lifecycle helpers, not compose itself; `docker compose down`
+    leaves external networks alone."""
     return {
         "internal": {
             "name": plan.proxy_plan.internal_network,
-            "internal": True,
+            "external": True,
         },
         "egress": {
             "name": plan.proxy_plan.egress_network,
+            "external": True,
         },
     }
 
@@ -382,4 +389,125 @@ def _agent_no_proxy(plan: DockerBottlePlan) -> str:
     return ",".join(hosts)
 
 
-__all__ = ["bottle_plan_to_compose"]
+# --- Lifecycle helpers (PRD 0018 chunk 3) ----------------------------------
+#
+# The renderer above is pure. The helpers below own the I/O side:
+# serialize the spec to disk, drive `docker compose up`, dump the
+# merged log file on teardown, and `docker compose down` to clean up
+# (networks are pre-created externally so `down` leaves them alone;
+# the launch step removes them in its own teardown step).
+
+
+COMPOSE_FILE_NAME = "docker-compose.yml"
+COMPOSE_LOG_NAME = "compose.log"
+
+
+def compose_project_name(slug: str) -> str:
+    """Stable mapping from slug → compose project. Matches the
+    `name:` field the renderer emits, so `docker compose ls`
+    enumeration and direct CLI invocations agree on the project
+    identifier."""
+    return f"claude-bottle-{slug}"
+
+
+def compose_file_path(state_dir: Path) -> Path:
+    return state_dir / COMPOSE_FILE_NAME
+
+
+def compose_log_path(state_dir: Path) -> Path:
+    return state_dir / COMPOSE_LOG_NAME
+
+
+def write_compose_file(spec: dict[str, Any], path: Path) -> Path:
+    """Serialize the compose dict to disk. JSON content with a
+    `.yml` filename — JSON is a strict subset of YAML 1.2 for the
+    constructs the renderer uses (mappings, lists, strings, bools,
+    nulls), and `docker compose -f file.yml` parses it as YAML.
+    Avoids a yaml dependency while keeping the file `cat`-readable.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(spec, indent=2, sort_keys=False) + "\n")
+    path.chmod(0o644)
+    return path
+
+
+def _compose_argv(project: str, compose_file: Path, *cmd: str) -> list[str]:
+    return [
+        "docker", "compose",
+        "-p", project,
+        "-f", str(compose_file),
+        *cmd,
+    ]
+
+
+def compose_up(
+    project: str,
+    compose_file: Path,
+    *,
+    env: dict[str, str] | None = None,
+) -> None:
+    """`docker compose up -d` for the project. Env-inheritance is
+    via `env=` on the subprocess — every `environment: [NAME]` (bare
+    name) entry in the compose file resolves to whatever value
+    `NAME` has in `env` at exec time. Secrets never land on argv or
+    in the compose file."""
+    argv = _compose_argv(project, compose_file, "up", "-d")
+    result = subprocess.run(
+        argv, capture_output=True, text=True, env=env, check=False,
+    )
+    if result.returncode != 0:
+        sys.stderr.write(result.stderr)
+        die(f"docker compose up failed for project {project}")
+
+
+def compose_dump_logs(project: str, compose_file: Path, output: Path) -> None:
+    """Write the merged stdout/stderr of every service to `output`
+    using `docker compose logs --no-color --timestamps`. Best-effort
+    — failures here shouldn't block teardown. The interleaved single
+    file is what the user reads post-mortem; per-service tail still
+    works through `docker compose logs -f <service>` while the
+    project is up."""
+    output.parent.mkdir(parents=True, exist_ok=True)
+    argv = _compose_argv(project, compose_file, "logs", "--no-color", "--timestamps")
+    try:
+        with open(output, "wb") as f:
+            subprocess.run(
+                argv,
+                stdout=f,
+                stderr=subprocess.STDOUT,
+                check=False,
+            )
+        output.chmod(0o644)
+    except OSError as e:
+        warn(f"failed to write compose log to {output}: {e}")
+
+
+def compose_down(project: str, compose_file: Path) -> None:
+    """`docker compose down` for the project. External networks are
+    intentionally NOT removed by compose (`external: true` on the
+    networks block); the launch step's own teardown removes them
+    via `network_remove` so the per-bottle ephemeral subnet doesn't
+    accumulate."""
+    argv = _compose_argv(project, compose_file, "down")
+    result = subprocess.run(
+        argv, capture_output=True, text=True, check=False,
+    )
+    if result.returncode != 0:
+        warn(
+            f"docker compose down failed for project {project}: "
+            f"{result.stderr.strip()}"
+        )
+
+
+__all__ = [
+    "COMPOSE_FILE_NAME",
+    "COMPOSE_LOG_NAME",
+    "bottle_plan_to_compose",
+    "compose_down",
+    "compose_dump_logs",
+    "compose_file_path",
+    "compose_log_path",
+    "compose_project_name",
+    "compose_up",
+    "write_compose_file",
+]
diff --git a/claude_bottle/backend/docker/egress.py b/claude_bottle/backend/docker/egress.py
index 9d1f5f1..c535608 100644
--- a/claude_bottle/backend/docker/egress.py
+++ b/claude_bottle/backend/docker/egress.py
@@ -114,6 +114,10 @@ def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
     )
     if keygen.returncode != 0:
         die(f"egress ca keygen failed: {keygen.stderr.strip()}")
+    # Standalone private key — never docker-cp'd, never bind-mounted
+    # (mitmproxy reads the cert+key concat below). Lock to owner-
+    # only so it doesn't sit at the default umask on disk.
+    key_path.chmod(0o600)
 
     # `subjectKeyIdentifier=hash` makes openssl compute the SKI as
     # SHA-1(pubkey), matching how mitmproxy computes the AKI on the
@@ -149,6 +153,12 @@ def egress_tls_init(stage_dir: Path) -> tuple[Path, Path]:
 
     cert_path.chmod(0o644)
     # mitmproxy reads cert + key from a single concatenated PEM file.
+    # This file IS bind-mounted into the egress container (chunk 3+),
+    # where mitmproxy runs as uid 1000 — so the host file has to be
+    # world-readable for the container's user to read it through the
+    # mount. Owner-only mode on the parent dir (state/<slug>/, under
+    # ~/.claude-bottle which inherits ~'s 0o700) is what actually
+    # restricts who can reach this file on the host.
     mitm = work / "mitmproxy-ca.pem"
     mitm.write_bytes(cert_path.read_bytes() + key_path.read_bytes())
     mitm.chmod(0o644)
diff --git a/claude_bottle/backend/docker/egress_apply.py b/claude_bottle/backend/docker/egress_apply.py
index 63e633a..eb72cd8 100644
--- a/claude_bottle/backend/docker/egress_apply.py
+++ b/claude_bottle/backend/docker/egress_apply.py
@@ -31,6 +31,7 @@ from pathlib import Path
 
 from ...egress import EGRESS_ROUTES_IN_CONTAINER
 from ...egress_addon_core import load_routes
+from .bottle_state import egress_state_dir
 from .egress import egress_container_name
 from .pipelock_apply import (
     PipelockApplyError,
@@ -41,6 +42,12 @@ from .pipelock_apply import (
 )
 
 
+def _egress_routes_host_path(slug: str) -> Path:
+    """The bind-mount source for the egress sidecar's routes.yaml.
+    Must match what egress.prepare wrote at chunk-2 paths."""
+    return egress_state_dir(slug) / "egress_routes.yaml"
+
+
 class EgressApplyError(RuntimeError):
     """Raised when fetch / apply fails. Caller renders to the
     operator; does not crash the dashboard."""
@@ -163,31 +170,29 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
     # and the operator gets a clear error about the half-state.
     _mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content))
 
-    fd, tmp_path = tempfile.mkstemp(prefix="cb-routes.", suffix=".yaml")
+    # PRD 0018 chunk 3 + security item (c): routes.yaml is bind-
+    # mounted into the egress container, so the write target is the
+    # host path the sidecar reads through the mount. POSIX
+    # rename-onto-self is atomic on the same filesystem, so a sidecar
+    # SIGHUP racing the apply can never observe a half-written file —
+    # it sees either the old bytes or the new ones.
+    target = _egress_routes_host_path(slug)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    fd, tmp_path_str = tempfile.mkstemp(
+        prefix=".egress_routes.", suffix=".yaml.tmp", dir=str(target.parent),
+    )
+    tmp_path = Path(tmp_path_str)
     try:
         with os.fdopen(fd, "w") as f:
             f.write(new_content)
-        # mkstemp creates the file with mode 0600. `docker cp`
-        # preserves mode + host uid into the container, so without
-        # chmod the file lands as 0600 owned by the host user's uid,
-        # which inside the container is not mitmproxy (uid 1000) —
-        # the addon's reload then fails with PermissionError on the
-        # SIGHUP-triggered re-read and the old routes table stays in
-        # memory. Bump to 0644 so mitmproxy can read it post-cp;
-        # the host stage_dir doesn't apply to this tmp file but the
-        # content isn't secret (no tokens — those live in the
-        # container's environ), so 0644 in /tmp is fine.
+        # mitmproxy in the container reads through the bind mount as
+        # uid 1000; the host file has to be world-readable for that
+        # read to succeed (parent dir at 0o700 still restricts who
+        # can reach the file on the host). Routes content is not
+        # secret — tokens live in the container's environ — so 0o644
+        # is the right trade-off.
         os.chmod(tmp_path, 0o644)
-        cp = subprocess.run(
-            ["docker", "cp", tmp_path,
-             f"{container}:{EGRESS_ROUTES_IN_CONTAINER}"],
-            capture_output=True, text=True, check=False,
-        )
-        if cp.returncode != 0:
-            raise EgressApplyError(
-                f"failed to copy routes.yaml into {container}: "
-                f"{(cp.stderr or '').strip()}"
-            )
+        os.replace(tmp_path, target)
         sig = subprocess.run(
             ["docker", "kill", "--signal", "HUP", container],
             capture_output=True, text=True, check=False,
@@ -197,11 +202,15 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
                 f"failed to SIGHUP {container}: "
                 f"{(sig.stderr or '').strip()}"
             )
-    finally:
+    except BaseException:
+        # On any failure pre-rename, drop the tmp file. Post-rename
+        # there's nothing to clean up — `os.replace` is atomic so
+        # either the new file is in place or the old one still is.
         try:
-            Path(tmp_path).unlink()
+            tmp_path.unlink()
         except OSError:
             pass
+        raise
 
     return before, new_content
 
diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py
index 4745781..5bb439f 100644
--- a/claude_bottle/backend/docker/launch.py
+++ b/claude_bottle/backend/docker/launch.py
@@ -1,34 +1,72 @@
 """Launch step for the Docker bottle backend.
 
-`launch` is a context manager: builds the image(s), creates the per-
-agent networks, brings up the pipelock sidecar, starts the agent
-container, then runs the provision step. Teardown is sequenced via an
-ExitStack so callbacks fire in reverse-order of registration even if
-something raises mid-bring-up.
+PRD 0018 chunk 3: each instance is one `docker compose` project.
+
+The flow is:
+
+  1. Build the agent's base + derived image (compose builds the
+     sidecar images via the `build:` directive on first up).
+  2. Pre-create the per-bottle networks. We do this outside compose
+     so we can inspect the assigned internal CIDR and embed it in
+     pipelock's yaml (compose's `external: true` lets the compose
+     file reference these pre-existing networks).
+  3. Mint the per-bottle CAs (chunk 2 writes them under
+     state/<slug>/{pipelock,egress}/).
+  4. Re-render pipelock yaml with the now-known internal CIDR so
+     the SSRF allowlist exempts the bottle's own subnet.
+  5. Populate the inner plans with launch-time fields so the
+     renderer can read network names, CA paths, pipelock URL.
+  6. Render the compose spec, write it to
+     state/<slug>/docker-compose.yml, write metadata.json.
+  7. `docker compose up -d` (token + OAuth values flow into the
+     compose subprocess env so `environment: [NAME]` bare-name
+     entries inherit without rendering values into the file).
+  8. Provision (CA install, prompt copy, skills, git, supervise
+     config) — unchanged, uses `docker exec`.
+  9. Yield a DockerBottle handle. `exec_claude` runs claude via
+     `docker exec -it` exactly like the pre-compose world.
+
+Teardown (ExitStack callbacks fire in reverse):
+  - Dump `docker compose logs --no-color --timestamps` to
+    state/<slug>/compose.log (best-effort).
+  - `docker compose down` removes the project's containers (not the
+    external networks).
+  - `network_remove` deletes the two networks we pre-created.
 """
 
 from __future__ import annotations
 
 import dataclasses
 import os
-import subprocess
-import sys
 from contextlib import ExitStack, contextmanager
 from pathlib import Path
 from typing import Callable, Generator
 
-from ...log import die, info
+from ...egress import egress_resolve_token_values
+from ...log import info
 from ...pipelock import pipelock_build_config, pipelock_render_yaml
-from ...supervise import CURRENT_CONFIG_DIR_IN_AGENT, SUPERVISE_HOSTNAME
 from . import network as network_mod
 from . import util as docker_mod
 from .bottle import DockerBottle
 from .bottle_plan import DockerBottlePlan
-from .bottle_state import egress_state_dir, pipelock_state_dir
+from .bottle_state import (
+    bottle_state_dir,
+    egress_state_dir,
+    pipelock_state_dir,
+)
+from .compose import (
+    bottle_plan_to_compose,
+    compose_down,
+    compose_dump_logs,
+    compose_file_path,
+    compose_log_path,
+    compose_project_name,
+    compose_up,
+    write_compose_file,
+)
 from .egress import (
     DockerEgress,
     egress_tls_init,
-    egress_url,
 )
 from .git_gate import DockerGitGate
 from .pipelock import (
@@ -38,7 +76,6 @@ from .pipelock import (
     pipelock_proxy_url,
     pipelock_tls_init,
 )
-from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
 from .supervise import DockerSupervise
 
 
@@ -56,10 +93,15 @@ def launch(
     supervise: DockerSupervise,
     provision: Callable[[DockerBottlePlan, str], str | None],
 ) -> Generator[DockerBottle, None, None]:
-    """Build, launch, and provision a Docker bottle. Teardown on exit.
+    """Build, launch, and provision a Docker bottle via compose.
+    Teardown on exit. The per-sidecar `proxy / git_gate / egress /
+    supervise` parameters are vestigial from the pre-compose flow —
+    kept for backwards-compat with backend.py's call site; the
+    `start()`/`stop()` methods on those classes are no longer
+    invoked (chunk 3 collapsed them into the compose service spec).
+    They'll be removed entirely in a follow-up cleanup."""
+    del proxy, git_gate, egress, supervise  # not invoked in compose flow
 
-    `provision` is the backend's provision orchestrator (passed in so
-    this module stays free of backend-class plumbing)."""
     stack = ExitStack()
 
     def teardown() -> None:
@@ -71,6 +113,8 @@ def launch(
             pass
 
     try:
+        # Step 1: agent image build. Sidecar images get built lazily by
+        # `docker compose up` via the renderer's `build:` directives.
         docker_mod.build_image(
             plan.image, _REPO_DIR,
             dockerfile=plan.dockerfile_path,
@@ -80,45 +124,26 @@ def launch(
                 plan.derived_image, plan.image, plan.spec.user_cwd
             )
 
+        # Step 2: pre-create networks so we know the internal CIDR
+        # before pipelock yaml renders.
         internal_network = network_mod.network_create_internal(plan.slug)
         stack.callback(network_mod.network_remove, internal_network)
 
         egress_network = network_mod.network_create_egress(plan.slug)
         stack.callback(network_mod.network_remove, egress_network)
 
-        # Docker assigns a CIDR to the new internal network. Pipelock's
-        # SSRF guard otherwise rejects any destination resolving into
-        # RFC1918 space — which includes the sibling sidecars
-        # (egress → pipelock on the upstream leg, etc.).
-        # Allowlist the bottle's own internal subnet so internal
-        # traffic passes through pipelock; api_allowlist + body-scanning
-        # still apply.
         internal_cidr = network_mod.network_inspect_cidr(internal_network)
 
-        # Per-bottle ephemeral CAs (PRD 0006 + PRD 0017). Two
-        # separate CAs:
-        #   - pipelock CA: signs MITM certs pipelock presents on the
-        #     egress → upstream leg.
-        #   - egress CA: signs MITM certs egress presents
-        #     to the agent on the agent → egress leg.
-        # Both are minted by one-shot pipelock containers (pipelock's
-        # `tls init` is a known-good RSA CA minter) under stage_dir;
-        # the .start steps docker-cp the files in. Private keys never
-        # leave the host stage dir, which start.py's outer finally
-        # `shutil.rmtree`s after the sidecars are torn down.
-        # PRD 0018 chunk 2: CAs live under the bottle's state subdirs
-        # so chunk 3's compose bind-mounts have stable sources. The
-        # subdirs were created by prepare; tls_init makes the
-        # `pipelock-ca/` and `egress-ca/` children under them.
+        # Step 3: mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
         ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
         egress_ca_host, egress_ca_cert_only = egress_tls_init(
             egress_state_dir(plan.slug),
         )
 
-        # Re-render the pipelock yaml with the SSRF allowlist now that
-        # we know the internal CIDR. Prepare wrote the yaml without
-        # the ssrf block (CIDR wasn't known yet); overwrite the same
-        # path so .start docker-cp's the updated content.
+        # Step 4: re-render pipelock yaml with the SSRF allowlist now
+        # that we know the internal CIDR. Prepare wrote the yaml
+        # without the ssrf block; overwrite the same path so the
+        # bind-mount picks up the updated content.
         bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
         cfg = pipelock_build_config(
             bottle,
@@ -129,6 +154,10 @@ def launch(
         plan.proxy_plan.yaml_path.write_text(pipelock_render_yaml(cfg))
         plan.proxy_plan.yaml_path.chmod(0o600)
 
+        # Step 5: populate launch-time fields on every inner plan so
+        # the renderer reads concrete network names, CA paths, and
+        # pipelock URL. Match the field-by-field replacement the
+        # pre-compose launch did, just rolled into one pass.
         proxy_plan = dataclasses.replace(
             plan.proxy_plan,
             internal_network=internal_network,
@@ -137,40 +166,17 @@ def launch(
             ca_cert_host_path=ca_cert_host,
             ca_key_host_path=ca_key_host,
         )
-        # Re-bind the outer plan so provision_ca (which runs later
-        # from `provision(plan, container)`) can read the populated
-        # CA paths off plan.proxy_plan.
-        plan = dataclasses.replace(plan, proxy_plan=proxy_plan)
-        pipelock_name = proxy.start(plan.proxy_plan)
-        stack.callback(proxy.stop, pipelock_name)
-
-        # Git gate (PRD 0008). One sidecar per agent, only brought up
-        # when the bottle has git entries. Same internal + egress
-        # network attachment as the other sidecars; agent dials it as
-        # `git://<container-name>/<name>.git` via the pushInsteadOf
-        # rules provision_git writes into ~/.gitconfig.
-        if plan.git_gate_plan.upstreams:
+        git_gate_plan = plan.git_gate_plan
+        if git_gate_plan.upstreams:
             git_gate_plan = dataclasses.replace(
-                plan.git_gate_plan,
+                git_gate_plan,
                 internal_network=internal_network,
                 egress_network=egress_network,
             )
-            plan = dataclasses.replace(plan, git_gate_plan=git_gate_plan)
-            git_gate_name = git_gate.start(plan.git_gate_plan)
-            stack.callback(git_gate.stop, git_gate_name)
-
-        # Egress-proxy (PRD 0017). One sidecar per bottle when
-        # bottle.egress.routes is non-empty. Must come up AFTER
-        # pipelock — egress routes its outbound HTTPS through
-        # pipelock (HTTPS_PROXY in environ + the pipelock CA in its
-        # trust store) so the egress allowlist + body scanner sit on
-        # the egress → upstream leg. Must come up BEFORE the
-        # agent so DNS resolution for `egress` succeeds on the
-        # agent's first call; tokens flow from the host env into the
-        # sidecar's environ, not the agent's.
-        if plan.egress_plan.routes:
+        egress_plan = plan.egress_plan
+        if egress_plan.routes:
             egress_plan = dataclasses.replace(
-                plan.egress_plan,
+                egress_plan,
                 internal_network=internal_network,
                 egress_network=egress_network,
                 mitmproxy_ca_host_path=egress_ca_host,
@@ -178,151 +184,62 @@ def launch(
                 pipelock_ca_host_path=ca_cert_host,
                 pipelock_proxy_url=pipelock_proxy_url(plan.slug),
             )
-            plan = dataclasses.replace(plan, egress_plan=egress_plan)
-            egress_name = egress.start(plan.egress_plan)
-            stack.callback(egress.stop, egress_name)
-
-        # Supervise sidecar (PRD 0013). Opt-in via bottle.supervise.
-        # Internal-network only — the sidecar makes no outbound calls.
-        # Must come up BEFORE the agent so DNS resolution for
-        # `supervise` succeeds on the agent's first tool call.
-        if plan.supervise_plan is not None:
+        supervise_plan = plan.supervise_plan
+        if supervise_plan is not None:
             supervise_plan = dataclasses.replace(
-                plan.supervise_plan,
+                supervise_plan,
                 internal_network=internal_network,
             )
-            plan = dataclasses.replace(plan, supervise_plan=supervise_plan)
-            supervise_name = supervise.start(plan.supervise_plan)
-            stack.callback(supervise.stop, supervise_name)
+        plan = dataclasses.replace(
+            plan,
+            proxy_plan=proxy_plan,
+            git_gate_plan=git_gate_plan,
+            egress_plan=egress_plan,
+            supervise_plan=supervise_plan,
+        )
 
-        container = _run_agent_container(plan, internal_network)
-        stack.callback(docker_mod.force_remove_container, container)
+        # Step 6: render + write the compose file. metadata.json
+        # was written at prepare time and already carries
+        # compose_project; nothing to update here.
+        state_dir = bottle_state_dir(plan.slug)
+        spec = bottle_plan_to_compose(plan)
+        compose_file = write_compose_file(spec, compose_file_path(state_dir))
+        project = compose_project_name(plan.slug)
 
-        prompt_path = provision(plan, container)
+        # Step 7: compose up. Token values + the OAuth placeholder
+        # flow through subprocess env; the compose file holds only
+        # bare names for the secret-carrying entries.
+        token_values: dict[str, str] = {}
+        if plan.egress_plan.routes:
+            token_values = egress_resolve_token_values(
+                plan.egress_plan.token_env_map, dict(os.environ),
+            )
+        compose_env: dict[str, str] = {
+            **os.environ,
+            **plan.forwarded_env,
+            **token_values,
+        }
+        info(
+            f"docker compose up -d  (project {project}, "
+            f"{len(spec['services'])} services)"
+        )
+        compose_up(project, compose_file, env=compose_env)
 
-        yield DockerBottle(container, teardown, prompt_path)
+        # Register teardown in reverse order: log dump first, then
+        # `compose down`. Networks come down last via callbacks
+        # registered in step 2.
+        stack.callback(compose_down, project, compose_file)
+        stack.callback(
+            compose_dump_logs, project, compose_file, compose_log_path(state_dir),
+        )
+
+        # Step 8: provision. Unchanged — uses `docker exec` against
+        # the agent container by its known name.
+        prompt_path = provision(plan, plan.container_name)
+
+        # Step 9: yield. exec_claude continues to use `docker exec -it`
+        # — the agent runs `sleep infinity` per the renderer's
+        # service spec.
+        yield DockerBottle(plan.container_name, teardown, prompt_path)
     finally:
         teardown()
-
-
-def _agent_no_proxy(plan: DockerBottlePlan) -> str:
-    """NO_PROXY value for the agent container. Standard loopback +
-    `supervise` when the supervise sidecar is enabled.
-
-    Supervise needs to bypass pipelock because the MCP tool-call
-    pattern is long-poll: claude-code opens an HTTPS-style request to
-    http://supervise:9100/, the sidecar holds it open until the
-    operator approves (potentially minutes), then returns the
-    response. Pipelock is a forward proxy with idle timeouts;
-    pipelock cuts the long-polled connection well before the operator
-    can act, and claude-code reports the tool as ✘ failed even
-    though /mcp shows ✔ connected.
-
-    The supervise sidecar is on the bottle's internal network with
-    the `supervise` network-alias, so the agent can dial it
-    directly via docker DNS. Body-scanning the supervise traffic
-    isn't critical — the operator reviews every proposal in the TUI."""
-    hosts = ["localhost", "127.0.0.1"]
-    if plan.supervise_plan is not None:
-        hosts.append(SUPERVISE_HOSTNAME)
-    return ",".join(hosts)
-
-
-def _agent_proxy_url(plan: DockerBottlePlan) -> str:
-    """Pick the proxy URL the agent's HTTP_PROXY env points at. PRD
-    0017: when an egress is declared, the agent goes through
-    egress (which in turn uses HTTPS_PROXY=pipelock on its
-    outbound leg). Otherwise the agent talks straight to pipelock —
-    keeps the network surface minimal for bottles that don't need
-    path filtering or credential injection."""
-    if plan.egress_plan.routes:
-        return egress_url()
-    return pipelock_proxy_url(plan.slug)
-
-
-def _run_agent_container(plan: DockerBottlePlan, internal_network: str) -> str:
-    """Build the `docker run` argv and execute it, handling name-
-    conflict races by incrementing the suffix (unless the name was
-    user-pinned). Returns the resolved container name."""
-    proxy_url = _agent_proxy_url(plan)
-    no_proxy = _agent_no_proxy(plan)
-    # Set BOTH cases of every *_PROXY var. libcurl's CVE-2016-5388
-    # httpoxy mitigation makes it ignore uppercase `HTTP_PROXY` for
-    # `http://` URLs and only honor lowercase `http_proxy`. Without
-    # the lowercase var, plain-HTTP requests from the agent bypass
-    # egress entirely (going direct, then failing with
-    # "network unreachable" because the agent's bridge is
-    # --internal). Lowercase HTTPS_PROXY isn't strictly needed but
-    # we set it for symmetry — some tools check one or the other.
-    docker_args: list[str] = [
-        "--rm", "-d",
-        "--name", plan.container_name,
-        "--network", internal_network,
-        "-e", f"HTTPS_PROXY={proxy_url}",
-        "-e", f"HTTP_PROXY={proxy_url}",
-        "-e", f"https_proxy={proxy_url}",
-        "-e", f"http_proxy={proxy_url}",
-        "-e", f"NO_PROXY={no_proxy}",
-        "-e", f"no_proxy={no_proxy}",
-        # CA trust trio for the agent process. Docker propagates
-        # run-time env into `docker exec`, so `claude` sees these
-        # without per-exec threading. NODE_EXTRA_CA_CERTS points at
-        # the cert file (Node appends it to its bundled roots);
-        # SSL_CERT_FILE / REQUESTS_CA_BUNDLE point at the system
-        # bundle that `update-ca-certificates` rebuilds in
-        # provision_ca.
-        "-e", f"NODE_EXTRA_CA_CERTS={AGENT_CA_PATH}",
-        "-e", f"SSL_CERT_FILE={AGENT_CA_BUNDLE}",
-        "-e", f"REQUESTS_CA_BUNDLE={AGENT_CA_BUNDLE}",
-    ]
-    if plan.use_runsc:
-        docker_args.extend(["--runtime", "runsc"])
-    if plan.env_file.stat().st_size > 0:
-        docker_args.extend(["--env-file", str(plan.env_file)])
-    for name in plan.forwarded_env:
-        docker_args.extend(["-e", name])
-
-    # PRD 0013: read-only current-config mount so the agent can read
-    # routes.yaml / allowlist / Dockerfile before composing a
-    # supervise tool-call proposal. Mounted from the per-bottle
-    # stage_dir/current-config/ populated at prepare time.
-    if plan.supervise_plan is not None:
-        docker_args.extend([
-            "-v",
-            f"{plan.supervise_plan.current_config_dir}:{CURRENT_CONFIG_DIR_IN_AGENT}:ro",
-        ])
-
-    docker_args.extend([plan.runtime_image, "sleep", "infinity"])
-
-    info(f"starting container {plan.container_name} from {plan.runtime_image}")
-
-    # Inject forwarded values (secrets, interpolated host vars, the
-    # renamed OAuth token) into the docker-run child's env so the
-    # `-e NAME` flags above pick them up — without touching our own
-    # os.environ or putting values on argv.
-    child_env: dict[str, str] = {**os.environ, **plan.forwarded_env}
-
-    name_idx = docker_args.index("--name") + 1
-    for candidate in docker_mod.container_name_candidates(plan.container_name):
-        docker_args[name_idx] = candidate
-        run_result = subprocess.run(
-            ["docker", "run", *docker_args],
-            capture_output=True,
-            text=True,
-            env=child_env,
-            check=False,
-        )
-        if run_result.returncode == 0:
-            return candidate
-        err_text = run_result.stderr
-        if plan.container_name_pinned or "is already in use" not in err_text:
-            sys.stderr.write(err_text + "\n")
-            die(f"docker run failed for container '{candidate}'")
-        info(f"name conflict on {candidate}; retrying with next candidate")
-    die(
-        f"could not find a free container name after "
-        f"{plan.container_name}-{docker_mod.MAX_CONTAINER_SUFFIX} retries; "
-        f"clean up old containers"
-    )
-
-
diff --git a/claude_bottle/backend/docker/pipelock.py b/claude_bottle/backend/docker/pipelock.py
index 89ce9f2..a7b3a6b 100644
--- a/claude_bottle/backend/docker/pipelock.py
+++ b/claude_bottle/backend/docker/pipelock.py
@@ -67,6 +67,12 @@ def pipelock_tls_init(stage_dir: Path) -> tuple[Path, Path]:
     key = work / "ca-key.pem"
     if not cert.is_file() or not key.is_file():
         die(f"pipelock tls init did not produce ca files in {work}")
+    # Explicit perms in case a future pipelock release changes
+    # defaults. Pipelock runs as root in its distroless image and
+    # bind-mounts work with 0o600 (root reads everything); the key
+    # has no reason to be readable to anyone else on the host.
+    key.chmod(0o600)
+    cert.chmod(0o644)
     return (cert, key)
 
 
diff --git a/claude_bottle/backend/docker/pipelock_apply.py b/claude_bottle/backend/docker/pipelock_apply.py
index 9a27cd2..6c3ae0a 100644
--- a/claude_bottle/backend/docker/pipelock_apply.py
+++ b/claude_bottle/backend/docker/pipelock_apply.py
@@ -24,9 +24,17 @@ from pathlib import Path
 
 from ...pipelock import pipelock_render_yaml
 from ...yaml_subset import parse_yaml_subset
+from .bottle_state import pipelock_state_dir
 from .pipelock import pipelock_container_name
 
 
+def _pipelock_yaml_host_path(slug: str) -> Path:
+    """The bind-mount source for the pipelock sidecar's
+    pipelock.yaml — matches what pipelock.prepare wrote at chunk-2
+    paths."""
+    return pipelock_state_dir(slug) / "pipelock.yaml"
+
+
 PIPELOCK_YAML_IN_CONTAINER = "/etc/pipelock.yaml"
 
 # Allowlist proposals are one-hostname-per-line. Blank lines and
@@ -141,19 +149,26 @@ def apply_allowlist_change(
     cfg["api_allowlist"] = new_hosts
     rendered = pipelock_render_yaml(cfg)
 
-    fd, tmp_path = tempfile.mkstemp(prefix="cb-pipelock-yaml.", suffix=".yaml")
+    # PRD 0018 chunk 3 + security item (c): pipelock.yaml is
+    # bind-mounted into the container, so the write target is the
+    # host path the sidecar reads. POSIX rename is atomic on the
+    # same filesystem, which matters less here than for the
+    # SIGHUP-reload egress case (pipelock fully restarts and
+    # re-reads on boot), but the pattern is uniform across both
+    # apply paths.
+    target = _pipelock_yaml_host_path(slug)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    fd, tmp_path_str = tempfile.mkstemp(
+        prefix=".pipelock.", suffix=".yaml.tmp", dir=str(target.parent),
+    )
+    tmp_path = Path(tmp_path_str)
     try:
         with os.fdopen(fd, "w") as f:
             f.write(rendered)
-        cp = subprocess.run(
-            ["docker", "cp", tmp_path, f"{container}:{PIPELOCK_YAML_IN_CONTAINER}"],
-            capture_output=True, text=True, check=False,
-        )
-        if cp.returncode != 0:
-            raise PipelockApplyError(
-                f"failed to copy pipelock.yaml into {container}: "
-                f"{(cp.stderr or '').strip()}"
-            )
+        # pipelock runs as root in its distroless image — any mode
+        # is fine — but 0o600 matches what prepare wrote.
+        os.chmod(tmp_path, 0o600)
+        os.replace(tmp_path, target)
         restart = subprocess.run(
             ["docker", "restart", container],
             capture_output=True, text=True, check=False,
@@ -163,11 +178,12 @@ def apply_allowlist_change(
                 f"failed to restart {container}: "
                 f"{(restart.stderr or '').strip()}"
             )
-    finally:
+    except BaseException:
         try:
-            Path(tmp_path).unlink()
+            tmp_path.unlink()
         except OSError:
             pass
+        raise
 
     return before, after
 
diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py
index 555ac14..f19a627 100644
--- a/claude_bottle/backend/docker/prepare.py
+++ b/claude_bottle/backend/docker/prepare.py
@@ -72,6 +72,7 @@ def resolve_plan(
         cwd=spec.user_cwd if spec.copy_cwd else "",
         copy_cwd=spec.copy_cwd,
         started_at=datetime.now(timezone.utc).isoformat(),
+        compose_project=f"claude-bottle-{slug}",
     ))
     # Clear any leftover preserve marker from a prior capability-block
     # so this fresh launch can be cleaned up at session-end unless
diff --git a/claude_bottle/git_gate.py b/claude_bottle/git_gate.py
index 0de6c22..db092a2 100644
--- a/claude_bottle/git_gate.py
+++ b/claude_bottle/git_gate.py
@@ -163,9 +163,15 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
         "  keyfile=/git-gate/creds/${name}-key",
         "  hostsfile=/git-gate/creds/${name}-known_hosts",
         "",
-        "  chmod 600 \"$keyfile\"",
+        # `|| true`: PRD 0018 chunk 3+ bind-mounts these RO from the
+        # host, so chmod-syscalls fail with EROFS. The files already
+        # have the right perms on the host (SSH requires 0600 to load
+        # the key in the first place), so the chmod is best-effort
+        # cleanup for the legacy docker-cp path where the file
+        # landed at the host's umask perms.
+        "  chmod 600 \"$keyfile\" 2>/dev/null || true",
         "  if [ -f \"$hostsfile\" ]; then",
-        "    chmod 600 \"$hostsfile\"",
+        "    chmod 600 \"$hostsfile\" 2>/dev/null || true",
         "  fi",
         "",
         "  repo=/git/${name}.git",
diff --git a/tests/unit/test_agent_no_proxy.py b/tests/unit/test_agent_no_proxy.py
deleted file mode 100644
index d72a2b3..0000000
--- a/tests/unit/test_agent_no_proxy.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""Unit: agent NO_PROXY value builder (PR #25 follow-up).
-
-claude-code's HTTP MCP client must bypass pipelock for the supervise
-sidecar — long-poll tool calls would hit pipelock's idle timeout
-otherwise. This test pins the rule: localhost always; supervise iff
-the supervise sidecar is in the plan."""
-
-import unittest
-from pathlib import Path
-
-from claude_bottle.backend.docker.launch import _agent_no_proxy
-
-
-class _FakePlan:
-    """Just enough plan shape for the helper — no full DockerBottlePlan
-    construction needed."""
-
-    def __init__(self, supervise_plan):
-        self.supervise_plan = supervise_plan
-
-
-class _SentinelSupervisePlan:
-    """The helper only checks `supervise_plan is not None`; any object
-    is fine."""
-
-
-class TestAgentNoProxy(unittest.TestCase):
-    def test_loopback_only_when_no_supervise(self):
-        self.assertEqual(
-            "localhost,127.0.0.1",
-            _agent_no_proxy(_FakePlan(supervise_plan=None)),
-        )
-
-    def test_supervise_appended_when_enabled(self):
-        self.assertEqual(
-            "localhost,127.0.0.1,supervise",
-            _agent_no_proxy(_FakePlan(supervise_plan=_SentinelSupervisePlan())),
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/unit/test_compose.py b/tests/unit/test_compose.py
index 11aa01c..7306833 100644
--- a/tests/unit/test_compose.py
+++ b/tests/unit/test_compose.py
@@ -176,19 +176,20 @@ class TestProjectAndNetworks(unittest.TestCase):
         spec = bottle_plan_to_compose(_plan())
         self.assertEqual(f"claude-bottle-{SLUG}", spec["name"])
 
-    def test_internal_network_is_internal(self):
+    def test_internal_network_marked_external(self):
+        # Chunk 3 pre-creates networks with `docker network create
+        # --internal` so pipelock can know the CIDR before compose-up.
+        # Compose references the network by name with `external: true`.
         spec = bottle_plan_to_compose(_plan())
         net = spec["networks"]["internal"]
         self.assertEqual(f"claude-bottle-net-{SLUG}", net["name"])
-        self.assertTrue(net["internal"])
+        self.assertTrue(net["external"])
 
-    def test_egress_network_is_external_bridge(self):
+    def test_egress_network_marked_external(self):
         spec = bottle_plan_to_compose(_plan())
         net = spec["networks"]["egress"]
         self.assertEqual(f"claude-bottle-egress-{SLUG}", net["name"])
-        # No `internal:` key on the egress network — defaults to a
-        # normal user-defined bridge.
-        self.assertNotIn("internal", net)
+        self.assertTrue(net["external"])
 
 
 class TestPipelockAlwaysPresent(unittest.TestCase):