bot-bottle/claude_bottle/backend/docker/launch.py

"""Launch step for the Docker bottle backend.

PRD 0018 chunk 3: each instance is one `docker compose` project.

The flow is:

  1. Build the agent's base + derived image (compose builds the
     sidecar images via the `build:` directive on first up).
  2. Pre-create the per-bottle networks. We do this outside compose
     so we can inspect the assigned internal CIDR and embed it in
     pipelock's yaml (compose's `external: true` lets the compose
     file reference these pre-existing networks).
  3. Mint the per-bottle CAs (chunk 2 writes them under
     state/<slug>/{pipelock,egress}/).
  4. Re-render pipelock yaml with the now-known internal CIDR so
     the SSRF allowlist exempts the bottle's own subnet.
  5. Populate the inner plans with launch-time fields so the
     renderer can read network names, CA paths, pipelock URL.
  6. Render the compose spec and write it to
     state/<slug>/docker-compose.yml (metadata.json was already
     written at prepare time and is not touched here).
  7. `docker compose up -d` (token + OAuth values flow into the
     compose subprocess env so `environment: [NAME]` bare-name
     entries inherit without rendering values into the file).
  8. Provision (CA install, prompt copy, skills, git, supervise
     config) — unchanged, uses `docker exec`.
  9. Yield a DockerBottle handle. `exec_claude` runs claude via
     `docker exec -it` exactly like the pre-compose world.

Teardown (ExitStack callbacks fire in reverse):
  - Dump `docker compose logs --no-color --timestamps` to
    state/<slug>/compose.log (best-effort).
  - `docker compose down` removes the project's containers (not the
    external networks).
  - `network_remove` deletes the two networks we pre-created.
"""

from __future__ import annotations

import dataclasses
import os
from contextlib import ExitStack, contextmanager
from pathlib import Path
from typing import Callable, Generator

from ...egress import egress_resolve_token_values
from ...log import info
from ...pipelock import pipelock_build_config, pipelock_render_yaml
from . import network as network_mod
from . import util as docker_mod
from .bottle import DockerBottle
from .bottle_plan import DockerBottlePlan
from .bottle_state import (
    bottle_state_dir,
    egress_state_dir,
    pipelock_state_dir,
)
from .compose import (
    bottle_plan_to_compose,
    compose_down,
    compose_dump_logs,
    compose_file_path,
    compose_log_path,
    compose_project_name,
    compose_up,
    write_compose_file,
)
from .egress import (
    DockerEgress,
    egress_tls_init,
)
from .git_gate import DockerGitGate
from .pipelock import (
    PIPELOCK_CA_CERT_IN_CONTAINER,
    PIPELOCK_CA_KEY_IN_CONTAINER,
    DockerPipelockProxy,
    pipelock_proxy_url,
    pipelock_tls_init,
)
from .supervise import DockerSupervise


# Where the repo root lives, for `docker build` context. Computed once
# at import time as the absolute path four directory levels above this
# file (i.e. the directory containing the top-level package that the
# `...`-relative imports above resolve against). Stored as `str`
# because it is handed straight to `docker_mod.build_image`.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)


@contextmanager
def launch(
    plan: DockerBottlePlan,
    *,
    proxy: DockerPipelockProxy,
    git_gate: DockerGitGate,
    egress: DockerEgress,
    supervise: DockerSupervise,
    provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]:
    """Build, launch, and provision a Docker bottle via compose.

    Context manager: yields a `DockerBottle` handle once the compose
    project is up and `provision` has run, and tears everything down
    on exit (normal or exceptional).

    Parameters:
      plan       The prepared bottle plan. It is not mutated; a copy
                 with the launch-time fields (network names, CIDR, CA
                 paths, pipelock URL) filled in is built with
                 `dataclasses.replace` and used from step 5 onwards.
                 The pipelock yaml at `plan.proxy_plan.yaml_path` IS
                 rewritten on disk.
      proxy, git_gate, egress, supervise
                 Vestigial from the pre-compose flow — kept for
                 backwards-compat with backend.py's call site; the
                 `start()`/`stop()` methods on those classes are no
                 longer invoked (chunk 3 collapsed them into the
                 compose service spec). They'll be removed entirely in
                 a follow-up cleanup.
      provision  Called as `provision(plan, plan.container_name)` after
                 `compose up`; its return value is passed through to
                 the yielded `DockerBottle` as the prompt path.

    Yields:
      `DockerBottle(plan.container_name, teardown, prompt_path)`. The
      handle receives the same `teardown` callable that this function
      runs in its `finally`; calling it more than once is harmless
      because the underlying `ExitStack` is drained on the first call.

    Teardown order (ExitStack is LIFO): dump compose logs, `compose
    down`, remove the egress network, remove the internal network.
    Errors raised during teardown are swallowed so they never mask the
    error that triggered it. Any exception from the launch steps
    themselves propagates to the caller after teardown has run.
    """
    del proxy, git_gate, egress, supervise  # not invoked in compose flow

    stack = ExitStack()

    def teardown() -> None:
        """Run every registered cleanup callback; never raise."""
        try:
            stack.close()
        except BaseException:
            # Teardown must not raise; swallow so the caller's
            # __exit__ path can still propagate the original error.
            pass

    try:
        # Step 1: agent image build. Sidecar images get built lazily by
        # `docker compose up` via the renderer's `build:` directives.
        docker_mod.build_image(
            plan.image, _REPO_DIR,
            dockerfile=plan.dockerfile_path,
        )
        if plan.derived_image:
            docker_mod.build_image_with_cwd(
                plan.derived_image, plan.image, plan.spec.user_cwd
            )

        # Step 2: pre-create networks so we know the internal CIDR
        # before pipelock yaml renders. Each removal callback is
        # registered immediately after its create succeeds so a failure
        # later in this function still cleans the network up.
        internal_network = network_mod.network_create_internal(plan.slug)
        stack.callback(network_mod.network_remove, internal_network)

        egress_network = network_mod.network_create_egress(plan.slug)
        stack.callback(network_mod.network_remove, egress_network)

        internal_cidr = network_mod.network_inspect_cidr(internal_network)

        # Step 3: mint per-bottle CAs into state/<slug>/{pipelock,egress}/.
        ca_cert_host, ca_key_host = pipelock_tls_init(pipelock_state_dir(plan.slug))
        egress_ca_host, egress_ca_cert_only = egress_tls_init(
            egress_state_dir(plan.slug),
        )

        # Step 4: re-render pipelock yaml with the SSRF allowlist now
        # that we know the internal CIDR. Prepare wrote the yaml
        # without the ssrf block; overwrite the same path so the
        # bind-mount picks up the updated content.
        bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
        cfg = pipelock_build_config(
            bottle,
            ca_cert_path=PIPELOCK_CA_CERT_IN_CONTAINER,
            ca_key_path=PIPELOCK_CA_KEY_IN_CONTAINER,
            ssrf_ip_allowlist=(internal_cidr,),
        )
        plan.proxy_plan.yaml_path.write_text(pipelock_render_yaml(cfg))
        plan.proxy_plan.yaml_path.chmod(0o600)

        # Step 5: populate launch-time fields on every inner plan so
        # the renderer reads concrete network names, CA paths, and
        # pipelock URL. Match the field-by-field replacement the
        # pre-compose launch did, just rolled into one pass.
        proxy_plan = dataclasses.replace(
            plan.proxy_plan,
            internal_network=internal_network,
            internal_network_cidr=internal_cidr,
            egress_network=egress_network,
            ca_cert_host_path=ca_cert_host,
            ca_key_host_path=ca_key_host,
        )
        # The git-gate, egress, and supervise plans are only filled in
        # when they are actually in use (non-empty upstreams / routes,
        # or a supervise plan being present); otherwise they pass
        # through unchanged.
        git_gate_plan = plan.git_gate_plan
        if git_gate_plan.upstreams:
            git_gate_plan = dataclasses.replace(
                git_gate_plan,
                internal_network=internal_network,
                egress_network=egress_network,
            )
        egress_plan = plan.egress_plan
        if egress_plan.routes:
            egress_plan = dataclasses.replace(
                egress_plan,
                internal_network=internal_network,
                egress_network=egress_network,
                mitmproxy_ca_host_path=egress_ca_host,
                mitmproxy_ca_cert_only_host_path=egress_ca_cert_only,
                pipelock_ca_host_path=ca_cert_host,
                pipelock_proxy_url=pipelock_proxy_url(plan.slug),
            )
        supervise_plan = plan.supervise_plan
        if supervise_plan is not None:
            supervise_plan = dataclasses.replace(
                supervise_plan,
                internal_network=internal_network,
            )
        # Rebinds the local name only; the caller's plan object is
        # left as it was.
        plan = dataclasses.replace(
            plan,
            proxy_plan=proxy_plan,
            git_gate_plan=git_gate_plan,
            egress_plan=egress_plan,
            supervise_plan=supervise_plan,
        )

        # Step 6: render + write the compose file. metadata.json
        # was written at prepare time and already carries
        # compose_project; nothing to update here.
        state_dir = bottle_state_dir(plan.slug)
        spec = bottle_plan_to_compose(plan)
        compose_file = write_compose_file(spec, compose_file_path(state_dir))
        project = compose_project_name(plan.slug)

        # Step 7: compose up. Token values + the OAuth placeholder
        # flow through subprocess env; the compose file holds only
        # bare names for the secret-carrying entries.
        token_values: dict[str, str] = {}
        if plan.egress_plan.routes:
            token_values = egress_resolve_token_values(
                plan.egress_plan.token_env_map, dict(os.environ),
            )
        # Later entries win: forwarded env overrides the host env, and
        # resolved token values override both.
        compose_env: dict[str, str] = {
            **os.environ,
            **plan.forwarded_env,
            **token_values,
        }

        # Register compose teardown BEFORE `up`, in reverse order of
        # execution: log dump runs first, then `compose down`. Networks
        # come down last via callbacks registered in step 2.
        #
        # Registering ahead of `up` matters: if `up` fails part-way,
        # any containers it already created must still be removed,
        # otherwise they are leaked and the network removals that
        # follow run against networks that still have containers
        # attached. Dumping logs on a failed `up` also helps diagnose
        # why it failed.
        stack.callback(compose_down, project, compose_file)
        stack.callback(
            compose_dump_logs, project, compose_file, compose_log_path(state_dir),
        )

        info(
            f"docker compose up -d  (project {project}, "
            f"{len(spec['services'])} services)"
        )
        compose_up(project, compose_file, env=compose_env)

        # Step 8: provision. Unchanged — uses `docker exec` against
        # the agent container by its known name.
        prompt_path = provision(plan, plan.container_name)

        # Step 9: yield. exec_claude continues to use `docker exec -it`
        # — the agent runs `sleep infinity` per the renderer's
        # service spec.
        yield DockerBottle(plan.container_name, teardown, prompt_path)
    finally:
        # Runs on normal exit, on an exception from the caller's
        # `with` body, and on any failure in the steps above.
        teardown()