PRD 0001: Per-agent egress proxy via pipelock (#1)

2026-05-08 01:56:43 -04:00
parent 08597ebcf8
commit ba7616a4ae
20 changed files with 1977 additions and 12 deletions
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+# Docker network plumbing for the per-agent egress-proxy topology
+# (PRD 0001).
+#
+# The egress design (see docs/research/pipelock-assessment.md
+# §"Deployment topology") puts the agent container on a Docker
+# `--internal` network — Docker omits the default gateway from
+# `internal: true` networks at the iptables level inside the engine /
+# LinuxKit VM, so the only address the agent can reach is the pipelock
+# sidecar attached to the same network. The pipelock sidecar itself
+# also needs egress to the upstream internet, so it is placed on a
+# second (user-defined bridge) network as well. We deliberately do
+# NOT use Docker's legacy `bridge` network for this: the legacy bridge
+# has no embedded DNS resolver, so pipelock would be unable to resolve
+# `api.anthropic.com` and Claude Code traffic would dead-end. Only
+# user-defined bridges run Docker's built-in DNS, so we create one
+# per agent.
+#
+# This module is the network-only half of that topology: it handles
+# create / attach / teardown of both the per-agent internal network
+# and the per-agent user-defined egress bridge, and contains no
+# pipelock specifics. Keeping these helpers pipelock-agnostic means a
+# future PRD can reuse them for a different sidecar (e.g. an
+# iptables-only layer) without entangling the two concerns.
+#
+# Naming: claude-bottle-net-<slug> (internal),
+# claude-bottle-egress-<slug> (egress). On conflict we append a
+# numeric suffix (-2, -3, ...) to mirror the container-naming scheme
+# in cli.sh, so two parallel starts of the same agent get distinct
+# networks.
+#
+# Idempotent: safe to source multiple times.
+
+if [ -n "${CLAUDE_BOTTLE_LIB_NETWORK_SOURCED:-}" ]; then
+  return 0
+fi
+CLAUDE_BOTTLE_LIB_NETWORK_SOURCED=1
+
+_iso_lib_network_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=./log.sh
+. "${_iso_lib_network_dir}/log.sh"
+
+# network_name_for_slug <slug> — prints the canonical internal-network
+# name for a given agent slug. No conflict resolution; that lives in
+# network_create_internal.
+network_name_for_slug() {
+  local slug="${1:?network_name_for_slug: missing slug}"
+  printf 'claude-bottle-net-%s' "$slug"
+}
+
+# network_egress_name_for_slug <slug> — prints the canonical egress-network
+# name for a given agent slug. No conflict resolution; that lives in
+# network_create_egress.
+network_egress_name_for_slug() {
+  local slug="${1:?network_egress_name_for_slug: missing slug}"
+  printf 'claude-bottle-egress-%s' "$slug"
+}
+
+# network_exists <name> — returns 0 if the named docker network exists,
+# else 1. Uses `docker network inspect` (not `docker network ls -f name=...`)
+# because the latter does substring matching, which would falsely report
+# claude-bottle-net-foo as existing when only claude-bottle-net-foo-2 was
+# present.
+network_exists() {
+  local name="${1:?network_exists: missing network name}"
+  docker network inspect "$name" >/dev/null 2>&1
+}
+
+# _network_create_with_prefix <prefix> <internal: 0|1>
+#
+# Internal helper. Creates a per-agent Docker network named <prefix>
+# or, when that name is already taken, the first free name among
+# <prefix>-2, <prefix>-3, ..., <prefix>-99. If every one of those
+# exists it gives up via `die`.
+#
+# Arguments:
+#   $1 - base network name (required)
+#   $2 - "1" creates the network with `--internal` (no default
+#        gateway); any other value creates a plain user-defined
+#        bridge with upstream connectivity (required)
+# Outputs:
+#   stdout - the resolved network name, without a trailing newline
+#   one `info` line naming the network about to be created
+# Errors:
+#   calls `die` when no free name is found or when
+#   `docker network create` fails (docker's stderr is left visible).
+#
+# NOTE(review): the existence probe and the create are two separate
+# docker calls, so two concurrent callers can settle on the same
+# name; presumably one of them then hits the `die` below when docker
+# rejects the duplicate. Confirm whether callers need a retry.
+_network_create_with_prefix() {
+  local base="${1:?_network_create_with_prefix: missing prefix}"
+  local internal_flag="${2:?_network_create_with_prefix: missing internal flag}"
+
+  local name="$base"
+  local _suffix=2
+  while network_exists "$name"; do
+    name="${base}-${_suffix}"
+    _suffix=$((_suffix + 1))
+    # _suffix is already one past the candidate just assigned, so this
+    # trips right after <base>-99 was found taken; <base>-100 is never
+    # probed.
+    if [ "$_suffix" -gt 100 ]; then
+      die "could not find a free network name after ${base}-99; clean up old networks with 'docker network rm <name>'"
+    fi
+  done
+
+  # The command is built as an array that always has elements. An
+  # optional-flags array that stays empty on the egress path would
+  # abort with "unbound variable" when expanded under `set -u` on
+  # bash < 4.4 (e.g. the bash 3.2 shipped with macOS).
+  local kind="bridge (egress)"
+  local create_cmd=(docker network create)
+  local cmd_text="docker network create"
+  if [ "$internal_flag" = "1" ]; then
+    kind="internal"
+    create_cmd+=(--internal)
+    cmd_text+=" --internal"
+  fi
+  create_cmd+=("$name")
+
+  info "creating ${kind} network ${name}"
+  # Defaults give us a bridge driver with Docker-managed addressing,
+  # which is what we want for both internal and egress networks.
+  # Only stdout (the new network's ID) is discarded.
+  if ! "${create_cmd[@]}" >/dev/null; then
+    die "${cmd_text} ${name} failed"
+  fi
+  printf '%s' "$name"
+}
+
+# network_create_internal <slug>
+#
+# Creates the agent's Docker `--internal` network and prints the name
+# that was actually used on stdout. The canonical name is
+# claude-bottle-net-<slug>; when that is taken,
+# _network_create_with_prefix tries -2, -3, ... up to -99 and dies if
+# none is free (the same suffix scheme as the container-name retry
+# loop in cli.sh).
+#
+# `--internal` is the load-bearing flag: Docker creates the bridge
+# without a default route, so an agent container attached only to
+# this network cannot reach the public internet directly. The
+# pipelock sidecar, attached to both this network and a per-agent
+# egress network, is the only egress route.
+#
+# Arguments:
+#   $1 - agent slug (required)
+# Side effect: emits one info line naming the network being created.
+network_create_internal() {
+  local agent_slug canonical_name
+  agent_slug="${1:?network_create_internal: missing slug}"
+  canonical_name="$(network_name_for_slug "$agent_slug")"
+  # Second argument 1 selects the `--internal` variant.
+  _network_create_with_prefix "$canonical_name" 1
+}
+
+# network_create_egress <slug>
+#
+# Creates the per-agent user-defined bridge network that gives the
+# pipelock sidecar upstream egress, and prints the name that was
+# actually used on stdout. The canonical name is
+# claude-bottle-egress-<slug>; conflicts are resolved exactly as in
+# network_create_internal (numeric suffixes via
+# _network_create_with_prefix).
+#
+# A user-defined bridge is used on purpose instead of Docker's legacy
+# `bridge` network: only user-defined bridges run Docker's embedded
+# DNS resolver, and pipelock needs DNS to resolve `api.anthropic.com`
+# and similar upstream hostnames. On the legacy `bridge` network
+# pipelock would be forced onto the host's resolv.conf and fail in
+# environments where Docker Desktop's NAT path is the only working
+# DNS route.
+#
+# Arguments:
+#   $1 - agent slug (required)
+# Side effect: emits one info line naming the network being created.
+network_create_egress() {
+  local agent_slug canonical_name
+  agent_slug="${1:?network_create_egress: missing slug}"
+  canonical_name="$(network_egress_name_for_slug "$agent_slug")"
+  # Second argument 0 selects the plain (non-internal) bridge.
+  _network_create_with_prefix "$canonical_name" 0
+}
+
+# network_attach <network> <container>
+#
+# Attaches an already-running container to the named network via
+# `docker network connect`. Used to add the pipelock sidecar to a
+# second network (the per-agent user-defined egress bridge, not
+# Docker's legacy `bridge`) so it has upstream egress while staying
+# reachable from the agent on the internal network.
+#
+# Note: for the agent container itself we pass `--network <name>` to
+# `docker run` directly in cli.sh rather than using this function. The
+# agent never touches anything except the internal network.
+#
+# Arguments:
+#   $1 - network name (required)
+#   $2 - container name (required)
+# Errors:
+#   calls `die` when `docker network connect` fails.
+network_attach() {
+  local network="${1:?network_attach: missing network name}"
+  local container="${2:?network_attach: missing container name}"
+  # Only stdout is discarded. docker's stderr is left visible so the
+  # reason for a failure shows up next to the die message, the same
+  # way _network_create_with_prefix treats `docker network create`.
+  if ! docker network connect "$network" "$container" >/dev/null; then
+    die "docker network connect ${network} ${container} failed"
+  fi
+}
+
+# network_remove <name>
+#
+# Removes the named network with `docker network rm`. Idempotent: a
+# network that network_exists does not find counts as success, so
+# this can be called unconditionally from a teardown trap. Removal
+# fails while containers are still attached; the caller is expected
+# to tear those containers down first.
+#
+# Arguments:
+#   $1 - network name (required)
+# Returns:
+#   0 - network removed, or not present to begin with
+#   1 - `docker network rm` failed (a warning is emitted)
+network_remove() {
+  local target
+  target="${1:?network_remove: missing network name}"
+
+  # Nothing to do when the network is already gone.
+  network_exists "$target" || return 0
+
+  if docker network rm "$target" &>/dev/null; then
+    return 0
+  fi
+
+  # Deliberately `warn`, not `die`: this runs in cleanup paths where
+  # continuing is better than aborting and leaving more orphans
+  # behind.
+  warn "failed to remove network ${target}; clean up with 'docker network rm ${target}'"
+  return 1
+}