183 lines
7.3 KiB
Bash
183 lines
7.3 KiB
Bash
#!/usr/bin/env bash
|
|
# Docker network plumbing for the per-agent egress-proxy topology
|
|
# (PRD 0001).
|
|
#
|
|
# The egress design (see docs/research/pipelock-assessment.md
|
|
# §"Deployment topology") puts the agent container on a Docker
|
|
# `--internal` network — Docker omits the default gateway from
|
|
# `internal: true` networks at the iptables level inside the engine /
|
|
# LinuxKit VM, so the only address the agent can reach is the pipelock
|
|
# sidecar attached to the same network. The pipelock sidecar itself
|
|
# also needs egress to the upstream internet, so it is placed on a
|
|
# second (user-defined bridge) network as well. We deliberately do
|
|
# NOT use Docker's legacy `bridge` network for this: the legacy bridge
|
|
# has no embedded DNS resolver, so pipelock would be unable to resolve
|
|
# `api.anthropic.com` and Claude Code traffic would dead-end. Only
|
|
# user-defined bridges run Docker's built-in DNS, so we create one
|
|
# per agent.
|
|
#
|
|
# This module is the network-only half of that split: create / attach
|
|
# / teardown of both the per-agent internal network and the per-agent
|
|
# user-defined egress bridge, with no pipelock specifics. Keeping
|
|
# pipelock-agnostic helpers here means a future PRD can reuse them
|
|
# for a different sidecar (e.g. an iptables-only layer) without
|
|
# entangling the two concerns.
|
|
#
|
|
# Naming: claude-bottle-net-<slug> (internal),
|
|
# claude-bottle-egress-<slug> (egress). On conflict we append a
|
|
# numeric suffix (-2, -3, ...) to mirror the container-naming scheme
|
|
# in cli.sh, so two parallel starts of the same agent get distinct
|
|
# networks.
|
|
#
|
|
# Idempotent: safe to source multiple times.
|
|
|
|
# Source guard: once this file has been sourced, sourcing it again is
# a no-op. `return` (not `exit`) because this file is meant to be
# sourced, never executed.
[ -z "${CLAUDE_BOTTLE_LIB_NETWORK_SOURCED:-}" ] || return 0
CLAUDE_BOTTLE_LIB_NETWORK_SOURCED=1

# Absolute directory holding this file, so the sibling log.sh is found
# no matter what the caller's working directory is. CDPATH is blanked
# for the `cd` so a CDPATH hit cannot redirect it or echo a path.
_iso_lib_network_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Logging helpers used by the functions below (info / warn / die).
# shellcheck source=./log.sh
. "${_iso_lib_network_dir}/log.sh"
|
|
|
|
# network_name_for_slug <slug>
#
# Prints (no trailing newline) the canonical internal-network name for
# the given agent slug: claude-bottle-net-<slug>. Pure string
# formatting — it does not look at Docker and does no conflict
# resolution; that lives in network_create_internal.
network_name_for_slug() {
  local agent_slug="${1:?network_name_for_slug: missing slug}"
  printf '%s' "claude-bottle-net-${agent_slug}"
}
|
|
|
|
# network_egress_name_for_slug <slug>
#
# Prints (no trailing newline) the canonical egress-network name for
# the given agent slug: claude-bottle-egress-<slug>. Pure string
# formatting — it does not look at Docker and does no conflict
# resolution; that lives in network_create_egress.
network_egress_name_for_slug() {
  local agent_slug="${1:?network_egress_name_for_slug: missing slug}"
  printf '%s' "claude-bottle-egress-${agent_slug}"
}
|
|
|
|
# network_exists <name>
#
# Succeeds (status 0) when a Docker network with exactly this name
# exists; otherwise returns the non-zero status of the probe. Prints
# nothing.
#
# The probe is `docker network inspect`, not
# `docker network ls -f name=...`: the `ls` filter matches substrings,
# so it would report claude-bottle-net-foo as present when only
# claude-bottle-net-foo-2 exists.
network_exists() {
  local net="${1:?network_exists: missing network name}"
  # Only the exit status matters; discard both output streams.
  docker network inspect "$net" &>/dev/null
}
|
|
|
|
# _network_create_with_prefix <prefix> <internal: 0|1>
#
# Internal helper. Creates a per-agent Docker network named <prefix>,
# or <prefix>-2, <prefix>-3, ... when earlier names are taken. The
# last suffix probed is -99; after that the helper calls `die`.
#
# Arguments:
#   $1  base network name (required)
#   $2  "1" to create the network with `--internal` (no default
#       gateway); any other value creates a plain user-defined bridge
#       with upstream connectivity (required)
# Outputs:
#   stdout  the resolved network name, without a trailing newline
#   one `info` line naming the network being created
# Errors:
#   calls `die` when no free name is found or `docker network create`
#   fails. docker's own stderr is left visible so the reason shows up
#   next to the `die` message.
_network_create_with_prefix() {
  local base="${1:?_network_create_with_prefix: missing prefix}"
  local internal_flag="${2:?_network_create_with_prefix: missing internal flag}"

  # Probe for the first free name: <base>, then <base>-2, <base>-3, ...
  local name="$base"
  local _suffix=2
  while network_exists "$name"; do
    name="${base}-${_suffix}"
    _suffix=$((_suffix + 1))
    if [ "$_suffix" -gt 100 ]; then
      die "could not find a free network name after ${base}-99; clean up old networks with 'docker network rm <name>'"
    fi
  done

  # Build the whole command line in one array. It always holds at
  # least `docker network create <name>`, so it is never expanded
  # empty: "${arr[@]}" on an empty array is an "unbound variable"
  # error under `set -u` on bash older than 4.4 (e.g. macOS's 3.2).
  # Defaults give us a bridge driver with Docker-managed addressing,
  # which is what we want for both internal and egress networks.
  local kind="bridge (egress)"
  local create_cmd=(docker network create)
  if [ "$internal_flag" = "1" ]; then
    kind="internal"
    create_cmd+=(--internal)
  fi
  create_cmd+=("$name")

  info "creating ${kind} network ${name}"
  if ! "${create_cmd[@]}" >/dev/null; then
    # printf joins the words with single spaces regardless of IFS.
    die "$(printf '%s ' "${create_cmd[@]}")failed"
  fi
  printf '%s' "$name"
}
|
|
|
|
# network_create_internal <slug>
#
# Creates the agent's Docker `--internal` network and prints the
# resolved network name on stdout. The canonical name comes from
# network_name_for_slug; when it is already taken,
# _network_create_with_prefix appends -2, -3, ... (capped, matching
# the container-name retry loop in cli.sh) until a free name is found.
#
# `--internal` is the load-bearing flag: Docker creates the bridge
# without a default route, so the agent container attached here cannot
# reach the public internet directly. The pipelock sidecar (attached
# to both this network and a per-agent egress network) is the only
# egress route.
#
# Side effect: emits one info line naming the network actually created.
network_create_internal() {
  : "${1:?network_create_internal: missing slug}"
  local internal_base
  internal_base="$(network_name_for_slug "$1")"
  # Second argument 1 => create with --internal.
  _network_create_with_prefix "$internal_base" 1
}
|
|
|
|
# network_create_egress <slug>
#
# Creates the per-agent user-defined bridge network that the pipelock
# sidecar uses for upstream egress, and prints the resolved network
# name on stdout. The canonical name comes from
# network_egress_name_for_slug; conflict resolution mirrors
# network_create_internal.
#
# We use a user-defined bridge (NOT the legacy `bridge` network)
# because only user-defined bridges run Docker's embedded DNS resolver
# — pipelock needs DNS to resolve `api.anthropic.com` and similar
# upstream hostnames. The legacy `bridge` network would force pipelock
# onto the host's resolv.conf and fail in environments where Docker
# Desktop's NAT path is the only working DNS route.
#
# Side effect: emits one info line naming the network actually created.
network_create_egress() {
  : "${1:?network_create_egress: missing slug}"
  local egress_base
  egress_base="$(network_egress_name_for_slug "$1")"
  # Second argument 0 => plain bridge, no --internal.
  _network_create_with_prefix "$egress_base" 0
}
|
|
|
|
# network_attach <network> <container>
#
# Attaches an already-running container to the named network. Used to
# add the pipelock sidecar to a second (user-defined egress bridge)
# network so it has upstream egress, while staying reachable from the
# agent on the internal network.
#
# Note: for the agent container itself we pass `--network <name>` to
# `docker run` directly in cli.sh rather than using this function. The
# agent never touches anything except the internal network.
#
# Arguments:
#   $1  network name (required)
#   $2  container name (required)
# Errors:
#   calls `die` when `docker network connect` fails.
network_attach() {
  local network="${1:?network_attach: missing network name}"
  local container="${2:?network_attach: missing container name}"
  # Discard stdout only. docker's stderr carries the actual reason for
  # a failure (unknown container, already connected, ...), so it is
  # left visible next to the `die` message — the same treatment
  # `docker network create` gets in _network_create_with_prefix.
  if ! docker network connect "$network" "$container" >/dev/null; then
    die "docker network connect ${network} ${container} failed"
  fi
}
|
|
|
|
# network_remove <name>
#
# Removes the named network. Idempotent: a missing network is treated
# as success so this can be called unconditionally from a teardown
# trap. That includes a network that disappears between the existence
# check and the removal attempt (e.g. a parallel teardown got there
# first). A network that still has containers attached will fail to
# remove; the caller is expected to tear those containers down first.
#
# Arguments:
#   $1  network name (required)
# Returns:
#   0 when the network is gone (removed here or already absent);
#   1, after a `warn`, when it still exists after the removal attempt.
network_remove() {
  local name="${1:?network_remove: missing network name}"
  if ! network_exists "$name"; then
    return 0
  fi
  if ! docker network rm "$name" >/dev/null 2>&1; then
    # The removal failed, but if the network is gone now the goal is
    # met: someone else removed it after our existence check.
    if ! network_exists "$name"; then
      return 0
    fi
    # Don't `die` here: this runs in cleanup paths where we'd rather
    # warn and continue than abort and leave more orphans behind.
    warn "failed to remove network ${name}; clean up with 'docker network rm ${name}'"
    return 1
  fi
}
|