diff --git a/lib/pipelock.sh b/lib/pipelock.sh new file mode 100644 index 0000000..49a37e4 --- /dev/null +++ b/lib/pipelock.sh @@ -0,0 +1,375 @@ +#!/usr/bin/env bash +# Pipelock sidecar lifecycle for the per-agent egress topology +# (PRD 0001). +# +# Pipelock (https://github.com/luckyPipewrench/pipelock) is an HTTP +# forward proxy with hostname allowlisting + DLP scanning + URL-entropy +# checks. We run one sidecar container per agent, attached to the +# agent's --internal network (created by lib/network.sh) and to a +# default-bridge network for upstream egress. The agent's HTTPS_PROXY / +# HTTP_PROXY env vars point at the sidecar's service name on the +# internal network; combined with --internal (which omits the default +# gateway), pipelock is the only egress route the agent has. +# +# Image pin: ghcr.io/luckypipewrench/pipelock@sha256:<digest>. The +# digest is resolved by hand against ghcr.io for tag 2.3.0 (the +# `v2.3.0` GitHub release maps to the unprefixed `2.3.0` Docker tag — +# see pipelock-assessment.md and the resolution log in PRD 0001's +# implementation thread). Bump deliberately when upgrading. +# +# YAML config we generate: minimum-viable settings to satisfy the PRD's +# observable success criteria. +# - mode: strict — only api_allowlist domains are reachable +# (per docs/configuration.md §Modes) +# - enforce: true — blocks rather than warn-only +# - api_allowlist: [...] — defaults ∪ bottle.egress.allowlist +# - forward_proxy.enabled: true — turns on the CONNECT-tunnel proxy +# the agent's HTTPS_PROXY actually uses +# (docs §Forward Proxy: this is off by +# default, restart-required to flip) +# - dlp.include_defaults: true — load all 48 built-in patterns +# (docs §DLP §Pattern Merging) +# - dlp.scan_env: true — flags URLs containing high-entropy env +# values (≥16 chars, Shannon entropy >3.0, +# checked in raw/base64/hex/base32).
#                                 This is the documented home for
#                                 pipelock's "subdomain entropy
#                                 detection" surface (docs §Environment
#                                 Variable Leak Detection); the
#                                 URL-path-entropy knob under
#                                 fetch_proxy.monitoring is for the
#                                 /fetch?url=... helper, not the forward
#                                 proxy we use.
# We deliberately do NOT set tls_interception (out of PRD scope), and
# do NOT carry any env-var values into the YAML — only hostnames.
#
# Idempotent: safe to source multiple times.

# Source guard: once the marker variable is set, any later `source` of
# this file returns immediately instead of re-running the body.
if [ -n "${CLAUDE_BOTTLE_LIB_PIPELOCK_SOURCED:-}" ]; then
  return 0
fi
CLAUDE_BOTTLE_LIB_PIPELOCK_SOURCED=1

# Absolute directory containing this file. CDPATH is cleared for the
# `cd` so a user-set CDPATH cannot redirect it (or make it print).
_iso_lib_pipelock_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Sibling libraries, loaded relative to this file rather than the
# caller's working directory.
# shellcheck source=./log.sh
. "${_iso_lib_pipelock_dir}/log.sh"
# shellcheck source=./manifest.sh
. "${_iso_lib_pipelock_dir}/manifest.sh"
# shellcheck source=./network.sh
. "${_iso_lib_pipelock_dir}/network.sh"

# --- Constants -------------------------------------------------------------

# Pipelock image, pinned by digest. The digest is the multi-arch image
# index for ghcr.io/luckypipewrench/pipelock:2.3.0 (resolved 2026-05-08
# from the ghcr.io v2 manifests endpoint). The tag corresponds to the
# v2.3.0 GitHub release; the registry uses unprefixed tags, so
# v2.3.0→2.3.0. A value already present in the environment wins over
# this default.
CLAUDE_BOTTLE_PIPELOCK_IMAGE="${CLAUDE_BOTTLE_PIPELOCK_IMAGE:-ghcr.io/luckypipewrench/pipelock@sha256:3b1a39417b98406ddc5dc2d8fcb42865ddc0c68a43d355db55f0f8cb06bc6de9}"

# Listening port for pipelock's forward proxy. Default per
# docs/configuration.md §Forward Proxy / §Fetch Proxy and the
# deployment-recipes generator. Override via env if a future image
# changes it.
CLAUDE_BOTTLE_PIPELOCK_PORT="${CLAUDE_BOTTLE_PIPELOCK_PORT:-8888}"

# Baked-in default allowlist for hosts Claude Code itself needs.
# Source: pipelock-assessment.md and the Claude Code network-config
# docs (https://code.claude.com/docs/en/network-config). The effective
# allowlist used at launch is this set unioned with whatever the
# bottle's egress.allowlist names.
# The default allowlist is kept as a newline-separated string
# because bash arrays don't survive sourcing into a function-only
# context cleanly; callers split on newlines.
CLAUDE_BOTTLE_PIPELOCK_DEFAULT_ALLOWLIST="api.anthropic.com
statsig.anthropic.com
sentry.io
claude.ai
platform.claude.com
downloads.claude.ai
raw.githubusercontent.com"

# --- Naming ----------------------------------------------------------------

# pipelock_container_name <slug> — prints the canonical sidecar
# container name for a given agent slug (no trailing newline). The agent
# reaches the sidecar at this name as a hostname on the internal network.
pipelock_container_name() {
  local slug="${1:?pipelock_container_name: missing slug}"
  printf 'claude-bottle-pipelock-%s' "$slug"
}

# pipelock_proxy_url <slug> — prints http://<container-name>:<port>
# (no trailing newline), suitable for HTTPS_PROXY / HTTP_PROXY in the
# agent container. The port is read from CLAUDE_BOTTLE_PIPELOCK_PORT.
pipelock_proxy_url() {
  local slug="${1:?pipelock_proxy_url: missing slug}"
  local name
  name="$(pipelock_container_name "$slug")"
  printf 'http://%s:%s' "$name" "$CLAUDE_BOTTLE_PIPELOCK_PORT"
}

# --- Allowlist resolution --------------------------------------------------

# pipelock_bottle_allowlist <manifest_file> <bottle_name>
#
# Prints one hostname per line on stdout for the allowlist declared at
# bottles[<bottle_name>].egress.allowlist. Empty (no output) if the
# field is missing or the array is empty. Validates that each entry is
# a JSON string; dies with a clear message if any element is not, or if
# jq cannot evaluate the field at all (unreadable manifest, invalid
# JSON, allowlist that is not an array).
#
# Returns the status of the final jq call on the success path.
pipelock_bottle_allowlist() {
  local manifest_file="${1:?pipelock_bottle_allowlist: missing manifest file}"
  local bottle_name="${2:?pipelock_bottle_allowlist: missing bottle name}"

  # Validate shape first: if egress.allowlist exists, every element
  # must be a string. One jq pass yields the distinct element types.
  # A jq failure here used to be ignored, which let a broken manifest
  # look like "no extra hosts"; treat it as fatal instead.
  local types
  types="$(jq -r --arg b "$bottle_name" '
    .bottles[$b].egress.allowlist // [] | map(type) | unique[]
  ' "$manifest_file")" \
    || die "failed to read egress.allowlist for bottle '${bottle_name}' from ${manifest_file}; it must be a JSON array of strings."

  local t
  while IFS= read -r t; do
    [ -z "$t" ] && continue
    if [ "$t" != "string" ]; then
      die "bottle '${bottle_name}' egress.allowlist must contain only strings; found a '${t}' entry."
    fi
  done <<< "$types"

  # Shape is known-good; emit the entries, one per line.
  jq -r --arg b "$bottle_name" '
    .bottles[$b].egress.allowlist // [] | .[]
  ' "$manifest_file"
}

# pipelock_effective_allowlist <manifest_file> <bottle_name>
#
# Prints the deduplicated union of the baked-in default allowlist and
# the bottle's declared allowlist, one hostname per line, sorted for
# stability. This is the single source of truth callers should use for
# both YAML generation and the preflight summary.
#
# Returns non-zero and prints nothing if the bottle's allowlist cannot
# be resolved. The bottle list is captured *before* the pipeline is
# built: calling pipelock_bottle_allowlist inside the pipeline would run
# it in a subshell, where a `die` only ends that subshell and the
# pipeline still succeeds with the defaults alone.
pipelock_effective_allowlist() {
  local manifest_file="${1:?pipelock_effective_allowlist: missing manifest file}"
  local bottle_name="${2:?pipelock_effective_allowlist: missing bottle name}"

  local bottle_hosts
  bottle_hosts="$(pipelock_bottle_allowlist "$manifest_file" "$bottle_name")" || return 1

  # awk drops blank lines and repeats; sort in the C locale gives a
  # byte-wise order that does not vary with the user's locale.
  printf '%s\n%s\n' "$CLAUDE_BOTTLE_PIPELOCK_DEFAULT_ALLOWLIST" "$bottle_hosts" \
    | awk 'NF && !seen[$0]++' \
    | LC_ALL=C sort
}

# pipelock_allowlist_summary <manifest_file> <bottle_name>
#
# One-line summary of the effective allowlist for the y/N preflight
# display. Format:
#   "<N> hosts allowed (host1, host2, host3, +M more)"
# When the allowlist has 5 or fewer entries, all are listed and the
# "+M more" suffix is omitted.
# Builds the one-line preflight summary of the effective allowlist and
# prints it on stdout without a trailing newline:
#   - no hosts:      "0 hosts allowed (none)"
#   - 1 to 5 hosts:  "<N> hosts allowed (h1, h2, ...)" with every host
#   - more than 5:   "<N> hosts allowed (h1, h2, h3, +M more)"
# Arguments: $1 manifest file, $2 bottle name.
# Returns non-zero, printing nothing, when pipelock_effective_allowlist
# fails; reading it through process substitution would discard that
# status and summarize a partial list.
pipelock_allowlist_summary() {
  local manifest_file="${1:?pipelock_allowlist_summary: missing manifest file}"
  local bottle_name="${2:?pipelock_allowlist_summary: missing bottle name}"

  local listing
  listing="$(pipelock_effective_allowlist "$manifest_file" "$bottle_name")" || return 1

  local hosts=()
  local h
  while IFS= read -r h; do
    [ -z "$h" ] && continue
    hosts+=("$h")
  done <<< "$listing"

  local count="${#hosts[@]}"
  if [ "$count" -eq 0 ]; then
    printf '0 hosts allowed (none)'
    return 0
  fi

  # Up to 5 hosts are shown in full; longer lists show the first 3 and
  # fold the remainder into "+M more".
  local show="$count"
  local more=0
  if [ "$count" -gt 5 ]; then
    show=3
    more=$((count - show))
  fi

  # printf repeats its format per argument, so this emits "h1, h2, h3, ";
  # the trailing separator is trimmed afterwards.
  local joined
  joined="$(printf '%s, ' "${hosts[@]:0:$show}")"
  joined="${joined%, }"

  if [ "$more" -gt 0 ]; then
    printf '%s hosts allowed (%s, +%s more)' "$count" "$joined" "$more"
  else
    printf '%s hosts allowed (%s)' "$count" "$joined"
  fi
}

# --- YAML generation -------------------------------------------------------

# pipelock_write_yaml <manifest_file> <bottle_name> <out_path>
#
# Writes a pipelock YAML config file to <out_path> (mode 600). The
# config carries only:
#   - the effective allowlist (hostnames),
#   - the minimum knobs needed to satisfy PRD 0001 success criteria
#     (strict mode, forward_proxy on, DLP defaults + env scanning).
#
# It deliberately contains no env values, no secrets, and no per-agent
# customization beyond the hostname list.
#
# YAML keys + defaults sourced from
# https://github.com/luckyPipewrench/pipelock/blob/main/docs/configuration.md
# (top-level fields, api_allowlist, forward_proxy, dlp).
# Generates the pipelock config for one bottle and writes it to a file
# readable only by the owner.
# Arguments: $1 manifest file, $2 bottle name, $3 output path.
# Dies (via `die`) without creating or modifying the file when:
#   - the effective allowlist cannot be resolved, or
#   - an entry contains whitespace, control characters, `"` or `\`,
#     any of which would corrupt the double-quoted YAML scalar.
# Also dies if the file cannot be created, chmod'ed or written.
pipelock_write_yaml() {
  local manifest_file="${1:?pipelock_write_yaml: missing manifest file}"
  local bottle_name="${2:?pipelock_write_yaml: missing bottle name}"
  local out_path="${3:?pipelock_write_yaml: missing out_path}"

  # Resolve the list up front so a failure is visible here. Reading it
  # through process substitution would discard the exit status and a
  # config with a partial allowlist would be written.
  local hosts
  hosts="$(pipelock_effective_allowlist "$manifest_file" "$bottle_name")" \
    || die "failed to resolve the egress allowlist for bottle '${bottle_name}'; not writing ${out_path}"

  # Validate before touching the file. Pipelock accepts hostnames and
  # wildcards; entries are emitted double-quoted, so only characters
  # that would break that quoting (or can never be part of a hostname)
  # are rejected.
  local h
  while IFS= read -r h; do
    [ -z "$h" ] && continue
    case "$h" in
      *[[:space:]]* | *[[:cntrl:]]* | *\"* | *\\*)
        die "egress allowlist entry '${h}' contains whitespace, control characters, quotes or backslashes; not writing ${out_path}"
        ;;
    esac
  done <<< "$hosts"

  # Create the file with a restrictive umask so it is never briefly
  # world-readable; the chmod also tightens a pre-existing file.
  ( umask 077 && : > "$out_path" ) \
    || die "cannot create pipelock config at ${out_path}"
  chmod 600 "$out_path" \
    || die "cannot restrict permissions on ${out_path}"

  {
    printf 'version: 1\n'
    printf 'mode: strict\n'
    printf 'enforce: true\n'
    printf '\n'
    printf '# Hostnames the agent is allowed to reach. Effective list is\n'
    printf '# claude-bottle defaults UNION bottle.egress.allowlist (sorted, deduped).\n'
    printf 'api_allowlist:\n'
    while IFS= read -r h; do
      [ -z "$h" ] && continue
      printf '  - "%s"\n' "$h"
    done <<< "$hosts"
    printf '\n'
    printf 'forward_proxy:\n'
    printf '  enabled: true\n'
    printf '\n'
    printf 'dlp:\n'
    printf '  include_defaults: true\n'
    printf '  scan_env: true\n'
  } > "$out_path" || die "failed to write pipelock config to ${out_path}"
}

# --- Sidecar lifecycle -----------------------------------------------------

# pipelock_start <slug> <internal_network> <yaml_dir> <yaml_filename>
#
# Boots the pipelock sidecar:
#   1. `docker create` on the internal network with the canonical
#      service name, passing `run --config <path>` and the listen
#      address as the container command.
#   2. `docker cp` the YAML config from the host mktemp dir into the
#      container at /etc/pipelock/pipelock.yaml.
#   3. `docker network connect bridge` for upstream egress, then
#      `docker start`.
#
# We use docker cp rather than `-v <host>:<container>` because Docker
# Desktop bind mounts have ownership / case-sensitivity quirks on
# macOS; copying the file in sidesteps both. The host-side mktemp dir
# is the caller's responsibility to clean up.
#
# The container is only started after the cp, so pipelock boots with
# the config already in place.
# NOTE: nothing here restarts a running container — the sequence is
# `docker create` + `docker cp` + `docker start`, so the config is in
# place before pipelock's first boot. Pipelock's hot-reload feature is
# not an alternative way to deliver the config, because
# `forward_proxy.enabled` is one of the few restart-required keys (per
# docs/configuration.md).
#
# Args:
#   <slug>             — agent slug; sidecar name will be claude-bottle-pipelock-<slug>
#   <internal_network> — name of the agent's internal docker network
#   <yaml_dir>         — host directory containing the YAML
#   <yaml_filename>    — filename within yaml_dir
#
# Echoes the container name on stdout (no trailing newline) on success.
# Dies if the YAML file is missing or any docker step fails; after a
# failed step the half-built container is removed first, and docker's
# own error text is appended to the message.

# Removes the partially set-up sidecar $1 (best effort), then dies with
# message $2 followed by docker's error output $3 when there is any.
_pipelock_start_abort() {
  local name="$1" message="$2" detail="${3:-}"
  # Best effort: the container may be in any state at this point.
  docker rm -f "$name" >/dev/null 2>&1 || true
  die "${message}${detail:+: ${detail}}"
}

pipelock_start() {
  local slug="${1:?pipelock_start: missing slug}"
  local internal_network="${2:?pipelock_start: missing internal network}"
  local yaml_dir="${3:?pipelock_start: missing yaml dir}"
  local yaml_filename="${4:?pipelock_start: missing yaml filename}"

  local name
  name="$(pipelock_container_name "$slug")"
  local host_yaml="${yaml_dir}/${yaml_filename}"
  if [ ! -f "$host_yaml" ]; then
    die "pipelock yaml not found at ${host_yaml}; pipelock_write_yaml must run first"
  fi

  info "starting pipelock sidecar ${name} on network ${internal_network}"

  # In every docker call below, `2>&1 >/dev/null` captures stderr in
  # $err and discards stdout (container IDs), so a failure can be
  # reported with docker's own explanation.
  local err

  # Create (do not start) the container attached to the internal
  # network. Creating first lets the config be copied in before
  # pipelock runs, with no bind mount involved.
  if ! err="$(docker create \
      --name "$name" \
      --network "$internal_network" \
      "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" \
      run --config /etc/pipelock/pipelock.yaml --listen "0.0.0.0:${CLAUDE_BOTTLE_PIPELOCK_PORT}" \
      2>&1 >/dev/null)"; then
    # Nothing to clean up: the container was never created.
    die "failed to create pipelock sidecar ${name}${err:+: ${err}}"
  fi

  # `docker cp` works against a created-but-not-started container.
  # NOTE(review): docker's documentation says `docker cp` does not
  # create missing parent directories of the destination, so this
  # presumably relies on /etc/pipelock already existing in the image —
  # confirm against the pinned image.
  if ! err="$(docker cp "$host_yaml" "${name}:/etc/pipelock/pipelock.yaml" 2>&1 >/dev/null)"; then
    _pipelock_start_abort "$name" "failed to copy pipelock yaml into ${name}" "$err"
  fi

  # Attach to a default-bridge network for upstream egress (the
  # internal network has no gateway by definition, so without a second
  # network the sidecar can't reach the public internet either).
  # Using the well-known `bridge` network is the simplest way to give
  # it a default route; we do not create a per-agent egress network.
  if ! err="$(docker network connect bridge "$name" 2>&1 >/dev/null)"; then
    _pipelock_start_abort "$name" "failed to attach pipelock sidecar ${name} to bridge network for upstream egress" "$err"
  fi

  if ! err="$(docker start "$name" 2>&1 >/dev/null)"; then
    _pipelock_start_abort "$name" "failed to start pipelock sidecar ${name}" "$err"
  fi

  printf '%s' "$name"
}

# pipelock_stop <slug>
#
# Stops and removes the sidecar by canonical name. Idempotent: a
# missing container is treated as success so this can be wired into
# cli.sh's exit trap unconditionally.
# Used as the first step of
# teardown — must run BEFORE the network is torn down, because docker
# refuses to remove a network that still has containers attached.
#
# Arguments: $1 agent slug.
# Returns 0 when the container does not exist or was removed. If the
# removal fails, a warning with the manual clean-up command is emitted
# through `warn` and that call's status is returned.
pipelock_stop() {
  local slug="${1:?pipelock_stop: missing slug}"
  local name
  name="$(pipelock_container_name "$slug")"

  # `docker container inspect` matches containers only. Plain
  # `docker inspect` also matches images, networks and volumes, so a
  # same-named object of another type would trigger a pointless rm and
  # a misleading warning.
  if ! docker container inspect "$name" >/dev/null 2>&1; then
    return 0
  fi

  if ! docker rm -f "$name" >/dev/null 2>&1; then
    warn "failed to remove pipelock sidecar ${name}; clean up with 'docker rm -f ${name}'"
  fi
}