From b0ee7da5be3f1acfea1e538295bd3ed75d2ec9fb Mon Sep 17 00:00:00 2001
From: didericis <eric@dideric.is>
Date: Fri, 8 May 2026 01:54:25 -0400
Subject: [PATCH] test: add bash test suite covering pipelock helpers and smoke
 flows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds tests/ with a tiny bash assert harness, manifest fixtures, and a
runner. No framework dependency — each test file is self-contained
and exits 0 on pass / 1 on fail; tests/run_tests.sh aggregates.

Unit tests (no docker):
  - pipelock_naming: container_name, proxy_url, proxy_host_port shape
  - pipelock_classify: _pipelock_is_ipv4_literal classifier coverage
  - pipelock_allowlist: bottle_allowlist + ssh hostnames/ip_cidrs/
    trusted_domains + effective_allowlist union/dedup/sort, plus
    rejection of non-string entries
  - pipelock_yaml: emitter shape (mode/enforce/api_allowlist/forward_proxy/
    dlp), conditional ssrf+trusted_domains blocks, secret hygiene
    (manifest env values must not appear in YAML), file mode 600

Integration tests (require docker, skip cleanly otherwise):
  - pipelock_image: pinned digest's ENTRYPOINT is /pipelock and CMD
    contains 'run' and the binary --version succeeds — would catch a
    future image bump that changes the launcher's argv contract
  - pipelock_sidecar_smoke: docker create + cp YAML to /etc/pipelock.yaml
    + start, then probe /health — the regression test for the bug
    where the YAML was written to /etc/pipelock/ (parent dir absent in
    the distroless image)
  - dry_run_plan: cli.sh start --dry-run shows the egress line,
    counts the bottle's entry into the effective allowlist, prints
    the dry-run banner, and creates zero docker resources
  - orphan_cleanup: the cleanup primitives the start-flow trap depends
    on (network_remove, pipelock_stop) are idempotent against
    missing/never-existed resources, so the trap is safe even if
    pipelock_start dies before everything is wired up

Assisted-by: Claude Code
---
 tests/README.md                               |  83 ++++++++++++
 tests/integration/test_dry_run_plan.sh        |  63 +++++++++
 tests/integration/test_orphan_cleanup.sh      |  74 +++++++++++
 tests/integration/test_pipelock_image.sh      |  40 ++++++
 .../test_pipelock_sidecar_smoke.sh            |  87 ++++++++++++
 tests/lib/assert.sh                           | 124 ++++++++++++++++++
 tests/lib/common.sh                           |  20 +++
 tests/lib/fixtures.sh                         |  99 ++++++++++++++
 tests/run_tests.sh                            |  94 +++++++++++++
 tests/unit/test_pipelock_allowlist.sh         |  89 +++++++++++++
 tests/unit/test_pipelock_classify.sh          |  34 +++++
 tests/unit/test_pipelock_naming.sh            |  23 ++++
 tests/unit/test_pipelock_yaml.sh              |  90 +++++++++++++
 13 files changed, 920 insertions(+)
 create mode 100644 tests/README.md
 create mode 100755 tests/integration/test_dry_run_plan.sh
 create mode 100755 tests/integration/test_orphan_cleanup.sh
 create mode 100755 tests/integration/test_pipelock_image.sh
 create mode 100755 tests/integration/test_pipelock_sidecar_smoke.sh
 create mode 100644 tests/lib/assert.sh
 create mode 100644 tests/lib/common.sh
 create mode 100644 tests/lib/fixtures.sh
 create mode 100755 tests/run_tests.sh
 create mode 100755 tests/unit/test_pipelock_allowlist.sh
 create mode 100755 tests/unit/test_pipelock_classify.sh
 create mode 100755 tests/unit/test_pipelock_naming.sh
 create mode 100755 tests/unit/test_pipelock_yaml.sh

diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..2ea901f
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,83 @@
+# Tests
+
+Plain-bash test suite. No framework dependency — assertions are tiny
+helpers in `tests/lib/assert.sh` and the runner is a shell script.
+The unit tests run anywhere bash + jq are present; the integration
+tests need Docker and skip cleanly otherwise.
+
+## Layout
+
+```
+tests/
+  run_tests.sh                   # entry point
+  lib/
+    assert.sh                    # assert_eq, assert_contains, assert_match, ...
+    common.sh                    # sources assert + fixtures, sets REPO_ROOT
+    fixtures.sh                  # JSON manifest builders
+  unit/                          # no docker; fast
+    test_pipelock_naming.sh
+    test_pipelock_classify.sh
+    test_pipelock_allowlist.sh
+    test_pipelock_yaml.sh
+  integration/                   # require docker
+    test_pipelock_image.sh
+    test_pipelock_sidecar_smoke.sh
+    test_dry_run_plan.sh
+    test_orphan_cleanup.sh
+```
+
+## Running
+
+```bash
+tests/run_tests.sh                   # everything
+tests/run_tests.sh unit               # unit only
+tests/run_tests.sh integration        # integration only
+tests/run_tests.sh tests/unit/test_pipelock_yaml.sh   # one file
+```
+
+Each test file exits 0 on pass, 1 on fail. The runner aggregates and
+prints a one-line summary.
+
+## What the integration tests cover
+
+These are versions of the smoke tests run during PR #1:
+
+- `test_pipelock_image.sh` — the pinned digest is pullable, ENTRYPOINT is
+  `/pipelock`, `CMD` includes `run`, and `--version` runs. Catches a
+  pipelock release that changes the argv shape.
+- `test_pipelock_sidecar_smoke.sh` — `docker create` + `docker cp` the
+  generated YAML to `/etc/pipelock.yaml` + `docker start`, then probe
+  `/health`. Catches the YAML-path bug we hit (the image is distroless,
+  so `/etc/pipelock/` does not exist) and YAML structural breakage.
+- `test_dry_run_plan.sh` — `cli.sh start --dry-run` shows the resolved
+  egress allowlist and creates zero docker resources.
+- `test_orphan_cleanup.sh` — `network_remove` and `pipelock_stop` are
+  no-ops on missing or already-removed resources, so the EXIT trap is safe.
+  The end-to-end "cli.sh dies mid-run" flow needs a TTY: verify it by hand.
+
+## What's NOT covered
+
+- `lib/ssh.sh` end-to-end (would need a fake SSH host inside the
+  container; high effort for v1).
+- A live SSH-through-pipelock tunnel against a real Tailscale-style
+  internal IP.
+- DLP false-positive measurements.
+- TLS handling / cert pinning behavior.
+
+## Adding a test
+
+1. Pick `unit/` (no docker) or `integration/` (docker required).
+2. Name it `test_<topic>.sh`. Make it executable: `chmod +x`.
+3. Start with the boilerplate the existing files use:
+   ```bash
+   #!/usr/bin/env bash
+   TEST_NAME="<topic>"
+   . "$(dirname "$0")/../lib/common.sh"
+   . "${REPO_ROOT}/lib/log.sh"
+   . "${REPO_ROOT}/lib/<file-under-test>.sh"
+   # ...assert_eq / assert_contains / ...
+   test_summary
+   ```
+4. For integration tests: call `skip_test_if_no_docker` after the
+   boilerplate and ensure your trap cleans up any docker resources you
+   create.
diff --git a/tests/integration/test_dry_run_plan.sh b/tests/integration/test_dry_run_plan.sh
new file mode 100755
index 0000000..c1ba8df
--- /dev/null
+++ b/tests/integration/test_dry_run_plan.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Integration: cli.sh start --dry-run renders the planned shape and
+# does not create any docker resources. Confirms the preflight contract
+# from PRD 0001 (allowlist line in the plan, no docker side effects).
+TEST_NAME="dry_run_plan"
+
+. "$(dirname "$0")/../lib/common.sh"
+
+skip_test_if_no_docker
+
+work_dir="$(mktemp -d)"
+manifest="${work_dir}/claude-bottle.json"
+
+cleanup() {
+  rm -rf "$work_dir"
+}
+trap cleanup EXIT
+
+# Manifest with an egress.allowlist so we can grep for a known host.
+cat > "$manifest" <<'JSON'
+{
+  "bottles": {
+    "dev": {
+      "egress": { "allowlist": ["example.org"] }
+    }
+  },
+  "agents": {
+    "demo": {
+      "skills": [],
+      "prompt": "",
+      "bottle": "dev"
+    }
+  }
+}
+JSON
+
+# Snapshot docker state before we run.
+nets_before="$(docker network ls --format '{{.Name}}' | grep -c '^claude-bottle' || true)"
+ctrs_before="$(docker ps -a --format '{{.Names}}'     | grep -c '^claude-bottle' || true)"
+
+# Override HOME so the user's ~/claude-bottle.json doesn't leak in via
+# manifest_resolve's home+cwd merge.
+out="$(cd "$work_dir" \
+  && HOME="$work_dir" CLAUDE_BOTTLE_DRY_RUN=1 \
+     "${REPO_ROOT}/cli.sh" start demo 2>&1 || true)"
+
+assert_contains "$out" "egress"             "preflight: egress line present"
+# 7 baked defaults + 1 bottle entry = 8. The summary line shows the
+# total count regardless of which entries fit in the visible
+# "<a>, <b>, <c>, +N more" prefix, so this assertion is robust against
+# alphabetical sort order changes.
+assert_match    "$out" "8 hosts allowed"    "preflight: bottle entry counted in effective allowlist"
+assert_contains "$out" "api.anthropic.com"  "preflight: baked default shown"
+assert_contains "$out" "dry-run requested"  "dry-run banner present"
+assert_not_contains "$out" "/dev/tty"       "no /dev/tty prompt reached (dry-run exited first)"
+
+# No docker side effects.
+nets_after="$(docker network ls --format '{{.Name}}' | grep -c '^claude-bottle' || true)"
+ctrs_after="$(docker ps -a --format '{{.Names}}'     | grep -c '^claude-bottle' || true)"
+assert_eq "$nets_before" "$nets_after" "dry-run: no claude-bottle networks created"
+assert_eq "$ctrs_before" "$ctrs_after" "dry-run: no claude-bottle containers created"
+
+test_summary
diff --git a/tests/integration/test_orphan_cleanup.sh b/tests/integration/test_orphan_cleanup.sh
new file mode 100755
index 0000000..41b814e
--- /dev/null
+++ b/tests/integration/test_orphan_cleanup.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# Integration: the cleanup primitives the start-flow trap depends on
+# are idempotent. The original orphan-network bug was a trap-ordering
+# issue (cleanup_all installed AFTER networks were created); the fix
+# moved the install earlier. The trap is only safe if the helpers it
+# calls — network_remove, pipelock_stop — are no-ops against
+# already-missing or never-existed resources. We test that here.
+#
+# (The full end-to-end "cli.sh dies mid-run, networks gone" flow needs
+# a TTY, so it is not automated here and has to be verified manually.)
+TEST_NAME="orphan_cleanup"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/docker.sh
+. "${REPO_ROOT}/lib/docker.sh"
+# shellcheck source=../../lib/network.sh
+. "${REPO_ROOT}/lib/network.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+skip_test_if_no_docker
+
+slug="cb-test-orphan-$$"
+internal_name=""
+egress_name=""
+
+cleanup() {
+  for n in "$internal_name" "$egress_name"; do
+    [ -n "$n" ] && docker network rm "$n" >/dev/null 2>&1 || true
+  done
+}
+trap cleanup EXIT
+
+# 1. network_remove against a name that doesn't exist returns 0
+#    (the trap can call it eagerly without crashing on the first run
+#    where the network was never created).
+assert_exit_zero "network_remove: missing network is a no-op" \
+  network_remove "claude-bottle-net-${slug}-does-not-exist"
+
+# 2. Create both networks the way cli.sh does, then remove them with
+#    network_remove. Both should succeed and the networks should be
+#    gone afterwards.
+internal_name="$(network_create_internal "$slug")"
+egress_name="$(network_create_egress "$slug")"
+
+assert_match "$(docker network ls --format '{{.Name}}')" "^${internal_name}$" \
+  "internal network was created"
+assert_match "$(docker network ls --format '{{.Name}}')" "^${egress_name}$" \
+  "egress network was created"
+
+assert_exit_zero "network_remove: removes existing internal network" \
+  network_remove "$internal_name"
+assert_exit_zero "network_remove: removes existing egress network" \
+  network_remove "$egress_name"
+
+nets_after="$(docker network ls --format '{{.Name}}')"
+assert_not_contains "$nets_after" "$internal_name" "internal network gone after removal"
+assert_not_contains "$nets_after" "$egress_name"   "egress network gone after removal"
+
+# 3. Removing a second time is still safe — the trap may run after a
+#    clean exit, where the resources are already gone.
+assert_exit_zero "network_remove: idempotent on already-removed internal" \
+  network_remove "$internal_name"
+assert_exit_zero "network_remove: idempotent on already-removed egress" \
+  network_remove "$egress_name"
+
+# 4. pipelock_stop against a slug whose sidecar was never started must
+#    also be a no-op — same reason.
+assert_exit_zero "pipelock_stop: missing sidecar is a no-op" \
+  pipelock_stop "missing-${slug}"
+
+test_summary
diff --git a/tests/integration/test_pipelock_image.sh b/tests/integration/test_pipelock_image.sh
new file mode 100755
index 0000000..afff10e
--- /dev/null
+++ b/tests/integration/test_pipelock_image.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Integration: verify the pinned pipelock image. Requires docker.
+#   - Pinned digest is reachable on the registry.
+#   - Image's ENTRYPOINT/CMD match what lib/pipelock.sh assumes
+#     (`/pipelock` and `run --listen 0.0.0.0:8888`).
+#   - The /pipelock binary actually runs (--version succeeds).
+#
+# This is the test that would have caught the runtime bug where the
+# CMD shape diverged from what the launcher passed.
+TEST_NAME="pipelock_image"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+skip_test_if_no_docker
+
+# Pull the pinned image (cheap if already cached).
+if ! docker pull "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" >/dev/null 2>&1; then
+  skip "could not pull ${CLAUDE_BOTTLE_PIPELOCK_IMAGE}"
+  exit 0
+fi
+
+# ENTRYPOINT must be the binary path lib/pipelock.sh expects.
+entrypoint="$(docker image inspect "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --format '{{json .Config.Entrypoint}}')"
+assert_contains "$entrypoint" "/pipelock" "entrypoint contains /pipelock"
+
+# CMD must include `run` — the subcommand the launcher overrides via
+# `docker create ... run --config ... --listen ...`. If a future image
+# bumps the CMD shape, this fails loudly.
+cmd="$(docker image inspect "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --format '{{json .Config.Cmd}}')"
+assert_contains "$cmd" "run" "cmd contains 'run'"
+
+# Binary actually runs: a failed run blanks $ver, so an error that merely mentions "/pipelock" cannot match.
+ver="$(docker run --rm "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --version 2>&1)" || ver=""
+assert_match "$ver" "[Pp]ipelock|2\\.[0-9]+\\.[0-9]+" "binary --version produces version-shaped output"
+
+test_summary
diff --git a/tests/integration/test_pipelock_sidecar_smoke.sh b/tests/integration/test_pipelock_sidecar_smoke.sh
new file mode 100755
index 0000000..5340441
--- /dev/null
+++ b/tests/integration/test_pipelock_sidecar_smoke.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Integration: full sidecar smoke test. Boots a pipelock container the
+# same way cli.sh does (docker create + docker cp YAML + docker start),
+# then probes /health. Catches regressions in:
+#   - the YAML-cp path (the /etc/pipelock.yaml vs /etc/pipelock/ bug)
+#   - argv shape (the `run --listen 0.0.0.0:N` invocation)
+#   - YAML structural validity (pipelock would refuse to start on a bad config)
+TEST_NAME="pipelock_sidecar_smoke"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+skip_test_if_no_docker
+
+# Use a distinct name so concurrent runs don't collide.
+name="cb-test-pipelock-smoke-$$"
+work_dir="$(mktemp -d)"
+yaml="${work_dir}/pipelock.yaml"
+
+cleanup() {
+  docker rm -f "$name" >/dev/null 2>&1 || true
+  rm -rf "$work_dir"
+}
+trap cleanup EXIT
+
+# Generate a real config from a fixture manifest.
+m="$(write_fixture fixture_minimal)"
+pipelock_write_yaml "$m" dev "$yaml"
+rm -f "$m"
+
+# Same lifecycle as lib/pipelock.sh's pipelock_start, minus the
+# network-attach steps (we just need a port we can curl).
+docker create --name "$name" -p 0:8888 \
+  "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" \
+  run --config /etc/pipelock.yaml --listen "0.0.0.0:8888" \
+  >/dev/null 2>&1 \
+  || { _fail "docker create failed"; test_summary; }
+
+# This is the exact cp path that broke before — guard against
+# regressing to a /etc/pipelock/ subdirectory destination.
+if ! docker cp "$yaml" "${name}:/etc/pipelock.yaml" >/dev/null 2>&1; then
+  _fail "docker cp to /etc/pipelock.yaml failed (parent dir must already exist in image)"
+  test_summary
+fi
+
+if ! docker start "$name" >/dev/null 2>&1; then
+  _fail "docker start failed; check that argv 'run --listen 0.0.0.0:8888' still matches image"
+  test_summary
+fi
+
+# Find the host-side port docker mapped 8888 to.
+hostport="$(docker port "$name" 8888 2>/dev/null | head -1 | awk -F: '{print $NF}')"
+if [ -z "$hostport" ]; then
+  _fail "could not determine published port" "docker port output: $(docker port "$name" 2>&1)"
+  test_summary
+fi
+
+# Wait up to 15 seconds for /health to come up (one probe per second).
+healthy=0
+for (( tries = 0; tries < 15; tries++ )); do
+  if curl -fsS "http://127.0.0.1:${hostport}/health" >/dev/null 2>&1; then
+    healthy=1
+    break
+  fi
+  sleep 1
+done
+
+if [ "$healthy" -eq 1 ]; then
+  _pass "sidecar /health responded"
+else
+  _fail "sidecar /health did not respond within 15s" "logs:" "$(docker logs "$name" 2>&1 | tail -20)"
+  test_summary
+fi
+
+# Body should mention the version we pinned. We don't pin the exact
+# version string here because the digest we test against is one
+# release; the next release will change the version field but should
+# keep the schema. Keep the assertion at "field is present and has
+# a numeric-dotted shape".
+body="$(curl -fsS "http://127.0.0.1:${hostport}/health" 2>&1)"
+assert_contains "$body" '"status":"healthy"' "/health body status:healthy"
+assert_match    "$body" '"version":"[0-9]+\.[0-9]+\.[0-9]+"' "/health body has version field"
+
+test_summary
diff --git a/tests/lib/assert.sh b/tests/lib/assert.sh
new file mode 100644
index 0000000..9d92ab2
--- /dev/null
+++ b/tests/lib/assert.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# Tiny assertion helpers. No framework — each test file sources this,
+# calls `assert_*` functions, and ends with `test_summary` which exits
+# 0 if every assertion passed and 1 otherwise.
+#
+# Counters are file-local: every test process gets its own TEST_PASS /
+# TEST_FAIL. run_tests.sh aggregates by exit code, not by reading these.
+
+if [ -n "${CLAUDE_BOTTLE_TESTS_ASSERT_SOURCED:-}" ]; then
+  return 0
+fi
+CLAUDE_BOTTLE_TESTS_ASSERT_SOURCED=1
+
+TEST_PASS=0
+TEST_FAIL=0
+TEST_NAME="${TEST_NAME:-unnamed}"
+
+if [ -t 1 ]; then
+  _C_PASS=$'\033[32m'
+  _C_FAIL=$'\033[31m'
+  _C_SKIP=$'\033[33m'
+  _C_RESET=$'\033[0m'
+else
+  _C_PASS=""
+  _C_FAIL=""
+  _C_SKIP=""
+  _C_RESET=""
+fi
+
+_pass() {  # _pass <msg>: count one passing assertion, print a PASS line on stdout
+  : "$((TEST_PASS += 1))"
+  printf '  %sPASS%s %s\n' "${_C_PASS}" "${_C_RESET}" "${1}"
+}
+
+# _fail <msg> [detail...] — count one failing assertion; print a FAIL line
+# plus one indented line per <detail>. Everything goes to stderr.
+_fail() {
+  TEST_FAIL=$((TEST_FAIL + 1))
+  printf '  %sFAIL%s %s\n' "$_C_FAIL" "$_C_RESET" "$1" >&2
+  shift
+  # printf re-applies its format once per argument; guard the no-detail case.
+  [ "$#" -eq 0 ] || printf '    %s\n' "$@" >&2
+}
+
+# assert_eq <expected> <actual> [msg] — passes iff the two strings are
+# equal under the shell's `[ a = b ]` comparison. On mismatch both values
+# are printed as failure details.
+assert_eq() {
+  local expected="$1" actual="$2" msg="${3:-equal}"
+  [ "$expected" != "$actual" ] || { _pass "$msg"; return 0; }
+  _fail "$msg" "expected: ${expected}" "actual:   ${actual}"
+}
+
+# assert_contains <haystack> <needle> [msg] — passes iff <needle> occurs
+# literally in <haystack>. Matched in-process: a `printf | grep -q` pipeline
+# can die of SIGPIPE under `set -o pipefail` and misreport a real match.
+assert_contains() {
+  local haystack="$1" needle="$2" msg="${3:-contains}"
+  [[ "$haystack" == *"$needle"* ]] && { _pass "$msg"; return 0; }
+  _fail "$msg" "expected to contain: ${needle}" "haystack: ${haystack}"
+}
+
+# assert_not_contains <haystack> <needle> [msg] — passes iff <needle> does
+# not occur literally in <haystack>. Matched in-process: `printf | grep -q`
+# can die of SIGPIPE under `set -o pipefail`, hiding a forbidden match.
+assert_not_contains() {
+  local haystack="$1" needle="$2" msg="${3:-does not contain}"
+  [[ "$haystack" != *"$needle"* ]] && { _pass "$msg"; return 0; }
+  _fail "$msg" "expected NOT to contain: ${needle}" "haystack: ${haystack}"
+}
+
+# assert_match <haystack> <pattern> [msg] — passes iff some line of
+# <haystack> matches the ERE <pattern>. Fed via here-string, not a pipe:
+# `printf | grep -q` can die of SIGPIPE under pipefail on a real match.
+assert_match() {
+  local haystack="$1" pattern="$2" msg="${3:-matches}"
+  grep -qE -- "$pattern" <<<"$haystack" && { _pass "$msg"; return 0; }
+  _fail "$msg" "expected pattern: ${pattern}" "haystack: ${haystack}"
+}
+
+# assert_exit_zero <label> <cmd...> — runs <cmd...> in a command substitution
+# and passes iff it exits 0; stdout+stderr are captured for the failure message.
+assert_exit_zero() {
+  local label="$1" captured
+  shift
+  captured="$("$@" 2>&1)" || {
+    _fail "$label" "exit non-zero" "output: ${captured}"
+    return 0
+  }
+  _pass "$label"
+}
+
+# assert_exit_nonzero <label> <cmd...> — runs <cmd...> in a command
+# substitution and passes iff it exits non-zero. stdout+stderr are captured
+# and shown when the command unexpectedly succeeds.
+assert_exit_nonzero() {
+  local label="$1" captured
+  shift
+  captured="$("$@" 2>&1)" || { _pass "$label"; return 0; }
+  _fail "$label" "exit was 0; expected non-zero" "output: ${captured}"
+}
+
+skip() {  # skip <reason>: print a SKIP line on stdout; pass/fail counters are untouched
+  printf '  %sSKIP%s %s\n' "${_C_SKIP}" "${_C_RESET}" "${1}"
+}
+
+# skip_test_if_no_docker — when the docker CLI is not on PATH or
+# `docker info` fails, print a SKIP line naming TEST_NAME and exit the
+# whole test file with status 0. Otherwise returns 0 and prints nothing.
+skip_test_if_no_docker() {
+  command -v docker >/dev/null 2>&1 \
+    || { printf '%sSKIP%s %s — docker not on PATH\n' "$_C_SKIP" "$_C_RESET" "$TEST_NAME"; exit 0; }
+  docker info >/dev/null 2>&1 \
+    || { printf '%sSKIP%s %s — docker daemon unreachable\n' "$_C_SKIP" "$_C_RESET" "$TEST_NAME"; exit 0; }
+  return 0
+}
+
+# test_summary — print "<TEST_NAME>: N passed, M failed" and end the test
+# file: exit status 1 if any assertion failed, 0 otherwise. Never returns.
+test_summary() {
+  printf '\n%s: %d passed, %d failed\n' "$TEST_NAME" "$TEST_PASS" "$TEST_FAIL"
+  [ "$TEST_FAIL" -gt 0 ] && exit 1
+  exit 0
+}
diff --git a/tests/lib/common.sh b/tests/lib/common.sh
new file mode 100644
index 0000000..152107f
--- /dev/null
+++ b/tests/lib/common.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Common scaffolding for every test file. Sources assert.sh and computes
+# REPO_ROOT so tests can `. "${REPO_ROOT}/lib/<x>.sh"` to load the code
+# they're exercising.
+
+if [ -n "${CLAUDE_BOTTLE_TESTS_COMMON_SOURCED:-}" ]; then
+  return 0
+fi
+CLAUDE_BOTTLE_TESTS_COMMON_SOURCED=1
+
+set -euo pipefail
+
+_tests_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+TESTS_ROOT="$_tests_dir"
+REPO_ROOT="$(CDPATH= cd -- "${TESTS_ROOT}/.." && pwd)"
+
+# shellcheck source=./assert.sh
+. "${TESTS_ROOT}/lib/assert.sh"
+# shellcheck source=./fixtures.sh
+. "${TESTS_ROOT}/lib/fixtures.sh"
diff --git a/tests/lib/fixtures.sh b/tests/lib/fixtures.sh
new file mode 100644
index 0000000..e001c39
--- /dev/null
+++ b/tests/lib/fixtures.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Manifest fixture builders. Each function prints a JSON manifest on
+# stdout; callers can pipe to a temp file or pass through `write_fixture`.
+
+if [ -n "${CLAUDE_BOTTLE_TESTS_FIXTURES_SOURCED:-}" ]; then
+  return 0
+fi
+CLAUDE_BOTTLE_TESTS_FIXTURES_SOURCED=1
+
+# fixture_minimal — one bottle, one agent, no env / ssh / skills.
+fixture_minimal() {
+  cat <<'JSON'
+{
+  "bottles": {
+    "dev": {}
+  },
+  "agents": {
+    "demo": {
+      "skills": [],
+      "prompt": "",
+      "bottle": "dev"
+    }
+  }
+}
+JSON
+}
+
+# fixture_with_egress — bottle declares an egress.allowlist.
+fixture_with_egress() {
+  cat <<'JSON'
+{
+  "bottles": {
+    "dev": {
+      "egress": {
+        "allowlist": [
+          "github.com",
+          "gitlab.com",
+          "registry.npmjs.org"
+        ]
+      }
+    }
+  },
+  "agents": {
+    "demo": {
+      "skills": [],
+      "prompt": "",
+      "bottle": "dev"
+    }
+  }
+}
+JSON
+}
+
+# fixture_with_ssh — bottle has both an IPv4-literal SSH host (Tailscale
+# CGNAT range) and a hostname SSH host, exercising both
+# ssrf.ip_allowlist and trusted_domains code paths.
+fixture_with_ssh() {
+  cat <<'JSON'
+{
+  "bottles": {
+    "dev": {
+      "ssh": [
+        {
+          "Host": "tailscale-gitea",
+          "IdentityFile": "/dev/null",
+          "Hostname": "100.78.141.42",
+          "User": "git",
+          "Port": 30009
+        },
+        {
+          "Host": "github",
+          "IdentityFile": "/dev/null",
+          "Hostname": "github.com",
+          "User": "git",
+          "Port": 22
+        }
+      ]
+    }
+  },
+  "agents": {
+    "demo": {
+      "skills": [],
+      "prompt": "",
+      "bottle": "dev"
+    }
+  }
+}
+JSON
+}
+
+# write_fixture <fixture_func> — run the named fixture builder, store its
+# output in a fresh mktemp file, and print that file's path (no trailing
+# newline). The caller owns the file and must remove it.
+write_fixture() {
+  local builder="${1:?write_fixture: missing fixture function}" path
+  path="$(mktemp)"
+  "$builder" >"$path"
+  printf '%s' "$path"
+}
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
new file mode 100755
index 0000000..d3825c0
--- /dev/null
+++ b/tests/run_tests.sh
@@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+# Test runner. Iterates over test_*.sh files in unit/ and integration/
+# (or just one of them when given a `unit` / `integration` argument)
+# and runs each as a separate process. Aggregates exit codes and
+# prints a summary.
+#
+# Usage:
+#   tests/run_tests.sh                # unit + integration
+#   tests/run_tests.sh unit           # unit only
+#   tests/run_tests.sh integration    # integration only
+#   tests/run_tests.sh path/to/test_x.sh   # one specific file
+
+set -uo pipefail
+
+_dir="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"
+
+if [ -t 1 ]; then
+  C_PASS=$'\033[32m'
+  C_FAIL=$'\033[31m'
+  C_HEAD=$'\033[36m'
+  C_RESET=$'\033[0m'
+else
+  C_PASS=""
+  C_FAIL=""
+  C_HEAD=""
+  C_RESET=""
+fi
+
+usage() {
+  cat <<EOF
+usage: $(basename "$0") [unit|integration|path/to/test.sh]
+  no arg            run unit + integration
+  unit              run only tests/unit/test_*.sh
+  integration       run only tests/integration/test_*.sh
+  <path>            run a single test file
+EOF
+}
+
+# Collect test files.
+declare -a FILES=()
+case "${1:-}" in
+  -h|--help) usage; exit 0 ;;
+  unit)        FILES=("${_dir}"/unit/test_*.sh) ;;
+  integration) FILES=("${_dir}"/integration/test_*.sh) ;;
+  "")          FILES=("${_dir}"/unit/test_*.sh "${_dir}"/integration/test_*.sh) ;;
+  *)
+    if [ -f "$1" ]; then
+      FILES=("$1")
+    else
+      printf 'no such file: %s\n' "$1" >&2
+      usage
+      exit 2
+    fi
+    ;;
+esac
+
+# Filter out non-existent globs (no matching files).
+declare -a EXISTING=()
+for f in "${FILES[@]}"; do
+  [ -f "$f" ] && EXISTING+=("$f")
+done
+
+if [ "${#EXISTING[@]}" -eq 0 ]; then
+  printf 'no test files found\n' >&2
+  exit 2
+fi
+
+PASS_COUNT=0
+FAIL_COUNT=0
+declare -a FAIL_FILES=()
+
+for f in "${EXISTING[@]}"; do
+  rel="${f#"${_dir}"/}"  # inner quotes: strip the tests dir literally, not as a glob pattern
+  printf '%s== %s ==%s\n' "$C_HEAD" "$rel" "$C_RESET"
+  if bash "$f"; then
+    PASS_COUNT=$((PASS_COUNT + 1))
+  else
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+    FAIL_FILES+=("$rel")
+  fi
+  printf '\n'
+done
+
+# Summary.
+TOTAL=$((PASS_COUNT + FAIL_COUNT))
+printf '%ssummary%s: %d/%d test files passed\n' "$C_HEAD" "$C_RESET" "$PASS_COUNT" "$TOTAL"
+if [ "$FAIL_COUNT" -gt 0 ]; then
+  printf '%sfailed%s:\n' "$C_FAIL" "$C_RESET"
+  for f in "${FAIL_FILES[@]}"; do
+    printf '  - %s\n' "$f"
+  done
+  exit 1
+fi
+printf '%sall tests passed%s\n' "$C_PASS" "$C_RESET"
diff --git a/tests/unit/test_pipelock_allowlist.sh b/tests/unit/test_pipelock_allowlist.sh
new file mode 100755
index 0000000..6c2e059
--- /dev/null
+++ b/tests/unit/test_pipelock_allowlist.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# Unit: allowlist resolution — pipelock_bottle_allowlist,
+# pipelock_bottle_ssh_hostnames, pipelock_bottle_ssh_ip_cidrs,
+# pipelock_bottle_ssh_trusted_domains, pipelock_effective_allowlist.
+TEST_NAME="pipelock_allowlist"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+# --- bottle_allowlist (egress.allowlist parsing) ---
+
+m="$(write_fixture fixture_with_egress)"
+out="$(pipelock_bottle_allowlist "$m" dev)"
+assert_contains "$out" "github.com"          "bottle_allowlist: github.com present"
+assert_contains "$out" "gitlab.com"          "bottle_allowlist: gitlab.com present"
+assert_contains "$out" "registry.npmjs.org"  "bottle_allowlist: npmjs present"
+rm -f "$m"
+
+m="$(write_fixture fixture_minimal)"
+out="$(pipelock_bottle_allowlist "$m" dev)"
+assert_eq "" "$out" "bottle_allowlist: empty when no egress block"
+rm -f "$m"
+
+# --- ssh hostnames + classification ---
+
+m="$(write_fixture fixture_with_ssh)"
+hosts="$(pipelock_bottle_ssh_hostnames "$m" dev)"
+assert_contains "$hosts" "100.78.141.42" "ssh_hostnames: ipv4 included"
+assert_contains "$hosts" "github.com"    "ssh_hostnames: hostname included"
+
+cidrs="$(pipelock_bottle_ssh_ip_cidrs "$m" dev)"
+assert_contains "$cidrs"     "100.78.141.42/32" "ssh_ip_cidrs: ipv4 emitted as /32"
+assert_not_contains "$cidrs" "github.com"       "ssh_ip_cidrs: hostname not in cidr list"
+
+trusted="$(pipelock_bottle_ssh_trusted_domains "$m" dev)"
+assert_contains "$trusted"     "github.com"        "ssh_trusted_domains: hostname present"
+assert_not_contains "$trusted" "100.78.141.42"     "ssh_trusted_domains: ipv4 not present"
+rm -f "$m"
+
+# --- effective_allowlist union (defaults + bottle.allowlist + ssh.Hostname) ---
+
+# Combine egress + ssh fixtures into one manifest.
+combined="$(mktemp)"
+cat > "$combined" <<'JSON'
+{
+  "bottles": {
+    "dev": {
+      "egress": { "allowlist": ["registry.npmjs.org"] },
+      "ssh": [
+        { "Host": "ts", "IdentityFile": "/dev/null", "Hostname": "100.78.141.42", "User": "git", "Port": 30009 },
+        { "Host": "gh", "IdentityFile": "/dev/null", "Hostname": "github.com",    "User": "git", "Port": 22 }
+      ]
+    }
+  },
+  "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } }
+}
+JSON
+
+eff="$(pipelock_effective_allowlist "$combined" dev)"
+assert_contains "$eff" "api.anthropic.com"  "effective: baked-in default present"
+assert_contains "$eff" "registry.npmjs.org" "effective: bottle egress entry present"
+assert_contains "$eff" "100.78.141.42"      "effective: ssh ipv4 hostname present"
+assert_contains "$eff" "github.com"         "effective: ssh hostname present"
+
+# Ensure dedup (sort order is not asserted): count lines, then count unique lines, expect equal.
+total="$(printf '%s\n' "$eff" | wc -l | tr -d ' ')"
+uniq="$(printf '%s\n' "$eff" | sort -u | wc -l | tr -d ' ')"
+assert_eq "$total" "$uniq" "effective: deduplicated"
+
+rm -f "$combined"
+
+# --- non-string entry rejection ---
+
+bad="$(mktemp)"
+cat > "$bad" <<'JSON'
+{
+  "bottles": { "dev": { "egress": { "allowlist": ["github.com", 42] } } },
+  "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } }
+}
+JSON
+# Paths travel as positional args so quotes or `$` in them cannot rewrite the -c script.
+assert_exit_nonzero "bottle_allowlist: rejects non-string entry" \
+  bash -c '. "$1/lib/log.sh"; . "$1/lib/pipelock.sh"; pipelock_bottle_allowlist "$2" dev' bash "$REPO_ROOT" "$bad"
+rm -f "$bad"
+
+test_summary
diff --git a/tests/unit/test_pipelock_classify.sh b/tests/unit/test_pipelock_classify.sh
new file mode 100755
index 0000000..513bfc8
--- /dev/null
+++ b/tests/unit/test_pipelock_classify.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Unit: _pipelock_is_ipv4_literal — the classifier that decides
+# whether bottle.ssh[].Hostname goes into ssrf.ip_allowlist (IPv4
+# literal) or trusted_domains (hostname).
+TEST_NAME="pipelock_classify"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+# Positive cases — these should be classified as IPv4 literals.
+for ip in "127.0.0.1" "10.0.0.5" "100.78.141.42" "0.0.0.0" "255.255.255.255"; do
+  assert_exit_zero "ipv4: ${ip}" _pipelock_is_ipv4_literal "$ip"
+done
+
+# Negative cases — hostnames, partial IPs, IPv6, and edge garbage
+# should NOT match.
+for hn in \
+  "github.com" \
+  "gitea.dideric.is" \
+  "100.78.141" \
+  "100.78.141.42.5" \
+  "::1" \
+  "fe80::1" \
+  "localhost" \
+  "" \
+  "1.2.3.4.example.com"
+do
+  assert_exit_nonzero "non-ipv4: '${hn}'" _pipelock_is_ipv4_literal "$hn"
+done
+
+test_summary
diff --git a/tests/unit/test_pipelock_naming.sh b/tests/unit/test_pipelock_naming.sh
new file mode 100755
index 0000000..4a39055
--- /dev/null
+++ b/tests/unit/test_pipelock_naming.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Unit: pipelock naming helpers (container_name, proxy_url, proxy_host_port).
+TEST_NAME="pipelock_naming"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+unset CLAUDE_BOTTLE_PIPELOCK_PORT  # an inherited override would break the 8888 expectations below
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+
+assert_eq "claude-bottle-pipelock-foo"        "$(pipelock_container_name foo)"          "container_name simple slug"
+assert_eq "claude-bottle-pipelock-some-slug"  "$(pipelock_container_name some-slug)"    "container_name with hyphens"
+
+# The port is read from CLAUDE_BOTTLE_PIPELOCK_PORT at source time; unset above, so the default (8888) applies.
+assert_eq "http://claude-bottle-pipelock-foo:8888" "$(pipelock_proxy_url foo)"        "proxy_url default port"
+assert_eq "claude-bottle-pipelock-foo:8888"        "$(pipelock_proxy_host_port foo)"  "proxy_host_port default port"
+
+# container_name and proxy_url must fail without a slug; REPO_ROOT is passed as $1, not spliced into the script.
+assert_exit_nonzero "container_name: missing slug" bash -c '. "$1/lib/log.sh"; . "$1/lib/pipelock.sh"; pipelock_container_name' bash "$REPO_ROOT"
+assert_exit_nonzero "proxy_url: missing slug"      bash -c '. "$1/lib/log.sh"; . "$1/lib/pipelock.sh"; pipelock_proxy_url' bash "$REPO_ROOT"
+
+test_summary
diff --git a/tests/unit/test_pipelock_yaml.sh b/tests/unit/test_pipelock_yaml.sh
new file mode 100755
index 0000000..e1c2e5e
--- /dev/null
+++ b/tests/unit/test_pipelock_yaml.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Unit: pipelock_write_yaml — produces a YAML config containing the
+# expected top-level keys and per-bottle entries. We don't fully parse
+# YAML (no yq dependency); we grep for content shape.
+TEST_NAME="pipelock_yaml"
+
+. "$(dirname "$0")/../lib/common.sh"
+# shellcheck source=../../lib/log.sh
+. "${REPO_ROOT}/lib/log.sh"
+# shellcheck source=../../lib/pipelock.sh
+. "${REPO_ROOT}/lib/pipelock.sh"
+# Scratch dir for emitted YAML; abort if it cannot be created so "${out_dir}/x.yaml" never resolves to "/x.yaml".
+out_dir="$(mktemp -d)" || { printf '%s\n' "pipelock_yaml: mktemp -d failed" >&2; exit 1; }
+cleanup() { rm -rf -- "${out_dir:?}"; }
+trap cleanup EXIT
+
+# --- minimal bottle (no egress, no ssh): only api_allowlist defaults ---
+
+min_manifest="$(write_fixture fixture_minimal)"
+yaml_min="${out_dir}/min.yaml"
+pipelock_write_yaml "$min_manifest" dev "$yaml_min"
+
+min_yaml="$(<"$yaml_min")"
+assert_contains "$min_yaml" 'mode: strict'              'min: mode strict'
+assert_contains "$min_yaml" 'enforce: true'             'min: enforce true'
+assert_contains "$min_yaml" 'api_allowlist:'            'min: api_allowlist block'
+assert_contains "$min_yaml" 'api.anthropic.com'         'min: anthropic baked default'
+assert_contains "$min_yaml" 'raw.githubusercontent.com' 'min: github raw baked default'
+assert_contains "$min_yaml" 'forward_proxy:'            'min: forward_proxy block'
+assert_contains "$min_yaml" 'enabled: true'             'min: forward_proxy enabled'
+assert_contains "$min_yaml" 'dlp:'                      'min: dlp block'
+assert_contains "$min_yaml" 'include_defaults: true'    'min: dlp include_defaults'
+assert_contains "$min_yaml" 'scan_env: true'            'min: dlp scan_env'
+# The minimal fixture declares no ssh entries, so the emitter is expected
+# to leave out both the trusted_domains block and the ssrf block.
+assert_not_contains "$min_yaml" 'trusted_domains:' 'min: no trusted_domains'
+assert_not_contains "$min_yaml" 'ssrf:'            'min: no ssrf block'
+
+rm -f "$min_manifest"
+
+# --- ssh bottle: trusted_domains for hostname, ssrf.ip_allowlist for ipv4 ---
+
+m_ssh="$(write_fixture fixture_with_ssh)"
+yaml_ssh="${out_dir}/ssh.yaml"
+pipelock_write_yaml "$m_ssh" dev "$yaml_ssh"
+content="$(cat "$yaml_ssh")"
+assert_contains "$content" "trusted_domains:"   "ssh: trusted_domains block emitted"
+assert_contains "$content" "github.com"         "ssh: hostname in trusted_domains (or allowlist)"
+assert_contains "$content" "ssrf:"              "ssh: ssrf block emitted"
+assert_contains "$content" "ip_allowlist:"      "ssh: ip_allowlist key under ssrf"
+assert_contains "$content" "100.78.141.42/32"   "ssh: ipv4 host emitted as /32"
+# Belt-and-suspenders: strict mode also needs the bare host in api_allowlist.
+# Drop the "/32" lines first, or the CIDR entry alone would satisfy this check.
+bare_ip_lines="$(grep -vF '100.78.141.42/32' <<<"$content" || true)"
+assert_contains "$bare_ip_lines" "100.78.141.42" "ssh: ipv4 host in api_allowlist too"
+
+rm -f "$m_ssh"
+
+# --- secret hygiene: env values from the manifest never enter the YAML ---
+m_secret="${out_dir}/secret.json"  # lives in out_dir, so the EXIT cleanup removes it on every exit path
+cat > "$m_secret" <<'JSON'
+{
+  "bottles": {
+    "dev": {
+      "env": {
+        "MY_SECRET": "literal-value-should-not-appear",
+        "ANOTHER":   "?prompt-message"
+      },
+      "egress": { "allowlist": ["github.com"] }
+    }
+  },
+  "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } }
+}
+JSON
+yaml_sec="${out_dir}/secret.yaml"
+pipelock_write_yaml "$m_secret" dev "$yaml_sec"
+content="$(cat "$yaml_sec")"
+# Prove the YAML was really written; otherwise the negative checks below would pass on empty content.
+assert_contains     "$content" "api_allowlist:"                  "secret: yaml was emitted"
+assert_not_contains "$content" "literal-value-should-not-appear" "secret: literal env value not leaked"
+assert_not_contains "$content" "MY_SECRET"                       "secret: env var name not leaked"
+assert_not_contains "$content" "prompt-message"                  "secret: prompt sentinel not leaked"
+
+# --- file mode is 600 ---
+# Try GNU stat first: with GNU, `-f` means filesystem status, so the BSD form prints unrelated text there.
+mode="$(stat -c '%a' "$yaml_min" 2>/dev/null || stat -f '%Lp' "$yaml_min")"
+mode="${mode: -3}"  # keep only the three user/group/other permission digits
+assert_eq "600" "$mode" "yaml file mode is 600"
+
+test_summary