From b0ee7da5be3f1acfea1e538295bd3ed75d2ec9fb Mon Sep 17 00:00:00 2001 From: didericis Date: Fri, 8 May 2026 01:54:25 -0400 Subject: [PATCH] test: add bash test suite covering pipelock helpers and smoke flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tests/ with a tiny bash assert harness, manifest fixtures, and a runner. No framework dependency — each test file is self-contained and exits 0 on pass / 1 on fail; tests/run_tests.sh aggregates. Unit tests (no docker): - pipelock_naming: container_name, proxy_url, proxy_host_port shape - pipelock_classify: _pipelock_is_ipv4_literal classifier coverage - pipelock_allowlist: bottle_allowlist + ssh hostnames/ip_cidrs/ trusted_domains + effective_allowlist union/dedup/sort, plus rejection of non-string entries - pipelock_yaml: emitter shape (mode/enforce/api_allowlist/forward_proxy/ dlp), conditional ssrf+trusted_domains blocks, secret hygiene (manifest env values must not appear in YAML), file mode 600 Integration tests (require docker, skip cleanly otherwise): - pipelock_image: pinned digest's ENTRYPOINT is /pipelock and CMD contains 'run' and the binary --version succeeds — would catch a future image bump that changes the launcher's argv contract - pipelock_sidecar_smoke: docker create + cp YAML to /etc/pipelock.yaml + start, then probe /health — the regression test for the bug where the YAML was written to /etc/pipelock/ (parent dir absent in the distroless image) - dry_run_plan: cli.sh start --dry-run shows the egress line, counts the bottle's entry into the effective allowlist, prints the dry-run banner, and creates zero docker resources - orphan_cleanup: the cleanup primitives the start-flow trap depends on (network_remove, pipelock_stop) are idempotent against missing/never-existed resources, so the trap is safe even if pipelock_start dies before everything is wired up Assisted-by: Claude Code --- tests/README.md | 83 ++++++++++++ 
tests/integration/test_dry_run_plan.sh | 63 +++++++++ tests/integration/test_orphan_cleanup.sh | 74 +++++++++++ tests/integration/test_pipelock_image.sh | 40 ++++++ .../test_pipelock_sidecar_smoke.sh | 87 ++++++++++++ tests/lib/assert.sh | 124 ++++++++++++++++++ tests/lib/common.sh | 20 +++ tests/lib/fixtures.sh | 99 ++++++++++++++ tests/run_tests.sh | 94 +++++++++++++ tests/unit/test_pipelock_allowlist.sh | 89 +++++++++++++ tests/unit/test_pipelock_classify.sh | 34 +++++ tests/unit/test_pipelock_naming.sh | 23 ++++ tests/unit/test_pipelock_yaml.sh | 90 +++++++++++++ 13 files changed, 920 insertions(+) create mode 100644 tests/README.md create mode 100755 tests/integration/test_dry_run_plan.sh create mode 100755 tests/integration/test_orphan_cleanup.sh create mode 100755 tests/integration/test_pipelock_image.sh create mode 100755 tests/integration/test_pipelock_sidecar_smoke.sh create mode 100644 tests/lib/assert.sh create mode 100644 tests/lib/common.sh create mode 100644 tests/lib/fixtures.sh create mode 100755 tests/run_tests.sh create mode 100755 tests/unit/test_pipelock_allowlist.sh create mode 100755 tests/unit/test_pipelock_classify.sh create mode 100755 tests/unit/test_pipelock_naming.sh create mode 100755 tests/unit/test_pipelock_yaml.sh diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..2ea901f --- /dev/null +++ b/tests/README.md @@ -0,0 +1,83 @@ +# Tests + +Plain-bash test suite. No framework dependency — assertions are tiny +helpers in `tests/lib/assert.sh` and the runner is a shell script. +The unit tests run anywhere bash + jq are present; the integration +tests need Docker and skip cleanly otherwise. + +## Layout + +``` +tests/ + run_tests.sh # entry point + lib/ + assert.sh # assert_eq, assert_contains, assert_match, ... 
+ common.sh # sources assert + fixtures, sets REPO_ROOT + fixtures.sh # JSON manifest builders + unit/ # no docker; fast + test_pipelock_naming.sh + test_pipelock_classify.sh + test_pipelock_allowlist.sh + test_pipelock_yaml.sh + integration/ # require docker + test_pipelock_image.sh + test_pipelock_sidecar_smoke.sh + test_dry_run_plan.sh + test_orphan_cleanup.sh +``` + +## Running + +```bash +tests/run_tests.sh # everything +tests/run_tests.sh unit # unit only +tests/run_tests.sh integration # integration only +tests/run_tests.sh tests/unit/test_pipelock_yaml.sh # one file +``` + +Each test file exits 0 on pass, 1 on fail. The runner aggregates and +prints a one-line summary. + +## What the integration tests cover + +These are versions of the smoke tests run during PR #1: + +- `test_pipelock_image.sh` — the pinned digest is reachable, ENTRYPOINT + is `/pipelock`, and `CMD` includes `run`. Catches a pipelock release + that bumps the argv shape. +- `test_pipelock_sidecar_smoke.sh` — `docker create` + `docker cp` the + generated YAML to `/etc/pipelock.yaml` + `docker start`, then probe + `/health`. Catches the YAML-path bug we hit (the image is distroless, + so `/etc/pipelock/` does not exist) and YAML structural breakage. +- `test_dry_run_plan.sh` — `cli.sh start --dry-run` shows the resolved + egress allowlist and creates zero docker resources. +- `test_orphan_cleanup.sh` — the cleanup helpers the start-flow EXIT + trap calls (`network_remove`, `pipelock_stop`) are idempotent: they + succeed against networks and sidecars that were never created or + are already gone, so the trap is safe however early the start flow + dies. + +## What's NOT covered + +- The full end-to-end orphan flow (`cli.sh` dies mid-run and the EXIT + trap removes both networks). It needs a TTY, so verify it manually. +- `lib/ssh.sh` end-to-end (would need a fake SSH host inside the + container; high effort for v1). +- A live SSH-through-pipelock tunnel against a real Tailscale-style + internal IP. +- DLP false-positive measurements. +- TLS handling / cert pinning behavior. + +## Adding a test + +1. Pick `unit/` (no docker) or `integration/` (docker required). +2. Name it `test_<name>.sh`. Make it executable: `chmod +x`. 
+3. Start with the boilerplate the existing files use: + ```bash + #!/usr/bin/env bash + TEST_NAME="<name>" + . "$(dirname "$0")/../lib/common.sh" + . "${REPO_ROOT}/lib/log.sh" + . "${REPO_ROOT}/lib/<module>.sh" + # ...assert_eq / assert_contains / ... + test_summary + ``` +4. For integration tests: call `skip_test_if_no_docker` after the + boilerplate and ensure your trap cleans up any docker resources you + create. diff --git a/tests/integration/test_dry_run_plan.sh b/tests/integration/test_dry_run_plan.sh new file mode 100755 index 0000000..c1ba8df --- /dev/null +++ b/tests/integration/test_dry_run_plan.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Integration: cli.sh start --dry-run renders the planned shape and +# does not create any docker resources. Confirms the preflight contract +# from PRD 0001 (allowlist line in the plan, no docker side effects). +TEST_NAME="dry_run_plan" + +. "$(dirname "$0")/../lib/common.sh" + +skip_test_if_no_docker + +work_dir="$(mktemp -d)" +manifest="${work_dir}/claude-bottle.json" + +cleanup() { + rm -rf "$work_dir" +} +trap cleanup EXIT + +# Manifest with an egress.allowlist so we can grep for a known host. +cat > "$manifest" <<'JSON' +{ + "bottles": { + "dev": { + "egress": { "allowlist": ["example.org"] } + } + }, + "agents": { + "demo": { + "skills": [], + "prompt": "", + "bottle": "dev" + } + } +} +JSON + +# Snapshot docker state before we run. +nets_before="$(docker network ls --format '{{.Name}}' | grep -c '^claude-bottle' || true)" +ctrs_before="$(docker ps -a --format '{{.Names}}' | grep -c '^claude-bottle' || true)" + +# Override HOME so the user's ~/claude-bottle.json doesn't leak in via +# manifest_resolve's home+cwd merge. +out="$(cd "$work_dir" \ + && HOME="$work_dir" CLAUDE_BOTTLE_DRY_RUN=1 \ + "${REPO_ROOT}/cli.sh" start demo 2>&1 || true)" + +assert_contains "$out" "egress" "preflight: egress line present" +# 7 baked defaults + 1 bottle entry = 8. 
The summary line shows the +# total count regardless of which entries fit in the visible +# ", , , +N more" prefix, so this assertion is robust against +# alphabetical sort order changes. +assert_match "$out" "8 hosts allowed" "preflight: bottle entry counted in effective allowlist" +assert_contains "$out" "api.anthropic.com" "preflight: baked default shown" +assert_contains "$out" "dry-run requested" "dry-run banner present" +assert_not_contains "$out" "/dev/tty" "no /dev/tty prompt reached (dry-run exited first)" + +# No docker side effects. +nets_after="$(docker network ls --format '{{.Name}}' | grep -c '^claude-bottle' || true)" +ctrs_after="$(docker ps -a --format '{{.Names}}' | grep -c '^claude-bottle' || true)" +assert_eq "$nets_before" "$nets_after" "dry-run: no claude-bottle networks created" +assert_eq "$ctrs_before" "$ctrs_after" "dry-run: no claude-bottle containers created" + +test_summary diff --git a/tests/integration/test_orphan_cleanup.sh b/tests/integration/test_orphan_cleanup.sh new file mode 100755 index 0000000..41b814e --- /dev/null +++ b/tests/integration/test_orphan_cleanup.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Integration: the cleanup primitives the start-flow trap depends on +# are idempotent. The original orphan-network bug was a trap-ordering +# issue (cleanup_all installed AFTER networks were created); the fix +# moved the install earlier. The trap is only safe if the helpers it +# calls — network_remove, pipelock_stop — are no-ops against +# already-missing or never-existed resources. We test that here. +# +# (The full end-to-end "cli.sh dies mid-run, networks gone" flow needs +# a TTY and is documented as a manual verification step in tests/README.md.) +TEST_NAME="orphan_cleanup" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/docker.sh +. "${REPO_ROOT}/lib/docker.sh" +# shellcheck source=../../lib/network.sh +. 
"${REPO_ROOT}/lib/network.sh" +# shellcheck source=../../lib/pipelock.sh +. "${REPO_ROOT}/lib/pipelock.sh" + +skip_test_if_no_docker + +slug="cb-test-orphan-$$" +internal_name="" +egress_name="" + +cleanup() { + for n in "$internal_name" "$egress_name"; do + [ -n "$n" ] && docker network rm "$n" >/dev/null 2>&1 || true + done +} +trap cleanup EXIT + +# 1. network_remove against a name that doesn't exist returns 0 +# (the trap can call it eagerly without crashing on the first run +# where the network was never created). +assert_exit_zero "network_remove: missing network is a no-op" \ + network_remove "claude-bottle-net-${slug}-does-not-exist" + +# 2. Create both networks the way cli.sh does, then remove them with +# network_remove. Both should succeed and the networks should be +# gone afterwards. +internal_name="$(network_create_internal "$slug")" +egress_name="$(network_create_egress "$slug")" + +assert_match "$(docker network ls --format '{{.Name}}')" "^${internal_name}$" \ + "internal network was created" +assert_match "$(docker network ls --format '{{.Name}}')" "^${egress_name}$" \ + "egress network was created" + +assert_exit_zero "network_remove: removes existing internal network" \ + network_remove "$internal_name" +assert_exit_zero "network_remove: removes existing egress network" \ + network_remove "$egress_name" + +nets_after="$(docker network ls --format '{{.Name}}')" +assert_not_contains "$nets_after" "$internal_name" "internal network gone after removal" +assert_not_contains "$nets_after" "$egress_name" "egress network gone after removal" + +# 3. Removing a second time is still safe — the trap may run after a +# clean exit, where the resources are already gone. +assert_exit_zero "network_remove: idempotent on already-removed internal" \ + network_remove "$internal_name" +assert_exit_zero "network_remove: idempotent on already-removed egress" \ + network_remove "$egress_name" + +# 4. 
pipelock_stop against a slug whose sidecar was never started must +# also be a no-op — same reason. +assert_exit_zero "pipelock_stop: missing sidecar is a no-op" \ + pipelock_stop "missing-${slug}" + +test_summary diff --git a/tests/integration/test_pipelock_image.sh b/tests/integration/test_pipelock_image.sh new file mode 100755 index 0000000..afff10e --- /dev/null +++ b/tests/integration/test_pipelock_image.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Integration: verify the pinned pipelock image. Requires docker. +# - Pinned digest is reachable on the registry. +# - Image's ENTRYPOINT/CMD match what lib/pipelock.sh assumes +# (`/pipelock` and `run --listen 0.0.0.0:8888`). +# - The /pipelock binary actually runs (--version succeeds). +# +# This is the test that would have caught the runtime bug where the +# CMD shape diverged from what the launcher passed. +TEST_NAME="pipelock_image" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. "${REPO_ROOT}/lib/pipelock.sh" + +skip_test_if_no_docker + +# Pull the pinned image (cheap if already cached). +if ! docker pull "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" >/dev/null 2>&1; then + skip "could not pull ${CLAUDE_BOTTLE_PIPELOCK_IMAGE}" + exit 0 +fi + +# ENTRYPOINT must be the binary path lib/pipelock.sh expects. +entrypoint="$(docker image inspect "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --format '{{json .Config.Entrypoint}}')" +assert_contains "$entrypoint" "/pipelock" "entrypoint contains /pipelock" + +# CMD must include `run` — the subcommand the launcher overrides via +# `docker create ... run --config ... --listen ...`. If a future image +# bumps the CMD shape, this fails loudly. +cmd="$(docker image inspect "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --format '{{json .Config.Cmd}}')" +assert_contains "$cmd" "run" "cmd contains 'run'" + +# Binary actually runs. 
+ver="$(docker run --rm "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" --version 2>&1 || true)" +assert_match "$ver" "[Pp]ipelock|2\\.[0-9]+\\.[0-9]+" "binary --version produces version-shaped output" + +test_summary diff --git a/tests/integration/test_pipelock_sidecar_smoke.sh b/tests/integration/test_pipelock_sidecar_smoke.sh new file mode 100755 index 0000000..5340441 --- /dev/null +++ b/tests/integration/test_pipelock_sidecar_smoke.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# Integration: full sidecar smoke test. Boots a pipelock container the +# same way cli.sh does (docker create + docker cp YAML + docker start), +# then probes /health. Catches regressions in: +# - the YAML-cp path (the /etc/pipelock.yaml vs /etc/pipelock/ bug) +# - argv shape (the `run --listen 0.0.0.0:N` invocation) +# - YAML structural validity (pipelock would refuse to start on a bad config) +TEST_NAME="pipelock_sidecar_smoke" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. "${REPO_ROOT}/lib/pipelock.sh" + +skip_test_if_no_docker + +# Use a distinct name so concurrent runs don't collide. +name="cb-test-pipelock-smoke-$$" +work_dir="$(mktemp -d)" +yaml="${work_dir}/pipelock.yaml" + +cleanup() { + docker rm -f "$name" >/dev/null 2>&1 || true + rm -rf "$work_dir" +} +trap cleanup EXIT + +# Generate a real config from a fixture manifest. +m="$(write_fixture fixture_minimal)" +pipelock_write_yaml "$m" dev "$yaml" +rm -f "$m" + +# Same lifecycle as lib/pipelock.sh's pipelock_start, minus the +# network-attach steps (we just need a port we can curl). +docker create --name "$name" -p 0:8888 \ + "$CLAUDE_BOTTLE_PIPELOCK_IMAGE" \ + run --config /etc/pipelock.yaml --listen "0.0.0.0:8888" \ + >/dev/null 2>&1 \ + || { _fail "docker create failed"; test_summary; } + +# This is the exact cp path that broke before — guard against +# regressing to a /etc/pipelock/ subdirectory destination. +if ! 
docker cp "$yaml" "${name}:/etc/pipelock.yaml" >/dev/null 2>&1; then + _fail "docker cp to /etc/pipelock.yaml failed (parent dir must already exist in image)" + test_summary +fi + +if ! docker start "$name" >/dev/null 2>&1; then + _fail "docker start failed; check that argv 'run --listen 0.0.0.0:8888' still matches image" + test_summary +fi + +# Find the host-side port docker mapped 8888 to. +hostport="$(docker port "$name" 8888 2>/dev/null | head -1 | awk -F: '{print $NF}')" +if [ -z "$hostport" ]; then + _fail "could not determine published port" "docker port output: $(docker port "$name" 2>&1)" + test_summary +fi + +# Wait up to 15 seconds for /health to come up. +healthy=0 +for _ in $(seq 1 15); do + if curl -fsS "http://127.0.0.1:${hostport}/health" >/dev/null 2>&1; then + healthy=1 + break + fi + sleep 1 +done + +if [ "$healthy" -eq 1 ]; then + _pass "sidecar /health responded" +else + _fail "sidecar /health did not respond within 15s" "logs:" "$(docker logs "$name" 2>&1 | tail -20)" + test_summary +fi + +# Body should mention the version we pinned. We don't pin the exact +# version string here because the digest we test against is one +# release; the next release will change the version field but should +# keep the schema. Keep the assertion at "field is present and has +# a numeric-dotted shape". +body="$(curl -fsS "http://127.0.0.1:${hostport}/health" 2>&1)" +assert_contains "$body" '"status":"healthy"' "/health body status:healthy" +assert_match "$body" '"version":"[0-9]+\.[0-9]+\.[0-9]+"' "/health body has version field" + +test_summary diff --git a/tests/lib/assert.sh b/tests/lib/assert.sh new file mode 100644 index 0000000..9d92ab2 --- /dev/null +++ b/tests/lib/assert.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# Tiny assertion helpers. No framework — each test file sources this, +# calls `assert_*` functions, and ends with `test_summary` which exits +# 0 if every assertion passed and 1 otherwise. 
+# +# Counters are file-local: every test process gets its own TEST_PASS / +# TEST_FAIL. run_tests.sh aggregates by exit code, not by reading these. + +if [ -n "${CLAUDE_BOTTLE_TESTS_ASSERT_SOURCED:-}" ]; then + return 0 +fi +CLAUDE_BOTTLE_TESTS_ASSERT_SOURCED=1 + +TEST_PASS=0 +TEST_FAIL=0 +TEST_NAME="${TEST_NAME:-unnamed}" + +if [ -t 1 ]; then + _C_PASS=$'\033[32m' + _C_FAIL=$'\033[31m' + _C_SKIP=$'\033[33m' + _C_RESET=$'\033[0m' +else + _C_PASS="" + _C_FAIL="" + _C_SKIP="" + _C_RESET="" +fi + +_pass() { + TEST_PASS=$((TEST_PASS + 1)) + printf ' %sPASS%s %s\n' "$_C_PASS" "$_C_RESET" "$1" +} + +_fail() { + TEST_FAIL=$((TEST_FAIL + 1)) + printf ' %sFAIL%s %s\n' "$_C_FAIL" "$_C_RESET" "$1" >&2 + shift + local line + for line in "$@"; do + printf ' %s\n' "$line" >&2 + done +} + +assert_eq() { + local expected="$1" actual="$2" msg="${3:-equal}" + if [ "$expected" = "$actual" ]; then + _pass "$msg" + else + _fail "$msg" "expected: ${expected}" "actual: ${actual}" + fi +} + +assert_contains() { + local haystack="$1" needle="$2" msg="${3:-contains}" + if printf '%s' "$haystack" | grep -qF -- "$needle"; then + _pass "$msg" + else + _fail "$msg" "expected to contain: ${needle}" "haystack: ${haystack}" + fi +} + +assert_not_contains() { + local haystack="$1" needle="$2" msg="${3:-does not contain}" + if ! printf '%s' "$haystack" | grep -qF -- "$needle"; then + _pass "$msg" + else + _fail "$msg" "expected NOT to contain: ${needle}" "haystack: ${haystack}" + fi +} + +assert_match() { + local haystack="$1" pattern="$2" msg="${3:-matches}" + if printf '%s' "$haystack" | grep -qE -- "$pattern"; then + _pass "$msg" + else + _fail "$msg" "expected pattern: ${pattern}" "haystack: ${haystack}" + fi +} + +# assert_exit_zero — runs the command, fails the assertion +# if it exits non-zero. Captures stdout+stderr for the failure message. 
+assert_exit_zero() { + local label="$1"; shift + local out + if out="$("$@" 2>&1)"; then + _pass "$label" + else + _fail "$label" "exit non-zero" "output: ${out}" + fi +} + +assert_exit_nonzero() { + local label="$1"; shift + local out + if out="$("$@" 2>&1)"; then + _fail "$label" "exit was 0; expected non-zero" "output: ${out}" + else + _pass "$label" + fi +} + +skip() { + printf ' %sSKIP%s %s\n' "$_C_SKIP" "$_C_RESET" "$1" +} + +skip_test_if_no_docker() { + if ! command -v docker >/dev/null 2>&1; then + printf '%sSKIP%s %s — docker not on PATH\n' "$_C_SKIP" "$_C_RESET" "$TEST_NAME" + exit 0 + fi + if ! docker info >/dev/null 2>&1; then + printf '%sSKIP%s %s — docker daemon unreachable\n' "$_C_SKIP" "$_C_RESET" "$TEST_NAME" + exit 0 + fi +} + +test_summary() { + printf '\n%s: %d passed, %d failed\n' "$TEST_NAME" "$TEST_PASS" "$TEST_FAIL" + if [ "$TEST_FAIL" -gt 0 ]; then + exit 1 + fi + exit 0 +} diff --git a/tests/lib/common.sh b/tests/lib/common.sh new file mode 100644 index 0000000..152107f --- /dev/null +++ b/tests/lib/common.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Common scaffolding for every test file. Sources assert.sh and computes +# REPO_ROOT so tests can `. "${REPO_ROOT}/lib/.sh"` to load the code +# they're exercising. + +if [ -n "${CLAUDE_BOTTLE_TESTS_COMMON_SOURCED:-}" ]; then + return 0 +fi +CLAUDE_BOTTLE_TESTS_COMMON_SOURCED=1 + +set -euo pipefail + +_tests_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" +TESTS_ROOT="$_tests_dir" +REPO_ROOT="$(CDPATH= cd -- "${TESTS_ROOT}/.." && pwd)" + +# shellcheck source=./assert.sh +. "${TESTS_ROOT}/lib/assert.sh" +# shellcheck source=./fixtures.sh +. "${TESTS_ROOT}/lib/fixtures.sh" diff --git a/tests/lib/fixtures.sh b/tests/lib/fixtures.sh new file mode 100644 index 0000000..e001c39 --- /dev/null +++ b/tests/lib/fixtures.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# Manifest fixture builders. 
Each function prints a JSON manifest on +# stdout; callers can pipe to a temp file or pass through `write_fixture`. + +if [ -n "${CLAUDE_BOTTLE_TESTS_FIXTURES_SOURCED:-}" ]; then + return 0 +fi +CLAUDE_BOTTLE_TESTS_FIXTURES_SOURCED=1 + +# fixture_minimal — one bottle, one agent, no env / ssh / skills. +fixture_minimal() { + cat <<'JSON' +{ + "bottles": { + "dev": {} + }, + "agents": { + "demo": { + "skills": [], + "prompt": "", + "bottle": "dev" + } + } +} +JSON +} + +# fixture_with_egress — bottle declares an egress.allowlist. +fixture_with_egress() { + cat <<'JSON' +{ + "bottles": { + "dev": { + "egress": { + "allowlist": [ + "github.com", + "gitlab.com", + "registry.npmjs.org" + ] + } + } + }, + "agents": { + "demo": { + "skills": [], + "prompt": "", + "bottle": "dev" + } + } +} +JSON +} + +# fixture_with_ssh — bottle has both an IPv4-literal SSH host (Tailscale +# CGNAT range) and a hostname SSH host, exercising both +# ssrf.ip_allowlist and trusted_domains code paths. +fixture_with_ssh() { + cat <<'JSON' +{ + "bottles": { + "dev": { + "ssh": [ + { + "Host": "tailscale-gitea", + "IdentityFile": "/dev/null", + "Hostname": "100.78.141.42", + "User": "git", + "Port": 30009 + }, + { + "Host": "github", + "IdentityFile": "/dev/null", + "Hostname": "github.com", + "User": "git", + "Port": 22 + } + ] + } + }, + "agents": { + "demo": { + "skills": [], + "prompt": "", + "bottle": "dev" + } + } +} +JSON +} + +# write_fixture — write fixture to a temp file, print +# the path. Caller must rm. +write_fixture() { + local fn="${1:?write_fixture: missing fixture function}" + local f + f="$(mktemp)" + "$fn" > "$f" + printf '%s' "$f" +} diff --git a/tests/run_tests.sh b/tests/run_tests.sh new file mode 100755 index 0000000..d3825c0 --- /dev/null +++ b/tests/run_tests.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# Test runner. 
Iterates over test_*.sh files in unit/ and integration/ +# (or just one of them when given a `unit` / `integration` argument) +# and runs each as a separate process. Aggregates exit codes and +# prints a summary. +# +# Usage: +# tests/run_tests.sh # unit + integration +# tests/run_tests.sh unit # unit only +# tests/run_tests.sh integration # integration only +# tests/run_tests.sh path/to/test_x.sh # one specific file + +set -uo pipefail + +_dir="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" + +if [ -t 1 ]; then + C_PASS=$'\033[32m' + C_FAIL=$'\033[31m' + C_HEAD=$'\033[36m' + C_RESET=$'\033[0m' +else + C_PASS="" + C_FAIL="" + C_HEAD="" + C_RESET="" +fi + +# usage — print the command-line synopsis on stdout. Takes no arguments. +usage() { + cat <<EOF +usage: tests/run_tests.sh [unit|integration|<path>] + + (no args) run unit + integration + unit run tests/unit only + integration run tests/integration only + <path> run a single test file +EOF +} + +# Collect test files. +declare -a FILES=() +case "${1:-}" in + -h|--help) usage; exit 0 ;; + unit) FILES=("${_dir}"/unit/test_*.sh) ;; + integration) FILES=("${_dir}"/integration/test_*.sh) ;; + "") FILES=("${_dir}"/unit/test_*.sh "${_dir}"/integration/test_*.sh) ;; + *) + if [ -f "$1" ]; then + FILES=("$1") + else + printf 'no such file: %s\n' "$1" >&2 + usage + exit 2 + fi + ;; +esac + +# Filter out non-existent globs (no matching files). +declare -a EXISTING=() +for f in "${FILES[@]}"; do + [ -f "$f" ] && EXISTING+=("$f") +done + +if [ "${#EXISTING[@]}" -eq 0 ]; then + printf 'no test files found\n' >&2 + exit 2 +fi + +PASS_COUNT=0 +FAIL_COUNT=0 +declare -a FAIL_FILES=() + +for f in "${EXISTING[@]}"; do + # Quote the inner expansion so _dir is stripped as a literal prefix, + # not interpreted as a glob pattern. + rel="${f#"${_dir}"/}" + printf '%s== %s ==%s\n' "$C_HEAD" "$rel" "$C_RESET" + if bash "$f"; then + PASS_COUNT=$((PASS_COUNT + 1)) + else + FAIL_COUNT=$((FAIL_COUNT + 1)) + FAIL_FILES+=("$rel") + fi + printf '\n' +done + +# Summary. 
+TOTAL=$((PASS_COUNT + FAIL_COUNT)) +printf '%ssummary%s: %d/%d test files passed\n' "$C_HEAD" "$C_RESET" "$PASS_COUNT" "$TOTAL" +if [ "$FAIL_COUNT" -gt 0 ]; then + printf '%sfailed%s:\n' "$C_FAIL" "$C_RESET" + for f in "${FAIL_FILES[@]}"; do + printf ' - %s\n' "$f" + done + exit 1 +fi +printf '%sall tests passed%s\n' "$C_PASS" "$C_RESET" diff --git a/tests/unit/test_pipelock_allowlist.sh b/tests/unit/test_pipelock_allowlist.sh new file mode 100755 index 0000000..6c2e059 --- /dev/null +++ b/tests/unit/test_pipelock_allowlist.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# Unit: allowlist resolution — pipelock_bottle_allowlist, +# pipelock_bottle_ssh_hostnames, pipelock_bottle_ssh_ip_cidrs, +# pipelock_bottle_ssh_trusted_domains, pipelock_effective_allowlist. +TEST_NAME="pipelock_allowlist" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. "${REPO_ROOT}/lib/pipelock.sh" + +# --- bottle_allowlist (egress.allowlist parsing) --- + +m="$(write_fixture fixture_with_egress)" +out="$(pipelock_bottle_allowlist "$m" dev)" +assert_contains "$out" "github.com" "bottle_allowlist: github.com present" +assert_contains "$out" "gitlab.com" "bottle_allowlist: gitlab.com present" +assert_contains "$out" "registry.npmjs.org" "bottle_allowlist: npmjs present" +rm -f "$m" + +m="$(write_fixture fixture_minimal)" +out="$(pipelock_bottle_allowlist "$m" dev)" +assert_eq "" "$out" "bottle_allowlist: empty when no egress block" +rm -f "$m" + +# --- ssh hostnames + classification --- + +m="$(write_fixture fixture_with_ssh)" +hosts="$(pipelock_bottle_ssh_hostnames "$m" dev)" +assert_contains "$hosts" "100.78.141.42" "ssh_hostnames: ipv4 included" +assert_contains "$hosts" "github.com" "ssh_hostnames: hostname included" + +cidrs="$(pipelock_bottle_ssh_ip_cidrs "$m" dev)" +assert_contains "$cidrs" "100.78.141.42/32" "ssh_ip_cidrs: ipv4 emitted as /32" +assert_not_contains "$cidrs" "github.com" 
"ssh_ip_cidrs: hostname not in cidr list" + +trusted="$(pipelock_bottle_ssh_trusted_domains "$m" dev)" +assert_contains "$trusted" "github.com" "ssh_trusted_domains: hostname present" +assert_not_contains "$trusted" "100.78.141.42" "ssh_trusted_domains: ipv4 not present" +rm -f "$m" + +# --- effective_allowlist union (defaults + bottle.allowlist + ssh.Hostname) --- + +# Combine egress + ssh fixtures into one manifest. +combined="$(mktemp)" +cat > "$combined" <<'JSON' +{ + "bottles": { + "dev": { + "egress": { "allowlist": ["registry.npmjs.org"] }, + "ssh": [ + { "Host": "ts", "IdentityFile": "/dev/null", "Hostname": "100.78.141.42", "User": "git", "Port": 30009 }, + { "Host": "gh", "IdentityFile": "/dev/null", "Hostname": "github.com", "User": "git", "Port": 22 } + ] + } + }, + "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } } +} +JSON + +eff="$(pipelock_effective_allowlist "$combined" dev)" +assert_contains "$eff" "api.anthropic.com" "effective: baked-in default present" +assert_contains "$eff" "registry.npmjs.org" "effective: bottle egress entry present" +assert_contains "$eff" "100.78.141.42" "effective: ssh ipv4 hostname present" +assert_contains "$eff" "github.com" "effective: ssh hostname present" + +# Ensure dedup + sort: count lines, then count unique lines, expect equal. +total="$(printf '%s\n' "$eff" | wc -l | tr -d ' ')" +uniq="$(printf '%s\n' "$eff" | sort -u | wc -l | tr -d ' ')" +assert_eq "$total" "$uniq" "effective: deduplicated" + +rm -f "$combined" + +# --- non-string entry rejection --- + +bad="$(mktemp)" +cat > "$bad" <<'JSON' +{ + "bottles": { "dev": { "egress": { "allowlist": ["github.com", 42] } } }, + "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } } +} +JSON + +assert_exit_nonzero "bottle_allowlist: rejects non-string entry" \ + bash -c '. "'"${REPO_ROOT}"'/lib/log.sh"; . 
"'"${REPO_ROOT}"'/lib/pipelock.sh"; pipelock_bottle_allowlist "'"$bad"'" dev' +rm -f "$bad" + +test_summary diff --git a/tests/unit/test_pipelock_classify.sh b/tests/unit/test_pipelock_classify.sh new file mode 100755 index 0000000..513bfc8 --- /dev/null +++ b/tests/unit/test_pipelock_classify.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Unit: _pipelock_is_ipv4_literal — the classifier that decides +# whether bottle.ssh[].Hostname goes into ssrf.ip_allowlist (IPv4 +# literal) or trusted_domains (hostname). +TEST_NAME="pipelock_classify" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. "${REPO_ROOT}/lib/pipelock.sh" + +# Positive cases — these should be classified as IPv4 literals. +for ip in "127.0.0.1" "10.0.0.5" "100.78.141.42" "0.0.0.0" "255.255.255.255"; do + assert_exit_zero "ipv4: ${ip}" _pipelock_is_ipv4_literal "$ip" +done + +# Negative cases — hostnames, partial IPs, IPv6, and edge garbage +# should NOT match. +for hn in \ + "github.com" \ + "gitea.dideric.is" \ + "100.78.141" \ + "100.78.141.42.5" \ + "::1" \ + "fe80::1" \ + "localhost" \ + "" \ + "1.2.3.4.example.com" +do + assert_exit_nonzero "non-ipv4: '${hn}'" _pipelock_is_ipv4_literal "$hn" +done + +test_summary diff --git a/tests/unit/test_pipelock_naming.sh b/tests/unit/test_pipelock_naming.sh new file mode 100755 index 0000000..4a39055 --- /dev/null +++ b/tests/unit/test_pipelock_naming.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Unit: pipelock naming helpers (container_name, proxy_url, proxy_host_port). +TEST_NAME="pipelock_naming" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. 
"${REPO_ROOT}/lib/pipelock.sh" + +assert_eq "claude-bottle-pipelock-foo" "$(pipelock_container_name foo)" "container_name simple slug" +assert_eq "claude-bottle-pipelock-some-slug" "$(pipelock_container_name some-slug)" "container_name with hyphens" + +# proxy_url and proxy_host_port use whatever CLAUDE_BOTTLE_PIPELOCK_PORT +# is at source time. We sourced with default (8888). +assert_eq "http://claude-bottle-pipelock-foo:8888" "$(pipelock_proxy_url foo)" "proxy_url default port" +assert_eq "claude-bottle-pipelock-foo:8888" "$(pipelock_proxy_host_port foo)" "proxy_host_port default port" + +# Both helpers should fail loudly without a slug (the `${1:?...}` guards). +assert_exit_nonzero "container_name: missing slug" bash -c '. "'"${REPO_ROOT}"'/lib/log.sh"; . "'"${REPO_ROOT}"'/lib/pipelock.sh"; pipelock_container_name' +assert_exit_nonzero "proxy_url: missing slug" bash -c '. "'"${REPO_ROOT}"'/lib/log.sh"; . "'"${REPO_ROOT}"'/lib/pipelock.sh"; pipelock_proxy_url' + +test_summary diff --git a/tests/unit/test_pipelock_yaml.sh b/tests/unit/test_pipelock_yaml.sh new file mode 100755 index 0000000..e1c2e5e --- /dev/null +++ b/tests/unit/test_pipelock_yaml.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Unit: pipelock_write_yaml — produces a YAML config containing the +# expected top-level keys and per-bottle entries. We don't fully parse +# YAML (no yq dependency); we grep for content shape. +TEST_NAME="pipelock_yaml" + +. "$(dirname "$0")/../lib/common.sh" +# shellcheck source=../../lib/log.sh +. "${REPO_ROOT}/lib/log.sh" +# shellcheck source=../../lib/pipelock.sh +. 
"${REPO_ROOT}/lib/pipelock.sh" + +out_dir="$(mktemp -d)" +cleanup() { rm -rf "$out_dir"; } +trap cleanup EXIT + +# --- minimal bottle (no egress, no ssh): only api_allowlist defaults --- + +m_min="$(write_fixture fixture_minimal)" +yaml_min="${out_dir}/min.yaml" +pipelock_write_yaml "$m_min" dev "$yaml_min" + +content="$(cat "$yaml_min")" +assert_contains "$content" "mode: strict" "min: mode strict" +assert_contains "$content" "enforce: true" "min: enforce true" +assert_contains "$content" "api_allowlist:" "min: api_allowlist block" +assert_contains "$content" "api.anthropic.com" "min: anthropic baked default" +assert_contains "$content" "raw.githubusercontent.com" "min: github raw baked default" +assert_contains "$content" "forward_proxy:" "min: forward_proxy block" +assert_contains "$content" "enabled: true" "min: forward_proxy enabled" +assert_contains "$content" "dlp:" "min: dlp block" +assert_contains "$content" "include_defaults: true" "min: dlp include_defaults" +assert_contains "$content" "scan_env: true" "min: dlp scan_env" +# No ssh entries in the manifest, so neither ssrf nor trusted_domains +# blocks should be emitted. 
+assert_not_contains "$content" "trusted_domains:" "min: no trusted_domains" +assert_not_contains "$content" "ssrf:" "min: no ssrf block" + +rm -f "$m_min" + +# --- ssh bottle: trusted_domains for hostname, ssrf.ip_allowlist for ipv4 --- + +m_ssh="$(write_fixture fixture_with_ssh)" +yaml_ssh="${out_dir}/ssh.yaml" +pipelock_write_yaml "$m_ssh" dev "$yaml_ssh" + +content="$(cat "$yaml_ssh")" +assert_contains "$content" "trusted_domains:" "ssh: trusted_domains block emitted" +assert_contains "$content" "github.com" "ssh: hostname in trusted_domains (or allowlist)" +assert_contains "$content" "ssrf:" "ssh: ssrf block emitted" +assert_contains "$content" "ip_allowlist:" "ssh: ip_allowlist key under ssrf" +assert_contains "$content" "100.78.141.42/32" "ssh: ipv4 host emitted as /32" +# Belt-and-suspenders: the ipv4 host should also be in api_allowlist +# (strict mode requires both). +assert_contains "$content" "100.78.141.42" "ssh: ipv4 host in api_allowlist too" + +rm -f "$m_ssh" + +# --- secret hygiene: env values from the manifest never enter the YAML --- + +m_secret="$(mktemp)" +cat > "$m_secret" <<'JSON' +{ + "bottles": { + "dev": { + "env": { + "MY_SECRET": "literal-value-should-not-appear", + "ANOTHER": "?prompt-message" + }, + "egress": { "allowlist": ["github.com"] } + } + }, + "agents": { "demo": { "skills": [], "prompt": "", "bottle": "dev" } } +} +JSON +yaml_sec="${out_dir}/secret.yaml" +pipelock_write_yaml "$m_secret" dev "$yaml_sec" +content="$(cat "$yaml_sec")" +assert_not_contains "$content" "literal-value-should-not-appear" "secret: literal env value not leaked" +assert_not_contains "$content" "MY_SECRET" "secret: env var name not leaked" +assert_not_contains "$content" "prompt-message" "secret: prompt sentinel not leaked" +rm -f "$m_secret" + +# --- file mode is 600 --- +# Try the GNU/busybox spelling first. On GNU coreutils `stat -f` means +# --file-system: it exits non-zero for the bogus '%p' operand but still +# prints filesystem details for the real file on stdout, polluting the +# captured value. BSD/macOS stat rejects -c with nothing on stdout, so +# falling through to the -f form there is clean. +mode="$(stat -c '%a' "$yaml_min" 2>/dev/null || stat -f '%p' "$yaml_min")" +# macOS stat -f '%p' returns full mode like 100600; trim. Linux stat -c '%a' gives just 600. 
+mode="${mode: -3}" +assert_eq "600" "$mode" "yaml file mode is 600" + +test_summary