refactor(demo): drive recording through real cli.py instead of a harness
test / unit (push) Successful in 14s
test / integration (push) Successful in 29s

The previous demo harness called the backend Python API directly,
which didn't match what a user typing `./cli.py start <agent>` would
actually see. The recording now goes through the real CLI surface:

- claude-bottle.demo.json + scripts/demo-setup.sh stage a demo
  manifest (one bottle, FAKE_TOKEN env, one unreachable git upstream)
  alongside a dummy SSH identity at ~/.cache/claude-bottle-demo/.
- docs/demo.tape types `./cli.py start demo`, answers the y/N
  preflight, and runs four bash probes via claude's `!` prefix
  (curl x3 + git push), so the recording shows real preflight output
  and real probe results.
- scripts/demo.sh wraps setup -> cli.py -> teardown for human use;
  scripts/demo-record.sh does the same around `vhs docs/demo.tape`.
- .gitignore picks up claude-bottle.json so a user's local manifest
  doesn't get tracked alongside .example / .demo siblings.

scripts/demo_harness.py is removed -- its behavior is fully replaced
by the cli.py + `!` flow.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 01:26:41 -04:00
parent 4ef1cc58df
commit 030a6bc793
9 changed files with 176 additions and 314 deletions
+4
View File
@@ -1,6 +1,10 @@
.DS_Store
Thumbs.db
# User's local manifest — may contain real secrets. The repo ships
# claude-bottle.example.json and claude-bottle.demo.json for reference.
claude-bottle.json
# Claude Code local state — agent memory, scheduler lock, etc.
.claude/
+25
View File
@@ -0,0 +1,25 @@
{
"bottles": {
"demo": {
"env": {
"FAKE_TOKEN": "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
},
"git": [
{
"Name": "foo",
"Upstream": "ssh://git@upstream.invalid/path.git",
"IdentityFile": "~/.cache/claude-bottle-demo/fake-key",
"KnownHostKey": "ssh-ed25519 AAAAEXAMPLE"
}
]
}
},
"agents": {
"demo": {
"bottle": "demo",
"skills": [],
"prompt": ""
}
}
}
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 183 KiB

After

Width:  |  Height:  |  Size: 2.4 MiB

+55 -22
View File
@@ -1,38 +1,71 @@
# VHS tape — produces docs/demo.gif from scripts/demo.sh.
# VHS tape — drives `./cli.py start demo` interactively and runs four
# bash probes via claude's `!` prefix. Setup (manifest + dummy SSH key
# + image pre-warm) and teardown happen outside the tape; record via
# `bash scripts/demo-record.sh`, which wraps both.
#
# Usage:
# brew install vhs # if you don't have it
# bash scripts/demo-record.sh # wraps setup + `vhs docs/demo.tape` + teardown; ~60-90s; writes docs/demo.gif
#
# Re-record on changes:
# - new scenarios → tweak Height below
# - faster overall → shorten the Sleep after the trailing summary
#
# The harness paces itself with its own time.sleep() calls so each
# scenario block has time to be read; VHS only needs to capture the
# whole run end-to-end.
# Re-record when the probe results, manifest, or cli.py preflight
# rendering change.
Output docs/demo.gif
Set Shell "bash"
Set FontSize 14
Set Width 1100
Set FontSize 13
Set Width 1180
Set Height 780
Set Padding 20
Set Theme "Catppuccin Mocha"
Set TypingSpeed 60ms
Set PlaybackSpeed 1.0
Set TypingSpeed 40ms
Hide
Type "clear"
Enter
Show
Type "bash scripts/demo.sh"
Sleep 500ms
# Real cli.py invocation — what a user with claude-bottle.json in cwd
# would type. The bottle declares no allowlist of its own (only the
# baked-in defaults apply), one git upstream (unreachable on purpose,
# so gitleaks runs before the gate would forward), and a FAKE_TOKEN
# env var shaped like a GitHub PAT.
Type "./cli.py start demo"
Enter
Sleep 8s
# Confirm the y/N preflight. cli.py reads from /dev/tty.
Type "y"
Enter
# Warm-cache run takes ~14s; first-time runs that build images will be
# longer, but the wrapper pre-warms quietly so the recording sees a
# warm path. Pad a few seconds so the trailing PASS summary holds.
Sleep 20s
# Wait for the bottle to launch: networks created, pipelock + git-gate
# sidecars started, agent container started, claude boots.
Sleep 22s
# Probe 1 — allowlisted HTTPS reaches an allowlisted host via the
# bumped TLS tunnel. Baseline: the proxy isn't just blocking everything.
Type `! curl --proxy "$HTTPS_PROXY" -sw 'status=%{http_code}\n' -o /dev/null https://raw.githubusercontent.com/git/git/master/README.md`
Enter
Sleep 5s
# Probe 2 — non-allowlisted host. Pipelock's host filter refuses to
# forward; DLP doesn't even get a chance to run.
Type `! curl --proxy "$HTTPS_PROXY" -sw 'status=%{http_code}\n' -o /dev/null http://example.com/`
Enter
Sleep 5s
# Probe 3 — allowlisted host BUT body carries a credential pattern.
# api.anthropic.com is on the baked-in allowlist, so the host check
# passes; the DLP body scanner has to catch the ghp_ pattern.
Type `! curl --proxy "$HTTPS_PROXY" -sw 'status=%{http_code}\n' -o /dev/null --data "token=$FAKE_TOKEN" http://api.anthropic.com/dlp-probe`
Enter
Sleep 5s
# Probe 4 — git push of a file containing an AKIA-shaped key. The
# bottle's ~/.gitconfig rewrites the upstream URL to the git-gate via
# `insteadOf`, so this push hits the gate, gitleaks runs in the
# pre-receive hook, and rejects the ref before the gate would forward.
Type `! cd /tmp && rm -rf r && git init -qb main r && cd r && git config user.email demo@x && git config user.name demo && echo AKIAQRJHK7N5ZPM2VXTL > leak.txt && git add . && git commit -qm leak && git push ssh://git@upstream.invalid/path.git main`
Enter
Sleep 10s
# Leave claude. The launcher tears down the container, sidecars, and
# networks on session end.
Ctrl+D
Sleep 4s
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Records docs/demo.gif with VHS: stages the demo via demo-setup.sh,
# plays docs/demo.tape through `vhs`, and runs demo-teardown.sh when the
# script exits. Needs `vhs` on PATH (brew install vhs).
set -euo pipefail

cd "$(dirname "$0")/.."

# Print a prefixed error on stderr and abort with status 1.
die() {
  echo "demo-record: $*" >&2
  exit 1
}

command -v vhs >/dev/null 2>&1 \
  || die "vhs not found on PATH (brew install vhs)"

[ -n "${CLAUDE_BOTTLE_OAUTH_TOKEN:-}" ] \
  || die "CLAUDE_BOTTLE_OAUTH_TOKEN is unset; claude inside the bottle will not auth"

# The teardown trap is armed only after setup succeeded, so a failed
# precondition above never triggers a teardown of state that was not staged.
bash scripts/demo-setup.sh
trap 'bash scripts/demo-teardown.sh' EXIT

vhs docs/demo.tape
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Prepare the working directory to run the recorded demo via cli.py:
#   - back up any existing claude-bottle.json so the user's real config
#     isn't clobbered
#   - install claude-bottle.demo.json as claude-bottle.json
#   - create a dummy SSH identity at the path the demo manifest expects
#   - pre-warm the bottle + git-gate images quietly so the recording
#     doesn't spend its first 30s in BuildKit output
set -euo pipefail

cd "$(dirname "$0")/.."

if ! docker info >/dev/null 2>&1; then
  echo "demo-setup: docker daemon not reachable" >&2
  exit 1
fi

manifest=claude-bottle.json
demo_manifest=claude-bottle.demo.json
# Stored alongside the manifest with a deterministic name so teardown
# can find it without state files.
backup=claude-bottle.json.demo-backup

# Back up an existing local manifest (untouched if absent). A manifest
# that is byte-identical to the demo manifest is a leftover from an
# interrupted run, not user config: backing it up would overwrite the
# real backup with the demo copy, so it is skipped.
if [ -f "$manifest" ] && ! cmp -s "$manifest" "$demo_manifest"; then
  if [ -e "$backup" ]; then
    # Two candidate "real" manifests exist; refuse to pick one silently.
    echo "demo-setup: $backup already exists (interrupted earlier run?); restore or remove it, then re-run" >&2
    exit 1
  fi
  cp "$manifest" "$backup"
fi
cp "$demo_manifest" "$manifest"

# Dummy SSH identity — the git-gate validator wants a readable file at
# the IdentityFile path. Contents don't matter for the demo: the
# unreachable upstream means the gate never actually uses the key.
fake_key_dir="$HOME/.cache/claude-bottle-demo"
mkdir -p "$fake_key_dir"
chmod 700 "$fake_key_dir"
printf 'not-a-real-key\n' > "$fake_key_dir/fake-key"
chmod 600 "$fake_key_dir/fake-key"

# Build the image graph quietly so the recorded run shows only the
# bottle launch and the four `!` probes, not BuildKit progress. This is
# a best-effort pre-warm: build failures are deliberately ignored here.
docker build -q -t claude-bottle:latest . >/dev/null 2>&1 || true
docker build -q -f Dockerfile.git-gate -t claude-bottle-git-gate:latest . >/dev/null 2>&1 || true
+14
View File
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Undo what demo-setup.sh did. Restores any pre-existing
# claude-bottle.json, removes the dummy SSH identity. Idempotent: safe
# to run repeatedly, or without a prior setup, because it never deletes
# a manifest other than the staged demo copy.
set -euo pipefail

cd "$(dirname "$0")/.."

if [ -f claude-bottle.json.demo-backup ]; then
  # Setup stashed the user's manifest; move it back over the demo copy.
  mv -f claude-bottle.json.demo-backup claude-bottle.json
elif [ -f claude-bottle.json ] && cmp -s claude-bottle.json claude-bottle.demo.json; then
  # No backup: only remove the manifest if it is still byte-identical to
  # the demo manifest. An unconditional rm here would delete the user's
  # restored manifest on a second teardown run.
  rm -f claude-bottle.json
fi

rm -rf "$HOME/.cache/claude-bottle-demo"
+17 -44
View File
@@ -1,56 +1,29 @@
#!/usr/bin/env bash
# Demo runner: builds the image graph if needed, then runs the four-scenario
# harness against a real bottle. Designed to produce screen-recordable
# output — paced banners, color, no Python tracebacks unless something
# actually breaks.
# Human-runnable demo wrapper. Stages the demo manifest and dummy
# identity (see scripts/demo-setup.sh), launches `./cli.py start demo`
# interactively, then restores prior state. The recorded GIF
# (docs/demo.gif) goes through the same flow via docs/demo.tape.
#
# Usage:
# bash scripts/demo.sh # run live
# vhs docs/demo.tape # record to docs/demo.gif
# Once attached to claude inside the bottle, use the `!` prefix to run
# bash directly — e.g.
# ! curl --proxy "$HTTPS_PROXY" -sw 'status=%{http_code}\n' \
# -o /dev/null http://example.com/
# returns 403 because example.com is not on the bottle's allowlist.
set -euo pipefail
cd "$(dirname "$0")/.."
verbose=0
for arg in "$@"; do
case "$arg" in
-v|--verbose) verbose=1 ;;
-h|--help)
cat <<EOF
Usage: bash scripts/demo.sh [--verbose]
Runs four pipelock + git-gate probes against a real bottle and prints
PASS/BLOCK verdicts. Without --verbose, Docker build chatter and
backend log lines are suppressed so the output is recordable.
if [ -z "${CLAUDE_BOTTLE_OAUTH_TOKEN:-}" ]; then
cat <<'EOF' >&2
demo: CLAUDE_BOTTLE_OAUTH_TOKEN is unset. The bottle launches claude,
which needs the token to authenticate. Set it in your shell env (e.g.
~/.zshrc) — see README §Auth — then re-run.
EOF
exit 0 ;;
esac
done
if ! command -v docker >/dev/null 2>&1; then
echo "docker not found on PATH — install Docker Desktop or equivalent first" >&2
exit 1
fi
if ! docker info >/dev/null 2>&1; then
echo "docker daemon not reachable — start Docker and re-run" >&2
exit 1
fi
bash scripts/demo-setup.sh
trap 'bash scripts/demo-teardown.sh' EXIT
# Pre-warm the image graph quietly so the recorded run shows only the
# four scenario blocks, not BuildKit progress. The backend rebuilds
# (cache-hit) on launch regardless; doing it once up front keeps the
# launch-time chatter short.
if [ "$verbose" = 0 ]; then
docker build -q -t claude-bottle:latest . >/dev/null 2>&1 || true
docker build -q -f Dockerfile.git-gate -t claude-bottle-git-gate:latest . >/dev/null 2>&1 || true
fi
if [ "$verbose" = 1 ]; then
exec python3 -u scripts/demo_harness.py
else
# Stderr carries backend info() lines and BuildKit chatter; drop it.
# The harness writes all scenario output (banners, results) to stdout.
exec python3 -u scripts/demo_harness.py 2>/dev/null
fi
./cli.py start demo
-248
View File
@@ -1,248 +0,0 @@
"""End-to-end demo: spin up one bottle and show pipelock + git-gate
in action across four scenarios. Mirrors the integration tests in
tests/integration/ but with paced output suitable for screen
recording. Run with `bash scripts/demo.sh` for a banner-wrapped
session; this file is also runnable directly as `python -u
scripts/demo_harness.py`.
The bottle declares one `git` upstream (unreachable on purpose: the
gitleaks pre-receive hook runs before the gate would try to forward
to the real upstream) and one `FAKE_TOKEN` env var whose value has
the shape of a GitHub PAT so pipelock's DLP layer recognizes it.
All four probes run inside the same bottle so the demo proves the
same agent that can reach `api.anthropic.com` is the one that gets
blocked from reaching `example.com` and from posting credentials
anywhere.
"""
from __future__ import annotations
import sys
import tempfile
import time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from claude_bottle.backend import BottleSpec, get_bottle_backend # noqa: E402
from claude_bottle.manifest import Manifest # noqa: E402
FAKE_TOKEN = "ghp_aB3cD4eF5gH6iJ7kL8mN9oP0qR1sT2uV3wX4yZ"
FAKE_AWS_KEY = "AKIAQRJHK7N5ZPM2VXTL"
ALLOW_TARGET = "https://raw.githubusercontent.com/git/git/master/README.md"
BLOCK_HOST = "example.com"
DLP_TARGET_HOST = "api.anthropic.com"
def banner(n: int, total: int, title: str) -> None:
    """Print a framed, colored header for scenario *n* of *total*.

    Emits a blank line, a 60-character rule, a ``Scenario n/total  title``
    line, and a closing rule, then flushes stdout so the header appears
    before any output that follows.

    Args:
        n: 1-based index of the scenario being announced.
        total: Total number of scenarios in the run.
        title: Human-readable description shown after the counter.
    """
    # The rule used to be the empty string repeated 60 times, which renders
    # as nothing; a box-drawing dash gives the header a visible frame.
    bar = "─" * 60
    print(f"\n\033[1;36m{bar}\033[0m")
    print(f"\033[1;36mScenario {n}/{total}\033[0m \033[1m{title}\033[0m")
    print(f"\033[1;36m{bar}\033[0m")
    sys.stdout.flush()
def cmd(s: str) -> None:
    """Echo *s* as a dimmed ``$ ``-prefixed shell prompt line and flush stdout."""
    print("\033[2m$ {}\033[0m".format(s), flush=True)
def verdict(passed: bool, msg: str) -> None:
    """Print *msg* as a bold result line and flush stdout.

    A truthy *passed* renders a green check mark; otherwise a red cross.
    """
    color, mark = ("32", "✓") if passed else ("31", "✗")
    print(f"\033[1;{color}m{mark} {msg}\033[0m", flush=True)
def pause(seconds: float = 1.2) -> None:
    """Block for *seconds* (default 1.2) to pace the demo output."""
    delay = seconds
    time.sleep(delay)
def scenario_allow(bottle) -> bool:
    """Scenario 1: fetch ALLOW_TARGET through the proxy from inside the bottle.

    Runs curl via ``bottle.exec`` with the body saved to /tmp/body.txt, then
    prints the captured stdout (HTTP status plus body length).

    Returns:
        True when the output contains ``status=200`` and does not contain
        ``len=0``; False otherwise.
    """
    host = ALLOW_TARGET.split('/')[2]
    banner(1, 4, f"Allowlisted HTTPS GET → {host}")
    cmd(f'curl --proxy "$HTTPS_PROXY" {ALLOW_TARGET}')

    shell_lines = [
        "set -eu",
        # `%{http_code}` is curl's own write-out syntax, not Python formatting.
        'curl --proxy "$HTTPS_PROXY" -s --max-time 10 '
        "-w 'status=%{http_code} ' -o /tmp/body.txt " + ALLOW_TARGET,
        'echo "len=$(wc -c < /tmp/body.txt)"',
    ]
    outcome = bottle.exec("\n".join(shell_lines) + "\n")
    print(outcome.stdout.rstrip())

    got_200 = "status=200" in outcome.stdout
    ok = got_200 and "len=0" not in outcome.stdout
    verdict(ok, "forwarded — the proxy isn't just blocking everything")
    return ok
# Node.js source for a plain-HTTP probe sent through the proxy named by
# $HTTPS_PROXY. It is configured through environment variables:
#   PROBE_TARGET - absolute http URL, sent as the request path
#   PROBE_METHOD - HTTP method (defaults to GET)
#   PROBE_BODY   - optional form-encoded request body
# It prints `status=<code>` once the response ends, `error=<code> <message>`
# on a request error, or `timeout` after 5 seconds; the status and error
# paths both exit 0, so callers inspect stdout rather than the exit code.
_HTTP_PROBE = r"""
const http = require('http');
const proxy = new URL(process.env.HTTPS_PROXY);
const target = process.env.PROBE_TARGET; // absolute http URL
const body = process.env.PROBE_BODY || '';
const method = process.env.PROBE_METHOD || 'GET';
const url = new URL(target);
const opts = {
host: proxy.hostname, port: proxy.port, method,
path: target,
headers: { Host: url.host },
};
if (body) {
opts.headers['Content-Type'] = 'application/x-www-form-urlencoded';
opts.headers['Content-Length'] = Buffer.byteLength(body);
}
const req = http.request(opts, (res) => {
res.resume();
res.on('end', () => { console.log('status=' + res.statusCode); process.exit(0); });
});
req.on('error', (e) => { console.log('error=' + (e.code||'') + ' ' + e.message); process.exit(0); });
req.setTimeout(5000, () => { console.log('timeout'); req.destroy(); });
if (body) req.write(body);
req.end();
"""
def _node_probe(bottle, *, target: str, method: str = "GET", body: str = ""):
    """Run the _HTTP_PROBE Node script inside *bottle* and return the exec result.

    The generated shell script writes the probe source to /tmp/probe.js via
    a quoted heredoc, then invokes ``node`` with the request described by
    the PROBE_TARGET / PROBE_METHOD / PROBE_BODY environment variables.

    Args:
        bottle: Object exposing ``exec(script)``; the script uses POSIX
            shell syntax (``set -e``, heredoc).
        target: Absolute http URL to request through the proxy.
        method: HTTP method for the request.
        body: Optional request body; empty means no body is sent.

    Returns:
        Whatever ``bottle.exec`` returns for the generated script.
    """
    import shlex  # local import: keeps this block self-contained

    # shlex.quote, not repr(): repr() switches to double quotes when the
    # value contains a single quote, which would let the shell expand `$`
    # and backticks inside the value.
    env_assignments = " ".join(
        f"{name}={shlex.quote(value)}"
        for name, value in (
            ("PROBE_TARGET", target),
            ("PROBE_METHOD", method),
            ("PROBE_BODY", body),
        )
    )
    script = (
        "set -e\n"
        "cat > /tmp/probe.js <<'PROBE_EOF'\n"
        f"{_HTTP_PROBE}\n"
        "PROBE_EOF\n"
        f"{env_assignments} node /tmp/probe.js\n"
    )
    return bottle.exec(script)
def scenario_block_host(bottle) -> bool:
    """Scenario 2: GET a host that is not on the allowlist (BLOCK_HOST).

    Prints the probe's stdout and a verdict line.

    Returns:
        True when the probe output does not contain ``status=200``.
    """
    url = f"http://{BLOCK_HOST}/"
    banner(2, 4, f"Non-allowlisted host → GET {url}")
    cmd(f"node probe.js # GET {url}")

    probe = _node_probe(bottle, target=url)
    print(probe.stdout.rstrip())

    # Anything other than a 200 (403, error, timeout) counts as blocked.
    blocked = "status=200" not in probe.stdout
    verdict(blocked, "blocked at the host allowlist — DLP never had to run")
    return blocked
def scenario_block_dlp(bottle) -> bool:
    """Scenario 3: POST a credential-shaped body to DLP_TARGET_HOST.

    Sends ``token=<FAKE_TOKEN>`` to ``/dlp-probe`` on the target host via
    the Node probe, then prints the probe's stdout and a verdict line.

    Returns:
        True when the probe output contains ``status=403``.
    """
    url = f"http://{DLP_TARGET_HOST}/dlp-probe"
    banner(3, 4, f"Allowlisted host + secret in body → POST {url}")
    cmd(f"node probe.js # POST token=ghp_… to {url}")

    payload = f"token={FAKE_TOKEN}"
    probe = _node_probe(bottle, target=url, method="POST", body=payload)
    print(probe.stdout.rstrip())

    blocked = "status=403" in probe.stdout
    message = (
        "blocked by pipelock DLP — host was allowed, "
        "body matched a credential pattern"
    )
    verdict(blocked, message)
    return blocked
def scenario_block_git_push(bottle) -> bool:
    """Scenario 4: push a commit containing FAKE_AWS_KEY and expect rejection.

    Creates a throwaway repo under /tmp inside the bottle, commits a file
    holding the AKIA-shaped key, and pushes to the demo upstream URL. The
    push is retried up to 15 times, one second apart, until it either
    succeeds or its output mentions gitleaks / a rejection.

    Returns:
        True when the script exits non-zero and the combined output contains
        ``gitleaks rejected`` or ``leaks found``.
    """
    banner(4, 4, "git push of a file containing AKIA… → git-gate")
    cmd("echo AKIA… > leak.txt && git commit -m leak && git push")

    shell_script = (
        "set -e\n"
        "cd /tmp\n"
        "rm -rf repo && git init -q -b main repo && cd repo\n"
        "git config user.email demo@example.com\n"
        "git config user.name demo\n"
        f"echo '{FAKE_AWS_KEY}' > leak.txt\n"
        "git add leak.txt\n"
        "git commit -q -m 'oops: hardcoded key'\n"
        # The bottle's `~/.gitconfig` insteadOf rule rewrites this URL to
        # git://<gate>/foo.git. The loop retries for a short while because
        # git-daemon needs a moment to bind after the gate container starts.
        "for i in $(seq 1 15); do\n"
        " out=$(git push ssh://git@upstream.invalid/path.git main 2>&1) && echo \"$out\" && exit 0\n"
        " case \"$out\" in *'gitleaks'*|*'leaks found'*|*'rejected'*) echo \"$out\"; exit 1;; esac\n"
        " sleep 1\n"
        "done\n"
        "echo TIMEOUT_WAITING_FOR_GATE; exit 2\n"
    )
    pushed = bottle.exec(shell_script)
    combined = (pushed.stdout + pushed.stderr).rstrip()

    # Show just the last 8 lines: the gitleaks verdict sits at the end and
    # the raw git stderr above it is noisy.
    print("\n".join(combined.splitlines()[-8:]))

    leak_reported = "gitleaks rejected" in combined or "leaks found" in combined
    rejected = pushed.returncode != 0 and leak_reported
    verdict(
        rejected,
        "rejected by git-gate pre-receive hook — upstream never saw the ref",
    )
    return rejected
def main() -> int:
    """Launch one demo bottle, run the four scenarios, and print a summary.

    Stages a dummy SSH key in a temporary directory, builds an in-memory
    manifest (one bottle with FAKE_TOKEN and one unreachable git upstream,
    one agent), launches the bottle through the backend, and runs the
    scenarios in order with pauses between them. The staging directory is
    removed afterwards, on success or failure.

    Returns:
        0 when every scenario passed, 1 otherwise.
    """
    import shutil  # local import: only needed for staging-dir cleanup

    stage_dir = Path(tempfile.mkdtemp(prefix="cb-demo-stage."))
    results: list[bool] = []
    try:
        fake_key = stage_dir / "fake-key"
        fake_key.write_text("not-a-real-key\n")

        manifest = Manifest.from_json_obj({
            "bottles": {
                "demo": {
                    "env": {"FAKE_TOKEN": FAKE_TOKEN},
                    "git": [{
                        "Name": "foo",
                        "Upstream": "ssh://git@upstream.invalid/path.git",
                        "IdentityFile": str(fake_key),
                        "KnownHostKey": "ssh-ed25519 AAAAEXAMPLE",
                    }],
                },
            },
            "agents": {
                "demo": {"skills": [], "prompt": "", "bottle": "demo"},
            },
        })

        print("\033[1mclaude-bottle demo: pipelock + git-gate, four probes, one bottle\033[0m")
        print(
            "\033[2mbuilding image graph, starting bottle and sidecars…\033[0m"
        )
        sys.stdout.flush()

        backend = get_bottle_backend()
        spec = BottleSpec(
            manifest=manifest,
            agent_name="demo",
            copy_cwd=False,
            user_cwd=str(stage_dir),
            forward_oauth_token=False,
        )
        plan = backend.prepare(spec, stage_dir=stage_dir)

        with backend.launch(plan) as bottle:
            pause(0.6)
            results.append(scenario_allow(bottle))
            pause(2.0)
            results.append(scenario_block_host(bottle))
            pause(2.0)
            results.append(scenario_block_dlp(bottle))
            pause(2.0)
            results.append(scenario_block_git_push(bottle))
            pause(1.0)
    finally:
        # mkdtemp() never cleans up after itself. Removal is best-effort so
        # a cleanup hiccup cannot mask the demo's own result or exception.
        shutil.rmtree(stage_dir, ignore_errors=True)

    # The rule used to be the empty string repeated 60 times (i.e. nothing).
    bar = "─" * 60
    print(f"\n\033[1;36m{bar}\033[0m")
    passed = sum(results)
    color = "32" if passed == len(results) else "31"
    print(f"\033[1;{color}m{passed}/{len(results)} scenarios passed\033[0m")
    print(f"\033[1;36m{bar}\033[0m")
    return 0 if passed == len(results) else 1
if __name__ == "__main__":
sys.exit(main())