From c45f384fb85bacf285730a9ce98a1a0d94c43de0 Mon Sep 17 00:00:00 2001 From: didericis Date: Thu, 7 May 2026 22:45:36 -0400 Subject: [PATCH] Initial commit --- .claude/skills/init-entry/SKILL.md | 114 +++ .dockerignore | 10 + .gitignore | 6 + CLAUDE.md | 199 ++++ Dockerfile | 79 ++ README.md | 75 ++ cli.sh | 966 ++++++++++++++++++ docs/INDEX.md | 1 + docs/JOURNAL.md | 5 + docs/prds/.gitkeep | 0 docs/research/.gitkeep | 0 .../host-dispatch-to-container-agents.md | 58 ++ .../landscape-containerized-claude.md | 76 ++ .../local-vs-remote-agent-execution.md | 231 +++++ lib/docker.sh | 96 ++ lib/env.sh | 33 + lib/env_resolve.sh | 205 ++++ lib/log.sh | 24 + lib/manifest.sh | 243 +++++ lib/skills.sh | 101 ++ lib/ssh.sh | 205 ++++ 21 files changed, 2727 insertions(+) create mode 100644 .claude/skills/init-entry/SKILL.md create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Dockerfile create mode 100644 README.md create mode 100755 cli.sh create mode 100644 docs/INDEX.md create mode 100644 docs/JOURNAL.md create mode 100644 docs/prds/.gitkeep create mode 100644 docs/research/.gitkeep create mode 100644 docs/research/host-dispatch-to-container-agents.md create mode 100644 docs/research/landscape-containerized-claude.md create mode 100644 docs/research/local-vs-remote-agent-execution.md create mode 100644 lib/docker.sh create mode 100644 lib/env.sh create mode 100644 lib/env_resolve.sh create mode 100644 lib/log.sh create mode 100644 lib/manifest.sh create mode 100644 lib/skills.sh create mode 100644 lib/ssh.sh diff --git a/.claude/skills/init-entry/SKILL.md b/.claude/skills/init-entry/SKILL.md new file mode 100644 index 0000000..4cc004a --- /dev/null +++ b/.claude/skills/init-entry/SKILL.md @@ -0,0 +1,114 @@ +--- +name: init-entry +description: Add a new timestamped entry to the project journal at ./docs/JOURNAL.md in the current working directory, creating the file (and `docs/`) if missing. 
Use when the user invokes /init-entry or asks to "log this in the journal", "add a journal entry", "record this", "write this down", or similar. Entries are stream-of-thought prose, not templates — newest on top, append-only. +--- + +# Add a journal entry + +Adds a new entry to `./docs/JOURNAL.md` in the current working directory. The journal is an append-only log of unstructured stream-of-thought entries, newest on top. Each entry is a timestamp heading followed by freeform prose — whatever was on the mind worth capturing. No title, no template, no required sections. Do NOT under ANY CIRCUMSTANCES delete old entries when appending to the top. + +## Format + +Each entry looks like: + +``` +## YYYY-MM-DD HH:MM + +[github](tag://github) [postgres](tag://postgres) + + +``` + +**Tag line rules:** + +- Tags appear on the line immediately under the timestamp heading (no blank line between header and tags), then a blank line before the body. +- Format is markdown links with the custom `tag://` URI scheme: `[name](tag://name)`. Multiple tags are space-separated on one line. +- The label and the URI value are usually the same word; differ only when there's a real reason (e.g. `[GitHub Actions](tag://github)`). +- **Tags are conditional.** Include them only when the entry has a coherent theme worth grouping across future entries. If nothing themes-up cleanly, omit the line entirely. Don't tag everything. + +## Step 1 — Get the entry text from the invoker + +The body of the entry must come from the invoker, verbatim. Your role is mechanical — timestamp, format, insert — not authorial. Do not draft, paraphrase, expand, summarize, or "improve" the prose. Do not draft from conversation context. Do not offer a draft for the invoker to react to. Do not propose what the entry "could be." + +Get a timestamp by shelling out: `date '+%Y-%m-%d %H:%M'`. Use that exact value as the entry heading. + +If the invoker passed content alongside the command (e.g. 
`/init-entry switched to postgres because sqlite locking was killing the worker pool`), use it as the body verbatim. If they passed nothing, ask what they want to capture and wait for their words. + +Show the entry as it will be written (their timestamp + their prose) before writing to disk, so they can confirm formatting and insertion. Don't edit the content unprompted. + +## Step 2 — Locate or create the journal + +From the current working directory: + +1. If `./docs/JOURNAL.md` exists → Step 3 (insert). +2. If `./docs/` exists but no `JOURNAL.md` → Step 4 (create). +3. If `./docs/` doesn't exist → create the directory, then Step 4. + +## Step 3 — Insert into existing journal + +Read `./docs/JOURNAL.md`. Find the insertion point: + +- Scan for the first line matching `^## ` (an existing entry heading). +- Insert the new entry **before** that line, separated by one blank line above and below. +- If no `^## ` line exists, append the entry to the end of the file with one blank line separating it from prior content. + +Never edit, reorder, or delete existing entries. The journal is append-only. + +## Step 4 — Create a new journal file + +Write `./docs/JOURNAL.md` with this skeleton followed by the new entry, so the format stays consistent regardless of which skill seeded the file: + +``` +# Journal + +Append-only stream of thought. Newest entries on top. Each entry is a timestamp +followed by freeform prose. Tag entries with `[name](tag://name)` links under +the header — only when a coherent theme emerges. Otherwise just write. + +## YYYY-MM-DD HH:MM + + +``` + +Unix line endings, no trailing whitespace, no emojis. + +## Step 5 — Commit and push + +After the entry is written, commit and push it so the journal stays in sync with the remote. This step is best-effort: report failures inline, never retry, and never unwind the on-disk write. + +1. 
If `git rev-parse --git-dir` fails (the cwd is not inside a git repo), skip this step entirely and note in the report that the entry was written but not committed. +2. Stage only the journal file: + ``` + git add docs/JOURNAL.md + ``` + Never use `git add -A` or `git add .` — unrelated working-tree changes are not the journal's concern and must not be swept into the commit. +3. Commit with a mechanical message derived from the timestamp (and tags, if any). Pass it via HEREDOC so quoting stays clean: + ``` + git commit -m "$(cat <<'EOF' + Journal: 2026-05-02 03:28 + EOF + )" + ``` + With tags, append them in parens after the timestamp: `Journal: 2026-05-02 03:28 (auth, postgres)`. The message must not paraphrase or summarize the entry body — the same no-synthesis rule that protects the body protects the commit log. +4. If the current branch has an upstream (`git rev-parse --abbrev-ref --symbolic-full-name @{u}` succeeds), run `git push`. If no upstream is configured or the push fails for any reason, report it and continue. Do not set an upstream, force-push, or otherwise paper over the failure. + +If any of steps 2–4 fail, surface the failure in the report but do not amend, reset, or modify the on-disk state. + +## Step 6 — Report + +One short line: path to the file, "created" or "updated", and the timestamp. Append commit sha (or "not committed: ") and push result (or "no upstream" / "push failed: "). Stop there. + +## Hard rules + +- **Human-authored body.** Entry prose comes from the invoker, not from you. Don't draft, paraphrase, expand, or summarize. If invoked without content, ask. The journal is a record of the human's thinking — your synthesis doesn't belong in it. +- **Append-only.** Never edit or delete existing entries. To revise a past thought, write a new entry that references the prior timestamp. +- **No title in the heading.** Just the timestamp. Resisting the urge to title each entry is the point — entries are stream of thought, not curated essays. 
+- **No template inside the body.** Don't write `**What changed:**`, `**Considered:**`, `**Ruled out:**` as sub-headings. Plain prose. +- **Tags are conditional and live directly under the header.** Use `[name](tag://name)` markdown link format. Omit the tag line when no coherent theme exists. Don't invent themes. +- **Today's timestamp only.** Don't backdate. +- **No emojis.** +- **Don't volunteer to add anything else** (indexes, table of contents, README links, summaries). The journal stays text-only. +- **Mechanical commit message.** The commit message is derived from the timestamp and tags only. Never paraphrase the entry body into the subject line. +- **Scoped commit.** Only `docs/JOURNAL.md` is staged. Never sweep in unrelated working-tree changes, never amend a previous commit, never bypass hooks. diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..46d3b10 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +# Keep the Docker build context lean. The Dockerfile doesn't COPY anything +# from the repo today, so this is mostly defensive — but it also means +# `docker build` doesn't ship gigabytes-of-history (.git) to the daemon as +# the repo grows. +.git +.gitignore +.dockerignore +.claude +docs +*.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f3c868 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.DS_Store +Thumbs.db + +# mcp-server (TypeScript) +mcp-server/node_modules/ +mcp-server/dist/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..5c65203 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,199 @@ +# claude-bottle + +## What this is + +claude-bottle spins up an isolated container for running Claude Code with a +curated set of skills and env vars. The point is to run Claude with broad +permissions inside a sandbox, so a misbehaving agent cannot reach the host. +Bash scripts orchestrate the container lifecycle and the copying of skills +and env vars into it. 
+ +## Goals + +- Minimize risk of running claude with full permissions +- Allow me to easily spin up agent tasks in parallel +- Create isolated, well defined, easily updated, shareable agents + +## Non-goals + +- Communicating between agents directly +- Self hosted VMs (v1 uses local Docker containers, not VMs) +- Advanced agent auditing (lean on git history for auditing) + +## Repository layout + +- `README.md` — short public-facing description. +- `CLAUDE.md` — this file, orientation for future Claude sessions. +- `.gitignore` — OS junk plus the `mcp-server/` dependency and build directories. +- `claude-bottle.json` — manifest of named agents (env / skills / prompt + per agent), consumed by `cli.sh`. See "Manifest" under + "Intended design". +- `docs/INDEX.md` — pointer to the journal and research notes. +- `docs/JOURNAL.md` — append-only log of decisions and state changes. +- `docs/prds/` — product requirement docs. +- `docs/research/` — research notes (directory kept tracked via `.gitkeep`). +- `.claude/skills/init-entry/` — project-local Claude Code skill providing `/init-entry` for adding journal entries. Snapshotted from `~/.claude/skills/init-entry/` at scaffold time; refresh deliberately if it drifts. + +The container launcher scripts (`Dockerfile`, `cli.sh`, +`lib/*.sh`) landed in PRD 0001 and were +extended in PRD 0002 with `lib/manifest.sh`, +`lib/env_resolve.sh`, and `lib/skills.sh`. Note: any +future repo-root `skills//` directory (skills sent into the +container) is a distinct concept from `.claude/skills//` (Claude +Code skills used while working in this repo) — don't conflate them. + +## Conventions + +- Text-driven content. `docs/JOURNAL.md` is an append-only stream of thought, + newest first. Entries are timestamps followed by freeform prose — no + templates, no required sections. Add entries with `/init-entry`. +- Product requirement docs live in `docs/prds/`. +- Research notes live in `docs/research/`. +- Low dependencies by default. 
The project is bash-first; ask before adding new + tools, runtimes, or package managers. + +## Intended design + +PRD 0002 lands the manifest-driven agent flow described below. The +`defaults/` directory and the repo-side `skills/` snapshot/diff loop +sketched at scaffold time are deferred — see "Deferred from the +scaffold sketch" at the end of this section. + +### Manifest + +Per-agent configuration lives in `claude-bottle.json` under an `"agents"` key. +`cli.sh` looks for this file in two locations and merges them: + +1. **Current working directory** (`$PWD/claude-bottle.json`) — project-local agents. +2. **Home directory** (`$HOME/claude-bottle.json`) — personal global agents. + +If both exist, the two `agents` objects are merged (home is the base, cwd +entries win on a same-agent-name conflict). +If neither file exists, `cli.sh` dies with a clear message. + +Each agent has three core attributes (`env`, `skills`, `prompt`) plus an optional `ssh` list: + +- `env` — hash of env vars. Each value is a JSON string whose mode + is selected by sentinel prefix: + - `"?"` — value is prompted at runtime from `/dev/tty` + (silent), exported into the launcher process, and forwarded to + the container via `docker run -e NAME` (no `=value`). Never + written to disk, never on argv. The launcher always asks, even + if a same-named var is already in the parent shell. `` + is rendered verbatim as the prompt body; the launcher appends + ` (input hidden): `. Bare `"?"` is allowed and falls back to a + default `claude-bottle: secret value for NAME` prompt. + - `"${HOST_VAR}"` — exact `${IDENT}` form, where `IDENT` matches + `[A-Za-z_][A-Za-z0-9_]*`. Value is read from `$HOST_VAR` in the + host process env at launch time. Treated the same as a secret on + the wire: copied into this process under the target name, + forwarded as `-e NAME` (no `=value`), never written to disk. + - any other string — literal value, hardcoded in the manifest. + Written to a mode-600 env-file under `mktemp -d` and passed to + docker via `--env-file`. 
Newlines are rejected up front because + docker `--env-file` cannot represent them. A literal whose text + starts with `?` or matches `${IDENT}` is not representable in + v1 — pick a different value or revisit the convention. +- `skills` — list of skill names. Each is `docker cp`'d from + `~/.claude/skills//` into the running container's + `~/.claude/skills//`, preserving per-skill directory structure + (no flattening, no archives). If a referenced skill is missing on + the host, `cli.sh` fails with a clear message naming the skill + and the path checked. The host→repo fallback and host↔repo diff + prompt described in the original sketch are deferred. +- `prompt` — string prepended to the chat when the container session + boots. Delivered by writing the string to a file inside the + container via `docker cp` (so the prompt content does not land on + `docker exec` argv) and passing it to + `claude --append-system-prompt-file `. Note: as of the + claude-code version pinned in the Dockerfile, this flag is real but + is not surfaced in the alphabetized `claude --help` output (only + mentioned obliquely under `--bare`); a future rename or removal will + break the launcher with a clear error from claude itself. Bare + `start` (no ``) is intentionally not supported — `` + remains required. +- `ssh` — optional array of SSH host entries. Each entry is an object + with five required keys: + - `Host` — the `Host` alias written to `~/.ssh/config` in the + container (also the name you use as the ssh destination). + - `IdentityFile` — absolute path to the private key file on the host + (leading `~` is expanded). At launch the key is `docker cp`'d into + `/root/.claude-bottle-keys/` (mode 700, root-owned), loaded into a + root-owned `ssh-agent` listening on `/run/claude-bottle-agent.sock`, + and the key file is then deleted. The agent socket is `chmod 666`, + but the `node` user reaches it only through the root-owned `socat` forwarder described below; the agent protocol only exposes + signing operations, never the key bytes. 
Keys must be + passphrase-less (no TTY for `ssh-add` to prompt against). + - `Hostname` — the actual hostname or IP for `HostName`. + - `User` — the SSH username for `User`. + - `Port` — the SSH port number for `Port`. + - `KnownHostKey` — (optional) the host's public key, written to + `~/.ssh/known_hosts` under both the `Host` alias and the + `Hostname` (so the lookup succeeds whether the connection uses + the alias or the raw IP/host, e.g. a git remote URL with the + bare IP). Eliminates the interactive host-verification prompt on + first connect. + + Per-Host blocks in `~/.ssh/config` use `IdentityAgent + /run/claude-bottle-agent-public.sock` rather than `IdentityFile`, so SSH + always reaches the agent regardless of `SSH_AUTH_SOCK`. The public + socket is served by a root-owned `socat` forwarder, not by the agent + itself: OpenSSH's `ssh-agent` enforces a `SO_PEERCRED`-based UID-match + check on every connection (only accepts peers with euid 0 or matching + the agent's own uid), so non-root callers like `node` are rejected + even when the socket is mode 666. `socat` runs as root, accepts node's + connections on the public socket, and proxies to the real agent socket + at `/run/claude-bottle-agent.sock`; from the agent's perspective the peer + is uid 0 and passes the check. + + Why an in-container agent (not a bind-mounted host agent): Docker + Desktop on macOS does not forward Unix-domain socket `connect()` + across the macOS↔Linux VM boundary (returns `ENOTSUP`). Running the + agent inside the container sidesteps that while preserving the + isolation property we want (node can use the key for SSH but cannot + read the bytes — root-owned agent and forwarder, no `CAP_SYS_PTRACE`). + + `cli.sh start` validates that every key file exists on the host + before the y/N prompt, then after the container is running it spawns + the in-container `ssh-agent`, loads the keys, deletes the key files, + and writes `~/.ssh/config` (mode 600) with one `Host` block per + entry. 
+ +Agent keys (the top-level keys of `claude-bottle.json`) should already be +slug-friendly (lowercase, alphanumeric + hyphens). The container name +is `claude-bottle-`, with a numeric suffix appended on conflict — +so two parallel starts of the same agent get distinct containers +(`claude-bottle-journal`, `claude-bottle-journal-2`, ...) instead of the +second failing. Two distinct agent keys that slug to the same value +(e.g. `"Review PR"` and `"review-pr"`) will both work but become hard +to tell apart in `docker ps`; pick keys that are already slugs to +avoid that ambiguity. `CLAUDE_BOTTLE_CONTAINER` still pins an exact name +and keeps the strict-conflict failure if it's already taken. + +### Confirmation + +Before launching a container, `cli.sh` shows the resolved plan and +waits for a single `y/N`: + +- agent name, image, container name +- env var names (never values; secrets are also never identified + separately, since the name itself plus the manifest is the source + of truth) +- skill names being sent +- prompt length and first line only + +Pass `--dry-run` (or set `CLAUDE_BOTTLE_DRY_RUN=1`) to print the plan +and exit before any `docker run` / `docker cp` / `docker exec`. + +### Deferred from the scaffold sketch + +The pre-PRD-0002 sketch described a repo-root `skills//` +snapshot, a `defaults/secrets.json` + `defaults/config.env`, and a +host↔repo skill diff loop. None of that is implemented; v1 reads the +manifest, prompts secrets, forwards literals via `--env-file`, and +copies host skills directly into the container. Reopen the question +in the journal if and when the snapshot story matters. + +## When you're unsure + +Ask. Default to drafting in chat over editing files when the request is ambiguous. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..944a2fc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,79 @@ +# claude-bottle container image. 
+# +# Goal: a small, cache-friendly base that ships claude-code (the +# `@anthropic-ai/claude-code` npm package, CLI name `claude`) ready to run +# interactively. The container is ephemeral; per PRD 0001 v1 the host +# filesystem is not mounted in. +# +# Layer ordering is deliberate: the npm install lives in its own layer so +# changes to the rest of the repo (or to the CMD) don't bust it. + +# Current Node LTS; slim variant keeps the image small while still +# providing apt-get for any future additions. +FROM node:22-slim + +# Install runtime system deps. claude-code shells out to git for several +# features (status checks, commits, PR creation) — without git in the +# image, those features fail in surprising ways once the user does any +# real work. ca-certificates is already in the slim base; listed for +# clarity in case the base ever drops it. socat is the privileged +# forwarder for the in-container ssh-agent (see lib/ssh.sh): the agent +# runs as root and rejects non-root connections, so socat sits between +# node and the agent socket. +RUN apt-get update \ + && apt-get install -y --no-install-recommends git ca-certificates openssh-client socat \ + && rm -rf /var/lib/apt/lists/* + +# Install claude-code globally. Pinned to the version verified in the v1 +# build (`claude --version` returns 2.1.126). Bump deliberately when +# rolling forward; an unpinned install would mean rebuilds silently pick +# up new behavior. +RUN npm install -g --no-fund --no-audit @anthropic-ai/claude-code@2.1.126 \ + && npm cache clean --force + +# Run as a non-root user. The node image already provides a `node` user +# (uid 1000) with a home directory, which is where claude-code will write +# its session state. +USER node +WORKDIR /home/node + +# Pre-create the skills directory so PRD 0002's host->container skill +# copier (lib/skills.sh) drops files into a path owned by the +# `node` user. 
`skills_copy_into` also `mkdir -p`s defensively, but +# baking it into the image avoids a permission-confusion footgun if a +# future change to the launcher copies in as a different user. +RUN mkdir -p /home/node/.claude/skills + +# Pre-populate ~/.claude.json so claude skips the first-run onboarding +# screens on every fresh container launch: the theme picker +# (`hasCompletedOnboarding`), the "trust this folder" dialog +# (`projects..hasTrustDialogAccepted`), the implicit theme +# default, and the bypass-permissions-mode warning that fires the +# first time `--dangerously-skip-permissions` is used in a profile +# (`bypassPermissionsModeAccepted`). Without these, an ephemeral +# container shows all four on every start. The fifth screen — +# "Detected a custom API key in your environment" — only fires on the +# ANTHROPIC_API_KEY fallback path; under the primary OAuth-token flow +# (CLAUDE_CODE_OAUTH_TOKEN) it does not appear. When it does fire it +# is handled at launch time by scripts/lib/auth.sh, which computes +# the key suffix inside the container so the value never crosses +# host argv. +# +# Heredoc delimiter is unquoted so $HOME expands; no other `$` appears +# in the body, so this is safe under dash (Docker's default RUN shell). +RUN cat > "$HOME/.claude.json" <`), many bottles run in parallel, and each +one's powers are scoped to what the manifest grants it: a curated set +of skills, env vars, and a starting prompt. When the session ends the +bottle is destroyed and the genie does not persist. 
+ +## Goals + +- Minimize risk of running claude with full permissions +- Allow me to easily spin up agent tasks in parallel +- Create isolated, well defined, easily updated, shareable agents + +## Non-goals + +- Communicating between agents directly +- Self hosted VMs (v1 uses local Docker containers, not VMs) +- Advanced agent auditing (lean on git history for auditing) + +## Quickstart + +Requires Docker on the host and a long-lived Claude Code OAuth token in +your shell env. + +```sh +./cli.sh start # builds the image on first run, drops you into claude +``` + +The container is removed automatically when the session ends. If the script +is killed with SIGKILL the exit trap won't fire and the container may be +left running; remove it with `docker rm -f `. + +## Auth: OAuth token, not API key + +claude-bottle authenticates `claude` inside the container with the same +Pro/Max subscription you already use on the host, via a long-lived OAuth +token. No `ANTHROPIC_API_KEY` is needed. + +**Why a token instead of mounting `~/.claude.json`:** on macOS, Claude +Code stores OAuth credentials in the encrypted Keychain, not in +`~/.claude.json`. Mounting that file into a Linux container does not +carry the credentials with it. Linux hosts keep credentials in +`~/.claude/.credentials.json`, but to keep the launcher portable +claude-bottle uses the env-var path on every host. + +**One-time setup on the host:** + +```sh +claude setup-token # browser login, prints a ~1-year OAuth token +``` + +Stash the token in your shell env (e.g. `~/.zshrc` or a secret manager) +as `CLAUDE_BOTTLE_OAUTH_TOKEN`: + +```sh +export CLAUDE_BOTTLE_OAUTH_TOKEN="" +``` + +`cli.sh` automatically forwards it to every container as +`CLAUDE_CODE_OAUTH_TOKEN` via `docker run -e` — no manifest wiring +required, and the value is never written to disk or placed on argv. + +Inside the container, `claude` picks up `CLAUDE_CODE_OAUTH_TOKEN` and +authenticates against your subscription. 
Caveats: the token is bound +to your subscription tier (Pro/Max/Team/Enterprise), it does not work +with `claude --bare` (which only reads `ANTHROPIC_API_KEY`), and if it +leaks, regenerate via `claude setup-token` again. Reference: +. diff --git a/cli.sh b/cli.sh new file mode 100755 index 0000000..399a3f3 --- /dev/null +++ b/cli.sh @@ -0,0 +1,966 @@ +#!/usr/bin/env bash +# cli.sh — manage claude-bottle containers. +# +# usage: cli.sh [args...] +# +# Commands: +# build build (or rebuild) the claude-bottle Docker image. +# cleanup stop and remove all active claude-bottle containers. +# edit open an agent in vim for editing. +# info print env, skills, and prompt details for a named agent. +# init interactively create a new agent and add it to claude-bottle.json. +# list list available agents or active containers. +# start boot a sandboxed container for a named agent and attach an +# interactive claude-code session. The container is torn down +# when the session ends. + +set -euo pipefail + +# Capture the user's cwd before anything else touches it. +USER_CWD="${PWD}" + +SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" +REPO_DIR="$SCRIPT_DIR" + +# shellcheck source=lib/log.sh +. "${SCRIPT_DIR}/lib/log.sh" +# shellcheck source=lib/docker.sh +. "${SCRIPT_DIR}/lib/docker.sh" +# shellcheck source=lib/env.sh +. "${SCRIPT_DIR}/lib/env.sh" +# shellcheck source=lib/manifest.sh +. "${SCRIPT_DIR}/lib/manifest.sh" +# shellcheck source=lib/env_resolve.sh +. "${SCRIPT_DIR}/lib/env_resolve.sh" +# shellcheck source=lib/skills.sh +. "${SCRIPT_DIR}/lib/skills.sh" +# shellcheck source=lib/ssh.sh +. 
"${SCRIPT_DIR}/lib/ssh.sh" + +# usage — print the top-level command summary to stderr. +usage() { + printf 'usage: %s [args...]\n' "$(basename "$0")" >&2 + printf '\n' >&2 + printf 'Commands:\n' >&2 + printf ' build build (or rebuild) the claude-bottle Docker image\n' >&2 + printf ' cleanup stop and remove all active claude-bottle containers\n' >&2 + printf ' edit open an agent in vim for editing\n' >&2 + printf ' info print env, skills, and prompt details for a named agent\n' >&2 + printf ' init interactively create a new agent and add it to claude-bottle.json\n' >&2 + printf ' list list available agents or active containers\n' >&2 + printf ' start boot a container for a named agent and attach an interactive session\n' >&2 + printf '\n' >&2 + printf "Run '%s --help' for command-specific usage.\n" "$(basename "$0")" >&2 +} + +# cmd_build — check docker is usable, then hand the image tag +# (CLAUDE_BOTTLE_IMAGE, default claude-bottle:latest) and the repo +# directory to build_image. +cmd_build() { + require_docker + build_image "${CLAUDE_BOTTLE_IMAGE:-claude-bottle:latest}" "$REPO_DIR" +} + +# cmd_info — print env var names, skills, a prompt summary, and box/ssh +# details for the named agent from the resolved manifest. Exits 2 when +# no agent name is given. +cmd_info() { + usage_info() { + printf 'usage: %s info \n' "$(basename "$0")" >&2 + printf ' must be defined in claude-bottle.json at the repo root.\n' >&2 + } + + if [ "$#" -lt 1 ]; then + usage_info + exit 2 + fi + + case "$1" in + -h|--help) usage_info; exit 0 ;; + -*) usage_info; die "unknown flag: $1" ;; + esac + + local NAME="$1" + require_jq + local MANIFEST_FILE + MANIFEST_FILE="$(mktemp -t claude-bottle-manifest.XXXXXX.json)" + # Expand the path into the trap text now: MANIFEST_FILE is local, so it + # is out of scope by the time EXIT fires after this function returns, + # and a trap that expands it lazily would remove nothing. + trap "$(printf 'rm -f -- %q' "$MANIFEST_FILE")" EXIT + manifest_resolve "$USER_CWD" > "$MANIFEST_FILE" + manifest_require_agent "$MANIFEST_FILE" "$NAME" + + local env_names="" _en + while IFS= read -r _en; do + [ -z "$_en" ] && continue + env_names="${env_names:+${env_names}, }${_en}" + done < <(manifest_env_names "$MANIFEST_FILE" "$NAME") + + local skill_names=() _sk + while IFS= read -r _sk; do + [ -z "$_sk" ] && continue + skill_names+=("$_sk") + done < <(manifest_skills "$MANIFEST_FILE" "$NAME") + + local prompt_content prompt_len prompt_first_line + prompt_content="$(manifest_prompt "$MANIFEST_FILE" "$NAME")" + prompt_len="${#prompt_content}" +
prompt_first_line="$(printf '%s' "$prompt_content" | awk 'NR==1{print; exit}')" + + local box_name + box_name="$(manifest_agent_box "$MANIFEST_FILE" "$NAME")" + + local ssh_entries=() _se + while IFS= read -r _se; do + [ -z "$_se" ] && continue + ssh_entries+=("$_se") + done < <(manifest_ssh "$MANIFEST_FILE" "$NAME") + + printf '\n' + info "agent : ${NAME}" + info "env (names only): ${env_names:-(none)}" + info "skills : ${skill_names[*]:-(none)}" + info "prompt : ${prompt_len} chars; first line: ${prompt_first_line:-(empty)}" + if [ -n "$box_name" ]; then + info "box : ${box_name}" + if [ "${#ssh_entries[@]}" -gt 0 ]; then + local _n _h _u _p _k _khk + for _se in "${ssh_entries[@]}"; do + _n="$(printf '%s' "$_se" | jq -r '.Host')" + _h="$(printf '%s' "$_se" | jq -r '.Hostname')" + _u="$(printf '%s' "$_se" | jq -r '.User')" + _p="$(printf '%s' "$_se" | jq -r '.Port')" + _k="$(printf '%s' "$_se" | jq -r '.IdentityFile')" + _khk="$(printf '%s' "$_se" | jq -r '.KnownHostKey // empty')" + info " ssh host : ${_n} (Hostname=${_h}, User=${_u}, Port=${_p}, IdentityFile=${_k})" + [ -n "$_khk" ] && info " KnownHostKey: ${_khk}" + done + else + info " ssh hosts : (none)" + fi + else + info "box : (none)" + fi + printf '\n' +} + +# cmd_list — with `available`, print the agent names from the resolved +# manifest; with `active`, print the name and status of running +# claude-bottle containers. Exits 2 when no argument is given. +cmd_list() { + usage_list() { + printf 'usage: %s list \n' "$(basename "$0")" >&2 + printf ' available list agent names defined in claude-bottle.json\n' >&2 + printf ' active list running claude-bottle containers\n' >&2 + } + + if [ "$#" -lt 1 ]; then + usage_list + exit 2 + fi + + case "$1" in + available) + require_jq + local MANIFEST_FILE + MANIFEST_FILE="$(mktemp -t claude-bottle-manifest.XXXXXX.json)" + # Expand the path into the trap text now: MANIFEST_FILE is local, so + # it is out of scope by the time EXIT fires after this function + # returns, and a trap that expands it lazily would remove nothing. + trap "$(printf 'rm -f -- %q' "$MANIFEST_FILE")" EXIT + manifest_resolve "$USER_CWD" > "$MANIFEST_FILE" + jq -r '.agents | keys_unsorted[]' "$MANIFEST_FILE" + ;; + active) + require_docker + local containers + containers="$(docker ps --filter 'name=^claude-bottle-' --format '{{.Names}}{{"\t"}}{{.Status}}' 2>/dev/null || true)" + if [ -z "$containers" ]; then +
info "no active claude-bottle containers" + return 0 + fi + printf '\n' + local name status + while IFS=$'\t' read -r name status; do + info "container: ${name} status: ${status}" + done <<< "$containers" + printf '\n' + ;; + -h|--help) usage_list; exit 0 ;; + *) usage_list; die "unknown argument: $1" ;; + esac +} + +cmd_cleanup() { + require_docker + local containers + containers="$(docker ps --filter 'name=^claude-bottle-' --format '{{.Names}}' 2>/dev/null || true)" + if [ -z "$containers" ]; then + info "no active claude-bottle containers" + return 0 + fi + printf '\n' >&2 + local name + while IFS= read -r name; do + info "found: ${name}" + done <<< "$containers" + printf '\n' >&2 + printf 'claude-bottle: remove all of the above? [y/N] ' >&2 + local REPLY + IFS= read -r REPLY /dev/null + done <<< "$containers" + info "done" +} + +# --------------------------------------------------------------------------- +# cmd_start — bring up an ephemeral claude-bottle container configured for a +# named agent from the repo-root claude-bottle.json manifest, and drop the +# user into an interactive claude-code session inside it. +# +# Lifecycle (per PRD 0001 "ephemeral" requirement): the container is +# removed automatically when the interactive session ends. We use +# `docker run --rm -d` plus a trap that forces removal on exit, so +# signals like Ctrl-C also clean up. +# +# ASSUMPTION: the container is started detached (`-d`) running `sleep +# infinity` so that skills and config can be copied in via `docker cp` +# before `docker exec` attaches the claude session. The container therefore +# stays alive in the background between launch and attach — the EXIT/INT/TERM +# trap is what guarantees teardown on normal exit. SIGKILL bypasses the +# trap; if this process is killed that way the container will be left +# running and must be removed manually with `docker rm -f `. 
+# +# Per-agent configuration (PRD 0002): +# - env vars in three modes (secret-prompted, literal, interpolated +# from the host process env). Resolved by lib/env_resolve.sh. +# * secret → prompted from /dev/tty, exported, forwarded via +# `docker run -e NAME` (no `=value`). +# * interpolated→ copied from a host var into this process under +# the target name, forwarded the same way as a +# secret (off argv, off disk). +# * literal → written to a mode-600 env-file under mktemp -d +# and forwarded with `--env-file <path>`. +# - skills: host directories under ~/.claude/skills/<name>/ are +# `docker cp`'d into the running container's +# ~/.claude/skills/<name>/ by lib/skills.sh. +# - prompt: written to a host-side mode-600 file, then `docker cp`'d +# into the container (so the prompt content never lands on +# `docker exec` argv) and passed to +# `claude --append-system-prompt-file <path>`. +# +# Confirmation: the resolved plan (skill names, env var names — never +# values, prompt length and first line) is shown before launch and +# gated on a single y/N. +# +# Dry-run: pass --dry-run (or set CLAUDE_BOTTLE_DRY_RUN=1) to print the +# resolved plan and exit BEFORE docker run / cp / exec. Used for +# verifying the manifest wiring without booting Claude. 
+# --------------------------------------------------------------------------- +cmd_start() { + usage_start() { + printf 'usage: %s start [--dry-run] [--cwd] \n' "$(basename "$0")" >&2 + printf ' must be defined in claude-bottle.json at the repo root.\n' >&2 + printf ' --cwd copy the current working directory into a derived image at\n' >&2 + printf ' /home/node/workspace and start claude there.\n' >&2 + } + + local DRY_RUN="${CLAUDE_BOTTLE_DRY_RUN:-0}" + local COPY_CWD=0 + local NAME="" + + while [ "$#" -gt 0 ]; do + case "$1" in + --dry-run) DRY_RUN=1; shift ;; + --cwd) COPY_CWD=1; shift ;; + -h|--help) usage_start; exit 0 ;; + --) shift; break ;; + -*) usage_start; die "unknown flag: $1" ;; + *) + if [ -z "$NAME" ]; then + NAME="$1" + else + usage_start; die "unexpected extra argument: $1" + fi + shift + ;; + esac + done + + if [ -z "${NAME:-}" ]; then + usage_start + exit 2 + fi + + local SLUG + SLUG="$(slugify "$NAME")" + + local IMAGE="${CLAUDE_BOTTLE_IMAGE:-claude-bottle:latest}" + # Default container name is claude-bottle-. If the user pinned a + # specific name via CLAUDE_BOTTLE_CONTAINER we honor it as-is below. + # Otherwise we auto-suffix on conflict so concurrent starts of the + # same agent get distinct containers (claude-bottle-journal, + # claude-bottle-journal-2, ...). Final resolution happens just below, + # after require_docker, since container_exists needs docker reachable. + local DEFAULT_CONTAINER="claude-bottle-${SLUG}" + local PINNED_CONTAINER="${CLAUDE_BOTTLE_CONTAINER:-}" + + # When --cwd is on, runtime image is a thin derived image FROM $IMAGE + # with the user's cwd COPY'd in. Tag it per-agent so the layer cache + # stays effective across repeated launches of the same agent. 
+ local RUNTIME_IMAGE="$IMAGE" + local DERIVED_IMAGE="" + if [ "$COPY_CWD" = "1" ]; then + DERIVED_IMAGE="${CLAUDE_BOTTLE_DERIVED_IMAGE:-claude-bottle:cwd-${SLUG}}" + RUNTIME_IMAGE="$DERIVED_IMAGE" + fi + + require_docker + require_jq + + # Resolve the manifest (merge USER_CWD and HOME configs) into a temp file + # early so it is available for all subsequent manifest calls. + # Not declared local: the EXIT trap fires after cmd_start returns, so local + # variables would already be out of scope when cleanup_all runs. + MANIFEST_FILE="$(mktemp -t claude-bottle-manifest.XXXXXX.json)" + trap 'rm -f "${MANIFEST_FILE:-}"' EXIT + manifest_resolve "$USER_CWD" > "$MANIFEST_FILE" + + manifest_require_agent "$MANIFEST_FILE" "$NAME" + + # Not declared local: needed by cleanup_all after cmd_start returns (see MANIFEST_FILE note above). + CONTAINER="" + local _suffix=2 + if [ -n "$PINNED_CONTAINER" ]; then + CONTAINER="$PINNED_CONTAINER" + if container_exists "$CONTAINER"; then + die "container '${CONTAINER}' already exists (pinned via CLAUDE_BOTTLE_CONTAINER). Remove it with 'docker rm -f ${CONTAINER}' or unset the override." + fi + else + CONTAINER="$DEFAULT_CONTAINER" + while container_exists "$CONTAINER"; do + CONTAINER="${DEFAULT_CONTAINER}-${_suffix}" + _suffix=$((_suffix + 1)) + if [ "$_suffix" -gt 100 ]; then + die "could not find a free container name after ${DEFAULT_CONTAINER}-99; clean up old containers with 'docker rm -f '" + fi + done + fi + + # --- Plan resolution (host-only, no container yet) --- + + # Collect the env names (for display) and the skill names (for both + # display and validation). + local ENV_NAMES_LIST="" + local _en + while IFS= read -r _en; do + [ -z "$_en" ] && continue + if [ -z "$ENV_NAMES_LIST" ]; then + ENV_NAMES_LIST="$_en" + else + ENV_NAMES_LIST="${ENV_NAMES_LIST}, ${_en}" + fi + done < <(manifest_env_names "$MANIFEST_FILE" "$NAME") + + # CLAUDE_BOTTLE_OAUTH_TOKEN → CLAUDE_CODE_OAUTH_TOKEN forwarding. 
+ # When the host has the token set, it is always forwarded regardless of the + # manifest so that every container can authenticate without wiring the token + # into each agent definition. + local FORWARD_OAUTH_TOKEN=0 + if [ -n "${CLAUDE_BOTTLE_OAUTH_TOKEN:-}" ]; then + FORWARD_OAUTH_TOKEN=1 + if [ -z "$ENV_NAMES_LIST" ]; then + ENV_NAMES_LIST="CLAUDE_CODE_OAUTH_TOKEN" + else + ENV_NAMES_LIST="${ENV_NAMES_LIST}, CLAUDE_CODE_OAUTH_TOKEN" + fi + fi + + # Skills as an array. + local SKILL_NAMES=() + local _sk + while IFS= read -r _sk; do + [ -z "$_sk" ] && continue + SKILL_NAMES+=("$_sk") + done < <(manifest_skills "$MANIFEST_FILE" "$NAME") + + # Validate every requested skill exists on the host BEFORE the y/N. + if [ "${#SKILL_NAMES[@]}" -gt 0 ]; then + skills_validate_all "${SKILL_NAMES[@]}" + fi + + # Resolve the box referenced by this agent and validate it exists. + # A box is required — agents without one are rejected before launch. + local BOX_NAME + BOX_NAME="$(manifest_agent_box "$MANIFEST_FILE" "$NAME")" + if [ -z "$BOX_NAME" ]; then + die "agent '${NAME}' has no 'box' field. Add a box association to this agent in claude-bottle.json." + fi + manifest_require_box "$MANIFEST_FILE" "$BOX_NAME" + + # SSH entries come from the agent's box (empty if no box set). + local SSH_ENTRIES=() + local _se + while IFS= read -r _se; do + [ -z "$_se" ] && continue + SSH_ENTRIES+=("$_se") + done < <(manifest_ssh "$MANIFEST_FILE" "$NAME") + + # Validate key files exist on the host BEFORE the y/N. + if [ "${#SSH_ENTRIES[@]}" -gt 0 ]; then + ssh_validate_entries "${SSH_ENTRIES[@]}" + fi + + # Stage env-file + args-file under a mktemp dir; clean up on exit. + # Not declared local: needed by cleanup_stage after cmd_start returns (see MANIFEST_FILE note above). 
+ STAGE_DIR="$(mktemp -d -t claude-bottle-stage.XXXXXX)" + local ENV_FILE="${STAGE_DIR}/agent.env" + local ARGS_FILE="${STAGE_DIR}/docker-args" + local PROMPT_FILE="${STAGE_DIR}/prompt.txt" + : > "$ENV_FILE" + chmod 600 "$ENV_FILE" + : > "$ARGS_FILE" + : > "$PROMPT_FILE" + chmod 600 "$PROMPT_FILE" + + cleanup_stage() { + if [ -n "${STAGE_DIR:-}" ] && [ -d "$STAGE_DIR" ]; then + rm -rf "$STAGE_DIR" + fi + rm -f "${MANIFEST_FILE:-}" + } + trap cleanup_stage EXIT + + # Resolve env entries: prompts secrets (silent /dev/tty), copies + # interpolated host vars into this process, writes literal pairs to + # ENV_FILE. + env_resolve "$MANIFEST_FILE" "$NAME" "$ENV_FILE" "$ARGS_FILE" + + # Read the prompt and write it to PROMPT_FILE. Inside the container the + # prompt will be passed via `--append-system-prompt-file `, so + # the content does NOT land on `docker exec` argv even if it grows + # arbitrarily large. + local PROMPT_CONTENT + PROMPT_CONTENT="$(manifest_prompt "$MANIFEST_FILE" "$NAME")" + printf '%s' "$PROMPT_CONTENT" > "$PROMPT_FILE" + + local PROMPT_LEN="${#PROMPT_CONTENT}" + local PROMPT_FIRST_LINE + PROMPT_FIRST_LINE="$(printf '%s' "$PROMPT_CONTENT" | awk 'NR==1{print; exit}')" + + # --- Show plan + confirm --- + + printf '\n' >&2 + info "agent : ${NAME}" + info "image : ${IMAGE}" + if [ -n "$DERIVED_IMAGE" ]; then + info "cwd : ${USER_CWD} -> /home/node/workspace (derived: ${DERIVED_IMAGE})" + fi + info "container : ${CONTAINER}" + info "stage dir : ${STAGE_DIR}" + if [ -n "$ENV_NAMES_LIST" ]; then + info "env (names only): ${ENV_NAMES_LIST}" + else + info "env (names only): (none)" + fi + if [ "${#SKILL_NAMES[@]}" -gt 0 ]; then + info "skills : ${SKILL_NAMES[*]}" + else + info "skills : (none)" + fi + if [ -n "$BOX_NAME" ]; then + info "box : ${BOX_NAME}" + if [ "${#SSH_ENTRIES[@]}" -gt 0 ]; then + local _ssh_names="" _se + for _se in "${SSH_ENTRIES[@]}"; do + local _n + _n="$(printf '%s' "$_se" | jq -r '.Host')" + _ssh_names="${_ssh_names:+${_ssh_names}, 
}${_n}" + done + info " ssh hosts : ${_ssh_names}" + else + info " ssh hosts : (none)" + fi + else + info "box : (none)" + fi + info "prompt : ${PROMPT_LEN} chars; first line: ${PROMPT_FIRST_LINE:-(empty)}" + printf '\n' >&2 + + if [ "$DRY_RUN" = "1" ]; then + info "dry-run requested; not starting container." + exit 0 + fi + + printf 'claude-bottle: launch this agent? [y/N] ' >&2 + local REPLY + IFS= read -r REPLY /dev/null 2>&1 || true + fi + cleanup_stage + } + # Replaces the cleanup_stage EXIT trap above; cleanup_all calls cleanup_stage internally. + trap cleanup_all EXIT INT TERM + + # Assemble docker run argv: + # - --rm -d --name CONTAINER + # - --env-file ENV_FILE (only if it has any entries) + # - one `-e NAME` pair per line in ARGS_FILE (secret + interpolated) + # - IMAGE + # - sleep infinity (so we can `docker exec` an interactive session) + local DOCKER_ARGS=(--rm -d --name "$CONTAINER") + if [ -s "$ENV_FILE" ]; then + DOCKER_ARGS+=(--env-file "$ENV_FILE") + fi + # Read pairs of (-e, NAME) lines from ARGS_FILE. + local flag vname + while IFS= read -r flag; do + [ -z "$flag" ] && continue + IFS= read -r vname || break + DOCKER_ARGS+=("$flag" "$vname") + done <"$ARGS_FILE" + if [ "$FORWARD_OAUTH_TOKEN" = "1" ]; then + export CLAUDE_CODE_OAUTH_TOKEN="$CLAUDE_BOTTLE_OAUTH_TOKEN" + DOCKER_ARGS+=(-e CLAUDE_CODE_OAUTH_TOKEN) + fi + DOCKER_ARGS+=("$RUNTIME_IMAGE" sleep infinity) + + info "starting container ${CONTAINER} from ${RUNTIME_IMAGE}" + # The pre-check loop above is best-effort: two parallel starts can both + # observe the same bare name as free, so we also retry here when docker + # rejects the run with a name conflict. Pinned names skip the retry — + # user-chosen, user-owned. + local RUN_ERR_FILE="${STAGE_DIR}/docker-run.err" + local RUN_ERR_TEXT + while :; do + : > "$RUN_ERR_FILE" + if docker run "${DOCKER_ARGS[@]}" >/dev/null 2>"$RUN_ERR_FILE"; then + break + fi + RUN_ERR_TEXT="$(cat "$RUN_ERR_FILE")" + if [ -n "$PINNED_CONTAINER" ] || ! 
printf '%s' "$RUN_ERR_TEXT" | grep -q "is already in use"; then + printf '%s\n' "$RUN_ERR_TEXT" >&2 + die "docker run failed for container '${CONTAINER}'" + fi + if [ "$_suffix" -gt 100 ]; then + die "could not find a free container name after ${DEFAULT_CONTAINER}-99 retries; clean up old containers with 'docker rm -f '" + fi + CONTAINER="${DEFAULT_CONTAINER}-${_suffix}" + _suffix=$((_suffix + 1)) + DOCKER_ARGS[3]="$CONTAINER" + info "name conflict; retrying as ${CONTAINER}" + done + + # Copy prompt file into the container WITHOUT putting its contents on + # argv. `docker cp` reads the file from disk and streams it in. + local CONTAINER_PROMPT_PATH="${CLAUDE_BOTTLE_CONTAINER_HOME:-/home/node}/.claude-bottle-prompt.txt" + docker cp "$PROMPT_FILE" "${CONTAINER}:${CONTAINER_PROMPT_PATH}" >/dev/null + # `docker cp` preserves the host file's numeric UID, which on hosts where + # the user is not uid 1000 (e.g. macOS uid 501) leaves the in-container + # file unreadable by the `node` user. Re-own and re-mode as root inside + # the container so `node` can read its own mode-600 prompt regardless of + # host UID. + docker exec -u 0 "$CONTAINER" chown node:node "$CONTAINER_PROMPT_PATH" >/dev/null + docker exec -u 0 "$CONTAINER" chmod 600 "$CONTAINER_PROMPT_PATH" >/dev/null + + # Copy each requested skill. + if [ "${#SKILL_NAMES[@]}" -gt 0 ]; then + skills_copy_into "$CONTAINER" "${SKILL_NAMES[@]}" + fi + + # Set up SSH keys and config. + if [ "${#SSH_ENTRIES[@]}" -gt 0 ]; then + ssh_setup "$CONTAINER" "$STAGE_DIR" "${SSH_ENTRIES[@]}" + fi + + # When --cwd is on, ship the host repo's .git directory in via `docker cp` + # rather than the build-time COPY. Two reasons: (1) build-time COPY honors + # the host project's .dockerignore, which often excludes .git/ (e.g. 
the + # openemr fork) — without .git the agent inside has no branch, no remotes, + # and can't commit; (2) keeping .git out of the cached image layer avoids + # bloating the layer (a real repo's .git can be several GB) and avoids + # baking a stale snapshot of refs/index into the image. The cp at run + # time means the agent always sees the host's current refs. + if [ "$COPY_CWD" = "1" ] && [ -d "$USER_CWD/.git" ]; then + info "copying ${USER_CWD}/.git -> ${CONTAINER}:/home/node/workspace/.git" + docker cp "$USER_CWD/.git" "${CONTAINER}:/home/node/workspace/.git" >/dev/null + docker exec -u 0 "$CONTAINER" chown -R node:node /home/node/workspace/.git >/dev/null + fi + + info "attaching interactive claude session (Ctrl-D or 'exit' to leave; container will be removed)" + # --remote-control: enable Remote Control (hidden flag; see --remote-control-session-name-prefix + # in `claude --help` — the prefix flag is the only surfaced piece, the toggle itself is hidden, + # same pattern as --append-system-prompt-file). + # --dangerously-skip-permissions: bypass permission prompts. Safe here because the whole point of + # claude-bottle is sandboxing claude inside a container (see CLAUDE.md "What this is"). + local CLAUDE_ARGS=(--remote-control --dangerously-skip-permissions) + # `|| true` so a non-zero exit from the REPL doesn't skip the trap output. + if [ -n "$PROMPT_CONTENT" ]; then + docker exec -it "$CONTAINER" claude "${CLAUDE_ARGS[@]}" --append-system-prompt-file "$CONTAINER_PROMPT_PATH" || true + else + docker exec -it "$CONTAINER" claude "${CLAUDE_ARGS[@]}" || true + fi + + info "session ended; container ${CONTAINER} will be removed" +} + +# --------------------------------------------------------------------------- +# cmd_init — interactively populate a new agent and write it to either +# ~/claude-bottle.json (user) or ./claude-bottle.json (project). 
+# +# Prompts for: +# - agent name (required) +# - env vars: name + mode (secret / interpolated / literal) +# - skills (space-separated) +# - system prompt (multi-line, terminated by a lone ".") +# - SSH host entries (optional) +# +# Merges the new agent into the target file if it already exists +# (existing agents are preserved; name conflicts prompt for confirmation). +# --------------------------------------------------------------------------- +cmd_init() { + usage_init() { + printf 'usage: %s init \n' "$(basename "$0")" >&2 + printf ' user add the agent to ~/claude-bottle.json\n' >&2 + printf ' project add the agent to ./claude-bottle.json in the current directory\n' >&2 + } + + if [ "$#" -lt 1 ]; then + usage_init + exit 2 + fi + + local SCOPE TARGET_FILE + case "$1" in + -h|--help) usage_init; exit 0 ;; + user) SCOPE="user"; TARGET_FILE="${HOME}/claude-bottle.json" ;; + project) SCOPE="project"; TARGET_FILE="${USER_CWD}/claude-bottle.json" ;; + *) usage_init; die "expected 'user' or 'project', got: $1" ;; + esac + + require_jq + + printf '\n' >&2 + info "claude-bottle init — adding a new agent to ${TARGET_FILE}" + printf '\n' >&2 + + # --- Agent name --- + local AGENT_NAME="" + while [ -z "$AGENT_NAME" ]; do + printf 'Agent name: ' >&2 + IFS= read -r AGENT_NAME /dev/null 2>&1; then + printf 'claude-bottle: agent "%s" already exists in %s. Overwrite? [y/N] ' "$AGENT_NAME" "$TARGET_FILE" >&2 + local _ow + IFS= read -r _ow &2 + printf 'Skills (space or comma separated, or Enter for none): ' >&2 + local _skills_input="" + IFS= read -r _skills_input &2 + info "System prompt — enter text, then a lone '.' on its own line to finish (just '.' to leave empty):" + local PROMPT_CONTENT="" _pline _pfirst=1 + while :; do + IFS= read -r _pline &2 + printf 'Associate this agent with a box? 
[y/N] ' >&2 + local _box_yn="" + IFS= read -r _box_yn &2 + IFS= read -r BOX_NAME /dev/null 2>&1; then + _box_exists=1 + info "Box '${BOX_NAME}' already exists in ${TARGET_FILE}; agent will reference it." + else + info "Creating new box '${BOX_NAME}'." + + # --- Env vars (stored on the box) --- + printf '\n' >&2 + info "Env vars — enter each var name then its mode. Press Enter with no name to finish." + info " Modes: secret (prompt at runtime) | interpolated (read from host env) | literal (hardcoded value)" + while :; do + printf '\n Var name (or Enter to finish): ' >&2 + local _vname="" + IFS= read -r _vname &2 + local _vmode="" + IFS= read -r _vmode &2 + local _smsg="" + IFS= read -r _smsg &2 + local _hvar="" + IFS= read -r _hvar &2 + IFS= read -r _vval &2 + local _ssh_yn="" + IFS= read -r _ssh_yn &2 + local _shost="" + IFS= read -r _shost &2 + local _shostname="" + IFS= read -r _shostname &2 + local _suser="" + IFS= read -r _suser &2 + local _sport="" + IFS= read -r _sport &2 + local _sidentity="" + IFS= read -r _sidentity &2 + local _skhk="" + IFS= read -r _skhk &2 + + local TMP_FILE + TMP_FILE="$(mktemp -t claude-bottle-init.XXXXXX.json)" + + if [ -f "$TARGET_FILE" ]; then + if ! jq -e . "$TARGET_FILE" >/dev/null 2>&1; then + rm -f "$TMP_FILE" + die "${TARGET_FILE} exists but is not valid JSON; fix or remove it first" + fi + if ! printf '%s' "$NEW_ENTRY" | jq -s '{ + "boxes": ((.[0].boxes // {}) * (.[1].boxes // {})), + "agents": ((.[0].agents // {}) * (.[1].agents // {})) + }' "$TARGET_FILE" - > "$TMP_FILE"; then + rm -f "$TMP_FILE" + die "failed to merge agent into ${TARGET_FILE}" + fi + else + if ! printf '%s\n' "$NEW_ENTRY" > "$TMP_FILE"; then + rm -f "$TMP_FILE" + die "failed to write ${TARGET_FILE}" + fi + fi + + mv "$TMP_FILE" "$TARGET_FILE" + + info "Agent '${AGENT_NAME}' written to ${TARGET_FILE}." + info "Run '$(basename "$0") info ${AGENT_NAME}' to verify." 
+ printf '\n' >&2 +} + +# --------------------------------------------------------------------------- +# cmd_edit — open an agent entry in vim at the line where its key appears. +# --------------------------------------------------------------------------- +cmd_edit() { + usage_edit() { + printf 'usage: %s edit \n' "$(basename "$0")" >&2 + printf ' user edit an agent in ~/claude-bottle.json\n' >&2 + printf ' project edit an agent in ./claude-bottle.json in the current directory\n' >&2 + printf ' name of the agent to jump to\n' >&2 + } + + if [ "$#" -lt 2 ]; then + usage_edit + exit 2 + fi + + local TARGET_FILE + case "$1" in + -h|--help) usage_edit; exit 0 ;; + user) TARGET_FILE="${HOME}/claude-bottle.json" ;; + project) TARGET_FILE="${USER_CWD}/claude-bottle.json" ;; + *) usage_edit; die "expected 'user' or 'project', got: $1" ;; + esac + + local NAME="$2" + + require_jq + + if [ ! -f "$TARGET_FILE" ]; then + die "${TARGET_FILE} does not exist" + fi + + if ! jq -e --arg n "$NAME" '.agents | has($n)' "$TARGET_FILE" >/dev/null 2>&1; then + die "agent '${NAME}' not found in ${TARGET_FILE}" + fi + + local LINE + LINE="$(grep -Fn "\"${NAME}\"" "$TARGET_FILE" | head -1 | cut -d: -f1)" + LINE="${LINE:-1}" + + exec vim +"${LINE}" "$TARGET_FILE" +} + +# --------------------------------------------------------------------------- +# Dispatch +# --------------------------------------------------------------------------- + +if [ "$#" -lt 1 ]; then + usage + exit 2 +fi + +COMMAND="$1" +shift + +case "$COMMAND" in + build) cmd_build ;; + cleanup) cmd_cleanup ;; + edit) cmd_edit "$@" ;; + info) cmd_info "$@" ;; + init) cmd_init "$@" ;; + list) cmd_list "$@" ;; + start) cmd_start "$@" ;; + -h|--help) usage; exit 0 ;; + *) usage; die "unknown command: ${COMMAND}" ;; +esac diff --git a/docs/INDEX.md b/docs/INDEX.md new file mode 100644 index 0000000..26b3cc3 --- /dev/null +++ b/docs/INDEX.md @@ -0,0 +1 @@ +Decisions and state changes are logged in `JOURNAL.md`. 
Research notes live in `research/`. diff --git a/docs/JOURNAL.md b/docs/JOURNAL.md new file mode 100644 index 0000000..36922fe --- /dev/null +++ b/docs/JOURNAL.md @@ -0,0 +1,5 @@ +# Journal + +Append-only stream of thought. Newest entries on top. Each entry is a timestamp +followed by freeform prose. Tag entries with `[name](tag://name)` links under +the header — only when a coherent theme emerges. Otherwise just write. diff --git a/docs/prds/.gitkeep b/docs/prds/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/research/.gitkeep b/docs/research/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/research/host-dispatch-to-container-agents.md b/docs/research/host-dispatch-to-container-agents.md new file mode 100644 index 0000000..8e47d54 --- /dev/null +++ b/docs/research/host-dispatch-to-container-agents.md @@ -0,0 +1,58 @@ +# Host Dispatch to Container Agents + +## Question + +Can host Claude decide which claude-bottle container to spin up for a task, while guaranteeing the work executes in the container and not on the host? + +## Claude Code Agent Mechanisms + +Claude Code provides two mechanisms for defining reusable agent behavior: + +**Skills** (`.claude/skills/<name>/SKILL.md`) run inline in the main conversation context. They're reusable workflows invoked via `/skill-name`, with optional tool pre-approval. + +**Subagents** (`.claude/agents/<name>.md`) run in an isolated context window with a custom system prompt and a declared tool allowlist. They're invoked by natural language, `@agent-name`, or `claude --agent <name>`. The `tools:` frontmatter is enforced — the subagent cannot call tools not in the list. (See [Claude Code subagents docs](https://code.claude.com/docs/en/sub-agents.md), "Choose the subagent scope" and "Write subagent files" sections.) + +"Isolated context window" means only conversational isolation (fresh LLM state, summarized output). It is not process, filesystem, or network isolation. 
Subagents still run on the host with full user permissions. + +## The Reliability Problem + +The previous approach used an MCP server to bridge host Claude and claude-bottle containers. It failed because host Claude had both work-capable tools (Edit, Write, Bash) and MCP dispatch tools. Claude could choose to do the work itself rather than dispatch, with no enforcement mechanism to prevent it. + +## Why Tool Restriction Solves It + +Claude Code's subagent `tools:` allowlist is architecturally enforced — not a prompt-level suggestion. If the host subagent is defined with only container-dispatch tools and no Edit/Write/Bash, it is incapable of doing implementation work. Dispatch becomes the only available path. + +## Reliable Dispatch Architecture + +Three pieces in combination give a 100% guarantee: + +1. **Restricted host subagent** — a `.claude/agents/claude-bottle-dispatch.md` with `tools:` limited to MCP container tools and git-read operations. No Edit, Write, or arbitrary Bash. + +2. **MCP server** — exposes tools the restricted host can call: + - `list_agents()` — available agents from the manifest (host Claude decides which to use) + - `run_agent(agent_name, task)` — starts a container non-interactively, returns a job ID + - `get_status(job_id)` — check running/done + - `get_output(job_id)` — read results + +3. **Non-interactive container run mode** — `cli.sh run <agent> "<task>"` passes the task to `claude --print` inside the container and captures output. Currently `cli.sh start` is interactive only; this mode does not yet exist. + +## Proposal + +Build host-dispatch-to-container in two deliverables: + +**Deliverable 1: Non-interactive run mode for claude-bottle** + +Extend `cli.sh` with a `run <agent> <task>` subcommand. The subcommand starts the container, writes the task prompt to a file inside it (same `docker cp` pattern used for `--append-system-prompt-file`), invokes `claude --print` with the prompt, streams stdout back to the host, and exits when Claude finishes. 
Results committed and pushed from inside the container as usual. + +**Deliverable 2: MCP server wrapping claude-bottle** + +A minimal MCP server (bash or node) exposing `list_agents`, `run_agent`, `get_status`, `get_output`. Registered in the host Claude Code settings so a restricted dispatch subagent can call it. + +The combination enforces the container boundary at the tool layer, not the prompt layer — making it structurally impossible for host Claude to do implementation work itself. + +**Critical:** the tool restriction only applies within the dispatch agent's context. A normal Claude session has its full toolset and may never invoke the dispatch agent regardless of its description. The dispatch agent must be the *entry point* for the session, not an optional subagent a full-tool host might call. Two ways to enforce this: + +- Launch with `claude --agent claude-bottle-dispatch` — makes the dispatch agent the primary agent for the session. +- Set `agent: claude-bottle-dispatch` in the project `.claude/settings.json` — same effect automatically for any `claude` invocation in that directory. + +Without one of these, the guarantee does not hold. diff --git a/docs/research/landscape-containerized-claude.md b/docs/research/landscape-containerized-claude.md new file mode 100644 index 0000000..9c43ae6 --- /dev/null +++ b/docs/research/landscape-containerized-claude.md @@ -0,0 +1,76 @@ +# Landscape: containerized Claude Code agent tools + +Research into whether claude-bottle is redundant with existing projects, and +whether it's worth publishing. + +## Summary + +The "Claude Code in Docker" space is active but not saturated. claude-bottle +occupies a distinct position: no surveyed project combines all five of its +defining features. Publishing is likely worthwhile, with the main risk being +claudebox expanding to absorb the same niche. + +## Closest competitor: claudebox + +[RchGrav/claudebox](https://github.com/RchGrav/claudebox) is the most +feature-complete analog. 
It runs Claude Code in Docker with per-project +isolated images, 15+ pre-configured dev-language profiles, and per-project +network firewall allowlists. Actively maintained with multiple forks. + +What it lacks: manifest-driven named agents, per-agent env resolution modes +(prompt / host-forward / literal), skill directory injection, per-agent system +prompts, SSH-agent forwarding without copying private keys, home+project +manifest merge. + +## Other surveyed projects + +- **textcortex/claude-code-sandbox → spritz** — evolved toward + Kubernetes-native multi-agent infra; not bash-first or local-Docker. + Original sandbox repo is archived. +- **trailofbits/claude-code-devcontainer** — devcontainer config for security + audits; not a general agent launcher. +- **Several small solo repos** (arezi/claude-sandbox, nkrefman/claude-sandbox, + VishalJ99/claude-docker) — lightweight Docker wrappers with no multi-agent + config layer. +- **Docker's official sandbox templates** — launch-and-run Dockerfiles plus an + npm-based runtime; not a manifest-driven fleet manager. + +## Adjacent (different model) + +- **dagger/container-use** (mid-2025) — exposes an MCP server so the *agent* + spins up its own containers with Git worktrees. Inverted model vs. claude-bottle + (agent controls container rather than being launched into one by a manifest). + Still marked early-development. +- **E2B, Northflank, Cloudflare Sandbox SDK** — cloud-hosted SaaS sandbox + runtimes; fundamentally different architecture. + +## What no found project does + +None combine: +1. Named-agent JSON manifest with per-agent env resolution (prompt / host-forward / literal) +2. Claude Code skills directory injection +3. Per-agent system prompts +4. SSH-agent key forwarding without copying private keys into the container +5. Home + project manifest merge + +## Publishing verdict + +Worth publishing. 
Differentiators that matter to the target audience (power +users running parallel Claude Code sessions with distinct personas/tooling): + +- The bash-first, low-dependency design — competitors are npm-based or + Kubernetes-native. +- Named agents with distinct skills and system prompts, not just language profiles. +- SSH forwarding without key copying. + +Main risk: claudebox adds manifest/agent config. The space is moving fast +enough that publishing sooner is better if establishing prior art matters. + +Discovery will be slow without active promotion; an Anthropic Discord post or +HN "Show HN" would do most of the work. + +## Caveats + +- GitHub search cannot surface private or very new repos comprehensively. +- Counts (stars, forks) were not confirmed for every project. +- Research conducted 2026-05-07; the space moves fast. diff --git a/docs/research/local-vs-remote-agent-execution.md b/docs/research/local-vs-remote-agent-execution.md new file mode 100644 index 0000000..e29ea22 --- /dev/null +++ b/docs/research/local-vs-remote-agent-execution.md @@ -0,0 +1,231 @@ +# Local vs. Remote Agent Execution: Security & Privacy Tradeoffs + +Research notes on when to run containerized Claude Code agents on a remote machine +outside the local network versus inside it, focusing on security and privacy concerns. +Relevant to a potential claude-bottle extension for remote agent execution. + +--- + +## The core mental model + +The topology decision isn't "local = safe, remote = dangerous." The real variables are: +**what can the agent reach if compromised**, **what's on the host if the container +escapes**, and **whether credentials are short-lived and scoped**. 
+ +--- + +## Threat landscape by topology + +### Local (current claude-bottle model) + +- Container escape → developer laptop → `~/.ssh`, `~/.aws`, browser cookies, Keychain, everything +- Outbound: Docker containers have full internet access by default; no egress monitoring on most home networks +- Lateral movement: compromised container can reach the LAN — NAS, other machines, internal services +- Notable: CVE-2025-59536 (CVSS 8.7, Feb 2026) — a poisoned `.claude/settings.json` in a repo gives RCE when Claude Code opens it. `--dangerously-skip-permissions` removes the last gate. +- Supply chain: MCP servers, skills, and npm packages pulled during agent execution. ~20% of ClawHub skills were found malicious in early 2026. + +**What local topology protects:** +- No inbound attack surface — nothing listening on a public port +- Secrets stay physically on your hardware; no transit risk +- Network egress bounded by the host router + +### Remote machine + +- New inbound attack surface (SSH/API port must be open; CISA notes exploitation of remote access vulns within 9–13 days of disclosure) +- Secrets must travel from local to remote — each transit is a new exposure class +- If the VM has a cloud IAM role attached → blast radius includes cloud APIs (S3, RDS, IAM, etc.) +- Compromised remote host can read env vars injected into containers, intercept docker exec sessions, exfiltrate skills and prompts +- **Worst case:** a remote VM connected back to the LAN via VPN is the worst of both worlds — internet-facing attack surface + full LAN access. This pattern should never be built. +- Multiple agents sharing the same remote host creates cross-tenant bleed risk. 
+ +**What remote topology can offer:** +- Better isolation from the developer laptop — a compromised container doesn't reach `~/.ssh` or local credentials unless explicitly forwarded +- Ephemeral compute — a cloud VM torn down after each session leaves no persistent attack surface +- Cloud-native network controls (Security Groups, VPC firewall, PrivateLink) can be more granular than home router rules +- VPC flow logs + CloudTrail give an audit trail that a home network doesn't + +--- + +## Data sensitivity — when "on-prem" matters + +Data that should not leave the local network absent extraordinary controls: + +| Data type | Why | +|---|---| +| Private SSH keys | If copied to remote, stored outside your control; lateral movement via key reuse | +| Secrets forwarded into containers (API tokens, OAuth tokens) | In-transit exposure; remote machine persistence risk | +| Source code under NDA or with unreleased IP | Contractual and competitive risk | +| PHI (HIPAA) | Requires BAA with every system that touches it; standard cloud VMs don't qualify | +| PII of EU residents (GDPR) | Cannot legally transit US infrastructure without SCCs | +| Internal API credentials for LAN systems | Sending to a remote agent that can reach back via VPN creates a remote-controlled pivot | + +Data that can go remote with proper controls: +- Public open-source code +- Non-sensitive project scaffolding +- Prompts that don't contain embedded secrets +- Derived outputs (build artifacts, test results) that don't contain source data + +--- + +## Blast radius comparison + +### Worst case: local container compromise + +1. Container escape via kernel exploit → developer laptop +2. From laptop: `~/.ssh/id_rsa`, `~/.aws/credentials`, browser session cookies, macOS Keychain, `~/.claude` +3. Lateral movement to LAN — internal services, NAS, other dev machines +4. Outbound: anything the home/office network allows + +### Worst case: remote container compromise + +1. Container escape → remote host +2. 
From remote host: host environment credentials, any attached IAM role, secrets in the host filesystem +3. If a VPN or SSH tunnel links remote machine to local network → full lateral movement back through that tunnel +4. Cloud API access if the VM has IAM permissions → S3, RDS, EC2, Secrets Manager, etc. + +**Remote blast radius is potentially larger than local if:** +- The remote VM has cloud IAM permissions broader than the local laptop environment +- The remote machine is connected back to the local network via VPN (creating a pivot) +- Multiple agents share the same remote host + +**Remote blast radius is smaller if:** +- The remote VM is strictly isolated (no VPN back to LAN) +- IAM role is locked to minimum necessary permissions +- Remote host is ephemeral and torn down after each session + +Key insight: once a container is compromised via prompt injection, the blast radius is dominated by what the agent *can reach*, not by where it physically runs. Studies have measured an 82.4% inter-agent compromise rate in multi-agent systems. + +--- + +## Credentials and secrets + +### Local topology (current claude-bottle) + +- Secrets live in the host environment or are prompted from `/dev/tty` +- Forwarded to containers via `-e NAME` (not `=value`), never on argv, never in env-files for secrets +- On container teardown, secrets are gone from that process space +- Risk: container escape to host reaches the host env where the parent process ran + +### Remote topology + +Secrets must travel from their source to the remote machine. Mechanisms in increasing security order: + +1. **SSH env forwarding (`AcceptEnv`/`SendEnv`)** — secrets in plaintext in the SSH session; logged by some SSH daemon configurations +2. **Encrypted orchestration channel** — secrets encrypted in transit, but remote machine must decrypt and expose them in memory +3. 
**Vault/cloud secrets manager + dynamic credentials** — remote machine fetches its own short-lived secrets; local machine never sends secrets at all (best option) + +An 8,640x reduction in abuse window comes from switching from 90-day keys to 15-minute tokens. Static long-lived tokens (`CLAUDE_CODE_OAUTH_TOKEN`, `GITLAB_TOKEN`) are the biggest risk in a remote topology. + +**Proxy-inject pattern:** agent makes unauthenticated requests; a proxy outside the container injects credentials per-request. The container never sees the raw token. Anthropic's secure deployment docs recommend this pattern. + +--- + +## Egress and exfiltration risk + +### Local topology + +- Monitoring: whatever the home/office router supports — usually minimal +- Containment: `--network none` + a proxy socket provides the strongest containment; claude-bottle does not currently do this +- DLP: essentially none unless specifically deployed on the LAN +- Domain fronting risk: even allowlisted-domain proxies can be bypassed via domain fronting — an agent that can reach `api.anthropic.com` could relay data to an attacker-controlled backend through that domain +- **claude-bottle today: containers have full outbound internet access. No egress restrictions.** + +### Remote topology (cloud VM) + +- VPC flow logs capture every connection attempt by IP/port — better than most home networks if configured +- Security Groups can allowlist specific endpoints; a NAT gateway can be locked down +- DLP tooling is more mature in cloud environments (AWS Macie, GCP DLP API, Cloudflare AI Gateway) +- But only if configured. A raw cloud VM with default settings has worse egress monitoring than a corporate network. + +Strongest exfiltration controls for either topology: +1. `--network none` in Docker + a unix socket proxy that enforces domain allowlists +2. TLS inspection at the proxy to defeat domain fronting +3. 
Audit log all outbound traffic at the proxy + +--- + +## Compliance + +### HIPAA + +- PHI must stay within HIPAA-covered infrastructure. Standard commercial cloud VMs require a signed BAA with the provider. AWS Bedrock, Azure OpenAI, and Google Cloud can be configured with BAA coverage; a generic EC2 cannot. +- If the agent processes any PHI, the remote machine must be in a HIPAA-qualified environment. +- "Minimum necessary" rule: an agent with broad filesystem access to a repo containing PHI violates this unless carefully scoped. + +### GDPR + +- EU personal data cannot legally be sent to US infrastructure without Standard Contractual Clauses or adequacy decisions. +- Local execution (if the developer is EU-based) sidesteps this. Remote execution on a US cloud VM requires SCCs. + +### SOC2 + +- No specific data residency mandate, but all infrastructure that touches in-scope data must implement the trust criteria. A remote VM needs audit logs, access controls, and continuous monitoring — more operational overhead than local execution. 
+ +--- + +## Decision heuristics + +| Scenario | Recommendation | +|---|---| +| Solo developer, personal projects, no regulated data | Local is fine; add egress proxy for defense-in-depth | +| Regulated data (HIPAA, GDPR, SOC2) | Local unless the remote environment is fully qualified | +| Long-running automated tasks, no secrets in content | Remote VM acceptable with egress controls and ephemeral lifecycle | +| Parallel agent fleet | Remote VM cluster with per-agent IAM roles, strict egress, centralized secret management | +| Agent receives credentials that give network access to LAN | Keep local; never build a VPN-connected remote agent that can pivot back to the LAN | +| Source code with embedded secrets (`.env`, API keys in config) | Local only, or sanitize before sending to remote | +| Lack infrastructure maturity for proper remote config | Local — a poorly configured VM is strictly worse than a well-configured local container | + +--- + +## Concrete recommendations if extending claude-bottle for remote + +1. **Never build the VPN-pivot pattern.** A remote agent connected back to the LAN via VPN is the worst of both worlds. If a remote agent needs LAN resources, expose those through a narrow API, not a VPN. + +2. **Add `--network none` + a proxy socket, even locally first.** The single highest-leverage change available right now. Bounds egress to allowlisted domains, prevents arbitrary exfiltration regardless of topology. Anthropic's secure deployment docs show exactly how to do this. + +3. **Use dynamic, short-lived credentials for the remote context.** Replace static tokens with per-task dynamically issued credentials (Vault with approle auth, or cloud workload identity federation). This eliminates the "credential concentration on remote host" problem. + +4. **Proxy-inject credentials; don't send them to the container.** The proxy runs on the remote host (not in the container); the credential lives only in the proxy process. + +5. 
**Scope IAM/cloud permissions to the minimum.** No `*:*` policies. No admin roles on the VM. + +6. **Make the remote VM ephemeral.** Spin it up for the session, tear it down when done. No persistent credentials or data between sessions. + +7. **Enable VPC flow logs and forward them somewhere.** Going remote buys you egress monitoring you don't have locally — but only if configured. + +8. **Validate hooks before execution.** `init-work` / `init-free-agent` copies a project into the container. That project may contain poisoned `.claude/settings.json` hooks (CVE-2025-59536 vector). `--dangerously-skip-permissions` removes the last gate; consider validating hooks before execution. + +--- + +## Bottom line + +For the current claude-bottle use case (developer feature implementation, no regulated data, +single developer), local execution is the right default. The biggest unaddressed risk +right now isn't topology — it's that containers have unrestricted outbound internet access. +Adding `--network none` + a proxy socket would be higher-leverage than any topology change. + +Remote execution becomes worth the complexity for parallelism at scale, long-running +unattended tasks, or strict separation of agent compute from developer hardware — but +only with egress controls, ephemeral lifecycle, and dynamic credential management in place. 
# require_docker — dies (via `die`) with install pointers when no `docker`
# command resolves on PATH; returns 0 without output when one does.
require_docker() {
  if command -v docker >/dev/null 2>&1; then
    return 0
  fi
  info "Docker is required but was not found on PATH."
  info "macOS: install Docker Desktop https://docs.docker.com/desktop/install/mac-install/"
  info "Linux: install Docker Engine https://docs.docker.com/engine/install/"
  die "docker not found"
}

# image_exists <ref> — returns 0 when `docker image inspect <ref>` succeeds,
# non-zero otherwise. All docker output is discarded.
image_exists() {
  local ref="${1:?image_exists: missing image reference}"
  docker image inspect "$ref" >/dev/null 2>&1
}

# container_exists <name> — returns 0 if a container (running or stopped)
# is named exactly <name>, else non-zero.
#
# The name is compared as a fixed string against the names docker reports.
# It is deliberately NOT interpolated into a `-f name=^…$` filter: that
# filter is a regular expression, so a name such as `my.app` would also
# match a container called `myXapp`.
container_exists() {
  local name="${1:?container_exists: missing container name}"
  # A container can report several comma-separated names; put one per line
  # so `grep -x` compares whole names only. If docker fails, nothing is
  # printed and grep reports "not found".
  docker ps -a --format '{{.Names}}' 2>/dev/null |
    tr ',' '\n' |
    grep -Fxq -- "$name"
}

# slugify <name> — prints a DNS-safe slug (lowercase, non-alnum runs → '-',
# leading/trailing '-' trimmed) on stdout, without a trailing newline.
# Dies if the result is empty.
#
# tr and sed run under LC_ALL=C so that `[:upper:]` and `a-z` mean ASCII
# only, whatever locale the caller's shell is in; every other byte is
# treated as a separator.
slugify() {
  local input="${1:?slugify: missing name}"
  local slug
  slug="$(
    printf '%s' "$input" |
      LC_ALL=C tr '[:upper:]' '[:lower:]' |
      LC_ALL=C sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//'
  )"
  if [ -z "$slug" ]; then
    die "name '${input}' produced an empty slug; use alphanumeric characters"
  fi
  printf '%s' "$slug"
}

# build_image <ref> <context> — invokes `docker build` every call. The
# layer cache makes no-change rebuilds cheap (typically <1s); always running
# the build means edits to the Dockerfile (or anything COPY'd in) take
# effect on the next cli.sh without the user having to manually `docker
# rmi` first.
+build_image() { + local ref="${1:?build_image: missing image reference}" + local context="${2:?build_image: missing build context directory}" + + info "building image ${ref} from ${context} (layer cache keeps repeat builds fast)" + docker build -t "$ref" "$context" +} + +# build_image_with_cwd +# +# Builds a thin derived image that copies the contents of into +# /home/node/workspace (owned by node:node) and sets WORKDIR there, so +# the launched claude session starts inside the user's project. +# +# The Dockerfile is piped via stdin (`-f -`) so no file is written into +# — only the build context is read from there. Any .dockerignore +# already in is honored automatically by docker build. +# +# A trust-dialog entry for /home/node/workspace is added to +# ~/.claude.json during the build, because the baked-in entry in the +# base image only covers /home/node and claude's "trust this folder" +# prompt is keyed on cwd. +build_image_with_cwd() { + local derived="${1:?build_image_with_cwd: missing derived ref}" + local base="${2:?build_image_with_cwd: missing base ref}" + local cwd="${3:?build_image_with_cwd: missing cwd}" + + if [ ! -d "$cwd" ]; then + die "cwd not found at ${cwd}" + fi + + info "building image ${derived} from ${base} with ${cwd} -> /home/node/workspace" + docker build -t "$derived" -f - "$cwd" < — fails with a clear message if the named env var is +# unset or empty. Crucially does NOT print the value, the length, or any +# substring; only the variable name is echoed. +# +# Usage: +# require_env ANTHROPIC_API_KEY +require_env() { + local name="${1:-}" + if [ -z "$name" ]; then + die "require_env: missing variable name argument" + fi + + # Indirect expansion to read the named variable without naming it twice. + local value="${!name-}" + if [ -z "$value" ]; then + die "required env var ${name} is not set. Export it in your shell and re-run." 
+ fi +} diff --git a/lib/env_resolve.sh b/lib/env_resolve.sh new file mode 100644 index 0000000..872a99b --- /dev/null +++ b/lib/env_resolve.sh @@ -0,0 +1,205 @@ +#!/usr/bin/env bash +# Env resolver. Walks the env entries for one agent in claude-bottle.json +# and produces: +# 1. The list of `docker run` arg fragments needed to forward each var. +# Both `secret` and `interpolated` entries become `-e NAME` (no +# `=value`) so Docker inherits the value from this process env +# without rendering it on argv or persisting it to disk. +# Only `literal` entries are written to a host-disk env-file and +# forwarded with `--env-file `. +# 2. The export side-effect of populating this process's env with +# secret values prompted from the user, and with interpolated +# values copied from the matching host var, so `-e NAME` actually +# has something to inherit. +# +# Each env entry is a JSON string. Mode is selected by sentinel prefix: +# "?" → secret (prompt at runtime). Bare "?" uses a default +# prompt; "?" uses as the prompt body. +# "${HOST_VAR}" → interpolated from $HOST_VAR in the host process env +# any other str → literal (the JSON string is the value verbatim) +# A literal whose text starts with "?" or matches "${IDENT}" is not +# representable in v1 — pick a different value or change the convention. +# +# Critical rules (re-read CLAUDE.md "Checking env vars safely"): +# - NEVER echo, log, or interpolate the value of a secret or +# interpolated env var. Both modes are treated as potentially +# sensitive: nothing about their value (other than presence / +# length) ever lands on disk, in a log line, or on argv. +# - The env-file written for literal values lives under `mktemp -d` +# with mode 600 and is removed on script exit by the caller's trap. +# Secrets and interpolated values never go to this file. +# - Errors mention only the variable NAME, never any portion of the value. +# +# Idempotent: safe to source multiple times. 
# env_entry_kind <raw-string> — prints "secret", "interpolated", or
# "literal" (no trailing newline) based on the sentinel form of the entry:
#   leading "?"            → secret
#   exactly "${IDENT}"     → interpolated
#   anything else          → literal
# Only the classification is printed, never any part of the entry itself.
env_entry_kind() {
  local raw="${1-}"
  # Kept in a variable so the regex is not subject to shell quoting rules
  # inside [[ =~ ]].
  local interp_re='^\$\{[A-Za-z_][A-Za-z0-9_]*\}$'
  if [[ "$raw" == \?* ]]; then
    printf 'secret'
  elif [[ "$raw" =~ $interp_re ]]; then
    printf 'interpolated'
  else
    printf 'literal'
  fi
}

# env_entry_secret_prompt <raw-string> — prints everything after one
# leading "?" of a secret entry. Prints nothing for a bare "?"; the caller
# is responsible for falling back to a default prompt in that case.
env_entry_secret_prompt() {
  local raw="${1-}"
  local body="${raw#\?}"
  printf '%s' "$body"
}

# env_entry_interpolated_from <raw-string> — for an interpolated entry,
# prints the host var name (the identifier between `${` and `}`).
env_entry_interpolated_from() {
  local raw="${1-}"
  local host_var="${raw#\$\{}"
  printf '%s' "${host_var%\}}"
}

# _read_secret_silent <NAME> [<prompt-body>] — prompts on /dev/tty for a
# secret without echoing keystrokes and stores it in the variable named
# <NAME> via `printf -v`.
#
# Prompt text: "<prompt-body> (input hidden): " when <prompt-body> is
# non-empty, otherwise
# "claude-bottle: secret value for <NAME> (input hidden): ".
#
# Dies when <NAME> is not a plain identifier, when neither stdin nor stderr
# is a tty, or when the value read is empty. The value is never printed.
#
# Locals carry a `_rss_` prefix so that a target called `value`, `target`
# or `prompt_body` is assigned in the caller's scope rather than to a local
# of this function.
_read_secret_silent() {
  local _rss_target="${1:?_read_secret_silent: missing target var name}"
  local _rss_prompt="${2-}"
  local _rss_value=""
  local _rss_ident_re='^[A-Za-z_][A-Za-z0-9_]*$'

  # `printf -v` accepts array references and evaluates their subscript, so
  # anything other than a bare identifier is refused before it gets there.
  if ! [[ "$_rss_target" =~ $_rss_ident_re ]]; then
    die "cannot prompt for secret '${_rss_target}': not a valid shell variable name."
  fi

  if [ ! -t 0 ] && [ ! -t 2 ]; then
    die "cannot prompt for secret '${_rss_target}': no tty available. Run from an interactive shell."
  fi

  # Prompt and read both go through the controlling tty, so a piped stdin
  # does not get in the way.
  if [ -n "$_rss_prompt" ]; then
    printf '%s (input hidden): ' "$_rss_prompt" >/dev/tty
  else
    printf 'claude-bottle: secret value for %s (input hidden): ' "$_rss_target" >/dev/tty
  fi

  # One raw line, no echo. A failed read (EOF) leaves the value empty and
  # is reported by the emptiness check below.
  IFS= read -rs _rss_value </dev/tty || true
  # `read -s` does not echo the Enter key; finish the prompt line ourselves.
  printf '\n' >/dev/tty

  if [ -z "$_rss_value" ]; then
    die "empty value provided for secret '${_rss_target}'. Re-run and supply a value."
  fi

  # Indirect assignment, then scrub the local copy.
  printf -v "$_rss_target" '%s' "$_rss_value"
  _rss_value=""
}

# env_resolve <manifest_file> <agent> <env_file> <out_args>
#
# Iterates the agent's env entries. For each entry:
#   - secret       → ALWAYS prompt for the value (even if already set in
#                    this process env), export it into this process, and
#                    append `-e NAME` to <out_args> (one arg per
#                    line; a NAME with no `=value`).
#   - interpolated → read the host process env value of the named host var;
#                    if unset, die with the host-var name. Copy into this
#                    process under the target name and append `-e NAME` to
#                    <out_args>. Never written to disk.
#   - literal      → append `NAME=VALUE` to <env_file>; the resolver
#                    does NOT add anything to <out_args> for this entry
#                    (the caller adds a single `--env-file <env_file>`
#                    if the file is non-empty).
#
# The caller is responsible for:
#   - creating <env_file> as an empty file with mode 600 under a
#     mktemp dir,
#   - creating <out_args> as an empty file,
#   - cleaning both up on exit (trap),
#   - reading <out_args> line-by-line into the docker-run argv.
#
# Returns 0 on success, dies on any error.
#
# Implementation notes:
#   - Every local is prefixed `_er_`. Secret and interpolated values are
#     assigned to a variable whose name comes from the manifest; with
#     unprefixed locals an entry called `name` or `raw` would overwrite the
#     loop state, and the value would then be written to <out_args> in
#     place of the variable name.
#   - Names of secret/interpolated entries must be plain identifiers.
#     `printf -v` and `export` evaluate array subscripts, so a name like
#     `x[$(cmd)]` must never reach them.
env_resolve() {
  local _er_manifest="${1:?env_resolve: missing manifest file}"
  local _er_agent="${2:?env_resolve: missing agent name}"
  local _er_env_file="${3:?env_resolve: missing env_file path}"
  local _er_out_args="${4:?env_resolve: missing out_args path}"
  local _er_ident_re='^[A-Za-z_][A-Za-z0-9_]*$'
  local _er_name _er_raw _er_kind _er_from _er_prompt

  while IFS= read -r _er_name; do
    [ -z "$_er_name" ] && continue

    # manifest_env_entry dies inside the command substitution's subshell;
    # propagate that instead of carrying on with an empty value.
    _er_raw="$(manifest_env_entry "$_er_manifest" "$_er_agent" "$_er_name")" || exit 1
    _er_kind="$(env_entry_kind "$_er_raw")"

    case "$_er_kind" in
      secret | interpolated)
        if ! [[ "$_er_name" =~ $_er_ident_re ]]; then
          die "env entry ${_er_name} is not a valid environment variable name (expected [A-Za-z_][A-Za-z0-9_]*)."
        fi
        case "$_er_name" in
          _er_*) die "env entry ${_er_name}: names starting with _er_ are reserved by the env resolver." ;;
        esac
        ;;
    esac

    case "$_er_kind" in
      secret)
        # Always prompt — a "?"-prefixed entry means the value must be
        # supplied interactively at launch, even if a same-named variable
        # is already present in the parent shell.
        _er_prompt="$(env_entry_secret_prompt "$_er_raw")"
        _read_secret_silent "$_er_name" "$_er_prompt"
        # Exported so `docker run -e NAME` (no value) inherits it.
        export "${_er_name?}"
        printf -- '-e\n%s\n' "$_er_name" >>"$_er_out_args"
        ;;
      interpolated)
        _er_from="$(env_entry_interpolated_from "$_er_raw")"
        # Indirect expansion is used for the presence check and the copy
        # only; the value is never written to disk or put on argv.
        if [ -z "${!_er_from-}" ]; then
          die "env entry ${_er_name} is interpolated from \$${_er_from}, but \$${_er_from} is unset or empty in the host environment."
        fi
        printf -v "$_er_name" '%s' "${!_er_from}"
        export "${_er_name?}"
        printf -- '-e\n%s\n' "$_er_name" >>"$_er_out_args"
        ;;
      literal)
        # docker --env-file is line-oriented, so reject multi-line values
        # here rather than letting docker fail with a confusing message.
        case "$_er_raw" in
          *$'\n'*) die "env entry ${_er_name} (literal) contains a newline; docker --env-file cannot represent multi-line values." ;;
        esac
        printf '%s=%s\n' "$_er_name" "$_er_raw" >>"$_er_env_file"
        ;;
    esac
  done < <(manifest_env_names "$_er_manifest" "$_er_agent")
}

# info <message...> — informational message to stderr, prefixed
# "claude-bottle: ". Arguments are joined with single spaces.
info() {
  printf '%s\n' "claude-bottle: $*" >&2
}

# warn <message...> — warning to stderr, prefixed "claude-bottle: warning: ".
warn() {
  printf '%s\n' "claude-bottle: warning: $*" >&2
}

# die <message...> — error to stderr, prefixed "claude-bottle: error: ",
# then exits the current (sub)shell with status 1.
die() {
  printf '%s\n' "claude-bottle: error: $*" >&2
  exit 1
}
# require_jq — dies with an install pointer if `jq` is not on PATH.
require_jq() {
  if command -v jq >/dev/null 2>&1; then
    return 0
  fi
  info "jq is required on the host for claude-bottle manifest parsing."
  info "macOS: brew install jq"
  info "Linux: apt-get install jq (or your distro equivalent)"
  die "jq not found"
}

# _manifest_validate_file <path> — dies unless <path> parses as JSON and
# its top-level value is an object. `jq -e` exits 0 when the filter's last
# output is true and 1 when it is false or null; any other status is
# treated here as "could not be parsed".
_manifest_validate_file() {
  local path="${1:?_manifest_validate_file: missing path}"
  local rc=0
  jq -e 'type == "object"' "$path" >/dev/null 2>&1 || rc=$?
  case "$rc" in
    0) return 0 ;;
    1) die "claude-bottle.json at ${path} must contain a JSON object at the top level" ;;
    *) die "claude-bottle.json at ${path} is not valid JSON" ;;
  esac
}

# manifest_resolve <cwd> — looks for claude-bottle.json in <cwd> and in
# $HOME and prints the resulting manifest JSON on stdout.
#   - only one file present → that file is printed unchanged;
#   - both present          → prints {"boxes": …, "agents": …} where each
#     map is home merged with cwd using jq's `*` operator. `*` merges
#     objects recursively, so for a box or agent defined in both files the
#     cwd value wins key by key and keys that only the home entry has are
#     kept.
# Dies if neither file is found, or if a found file is not valid JSON or
# is not a JSON object.
manifest_resolve() {
  local cwd="${1:?manifest_resolve: missing cwd}"
  local cwd_file="${cwd}/claude-bottle.json"
  local home_file="${HOME}/claude-bottle.json"
  local found=""

  if [ -f "$cwd_file" ]; then
    _manifest_validate_file "$cwd_file"
    found="cwd"
  fi
  if [ -f "$home_file" ]; then
    _manifest_validate_file "$home_file"
    found="${found}+home"
  fi

  case "$found" in
    cwd) cat -- "$cwd_file" ;;
    +home) cat -- "$home_file" ;;
    cwd+home)
      # Home is the base (.[0]); cwd (.[1]) is merged over it.
      jq -s '{
        "boxes": ((.[0].boxes // {}) * (.[1].boxes // {})),
        "agents": ((.[0].agents // {}) * (.[1].agents // {}))
      }' "$home_file" "$cwd_file"
      ;;
    *) die "no claude-bottle.json found in ${cwd} or ${HOME}" ;;
  esac
}

# manifest_has_agent <manifest_file> <name> — returns 0 if the agent key
# exists under .agents in the manifest, non-zero otherwise (including when
# jq itself fails).
manifest_has_agent() {
  local manifest_file="${1:?manifest_has_agent: missing manifest file}"
  local name="${2:?manifest_has_agent: missing agent name}"
  jq -e --arg n "$name" '.agents | has($n)' "$manifest_file" >/dev/null 2>&1
}

# manifest_require_agent <manifest_file> <name> — like manifest_has_agent
# but dies with a message listing the defined agent names when the agent
# is missing.
manifest_require_agent() {
  local manifest_file="${1:?manifest_require_agent: missing manifest file}"
  local name="${2:?manifest_require_agent: missing agent name}"
  if manifest_has_agent "$manifest_file" "$name"; then
    return 0
  fi

  local available
  available="$(jq -r '.agents | keys_unsorted | join(", ")' "$manifest_file" 2>/dev/null || echo "")"
  if [ -z "$available" ]; then
    die "agent '${name}' not defined in claude-bottle.json (manifest is empty)."
  fi
  die "agent '${name}' not defined in claude-bottle.json. Available: ${available}"
}

# manifest_env_names <manifest_file> <agent_name> — prints one env-var name
# per line on stdout (the keys of boxes[agent.box].env, in declaration
# order). No values. Prints nothing if the agent has no box or the box has
# no env.
manifest_env_names() {
  local manifest_file="${1:?manifest_env_names: missing manifest file}"
  local name="${2:?manifest_env_names: missing agent name}"
  jq -r --arg n "$name" '
    .agents[$n].box as $box |
    if ($box == null or $box == "") then empty
    else (.boxes[$box].env // {} | keys_unsorted[])
    end
  ' "$manifest_file"
}

# manifest_env_entry <manifest_file> <agent_name> <var_name> — prints the
# raw string value of one env entry on stdout (jq -r: no quoting, no JSON
# encoding). Entries live on the agent's box (boxes[agent.box].env).
# Dies if the agent has no box, or if the entry is not a JSON string; the
# prompt-at-runtime form is "?", not JSON null.
# Callers must not log the output: for literal entries it IS the value.
manifest_env_entry() {
  local manifest_file="${1:?manifest_env_entry: missing manifest file}"
  local agent="${2:?manifest_env_entry: missing agent name}"
  local var="${3:?manifest_env_entry: missing env var name}"

  local box
  box="$(jq -r --arg a "$agent" '.agents[$a].box // ""' "$manifest_file")"
  [ -n "$box" ] || die "env entry ${var} for agent ${agent}: agent has no 'box' field"

  local entry_type
  entry_type="$(jq -r --arg b "$box" --arg v "$var" '.boxes[$b].env[$v] | type' "$manifest_file")"
  [ "$entry_type" = "string" ] ||
    die "env entry ${var} for agent ${agent} must be a JSON string (was ${entry_type}). Use \"?\" for prompt-at-runtime."

  jq -r --arg b "$box" --arg v "$var" '.boxes[$b].env[$v]' "$manifest_file"
}

# manifest_skills <manifest_file> <agent_name> — prints one skill name per
# line on stdout (the elements of agent.skills, in order).
manifest_skills() {
  local manifest_file="${1:?manifest_skills: missing manifest file}"
  local name="${2:?manifest_skills: missing agent name}"
  jq -r --arg n "$name" '.agents[$n].skills // [] | .[]' "$manifest_file"
}

# manifest_prompt <manifest_file> <agent_name> — prints the agent's prompt
# string on stdout as emitted by `jq -r`. Prints an empty string when the
# agent has no prompt.
manifest_prompt() {
  local manifest_file="${1:?manifest_prompt: missing manifest file}"
  local name="${2:?manifest_prompt: missing agent name}"
  jq -r --arg n "$name" '.agents[$n].prompt // ""' "$manifest_file"
}

# manifest_agent_box <manifest_file> <agent_name> — prints the box name
# referenced by the agent on stdout, or an empty string if the agent has no
# "box" field.
manifest_agent_box() {
  local manifest_file="${1:?manifest_agent_box: missing manifest file}"
  local name="${2:?manifest_agent_box: missing agent name}"
  jq -r --arg n "$name" '.agents[$n].box // ""' "$manifest_file"
}

# manifest_has_box <manifest_file> <box_name> — returns 0 if the named box
# exists under .boxes, non-zero otherwise (including when jq itself fails,
# e.g. because the manifest has no "boxes" object).
manifest_has_box() {
  local manifest_file="${1:?manifest_has_box: missing manifest file}"
  local box_name="${2:?manifest_has_box: missing box name}"
  jq -e --arg b "$box_name" '.boxes | has($b)' "$manifest_file" >/dev/null 2>&1
}

# manifest_require_box <manifest_file> <box_name> — like manifest_has_box
# but dies with a message listing the defined box names when the box is
# missing.
manifest_require_box() {
  local manifest_file="${1:?manifest_require_box: missing manifest file}"
  local box_name="${2:?manifest_require_box: missing box name}"
  if manifest_has_box "$manifest_file" "$box_name"; then
    return 0
  fi

  local available
  available="$(jq -r '.boxes // {} | keys_unsorted | join(", ")' "$manifest_file" 2>/dev/null || echo "")"
  if [ -z "$available" ]; then
    die "box '${box_name}' not defined in claude-bottle.json (no boxes defined)."
  fi
  die "box '${box_name}' not defined in claude-bottle.json. Available boxes: ${available}"
}

# manifest_box_ssh <manifest_file> <box_name> — prints one compact JSON
# object per line for each ssh entry in boxes[box_name].ssh. Prints nothing
# if the box has no ssh array or it is empty.
manifest_box_ssh() {
  local manifest_file="${1:?manifest_box_ssh: missing manifest file}"
  local box_name="${2:?manifest_box_ssh: missing box name}"
  jq -c --arg b "$box_name" '.boxes[$b].ssh // [] | .[]' "$manifest_file"
}

# manifest_ssh <manifest_file> <agent_name> — prints one compact JSON
# object per line for each ssh entry associated with the agent. Entries are
# resolved through the agent's "box" field (boxes[box].ssh); an agent
# without a "box" field prints nothing and returns 0.
# Each object has: Host, IdentityFile, Hostname, User, Port (required);
# KnownHostKey (optional).
#
# Built on manifest_agent_box and manifest_box_ssh so the box lookup and
# the ssh filter each live in one place.
manifest_ssh() {
  local manifest_file="${1:?manifest_ssh: missing manifest file}"
  local name="${2:?manifest_ssh: missing agent name}"
  local box
  box="$(manifest_agent_box "$manifest_file" "$name")"
  if [ -z "$box" ]; then
    return 0
  fi
  manifest_box_ssh "$manifest_file" "$box"
}

# Source guard: a second `.` of this file is a no-op.
if [ -n "${CLAUDE_BOTTLE_LIB_SKILLS_SOURCED:-}" ]; then
  return 0
fi
CLAUDE_BOTTLE_LIB_SKILLS_SOURCED=1

_iso_lib_skills_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=./log.sh
. "${_iso_lib_skills_dir}/log.sh"

# Container-side home/skills paths. The Dockerfile sets the user to `node`
# (uid 1000) with home /home/node, so this is where claude-code looks.
CLAUDE_BOTTLE_CONTAINER_HOME="${CLAUDE_BOTTLE_CONTAINER_HOME:-/home/node}"
CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR="${CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR:-${CLAUDE_BOTTLE_CONTAINER_HOME}/.claude/skills}"

# _skill_name_is_safe <name> — returns 0 when <name> is a non-empty relative
# path with no `.` / `..` components, else 1. Skill names are spliced into
# both a host path (docker cp source) and a container path (rm -rf target),
# so a name like `..` must never get that far.
_skill_name_is_safe() {
  case "${1-}" in
    '' | /* | . | .. | ./* | ../* | */. | */.. | */./* | */../*) return 1 ;;
  esac
  return 0
}

# host_skill_dir <name> — prints the absolute host path for a skill.
# Pure string formatting: it does not check that the path exists.
host_skill_dir() {
  local name="${1:?host_skill_dir: missing skill name}"
  printf '%s/.claude/skills/%s' "${HOME:?HOME not set}" "$name"
}

# host_skill_exists <name> — returns 0 if the host has a skill directory
# at ~/.claude/skills/<name>/, else 1. Unsafe names (see
# _skill_name_is_safe) are reported as not existing.
host_skill_exists() {
  local name="${1:?host_skill_exists: missing skill name}"
  _skill_name_is_safe "$name" && [ -d "$(host_skill_dir "$name")" ]
}

# require_host_skill <name> — dies with a clear message if the named
# skill is missing on the host (or the name is not a safe relative path).
# The error names the skill and the path checked.
require_host_skill() {
  local name="${1:?require_host_skill: missing skill name}"
  if ! _skill_name_is_safe "$name"; then
    die "invalid skill name '${name}': must be a relative path without '.' or '..' components."
  fi
  if ! host_skill_exists "$name"; then
    die "skill '${name}' not found on host at $(host_skill_dir "$name"). Create it under ~/.claude/skills/, then re-run."
  fi
}

# skills_validate_all [<name> ...] — checks every named skill
# exists on the host, dies on the first one that does not. No copy yet.
# Use this BEFORE the confirmation prompt so the user does not get
# asked y/N for a plan that's already known to fail.
skills_validate_all() {
  local n
  for n in "$@"; do
    require_host_skill "$n"
  done
}

# skills_copy_into <container> [<name> ...]
#
# For each named skill:
#   1. ensure ~/.claude/skills/ exists in the container (mkdir -p)
#   2. `docker cp <host_skill_dir>/. <container>:<skills_dir>/<name>/`
#      — the trailing `/.` on the source preserves directory structure
#        and copies the contents into a freshly-created destination dir,
#        avoiding the docker-cp quirk where copying `dir` (no slash) into
#        an existing `dest/` would nest as `dest/dir/`.
#
# The destination directory is removed first if it already exists, so
# repeated calls produce a deterministic state. Every docker step is
# checked explicitly; the function dies naming the step that failed rather
# than relying on the caller's `set -e`.
skills_copy_into() {
  local container="${1:?skills_copy_into: missing container name}"
  shift
  if [ "$#" -eq 0 ]; then
    return 0
  fi

  # Ensure the target parent dir exists in the container. This is a
  # no-op if the Dockerfile already created it, but cheap and defensive.
  docker exec "$container" mkdir -p "${CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR}" >/dev/null \
    || die "could not create ${CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR} in ${container}."

  local n src dst
  for n in "$@"; do
    # Re-check here as well: this function may be called without a prior
    # skills_validate_all, and $dst is about to be passed to `rm -rf`.
    if ! _skill_name_is_safe "$n"; then
      die "invalid skill name '${n}': must be a relative path without '.' or '..' components."
    fi
    src="$(host_skill_dir "$n")" || die "could not resolve host path for skill '${n}'."
    if [ ! -d "$src" ]; then
      die "skill '${n}' disappeared from host between validation and copy at ${src}."
    fi
    dst="${CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR}/${n}"
    info "copying skill ${n} into ${container}:${dst}"
    # Wipe any prior copy so we're deterministic, then create empty dst
    # and copy contents-of-src into it via the `/.` source-suffix trick.
    docker exec "$container" rm -rf -- "$dst" >/dev/null \
      || die "could not remove previous copy of skill '${n}' at ${container}:${dst}."
    docker exec "$container" mkdir -p "$dst" >/dev/null \
      || die "could not create ${container}:${dst}."
    docker cp "${src}/." "${container}:${dst}/" >/dev/null \
      || die "docker cp of skill '${n}' into ${container}:${dst} failed."
  done
}

# diff --git a/lib/ssh.sh b/lib/ssh.sh
# new file mode 100644
# index 0000000..5ac4fa2
# --- /dev/null
# +++ b/lib/ssh.sh
# @@ -0,0 +1,205 @@
#!/usr/bin/env bash
# SSH helpers. Validates ssh entries from claude-bottle.json, then sets up SSH
# inside the container via a root-owned ssh-agent so the `node` user (Claude)
# can use the keys for SSH operations but cannot read the key bytes.
#
# Why an in-container agent (not bind-mounted from host): Docker Desktop on
# macOS does not forward Unix-domain socket connect() across the macOS↔Linux
# VM boundary — connect() returns ENOTSUP.
# Running ssh-agent inside the
# container sidesteps that entirely and keeps the same isolation guarantee.
#
# How the isolation works:
#   - Keys are docker cp'd to /root/.claude-bottle-keys/ (mode 700, root-owned).
#     /root itself is mode 700 in the node:22-slim base image, so node (uid
#     1000) cannot even traverse into it.
#   - ssh-agent runs as root, listening on /run/claude-bottle-agent.sock. Each
#     key is loaded with ssh-add, then the key file is deleted. The bytes
#     now live only in the agent process's memory.
#   - The agent socket stays root-only. OpenSSH's ssh-agent enforces a
#     SO_PEERCRED-based UID match: it rejects every connection whose peer
#     euid is neither 0 nor the agent's own uid. chmod'ing the socket open
#     does *not* defeat this — the kernel-level check still rejects node.
#   - To bridge that, a root-owned socat forwarder listens on
#     /run/claude-bottle-agent-public.sock (mode 666) and proxies bytes to the
#     real agent socket. From the agent's view, socat (uid 0) is the peer
#     and passes the UID check. From node's view, the public socket is the
#     accessible endpoint.
#   - node cannot ptrace the root-owned agent or socat (no CAP_SYS_PTRACE in
#     a default container), so /proc/<pid>/mem is off-limits and the key
#     bytes never leave root-owned memory.
#   - ~/.ssh/config in node's home points each Host at the public socket via
#     IdentityAgent, so SSH always reaches the forwarder regardless of
#     SSH_AUTH_SOCK.
#
# Limitation: keys must be passphrase-less. ssh-add prompts on /dev/tty for
# passphrases, but our docker exec has no TTY. Adding SSH_ASKPASS support is
# possible but not implemented in v1.
#
# Each ssh entry is a JSON object (jq -c) with keys:
#   Host          SSH Host alias
#   IdentityFile  absolute path to the private key file on the host
#   Hostname      the actual hostname or IP
#   User          SSH username
#   Port          SSH port (number)
#   KnownHostKey  (optional) host public key — written to known_hosts under
#                 both the Host alias and the Hostname so the lookup works
#                 whether SSH connects via the alias or the raw IP/host.
#
# Idempotent: safe to source multiple times.

if [ -n "${CLAUDE_BOTTLE_LIB_SSH_SOURCED:-}" ]; then
  return 0
fi
CLAUDE_BOTTLE_LIB_SSH_SOURCED=1

_iso_lib_ssh_dir="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=./log.sh
. "${_iso_lib_ssh_dir}/log.sh"

# _ssh_entry_field <entry_json> <field> — prints the raw value of <field>
# from one ssh entry, or nothing when the field is absent or null.
_ssh_entry_field() {
  printf '%s' "$1" | jq -r --arg f "$2" '.[$f] // empty'
}

# ssh_validate_entries <entry_json>... — checks that each entry has every
# required field (Host, IdentityFile, Hostname, User, Port), that Port is
# numeric, that no field destined for ssh_config carries control characters,
# and that its IdentityFile exists on the host. Dies on the first problem.
# With no entries it does nothing and returns 0.
ssh_validate_entries() {
  local entry field value name key
  for entry in "$@"; do
    name="$(_ssh_entry_field "$entry" Host)"
    [ -n "$name" ] || die "ssh entry missing required field 'Host': ${entry}"

    # Every one of these is written verbatim into ssh_config by ssh_setup;
    # a missing one would otherwise surface there as the literal "null".
    for field in IdentityFile Hostname User Port; do
      value="$(_ssh_entry_field "$entry" "$field")"
      [ -n "$value" ] || die "ssh entry '${name}' missing required field '${field}'"
    done

    # A newline (or other control character) in a config-bound value would
    # let the manifest smuggle extra directives into ~/.ssh/config.
    for field in Host Hostname User Port; do
      value="$(_ssh_entry_field "$entry" "$field")"
      case "$value" in
        *[[:cntrl:]]*) die "ssh entry '${name}' field '${field}' contains control characters" ;;
      esac
    done

    value="$(_ssh_entry_field "$entry" Port)"
    case "$value" in
      *[!0-9]*) die "ssh entry '${name}' has a non-numeric Port: ${value}" ;;
    esac

    # Expand a leading ~ so callers can use ~/... paths. Only a bare `~` or
    # a `~/` prefix means $HOME; `~user` is left alone.
    key="$(_ssh_entry_field "$entry" IdentityFile)"
    case "$key" in
      '~') key="$HOME" ;;
      '~/'*) key="${HOME}/${key#'~/'}" ;;
    esac
    [ -f "$key" ] || die "ssh key file not found for host '${name}': ${key}"
  done
}

# ssh_setup <container> <stage_dir> <entry_json>... — sets up SSH in the
# container so node (Claude) can authenticate using each entry's key without
# the key file being readable by node.
#
# Lifecycle:
#   1. Create ~/.ssh (700) for node and /root/.claude-bottle-keys (700) for root.
#   2. docker cp each key into /root/.claude-bottle-keys/, chown root, chmod 600.
#   3. Boot ssh-agent at /run/claude-bottle-agent.sock (root-only), ssh-add each
#      key, delete the key file, rmdir the keys staging dir.
#   4. Boot a root-owned socat forwarder on /run/claude-bottle-agent-public.sock
#      (mode 666) proxying to the agent socket.
# Bridges the UID-match check
#      that would otherwise reject node's connections (see file header).
#   5. Install ~/.ssh/config (IdentityAgent → public socket) and
#      ~/.ssh/known_hosts under node's home.
#
# Arguments:
#   $1     container name or id, as accepted by `docker exec` / `docker cp`
#   $2     existing host-side staging directory; ssh_config and
#          ssh_known_hosts are generated there before being copied in
#   $3...  ssh entries, one compact JSON object each (zero entries is fine)
#
# Every docker step is checked; the function dies with the failing step
# rather than relying on the caller's `set -e`.
ssh_setup() {
  local container="${1:?ssh_setup: missing container}"
  local stage_dir="${2:?ssh_setup: missing stage dir}"
  shift 2

  local container_home="${CLAUDE_BOTTLE_CONTAINER_HOME:-/home/node}"
  local container_ssh="${container_home}/.ssh"
  local agent_socket="/run/claude-bottle-agent.sock"
  local public_socket="/run/claude-bottle-agent-public.sock"
  local keys_dir="/root/.claude-bottle-keys"

  # ~/.ssh for node (700, owned by node).
  _ssh_root_exec "$container" mkdir -p "$container_ssh"
  _ssh_root_exec "$container" chown node:node "$container_ssh"
  _ssh_root_exec "$container" chmod 700 "$container_ssh"

  # /root/.claude-bottle-keys for root (700, root-owned). /root is already 700
  # in node:22-slim, so node can't traverse here either way; setting both
  # layers keeps the intent explicit.
  _ssh_root_exec "$container" mkdir -p "$keys_dir"
  _ssh_root_exec "$container" chown root:root "$keys_dir"
  _ssh_root_exec "$container" chmod 700 "$keys_dir"

  local config_file="${stage_dir}/ssh_config"
  local known_hosts_file="${stage_dir}/ssh_known_hosts"
  : > "$config_file" || die "ssh_setup: cannot write ${config_file}"
  chmod 600 "$config_file"
  : > "$known_hosts_file" || die "ssh_setup: cannot write ${known_hosts_file}"
  chmod 600 "$known_hosts_file"

  local entry name key hostname user port known_host_key container_key_path
  local kh_alias kh_host
  local key_index=0
  local container_key_paths=()
  for entry in "$@"; do
    name="$(printf '%s' "$entry" | jq -r '.Host')"
    key="$(printf '%s' "$entry" | jq -r '.IdentityFile')"
    hostname="$(printf '%s' "$entry" | jq -r '.Hostname')"
    user="$(printf '%s' "$entry" | jq -r '.User')"
    port="$(printf '%s' "$entry" | jq -r '.Port')"
    known_host_key="$(printf '%s' "$entry" | jq -r '.KnownHostKey // empty')"

    # Only a bare `~` or a `~/` prefix means $HOME; `~user` is left alone.
    case "$key" in
      '~') key="$HOME" ;;
      '~/'*) key="${HOME}/${key#'~/'}" ;;
    esac

    # Prefix the staged name with the entry index: two entries whose keys
    # share a basename (e.g. a/id_ed25519 and b/id_ed25519) must not
    # overwrite each other in the staging dir.
    container_key_path="${keys_dir}/${key_index}-$(basename -- "$key")"
    key_index=$((key_index + 1))

    info "copying ssh key for '${name}' -> ${container} (root-only staging)"
    docker cp "$key" "${container}:${container_key_path}" >/dev/null \
      || die "ssh_setup: docker cp of key for '${name}' into ${container} failed"
    _ssh_root_exec "$container" chown root:root "$container_key_path"
    _ssh_root_exec "$container" chmod 600 "$container_key_path"

    container_key_paths+=("$container_key_path")

    # No IdentityFile — IdentityAgent points SSH at the public (forwarded)
    # socket. Pointing at the real agent socket directly would be rejected
    # by ssh-agent's UID-match check (see file header).
    printf 'Host %s\n  HostName %s\n  User %s\n  Port %s\n  IdentityAgent %s\n\n' \
      "$name" "$hostname" "$user" "$port" "$public_socket" >> "$config_file"

    if [ -n "$known_host_key" ]; then
      # Write under both the Host alias and the Hostname so SSH finds the key
      # whether the connection uses the alias (`ssh <alias>`) or a raw IP/host
      # (e.g. git remote URLs that bypass the alias). Skip the duplicate when
      # they're already the same string. Non-default ports use the
      # `[host]:port` known_hosts form.
      if [ "$port" = "22" ]; then
        kh_alias="$name"
        kh_host="$hostname"
      else
        kh_alias="[${name}]:${port}"
        kh_host="[${hostname}]:${port}"
      fi
      printf '%s %s\n' "$kh_alias" "$known_host_key" >> "$known_hosts_file"
      if [ "$hostname" != "$name" ]; then
        printf '%s %s\n' "$kh_host" "$known_host_key" >> "$known_hosts_file"
      fi
    fi
  done

  # Boot the agent, load each key, delete the key files, then start the
  # root-owned socat forwarder that exposes a node-accessible socket. One
  # docker exec so the whole sequence runs under a single `set -eu`: the
  # first failing step (e.g. a passphrase-protected key) aborts it.
  #
  # The script text is a constant; sockets, staging dir and key paths travel
  # as positional parameters so no path is ever re-parsed by the inner shell.
  # (The script is single-quoted here, so it must not contain a single quote.)
  info "starting in-container ssh-agent at ${agent_socket} (forwarded via ${public_socket})"
  local setup_script='set -eu
agent_socket=$1
public_socket=$2
keys_dir=$3
shift 3
# Whatever happens below, do not leave private key files behind.
cleanup_keys() { rm -rf -- "${keys_dir:?}"; }
trap cleanup_keys EXIT
ssh-agent -a "$agent_socket" >/dev/null
for kp in "$@"; do
  SSH_AUTH_SOCK="$agent_socket" ssh-add "$kp"
  rm -f -- "$kp"
done
# Start the forwarder. Detach from the calling shell so it survives this
# docker exec returning. socat (running as root) connects to the agent on
# behalf of node; the UID-match check of the agent sees uid 0 and accepts.
nohup socat "UNIX-LISTEN:${public_socket},fork,reuseaddr,mode=666" "UNIX-CONNECT:${agent_socket}" </dev/null >/dev/null 2>&1 &
# Wait briefly for the forwarder to bind. Without this, an SSH client that
# fires immediately after this script returns can race the listener and hit
# ENOENT/ECONNREFUSED on the public socket.
i=0
while [ "$i" -lt 20 ]; do
  [ -S "$public_socket" ] && break
  i=$((i + 1))
  sleep 0.1
done
[ -S "$public_socket" ] || { echo "claude-bottle: socat forwarder failed to bind ${public_socket}" >&2; exit 1; }
'
  # ${arr[@]+"${arr[@]}"}: an empty array must expand to nothing even under
  # `set -u` on bash older than 4.4.
  docker exec -u 0 "$container" sh -c "$setup_script" sh \
    "$agent_socket" "$public_socket" "$keys_dir" \
    ${container_key_paths[@]+"${container_key_paths[@]}"} \
    || die "ssh_setup: in-container ssh-agent setup failed in ${container}"

  info "writing ${container_ssh}/config"
  docker cp "$config_file" "${container}:${container_ssh}/config" >/dev/null \
    || die "ssh_setup: docker cp of ssh config into ${container} failed"
  _ssh_root_exec "$container" chown node:node "${container_ssh}/config"
  _ssh_root_exec "$container" chmod 600 "${container_ssh}/config"

  if [ -s "$known_hosts_file" ]; then
    info "writing ${container_ssh}/known_hosts"
    docker cp "$known_hosts_file" "${container}:${container_ssh}/known_hosts" >/dev/null \
      || die "ssh_setup: docker cp of known_hosts into ${container} failed"
    _ssh_root_exec "$container" chown node:node "${container_ssh}/known_hosts"
    _ssh_root_exec "$container" chmod 600 "${container_ssh}/known_hosts"
  fi
}

# _ssh_root_exec <container> <cmd>... — runs <cmd> as uid 0 inside the
# container with stdout discarded; dies naming the command if it fails.
_ssh_root_exec() {
  local container="${1:?_ssh_root_exec: missing container}"
  shift
  docker exec -u 0 "$container" "$@" >/dev/null \
    || die "ssh_setup: 'docker exec -u 0 ${container} $*' failed"
}