diff --git a/Dockerfile.cred-proxy b/Dockerfile.cred-proxy new file mode 100644 index 0000000..82f3769 --- /dev/null +++ b/Dockerfile.cred-proxy @@ -0,0 +1,50 @@ +# Per-bottle cred-proxy sidecar image (PRD 0010). +# +# Holds API tokens (Anthropic OAuth, GitHub PAT, Gitea PAT, npm) in +# this container's environ, strips inbound Authorization headers, and +# injects the configured one before forwarding to the real upstream +# over HTTPS. The agent's environ carries only URLs pointing at this +# sidecar — the upstream credentials never reach the agent container. +# +# Stdlib-only Python; no pip install layer. The route table lands at +# /run/cred-proxy/routes.json via `docker cp` from the backend's +# start step. + +# python:3.13-alpine. Pinned by digest for reproducibility — the +# proxy script is stdlib-only so a Python minor-version drift would +# only affect the runtime, not API surface, but pinning makes the +# image bytes deterministic. +FROM python@sha256:420cd0bf0f3998275875e02ecd5808168cf0843cbb4d3c536432f729247b2acc + +# `ca-certificates` ships /usr/sbin/update-ca-certificates and the +# system trust store. The backend's start step `docker cp`s the +# per-bottle pipelock CA into /usr/local/share/ca-certificates/ so +# the entrypoint's update-ca-certificates picks it up — cred-proxy's +# outbound HTTPS then trusts pipelock's bumped certs and outbound +# traffic routes through pipelock (HTTPS_PROXY in the environ). +RUN apk add --no-cache ca-certificates + +# The proxy script ships as a single file. Tests in tests/unit/ import +# it as `claude_bottle.cred_proxy_server`; the container runs it +# directly as a script. No package install, no other modules pulled. +COPY claude_bottle/cred_proxy_server.py /app/cred_proxy_server.py + +# Pre-create the runtime directory the backend's start step will +# `docker cp` routes.json into. docker cp does not create +# intermediate dirs, so the mkdir must be baked into the image. 
+RUN mkdir -p /run/cred-proxy + +# Listening port. The agent's environ resolves the cred-proxy host +# via Docker's embedded DNS on the per-bottle internal network and +# dials this port. Surfaced as EXPOSE for documentation; not required +# for the internal network to route to it. +EXPOSE 9099 + +# Entry runs update-ca-certificates so the per-bottle pipelock CA +# docker-cp'd by the backend's start step is folded into +# /etc/ssl/certs/ca-certificates.crt before python comes up. Then +# exec into the server so PID 1 is python (clean signal handling +# and exit codes). Output of update-ca-certificates is silenced — +# the entry script prints one line per cert under normal operation, +# which the test suite would otherwise treat as a log smell. +ENTRYPOINT ["sh", "-c", "update-ca-certificates >/dev/null 2>&1 && exec python3 /app/cred_proxy_server.py"] diff --git a/README.md b/README.md index b35310c..32b2469 100644 --- a/README.md +++ b/README.md @@ -71,36 +71,53 @@ pieces of v1. A bottle is the agent container plus up to three per-protocol egress sidecars on a per-agent Docker `--internal` network. The agent has no -default route off-box; its only way out is through the pipelock -sidecar (for HTTP/HTTPS), the ssh-gate sidecar (for SSH), or the -git-gate sidecar (for git operations against declared upstreams). -Each sidecar also sits on an egress network that does have internet -access, so the agent's traffic always passes through a container -that enforces the manifest before it leaves the host. +default route off-box. All HTTP and HTTPS egress — from the agent +*and* from cred-proxy when it dials an upstream — funnels through +pipelock, where the egress allowlist, TLS interception, and +request-body DLP scanner enforce the manifest before any byte leaves +the host. 
The only egress that doesn't traverse pipelock is git-gate's +SSH push/fetch to `bottle.git` upstreams — pipelock can't proxy SSH, +so git-gate is its own L4-style egress path with gitleaks doing the +pre-receive scan. ``` host ( ./cli.py ) │ starts │ stops ▼ - ┌─────────────────────────── bottle ──────────────────────────┐ - │ │ - │ ┌──────────────────┐ │ - │ │ agent image │ HTTPS_PROXY ┌────────────────┐ │ HTTPS to - │ │ (claude-code, │ ───────────────► │ pipelock image │──┼──► allowlisted - │ │ built locally) │ │ (TLS bump, DLP,│ │ hosts - │ │ │ │ allowlist) │ │ - │ │ skills, env, │ └────────────────┘ │ - │ │ ~/.gitconfig │ │ - │ │ │ git ops ┌────────────────┐ │ SSH (push/ - │ │ │ ───────────────► │ git-gate image │──┼──► fetch) to - │ │ │ │ (gitleaks + │ │ bottle.git - │ │ │ │ git daemon) │ │ upstreams - │ └──────────────────┘ └────────────────┘ │ - │ │ - │ agent on internal network (no default route); │ - │ sidecars also attached to an egress network. │ - └─────────────────────────────────────────────────────────────┘ + ┌─────────────────────────── bottle ──────────────────────────────────┐ + │ │ + │ ┌──────────────────┐ │ + │ │ agent image │ HTTPS_PROXY │ + │ │ (claude-code, │ ────────────────────────┐ │ + │ │ built locally) │ │ │ + │ │ │ plain HTTP │ │ + │ │ skills, env, │ (token injection) ┌────▼─────────┐ │ + │ │ ~/.gitconfig, │ ──────────────────►│ cred-proxy │ │ + │ │ ~/.npmrc, tea │ │ (strips/inj │ │ + │ │ │ │ Authoriz.) │ │ + │ │ environ: URLs │ └─────┬────────┘ │ + │ │ only, no real │ HTTPS_PROXY │ │ + │ │ tokens │ ▼ │ + │ │ │ ┌────────────────┐ │ HTTPS to + │ │ │ │ pipelock image │──────────┼──► allowlisted + │ │ │ │ (TLS bump, DLP │ │ hosts (incl. 
+ │ │ │ │ body scan, │ │ cred-proxy + │ │ │ │ allowlist) │ │ upstreams) + │ │ │ └────────────────┘ │ + │ │ │ │ + │ │ │ git:// ┌────────────────┐ │ SSH push/fetch + │ │ │ ────────────────►│ git-gate image │──────────┼──► to bottle.git + │ │ │ │ (gitleaks + │ │ upstreams + │ └──────────────────┘ │ git daemon) │ │ (direct — not + │ └────────────────┘ │ via pipelock) + │ │ + │ agent on internal network (no default route); pipelock, │ + │ cred-proxy, and git-gate straddle internal + egress networks. │ + │ pipelock is the single HTTP/HTTPS chokepoint — cred-proxy's │ + │ outbound traverses it too. git-gate's SSH egress is direct │ + │ because pipelock is HTTP-only. │ + └─────────────────────────────────────────────────────────────────────┘ ``` - **agent image** — built from the repo `Dockerfile` (`node:22-slim` @@ -129,6 +146,26 @@ that enforces the manifest before it leaves the host. `insteadOf` rewrite still keys off the original hostname. Brought up only when `bottle.git` has entries. Design in `docs/prds/0008-git-gate.md`. +- **cred-proxy image** — per-bottle sidecar (`python:3.13-alpine` + base, stdlib-only) that holds API tokens declared in + `bottle.cred_proxy.routes`. Each route names a `path`, + `upstream`, `auth_scheme`, and `token_ref` (host env var); the + agent dials `http://cred-proxy:9099<path>...` over plain HTTP + and the proxy strips any inbound `Authorization`, injects + `<auth_scheme> <token>` using the value held only in its own + container's environ, and forwards to the real upstream over + HTTPS. SSE responses stream back unbuffered. The cred-proxy's + outbound HTTPS routes through pipelock (it trusts pipelock's + per-bottle CA), so pipelock's egress allowlist + body scanner + apply to cred-proxy traffic the same way they apply to direct + agent traffic. Smart-HTTP push paths (`/git-receive-pack`, + `/info/refs?service=git-receive-pack`) are refused at the + proxy — push must go through `bottle.git` / git-gate where + gitleaks runs. 
Optional per-route `role` tags drive agent-side + rewrites: `anthropic-base-url`, `npm-registry`, `git-insteadof`, + `tea-login`. The agent's `printenv` shows only proxy URLs — + none of the real token values. Design in + `docs/prds/0010-cred-proxy.md`. When the agent exits, `cli.py` tears down every sidecar that was brought up and the two networks; nothing about a bottle persists @@ -172,6 +209,32 @@ project entries overriding home entries on key conflict). } ], + // Routes declared here are held by a per-bottle cred-proxy + // sidecar, not the agent. Each route names a path the agent + // dials, the upstream the proxy forwards to, an auth_scheme, + // and a token_ref (host env var). The value goes into the + // sidecar's environ via `docker create -e`, never touches + // argv or disk. Optional `role` tags drive agent-side + // rewrites: `anthropic-base-url` (sets ANTHROPIC_BASE_URL), + // `npm-registry` (writes ~/.npmrc), `git-insteadof` (writes + // ~/.gitconfig), `tea-login` (writes ~/.config/tea/config.yml). + // See `docs/prds/0010-cred-proxy.md`. 
+ "cred_proxy": { + "routes": [ + { "path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN", + "role": "anthropic-base-url" }, + { "path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "GITHUB_PAT" }, + { "path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GITHUB_PAT", + "role": "git-insteadof" }, + { "path": "/npm/", "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", "token_ref": "NPM_TOKEN", + "role": "npm-registry" } + ] + }, + // Egress is forced through a per-agent // [pipelock](https://github.com/luckyPipewrench/pipelock) sidecar // on a Docker `--internal` network — without the proxy the agent @@ -231,15 +294,36 @@ as `CLAUDE_BOTTLE_OAUTH_TOKEN`: export CLAUDE_BOTTLE_OAUTH_TOKEN="" ``` -`cli.py` automatically forwards it to every container as -`CLAUDE_CODE_OAUTH_TOKEN` via `docker run -e` — no manifest wiring -required, and the value is never written to disk or placed on argv. +The bottle reaches the Anthropic API only through the cred-proxy +sidecar. To let `claude` authenticate, declare a route in +`bottle.cred_proxy.routes` with `role: "anthropic-base-url"` and +`token_ref: "CLAUDE_BOTTLE_OAUTH_TOKEN"`: -Inside the container, `claude` picks up `CLAUDE_CODE_OAUTH_TOKEN` and -authenticates against your subscription. Caveats: the token is bound -to your subscription tier (Pro/Max/Team/Enterprise), it does not work -with `claude --bare` (which only reads `ANTHROPIC_API_KEY`), and if it -leaks, regenerate via `claude setup-token` again. 
Reference: +```jsonc +{ + "path": "/anthropic/", + "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", + "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN", + "role": "anthropic-base-url" +} +``` + +At launch, `cli.py` reads `CLAUDE_BOTTLE_OAUTH_TOKEN` from the host +env and forwards it into the cred-proxy container's environ — never +into the agent's. The agent receives `ANTHROPIC_BASE_URL` pointing at +`http://cred-proxy:9099/anthropic` and a non-secret placeholder for +`CLAUDE_CODE_OAUTH_TOKEN` (claude-code refuses to start without one; +the proxy strips and replaces the header on every request). `printenv` +inside the agent does not surface the real token, and the value is +never written to disk or placed on argv on the host. + +A bottle without an `anthropic-base-url` route has no path to the +Anthropic API — there is no fallback that forwards the token directly +to the agent. Caveats: the token is bound to your subscription tier +(Pro/Max/Team/Enterprise), it does not work with `claude --bare` +(which only reads `ANTHROPIC_API_KEY`), and if it leaks, regenerate +via `claude setup-token` again. Reference: . 
## Trademarks diff --git a/claude-bottle.example.json b/claude-bottle.example.json index 1ac6163..c6be907 100644 --- a/claude-bottle.example.json +++ b/claude-bottle.example.json @@ -36,6 +36,44 @@ "files.pythonhosted.org" ] } + }, + + "agentic": { + "env": { + "GIT_AUTHOR_NAME": "Eric Diderich", + "NODE_ENV": "development" + }, + "cred_proxy": { + "routes": [ + { "path": "/anthropic/", + "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", + "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN", + "role": "anthropic-base-url" }, + + { "path": "/gh-api/", + "upstream": "https://api.github.com", + "auth_scheme": "Bearer", + "token_ref": "GH_PAT" }, + { "path": "/gh-git/", + "upstream": "https://github.com", + "auth_scheme": "Bearer", + "token_ref": "GH_PAT", + "role": "git-insteadof" }, + + { "path": "/gitea/dideric/", + "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", + "token_ref": "GITEA_TOKEN", + "role": ["git-insteadof", "tea-login"] }, + + { "path": "/npm/", + "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", + "token_ref": "NPM_TOKEN", + "role": "npm-registry" } + ] + } } }, @@ -52,6 +90,12 @@ "prompt": "You help maintain Gitea-hosted projects. Prefer small, focused commits. Follow Conventional Commits. Run tests before pushing." }, + "agentic-helper": { + "bottle": "agentic", + "skills": [], + "prompt": "You operate against APIs whose credentials live in a per-bottle cred-proxy sidecar. Your environ carries only proxy URLs." 
+ }, + "minimal": { "bottle": "default", "skills": [], diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index ba677b6..45c65ac 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -53,7 +53,6 @@ class BottleSpec: agent_name: str copy_cwd: bool user_cwd: str - forward_oauth_token: bool @dataclass(frozen=True) @@ -214,15 +213,17 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): decide whether to add --append-system-prompt-file to claude's argv. - Default orchestration: ca → prompt → skills → git. CA install - runs first so the agent's trust store is rebuilt before - anything inside the agent makes a TLS call. Subclasses - typically don't override this; they implement the sub-methods - below.""" + Default orchestration: ca → prompt → skills → git → + cred_proxy. CA install runs first so the agent's trust store + is rebuilt before anything inside the agent makes a TLS call. + cred_proxy runs last because it appends to ~/.gitconfig (which + provision_git writes). Subclasses typically don't override + this; they implement the sub-methods below.""" self.provision_ca(plan, target) prompt_path = self.provision_prompt(plan, target) self.provision_skills(plan, target) self.provision_git(plan, target) + self.provision_cred_proxy(plan, target) return prompt_path def provision_ca(self, plan: PlanT, target: str) -> None: @@ -251,6 +252,12 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): """Copy the host's cwd `.git` directory into the running bottle if the user requested --cwd. No-op otherwise.""" + def provision_cred_proxy(self, plan: PlanT, target: str) -> None: + """Drop the cred-proxy agent-side dotfiles (.npmrc, + .gitconfig insteadOf, ~/.config/tea/config.yml) per PRD 0010. 
+ Default impl is a no-op for backends that don't yet support + the cred-proxy sidecar; the Docker backend overrides.""" + @abstractmethod def prepare_cleanup(self) -> CleanupT: """Enumerate orphaned resources from previous bottles. No side diff --git a/claude_bottle/backend/docker/backend.py b/claude_bottle/backend/docker/backend.py index 55baa8b..3ea0c0e 100644 --- a/claude_bottle/backend/docker/backend.py +++ b/claude_bottle/backend/docker/backend.py @@ -23,9 +23,11 @@ from . import prepare as _prepare from .bottle import DockerBottle from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan +from .cred_proxy import DockerCredProxy from .git_gate import DockerGitGate from .pipelock import DockerPipelockProxy from .provision import ca as _ca +from .provision import cred_proxy as _cred_proxy from .provision import git as _git from .provision import prompt as _prompt from .provision import skills as _skills @@ -40,6 +42,7 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup def __init__(self) -> None: self._proxy = DockerPipelockProxy() self._git_gate = DockerGitGate() + self._cred_proxy = DockerCredProxy() def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan: return _prepare.resolve_plan( @@ -47,6 +50,7 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup stage_dir=stage_dir, proxy=self._proxy, git_gate=self._git_gate, + cred_proxy=self._cred_proxy, ) @contextmanager @@ -55,6 +59,7 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup plan, proxy=self._proxy, git_gate=self._git_gate, + cred_proxy=self._cred_proxy, provision=self.provision, ) as bottle: yield bottle @@ -71,6 +76,9 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup def provision_git(self, plan: DockerBottlePlan, target: str) -> None: _git.provision_git(plan, target) + def provision_cred_proxy(self, plan: 
DockerBottlePlan, target: str) -> None: + _cred_proxy.provision_cred_proxy(plan, target) + def prepare_cleanup(self) -> DockerBottleCleanupPlan: return _cleanup.prepare_cleanup() diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index af635de..e02ca9c 100644 --- a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -11,6 +11,7 @@ import sys from dataclasses import dataclass, field from pathlib import Path +from ...cred_proxy import CredProxyPlan from ...git_gate import GitGatePlan from ...log import info from ...manifest import Agent, Bottle @@ -51,6 +52,7 @@ class DockerBottlePlan(BottlePlan): prompt_file: Path proxy_plan: PipelockProxyPlan git_gate_plan: GitGatePlan + cred_proxy_plan: CredProxyPlan allowlist_summary: str use_runsc: bool @@ -59,9 +61,13 @@ class DockerBottlePlan(BottlePlan): manifest = spec.manifest agent = manifest.agents[spec.agent_name] bottle = manifest.bottle_for(spec.agent_name) - env_names = list(bottle.env.keys()) - if spec.forward_oauth_token: - env_names.append("CLAUDE_CODE_OAUTH_TOKEN") + # The agent sees the union of literal env names (rendered into + # --env-file) and forwarded env names (`-e NAME` with the value + # arriving via subprocess env). The forwarded set already + # reflects PRD 0010's switch — when cred-proxy holds the + # anthropic token, CLAUDE_CODE_OAUTH_TOKEN is absent and + # ANTHROPIC_BASE_URL is present. 
+ env_names = sorted(set(bottle.env.keys()) | set(self.forwarded_env.keys())) return _PlanView( agent=agent, bottle=bottle, @@ -100,6 +106,14 @@ class DockerBottlePlan(BottlePlan): info(f" git gate : {'; '.join(git_lines)}") else: info(" git remotes : (none)") + if self.cred_proxy_plan.routes: + lines = [f"{r.path}→{r.upstream}" for r in self.cred_proxy_plan.routes] + refs = sorted({r.token_ref for r in self.cred_proxy_plan.routes}) + info(f" cred-proxy : {len(lines)} route(s); tokens: {', '.join(refs)}") + for line in lines: + info(f" {line}") + else: + info(" cred-proxy : (none)") info(f" egress : {self.allowlist_summary}") info(" tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)") info( @@ -132,6 +146,16 @@ class DockerBottlePlan(BottlePlan): } for u in self.git_gate_plan.upstreams ], + "cred_proxy": [ + { + "path": r.path, + "upstream": r.upstream, + "auth_scheme": r.auth_scheme, + "token_ref": r.token_ref, + "roles": list(r.roles), + } + for r in self.cred_proxy_plan.routes + ], "egress": { "host_count": len(hosts), "hosts": hosts, diff --git a/claude_bottle/backend/docker/cred_proxy.py b/claude_bottle/backend/docker/cred_proxy.py new file mode 100644 index 0000000..d0cfd69 --- /dev/null +++ b/claude_bottle/backend/docker/cred_proxy.py @@ -0,0 +1,250 @@ +"""DockerCredProxy — the Docker-specific lifecycle for the per-bottle +cred-proxy sidecar (PRD 0010). Inherits the platform-agnostic prepare +step (route lift + routes.json render + token-env-map derivation) +from `CredProxy`.""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +from ...cred_proxy import ( + CRED_PROXY_HOSTNAME, + CredProxy, + CredProxyPlan, + cred_proxy_resolve_token_values, +) +from ...log import die, info, warn +from . 
import util as docker_mod + + +CRED_PROXY_IMAGE = os.environ.get( + "CLAUDE_BOTTLE_CRED_PROXY_IMAGE", + "claude-bottle-cred-proxy:latest", +) + +CRED_PROXY_DOCKERFILE = "Dockerfile.cred-proxy" + +# Listening port inside the sidecar. The agent dials cred-proxy on +# this port; surfaced as a constant so the provisioner and tests can +# both reference it. +CRED_PROXY_PORT = int(os.environ.get("CLAUDE_BOTTLE_CRED_PROXY_PORT", "9099")) + +# In-container path the proxy server reads its route table from. +# Pre-created in Dockerfile.cred-proxy so `docker cp` can drop the +# file directly. +CRED_PROXY_ROUTES_IN_CONTAINER = "/run/cred-proxy/routes.json" + +# In-container path for the per-bottle pipelock CA. Alpine's +# update-ca-certificates picks anything ending in `.crt` under +# /usr/local/share/ca-certificates/ and folds it into the system +# trust store at boot — so cred-proxy's HTTPS client trusts +# pipelock's bumped certs when pipelock MITMs the outbound leg. +CRED_PROXY_PIPELOCK_CA_IN_CONTAINER = "/usr/local/share/ca-certificates/pipelock.crt" + +# Repo root, for `docker build` context. Resolved from this file's +# location: claude_bottle/backend/docker/cred_proxy.py → repo root. +_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) + + +def cred_proxy_container_name(slug: str) -> str: + return f"claude-bottle-cred-proxy-{slug}" + + +def cred_proxy_url() -> str: + """Base URL the agent dials. Stable across bottles because the + sidecar attaches `--network-alias cred-proxy` on the internal + network; the container name (which carries the slug) is not + referenced by agent-side config.""" + return f"http://{CRED_PROXY_HOSTNAME}:{CRED_PROXY_PORT}" + + +def build_cred_proxy_image() -> None: + """Build the cred-proxy image from `Dockerfile.cred-proxy`. 
+ Called by `DockerCredProxy.start`; exposed at module level so + integration tests can build it without running the full launch + pipeline.""" + docker_mod.build_image(CRED_PROXY_IMAGE, _REPO_DIR, dockerfile=CRED_PROXY_DOCKERFILE) + + +class DockerCredProxy(CredProxy): + """Brings the cred-proxy sidecar up and down via Docker.""" + + def start(self, plan: CredProxyPlan) -> str: + """Boot the cred-proxy sidecar: + 1. Resolve every host TokenRef env var into a concrete + value. Fails early if any are unset. + 2. Build the cred-proxy image (no-op when cache is hot). + 3. `docker create` on the internal network with + `--network-alias cred-proxy` and one `-e CRED_PROXY_TOKEN_N` + flag per route. The values arrive via subprocess env, so + they never land on argv. + 4. `docker cp` the routes.json into the container. + 5. Attach to the per-agent egress network so the proxy can + reach the real upstream over HTTPS. + 6. `docker start`. + Returns the container name (the target passed to `.stop`).""" + if not plan.routes: + die("DockerCredProxy.start called with no routes; caller should skip") + if not plan.internal_network or not plan.egress_network: + die( + "DockerCredProxy.start: internal_network / egress_network must be " + "populated on the plan before start" + ) + if not plan.routes_path.is_file(): + die( + f"cred-proxy routes file missing at {plan.routes_path}; " + f"CredProxy.prepare must run first" + ) + # pipelock fields are populated by launch.py in production; both + # must be present (URL + CA) or both absent. Mixing is a wiring + # bug. Both-absent is supported only as a test escape hatch: + # the integration tests in tests/integration/ exercise header + # injection in isolation and do not bring pipelock up. 
+ route_via_pipelock = bool(plan.pipelock_proxy_url) or plan.pipelock_ca_host_path != Path() + if route_via_pipelock: + if not plan.pipelock_proxy_url: + die( + "DockerCredProxy.start: pipelock_ca_host_path is set but " + "pipelock_proxy_url is empty; populate both or neither." + ) + if not plan.pipelock_ca_host_path.is_file(): + die( + f"DockerCredProxy.start: pipelock CA missing at " + f"{plan.pipelock_ca_host_path}; pipelock_tls_init must run first" + ) + + # Resolve host env vars into concrete values. This must + # happen at start time (not prepare) — the values flow into + # the sidecar's environ via subprocess env. The plan never + # holds them. + token_values = cred_proxy_resolve_token_values(plan.token_env_map, dict(os.environ)) + + build_cred_proxy_image() + + name = cred_proxy_container_name(plan.slug) + info(f"starting cred-proxy sidecar {name} on network {plan.internal_network}") + + create_args = [ + "docker", "create", + "--name", name, + "--network", plan.internal_network, + "--network-alias", CRED_PROXY_HOSTNAME, + ] + if route_via_pipelock: + # Route cred-proxy's outbound HTTPS through pipelock so + # the egress allowlist + DLP body scanner apply to its + # traffic. Pipelock MITMs each handshake with the + # per-bottle CA we docker cp in below. + create_args.extend([ + "-e", f"HTTPS_PROXY={plan.pipelock_proxy_url}", + "-e", f"HTTP_PROXY={plan.pipelock_proxy_url}", + "-e", "NO_PROXY=localhost,127.0.0.1", + ]) + # One -e flag per token slot; values arrive via subprocess env. + # docker create with `-e NAME` (no =VALUE) reads NAME from the + # current process env at create time. We pass `env=child_env` + # to subprocess.run so the value comes from token_values, not + # the host's os.environ directly — keeps the resolver in one + # place and lets cred_proxy_resolve_token_values surface + # missing-env errors with a clear hint. 
+ for token_env in sorted(plan.token_env_map.keys()): + create_args.extend(["-e", token_env]) + create_args.append(CRED_PROXY_IMAGE) + + child_env: dict[str, str] = {**os.environ, **token_values} + + create_result = subprocess.run( + create_args, capture_output=True, text=True, env=child_env, check=False, + ) + if create_result.returncode != 0: + die( + f"failed to create cred-proxy sidecar {name}: " + f"{create_result.stderr.strip()}" + ) + + cps: list[tuple[str, str, str]] = [ + (str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"), + ] + if route_via_pipelock: + # CA must land BEFORE `docker start` so the entrypoint's + # update-ca-certificates picks it up. Docker cp's the + # file in even on the stopped container — that's the + # whole reason this works without a custom build step. + cps.append(( + str(plan.pipelock_ca_host_path), + CRED_PROXY_PIPELOCK_CA_IN_CONTAINER, + "pipelock CA", + )) + for src, dst, label in cps: + cp_result = subprocess.run( + ["docker", "cp", src, f"{name}:{dst}"], + capture_output=True, + text=True, + check=False, + ) + if cp_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to copy {label} into {name}: " + f"{cp_result.stderr.strip()}" + ) + + connect_result = subprocess.run( + ["docker", "network", "connect", plan.egress_network, name], + capture_output=True, text=True, check=False, + ) + if connect_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to attach cred-proxy sidecar {name} to egress network " + f"{plan.egress_network}: {connect_result.stderr.strip()}" + ) + + start_result = subprocess.run( + ["docker", "start", name], capture_output=True, text=True, check=False, + ) + if start_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + 
stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to start cred-proxy sidecar {name}: " + f"{start_result.stderr.strip()}" + ) + + return name + + def stop(self, target: str) -> None: + """Idempotent: missing container is success. `target` is the + container name returned by `.start`.""" + if subprocess.run( + ["docker", "inspect", target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode == 0: + if subprocess.run( + ["docker", "rm", "-f", target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + warn( + f"failed to remove cred-proxy sidecar {target}; " + f"clean up with 'docker rm -f {target}'" + ) diff --git a/claude_bottle/backend/docker/git_gate.py b/claude_bottle/backend/docker/git_gate.py index 5ad312c..935968b 100644 --- a/claude_bottle/backend/docker/git_gate.py +++ b/claude_bottle/backend/docker/git_gate.py @@ -110,13 +110,14 @@ class DockerGitGate(GitGate): for host, ip in git_gate_aggregate_extra_hosts(plan.upstreams).items(): create_args.extend(["--add-host", f"{host}:{ip}"]) create_args.append(GIT_GATE_IMAGE) - if subprocess.run( - create_args, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, - ).returncode != 0: - die(f"failed to create git-gate sidecar {name}") + create_result = subprocess.run( + create_args, capture_output=True, text=True, check=False, + ) + if create_result.returncode != 0: + die( + f"failed to create git-gate sidecar {name}: " + f"{create_result.stderr.strip()}" + ) # Order matters: entrypoint + hook first so they're present # when docker start fires. Per-upstream creds afterwards. 
@@ -166,12 +167,11 @@ class DockerGitGate(GitGate): f"{cp_result.stderr.strip()}" ) - if subprocess.run( + connect_result = subprocess.run( ["docker", "network", "connect", plan.egress_network, name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, - ).returncode != 0: + capture_output=True, text=True, check=False, + ) + if connect_result.returncode != 0: subprocess.run( ["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, @@ -180,22 +180,23 @@ class DockerGitGate(GitGate): ) die( f"failed to attach git-gate sidecar {name} to egress network " - f"{plan.egress_network}" + f"{plan.egress_network}: {connect_result.stderr.strip()}" ) - if subprocess.run( - ["docker", "start", name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, - ).returncode != 0: + start_result = subprocess.run( + ["docker", "start", name], capture_output=True, text=True, check=False, + ) + if start_result.returncode != 0: subprocess.run( ["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, ) - die(f"failed to start git-gate sidecar {name}") + die( + f"failed to start git-gate sidecar {name}: " + f"{start_result.stderr.strip()}" + ) return name diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index c1575bc..c59fb7f 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -18,12 +18,20 @@ from pathlib import Path from typing import Callable, Generator from ...log import die, info +from ...pipelock import pipelock_build_config, pipelock_render_yaml from . import network as network_mod from . 
import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan +from .cred_proxy import DockerCredProxy from .git_gate import DockerGitGate -from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init +from .pipelock import ( + PIPELOCK_CA_CERT_IN_CONTAINER, + PIPELOCK_CA_KEY_IN_CONTAINER, + DockerPipelockProxy, + pipelock_proxy_url, + pipelock_tls_init, +) from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH @@ -37,6 +45,7 @@ def launch( *, proxy: DockerPipelockProxy, git_gate: DockerGitGate, + cred_proxy: DockerCredProxy, provision: Callable[[DockerBottlePlan, str], str | None], ) -> Generator[DockerBottle, None, None]: """Build, launch, and provision a Docker bottle. Teardown on exit. @@ -66,6 +75,14 @@ def launch( egress_network = network_mod.network_create_egress(plan.slug) stack.callback(network_mod.network_remove, egress_network) + # Docker assigns a CIDR to the new internal network. Pipelock's + # SSRF guard otherwise rejects any destination resolving into + # RFC1918 space — which includes the cred-proxy / git-gate / + # pipelock sidecars themselves. Allowlist the bottle's own + # internal subnet so the agent can reach its sidecars via + # pipelock; api_allowlist + body-scanning still apply. + internal_cidr = network_mod.network_inspect_cidr(internal_network) + # Per-bottle ephemeral CA for pipelock's TLS interception # (PRD 0006). One-shot pipelock container writes ca.pem + # ca-key.pem under plan.stage_dir; .start docker-cp's them @@ -73,9 +90,25 @@ def launch( # stage dir, which start.py's outer finally `shutil.rmtree`s # after the sidecar is torn down. ca_cert_host, ca_key_host = pipelock_tls_init(plan.stage_dir) + + # Re-render the pipelock yaml with the SSRF allowlist now that + # we know the internal CIDR. Prepare wrote the yaml without + # the ssrf block (CIDR wasn't known yet); overwrite the same + # path so .start docker-cp's the updated content. 
+ bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) + cfg = pipelock_build_config( + bottle, + ca_cert_path=PIPELOCK_CA_CERT_IN_CONTAINER, + ca_key_path=PIPELOCK_CA_KEY_IN_CONTAINER, + ssrf_ip_allowlist=(internal_cidr,), + ) + plan.proxy_plan.yaml_path.write_text(pipelock_render_yaml(cfg)) + plan.proxy_plan.yaml_path.chmod(0o600) + proxy_plan = dataclasses.replace( plan.proxy_plan, internal_network=internal_network, + internal_network_cidr=internal_cidr, egress_network=egress_network, ca_cert_host_path=ca_cert_host, ca_key_host_path=ca_key_host, @@ -102,6 +135,27 @@ def launch( git_gate_name = git_gate.start(plan.git_gate_plan) stack.callback(git_gate.stop, git_gate_name) + # Cred-proxy (PRD 0010). One sidecar per bottle when + # bottle.cred_proxy.routes is non-empty. Must come up AFTER pipelock + # — cred-proxy routes its outbound HTTPS through pipelock + # (HTTPS_PROXY in environ + the per-bottle CA in its trust + # store) so the egress allowlist + body scanner sit in the + # cred-proxy path too. Must come up BEFORE the agent so DNS + # resolution for `cred-proxy` succeeds on the agent's first + # call; tokens flow from the host env into the sidecar's + # environ, not the agent's. 
+ if plan.cred_proxy_plan.routes: + cred_proxy_plan = dataclasses.replace( + plan.cred_proxy_plan, + internal_network=internal_network, + egress_network=egress_network, + pipelock_ca_host_path=ca_cert_host, + pipelock_proxy_url=pipelock_proxy_url(plan.slug), + ) + plan = dataclasses.replace(plan, cred_proxy_plan=cred_proxy_plan) + cred_proxy_name = cred_proxy.start(plan.cred_proxy_plan) + stack.callback(cred_proxy.stop, cred_proxy_name) + container = _run_agent_container(plan, internal_network) stack.callback(docker_mod.force_remove_container, container) diff --git a/claude_bottle/backend/docker/network.py b/claude_bottle/backend/docker/network.py index 1d082d8..9cc0981 100644 --- a/claude_bottle/backend/docker/network.py +++ b/claude_bottle/backend/docker/network.py @@ -81,6 +81,29 @@ def network_create_egress(slug: str) -> str: return _network_create_with_prefix(network_egress_name_for_slug(slug), internal=False) +def network_inspect_cidr(name: str) -> str: + """Return the IPv4 CIDR Docker assigned to a user-defined network. + + Used by pipelock's SSRF guard exception: the bottle's internal + network sits in RFC1918 space, so pipelock's `internal:` list + would block any agent request whose destination resolves there + — including the cred-proxy sidecar's address. 
Adding the + network's CIDR to pipelock's `ssrf.ip_allowlist` lets traffic + targeted at the bottle's own sidecars through while pipelock + still body-scans and api_allowlist-gates as usual.""" + result = subprocess.run( + ["docker", "network", "inspect", + "--format", "{{range .IPAM.Config}}{{.Subnet}}{{end}}", name], + capture_output=True, text=True, check=False, + ) + if result.returncode != 0: + die(f"docker network inspect {name} failed: {result.stderr.strip()}") + cidr = result.stdout.strip() + if not cidr: + die(f"network {name!r} has no IPAM subnet configured") + return cidr + + def network_attach(network: str, container: str) -> None: result = subprocess.run( ["docker", "network", "connect", network, container], diff --git a/claude_bottle/backend/docker/pipelock.py b/claude_bottle/backend/docker/pipelock.py index 7359da4..89ce9f2 100644 --- a/claude_bottle/backend/docker/pipelock.py +++ b/claude_bottle/backend/docker/pipelock.py @@ -110,8 +110,14 @@ class DockerPipelockProxy(PipelockProxy): "run", "--config", "/etc/pipelock.yaml", "--listen", f"0.0.0.0:{PIPELOCK_PORT}", ] - if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0: - die(f"failed to create pipelock sidecar {name}") + create_result = subprocess.run( + create_args, capture_output=True, text=True, check=False, + ) + if create_result.returncode != 0: + die( + f"failed to create pipelock sidecar {name}: " + f"{create_result.stderr.strip()}" + ) for src, dst, label in ( (plan.yaml_path, "/etc/pipelock.yaml", "yaml"), @@ -131,23 +137,32 @@ class DockerPipelockProxy(PipelockProxy): ) die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}") - if subprocess.run( + connect_result = subprocess.run( ["docker", "network", "connect", plan.egress_network, name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, - ).returncode != 0: - subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, 
stderr=subprocess.DEVNULL, check=False) - die(f"failed to attach pipelock sidecar {name} to egress network {plan.egress_network}") + capture_output=True, text=True, check=False, + ) + if connect_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + die( + f"failed to attach pipelock sidecar {name} to egress network " + f"{plan.egress_network}: {connect_result.stderr.strip()}" + ) - if subprocess.run( - ["docker", "start", name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, - ).returncode != 0: - subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False) - die(f"failed to start pipelock sidecar {name}") + start_result = subprocess.run( + ["docker", "start", name], capture_output=True, text=True, check=False, + ) + if start_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + die( + f"failed to start pipelock sidecar {name}: " + f"{start_result.stderr.strip()}" + ) return name diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py index 074d8d7..8c23f38 100644 --- a/claude_bottle/backend/docker/prepare.py +++ b/claude_bottle/backend/docker/prepare.py @@ -19,8 +19,13 @@ from ...log import die from .. import BottleSpec from . 
import util as docker_mod from .bottle_plan import DockerBottlePlan -from .git_gate import DockerGitGate -from .pipelock import DockerPipelockProxy +from .cred_proxy import ( + DockerCredProxy, + cred_proxy_container_name, + cred_proxy_url, +) +from .git_gate import DockerGitGate, git_gate_container_name +from .pipelock import DockerPipelockProxy, pipelock_container_name def resolve_plan( @@ -29,6 +34,7 @@ def resolve_plan( stage_dir: Path, proxy: DockerPipelockProxy, git_gate: DockerGitGate, + cred_proxy: DockerCredProxy, ) -> DockerBottlePlan: """Resolve Docker-specific names and write scratch files. Trusts that the agent and its skills/git-gate keys are present — @@ -74,6 +80,29 @@ def resolve_plan( f"clean up old containers with 'docker rm -f '" ) + # Probe sidecar container names for orphans from a previous run. + # Sidecar names are deterministic from the slug; an orphan would + # surface as a docker-create conflict deep inside launch() with no + # actionable hint. Fail fast here with a cleanup pointer instead. + # Only probe sidecars this launch will actually try to create: + # pipelock always; git-gate when bottle.git is non-empty; cred-proxy + # when bottle.cred_proxy.routes is non-empty. + sidecar_probes: list[tuple[str, str]] = [ + ("pipelock", pipelock_container_name(slug)), + ] + if bottle.git: + sidecar_probes.append(("git-gate", git_gate_container_name(slug))) + if bottle.cred_proxy.routes: + sidecar_probes.append(("cred-proxy", cred_proxy_container_name(slug))) + for label, sidecar_name in sidecar_probes: + if docker_mod.container_exists(sidecar_name): + die( + f"{label} sidecar container '{sidecar_name}' already exists. " + f"This is an orphan from a previous run; clean it up with " + f"'./cli.py cleanup' (or 'docker rm -f {sidecar_name}') and " + f"retry." 
+ ) + env_file = stage_dir / "agent.env" prompt_file = stage_dir / "prompt.txt" prompt_file.write_text("") @@ -81,14 +110,44 @@ def resolve_plan( proxy_plan = proxy.prepare(bottle, slug, stage_dir) git_gate_plan = git_gate.prepare(bottle, slug, stage_dir) + cred_proxy_plan = cred_proxy.prepare(bottle, slug, stage_dir) resolved = resolve_env(manifest, spec.agent_name) # Everything that should reach the bottle by-name (so its value - # never lands on argv or in env_file) goes into one dict. The - # rename from CLAUDE_BOTTLE_OAUTH_TOKEN to CLAUDE_CODE_OAUTH_TOKEN - # happens here; nothing mutates the host os.environ. + # never lands on argv or in env_file) goes into one dict. Nothing + # mutates the host os.environ. forwarded_env: dict[str, str] = dict(resolved.forwarded) - if spec.forward_oauth_token: - forwarded_env["CLAUDE_CODE_OAUTH_TOKEN"] = os.environ["CLAUDE_BOTTLE_OAUTH_TOKEN"] + # Find the (at most one) cred-proxy route claiming the + # anthropic-base-url role. Manifest validation enforces the + # singleton constraint. cred-proxy is the only path the Anthropic + # OAuth token reaches the bottle — there is no fallback that + # forwards it into the agent's environ directly. Bottles that + # need claude-code to authenticate must declare an + # anthropic-base-url route. + anthropic_route = next( + (r for r in cred_proxy_plan.routes if "anthropic-base-url" in r.roles), + None, + ) + if anthropic_route is not None: + # Point claude-code at the cred-proxy. The sidecar holds the + # OAuth token; the agent's environ does not. Strip the + # trailing slash so claude-code's path-join produces e.g. + # http://cred-proxy:9099/anthropic/v1/messages. + forwarded_env["ANTHROPIC_BASE_URL"] = ( + f"{cred_proxy_url()}{anthropic_route.path}".rstrip("/") + ) + # claude-code refuses to start without *some* credential in + # its env. 
The proxy strips inbound Authorization on every + # request and injects the real one — so a non-secret + # placeholder is sufficient and the SC1 test still holds + # (the placeholder is not a `cred_proxy.routes[].TokenRef` + # value). The agent cannot exfiltrate this string because + # it carries no meaning to api.anthropic.com. + forwarded_env["CLAUDE_CODE_OAUTH_TOKEN"] = "cred-proxy-placeholder" + # Belt-and-braces: turn off telemetry endpoints that don't + # route through ANTHROPIC_BASE_URL (statsig, error reporting). + # PRD 0010 open question default. + forwarded_env.setdefault("CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "1") + forwarded_env.setdefault("DISABLE_ERROR_REPORTING", "1") _write_env_file(resolved, env_file) prompt_file.write_text(agent.prompt) @@ -109,6 +168,7 @@ def resolve_plan( prompt_file=prompt_file, proxy_plan=proxy_plan, git_gate_plan=git_gate_plan, + cred_proxy_plan=cred_proxy_plan, allowlist_summary=allowlist_summary, use_runsc=use_runsc, ) diff --git a/claude_bottle/backend/docker/provision/cred_proxy.py b/claude_bottle/backend/docker/provision/cred_proxy.py new file mode 100644 index 0000000..53da4ea --- /dev/null +++ b/claude_bottle/backend/docker/provision/cred_proxy.py @@ -0,0 +1,238 @@ +"""Cred-proxy provisioning inside a running Docker bottle (PRD 0010). + +Writes the agent-side configuration that points each tool at the +per-bottle cred-proxy sidecar: + + - ~/.npmrc — `registry=` pointing at /npm/ + - ~/.gitconfig (appended) — `insteadOf` rules for the + github / gitea hosts the bottle + declared a token for + - ~/.config/tea/config.yml — per-gitea login pointing at + /gitea// + +The ANTHROPIC_BASE_URL env var is set at `docker run -e` time by the +backend's launch step, not here — it has to be in the agent's environ +before claude starts, and there is no point in writing it to a dotfile +the agent would have to source. See `prepare.py` for that. 
+""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +from ....cred_proxy import CredProxyRoute +from ....log import info +from .. import util as docker_mod +from ..bottle_plan import DockerBottlePlan +from ..cred_proxy import cred_proxy_url + + +def provision_cred_proxy(plan: DockerBottlePlan, target: str) -> None: + """Drop the agent-side dotfiles for each declared cred-proxy + route. No-op when the bottle has no routes.""" + routes = plan.cred_proxy_plan.routes + if not routes: + return + bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) + git_gate_hosts = {g.UpstreamHost for g in bottle.git} + _provision_npmrc(plan, target, routes) + _provision_gitconfig(plan, target, routes, git_gate_hosts) + _provision_tea_config(plan, target, routes) + + +# --- npm -------------------------------------------------------------------- + + +def render_npmrc(routes: tuple[CredProxyRoute, ...]) -> str: + """Render `~/.npmrc` content. Driven by the `npm-registry` role: + finds the (single) route that claims it and writes a registry= + line at the proxy. Empty string when no such route exists, so + callers can branch on emptiness. + + The proxy strips inbound Authorization and injects its own — the + npmrc deliberately carries no `_authToken`. The registry alone + is enough. 
Manifest validation enforces that the role is a + singleton, so the first match is the only match.""" + for r in routes: + if "npm-registry" in r.roles: + return f"registry={cred_proxy_url()}{r.path}\n" + return "" + + +def _provision_npmrc( + plan: DockerBottlePlan, + target: str, + routes: tuple[CredProxyRoute, ...], +) -> None: + content = render_npmrc(routes) + if not content: + return + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + container_npmrc = f"{container_home}/.npmrc" + npmrc = plan.stage_dir / "agent_npmrc" + npmrc.write_text(content) + npmrc.chmod(0o600) + info(f"writing {container_npmrc} (cred-proxy npm registry)") + subprocess.run( + ["docker", "cp", str(npmrc), f"{target}:{container_npmrc}"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(target, ["chown", "node:node", container_npmrc]) + docker_mod.docker_exec_root(target, ["chmod", "644", container_npmrc]) + + +# --- git config ------------------------------------------------------------- + + +def render_cred_proxy_gitconfig( + routes: tuple[CredProxyRoute, ...], + git_gate_hosts: set[str] = frozenset(), # type: ignore[assignment] +) -> str: + """Render the `~/.gitconfig` fragment for cred-proxy insteadOf + rewrites. Driven by the `git-insteadof` role: each route that + claims it produces a `[url "<cred-proxy-url>"] insteadOf = + <upstream>/` block. Empty string when no such route exists. + + The rewrite is suppressed for any route whose upstream host is + also declared in `bottle.git`. git-gate is the canonical git + path on those hosts — its pre-receive runs gitleaks before + forwarding the push. A cred-proxy `https://<host>/` rewrite + would route HTTPS git ops around the gate. cred-proxy still + refuses smart-HTTP push at runtime (defense in depth), but + suppressing the rewrite means `git clone https://<host>/...` + doesn't have a tempting shortcut that just confuses on push.
+ + The insteadOf left-hand side comes from `route.upstream` (with a + trailing `/` so insteadOf matches at the directory boundary), + so the same renderer handles github.com, gitea.dideric.is, and + any future host the user wires up.""" + rules: list[str] = [] + for r in routes: + if "git-insteadof" not in r.roles: + continue + # Strip scheme to derive the host for the git-gate overlap + # check. urllib.parse-free parse: same shape we accept in + # manifest validation. + host = r.upstream.removeprefix("https://").partition("/")[0].partition(":")[0] + if host in git_gate_hosts: + continue + rules.append( + f'[url "{cred_proxy_url()}{r.path}"]\n' + f"\tinsteadOf = {r.upstream}/\n" + ) + if not rules: + return "" + return ( + "# claude-bottle cred-proxy (PRD 0010): rewrite https:/// to\n" + "# the per-bottle cred-proxy sidecar, which holds the upstream\n" + "# credential and injects the Authorization header.\n" + + "".join(rules) + ) + + +def _provision_gitconfig( + plan: DockerBottlePlan, + target: str, + routes: tuple[CredProxyRoute, ...], + git_gate_hosts: set[str], +) -> None: + """Append the cred-proxy insteadOf rules to ~/.gitconfig. Runs + after `provision_git`, so any git-gate rules already live in the + file; we append rather than overwrite. Hosts already brokered by + git-gate are skipped — git-gate is the canonical git path there.""" + content = render_cred_proxy_gitconfig(routes, git_gate_hosts) + if not content: + return + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + container_gitconfig = f"{container_home}/.gitconfig" + info(f"appending cred-proxy insteadOf rules to {container_gitconfig}") + # Use `tee -a` over stdin so the content never lands on argv and the + # append is atomic from the agent's perspective. `tee` runs as the + # node user (the default in the container) so ownership is preserved. 
+ result = subprocess.run( + ["docker", "exec", "-i", target, "tee", "-a", container_gitconfig], + input=content, + text=True, + capture_output=True, + check=False, + ) + if result.returncode != 0: + # Fall back to root-tee in case ~/.gitconfig didn't exist as the + # node user yet (no git-gate rules were written). The chown + # below makes ownership consistent. + result_root = subprocess.run( + ["docker", "exec", "-i", "-u", "0", target, + "tee", "-a", container_gitconfig], + input=content, + text=True, + capture_output=True, + check=True, + ) + _ = result_root # silence unused + docker_mod.docker_exec_root(target, ["chown", "node:node", container_gitconfig]) + docker_mod.docker_exec_root(target, ["chmod", "644", container_gitconfig]) + + +# --- tea -------------------------------------------------------------------- + + +def render_tea_config(routes: tuple[CredProxyRoute, ...]) -> str: + """Render `~/.config/tea/config.yml`. Driven by the `tea-login` + role: each route that claims it produces one `logins:` entry + pointing at the cred-proxy. The proxy substitutes the real + token at request time; the value in `token:` here is a + placeholder. `tea` refuses to make calls without a non-empty + token field, so the placeholder is necessary.""" + tea_routes = [r for r in routes if "tea-login" in r.roles] + if not tea_routes: + return "" + lines = ["logins:"] + for r in tea_routes: + # Derive a stable login name from the upstream host. The + # path may not encode the host (e.g. `/gitea/dideric/` vs + # upstream gitea.dideric.is), so we read it off `upstream`. 
+ host = r.upstream.removeprefix("https://").partition("/")[0].partition(":")[0] + lines.extend([ + f"- name: {host}", + f" url: {cred_proxy_url()}{r.path}", + " token: cred-proxy-placeholder", + " default: false", + " ssh_host: \"\"", + " ssh_key: \"\"", + " insecure: false", + ]) + return "\n".join(lines) + "\n" + + +def _provision_tea_config( + plan: DockerBottlePlan, + target: str, + routes: tuple[CredProxyRoute, ...], +) -> None: + content = render_tea_config(routes) + if not content: + return + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + container_tea = f"{container_home}/.config/tea/config.yml" + cfg = plan.stage_dir / "agent_tea_config.yml" + cfg.write_text(content) + cfg.chmod(0o600) + info( + f"writing {container_tea} " + f"({len([r for r in routes if 'tea-login' in r.roles])} tea login(s))" + ) + docker_mod.docker_exec_root( + target, ["mkdir", "-p", str(Path(container_tea).parent)] + ) + subprocess.run( + ["docker", "cp", str(cfg), f"{target}:{container_tea}"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(target, [ + "chown", "-R", "node:node", str(Path(container_tea).parent), + ]) + docker_mod.docker_exec_root(target, ["chmod", "600", container_tea]) diff --git a/claude_bottle/cli/start.py b/claude_bottle/cli/start.py index 585bd75..a98e330 100644 --- a/claude_bottle/cli/start.py +++ b/claude_bottle/cli/start.py @@ -42,7 +42,6 @@ def cmd_start(argv: list[str]) -> int: agent_name=args.name, copy_cwd=args.cwd, user_cwd=USER_CWD, - forward_oauth_token=bool(os.environ.get("CLAUDE_BOTTLE_OAUTH_TOKEN")), ) stage_dir = Path(tempfile.mkdtemp(prefix="claude-bottle-stage.")) diff --git a/claude_bottle/cred_proxy.py b/claude_bottle/cred_proxy.py new file mode 100644 index 0000000..0856d85 --- /dev/null +++ b/claude_bottle/cred_proxy.py @@ -0,0 +1,268 @@ +"""Per-bottle credential proxy (PRD 0010). 
+ +A fourth per-bottle sidecar that holds API tokens (Anthropic OAuth, +GitHub PAT, Gitea PAT, npm token) and injects them as `Authorization` +headers on the agent's behalf. The agent's environ carries only URLs +pointing at `cred-proxy:<port>/<path>`; the upstream credentials live +exclusively in the cred-proxy container's environ. + +The boundary is the container line — different PID, mount, and network +namespaces separate the agent's container from the cred-proxy's, so +the agent cannot ptrace into the proxy, cannot read its environ via +/proc, and cannot share memory. Reaching the proxy's environ requires +escaping the agent container, the same threshold pipelock and +git-gate already rely on. + +This module defines the abstract proxy (`CredProxy`), its plan +dataclass (`CredProxyPlan`), and the resolved per-route shape +(`CredProxyRoute`). The sidecar's start/stop lifecycle is backend- +specific and lives on concrete subclasses (see +`claude_bottle/backend/docker/cred_proxy.py`). +""" + +from __future__ import annotations + +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path + +from .log import die +from .manifest import Bottle + + +# DNS name agents use to reach the per-bottle cred-proxy sidecar. +# Backend-agnostic by contract: every concrete backend (Docker today, +# others later) attaches this name to its sidecar on the bottle's +# internal network so the agent's manifest-driven URLs (`http:// +# cred-proxy:9099/...`) work without a backend-specific hostname. +# pipelock's allowlist also references this when adding the +# auto-allow entry for cred-proxy traffic from the agent. +CRED_PROXY_HOSTNAME = "cred-proxy" + + +@dataclass(frozen=True) +class CredProxyRoute: + """One resolved route on the cred-proxy sidecar. Maps a path + under the proxy to a real upstream, an auth scheme, an + in-container env-var slot, and optional provisioner roles.
+ + Distinct from `manifest.CredProxyRoute` (the declaration shape + with Capitalize fields): this is the runtime view after the + abstract `CredProxy.prepare` step assigns token slots and + normalizes URLs. Modules that need both alias one on import. + + `path` is the agent-facing prefix (e.g. `/anthropic/`). + `upstream` is the upstream base URL with scheme. `auth_scheme` + is the literal word that precedes the token in the injected + header (`Bearer` for most upstreams; `token` for Gitea — + sidesteps go-gitea/gitea#16734). + + `token_env` is the env-var name inside the cred-proxy container + (e.g. `CRED_PROXY_TOKEN_0`); `token_ref` is the host env var the + CLI reads at launch and forwards into the container's environ + under `token_env`. Routes that share a `token_ref` coalesce to + one `token_env` slot. + + `roles` are the provisioner tags from the manifest route (see + `manifest.CRED_PROXY_ROLES`). Each tag drives one agent-side + rewrite when this route's dotfile family is written.""" + + path: str + upstream: str + auth_scheme: str + token_env: str + token_ref: str + roles: tuple[str, ...] = () + + +@dataclass(frozen=True) +class CredProxyPlan: + """Output of CredProxy.prepare; consumed by .start. + + The slug + routes_path + routes + token_env_map fields are + filled at prepare time (host-side, side-effect-free on docker). + The network + pipelock fields are populated by the backend's + launch step via `dataclasses.replace` once those resources + exist. Empty defaults are sentinels meaning "not yet set"; + `.start` validates that they are populated. + + `token_env_map` is `{<token_env>: <token_ref>}`. + The backend's start step reads `os.environ[token_ref]` and + forwards the value into the cred-proxy container's environ under + `token_env`. The plan itself never holds token values — secrets + never land in a dataclass that might be logged.
+ + `pipelock_ca_host_path` is the host path of the per-bottle CA + pipelock will present on bumped TLS handshakes; the cred-proxy + image's entrypoint runs `update-ca-certificates` over it so the + proxy's HTTPS client trusts pipelock's CA. `pipelock_proxy_url` + is the URL cred-proxy sets as `HTTPS_PROXY` in its environ so + outbound HTTPS traverses pipelock — making pipelock's body + scanner part of the cred-proxy egress path.""" + + slug: str + routes_path: Path + routes: tuple[CredProxyRoute, ...] + token_env_map: dict[str, str] + internal_network: str = "" + egress_network: str = "" + pipelock_ca_host_path: Path = Path() + pipelock_proxy_url: str = "" + + +def cred_proxy_routes_for_bottle( + bottle: Bottle, +) -> tuple[CredProxyRoute, ...]: + """Lift each `bottle.cred_proxy.routes[]` manifest entry into a + resolved CredProxyRoute. Order is preserved so route lookup at + the proxy is stable. + + Token-env slots are assigned per distinct `token_ref`: the first + route with `token_ref` "GH_PAT" gets `CRED_PROXY_TOKEN_0`; a + second route with the same `token_ref` shares slot 0. The launch + step forwards each `token_ref`'s value from the host environ into + the sidecar's environ under the matching slot name once. + + Manifest validation already enforced uniqueness rules (no + duplicate paths, singleton-role enforcement).""" + out: list[CredProxyRoute] = [] + slot_for_token: dict[str, str] = {} + for r in bottle.cred_proxy.routes: + token_env = slot_for_token.get(r.TokenRef) + if token_env is None: + token_env = f"CRED_PROXY_TOKEN_{len(slot_for_token)}" + slot_for_token[r.TokenRef] = token_env + out.append(CredProxyRoute( + path=r.Path, + upstream=r.Upstream.rstrip("/"), + auth_scheme=r.AuthScheme, + token_env=token_env, + token_ref=r.TokenRef, + roles=r.Role, + )) + return tuple(out) + + +def cred_proxy_token_env_map( + routes: tuple[CredProxyRoute, ...], +) -> dict[str, str]: + """Collapse the route list into `{token_env: token_ref}`. 
Two + routes that share a token (gh-api + gh-git) coalesce; the result + is the set of env vars the backend's start step must forward into + the sidecar's environ.""" + out: dict[str, str] = {} + for r in routes: + existing = out.get(r.token_env) + if existing is not None and existing != r.token_ref: + die( + f"cred-proxy plan conflict: {r.token_env} maps to both " + f"{existing!r} and {r.token_ref!r}. Two routes sharing a " + f"token slot must reference the same host env var." + ) + out[r.token_env] = r.token_ref + return out + + +def cred_proxy_render_routes( + routes: tuple[CredProxyRoute, ...], +) -> str: + """Serialize the route table for the cred-proxy server to read. + JSON, no token values, no host env-var names — the only thing + the proxy needs at runtime is the path → upstream + auth-scheme + + in-container env-var mapping. The actual token values arrive via + the container's environ.""" + payload = { + "routes": [ + { + "path": r.path, + "upstream": r.upstream, + "auth_scheme": r.auth_scheme, + "token_env": r.token_env, + } + for r in routes + ], + } + return json.dumps(payload, indent=2, sort_keys=False) + "\n" + + +def cred_proxy_resolve_token_values( + token_env_map: dict[str, str], + host_env: dict[str, str], +) -> dict[str, str]: + """Read `host_env[TokenRef]` for each entry in `token_env_map` and + return `{token_env: <value>}`. Dies (with a clear pointer at the + missing var name) if any TokenRef is unset or empty. + + Pure function: takes the host env as an argument so tests can pass + a sealed mapping without touching `os.environ`.""" + out: dict[str, str] = {} + for token_env, token_ref in token_env_map.items(): + value = host_env.get(token_ref) + if value is None: + die( + f"cred-proxy: host env var '{token_ref}' is unset. Set it " + f"before launching, or remove the corresponding route from " + f"bottle.cred_proxy.routes." + ) + if not value: + die( + f"cred-proxy: host env var '{token_ref}' is empty.
The " + f"cred-proxy will not inject an empty token; set it to the " + f"real value or remove the route." + ) + out[token_env] = value + return out + + +class CredProxy(ABC): + """The per-bottle credential proxy. Encapsulates the host-side + prepare (route lift + routes.json render + token-env-map + derivation); the sidecar's start/stop lifecycle is backend- + specific and lives on concrete subclasses.""" + + def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> CredProxyPlan: + """Lift `bottle.cred_proxy.routes` into resolved routes, + render the routes.json (mode 600) under `stage_dir`, and + return the plan. Pure host-side, no docker subprocess. The + token-env map records the mapping the launch step uses to + forward values from the host's environ into the sidecar's + environ. + + Returned plan is incomplete: the launch step must fill + `internal_network` / `egress_network` via `dataclasses.replace` + before passing it to `.start`.""" + routes = cred_proxy_routes_for_bottle(bottle) + routes_path = stage_dir / "cred_proxy_routes.json" + routes_path.write_text(cred_proxy_render_routes(routes)) + routes_path.chmod(0o600) + return CredProxyPlan( + slug=slug, + routes_path=routes_path, + routes=routes, + token_env_map=cred_proxy_token_env_map(routes), + ) + + @abstractmethod + def start(self, plan: CredProxyPlan) -> str: + """Bring up the cred-proxy sidecar according to `plan`. Returns + the target string identifying the running instance — the same + value to pass to `.stop`. Backend-specific.""" + + @abstractmethod + def stop(self, target: str) -> None: + """Tear down the cred-proxy sidecar identified by `target` (the + value `.start` returned). Idempotent: a missing target is + success. 
Backend-specific.""" + + +__all__ = [ + "CRED_PROXY_HOSTNAME", + "CredProxy", + "CredProxyPlan", + "CredProxyRoute", + "cred_proxy_render_routes", + "cred_proxy_resolve_token_values", + "cred_proxy_routes_for_bottle", + "cred_proxy_token_env_map", +] diff --git a/claude_bottle/cred_proxy_server.py b/claude_bottle/cred_proxy_server.py new file mode 100644 index 0000000..5fc1c8a --- /dev/null +++ b/claude_bottle/cred_proxy_server.py @@ -0,0 +1,447 @@ +"""Cred-proxy HTTP server (PRD 0010). + +Runs inside the per-bottle cred-proxy sidecar. Reads +`/run/cred-proxy/routes.json` (laid down by the backend's start step +via `docker cp`) and listens on `0.0.0.0:<port>`. For each request: + + 1. Match the request path against the longest route prefix. + 2. Strip any inbound `Authorization` header (the agent cannot + smuggle a stolen token through this path). + 3. Inject the configured header using the value of the env var + named by the route's `token_env`. + 4. Forward to the upstream over HTTPS, preserving method, path + suffix, query string, request body, and the remaining headers. + 5. Stream the response back without buffering — SSE-safe. + +The agent talks plain HTTP to this server (loopback-equivalent across +the per-bottle internal docker network). The cred-proxy talks HTTPS +outbound through pipelock to the real upstream. Tokens live in this +container's environ; they never land in routes.json on disk and never +reach the agent's container. + +Stdlib-only: this file ships into a minimal Python image with no pip +install layer. The constants are duplicated from `cred_proxy.py` so +the server doesn't need to import the rest of the package.
@dataclass(frozen=True)
class Route:
    """One row of the proxy's route table.

    `path` is the agent-facing prefix (e.g. `/anthropic/`); the
    incoming request's path starts with this. `upstream_scheme` /
    `upstream_host` / `upstream_port` / `upstream_base_path` are the
    parsed pieces of the upstream URL — the request's path after the
    prefix is appended to `upstream_base_path`. `auth_scheme` is the
    literal word in the injected header (`Bearer` or `token`).
    `token_env` is the env-var name this container reads to get the
    token."""

    path: str
    upstream_scheme: str
    upstream_host: str
    upstream_port: int
    upstream_base_path: str
    auth_scheme: str
    token_env: str


def parse_routes(payload: dict[str, object]) -> tuple[Route, ...]:
    """Parse the routes.json payload into a tuple of `Route`s.

    The result is sorted by descending path length so longest-prefix
    match is the first hit in iteration order.

    Raises:
        ValueError: for every malformed payload — wrong container
            types, a missing or mistyped field, an unsupported scheme,
            a missing host, or an unparseable upstream URL/port.
    """
    raw = payload.get("routes")
    if not isinstance(raw, list):
        raise ValueError("routes.json: 'routes' must be a list")
    out: list[Route] = []
    for r in raw:
        if not isinstance(r, dict):
            raise ValueError(f"routes.json: route must be an object (got {type(r).__name__})")
        # `.get` rather than `[...]`: a missing key yields None, which the
        # type checks below reject with a ValueError like any other bad
        # field, instead of leaking a bare KeyError.
        path = r.get("path")
        upstream = r.get("upstream")
        auth_scheme = r.get("auth_scheme")
        token_env = r.get("token_env")
        if not isinstance(path, str) or not path.startswith("/") or not path.endswith("/"):
            raise ValueError(f"routes.json: path {path!r} must start and end with /")
        if not isinstance(upstream, str):
            raise ValueError("routes.json: upstream must be a string")
        if not isinstance(auth_scheme, str):
            raise ValueError("routes.json: auth_scheme must be a string")
        if not isinstance(token_env, str) or not token_env:
            raise ValueError("routes.json: token_env must be a non-empty string")
        try:
            parsed = urllib.parse.urlsplit(upstream)
            # `.port` validates lazily and raises ValueError on a
            # non-numeric or out-of-range port.
            explicit_port = parsed.port
        except ValueError as e:
            raise ValueError(f"routes.json: upstream {upstream!r} is not a valid URL: {e}") from e
        if parsed.scheme not in ("http", "https"):
            raise ValueError(f"routes.json: upstream scheme must be http or https (got {parsed.scheme!r})")
        if not parsed.hostname:
            raise ValueError(f"routes.json: upstream {upstream!r} missing host")
        port = explicit_port or (443 if parsed.scheme == "https" else 80)
        base_path = parsed.path or ""
        out.append(Route(
            path=path,
            upstream_scheme=parsed.scheme,
            upstream_host=parsed.hostname,
            upstream_port=port,
            upstream_base_path=base_path,
            auth_scheme=auth_scheme,
            token_env=token_env,
        ))
    out.sort(key=lambda r: len(r.path), reverse=True)
    return tuple(out)
def is_git_push_request(path: str, query: str) -> bool:
    """Return True if the request is a git smart-HTTP push.

    git push over HTTPS hits two endpoints:
        GET  /info/refs?service=git-receive-pack   (capabilities)
        POST /git-receive-pack                     (the push)

    Fetches use `service=git-upload-pack` / `/git-upload-pack` and are
    not blocked. cred-proxy refuses push because git-gate's pre-receive
    gitleaks scan is the gate for outbound git data; routing push
    through cred-proxy would bypass that. Use the bottle.git SSH path
    if you need to push.

    `path` is the request path without the query string; `query` is the
    raw query string (possibly empty). Both are percent-decoded before
    matching: the request is forwarded verbatim, so an upstream that
    decodes `git-receive%2Dpack` or `service=git%2Dreceive%2Dpack`
    would otherwise see a push that this check let through.
    """
    decoded_path = urllib.parse.unquote(path)
    if decoded_path.endswith("/git-receive-pack"):
        return True
    if decoded_path.endswith("/info/refs"):
        # parse_qsl decodes keys and values; `service=git-receive-pack`
        # may appear with other params in any order.
        return any(
            key == "service" and value == "git-receive-pack"
            for key, value in urllib.parse.parse_qsl(query, keep_blank_values=True)
        )
    return False
def build_forward_headers(
    incoming: typing.Iterable[tuple[str, str]],
    *,
    auth_scheme: str,
    token: str,
    upstream_host: str,
) -> list[tuple[str, str]]:
    """Return the header list to send on the upstream leg.

    Inbound headers are kept in their original order unless their
    lower-cased name is in `_STRIPPED` (hop-by-hop headers, Host,
    Authorization, Content-Length, Accept-Encoding) or is named as a
    token in an inbound `Connection:` header. Content-Length is dropped
    because http.client recomputes it from the body.

    Three headers are then appended, in this order: `Host` set to
    `upstream_host`, `Authorization: <auth_scheme> <token>`, and
    `Accept-Encoding: identity` so the upstream answers uncompressed
    and pipelock's response scanner can read the body.
    """
    pairs = list(incoming)
    # Names listed in `Connection: a, b` are hop-by-hop for this hop too.
    connection_named = {
        item.strip().lower()
        for name, value in pairs
        if name.lower() == "connection"
        for item in value.split(",")
    }
    dropped = _STRIPPED | connection_named
    upstream_headers = [
        (name, value) for name, value in pairs if name.lower() not in dropped
    ]
    upstream_headers += [
        ("Host", upstream_host),
        ("Authorization", f"{auth_scheme} {token}"),
        ("Accept-Encoding", "identity"),
    ]
    return upstream_headers
class CredProxyHandler(http.server.BaseHTTPRequestHandler):
    """Per-request handler. The routes + tokens are read off the
    server instance (set by `serve()`)."""

    # Quieter logs: the default writes one line per request to stderr.
    # Useful in debug but noisy in normal operation.
    def log_message(self, format: str, *args: typing.Any) -> None:
        """Log a request line only when CRED_PROXY_DEBUG is set."""
        if os.environ.get("CRED_PROXY_DEBUG"):
            super().log_message(format, *args)

    # Every supported method is handled identically by `_proxy`.
    def do_GET(self) -> None:
        self._proxy()

    def do_POST(self) -> None:
        self._proxy()

    def do_PUT(self) -> None:
        self._proxy()

    def do_DELETE(self) -> None:
        self._proxy()

    def do_PATCH(self) -> None:
        self._proxy()

    def do_HEAD(self) -> None:
        self._proxy()

    def do_OPTIONS(self) -> None:
        self._proxy()

    def _proxy(self) -> None:
        """Forward the current request upstream and relay the response.

        Replies 403 for git smart-HTTP push, 404 when no route matches,
        500 when the route's token env var is empty/unset, 400 for a
        non-integer Content-Length, 411 for a chunked request body, and
        502 when the upstream exchange fails.
        """
        server = typing.cast("CredProxyServer", self.server)
        path, _, query = self.path.partition("?")
        if is_git_push_request(path, query):
            self.send_error(
                403,
                "cred-proxy: git push over HTTPS is not supported; "
                "use the bottle.git SSH path (gitleaks-scanned by "
                "git-gate's pre-receive hook)",
            )
            return
        route = select_route(server.routes, path)
        if route is None:
            self.send_error(404, f"no route for {path!r}")
            return
        token = server.tokens.get(route.token_env)
        if not token:
            self.send_error(500, f"cred-proxy: env var {route.token_env} unset in sidecar")
            return

        suffix = path[len(route.path):]
        upstream_path = route.upstream_base_path.rstrip("/") + "/" + suffix
        if query:
            upstream_path = f"{upstream_path}?{query}"

        # Read the request body, if any. v1 buffers it rather than
        # streaming it upstream — claude's tool-use requests are small
        # JSON payloads; SSE flows are in the response direction only.
        body: bytes | None = None
        length_header = self.headers.get("Content-Length")
        if length_header is not None:
            try:
                length = int(length_header)
            except ValueError:
                self.send_error(400, "invalid Content-Length")
                return
            if length > 0:
                body = self.rfile.read(length)
        elif self.headers.get("Transfer-Encoding", "").lower() == "chunked":
            self.send_error(411, "cred-proxy: chunked request bodies not supported in v1")
            return

        forward_headers = build_forward_headers(
            self.headers.items(),
            auth_scheme=route.auth_scheme,
            token=token,
            upstream_host=route.upstream_host,
        )

        if route.upstream_scheme == "https":
            conn: http.client.HTTPConnection = http.client.HTTPSConnection(
                route.upstream_host, route.upstream_port, timeout=300,
            )
        else:
            conn = http.client.HTTPConnection(
                route.upstream_host, route.upstream_port, timeout=300,
            )

        try:
            # Send headers one by one instead of `request(headers=dict(...))`:
            # a dict keeps only the last of any repeated field name, which
            # silently drops headers the agent sent more than once.
            # Host and Accept-Encoding are already in `forward_headers`,
            # so http.client must not add its own.
            conn.putrequest(
                self.command, upstream_path,
                skip_host=True, skip_accept_encoding=True,
            )
            for name, value in forward_headers:
                conn.putheader(name, value)
            # Mirror what `HTTPConnection.request` computes for
            # Content-Length: the body length, or 0 for body-expecting
            # methods sent without one.
            if body is not None:
                conn.putheader("Content-Length", str(len(body)))
            elif self.command in ("PATCH", "POST", "PUT"):
                conn.putheader("Content-Length", "0")
            conn.endheaders(body)
            resp = conn.getresponse()
        except (OSError, http.client.HTTPException) as e:
            try:
                conn.close()
            except Exception:
                pass
            self.send_error(502, f"upstream connection failed: {e}")
            return

        try:
            self._stream_response(resp)
        finally:
            try:
                conn.close()
            except Exception:
                pass

    def _stream_response(self, resp: http.client.HTTPResponse) -> None:
        """Relay `resp` (status, filtered headers, body) to the agent,
        writing and flushing body bytes as they arrive."""
        out_headers = filter_response_headers(resp.getheaders())
        # We send Connection: close so the agent's client closes after
        # each request; simplifies streaming bookkeeping and keeps
        # the handler stateless per request.
        self.send_response(resp.status, resp.reason)
        for name, value in out_headers:
            self.send_header(name, value)
        self.send_header("Connection", "close")
        self.end_headers()
        try:
            while True:
                # read1() returns whatever one underlying read yields;
                # read(n) would block until n bytes or EOF, holding small
                # SSE events back until STREAM_CHUNK bytes accumulate.
                chunk = resp.read1(STREAM_CHUNK)
                if not chunk:
                    break
                self.wfile.write(chunk)
                self.wfile.flush()
        except (BrokenPipeError, ConnectionResetError):
            # Agent disconnected mid-stream; that's fine.
            return
def main(argv: list[str]) -> int:
    """Tiny argv shim: no flags in v1, all config via env vars.

    `argv` is accepted for the conventional entry-point shape and is
    not consulted. Three env vars are read, each with a default so the
    container needs no extra config to come up:

    - `CRED_PROXY_ROUTES` overrides the routes path (default
      `/run/cred-proxy/routes.json`).
    - `CRED_PROXY_PORT` overrides the listen port.
    - `CRED_PROXY_BIND` overrides the bind address (default `0.0.0.0`).

    Raises:
        ValueError: if `CRED_PROXY_PORT` is set but is not an integer.
    """
    raw_port = os.environ.get("CRED_PROXY_PORT")
    if raw_port is None:
        port = DEFAULT_PORT
    else:
        try:
            port = int(raw_port)
        except ValueError:
            # Name the offending variable; int()'s own message does not.
            raise ValueError(
                f"CRED_PROXY_PORT must be an integer (got {raw_port!r})"
            ) from None
    routes_path = os.environ.get("CRED_PROXY_ROUTES", DEFAULT_ROUTES_PATH)
    bind = os.environ.get("CRED_PROXY_BIND", "0.0.0.0")
    serve(routes_path=routes_path, port=port, bind=bind)
    return 0  # serve() does not return.
@dataclass(frozen=True)
class CredProxyRoute:
    """One route on the per-bottle cred-proxy sidecar (PRD 0010).

    The agent dials `http://cred-proxy:<port><Path>...`; the sidecar
    strips any inbound `Authorization` header, injects
    `<AuthScheme> <token>` using the value of the host env var named
    by `TokenRef`, and forwards the rest of the request to `Upstream`.

    Fields:
      - `Path`: agent-facing prefix; must start and end with `/`.
      - `Upstream`: upstream base URL (https only); the request path
        after `Path` is appended to it.
      - `AuthScheme`: literal word preceding the token in the injected
        header (`Bearer` for most upstreams, `token` for Gitea —
        sidesteps go-gitea/gitea#16734).
      - `TokenRef`: name of the host env var holding the credential;
        the CLI reads it at launch and forwards it into the sidecar's
        environ.
      - `Role`: optional provisioner tags (see CRED_PROXY_ROLES).
      - `UpstreamHost`: host parsed from `Upstream`, used for the
        pipelock allowlist + the git-insteadof suppression check.
    """

    Path: str
    Upstream: str
    AuthScheme: str
    TokenRef: str
    Role: tuple[str, ...] = ()
    UpstreamHost: str = ""

    @classmethod
    def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "CredProxyRoute":
        """Validate one `cred_proxy.routes[idx]` manifest entry and
        build the route. Every validation failure is reported through
        `die`, prefixed with the bottle name and route index."""
        label = f"bottle '{bottle_name}' cred_proxy.routes[{idx}]"
        entry = _as_json_object(raw, label)

        path = entry.get("path")
        if not (isinstance(path, str) and path):
            die(f"{label} missing required string field 'path'")
        if not path.startswith("/") or not path.endswith("/"):
            die(f"{label} path {path!r} must start and end with '/'")

        upstream = entry.get("upstream")
        if not (isinstance(upstream, str) and upstream):
            die(f"{label} missing required string field 'upstream'")
        host = _parse_https_host(upstream, f"{label} upstream")

        auth_scheme = entry.get("auth_scheme")
        if not (isinstance(auth_scheme, str) and auth_scheme):
            die(f"{label} missing required string field 'auth_scheme'")
        if auth_scheme not in CRED_PROXY_AUTH_SCHEMES:
            die(
                f"{label} auth_scheme {auth_scheme!r} is not one of "
                f"{', '.join(CRED_PROXY_AUTH_SCHEMES)}"
            )

        token_ref = entry.get("token_ref")
        if not (isinstance(token_ref, str) and token_ref):
            die(
                f"{label} missing required string field 'token_ref' "
                f"(name of the host env var holding the token value)"
            )

        # `role` may be absent, a single string, or a list of strings.
        role_raw = entry.get("role")
        roles: tuple[str, ...] = ()
        if isinstance(role_raw, str):
            roles = (role_raw,)
        elif isinstance(role_raw, list):
            names: list[str] = []
            for item in cast(list[object], role_raw):
                if not isinstance(item, str):
                    die(f"{label} role items must be strings (got {type(item).__name__})")
                names.append(item)
            roles = tuple(names)
        elif role_raw is not None:
            die(
                f"{label} role must be a string or a list of strings "
                f"(was {type(role_raw).__name__})"
            )

        for tag in roles:
            if tag not in CRED_PROXY_ROLES:
                die(
                    f"{label} role {tag!r} is not one of "
                    f"{', '.join(sorted(CRED_PROXY_ROLES))}"
                )

        return cls(
            Path=path,
            Upstream=upstream,
            AuthScheme=auth_scheme,
            TokenRef=token_ref,
            Role=roles,
            UpstreamHost=host,
        )
= () + cred_proxy: CredProxyConfig = field(default_factory=CredProxyConfig) egress: BottleEgress = field(default_factory=BottleEgress) @classmethod @@ -215,6 +365,20 @@ class Bottle: ) _validate_unique_git_names(name, git) + if "tokens" in d: + die( + f"bottle '{name}' has a 'tokens' field. The shape was reworked: " + f"each route now lives under 'cred_proxy.routes' with explicit " + f"path / upstream / auth_scheme / token_ref / role[]. See " + f"docs/prds/0010-cred-proxy.md." + ) + + cred_proxy = ( + CredProxyConfig.from_dict(name, d["cred_proxy"]) + if "cred_proxy" in d + else CredProxyConfig() + ) + egress_raw = d.get("egress") egress = ( BottleEgress.from_dict(name, egress_raw) @@ -222,7 +386,7 @@ class Bottle: else BottleEgress() ) - return cls(env=env, git=git, egress=egress) + return cls(env=env, git=git, cred_proxy=cred_proxy, egress=egress) @dataclass(frozen=True) @@ -441,6 +605,60 @@ def _parse_git_upstream(url: str, label: str) -> tuple[str, str, str, str]: return (user, host, port, path) +def _parse_https_host(url: str, label: str) -> str: + """Extract the host from an `https://host[:port][/path]` URL. + Dies if `url` is not an https:// URL or the host segment is empty. + Used to derive `CredProxyRoute.UpstreamHost` from a route's + `upstream` so pipelock's allowlist (and the provisioner's git-gate + overlap check) can match on host alone.""" + if not url.startswith("https://"): + die(f"{label} must be an https:// URL (was {url!r})") + rest = url[len("https://"):] + hostport, _, _ = rest.partition("/") + host, _, _port = hostport.partition(":") + if not host: + die(f"{label} host is empty in {url!r}") + return host + + +def _validate_cred_proxy_routes( + bottle_name: str, + routes: tuple[CredProxyRoute, ...], +) -> None: + """Cross-validation for `bottle.cred_proxy.routes`: + + - Paths must be unique within the bottle (the proxy routes by + longest-prefix match; duplicate paths leave the choice + undefined). 
def pipelock_token_hosts(bottle: Bottle) -> list[str]:
    """Return the cred-proxy upstream hostnames for `bottle` (PRD 0010).

    Collects the non-empty `UpstreamHost` of every route in
    `bottle.cred_proxy.routes`, deduplicated and sorted.

    These hosts must be on pipelock's allowlist so cred-proxy's
    outbound HTTPS traffic can leave the egress network. They are NOT
    auto-added to passthrough_domains: cred-proxy's HTTPS client trusts
    pipelock's per-bottle CA at runtime (installed via docker cp +
    update-ca-certificates in the cred-proxy image), so pipelock MITMs
    and body-scans the cred-proxy → upstream leg the same way it does
    direct agent traffic.
    """
    unique_hosts: set[str] = set()
    for route in bottle.cred_proxy.routes:
        if route.UpstreamHost:
            unique_hosts.add(route.UpstreamHost)
    return sorted(unique_hosts)
def pipelock_seed_phrase_detection_enabled(bottle: Bottle) -> bool:
    """Report whether pipelock's BIP-39 seed-phrase detector stays on.

    Returns False as soon as any route in `bottle.cred_proxy.routes`
    carries the `anthropic-base-url` role, True otherwise.

    Rationale: LLM conversation bodies legitimately trip the detector
    — any 12+ English words that pass the BIP-39 checksum match — so a
    bottle that routes claude through pipelock's body scanner gets
    blocked on the first real chat. Two narrower knobs were tried
    first and neither stopped the block:

      - `suppress: [{rule, path}]` — schema accepted, but it only
        silences the alert; the body_dlp block still fires.
      - `rules.disabled: ["dlp:BIP-39 Seed Phrase"]` — same outcome,
        403 still returned.

    Empirically only `seed_phrase_detection.enabled: false` stops the
    block (verified by sending a 12-word BIP-39 body through three
    pipelock instances). It is a global toggle — pipelock 2.3.0 has no
    per-path / per-host knob — so the detector is turned off for the
    entire bottle. The trade-off is accepted: BIP-39 detection has
    little value in claude-bottle's threat model (the agent has no
    access to a user's crypto wallet seeds; the patterns that matter —
    gh*_, sk-ant-, AKIA, etc. — keep firing).
    """
    for route in bottle.cred_proxy.routes:
        if "anthropic-base-url" in route.Role:
            return False
    return True
Default carries the LLM API endpoint — its request bodies + are user-authored conversation text that legitimately trips DLP + scanners (notably pipelock's BIP-39 seed-phrase detector). Every + other allowlisted host is MITM'd by pipelock's per-bottle CA so + its body scanner sees the cleartext. + + cred-proxy upstream hosts (github, gitea, npm) are deliberately + NOT auto-added here. cred-proxy's HTTPS client trusts pipelock's + CA at runtime (folded into its trust store via docker cp + + update-ca-certificates), so pipelock can MITM the cred-proxy → + upstream leg and body-scan it the same way it body-scans the + agent's direct HTTPS traffic. Without this, an agent that pushed + a secret via cred-proxy's /gh-git/ path would have no body + scanner in front of it. The PRD's earlier reasoning that + cred-proxy hosts needed passthrough was a workaround for the + cert-trust gap that no longer exists. + + `bottle` is kept on the signature for forward-compat (a future + knob might let a manifest opt a host into passthrough); today + the returned list is independent of the bottle.""" + del bottle # not consulted; see docstring. + return sorted(DEFAULT_TLS_PASSTHROUGH) + + def pipelock_allowlist_summary(bottle: Bottle) -> str: """One-line summary for the y/N preflight display: " hosts allowed (host1, host2, host3, +M more)".""" @@ -96,6 +183,7 @@ def pipelock_build_config( *, ca_cert_path: str = "", ca_key_path: str = "", + ssrf_ip_allowlist: tuple[str, ...] = (), ) -> dict[str, object]: """Build the structured pipelock config dict the sidecar will load. @@ -110,7 +198,17 @@ def pipelock_build_config( Pass both or neither: both → emit `tls_interception` block with `enabled: true`; neither → omit the block entirely (pipelock falls back to its built-in default of `enabled: false`). Used - by PRD 0006 to turn on pipelock's native TLS interception.""" + by PRD 0006 to turn on pipelock's native TLS interception. 
+ + `ssrf_ip_allowlist` is the list of IPs / CIDRs that bypass + pipelock's SSRF guard. Pipelock blocks RFC1918-resolved + destinations by default, which would catch the agent's + cred-proxy traffic (cred-proxy sits on the bottle's internal + Docker network in 172.x space). Pass the bottle's internal + network CIDR here so `cred-proxy:9099` requests get through + pipelock while api_allowlist + body-scanning still apply. Empty + by default; omitted from the rendered yaml when empty so + pipelock keeps its built-in SSRF defaults.""" cfg: dict[str, object] = { "version": 1, "mode": "strict", @@ -118,6 +216,8 @@ def pipelock_build_config( "api_allowlist": pipelock_effective_allowlist(bottle), "forward_proxy": {"enabled": True}, } + if not pipelock_seed_phrase_detection_enabled(bottle): + cfg["seed_phrase_detection"] = {"enabled": False} cfg["dlp"] = {"include_defaults": True, "scan_env": True} # Body-scan enforcement is a separate pipelock section (each DLP # "surface" — body, MCP, response — has its own action). 
Pipelock's @@ -135,8 +235,10 @@ def pipelock_build_config( "enabled": True, "ca_cert": ca_cert_path, "ca_key": ca_key_path, - "passthrough_domains": list(DEFAULT_TLS_PASSTHROUGH), + "passthrough_domains": pipelock_effective_tls_passthrough(bottle), } + if ssrf_ip_allowlist: + cfg["ssrf"] = {"ip_allowlist": list(ssrf_ip_allowlist)} return cfg @@ -156,6 +258,11 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str: for h in cast(list[str], cfg["api_allowlist"]): lines.append(f' - "{h}"') lines.append("") + if "seed_phrase_detection" in cfg: + lines.append("seed_phrase_detection:") + spd = cast(dict[str, object], cfg["seed_phrase_detection"]) + lines.append(f" enabled: {_bool(spd['enabled'])}") + lines.append("") lines.append("forward_proxy:") fp = cast(dict[str, object], cfg["forward_proxy"]) lines.append(f" enabled: {_bool(fp['enabled'])}") @@ -180,6 +287,13 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str: lines.append(" passthrough_domains:") for d in passthrough: lines.append(f' - "{d}"') + if "ssrf" in cfg: + lines.append("") + lines.append("ssrf:") + ssrf = cast(dict[str, object], cfg["ssrf"]) + lines.append(" ip_allowlist:") + for ip in cast(list[str], ssrf["ip_allowlist"]): + lines.append(f' - "{ip}"') return "\n".join(lines) + "\n" @@ -196,14 +310,21 @@ class PipelockProxyPlan: already so it doesn't need the host paths to be valid). The remaining fields are populated by the backend's launch step via `dataclasses.replace`: internal/egress networks once - those networks exist, and the CA host paths once the - one-shot `pipelock tls init` has run. Empty defaults are - sentinels meaning "not yet set"; `.start` validates that - they are populated.""" + those networks exist, the CA host paths once the one-shot + `pipelock tls init` has run, and `internal_network_cidr` once + Docker has assigned a subnet to the internal network. Empty + defaults are sentinels meaning "not yet set"; `.start` validates + that they are populated. 
+ + `internal_network_cidr` ends up on pipelock's `ssrf.ip_allowlist` + so the agent's requests at `cred-proxy:9099` (or any other + bottle-internal sidecar) bypass pipelock's RFC1918 SSRF guard + while api_allowlist and body-scanning still apply.""" yaml_path: Path slug: str internal_network: str = "" + internal_network_cidr: str = "" egress_network: str = "" ca_cert_host_path: Path = Path() ca_key_host_path: Path = Path() diff --git a/docs/prds/0010-cred-proxy.md b/docs/prds/0010-cred-proxy.md new file mode 100644 index 0000000..4d58c83 --- /dev/null +++ b/docs/prds/0010-cred-proxy.md @@ -0,0 +1,543 @@ +# PRD 0010: Credential proxy for agent-bound API tokens + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-13 + +## Summary + +Per-bottle sidecar container that holds API tokens (Anthropic +OAuth, GitHub PAT, Gitea PAT, npm token). The agent container +keeps only URLs in its environ; the sidecar injects the right +`Authorization` header and forwards over TLS to the upstream. The +boundary is the container line — PID, mount, and network +namespaces separate the agent's container from the sidecar's, so +from inside the agent the sidecar's processes are not visible in +`/proc`, cannot be `ptrace`'d, and share no memory. Reaching the +sidecar's environ requires escaping the agent container — the same +threshold pipelock and git-gate already rely on. + +AWS / SigV4 is explicitly out of scope — it is per-request signing, +not header injection, and does not fit this proxy's shape. If a +bottle needs AWS credentials later, that lives in a separate PRD. + +## Problem + +Today `CLAUDE_CODE_OAUTH_TOKEN` (and any `bottle.env` secrets such +as a Gitea PAT, GitHub PAT, or npm token) gets `docker run -e`'d +straight into the agent's environ. 
Inside the bottle the agent +runs as `node` with `--dangerously-skip-permissions`; its Bash +tool can do `printenv`, `cat /proc/self/environ`, or +`node -e 'console.log(process.env)'` and capture every value into +the conversation. From there a prompt-injected or hijacked agent +can exfiltrate them over any allowed egress (api.anthropic.com itself if +nothing else). + +Linux has no per-env-var ACL — once a variable is in a process's +environ, the process and its descendants own it. The credible +boundary is container-level: hold the credential in a separate +container the agent cannot reach. Default Docker's namespace +isolation enforces that — the same property pipelock and git-gate +already rely on. + +The research note +[`agent-credential-proxy-landscape.md`](../research/agent-credential-proxy-landscape.md) +surveys the existing tools and concludes that a small +claude-bottle-specific reverse proxy is less work and less risk +than either adopting nono (alpha, unaudited) or Infisical Agent +Vault (TLS-MITM topology that doubles up on pipelock's CA stack). +This PRD is the build. + +## Goals / Success Criteria + +Each test runs inside a bottle whose manifest declares the four +common upstreams (Anthropic, GitHub, Gitea, npm) as +`bottle.cred_proxy.routes` entries: + +1. **No plaintext tokens in the agent's environ.** `printenv` and + `cat /proc/self/environ` from the agent's shell return only + URLs pointing at `cred-proxy:<port>/...`. None of the + `cred_proxy.routes[].token_ref` host env-var values appear. +2. **Container boundary holds.** From the agent's shell, `ps aux` + does not list the cred-proxy process; there is no `/proc/<pid>/` + entry for it to read. The sidecar's hostname (`cred-proxy`) + resolves only on the bottle's internal network — from a + different bottle or from the host, the name does not resolve. +3. **Anthropic API works.** `claude` makes a successful streaming + tool-use round-trip via `ANTHROPIC_BASE_URL` → + `cred-proxy:<port>/anthropic`. 
SSE chunks arrive without + buffering; `anthropic-version`, `anthropic-beta`, and + `X-Claude-Code-Session-Id` headers round-trip untouched. +4. **`tea` / REST API against declared upstreams works.** + `tea pr list` against a route's upstream succeeds; the + upstream sees the proxy-injected token, not the agent's. + `git push` is *not* on the cred-proxy path — that goes + through `bottle.git` / git-gate (where gitleaks runs). +5. **npm install works.** `npm install <pkg>` + succeeds against the registry pointed at the proxy. A scoped + install that requires the token (e.g. against a private + registry) also succeeds. +6. **Wrong token rejected at the source, not silently swapped.** + If the agent tries to send its own `Authorization: …` header, + the proxy strips it and replaces it with the configured one. A + manifest token revoked at the upstream produces a 401 to the + agent, not a 5xx. Git smart-HTTP push paths + (`/git-receive-pack`, `/info/refs?service=git-receive-pack`) + return 403 unconditionally — push must go through git-gate's + gitleaks-scanned SSH path. + +## Non-goals + +- **AWS / SigV4.** Per-request signing is a different shape; a + bearer-injecting proxy doesn't help. Hold for a future PRD + (likely an IMDS emulator sidecar handing out short-lived STS + credentials). +- **DB-backed credential store.** Flat env / mode-600 file only. + The LiteLLM CVE-2026-42208 incident is the cautionary tale: + any DB-backed credential gateway is itself a high-value attack + target. +- **Generic LLM-gateway features.** No cost tracking, no + fallbacks, no virtual keys, no multi-tenant routing, no usage + metering. The proxy is a credential-injection trust endpoint, + not a gateway. +- **Subsuming pipelock.** pipelock keeps its egress-allowlist + role. It drops the `api.anthropic.com` TLS-MITM job because + cred-proxy is now the trust endpoint for that host; everything + else pipelock does stays. 
+- **TLS interception inside the bottle.** The agent talks plain + HTTP to the `cred-proxy` sidecar over the bottle's internal network; cred-proxy speaks real HTTPS outbound. No + container-local CA, no `golang/go#28866` loopback workaround. +- **Cross-bottle credential sharing.** One proxy per bottle, same + one-sidecar-per-agent posture as pipelock and git-gate. +- **`claude --bare` mode.** Reads only `ANTHROPIC_API_KEY`, not + the OAuth token. Not in claude-bottle's flow today. +- **MCP-server tokens, package-installer tokens for languages + beyond npm.** PyPI / Bun / cargo can land in a follow-up if + needed; the routing pattern generalizes. + +## Scope + +### In scope + +- **Manifest field.** `bottle.cred_proxy.routes: [Route, ...]`. + Each route carries `path` (agent-facing prefix), `upstream` + (HTTPS upstream URL), `auth_scheme` (`Bearer` or `token`), + `token_ref` (name of a host env var the CLI resolves at launch + time), and an optional `role` (string or list of strings — see + "Agent-side rewrites" below). Routes are independent — there is + no `Kind` enum or per-kind hardcoded path/upstream mapping; the + manifest is the source of truth for the proxy's runtime route + table. +- **cred-proxy sidecar.** Runs as its own container on the + bottle's internal docker network with hostname `cred-proxy`, + listening on `0.0.0.0:<port>` bound to the internal interface. + No host port published. Holds the tokens in the sidecar + container's environ — never on argv, never written to disk. + Per-route handler: inject the configured header, forward over + TLS, stream the response back without buffering. +- **Agent-side rewrites.** A route's `role` (string or list of + strings) drives optional agent-side dotfile/env writes when the + sidecar comes up. Known roles: + - `anthropic-base-url` (singleton): sets + `ANTHROPIC_BASE_URL=http://cred-proxy:<port><path>` in + the agent's environ. Used for the Anthropic OAuth path. + - `npm-registry` (singleton): writes + `registry=http://cred-proxy:<port><path>` to `~/.npmrc`. 
+ - `git-insteadof`: writes a `[url "http://cred-proxy:<port><path>"] + insteadOf = https://<host>/` block to `~/.gitconfig`. + Suppressed when `bottle.git` already brokers the same host: + git-gate is the canonical git path there — its pre-receive + runs gitleaks before forwarding pushes; a cred-proxy + `https://<host>/` rewrite would route HTTPS git ops around + the gate. (cred-proxy independently refuses smart-HTTP push + paths at runtime — see "Smart-HTTP push refused" below — but + suppressing the rewrite means `git clone https://<host>/...` + isn't offered a tempting shortcut that only causes confusion later.) + - `tea-login`: adds a `logins:` entry to + `~/.config/tea/config.yml` pointing at the proxy. Used for + Gitea instances; combine with `git-insteadof` for full agent + coverage. + + Routes without a `role` are pure proxy entries — the proxy + handles them at runtime, but no agent-side rewrite happens. The + singleton roles must appear on at most one route per bottle + (manifest validation enforces this). +- **Sidecar lifecycle.** Mirrors `DockerGitGate` / + `DockerPipelockProxy` in shape: `prepare` is host-side and + side-effect-free; `start` does `docker create` + `docker start` + on the bottle's internal network with hostname `cred-proxy`; + `stop` is idempotent `docker rm -f`. Container name: + `claude-bottle-cred-proxy-<slug>`. The agent container starts + after the sidecar is up so DNS resolution succeeds on the + agent's first call. +- **pipelock interop.** cred-proxy's outbound HTTPS traverses + pipelock: the sidecar's environ sets `HTTPS_PROXY` / + `HTTP_PROXY` to the per-bottle pipelock URL, and the cred-proxy + image's entrypoint runs `update-ca-certificates` over the + per-bottle pipelock CA (`docker cp`'d into + `/usr/local/share/ca-certificates/pipelock.crt` before start) + so cred-proxy's HTTPS client trusts pipelock's bumped certs. + Pipelock's allowlist + body scanner therefore apply to + cred-proxy → upstream the same way they apply to direct agent + traffic. 
Only `api.anthropic.com` stays on + `passthrough_domains` (its bodies are LLM conversation text + that legitimately trips DLP heuristics); github / gitea / npm + hosts are auto-added to the allowlist (so cred-proxy can reach + them) but NOT to passthrough, so pipelock body-scans them. +- **Smart-HTTP push refused.** cred-proxy returns 403 for paths + matching `/info/refs?service=git-receive-pack` and any path + ending in `/git-receive-pack`. Fetch (upload-pack) is allowed. + Push must go through `bottle.git` / git-gate, where the + gitleaks pre-receive hook runs. This holds even when no + matching `bottle.git` entry exists — the proxy is not a + scanned-push path, period. +- **Plan rendering.** `bottle_plan.py` and the y/N preflight + show: which tokens are configured (kind + ref name, not the + value), the proxy port, the routes the proxy will publish. +- **Drop the existing `CLAUDE_CODE_OAUTH_TOKEN` forward in + `prepare.py`.** Today it lands in the agent's environ; once + this PRD ships, it lands in the cred-proxy sidecar's environ + instead. +- **Tests.** Integration tests for each of the six success + criteria; unit tests for manifest parsing, route table + generation, header injection. + +### Out of scope + +- AWS / SigV4 (see Non-goals). +- Per-method / per-path allowlist *inside* a kind. Defer to a + follow-up once observed traffic stabilizes. +- Replacing `bottle.env` for non-token secrets. The proxy + handles the four kinds listed above; other env vars keep their + current path. +- Migrating an in-flight bottle from "token in agent env" to + "token via proxy" mid-session. Restart required. +- Audit logging. The proxy doesn't write request logs in v1. + Add only if a concrete debugging need surfaces. 
+ +## Proposed Design + +### Architecture + +``` +┌── Host (macOS) ──────────────────────────────────────────────────┐ +│ Secrets at rest (keychain / .env): │ +│ CLAUDE_BOTTLE_OAUTH_TOKEN, GITHUB_TOKEN, │ +│ GITEA_SERVER_TOKEN, NPM_TOKEN │ +│ │ docker run -e KEY (no =VALUE on argv) │ +│ ▼ │ +│ ┌── per-bottle internal docker network ──────────────────────┐ │ +│ │ │ │ +│ │ ┌── agent container ─────────────────────────────────┐ │ │ +│ │ │ claude as node (UID 1000) │ │ │ +│ │ │ --dangerously-skip-permissions │ │ │ +│ │ │ environ: URLs only, no plaintext tokens │ │ │ +│ │ │ ANTHROPIC_BASE_URL=http://cred-proxy:PORT/an.. │ │ │ +│ │ │ npm registry → http://cred-proxy:PORT/npm/ │ │ │ +│ │ │ git insteadOf → http://cred-proxy:PORT/... │ │ │ +│ │ │ tea --url → http://cred-proxy:PORT/gite │ │ │ +│ │ └────────────┬───────────────────────────────────────┘ │ │ +│ │ │ HTTP, DNS → cred-proxy │ │ +│ │ ▼ │ │ +│ │ ┌── cred-proxy sidecar ──────────────────────────────┐ │ │ +│ │ │ distroless image, no shell, runs as root │ │ │ +│ │ │ hostname: cred-proxy listens 0.0.0.0:PORT │ │ │ +│ │ │ tokens live ONLY in this container's environ │ │ │ +│ │ │ /anthropic → api.anthropic.com Bearer │ │ │ +│ │ │ /gh-api → api.github.com Bearer │ │ │ +│ │ │ /gh-git → github.com Bearer │ │ │ +│ │ │ /gitea → gitea.dideric.is token │ │ │ +│ │ │ /npm → registry.npmjs.org Bearer │ │ │ +│ │ │ SSE pass-through, no buffering │ │ │ +│ │ └────────────┬───────────────────────────────────────┘ │ │ +│ │ │ HTTPS │ │ +│ │ ▼ │ │ +│ │ ┌── pipelock sidecar (egress allowlist) ─────────────┐ │ │ +│ │ │ allow: api.anthropic.com, api.github.com, │ │ │ +│ │ │ github.com, gitea.dideric.is, │ │ │ +│ │ │ registry.npmjs.org │ │ │ +│ │ │ block: statsig, sentry, autoupdater, * │ │ │ +│ │ └────────────┬───────────────────────────────────────┘ │ │ +│ └────────────────┼───────────────────────────────────────────┘ │ +│ ▼ │ +└────────────────────┼─────────────────────────────────────────────┘ + ▼ + Upstream APIs + + +Why the agent can't 
reach the sidecar's environ: + ┌───────────────────────────────────────────────────────────────┐ + │ Different container = different PID, mount, and network ns. │ + │ The agent's /proc shows only the agent's own processes; │ + │ the cred-proxy PID is not visible — no /proc//environ │ + │ to read, no PID to ptrace, no shared memory. │ + │ │ + │ Reaching the sidecar's environ requires escaping the agent │ + │ container — the same threshold pipelock and git-gate rely │ + │ on. Default Docker isolation is the boundary. │ + └───────────────────────────────────────────────────────────────┘ +``` + +### New components + +- **`claude_bottle/cred_proxy.py`** (new): abstract `CredProxy` + + `CredProxyPlan` dataclass. `prepare` is host-side and + side-effect-free; renders the route table and resolves + `TokenRef`s against host env. Mirrors the existing `GitGate` / + `Pipelock` shape. +- **`claude_bottle/backend/docker/cred_proxy.py`** (new): + `DockerCredProxy` concrete subclass. `start` does + `docker create` on the bottle's internal network with hostname + `cred-proxy`, copies the route-table file into the container, + then `docker start`. `stop` is idempotent `docker rm -f`. + Container name: `claude-bottle-cred-proxy-`. +- **`claude_bottle/backend/docker/provision/cred_proxy.py`** + (new): renders `ANTHROPIC_BASE_URL`, `~/.npmrc`, + `~/.gitconfig` `insteadOf` blocks, and `~/.config/tea/config.yml` + into the agent's home for each declared kind — all pointing at + `http://cred-proxy:/...`. +- **cred-proxy image.** Minimal base + the proxy binary, no + shell. Pinned by digest, baked at build time. Footprint sized + to match git-gate's image rather than the full agent image. + +### Existing code touched + +- **`claude_bottle/manifest.py`** — add `CredProxyRoute`, + `CredProxyConfig`, `Bottle.cred_proxy: CredProxyConfig`. Parse + + validate route shape, role enum, path uniqueness, singleton- + role constraints. 
+- **`claude_bottle/backend/docker/prepare.py`** — drop the + legacy `CLAUDE_BOTTLE_OAUTH_TOKEN` → `CLAUDE_CODE_OAUTH_TOKEN` + forward entirely. cred-proxy is the only path the Anthropic + OAuth token reaches the bottle. When a route claims the + `anthropic-base-url` role, write `ANTHROPIC_BASE_URL` + (pointing at the proxy) plus a non-secret placeholder for + `CLAUDE_CODE_OAUTH_TOKEN` (claude-code refuses to start + otherwise; the proxy strips & replaces on every request). + Bottles that need claude-code to authenticate must declare + the route; there is no fallback. +- **`claude_bottle/backend/docker/backend.py`** — instantiate + `DockerCredProxy` alongside `DockerPipelockProxy` and + `DockerGitGate`; thread its `prepare` / `start` / `stop` + through `resolve_plan` / `launch`. +- **`claude_bottle/backend/docker/launch.py`** — add cred-proxy + start/stop to the `ExitStack` after pipelock and before the + agent; populate `pipelock_proxy_url` + `pipelock_ca_host_path` + on the cred-proxy plan so its outbound HTTPS routes through + pipelock. +- **`claude_bottle/backend/docker/bottle_plan.py`** — new + `cred_proxy_plan` field; preflight shows route count + token + refs + a path→upstream line per route; `to_dict` emits a + `cred_proxy` array of `{path, upstream, auth_scheme, token_ref, + roles}`. +- **`claude_bottle/pipelock.py`** — `pipelock_token_hosts` derives + from each route's `UpstreamHost` (not a hardcoded Kind→hosts + map). Allowlist auto-includes them; passthrough does not (the + proxy trusts pipelock's CA so MITM works). +- **`README.md`** — architecture diagram includes the cred-proxy + lane; manifest section documents `bottle.cred_proxy.routes`. +- **`claude-bottle.example.json`** — one bottle demonstrates the + four common routes (Anthropic, GitHub, Gitea, npm). +- **Tests** — manifest parsing/validation, route lift + token-env + slot assignment, role-based dispatch in the provisioner, + pipelock allowlist derivation from routes. 
Integration test + exercises header inject + smart-HTTP push refusal. + +### Data model changes + +```python +@dataclass(frozen=True) +class CredProxyRoute: + Path: str # "/anthropic/" — must start and end with / + Upstream: str # "https://api.anthropic.com" — https only + AuthScheme: str # "Bearer" or "token" + TokenRef: str # name of host env var + Role: tuple[str, ...] = () # provisioner tags; see CRED_PROXY_ROLES + UpstreamHost: str = "" # derived from Upstream + +@dataclass(frozen=True) +class CredProxyConfig: + routes: tuple[CredProxyRoute, ...] = () + +@dataclass(frozen=True) +class Bottle: + ... + cred_proxy: CredProxyConfig = field(default_factory=CredProxyConfig) +``` + +Validation: + +- `Path` non-empty, starts and ends with `/`; unique across all + routes in a bottle (the proxy routes by longest-prefix match). +- `Upstream` is `https://...` with a non-empty host. +- `AuthScheme` is one of `Bearer`, `token`. +- `TokenRef` non-empty; its value is resolved against + `os.environ` at launch (fail fast with a clear "host env var X + is unset" if missing). +- `Role` items are one of `anthropic-base-url`, `npm-registry`, + `git-insteadof`, `tea-login`. Single string accepted as sugar + for a one-item list. +- Singleton roles (`anthropic-base-url`, `npm-registry`) appear + on at most one route per bottle. +- A route MAY name the same host as a `bottle.git` entry. The + two paths broker different protocols — git-gate holds an SSH + `IdentityFile` for push/fetch and runs gitleaks; cred-proxy + holds a PAT for HTTPS REST API calls (`tea`, `gh`, octokit). + The common dev setup uses both on the same host. The + provisioner's `git-insteadof` role is suppressed in that case + (see Agent-side rewrites). 
+ +### Example routes + +| Common upstream | Route | +|------------------------|-------------------------------------------------------------------------------------------------------------------------------------| +| Anthropic API | `{path: "/anthropic/", upstream: "https://api.anthropic.com", auth_scheme: "Bearer", token_ref: "…", role: "anthropic-base-url"}` | +| GitHub REST API | `{path: "/gh-api/", upstream: "https://api.github.com", auth_scheme: "Bearer", token_ref: "…"}` | +| GitHub git transport | `{path: "/gh-git/", upstream: "https://github.com", auth_scheme: "Bearer", token_ref: "…", role: "git-insteadof"}` | +| Gitea instance | `{path: "/gitea//", upstream: "https://", auth_scheme: "token", token_ref: "…", role: ["git-insteadof", "tea-login"]}` | +| npm registry | `{path: "/npm/", upstream: "https://registry.npmjs.org", auth_scheme: "Bearer", token_ref: "…", role: "npm-registry"}` | + +Gitea uses `Authorization: token` rather than `Bearer` to +sidestep `go-gitea/gitea#16734`. The proxy strips any incoming +`Authorization` header before injecting its own — the agent +cannot smuggle a stolen token through this path. + +### External dependencies + +The proxy binary. Two real options: + +- **Python (stdlib)** — `http.server` + `urllib`/`http.client`, + no new pip packages. Matches CLAUDE.md's "bash-first, low-deps" + posture. SSE pass-through is fiddly but doable. +- **Go single binary** — cleaner SSE story, smaller runtime, + one static binary in a scratch/distroless image. New build + dependency. + +Default: Python in a minimal `python:3.X-slim` image (or alpine +if we want smaller). Reconsider in the implementation PR if SSE +behavior is troublesome under load. + +No new Python packages. No DB. No admin API. The proxy's +configuration is a single mode-600 JSON file copied into the +sidecar at `docker create` time and read by the proxy at startup +from `/run/cred-proxy/routes.json`. 
+ +## Future work + +- **AWS / SigV4.** Likely an IMDS emulator sidecar handing out + short-lived STS tokens. Different threat model (the agent + ends up holding the STS creds — the proxy just shortens + their lifetime). Separate PRD. +- **Per-method / per-path allowlist** inside a kind. Once the + set of API operations claude actually performs is observed, + reject everything else. Narrows the within-allowlist surface. +- **Short-lived token minting.** For services that support it + (GitHub Apps, GitLab project-access tokens, fine-grained + PATs with TTL), have the proxy mint a fresh per-session + child credential from a long-lived parent. +- **Smolmachines colocation.** Same packing question as + pipelock / git-gate; under a future microVM backend the + cred-proxy could share a VM with the agent (today's per-bottle + network gives it its own container, not its own VM) or sit in + its own VM (stricter isolation, an extra TCP hop). Backend + decision, not a manifest decision. +- **More kinds.** PyPI, Bun, cargo, Docker Hub. The routing + pattern generalizes; add as needed. + +## Considered alternatives + +### In-container proxy (root inside the agent container) + +Run cred-proxy as PID 1 of the agent container, listening on +`127.0.0.1:<port>`, with claude exec'd as `node` (UID 1000) only +after the proxy is bound. The boundary in that shape is the +kernel's cross-UID `ptrace_may_access` check — `node` cannot read +root's `/proc/<pid>/environ` and cannot `ptrace` attach. + +Pros: one fewer container per bottle; slightly faster bottle +startup; no extra docker create/start/stop dance. + +Rejected because: + +- **Weaker isolation.** The boundary collapses to UID separation + alone. Any container-root compromise inside the agent (setuid + bug in the image, accidentally mounted docker socket, a kernel + CVE, accidental `--privileged`) reads the proxy's environ via + `/proc/<pid>/environ`. 
The sidecar's namespace separation + cannot be bypassed from inside the agent container without a + container escape. +- **Inconsistent with the existing topology.** pipelock and + git-gate are already sidecars on the bottle's internal network. + cred-proxy slots into the same shape and reuses the same + lifecycle abstractions (`BottleBackend.prepare/start/stop`, + `ExitStack` ordering, plan rendering). +- **Coupled to the agent image.** The proxy binary, its + entrypoint, and its priv-drop logic would all live in the + agent's Dockerfile. A sidecar image evolves independently — + agents can change base, language, or tooling without touching + the proxy. +- **PID-1 babysitting.** The "proxy supervises, then `exec + setpriv → node`" entrypoint introduces a class of issues + (zombie reaping, signal forwarding, exit-code propagation) that + the sidecar shape avoids. + +## Open questions + +- **~~Field name.~~** Resolved during iteration: routes live at + `bottle.cred_proxy.routes` (the nested object reserves room for + per-bottle proxy settings later). Each route is independent; + no `Kind` enum on the route. A `role` field drives the + optional agent-side rewrites — see "Agent-side rewrites" in + Scope. +- **Python vs Go for the proxy.** Default: Python, revisit + during implementation if SSE pass-through is unreliable. +- **Sidecar image base.** Distroless (smallest, no shell — hardest + to debug), Python slim (debuggable, larger), or scratch + a + statically-linked Go binary (smallest if Go). Default: whatever + fits the chosen language with the smallest non-shell base; + revisit if debuggability bites during implementation. +- **Belt-and-braces on outbound telemetry.** Set + `CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1` and + `DISABLE_ERROR_REPORTING=1` in the agent's environ by + default? Default: yes — they don't route through + `ANTHROPIC_BASE_URL`, so the proxy doesn't catch them; the + flags are the only off switch. +- **`git push` over a rewritten URL vs. 
credential-helper + shim.** `[url "http://…"] insteadOf = "https://github.com/"` + captures push/fetch/clone/pull/ls-remote in one config knob; + a credential helper would need separate wiring. Default: + `insteadOf`. +- **Token-refresh story for the Anthropic OAuth token.** The + token is ~1-year and there's no client-side refresh, so the + proxy holds a static value. The 1-year blast radius is the + cost, documented in + [`claude-code-token-revocation.md`](../research/claude-code-token-revocation.md). + No design change here; flagged for awareness. +- **`anthropics/claude-code#36998`.** Older claude-code + versions bypassed `ANTHROPIC_BASE_URL` for some startup + calls (auth validation, org lookup). Marked closed upstream; + the implementation PR verifies with `strace -e connect` + against the pinned claude-code build before trusting the + isolation. + +## References + +- [`docs/research/agent-credential-proxy-landscape.md`](../research/agent-credential-proxy-landscape.md) + — landscape research; this PRD is the build path that note + recommends. +- [`docs/research/secret-minimization-over-dlp.md`](../research/secret-minimization-over-dlp.md) + — architectural framing: why moving the credential matters + more than scanning egress. +- PRD 0006: pipelock TLS interception — the + `api.anthropic.com` TLS-MITM responsibility cred-proxy takes + over. +- PRD 0008: Git gate — the credential-broker pattern this PRD + reuses (gate holds creds, agent gets a rewritten URL, gate + makes the upstream connection). +- [`anthropics/claude-code#36998`](https://github.com/anthropics/claude-code/issues/36998) + — historic `ANTHROPIC_BASE_URL` bypass. +- [`go-gitea/gitea#16734`](https://github.com/go-gitea/gitea/issues/16734) + — why Gitea uses `Authorization: token`, not `Bearer`. +- [`golang/go#28866`](https://github.com/golang/go/issues/28866) + — the `HTTPS_PROXY` loopback bug; not hit here because we're + a reverse proxy, not a forward proxy. 
diff --git a/tests/integration/_fake_upstream.py b/tests/integration/_fake_upstream.py new file mode 100644 index 0000000..f5c2264 --- /dev/null +++ b/tests/integration/_fake_upstream.py @@ -0,0 +1,91 @@ +"""A capture-and-echo HTTP server used as a fake upstream behind the +cred-proxy in integration tests. + +Captures the last request's method, path, and headers under +/__last_request (as JSON). Returns a fixed 200 OK with a deterministic +body for every other path. Tests probe /__last_request to assert on +header injection (PRD 0010 SC3/SC6). + +Stdlib-only; runs inside a python:alpine container with a single +bind-mount. +""" + +from __future__ import annotations + +import http.server +import json +import os +import socketserver +import sys +import threading + + +_lock = threading.Lock() +_last_request: dict[str, object] = {} + + +class Handler(http.server.BaseHTTPRequestHandler): + def log_message(self, format: str, *args: object) -> None: + # Quiet — the test reads the capture endpoint, not stderr. + return + + def _capture_and_respond(self) -> None: + # Skip capturing the inspection endpoints so the test's own + # query to /__last_request doesn't overwrite the request it + # came in to inspect. + if not self.path.startswith("/__"): + with _lock: + global _last_request + _last_request = { + "method": self.command, + "path": self.path, + "headers": [[k, v] for k, v in self.headers.items()], + } + if self.path == "/__last_request": + body = json.dumps(_last_request, indent=2).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + return + if self.path == "/__sse": + # SSE-style streaming response. Used by the no-buffering + # test: three events with short flushes between them. 
+ self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.send_header("Cache-Control", "no-cache") + self.end_headers() + for i in range(3): + self.wfile.write(f"data: event-{i}\n\n".encode("utf-8")) + self.wfile.flush() + return + body = b'{"upstream":"fake","ok":true}\n' + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self) -> None: self._capture_and_respond() + def do_POST(self) -> None: self._capture_and_respond() + def do_PUT(self) -> None: self._capture_and_respond() + def do_DELETE(self) -> None: self._capture_and_respond() + def do_PATCH(self) -> None: self._capture_and_respond() + + +class FakeServer(socketserver.ThreadingMixIn, http.server.HTTPServer): + allow_reuse_address = True + daemon_threads = True + + +def main() -> None: + port = int(os.environ.get("FAKE_UPSTREAM_PORT", "8080")) + server = FakeServer(("0.0.0.0", port), Handler) + sys.stderr.write(f"fake-upstream listening on :{port}\n") + sys.stderr.flush() + server.serve_forever() + + +if __name__ == "__main__": + main() diff --git a/tests/integration/test_cred_proxy_sidecar.py b/tests/integration/test_cred_proxy_sidecar.py new file mode 100644 index 0000000..c407380 --- /dev/null +++ b/tests/integration/test_cred_proxy_sidecar.py @@ -0,0 +1,273 @@ +"""Integration: drive `DockerCredProxy.prepare` → `.start` against a +fake upstream container, then verify header injection / strip-and- +replace at the wire level (PRD 0010 SC2, SC3, SC6). + +Topology mirrors production: a per-bottle internal docker network (no +default gateway) for the agent ↔ cred-proxy leg, and an egress network +for cred-proxy ↔ upstream. The "agent" is a curl container on the +internal net; the "upstream" is the fake-upstream container on the +egress net. cred-proxy straddles both. 
+""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend.docker.cred_proxy import ( + CRED_PROXY_HOSTNAME, + CRED_PROXY_PORT, + DockerCredProxy, + build_cred_proxy_image, + cred_proxy_container_name, +) +from claude_bottle.backend.docker.network import ( + network_create_egress, + network_create_internal, + network_remove, +) +from tests._docker import skip_unless_docker + + +CURL_IMAGE = "curlimages/curl:latest" +FAKE_UPSTREAM_IMAGE = "python:3.13-alpine" +FAKE_UPSTREAM_HOST = "fake-upstream" +FAKE_UPSTREAM_PORT = "8080" + + +def _make_routes_json(upstream_host: str, upstream_port: str) -> str: + payload = { + "routes": [ + { + "path": "/fake/", + "upstream": f"http://{upstream_host}:{upstream_port}", + "auth_scheme": "Bearer", + "token_env": "CRED_PROXY_TOKEN_0", + }, + ], + } + return json.dumps(payload, indent=2) + "\n" + + +@skip_unless_docker() +class TestCredProxySidecar(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Pre-pull the probe + fake-upstream base images so per-test + # retries don't race the registry. Skip if pulls fail (the + # canary suite separately probes registry health). 
+ for image in (CURL_IMAGE, FAKE_UPSTREAM_IMAGE): + r = subprocess.run( + ["docker", "pull", image], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + if r.returncode != 0: + raise unittest.SkipTest(f"could not pull {image}") + build_cred_proxy_image() + + def setUp(self): + self.slug = f"cb-test-cp-{os.getpid()}" + self.proxy_name = "" + self.fake_name = f"fake-upstream-{self.slug}" + self.internal_net = "" + self.egress_net = "" + self.work_dir = Path(tempfile.mkdtemp()) + + def tearDown(self): + for name in (self.proxy_name, self.fake_name): + if name: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + for n in (self.internal_net, self.egress_net): + if n: + network_remove(n) + shutil.rmtree(self.work_dir, ignore_errors=True) + + def _bring_up_fake_upstream(self) -> None: + """Run the fake-upstream container on the egress network with + the host stable name `fake-upstream`. Bind-mount the script + from tests/integration/.""" + repo_dir = str(Path(__file__).resolve().parent.parent.parent) + script = "tests/integration/_fake_upstream.py" + r = subprocess.run( + [ + "docker", "run", "-d", + "--name", self.fake_name, + "--hostname", FAKE_UPSTREAM_HOST, + "--network", self.egress_net, + "--network-alias", FAKE_UPSTREAM_HOST, + "-v", f"{repo_dir}/{script}:/srv.py:ro", + "-e", f"FAKE_UPSTREAM_PORT={FAKE_UPSTREAM_PORT}", + FAKE_UPSTREAM_IMAGE, + "python3", "/srv.py", + ], + capture_output=True, text=True, check=False, + ) + if r.returncode != 0: + self.fail(f"failed to start fake-upstream: {r.stderr}") + + def _start_cred_proxy_via_production_code(self) -> str: + """Run DockerCredProxy.start with a plan that points at the + fake upstream. 
We bypass the manifest path so we can route + the proxy at a test-only upstream (the fake-upstream + container) without going through the parser.""" + from claude_bottle.cred_proxy import ( + CredProxyPlan, + CredProxyRoute, + ) + routes_path = self.work_dir / "routes.json" + routes_path.write_text(_make_routes_json(FAKE_UPSTREAM_HOST, FAKE_UPSTREAM_PORT)) + routes_path.chmod(0o600) + plan = CredProxyPlan( + slug=self.slug, + routes_path=routes_path, + routes=(CredProxyRoute( + path="/fake/", + upstream=f"http://{FAKE_UPSTREAM_HOST}:{FAKE_UPSTREAM_PORT}", + auth_scheme="Bearer", + token_env="CRED_PROXY_TOKEN_0", + token_ref="TEST_TOKEN", + ),), + token_env_map={"CRED_PROXY_TOKEN_0": "TEST_TOKEN"}, + internal_network=self.internal_net, + egress_network=self.egress_net, + ) + # Inject the host-side TEST_TOKEN into our process env so the + # production resolver picks it up. + os.environ["TEST_TOKEN"] = "real-token-injected-by-proxy" + try: + return DockerCredProxy().start(plan) + finally: + os.environ.pop("TEST_TOKEN", None) + + def _curl_via_internal_net(self, path: str, *extra: str) -> str: + """Run a sibling curl container on the internal network — same + access topology the agent uses in production — to hit the + cred-proxy. 
Returns stdout.""" + r = subprocess.run( + [ + "docker", "run", "--rm", + "--network", self.internal_net, + CURL_IMAGE, + "-s", "--max-time", "10", + "--retry", "20", "--retry-delay", "1", "--retry-connrefused", + *extra, + f"http://{CRED_PROXY_HOSTNAME}:{CRED_PROXY_PORT}{path}", + ], + capture_output=True, text=True, timeout=60, check=False, + ) + self.assertEqual(0, r.returncode, + f"curl failed: stdout={r.stdout!r} stderr={r.stderr!r}") + return r.stdout + + def _query_fake_capture(self) -> dict: + """Read the fake upstream's /__last_request endpoint to see + what headers it received.""" + r = subprocess.run( + [ + "docker", "run", "--rm", + "--network", self.egress_net, + CURL_IMAGE, + "-s", "--max-time", "10", + "--retry", "5", "--retry-delay", "1", "--retry-connrefused", + f"http://{FAKE_UPSTREAM_HOST}:{FAKE_UPSTREAM_PORT}/__last_request", + ], + capture_output=True, text=True, timeout=30, check=False, + ) + self.assertEqual(0, r.returncode, f"capture query failed: {r.stderr}") + return json.loads(r.stdout) + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_end_to_end_header_injection_and_strip(self): + """Full bring-up via the production DockerCredProxy code path, + then send a request from a sibling curl container with the + agent's `Authorization` header. 
The fake upstream's capture + must show: + - the agent's Authorization was stripped (no `stolen` token) + - the cred-proxy injected `Bearer real-token-injected-by-proxy` + - the request reached the upstream at all + """ + self.internal_net = network_create_internal(self.slug) + self.egress_net = network_create_egress(self.slug) + self._bring_up_fake_upstream() + self.proxy_name = self._start_cred_proxy_via_production_code() + self.assertEqual(cred_proxy_container_name(self.slug), self.proxy_name) + + # Agent → cred-proxy with a smuggled Authorization header. + body = self._curl_via_internal_net( + "/fake/v1/messages", + "-H", "Authorization: Bearer stolen-by-prompt-injection", + "-X", "POST", + "-H", "Content-Type: application/json", + "--data-binary", '{"hello":"world"}', + ) + # The fake upstream responds with a fixed body. + self.assertIn('"upstream":"fake"', body) + + # Now ask the fake upstream what headers it actually saw. + captured = self._query_fake_capture() + self.assertEqual("POST", captured["method"]) + self.assertEqual("/v1/messages", captured["path"], + "the /fake/ prefix should be stripped before forwarding") + + headers = {k.lower(): v for k, v in captured["headers"]} + self.assertEqual( + "Bearer real-token-injected-by-proxy", + headers.get("authorization"), + "cred-proxy must strip the inbound Authorization and inject " + "the configured value", + ) + self.assertNotIn("stolen", headers.get("authorization", ""), + "the agent's smuggled token must NOT reach upstream") + self.assertEqual( + FAKE_UPSTREAM_HOST, + headers.get("host"), + "Host header should point at the upstream, not the proxy", + ) + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_unknown_path_returns_404(self): + """An agent reaching for an unconfigured route gets a 404, + not a silent forward to anywhere.""" + 
self.internal_net = network_create_internal(self.slug) + self.egress_net = network_create_egress(self.slug) + self._bring_up_fake_upstream() + self.proxy_name = self._start_cred_proxy_via_production_code() + + r = subprocess.run( + [ + "docker", "run", "--rm", + "--network", self.internal_net, + CURL_IMAGE, + "-s", "-o", "/dev/null", "-w", "%{http_code}", + "--max-time", "10", + "--retry", "20", "--retry-delay", "1", "--retry-connrefused", + f"http://{CRED_PROXY_HOSTNAME}:{CRED_PROXY_PORT}/not-a-route", + ], + capture_output=True, text=True, timeout=60, check=False, + ) + self.assertEqual(0, r.returncode) + self.assertEqual("404", r.stdout.strip()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_pipelock_allow_node.py b/tests/integration/test_pipelock_allow_node.py index 1d68d57..20bf1d1 100644 --- a/tests/integration/test_pipelock_allow_node.py +++ b/tests/integration/test_pipelock_allow_node.py @@ -79,7 +79,6 @@ class TestPipelockAllowsNode(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/integration/test_pipelock_allows_normal_https.py b/tests/integration/test_pipelock_allows_normal_https.py index 97b1732..41acabe 100644 --- a/tests/integration/test_pipelock_allows_normal_https.py +++ b/tests/integration/test_pipelock_allows_normal_https.py @@ -44,7 +44,6 @@ class TestPipelockAllowsNormalHttps(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/integration/test_pipelock_block_node.py b/tests/integration/test_pipelock_block_node.py index ba95888..62708f2 100644 --- a/tests/integration/test_pipelock_block_node.py +++ b/tests/integration/test_pipelock_block_node.py @@ -75,7 +75,6 @@ class 
TestPipelockBlocksNode(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/integration/test_pipelock_blocks_secret_https_post.py b/tests/integration/test_pipelock_blocks_secret_https_post.py index 92f9f80..2b597ae 100644 --- a/tests/integration/test_pipelock_blocks_secret_https_post.py +++ b/tests/integration/test_pipelock_blocks_secret_https_post.py @@ -63,7 +63,6 @@ class TestPipelockBlocksSecretHttpsPost(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/integration/test_pipelock_blocks_secret_post.py b/tests/integration/test_pipelock_blocks_secret_post.py index 6d6fb72..8c58bb6 100644 --- a/tests/integration/test_pipelock_blocks_secret_post.py +++ b/tests/integration/test_pipelock_blocks_secret_post.py @@ -99,7 +99,6 @@ class TestPipelockBlocksSecretPost(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/integration/test_pipelock_llm_passthrough.py b/tests/integration/test_pipelock_llm_passthrough.py index 7fecb3b..bca19b7 100644 --- a/tests/integration/test_pipelock_llm_passthrough.py +++ b/tests/integration/test_pipelock_llm_passthrough.py @@ -60,7 +60,6 @@ class TestPipelockLlmPassthrough(unittest.TestCase): agent_name="demo", copy_cwd=False, user_cwd=str(stage_dir), - forward_oauth_token=False, ) plan = backend.prepare(spec, stage_dir=stage_dir) with backend.launch(plan) as bottle: diff --git a/tests/unit/test_cred_proxy.py b/tests/unit/test_cred_proxy.py new file mode 100644 index 0000000..b62cd7c --- /dev/null +++ b/tests/unit/test_cred_proxy.py @@ -0,0 +1,200 @@ 
+"""Unit: CredProxy route lift + routes.json render + token resolution +(PRD 0010).""" + +import json +import unittest + +from claude_bottle.cred_proxy import ( + cred_proxy_render_routes, + cred_proxy_resolve_token_values, + cred_proxy_token_env_map, + cred_proxy_routes_for_bottle, +) +from claude_bottle.log import Die +from claude_bottle.manifest import Manifest + + +def _bottle(routes): + return Manifest.from_json_obj({ + "bottles": {"dev": {"cred_proxy": {"routes": routes}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + + +class TestUpstreamLift(unittest.TestCase): + def test_single_route_yields_single_upstream(self): + b = _bottle([ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN", + "role": "anthropic-base-url"}, + ]) + upstreams = cred_proxy_routes_for_bottle(b) + self.assertEqual(1, len(upstreams)) + u = upstreams[0] + self.assertEqual("/anthropic/", u.path) + self.assertEqual("https://api.anthropic.com", u.upstream) + self.assertEqual("Bearer", u.auth_scheme) + self.assertEqual("CRED_PROXY_TOKEN_0", u.token_env) + self.assertEqual("CLAUDE_BOTTLE_OAUTH_TOKEN", u.token_ref) + self.assertEqual(("anthropic-base-url",), u.roles) + + def test_shared_token_ref_collapses_to_one_slot(self): + # Two github routes share GH_PAT — they share token_env. 
+ b = _bottle([ + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "GH_PAT"}, + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH_PAT", + "role": "git-insteadof"}, + ]) + upstreams = cred_proxy_routes_for_bottle(b) + self.assertEqual(2, len(upstreams)) + self.assertEqual({"CRED_PROXY_TOKEN_0"}, + {u.token_env for u in upstreams}) + + def test_distinct_token_refs_get_distinct_slots(self): + b = _bottle([ + {"path": "/a/", "upstream": "https://a.example", + "auth_scheme": "Bearer", "token_ref": "T1"}, + {"path": "/b/", "upstream": "https://b.example", + "auth_scheme": "Bearer", "token_ref": "T2"}, + {"path": "/c/", "upstream": "https://c.example", + "auth_scheme": "Bearer", "token_ref": "T1"}, + ]) + upstreams = cred_proxy_routes_for_bottle(b) + # T1 -> slot 0, T2 -> slot 1, T1 reuses slot 0. + self.assertEqual("CRED_PROXY_TOKEN_0", upstreams[0].token_env) + self.assertEqual("CRED_PROXY_TOKEN_1", upstreams[1].token_env) + self.assertEqual("CRED_PROXY_TOKEN_0", upstreams[2].token_env) + + def test_upstream_trailing_slash_stripped(self): + b = _bottle([ + {"path": "/x/", "upstream": "https://gitea.dideric.is/", + "auth_scheme": "token", "token_ref": "T"}, + ]) + self.assertEqual("https://gitea.dideric.is", + cred_proxy_routes_for_bottle(b)[0].upstream) + + def test_roles_list_passes_through(self): + b = _bottle([ + {"path": "/gitea/x/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "T", + "role": ["git-insteadof", "tea-login"]}, + ]) + self.assertEqual(("git-insteadof", "tea-login"), + cred_proxy_routes_for_bottle(b)[0].roles) + + def test_empty_routes_yields_empty_upstreams(self): + b = _bottle([]) + self.assertEqual((), cred_proxy_routes_for_bottle(b)) + + +class TestTokenEnvMap(unittest.TestCase): + def test_distinct_envs_yield_full_map(self): + b = _bottle([ + {"path": "/a/", "upstream": "https://a.example", + "auth_scheme": "Bearer", 
"token_ref": "A"}, + {"path": "/b/", "upstream": "https://b.example", + "auth_scheme": "Bearer", "token_ref": "B"}, + ]) + m = cred_proxy_token_env_map(cred_proxy_routes_for_bottle(b)) + self.assertEqual({"CRED_PROXY_TOKEN_0": "A", + "CRED_PROXY_TOKEN_1": "B"}, m) + + def test_shared_token_ref_yields_one_env(self): + b = _bottle([ + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "GH"}, + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH"}, + ]) + m = cred_proxy_token_env_map(cred_proxy_routes_for_bottle(b)) + self.assertEqual({"CRED_PROXY_TOKEN_0": "GH"}, m) + + +class TestRoutesRender(unittest.TestCase): + def test_renders_json_with_expected_shape(self): + b = _bottle([ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN"}, + {"path": "/gitea/x/", "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", "token_ref": "GITEA_TOKEN"}, + ]) + rendered = cred_proxy_render_routes(cred_proxy_routes_for_bottle(b)) + payload = json.loads(rendered) + self.assertEqual(["routes"], list(payload.keys())) + self.assertEqual(2, len(payload["routes"])) + first = payload["routes"][0] + self.assertEqual({"path", "upstream", "auth_scheme", "token_env"}, + set(first.keys())) + + def test_routes_carry_no_token_values_or_host_env_names(self): + # routes.json lives mode-600 in the staging dir and gets + # docker cp'd into the sidecar — it must not leak secret values + # or the host-side TokenRef name. 
+ b = _bottle([{"path": "/x/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_ref": "GITHUB_TOKEN"}]) + rendered = cred_proxy_render_routes(cred_proxy_routes_for_bottle(b)) + self.assertNotIn("GITHUB_TOKEN", rendered) + + def test_empty_upstreams_renders_empty_routes_array(self): + rendered = cred_proxy_render_routes(()) + self.assertEqual({"routes": []}, json.loads(rendered)) + + +class TestResolveTokenValues(unittest.TestCase): + def test_resolves_present_env(self): + out = cred_proxy_resolve_token_values( + {"CRED_PROXY_TOKEN_0": "FOO"}, + {"FOO": "the-value"}, + ) + self.assertEqual({"CRED_PROXY_TOKEN_0": "the-value"}, out) + + def test_unset_host_env_dies(self): + with self.assertRaises(Die): + cred_proxy_resolve_token_values( + {"CRED_PROXY_TOKEN_0": "MISSING"}, + {}, + ) + + def test_empty_host_env_dies(self): + with self.assertRaises(Die): + cred_proxy_resolve_token_values( + {"CRED_PROXY_TOKEN_0": "FOO"}, + {"FOO": ""}, + ) + + +class TestCredProxyPrepare(unittest.TestCase): + def test_prepare_writes_routes_file_and_returns_plan(self): + import tempfile + from pathlib import Path + + from claude_bottle.cred_proxy import CredProxy, CredProxyPlan + + class StubCredProxy(CredProxy): + def start(self, plan): return "" + def stop(self, target): return None + + b = _bottle([ + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "GITHUB_TOKEN"}, + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GITHUB_TOKEN", + "role": "git-insteadof"}, + ]) + with tempfile.TemporaryDirectory() as td: + stage = Path(td) + plan = StubCredProxy().prepare(b, "test-slug", stage) + self.assertIsInstance(plan, CredProxyPlan) + self.assertEqual("test-slug", plan.slug) + self.assertTrue(plan.routes_path.is_file()) + self.assertEqual(0o600, plan.routes_path.stat().st_mode & 0o777) + payload = json.loads(plan.routes_path.read_text()) + self.assertEqual(2, 
len(payload["routes"])) + self.assertEqual({"CRED_PROXY_TOKEN_0": "GITHUB_TOKEN"}, + plan.token_env_map) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_cred_proxy_server.py b/tests/unit/test_cred_proxy_server.py new file mode 100644 index 0000000..bace39a --- /dev/null +++ b/tests/unit/test_cred_proxy_server.py @@ -0,0 +1,262 @@ +"""Unit: cred-proxy server pure functions — route parsing, route +selection, header injection (PRD 0010).""" + +import unittest + +from claude_bottle.cred_proxy_server import ( + Route, + build_forward_headers, + filter_response_headers, + is_git_push_request, + load_tokens, + parse_routes, + select_route, +) + + +class TestParseRoutes(unittest.TestCase): + def test_parses_minimal_payload(self): + routes = parse_routes({"routes": [ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_env": "CRED_PROXY_TOKEN_0"}, + ]}) + self.assertEqual(1, len(routes)) + r = routes[0] + self.assertEqual("/anthropic/", r.path) + self.assertEqual("https", r.upstream_scheme) + self.assertEqual("api.anthropic.com", r.upstream_host) + self.assertEqual(443, r.upstream_port) + self.assertEqual("", r.upstream_base_path) + self.assertEqual("Bearer", r.auth_scheme) + self.assertEqual("CRED_PROXY_TOKEN_0", r.token_env) + + def test_extracts_port_from_upstream(self): + routes = parse_routes({"routes": [ + {"path": "/gitea/gitea.dideric.is/", + "upstream": "https://gitea.dideric.is:30443", + "auth_scheme": "token", "token_env": "CRED_PROXY_TOKEN_0"}, + ]}) + self.assertEqual(30443, routes[0].upstream_port) + + def test_sorted_by_descending_path_length(self): + # /a/b/ should come before /a/ so longest-prefix is first. 
+ routes = parse_routes({"routes": [ + {"path": "/a/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_env": "T1"}, + {"path": "/a/b/", "upstream": "https://y.example", + "auth_scheme": "Bearer", "token_env": "T2"}, + ]}) + self.assertEqual("/a/b/", routes[0].path) + self.assertEqual("/a/", routes[1].path) + + def test_bad_path_rejected(self): + with self.assertRaises(ValueError): + parse_routes({"routes": [ + {"path": "no-leading-slash", "upstream": "https://x", + "auth_scheme": "Bearer", "token_env": "T"}, + ]}) + + def test_non_http_scheme_rejected(self): + with self.assertRaises(ValueError): + parse_routes({"routes": [ + {"path": "/x/", "upstream": "ftp://x.example/", + "auth_scheme": "Bearer", "token_env": "T"}, + ]}) + + +class TestSelectRoute(unittest.TestCase): + def setUp(self): + self.routes = parse_routes({"routes": [ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_env": "T_A"}, + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_env": "T_G"}, + {"path": "/gitea/gitea.dideric.is/", + "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", "token_env": "T_T"}, + ]}) + + def test_matches_prefix(self): + r = select_route(self.routes, "/anthropic/v1/messages") + assert r is not None + self.assertEqual("/anthropic/", r.path) + + def test_no_match_returns_none(self): + self.assertIsNone(select_route(self.routes, "/other/path")) + + def test_picks_longest_prefix(self): + routes = parse_routes({"routes": [ + {"path": "/a/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_env": "T1"}, + {"path": "/a/long/", "upstream": "https://y.example", + "auth_scheme": "Bearer", "token_env": "T2"}, + ]}) + r = select_route(routes, "/a/long/sub") + assert r is not None + self.assertEqual("/a/long/", r.path) + + +class TestBuildForwardHeaders(unittest.TestCase): + def test_strips_authorization_and_injects(self): + headers = 
build_forward_headers( + [("Authorization", "Bearer stolen-token"), + ("Content-Type", "application/json")], + auth_scheme="Bearer", + token="real-token", + upstream_host="api.anthropic.com", + ) + names = [n.lower() for n, _ in headers] + # Only one Authorization remains, with the injected value. + auth_values = [v for n, v in headers if n.lower() == "authorization"] + self.assertEqual(["Bearer real-token"], auth_values) + self.assertEqual(1, names.count("authorization")) + # Content-Type passes through. + self.assertIn(("Content-Type", "application/json"), headers) + + def test_strips_authorization_case_insensitive(self): + headers = build_forward_headers( + [("authorization", "Bearer stolen")], + auth_scheme="Bearer", + token="real", + upstream_host="x.example", + ) + auth_values = [v for n, v in headers if n.lower() == "authorization"] + self.assertEqual(["Bearer real"], auth_values) + + def test_strips_hop_by_hop(self): + headers = build_forward_headers( + [("Connection", "keep-alive, x-custom"), + ("X-Custom", "should-be-dropped"), + ("Keep-Alive", "300"), + ("Transfer-Encoding", "chunked"), + ("X-Real", "kept")], + auth_scheme="Bearer", + token="t", + upstream_host="x.example", + ) + names = [n.lower() for n, _ in headers] + self.assertNotIn("connection", names) + self.assertNotIn("keep-alive", names) + self.assertNotIn("transfer-encoding", names) + self.assertNotIn("x-custom", names) # listed in Connection: -> hop-by-hop + self.assertIn("x-real", names) + + def test_forces_identity_accept_encoding(self): + # The agent's gzip/br Accept-Encoding gets replaced with + # `identity` so the upstream returns uncompressed bytes — + # pipelock's response scanner can't read compressed bodies + # and would 403 with "compressed sse_stream response cannot + # be scanned". 
+ headers = build_forward_headers( + [("Accept-Encoding", "gzip, deflate, br")], + auth_scheme="Bearer", token="t", upstream_host="x.example", + ) + ae = [v for n, v in headers if n.lower() == "accept-encoding"] + self.assertEqual(["identity"], ae) + + def test_strips_content_length(self): + # http.client recomputes Content-Length; passing it through + # double-counts and breaks the upstream. + headers = build_forward_headers( + [("Content-Length", "999")], + auth_scheme="Bearer", token="t", upstream_host="x.example", + ) + names = [n.lower() for n, _ in headers] + self.assertNotIn("content-length", names) + + def test_sets_host_to_upstream(self): + headers = build_forward_headers( + [("Host", "cred-proxy:9099")], + auth_scheme="Bearer", token="t", upstream_host="api.anthropic.com", + ) + host_values = [v for n, v in headers if n.lower() == "host"] + self.assertEqual(["api.anthropic.com"], host_values) + + def test_uses_token_scheme(self): + # gitea uses Authorization: token , not Bearer. + headers = build_forward_headers( + [], + auth_scheme="token", token="abc123", upstream_host="gitea.dideric.is", + ) + auth_values = [v for n, v in headers if n.lower() == "authorization"] + self.assertEqual(["token abc123"], auth_values) + + +class TestFilterResponseHeaders(unittest.TestCase): + def test_strips_hop_by_hop_only(self): + out = filter_response_headers([ + ("Content-Type", "text/event-stream"), + ("Connection", "close"), + ("Transfer-Encoding", "chunked"), + ("Cache-Control", "no-cache"), + ]) + names = [n.lower() for n, _ in out] + self.assertIn("content-type", names) + self.assertIn("cache-control", names) + self.assertNotIn("connection", names) + self.assertNotIn("transfer-encoding", names) + + +class TestIsGitPushRequest(unittest.TestCase): + """git push over HTTPS goes through /info/refs?service=git-receive-pack + (capabilities probe) then POST /git-receive-pack (the push body). 
+ Fetches use /git-upload-pack and are not blocked — the bypass we're + closing is push, since git-gate's gitleaks pre-receive is the scanner + for outbound git data.""" + + def test_push_capabilities_probe_blocked(self): + self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "service=git-receive-pack", + )) + + def test_push_body_blocked(self): + self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/git-receive-pack", "", + )) + + def test_fetch_capabilities_allowed(self): + self.assertFalse(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "service=git-upload-pack", + )) + + def test_fetch_body_allowed(self): + self.assertFalse(is_git_push_request( + "/gh-git/owner/repo.git/git-upload-pack", "", + )) + + def test_rest_api_allowed(self): + # tea/gh-style REST calls hit /api/v1/... — unrelated. + self.assertFalse(is_git_push_request( + "/gitea/gitea.dideric.is/api/v1/repos/x/y", "", + )) + + def test_push_with_extra_query_params(self): + # `service` may appear with other params in any order. + self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "trace=1&service=git-receive-pack", + )) + + +class TestLoadTokens(unittest.TestCase): + def test_reads_per_route_env(self): + routes = ( + Route("/a/", "https", "x", 443, "", "Bearer", "T_0"), + Route("/b/", "https", "y", 443, "", "Bearer", "T_1"), + ) + out = load_tokens(routes, {"T_0": "val0", "T_1": "val1"}) + self.assertEqual({"T_0": "val0", "T_1": "val1"}, out) + + def test_missing_env_yields_empty_string(self): + # The handler returns 500 at request time rather than the + # server refusing to start. This keeps the operator's failure + # signal in the cred-proxy's logs. 
+ routes = (Route("/a/", "https", "x", 443, "", "Bearer", "T_0"),) + out = load_tokens(routes, {}) + self.assertEqual({"T_0": ""}, out) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_docker_cred_proxy.py b/tests/unit/test_docker_cred_proxy.py new file mode 100644 index 0000000..69b3cee --- /dev/null +++ b/tests/unit/test_docker_cred_proxy.py @@ -0,0 +1,105 @@ +"""Unit: DockerCredProxy helpers + early-exit guards (PRD 0010). + +The full docker lifecycle is exercised by integration tests; here we +cover the pure helpers and the validation checks `.start` runs +before touching docker.""" + +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend.docker.cred_proxy import ( + CRED_PROXY_HOSTNAME, + CRED_PROXY_PORT, + DockerCredProxy, + cred_proxy_container_name, + cred_proxy_url, +) +from claude_bottle.cred_proxy import CredProxyPlan, CredProxyRoute +from claude_bottle.log import Die + + +def _empty_plan(**overrides): + base = { + "slug": "demo", + "routes_path": Path("/nonexistent"), + "routes": (), + "token_env_map": {}, + "internal_network": "", + "egress_network": "", + "pipelock_ca_host_path": Path(), + "pipelock_proxy_url": "", + } + base.update(overrides) + return CredProxyPlan(**base) + + +class TestNameAndUrl(unittest.TestCase): + def test_container_name_carries_slug(self): + self.assertEqual("claude-bottle-cred-proxy-demo", + cred_proxy_container_name("demo")) + + def test_url_uses_alias_not_container_name(self): + # The URL agents dial is stable across bottles — the slug + # never appears in it. That's the whole point of attaching + # --network-alias cred-proxy on the internal network. 
+ self.assertEqual(f"http://{CRED_PROXY_HOSTNAME}:{CRED_PROXY_PORT}", + cred_proxy_url()) + + +class TestStartGuards(unittest.TestCase): + def setUp(self): + self.proxy = DockerCredProxy() + + def test_empty_upstreams_dies(self): + with self.assertRaises(Die): + self.proxy.start(_empty_plan()) + + def test_missing_internal_network_dies(self): + upstream = CredProxyRoute( + path="/anthropic/", + upstream="https://api.anthropic.com", + auth_scheme="Bearer", token_env="CRED_PROXY_TOKEN_0", + token_ref="T", + ) + with self.assertRaises(Die): + self.proxy.start(_empty_plan(routes=(upstream,))) + + def test_missing_routes_file_dies(self): + upstream = CredProxyRoute( + path="/anthropic/", + upstream="https://api.anthropic.com", + auth_scheme="Bearer", token_env="CRED_PROXY_TOKEN_0", + token_ref="T", + ) + with self.assertRaises(Die): + self.proxy.start(_empty_plan( + routes=(upstream,), + internal_network="net-x", + egress_network="egress-x", + routes_path=Path("/tmp/cred-proxy-test-does-not-exist.json"), + )) + + def test_pipelock_url_without_ca_dies(self): + # URL set + CA path empty/missing is a wiring bug: either both + # populated (production) or both empty (test escape hatch). 
+ upstream = CredProxyRoute( + path="/anthropic/", + upstream="https://api.anthropic.com", + auth_scheme="Bearer", token_env="CRED_PROXY_TOKEN_0", + token_ref="T", + ) + with tempfile.NamedTemporaryFile() as routes: + with self.assertRaises(Die): + self.proxy.start(_empty_plan( + routes=(upstream,), + internal_network="net-x", + egress_network="egress-x", + routes_path=Path(routes.name), + pipelock_proxy_url="http://pipelock:8888", + pipelock_ca_host_path=Path("/tmp/cred-proxy-no-ca.pem"), + )) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_manifest_tokens.py b/tests/unit/test_manifest_tokens.py new file mode 100644 index 0000000..c6cd8ab --- /dev/null +++ b/tests/unit/test_manifest_tokens.py @@ -0,0 +1,174 @@ +"""Unit: bottle.cred_proxy.routes manifest parsing + validation (PRD 0010).""" + +import unittest + +from claude_bottle.log import Die +from claude_bottle.manifest import Manifest + + +def _manifest(routes, git=None): + bottle: dict[str, object] = {"cred_proxy": {"routes": routes}} + if git is not None: + bottle["git"] = git + return { + "bottles": {"dev": bottle}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + } + + +class TestCredProxyRouteParsing(unittest.TestCase): + def test_parses_minimal_route(self): + m = Manifest.from_json_obj(_manifest([ + {"path": "/anthropic/", + "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", + "token_ref": "CLAUDE_BOTTLE_OAUTH_TOKEN"}, + ])) + routes = m.bottles["dev"].cred_proxy.routes + self.assertEqual(1, len(routes)) + r = routes[0] + self.assertEqual("/anthropic/", r.Path) + self.assertEqual("https://api.anthropic.com", r.Upstream) + self.assertEqual("Bearer", r.AuthScheme) + self.assertEqual("CLAUDE_BOTTLE_OAUTH_TOKEN", r.TokenRef) + self.assertEqual((), r.Role) + self.assertEqual("api.anthropic.com", r.UpstreamHost) + + def test_role_string_normalizes_to_tuple(self): + m = Manifest.from_json_obj(_manifest([ + {"path": "/anthropic/", "upstream": 
"https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "T", + "role": "anthropic-base-url"}, + ])) + self.assertEqual(("anthropic-base-url",), + m.bottles["dev"].cred_proxy.routes[0].Role) + + def test_role_list_supported(self): + m = Manifest.from_json_obj(_manifest([ + {"path": "/gitea/x/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "T", + "role": ["git-insteadof", "tea-login"]}, + ])) + self.assertEqual(("git-insteadof", "tea-login"), + m.bottles["dev"].cred_proxy.routes[0].Role) + + def test_upstream_host_extracted(self): + m = Manifest.from_json_obj(_manifest([ + {"path": "/gitea/x/", "upstream": "https://gitea.dideric.is:30443", + "auth_scheme": "token", "token_ref": "T"}, + ])) + self.assertEqual("gitea.dideric.is", + m.bottles["dev"].cred_proxy.routes[0].UpstreamHost) + + +class TestCredProxyRouteValidation(unittest.TestCase): + def _route(self, **overrides): + base = { + "path": "/x/", + "upstream": "https://example.com", + "auth_scheme": "Bearer", + "token_ref": "TOK", + } + base.update(overrides) + return base + + def test_missing_path_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(path=None)])) + + def test_path_without_trailing_slash_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(path="/no-slash")])) + + def test_path_without_leading_slash_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(path="no-slash/")])) + + def test_missing_upstream_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(upstream=None)])) + + def test_non_https_upstream_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(upstream="http://x.example")])) + + def test_unknown_auth_scheme_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(auth_scheme="Basic")])) + + def 
test_missing_token_ref_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(token_ref=None)])) + + def test_unknown_role_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([self._route(role="something-made-up")])) + + +class TestCredProxyCrossValidation(unittest.TestCase): + def test_duplicate_path_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([ + {"path": "/x/", "upstream": "https://a.example", + "auth_scheme": "Bearer", "token_ref": "T1"}, + {"path": "/x/", "upstream": "https://b.example", + "auth_scheme": "Bearer", "token_ref": "T2"}, + ])) + + def test_two_routes_same_anthropic_role_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "A1", + "role": "anthropic-base-url"}, + {"path": "/anthropic-2/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "A2", + "role": "anthropic-base-url"}, + ])) + + def test_multiple_git_insteadof_ok(self): + # git-insteadof is not a singleton role — each route can + # independently rewrite its own host. + m = Manifest.from_json_obj(_manifest([ + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH", + "role": "git-insteadof"}, + {"path": "/gitea/x/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "GT", + "role": "git-insteadof"}, + ])) + self.assertEqual(2, len(m.bottles["dev"].cred_proxy.routes)) + + +class TestLegacyTokensField(unittest.TestCase): + def test_legacy_tokens_field_dies_with_hint(self): + # The PRD-iteration shape ({"tokens": [{Kind: ...}]}) was + # replaced by cred_proxy.routes; old manifests must fail + # loudly with a pointer. 
+ with self.assertRaises(Die): + Manifest.from_json_obj({ + "bottles": {"dev": {"tokens": [ + {"Kind": "anthropic", "TokenRef": "T"}, + ]}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + + +class TestEmptyCredProxy(unittest.TestCase): + def test_no_cred_proxy_field_yields_empty_routes(self): + m = Manifest.from_json_obj({ + "bottles": {"dev": {}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + self.assertEqual((), m.bottles["dev"].cred_proxy.routes) + + def test_routes_array_type_required(self): + with self.assertRaises(Die): + Manifest.from_json_obj({ + "bottles": {"dev": {"cred_proxy": {"routes": "not-a-list"}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_pipelock_allowlist.py b/tests/unit/test_pipelock_allowlist.py index e50d9d6..a10fae1 100644 --- a/tests/unit/test_pipelock_allowlist.py +++ b/tests/unit/test_pipelock_allowlist.py @@ -1,36 +1,115 @@ -"""Unit: pipelock_effective_allowlist — the union of baked-in defaults -and bottle.egress.allowlist. Git upstreams declared in bottle.git do not -contribute here; they flow through the per-agent git-gate (PRD 0008).""" +"""Unit: pipelock_effective_allowlist — the union of baked-in defaults, +bottle.egress.allowlist, and cred-proxy upstream hosts derived from +bottle.cred_proxy.routes (PRD 0010). 
Git upstreams declared in bottle.git +do not contribute here; they flow through the per-agent git-gate (PRD 0008).""" import unittest from claude_bottle.manifest import Manifest -from claude_bottle.pipelock import pipelock_effective_allowlist +from claude_bottle.pipelock import ( + pipelock_effective_allowlist, + pipelock_effective_tls_passthrough, + pipelock_token_hosts, +) + + +def _bottle(spec): + return Manifest.from_json_obj({ + "bottles": {"dev": spec}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] class TestEffectiveAllowlist(unittest.TestCase): def test_union_and_dedup(self): - manifest = Manifest.from_json_obj({ - "bottles": { - "dev": { - "egress": { - "allowlist": [ - "registry.npmjs.org", - # Duplicate of a baked default; the union - # must dedupe. - "api.anthropic.com", - ], - }, - }, + eff = pipelock_effective_allowlist(_bottle({ + "egress": { + "allowlist": [ + "registry.npmjs.org", + # Duplicate of a baked default; the union must dedupe. 
+ "api.anthropic.com", + ], }, - "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, - }) - eff = pipelock_effective_allowlist(manifest.bottles["dev"]) + })) self.assertIn("api.anthropic.com", eff, "baked default present") self.assertIn("registry.npmjs.org", eff, "egress.allowlist present") self.assertEqual(len(eff), len(set(eff)), "deduplicated") self.assertEqual(eff, sorted(eff), "sorted") +def _routes(routes): + return {"cred_proxy": {"routes": routes}} + + +class TestTokenHosts(unittest.TestCase): + def test_each_route_contributes_its_upstream_host(self): + hosts = pipelock_token_hosts(_bottle(_routes([ + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "GH"}, + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH"}, + ]))) + self.assertEqual(["api.github.com", "github.com"], hosts) + + def test_dedupe_across_routes(self): + hosts = pipelock_token_hosts(_bottle(_routes([ + {"path": "/a/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_ref": "T1"}, + {"path": "/b/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_ref": "T2"}, + ]))) + self.assertEqual(["x.example"], hosts) + + def test_no_routes_empty(self): + self.assertEqual([], pipelock_token_hosts(_bottle({}))) + + +class TestAllowlistWithTokens(unittest.TestCase): + def test_route_hosts_added_to_allowlist(self): + eff = pipelock_effective_allowlist(_bottle(_routes([ + {"path": "/npm/", "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", "token_ref": "N"}, + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "G"}, + ]))) + self.assertIn("registry.npmjs.org", eff) + self.assertIn("api.github.com", eff) + + def test_cred_proxy_hostname_auto_added_when_routes_exist(self): + # The agent's HTTP_PROXY points at pipelock, so a request for + # http://cred-proxy:9099/... 
arrives at pipelock as a request + # for hostname `cred-proxy`. pipelock must allow it or the + # agent can't reach its own sidecar. + eff = pipelock_effective_allowlist(_bottle(_routes([ + {"path": "/x/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_ref": "T"}, + ]))) + self.assertIn("cred-proxy", eff) + + def test_cred_proxy_hostname_NOT_added_when_no_routes(self): + # No cred-proxy sidecar, no auto-allow. + eff = pipelock_effective_allowlist(_bottle({})) + self.assertNotIn("cred-proxy", eff) + + +class TestTlsPassthrough(unittest.TestCase): + def test_default_includes_api_anthropic(self): + passthrough = pipelock_effective_tls_passthrough(_bottle({})) + self.assertEqual(["api.anthropic.com"], passthrough) + + def test_route_hosts_NOT_added_to_passthrough(self): + # cred-proxy now trusts pipelock's per-bottle CA, so pipelock + # can MITM the cred-proxy -> upstream leg and body-scan it. + # Auto-adding cred-proxy hosts to passthrough would silently + # disable that second scanner. 
+ passthrough = pipelock_effective_tls_passthrough(_bottle(_routes([ + {"path": "/gh-api/", "upstream": "https://api.github.com", + "auth_scheme": "Bearer", "token_ref": "G"}, + {"path": "/npm/", "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", "token_ref": "N"}, + ]))) + self.assertEqual(["api.anthropic.com"], passthrough) + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_pipelock_yaml.py b/tests/unit/test_pipelock_yaml.py index caa1105..68caed6 100644 --- a/tests/unit/test_pipelock_yaml.py +++ b/tests/unit/test_pipelock_yaml.py @@ -77,6 +77,49 @@ class TestBuildConfig(unittest.TestCase): ca_cert_path="/etc/pipelock-ca.pem", ) + def test_ssrf_block_omitted_when_no_allowlist(self): + cfg = pipelock_build_config(fixture_minimal().bottles["dev"]) + self.assertNotIn("ssrf", cfg) + + def test_ssrf_block_emitted_when_allowlist_supplied(self): + # The bottle's internal Docker subnet lands here at launch + # time so cred-proxy:9099 (172.x.x.x) doesn't trip pipelock's + # RFC1918 SSRF guard. + cfg = pipelock_build_config( + fixture_minimal().bottles["dev"], + ssrf_ip_allowlist=("172.20.0.0/16",), + ) + self.assertIn("ssrf", cfg) + self.assertEqual({"ip_allowlist": ["172.20.0.0/16"]}, cfg["ssrf"]) + + def test_seed_phrase_detection_left_at_default_when_no_anthropic_route(self): + # No override emitted -> pipelock keeps its built-in default + # (BIP-39 detection enabled). Bottles that don't carry an + # Anthropic route don't need the false-positive workaround. + cfg = pipelock_build_config(fixture_minimal().bottles["dev"]) + self.assertNotIn("seed_phrase_detection", cfg) + + def test_seed_phrase_detection_disabled_for_anthropic_route(self): + # claude-code's chat bodies trip pipelock's BIP-39 detector + # (12+ English words that pass the checksum). pipelock 2.3.0 + # has no per-path knob for this detector, and both `suppress` + # and `rules.disabled` only silence alerts — the block still + # fires. 
The only knob that actually skips the block is the + # global on/off, so we flip it off whenever the bottle is set + # up to route claude through pipelock. + from claude_bottle.manifest import Manifest + bottle = Manifest.from_json_obj({ + "bottles": {"dev": {"cred_proxy": {"routes": [ + {"path": "/anthropic/", + "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "T", + "role": "anthropic-base-url"}, + ]}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + cfg = pipelock_build_config(bottle) + self.assertEqual({"enabled": False}, cfg["seed_phrase_detection"]) + class TestRenderAndWrite(unittest.TestCase): def setUp(self): @@ -148,6 +191,33 @@ class TestRenderAndWrite(unittest.TestCase): self.assertIn("passthrough_domains:", content) self.assertIn('- "api.anthropic.com"', content) + def test_render_emits_ssrf_block_when_allowlist_given(self): + cfg = pipelock_build_config( + fixture_minimal().bottles["dev"], + ca_cert_path="/etc/pipelock-ca.pem", + ca_key_path="/etc/pipelock-ca-key.pem", + ssrf_ip_allowlist=("172.20.0.0/16",), + ) + text = pipelock_render_yaml(cfg) + self.assertIn("ssrf:", text) + self.assertIn("ip_allowlist:", text) + self.assertIn('- "172.20.0.0/16"', text) + + def test_render_emits_seed_phrase_off_for_anthropic_route(self): + from claude_bottle.manifest import Manifest + bottle = Manifest.from_json_obj({ + "bottles": {"dev": {"cred_proxy": {"routes": [ + {"path": "/anthropic/", + "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "T", + "role": "anthropic-base-url"}, + ]}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + text = pipelock_render_yaml(pipelock_build_config(bottle)) + self.assertIn("seed_phrase_detection:", text) + self.assertIn("enabled: false", text) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_provision_cred_proxy.py b/tests/unit/test_provision_cred_proxy.py 
new file mode 100644 index 0000000..6fc026a --- /dev/null +++ b/tests/unit/test_provision_cred_proxy.py @@ -0,0 +1,161 @@ +"""Unit: cred-proxy agent-side provisioner renderers (PRD 0010). + +The docker cp / docker exec side effects are exercised by integration +tests; these unit tests cover the pure render functions.""" + +import unittest + +from claude_bottle.backend.docker.provision.cred_proxy import ( + render_cred_proxy_gitconfig, + render_npmrc, + render_tea_config, +) +from claude_bottle.cred_proxy import cred_proxy_routes_for_bottle +from claude_bottle.manifest import Manifest + + +def _bottle(routes): + return Manifest.from_json_obj({ + "bottles": {"dev": {"cred_proxy": {"routes": routes}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }).bottles["dev"] + + +def _upstreams(routes): + return cred_proxy_routes_for_bottle(_bottle(routes)) + + +class TestRenderNpmrc(unittest.TestCase): + def test_empty_when_no_role(self): + self.assertEqual("", render_npmrc(_upstreams([]))) + self.assertEqual("", render_npmrc(_upstreams([ + {"path": "/x/", "upstream": "https://x.example", + "auth_scheme": "Bearer", "token_ref": "T"}, + ]))) + + def test_writes_registry_line_for_npm_registry_role(self): + out = render_npmrc(_upstreams([ + {"path": "/npm/", "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", "token_ref": "NPM_TOKEN", + "role": "npm-registry"}, + ])) + self.assertEqual("registry=http://cred-proxy:9099/npm/\n", out) + + def test_omits_authtoken(self): + # The proxy injects Authorization at request time. 
+ out = render_npmrc(_upstreams([ + {"path": "/npm/", "upstream": "https://registry.npmjs.org", + "auth_scheme": "Bearer", "token_ref": "NPM_TOKEN", + "role": "npm-registry"}, + ])) + self.assertNotIn("_authToken", out) + self.assertNotIn("NPM_TOKEN", out) + + +class TestRenderGitconfig(unittest.TestCase): + def test_empty_when_no_role(self): + self.assertEqual("", render_cred_proxy_gitconfig(_upstreams([ + {"path": "/anthropic/", "upstream": "https://api.anthropic.com", + "auth_scheme": "Bearer", "token_ref": "A"}, + ]))) + + def test_writes_insteadof_for_git_insteadof_role(self): + out = render_cred_proxy_gitconfig(_upstreams([ + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH", + "role": "git-insteadof"}, + ])) + self.assertIn('[url "http://cred-proxy:9099/gh-git/"]', out) + self.assertIn("insteadOf = https://github.com/", out) + + def test_gitea_writes_per_host_insteadof(self): + out = render_cred_proxy_gitconfig(_upstreams([ + {"path": "/gitea/dideric/", "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", "token_ref": "GITEA", + "role": "git-insteadof"}, + ])) + self.assertIn('[url "http://cred-proxy:9099/gitea/dideric/"]', out) + self.assertIn("insteadOf = https://gitea.dideric.is/", out) + + def test_two_routes_yield_two_rules(self): + out = render_cred_proxy_gitconfig(_upstreams([ + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH", + "role": "git-insteadof"}, + {"path": "/gitea/x/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "GT", + "role": "git-insteadof"}, + ])) + self.assertEqual(2, out.count("insteadOf")) + self.assertIn("github.com", out) + self.assertIn("gitea.example.com", out) + + def test_suppressed_when_git_gate_covers_host(self): + # When bottle.git brokers github.com over SSH, git-gate is the + # canonical git path. 
The cred-proxy https://github.com/ + # rewrite would let the agent push over HTTPS — bypassing + # gitleaks. Suppress it. + out = render_cred_proxy_gitconfig( + _upstreams([ + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "GH", + "role": "git-insteadof"}, + ]), + {"github.com"}, + ) + self.assertEqual("", out) + + def test_partial_suppression_keeps_other_hosts(self): + out = render_cred_proxy_gitconfig( + _upstreams([ + {"path": "/gitea/a/", "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", "token_ref": "T1", + "role": "git-insteadof"}, + {"path": "/gitea/b/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "T2", + "role": "git-insteadof"}, + ]), + {"gitea.dideric.is"}, + ) + self.assertNotIn("gitea.dideric.is/", out) + self.assertIn("gitea.example.com/", out) + + +class TestRenderTeaConfig(unittest.TestCase): + def test_empty_when_no_role(self): + self.assertEqual("", render_tea_config(_upstreams([ + {"path": "/gh-git/", "upstream": "https://github.com", + "auth_scheme": "Bearer", "token_ref": "G"}, + ]))) + + def test_single_login_block(self): + out = render_tea_config(_upstreams([ + {"path": "/gitea/dideric/", "upstream": "https://gitea.dideric.is", + "auth_scheme": "token", "token_ref": "GITEA", + "role": "tea-login"}, + ])) + self.assertIn("logins:", out) + # Login name comes from the upstream host, not the path — + # the path may not encode the host. 
+ self.assertIn("- name: gitea.dideric.is", out) + self.assertIn("url: http://cred-proxy:9099/gitea/dideric/", out) + self.assertIn("token: cred-proxy-placeholder", out) + self.assertNotIn("GITEA", out) + + +class TestCombinedRoles(unittest.TestCase): + """A single gitea route typically carries both `git-insteadof` + and `tea-login` — the renderers should each fire independently.""" + + def test_gitea_route_fires_both_renderers(self): + routes = _upstreams([ + {"path": "/gitea/x/", "upstream": "https://gitea.example.com", + "auth_scheme": "token", "token_ref": "T", + "role": ["git-insteadof", "tea-login"]}, + ]) + self.assertIn("insteadOf", render_cred_proxy_gitconfig(routes)) + self.assertIn("logins:", render_tea_config(routes)) + + +if __name__ == "__main__": + unittest.main()