diff --git a/Dockerfile.cred-proxy b/Dockerfile.cred-proxy index 451d7cc..82f3769 100644 --- a/Dockerfile.cred-proxy +++ b/Dockerfile.cred-proxy @@ -16,6 +16,14 @@ # image bytes deterministic. FROM python@sha256:420cd0bf0f3998275875e02ecd5808168cf0843cbb4d3c536432f729247b2acc +# `ca-certificates` ships /usr/sbin/update-ca-certificates and the +# system trust store. The backend's start step `docker cp`s the +# per-bottle pipelock CA into /usr/local/share/ca-certificates/ so +# the entrypoint's update-ca-certificates picks it up — cred-proxy's +# outbound HTTPS then trusts pipelock's bumped certs and outbound +# traffic routes through pipelock (HTTPS_PROXY in the environ). +RUN apk add --no-cache ca-certificates + # The proxy script ships as a single file. Tests in tests/unit/ import # it as `claude_bottle.cred_proxy_server`; the container runs it # directly as a script. No package install, no other modules pulled. @@ -32,4 +40,11 @@ RUN mkdir -p /run/cred-proxy # for the internal network to route to it. EXPOSE 9099 -ENTRYPOINT ["python3", "/app/cred_proxy_server.py"] +# Entry runs update-ca-certificates so the per-bottle pipelock CA +# docker-cp'd by the backend's start step is folded into +# /etc/ssl/certs/ca-certificates.crt before python comes up. Then +# exec into the server so PID 1 is python (clean signal handling +# and exit codes). Output of update-ca-certificates is silenced — +# the entry script prints one line per cert under normal operation, +# which the test suite would otherwise treat as a log smell. +ENTRYPOINT ["sh", "-c", "update-ca-certificates >/dev/null 2>&1 && exec python3 /app/cred_proxy_server.py"] diff --git a/claude_bottle/backend/docker/cred_proxy.py b/claude_bottle/backend/docker/cred_proxy.py index 4e28bab..54213cc 100644 --- a/claude_bottle/backend/docker/cred_proxy.py +++ b/claude_bottle/backend/docker/cred_proxy.py @@ -42,6 +42,13 @@ CRED_PROXY_HOSTNAME = "cred-proxy" # file directly. 
CRED_PROXY_ROUTES_IN_CONTAINER = "/run/cred-proxy/routes.json" +# In-container path for the per-bottle pipelock CA. Alpine's +# update-ca-certificates picks anything ending in `.crt` under +# /usr/local/share/ca-certificates/ and folds it into the system +# trust store at boot — so cred-proxy's HTTPS client trusts +# pipelock's bumped certs when pipelock MITMs the outbound leg. +CRED_PROXY_PIPELOCK_CA_IN_CONTAINER = "/usr/local/share/ca-certificates/pipelock.crt" + # Repo root, for `docker build` context. Resolved from this file's # location: claude_bottle/backend/docker/cred_proxy.py → repo root. _REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) @@ -96,6 +103,23 @@ class DockerCredProxy(CredProxy): f"cred-proxy routes file missing at {plan.routes_path}; " f"CredProxy.prepare must run first" ) + # pipelock fields are populated by launch.py in production; both + # must be present (URL + CA) or both absent. Mixing is a wiring + # bug. Both-absent is supported only as a test escape hatch: + # the integration tests in tests/integration/ exercise header + # injection in isolation and do not bring pipelock up. + route_via_pipelock = bool(plan.pipelock_proxy_url) or plan.pipelock_ca_host_path != Path() + if route_via_pipelock: + if not plan.pipelock_proxy_url: + die( + "DockerCredProxy.start: pipelock_ca_host_path is set but " + "pipelock_proxy_url is empty; populate both or neither." + ) + if not plan.pipelock_ca_host_path.is_file(): + die( + f"DockerCredProxy.start: pipelock CA missing at " + f"{plan.pipelock_ca_host_path}; pipelock_tls_init must run first" + ) # Resolve host env vars into concrete values. 
This must # happen at start time (not prepare) — the values flow into @@ -114,6 +138,16 @@ class DockerCredProxy(CredProxy): "--network", plan.internal_network, "--network-alias", CRED_PROXY_HOSTNAME, ] + if route_via_pipelock: + # Route cred-proxy's outbound HTTPS through pipelock so + # the egress allowlist + DLP body scanner apply to its + # traffic. Pipelock MITMs each handshake with the + # per-bottle CA we docker cp in below. + create_args.extend([ + "-e", f"HTTPS_PROXY={plan.pipelock_proxy_url}", + "-e", f"HTTP_PROXY={plan.pipelock_proxy_url}", + "-e", "NO_PROXY=localhost,127.0.0.1", + ]) # One -e flag per token slot; values arrive via subprocess env. # docker create with `-e NAME` (no =VALUE) reads NAME from the # current process env at create time. We pass `env=child_env` @@ -136,24 +170,37 @@ class DockerCredProxy(CredProxy): ).returncode != 0: die(f"failed to create cred-proxy sidecar {name}") - cp_result = subprocess.run( - ["docker", "cp", str(plan.routes_path), - f"{name}:{CRED_PROXY_ROUTES_IN_CONTAINER}"], - capture_output=True, - text=True, - check=False, - ) - if cp_result.returncode != 0: - subprocess.run( - ["docker", "rm", "-f", name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, + cps: list[tuple[str, str, str]] = [ + (str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"), + ] + if route_via_pipelock: + # CA must land BEFORE `docker start` so the entrypoint's + # update-ca-certificates picks it up. Docker cp's the + # file in even on the stopped container — that's the + # whole reason this works without a custom build step. 
+ cps.append(( + str(plan.pipelock_ca_host_path), + CRED_PROXY_PIPELOCK_CA_IN_CONTAINER, + "pipelock CA", + )) + for src, dst, label in cps: + cp_result = subprocess.run( + ["docker", "cp", src, f"{name}:{dst}"], + capture_output=True, + text=True, check=False, ) - die( - f"failed to copy routes.json into {name}: " - f"{cp_result.stderr.strip()}" - ) + if cp_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to copy {label} into {name}: " + f"{cp_result.stderr.strip()}" + ) if subprocess.run( ["docker", "network", "connect", plan.egress_network, name], diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index a274333..a32747d 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -105,15 +105,21 @@ def launch( stack.callback(git_gate.stop, git_gate_name) # Cred-proxy (PRD 0010). One sidecar per bottle when - # bottle.tokens declares any kind. Must come up before the - # agent so DNS resolution for `cred-proxy` succeeds on the - # agent's first call; tokens flow from the host env into the - # sidecar's environ, not the agent's. + # bottle.tokens declares any kind. Must come up AFTER pipelock + # — cred-proxy routes its outbound HTTPS through pipelock + # (HTTPS_PROXY in environ + the per-bottle CA in its trust + # store) so the egress allowlist + body scanner sit in the + # cred-proxy path too. Must come up BEFORE the agent so DNS + # resolution for `cred-proxy` succeeds on the agent's first + # call; tokens flow from the host env into the sidecar's + # environ, not the agent's. 
if plan.cred_proxy_plan.upstreams: cred_proxy_plan = dataclasses.replace( plan.cred_proxy_plan, internal_network=internal_network, egress_network=egress_network, + pipelock_ca_host_path=ca_cert_host, + pipelock_proxy_url=pipelock_proxy_url(plan.slug), ) plan = dataclasses.replace(plan, cred_proxy_plan=cred_proxy_plan) cred_proxy_name = cred_proxy.start(plan.cred_proxy_plan) diff --git a/claude_bottle/backend/docker/provision/cred_proxy.py b/claude_bottle/backend/docker/provision/cred_proxy.py index 5fecde9..e946be3 100644 --- a/claude_bottle/backend/docker/provision/cred_proxy.py +++ b/claude_bottle/backend/docker/provision/cred_proxy.py @@ -35,8 +35,10 @@ def provision_cred_proxy(plan: DockerBottlePlan, target: str) -> None: upstreams = plan.cred_proxy_plan.upstreams if not upstreams: return + bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) + git_gate_hosts = {g.UpstreamHost for g in bottle.git} _provision_npmrc(plan, target, upstreams) - _provision_gitconfig(plan, target, upstreams) + _provision_gitconfig(plan, target, upstreams, git_gate_hosts) _provision_tea_config(plan, target, upstreams) @@ -82,29 +84,41 @@ def _provision_npmrc( # --- git config ------------------------------------------------------------- -def render_cred_proxy_gitconfig(upstreams: tuple[CredProxyUpstream, ...]) -> str: +def render_cred_proxy_gitconfig( + upstreams: tuple[CredProxyUpstream, ...], + git_gate_hosts: set[str] = frozenset(), # type: ignore[assignment] +) -> str: """Render the `~/.gitconfig` fragment for cred-proxy insteadOf rewrites. Empty string when no github / gitea routes are declared. - github expands to two rewrites: https://github.com/... → /gh-git/... - (the git transport endpoint), and the agent's git client reaches - api.github.com over the same proxy via the /gh-api/ route, but - that's used by tools that call the GitHub API directly (gh, tea, - octokit) rather than `git` itself. 
+ The rewrite is suppressed for any host that's also declared in + `bottle.git`. git-gate is the canonical git path on those hosts — + its pre-receive runs gitleaks before forwarding the push. A + cred-proxy https://<host>/ rewrite would route HTTPS git ops + around the gate. cred-proxy still refuses smart-HTTP push at + runtime (defense in depth), but suppressing the rewrite means + `git clone https://<host>/...` doesn't have a tempting shortcut + that just confuses on push. - Gitea entries get one rewrite per declared host, pointing at - /gitea/<host>/. The path component scopes the credential - so multiple gitea instances coexist on one proxy.""" + github expands to one rewrite (https://github.com/... → /gh-git/..., + the git transport endpoint); /gh-api/ stays unmapped here because + tools call api.github.com directly rather than through git. + Gitea entries get one rewrite per declared host.""" rules: list[str] = [] for u in upstreams: if u.kind == "github" and u.path == "/gh-git/": + if "github.com" in git_gate_hosts: + continue rules.append( f'[url "{cred_proxy_url()}/gh-git/"]\n' f"\tinsteadOf = https://github.com/\n" ) elif u.kind == "gitea": - # u.upstream is the configured gitea URL (e.g. - # https://gitea.dideric.is) and u.path is /gitea/<host>/. + # u.path is /gitea/<host>/; derive the host the same way + # the route table did so we match git_gate's UpstreamHost. + host = u.path[len("/gitea/"):].rstrip("/") + if host in git_gate_hosts: + continue rules.append( f'[url "{cred_proxy_url()}{u.path}"]\n' f"\tinsteadOf = {u.upstream}/\n" @@ -123,11 +137,13 @@ def _provision_gitconfig( plan: DockerBottlePlan, target: str, upstreams: tuple[CredProxyUpstream, ...], + git_gate_hosts: set[str], ) -> None: """Append the cred-proxy insteadOf rules to ~/.gitconfig. Runs after `provision_git`, so any git-gate rules already live in the - file; we append rather than overwrite.""" - content = render_cred_proxy_gitconfig(upstreams) + file; we append rather than overwrite. 
Hosts already brokered by + git-gate are skipped — git-gate is the canonical git path there.""" + content = render_cred_proxy_gitconfig(upstreams, git_gate_hosts) if not content: return container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") diff --git a/claude_bottle/cred_proxy.py b/claude_bottle/cred_proxy.py index ab53db4..672c9d5 100644 --- a/claude_bottle/cred_proxy.py +++ b/claude_bottle/cred_proxy.py @@ -64,16 +64,24 @@ class CredProxyPlan: The slug + routes_path + upstreams + token_env_map fields are filled at prepare time (host-side, side-effect-free on docker). - The network fields are populated by the backend's launch step - via `dataclasses.replace` once those networks exist. Empty - defaults are sentinels meaning "not yet set"; `.start` validates - that they are populated. + The network + pipelock fields are populated by the backend's + launch step via `dataclasses.replace` once those resources + exist. Empty defaults are sentinels meaning "not yet set"; + `.start` validates that they are populated. `token_env_map` is `{: }`. The backend's start step reads `os.environ[TokenRef]` and forwards the value into the cred-proxy container's environ under `token_env`. The plan itself never holds token values — secrets - never land in a dataclass that might be logged.""" + never land in a dataclass that might be logged. + + `pipelock_ca_host_path` is the host path of the per-bottle CA + pipelock will present on bumped TLS handshakes; the cred-proxy + image's entrypoint runs `update-ca-certificates` over it so the + proxy's HTTPS client trusts pipelock's CA. 
`pipelock_proxy_url` + is the URL cred-proxy sets as `HTTPS_PROXY` in its environ so + outbound HTTPS traverses pipelock — making pipelock's body + scanner part of the cred-proxy egress path.""" slug: str routes_path: Path @@ -81,6 +89,8 @@ class CredProxyPlan: token_env_map: dict[str, str] internal_network: str = "" egress_network: str = "" + pipelock_ca_host_path: Path = Path() + pipelock_proxy_url: str = "" # Hardcoded upstream URLs for the non-gitea Kinds. Gitea's URL is per- diff --git a/claude_bottle/cred_proxy_server.py b/claude_bottle/cred_proxy_server.py index 6d756bb..1a0f4a3 100644 --- a/claude_bottle/cred_proxy_server.py +++ b/claude_bottle/cred_proxy_server.py @@ -114,6 +114,31 @@ def select_route(routes: typing.Sequence[Route], request_path: str) -> Route | N return None +def is_git_push_request(path: str, query: str) -> bool: + """Return True if the request is a git smart-HTTP push. + + git push over HTTPS hits two endpoints: + GET /info/refs?service=git-receive-pack (capabilities) + POST /git-receive-pack (the push) + + Fetches use `service=git-upload-pack` / `/git-upload-pack` and are + not blocked. cred-proxy refuses push because git-gate's pre-receive + gitleaks scan is the gate for outbound git data; routing push + through cred-proxy would bypass that. Use the bottle.git SSH path + if you need to push. + """ + if path.endswith("/git-receive-pack"): + return True + if path.endswith("/info/refs"): + # Query string is parsed leniently — `service=git-receive-pack` + # may appear with other params in any order. 
+ for pair in query.split("&"): + k, _, v = pair.partition("=") + if k == "service" and v == "git-receive-pack": + return True + return False + + # --- Header handling -------------------------------------------------------- @@ -223,6 +248,14 @@ class CredProxyHandler(http.server.BaseHTTPRequestHandler): def _proxy(self) -> None: server = typing.cast("CredProxyServer", self.server) path, _, query = self.path.partition("?") + if is_git_push_request(path, query): + self.send_error( + 403, + "cred-proxy: git push over HTTPS is not supported; " + "use the bottle.git SSH path (gitleaks-scanned by " + "git-gate's pre-receive hook)", + ) + return route = select_route(server.routes, path) if route is None: self.send_error(404, f"no route for {path!r}") diff --git a/claude_bottle/pipelock.py b/claude_bottle/pipelock.py index 597fef1..6b8abf0 100644 --- a/claude_bottle/pipelock.py +++ b/claude_bottle/pipelock.py @@ -100,16 +100,28 @@ def pipelock_effective_allowlist(bottle: Bottle) -> list[str]: def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]: """Hostnames pipelock should pass through (no TLS MITM, no body - scan). Default carries the LLM API endpoint (its request bodies - legitimately trip DLP); cred-proxy upstream hosts are added so - cred-proxy's HTTPS client (which trusts only the real CA bundle) - can complete the upstream handshake.""" - seen: dict[str, None] = {} - for h in DEFAULT_TLS_PASSTHROUGH: - seen.setdefault(h, None) - for h in pipelock_token_hosts(bottle): - seen.setdefault(h, None) - return sorted(seen.keys()) + scan). Default carries the LLM API endpoint — its request bodies + are user-authored conversation text that legitimately trips DLP + scanners (notably pipelock's BIP-39 seed-phrase detector). Every + other allowlisted host is MITM'd by pipelock's per-bottle CA so + its body scanner sees the cleartext. + + cred-proxy upstream hosts (github, gitea, npm) are deliberately + NOT auto-added here. 
cred-proxy's HTTPS client trusts pipelock's + CA at runtime (folded into its trust store via docker cp + + update-ca-certificates), so pipelock can MITM the cred-proxy → + upstream leg and body-scan it the same way it body-scans the + agent's direct HTTPS traffic. Without this, an agent that pushed + a secret via cred-proxy's /gh-git/ path would have no body + scanner in front of it. The PRD's earlier reasoning that + cred-proxy hosts needed passthrough was a workaround for the + cert-trust gap that no longer exists. + + `bottle` is kept on the signature for forward-compat (a future + knob might let a manifest opt a host into passthrough); today + the returned list is independent of the bottle.""" + del bottle # not consulted; see docstring. + return sorted(DEFAULT_TLS_PASSTHROUGH) def pipelock_allowlist_summary(bottle: Bottle) -> str: diff --git a/docs/prds/0010-cred-proxy.md b/docs/prds/0010-cred-proxy.md index 4737051..5c92ef5 100644 --- a/docs/prds/0010-cred-proxy.md +++ b/docs/prds/0010-cred-proxy.md @@ -130,7 +130,16 @@ supported kinds (anthropic, github, gitea, npm): the agent's environ - `~/.npmrc` `registry = http://cred-proxy:/npm/` - `~/.gitconfig` `[url …] insteadOf = …` for each declared - `github` / `gitea` upstream + `github` / `gitea` upstream, **except** when a `bottle.git` + entry already brokers the same host. git-gate is the canonical + git path on those hosts — its pre-receive runs gitleaks before + forwarding the push; a cred-proxy `https://<host>/` rewrite + would route HTTPS git ops around the gate, and `git push` over + HTTPS to the same host via cred-proxy carries no gitleaks + equivalent. (cred-proxy independently refuses smart-HTTP push + paths at runtime — see "Smart-HTTP push refused" below — but + suppressing the rewrite means `git clone https://<host>/...` + doesn't have a tempting shortcut that just confuses later.) 
- `~/.config/tea/config.yml` with the proxy URL for each declared `gitea` entry - **Sidecar lifecycle.** Mirrors `DockerGitGate` / @@ -141,11 +150,27 @@ supported kinds (anthropic, github, gitea, npm): `claude-bottle-cred-proxy-`. The agent container starts after the sidecar is up so DNS resolution succeeds on the agent's first call. -- **pipelock interop.** cred-proxy's outbound HTTPS still - traverses pipelock — pipelock keeps its egress-allowlist role - for the four upstream hosts. Drop `api.anthropic.com` from - pipelock's TLS-MITM list (cred-proxy is now the trust endpoint - for that host); the host stays on the plain HTTPS allowlist. +- **pipelock interop.** cred-proxy's outbound HTTPS traverses + pipelock: the sidecar's environ sets `HTTPS_PROXY` / + `HTTP_PROXY` to the per-bottle pipelock URL, and the cred-proxy + image's entrypoint runs `update-ca-certificates` over the + per-bottle pipelock CA (`docker cp`'d into + `/usr/local/share/ca-certificates/pipelock.crt` before start) + so cred-proxy's HTTPS client trusts pipelock's bumped certs. + Pipelock's allowlist + body scanner therefore apply to + cred-proxy → upstream the same way they apply to direct agent + traffic. Only `api.anthropic.com` stays on + `passthrough_domains` (its bodies are LLM conversation text + that legitimately trips DLP heuristics); github / gitea / npm + hosts are auto-added to the allowlist (so cred-proxy can reach + them) but NOT to passthrough, so pipelock body-scans them. +- **Smart-HTTP push refused.** cred-proxy returns 403 for paths + matching `/info/refs?service=git-receive-pack` and any path + ending in `/git-receive-pack`. Fetch (upload-pack) is allowed. + Push must go through `bottle.git` / git-gate, where the + gitleaks pre-receive hook runs. This holds even when no + matching `bottle.git` entry exists — the proxy is not a + scanned-push path, period. 
- **Plan rendering.** `bottle_plan.py` and the y/N preflight show: which tokens are configured (kind + ref name, not the value), the proxy port, the routes the proxy will publish. diff --git a/tests/unit/test_cred_proxy_server.py b/tests/unit/test_cred_proxy_server.py index f3f22fd..ce22889 100644 --- a/tests/unit/test_cred_proxy_server.py +++ b/tests/unit/test_cred_proxy_server.py @@ -7,6 +7,7 @@ from claude_bottle.cred_proxy_server import ( Route, build_forward_headers, filter_response_headers, + is_git_push_request, load_tokens, parse_routes, select_route, @@ -183,6 +184,49 @@ class TestFilterResponseHeaders(unittest.TestCase): self.assertNotIn("transfer-encoding", names) +class TestIsGitPushRequest(unittest.TestCase): + """git push over HTTPS goes through /info/refs?service=git-receive-pack + (capabilities probe) then POST /git-receive-pack (the push body). + Fetches use /git-upload-pack and are not blocked — the bypass we're + closing is push, since git-gate's gitleaks pre-receive is the scanner + for outbound git data.""" + + def test_push_capabilities_probe_blocked(self): + self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "service=git-receive-pack", + )) + + def test_push_body_blocked(self): + self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/git-receive-pack", "", + )) + + def test_fetch_capabilities_allowed(self): + self.assertFalse(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "service=git-upload-pack", + )) + + def test_fetch_body_allowed(self): + self.assertFalse(is_git_push_request( + "/gh-git/owner/repo.git/git-upload-pack", "", + )) + + def test_rest_api_allowed(self): + # tea/gh-style REST calls hit /api/v1/... — unrelated. + self.assertFalse(is_git_push_request( + "/gitea/gitea.dideric.is/api/v1/repos/x/y", "", + )) + + def test_push_with_extra_query_params(self): + # `service` may appear with other params in any order. 
+ self.assertTrue(is_git_push_request( + "/gh-git/owner/repo.git/info/refs", + "trace=1&service=git-receive-pack", + )) + + class TestLoadTokens(unittest.TestCase): def test_reads_per_route_env(self): routes = ( diff --git a/tests/unit/test_docker_cred_proxy.py b/tests/unit/test_docker_cred_proxy.py index f292996..5a0be20 100644 --- a/tests/unit/test_docker_cred_proxy.py +++ b/tests/unit/test_docker_cred_proxy.py @@ -4,6 +4,7 @@ The full docker lifecycle is exercised by integration tests; here we cover the pure helpers and the validation checks `.start` runs before touching docker.""" +import tempfile import unittest from pathlib import Path @@ -26,6 +27,8 @@ def _empty_plan(**overrides): "token_env_map": {}, "internal_network": "", "egress_network": "", + "pipelock_ca_host_path": Path(), + "pipelock_proxy_url": "", } base.update(overrides) return CredProxyPlan(**base) @@ -77,6 +80,26 @@ class TestStartGuards(unittest.TestCase): routes_path=Path("/tmp/cred-proxy-test-does-not-exist.json"), )) + def test_pipelock_url_without_ca_dies(self): + # URL set + CA path empty/missing is a wiring bug: either both + # populated (production) or both empty (test escape hatch). 
+ upstream = CredProxyUpstream( + kind="anthropic", path="/anthropic/", + upstream="https://api.anthropic.com", + auth_scheme="Bearer", token_env="CRED_PROXY_TOKEN_0", + token_ref="T", + ) + with tempfile.NamedTemporaryFile() as routes: + with self.assertRaises(Die): + self.proxy.start(_empty_plan( + upstreams=(upstream,), + internal_network="net-x", + egress_network="egress-x", + routes_path=Path(routes.name), + pipelock_proxy_url="http://pipelock:8888", + pipelock_ca_host_path=Path("/tmp/cred-proxy-no-ca.pem"), + )) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_pipelock_allowlist.py b/tests/unit/test_pipelock_allowlist.py index d5a5cf5..bd8bb31 100644 --- a/tests/unit/test_pipelock_allowlist.py +++ b/tests/unit/test_pipelock_allowlist.py @@ -92,9 +92,13 @@ class TestTlsPassthrough(unittest.TestCase): passthrough = pipelock_effective_tls_passthrough(_bottle({})) self.assertEqual(["api.anthropic.com"], passthrough) - def test_token_hosts_added_to_passthrough(self): - # cred-proxy validates upstream certs with the real CA bundle; - # pipelock must not MITM these or the handshake fails. + def test_token_hosts_NOT_added_to_passthrough(self): + # cred-proxy now trusts pipelock's per-bottle CA (loaded into + # its container's trust store via docker cp + update-ca- + # certificates at start time), so pipelock can MITM the + # cred-proxy -> upstream leg and body-scan it. Auto-adding + # cred-proxy hosts to passthrough would silently disable that + # second scanner for github / gitea / npm. 
passthrough = pipelock_effective_tls_passthrough(_bottle({ "tokens": [ {"Kind": "github", "TokenRef": "G"}, @@ -103,10 +107,7 @@ class TestTlsPassthrough(unittest.TestCase): "Url": "https://gitea.dideric.is"}, ], })) - for host in ("api.anthropic.com", "api.github.com", "github.com", - "registry.npmjs.org", "gitea.dideric.is"): - self.assertIn(host, passthrough) - self.assertEqual(passthrough, sorted(passthrough), "sorted") + self.assertEqual(["api.anthropic.com"], passthrough) if __name__ == "__main__": diff --git a/tests/unit/test_provision_cred_proxy.py b/tests/unit/test_provision_cred_proxy.py index dbf7730..5093cc6 100644 --- a/tests/unit/test_provision_cred_proxy.py +++ b/tests/unit/test_provision_cred_proxy.py @@ -83,6 +83,40 @@ class TestRenderGitconfig(unittest.TestCase): self.assertIn("gitea.dideric.is/", out) self.assertIn("gitea.example.com/", out) + def test_github_suppressed_when_git_gate_covers_host(self): + # When bottle.git brokers github.com over SSH, git-gate is the + # canonical git path. The cred-proxy https://github.com/ + # rewrite would let the agent push over HTTPS — bypassing + # gitleaks. Suppress it. + out = render_cred_proxy_gitconfig( + _upstreams([{"Kind": "github", "TokenRef": "GH"}]), + {"github.com"}, + ) + self.assertEqual("", out) + + def test_gitea_suppressed_when_git_gate_covers_host(self): + out = render_cred_proxy_gitconfig( + _upstreams([{"Kind": "gitea", "TokenRef": "T", + "Url": "https://gitea.dideric.is"}]), + {"gitea.dideric.is"}, + ) + self.assertEqual("", out) + + def test_partial_suppression_keeps_other_giteas(self): + # Two gitea instances; git-gate brokers one. The other still + # gets the cred-proxy rewrite. 
+ out = render_cred_proxy_gitconfig( + _upstreams([ + {"Kind": "gitea", "TokenRef": "T1", + "Url": "https://gitea.dideric.is"}, + {"Kind": "gitea", "TokenRef": "T2", + "Url": "https://gitea.example.com"}, + ]), + {"gitea.dideric.is"}, + ) + self.assertNotIn("gitea.dideric.is/", out) + self.assertIn("gitea.example.com/", out) + class TestRenderTeaConfig(unittest.TestCase): def test_empty_when_no_gitea(self):