fix(cred_proxy): close git-push bypass + route through pipelock (PRD 0010)
test / unit (pull_request) Successful in 15s
test / integration (pull_request) Successful in 29s

Three coupled fixes that close a documented bypass of git-gate's
gitleaks pre-receive hook:

1. cred-proxy refuses git smart-HTTP push at runtime. Any path
   ending in /git-receive-pack or /info/refs?service=git-receive-pack
   returns 403 with a pointer at the bottle.git SSH path. Fetch
   (upload-pack) is still allowed — the bypass we're closing is
   push, where gitleaks is the load-bearing scanner. Hard guarantee.

2. The provisioner suppresses the cred-proxy `~/.gitconfig` insteadOf
   rewrite for any host already declared in bottle.git. git-gate is
   the canonical git path there; we don't write a competing rule
   that would let `git clone https://<host>/...` succeed in ways
   that confuse on push. Defense in depth — (1) is the hard guarantee.

3. cred-proxy routes its outbound HTTPS through pipelock. The
   sidecar's environ now sets HTTPS_PROXY=<pipelock-url>, and the
   image's entrypoint runs `update-ca-certificates` over the
   per-bottle pipelock CA (docker cp'd into
   /usr/local/share/ca-certificates/pipelock.crt before start) so
   the proxy's HTTPS client trusts pipelock's bumped certs.

   Consequence: pipelock's allowlist + body scanner now sit in the
   cred-proxy egress path the same way they sit in front of direct
   agent traffic. The cred-proxy upstream hosts (api.github.com,
   github.com, gitea hosts, registry.npmjs.org) come OFF
   pipelock's passthrough_domains. Only api.anthropic.com remains
   on passthrough (LLM body content legitimately trips DLP).

PRD 0010 updated to reflect all three. Tests adjusted: the
"cred-proxy hosts go on passthrough" assertion in
test_pipelock_allowlist flips to "they don't", a new
TestIsGitPushRequest exercises the smart-HTTP refusal predicate,
and the gitconfig renderer tests cover the per-host suppression
matrix.
This commit is contained in:
2026-05-13 21:09:33 -04:00
parent c8ab90d01d
commit 27b2d78b11
12 changed files with 329 additions and 63 deletions
+63 -16
View File
@@ -42,6 +42,13 @@ CRED_PROXY_HOSTNAME = "cred-proxy"
# file directly.
CRED_PROXY_ROUTES_IN_CONTAINER = "/run/cred-proxy/routes.json"
# In-container path for the per-bottle pipelock CA. Alpine's
# update-ca-certificates picks anything ending in `.crt` under
# /usr/local/share/ca-certificates/ and folds it into the system
# trust store at boot — so cred-proxy's HTTPS client trusts
# pipelock's bumped certs when pipelock MITMs the outbound leg.
CRED_PROXY_PIPELOCK_CA_IN_CONTAINER = "/usr/local/share/ca-certificates/pipelock.crt"
# Repo root, for `docker build` context. Resolved from this file's
# location: claude_bottle/backend/docker/cred_proxy.py → repo root.
_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent)
@@ -96,6 +103,23 @@ class DockerCredProxy(CredProxy):
f"cred-proxy routes file missing at {plan.routes_path}; "
f"CredProxy.prepare must run first"
)
# pipelock fields are populated by launch.py in production; both
# must be present (URL + CA) or both absent. Mixing is a wiring
# bug. Both-absent is supported only as a test escape hatch:
# the integration tests in tests/integration/ exercise header
# injection in isolation and do not bring pipelock up.
route_via_pipelock = bool(plan.pipelock_proxy_url) or plan.pipelock_ca_host_path != Path()
if route_via_pipelock:
if not plan.pipelock_proxy_url:
die(
"DockerCredProxy.start: pipelock_ca_host_path is set but "
"pipelock_proxy_url is empty; populate both or neither."
)
if not plan.pipelock_ca_host_path.is_file():
die(
f"DockerCredProxy.start: pipelock CA missing at "
f"{plan.pipelock_ca_host_path}; pipelock_tls_init must run first"
)
# Resolve host env vars into concrete values. This must
# happen at start time (not prepare) — the values flow into
@@ -114,6 +138,16 @@ class DockerCredProxy(CredProxy):
"--network", plan.internal_network,
"--network-alias", CRED_PROXY_HOSTNAME,
]
if route_via_pipelock:
# Route cred-proxy's outbound HTTPS through pipelock so
# the egress allowlist + DLP body scanner apply to its
# traffic. Pipelock MITMs each handshake with the
# per-bottle CA we docker cp in below.
create_args.extend([
"-e", f"HTTPS_PROXY={plan.pipelock_proxy_url}",
"-e", f"HTTP_PROXY={plan.pipelock_proxy_url}",
"-e", "NO_PROXY=localhost,127.0.0.1",
])
# One -e flag per token slot; values arrive via subprocess env.
# docker create with `-e NAME` (no =VALUE) reads NAME from the
# current process env at create time. We pass `env=child_env`
@@ -136,24 +170,37 @@ class DockerCredProxy(CredProxy):
).returncode != 0:
die(f"failed to create cred-proxy sidecar {name}")
cp_result = subprocess.run(
["docker", "cp", str(plan.routes_path),
f"{name}:{CRED_PROXY_ROUTES_IN_CONTAINER}"],
capture_output=True,
text=True,
check=False,
)
if cp_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
cps: list[tuple[str, str, str]] = [
(str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"),
]
if route_via_pipelock:
# CA must land BEFORE `docker start` so the entrypoint's
# update-ca-certificates picks it up. Docker cp's the
# file in even on the stopped container — that's the
# whole reason this works without a custom build step.
cps.append((
str(plan.pipelock_ca_host_path),
CRED_PROXY_PIPELOCK_CA_IN_CONTAINER,
"pipelock CA",
))
for src, dst, label in cps:
cp_result = subprocess.run(
["docker", "cp", src, f"{name}:{dst}"],
capture_output=True,
text=True,
check=False,
)
die(
f"failed to copy routes.json into {name}: "
f"{cp_result.stderr.strip()}"
)
if cp_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
die(
f"failed to copy {label} into {name}: "
f"{cp_result.stderr.strip()}"
)
if subprocess.run(
["docker", "network", "connect", plan.egress_network, name],
+10 -4
View File
@@ -105,15 +105,21 @@ def launch(
stack.callback(git_gate.stop, git_gate_name)
# Cred-proxy (PRD 0010). One sidecar per bottle when
# bottle.tokens declares any kind. Must come up before the
# agent so DNS resolution for `cred-proxy` succeeds on the
# agent's first call; tokens flow from the host env into the
# sidecar's environ, not the agent's.
# bottle.tokens declares any kind. Must come up AFTER pipelock
# — cred-proxy routes its outbound HTTPS through pipelock
# (HTTPS_PROXY in environ + the per-bottle CA in its trust
# store) so the egress allowlist + body scanner sit in the
# cred-proxy path too. Must come up BEFORE the agent so DNS
# resolution for `cred-proxy` succeeds on the agent's first
# call; tokens flow from the host env into the sidecar's
# environ, not the agent's.
if plan.cred_proxy_plan.upstreams:
cred_proxy_plan = dataclasses.replace(
plan.cred_proxy_plan,
internal_network=internal_network,
egress_network=egress_network,
pipelock_ca_host_path=ca_cert_host,
pipelock_proxy_url=pipelock_proxy_url(plan.slug),
)
plan = dataclasses.replace(plan, cred_proxy_plan=cred_proxy_plan)
cred_proxy_name = cred_proxy.start(plan.cred_proxy_plan)
@@ -35,8 +35,10 @@ def provision_cred_proxy(plan: DockerBottlePlan, target: str) -> None:
upstreams = plan.cred_proxy_plan.upstreams
if not upstreams:
return
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
git_gate_hosts = {g.UpstreamHost for g in bottle.git}
_provision_npmrc(plan, target, upstreams)
_provision_gitconfig(plan, target, upstreams)
_provision_gitconfig(plan, target, upstreams, git_gate_hosts)
_provision_tea_config(plan, target, upstreams)
@@ -82,29 +84,41 @@ def _provision_npmrc(
# --- git config -------------------------------------------------------------
def render_cred_proxy_gitconfig(upstreams: tuple[CredProxyUpstream, ...]) -> str:
def render_cred_proxy_gitconfig(
upstreams: tuple[CredProxyUpstream, ...],
git_gate_hosts: set[str] = frozenset(), # type: ignore[assignment]
) -> str:
"""Render the `~/.gitconfig` fragment for cred-proxy insteadOf
rewrites. Empty string when no github / gitea routes are declared.
github expands to two rewrites: https://github.com/... /gh-git/...
(the git transport endpoint), and the agent's git client reaches
api.github.com over the same proxy via the /gh-api/ route, but
that's used by tools that call the GitHub API directly (gh, tea,
octokit) rather than `git` itself.
The rewrite is suppressed for any host that's also declared in
`bottle.git`. git-gate is the canonical git path on those hosts —
its pre-receive runs gitleaks before forwarding the push. A
cred-proxy https://<host>/ rewrite would route HTTPS git ops
around the gate. cred-proxy still refuses smart-HTTP push at
runtime (defense in depth), but suppressing the rewrite means
`git clone https://<host>/...` doesn't have a tempting shortcut
that just confuses on push.
Gitea entries get one rewrite per declared host, pointing at
/gitea/<host>/. The path component scopes the credential
so multiple gitea instances coexist on one proxy."""
github expands to one rewrite (https://github.com/... → /gh-git/...,
the git transport endpoint); /gh-api/ stays unmapped here because
tools call api.github.com directly rather than through git.
Gitea entries get one rewrite per declared host."""
rules: list[str] = []
for u in upstreams:
if u.kind == "github" and u.path == "/gh-git/":
if "github.com" in git_gate_hosts:
continue
rules.append(
f'[url "{cred_proxy_url()}/gh-git/"]\n'
f"\tinsteadOf = https://github.com/\n"
)
elif u.kind == "gitea":
# u.upstream is the configured gitea URL (e.g.
# https://gitea.dideric.is) and u.path is /gitea/<host>/.
# u.path is /gitea/<host>/; derive the host the same way
# the route table did so we match git_gate's UpstreamHost.
host = u.path[len("/gitea/"):].rstrip("/")
if host in git_gate_hosts:
continue
rules.append(
f'[url "{cred_proxy_url()}{u.path}"]\n'
f"\tinsteadOf = {u.upstream}/\n"
@@ -123,11 +137,13 @@ def _provision_gitconfig(
plan: DockerBottlePlan,
target: str,
upstreams: tuple[CredProxyUpstream, ...],
git_gate_hosts: set[str],
) -> None:
"""Append the cred-proxy insteadOf rules to ~/.gitconfig. Runs
after `provision_git`, so any git-gate rules already live in the
file; we append rather than overwrite."""
content = render_cred_proxy_gitconfig(upstreams)
file; we append rather than overwrite. Hosts already brokered by
git-gate are skipped — git-gate is the canonical git path there."""
content = render_cred_proxy_gitconfig(upstreams, git_gate_hosts)
if not content:
return
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
+15 -5
View File
@@ -64,16 +64,24 @@ class CredProxyPlan:
The slug + routes_path + upstreams + token_env_map fields are
filled at prepare time (host-side, side-effect-free on docker).
The network fields are populated by the backend's launch step
via `dataclasses.replace` once those networks exist. Empty
defaults are sentinels meaning "not yet set"; `.start` validates
that they are populated.
The network + pipelock fields are populated by the backend's
launch step via `dataclasses.replace` once those resources
exist. Empty defaults are sentinels meaning "not yet set";
`.start` validates that they are populated.
`token_env_map` is `{<token_env in container>: <TokenRef on host>}`.
The backend's start step reads `os.environ[TokenRef]` and forwards
the value into the cred-proxy container's environ under
`token_env`. The plan itself never holds token values — secrets
never land in a dataclass that might be logged."""
never land in a dataclass that might be logged.
`pipelock_ca_host_path` is the host path of the per-bottle CA
pipelock will present on bumped TLS handshakes; the cred-proxy
image's entrypoint runs `update-ca-certificates` over it so the
proxy's HTTPS client trusts pipelock's CA. `pipelock_proxy_url`
is the URL cred-proxy sets as `HTTPS_PROXY` in its environ so
outbound HTTPS traverses pipelock — making pipelock's body
scanner part of the cred-proxy egress path."""
slug: str
routes_path: Path
@@ -81,6 +89,8 @@ class CredProxyPlan:
token_env_map: dict[str, str]
internal_network: str = ""
egress_network: str = ""
pipelock_ca_host_path: Path = Path()
pipelock_proxy_url: str = ""
# Hardcoded upstream URLs for the non-gitea Kinds. Gitea's URL is per-
+33
View File
@@ -114,6 +114,31 @@ def select_route(routes: typing.Sequence[Route], request_path: str) -> Route | N
return None
def is_git_push_request(path: str, query: str) -> bool:
    """Return True if the request is a git smart-HTTP push.

    git push over HTTPS hits two endpoints:
        GET  <repo>/info/refs?service=git-receive-pack   (capabilities)
        POST <repo>/git-receive-pack                     (the push)

    Fetches use `service=git-upload-pack` / `/git-upload-pack` and are
    not blocked. cred-proxy refuses push because git-gate's pre-receive
    gitleaks scan is the gate for outbound git data; routing push
    through cred-proxy would bypass that. Use the bottle.git SSH path
    if you need to push.

    Args:
        path: Request path, without the query string. May still carry
            percent-escapes exactly as they arrived on the wire.
        query: Raw query string (the part after `?`), possibly empty.

    Returns:
        True when the request targets a receive-pack endpoint, either
        literally or after percent-decoding; False otherwise.
    """
    # Function-scope import keeps this block self-contained; the
    # module's top-of-file import list is not changed.
    from urllib.parse import parse_qsl, unquote

    # Match on the percent-decoded form. Upstream servers typically
    # decode before routing, so `/git-receive%2Dpack` or
    # `service=git-receive%2Dpack` would otherwise slip past a
    # raw-string comparison and still be treated as a push upstream.
    decoded_path = unquote(path)
    if decoded_path.endswith("/git-receive-pack"):
        return True
    if decoded_path.endswith("/info/refs"):
        # Query string is parsed leniently — `service=git-receive-pack`
        # may appear with other params in any order, and malformed
        # pairs are ignored rather than raising.
        return any(
            key == "service" and value == "git-receive-pack"
            for key, value in parse_qsl(query, keep_blank_values=True)
        )
    return False
# --- Header handling --------------------------------------------------------
@@ -223,6 +248,14 @@ class CredProxyHandler(http.server.BaseHTTPRequestHandler):
def _proxy(self) -> None:
server = typing.cast("CredProxyServer", self.server)
path, _, query = self.path.partition("?")
if is_git_push_request(path, query):
self.send_error(
403,
"cred-proxy: git push over HTTPS is not supported; "
"use the bottle.git SSH path (gitleaks-scanned by "
"git-gate's pre-receive hook)",
)
return
route = select_route(server.routes, path)
if route is None:
self.send_error(404, f"no route for {path!r}")
+22 -10
View File
@@ -100,16 +100,28 @@ def pipelock_effective_allowlist(bottle: Bottle) -> list[str]:
def pipelock_effective_tls_passthrough(bottle: Bottle) -> list[str]:
"""Hostnames pipelock should pass through (no TLS MITM, no body
scan). Default carries the LLM API endpoint (its request bodies
legitimately trip DLP); cred-proxy upstream hosts are added so
cred-proxy's HTTPS client (which trusts only the real CA bundle)
can complete the upstream handshake."""
seen: dict[str, None] = {}
for h in DEFAULT_TLS_PASSTHROUGH:
seen.setdefault(h, None)
for h in pipelock_token_hosts(bottle):
seen.setdefault(h, None)
return sorted(seen.keys())
scan). Default carries the LLM API endpoint — its request bodies
are user-authored conversation text that legitimately trips DLP
scanners (notably pipelock's BIP-39 seed-phrase detector). Every
other allowlisted host is MITM'd by pipelock's per-bottle CA so
its body scanner sees the cleartext.
cred-proxy upstream hosts (github, gitea, npm) are deliberately
NOT auto-added here. cred-proxy's HTTPS client trusts pipelock's
CA at runtime (folded into its trust store via docker cp +
update-ca-certificates), so pipelock can MITM the cred-proxy
upstream leg and body-scan it the same way it body-scans the
agent's direct HTTPS traffic. Without this, an agent that pushed
a secret via cred-proxy's /gh-git/ path would have no body
scanner in front of it. The PRD's earlier reasoning that
cred-proxy hosts needed passthrough was a workaround for the
cert-trust gap that no longer exists.
`bottle` is kept on the signature for forward-compat (a future
knob might let a manifest opt a host into passthrough); today
the returned list is independent of the bottle."""
del bottle # not consulted; see docstring.
return sorted(DEFAULT_TLS_PASSTHROUGH)
def pipelock_allowlist_summary(bottle: Bottle) -> str: