From fdd06c54d2f9af4181eaf549e2d30616b59d2e21 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 12 May 2026 21:37:04 -0400 Subject: [PATCH] feat(git-gate): mirror fetch through access-hook (bidirectional) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gate is now a transparent mirror, not push-only. Per-repo init now runs `git remote add --mirror=fetch origin <upstream-url>` so a later `git fetch origin` mirrors the upstream's full ref graph at canonical paths. The pre-receive hook forwards accepted refs via `git push origin` (renamed from upstream). New: an access-hook script wired via `git daemon --access-hook` runs `git fetch origin --prune` against the real upstream before every upload-pack request (clone, fetch, pull, ls-remote). On upstream error the hook exits non-zero — the agent's fetch fails rather than the gate serving stale data. The pre-existing smoke test (ls-remote against a gate whose upstream is unreachable exits 0 with no refs) had to invert: under the bidirectional design any ls-remote success is necessarily a success against the upstream, so the unreachable-upstream case now correctly fails closed. --- claude_bottle/backend/docker/git_gate.py | 7 ++ claude_bottle/git_gate.py | 116 +++++++++++++++++---- tests/integration/test_git_gate_sidecar.py | 61 +++++++---- tests/unit/test_git_gate.py | 43 +++++++- 4 files changed, 180 insertions(+), 47 deletions(-) diff --git a/claude_bottle/backend/docker/git_gate.py b/claude_bottle/backend/docker/git_gate.py index b01d9a9..e3c11a3 100644 --- a/claude_bottle/backend/docker/git_gate.py +++ b/claude_bottle/backend/docker/git_gate.py @@ -23,6 +23,7 @@ GIT_GATE_DOCKERFILE = "Dockerfile.git-gate" GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh" GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive" +GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook" GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds" # git daemon's default listening port.
Surfaced as a constant because @@ -85,6 +86,11 @@ class DockerGitGate(GitGate): f"git-gate hook missing at {plan.hook_script}; " f"GitGate.prepare must run first" ) + if not plan.access_hook_script.is_file(): + die( + f"git-gate access-hook missing at {plan.access_hook_script}; " + f"GitGate.prepare must run first" + ) build_git_gate_image() @@ -111,6 +117,7 @@ class DockerGitGate(GitGate): cps: list[tuple[str, str, str]] = [ (str(plan.entrypoint_script), GIT_GATE_ENTRYPOINT_IN_CONTAINER, "entrypoint"), (str(plan.hook_script), GIT_GATE_HOOK_IN_CONTAINER, "pre-receive hook"), + (str(plan.access_hook_script), GIT_GATE_ACCESS_HOOK_IN_CONTAINER, "access-hook"), ] for u in plan.upstreams: keypath = expand_tilde(u.identity_file) diff --git a/claude_bottle/git_gate.py b/claude_bottle/git_gate.py index 7e49ee8..3841bf0 100644 --- a/claude_bottle/git_gate.py +++ b/claude_bottle/git_gate.py @@ -1,11 +1,19 @@ """Per-agent git-gate (PRD 0008). A third per-agent sidecar that fronts the bottle's declared git -upstreams. Each `bottle.git` entry maps to a bare repo on the gate; -the gate runs `git daemon --enable=receive-pack` so the agent can -push to it via `git:///.git`. A pre-receive hook scans -the incoming refs with gitleaks; on clean, it forwards the refs to -the real upstream using a credential the gate holds. +upstreams as a transparent mirror. Each `bottle.git` entry maps to +a bare repo on the gate; `git daemon` serves the bare repos over +`git:///.git`. Two hooks make the mirror bidirectional: + +- **`pre-receive`** (push path) — gitleaks-scans incoming refs and, + on clean, forwards them to the real upstream with the + gate-resident credential. +- **`--access-hook`** (fetch path) — runs `git fetch origin --prune` + against the real upstream before every `upload-pack`, so an + agent fetch returns whatever the upstream has *now*. Fail-closed + if the upstream is unreachable. + +The agent never sees the upstream credential under either path. 
Why a third sidecar (not folded into pipelock or ssh-gate): the gate is the only one of the three that holds upstream push @@ -53,16 +61,20 @@ class GitGateUpstream: class GitGatePlan: """Output of GitGate.prepare; consumed by .start. - `upstreams` + `slug` + `entrypoint_script` + `hook_script` are - filled in at prepare time (host-side, side-effect-free on docker). - The network fields are populated by the backend's launch step via - `dataclasses.replace` once those networks exist. Empty defaults - are sentinels meaning "not yet set"; `.start` validates that - they are populated.""" + The script + slug + upstream fields are filled at prepare time + (host-side, side-effect-free on docker). The network fields are + populated by the backend's launch step via `dataclasses.replace` + once those networks exist. Empty defaults are sentinels meaning + "not yet set"; `.start` validates that they are populated. + + `hook_script` is the shared `pre-receive` for push-time gating; + `access_hook_script` is `git daemon`'s `--access-hook` for the + fetch-time upstream refresh.""" slug: str entrypoint_script: Path hook_script: Path + access_hook_script: Path upstreams: tuple[GitGateUpstream, ...] internal_network: str = "" egress_network: str = "" @@ -101,8 +113,8 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: upstream, then `exec git daemon`. The function reads `/git-gate/creds/-{key,known_hosts}` (laid down by `DockerGitGate.start` via docker cp) and wires them into each - bare repo's config so the shared pre-receive hook can pick them - up at push time.""" + bare repo's config; the access-hook + pre-receive hook pick those + paths up at fetch / push time.""" lines = [ "#!/bin/sh", "set -eu", @@ -121,8 +133,13 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: " repo=/git/${name}.git", " if [ ! 
-d \"$repo\" ]; then", " git init --bare \"$repo\" >/dev/null", + # --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so", + # a later `git fetch origin` mirrors the upstream's full ref", + # graph (heads, tags, notes) into the bare repo at canonical", + # paths. It does NOT set remote.origin.mirror=true, so an", + # explicit `git push origin :` still pushes one ref.", + " git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"", " fi", - " git -C \"$repo\" config remote.upstream.url \"$upstream_url\"", " git -C \"$repo\" config git-gate.identityFile \"$keyfile\"", " git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"", " git -C \"$repo\" config receive.denyCurrentBranch ignore", @@ -143,6 +160,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: " --base-path=/git \\", " --export-all \\", " --enable=receive-pack \\", + " --access-hook=/etc/git-gate/access-hook \\", " --verbose", ]) return "\n".join(lines) + "\n" @@ -150,9 +168,9 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: def git_gate_render_hook() -> str: """The shared pre-receive hook: gitleaks-scan all incoming refs, - then forward each accepted ref to the real upstream using the - per-repo credential. Failure in either phase aborts the push so - the agent sees a real rejection. POSIX sh. + then forward each accepted ref to the real upstream (`origin`) + using the per-repo credential. Failure in either phase aborts + the push so the agent sees a real rejection. POSIX sh. Two phases (scan all, then push all) keeps a hit on ref N from half-pushing refs 1..N-1; both phases re-read stdin from a temp @@ -183,7 +201,8 @@ while IFS=' ' read -r old new ref; do fi done < "$refs_file" -# Phase 2: forward each ref to the upstream. +# Phase 2: forward each ref to the upstream (`origin`, configured +# in the entrypoint via `git remote add --mirror=fetch`). 
keyfile=$(git config --get git-gate.identityFile) hostsfile=$(git config --get git-gate.knownHosts) if [ ! -f "$hostsfile" ]; then @@ -200,8 +219,8 @@ while IFS=' ' read -r old new ref; do else refspec="$new:$ref" fi - echo "git-gate: forwarding $ref to upstream" >&2 - if ! GIT_SSH_COMMAND="$ssh_cmd" git push upstream "$refspec" 1>&2; then + echo "git-gate: forwarding $ref to origin" >&2 + if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then echo "git-gate: upstream push failed for $ref" >&2 exit 1 fi @@ -211,6 +230,52 @@ exit 0 """ +def git_gate_render_access_hook() -> str: + """`git daemon --access-hook` script. Runs before each protocol + service; for `upload-pack` (fetch / clone / ls-remote / pull) it + refreshes the bare repo from upstream first, so the response + reflects upstream's current state. For other services (notably + `receive-pack`) it returns 0 immediately and lets the existing + pre-receive hook gate the operation. POSIX sh. + + The hook receives: + $1 service name (`upload-pack`, `receive-pack`, ...) + $2 absolute path to the resolved repo + $3 client hostname (unused) + $4 client tcp address (unused) + + Fail-closed on upstream errors: the agent's fetch fails too, + so it never silently sees stale data — matches the PRD's + 'equivalent to operations against the upstream' contract.""" + return r"""#!/bin/sh +# git-gate access-hook (PRD 0008). $1=service $2=repo $3=host $4=peer +set -u +service=$1 +repo_dir=$2 + +# Push path keeps its own gating in pre-receive (gitleaks + +# forward). Only refresh-from-upstream on fetch operations. +if [ "$service" != "upload-pack" ]; then + exit 0 +fi + +keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true) +hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true) +if [ -z "$keyfile" ] || [ ! 
-f "$hostsfile" ]; then + echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2 + exit 1 +fi +ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes" + +echo "git-gate: refreshing $repo_dir from upstream" >&2 +if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then + echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2 + exit 1 +fi +exit 0 +""" + + class GitGate(ABC): """The per-agent git-gate. Encapsulates the host-side prepare (upstream lift + entrypoint/hook render); the sidecar's @@ -219,8 +284,8 @@ class GitGate(ABC): def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan: """Compute the upstream table from `bottle.git` and write the - entrypoint + pre-receive scripts (mode 600) under `stage_dir`. - Pure host-side, no docker subprocess. + entrypoint, pre-receive hook, and access-hook scripts (mode + 600) under `stage_dir`. Pure host-side, no docker subprocess. Returned plan is incomplete: the launch step must fill `internal_network` / `egress_network` via `dataclasses.replace` @@ -232,10 +297,17 @@ class GitGate(ABC): hook = stage_dir / "git_gate_pre_receive.sh" hook.write_text(git_gate_render_hook()) hook.chmod(0o600) + access_hook = stage_dir / "git_gate_access_hook.sh" + access_hook.write_text(git_gate_render_access_hook()) + # 0o700 (not 0o600): git daemon execs --access-hook directly, + # not via `sh`, so the script needs the x bit. docker cp + # preserves source mode into the container. 
+ access_hook.chmod(0o700) return GitGatePlan( slug=slug, entrypoint_script=entrypoint, hook_script=hook, + access_hook_script=access_hook, upstreams=upstreams, ) diff --git a/tests/integration/test_git_gate_sidecar.py b/tests/integration/test_git_gate_sidecar.py index 5c1b533..2537b6b 100644 --- a/tests/integration/test_git_gate_sidecar.py +++ b/tests/integration/test_git_gate_sidecar.py @@ -2,16 +2,19 @@ Two tests against a real Docker daemon: - 1. A freshly-started gate answers ls-remote requests on its - internal-network address. Proves the daemon is up and the - bare repos rendered by the entrypoint are exported. + 1. ls-remote against a gate whose upstream is unreachable fails + with the access-hook's fail-closed rejection. Proves the + daemon is bound to its port AND the access-hook is wired: + a working ls-remote against the gate is necessarily a working + ls-remote against the upstream (PRD 0008's transparent-mirror + contract). 2. A push containing a gitleaks-detectable secret is rejected by the pre-receive hook with a non-zero exit on the agent - side and a gitleaks-rejection line in the response. This is - the PRD's success criterion. + side and a gitleaks-rejection line in the response. The PRD's + primary success criterion. -A successful clean-push roundtrip needs a real upstream SSH host; -deferred to a follow-up integration test. +A successful round-trip (clone through gate reflects upstream) +needs a reachable upstream SSH host; deferred to a follow-up. """ import dataclasses @@ -124,14 +127,18 @@ class TestGitGateSidecar(unittest.TestCase): "skipped under act_runner: docker socket mount topology breaks " "in-process visibility of networks created on the host daemon", ) - def test_ls_remote_succeeds_against_fresh_gate(self): - """A freshly-started gate has an empty bare repo per upstream; - `git ls-remote` returns no refs and exits 0. 
Probes the gate - from a sibling container on the same internal network — same - access topology the agent uses in production.""" + def test_ls_remote_fails_closed_when_upstream_unreachable(self): + """The gate's access-hook runs `git fetch origin --prune` before + every upload-pack. With the fixture's deliberately unreachable + `ssh://git@upstream.invalid/...`, that fetch fails and the + hook exits 1; the daemon reports access-denied. Asserting + non-zero here is what proves the access-hook is wired: under + the v1 (push-only) design ls-remote against a fresh gate + returned exit 0 with no refs.""" gate = self._start_gate("foo") - # git ls-remote retries weren't strictly needed in local runs, - # but the daemon takes a beat to bind after docker start. + # Daemon still has to bind first; retry the TCP connect a few + # times. The expected end state is a non-zero exit from the + # daemon's access-denied response — not a connection refused. probe = subprocess.run( ["docker", "run", "--rm", "--network", self.internal_net, @@ -139,15 +146,23 @@ class TestGitGateSidecar(unittest.TestCase): CLIENT_IMAGE, "-c", f"for i in $(seq 1 15); do " - f" git ls-remote git://{gate}/foo.git >/tmp/out 2>&1 && exit 0;" + f" out=$(git ls-remote git://{gate}/foo.git 2>&1) && exit 99;" + f" case \"$out\" in *'access denied'*|*'not exported'*) " + f" echo \"$out\"; exit 1;; esac;" f" sleep 1;" f"done;" - f"cat /tmp/out; exit 1"], + f"echo TIMEOUT; exit 2"], capture_output=True, text=True, timeout=60, check=False, ) + # exit 1: daemon access-denied as expected. exit 99 would mean + # ls-remote actually succeeded against the unreachable upstream + # (impossible — would indicate stale-data serving, the very + # thing the access-hook is meant to prevent). 
self.assertEqual( - 0, probe.returncode, - f"ls-remote failed: stdout={probe.stdout!r} stderr={probe.stderr!r}", + 1, probe.returncode, + f"expected fail-closed access-denied; got " + f"exit={probe.returncode} stdout={probe.stdout!r} " + f"stderr={probe.stderr!r}", ) @unittest.skipIf( @@ -164,10 +179,14 @@ class TestGitGateSidecar(unittest.TestCase): push_script = ( "set -e\n" "cd /tmp\n" - # Wait for git daemon to bind. ls-remote retries until - # connection works; we then assume the gate is ready. + # Wait for git daemon to bind. Under the v1.1 design, + # ls-remote never returns 0 against an unreachable + # upstream (access-hook fail-closed), so we wait for *any* + # response (the daemon's access-denied line) as the + # readiness signal. f"for i in $(seq 1 15); do " - f" git ls-remote git://{gate}/foo.git >/dev/null 2>&1 && break;" + f" out=$(git ls-remote git://{gate}/foo.git 2>&1) || true;" + f" case \"$out\" in *'remote error'*|*'access denied'*) break;; esac;" f" sleep 1;" f"done\n" "git init -q -b main repo\n" diff --git a/tests/unit/test_git_gate.py b/tests/unit/test_git_gate.py index 5dc8d8b..ff2d402 100644 --- a/tests/unit/test_git_gate.py +++ b/tests/unit/test_git_gate.py @@ -10,6 +10,7 @@ from claude_bottle.git_gate import ( GitGatePlan, GitGateUpstream, git_gate_known_hosts_line, + git_gate_render_access_hook, git_gate_render_entrypoint, git_gate_render_hook, git_gate_upstreams_for_bottle, @@ -87,6 +88,12 @@ class TestEntrypointRender(unittest.TestCase): self.assertIn("exec git daemon", script) self.assertIn("--enable=receive-pack", script) self.assertIn("--base-path=/git", script) + # The access-hook is what makes fetch a mirror operation + # against the upstream (PRD 0008 v1.1). + self.assertIn("--access-hook=/etc/git-gate/access-hook", script) + # Each repo's `origin` remote is wired to the upstream via + # --mirror=fetch so `git fetch origin` mirrors all refs. 
+ self.assertIn("remote add --mirror=fetch origin", script) def test_empty_upstreams_still_execs_daemon(self): # A no-upstream gate is a no-op for repos but the daemon still @@ -97,17 +104,36 @@ class TestEntrypointRender(unittest.TestCase): class TestHookRender(unittest.TestCase): - def test_hook_has_two_phases(self): + def test_pre_receive_hook_has_two_phases(self): hook = git_gate_render_hook() - # Phase 1: gitleaks. Phase 2: forward. + # Phase 1: gitleaks. Phase 2: forward to origin. self.assertIn("gitleaks git", hook) - self.assertIn("git push upstream", hook) + self.assertIn("git push origin", hook) # KnownHostKey absence is fail-closed. self.assertIn("refusing to push", hook) # Stdin is buffered to a tempfile so both phases can re-read. self.assertIn("refs_file=$(mktemp)", hook) +class TestAccessHookRender(unittest.TestCase): + def test_access_hook_refreshes_origin_on_upload_pack(self): + hook = git_gate_render_access_hook() + # Service-name guard: only upload-pack (fetch / clone / pull / + # ls-remote) triggers the upstream refresh; receive-pack + # bypasses this and the pre-receive hook gates it instead. + self.assertIn('service=$1', hook) + self.assertIn('"$service" != "upload-pack"', hook) + # The fetch is what makes the gate a transparent mirror. + self.assertIn("git -C \"$repo_dir\" fetch origin --prune", hook) + + def test_access_hook_fail_closed_on_upstream_error(self): + hook = git_gate_render_access_hook() + # Upstream-fetch failure exits non-zero, which propagates to + # the agent's fetch as a real error rather than stale data. 
+ self.assertIn("refusing to serve stale data", hook) + self.assertIn("exit 1", hook) + + class TestPrepare(unittest.TestCase): def setUp(self): self.stage = Path(tempfile.mkdtemp()) @@ -117,7 +143,7 @@ class TestPrepare(unittest.TestCase): shutil.rmtree(self.stage, ignore_errors=True) - def test_prepare_writes_entrypoint_and_hook_mode_600(self): + def test_prepare_writes_all_three_scripts(self): plan = _StubGate().prepare( fixture_with_git().bottles["dev"], "demo", self.stage ) @@ -127,8 +153,17 @@ class TestPrepare(unittest.TestCase): self.assertEqual( self.stage / "git_gate_pre_receive.sh", plan.hook_script ) + self.assertEqual( + self.stage / "git_gate_access_hook.sh", plan.access_hook_script + ) + # Entrypoint + pre-receive are mode 600 (loaded into the + # gate by docker cp and then `install -m 755`'d into each + # bare repo's hooks/ — source bit doesn't matter). The + # access-hook is execed directly by git daemon, so it has to + # carry the x bit through docker cp. self.assertEqual(0o600, os.stat(plan.entrypoint_script).st_mode & 0o777) self.assertEqual(0o600, os.stat(plan.hook_script).st_mode & 0o777) + self.assertEqual(0o700, os.stat(plan.access_hook_script).st_mode & 0o777) def test_prepare_plan_carries_upstreams_and_slug(self): plan = _StubGate().prepare(