feat(git-gate): mirror fetch through access-hook (bidirectional)
test / unit (pull_request) Successful in 11s
test / integration (pull_request) Successful in 14s

The gate is now a transparent mirror, not push-only. Per-repo
init now runs `git remote add --mirror=fetch origin <url>` so a
later `git fetch origin` mirrors the upstream's full ref graph at
canonical paths. The pre-receive hook now forwards accepted refs
via `git push origin`; the remote was renamed from `upstream` to
`origin`.

New: an access-hook script wired via `git daemon --access-hook`
runs `git fetch origin --prune` against the real upstream before
every upload-pack request (clone, fetch, pull, ls-remote). On
upstream error the hook exits non-zero — the agent's fetch fails
rather than the gate serving stale data.

The pre-existing smoke test (ls-remote against a gate whose
upstream is unreachable exits 0 with no refs) had to invert: under
the bidirectional design any ls-remote success is necessarily a
success against the upstream, so the unreachable-upstream case now
correctly fails closed.
This commit is contained in:
2026-05-12 21:37:04 -04:00
parent ae7e22065f
commit fdd06c54d2
4 changed files with 180 additions and 47 deletions
+7
View File
@@ -23,6 +23,7 @@ GIT_GATE_DOCKERFILE = "Dockerfile.git-gate"
GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh"
GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive"
GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook"
GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds"
# git daemon's default listening port. Surfaced as a constant because
@@ -85,6 +86,11 @@ class DockerGitGate(GitGate):
f"git-gate hook missing at {plan.hook_script}; "
f"GitGate.prepare must run first"
)
if not plan.access_hook_script.is_file():
die(
f"git-gate access-hook missing at {plan.access_hook_script}; "
f"GitGate.prepare must run first"
)
build_git_gate_image()
@@ -111,6 +117,7 @@ class DockerGitGate(GitGate):
cps: list[tuple[str, str, str]] = [
(str(plan.entrypoint_script), GIT_GATE_ENTRYPOINT_IN_CONTAINER, "entrypoint"),
(str(plan.hook_script), GIT_GATE_HOOK_IN_CONTAINER, "pre-receive hook"),
(str(plan.access_hook_script), GIT_GATE_ACCESS_HOOK_IN_CONTAINER, "access-hook"),
]
for u in plan.upstreams:
keypath = expand_tilde(u.identity_file)
+94 -22
View File
@@ -1,11 +1,19 @@
"""Per-agent git-gate (PRD 0008).
A third per-agent sidecar that fronts the bottle's declared git
upstreams. Each `bottle.git` entry maps to a bare repo on the gate;
the gate runs `git daemon --enable=receive-pack` so the agent can
push to it via `git://<gate>/<name>.git`. A pre-receive hook scans
the incoming refs with gitleaks; on clean, it forwards the refs to
the real upstream using a credential the gate holds.
upstreams as a transparent mirror. Each `bottle.git` entry maps to
a bare repo on the gate; `git daemon` serves the bare repos over
`git://<gate>/<name>.git`. Two hooks make the mirror bidirectional:
- **`pre-receive`** (push path) — gitleaks-scans incoming refs and,
on clean, forwards them to the real upstream with the
gate-resident credential.
- **`--access-hook`** (fetch path) — runs `git fetch origin --prune`
against the real upstream before every `upload-pack`, so an
agent fetch returns whatever the upstream has *now*. Fail-closed
if the upstream is unreachable.
The agent never sees the upstream credential under either path.
Why a third sidecar (not folded into pipelock or ssh-gate): the
gate is the only one of the three that holds upstream push
@@ -53,16 +61,20 @@ class GitGateUpstream:
class GitGatePlan:
"""Output of GitGate.prepare; consumed by .start.
`upstreams` + `slug` + `entrypoint_script` + `hook_script` are
filled in at prepare time (host-side, side-effect-free on docker).
The network fields are populated by the backend's launch step via
`dataclasses.replace` once those networks exist. Empty defaults
are sentinels meaning "not yet set"; `.start` validates that
they are populated."""
The script + slug + upstream fields are filled at prepare time
(host-side, side-effect-free on docker). The network fields are
populated by the backend's launch step via `dataclasses.replace`
once those networks exist. Empty defaults are sentinels meaning
"not yet set"; `.start` validates that they are populated.
`hook_script` is the shared `pre-receive` for push-time gating;
`access_hook_script` is `git daemon`'s `--access-hook` for the
fetch-time upstream refresh."""
slug: str
entrypoint_script: Path
hook_script: Path
access_hook_script: Path
upstreams: tuple[GitGateUpstream, ...]
internal_network: str = ""
egress_network: str = ""
@@ -101,8 +113,8 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
upstream, then `exec git daemon`. The function reads
`/git-gate/creds/<name>-{key,known_hosts}` (laid down by
`DockerGitGate.start` via docker cp) and wires them into each
bare repo's config so the shared pre-receive hook can pick them
up at push time."""
bare repo's config; the access-hook + pre-receive hook pick those
paths up at fetch / push time."""
lines = [
"#!/bin/sh",
"set -eu",
@@ -121,8 +133,13 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
" repo=/git/${name}.git",
" if [ ! -d \"$repo\" ]; then",
" git init --bare \"$repo\" >/dev/null",
# --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so",
# a later `git fetch origin` mirrors the upstream's full ref",
# graph (heads, tags, notes) into the bare repo at canonical",
# paths. It does NOT set remote.origin.mirror=true, so an",
# explicit `git push origin <ref>:<ref>` still pushes one ref.",
" git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"",
" fi",
" git -C \"$repo\" config remote.upstream.url \"$upstream_url\"",
" git -C \"$repo\" config git-gate.identityFile \"$keyfile\"",
" git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"",
" git -C \"$repo\" config receive.denyCurrentBranch ignore",
@@ -143,6 +160,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
" --base-path=/git \\",
" --export-all \\",
" --enable=receive-pack \\",
" --access-hook=/etc/git-gate/access-hook \\",
" --verbose",
])
return "\n".join(lines) + "\n"
@@ -150,9 +168,9 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
def git_gate_render_hook() -> str:
"""The shared pre-receive hook: gitleaks-scan all incoming refs,
then forward each accepted ref to the real upstream using the
per-repo credential. Failure in either phase aborts the push so
the agent sees a real rejection. POSIX sh.
then forward each accepted ref to the real upstream (`origin`)
using the per-repo credential. Failure in either phase aborts
the push so the agent sees a real rejection. POSIX sh.
Two phases (scan all, then push all) keeps a hit on ref N from
half-pushing refs 1..N-1; both phases re-read stdin from a temp
@@ -183,7 +201,8 @@ while IFS=' ' read -r old new ref; do
fi
done < "$refs_file"
# Phase 2: forward each ref to the upstream.
# Phase 2: forward each ref to the upstream (`origin`, configured
# in the entrypoint via `git remote add --mirror=fetch`).
keyfile=$(git config --get git-gate.identityFile)
hostsfile=$(git config --get git-gate.knownHosts)
if [ ! -f "$hostsfile" ]; then
@@ -200,8 +219,8 @@ while IFS=' ' read -r old new ref; do
else
refspec="$new:$ref"
fi
echo "git-gate: forwarding $ref to upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git push upstream "$refspec" 1>&2; then
echo "git-gate: forwarding $ref to origin" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then
echo "git-gate: upstream push failed for $ref" >&2
exit 1
fi
@@ -211,6 +230,52 @@ exit 0
"""
def git_gate_render_access_hook() -> str:
    """Render the `git daemon --access-hook` script (POSIX sh).

    git daemon runs the hook before each protocol service. For
    `upload-pack` (fetch / clone / ls-remote / pull) the hook first
    refreshes the bare repo from upstream, so the response reflects
    upstream's current state. For any other service (notably
    `receive-pack`) it exits 0 immediately and lets the pre-receive
    hook gate the operation.

    Positional arguments git daemon passes to the hook, per
    git-daemon(1):
      $1  service name (`upload-pack`, `receive-pack`, ...)
      $2  path to the repository
      $3  hostname the client asked for (%H; unused)
      $4  canonical hostname (%CH; unused)
      $5  IP address (%IP; unused)
      $6  TCP port (%P; unused)
    The requestor's own address is not positional; git daemon
    exposes it through $REMOTE_ADDR / $REMOTE_PORT.

    Fail-closed: if the per-repo credential paths are unset or the
    files are missing, or the upstream fetch fails, the hook exits
    1 and the agent's fetch fails too, so it never silently sees
    stale data — matches the PRD's 'equivalent to operations
    against the upstream' contract.

    All diagnostics (including `git fetch` output) go to stderr.
    git daemon forwards a line written to the hook's stdout to the
    requestor as the error message, so stdout is left untouched.

    Returns the script text; the caller writes it to disk and is
    responsible for the executable bit.
    """
    return r"""#!/bin/sh
# git-gate access-hook (PRD 0008).
# argv: $1=service $2=repo $3=hostname $4=canonical-hostname $5=ip $6=port
set -u
service=$1
repo_dir=$2
# Push path keeps its own gating in pre-receive (gitleaks +
# forward). Only refresh-from-upstream on fetch operations.
if [ "$service" != "upload-pack" ]; then
exit 0
fi
keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true)
hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true)
# Refuse up front when either credential file is absent, rather
# than letting ssh fail later with a less specific error.
if [ -z "$keyfile" ] || [ ! -f "$keyfile" ] || [ ! -f "$hostsfile" ]; then
echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2
exit 1
fi
ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes"
echo "git-gate: refreshing $repo_dir from upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then
echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2
exit 1
fi
exit 0
"""
class GitGate(ABC):
"""The per-agent git-gate. Encapsulates the host-side prepare
(upstream lift + entrypoint/hook render); the sidecar's
@@ -219,8 +284,8 @@ class GitGate(ABC):
def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan:
"""Compute the upstream table from `bottle.git` and write the
entrypoint + pre-receive scripts (mode 600) under `stage_dir`.
Pure host-side, no docker subprocess.
entrypoint, pre-receive hook, and access-hook scripts (mode
600) under `stage_dir`. Pure host-side, no docker subprocess.
Returned plan is incomplete: the launch step must fill
`internal_network` / `egress_network` via `dataclasses.replace`
@@ -232,10 +297,17 @@ class GitGate(ABC):
hook = stage_dir / "git_gate_pre_receive.sh"
hook.write_text(git_gate_render_hook())
hook.chmod(0o600)
access_hook = stage_dir / "git_gate_access_hook.sh"
access_hook.write_text(git_gate_render_access_hook())
# 0o700 (not 0o600): git daemon execs --access-hook directly,
# not via `sh`, so the script needs the x bit. docker cp
# preserves source mode into the container.
access_hook.chmod(0o700)
return GitGatePlan(
slug=slug,
entrypoint_script=entrypoint,
hook_script=hook,
access_hook_script=access_hook,
upstreams=upstreams,
)
+40 -21
View File
@@ -2,16 +2,19 @@
Two tests against a real Docker daemon:
1. A freshly-started gate answers ls-remote requests on its
internal-network address. Proves the daemon is up and the
bare repos rendered by the entrypoint are exported.
1. ls-remote against a gate whose upstream is unreachable fails
with the access-hook's fail-closed rejection. Proves the
daemon is bound to its port AND the access-hook is wired:
a working ls-remote against the gate is necessarily a working
ls-remote against the upstream (PRD 0008's transparent-mirror
contract).
2. A push containing a gitleaks-detectable secret is rejected
by the pre-receive hook with a non-zero exit on the agent
side and a gitleaks-rejection line in the response. This is
the PRD's success criterion.
side and a gitleaks-rejection line in the response. The PRD's
primary success criterion.
A successful clean-push roundtrip needs a real upstream SSH host;
deferred to a follow-up integration test.
A successful round-trip (clone through gate reflects upstream)
needs a reachable upstream SSH host; deferred to a follow-up.
"""
import dataclasses
@@ -124,14 +127,18 @@ class TestGitGateSidecar(unittest.TestCase):
"skipped under act_runner: docker socket mount topology breaks "
"in-process visibility of networks created on the host daemon",
)
def test_ls_remote_succeeds_against_fresh_gate(self):
"""A freshly-started gate has an empty bare repo per upstream;
`git ls-remote` returns no refs and exits 0. Probes the gate
from a sibling container on the same internal network — same
access topology the agent uses in production."""
def test_ls_remote_fails_closed_when_upstream_unreachable(self):
"""The gate's access-hook runs `git fetch origin --prune` before
every upload-pack. With the fixture's deliberately unreachable
`ssh://git@upstream.invalid/...`, that fetch fails and the
hook exits 1; the daemon reports access-denied. Asserting
non-zero here is what proves the access-hook is wired: under
the v1 (push-only) design ls-remote against a fresh gate
returned exit 0 with no refs."""
gate = self._start_gate("foo")
# git ls-remote retries weren't strictly needed in local runs,
# but the daemon takes a beat to bind after docker start.
# Daemon still has to bind first; retry the TCP connect a few
# times. The expected end state is a non-zero exit from the
# daemon's access-denied response — not a connection refused.
probe = subprocess.run(
["docker", "run", "--rm",
"--network", self.internal_net,
@@ -139,15 +146,23 @@ class TestGitGateSidecar(unittest.TestCase):
CLIENT_IMAGE,
"-c",
f"for i in $(seq 1 15); do "
f" git ls-remote git://{gate}/foo.git >/tmp/out 2>&1 && exit 0;"
f" out=$(git ls-remote git://{gate}/foo.git 2>&1) && exit 99;"
f" case \"$out\" in *'access denied'*|*'not exported'*) "
f" echo \"$out\"; exit 1;; esac;"
f" sleep 1;"
f"done;"
f"cat /tmp/out; exit 1"],
f"echo TIMEOUT; exit 2"],
capture_output=True, text=True, timeout=60, check=False,
)
# exit 1: daemon access-denied as expected. exit 99 would mean
# ls-remote actually succeeded against the unreachable upstream
# (impossible — would indicate stale-data serving, the very
# thing the access-hook is meant to prevent).
self.assertEqual(
0, probe.returncode,
f"ls-remote failed: stdout={probe.stdout!r} stderr={probe.stderr!r}",
1, probe.returncode,
f"expected fail-closed access-denied; got "
f"exit={probe.returncode} stdout={probe.stdout!r} "
f"stderr={probe.stderr!r}",
)
@unittest.skipIf(
@@ -164,10 +179,14 @@ class TestGitGateSidecar(unittest.TestCase):
push_script = (
"set -e\n"
"cd /tmp\n"
# Wait for git daemon to bind. ls-remote retries until
# connection works; we then assume the gate is ready.
# Wait for git daemon to bind. Under the v1.1 design,
# ls-remote never returns 0 against an unreachable
# upstream (access-hook fail-closed), so we wait for *any*
# response (the daemon's access-denied line) as the
# readiness signal.
f"for i in $(seq 1 15); do "
f" git ls-remote git://{gate}/foo.git >/dev/null 2>&1 && break;"
f" out=$(git ls-remote git://{gate}/foo.git 2>&1) || true;"
f" case \"$out\" in *'remote error'*|*'access denied'*) break;; esac;"
f" sleep 1;"
f"done\n"
"git init -q -b main repo\n"
+39 -4
View File
@@ -10,6 +10,7 @@ from claude_bottle.git_gate import (
GitGatePlan,
GitGateUpstream,
git_gate_known_hosts_line,
git_gate_render_access_hook,
git_gate_render_entrypoint,
git_gate_render_hook,
git_gate_upstreams_for_bottle,
@@ -87,6 +88,12 @@ class TestEntrypointRender(unittest.TestCase):
self.assertIn("exec git daemon", script)
self.assertIn("--enable=receive-pack", script)
self.assertIn("--base-path=/git", script)
# The access-hook is what makes fetch a mirror operation
# against the upstream (PRD 0008 v1.1).
self.assertIn("--access-hook=/etc/git-gate/access-hook", script)
# Each repo's `origin` remote is wired to the upstream via
# --mirror=fetch so `git fetch origin` mirrors all refs.
self.assertIn("remote add --mirror=fetch origin", script)
def test_empty_upstreams_still_execs_daemon(self):
# A no-upstream gate is a no-op for repos but the daemon still
@@ -97,17 +104,36 @@ class TestEntrypointRender(unittest.TestCase):
class TestHookRender(unittest.TestCase):
def test_hook_has_two_phases(self):
def test_pre_receive_hook_has_two_phases(self):
hook = git_gate_render_hook()
# Phase 1: gitleaks. Phase 2: forward.
# Phase 1: gitleaks. Phase 2: forward to origin.
self.assertIn("gitleaks git", hook)
self.assertIn("git push upstream", hook)
self.assertIn("git push origin", hook)
# KnownHostKey absence is fail-closed.
self.assertIn("refusing to push", hook)
# Stdin is buffered to a tempfile so both phases can re-read.
self.assertIn("refs_file=$(mktemp)", hook)
class TestAccessHookRender(unittest.TestCase):
    """Render-level checks on the `git daemon --access-hook` script
    text; nothing here executes the script."""

    def test_access_hook_refreshes_origin_on_upload_pack(self):
        rendered = git_gate_render_access_hook()
        expected_fragments = (
            # Service-name guard: only upload-pack (fetch / clone /
            # pull / ls-remote) triggers the upstream refresh;
            # receive-pack skips it and is gated by pre-receive.
            'service=$1',
            '"$service" != "upload-pack"',
            # The fetch is what makes the gate a transparent mirror.
            'git -C "$repo_dir" fetch origin --prune',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_access_hook_fail_closed_on_upstream_error(self):
        rendered = git_gate_render_access_hook()
        # A failed upstream fetch must exit non-zero so the agent's
        # fetch errors out instead of being served stale data.
        for fragment in ("refusing to serve stale data", "exit 1"):
            self.assertIn(fragment, rendered)
class TestPrepare(unittest.TestCase):
def setUp(self):
self.stage = Path(tempfile.mkdtemp())
@@ -117,7 +143,7 @@ class TestPrepare(unittest.TestCase):
shutil.rmtree(self.stage, ignore_errors=True)
def test_prepare_writes_entrypoint_and_hook_mode_600(self):
def test_prepare_writes_all_three_scripts(self):
plan = _StubGate().prepare(
fixture_with_git().bottles["dev"], "demo", self.stage
)
@@ -127,8 +153,17 @@ class TestPrepare(unittest.TestCase):
self.assertEqual(
self.stage / "git_gate_pre_receive.sh", plan.hook_script
)
self.assertEqual(
self.stage / "git_gate_access_hook.sh", plan.access_hook_script
)
# Entrypoint + pre-receive are mode 600 (loaded into the
# gate by docker cp and then `install -m 755`'d into each
# bare repo's hooks/ — source bit doesn't matter). The
# access-hook is execed directly by git daemon, so it has to
# carry the x bit through docker cp.
self.assertEqual(0o600, os.stat(plan.entrypoint_script).st_mode & 0o777)
self.assertEqual(0o600, os.stat(plan.hook_script).st_mode & 0o777)
self.assertEqual(0o700, os.stat(plan.access_hook_script).st_mode & 0o777)
def test_prepare_plan_carries_upstreams_and_slug(self):
plan = _StubGate().prepare(