feat(git-gate): mirror fetch through access-hook (bidirectional)
The gate is now a transparent mirror, not push-only. Per-repo init now runs `git remote add --mirror=fetch origin <url>` so a later `git fetch origin` mirrors the upstream's full ref graph at canonical paths. The pre-receive hook forwards accepted refs via `git push origin` (the remote was renamed from `upstream`). New: an access-hook script wired via `git daemon --access-hook` runs `git fetch origin --prune` against the real upstream before every upload-pack request (clone, fetch, pull, ls-remote). On upstream error the hook exits non-zero — the agent's fetch fails rather than the gate serving stale data. The pre-existing smoke test (ls-remote against a gate with an unreachable upstream exits 0 with no refs) had to be inverted: under the bidirectional design any ls-remote success is necessarily a success against the upstream, so the unreachable-upstream case now correctly fails closed.
This commit is contained in:
@@ -23,6 +23,7 @@ GIT_GATE_DOCKERFILE = "Dockerfile.git-gate"
|
||||
|
||||
GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh"
|
||||
GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive"
|
||||
GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook"
|
||||
GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds"
|
||||
|
||||
# git daemon's default listening port. Surfaced as a constant because
|
||||
@@ -85,6 +86,11 @@ class DockerGitGate(GitGate):
|
||||
f"git-gate hook missing at {plan.hook_script}; "
|
||||
f"GitGate.prepare must run first"
|
||||
)
|
||||
if not plan.access_hook_script.is_file():
|
||||
die(
|
||||
f"git-gate access-hook missing at {plan.access_hook_script}; "
|
||||
f"GitGate.prepare must run first"
|
||||
)
|
||||
|
||||
build_git_gate_image()
|
||||
|
||||
@@ -111,6 +117,7 @@ class DockerGitGate(GitGate):
|
||||
cps: list[tuple[str, str, str]] = [
|
||||
(str(plan.entrypoint_script), GIT_GATE_ENTRYPOINT_IN_CONTAINER, "entrypoint"),
|
||||
(str(plan.hook_script), GIT_GATE_HOOK_IN_CONTAINER, "pre-receive hook"),
|
||||
(str(plan.access_hook_script), GIT_GATE_ACCESS_HOOK_IN_CONTAINER, "access-hook"),
|
||||
]
|
||||
for u in plan.upstreams:
|
||||
keypath = expand_tilde(u.identity_file)
|
||||
|
||||
+94
-22
@@ -1,11 +1,19 @@
|
||||
"""Per-agent git-gate (PRD 0008).
|
||||
|
||||
A third per-agent sidecar that fronts the bottle's declared git
|
||||
upstreams. Each `bottle.git` entry maps to a bare repo on the gate;
|
||||
the gate runs `git daemon --enable=receive-pack` so the agent can
|
||||
push to it via `git://<gate>/<name>.git`. A pre-receive hook scans
|
||||
the incoming refs with gitleaks; on clean, it forwards the refs to
|
||||
the real upstream using a credential the gate holds.
|
||||
upstreams as a transparent mirror. Each `bottle.git` entry maps to
|
||||
a bare repo on the gate; `git daemon` serves the bare repos over
|
||||
`git://<gate>/<name>.git`. Two hooks make the mirror bidirectional:
|
||||
|
||||
- **`pre-receive`** (push path) — gitleaks-scans incoming refs and,
|
||||
on clean, forwards them to the real upstream with the
|
||||
gate-resident credential.
|
||||
- **`--access-hook`** (fetch path) — runs `git fetch origin --prune`
|
||||
against the real upstream before every `upload-pack`, so an
|
||||
agent fetch returns whatever the upstream has *now*. Fail-closed
|
||||
if the upstream is unreachable.
|
||||
|
||||
The agent never sees the upstream credential under either path.
|
||||
|
||||
Why a third sidecar (not folded into pipelock or ssh-gate): the
|
||||
gate is the only one of the three that holds upstream push
|
||||
@@ -53,16 +61,20 @@ class GitGateUpstream:
|
||||
class GitGatePlan:
|
||||
"""Output of GitGate.prepare; consumed by .start.
|
||||
|
||||
`upstreams` + `slug` + `entrypoint_script` + `hook_script` are
|
||||
filled in at prepare time (host-side, side-effect-free on docker).
|
||||
The network fields are populated by the backend's launch step via
|
||||
`dataclasses.replace` once those networks exist. Empty defaults
|
||||
are sentinels meaning "not yet set"; `.start` validates that
|
||||
they are populated."""
|
||||
The script + slug + upstream fields are filled at prepare time
|
||||
(host-side, side-effect-free on docker). The network fields are
|
||||
populated by the backend's launch step via `dataclasses.replace`
|
||||
once those networks exist. Empty defaults are sentinels meaning
|
||||
"not yet set"; `.start` validates that they are populated.
|
||||
|
||||
`hook_script` is the shared `pre-receive` for push-time gating;
|
||||
`access_hook_script` is `git daemon`'s `--access-hook` for the
|
||||
fetch-time upstream refresh."""
|
||||
|
||||
slug: str
|
||||
entrypoint_script: Path
|
||||
hook_script: Path
|
||||
access_hook_script: Path
|
||||
upstreams: tuple[GitGateUpstream, ...]
|
||||
internal_network: str = ""
|
||||
egress_network: str = ""
|
||||
@@ -101,8 +113,8 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
|
||||
upstream, then `exec git daemon`. The function reads
|
||||
`/git-gate/creds/<name>-{key,known_hosts}` (laid down by
|
||||
`DockerGitGate.start` via docker cp) and wires them into each
|
||||
bare repo's config so the shared pre-receive hook can pick them
|
||||
up at push time."""
|
||||
bare repo's config; the access-hook + pre-receive hook pick those
|
||||
paths up at fetch / push time."""
|
||||
lines = [
|
||||
"#!/bin/sh",
|
||||
"set -eu",
|
||||
@@ -121,8 +133,13 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
|
||||
" repo=/git/${name}.git",
|
||||
" if [ ! -d \"$repo\" ]; then",
|
||||
" git init --bare \"$repo\" >/dev/null",
|
||||
# --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so",
|
||||
# a later `git fetch origin` mirrors the upstream's full ref",
|
||||
# graph (heads, tags, notes) into the bare repo at canonical",
|
||||
# paths. It does NOT set remote.origin.mirror=true, so an",
|
||||
# explicit `git push origin <ref>:<ref>` still pushes one ref.",
|
||||
" git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"",
|
||||
" fi",
|
||||
" git -C \"$repo\" config remote.upstream.url \"$upstream_url\"",
|
||||
" git -C \"$repo\" config git-gate.identityFile \"$keyfile\"",
|
||||
" git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"",
|
||||
" git -C \"$repo\" config receive.denyCurrentBranch ignore",
|
||||
@@ -143,6 +160,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
|
||||
" --base-path=/git \\",
|
||||
" --export-all \\",
|
||||
" --enable=receive-pack \\",
|
||||
" --access-hook=/etc/git-gate/access-hook \\",
|
||||
" --verbose",
|
||||
])
|
||||
return "\n".join(lines) + "\n"
|
||||
@@ -150,9 +168,9 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
|
||||
|
||||
def git_gate_render_hook() -> str:
|
||||
"""The shared pre-receive hook: gitleaks-scan all incoming refs,
|
||||
then forward each accepted ref to the real upstream using the
|
||||
per-repo credential. Failure in either phase aborts the push so
|
||||
the agent sees a real rejection. POSIX sh.
|
||||
then forward each accepted ref to the real upstream (`origin`)
|
||||
using the per-repo credential. Failure in either phase aborts
|
||||
the push so the agent sees a real rejection. POSIX sh.
|
||||
|
||||
Two phases (scan all, then push all) keeps a hit on ref N from
|
||||
half-pushing refs 1..N-1; both phases re-read stdin from a temp
|
||||
@@ -183,7 +201,8 @@ while IFS=' ' read -r old new ref; do
|
||||
fi
|
||||
done < "$refs_file"
|
||||
|
||||
# Phase 2: forward each ref to the upstream.
|
||||
# Phase 2: forward each ref to the upstream (`origin`, configured
|
||||
# in the entrypoint via `git remote add --mirror=fetch`).
|
||||
keyfile=$(git config --get git-gate.identityFile)
|
||||
hostsfile=$(git config --get git-gate.knownHosts)
|
||||
if [ ! -f "$hostsfile" ]; then
|
||||
@@ -200,8 +219,8 @@ while IFS=' ' read -r old new ref; do
|
||||
else
|
||||
refspec="$new:$ref"
|
||||
fi
|
||||
echo "git-gate: forwarding $ref to upstream" >&2
|
||||
if ! GIT_SSH_COMMAND="$ssh_cmd" git push upstream "$refspec" 1>&2; then
|
||||
echo "git-gate: forwarding $ref to origin" >&2
|
||||
if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then
|
||||
echo "git-gate: upstream push failed for $ref" >&2
|
||||
exit 1
|
||||
fi
|
||||
@@ -211,6 +230,52 @@ exit 0
|
||||
"""
|
||||
|
||||
|
||||
def git_gate_render_access_hook() -> str:
    """Render the `git daemon --access-hook` script (POSIX sh).

    git daemon runs the hook before each protocol service. For
    `upload-pack` (fetch / clone / ls-remote / pull) the script
    refreshes the bare repo from `origin` first, so the response
    reflects upstream's current state. For every other service
    (notably `receive-pack`) it exits 0 immediately and leaves the
    gating to the pre-receive hook.

    Arguments git daemon passes to the hook:
      $1  service name (`upload-pack`, `receive-pack`, ...)
      $2  path to the repository
      $3  hostname (%H)             (unused)
      $4  canonical hostname (%CH)  (unused)
      $5  IP address (%IP)          (unused)
      $6  TCP port (%P)             (unused)
    The requestor's address is not an argument; git daemon exposes
    it to the hook as $REMOTE_ADDR / $REMOTE_PORT.

    Fail-closed: if either credential file is missing, or the
    upstream fetch fails, the script exits 1 so the agent's fetch
    fails too and it never silently sees stale data. This matches
    the PRD's 'equivalent to operations against the upstream'
    contract.

    Returns the script text, newline-terminated.
    """
    return r"""#!/bin/sh
# git-gate access-hook (PRD 0008).
# argv: $1=service $2=repo $3=hostname $4=canonical-hostname $5=ip $6=port
set -u
service=$1
repo_dir=$2

# Push path keeps its own gating in pre-receive (gitleaks +
# forward). Only refresh-from-upstream on fetch operations.
if [ "$service" != "upload-pack" ]; then
    exit 0
fi

keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true)
hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true)
# `[ ! -f "" ]` is true, so an unset config value is refused as well
# as a path that does not exist.
if [ ! -f "$keyfile" ] || [ ! -f "$hostsfile" ]; then
    echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2
    exit 1
fi
ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes"

echo "git-gate: refreshing $repo_dir from upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then
    echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2
    exit 1
fi
exit 0
"""
|
||||
|
||||
|
||||
class GitGate(ABC):
|
||||
"""The per-agent git-gate. Encapsulates the host-side prepare
|
||||
(upstream lift + entrypoint/hook render); the sidecar's
|
||||
@@ -219,8 +284,8 @@ class GitGate(ABC):
|
||||
|
||||
def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan:
|
||||
"""Compute the upstream table from `bottle.git` and write the
|
||||
entrypoint + pre-receive scripts (mode 600) under `stage_dir`.
|
||||
Pure host-side, no docker subprocess.
|
||||
entrypoint, pre-receive hook, and access-hook scripts (mode
|
||||
600) under `stage_dir`. Pure host-side, no docker subprocess.
|
||||
|
||||
Returned plan is incomplete: the launch step must fill
|
||||
`internal_network` / `egress_network` via `dataclasses.replace`
|
||||
@@ -232,10 +297,17 @@ class GitGate(ABC):
|
||||
hook = stage_dir / "git_gate_pre_receive.sh"
|
||||
hook.write_text(git_gate_render_hook())
|
||||
hook.chmod(0o600)
|
||||
access_hook = stage_dir / "git_gate_access_hook.sh"
|
||||
access_hook.write_text(git_gate_render_access_hook())
|
||||
# 0o700 (not 0o600): git daemon execs --access-hook directly,
|
||||
# not via `sh`, so the script needs the x bit. docker cp
|
||||
# preserves source mode into the container.
|
||||
access_hook.chmod(0o700)
|
||||
return GitGatePlan(
|
||||
slug=slug,
|
||||
entrypoint_script=entrypoint,
|
||||
hook_script=hook,
|
||||
access_hook_script=access_hook,
|
||||
upstreams=upstreams,
|
||||
)
|
||||
|
||||
|
||||
@@ -2,16 +2,19 @@
|
||||
|
||||
Two tests against a real Docker daemon:
|
||||
|
||||
1. A freshly-started gate answers ls-remote requests on its
|
||||
internal-network address. Proves the daemon is up and the
|
||||
bare repos rendered by the entrypoint are exported.
|
||||
1. ls-remote against a gate whose upstream is unreachable fails
|
||||
with the access-hook's fail-closed rejection. Proves the
|
||||
daemon is bound to its port AND the access-hook is wired:
|
||||
a working ls-remote against the gate is necessarily a working
|
||||
ls-remote against the upstream (PRD 0008's transparent-mirror
|
||||
contract).
|
||||
2. A push containing a gitleaks-detectable secret is rejected
|
||||
by the pre-receive hook with a non-zero exit on the agent
|
||||
side and a gitleaks-rejection line in the response. This is
|
||||
the PRD's success criterion.
|
||||
side and a gitleaks-rejection line in the response. The PRD's
|
||||
primary success criterion.
|
||||
|
||||
A successful clean-push roundtrip needs a real upstream SSH host;
|
||||
deferred to a follow-up integration test.
|
||||
A successful round-trip (clone through gate reflects upstream)
|
||||
needs a reachable upstream SSH host; deferred to a follow-up.
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
@@ -124,14 +127,18 @@ class TestGitGateSidecar(unittest.TestCase):
|
||||
"skipped under act_runner: docker socket mount topology breaks "
|
||||
"in-process visibility of networks created on the host daemon",
|
||||
)
|
||||
def test_ls_remote_succeeds_against_fresh_gate(self):
|
||||
"""A freshly-started gate has an empty bare repo per upstream;
|
||||
`git ls-remote` returns no refs and exits 0. Probes the gate
|
||||
from a sibling container on the same internal network — same
|
||||
access topology the agent uses in production."""
|
||||
def test_ls_remote_fails_closed_when_upstream_unreachable(self):
|
||||
"""The gate's access-hook runs `git fetch origin --prune` before
|
||||
every upload-pack. With the fixture's deliberately unreachable
|
||||
`ssh://git@upstream.invalid/...`, that fetch fails and the
|
||||
hook exits 1; the daemon reports access-denied. Asserting
|
||||
non-zero here is what proves the access-hook is wired: under
|
||||
the v1 (push-only) design ls-remote against a fresh gate
|
||||
returned exit 0 with no refs."""
|
||||
gate = self._start_gate("foo")
|
||||
# git ls-remote retries weren't strictly needed in local runs,
|
||||
# but the daemon takes a beat to bind after docker start.
|
||||
# Daemon still has to bind first; retry the TCP connect a few
|
||||
# times. The expected end state is a non-zero exit from the
|
||||
# daemon's access-denied response — not a connection refused.
|
||||
probe = subprocess.run(
|
||||
["docker", "run", "--rm",
|
||||
"--network", self.internal_net,
|
||||
@@ -139,15 +146,23 @@ class TestGitGateSidecar(unittest.TestCase):
|
||||
CLIENT_IMAGE,
|
||||
"-c",
|
||||
f"for i in $(seq 1 15); do "
|
||||
f" git ls-remote git://{gate}/foo.git >/tmp/out 2>&1 && exit 0;"
|
||||
f" out=$(git ls-remote git://{gate}/foo.git 2>&1) && exit 99;"
|
||||
f" case \"$out\" in *'access denied'*|*'not exported'*) "
|
||||
f" echo \"$out\"; exit 1;; esac;"
|
||||
f" sleep 1;"
|
||||
f"done;"
|
||||
f"cat /tmp/out; exit 1"],
|
||||
f"echo TIMEOUT; exit 2"],
|
||||
capture_output=True, text=True, timeout=60, check=False,
|
||||
)
|
||||
# exit 1: daemon access-denied as expected. exit 99 would mean
|
||||
# ls-remote actually succeeded against the unreachable upstream
|
||||
# (impossible — would indicate stale-data serving, the very
|
||||
# thing the access-hook is meant to prevent).
|
||||
self.assertEqual(
|
||||
0, probe.returncode,
|
||||
f"ls-remote failed: stdout={probe.stdout!r} stderr={probe.stderr!r}",
|
||||
1, probe.returncode,
|
||||
f"expected fail-closed access-denied; got "
|
||||
f"exit={probe.returncode} stdout={probe.stdout!r} "
|
||||
f"stderr={probe.stderr!r}",
|
||||
)
|
||||
|
||||
@unittest.skipIf(
|
||||
@@ -164,10 +179,14 @@ class TestGitGateSidecar(unittest.TestCase):
|
||||
push_script = (
|
||||
"set -e\n"
|
||||
"cd /tmp\n"
|
||||
# Wait for git daemon to bind. ls-remote retries until
|
||||
# connection works; we then assume the gate is ready.
|
||||
# Wait for git daemon to bind. Under the v1.1 design,
|
||||
# ls-remote never returns 0 against an unreachable
|
||||
# upstream (access-hook fail-closed), so we wait for *any*
|
||||
# response (the daemon's access-denied line) as the
|
||||
# readiness signal.
|
||||
f"for i in $(seq 1 15); do "
|
||||
f" git ls-remote git://{gate}/foo.git >/dev/null 2>&1 && break;"
|
||||
f" out=$(git ls-remote git://{gate}/foo.git 2>&1) || true;"
|
||||
f" case \"$out\" in *'remote error'*|*'access denied'*) break;; esac;"
|
||||
f" sleep 1;"
|
||||
f"done\n"
|
||||
"git init -q -b main repo\n"
|
||||
|
||||
@@ -10,6 +10,7 @@ from claude_bottle.git_gate import (
|
||||
GitGatePlan,
|
||||
GitGateUpstream,
|
||||
git_gate_known_hosts_line,
|
||||
git_gate_render_access_hook,
|
||||
git_gate_render_entrypoint,
|
||||
git_gate_render_hook,
|
||||
git_gate_upstreams_for_bottle,
|
||||
@@ -87,6 +88,12 @@ class TestEntrypointRender(unittest.TestCase):
|
||||
self.assertIn("exec git daemon", script)
|
||||
self.assertIn("--enable=receive-pack", script)
|
||||
self.assertIn("--base-path=/git", script)
|
||||
# The access-hook is what makes fetch a mirror operation
|
||||
# against the upstream (PRD 0008 v1.1).
|
||||
self.assertIn("--access-hook=/etc/git-gate/access-hook", script)
|
||||
# Each repo's `origin` remote is wired to the upstream via
|
||||
# --mirror=fetch so `git fetch origin` mirrors all refs.
|
||||
self.assertIn("remote add --mirror=fetch origin", script)
|
||||
|
||||
def test_empty_upstreams_still_execs_daemon(self):
|
||||
# A no-upstream gate is a no-op for repos but the daemon still
|
||||
@@ -97,17 +104,36 @@ class TestEntrypointRender(unittest.TestCase):
|
||||
|
||||
|
||||
class TestHookRender(unittest.TestCase):
|
||||
def test_hook_has_two_phases(self):
|
||||
def test_pre_receive_hook_has_two_phases(self):
|
||||
hook = git_gate_render_hook()
|
||||
# Phase 1: gitleaks. Phase 2: forward.
|
||||
# Phase 1: gitleaks. Phase 2: forward to origin.
|
||||
self.assertIn("gitleaks git", hook)
|
||||
self.assertIn("git push upstream", hook)
|
||||
self.assertIn("git push origin", hook)
|
||||
# KnownHostKey absence is fail-closed.
|
||||
self.assertIn("refusing to push", hook)
|
||||
# Stdin is buffered to a tempfile so both phases can re-read.
|
||||
self.assertIn("refs_file=$(mktemp)", hook)
|
||||
|
||||
|
||||
class TestAccessHookRender(unittest.TestCase):
    """Text-level checks on the script from `git_gate_render_access_hook`."""

    def test_access_hook_refreshes_origin_on_upload_pack(self):
        """The script guards on the service name and fetches from origin."""
        rendered = git_gate_render_access_hook()
        expected_fragments = (
            # Service-name guard: the refresh only happens for
            # upload-pack (fetch / clone / pull / ls-remote); for
            # receive-pack the script exits early and the pre-receive
            # hook does the gating.
            'service=$1',
            '"$service" != "upload-pack"',
            # The upstream fetch is what turns the gate into a
            # transparent mirror.
            'git -C "$repo_dir" fetch origin --prune',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_access_hook_fail_closed_on_upstream_error(self):
        """A failed upstream fetch must surface as a non-zero exit."""
        rendered = git_gate_render_access_hook()
        # The non-zero exit propagates to the agent's fetch as a real
        # error instead of letting it read stale data.
        for fragment in ("refusing to serve stale data", "exit 1"):
            self.assertIn(fragment, rendered)
|
||||
|
||||
|
||||
class TestPrepare(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.stage = Path(tempfile.mkdtemp())
|
||||
@@ -117,7 +143,7 @@ class TestPrepare(unittest.TestCase):
|
||||
|
||||
shutil.rmtree(self.stage, ignore_errors=True)
|
||||
|
||||
def test_prepare_writes_entrypoint_and_hook_mode_600(self):
|
||||
def test_prepare_writes_all_three_scripts(self):
|
||||
plan = _StubGate().prepare(
|
||||
fixture_with_git().bottles["dev"], "demo", self.stage
|
||||
)
|
||||
@@ -127,8 +153,17 @@ class TestPrepare(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
self.stage / "git_gate_pre_receive.sh", plan.hook_script
|
||||
)
|
||||
self.assertEqual(
|
||||
self.stage / "git_gate_access_hook.sh", plan.access_hook_script
|
||||
)
|
||||
# Entrypoint + pre-receive are mode 600 (loaded into the
|
||||
# gate by docker cp and then `install -m 755`'d into each
|
||||
# bare repo's hooks/ — source bit doesn't matter). The
|
||||
# access-hook is execed directly by git daemon, so it has to
|
||||
# carry the x bit through docker cp.
|
||||
self.assertEqual(0o600, os.stat(plan.entrypoint_script).st_mode & 0o777)
|
||||
self.assertEqual(0o600, os.stat(plan.hook_script).st_mode & 0o777)
|
||||
self.assertEqual(0o700, os.stat(plan.access_hook_script).st_mode & 0o777)
|
||||
|
||||
def test_prepare_plan_carries_upstreams_and_slug(self):
|
||||
plan = _StubGate().prepare(
|
||||
|
||||
Reference in New Issue
Block a user