feat(git-gate): mirror fetch through access-hook (bidirectional)
test / unit (pull_request) Successful in 11s
test / integration (pull_request) Successful in 14s

The gate is now a transparent mirror, not push-only. Per-repo
init now runs `git remote add --mirror=fetch origin <url>` so a
later `git fetch origin` mirrors the upstream's full ref graph at
canonical paths. The pre-receive hook forwards accepted refs via
`git push origin` (the remote was renamed from `upstream` to `origin`).

New: an access-hook script wired via `git daemon --access-hook`
runs `git fetch origin --prune` against the real upstream before
every upload-pack request (clone, fetch, pull, ls-remote). On
upstream error the hook exits non-zero — the agent's fetch fails
rather than the gate serving stale data.

The pre-existing smoke test (ls-remote against an unreachable
upstream returns refs) had to be inverted: under the bidirectional
design any ls-remote success is necessarily a success against
the upstream, so the unreachable-upstream case now correctly
fails closed.
This commit is contained in:
2026-05-12 21:37:04 -04:00
parent ae7e22065f
commit fdd06c54d2
4 changed files with 180 additions and 47 deletions
+94 -22
View File
@@ -1,11 +1,19 @@
"""Per-agent git-gate (PRD 0008).
A third per-agent sidecar that fronts the bottle's declared git
upstreams. Each `bottle.git` entry maps to a bare repo on the gate;
the gate runs `git daemon --enable=receive-pack` so the agent can
push to it via `git://<gate>/<name>.git`. A pre-receive hook scans
the incoming refs with gitleaks; on clean, it forwards the refs to
the real upstream using a credential the gate holds.
upstreams as a transparent mirror. Each `bottle.git` entry maps to
a bare repo on the gate; `git daemon` serves the bare repos over
`git://<gate>/<name>.git`. Two hooks make the mirror bidirectional:
- **`pre-receive`** (push path) — gitleaks-scans incoming refs and,
on clean, forwards them to the real upstream with the
gate-resident credential.
- **`--access-hook`** (fetch path) — runs `git fetch origin --prune`
against the real upstream before every `upload-pack`, so an
agent fetch returns whatever the upstream has *now*. Fail-closed
if the upstream is unreachable.
The agent never sees the upstream credential under either path.
Why a third sidecar (not folded into pipelock or ssh-gate): the
gate is the only one of the three that holds upstream push
@@ -53,16 +61,20 @@ class GitGateUpstream:
class GitGatePlan:
"""Output of GitGate.prepare; consumed by .start.
`upstreams` + `slug` + `entrypoint_script` + `hook_script` are
filled in at prepare time (host-side, side-effect-free on docker).
The network fields are populated by the backend's launch step via
`dataclasses.replace` once those networks exist. Empty defaults
are sentinels meaning "not yet set"; `.start` validates that
they are populated."""
The script + slug + upstream fields are filled at prepare time
(host-side, side-effect-free on docker). The network fields are
populated by the backend's launch step via `dataclasses.replace`
once those networks exist. Empty defaults are sentinels meaning
"not yet set"; `.start` validates that they are populated.
`hook_script` is the shared `pre-receive` for push-time gating;
`access_hook_script` is `git daemon`'s `--access-hook` for the
fetch-time upstream refresh."""
slug: str
entrypoint_script: Path
hook_script: Path
access_hook_script: Path
upstreams: tuple[GitGateUpstream, ...]
internal_network: str = ""
egress_network: str = ""
@@ -101,8 +113,8 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
upstream, then `exec git daemon`. The function reads
`/git-gate/creds/<name>-{key,known_hosts}` (laid down by
`DockerGitGate.start` via docker cp) and wires them into each
bare repo's config so the shared pre-receive hook can pick them
up at push time."""
bare repo's config; the access-hook + pre-receive hook pick those
paths up at fetch / push time."""
lines = [
"#!/bin/sh",
"set -eu",
@@ -121,8 +133,13 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
" repo=/git/${name}.git",
" if [ ! -d \"$repo\" ]; then",
" git init --bare \"$repo\" >/dev/null",
# --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so",
# a later `git fetch origin` mirrors the upstream's full ref",
# graph (heads, tags, notes) into the bare repo at canonical",
# paths. It does NOT set remote.origin.mirror=true, so an",
# explicit `git push origin <ref>:<ref>` still pushes one ref.",
" git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"",
" fi",
" git -C \"$repo\" config remote.upstream.url \"$upstream_url\"",
" git -C \"$repo\" config git-gate.identityFile \"$keyfile\"",
" git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"",
" git -C \"$repo\" config receive.denyCurrentBranch ignore",
@@ -143,6 +160,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
" --base-path=/git \\",
" --export-all \\",
" --enable=receive-pack \\",
" --access-hook=/etc/git-gate/access-hook \\",
" --verbose",
])
return "\n".join(lines) + "\n"
@@ -150,9 +168,9 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
def git_gate_render_hook() -> str:
"""The shared pre-receive hook: gitleaks-scan all incoming refs,
then forward each accepted ref to the real upstream using the
per-repo credential. Failure in either phase aborts the push so
the agent sees a real rejection. POSIX sh.
then forward each accepted ref to the real upstream (`origin`)
using the per-repo credential. Failure in either phase aborts
the push so the agent sees a real rejection. POSIX sh.
Two phases (scan all, then push all) keeps a hit on ref N from
half-pushing refs 1..N-1; both phases re-read stdin from a temp
@@ -183,7 +201,8 @@ while IFS=' ' read -r old new ref; do
fi
done < "$refs_file"
# Phase 2: forward each ref to the upstream.
# Phase 2: forward each ref to the upstream (`origin`, configured
# in the entrypoint via `git remote add --mirror=fetch`).
keyfile=$(git config --get git-gate.identityFile)
hostsfile=$(git config --get git-gate.knownHosts)
if [ ! -f "$hostsfile" ]; then
@@ -200,8 +219,8 @@ while IFS=' ' read -r old new ref; do
else
refspec="$new:$ref"
fi
echo "git-gate: forwarding $ref to upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git push upstream "$refspec" 1>&2; then
echo "git-gate: forwarding $ref to origin" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then
echo "git-gate: upstream push failed for $ref" >&2
exit 1
fi
@@ -211,6 +230,52 @@ exit 0
"""
def git_gate_render_access_hook() -> str:
    """Render the `git daemon --access-hook` script (POSIX sh).

    git daemon runs the hook before serving each request. For
    `upload-pack` (fetch / clone / ls-remote / pull) the script first
    runs `git fetch origin --prune` inside the bare repo, so the
    response reflects upstream's current state. For every other
    service (notably `receive-pack`) it exits 0 immediately and
    leaves gating to the pre-receive hook.

    Arguments git daemon passes to the hook, per git-daemon(1):
      $1  service name (`upload-pack`, `receive-pack`, ...)
      $2  path to the repository
      $3  hostname the client requested (unused)
      $4  canonical hostname (unused)
      $5  IP address (unused)
      $6  TCP port (unused)

    Fail-closed: the hook exits 1 when either credential file named
    in the repo config (`git-gate.identityFile`,
    `git-gate.knownHosts`) is missing, or when the upstream fetch
    fails, so the agent's fetch fails instead of silently seeing
    stale data. All diagnostics and the fetch output go to stderr.

    Returns:
        The script text, newline-terminated.
    """
    return r"""#!/bin/sh
# git-gate access-hook (PRD 0008).
# argv: $1=service $2=repo $3=hostname $4=canonical-hostname $5=ip $6=port
set -u

service=$1
repo_dir=$2

# Push path keeps its own gating in pre-receive (gitleaks +
# forward). Only refresh-from-upstream on fetch operations.
if [ "$service" != "upload-pack" ]; then
    exit 0
fi

keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true)
hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true)

# Both credential files must exist. `[ -f "" ]` is false, so an unset
# config key is rejected here as well as a dangling path.
if [ ! -f "$keyfile" ] || [ ! -f "$hostsfile" ]; then
    echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2
    exit 1
fi

ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes"

echo "git-gate: refreshing $repo_dir from upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then
    echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2
    exit 1
fi

exit 0
"""
class GitGate(ABC):
"""The per-agent git-gate. Encapsulates the host-side prepare
(upstream lift + entrypoint/hook render); the sidecar's
@@ -219,8 +284,8 @@ class GitGate(ABC):
def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan:
"""Compute the upstream table from `bottle.git` and write the
entrypoint + pre-receive scripts (mode 600) under `stage_dir`.
Pure host-side, no docker subprocess.
entrypoint, pre-receive hook, and access-hook scripts (mode
600) under `stage_dir`. Pure host-side, no docker subprocess.
Returned plan is incomplete: the launch step must fill
`internal_network` / `egress_network` via `dataclasses.replace`
@@ -232,10 +297,17 @@ class GitGate(ABC):
hook = stage_dir / "git_gate_pre_receive.sh"
hook.write_text(git_gate_render_hook())
hook.chmod(0o600)
access_hook = stage_dir / "git_gate_access_hook.sh"
access_hook.write_text(git_gate_render_access_hook())
# 0o700 (not 0o600): git daemon execs --access-hook directly,
# not via `sh`, so the script needs the x bit. docker cp
# preserves source mode into the container.
access_hook.chmod(0o700)
return GitGatePlan(
slug=slug,
entrypoint_script=entrypoint,
hook_script=hook,
access_hook_script=access_hook,
upstreams=upstreams,
)