Files
bot-bottle/bot_bottle/git_gate.py
T
didericis-claude 0b5d59cf9e feat(prd-0048): implement SSH deploy-key provisioning with contrib/gitea
- manifest_git.py: add ProvisionedKeyConfig dataclass; extend GitEntry
  with ProvisionedKey field (optional); make IdentityFile default to ""
  so provisioned_key entries can be constructed without a static path;
  add _parse_provisioned_key_config; update from_repos_entry to accept
  provisioned_key as an alternative to identity (mutually exclusive,
  parser rejects both-or-neither)

- deploy_key_provisioner.py (new): DeployKeyProvisioner ABC with create()
  and delete() abstract methods; get_provisioner() factory with lazy
  contrib import for gitea

- contrib/gitea/deploy_key_provisioner.py (new): GiteaDeployKeyProvisioner
  generating ed25519 keypairs via ssh-keygen and managing them through
  the Gitea deploy-key API (POST/DELETE); 404 on delete is success;
  all other errors raise RuntimeError

- git_gate.py: add _provision_dynamic_key() called in GitGate.prepare()
  for entries with ProvisionedKey — generates key, writes private key
  and key ID files to stage_dir, patches GitGateUpstream.identity_file;
  add revoke_git_gate_provisioned_keys() for teardown — raises on failure

- docker/launch.py: call revoke_git_gate_provisioned_keys() in teardown()
  after stack.close() so revocation runs after containers stop and
  failures propagate (not suppressed)

- smolmachines/launch.py: extract _teardown_smolmachines() helper that
  catches stack.close() errors (warn + re-raise) then calls revocation;
  same fatal-on-failure contract as docker backend

- test_manifest_git.py: 9 new cases for provisioned_key parsing
- test_deploy_key_provisioner.py (new): factory smoke tests
- test_contrib_gitea_deploy_key.py (new): create/delete/error/split tests

Closes #169
2026-06-03 11:58:36 -04:00

504 lines
20 KiB
Python

"""Per-agent git-gate (PRD 0008).
A third per-agent sidecar that fronts the bottle's declared git
upstreams as a transparent mirror. Each `bottle.git` entry maps to
a bare repo on the gate; `git daemon` serves the bare repos over
`git://<gate>/<name>.git`. Two hooks make the mirror bidirectional:
- **`pre-receive`** (push path) — gitleaks-scans incoming refs and,
on clean, forwards them to the real upstream with the
gate-resident credential.
- **`--access-hook`** (fetch path) — runs `git fetch origin --prune`
against the real upstream before every `upload-pack`, so an
agent fetch returns whatever the upstream has *now*. Fail-closed
if the upstream is unreachable.
The agent never sees the upstream credential under either path.
Why a third sidecar (not folded into pipelock or ssh-gate): the
gate is the only one of the three that holds upstream push
credentials. Mixing it with pipelock would put push creds in the
same blast radius as internet-facing TLS interception; mixing it
with ssh-gate would force ssh-gate above L4 and into git-protocol
land. See `docs/prds/0008-git-gate.md`.
This module defines the abstract gate (`GitGate`) and its plan
dataclass (`GitGatePlan`). The sidecar's start/stop lifecycle is
backend-specific and lives on concrete subclasses (see
`bot_bottle/backend/docker/git_gate.py`)."""
from __future__ import annotations
import dataclasses
import os
import shlex
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from .log import info
from .manifest import Bottle, GitEntry
# Short network alias for git-gate inside the sidecar bundle. The
# agent's `.gitconfig` insteadOf rewrites resolve through this name.
GIT_GATE_HOSTNAME = "git-gate"
# Bound half-open git client sessions. If an agent/tool runner is
# interrupted during push, git daemon should reap the receive-pack
# child instead of keeping the gate wedged indefinitely.
GIT_GATE_DAEMON_TIMEOUT_SECS = 15
@dataclass(frozen=True)
class GitGateUpstream:
"""One bare repo on the gate. `name` drives the bare-repo path
(`/git/<name>.git`), the agent's URL after insteadOf rewrite
(`git://<gate>/<name>.git`), and the per-upstream credential
paths inside the gate (`/git-gate/creds/<name>-key` and
`/git-gate/creds/<name>-known_hosts`).
`identity_file` is the host-side absolute path the gate's start
step will docker-cp into the container. `known_host_key` is the
KnownHostKey string from the manifest; the gate's start step
materialises it into a known_hosts file if non-empty.
the gate credential paths inside the running sidecar."""
name: str
upstream_url: str
upstream_host: str
upstream_port: str
identity_file: str
known_host_key: str
known_hosts_file: Path = Path()
@dataclass(frozen=True)
class GitGatePlan:
"""Output of GitGate.prepare; consumed by .start.
The script + slug + upstream fields are filled at prepare time
(host-side, side-effect-free on docker). The network fields are
populated by the backend's launch step via `dataclasses.replace`
once those networks exist. Empty defaults are sentinels meaning
"not yet set"; `.start` validates that they are populated.
`hook_script` is the shared `pre-receive` for push-time gating;
`access_hook_script` is `git daemon`'s `--access-hook` for the
fetch-time upstream refresh."""
slug: str
entrypoint_script: Path
hook_script: Path
access_hook_script: Path
upstreams: tuple[GitGateUpstream, ...]
internal_network: str = ""
egress_network: str = ""
def git_gate_upstreams_for_bottle(bottle: Bottle) -> tuple[GitGateUpstream, ...]:
"""Lift each `bottle.git` entry into a GitGateUpstream. Unique-Name
validation already ran in `manifest.Bottle.from_dict`."""
return tuple(
GitGateUpstream(
name=e.Name,
upstream_url=e.Upstream,
upstream_host=e.UpstreamHost,
upstream_port=e.UpstreamPort,
identity_file=e.IdentityFile,
known_host_key=e.KnownHostKey,
)
for e in bottle.git
)
def git_gate_render_gitconfig(
entries: tuple[GitEntry, ...], gate_host: str, *, scheme: str = "git",
) -> str:
"""Render the agent's ~/.gitconfig content for git-gate
`insteadOf` rewrites. Pure host-side, no docker / smolvm;
exposed for tests + reuse across backends.
`gate_host` is the part of the URL between `<scheme>://` and the
repo path — backends differ here:
- docker: `git-gate` (the short network alias)
- smolmachines: `<bundle_ip>:<port>` (no DNS in the
TSI-allowlisted guest)
Empty `entries` returns an empty string so callers can no-op
cleanly without conditional formatting at the call site."""
if not entries:
return ""
out = [
"# bot-bottle git-gate (PRD 0008): every git operation against\n",
"# a declared upstream routes through the gate, which mirrors\n",
"# the upstream bidirectionally (gitleaks-scanned push;\n",
"# fetch-from-upstream-before-every-upload-pack via access-hook).\n",
]
for entry in entries:
out.append(f'[url "{scheme}://{gate_host}/{entry.Name}.git"]\n')
out.append(f"\tinsteadOf = {entry.Upstream}\n")
if entry.RemoteKey and entry.RemoteKey != entry.UpstreamHost:
port = (
f":{entry.UpstreamPort}"
if entry.UpstreamPort and entry.UpstreamPort != "22"
else ""
)
alias = (
f"ssh://{entry.UpstreamUser}@{entry.RemoteKey}{port}/"
f"{entry.UpstreamPath}"
)
out.append(f"\tinsteadOf = {alias}\n")
return "".join(out)
def git_gate_known_hosts_line(host: str, port: str, key: str) -> str:
"""Format `host[:port] key` for OpenSSH's known_hosts. Non-default
ports use the bracketed `[host]:port` form (the form OpenSSH writes
on disk for hosts reached via a non-22 port)."""
if port and port != "22":
target = f"[{host}]:{port}"
else:
target = host
return f"{target} {key}\n"
def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
"""Posix-sh entrypoint. One `init_repo` call per upstream, then
`exec git daemon`. The function reads
`/git-gate/creds/<name>-{key,known_hosts}` (bind-mounted into
the bundle by the renderer) and wires them into each bare repo's
config; the access-hook + pre-receive hook pick those paths up
at fetch / push time."""
lines = [
"#!/bin/sh",
"set -eu",
"",
"init_repo() {",
" name=$1",
" upstream_url=$2",
" keyfile=/git-gate/creds/${name}-key",
" hostsfile=/git-gate/creds/${name}-known_hosts",
"",
# `|| true`: PRD 0018 chunk 3+ bind-mounts these RO from the
# host, so chmod-syscalls fail with EROFS. The files already
# have the right perms on the host (SSH requires 0600 to load
# the key in the first place), so the chmod is best-effort
# cleanup for the legacy docker-cp path where the file
# landed at the host's umask perms.
" chmod 600 \"$keyfile\" 2>/dev/null || true",
" if [ -f \"$hostsfile\" ]; then",
" chmod 600 \"$hostsfile\" 2>/dev/null || true",
" fi",
"",
" repo=/git/${name}.git",
" if [ ! -d \"$repo\" ]; then",
" git init --bare \"$repo\" >/dev/null",
# --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so",
# a later `git fetch origin` mirrors the upstream's full ref",
# graph (heads, tags, notes) into the bare repo at canonical",
# paths. It does NOT set remote.origin.mirror=true, so an",
# explicit `git push origin <ref>:<ref>` still pushes one ref.",
" git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"",
" fi",
" git -C \"$repo\" config git-gate.identityFile \"$keyfile\"",
" git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"",
" git -C \"$repo\" config receive.denyCurrentBranch ignore",
" git -C \"$repo\" config http.receivepack true",
" install -m 755 /etc/git-gate/pre-receive \"$repo/hooks/pre-receive\"",
"}",
"",
"mkdir -p /git",
]
for u in upstreams:
lines.append(f"init_repo {shlex.quote(u.name)} {shlex.quote(u.upstream_url)}")
lines.extend([
"",
"exec git daemon \\",
" --reuseaddr \\",
f" --timeout={GIT_GATE_DAEMON_TIMEOUT_SECS} \\",
f" --init-timeout={GIT_GATE_DAEMON_TIMEOUT_SECS} \\",
" --base-path=/git \\",
" --export-all \\",
" --enable=receive-pack \\",
" --access-hook=/etc/git-gate/access-hook \\",
" --verbose",
])
return "\n".join(lines) + "\n"
def git_gate_render_hook() -> str:
"""The shared pre-receive hook: gitleaks-scan all incoming refs,
then forward each accepted ref to the real upstream (`origin`)
using the per-repo credential. Failure in either phase aborts
the push so the agent sees a real rejection. POSIX sh.
Two phases (scan all, then push all) keeps a hit on ref N from
half-pushing refs 1..N-1; both phases re-read stdin from a temp
file because pre-receive's stdin is a one-shot stream."""
return r"""#!/bin/sh
# git-gate pre-receive (PRD 0008). Stdin: <old> <new> <ref> per line.
set -u
refs_file=$(mktemp)
trap 'rm -f "$refs_file"' EXIT
cat > "$refs_file"
zero=0000000000000000000000000000000000000000
# Phase 1: gitleaks scan each ref's incoming commits.
while IFS=' ' read -r old new ref; do
[ -z "$ref" ] && continue
[ "$new" = "$zero" ] && continue
if [ "$old" = "$zero" ]; then
# New ref: scan only the commits this push introduces — those
# reachable from $new but not from any ref the gate already has.
# Everything already on the gate arrived via upstream mirror-fetch
# or a previously gitleaks-scanned push, so it's already-upstream
# or already-scanned; re-scanning it (the old `$new` full-ancestry
# range) only resurfaces historical findings and blocks every new
# branch. See PRD 0028 / issue #106.
log_opts="$new --not --all"
else
log_opts="$old..$new"
fi
echo "git-gate: gitleaks scanning $ref ($log_opts)" >&2
if ! gitleaks git --log-opts="$log_opts" --no-banner --redact 1>&2; then
echo "git-gate: gitleaks rejected push to $ref" >&2
exit 1
fi
done < "$refs_file"
# Phase 2: forward each ref to the upstream (`origin`, configured
# in the entrypoint via `git remote add --mirror=fetch`).
keyfile=$(git config --get git-gate.identityFile)
hostsfile=$(git config --get git-gate.knownHosts)
if [ ! -f "$hostsfile" ]; then
echo "git-gate: no KnownHostKey configured for this upstream; refusing to push" >&2
echo "git-gate: add KnownHostKey to the bottle.git entry and restart the bottle" >&2
exit 1
fi
ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o BatchMode=yes -o ConnectTimeout=10"
while IFS=' ' read -r old new ref; do
[ -z "$ref" ] && continue
if [ "$new" = "$zero" ]; then
refspec=":$ref"
else
refspec="$new:$ref"
fi
echo "git-gate: forwarding $ref to origin" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then
echo "git-gate: upstream push failed for $ref" >&2
exit 1
fi
done < "$refs_file"
exit 0
"""
def git_gate_render_access_hook() -> str:
"""`git daemon --access-hook` script. Runs before each protocol
service; for `upload-pack` (fetch / clone / ls-remote / pull) it
refreshes the bare repo from upstream first, so the response
reflects upstream's current state. For other services (notably
`receive-pack`) it returns 0 immediately and lets the existing
pre-receive hook gate the operation. POSIX sh.
The hook receives:
$1 service name (`upload-pack`, `receive-pack`, ...)
$2 absolute path to the resolved repo
$3 client hostname (unused)
$4 client tcp address (unused)
Fail-closed on upstream errors: the agent's fetch fails too,
so it never silently sees stale data — matches the PRD's
'equivalent to operations against the upstream' contract."""
return r"""#!/bin/sh
# git-gate access-hook (PRD 0008). $1=service $2=repo $3=host $4=peer
set -u
service=$1
repo_dir=$2
# Push path keeps its own gating in pre-receive (gitleaks +
# forward). Only refresh-from-upstream on fetch operations.
if [ "$service" != "upload-pack" ]; then
exit 0
fi
keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true)
hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true)
if [ -z "$keyfile" ] || [ ! -f "$hostsfile" ]; then
echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2
exit 1
fi
ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o BatchMode=yes -o ConnectTimeout=10"
echo "git-gate: refreshing $repo_dir from upstream" >&2
if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then
echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2
exit 1
fi
# Sync the bare repo's HEAD to upstream's HEAD on the first fetch
# (when it still points at the `git init --bare` default of
# refs/heads/master and upstream uses something else, the cloned
# checkout would fail with "remote HEAD refers to nonexistent ref").
# Costs one extra ls-remote on first fetch only; subsequent fetches
# skip the branch. If upstream's default branch changes after the
# gate has cached it, restart the bottle to resync.
if ! git -C "$repo_dir" rev-parse --verify HEAD >/dev/null 2>&1; then
upstream_head=$(GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" \
ls-remote --symref origin HEAD 2>/dev/null \
| awk '/^ref:/ {print $2; exit}')
if [ -n "$upstream_head" ]; then
git -C "$repo_dir" symbolic-ref HEAD "$upstream_head" || true
fi
fi
exit 0
"""
def _provision_dynamic_key(
entry: GitEntry,
slug: str,
stage_dir: Path,
) -> str:
"""Generate a fresh ed25519 keypair, register the public half with
the forge, and persist the private key + key ID under `stage_dir`.
Returns the host-side path to the private key file so the caller
can inject it into the GitGateUpstream as `identity_file`."""
from .deploy_key_provisioner import get_provisioner
pk = entry.ProvisionedKey
assert pk is not None
token = os.environ.get(pk.token_env)
if token is None:
raise RuntimeError(
f"git-gate.repos[{entry.Name!r}] provisioned_key.token_env"
f" = {pk.token_env!r}: env var is not set"
)
api_url = pk.api_url or f"https://{entry.UpstreamHost}"
provisioner = get_provisioner(pk.provider, token, api_url)
owner_repo = entry.UpstreamPath
if owner_repo.endswith(".git"):
owner_repo = owner_repo[:-4]
title = f"bot-bottle:{slug}:{entry.Name}"
info(f"provisioning deploy key for git-gate.repos[{entry.Name!r}]")
key_id, private_key_bytes = provisioner.create(owner_repo, title)
key_file = stage_dir / f"{entry.Name}-key"
key_file.write_bytes(private_key_bytes)
key_file.chmod(0o600)
id_file = stage_dir / f"{entry.Name}-deploy-key-id"
id_file.write_text(key_id)
id_file.chmod(0o600)
info(f"provisioned deploy key {key_id} for git-gate.repos[{entry.Name!r}]")
return str(key_file)
def revoke_git_gate_provisioned_keys(bottle: Bottle, stage_dir: Path) -> None:
"""Revoke all deploy keys provisioned for `bottle` during prepare.
Called at teardown after containers stop. Raises if any revocation
fails — a stranded key is a security concern that the operator must
address manually."""
from .deploy_key_provisioner import get_provisioner
for entry in bottle.git:
if entry.ProvisionedKey is None:
continue
pk = entry.ProvisionedKey
id_file = stage_dir / f"{entry.Name}-deploy-key-id"
if not id_file.exists():
continue
key_id = id_file.read_text().strip()
token = os.environ.get(pk.token_env)
if token is None:
raise RuntimeError(
f"git-gate.repos[{entry.Name!r}] provisioned_key.token_env"
f" = {pk.token_env!r}: env var is not set;"
f" cannot revoke deploy key {key_id}"
)
api_url = pk.api_url or f"https://{entry.UpstreamHost}"
provisioner = get_provisioner(pk.provider, token, api_url)
owner_repo = entry.UpstreamPath
if owner_repo.endswith(".git"):
owner_repo = owner_repo[:-4]
info(f"revoking deploy key {key_id} for git-gate.repos[{entry.Name!r}]")
provisioner.delete(owner_repo, key_id)
info(f"revoked deploy key {key_id} for git-gate.repos[{entry.Name!r}]")
class GitGate(ABC):
"""The per-agent git-gate. Encapsulates the host-side prepare
(upstream lift + entrypoint/hook render); the sidecar's
start/stop lifecycle is backend-specific and lives on concrete
subclasses."""
def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan:
"""Compute the upstream table from `bottle.git` and write the
entrypoint, pre-receive hook, and access-hook scripts (mode
600) under `stage_dir`. Pure host-side, no docker subprocess.
For `provisioned_key` entries, also generates and registers
a fresh deploy key via the forge API and writes the private key
+ key ID to `stage_dir`.
Returned plan is incomplete: the launch step must fill
`internal_network` / `egress_network` via `dataclasses.replace`
before passing the plan to `.start`."""
upstreams_list = list(git_gate_upstreams_for_bottle(bottle))
for i, entry in enumerate(bottle.git):
if entry.ProvisionedKey is not None:
key_file = _provision_dynamic_key(entry, slug, stage_dir)
upstreams_list[i] = dataclasses.replace(
upstreams_list[i], identity_file=key_file
)
upstreams = tuple(upstreams_list)
entrypoint = stage_dir / "git_gate_entrypoint.sh"
entrypoint.write_text(git_gate_render_entrypoint(upstreams))
entrypoint.chmod(0o600)
hook = stage_dir / "git_gate_pre_receive.sh"
hook.write_text(git_gate_render_hook())
hook.chmod(0o600)
access_hook = stage_dir / "git_gate_access_hook.sh"
access_hook.write_text(git_gate_render_access_hook())
# 0o700 (not 0o600): git daemon execs --access-hook directly,
# not via `sh`, so the script needs the x bit. docker cp
# preserves source mode into the container.
access_hook.chmod(0o700)
upstreams_with_files: list[GitGateUpstream] = []
for u in upstreams:
known_hosts_file = Path()
if u.known_host_key:
known_hosts_file = stage_dir / f"{u.name}-known_hosts"
known_hosts_file.write_text(
git_gate_known_hosts_line(
u.upstream_host, u.upstream_port, u.known_host_key,
)
)
known_hosts_file.chmod(0o600)
upstreams_with_files.append(
GitGateUpstream(
name=u.name,
upstream_url=u.upstream_url,
upstream_host=u.upstream_host,
upstream_port=u.upstream_port,
identity_file=u.identity_file,
known_host_key=u.known_host_key,
known_hosts_file=known_hosts_file,
)
)
return GitGatePlan(
slug=slug,
entrypoint_script=entrypoint,
hook_script=hook,
access_hook_script=access_hook,
upstreams=tuple(upstreams_with_files),
)