diff --git a/Dockerfile.git-gate b/Dockerfile.git-gate new file mode 100644 index 0000000..1132fe0 --- /dev/null +++ b/Dockerfile.git-gate @@ -0,0 +1,37 @@ +# Per-agent git-gate sidecar image (PRD 0008). +# +# Runs `git daemon --enable=receive-pack` so the agent in the bottle +# can push to it over git://. A shared pre-receive hook runs gitleaks +# against each incoming ref; on clean, it forwards the ref to the real +# upstream using a credential the gate holds. The agent never sees the +# upstream credential. +# +# The agent-facing leg sits on a Docker --internal network with no +# default route, so the image is fully self-contained: no apk pulls at +# boot, no remote registry lookups during the entrypoint. + +# Base on the upstream gitleaks image (alpine + gitleaks v8.x); +# alpine doesn't package gitleaks so this avoids a separate +# install path. Pinned by digest for reproducibility. +FROM zricethezav/gitleaks@sha256:c00b6bd0aeb3071cbcb79009cb16a60dd9e0a7c60e2be9ab65d25e6bc8abbb7f + +# openssh-client supplies the upstream SSH transport the pre-receive +# hook uses to forward accepted refs. git-daemon is the listener the +# agent pushes to (alpine ships `git-daemon` as a sub-package, not +# part of `git`). The `git` core binary is already in the base image. +RUN apk add --no-cache openssh-client git-daemon + +# Layout the gate uses at runtime: +# /git-gate-entrypoint.sh — docker-cp'd at start time +# /etc/git-gate/pre-receive — shared hook, docker-cp'd at start +# /git-gate/creds/<name>-key — per-upstream identity, docker-cp'd +# /git-gate/creds/<name>-known_hosts — per-upstream known_hosts, docker-cp'd +# /git/<name>.git — bare repos, created by the entrypoint +# +# The intermediate directories must exist before `docker cp` runs (cp +# does not create them); the bare-repo parent (/git) is also pre-created +# defensively. +RUN mkdir -p /etc/git-gate /git-gate/creds /git + +# Base image's ENTRYPOINT is the gitleaks binary; override explicitly. 
+ENTRYPOINT ["/bin/sh", "/git-gate-entrypoint.sh"] diff --git a/README.md b/README.md index 4a08dfb..e877274 100644 --- a/README.md +++ b/README.md @@ -56,12 +56,14 @@ pieces of v1. ## Architecture -A bottle is three containers on a per-agent Docker `--internal` -network. The agent has no default route off-box; its only way out is -through the pipelock sidecar (for HTTP/HTTPS) or the ssh-gate sidecar -(for SSH). Both sidecars also sit on an egress network that does have -internet access, so the agent's traffic always passes through a -container that enforces the manifest before it leaves the host. +A bottle is the agent container plus up to three per-protocol egress +sidecars on a per-agent Docker `--internal` network. The agent has no +default route off-box; its only way out is through the pipelock +sidecar (for HTTP/HTTPS), the ssh-gate sidecar (for SSH), or the +git-gate sidecar (for git operations against declared upstreams). +Each sidecar also sits on an egress network that does have internet +access, so the agent's traffic always passes through a container +that enforces the manifest before it leaves the host. ``` host ( ./cli.py ) @@ -76,11 +78,17 @@ container that enforces the manifest before it leaves the host. 
│ │ built locally) │ │ (TLS bump, DLP,│ │ hosts │ │ │ │ allowlist) │ │ │ │ skills, env, │ └────────────────┘ │ - │ │ ~/.ssh/config │ │ - │ │ │ ssh ┌────────────────┐ │ TCP to + │ │ ~/.ssh/config, │ │ + │ │ ~/.gitconfig │ ssh ┌────────────────┐ │ TCP to │ │ │ ───────────────► │ socat/ssh image│──┼──► bottle.ssh │ │ │ │ (alpine/socat, │ │ upstreams │ │ │ │ L4 forwarder) │ │ + │ │ │ └────────────────┘ │ + │ │ │ │ + │ │ │ git ops ┌────────────────┐ │ SSH (push/ + │ │ │ ───────────────► │ git-gate image │──┼──► fetch) to + │ │ │ │ (gitleaks + │ │ bottle.git + │ │ │ │ git daemon) │ │ upstreams │ └──────────────────┘ └────────────────┘ │ │ │ │ agent on internal network (no default route); │ @@ -90,7 +98,8 @@ container that enforces the manifest before it leaves the host. - **agent image** — built from the repo `Dockerfile` (`node:22-slim` base) on first run; runs `claude` with the manifest-granted skills, - env vars, and `~/.ssh/config`. + env vars, `~/.ssh/config`, and `~/.gitconfig` (the latter for the + git-gate's `insteadOf` rules when `bottle.git` is set). - **pipelock image** — per-agent sidecar. Terminates the agent's outbound HTTP/HTTPS, enforces the resolved allowlist, runs DLP scanning. Design in `docs/prds/0001-per-agent-egress-proxy-via-pipelock.md` @@ -99,9 +108,22 @@ container that enforces the manifest before it leaves the host. One container, one socat listener per `bottle.ssh` entry, each forwarding TCP to the upstream `Hostname:Port`. SSH does *not* go through pipelock. Design in `docs/prds/0007-ssh-egress-gate.md`. +- **git-gate image** — per-agent sidecar built on `zricethezav/gitleaks` + (alpine + gitleaks + git-daemon + openssh-client). Runs + `git daemon` over `git://` as a bidirectional mirror of each + declared upstream. 
A pre-receive hook gitleaks-scans incoming + refs and forwards clean refs to the real upstream over SSH; an + access-hook runs `git fetch origin --prune` against the upstream + before every upload-pack so an agent fetch returns whatever the + upstream has *now* (fail-closed if unreachable). The agent's + `~/.gitconfig` rewrites the real URL to the gate via `insteadOf`, + so push, fetch, clone, and pull all route through. The agent + never sees the upstream credential. Brought up only when + `bottle.git` has entries. Design in `docs/prds/0008-git-gate.md`. -When the agent exits, `cli.py` tears down both sidecars and the two -networks; nothing about a bottle persists between runs. +When the agent exits, `cli.py` tears down every sidecar that was +brought up and the two networks; nothing about a bottle persists +between runs. ## Quickstart diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index 8e0dc63..1436bf0 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -37,7 +37,7 @@ from pathlib import Path from typing import Any, Generic, Sequence, TypeVar from ..log import die -from ..manifest import Manifest, SshEntry +from ..manifest import GitEntry, Manifest, SshEntry from ..util import expand_tilde from .util import host_skill_dir @@ -171,6 +171,7 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): bottle = manifest.bottle_for(spec.agent_name) self._validate_skills(agent.skills) self._validate_ssh_entries(bottle.ssh) + self._validate_git_entries(bottle.git) def _validate_skills(self, skills: Sequence[str]) -> None: """Each named skill must be a directory under the host's @@ -193,6 +194,16 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]): if not os.path.isfile(key): die(f"ssh key file not found for host '{entry.Host}': {key}") + def _validate_git_entries(self, entries: Sequence[GitEntry]) -> None: + """Each entry's IdentityFile must exist on the host (after + expanding leading ~) — the 
git-gate copies it in at start time + to authenticate the upstream push (PRD 0008). Shape is already + enforced by Manifest validation; this only checks presence.""" + for entry in entries: + key = expand_tilde(entry.IdentityFile) + if not os.path.isfile(key): + die(f"git upstream key file not found for '{entry.Name}': {key}") + @abstractmethod def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> PlanT: """Backend-specific plan resolution: image/container names, diff --git a/claude_bottle/backend/docker/backend.py b/claude_bottle/backend/docker/backend.py index 79a4eb9..dbc0ea5 100644 --- a/claude_bottle/backend/docker/backend.py +++ b/claude_bottle/backend/docker/backend.py @@ -23,6 +23,7 @@ from . import prepare as _prepare from .bottle import DockerBottle from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan +from .git_gate import DockerGitGate from .pipelock import DockerPipelockProxy from .provision import ca as _ca from .provision import git as _git @@ -41,16 +42,25 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup def __init__(self) -> None: self._proxy = DockerPipelockProxy() self._gate = DockerSSHGate() + self._git_gate = DockerGitGate() def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan: return _prepare.resolve_plan( - spec, stage_dir=stage_dir, proxy=self._proxy, gate=self._gate + spec, + stage_dir=stage_dir, + proxy=self._proxy, + gate=self._gate, + git_gate=self._git_gate, ) @contextmanager def launch(self, plan: DockerBottlePlan) -> Generator[DockerBottle, None, None]: with _launch.launch( - plan, proxy=self._proxy, gate=self._gate, provision=self.provision + plan, + proxy=self._proxy, + gate=self._gate, + git_gate=self._git_gate, + provision=self.provision, ) as bottle: yield bottle diff --git a/claude_bottle/backend/docker/bottle_plan.py b/claude_bottle/backend/docker/bottle_plan.py index f61e9d2..d031b23 100644 --- 
a/claude_bottle/backend/docker/bottle_plan.py +++ b/claude_bottle/backend/docker/bottle_plan.py @@ -11,6 +11,7 @@ import sys from dataclasses import dataclass, field from pathlib import Path +from ...git_gate import GitGatePlan from ...log import info from ...manifest import Agent, Bottle from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist @@ -27,6 +28,7 @@ class _PlanView: bottle: Bottle env_names: list[str] ssh_hosts: list[str] + git_names: list[str] prompt_first_line: str @@ -51,6 +53,7 @@ class DockerBottlePlan(BottlePlan): prompt_file: Path proxy_plan: PipelockProxyPlan gate_plan: SSHGatePlan + git_gate_plan: GitGatePlan allowlist_summary: str use_runsc: bool @@ -67,6 +70,7 @@ class DockerBottlePlan(BottlePlan): bottle=bottle, env_names=env_names, ssh_hosts=[e.Host for e in bottle.ssh], + git_names=[e.Name for e in bottle.git], prompt_first_line=agent.prompt.splitlines()[0] if agent.prompt else "", ) @@ -100,6 +104,16 @@ class DockerBottlePlan(BottlePlan): info(f" ssh gate : {'; '.join(gate_lines)}") else: info(" ssh hosts : (none)") + if v.git_names: + info(f" git remotes : {', '.join(v.git_names)}") + git_lines = [ + f"{u.name} -> {u.upstream_host}:{u.upstream_port} " + f"(gitleaks-scanned)" + for u in self.git_gate_plan.upstreams + ] + info(f" git gate : {'; '.join(git_lines)}") + else: + info(" git remotes : (none)") info(f" egress : {self.allowlist_summary}") info(" tls intercept : pipelock (per-bottle ephemeral CA, generated at launch)") info( @@ -131,6 +145,16 @@ class DockerBottlePlan(BottlePlan): } for u in self.gate_plan.upstreams ], + "git_remotes": v.git_names, + "git_gate": [ + { + "name": u.name, + "upstream": f"{u.upstream_host}:{u.upstream_port}", + "upstream_url": u.upstream_url, + "known_host_key_pinned": bool(u.known_host_key), + } + for u in self.git_gate_plan.upstreams + ], "egress": { "host_count": len(hosts), "hosts": hosts, diff --git a/claude_bottle/backend/docker/git_gate.py 
b/claude_bottle/backend/docker/git_gate.py new file mode 100644 index 0000000..e3c11a3 --- /dev/null +++ b/claude_bottle/backend/docker/git_gate.py @@ -0,0 +1,213 @@ +"""DockerGitGate — the Docker-specific lifecycle for the per-agent +git-gate sidecar (PRD 0008). Inherits the platform-agnostic prepare +step (upstream lift + entrypoint/hook render) from `GitGate`.""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +from ...git_gate import GitGate, GitGatePlan, git_gate_known_hosts_line +from ...log import die, info, warn +from ...util import expand_tilde +from . import util as docker_mod + + +GIT_GATE_IMAGE = os.environ.get( + "CLAUDE_BOTTLE_GIT_GATE_IMAGE", + "claude-bottle-git-gate:latest", +) + +GIT_GATE_DOCKERFILE = "Dockerfile.git-gate" + +GIT_GATE_ENTRYPOINT_IN_CONTAINER = "/git-gate-entrypoint.sh" +GIT_GATE_HOOK_IN_CONTAINER = "/etc/git-gate/pre-receive" +GIT_GATE_ACCESS_HOOK_IN_CONTAINER = "/etc/git-gate/access-hook" +GIT_GATE_CREDS_DIR_IN_CONTAINER = "/git-gate/creds" + +# git daemon's default listening port. Surfaced as a constant because +# integration tests probe the gate on it. +GIT_GATE_PORT = 9418 + +# Repo root, for `docker build` context. Resolved from this file's +# location: claude_bottle/backend/docker/git_gate.py → repo root. +_REPO_DIR = str(Path(__file__).resolve().parent.parent.parent.parent) + + +def git_gate_container_name(slug: str) -> str: + return f"claude-bottle-git-gate-{slug}" + + +def git_gate_host(slug: str) -> str: + """The hostname the agent's git client should connect to (same as + the container name — Docker's embedded DNS resolves it on the + `--internal` network).""" + return git_gate_container_name(slug) + + +def build_git_gate_image() -> None: + """Build the git-gate image from `Dockerfile.git-gate`. 
Called by + `DockerGitGate.start`; exposed at module level so integration + tests can build it without running the full launch pipeline.""" + docker_mod.build_image(GIT_GATE_IMAGE, _REPO_DIR, dockerfile=GIT_GATE_DOCKERFILE) + + +class DockerGitGate(GitGate): + """Brings the git-gate sidecar up and down via Docker.""" + + def start(self, plan: GitGatePlan) -> str: + """Boot the gate sidecar: + 1. Build the gate image (no-op when cache is hot). + 2. `docker create` on the internal network with the canonical + name; the image's ENTRYPOINT runs the cp'd entrypoint + script at start time. + 3. `docker cp` the entrypoint, the shared pre-receive hook, + and each upstream's identity + known_hosts into the + container. + 4. Attach to the per-agent egress network so the gate can + reach the real upstream. + 5. `docker start`. + Returns the container name (the target passed to `.stop`).""" + if not plan.upstreams: + die("DockerGitGate.start called with no upstreams; caller should skip") + if not plan.internal_network or not plan.egress_network: + die( + "DockerGitGate.start: internal_network / egress_network must be " + "populated on the plan before start" + ) + if not plan.entrypoint_script.is_file(): + die( + f"git-gate entrypoint missing at {plan.entrypoint_script}; " + f"GitGate.prepare must run first" + ) + if not plan.hook_script.is_file(): + die( + f"git-gate hook missing at {plan.hook_script}; " + f"GitGate.prepare must run first" + ) + if not plan.access_hook_script.is_file(): + die( + f"git-gate access-hook missing at {plan.access_hook_script}; " + f"GitGate.prepare must run first" + ) + + build_git_gate_image() + + name = git_gate_container_name(plan.slug) + info(f"starting git-gate sidecar {name} on network {plan.internal_network}") + + create_args = [ + "docker", "create", + "--name", name, + "--network", plan.internal_network, + GIT_GATE_IMAGE, + ] + if subprocess.run( + create_args, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + 
).returncode != 0: + die(f"failed to create git-gate sidecar {name}") + + # Order matters: entrypoint + hook first so they're present + # when docker start fires. Per-upstream creds afterwards. + stage_dir = plan.entrypoint_script.parent + cps: list[tuple[str, str, str]] = [ + (str(plan.entrypoint_script), GIT_GATE_ENTRYPOINT_IN_CONTAINER, "entrypoint"), + (str(plan.hook_script), GIT_GATE_HOOK_IN_CONTAINER, "pre-receive hook"), + (str(plan.access_hook_script), GIT_GATE_ACCESS_HOOK_IN_CONTAINER, "access-hook"), + ] + for u in plan.upstreams: + keypath = expand_tilde(u.identity_file) + cps.append(( + keypath, + f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-key", + f"upstream key for '{u.name}'", + )) + if u.known_host_key: + hosts_path = stage_dir / f"git_gate_known_hosts_{u.name}" + hosts_path.write_text( + git_gate_known_hosts_line( + u.upstream_host, u.upstream_port, u.known_host_key + ) + ) + hosts_path.chmod(0o600) + cps.append(( + str(hosts_path), + f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-known_hosts", + f"upstream known_hosts for '{u.name}'", + )) + + for src, dst, label in cps: + cp_result = subprocess.run( + ["docker", "cp", src, f"{name}:{dst}"], + capture_output=True, + text=True, + check=False, + ) + if cp_result.returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to copy {label} into {name}: " + f"{cp_result.stderr.strip()}" + ) + + if subprocess.run( + ["docker", "network", "connect", plan.egress_network, name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die( + f"failed to attach git-gate sidecar {name} to egress network " + f"{plan.egress_network}" + ) + + if subprocess.run( + ["docker", "start", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + 
check=False, + ).returncode != 0: + subprocess.run( + ["docker", "rm", "-f", name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + die(f"failed to start git-gate sidecar {name}") + + return name + + def stop(self, target: str) -> None: + """Idempotent: missing container is success. `target` is the + container name returned by `.start`.""" + if subprocess.run( + ["docker", "inspect", target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode == 0: + if subprocess.run( + ["docker", "rm", "-f", target], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode != 0: + warn( + f"failed to remove git-gate sidecar {target}; " + f"clean up with 'docker rm -f {target}'" + ) diff --git a/claude_bottle/backend/docker/launch.py b/claude_bottle/backend/docker/launch.py index 7e5d10f..006a719 100644 --- a/claude_bottle/backend/docker/launch.py +++ b/claude_bottle/backend/docker/launch.py @@ -22,6 +22,7 @@ from . import network as network_mod from . import util as docker_mod from .bottle import DockerBottle from .bottle_plan import DockerBottlePlan +from .git_gate import DockerGitGate from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH from .ssh_gate import DockerSSHGate @@ -37,6 +38,7 @@ def launch( *, proxy: DockerPipelockProxy, gate: DockerSSHGate, + git_gate: DockerGitGate, provision: Callable[[DockerBottlePlan, str], str | None], ) -> Generator[DockerBottle, None, None]: """Build, launch, and provision a Docker bottle. Teardown on exit. @@ -102,6 +104,21 @@ def launch( gate_name = gate.start(plan.gate_plan) stack.callback(gate.stop, gate_name) + # Git gate (PRD 0008). One sidecar per agent, only brought up + # when the bottle has git entries. 
Same internal + egress + # network attachment as the other sidecars; agent dials it as + # `git://<gate>/<name>.git` via the insteadOf + # rules provision_git writes into ~/.gitconfig. + if plan.git_gate_plan.upstreams: + git_gate_plan = dataclasses.replace( + plan.git_gate_plan, + internal_network=internal_network, + egress_network=egress_network, + ) + plan = dataclasses.replace(plan, git_gate_plan=git_gate_plan) + git_gate_name = git_gate.start(plan.git_gate_plan) + stack.callback(git_gate.stop, git_gate_name) + container = _run_agent_container(plan, internal_network) stack.callback(docker_mod.force_remove_container, container) diff --git a/claude_bottle/backend/docker/prepare.py b/claude_bottle/backend/docker/prepare.py index f420851..ce08cba 100644 --- a/claude_bottle/backend/docker/prepare.py +++ b/claude_bottle/backend/docker/prepare.py @@ -19,6 +19,7 @@ from ...log import die from .. import BottleSpec from . import util as docker_mod from .bottle_plan import DockerBottlePlan +from .git_gate import DockerGitGate from .pipelock import DockerPipelockProxy from .ssh_gate import DockerSSHGate @@ -29,6 +30,7 @@ def resolve_plan( stage_dir: Path, proxy: DockerPipelockProxy, gate: DockerSSHGate, + git_gate: DockerGitGate, ) -> DockerBottlePlan: """Resolve Docker-specific names and write scratch files. Trusts that the agent and its skills/SSH keys are present — validation @@ -81,6 +83,7 @@ def resolve_plan( proxy_plan = proxy.prepare(bottle, slug, stage_dir) gate_plan = gate.prepare(bottle, slug, stage_dir) + git_gate_plan = git_gate.prepare(bottle, slug, stage_dir) resolved = resolve_env(manifest, spec.agent_name) # Everything that should reach the bottle by-name (so its value # never lands on argv or in env_file) goes into one dict. 
The @@ -109,6 +112,7 @@ def resolve_plan( prompt_file=prompt_file, proxy_plan=proxy_plan, gate_plan=gate_plan, + git_gate_plan=git_gate_plan, allowlist_summary=allowlist_summary, use_runsc=use_runsc, ) diff --git a/claude_bottle/backend/docker/provision/git.py b/claude_bottle/backend/docker/provision/git.py index 3007fac..7dc91e0 100644 --- a/claude_bottle/backend/docker/provision/git.py +++ b/claude_bottle/backend/docker/provision/git.py @@ -1,19 +1,39 @@ -"""Copy the host cwd's .git directory into a running Docker bottle. +"""Git provisioning inside a running Docker bottle. -Only fires when `--cwd` was passed AND the host cwd actually has a -.git. The container-side path is fixed at /home/node/workspace/.git; -ownership is reset to node so the agent can run git commands.""" +Two concerns, both about git in the agent: + + 1. If --cwd was passed AND the host cwd has a .git, copy that .git + into /home/node/workspace/.git so the agent operates on the + user's repo. + 2. If the bottle declares `git` entries (PRD 0008), write a + ~/.gitconfig with insteadOf rules so every git operation + against a declared upstream (push, fetch, clone, pull, + ls-remote) transparently hits the per-agent git-gate. The + gate mirrors the upstream in both directions, so URL + rewriting is symmetric. +""" from __future__ import annotations +import os import subprocess from pathlib import Path from ....log import info +from ....manifest import GitEntry +from .. import util as docker_mod from ..bottle_plan import DockerBottlePlan +from ..git_gate import git_gate_host def provision_git(plan: DockerBottlePlan, target: str) -> None: + """Set up git inside the bottle. 
Runs both subcases; each no-ops + when its condition isn't met.""" + _provision_cwd_git(plan, target) + _provision_git_gate_config(plan, target) + + +def _provision_cwd_git(plan: DockerBottlePlan, target: str) -> None: """If --cwd was set and the host cwd has a .git directory, copy it into /home/node/workspace/.git and fix ownership. No-op otherwise.""" @@ -34,3 +54,49 @@ def provision_git(plan: DockerBottlePlan, target: str) -> None: stdout=subprocess.DEVNULL, check=True, ) + + +def render_git_gate_gitconfig(slug: str, entries: tuple[GitEntry, ...]) -> str: + """Render the ~/.gitconfig content for git-gate `insteadOf` + rewrites. Pure host-side, no docker; exposed for tests. + + Empty `entries` returns an empty string so callers can no-op + cleanly without conditional formatting at the call site.""" + if not entries: + return "" + gate = git_gate_host(slug) + out = [ + "# claude-bottle git-gate (PRD 0008): every git operation against\n", + "# a declared upstream routes through the gate, which mirrors\n", + "# the upstream bidirectionally (gitleaks-scanned push;\n", + "# fetch-from-upstream-before-every-upload-pack via access-hook).\n", + ] + for entry in entries: + out.append(f'[url "git://{gate}/{entry.Name}.git"]\n') + out.append(f"\tinsteadOf = {entry.Upstream}\n") + return "".join(out) + + +def _provision_git_gate_config(plan: DockerBottlePlan, target: str) -> None: + """Write ~/.gitconfig in the bottle with the git-gate + insteadOf rules. 
No-op when the bottle has no `git` entries.""" + bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) + if not bottle.git: + return + container = target + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + container_gitconfig = f"{container_home}/.gitconfig" + + content = render_git_gate_gitconfig(plan.slug, bottle.git) + config_file = plan.stage_dir / "agent_gitconfig" + config_file.write_text(content) + config_file.chmod(0o600) + + info(f"writing {container_gitconfig} with {len(bottle.git)} insteadOf rule(s)") + subprocess.run( + ["docker", "cp", str(config_file), f"{container}:{container_gitconfig}"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(container, ["chown", "node:node", container_gitconfig]) + docker_mod.docker_exec_root(container, ["chmod", "644", container_gitconfig]) diff --git a/claude_bottle/backend/docker/util.py b/claude_bottle/backend/docker/util.py index 87ada4e..5cb671b 100644 --- a/claude_bottle/backend/docker/util.py +++ b/claude_bottle/backend/docker/util.py @@ -100,12 +100,20 @@ def slugify(name: str) -> str: return slug -def build_image(ref: str, context: str) -> None: +def build_image(ref: str, context: str, *, dockerfile: str = "") -> None: """Invokes `docker build` every call. Layer cache makes no-change rebuilds cheap; running every time means Dockerfile edits land - without manual `docker rmi`.""" + without manual `docker rmi`. + + `dockerfile` is an optional path (relative to `context`, or + absolute) for callers that need to build from a non-default + Dockerfile in the same context — e.g. 
`Dockerfile.git-gate`.""" info(f"building image {ref} from {context} (layer cache keeps repeat builds fast)") - subprocess.run(["docker", "build", "-t", ref, context], check=True) + args = ["docker", "build", "-t", ref] + if dockerfile: + args.extend(["-f", dockerfile]) + args.append(context) + subprocess.run(args, check=True) _TRUST_DIALOG_NODE_SCRIPT = ( diff --git a/claude_bottle/git_gate.py b/claude_bottle/git_gate.py new file mode 100644 index 0000000..6827178 --- /dev/null +++ b/claude_bottle/git_gate.py @@ -0,0 +1,340 @@ +"""Per-agent git-gate (PRD 0008). + +A third per-agent sidecar that fronts the bottle's declared git +upstreams as a transparent mirror. Each `bottle.git` entry maps to +a bare repo on the gate; `git daemon` serves the bare repos over +`git://<gate>/<name>.git`. Two hooks make the mirror bidirectional: + +- **`pre-receive`** (push path) — gitleaks-scans incoming refs and, + on clean, forwards them to the real upstream with the + gate-resident credential. +- **`--access-hook`** (fetch path) — runs `git fetch origin --prune` + against the real upstream before every `upload-pack`, so an + agent fetch returns whatever the upstream has *now*. Fail-closed + if the upstream is unreachable. + +The agent never sees the upstream credential under either path. + +Why a third sidecar (not folded into pipelock or ssh-gate): the +gate is the only one of the three that holds upstream push +credentials. Mixing it with pipelock would put push creds in the +same blast radius as internet-facing TLS interception; mixing it +with ssh-gate would force ssh-gate above L4 and into git-protocol +land. See `docs/prds/0008-git-gate.md`. + +This module defines the abstract gate (`GitGate`) and its plan +dataclass (`GitGatePlan`). 
The sidecar's start/stop lifecycle is +backend-specific and lives on concrete subclasses (see +`claude_bottle/backend/docker/git_gate.py`).""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path + +from .manifest import Bottle + + +@dataclass(frozen=True) +class GitGateUpstream: + """One bare repo on the gate. `name` drives the bare-repo path + (`/git/<name>.git`), the agent's URL after insteadOf rewrite + (`git://<gate>/<name>.git`), and the per-upstream credential + paths inside the gate (`/git-gate/creds/<name>-key` and + `/git-gate/creds/<name>-known_hosts`). + + `identity_file` is the host-side key path (a leading `~` is allowed) the gate's start + step will expand and docker-cp into the container. `known_host_key` is the + KnownHostKey string from the manifest; the gate's start step + materialises it into a known_hosts file if non-empty.""" + + name: str + upstream_url: str + upstream_host: str + upstream_port: str + identity_file: str + known_host_key: str + + +@dataclass(frozen=True) +class GitGatePlan: + """Output of GitGate.prepare; consumed by .start. + + The script + slug + upstream fields are filled at prepare time + (host-side, side-effect-free on docker). The network fields are + populated by the backend's launch step via `dataclasses.replace` + once those networks exist. Empty defaults are sentinels meaning + "not yet set"; `.start` validates that they are populated. + + `hook_script` is the shared `pre-receive` for push-time gating; + `access_hook_script` is `git daemon`'s `--access-hook` for the + fetch-time upstream refresh.""" + + slug: str + entrypoint_script: Path + hook_script: Path + access_hook_script: Path + upstreams: tuple[GitGateUpstream, ...] + internal_network: str = "" + egress_network: str = "" + + +def git_gate_upstreams_for_bottle(bottle: Bottle) -> tuple[GitGateUpstream, ...]: + """Lift each `bottle.git` entry into a GitGateUpstream. 
Cross-entry + validation (unique Names, no shadow route with bottle.ssh) already + ran in `manifest.Bottle.from_dict`.""" + return tuple( + GitGateUpstream( + name=e.Name, + upstream_url=e.Upstream, + upstream_host=e.UpstreamHost, + upstream_port=e.UpstreamPort, + identity_file=e.IdentityFile, + known_host_key=e.KnownHostKey, + ) + for e in bottle.git + ) + + +def git_gate_known_hosts_line(host: str, port: str, key: str) -> str: + """Format `host[:port] key` for OpenSSH's known_hosts. Non-default + ports use the bracketed `[host]:port` form (the form OpenSSH writes + on disk for hosts reached via a non-22 port).""" + if port and port != "22": + target = f"[{host}]:{port}" + else: + target = host + return f"{target} {key}\n" + + +def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: + """Posix-sh entrypoint (alpine ash). One `init_repo` call per + upstream, then `exec git daemon`. The function reads + `/git-gate/creds/<name>-{key,known_hosts}` (laid down by + `DockerGitGate.start` via docker cp) and wires them into each + bare repo's config; the access-hook + pre-receive hook pick those + paths up at fetch / push time.""" + lines = [ + "#!/bin/sh", + "set -eu", + "", + "init_repo() {", + " name=$1", + " upstream_url=$2", + " keyfile=/git-gate/creds/${name}-key", + " hostsfile=/git-gate/creds/${name}-known_hosts", + "", + " chmod 600 \"$keyfile\"", + " if [ -f \"$hostsfile\" ]; then", + " chmod 600 \"$hostsfile\"", + " fi", + "", + " repo=/git/${name}.git", + " if [ ! -d \"$repo\" ]; then", + " git init --bare \"$repo\" >/dev/null", # --mirror=fetch sets remote.origin.fetch = +refs/*:refs/* so", # a later `git fetch origin` mirrors the upstream's full ref", # graph (heads, tags, notes) into the bare repo at canonical", # paths. 
It does NOT set remote.origin.mirror=true, so an", # explicit `git push origin <sha>:<ref>` still pushes one ref.", " git -C \"$repo\" remote add --mirror=fetch origin \"$upstream_url\"", + " fi", + " git -C \"$repo\" config git-gate.identityFile \"$keyfile\"", + " git -C \"$repo\" config git-gate.knownHosts \"$hostsfile\"", + " git -C \"$repo\" config receive.denyCurrentBranch ignore", + " install -m 755 /etc/git-gate/pre-receive \"$repo/hooks/pre-receive\"", + "}", + "", + "mkdir -p /git", + ] + for u in upstreams: + # Single-quote args so URL/path content (containing : and /) + # passes through ash unmangled. Names came through the manifest + # validator so they don't contain a single quote. + lines.append(f"init_repo '{u.name}' '{u.upstream_url}'") + lines.extend([ + "", + "exec git daemon \\", + " --reuseaddr \\", + " --base-path=/git \\", + " --export-all \\", + " --enable=receive-pack \\", + " --access-hook=/etc/git-gate/access-hook \\", + " --verbose", + ]) + return "\n".join(lines) + "\n" + + +def git_gate_render_hook() -> str: + """The shared pre-receive hook: gitleaks-scan all incoming refs, + then forward each accepted ref to the real upstream (`origin`) + using the per-repo credential. Failure in either phase aborts + the push so the agent sees a real rejection. POSIX sh. + + Two phases (scan all, then push all) keeps a hit on ref N from + half-pushing refs 1..N-1; both phases re-read stdin from a temp + file because pre-receive's stdin is a one-shot stream.""" + return r"""#!/bin/sh +# git-gate pre-receive (PRD 0008). Stdin: <old> <new> <ref> per line. +set -u + +refs_file=$(mktemp) +trap 'rm -f "$refs_file"' EXIT +cat > "$refs_file" + +zero=0000000000000000000000000000000000000000 + +# Phase 1: gitleaks scan each ref's incoming commits. 
+while IFS=' ' read -r old new ref; do + [ -z "$ref" ] && continue + [ "$new" = "$zero" ] && continue + if [ "$old" = "$zero" ]; then + log_opts="$new" + else + log_opts="$old..$new" + fi + echo "git-gate: gitleaks scanning $ref ($log_opts)" >&2 + if ! gitleaks git --log-opts="$log_opts" --no-banner --redact 1>&2; then + echo "git-gate: gitleaks rejected push to $ref" >&2 + exit 1 + fi +done < "$refs_file" + +# Phase 2: forward each ref to the upstream (`origin`, configured +# in the entrypoint via `git remote add --mirror=fetch`). +keyfile=$(git config --get git-gate.identityFile) +hostsfile=$(git config --get git-gate.knownHosts) +if [ ! -f "$hostsfile" ]; then + echo "git-gate: no KnownHostKey configured for this upstream; refusing to push" >&2 + echo "git-gate: add KnownHostKey to the bottle.git entry and restart the bottle" >&2 + exit 1 +fi +ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes" + +while IFS=' ' read -r old new ref; do + [ -z "$ref" ] && continue + if [ "$new" = "$zero" ]; then + refspec=":$ref" + else + refspec="$new:$ref" + fi + echo "git-gate: forwarding $ref to origin" >&2 + if ! GIT_SSH_COMMAND="$ssh_cmd" git push origin "$refspec" 1>&2; then + echo "git-gate: upstream push failed for $ref" >&2 + exit 1 + fi +done < "$refs_file" + +exit 0 +""" + + +def git_gate_render_access_hook() -> str: + """`git daemon --access-hook` script. Runs before each protocol + service; for `upload-pack` (fetch / clone / ls-remote / pull) it + refreshes the bare repo from upstream first, so the response + reflects upstream's current state. For other services (notably + `receive-pack`) it returns 0 immediately and lets the existing + pre-receive hook gate the operation. POSIX sh. + + The hook receives: + $1 service name (`upload-pack`, `receive-pack`, ...) 
+ $2 absolute path to the resolved repo + $3 client hostname (unused) + $4 client tcp address (unused) + + Fail-closed on upstream errors: the agent's fetch fails too, + so it never silently sees stale data — matches the PRD's + 'equivalent to operations against the upstream' contract.""" + return r"""#!/bin/sh +# git-gate access-hook (PRD 0008). $1=service $2=repo $3=host $4=peer +set -u +service=$1 +repo_dir=$2 + +# Push path keeps its own gating in pre-receive (gitleaks + +# forward). Only refresh-from-upstream on fetch operations. +if [ "$service" != "upload-pack" ]; then + exit 0 +fi + +keyfile=$(git -C "$repo_dir" config --get git-gate.identityFile 2>/dev/null || true) +hostsfile=$(git -C "$repo_dir" config --get git-gate.knownHosts 2>/dev/null || true) +if [ -z "$keyfile" ] || [ ! -f "$hostsfile" ]; then + echo "git-gate: missing credentials for $repo_dir; refusing fetch" >&2 + exit 1 +fi +ssh_cmd="ssh -i $keyfile -o UserKnownHostsFile=$hostsfile -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes" + +echo "git-gate: refreshing $repo_dir from upstream" >&2 +if ! GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" fetch origin --prune >&2; then + echo "git-gate: upstream fetch failed for $repo_dir; refusing to serve stale data" >&2 + exit 1 +fi + +# Sync the bare repo's HEAD to upstream's HEAD on the first fetch +# (when it still points at the `git init --bare` default of +# refs/heads/master and upstream uses something else, the cloned +# checkout would fail with "remote HEAD refers to nonexistent ref"). +# Costs one extra ls-remote on first fetch only; subsequent fetches +# skip the branch. If upstream's default branch changes after the +# gate has cached it, restart the bottle to resync. +if ! 
git -C "$repo_dir" rev-parse --verify HEAD >/dev/null 2>&1; then + upstream_head=$(GIT_SSH_COMMAND="$ssh_cmd" git -C "$repo_dir" \ + ls-remote --symref origin HEAD 2>/dev/null \ + | awk '/^ref:/ {print $2; exit}') + if [ -n "$upstream_head" ]; then + git -C "$repo_dir" symbolic-ref HEAD "$upstream_head" || true + fi +fi +exit 0 +""" + + +class GitGate(ABC): + """The per-agent git-gate. Encapsulates the host-side prepare + (upstream lift + entrypoint/hook render); the sidecar's + start/stop lifecycle is backend-specific and lives on concrete + subclasses.""" + + def prepare(self, bottle: Bottle, slug: str, stage_dir: Path) -> GitGatePlan: + """Compute the upstream table from `bottle.git` and write the + entrypoint, pre-receive hook, and access-hook scripts (mode + 600) under `stage_dir`. Pure host-side, no docker subprocess. + + Returned plan is incomplete: the launch step must fill + `internal_network` / `egress_network` via `dataclasses.replace` + before passing the plan to `.start`.""" + upstreams = git_gate_upstreams_for_bottle(bottle) + entrypoint = stage_dir / "git_gate_entrypoint.sh" + entrypoint.write_text(git_gate_render_entrypoint(upstreams)) + entrypoint.chmod(0o600) + hook = stage_dir / "git_gate_pre_receive.sh" + hook.write_text(git_gate_render_hook()) + hook.chmod(0o600) + access_hook = stage_dir / "git_gate_access_hook.sh" + access_hook.write_text(git_gate_render_access_hook()) + # 0o700 (not 0o600): git daemon execs --access-hook directly, + # not via `sh`, so the script needs the x bit. docker cp + # preserves source mode into the container. + access_hook.chmod(0o700) + return GitGatePlan( + slug=slug, + entrypoint_script=entrypoint, + hook_script=hook, + access_hook_script=access_hook, + upstreams=upstreams, + ) + + @abstractmethod + def start(self, plan: GitGatePlan) -> str: + """Bring up the gate sidecar according to `plan`. Returns the + target string identifying the running instance — the same + value to pass to `.stop`. 
Backend-specific.""" + + @abstractmethod + def stop(self, target: str) -> None: + """Tear down the gate sidecar identified by `target` (the + value `.start` returned). Idempotent: a missing target is + success. Backend-specific.""" diff --git a/claude_bottle/manifest.py b/claude_bottle/manifest.py index eed15d3..a7910b3 100644 --- a/claude_bottle/manifest.py +++ b/claude_bottle/manifest.py @@ -7,6 +7,7 @@ Schema (see CLAUDE.md "Intended design"): "": { "env": { "": , ... }, "ssh": [ , ... ], + "git": [ , ... ], "egress": { "allowlist": [ "", ... ] } } }, @@ -79,6 +80,65 @@ class SshEntry: ) +@dataclass(frozen=True) +class GitEntry: + """One upstream the per-agent git-gate (PRD 0008) is allowed to + talk to. `Upstream` is the real remote URL the agent would push to + if there were no gate; the gate hosts a bare repo at /git/.git + and `IdentityFile` is the SSH key the gate uses to push that repo + upstream after gitleaks passes. The agent itself never holds the + upstream credential. + + The Upstream URL is parsed once at construction and the pieces are + stashed in the `Upstream*` fields so the git-gate render step + doesn't have to re-parse.""" + + Name: str + Upstream: str + IdentityFile: str + KnownHostKey: str = "" + UpstreamUser: str = "" + UpstreamHost: str = "" + UpstreamPort: str = "" + UpstreamPath: str = "" + + @classmethod + def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "GitEntry": + d = _as_json_object(raw, f"bottle '{bottle_name}' git[{idx}]") + name = d.get("Name") + if not isinstance(name, str) or not name: + die(f"bottle '{bottle_name}' git[{idx}] missing required string field 'Name'") + upstream = d.get("Upstream") + if not isinstance(upstream, str) or not upstream: + die( + f"bottle '{bottle_name}' git '{name}' missing required string field " + f"'Upstream'" + ) + ident = d.get("IdentityFile") + if not isinstance(ident, str) or not ident: + die( + f"bottle '{bottle_name}' git '{name}' missing required string field " + 
f"'IdentityFile'" + ) + khk = _opt_str( + d.get("KnownHostKey"), + f"bottle '{bottle_name}' git '{name}' KnownHostKey", + ) + user, host, port, path = _parse_git_upstream( + upstream, f"bottle '{bottle_name}' git '{name}' Upstream" + ) + return cls( + Name=name, + Upstream=upstream, + IdentityFile=ident, + KnownHostKey=khk, + UpstreamUser=user, + UpstreamHost=host, + UpstreamPort=port, + UpstreamPath=path, + ) + + DLP_ACTIONS = ("block", "warn") @@ -134,6 +194,7 @@ class BottleEgress: class Bottle: env: Mapping[str, str] = field(default_factory=_empty_str_dict) ssh: tuple[SshEntry, ...] = () + git: tuple[GitEntry, ...] = () egress: BottleEgress = field(default_factory=BottleEgress) @classmethod @@ -171,6 +232,19 @@ class Bottle: for i, entry in enumerate(ssh_list) ) + git: tuple[GitEntry, ...] = () + git_raw = d.get("git") + if git_raw is not None: + if not isinstance(git_raw, list): + die(f"bottle '{name}' git must be an array (was {type(git_raw).__name__})") + git_list = cast(list[object], git_raw) + git = tuple( + GitEntry.from_dict(name, i, entry) + for i, entry in enumerate(git_list) + ) + _validate_unique_git_names(name, git) + _validate_no_shadow_route(name, ssh, git) + egress_raw = d.get("egress") egress = ( BottleEgress.from_dict(name, egress_raw) @@ -178,7 +252,7 @@ class Bottle: else BottleEgress() ) - return cls(env=env, ssh=ssh, egress=egress) + return cls(env=env, ssh=ssh, git=git, egress=egress) @dataclass(frozen=True) @@ -359,3 +433,69 @@ def _opt_port(value: object, label: str) -> str: if isinstance(value, str): return value die(f"{label} must be a string or number (was {type(value).__name__})") + + +def _parse_git_upstream(url: str, label: str) -> tuple[str, str, str, str]: + """Parse `ssh://user@host[:port]/path` into (user, host, port, path). + Dies if `url` doesn't match the ssh:// shape v1 supports. 
Default + port is 22 (matches OpenSSH).""" + if not url.startswith("ssh://"): + die(f"{label} must be an ssh:// URL (was {url!r})") + rest = url[len("ssh://"):] + if "@" not in rest: + die(f"{label} must include a user (e.g. ssh://git@host/path.git); was {url!r}") + user, _, hostpart = rest.partition("@") + if not user: + die(f"{label} user is empty in {url!r}") + if "/" not in hostpart: + die(f"{label} must include a path (e.g. ssh://git@host/path.git); was {url!r}") + hostport, _, path = hostpart.partition("/") + if not path: + die(f"{label} path is empty in {url!r}") + if ":" in hostport: + host, _, port = hostport.partition(":") + if not port.isdigit(): + die(f"{label} port must be numeric in {url!r}") + else: + host = hostport + port = "22" + if not host: + die(f"{label} host is empty in {url!r}") + return (user, host, port, path) + + +def _validate_unique_git_names(bottle_name: str, git: tuple[GitEntry, ...]) -> None: + seen: dict[str, None] = {} + for g in git: + if g.Name in seen: + die( + f"bottle '{bottle_name}' git entries have duplicate Name '{g.Name}'; " + f"each entry maps to a distinct bare repo on the gate." + ) + seen[g.Name] = None + + +def _validate_no_shadow_route( + bottle_name: str, + ssh: tuple[SshEntry, ...], + git: tuple[GitEntry, ...], +) -> None: + """Reject if any git entry's (host, port) matches an ssh entry's + (Hostname, Port). The same upstream reachable two ways — once through + the L4 ssh-gate, once through the gitleaks-bearing git-gate — defeats + the git-gate.""" + ssh_targets: dict[tuple[str, str], str] = {} + for e in ssh: + if not e.Hostname: + continue + port = e.Port or "22" + ssh_targets[(e.Hostname, port)] = e.Host + for g in git: + ssh_host = ssh_targets.get((g.UpstreamHost, g.UpstreamPort)) + if ssh_host is not None: + die( + f"bottle '{bottle_name}' has ssh entry '{ssh_host}' " + f"({g.UpstreamHost}:{g.UpstreamPort}) and git entry '{g.Name}' " + f"pointing at the same upstream. 
The same remote reachable two " + f"ways defeats the git-gate; remove one." + ) diff --git a/docs/prds/0008-git-gate.md b/docs/prds/0008-git-gate.md new file mode 100644 index 0000000..47033b5 --- /dev/null +++ b/docs/prds/0008-git-gate.md @@ -0,0 +1,249 @@ +# PRD 0008: Git gate + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-12 + +## Summary + +Per-bottle sidecar that fronts the agent's git remotes as a +transparent mirror. Push is gated: gitleaks scans incoming refs +via a `pre-receive` hook, and only clean refs get forwarded to +the real upstream. Fetch is mirrored: every `upload-pack` first +runs `git fetch origin --prune` against the upstream via the +daemon's `--access-hook`, so an agent fetch returns whatever the +upstream has *now* (fail-closed if the upstream is unreachable). + +Upstream credentials live in the gate, not the agent — so a +misbehaving agent cannot push a secret-bearing commit past it +and cannot acquire push access by inspecting the agent's own +filesystem. + +## Problem + +Today the agent holds its own SSH identity for each `bottle.ssh` +entry and pushes straight at gitea/github with ssh-gate doing dumb +L4 forwarding. There is no boundary between "the agent thinks this +commit is fine" and "the secret hits an external remote." If a +compromised or careless agent stages a `.env`, slips a token into +a fixture, or commits the `CLAUDE_BOTTLE_OAUTH_TOKEN` itself, `git +push` ships it. + +Host-side pre-commit / pre-push hooks are the usual defense, but +they live on the agent's side of the trust boundary: an agent with +shell access can `git push --no-verify` past them, edit +`.githooks/`, or `git config core.hooksPath /dev/null`. Anything +the agent can disable is not a gate. + +## Goals / Success Criteria + +Two integration tests, both with the gate as the only git path +for a declared upstream: + +1. 
**Push:** drop a synthetic high-entropy secret into a commit, + run `git push` from inside the agent, observe a non-zero exit + and a gitleaks finding in the response. Repeat with a clean + commit and observe exit 0 + the commit landing on the real + upstream. +2. **Fetch:** clone the upstream through the gate (`git clone` + against the gate URL), observe the upstream's content. Push + a new commit to the upstream out-of-band, refetch through the + gate, observe the new commit. The gate must never serve stale + data — every fetch refreshes from upstream first. + +## Non-goals + +- Pre-commit scanning. The gate is a `pre-receive` checkpoint + only; it does not run on `git commit`, does not block local + commits, and does not edit the agent's working tree. +- Git-protocol awareness beyond what `pre-receive` already gives + you. No bespoke pack inspection; gitleaks runs against the + incoming ref(s) in a bare repo, full stop. +- Per-user authentication on the agent → gate hop. The hop sits + inside a single bottle on an `--internal` Docker network; only + the bottle's agent can reach the gate. No additional ACLs. +- Subsuming ssh-gate or pipelock. Non-git SSH (if any) keeps + flowing through ssh-gate; HTTPS through pipelock. The git-gate + is git-only. +- Multi-tenant gate. One gate is provisioned per bottle, not + shared across bottles (same one-sidecar-per-agent posture as + pipelock / ssh-gate). +- Smolmachines / microVM colocation policy. Whether the future + smolmachines backend packs gates into one VM or runs them as + separate VMs is a backend decision, not a manifest or design + decision in this PRD. See "Future work." + +## Scope + +### In scope + +- **Gate sidecar lifecycle.** New `GitGate` + `DockerGitGate`, + mirroring `DockerSSHGate` and `DockerPipelockProxy` in shape and + network-attachment story. +- **Manifest field.** `bottle.git` — a list of git remotes the + bottle is allowed to talk to, each with the credential the gate + uses to push upstream. 
The agent gets no parallel `bottle.ssh` + entry for those upstreams. +- **Agent-side URL rewrite.** Provisioner emits `~/.gitconfig` + with `[url "<gate-url>"] insteadOf = <upstream-url>` so every git + operation against the declared upstream (push, fetch, clone, + pull, ls-remote) transparently hits the gate. +- **Pre-receive gitleaks hook.** Baked into the gate image. On a + hit the hook exits non-zero and the push fails; on clean it + shells out `git push origin <sha>:<ref>` using the gate-resident + credential. +- **Access-hook upstream refresh.** `git daemon --access-hook` runs + `git fetch origin --prune` against the upstream before every + `upload-pack` request, so a fetch through the gate is observably + equivalent to a fetch against the real upstream. Failure to reach + the upstream is fail-closed: the access hook exits non-zero and + the agent's fetch fails. +- **Plan rendering / dry-run.** `bottle_plan.py` and the y/N + preflight surface the gate sidecar (name, listed upstreams, + which credential it holds per upstream). + +### Out of scope + +- Push policy beyond gitleaks. No commit-author allowlist, no + branch-name policy, no signed-commit enforcement. gitleaks is + the single rule for v1. +- Fetch caching / stale-while-revalidate. Every `upload-pack` + refresh is a synchronous round-trip to the upstream; there is + no TTL cache, no background refresh. If the upstream is slow, + the agent's fetch is slow. +- Quarantine / replay. A rejected push is discarded; we do not + stash it for the user to inspect. +- Non-Docker backends. Implementation lands for Docker only; the + `BottleBackend` abstraction gains the hook but other backends + are deferred. +- Bypass for trusted commits. No `[skip gitleaks]` trailer, no + allowlist by commit hash. If the gate is bypassable it isn't a + gate. + +## Proposed Design + +### New services / components + +Mirror the existing sidecar layout: + +- **`claude_bottle/git_gate.py`** (new): abstract `GitGate` + + `GitGatePlan` dataclass.
`prepare` is host-side / side-effect- + free on docker; renders the per-upstream config and stages the + push credentials under `stage_dir`. +- **`claude_bottle/backend/docker/git_gate.py`** (new): + `DockerGitGate` concrete subclass. `start` does `docker create` + on the internal network, copies in the bare-repo skeleton, the + hook script, and per-upstream credentials, then `docker start`. + `stop` is idempotent `docker rm -f`. Container name: + `claude-bottle-git-gate-<slug>`. + +Gate image: `git-daemon` + `openssh-client` over a +`zricethezav/gitleaks` base (alpine + gitleaks), pinned by digest. +For each declared upstream the gate hosts a bare repo at +`/git/<name>.git` with `remote.origin.url` set to the real +upstream (via `git remote add --mirror=fetch`), `hooks/pre-receive` +wired to gitleaks-then-`git push origin`, and the bare repo's +config carrying per-upstream credential paths. + +Inside the bottle, the agent's `.gitconfig` rewrites the real +upstream URL to the gate's `git://` URL via `insteadOf`. Every +git operation against the declared upstream therefore hits the +gate. + +For pushes, the pre-receive hook gitleaks-scans the incoming +refs and, on clean, pushes each accepted ref to the real +upstream using the credential the gate holds. + +For fetches (clone, pull, fetch, ls-remote), `git daemon`'s +`--access-hook=<path>` runs `git fetch origin --prune` against +the real upstream before the upload-pack service serves the +client. The bare repo therefore reflects the upstream's current +state at the moment the agent's fetch begins; if the upstream +is unreachable, the access hook exits non-zero and the agent's +fetch fails — same observable behavior as if the agent were +talking to the upstream directly. + +The agent never sees the upstream credential under either +operation.
+ +### Existing code touched + +- **`claude_bottle/manifest.py`**: parse and validate the new + `bottle.git` block; reject `bottle.ssh` entries whose upstream + is also claimed by a `bottle.git` upstream (one path per + remote, no shadow route). +- **`claude_bottle/backend/docker/provision/git.py`** (new) or an + extension of the ssh provisioner: render the `insteadOf` config + and any extra `~/.gitconfig` plumbing. +- **`claude_bottle/backend/docker/backend.py`**: instantiate + `DockerGitGate` alongside `DockerPipelockProxy` and + `DockerSSHGate`; thread its `prepare` / `start` / `stop` + through `resolve_plan` / `launch`. +- **`claude_bottle/backend/docker/launch.py`**: add gate start / + stop to the `ExitStack` so the gate is up before any + provisioner that writes the agent's `~/.gitconfig`. +- **`claude_bottle/backend/docker/bottle_plan.py`**: new + `GitGatePlan` field on `DockerBottlePlan`; preflight rendering + surfaces the gate sidecar (name, per-upstream local paths, + upstream real URLs, which credential is in use). +- **Tests**: unit tests for `GitGate.prepare` and render shape; + manifest validator tests for the new field and the + no-shadow-route rule; an integration test in + `tests/integration/` for the push-with-secret (rejected) and + push-without-secret (forwarded) cases. + +### Data model changes + +`Bottle` grows an optional `git: list[GitEntry]` field. A +`GitEntry` carries the upstream URL, the local name the gate +exposes it as, and the credential the gate uses to push upstream +(initial shape: `identity_file` + `known_host_key`, matching +`bottle.ssh`). + +### External dependencies + +- `zricethezav/gitleaks` base image, pinned by digest. The base + ships gitleaks + git; the gate Dockerfile adds `git-daemon` and + `openssh-client` on top. +- No new Python packages. 
+ +## Future work +- **Smolmachines colocation.** The eventual smolmachines backend + may pack pipelock + ssh-gate + git-gate into a single microVM, + or split git-gate off because it holds push creds and the + others don't. That decision belongs to the backend; the shared + `BottleBackend` interface keeps sidecars independent so either + packing is possible without touching this PRD's design. + +## Open questions + +- Protocol on the agent → gate hop: SSH (`sshd` + `git-shell` + inside the gate) or HTTP smart protocol (`git-http-backend` + behind a tiny webserver)? SSH matches the existing ssh-gate + patterns and the user's existing `~/.ssh` muscle memory; HTTP + is lighter on image size and avoids an `authorized_keys` + story. Default: SSH unless image size becomes a problem. +- Where gitleaks runs: pre-receive hook against a checkout of the + incoming ref vs. a wrapper around `git-receive-pack` that + inspects the pack file directly. Hook is canonical; defer the + wrapper variant. +- Rejection signalling: gitleaks failures surface as a normal + pre-receive reject (the user sees gitleaks's report on + stderr). Worth a "redacted" mode that hides the matched bytes + from the rejection message? Default: show file + line, hide + the matched bytes. +- Credential reuse vs. duplication from `bottle.ssh`. If a user + lists the same identity for ssh-gate (read) and git-gate + (write), we can either reference by name or require two + copies. Default: inline copies; revisit when it gets annoying. + +## References + +- PRD 0001: per-agent egress proxy via pipelock — sidecar + pattern this PRD reuses. +- PRD 0007: SSH egress gate — the L4 SSH forwarder this PRD + sits alongside; explicitly *not* the place to add + git-protocol awareness. +- `claude_bottle/ssh_gate.py` / `claude_bottle/pipelock.py` — + existing sidecar abstractions to mirror. 
+- gitleaks: diff --git a/tests/fixtures.py b/tests/fixtures.py index b5fd316..49fc04d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -65,6 +65,31 @@ def fixture_with_ssh_dict() -> dict[str, Any]: } +def fixture_with_git_dict() -> dict[str, Any]: + """Bottle declares a git-gate upstream. JSON shape.""" + return { + "bottles": { + "dev": { + "git": [ + { + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + "KnownHostKey": "ssh-ed25519 AAAA...", + }, + { + "Name": "foo", + "Upstream": "ssh://git@github.com/didericis/foo.git", + "IdentityFile": "/dev/null", + "KnownHostKey": "ssh-ed25519 BBBB...", + }, + ] + } + }, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + } + + def fixture_minimal() -> Manifest: return Manifest.from_json_obj(fixture_minimal_dict()) @@ -77,6 +102,10 @@ def fixture_with_ssh() -> Manifest: return Manifest.from_json_obj(fixture_with_ssh_dict()) +def fixture_with_git() -> Manifest: + return Manifest.from_json_obj(fixture_with_git_dict()) + + def write_fixture(fn: Callable[[], dict[str, Any]]) -> Path: """Write fixture JSON to a temp file; return the path. Caller must rm. 
Accepts a function returning either a dict (JSON shape) or a Manifest; diff --git a/tests/integration/test_dry_run_plan.py b/tests/integration/test_dry_run_plan.py index 3f8add4..09e8a1c 100644 --- a/tests/integration/test_dry_run_plan.py +++ b/tests/integration/test_dry_run_plan.py @@ -81,6 +81,8 @@ class TestDryRunPlan(unittest.TestCase): self.assertEqual([], plan["skills"]) self.assertEqual([], plan["ssh_hosts"]) self.assertEqual([], plan["ssh_gate"]) + self.assertEqual([], plan["git_remotes"]) + self.assertEqual([], plan["git_gate"]) self.assertEqual(False, plan["remote_control"]) self.assertEqual(0, plan["prompt"]["length"]) diff --git a/tests/integration/test_git_gate_mirror.py b/tests/integration/test_git_gate_mirror.py new file mode 100644 index 0000000..53d5a4a --- /dev/null +++ b/tests/integration/test_git_gate_mirror.py @@ -0,0 +1,391 @@ +"""Integration: the git-gate is a bidirectional mirror of its +upstream (PRD 0008 v1.1). + +Three round-trip assertions against a real Docker daemon plus a +sibling sshd container playing the role of "real upstream": + + 1. clone-through-gate returns whatever the upstream has at the + moment of clone (refs + content). + 2. After a second commit lands on the upstream out-of-band, a + fetch through the gate picks it up — the access-hook is + refreshing before each upload-pack. + 3. A push through the gate (clean commit) lands on the upstream's + bare repo — the pre-receive hook's forward phase works. + +These are the user-facing semantics: every operation against the +gate is observably equivalent to the same operation against the +real upstream. 
+""" + +import dataclasses +import os +import shutil +import subprocess +import tempfile +import textwrap +import unittest +from pathlib import Path + +from claude_bottle.backend.docker.git_gate import ( + DockerGitGate, + build_git_gate_image, +) +from claude_bottle.backend.docker.network import ( + network_create_egress, + network_create_internal, + network_remove, +) +from claude_bottle.manifest import Manifest +from tests._docker import skip_unless_docker + + +# Same image used by test_git_gate_sidecar — alpine + git + gitleaks. +CLIENT_IMAGE = "zricethezav/gitleaks@sha256:c00b6bd0aeb3071cbcb79009cb16a60dd9e0a7c60e2be9ab65d25e6bc8abbb7f" + +# Built once in setUpClass via `docker build -` from the inline +# Dockerfile below. Carries openssh-server, a `git` user, baked-in +# host keys, and a bare repo at /git/foo.git seeded with one commit. +UPSTREAM_IMAGE = "claude-bottle-test-upstream:latest" + +UPSTREAM_DOCKERFILE = textwrap.dedent(""" + FROM alpine:3.20 + RUN apk add --no-cache openssh-server git + RUN adduser -D -s /usr/bin/git-shell git && \\ + passwd -u git && \\ + mkdir -p /home/git/.ssh && \\ + chown git:git /home/git/.ssh && \\ + chmod 700 /home/git/.ssh && \\ + mkdir -p /git && \\ + chown git:git /git + # Bake host keys into the image so the test can pin the + # KnownHostKey value before the container starts. Re-running + # ssh-keygen -A at boot would invalidate that pinning. 
+ RUN ssh-keygen -A + USER git + RUN git config --global init.defaultBranch main && \\ + git config --global user.email upstream@example && \\ + git config --global user.name upstream && \\ + git init --bare /git/foo.git && \\ + git clone /git/foo.git /tmp/w && \\ + cd /tmp/w && \\ + echo "initial upstream content" > README.md && \\ + git add README.md && \\ + git commit -q -m "initial commit" && \\ + git push -q origin main && \\ + rm -rf /tmp/w + USER root + RUN echo "PermitRootLogin no" >> /etc/ssh/sshd_config && \\ + echo "PasswordAuthentication no" >> /etc/ssh/sshd_config && \\ + echo "AuthorizedKeysFile /home/git/.ssh/authorized_keys" >> /etc/ssh/sshd_config + CMD ["/usr/sbin/sshd", "-D", "-e"] +""").strip() + + +@skip_unless_docker() +class TestGitGateBidirectionalMirror(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Pull the client image first (other suites do the same — keeps + # registry races contained to setUpClass). + if subprocess.run( + ["docker", "pull", CLIENT_IMAGE], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ).returncode != 0: + raise unittest.SkipTest(f"could not pull {CLIENT_IMAGE}") + + # Build the upstream sshd image from stdin (no build context + # needed — Dockerfile has no COPY/ADD). + build_result = subprocess.run( + ["docker", "build", "-t", UPSTREAM_IMAGE, "-"], + input=UPSTREAM_DOCKERFILE, + text=True, + capture_output=True, + check=False, + ) + if build_result.returncode != 0: + raise unittest.SkipTest( + f"could not build upstream image: {build_result.stderr}" + ) + + # Pull the upstream's baked-in ed25519 host pubkey out of the + # image so we can pin it as KnownHostKey on the gate's manifest + # entry. Reading from a transient container ensures we get the + # same key the running sshd will present. 
+ pub_result = subprocess.run( + ["docker", "run", "--rm", "--entrypoint", "cat", + UPSTREAM_IMAGE, "/etc/ssh/ssh_host_ed25519_key.pub"], + capture_output=True, text=True, check=True, + ) + parts = pub_result.stdout.strip().split() + # Format: "ssh-ed25519 " — drop comment. + cls.upstream_host_key = f"{parts[0]} {parts[1]}" + + # Build the gate image (uses build cache after the first run). + build_git_gate_image() + + def setUp(self): + suffix = self.id().rsplit('.', 1)[-1].replace('_', '-')[-12:] + self.slug = f"t{os.getpid()}-{suffix}" + self.gate_name = "" + self.upstream_name = f"claude-bottle-test-upstream-{self.slug}" + self.internal_net = "" + self.egress_net = "" + self.work_dir = Path(tempfile.mkdtemp()) + + # Per-test SSH auth keypair. The host gets the private key + # path on disk (manifest IdentityFile); the upstream's + # authorized_keys gets the public key, docker-cp'd in just + # before sshd starts. + self.auth_key = self.work_dir / "auth_key" + subprocess.run( + ["ssh-keygen", "-t", "ed25519", "-N", "", "-f", str(self.auth_key), + "-C", "git-gate-test"], + check=True, stdout=subprocess.DEVNULL, + ) + self.auth_pub = self.work_dir / "auth_key.pub" + + # Networks first so the upstream can attach to the egress + # network at create time. + self.internal_net = network_create_internal(self.slug) + self.egress_net = network_create_egress(self.slug) + + # Start the upstream sshd container, attached to the egress + # network (which the gate also lives on). Container name doubles + # as its DNS-resolvable hostname. + subprocess.run( + ["docker", "create", + "--name", self.upstream_name, + "--network", self.egress_net, + UPSTREAM_IMAGE], + check=True, stdout=subprocess.DEVNULL, + ) + # docker cp the per-test pubkey into the upstream as + # /home/git/.ssh/authorized_keys (right user, right path). 
+ subprocess.run( + ["docker", "cp", str(self.auth_pub), + f"{self.upstream_name}:/home/git/.ssh/authorized_keys"], + check=True, stdout=subprocess.DEVNULL, + ) + # chown / chmod the authorized_keys before sshd refuses to + # use it. + for argv in ( + ["chown", "git:git", "/home/git/.ssh/authorized_keys"], + ["chmod", "600", "/home/git/.ssh/authorized_keys"], + ): + subprocess.run( + ["docker", "exec", "-u", "0", self.upstream_name, *argv], + check=False, stdout=subprocess.DEVNULL, + ) + # The exec-then-start ordering is unusual — exec on a stopped + # container is OK on modern docker but if it errors we just + # do the chown after start instead. Retry post-start to be + # safe. + subprocess.run( + ["docker", "start", self.upstream_name], + check=True, stdout=subprocess.DEVNULL, + ) + for argv in ( + ["chown", "git:git", "/home/git/.ssh/authorized_keys"], + ["chmod", "600", "/home/git/.ssh/authorized_keys"], + ): + subprocess.run( + ["docker", "exec", "-u", "0", self.upstream_name, *argv], + check=False, stdout=subprocess.DEVNULL, + ) + # Wait for sshd to bind; a short retry against TCP 22 is enough. + ready = False + for _ in range(30): + probe = subprocess.run( + ["docker", "exec", self.upstream_name, + "sh", "-c", "nc -z 127.0.0.1 22"], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + if probe.returncode == 0: + ready = True + break + subprocess.run(["sleep", "0.2"], check=False) + if not ready: + self.fail("upstream sshd never bound port 22") + + # Build the gate plan + start it. Upstream URL points at the + # upstream container's hostname (Docker DNS resolves it on the + # egress network) on port 22, user `git`. 
+ manifest = Manifest.from_json_obj({ + "bottles": { + "dev": { + "git": [{ + "Name": "foo", + "Upstream": f"ssh://git@{self.upstream_name}/git/foo.git", + "IdentityFile": str(self.auth_key), + "KnownHostKey": self.upstream_host_key, + }], + }, + }, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + bottle = manifest.bottles["dev"] + gate = DockerGitGate() + prep = gate.prepare(bottle, self.slug, self.work_dir) + plan = dataclasses.replace( + prep, + internal_network=self.internal_net, + egress_network=self.egress_net, + ) + self.gate_name = gate.start(plan) + + def tearDown(self): + if self.gate_name: + DockerGitGate().stop(self.gate_name) + if self.upstream_name: + subprocess.run( + ["docker", "rm", "-f", self.upstream_name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + for n in (self.internal_net, self.egress_net): + if n: + network_remove(n) + shutil.rmtree(self.work_dir, ignore_errors=True) + + def _upstream_main_sha(self) -> str: + """Read upstream's current refs/heads/main sha by exec'ing + directly into the upstream container's bare repo.""" + out = subprocess.run( + ["docker", "exec", "-u", "git", self.upstream_name, + "git", "-C", "/git/foo.git", "rev-parse", "refs/heads/main"], + capture_output=True, text=True, check=True, + ) + return out.stdout.strip() + + def _push_to_upstream_oob(self, message: str) -> str: + """Make a new commit directly on the upstream's bare repo + (out-of-band, not through the gate). 
Returns the new sha.""" + script = textwrap.dedent(f""" + set -e + cd /tmp + rm -rf w + git clone /git/foo.git w + cd w + git config user.email upstream@example + git config user.name upstream + echo "$RANDOM-$$" >> README.md + git add README.md + git commit -q -m "{message}" + git push -q origin main + git rev-parse HEAD + """).strip() + out = subprocess.run( + ["docker", "exec", "-u", "git", self.upstream_name, + "sh", "-c", script], + capture_output=True, text=True, check=True, + ) + return out.stdout.strip().splitlines()[-1] + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_clone_and_refetch_reflect_upstream(self): + """Clone via gate returns upstream's commit. After a second + commit lands on the upstream out-of-band, a re-fetch through + the gate picks it up — the access-hook is refreshing before + each upload-pack.""" + initial_sha = self._upstream_main_sha() + + # Clone via gate. + clone_script = ( + f"set -e\n" + f"cd /tmp && git clone -q git://{self.gate_name}/foo.git r\n" + f"git -C r rev-parse refs/remotes/origin/main\n" + f"cat r/README.md\n" + ) + clone = subprocess.run( + ["docker", "run", "--rm", + "--network", self.internal_net, + "--entrypoint", "sh", + CLIENT_IMAGE, + "-c", clone_script], + capture_output=True, text=True, timeout=60, check=False, + ) + self.assertEqual( + 0, clone.returncode, + f"clone via gate failed: stdout={clone.stdout!r} " + f"stderr={clone.stderr!r}", + ) + cloned_sha = clone.stdout.strip().splitlines()[0] + self.assertEqual( + initial_sha, cloned_sha, + "clone via gate must return the upstream's current sha", + ) + self.assertIn("initial upstream content", clone.stdout) + + # Out-of-band commit on the upstream. 
+ new_sha = self._push_to_upstream_oob("second commit") + self.assertNotEqual(initial_sha, new_sha) + + # ls-remote via gate (re-fetch should pick up the new sha). + ls = subprocess.run( + ["docker", "run", "--rm", + "--network", self.internal_net, + "--entrypoint", "sh", + CLIENT_IMAGE, + "-c", f"git ls-remote git://{self.gate_name}/foo.git refs/heads/main"], + capture_output=True, text=True, timeout=60, check=False, + ) + self.assertEqual(0, ls.returncode, f"ls-remote failed: {ls.stderr!r}") + gate_sha = ls.stdout.split()[0] + self.assertEqual( + new_sha, gate_sha, + "ls-remote via gate must reflect the upstream's out-of-band update; " + "if this assertion fails, the access-hook is not refreshing on every " + "upload-pack and the gate is serving stale data", + ) + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_push_through_gate_lands_on_upstream(self): + """A clean (no-gitleaks-hit) push through the gate lands on + the upstream's bare repo — pre-receive phase 2 forwards + the accepted refs.""" + # Make a commit through the gate. The script clones via gate + # (so the commit will be a child of upstream's current main). 
+ push_script = textwrap.dedent(f""" + set -e + cd /tmp + git clone -q git://{self.gate_name}/foo.git r + cd r + git config user.email client@example + git config user.name client + echo "client-side commit" > NEW.md + git add NEW.md + git commit -q -m "client commit" + git rev-parse HEAD + git push origin main 2>&1 + """).strip() + push = subprocess.run( + ["docker", "run", "--rm", + "--network", self.internal_net, + "--entrypoint", "sh", + CLIENT_IMAGE, + "-c", push_script], + capture_output=True, text=True, timeout=120, check=False, + ) + self.assertEqual( + 0, push.returncode, + f"push via gate failed: stdout={push.stdout!r} " + f"stderr={push.stderr!r}", + ) + client_sha = push.stdout.splitlines()[0].strip() + self.assertEqual( + client_sha, self._upstream_main_sha(), + "push via gate must land on upstream's bare repo; " + "if this fails the pre-receive forward phase is broken or the " + "upstream credential is misconfigured", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_git_gate_sidecar.py b/tests/integration/test_git_gate_sidecar.py new file mode 100644 index 0000000..2537b6b --- /dev/null +++ b/tests/integration/test_git_gate_sidecar.py @@ -0,0 +1,224 @@ +"""Integration: per-agent git-gate sidecar (PRD 0008). + +Two tests against a real Docker daemon: + + 1. ls-remote against a gate whose upstream is unreachable fails + with the access-hook's fail-closed rejection. Proves the + daemon is bound to its port AND the access-hook is wired: + a working ls-remote against the gate is necessarily a working + ls-remote against the upstream (PRD 0008's transparent-mirror + contract). + 2. A push containing a gitleaks-detectable secret is rejected + by the pre-receive hook with a non-zero exit on the agent + side and a gitleaks-rejection line in the response. The PRD's + primary success criterion. + +A successful round-trip (clone through gate reflects upstream) +needs a reachable upstream SSH host; deferred to a follow-up. 
+""" + +import dataclasses +import os +import shutil +import subprocess +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.backend.docker.git_gate import ( + DockerGitGate, + build_git_gate_image, +) +from claude_bottle.backend.docker.network import ( + network_create_egress, + network_create_internal, + network_remove, +) +from claude_bottle.manifest import Manifest +from tests._docker import skip_unless_docker + +# The official gitleaks image already has git + alpine; reusing it +# for the client side too saves a separate image pull. +CLIENT_IMAGE = "zricethezav/gitleaks@sha256:c00b6bd0aeb3071cbcb79009cb16a60dd9e0a7c60e2be9ab65d25e6bc8abbb7f" + +# Synthetic high-entropy AKIA-shaped string; gitleaks's aws-access-token +# rule fires on this with the default config. AWS's own example +# ("AKIAIOSFODNN7EXAMPLE") is NOT flagged by gitleaks v8.x — entropy +# filter rejects it — so we use a distinct random-looking value. +FAKE_AWS_KEY = "AKIAQRJHK7N5ZPM2VXTL" + + +@skip_unless_docker() +class TestGitGateSidecar(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Pre-pull the client/gitleaks base so per-test runs aren't + # racing the registry. Skip cleanly on pull failure (a real + # outage is out of scope here). + result = subprocess.run( + ["docker", "pull", CLIENT_IMAGE], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, + ) + if result.returncode != 0: + raise unittest.SkipTest(f"could not pull {CLIENT_IMAGE}") + # Build the gate image once for the class. Layer cache makes + # repeated runs cheap. + build_git_gate_image() + + def setUp(self): + # DNS hostnames on user-defined Docker networks max out at 63 + # chars per label (RFC 1035). The full container name is + # `claude-bottle-git-gate-<slug>` = 23 + len(slug), so the slug + # has to stay under ~40 to be resolvable. Keep it short. 
+ suffix = self.id().rsplit('.', 1)[-1].replace('_', '-')[-12:] + self.slug = f"t{os.getpid()}-{suffix}" + self.gate_name = "" + self.internal_net = "" + self.egress_net = "" + self.work_dir = Path(tempfile.mkdtemp()) + + def tearDown(self): + if self.gate_name: + DockerGitGate().stop(self.gate_name) + for n in (self.internal_net, self.egress_net): + if n: + network_remove(n) + shutil.rmtree(self.work_dir, ignore_errors=True) + + def _start_gate(self, name: str = "foo") -> str: + """Build a one-upstream gate and bring it up. Returns the + container name (== git-gate hostname on the internal net).""" + # Contents of the fake key don't matter for these tests — the + # rejection-path hook never reaches phase 2 where it would be + # used, and ls-remote doesn't push. + fake_key = self.work_dir / "fake-key" + fake_key.write_text("not-a-real-key\n") + + manifest = Manifest.from_json_obj({ + "bottles": { + "dev": { + "git": [{ + "Name": name, + "Upstream": "ssh://git@upstream.invalid/path.git", + "IdentityFile": str(fake_key), + "KnownHostKey": "ssh-ed25519 AAAAEXAMPLE", + }], + }, + }, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + bottle = manifest.bottles["dev"] + + gate = DockerGitGate() + prep = gate.prepare(bottle, self.slug, self.work_dir) + + self.internal_net = network_create_internal(self.slug) + self.egress_net = network_create_egress(self.slug) + plan = dataclasses.replace( + prep, + internal_network=self.internal_net, + egress_network=self.egress_net, + ) + self.gate_name = gate.start(plan) + return self.gate_name + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_ls_remote_fails_closed_when_upstream_unreachable(self): + """The gate's access-hook runs `git fetch origin --prune` before + every upload-pack. 
With the fixture's deliberately unreachable + `ssh://git@upstream.invalid/...`, that fetch fails and the + hook exits 1; the daemon reports access-denied. Asserting + non-zero here is what proves the access-hook is wired: under + the v1 (push-only) design ls-remote against a fresh gate + returned exit 0 with no refs.""" + gate = self._start_gate("foo") + # Daemon still has to bind first; retry the TCP connect a few + # times. The expected end state is a non-zero exit from the + # daemon's access-denied response — not a connection refused. + probe = subprocess.run( + ["docker", "run", "--rm", + "--network", self.internal_net, + "--entrypoint", "sh", + CLIENT_IMAGE, + "-c", + f"for i in $(seq 1 15); do " + f" out=$(git ls-remote git://{gate}/foo.git 2>&1) && exit 99;" + f" case \"$out\" in *'access denied'*|*'not exported'*) " + f" echo \"$out\"; exit 1;; esac;" + f" sleep 1;" + f"done;" + f"echo TIMEOUT; exit 2"], + capture_output=True, text=True, timeout=60, check=False, + ) + # exit 1: daemon access-denied as expected. exit 99 would mean + # ls-remote actually succeeded against the unreachable upstream + # (impossible — would indicate stale-data serving, the very + # thing the access-hook is meant to prevent). + self.assertEqual( + 1, probe.returncode, + f"expected fail-closed access-denied; got " + f"exit={probe.returncode} stdout={probe.stdout!r} " + f"stderr={probe.stderr!r}", + ) + + @unittest.skipIf( + os.environ.get("GITEA_ACTIONS") == "true", + "skipped under act_runner: docker socket mount topology breaks " + "in-process visibility of networks created on the host daemon", + ) + def test_push_with_secret_is_rejected(self): + """The PRD 0008 success criterion: a push containing a + gitleaks-detectable secret is rejected; the hook's "gitleaks + rejected" line appears in the response, and git push exits + non-zero on the client side.""" + gate = self._start_gate("foo") + push_script = ( + "set -e\n" + "cd /tmp\n" + # Wait for git daemon to bind. 
Under the v1.1 design, + # ls-remote never returns 0 against an unreachable + # upstream (access-hook fail-closed), so we wait for *any* + # response (the daemon's access-denied line) as the + # readiness signal. + f"for i in $(seq 1 15); do " + f" out=$(git ls-remote git://{gate}/foo.git 2>&1) || true;" + f" case \"$out\" in *'remote error'*|*'access denied'*) break;; esac;" + f" sleep 1;" + f"done\n" + "git init -q -b main repo\n" + "cd repo\n" + "git config user.email test@example.com\n" + "git config user.name test\n" + f"echo '{FAKE_AWS_KEY}' > leak.txt\n" + "git add leak.txt\n" + "git commit -q -m leak\n" + f"git push git://{gate}/foo.git main 2>&1\n" + ) + result = subprocess.run( + ["docker", "run", "--rm", + "--network", self.internal_net, + "--entrypoint", "sh", + CLIENT_IMAGE, + "-c", push_script], + capture_output=True, text=True, timeout=120, check=False, + ) + combined = result.stdout + result.stderr + self.assertNotEqual( + 0, result.returncode, + f"expected push to fail; output={combined!r}", + ) + # Hook's stderr is delivered to the client via the `remote:` + # prefix during a git push. Either token is enough to prove + # the pre-receive hook ran and rejected the push. 
+ self.assertTrue( + "gitleaks rejected" in combined or "leaks found" in combined, + f"expected a gitleaks rejection in the response; got: {combined!r}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_git_gate.py b/tests/unit/test_git_gate.py new file mode 100644 index 0000000..ff2d402 --- /dev/null +++ b/tests/unit/test_git_gate.py @@ -0,0 +1,188 @@ +"""Unit: GitGate prepare shape + entrypoint/hook render (PRD 0008).""" + +import os +import tempfile +import unittest +from pathlib import Path + +from claude_bottle.git_gate import ( + GitGate, + GitGatePlan, + GitGateUpstream, + git_gate_known_hosts_line, + git_gate_render_access_hook, + git_gate_render_entrypoint, + git_gate_render_hook, + git_gate_upstreams_for_bottle, +) +from tests.fixtures import fixture_minimal, fixture_with_git + + +class _StubGate(GitGate): + def start(self, plan: GitGatePlan) -> str: + raise NotImplementedError + + def stop(self, target: str) -> None: + raise NotImplementedError + + +class TestUpstreamsForBottle(unittest.TestCase): + def test_one_upstream_per_git_entry(self): + bottle = fixture_with_git().bottles["dev"] + ups = git_gate_upstreams_for_bottle(bottle) + self.assertEqual(2, len(ups)) + self.assertEqual("claude-bottle", ups[0].name) + self.assertEqual("gitea.dideric.is", ups[0].upstream_host) + self.assertEqual("30009", ups[0].upstream_port) + self.assertEqual("foo", ups[1].name) + self.assertEqual("github.com", ups[1].upstream_host) + self.assertEqual("22", ups[1].upstream_port) + + def test_empty_bottle_yields_empty_upstreams(self): + bottle = fixture_minimal().bottles["dev"] + self.assertEqual((), git_gate_upstreams_for_bottle(bottle)) + + +class TestKnownHostsLine(unittest.TestCase): + def test_default_port_unbracketed(self): + line = git_gate_known_hosts_line("github.com", "22", "ssh-ed25519 AAAA") + self.assertEqual("github.com ssh-ed25519 AAAA\n", line) + + def test_non_default_port_bracketed(self): + line = 
git_gate_known_hosts_line("gitea.dideric.is", "30009", "ssh-ed25519 AAAA") + self.assertEqual("[gitea.dideric.is]:30009 ssh-ed25519 AAAA\n", line) + + +class TestEntrypointRender(unittest.TestCase): + def test_one_init_repo_call_per_upstream(self): + ups = ( + GitGateUpstream( + name="claude-bottle", + upstream_url="ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + upstream_host="gitea.dideric.is", + upstream_port="30009", + identity_file="/host/path/key", + known_host_key="ssh-ed25519 AAAA", + ), + GitGateUpstream( + name="foo", + upstream_url="ssh://git@github.com/didericis/foo.git", + upstream_host="github.com", + upstream_port="22", + identity_file="/host/path/key2", + known_host_key="", + ), + ) + script = git_gate_render_entrypoint(ups) + self.assertIn("#!/bin/sh", script) + self.assertIn( + "init_repo 'claude-bottle' " + "'ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git'", + script, + ) + self.assertIn( + "init_repo 'foo' 'ssh://git@github.com/didericis/foo.git'", + script, + ) + # Daemon line is what keeps PID 1 alive. + self.assertIn("exec git daemon", script) + self.assertIn("--enable=receive-pack", script) + self.assertIn("--base-path=/git", script) + # The access-hook is what makes fetch a mirror operation + # against the upstream (PRD 0008 v1.1). + self.assertIn("--access-hook=/etc/git-gate/access-hook", script) + # Each repo's `origin` remote is wired to the upstream via + # --mirror=fetch so `git fetch origin` mirrors all refs. + self.assertIn("remote add --mirror=fetch origin", script) + + def test_empty_upstreams_still_execs_daemon(self): + # A no-upstream gate is a no-op for repos but the daemon still + # has to start so the entrypoint doesn't exit. 
+ script = git_gate_render_entrypoint(()) + self.assertNotIn("init_repo '", script) + self.assertIn("exec git daemon", script) + + +class TestHookRender(unittest.TestCase): + def test_pre_receive_hook_has_two_phases(self): + hook = git_gate_render_hook() + # Phase 1: gitleaks. Phase 2: forward to origin. + self.assertIn("gitleaks git", hook) + self.assertIn("git push origin", hook) + # KnownHostKey absence is fail-closed. + self.assertIn("refusing to push", hook) + # Stdin is buffered to a tempfile so both phases can re-read. + self.assertIn("refs_file=$(mktemp)", hook) + + +class TestAccessHookRender(unittest.TestCase): + def test_access_hook_refreshes_origin_on_upload_pack(self): + hook = git_gate_render_access_hook() + # Service-name guard: only upload-pack (fetch / clone / pull / + # ls-remote) triggers the upstream refresh; receive-pack + # bypasses this and the pre-receive hook gates it instead. + self.assertIn('service=$1', hook) + self.assertIn('"$service" != "upload-pack"', hook) + # The fetch is what makes the gate a transparent mirror. + self.assertIn("git -C \"$repo_dir\" fetch origin --prune", hook) + + def test_access_hook_fail_closed_on_upstream_error(self): + hook = git_gate_render_access_hook() + # Upstream-fetch failure exits non-zero, which propagates to + # the agent's fetch as a real error rather than stale data. 
+ self.assertIn("refusing to serve stale data", hook) + self.assertIn("exit 1", hook) + + +class TestPrepare(unittest.TestCase): + def setUp(self): + self.stage = Path(tempfile.mkdtemp()) + + def tearDown(self): + import shutil + + shutil.rmtree(self.stage, ignore_errors=True) + + def test_prepare_writes_all_three_scripts(self): + plan = _StubGate().prepare( + fixture_with_git().bottles["dev"], "demo", self.stage + ) + self.assertEqual( + self.stage / "git_gate_entrypoint.sh", plan.entrypoint_script + ) + self.assertEqual( + self.stage / "git_gate_pre_receive.sh", plan.hook_script + ) + self.assertEqual( + self.stage / "git_gate_access_hook.sh", plan.access_hook_script + ) + # Entrypoint + pre-receive are mode 600 (loaded into the + # gate by docker cp and then `install -m 755`'d into each + # bare repo's hooks/ — source bit doesn't matter). The + # access-hook is execed directly by git daemon, so it has to + # carry the x bit through docker cp. + self.assertEqual(0o600, os.stat(plan.entrypoint_script).st_mode & 0o777) + self.assertEqual(0o600, os.stat(plan.hook_script).st_mode & 0o777) + self.assertEqual(0o700, os.stat(plan.access_hook_script).st_mode & 0o777) + + def test_prepare_plan_carries_upstreams_and_slug(self): + plan = _StubGate().prepare( + fixture_with_git().bottles["dev"], "demo", self.stage + ) + self.assertEqual("demo", plan.slug) + self.assertEqual(2, len(plan.upstreams)) + self.assertEqual("", plan.internal_network) + self.assertEqual("", plan.egress_network) + + def test_prepare_with_no_git_writes_minimal_script(self): + plan = _StubGate().prepare( + fixture_minimal().bottles["dev"], "demo", self.stage + ) + self.assertEqual((), plan.upstreams) + content = plan.entrypoint_script.read_text() + self.assertNotIn("init_repo '", content) + self.assertIn("exec git daemon", content) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_manifest_git.py b/tests/unit/test_manifest_git.py new file mode 100644 index 0000000..87cc1bc 
--- /dev/null +++ b/tests/unit/test_manifest_git.py @@ -0,0 +1,192 @@ +"""Unit: Bottle.git manifest parsing + validation (PRD 0008).""" + +import unittest + +from claude_bottle.log import Die +from claude_bottle.manifest import Manifest + + +def _manifest(git_entries): + return { + "bottles": {"dev": {"git": git_entries}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + } + + +class TestGitEntryParsing(unittest.TestCase): + def test_parses_minimal_entry(self): + m = Manifest.from_json_obj(_manifest([{ + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + }])) + entries = m.bottles["dev"].git + self.assertEqual(1, len(entries)) + e = entries[0] + self.assertEqual("claude-bottle", e.Name) + self.assertEqual("git", e.UpstreamUser) + self.assertEqual("gitea.dideric.is", e.UpstreamHost) + self.assertEqual("30009", e.UpstreamPort) + self.assertEqual("didericis/claude-bottle.git", e.UpstreamPath) + + def test_default_port_is_22(self): + m = Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://git@github.com/didericis/foo.git", + "IdentityFile": "/dev/null", + }])) + e = m.bottles["dev"].git[0] + self.assertEqual("22", e.UpstreamPort) + self.assertEqual("github.com", e.UpstreamHost) + + def test_known_host_key_optional(self): + m = Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://git@github.com/foo.git", + "IdentityFile": "/dev/null", + }])) + self.assertEqual("", m.bottles["dev"].git[0].KnownHostKey) + + def test_missing_name_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Upstream": "ssh://git@github.com/foo.git", + "IdentityFile": "/dev/null", + }])) + + def test_missing_upstream_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "IdentityFile": "/dev/null", + }])) + + def test_missing_identity_file_dies(self): + with 
self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://git@github.com/foo.git", + }])) + + def test_non_ssh_upstream_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "https://github.com/didericis/foo.git", + "IdentityFile": "/dev/null", + }])) + + def test_scp_style_upstream_dies(self): + # SCP-style "git@host:path" is intentionally not supported in + # v1 — ssh:// only. + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "git@github.com:didericis/foo.git", + "IdentityFile": "/dev/null", + }])) + + def test_upstream_without_user_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://github.com/foo.git", + "IdentityFile": "/dev/null", + }])) + + def test_upstream_without_path_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://git@github.com", + "IdentityFile": "/dev/null", + }])) + + def test_non_numeric_port_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([{ + "Name": "foo", + "Upstream": "ssh://git@github.com:notaport/foo.git", + "IdentityFile": "/dev/null", + }])) + + +class TestGitEntryCrossValidation(unittest.TestCase): + def test_duplicate_name_dies(self): + with self.assertRaises(Die): + Manifest.from_json_obj(_manifest([ + {"Name": "foo", "Upstream": "ssh://git@a.example/x.git", + "IdentityFile": "/dev/null"}, + {"Name": "foo", "Upstream": "ssh://git@b.example/y.git", + "IdentityFile": "/dev/null"}, + ])) + + def test_shadow_route_with_ssh_entry_dies(self): + # An ssh entry pointing at gitea.dideric.is:30009 AND a git + # entry pointing at ssh://git@gitea.dideric.is:30009/... is a + # bypass: agents could route around the gate by using the + # ssh-gate. Manifest construction must reject. 
+ with self.assertRaises(Die): + Manifest.from_json_obj({ + "bottles": { + "dev": { + "ssh": [{ + "Host": "gitea", + "IdentityFile": "/dev/null", + "Hostname": "gitea.dideric.is", + "User": "git", + "Port": 30009, + }], + "git": [{ + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + }], + }, + }, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + + def test_independent_ssh_and_git_targets_allowed(self): + # Same hostname but different ports are independent targets. + m = Manifest.from_json_obj({ + "bottles": { + "dev": { + "ssh": [{ + "Host": "gitea-ssh", + "IdentityFile": "/dev/null", + "Hostname": "gitea.dideric.is", + "User": "git", + "Port": 22, + }], + "git": [{ + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + }], + }, + }, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + self.assertEqual(1, len(m.bottles["dev"].ssh)) + self.assertEqual(1, len(m.bottles["dev"].git)) + + +class TestEmptyGitField(unittest.TestCase): + def test_no_git_field_yields_empty_tuple(self): + m = Manifest.from_json_obj({ + "bottles": {"dev": {}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + self.assertEqual((), m.bottles["dev"].git) + + def test_git_array_type_required(self): + with self.assertRaises(Die): + Manifest.from_json_obj({ + "bottles": {"dev": {"git": "not-a-list"}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_provision_git.py b/tests/unit/test_provision_git.py new file mode 100644 index 0000000..8c2f6af --- /dev/null +++ b/tests/unit/test_provision_git.py @@ -0,0 +1,46 @@ +"""Unit: render of ~/.gitconfig insteadOf rules (PRD 0008).""" + +import unittest + +from claude_bottle.backend.docker.provision.git import 
render_git_gate_gitconfig +from tests.fixtures import fixture_minimal, fixture_with_git + + +class TestGitGateGitconfigRender(unittest.TestCase): + def test_empty_entries_renders_nothing(self): + bottle = fixture_minimal().bottles["dev"] + self.assertEqual("", render_git_gate_gitconfig("demo", bottle.git)) + + def test_one_block_per_entry(self): + bottle = fixture_with_git().bottles["dev"] + out = render_git_gate_gitconfig("demo", bottle.git) + # Both entries map to a [url ...] block keyed on the gate's + # container hostname (claude-bottle-git-gate-<slug>). + self.assertIn( + '[url "git://claude-bottle-git-gate-demo/claude-bottle.git"]', + out, + ) + self.assertIn( + "\tinsteadOf = " + "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + out, + ) + self.assertIn('[url "git://claude-bottle-git-gate-demo/foo.git"]', out) + self.assertIn( + "\tinsteadOf = ssh://git@github.com/didericis/foo.git", + out, + ) + + def test_insteadOf_not_pushInsteadOf(self): + # The gate mirrors fetch and push, so insteadOf (which rewrites + # both directions) is the right knob. pushInsteadOf would only + # gate push and leave fetch on the original URL — exactly the + # v1 design we've moved past. + bottle = fixture_with_git().bottles["dev"] + out = render_git_gate_gitconfig("demo", bottle.git) + self.assertIn("\tinsteadOf", out) + self.assertNotIn("pushInsteadOf", out) + + +if __name__ == "__main__": + unittest.main()