PRD 0046: Remove Git Remote Host Overrides #153

Merged
didericis merged 3 commits from prd-0047-remove-extra-hosts into main 2026-06-02 14:24:05 -04:00
15 changed files with 78 additions and 242 deletions
+2 -8
View File
@@ -157,14 +157,8 @@ and MCP endpoints resolve without an agent-side change.
upstream has *now* (fail-closed if unreachable). The agent's
`~/.gitconfig` rewrites the real URL to the gate via `insteadOf`,
so push, fetch, clone, and pull all route through. The agent
never sees the upstream credential. If the upstream's hostname
isn't resolvable from the gate container (e.g. a Tailscale-only
host whose public DNS points elsewhere), pin its IP via
`ExtraHosts: { "<hostname>": "<ip>" }` on the `bottle.git` entry —
the gate's `/etc/hosts` gets the override while the agent's
`insteadOf` rewrite still keys off the original hostname. Brought
up only when `bottle.git` has entries. Design in
`docs/prds/0008-git-gate.md`.
never sees the upstream credential. Brought up only when
`bottle.git` has entries. Design in `docs/prds/0008-git-gate.md`.
- **cred-proxy image** — per-bottle sidecar (`python:3.13-alpine`
base, stdlib-only) that holds API tokens declared in
`bottle.cred_proxy.routes`. Each route names a `path`,
+1 -6
View File
@@ -49,7 +49,7 @@ from ...egress import (
EGRESS_HOSTNAME,
EGRESS_ROUTES_IN_CONTAINER,
)
from ...git_gate import GIT_GATE_HOSTNAME, git_gate_aggregate_extra_hosts
from ...git_gate import GIT_GATE_HOSTNAME
from ...log import die, warn
from ...pipelock import PIPELOCK_HOSTNAME
from ...supervise import (
@@ -198,7 +198,6 @@ def _sidecar_bundle_service(plan: DockerBottlePlan) -> dict[str, Any]:
env.append(token_env)
# --- git-gate ----------------------------------------------------
extra_hosts: list[str] = []
gp = plan.git_gate_plan
if gp.upstreams:
volumes += [
@@ -217,8 +216,6 @@ def _sidecar_bundle_service(plan: DockerBottlePlan) -> dict[str, Any]:
u.known_hosts_file,
f"{GIT_GATE_CREDS_DIR_IN_CONTAINER}/{u.name}-known_hosts",
))
extra_map = git_gate_aggregate_extra_hosts(gp.upstreams)
extra_hosts = [f"{host}:{ip}" for host, ip in sorted(extra_map.items())]
# --- supervise ---------------------------------------------------
sp = plan.supervise_plan
@@ -261,8 +258,6 @@ def _sidecar_bundle_service(plan: DockerBottlePlan) -> dict[str, Any]:
"environment": env,
"volumes": volumes,
}
if extra_hosts:
service["extra_hosts"] = extra_hosts
return service
@@ -349,7 +349,6 @@ def _bundle_launch_spec(
env.append(token_env)
# --- git-gate ---------------------------------------------
extra_hosts: list[str] = []
gp = plan.git_gate_plan
if gp.upstreams:
daemons += ["git-gate", "git-http"]
+2 -40
View File
@@ -30,11 +30,9 @@ backend-specific and lives on concrete subclasses (see
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from dataclasses import dataclass
from pathlib import Path
from typing import Mapping
from .log import die
from .manifest import Bottle, GitEntry
@@ -47,10 +45,6 @@ GIT_GATE_HOSTNAME = "git-gate"
GIT_GATE_DAEMON_TIMEOUT_SECS = 15
def _empty_str_map() -> dict[str, str]:
return {}
@dataclass(frozen=True)
class GitGateUpstream:
"""One bare repo on the gate. `name` drives the bare-repo path
@@ -64,10 +58,7 @@ class GitGateUpstream:
KnownHostKey string from the manifest; the gate's start step
materialises it into a known_hosts file if non-empty.
`extra_hosts` is a `{hostname: ip}` map the backend injects into
the gate container's `/etc/hosts` via `--add-host` so the gate
can resolve upstream hostnames that aren't reachable via the
container's default DNS (e.g. Tailscale-only hosts)."""
the gate credential paths inside the running sidecar."""
name: str
upstream_url: str
@@ -76,7 +67,6 @@ class GitGateUpstream:
identity_file: str
known_host_key: str
known_hosts_file: Path = Path()
extra_hosts: Mapping[str, str] = field(default_factory=_empty_str_map)
@dataclass(frozen=True)
@@ -113,38 +103,11 @@ def git_gate_upstreams_for_bottle(bottle: Bottle) -> tuple[GitGateUpstream, ...]
upstream_port=e.UpstreamPort,
identity_file=e.IdentityFile,
known_host_key=e.KnownHostKey,
extra_hosts=dict(e.ExtraHosts),
)
for e in bottle.git
)
def git_gate_aggregate_extra_hosts(
upstreams: tuple[GitGateUpstream, ...],
) -> dict[str, str]:
"""Merge every upstream's `extra_hosts` into a single
`{hostname: ip}` map for `--add-host` on the gate container. Two
entries naming the same hostname with different IPs is a manifest
bug — the gate has one /etc/hosts — so die loudly with the
conflicting names rather than silently picking one."""
merged: dict[str, str] = {}
source: dict[str, str] = {}
for u in upstreams:
for host, ip in u.extra_hosts.items():
existing = merged.get(host)
if existing is None:
merged[host] = ip
source[host] = u.name
elif existing != ip:
die(
f"git-gate ExtraHosts conflict: '{host}' maps to "
f"'{existing}' in upstream '{source[host]}' and to "
f"'{ip}' in upstream '{u.name}'. The gate has one "
f"/etc/hosts; pick one IP."
)
return merged
def git_gate_render_gitconfig(
entries: tuple[GitEntry, ...], gate_host: str, *, scheme: str = "git",
) -> str:
@@ -443,7 +406,6 @@ class GitGate(ABC):
identity_file=u.identity_file,
known_host_key=u.known_host_key,
known_hosts_file=known_hosts_file,
extra_hosts=dict(u.extra_hosts),
)
)
return GitGatePlan(
-33
View File
@@ -71,13 +71,6 @@ class GitEntry:
upstream after gitleaks passes. The agent itself never holds the
upstream credential.
`ExtraHosts` is an optional `{hostname: ip}` map injected into the
gate container's `/etc/hosts` via `--add-host`. Use it when the
Upstream's hostname isn't resolvable from the gate (e.g. a
Tailscale-only host whose public DNS A record points elsewhere):
the agent's `insteadOf` rewrite still matches the original
hostname, but the gate routes to the right IP.
The Upstream URL is parsed once at construction and the pieces are
stashed in the `Upstream*` fields so the git-gate render step
doesn't have to re-parse."""
@@ -86,7 +79,6 @@ class GitEntry:
Upstream: str
IdentityFile: str
KnownHostKey: str = ""
ExtraHosts: Mapping[str, str] = field(default_factory=_empty_str_dict)
RemoteKey: str = ""
UpstreamUser: str = ""
UpstreamHost: str = ""
@@ -139,10 +131,6 @@ class GitEntry:
d.get("KnownHostKey"),
f"bottle '{bottle_name}' {label} '{name}' KnownHostKey",
)
extra_hosts = _opt_extra_hosts(
d.get("ExtraHosts"),
f"bottle '{bottle_name}' {label} '{name}' ExtraHosts",
)
user, host, port, path = _parse_git_upstream(
upstream, f"bottle '{bottle_name}' {label} '{name}' Upstream"
)
@@ -160,7 +148,6 @@ class GitEntry:
Upstream=upstream,
IdentityFile=ident,
KnownHostKey=khk,
ExtraHosts=extra_hosts,
RemoteKey=host_key or host,
UpstreamUser=user,
UpstreamHost=host,
@@ -978,26 +965,6 @@ def _opt_str(value: object, label: str) -> str:
return value
def _opt_extra_hosts(value: object, label: str) -> dict[str, str]:
"""Validate a `{hostname: ip}` object and return a plain dict. None
yields an empty dict so callers can treat ExtraHosts as always
present. IP format is not checked here; docker validates at
`--add-host` time."""
if value is None:
return {}
obj = _as_json_object(value, label)
out: dict[str, str] = {}
for host, ip in obj.items():
if not host:
raise ManifestError(f"{label} contains an empty hostname key")
if not isinstance(ip, str):
raise ManifestError(f"{label}['{host}'] must be a string (was {type(ip).__name__})")
if not ip:
raise ManifestError(f"{label}['{host}'] must be a non-empty string")
out[host] = ip
return out
def _parse_git_upstream(url: str, label: str) -> tuple[str, str, str, str]:
"""Parse `ssh://user@host[:port]/path` into (user, host, port, path).
Dies if `url` doesn't match the ssh:// shape v1 supports. Default
+1 -6
View File
@@ -83,12 +83,7 @@ for a declared upstream:
- **Manifest field.** `bottle.git` — a list of git remotes the
bottle is allowed to talk to, each with the credential the gate
uses to push upstream. The agent gets no parallel `bottle.ssh`
entry for those upstreams. Each entry may also carry an
`ExtraHosts: { hostname: ip }` map, surfaced to the gate as
`--add-host` so the gate can resolve upstreams whose public DNS
doesn't point at the reachable IP (e.g. Tailscale-only hosts).
The agent-side `insteadOf` rewrite keys off the original hostname,
so the manifest's `Upstream` URL stays human-readable.
entry for those upstreams.
- **Agent-side URL rewrite.** Provisioner emits `~/.gitconfig`
with `[url "<gate-url>"] insteadOf = <real-url>` so every git
operation against the declared upstream (push, fetch, clone,
+1 -2
View File
@@ -88,8 +88,7 @@ the unused path.
- **Pipelock interaction.** Drop the SSH-derived branch from
pipelock's `ssrf.ip_allowlist` build. With no `bottle.ssh`
there is no per-upstream IP carve-out to render; git-gate
has its own egress network and pulls in upstream resolution
via `ExtraHosts` plus DNS.
has its own egress network.
- **Tests.** Delete the ssh-gate unit + integration suites,
the ssh fixtures in `tests/fixtures.py`, and the
shadow-route assertions in `test_manifest_git.py`. Adjust
-2
View File
@@ -274,8 +274,6 @@ git:
Name: bot-bottle
Upstream: ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git
IdentityFile: ~/.ssh/gitea-delos-2.pem
ExtraHosts:
gitea.dideric.is: 100.78.141.42
KnownHostKey: ssh-rsa AAAAB3...
egress:
allowlist:
+1 -2
View File
@@ -161,8 +161,7 @@ expectation. (Same model as shell `export` precedence.)
`git.remotes` is also keyed, so it follows dict-style inheritance:
children can override one host without restating every remote. The
remote entry is replaced as a whole on host collision because
`Upstream`, `IdentityFile`, `KnownHostKey`, and `ExtraHosts` are
tightly coupled.
`Upstream`, `IdentityFile`, and `KnownHostKey` are tightly coupled.
The `git.user` dataclass-overlay (each non-empty field wins
individually) is so a parent can declare `git.user.name` and a
@@ -0,0 +1,64 @@
# PRD 0047: Remove Git Remote Host Overrides
- **Status:** Active
- **Author:** didericis-codex
- **Created:** 2026-06-02
- **Issue:** #152
## Summary
Remove git remote host override plumbing from bottle manifests and git-gate
startup. Git remote declarations should describe upstream repositories and the
git-gate credential material needed to mirror them; they should not also
configure hosts-file behavior for sidecars.
## Problem
The git remote model currently has a hosts override path that can make a git
upstream resolve differently inside the git-gate sidecar. That is surprising
because the same hostname may also be used for HTTP/API traffic that should keep
using the normal egress DNS and policy path.
Keeping host resolution in the git remote model makes repository routing,
sidecar hosts files, and egress behavior feel coupled even when the operator
only meant to configure git-gate.
## Goals / Success Criteria
- Git remote manifest parsing no longer stores host override data.
- Git-gate upstream plans no longer carry host override data.
- Docker compose rendering no longer emits sidecar `extra_hosts` entries from
git remote declarations.
- Smolmachines bundle launch planning has no unused host override path for
git-gate.
- Focused unit tests cover the absence of sidecar `extra_hosts` for git
upstreams.
- Current user-facing documentation no longer advertises git remote host
overrides.
## Non-goals
- No replacement hosts-file override feature.
- No SSH client config provisioning.
- No change to git-gate's SSH credential or known-host handling.
- No change to egress DNS, HTTP auth, or pipelock routing semantics.
## Design
Remove the host override field from the internal `GitEntry` and
`GitGateUpstream` models. Remove the git-gate aggregation helper and the Docker
compose code that converted those values into sidecar `extra_hosts`.
The manifest parser does not need a migration-specific error path. After this
change, the old hosts override key has no internal model field and no runtime
effect.
## Testing Strategy
Run:
- `python3 -m unittest discover -s tests/unit`
## Open Questions
None.
+1 -6
View File
@@ -151,7 +151,6 @@ def _plan(
identity_file="/etc/hostname",
known_host_key="",
known_hosts_file=STATE / "git-gate" / "upstream-known_hosts",
extra_hosts={"example.com": "10.0.0.1"},
),)
routes: tuple[EgressRoute, ...] = ()
if with_egress:
@@ -440,12 +439,8 @@ class TestSidecarBundleShape(unittest.TestCase):
self.assertTrue(any("supervise/queue" in t or t.startswith("/run/supervise")
for t in targets))
def test_extra_hosts_emitted_for_git_upstreams(self):
def test_extra_hosts_omitted_for_git_upstreams(self):
sc = self._render(with_git=True)["services"]["sidecars"]
self.assertIn("example.com:10.0.0.1", sc.get("extra_hosts", []))
def test_extra_hosts_omitted_when_no_git(self):
sc = self._render()["services"]["sidecars"]
self.assertNotIn("extra_hosts", sc)
def test_agent_depends_on_bundle_only(self):
-82
View File
@@ -9,14 +9,12 @@ from bot_bottle.git_gate import (
GitGate,
GitGatePlan,
GitGateUpstream,
git_gate_aggregate_extra_hosts,
git_gate_known_hosts_line,
git_gate_render_access_hook,
git_gate_render_entrypoint,
git_gate_render_hook,
git_gate_upstreams_for_bottle,
)
from bot_bottle.log import Die
from bot_bottle.manifest import Manifest
from tests.fixtures import fixture_minimal, fixture_with_git
@@ -46,86 +44,6 @@ class TestUpstreamsForBottle(unittest.TestCase):
self.assertEqual((), git_gate_upstreams_for_bottle(bottle))
class TestExtraHostsPlumbing(unittest.TestCase):
def test_upstream_carries_extra_hosts_from_manifest(self):
m = Manifest.from_json_obj({
"bottles": {
"dev": {
"git": {"remotes": {
"gitea.dideric.is": {
"Name": "bot-bottle",
"Upstream": "ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git",
"IdentityFile": "/dev/null",
"ExtraHosts": {"gitea.dideric.is": "100.78.141.42"},
},
}},
},
},
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
})
ups = git_gate_upstreams_for_bottle(m.bottles["dev"])
self.assertEqual(
{"gitea.dideric.is": "100.78.141.42"}, dict(ups[0].extra_hosts)
)
def test_aggregator_merges_distinct_hostnames(self):
ups = (
GitGateUpstream(
name="a", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"a.example": "10.0.0.1"},
),
GitGateUpstream(
name="b", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"b.example": "10.0.0.2"},
),
)
self.assertEqual(
{"a.example": "10.0.0.1", "b.example": "10.0.0.2"},
git_gate_aggregate_extra_hosts(ups),
)
def test_aggregator_allows_same_host_same_ip(self):
# Two entries listing the same host:ip is harmless duplication,
# not a conflict. The gate's /etc/hosts ends up with one line.
ups = (
GitGateUpstream(
name="a", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"gitea.dideric.is": "100.78.141.42"},
),
GitGateUpstream(
name="b", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"gitea.dideric.is": "100.78.141.42"},
),
)
self.assertEqual(
{"gitea.dideric.is": "100.78.141.42"},
git_gate_aggregate_extra_hosts(ups),
)
def test_aggregator_rejects_conflicting_ips(self):
ups = (
GitGateUpstream(
name="a", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"gitea.dideric.is": "100.78.141.42"},
),
GitGateUpstream(
name="b", upstream_url="", upstream_host="", upstream_port="",
identity_file="", known_host_key="",
extra_hosts={"gitea.dideric.is": "10.0.0.99"},
),
)
with self.assertRaises(Die):
git_gate_aggregate_extra_hosts(ups)
def test_aggregator_empty_is_empty(self):
self.assertEqual({}, git_gate_aggregate_extra_hosts(()))
class TestKnownHostsLine(unittest.TestCase):
def test_default_port_unbracketed(self):
line = git_gate_known_hosts_line("github.com", "22", "ssh-ed25519 AAAA")
-47
View File
@@ -125,53 +125,6 @@ class TestGitEntryParsing(unittest.TestCase):
}]))
class TestGitEntryExtraHosts(unittest.TestCase):
def test_extra_hosts_defaults_to_empty(self):
m = Manifest.from_json_obj(_manifest([{
"Name": "foo",
"Upstream": "ssh://git@github.com/foo.git",
"IdentityFile": "/dev/null",
}]))
self.assertEqual({}, dict(m.bottles["dev"].git[0].ExtraHosts))
def test_extra_hosts_parses_host_to_ip_map(self):
m = Manifest.from_json_obj(_manifest([{
"Name": "bot-bottle",
"Upstream": "ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git",
"IdentityFile": "/dev/null",
"ExtraHosts": {"gitea.dideric.is": "100.78.141.42"},
}]))
eh = dict(m.bottles["dev"].git[0].ExtraHosts)
self.assertEqual({"gitea.dideric.is": "100.78.141.42"}, eh)
def test_extra_hosts_must_be_object(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest([{
"Name": "foo",
"Upstream": "ssh://git@github.com/foo.git",
"IdentityFile": "/dev/null",
"ExtraHosts": ["gitea.dideric.is", "100.78.141.42"],
}]))
def test_extra_hosts_ip_must_be_string(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest([{
"Name": "foo",
"Upstream": "ssh://git@github.com/foo.git",
"IdentityFile": "/dev/null",
"ExtraHosts": {"gitea.dideric.is": 100},
}]))
def test_extra_hosts_empty_ip_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest([{
"Name": "foo",
"Upstream": "ssh://git@github.com/foo.git",
"IdentityFile": "/dev/null",
"ExtraHosts": {"gitea.dideric.is": ""},
}]))
class TestGitEntryCrossValidation(unittest.TestCase):
def test_duplicate_name_dies(self):
with self.assertRaises(ManifestError):
-1
View File
@@ -56,7 +56,6 @@ def _git_gate_plan(tmp: str) -> GitGatePlan:
upstream_port="30009",
identity_file="/dev/null",
known_host_key="ssh-ed25519 AAAA...",
extra_hosts={},
),
),
)
+5 -6
View File
@@ -160,13 +160,13 @@ class TestBlockList(unittest.TestCase):
out = _y("""
entries:
- name: foo
ExtraHosts:
metadata:
host.example: 10.0.0.1
- name: bar
""")
self.assertEqual(
{"entries": [
{"name": "foo", "ExtraHosts": {"host.example": "10.0.0.1"}},
{"name": "foo", "metadata": {"host.example": "10.0.0.1"}},
{"name": "bar"},
]},
out,
@@ -270,8 +270,7 @@ class TestRealisticBottleFile(unittest.TestCase):
Name: bot-bottle
Upstream: ssh://git@gitea.dideric.is:30009/x/y.git
IdentityFile: ~/.ssh/gitea.pem
ExtraHosts:
gitea.dideric.is: 100.78.141.42
KnownHostKey: ssh-ed25519 AAAA...
""")
# Spot-check the deep parts; the structure is large.
self.assertEqual(2, len(out["egress"]["routes"]))
@@ -284,8 +283,8 @@ class TestRealisticBottleFile(unittest.TestCase):
out["egress"]["routes"][0]["auth"]["scheme"],
)
self.assertEqual(
"100.78.141.42",
out["git"]["remotes"]["gitea.dideric.is"]["ExtraHosts"]["gitea.dideric.is"],
"ssh-ed25519 AAAA...",
out["git"]["remotes"]["gitea.dideric.is"]["KnownHostKey"],
)