Better merge behavior for git-gate repos on extends #238

Merged
didericis merged 5 commits from git-gate-repo-field-merge-on-extends into main 2026-06-22 14:49:50 -04:00
2 changed files with 151 additions and 11 deletions
Showing only changes of commit 4ed6b84863 - Show all commits
+70 -3
View File
@@ -75,6 +75,11 @@ def _merge_bottles(
from .manifest import ManifestBottle, ManifestGitUser
from .manifest_egress import validate_egress_routes
# Before parsing the child, fill any git-gate repos that share a name
# with a parent repo: parent fields provide the default, child fields
# win on any field they declare (issue #237).
child_raw = _pre_merge_git_repos(parent.git, child_raw)
# Parse the child's declared fields into a ManifestBottle (with the
# usual defaults for anything missing). Validation runs the same
# way it would for a leaf bottle: typos / wrong types die here.
@@ -130,6 +135,68 @@ def _merge_bottles(
)
def _entry_to_raw(entry: "ManifestGitEntry") -> dict[str, object]:
"""Convert a ManifestGitEntry back to its raw YAML-equivalent dict."""
raw: dict[str, object] = {"url": entry.Upstream}
if entry.KnownHostKey:
raw["host_key"] = entry.KnownHostKey
key: dict[str, object] = {"provider": entry.Key.provider}
if entry.Key.provider == "static":
key["path"] = entry.Key.path
else:
key["forge_token_env"] = entry.Key.forge_token_env
if entry.Key.api_url:
key["api_url"] = entry.Key.api_url
raw["key"] = key
return raw
def _pre_merge_git_repos(
parent_git: "tuple[ManifestGitEntry, ...]",
child_raw: dict[str, object],
) -> dict[str, object]:
Outdated
Review

This is way too bespoke: I want a more generic deep merge for git-config repos that looks basically like this:

all_repo_names = set(child_config['git-gate']['repos'].keys()) + set(parent_config['git_gate']['repos'].keys())

merged_repo_configs = {}
for repo_name in all_repo_names:
    merged_repo_configs[repo_name] = {
        **parent_git_gate_config['git-gate']['repos'][repo_name],
        **child_git_gate_config['git-gate']['repos'][repo_name]
    }

Should use a safe accessor/account for possible values not being there, but this seems like way too many lines for a relatively simple merge

This is way too bespoke: I want a more generic deep merge for git-config repos that looks basically like this: ```python all_repo_names = set(child_config['git-gate']['repos'].keys()) + set(parent_config['git_gate']['repos'].keys()) merged_repo_configs = {} for repo_name in all_repo_names: merged_repo_configs[repo_name] = { **parent_git_gate_config['git-gate']['repos'][repo_name], **child_git_gate_config['git-gate']['repos'][repo_name] } ``` Should use a safe accessor/account for possible values not being there, but this seems like way too many lines for a relatively simple merge
Outdated
Review

Agreed, this is too bespoke. The clean version is a per-name shallow merge — git-gate.repos is already { <name>: <entry> } in the raw manifest, so:

merged_repos = {
    name: {**parent_repos.get(name, {}), **child_repos.get(name, {})}
    for name in {*parent_repos, *child_repos}
}

drops both _pre_merge_git_repos and _entry_to_raw, and makes the _merge_git_remotes host-keying change moot — name is the identity here, so the two-repos-on-one-host collision goes away without special-casing.

One wrinkle to flag: _merge_bottles currently receives the parent already parsed (and the parent may itself be a merge result), so there is no parent raw repos dict to merge against — that's the only reason _entry_to_raw existed. To keep the merge a pure dict overlay I'll thread the resolved repos dict through _resolve_one_bottle rather than reconstructing it from parsed entries. Reworking the PR to that shape and keeping the three TestExtendsGitMerge cases.

Agreed, this is too bespoke. The clean version is a per-name shallow merge — `git-gate.repos` is already `{ <name>: <entry> }` in the raw manifest, so: ```python merged_repos = { name: {**parent_repos.get(name, {}), **child_repos.get(name, {})} for name in {*parent_repos, *child_repos} } ``` drops both `_pre_merge_git_repos` and `_entry_to_raw`, and makes the `_merge_git_remotes` host-keying change moot — name is the identity here, so the two-repos-on-one-host collision goes away without special-casing. One wrinkle to flag: `_merge_bottles` currently receives the parent already *parsed* (and the parent may itself be a merge result), so there is no parent *raw* repos dict to merge against — that's the only reason `_entry_to_raw` existed. To keep the merge a pure dict overlay I'll thread the resolved repos dict through `_resolve_one_bottle` rather than reconstructing it from parsed entries. Reworking the PR to that shape and keeping the three `TestExtendsGitMerge` cases.
"""Fill missing fields in same-named child git-gate repos from parent entries.
Returns a (potentially modified) copy of child_raw. For each repo in
child_raw that shares a name with a parent entry, the parent entry's
fields serve as defaults; child-declared fields win. Repos that appear
only in the parent or only in the child are left unchanged."""
from .manifest_util import as_json_object
git_raw = child_raw.get("git-gate")
if git_raw is None:
return child_raw
try:
git_obj = as_json_object(git_raw, "child git-gate")
repos_raw = git_obj.get("repos")
if repos_raw is None:
return child_raw
repos_obj = as_json_object(repos_raw, "child git-gate.repos")
except Exception:
return child_raw
parent_by_name = {entry.Name: entry for entry in parent_git}
if not any(rname in parent_by_name for rname in repos_obj):
return child_raw
merged_repos: dict[str, object] = {}
for repo_name, child_entry_raw in repos_obj.items():
if repo_name in parent_by_name:
parent_raw = _entry_to_raw(parent_by_name[repo_name])
try:
child_entry_obj = as_json_object(
child_entry_raw, f"git-gate.repos[{repo_name!r}]"
)
except Exception:
merged_repos[repo_name] = child_entry_raw
continue
merged_repos[repo_name] = {**parent_raw, **child_entry_obj}
else:
merged_repos[repo_name] = child_entry_raw
return {**child_raw, "git-gate": {**git_obj, "repos": merged_repos}}
def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool:
from .manifest_util import as_json_object
@@ -144,10 +211,10 @@ def _merge_git_remotes(
parent: tuple[ManifestGitEntry, ...],
child: tuple[ManifestGitEntry, ...],
) -> tuple[ManifestGitEntry, ...]:
by_host = {entry.UpstreamHost: entry for entry in parent}
by_name = {entry.Name: entry for entry in parent}
for entry in child:
by_host[entry.UpstreamHost] = entry
return tuple(by_host.values())
by_name[entry.Name] = entry
return tuple(by_name.values())
def _merge_egress(
+81 -8
View File
@@ -113,8 +113,8 @@ class TestExtendsEnvMerge(unittest.TestCase):
class TestExtendsGitMerge(unittest.TestCase):
"""git-gate.user overlays by field; git-gate.repos merges by upstream
host, with child entries replacing duplicate hosts."""
"""git-gate.user overlays by field; git-gate.repos merges by name,
with same-name child entries merging field-by-field (child wins)."""
_GIT_ENTRY_A = {"url": "ssh://git@host-a/a.git", "key": {"provider": "static", "path": "/dev/null"}}
_GIT_ENTRY_B = {"url": "ssh://git@host-b/b.git", "key": {"provider": "static", "path": "/dev/null"}}
@@ -130,19 +130,21 @@ class TestExtendsGitMerge(unittest.TestCase):
names = [e.Name for e in m.bottles["child"].git]
self.assertEqual(["a", "b"], names)
def test_child_git_repo_replaces_same_host(self):
replacement = {"url": "ssh://git@host-a/replacement.git", "key": {"provider": "static", "path": "/dev/null"}}
def test_child_git_repo_different_name_same_host_coexists(self):
# Repos are keyed by Name, not UpstreamHost: two repos with
# different names on the same host both survive the merge.
same_host_b = {"url": "ssh://git@host-a/b.git", "key": {"provider": "static", "path": "/dev/null"}}
m = _build(
base={"git-gate": {"repos": {"a": self._GIT_ENTRY_A}}},
child={
"extends": "base",
"git-gate": {"repos": {"a2": replacement}},
"git-gate": {"repos": {"a2": same_host_b}},
},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
self.assertEqual("a2", entries[0].Name)
self.assertEqual("replacement.git", entries[0].UpstreamPath)
self.assertEqual(2, len(entries))
names = {e.Name for e in entries}
self.assertEqual({"a", "a2"}, names)
def test_child_omits_git_gate_inherits_full_list(self):
m = _build(
@@ -164,6 +166,77 @@ class TestExtendsGitMerge(unittest.TestCase):
)
self.assertEqual((), m.bottles["child"].git)
def test_child_same_name_repo_merges_key_field(self):
# Issue #237: child repo with same name as parent should merge
# field-by-field. Child overrides only `key`; parent's url and
# host_key are preserved.
parent_entry = {
"url": "ssh://git@host-a/repo.git",
"host_key": "ecdsa-sha2-nistp256 AAAA",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {"repo": {
"key": {"provider": "gitea", "forge_token_env": "GITEA_TOKEN"},
}}},
},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
e = entries[0]
self.assertEqual("repo", e.Name)
self.assertEqual("ssh://git@host-a/repo.git", e.Upstream)
self.assertEqual("ecdsa-sha2-nistp256 AAAA", e.KnownHostKey)
self.assertEqual("gitea", e.Key.provider)
self.assertEqual("GITEA_TOKEN", e.Key.forge_token_env)
def test_child_same_name_repo_overrides_url(self):
# Child can override url on a same-name repo; other parent fields
# fall through.
parent_entry = {
"url": "ssh://git@host-a/old.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {"repo": {
"url": "ssh://git@host-b/new.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}}},
},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
self.assertEqual("ssh://git@host-b/new.git", entries[0].Upstream)
def test_child_same_name_plus_new_repo(self):
# Same-name repo is field-merged; a distinct new name in child
# is appended.
parent_entry = {
"url": "ssh://git@host-a/repo.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {
"repo": {"key": {"provider": "gitea", "forge_token_env": "TOK"}},
"other": self._GIT_ENTRY_B,
}},
},
)
child = m.bottles["child"]
names = {e.Name for e in child.git}
self.assertEqual({"repo", "other"}, names)
repo_entry = next(e for e in child.git if e.Name == "repo")
self.assertEqual("gitea", repo_entry.Key.provider)
def test_child_git_user_inherits_parent_repos(self):
m = _build(
base={"git-gate": {"repos": {"a": self._GIT_ENTRY_A}}},