Better merge behavior for git-gate repos on extends #238

Merged
didericis merged 5 commits from git-gate-repo-field-merge-on-extends into main 2026-06-22 14:49:50 -04:00
Showing only changes of commit ff7a52c1d2 - Show all commits
+27 -57
View File
@@ -75,10 +75,11 @@ def _merge_bottles(
from .manifest import ManifestBottle, ManifestGitUser from .manifest import ManifestBottle, ManifestGitUser
from .manifest_egress import validate_egress_routes from .manifest_egress import validate_egress_routes
# Before parsing the child, fill any git-gate repos that share a name # git-gate.repos: union merge at the raw level so child entries can
# with a parent repo: parent fields provide the default, child fields # omit fields present on the same-named parent entry (issue #237).
# win on any field they declare (issue #237). # After this, child.git already contains the fully merged repo set.
child_raw = _pre_merge_git_repos(parent.git, child_raw) if _child_declares_git_gate_repos(child_raw):
child_raw = _merge_git_repos_raw(parent.git, child_raw)
# Parse the child's declared fields into a ManifestBottle (with the # Parse the child's declared fields into a ManifestBottle (with the
# usual defaults for anything missing). Validation runs the same # usual defaults for anything missing). Validation runs the same
@@ -97,11 +98,12 @@ def _merge_bottles(
email=child.git_user.email or parent.git_user.email, email=child.git_user.email or parent.git_user.email,
) )
# git-gate.repos: missing means inherit; an explicit empty object # git-gate.repos: raw union merge already applied above; child.git
# clears; otherwise parent and child merge by UpstreamHost with # has the result. An explicit empty repos dict ({}) still clears
# child entries replacing duplicate hosts. # parent (the raw merge returns early for that case, so child.git
# stays empty).
if _child_declares_git_gate_repos(child_raw): if _child_declares_git_gate_repos(child_raw):
merged_git = _merge_git_remotes(parent.git, child.git) if child.git else () merged_git = child.git
else: else:
merged_git = parent.git merged_git = parent.git
@@ -136,7 +138,7 @@ def _merge_bottles(
def _entry_to_raw(entry: "ManifestGitEntry") -> dict[str, object]: def _entry_to_raw(entry: "ManifestGitEntry") -> dict[str, object]:
"""Convert a ManifestGitEntry back to its raw YAML-equivalent dict.""" """Convert a ManifestGitEntry back to its YAML-equivalent raw dict."""
raw: dict[str, object] = {"url": entry.Upstream} raw: dict[str, object] = {"url": entry.Upstream}
if entry.KnownHostKey: if entry.KnownHostKey:
raw["host_key"] = entry.KnownHostKey raw["host_key"] = entry.KnownHostKey
@@ -151,50 +153,28 @@ def _entry_to_raw(entry: "ManifestGitEntry") -> dict[str, object]:
return raw return raw
def _pre_merge_git_repos( def _merge_git_repos_raw(
parent_git: "tuple[ManifestGitEntry, ...]", parent: "tuple[ManifestGitEntry, ...]",
Outdated
Review

This is way too bespoke: I want a more generic deep merge for git-config repos that looks basically like this:

all_repo_names = set(child_config['git-gate']['repos'].keys()) + set(parent_config['git_gate']['repos'].keys())

merged_repo_configs = {}
for repo_name in all_repo_names:
    merged_repo_configs[repo_name] = {
        **parent_git_gate_config['git-gate']['repos'][repo_name],
        **child_git_gate_config['git-gate']['repos'][repo_name]
    }

Should use a safe accessor/account for possible values not being there, but this seems like way too many lines for a relatively simple merge

This is way too bespoke: I want a more generic deep merge for git-config repos that looks basically like this: ```python all_repo_names = set(child_config['git-gate']['repos'].keys()) + set(parent_config['git_gate']['repos'].keys()) merged_repo_configs = {} for repo_name in all_repo_names: merged_repo_configs[repo_name] = { **parent_git_gate_config['git-gate']['repos'][repo_name], **child_git_gate_config['git-gate']['repos'][repo_name] } ``` Should use a safe accessor/account for possible values not being there, but this seems like way too many lines for a relatively simple merge
Outdated
Review

Agreed, this is too bespoke. The clean version is a per-name shallow merge — git-gate.repos is already { <name>: <entry> } in the raw manifest, so:

merged_repos = {
    name: {**parent_repos.get(name, {}), **child_repos.get(name, {})}
    for name in {*parent_repos, *child_repos}
}

drops both _pre_merge_git_repos and _entry_to_raw, and makes the _merge_git_remotes host-keying change moot — name is the identity here, so the two-repos-on-one-host collision goes away without special-casing.

One wrinkle to flag: _merge_bottles currently receives the parent already parsed (and the parent may itself be a merge result), so there is no parent raw repos dict to merge against — that's the only reason _entry_to_raw existed. To keep the merge a pure dict overlay I'll thread the resolved repos dict through _resolve_one_bottle rather than reconstructing it from parsed entries. Reworking the PR to that shape and keeping the three TestExtendsGitMerge cases.

Agreed, this is too bespoke. The clean version is a per-name shallow merge — `git-gate.repos` is already `{ <name>: <entry> }` in the raw manifest, so: ```python merged_repos = { name: {**parent_repos.get(name, {}), **child_repos.get(name, {})} for name in {*parent_repos, *child_repos} } ``` drops both `_pre_merge_git_repos` and `_entry_to_raw`, and makes the `_merge_git_remotes` host-keying change moot — name is the identity here, so the two-repos-on-one-host collision goes away without special-casing. One wrinkle to flag: `_merge_bottles` currently receives the parent already *parsed* (and the parent may itself be a merge result), so there is no parent *raw* repos dict to merge against — that's the only reason `_entry_to_raw` existed. To keep the merge a pure dict overlay I'll thread the resolved repos dict through `_resolve_one_bottle` rather than reconstructing it from parsed entries. Reworking the PR to that shape and keeping the three `TestExtendsGitMerge` cases.
child_raw: dict[str, object], child_raw: dict[str, object],
) -> dict[str, object]: ) -> dict[str, object]:
"""Fill missing fields in same-named child git-gate repos from parent entries. """Union-merge parent and child git-gate repos at the raw dict level.
Returns a (potentially modified) copy of child_raw. For each repo in For repos present in both, parent fields provide defaults and child
child_raw that shares a name with a parent entry, the parent entry's fields win. Repos present in only one side pass through unchanged.
fields serve as defaults; child-declared fields win. Repos that appear An empty child repos dict is returned as-is to preserve the
only in the parent or only in the child are left unchanged.""" "clear parent repos" semantics."""
from .manifest_util import as_json_object from .manifest_util import as_json_object
git_raw = child_raw.get("git-gate") git_raw = as_json_object(child_raw.get("git-gate", {}), "child git-gate")
if git_raw is None: child_repos = as_json_object(git_raw.get("repos", {}), "child git-gate.repos")
if not child_repos:
return child_raw return child_raw
try: parent_repos = {e.Name: _entry_to_raw(e) for e in parent}
git_obj = as_json_object(git_raw, "child git-gate") merged_repos: dict[str, object] = {
repos_raw = git_obj.get("repos") n: {**parent_repos.get(n, {}), **child_repos.get(n, {})}
if repos_raw is None: for n in set(child_repos) | set(parent_repos)
return child_raw }
repos_obj = as_json_object(repos_raw, "child git-gate.repos") return {**child_raw, "git-gate": {**git_raw, "repos": merged_repos}}
except Exception:
return child_raw
parent_by_name = {entry.Name: entry for entry in parent_git}
if not any(rname in parent_by_name for rname in repos_obj):
return child_raw
merged_repos: dict[str, object] = {}
for repo_name, child_entry_raw in repos_obj.items():
if repo_name in parent_by_name:
parent_raw = _entry_to_raw(parent_by_name[repo_name])
try:
child_entry_obj = as_json_object(
child_entry_raw, f"git-gate.repos[{repo_name!r}]"
)
except Exception:
merged_repos[repo_name] = child_entry_raw
continue
merged_repos[repo_name] = {**parent_raw, **child_entry_obj}
else:
merged_repos[repo_name] = child_entry_raw
return {**child_raw, "git-gate": {**git_obj, "repos": merged_repos}}
def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool: def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool:
@@ -207,16 +187,6 @@ def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool:
return "repos" in git_obj return "repos" in git_obj
def _merge_git_remotes(
parent: tuple[ManifestGitEntry, ...],
child: tuple[ManifestGitEntry, ...],
) -> tuple[ManifestGitEntry, ...]:
by_name = {entry.Name: entry for entry in parent}
for entry in child:
by_name[entry.Name] = entry
return tuple(by_name.values())
def _merge_egress( def _merge_egress(
parent: ManifestEgressConfig, parent: ManifestEgressConfig,
child: ManifestEgressConfig, child: ManifestEgressConfig,