From 4f7a506a9e1653bb05643eda1258370c9cad4640 Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 23:27:04 -0400 Subject: [PATCH 1/5] =?UTF-8?q?docs(prd):=200025=20=E2=80=94=20bottle=20co?= =?UTF-8?q?mposition=20via=20`extends:`=20(issue=20#88)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/prds/0025-bottle-extends.md | 221 +++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 docs/prds/0025-bottle-extends.md diff --git a/docs/prds/0025-bottle-extends.md b/docs/prds/0025-bottle-extends.md new file mode 100644 index 0000000..ef80628 --- /dev/null +++ b/docs/prds/0025-bottle-extends.md @@ -0,0 +1,221 @@ +# PRD 0025: Bottle composition via `extends:` + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-27 +- **Issue:** #88 + +## Summary + +Let a bottle inherit from another bottle by name. A bottle frontmatter +gains an optional `extends: ` field; at manifest-load +time the parent's resolved config is the base and the child's +declared fields overlay it. Bottles remain home-only — the trust +boundary the README documents stays intact. + +Solves the "I don't want to duplicate a 50-line bottle just to add +one egress route or env var" pain that motivated issue #88's +`bottle_config` proposal, without weakening the agent-vs-bottle +trust separation that proposal would have eroded. + +## Problem + +Today the only way to vary a bottle's config is to write a whole +new `bottles/.md`. If `staging` is the same as `dev` but with +one extra egress route, the operator copy-pastes the entire `dev` +file. Drift between copies follows: an egress addition to `dev` is +silently absent from `staging` until someone notices. + +Issue #88 proposed inlining a `bottle_config:` block in agent files +that would merge with (and override) the referenced bottle. That +design lets a `$CWD/.claude-bottle/agents/.md` file from a +cloned repo redeclare egress routes, env mappings, and git remotes +— breaking the existing security model where bottles are +`$HOME`-only specifically so cloned repos can't influence them +(see README, "Manifest" section). + +`extends:` solves the same composition pain *within the existing +trust boundary*: only `$HOME` bottles can declare it, only `$HOME` +bottles can be its target. Cloned repos still cannot author +bottle-equivalent config. + +## Goals / Success Criteria + +- Add `extends: ` to the bottle frontmatter schema. +- At manifest load, resolve `extends:` chains into a fully-merged + effective config before the rest of the pipeline sees the + `Bottle` object. Downstream code (provisioners, compose + renderer, etc.) is unchanged. +- Defined, simple merge semantics (see "Merge rules" below). +- Cycle detection: `A extends B extends A` dies at parse with a + clear pointer. +- Missing parent dies at parse with a clear pointer + the list of + available bottle names. +- Existing bottles continue to parse identically — `extends:` is + opt-in. +- Backend-agnostic: docker + smolmachines behave the same because + they both consume `Bottle` after the merge. + +## Non-goals + +- **No agent-side `bottle_config:`.** That's the design issue #88 + considered and weighed against; this PRD is the alternative + picked in the issue's design discussion. Don't reintroduce it. +- **No additive list merges** (e.g., `routes: append` keyword). + The `extends:` design uses full-replace for list-valued fields + (see "Merge rules"); if a use case shows up that genuinely + needs `parent_routes + child_routes`, design that separately + rather than baking it in now. +- **No multi-parent inheritance.** A bottle has at most one + parent. Diamond resolution is out of scope and rarely worth + the complexity for a manifest of this size. +- **No agent-level extends.** Agents stay simple (bottle ref + + skills + prompt). Inheritance lives only on the bottle side. + +## Design + +### Schema + +A new optional top-level frontmatter key on bottle files: + +```yaml +--- +extends: dev + +egress: + routes: + - host: staging.example.com + auth: + scheme: Bearer + token_ref: STAGING_TOKEN +--- +``` + +`extends:` is a string — the name of another bottle (without the +`.md`). Required to be one of the bottles loaded from +`$HOME/.claude-bottle/bottles/`. Self-reference (`extends: self` +in `self.md`) and longer cycles die at parse. + +### Merge rules + +Resolution walks `extends:` chains bottom-up: parent's +already-resolved config is the base, child's declared fields +overlay it. For each field on `Bottle`: + +| Field | Type | Merge | +|--------------|-----------------------|---------------------------------------------| +| `env` | `Mapping[str, str]` | dict merge, child wins on key collision | +| `git` | `tuple[GitEntry,…]` | full replace if child declares `git:` | +| `git_user` | `GitUser` | child overlay: child's non-empty fields win | +| `egress` | `EgressConfig` | full replace if child declares `egress:` | +| `supervise` | `bool` | full replace if child declares `supervise:` | + +Why full-replace for the list-valued fields (`git[]`, +`egress.routes[]`): + +- **Ordering matters.** Egress route ordering is part of the + match semantics (first matching host wins). Merging two + ordered lists by name introduces "where does the child's route + go?" ambiguity. +- **Name collisions are ambiguous.** If parent has + `git: [{Name: foo, Upstream: A}]` and child has `git: + [{Name: foo, Upstream: B}]`, "merge" could mean override-B or + error-on-collision. Full-replace makes the operator's + intent explicit. +- **Simpler precedence.** "Child declares X → X wins, full + stop" is one sentence; partial merges need a table per list. + +The `env` dict is the one exception because dict-merge has no +ordering concern and dict-keyed overrides are the obvious user +expectation. (Same model as shell `export` precedence.) + +The `git_user` dataclass-overlay (each non-empty field wins +individually) is so a parent can declare `git_user.name` and a +child can add just `git_user.email`. The default `GitUser()` +fields are empty strings, which are treated as "not set" for +overlay purposes — same `is_empty()` predicate the provisioner +uses. + +### Resolution algorithm + +``` +bottles_raw: dict[name, raw_frontmatter_dict] # before parsing + +def resolve(name, seen=()) -> Bottle: + if name in seen: + die(f"bottle '{name}' extends-cycle: {' -> '.join(seen + (name,))}") + raw = bottles_raw[name] + parent_name = raw.get("extends") + if parent_name is None: + return Bottle.from_dict(name, raw) # leaf + if parent_name not in bottles_raw: + die(f"bottle '{name}' extends '{parent_name}' which is not defined; " + f"available: {sorted(bottles_raw)}") + parent = resolve(parent_name, seen + (name,)) + return _merge(parent, raw, name) +``` + +Resolution is cached per-name within a single `Manifest.from_*` +call so a diamond-like reference graph (multiple children +extending the same parent) doesn't reparse the parent N times. +Cycles are caught by the `seen` set; the error message includes +the full chain so operators can find the offending file. + +### Trust boundary preservation + +Bottles continue to be loaded from `$HOME/.claude-bottle/bottles/` +only (`Manifest.from_md_dirs` is unchanged). The `extends:` field +references another file in that same directory. No cwd-readable +file gains the ability to declare or modify bottle config — the +attack surface from issue #88's comment thread stays closed. + +If a future change ever introduces cwd-loaded bottles, the +`extends:` resolver should be gated to forbid a `$CWD` bottle +from extending a `$HOME` bottle (lest cwd-loaded config inherit +home-resident credentials via the merge step). Today this is +moot because there's only one bottle source. + +## Implementation chunks + +1. **PRD (this commit).** Sets the design. +2. **Resolver + schema + tests.** + - Add `"extends"` to `_BOTTLE_KEYS`. + - Add `extends: str = ""` to a new pre-merge raw shape (or + keep raw dicts and resolve as a separate pass before + `Bottle.from_dict`). + - Implement `_resolve_bottle_with_extends` recursive walk + with cycle detection. + - Wire into `_load_bottles_from_dir` so the public + `Manifest.bottles` dict already contains resolved Bottle + instances (downstream code unchanged). + - Implement `_merge(parent: Bottle, child_raw: dict, name: str) + -> Bottle` with the rules table above. + - Unit tests: simple two-bottle extends, env merge with + collision, list-replace for git + egress, git_user overlay, + supervise override, missing parent dies, cycle dies, deeper + chains (A extends B extends C). +3. **Docs.** Add an `extends:` example to the README's manifest + section. Note that the field is optional + how merge precedes. + +## Testing strategy + +- **Unit (must):** all the merge semantics, the parse-time + errors (cycle, missing parent), and a multi-step inheritance + chain locking the resolver's recursion. +- **No integration changes needed:** downstream code consumes + the already-merged `Bottle`. Existing integration tests cover + the docker / smolmachines provisioning paths and would catch + any regression in how `Bottle.git`, `Bottle.egress`, etc., are + consumed. + +## Open questions + +- **Should the parent appear in the preflight summary?** Right + now the y/N preflight prints the resolved bottle config; with + `extends:` the operator doesn't see *which* fields came from + which level. A short `extends:` annotation in the preflight + output ("inherits from `dev`") would let the operator spot + surprises. Cheap follow-up; out of scope for this PRD. +- **Should `cli.py info ` show the resolution chain?** + Same shape as the preflight question — useful diagnostic + surface, doesn't change the runtime. Out of scope. -- 2.52.0 From a5c8b4e7b20701f9a8d985dadf1e250eff5e7b24 Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 23:30:40 -0400 Subject: [PATCH 2/5] feat(manifest): bottle composition via \`extends:\` resolver (PRD 0025, #88) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an optional `extends: ` field to bottle frontmatter. Two-pass load: 1. Collect raw frontmatter for every bottle file. 2. Recursively resolve each name into a merged Bottle via `_resolve_one_bottle` + `_merge_bottles`. Merge rules (per PRD 0025): - env: dict merge, child wins on key collision - git: full replace if child declares `git:` - git_user: per-field overlay (child's non-empty fields win) - egress: full replace if child declares `egress:` - supervise: full replace if child declares `supervise:` List-valued fields full-replace because partial merge is ambiguous (ordering matters, name collisions ambiguous); env is dict-merge because dict-keyed override is the natural shape. git_user overlays per-field so a parent can declare just the name and a child can add just the email. Cycles / self-extends / missing-parent / non-string `extends:` all die at parse with a pointer that includes the chain (cycles) or the available names (missing parent). Resolution is cached per-name so a diamond reference graph doesn't reparse the same parent N times. Both load paths threaded: - `_load_bottles_from_dir` (md files) — collect raws, then resolve. - `Manifest.from_json_obj` (JSON / test fixtures) — same. Tests (24, in `test_manifest_extends.py`): - Leaf without extends parses unchanged - Child inherits parent unchanged when child only declares `extends:` - env: disjoint union, collision (child wins), child-omits - git: replace, omit, explicit-empty-clears-parent - egress: same shape (replace, inherit) - git_user: parent-only, child-overrides-both, partial fields - 3-step chain (grandparent → parent → child) - Errors: missing parent, self-extends, 2-node cycle, 3-node cycle, non-string extends 685 unit tests pass. Co-Authored-By: Claude Opus 4.7 --- claude_bottle/manifest.py | 137 +++++++++++- tests/unit/test_manifest_extends.py | 311 ++++++++++++++++++++++++++++ 2 files changed, 438 insertions(+), 10 deletions(-) create mode 100644 tests/unit/test_manifest_extends.py diff --git a/claude_bottle/manifest.py b/claude_bottle/manifest.py index 97a3a53..fd369ee 100644 --- a/claude_bottle/manifest.py +++ b/claude_bottle/manifest.py @@ -12,6 +12,7 @@ the system prompt, for bottles the body is human documentation (ignored by the parser). Bottle schema (frontmatter): + extends: # optional (PRD 0025) env: { : , ... } git: [ , ... ] git_user: { name: , email: } # optional @@ -641,12 +642,17 @@ class Manifest: def from_json_obj(cls, obj: object) -> "Manifest": """Validate and build a Manifest from a raw JSON-like dict.""" d = _as_json_object(obj, "manifest") - raw_bottles = _section_dict(d.get("bottles"), "manifest 'bottles'") + raw_bottles_obj = _section_dict(d.get("bottles"), "manifest 'bottles'") raw_agents = _section_dict(d.get("agents"), "manifest 'agents'") - bottles: dict[str, Bottle] = { - n: Bottle.from_dict(n, b) for n, b in raw_bottles.items() - } + # Coerce each bottle's raw to dict[str, object] so the + # PRD 0025 resolver can apply extends-merge rules + # consistently with the md-loader path. + raw_bottles: dict[str, dict[str, object]] = {} + for n, b in raw_bottles_obj.items(): + raw_bottles[n] = _as_json_object(b, f"bottle '{n}'") + bottles = _resolve_bottles(raw_bottles) + bottle_names = set(bottles.keys()) agents: dict[str, Agent] = { n: Agent.from_dict(n, a, bottle_names) for n, a in raw_agents.items() @@ -834,7 +840,7 @@ _FILENAME_RX = re.compile(r"^[a-z][a-z0-9-]*$") # sets dies with a "did you mean" pointer — typos shouldn't silently # ghost into an empty config. _BOTTLE_KEYS = frozenset( - {"env", "git", "git_user", "egress", "supervise"} + {"env", "extends", "git", "git_user", "egress", "supervise"} ) _AGENT_KEYS_REQUIRED = frozenset({"bottle"}) _AGENT_KEYS_OPTIONAL = frozenset({"skills"}) @@ -878,10 +884,15 @@ def _entity_name_from_path(path: Path) -> str | None: def _load_bottles_from_dir(bottles_dir: Path) -> dict[str, Bottle]: """Walk `/*.md`, parse each as a bottle, return `{name: Bottle}`. Missing dir → empty dict (the user simply - hasn't declared any bottles yet).""" - out: dict[str, Bottle] = {} + hasn't declared any bottles yet). + + Two-pass to resolve PRD 0025 `extends:` chains: + 1. Collect each file's raw frontmatter into `{name: raw}`. + 2. Recursively merge `extends:` chains into effective + Bottle objects (`_resolve_bottles`).""" + raws: dict[str, dict[str, object]] = {} if not bottles_dir.is_dir(): - return out + return {} for path in sorted(bottles_dir.glob("*.md")): name = _entity_name_from_path(path) if name is None: @@ -903,8 +914,114 @@ def _load_bottles_from_dir(bottles_dir: Path) -> dict[str, Bottle]: f"bottle file {path}: unknown frontmatter key(s) " f"{sorted(unknown)}; allowed keys are {allowed}." ) - out[name] = Bottle.from_dict(name, fm) - return out + raws[name] = fm + return _resolve_bottles(raws) + + +def _resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, Bottle]: + """Apply `extends:` chains (PRD 0025) and return a flat + `{name: Bottle}` of resolved configs. Cycle / missing-parent + / self-reference die with a clear pointer.""" + cache: dict[str, Bottle] = {} + for name in raws: + if name not in cache: + _resolve_one_bottle(name, raws, cache, ()) + return cache + + +def _resolve_one_bottle( + name: str, + raws: dict[str, dict[str, object]], + cache: dict[str, Bottle], + seen: tuple[str, ...], +) -> Bottle: + """Recursive resolver. `seen` is the current extends-chain for + cycle detection; on cycle die with the chain so the operator + can see which two files to break the loop in.""" + if name in cache: + return cache[name] + if name in seen: + chain = " -> ".join(seen + (name,)) + die(f"bottle '{name}' is in an extends cycle: {chain}") + raw = raws[name] + parent_name_raw = raw.get("extends") + # Strip `extends:` before passing to Bottle.from_dict so it + # isn't accidentally treated as a real Bottle field by future + # schema additions. It's only meaningful here. + child_raw = {k: v for k, v in raw.items() if k != "extends"} + + if parent_name_raw is None: + bottle = Bottle.from_dict(name, child_raw) + cache[name] = bottle + return bottle + + if not isinstance(parent_name_raw, str): + die( + f"bottle '{name}' extends must be a string " + f"(was {type(parent_name_raw).__name__})" + ) + parent_name: str = parent_name_raw + if parent_name == name: + die( + f"bottle '{name}' extends itself; remove the " + f"self-reference" + ) + if parent_name not in raws: + avail = ", ".join(sorted(raws.keys())) or "(none)" + die( + f"bottle '{name}' extends '{parent_name}' which is not " + f"defined. Available bottles: {avail}" + ) + parent = _resolve_one_bottle(parent_name, raws, cache, seen + (name,)) + bottle = _merge_bottles(parent, child_raw, name) + cache[name] = bottle + return bottle + + +def _merge_bottles( + parent: Bottle, + child_raw: dict[str, object], + name: str, +) -> Bottle: + """Apply PRD 0025 merge rules: parent is base; child's declared + fields overlay. List-valued fields full-replace when the child + declared them (presence-driven on the raw dict, so an explicit + `git: []` clears the parent's list); env merges dict-style + with child-wins on key collision; git_user overlays per-field.""" + # Parse the child's declared fields into a Bottle (with the + # usual defaults for anything missing). Validation runs the same + # way it would for a leaf bottle — typos / wrong types die here. + child = Bottle.from_dict(name, child_raw) + + # env: dict merge, child wins on collision. + merged_env = {**parent.env, **child.env} + + # git_user: per-field overlay. Each non-empty field on child + # wins; empties fall through to parent. The default GitUser() + # is two empty strings, so a child that omits the block + # inherits the parent's user verbatim. + merged_git_user = GitUser( + name=child.git_user.name or parent.git_user.name, + email=child.git_user.email or parent.git_user.email, + ) + + # Presence-driven full-replace for the list-valued + scalar + # fields. "Did the child's raw dict have this key?" is the + # source of truth — an explicit `git: []` means "drop the + # parent's git list", whereas a missing `git:` means "inherit". + merged_git = child.git if "git" in child_raw else parent.git + merged_egress = child.egress if "egress" in child_raw else parent.egress + merged_supervise = ( + child.supervise if "supervise" in child_raw else parent.supervise + ) + + return Bottle( + env=merged_env, + git=merged_git, + git_user=merged_git_user, + egress=merged_egress, + supervise=merged_supervise, + ) def _load_agents_from_dir( diff --git a/tests/unit/test_manifest_extends.py b/tests/unit/test_manifest_extends.py new file mode 100644 index 0000000..9141749 --- /dev/null +++ b/tests/unit/test_manifest_extends.py @@ -0,0 +1,311 @@ +"""Unit: bottle composition via `extends:` (PRD 0025, issue #88). + +Each merge rule from the PRD gets a focused case; the resolver's +recursion + cycle / missing-parent / self-reference dies are in +their own tests. + +The `Manifest.from_json_obj` path is the test surface — same +resolver runs from `Manifest.from_md_dirs` (md loader) so locking +it here covers both.""" + +from __future__ import annotations + +import contextlib +import io +import unittest + +from claude_bottle.log import Die +from claude_bottle.manifest import Manifest + + +def _die_message(callable_, *args, **kwargs) -> str: + buf = io.StringIO() + with contextlib.redirect_stderr(buf): + try: + callable_(*args, **kwargs) + except Die: + return buf.getvalue() + raise AssertionError("expected Die was not raised") + + +def _build(**bottles) -> Manifest: + """Build a manifest with the given bottles and one trivial agent + referencing the first bottle (so the manifest is valid).""" + first = next(iter(bottles)) + return Manifest.from_json_obj({ + "bottles": bottles, + "agents": { + "demo": {"skills": [], "prompt": "", "bottle": first}, + }, + }) + + +class TestExtendsBasic(unittest.TestCase): + def test_leaf_without_extends_unchanged(self): + # Sanity: existing manifests with no `extends:` parse the + # same way they did before the resolver landed. + m = _build(dev={ + "env": {"FOO": "bar"}, + "supervise": True, + }) + b = m.bottles["dev"] + self.assertEqual({"FOO": "bar"}, dict(b.env)) + self.assertTrue(b.supervise) + + def test_child_inherits_parent_fields_unchanged(self): + m = _build( + base={ + "env": {"BASE": "1"}, + "supervise": True, + }, + child={"extends": "base"}, + ) + c = m.bottles["child"] + self.assertEqual({"BASE": "1"}, dict(c.env)) + self.assertTrue(c.supervise) + + def test_child_overrides_supervise_scalar(self): + m = _build( + base={"supervise": True}, + off={"extends": "base", "supervise": False}, + ) + self.assertTrue(m.bottles["base"].supervise) + self.assertFalse(m.bottles["off"].supervise) + + def test_parent_resolved_once_for_multiple_children(self): + # Two children sharing one parent: both inherit; the parent + # is resolved once + cached. (Cache behavior is internal; we + # observe correctness on both children.) + m = _build( + base={"env": {"BASE": "1"}, "supervise": True}, + a={"extends": "base", "env": {"A": "1"}}, + b={"extends": "base", "env": {"B": "1"}}, + ) + self.assertEqual({"BASE": "1", "A": "1"}, dict(m.bottles["a"].env)) + self.assertEqual({"BASE": "1", "B": "1"}, dict(m.bottles["b"].env)) + + +class TestExtendsEnvMerge(unittest.TestCase): + """env: dict merge, child wins on key collision.""" + + def test_disjoint_keys_union(self): + m = _build( + base={"env": {"PARENT_ONLY": "p"}}, + child={"extends": "base", "env": {"CHILD_ONLY": "c"}}, + ) + self.assertEqual( + {"PARENT_ONLY": "p", "CHILD_ONLY": "c"}, + dict(m.bottles["child"].env), + ) + + def test_collision_child_wins(self): + m = _build( + base={"env": {"SHARED": "from-parent"}}, + child={"extends": "base", "env": {"SHARED": "from-child"}}, + ) + self.assertEqual( + {"SHARED": "from-child"}, + dict(m.bottles["child"].env), + ) + + def test_child_omits_env_inherits_full(self): + m = _build( + base={"env": {"A": "1", "B": "2"}}, + child={"extends": "base"}, + ) + self.assertEqual({"A": "1", "B": "2"}, dict(m.bottles["child"].env)) + + +class TestExtendsListsFullReplace(unittest.TestCase): + """git: and egress: are full-replace when the child declares + them — partial merge would be ambiguous (ordering + name + collisions). See PRD 0025 "Merge rules".""" + + _GIT_ENTRY_A = { + "Name": "a", + "Upstream": "ssh://git@host-a/a.git", + "IdentityFile": "/dev/null", + } + _GIT_ENTRY_B = { + "Name": "b", + "Upstream": "ssh://git@host-b/b.git", + "IdentityFile": "/dev/null", + } + + def test_child_git_replaces_parent_entirely(self): + m = _build( + base={"git": [self._GIT_ENTRY_A]}, + child={"extends": "base", "git": [self._GIT_ENTRY_B]}, + ) + names = [e.Name for e in m.bottles["child"].git] + self.assertEqual(["b"], names) + + def test_child_omits_git_inherits_full_list(self): + m = _build( + base={"git": [self._GIT_ENTRY_A, self._GIT_ENTRY_B]}, + child={"extends": "base"}, + ) + names = [e.Name for e in m.bottles["child"].git] + self.assertEqual(["a", "b"], names) + + def test_child_explicit_empty_git_clears_parent(self): + # `git: []` is the documented way to say "drop the + # parent's list" rather than "inherit it". + m = _build( + base={"git": [self._GIT_ENTRY_A]}, + child={"extends": "base", "git": []}, + ) + self.assertEqual((), m.bottles["child"].git) + + def test_child_egress_replaces_parent_entirely(self): + m = _build( + base={"egress": {"routes": [{"host": "a.example.com"}]}}, + child={ + "extends": "base", + "egress": {"routes": [{"host": "b.example.com"}]}, + }, + ) + hosts = [r.Host for r in m.bottles["child"].egress.routes] + self.assertEqual(["b.example.com"], hosts) + + def test_child_omits_egress_inherits(self): + m = _build( + base={"egress": {"routes": [{"host": "a.example.com"}]}}, + child={"extends": "base"}, + ) + hosts = [r.Host for r in m.bottles["child"].egress.routes] + self.assertEqual(["a.example.com"], hosts) + + +class TestExtendsGitUserOverlay(unittest.TestCase): + """git_user: per-field overlay. Each non-empty field on child + wins; empties fall through to parent.""" + + def test_parent_full_child_omits(self): + m = _build( + base={"git_user": {"name": "Parent", "email": "p@x"}}, + child={"extends": "base"}, + ) + u = m.bottles["child"].git_user + self.assertEqual("Parent", u.name) + self.assertEqual("p@x", u.email) + + def test_child_overrides_both(self): + m = _build( + base={"git_user": {"name": "Parent", "email": "p@x"}}, + child={ + "extends": "base", + "git_user": {"name": "Child", "email": "c@x"}, + }, + ) + u = m.bottles["child"].git_user + self.assertEqual("Child", u.name) + self.assertEqual("c@x", u.email) + + def test_child_adds_email_inherits_name(self): + # Parent sets only name; child sets only email. Both end + # up populated on the child. + m = _build( + base={"git_user": {"name": "Parent"}}, + child={"extends": "base", "git_user": {"email": "c@x"}}, + ) + u = m.bottles["child"].git_user + self.assertEqual("Parent", u.name) + self.assertEqual("c@x", u.email) + + def test_child_overrides_only_email(self): + m = _build( + base={"git_user": {"name": "Parent", "email": "p@x"}}, + child={"extends": "base", "git_user": {"email": "c@x"}}, + ) + u = m.bottles["child"].git_user + # Child overrides email; name inherited from parent. + self.assertEqual("Parent", u.name) + self.assertEqual("c@x", u.email) + + +class TestExtendsChain(unittest.TestCase): + """Multi-step inheritance: A extends B extends C.""" + + def test_three_step_chain(self): + m = _build( + grandparent={ + "env": {"GP": "1"}, + "supervise": True, + }, + parent={ + "extends": "grandparent", + "env": {"P": "1"}, + }, + child={ + "extends": "parent", + "env": {"C": "1"}, + }, + ) + self.assertEqual( + {"GP": "1", "P": "1", "C": "1"}, + dict(m.bottles["child"].env), + ) + # supervise threads through unchanged. + self.assertTrue(m.bottles["child"].supervise) + + def test_intermediate_can_override(self): + m = _build( + grandparent={"env": {"X": "from-gp"}}, + parent={"extends": "grandparent", "env": {"X": "from-p"}}, + child={"extends": "parent"}, + ) + self.assertEqual("from-p", m.bottles["child"].env["X"]) + + +class TestExtendsErrors(unittest.TestCase): + def test_missing_parent_dies(self): + msg = _die_message(_build, child={"extends": "ghost"}) + self.assertIn("extends 'ghost'", msg) + self.assertIn("not defined", msg) + + def test_self_extends_dies(self): + msg = _die_message(_build, loop={"extends": "loop"}) + self.assertIn("extends itself", msg) + + def test_two_node_cycle_dies(self): + msg = _die_message( + _build, + a={"extends": "b"}, + b={"extends": "a"}, + ) + self.assertIn("extends cycle", msg) + # Chain should include both names. + self.assertIn("a", msg) + self.assertIn("b", msg) + + def test_three_node_cycle_dies(self): + msg = _die_message( + _build, + a={"extends": "b"}, + b={"extends": "c"}, + c={"extends": "a"}, + ) + self.assertIn("extends cycle", msg) + + def test_non_string_extends_dies(self): + msg = _die_message(_build, child={"extends": ["base"]}) + self.assertIn("extends must be a string", msg) + + +class TestExtendsAvailableInBottleKeys(unittest.TestCase): + """`extends` must not trip the unknown-keys check in the md + loader. Verified indirectly via from_json_obj (same resolver) + + a positive parse here.""" + + def test_extends_alone_parses(self): + # No other fields; child purely inherits. + m = _build( + base={"env": {"A": "1"}}, + child={"extends": "base"}, + ) + self.assertEqual({"A": "1"}, dict(m.bottles["child"].env)) + + +if __name__ == "__main__": + unittest.main() -- 2.52.0 From 85104742cadc4497f01499e615756b1dcdac99f5 Mon Sep 17 00:00:00 2001 From: claude Date: Wed, 27 May 2026 23:31:02 -0400 Subject: [PATCH 3/5] docs(readme): document bottle `extends:` composition (PRD 0025) --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index 25926d2..c6b393f 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,36 @@ with a warning. **This is the trust boundary**: bottle infrastructure directory only. A cloned repo cannot redirect a host env var to an attacker-named upstream because it has no way to declare a bottle. +### Bottle composition with `extends:` + +A bottle can inherit from another via `extends: ` so +operators don't have to duplicate a whole bottle file to vary one +field (PRD 0025). The parent's resolved config is the base; the +child's declared fields overlay. Merge rules: + +- `env:` — dict merge, child wins on key collision. +- `git:`, `egress:`, `supervise:` — full replace when the child + declares the field. An explicit `git: []` clears the parent's + list; omitting the field inherits the parent's verbatim. +- `git_user:` — per-field overlay (child's non-empty `name` / + `email` wins; empty falls through to parent). + +```yaml +--- +extends: dev # inherit everything from bottles/dev.md +egress: + routes: + - host: staging.example.com + auth: + scheme: Bearer + token_ref: STAGING_TOKEN +--- +``` + +Cycles (`A extends B extends A`), self-references, and missing +parents die at parse with a clear pointer. Bottles remain +`$HOME`-only — `extends:` preserves the trust boundary above. + ### Example bottle (`~/.claude-bottle/bottles/gitea-dev.md`) ````markdown -- 2.52.0 From 59ee32cc8d9cd9a0f1bca164ee61590c30d53b85 Mon Sep 17 00:00:00 2001 From: codex Date: Thu, 28 May 2026 00:49:34 -0400 Subject: [PATCH 4/5] refactor(manifest): key git config by host --- README.md | 32 ++-- claude_bottle/backend/docker/provision/git.py | 4 +- .../backend/smolmachines/provision/git.py | 4 +- claude_bottle/manifest.py | 168 +++++++++++++----- docs/prds/0011-per-file-md-manifest.md | 14 +- .../0022-sandbox-escape-integration-test.md | 8 +- docs/prds/0025-bottle-extends.md | 26 +-- tests/fixtures.py | 28 +-- tests/integration/test_sandbox_escape.py | 12 +- tests/unit/test_compose.py | 12 +- tests/unit/test_docker_provision_git_user.py | 2 +- tests/unit/test_git_gate.py | 14 +- tests/unit/test_manifest_extends.py | 77 +++++--- tests/unit/test_manifest_git.py | 60 ++++++- tests/unit/test_manifest_git_user.py | 23 ++- tests/unit/test_smolmachines_provision.py | 17 +- tests/unit/test_yaml_subset.py | 14 +- 17 files changed, 356 insertions(+), 159 deletions(-) diff --git a/README.md b/README.md index c6b393f..2154b1f 100644 --- a/README.md +++ b/README.md @@ -256,11 +256,13 @@ field (PRD 0025). The parent's resolved config is the base; the child's declared fields overlay. Merge rules: - `env:` — dict merge, child wins on key collision. -- `git:`, `egress:`, `supervise:` — full replace when the child - declares the field. An explicit `git: []` clears the parent's - list; omitting the field inherits the parent's verbatim. -- `git_user:` — per-field overlay (child's non-empty `name` / +- `git.user:` — per-field overlay (child's non-empty `name` / `email` wins; empty falls through to parent). +- `git.remotes:` — dict merge by host, child wins on host collision. + An explicit `git.remotes: {}` clears the parent's remotes; omitting + `git.remotes` inherits the parent's remotes. +- `egress:`, `supervise:` — full replace when the child declares the + field. ```yaml --- @@ -286,19 +288,15 @@ env: GIT_AUTHOR_NAME: didericis git: - - Name: claude-bottle - Upstream: ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git - IdentityFile: /Users/didericis/.ssh/id_ed25519_gitea - KnownHostKey: ssh-ed25519 AAAA... - -# Optional per-bottle git identity. When set, `git config --global -# user.name` / `user.email` are applied inside the bottle at -# provisioning so the agent's commits land with this attribution -# instead of git refusing to commit. Either field can be set -# independently. Issue #86. -git_user: - name: "Eric Bauerfeld" - email: "eric+claude@dideric.is" + user: + name: "Eric Bauerfeld" + email: "eric+claude@dideric.is" + remotes: + gitea.dideric.is: + Name: claude-bottle + Upstream: ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git + IdentityFile: /Users/didericis/.ssh/id_ed25519_gitea + KnownHostKey: ssh-ed25519 AAAA... # Routes declared here are held by a per-bottle cred-proxy sidecar, # not the agent. Each route names a path the agent dials, the diff --git a/claude_bottle/backend/docker/provision/git.py b/claude_bottle/backend/docker/provision/git.py index 7cd797a..29c63a1 100644 --- a/claude_bottle/backend/docker/provision/git.py +++ b/claude_bottle/backend/docker/provision/git.py @@ -11,7 +11,7 @@ Three concerns, all about git in the agent: ls-remote) transparently hits the per-agent git-gate. The gate mirrors the upstream in both directions, so URL rewriting is symmetric. - 3. If the bottle declares `git_user` (issue #86), set + 3. If the bottle declares `git.user` (issue #86), set `git config --global user.{name,email}` inside the bottle so the agent's commits are attributed to that identity. """ @@ -94,7 +94,7 @@ def _provision_git_user(plan: DockerBottlePlan, target: str) -> None: Runs as the `node` user so `--global` lands in `/home/node/.gitconfig` (matching the existing `_provision_git_gate_config` write location). No-op when the - bottle didn't declare `git_user`. + bottle didn't declare `git.user`. Each field set independently — name-only or email-only configs only run the `git config` line for the field diff --git a/claude_bottle/backend/smolmachines/provision/git.py b/claude_bottle/backend/smolmachines/provision/git.py index 42bdb39..975d981 100644 --- a/claude_bottle/backend/smolmachines/provision/git.py +++ b/claude_bottle/backend/smolmachines/provision/git.py @@ -11,7 +11,7 @@ Three concerns, all about git in the agent: against a declared upstream transparently hits the per-bottle git-gate. The gate mirrors the upstream in both directions, so URL rewriting is symmetric. - 3. If the bottle declares `git_user` (issue #86), set + 3. If the bottle declares `git.user` (issue #86), set `git config --global user.{name,email}` inside the guest so the agent's commits are attributed to that identity. @@ -113,7 +113,7 @@ def _provision_git_user( """Apply `git config --global user.{name,email}` inside the guest as the node user so --global lands in the same `/home/node/.gitconfig` that `_provision_git_gate_config` - writes to. No-op when the bottle didn't declare `git_user`. + writes to. No-op when the bottle didn't declare `git.user`. Runs via `runuser -u node --`; HOME is forced via smolvm's `-e` flag because runuser (without -l) inherits root's diff --git a/claude_bottle/manifest.py b/claude_bottle/manifest.py index fd369ee..ff4d131 100644 --- a/claude_bottle/manifest.py +++ b/claude_bottle/manifest.py @@ -14,8 +14,9 @@ the system prompt, for bottles the body is human documentation Bottle schema (frontmatter): extends: # optional (PRD 0025) env: { : , ... } - git: [ , ... ] - git_user: { name: , email: } # optional + git: + user: { name: , email: } # optional + remotes: { : , ... } # optional egress: { routes: [ , ... ] } supervise: # optional @@ -88,31 +89,61 @@ class GitEntry: @classmethod def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "GitEntry": d = _as_json_object(raw, f"bottle '{bottle_name}' git[{idx}]") + return cls._from_object(bottle_name, d, f"git[{idx}]", None) + + @classmethod + def from_remote_dict( + cls, bottle_name: str, host_key: str, raw: object + ) -> "GitEntry": + if not host_key: + die(f"bottle '{bottle_name}' git.remotes has an empty host key") + d = _as_json_object(raw, f"bottle '{bottle_name}' git.remotes[{host_key!r}]") + return cls._from_object( + bottle_name, d, f"git.remotes[{host_key!r}]", host_key, + ) + + @classmethod + def _from_object( + cls, + bottle_name: str, + d: dict[str, object], + label: str, + host_key: str | None, + ) -> "GitEntry": name = d.get("Name") if not isinstance(name, str) or not name: - die(f"bottle '{bottle_name}' git[{idx}] missing required string field 'Name'") + die( + f"bottle '{bottle_name}' {label} missing required string " + f"field 'Name'" + ) upstream = d.get("Upstream") if not isinstance(upstream, str) or not upstream: die( - f"bottle '{bottle_name}' git '{name}' missing required string field " + f"bottle '{bottle_name}' {label} '{name}' missing required string field " f"'Upstream'" ) ident = d.get("IdentityFile") if not isinstance(ident, str) or not ident: die( - f"bottle '{bottle_name}' git '{name}' missing required string field " + f"bottle '{bottle_name}' {label} '{name}' missing required string field " f"'IdentityFile'" ) khk = _opt_str( d.get("KnownHostKey"), - f"bottle '{bottle_name}' git '{name}' KnownHostKey", + f"bottle '{bottle_name}' {label} '{name}' KnownHostKey", ) extra_hosts = _opt_extra_hosts( - d.get("ExtraHosts"), f"bottle '{bottle_name}' git '{name}' ExtraHosts" + d.get("ExtraHosts"), + f"bottle '{bottle_name}' {label} '{name}' ExtraHosts", ) user, host, port, path = _parse_git_upstream( - upstream, f"bottle '{bottle_name}' git '{name}' Upstream" + upstream, f"bottle '{bottle_name}' {label} '{name}' Upstream" ) + if host_key is not None and host_key != host: + die( + f"bottle '{bottle_name}' git.remotes key {host_key!r} " + f"does not match Upstream host {host!r}" + ) return cls( Name=name, Upstream=upstream, @@ -177,28 +208,28 @@ class GitUser: @classmethod def from_dict(cls, bottle_name: str, raw: object) -> "GitUser": - d = _as_json_object(raw, f"bottle '{bottle_name}' git_user") + d = _as_json_object(raw, f"bottle '{bottle_name}' git.user") for k in d.keys(): if k not in {"name", "email"}: die( - f"bottle '{bottle_name}' git_user has unknown key {k!r}; " + f"bottle '{bottle_name}' git.user has unknown key {k!r}; " f"allowed: name, email" ) name = d.get("name", "") email = d.get("email", "") if not isinstance(name, str): die( - f"bottle '{bottle_name}' git_user.name must be a string " + f"bottle '{bottle_name}' git.user.name must be a string " f"(was {type(name).__name__})" ) if not isinstance(email, str): die( - f"bottle '{bottle_name}' git_user.email must be a string " + f"bottle '{bottle_name}' git.user.email must be a string " f"(was {type(email).__name__})" ) if not name and not email: die( - f"bottle '{bottle_name}' git_user is set but neither " + f"bottle '{bottle_name}' git.user is set but neither " f"name nor email is non-empty; remove the block or " f"fill at least one field." ) @@ -208,6 +239,37 @@ class GitUser: return not self.name and not self.email +def _parse_git_config( + bottle_name: str, + raw: object, +) -> tuple[tuple[GitEntry, ...], GitUser]: + d = _as_json_object(raw, f"bottle '{bottle_name}' git") + for k in d.keys(): + if k not in {"user", "remotes"}: + die( + f"bottle '{bottle_name}' git has unknown key {k!r}; " + f"allowed: user, remotes" + ) + + git_user = ( + GitUser.from_dict(bottle_name, d["user"]) + if "user" in d + else GitUser() + ) + + git: tuple[GitEntry, ...] = () + remotes_raw = d.get("remotes") + if remotes_raw is not None: + remotes = _as_json_object(remotes_raw, f"bottle '{bottle_name}' git.remotes") + git = tuple( + GitEntry.from_remote_dict(bottle_name, host, entry) + for host, entry in remotes.items() + ) + _validate_unique_git_names(bottle_name, git) + + return git, git_user + + @dataclass(frozen=True) class EgressRoute: """One route on the per-bottle egress sidecar (PRD 0017). @@ -396,9 +458,9 @@ class Bottle: env: Mapping[str, str] = field(default_factory=_empty_str_dict) git: tuple[GitEntry, ...] = () # Per-bottle git identity (issue #86). Empty default — bottles - # that don't set `git_user:` in the manifest skip the + # that don't set `git.user:` in the manifest skip the # `git config --global` step entirely. Set independently of - # the `git:` upstream list above: a bottle can declare a user + # the `git.remotes:` upstream map above: a bottle can declare a user # identity without any git-gate upstreams, and vice versa. git_user: GitUser = field(default_factory=GitUser) egress: EgressConfig = field(default_factory=EgressConfig) @@ -432,6 +494,20 @@ class Bottle: f"credential and gitleaks-scan pushes." ) + if "git_user" in d: + die( + f"bottle '{name}' has a 'git_user' field, which has been " + f"removed. Move it under 'git.user'." + ) + + unknown = set(d.keys()) - _BOTTLE_KEYS + if unknown: + allowed = ", ".join(sorted(_BOTTLE_KEYS)) + die( + f"bottle '{name}' has unknown key(s) {sorted(unknown)}; " + f"allowed keys are {allowed}." + ) + env: dict[str, str] = {} env_raw = d.get("env") if env_raw is not None: @@ -445,16 +521,10 @@ class Bottle: env[var] = value git: tuple[GitEntry, ...] = () + git_user = GitUser() git_raw = d.get("git") if git_raw is not None: - if not isinstance(git_raw, list): - die(f"bottle '{name}' git must be an array (was {type(git_raw).__name__})") - git_list = cast(list[object], git_raw) - git = tuple( - GitEntry.from_dict(name, i, entry) - for i, entry in enumerate(git_list) - ) - _validate_unique_git_names(name, git) + git, git_user = _parse_git_config(name, git_raw) if "tokens" in d: die( @@ -479,12 +549,6 @@ class Bottle: f"See docs/prds/0017-egress-via-mitmproxy.md." ) - git_user = ( - GitUser.from_dict(name, d["git_user"]) - if "git_user" in d - else GitUser() - ) - egress = ( EgressConfig.from_dict(name, d["egress"]) if "egress" in d @@ -840,7 +904,7 @@ _FILENAME_RX = re.compile(r"^[a-z][a-z0-9-]*$") # sets dies with a "did you mean" pointer — typos shouldn't silently # ghost into an empty config. _BOTTLE_KEYS = frozenset( - {"env", "extends", "git", "git_user", "egress", "supervise"} + {"env", "extends", "git", "egress", "supervise"} ) _AGENT_KEYS_REQUIRED = frozenset({"bottle"}) _AGENT_KEYS_OPTIONAL = frozenset({"skills"}) @@ -984,10 +1048,9 @@ def _merge_bottles( name: str, ) -> Bottle: """Apply PRD 0025 merge rules: parent is base; child's declared - fields overlay. List-valued fields full-replace when the child - declared them (presence-driven on the raw dict, so an explicit - `git: []` clears the parent's list); env merges dict-style - with child-wins on key collision; git_user overlays per-field.""" + fields overlay. env merges dict-style with child-wins on key + collision; git.user overlays per-field; git.remotes merges by + upstream host with child entries replacing duplicate hosts.""" # Parse the child's declared fields into a Bottle (with the # usual defaults for anything missing). Validation runs the same # way it would for a leaf bottle — typos / wrong types die here. @@ -996,20 +1059,25 @@ def _merge_bottles( # env: dict merge, child wins on collision. merged_env = {**parent.env, **child.env} - # git_user: per-field overlay. Each non-empty field on child + # git.user: per-field overlay. Each non-empty field on child # wins; empties fall through to parent. The default GitUser() - # is two empty strings, so a child that omits the block + # is two empty strings, so a child that omits git.user # inherits the parent's user verbatim. merged_git_user = GitUser( name=child.git_user.name or parent.git_user.name, email=child.git_user.email or parent.git_user.email, ) - # Presence-driven full-replace for the list-valued + scalar - # fields. "Did the child's raw dict have this key?" is the - # source of truth — an explicit `git: []` means "drop the - # parent's git list", whereas a missing `git:` means "inherit". - merged_git = child.git if "git" in child_raw else parent.git + # git.remotes: missing means inherit; an explicit empty object + # clears; otherwise parent and child merge by UpstreamHost with + # child entries replacing duplicate hosts. + if _child_declares_git_remotes(child_raw): + merged_git = _merge_git_remotes(parent.git, child.git) if child.git else () + else: + merged_git = parent.git + + # Presence-driven full-replace for the remaining list-valued + + # scalar fields. merged_egress = child.egress if "egress" in child_raw else parent.egress merged_supervise = ( child.supervise if "supervise" in child_raw else parent.supervise @@ -1024,6 +1092,24 @@ def _merge_bottles( ) +def _child_declares_git_remotes(child_raw: dict[str, object]) -> bool: + git_raw = child_raw.get("git") + if git_raw is None: + return False + git_obj = _as_json_object(git_raw, "child git") + return "remotes" in git_obj + + +def _merge_git_remotes( + parent: tuple[GitEntry, ...], + child: tuple[GitEntry, ...], +) -> tuple[GitEntry, ...]: + by_host = {entry.UpstreamHost: entry for entry in parent} + for entry in child: + by_host[entry.UpstreamHost] = entry + return tuple(by_host.values()) + + def _load_agents_from_dir( agents_dir: Path, bottle_names: set[str], diff --git a/docs/prds/0011-per-file-md-manifest.md b/docs/prds/0011-per-file-md-manifest.md index 7463526..813ac0c 100644 --- a/docs/prds/0011-per-file-md-manifest.md +++ b/docs/prds/0011-per-file-md-manifest.md @@ -269,12 +269,14 @@ cred_proxy: token_ref: GITEA_TOKEN role: [git-insteadof, tea-login] git: - - Name: claude-bottle - Upstream: ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git - IdentityFile: ~/.ssh/gitea-delos-2.pem - ExtraHosts: - gitea.dideric.is: 100.78.141.42 - KnownHostKey: ssh-rsa AAAAB3... + remotes: + gitea.dideric.is: + Name: claude-bottle + Upstream: ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git + IdentityFile: ~/.ssh/gitea-delos-2.pem + ExtraHosts: + gitea.dideric.is: 100.78.141.42 + KnownHostKey: ssh-rsa AAAAB3... egress: allowlist: - example.com diff --git a/docs/prds/0022-sandbox-escape-integration-test.md b/docs/prds/0022-sandbox-escape-integration-test.md index 6aa39b4..48035f2 100644 --- a/docs/prds/0022-sandbox-escape-integration-test.md +++ b/docs/prds/0022-sandbox-escape-integration-test.md @@ -191,9 +191,11 @@ egress: - host: api.anthropic.com git: - - Name: throwaway - Upstream: ssh://git@127.0.0.1:22/throwaway.git - IdentityFile: ~/.ssh/cb-test-key # fixture key + remotes: + 127.0.0.1: + Name: throwaway + Upstream: ssh://git@127.0.0.1:22/throwaway.git + IdentityFile: ~/.ssh/cb-test-key # fixture key --- ``` diff --git a/docs/prds/0025-bottle-extends.md b/docs/prds/0025-bottle-extends.md index ef80628..a5d4c1e 100644 --- a/docs/prds/0025-bottle-extends.md +++ b/docs/prds/0025-bottle-extends.md @@ -105,23 +105,17 @@ overlay it. For each field on `Bottle`: | Field | Type | Merge | |--------------|-----------------------|---------------------------------------------| | `env` | `Mapping[str, str]` | dict merge, child wins on key collision | -| `git` | `tuple[GitEntry,…]` | full replace if child declares `git:` | -| `git_user` | `GitUser` | child overlay: child's non-empty fields win | +| `git.user` | `GitUser` | child overlay: child's non-empty fields win | +| `git.remotes`| `tuple[GitEntry,…]` | dict merge by host, child wins | | `egress` | `EgressConfig` | full replace if child declares `egress:` | | `supervise` | `bool` | full replace if child declares `supervise:` | -Why full-replace for the list-valued fields (`git[]`, -`egress.routes[]`): +Why full-replace for `egress.routes[]`: - **Ordering matters.** Egress route ordering is part of the match semantics (first matching host wins). Merging two ordered lists by name introduces "where does the child's route go?" ambiguity. -- **Name collisions are ambiguous.** If parent has - `git: [{Name: foo, Upstream: A}]` and child has `git: - [{Name: foo, Upstream: B}]`, "merge" could mean override-B or - error-on-collision. Full-replace makes the operator's - intent explicit. - **Simpler precedence.** "Child declares X → X wins, full stop" is one sentence; partial merges need a table per list. @@ -129,9 +123,15 @@ The `env` dict is the one exception because dict-merge has no ordering concern and dict-keyed overrides are the obvious user expectation. (Same model as shell `export` precedence.) -The `git_user` dataclass-overlay (each non-empty field wins -individually) is so a parent can declare `git_user.name` and a -child can add just `git_user.email`. The default `GitUser()` +`git.remotes` is also keyed, so it follows dict-style inheritance: +children can override one host without restating every remote. The +remote entry is replaced as a whole on host collision because +`Upstream`, `IdentityFile`, `KnownHostKey`, and `ExtraHosts` are +tightly coupled. + +The `git.user` dataclass-overlay (each non-empty field wins +individually) is so a parent can declare `git.user.name` and a +child can add just `git.user.email`. The default `GitUser()` fields are empty strings, which are treated as "not set" for overlay purposes — same `is_empty()` predicate the provisioner uses. @@ -191,7 +191,7 @@ moot because there's only one bottle source. - Implement `_merge(parent: Bottle, child_raw: dict, name: str) -> Bottle` with the rules table above. - Unit tests: simple two-bottle extends, env merge with - collision, list-replace for git + egress, git_user overlay, + collision, host-keyed git remote merge, egress list-replace, git.user overlay, supervise override, missing parent dies, cycle dies, deeper chains (A extends B extends C). 3. **Docs.** Add an `extends:` example to the README's manifest diff --git a/tests/fixtures.py b/tests/fixtures.py index 639dd0c..cfcb11a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -42,20 +42,22 @@ def fixture_with_git_dict() -> dict[str, Any]: return { "bottles": { "dev": { - "git": [ - { - "Name": "claude-bottle", - "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", - "IdentityFile": "/dev/null", - "KnownHostKey": "ssh-ed25519 AAAA...", + "git": { + "remotes": { + "gitea.dideric.is": { + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + "KnownHostKey": "ssh-ed25519 AAAA...", + }, + "github.com": { + "Name": "foo", + "Upstream": "ssh://git@github.com/didericis/foo.git", + "IdentityFile": "/dev/null", + "KnownHostKey": "ssh-ed25519 BBBB...", + }, }, - { - "Name": "foo", - "Upstream": "ssh://git@github.com/didericis/foo.git", - "IdentityFile": "/dev/null", - "KnownHostKey": "ssh-ed25519 BBBB...", - }, - ] + } } }, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, diff --git a/tests/integration/test_sandbox_escape.py b/tests/integration/test_sandbox_escape.py index 3a39bb4..a50369c 100644 --- a/tests/integration/test_sandbox_escape.py +++ b/tests/integration/test_sandbox_escape.py @@ -120,11 +120,13 @@ class TestSandboxEscape(unittest.TestCase): # is intentionally unreachable — the pre-receive # gitleaks hook must reject BEFORE git-gate # attempts the upstream push. - "git": [{ - "Name": "throwaway", - "Upstream": "ssh://git@unreachable.invalid:22/throwaway.git", - "IdentityFile": str(cls._key_path), - }], + "git": {"remotes": { + "unreachable.invalid": { + "Name": "throwaway", + "Upstream": "ssh://git@unreachable.invalid:22/throwaway.git", + "IdentityFile": str(cls._key_path), + }, + }}, }, }, "agents": { diff --git a/tests/unit/test_compose.py b/tests/unit/test_compose.py index 2a0ee95..bb99812 100644 --- a/tests/unit/test_compose.py +++ b/tests/unit/test_compose.py @@ -43,11 +43,13 @@ def _manifest(*, supervise: bool, with_git: bool, with_egress: bool) -> Manifest if supervise: bottle["supervise"] = True if with_git: - bottle["git"] = [{ - "Name": "upstream", - "Upstream": "ssh://git@example.com:22/x/y.git", - "IdentityFile": "/etc/hostname", # any existing file - }] + bottle["git"] = {"remotes": { + "example.com": { + "Name": "upstream", + "Upstream": "ssh://git@example.com:22/x/y.git", + "IdentityFile": "/etc/hostname", # any existing file + }, + }} if with_egress: bottle["egress"] = { "routes": [{ diff --git a/tests/unit/test_docker_provision_git_user.py b/tests/unit/test_docker_provision_git_user.py index 9cd5728..fa51cb6 100644 --- a/tests/unit/test_docker_provision_git_user.py +++ b/tests/unit/test_docker_provision_git_user.py @@ -26,7 +26,7 @@ def _plan(*, git_user: dict | None = None, stage_dir: Path | None = None) -> DockerBottlePlan: bottle_json: dict = {} if git_user is not None: - bottle_json["git_user"] = git_user + bottle_json["git"] = {"user": git_user} manifest = Manifest.from_json_obj({ "bottles": {"dev": bottle_json}, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, diff --git a/tests/unit/test_git_gate.py b/tests/unit/test_git_gate.py index eabb66a..a43d37f 100644 --- a/tests/unit/test_git_gate.py +++ b/tests/unit/test_git_gate.py @@ -51,12 +51,14 @@ class TestExtraHostsPlumbing(unittest.TestCase): m = Manifest.from_json_obj({ "bottles": { "dev": { - "git": [{ - "Name": "claude-bottle", - "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", - "IdentityFile": "/dev/null", - "ExtraHosts": {"gitea.dideric.is": "100.78.141.42"}, - }], + "git": {"remotes": { + "gitea.dideric.is": { + "Name": "claude-bottle", + "Upstream": "ssh://git@gitea.dideric.is:30009/didericis/claude-bottle.git", + "IdentityFile": "/dev/null", + "ExtraHosts": {"gitea.dideric.is": "100.78.141.42"}, + }, + }}, }, }, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, diff --git a/tests/unit/test_manifest_extends.py b/tests/unit/test_manifest_extends.py index 9141749..86a72c7 100644 --- a/tests/unit/test_manifest_extends.py +++ b/tests/unit/test_manifest_extends.py @@ -116,10 +116,9 @@ class TestExtendsEnvMerge(unittest.TestCase): self.assertEqual({"A": "1", "B": "2"}, dict(m.bottles["child"].env)) -class TestExtendsListsFullReplace(unittest.TestCase): - """git: and egress: are full-replace when the child declares - them — partial merge would be ambiguous (ordering + name - collisions). See PRD 0025 "Merge rules".""" +class TestExtendsGitMerge(unittest.TestCase): + """git.user overlays by field; git.remotes merges by upstream + host, with child entries replacing duplicate hosts.""" _GIT_ENTRY_A = { "Name": "a", @@ -132,31 +131,67 @@ class TestExtendsListsFullReplace(unittest.TestCase): "IdentityFile": "/dev/null", } - def test_child_git_replaces_parent_entirely(self): + def test_child_git_remotes_merge_with_parent(self): m = _build( - base={"git": [self._GIT_ENTRY_A]}, - child={"extends": "base", "git": [self._GIT_ENTRY_B]}, + base={"git": {"remotes": {"host-a": self._GIT_ENTRY_A}}}, + child={ + "extends": "base", + "git": {"remotes": {"host-b": self._GIT_ENTRY_B}}, + }, ) names = [e.Name for e in m.bottles["child"].git] - self.assertEqual(["b"], names) + self.assertEqual(["a", "b"], names) + + def test_child_git_remote_replaces_same_host(self): + replacement = { + "Name": "a2", + "Upstream": "ssh://git@host-a/replacement.git", + "IdentityFile": "/dev/null", + } + m = _build( + base={"git": {"remotes": {"host-a": self._GIT_ENTRY_A}}}, + child={ + "extends": "base", + "git": {"remotes": {"host-a": replacement}}, + }, + ) + entries = m.bottles["child"].git + self.assertEqual(1, len(entries)) + self.assertEqual("a2", entries[0].Name) + self.assertEqual("replacement.git", entries[0].UpstreamPath) def test_child_omits_git_inherits_full_list(self): m = _build( - base={"git": [self._GIT_ENTRY_A, self._GIT_ENTRY_B]}, + base={"git": {"remotes": { + "host-a": self._GIT_ENTRY_A, + "host-b": self._GIT_ENTRY_B, + }}}, child={"extends": "base"}, ) names = [e.Name for e in m.bottles["child"].git] self.assertEqual(["a", "b"], names) def test_child_explicit_empty_git_clears_parent(self): - # `git: []` is the documented way to say "drop the - # parent's list" rather than "inherit it". + # `git.remotes: {}` is the documented way to say "drop + # the parent's remotes" rather than "inherit them". m = _build( - base={"git": [self._GIT_ENTRY_A]}, - child={"extends": "base", "git": []}, + base={"git": {"remotes": {"host-a": self._GIT_ENTRY_A}}}, + child={"extends": "base", "git": {"remotes": {}}}, ) self.assertEqual((), m.bottles["child"].git) + def test_child_git_user_inherits_parent_remotes(self): + m = _build( + base={"git": {"remotes": {"host-a": self._GIT_ENTRY_A}}}, + child={"extends": "base", "git": {"user": {"name": "Child"}}}, + ) + self.assertEqual(["a"], [e.Name for e in m.bottles["child"].git]) + self.assertEqual("Child", m.bottles["child"].git_user.name) + + +class TestExtendsListsFullReplace(unittest.TestCase): + """egress: remains full-replace when the child declares it.""" + def test_child_egress_replaces_parent_entirely(self): m = _build( base={"egress": {"routes": [{"host": "a.example.com"}]}}, @@ -178,12 +213,12 @@ class TestExtendsListsFullReplace(unittest.TestCase): class TestExtendsGitUserOverlay(unittest.TestCase): - """git_user: per-field overlay. Each non-empty field on child + """git.user: per-field overlay. Each non-empty field on child wins; empties fall through to parent.""" def test_parent_full_child_omits(self): m = _build( - base={"git_user": {"name": "Parent", "email": "p@x"}}, + base={"git": {"user": {"name": "Parent", "email": "p@x"}}}, child={"extends": "base"}, ) u = m.bottles["child"].git_user @@ -192,10 +227,10 @@ class TestExtendsGitUserOverlay(unittest.TestCase): def test_child_overrides_both(self): m = _build( - base={"git_user": {"name": "Parent", "email": "p@x"}}, + base={"git": {"user": {"name": "Parent", "email": "p@x"}}}, child={ "extends": "base", - "git_user": {"name": "Child", "email": "c@x"}, + "git": {"user": {"name": "Child", "email": "c@x"}}, }, ) u = m.bottles["child"].git_user @@ -206,8 +241,8 @@ class TestExtendsGitUserOverlay(unittest.TestCase): # Parent sets only name; child sets only email. Both end # up populated on the child. m = _build( - base={"git_user": {"name": "Parent"}}, - child={"extends": "base", "git_user": {"email": "c@x"}}, + base={"git": {"user": {"name": "Parent"}}}, + child={"extends": "base", "git": {"user": {"email": "c@x"}}}, ) u = m.bottles["child"].git_user self.assertEqual("Parent", u.name) @@ -215,8 +250,8 @@ class TestExtendsGitUserOverlay(unittest.TestCase): def test_child_overrides_only_email(self): m = _build( - base={"git_user": {"name": "Parent", "email": "p@x"}}, - child={"extends": "base", "git_user": {"email": "c@x"}}, + base={"git": {"user": {"name": "Parent", "email": "p@x"}}}, + child={"extends": "base", "git": {"user": {"email": "c@x"}}}, ) u = m.bottles["child"].git_user # Child overrides email; name inherited from parent. diff --git a/tests/unit/test_manifest_git.py b/tests/unit/test_manifest_git.py index 639ead0..4418425 100644 --- a/tests/unit/test_manifest_git.py +++ b/tests/unit/test_manifest_git.py @@ -8,11 +8,26 @@ from claude_bottle.manifest import Manifest def _manifest(git_entries): return { - "bottles": {"dev": {"git": git_entries}}, + "bottles": {"dev": {"git": {"remotes": { + _host_for(entry): entry for entry in git_entries + }}}}, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, } +def _host_for(entry): + upstream = entry.get("Upstream", "") + if "@a.example" in upstream: + return "a.example" + if "@b.example" in upstream: + return "b.example" + if "@github.com" in upstream: + return "github.com" + if "@gitea.dideric.is" in upstream: + return "gitea.dideric.is" + return "example.com" + + class TestGitEntryParsing(unittest.TestCase): def test_parses_minimal_entry(self): m = Manifest.from_json_obj(_manifest([{ @@ -161,12 +176,34 @@ class TestGitEntryExtraHosts(unittest.TestCase): class TestGitEntryCrossValidation(unittest.TestCase): def test_duplicate_name_dies(self): with self.assertRaises(Die): - Manifest.from_json_obj(_manifest([ - {"Name": "foo", "Upstream": "ssh://git@a.example/x.git", - "IdentityFile": "/dev/null"}, - {"Name": "foo", "Upstream": "ssh://git@b.example/y.git", - "IdentityFile": "/dev/null"}, - ])) + Manifest.from_json_obj({ + "bottles": {"dev": {"git": {"remotes": { + "a.example": { + "Name": "foo", + "Upstream": "ssh://git@a.example/x.git", + "IdentityFile": "/dev/null", + }, + "b.example": { + "Name": "foo", + "Upstream": "ssh://git@b.example/y.git", + "IdentityFile": "/dev/null", + }, + }}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + + def test_remote_key_must_match_upstream_host(self): + with self.assertRaises(Die): + Manifest.from_json_obj({ + "bottles": {"dev": {"git": {"remotes": { + "wrong.example": { + "Name": "foo", + "Upstream": "ssh://git@github.com/foo.git", + "IdentityFile": "/dev/null", + }, + }}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) def test_legacy_ssh_field_dies_with_hint(self): # PRD 0009: bottle.ssh is removed; manifests carrying it must @@ -196,13 +233,20 @@ class TestEmptyGitField(unittest.TestCase): }) self.assertEqual((), m.bottles["dev"].git) - def test_git_array_type_required(self): + def test_git_object_type_required(self): with self.assertRaises(Die): Manifest.from_json_obj({ "bottles": {"dev": {"git": "not-a-list"}}, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, }) + def test_empty_remotes_yields_empty_tuple(self): + m = Manifest.from_json_obj({ + "bottles": {"dev": {"git": {"remotes": {}}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }) + self.assertEqual((), m.bottles["dev"].git) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_manifest_git_user.py b/tests/unit/test_manifest_git_user.py index bf950d5..8c943e2 100644 --- a/tests/unit/test_manifest_git_user.py +++ b/tests/unit/test_manifest_git_user.py @@ -1,4 +1,4 @@ -"""Unit: Bottle.git_user manifest parsing + validation (issue #86).""" +"""Unit: Bottle git.user manifest parsing + validation (issue #86).""" import contextlib import io @@ -24,7 +24,7 @@ def _die_message(callable_, *args, **kwargs) -> str: def _manifest(git_user): return { - "bottles": {"dev": {"git_user": git_user}}, + "bottles": {"dev": {"git": {"user": git_user}}}, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, } @@ -53,7 +53,7 @@ class TestGitUserParsing(unittest.TestCase): self.assertEqual("bot@example.com", u.email) def test_omitted_defaults_to_empty(self): - # No git_user block at all → empty GitUser, is_empty True → + # No git.user block at all → empty GitUser, is_empty True → # provisioner skips the `git config` step entirely. m = Manifest.from_json_obj({ "bottles": {"dev": {}}, @@ -63,7 +63,7 @@ class TestGitUserParsing(unittest.TestCase): self.assertTrue(u.is_empty()) def test_both_empty_strings_dies(self): - # An explicit `git_user: {name: "", email: ""}` is a typo + # An explicit `git.user: {name: "", email: ""}` is a typo # / half-finished edit; fail loudly rather than silently # no-op (the operator clearly meant to configure something). msg = _die_message( @@ -83,13 +83,24 @@ class TestGitUserParsing(unittest.TestCase): msg = _die_message( Manifest.from_json_obj, _manifest({"name": 42}), ) - self.assertIn("git_user.name must be a string", msg) + self.assertIn("git.user.name must be a string", msg) def test_non_string_email_dies(self): msg = _die_message( Manifest.from_json_obj, _manifest({"email": ["x@y.z"]}), ) - self.assertIn("git_user.email must be a string", msg) + self.assertIn("git.user.email must be a string", msg) + + def test_legacy_top_level_git_user_dies(self): + msg = _die_message( + Manifest.from_json_obj, + { + "bottles": {"dev": {"git_user": {"name": "Bot"}}}, + "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, + }, + ) + self.assertIn("git_user", msg) + self.assertIn("git.user", msg) class TestGitUserDirect(unittest.TestCase): diff --git a/tests/unit/test_smolmachines_provision.py b/tests/unit/test_smolmachines_provision.py index 9cfa6dc..303d60a 100644 --- a/tests/unit/test_smolmachines_provision.py +++ b/tests/unit/test_smolmachines_provision.py @@ -31,6 +31,12 @@ from claude_bottle.pipelock import PipelockProxyPlan from claude_bottle.supervise import SupervisePlan +def _remote_host(g: GitEntry) -> str: + if g.UpstreamHost: + return g.UpstreamHost + return g.Upstream.split("@", 1)[1].split("/", 1)[0].split(":", 1)[0] + + def _plan( *, agent_prompt: str = "", @@ -49,17 +55,20 @@ def _plan( agent_supervise_url: str = "http://127.0.0.1:55556/", ) -> SmolmachinesBottlePlan: bottle_json: dict = {} + git_json: dict = {} if git: - bottle_json["git"] = [ - { + git_json["remotes"] = { + _remote_host(g): { "Name": g.Name, "Upstream": g.Upstream, "IdentityFile": g.IdentityFile, } for g in git - ] + } if git_user is not None: - bottle_json["git_user"] = git_user + git_json["user"] = git_user + if git_json: + bottle_json["git"] = git_json if supervise: bottle_json["supervise"] = True manifest = Manifest.from_json_obj({ diff --git a/tests/unit/test_yaml_subset.py b/tests/unit/test_yaml_subset.py index cd60736..c813b4f 100644 --- a/tests/unit/test_yaml_subset.py +++ b/tests/unit/test_yaml_subset.py @@ -265,11 +265,13 @@ class TestRealisticBottleFile(unittest.TestCase): path_allowlist: - /didericis/ git: - - Name: claude-bottle - Upstream: ssh://git@gitea.dideric.is:30009/x/y.git - IdentityFile: ~/.ssh/gitea.pem - ExtraHosts: - gitea.dideric.is: 100.78.141.42 + remotes: + gitea.dideric.is: + Name: claude-bottle + Upstream: ssh://git@gitea.dideric.is:30009/x/y.git + IdentityFile: ~/.ssh/gitea.pem + ExtraHosts: + gitea.dideric.is: 100.78.141.42 """) # Spot-check the deep parts; the structure is large. self.assertEqual(2, len(out["egress"]["routes"])) @@ -283,7 +285,7 @@ class TestRealisticBottleFile(unittest.TestCase): ) self.assertEqual( "100.78.141.42", - out["git"][0]["ExtraHosts"]["gitea.dideric.is"], + out["git"]["remotes"]["gitea.dideric.is"]["ExtraHosts"]["gitea.dideric.is"], ) -- 2.52.0 From f029a3d7f5e8aff3d148b9a8cd5246688648244c Mon Sep 17 00:00:00 2001 From: codex Date: Thu, 28 May 2026 01:08:05 -0400 Subject: [PATCH 5/5] refactor(manifest): drop stale migration errors --- claude_bottle/manifest.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/claude_bottle/manifest.py b/claude_bottle/manifest.py index ff4d131..6d2ae8d 100644 --- a/claude_bottle/manifest.py +++ b/claude_bottle/manifest.py @@ -526,29 +526,6 @@ class Bottle: if git_raw is not None: git, git_user = _parse_git_config(name, git_raw) - if "tokens" in d: - die( - f"bottle '{name}' has a 'tokens' field. The shape was reworked: " - f"each route now lives under 'egress.routes' with explicit " - f"host / path_allowlist / auth. See docs/prds/0017-egress-via-mitmproxy.md." - ) - - if "cred_proxy" in d: - die( - f"bottle '{name}' has a 'cred_proxy' field, which has been removed " - f"(PRD 0017). Rename to 'egress' and migrate each route:\n" - f" - 'path' + 'upstream' (cred-proxy URL prefix + upstream URL)\n" - f" → 'host' (just the upstream hostname)\n" - f" - 'auth_scheme' + 'token_ref' (flat)\n" - f" → 'auth: {{ scheme, token_ref }}' (nested, optional)\n" - f" - 'role' (provisioner dotfile rewrites): drop — egress " - f"is on the agent's HTTP_PROXY path, so dotfile rewrites are no " - f"longer needed.\n" - f" - 'path_allowlist' (new): optional URL prefix gate for the " - f"host.\n" - f"See docs/prds/0017-egress-via-mitmproxy.md." - ) - egress = ( EgressConfig.from_dict(name, d["egress"]) if "egress" in d -- 2.52.0