From 4816733120789a335173975684b0b470402efb70 Mon Sep 17 00:00:00 2001 From: claude Date: Sat, 20 Jun 2026 02:02:28 +0000 Subject: [PATCH] feat: defer broken manifest parse errors to preflight Broken bottle/agent files no longer block the agent selector or prevent unrelated agents from loading. Per-file parse errors are collected in `Manifest.broken_agents`; the CLI selector includes them via `all_agent_names`, and the error surfaces only when the specific agent is selected and launch is attempted (in `require_agent`/`bottle_for`). Closes #236 --- bot_bottle/cli/start.py | 2 +- bot_bottle/manifest.py | 39 ++++++-- bot_bottle/manifest_extends.py | 83 ++++++++++++++++ bot_bottle/manifest_loader.py | 100 ++++++++++++++------ tests/unit/test_cli_start_selector.py | 1 + tests/unit/test_manifest_agent_git_user.py | 10 +- tests/unit/test_manifest_md_load.py | 104 ++++++++++++++++----- 7 files changed, 277 insertions(+), 62 deletions(-) diff --git a/bot_bottle/cli/start.py b/bot_bottle/cli/start.py index a658b56..91942c2 100644 --- a/bot_bottle/cli/start.py +++ b/bot_bottle/cli/start.py @@ -65,7 +65,7 @@ def cmd_start(argv: list[str]) -> int: agent_name: str | None = args.name if agent_name is None: agent_name = tui.filter_select( - sorted(manifest.agents.keys()), + manifest.all_agent_names, title="Select agent", ) if agent_name is None: diff --git a/bot_bottle/manifest.py b/bot_bottle/manifest.py index 427e033..30b2493 100644 --- a/bot_bottle/manifest.py +++ b/bot_bottle/manifest.py @@ -193,6 +193,9 @@ class ManifestBottle: class Manifest: bottles: Mapping[str, ManifestBottle] agents: Mapping[str, ManifestAgent] + # Agents (and agents referencing broken bottles) that failed to load. + # Their errors are deferred to preflight rather than raised at load time. + broken_agents: Mapping[str, ManifestError] = field(default_factory=dict) @classmethod def resolve(cls, cwd: str, *, missing_ok: bool = False) -> "Manifest": @@ -256,16 +259,23 @@ class Manifest: name collision. A `bottles/` subdir under `cwd_dir` is logged as a warning and ignored. + Per-file parse errors are deferred into `broken_agents` rather + than raised, so a broken bottle or agent only fails at preflight + when that specific agent is selected for launch. + Used by tests to build a Manifest from fixture directories without touching `os.environ`.""" bottles_dir = home_dir / "bottles" from .manifest_loader import load_agents_from_dir, load_bottles_from_dir - bottles = load_bottles_from_dir(bottles_dir) + bottles, broken_bottle_errors = load_bottles_from_dir(bottles_dir) bottle_names = set(bottles.keys()) agents_dir = home_dir / "agents" - agents = load_agents_from_dir(agents_dir, bottle_names, source="$HOME") + agents, broken_agents = load_agents_from_dir( + agents_dir, bottle_names, source="$HOME", + broken_bottle_errors=broken_bottle_errors, + ) if cwd_dir is not None: stale_bottles = cwd_dir / "bottles" @@ -281,12 +291,14 @@ class Manifest: f"(PRD 0011). Move them or delete." ) cwd_agents_dir = cwd_dir / "agents" - cwd_agents = load_agents_from_dir( - cwd_agents_dir, bottle_names, source="$CWD" + cwd_agents, cwd_broken = load_agents_from_dir( + cwd_agents_dir, bottle_names, source="$CWD", + broken_bottle_errors=broken_bottle_errors, ) agents = {**agents, **cwd_agents} + broken_agents = {**broken_agents, **cwd_broken} - return cls(bottles=bottles, agents=agents) + return cls(bottles=bottles, agents=agents, broken_agents=broken_agents) @classmethod def from_json_obj(cls, obj: object) -> "Manifest": @@ -311,10 +323,21 @@ class Manifest: } return cls(bottles=bottles, agents=agents) + @property + def all_agent_names(self) -> list[str]: + """Sorted list of all agent names, including broken ones. + + Broken agents appear in the CLI selector so users can select any + agent — the error surfaces only at preflight when launch is + attempted.""" + return sorted(set(self.agents.keys()) | set(self.broken_agents.keys())) + def has_agent(self, name: str) -> bool: return name in self.agents def require_agent(self, name: str) -> None: + if name in self.broken_agents: + raise self.broken_agents[name] if self.has_agent(name): return available = ", ".join(self.agents.keys()) @@ -361,7 +384,11 @@ class Manifest: The overlay lives here, the single point both backends call to resolve an agent's bottle, so the docker / smolmachines git - provisioners pick up the merged identity unchanged.""" + provisioners pick up the merged identity unchanged. + + Raises the stored ManifestError for agents that failed to load.""" + if agent_name in self.broken_agents: + raise self.broken_agents[agent_name] bottle = self.bottles[self.agents[agent_name].bottle] merged = self._effective_git_user(agent_name) if merged == bottle.git_user: diff --git a/bot_bottle/manifest_extends.py b/bot_bottle/manifest_extends.py index 3432f64..b58e4d5 100644 --- a/bot_bottle/manifest_extends.py +++ b/bot_bottle/manifest_extends.py @@ -22,6 +22,25 @@ def resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, ManifestBot return cache +def resolve_bottles_partial( + raws: dict[str, dict[str, object]], +) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]: + """Apply `extends:` chains and return `(good, broken)`. + + Bottles that fail validation (schema errors, bad extends, cycles) are + collected in `broken` rather than raising, so unrelated bottles remain + usable. Errors for parent bottles propagate to all children that extend + them.""" + from .manifest import ManifestError + + cache: dict[str, ManifestBottle] = {} + broken: dict[str, ManifestError] = {} + for name in raws: + if name not in cache and name not in broken: + _resolve_one_bottle_partial(name, raws, cache, broken, ()) + return cache, broken + + def _resolve_one_bottle( name: str, raws: dict[str, dict[str, object]], @@ -210,3 +229,67 @@ def _merge_egress( routes = parent.routes + child.routes log = child.Log if "log" in child_egress_raw else parent.Log return ManifestEgressConfig(routes=routes, Log=log) + + +def _resolve_one_bottle_partial( + name: str, + raws: dict[str, dict[str, object]], + cache: dict[str, ManifestBottle], + broken: dict[str, ManifestError], + seen: tuple[str, ...], +) -> None: + """Error-tolerant variant: on failure, adds to `broken` instead of raising.""" + from .manifest import ManifestBottle, ManifestError + + if name in cache or name in broken: + return + if name in seen: + chain = " -> ".join(seen + (name,)) + broken[name] = ManifestError( + f"bottle '{name}' is in an extends cycle: {chain}" + ) + return + + raw = raws[name] + parent_name_raw = raw.get("extends") + child_raw = {k: v for k, v in raw.items() if k != "extends"} + + try: + if parent_name_raw is None: + cache[name] = ManifestBottle.from_dict(name, child_raw) + return + + if not isinstance(parent_name_raw, str): + broken[name] = ManifestError( + f"bottle '{name}' extends must be a string " + f"(was {type(parent_name_raw).__name__})" + ) + return + + parent_name: str = parent_name_raw + if parent_name == name: + broken[name] = ManifestError( + f"bottle '{name}' extends itself; remove the self-reference" + ) + return + + if parent_name not in raws: + avail = ", ".join(sorted(raws.keys())) or "(none)" + broken[name] = ManifestError( + f"bottle '{name}' extends '{parent_name}' which is not " + f"defined. Available bottles: {avail}" + ) + return + + _resolve_one_bottle_partial(parent_name, raws, cache, broken, seen + (name,)) + if parent_name in broken: + broken[name] = ManifestError( + f"bottle '{name}' extends '{parent_name}' which failed to load: " + f"{broken[parent_name]}" + ) + return + + parent = cache[parent_name] + cache[name] = _merge_bottles(parent, child_raw, name) + except ManifestError as exc: + broken[name] = exc diff --git a/bot_bottle/manifest_loader.py b/bot_bottle/manifest_loader.py index 67d0e51..6f5ac8e 100644 --- a/bot_bottle/manifest_loader.py +++ b/bot_bottle/manifest_loader.py @@ -11,6 +11,7 @@ from .manifest_schema import ( validate_agent_frontmatter_keys, validate_bottle_frontmatter_keys, ) +from .manifest_util import ManifestError from .yaml_subset import YamlSubsetError, parse_frontmatter if TYPE_CHECKING: @@ -21,8 +22,6 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None: """Die if `/bot-bottle.json` exists but `md_dir` does not. The manifest format changed in PRD 0011 and we do not want to silently leave the JSON content unused.""" - from .manifest import ManifestError - legacy = dir_path / "bot-bottle.json" if legacy.is_file() and not md_dir.exists(): raise ManifestError( @@ -34,15 +33,20 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None: ) -def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]: +def load_bottles_from_dir( + bottles_dir: Path, +) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]: """Walk `/*.md`, parse each as a bottle, and return - `{name: Bottle}`. Missing dir returns an empty dict.""" - from .manifest import ManifestError - from .manifest_extends import resolve_bottles + `({name: Bottle}, {name: error})`. Missing dir returns empty dicts. + + Per-file errors are collected in the second dict rather than raised, + so an invalid bottle file does not block unrelated bottles or agents.""" + from .manifest_extends import resolve_bottles_partial raws: dict[str, dict[str, object]] = {} + broken: dict[str, ManifestError] = {} if not bottles_dir.is_dir(): - return {} + return {}, {} for path in sorted(bottles_dir.glob("*.md")): name = entity_name_from_path(path) if name is None: @@ -54,12 +58,21 @@ def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]: try: fm, _body = parse_frontmatter(path.read_text()) except OSError as e: - raise ManifestError(f"could not read {path}: {e}") from e + broken[name] = ManifestError(f"could not read {path}: {e}") + continue except YamlSubsetError as e: - raise ManifestError(f"{path}: {e}") from e - validate_bottle_frontmatter_keys(path, fm.keys()) + broken[name] = ManifestError(f"{path}: {e}") + continue + try: + validate_bottle_frontmatter_keys(path, fm.keys()) + except ManifestError as e: + broken[name] = e + continue raws[name] = fm - return resolve_bottles(raws) + + good, resolve_broken = resolve_bottles_partial(raws) + broken.update(resolve_broken) + return good, broken def load_agents_from_dir( @@ -67,15 +80,26 @@ def load_agents_from_dir( bottle_names: set[str], *, source: str, # noqa: F841 — unused, but required by interface -) -> dict[str, ManifestAgent]: + broken_bottle_errors: dict[str, ManifestError] | None = None, +) -> tuple[dict[str, ManifestAgent], dict[str, ManifestError]]: """Walk `/*.md`, parse each as an agent, and return - `{name: Agent}`. The Markdown body becomes the agent's prompt. - Missing dir returns an empty dict.""" - from .manifest import ManifestAgent, ManifestError + `({name: Agent}, {name: error})`. The Markdown body becomes the + agent's prompt. Missing dir returns empty dicts. + + Per-file errors are collected in the second dict rather than raised. + Agents referencing a broken bottle are also moved to the error dict + so their error surfaces at preflight rather than manifest load time.""" + from .manifest import ManifestAgent + + broken_bottles = broken_bottle_errors or {} + # Agents may reference bottles that failed to resolve; accept those names + # during structural parsing so we can detect the broken-bottle case below. + all_known_bottles = bottle_names | set(broken_bottles.keys()) out: dict[str, ManifestAgent] = {} + broken: dict[str, ManifestError] = {} if not agents_dir.is_dir(): - return out + return out, broken for path in sorted(agents_dir.glob("*.md")): name = entity_name_from_path(path) if name is None: @@ -87,19 +111,33 @@ def load_agents_from_dir( try: fm, body = parse_frontmatter(path.read_text()) except OSError as e: - raise ManifestError(f"could not read {path}: {e}") from e + broken[name] = ManifestError(f"could not read {path}: {e}") + continue except YamlSubsetError as e: - raise ManifestError(f"{path}: {e}") from e - validate_agent_frontmatter_keys(path, fm.keys()) - # Build the dict Agent.from_dict expects. The body becomes - # prompt; Claude Code passthrough fields stay in fm and get - # ignored by Agent.from_dict (reads bottle/skills/git-gate/prompt). - agent_dict: dict[str, object] = { - "bottle": fm.get("bottle"), - "skills": fm.get("skills", []), - "prompt": body.strip(), - } - if "git-gate" in fm: - agent_dict["git-gate"] = fm["git-gate"] - out[name] = ManifestAgent.from_dict(name, agent_dict, bottle_names) - return out + broken[name] = ManifestError(f"{path}: {e}") + continue + try: + validate_agent_frontmatter_keys(path, fm.keys()) + agent_dict: dict[str, object] = { + "bottle": fm.get("bottle"), + "skills": fm.get("skills", []), + "prompt": body.strip(), + } + if "git-gate" in fm: + agent_dict["git-gate"] = fm["git-gate"] + agent = ManifestAgent.from_dict(name, agent_dict, all_known_bottles) + except ManifestError as e: + broken[name] = e + continue + + # Agent parsed fine but its bottle may have failed to resolve. + bottle_ref = agent.bottle + if bottle_ref in broken_bottles: + broken[name] = ManifestError( + f"agent '{name}' references bottle '{bottle_ref}' which " + f"failed to load: {broken_bottles[bottle_ref]}" + ) + continue + + out[name] = agent + return out, broken diff --git a/tests/unit/test_cli_start_selector.py b/tests/unit/test_cli_start_selector.py index d34ee0e..82a83a4 100644 --- a/tests/unit/test_cli_start_selector.py +++ b/tests/unit/test_cli_start_selector.py @@ -19,6 +19,7 @@ import bot_bottle.cli.tui as tui_mod def _make_manifest(agent_names: list[str]): manifest = MagicMock() manifest.agents = {name: MagicMock() for name in agent_names} + manifest.all_agent_names = sorted(agent_names) return manifest diff --git a/tests/unit/test_manifest_agent_git_user.py b/tests/unit/test_manifest_agent_git_user.py index 3fb6c04..ebeb672 100644 --- a/tests/unit/test_manifest_agent_git_user.py +++ b/tests/unit/test_manifest_agent_git_user.py @@ -226,10 +226,16 @@ class TestAgentGitUserMdLoader(unittest.TestCase): m.git_identity_summary("impl"), ) - def test_md_agent_repos_dies(self): + def test_md_agent_repos_deferred(self): + """git-gate.repos on an agent is an error, but deferred into + broken_agents rather than raised at resolve time, so other agents + remain accessible.""" self._write("bottles/dev.md", _BOTTLE_DEV) self._write("agents/impl.md", _AGENT_WITH_REPOS) - msg = _error_message(Manifest.resolve, str(self.home)) + m = Manifest.resolve(str(self.home)) + self.assertNotIn("impl", m.agents) + self.assertIn("impl", m.broken_agents) + msg = str(m.broken_agents["impl"]) self.assertIn("git-gate.repos", msg) self.assertIn("bottle-only", msg) diff --git a/tests/unit/test_manifest_md_load.py b/tests/unit/test_manifest_md_load.py index 4ec18dd..7636ef3 100644 --- a/tests/unit/test_manifest_md_load.py +++ b/tests/unit/test_manifest_md_load.py @@ -294,14 +294,15 @@ class TestManifestEntryPointParity(_ResolveCase): self.assertEqual("dev", manifest.agents["implementer"].bottle) -class TestUnknownAgentKeyDies(_ResolveCase): - """A typo'd / unknown frontmatter key on an agent file dies - rather than silently ignoring.""" +class TestUnknownAgentKeyDefersToBroken(_ResolveCase): + """A typo'd / unknown frontmatter key on an agent file is deferred + into broken_agents rather than crashing the whole manifest load. + The error surfaces when that specific agent is selected for launch.""" - def test_dies(self): + def test_broken_agent_deferred(self): _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write( - self.home_cb / "agents" / "implementer.md", + self.home_cb / "agents" / "bad.md", """ --- bottle: dev @@ -311,17 +312,58 @@ class TestUnknownAgentKeyDies(_ResolveCase): ... """, ) - with self.assertRaises(ManifestError): - self.resolve() + _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL) + m = self.resolve() + # The broken agent is NOT in the valid agents dict… + self.assertNotIn("bad", m.agents) + # …but it IS captured in broken_agents. + self.assertIn("bad", m.broken_agents) + self.assertIsInstance(m.broken_agents["bad"], ManifestError) + # Unrelated agent still loads fine. + self.assertIn("implementer", m.agents) - -class TestUnknownBottleKeyDies(_ResolveCase): - """A typo'd / unknown frontmatter key on a bottle file dies - rather than silently ignoring.""" - - def test_dies(self): + def test_broken_agent_appears_in_all_agent_names(self): + _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write( - self.home_cb / "bottles" / "dev.md", + self.home_cb / "agents" / "bad.md", + """ + --- + bottle: dev + skillz: [init-prd] + --- + """, + ) + _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL) + m = self.resolve() + self.assertIn("bad", m.all_agent_names) + self.assertIn("implementer", m.all_agent_names) + + def test_broken_agent_raises_at_preflight(self): + _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) + _write( + self.home_cb / "agents" / "bad.md", + """ + --- + bottle: dev + skillz: [init-prd] + --- + """, + ) + m = self.resolve() + with self.assertRaises(ManifestError): + m.require_agent("bad") + with self.assertRaises(ManifestError): + m.bottle_for("bad") + + +class TestUnknownBottleKeyDefersToBroken(_ResolveCase): + """A typo'd / unknown frontmatter key on a bottle file is deferred + into broken_agents for agents referencing that bottle, rather than + crashing the whole manifest load.""" + + def test_broken_bottle_defers_agent(self): + _write( + self.home_cb / "bottles" / "bad.md", """ --- credproxy: @@ -329,9 +371,24 @@ class TestUnknownBottleKeyDies(_ResolveCase): --- """, ) + _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL) - with self.assertRaises(ManifestError): - self.resolve() + _write( + self.home_cb / "agents" / "broken-agent.md", + """ + --- + bottle: bad + --- + """, + ) + m = self.resolve() + # Good bottle and agent still load. + self.assertIn("dev", m.bottles) + self.assertIn("implementer", m.agents) + # Broken bottle's agent is deferred. + self.assertNotIn("bad", m.bottles) + self.assertNotIn("broken-agent", m.agents) + self.assertIn("broken-agent", m.broken_agents) class TestStaleJsonDies(_ResolveCase): @@ -359,11 +416,11 @@ class TestNoManifestDies(_ResolveCase): self.assertEqual({}, dict(m.agents)) -class TestUnknownBottleReferenceDies(_ResolveCase): - """An agent file naming a bottle that doesn't exist on disk - dies with the existing "bottle not defined" error.""" +class TestUnknownBottleReferenceDefersToBroken(_ResolveCase): + """An agent file naming a bottle that doesn't exist on disk is + deferred into broken_agents; other agents still load.""" - def test_dies(self): + def test_stray_bottle_reference_deferred(self): _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write( self.home_cb / "agents" / "stray.md", @@ -373,8 +430,11 @@ class TestUnknownBottleReferenceDies(_ResolveCase): --- """, ) - with self.assertRaises(ManifestError): - self.resolve() + _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL) + m = self.resolve() + self.assertNotIn("stray", m.agents) + self.assertIn("stray", m.broken_agents) + self.assertIn("implementer", m.agents) class TestFilenameValidation(_ResolveCase):