feat: defer broken manifest parse errors to preflight

Broken bottle/agent files no longer block the agent selector or prevent
unrelated agents from loading. Per-file parse errors are collected in
`Manifest.broken_agents`; the CLI selector includes them via
`all_agent_names`, and the error surfaces only when the specific agent
is selected and launch is attempted (in `require_agent`/`bottle_for`).

Closes #236
This commit is contained in:
2026-06-20 02:02:28 +00:00
committed by didericis
parent c9842ce831
commit 3375df3f52
7 changed files with 277 additions and 62 deletions
+1 -1
View File
@@ -67,7 +67,7 @@ def cmd_start(argv: list[str]) -> int:
agent_name: str | None = args.name agent_name: str | None = args.name
if agent_name is None: if agent_name is None:
agent_name = tui.filter_select( agent_name = tui.filter_select(
sorted(manifest.agents.keys()), manifest.all_agent_names,
title="Select agent", title="Select agent",
) )
if agent_name is None: if agent_name is None:
+33 -6
View File
@@ -193,6 +193,9 @@ class ManifestBottle:
class Manifest: class Manifest:
bottles: Mapping[str, ManifestBottle] bottles: Mapping[str, ManifestBottle]
agents: Mapping[str, ManifestAgent] agents: Mapping[str, ManifestAgent]
# Agents (and agents referencing broken bottles) that failed to load.
# Their errors are deferred to preflight rather than raised at load time.
broken_agents: Mapping[str, ManifestError] = field(default_factory=dict)
@classmethod @classmethod
def resolve(cls, cwd: str, *, missing_ok: bool = False) -> "Manifest": def resolve(cls, cwd: str, *, missing_ok: bool = False) -> "Manifest":
@@ -256,16 +259,23 @@ class Manifest:
name collision. A `bottles/` subdir under `cwd_dir` is name collision. A `bottles/` subdir under `cwd_dir` is
logged as a warning and ignored. logged as a warning and ignored.
Per-file parse errors are deferred into `broken_agents` rather
than raised, so a broken bottle or agent only fails at preflight
when that specific agent is selected for launch.
Used by tests to build a Manifest from fixture directories Used by tests to build a Manifest from fixture directories
without touching `os.environ`.""" without touching `os.environ`."""
bottles_dir = home_dir / "bottles" bottles_dir = home_dir / "bottles"
from .manifest_loader import load_agents_from_dir, load_bottles_from_dir from .manifest_loader import load_agents_from_dir, load_bottles_from_dir
bottles = load_bottles_from_dir(bottles_dir) bottles, broken_bottle_errors = load_bottles_from_dir(bottles_dir)
bottle_names = set(bottles.keys()) bottle_names = set(bottles.keys())
agents_dir = home_dir / "agents" agents_dir = home_dir / "agents"
agents = load_agents_from_dir(agents_dir, bottle_names, source="$HOME") agents, broken_agents = load_agents_from_dir(
agents_dir, bottle_names, source="$HOME",
broken_bottle_errors=broken_bottle_errors,
)
if cwd_dir is not None: if cwd_dir is not None:
stale_bottles = cwd_dir / "bottles" stale_bottles = cwd_dir / "bottles"
@@ -281,12 +291,14 @@ class Manifest:
f"(PRD 0011). Move them or delete." f"(PRD 0011). Move them or delete."
) )
cwd_agents_dir = cwd_dir / "agents" cwd_agents_dir = cwd_dir / "agents"
cwd_agents = load_agents_from_dir( cwd_agents, cwd_broken = load_agents_from_dir(
cwd_agents_dir, bottle_names, source="$CWD" cwd_agents_dir, bottle_names, source="$CWD",
broken_bottle_errors=broken_bottle_errors,
) )
agents = {**agents, **cwd_agents} agents = {**agents, **cwd_agents}
broken_agents = {**broken_agents, **cwd_broken}
return cls(bottles=bottles, agents=agents) return cls(bottles=bottles, agents=agents, broken_agents=broken_agents)
@classmethod @classmethod
def from_json_obj(cls, obj: object) -> "Manifest": def from_json_obj(cls, obj: object) -> "Manifest":
@@ -311,10 +323,21 @@ class Manifest:
} }
return cls(bottles=bottles, agents=agents) return cls(bottles=bottles, agents=agents)
@property
def all_agent_names(self) -> list[str]:
    """Return every agent name, loadable or broken, in sorted order.

    Names from `broken_agents` are merged with the names from `agents`
    so that the CLI selector can list agents whose files failed to load;
    the stored error is only raised once launch of that specific agent
    is attempted."""
    names = set(self.agents)
    names.update(self.broken_agents)
    return sorted(names)
def has_agent(self, name: str) -> bool: def has_agent(self, name: str) -> bool:
return name in self.agents return name in self.agents
def require_agent(self, name: str) -> None: def require_agent(self, name: str) -> None:
if name in self.broken_agents:
raise self.broken_agents[name]
if self.has_agent(name): if self.has_agent(name):
return return
available = ", ".join(self.agents.keys()) available = ", ".join(self.agents.keys())
@@ -361,7 +384,11 @@ class Manifest:
The overlay lives here, the single point both backends call to The overlay lives here, the single point both backends call to
resolve an agent's bottle, so the docker / smolmachines git resolve an agent's bottle, so the docker / smolmachines git
provisioners pick up the merged identity unchanged.""" provisioners pick up the merged identity unchanged.
Raises the stored ManifestError for agents that failed to load."""
if agent_name in self.broken_agents:
raise self.broken_agents[agent_name]
bottle = self.bottles[self.agents[agent_name].bottle] bottle = self.bottles[self.agents[agent_name].bottle]
merged = self._effective_git_user(agent_name) merged = self._effective_git_user(agent_name)
if merged == bottle.git_user: if merged == bottle.git_user:
+83
View File
@@ -22,6 +22,25 @@ def resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, ManifestBot
return cache return cache
def resolve_bottles_partial(
    raws: dict[str, dict[str, object]],
) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]:
    """Resolve `extends:` chains for every raw bottle, tolerating failures.

    Returns a `(good, broken)` pair: `good` maps bottle names to resolved
    `ManifestBottle` objects, `broken` maps bottle names to the
    `ManifestError` explaining why that bottle could not be resolved.

    Resolution of each name is delegated to `_resolve_one_bottle_partial`,
    which records failures in `broken` instead of raising, so one invalid
    bottle does not prevent unrelated bottles from loading. A bottle whose
    parent is broken is itself recorded as broken."""
    from .manifest import ManifestError

    resolved: dict[str, ManifestBottle] = {}
    failed: dict[str, ManifestError] = {}
    for bottle_name in raws:
        # A bottle may already have been handled as the parent of an
        # earlier entry; skip it rather than resolving it twice.
        already_handled = bottle_name in resolved or bottle_name in failed
        if already_handled:
            continue
        _resolve_one_bottle_partial(bottle_name, raws, resolved, failed, ())
    return resolved, failed
def _resolve_one_bottle( def _resolve_one_bottle(
name: str, name: str,
raws: dict[str, dict[str, object]], raws: dict[str, dict[str, object]],
@@ -210,3 +229,67 @@ def _merge_egress(
routes = parent.routes + child.routes routes = parent.routes + child.routes
log = child.Log if "log" in child_egress_raw else parent.Log log = child.Log if "log" in child_egress_raw else parent.Log
return ManifestEgressConfig(routes=routes, Log=log) return ManifestEgressConfig(routes=routes, Log=log)
def _resolve_one_bottle_partial(
    name: str,
    raws: dict[str, dict[str, object]],
    cache: dict[str, ManifestBottle],
    broken: dict[str, ManifestError],
    seen: tuple[str, ...],
) -> None:
    """Error-tolerant variant: on failure, adds to `broken` instead of raising.

    Resolves bottle `name` (and, recursively, the bottle it extends) and
    stores the outcome in exactly one of the two output dicts.

    Args:
        name: bottle to resolve; must be a key of `raws`.
        raws: raw frontmatter dicts for all bottles, keyed by name.
        cache: successfully resolved bottles; updated in place.
        broken: per-bottle errors; updated in place.
        seen: names currently being resolved further up the recursion,
            used to detect `extends` cycles.

    A `ManifestError` raised while building or merging the bottle is
    captured into `broken[name]`; other exception types propagate."""
    from .manifest import ManifestBottle, ManifestError

    if name in cache or name in broken:
        return

    if name in seen:
        chain = " -> ".join(seen + (name,))
        broken[name] = ManifestError(
            f"bottle '{name}' is in an extends cycle: {chain}"
        )
        return

    raw = raws[name]
    parent_name_raw = raw.get("extends")
    child_raw = {k: v for k, v in raw.items() if k != "extends"}

    if parent_name_raw is None:
        # Root bottle: nothing to inherit, validate it on its own.
        try:
            cache[name] = ManifestBottle.from_dict(name, child_raw)
        except ManifestError as exc:
            broken[name] = exc
        return

    if not isinstance(parent_name_raw, str):
        broken[name] = ManifestError(
            f"bottle '{name}' extends must be a string "
            f"(was {type(parent_name_raw).__name__})"
        )
        return

    parent_name: str = parent_name_raw
    if parent_name == name:
        broken[name] = ManifestError(
            f"bottle '{name}' extends itself; remove the self-reference"
        )
        return

    if parent_name not in raws:
        avail = ", ".join(sorted(raws.keys())) or "(none)"
        broken[name] = ManifestError(
            f"bottle '{name}' extends '{parent_name}' which is not "
            f"defined. Available bottles: {avail}"
        )
        return

    _resolve_one_bottle_partial(parent_name, raws, cache, broken, seen + (name,))

    if name in broken:
        # A deeper frame found that `name` closes an extends cycle and has
        # already recorded the precise cycle error. Keep that diagnosis
        # instead of overwriting it with a nested "parent failed to load"
        # message that merely wraps the same cycle error twice.
        return

    if parent_name in broken:
        broken[name] = ManifestError(
            f"bottle '{name}' extends '{parent_name}' which failed to load: "
            f"{broken[parent_name]}"
        )
        return

    try:
        cache[name] = _merge_bottles(cache[parent_name], child_raw, name)
    except ManifestError as exc:
        broken[name] = exc
+69 -31
View File
@@ -11,6 +11,7 @@ from .manifest_schema import (
validate_agent_frontmatter_keys, validate_agent_frontmatter_keys,
validate_bottle_frontmatter_keys, validate_bottle_frontmatter_keys,
) )
from .manifest_util import ManifestError
from .yaml_subset import YamlSubsetError, parse_frontmatter from .yaml_subset import YamlSubsetError, parse_frontmatter
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -21,8 +22,6 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None:
"""Die if `<dir_path>/bot-bottle.json` exists but `md_dir` does """Die if `<dir_path>/bot-bottle.json` exists but `md_dir` does
not. The manifest format changed in PRD 0011 and we do not want not. The manifest format changed in PRD 0011 and we do not want
to silently leave the JSON content unused.""" to silently leave the JSON content unused."""
from .manifest import ManifestError
legacy = dir_path / "bot-bottle.json" legacy = dir_path / "bot-bottle.json"
if legacy.is_file() and not md_dir.exists(): if legacy.is_file() and not md_dir.exists():
raise ManifestError( raise ManifestError(
@@ -34,15 +33,20 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None:
) )
def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]: def load_bottles_from_dir(
bottles_dir: Path,
) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]:
"""Walk `<bottles_dir>/*.md`, parse each as a bottle, and return """Walk `<bottles_dir>/*.md`, parse each as a bottle, and return
`{name: Bottle}`. Missing dir returns an empty dict.""" `({name: Bottle}, {name: error})`. Missing dir returns empty dicts.
from .manifest import ManifestError
from .manifest_extends import resolve_bottles Per-file errors are collected in the second dict rather than raised,
so an invalid bottle file does not block unrelated bottles or agents."""
from .manifest_extends import resolve_bottles_partial
raws: dict[str, dict[str, object]] = {} raws: dict[str, dict[str, object]] = {}
broken: dict[str, ManifestError] = {}
if not bottles_dir.is_dir(): if not bottles_dir.is_dir():
return {} return {}, {}
for path in sorted(bottles_dir.glob("*.md")): for path in sorted(bottles_dir.glob("*.md")):
name = entity_name_from_path(path) name = entity_name_from_path(path)
if name is None: if name is None:
@@ -54,12 +58,21 @@ def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]:
try: try:
fm, _body = parse_frontmatter(path.read_text()) fm, _body = parse_frontmatter(path.read_text())
except OSError as e: except OSError as e:
raise ManifestError(f"could not read {path}: {e}") from e broken[name] = ManifestError(f"could not read {path}: {e}")
continue
except YamlSubsetError as e: except YamlSubsetError as e:
raise ManifestError(f"{path}: {e}") from e broken[name] = ManifestError(f"{path}: {e}")
validate_bottle_frontmatter_keys(path, fm.keys()) continue
try:
validate_bottle_frontmatter_keys(path, fm.keys())
except ManifestError as e:
broken[name] = e
continue
raws[name] = fm raws[name] = fm
return resolve_bottles(raws)
good, resolve_broken = resolve_bottles_partial(raws)
broken.update(resolve_broken)
return good, broken
def load_agents_from_dir( def load_agents_from_dir(
@@ -67,15 +80,26 @@ def load_agents_from_dir(
bottle_names: set[str], bottle_names: set[str],
*, *,
source: str, # noqa: F841 — unused, but required by interface source: str, # noqa: F841 — unused, but required by interface
) -> dict[str, ManifestAgent]: broken_bottle_errors: dict[str, ManifestError] | None = None,
) -> tuple[dict[str, ManifestAgent], dict[str, ManifestError]]:
"""Walk `<agents_dir>/*.md`, parse each as an agent, and return """Walk `<agents_dir>/*.md`, parse each as an agent, and return
`{name: Agent}`. The Markdown body becomes the agent's prompt. `({name: Agent}, {name: error})`. The Markdown body becomes the
Missing dir returns an empty dict.""" agent's prompt. Missing dir returns empty dicts.
from .manifest import ManifestAgent, ManifestError
Per-file errors are collected in the second dict rather than raised.
Agents referencing a broken bottle are also moved to the error dict
so their error surfaces at preflight rather than manifest load time."""
from .manifest import ManifestAgent
broken_bottles = broken_bottle_errors or {}
# Agents may reference bottles that failed to resolve; accept those names
# during structural parsing so we can detect the broken-bottle case below.
all_known_bottles = bottle_names | set(broken_bottles.keys())
out: dict[str, ManifestAgent] = {} out: dict[str, ManifestAgent] = {}
broken: dict[str, ManifestError] = {}
if not agents_dir.is_dir(): if not agents_dir.is_dir():
return out return out, broken
for path in sorted(agents_dir.glob("*.md")): for path in sorted(agents_dir.glob("*.md")):
name = entity_name_from_path(path) name = entity_name_from_path(path)
if name is None: if name is None:
@@ -87,19 +111,33 @@ def load_agents_from_dir(
try: try:
fm, body = parse_frontmatter(path.read_text()) fm, body = parse_frontmatter(path.read_text())
except OSError as e: except OSError as e:
raise ManifestError(f"could not read {path}: {e}") from e broken[name] = ManifestError(f"could not read {path}: {e}")
continue
except YamlSubsetError as e: except YamlSubsetError as e:
raise ManifestError(f"{path}: {e}") from e broken[name] = ManifestError(f"{path}: {e}")
validate_agent_frontmatter_keys(path, fm.keys()) continue
# Build the dict Agent.from_dict expects. The body becomes try:
# prompt; Claude Code passthrough fields stay in fm and get validate_agent_frontmatter_keys(path, fm.keys())
# ignored by Agent.from_dict (reads bottle/skills/git-gate/prompt). agent_dict: dict[str, object] = {
agent_dict: dict[str, object] = { "bottle": fm.get("bottle"),
"bottle": fm.get("bottle"), "skills": fm.get("skills", []),
"skills": fm.get("skills", []), "prompt": body.strip(),
"prompt": body.strip(), }
} if "git-gate" in fm:
if "git-gate" in fm: agent_dict["git-gate"] = fm["git-gate"]
agent_dict["git-gate"] = fm["git-gate"] agent = ManifestAgent.from_dict(name, agent_dict, all_known_bottles)
out[name] = ManifestAgent.from_dict(name, agent_dict, bottle_names) except ManifestError as e:
return out broken[name] = e
continue
# Agent parsed fine but its bottle may have failed to resolve.
bottle_ref = agent.bottle
if bottle_ref in broken_bottles:
broken[name] = ManifestError(
f"agent '{name}' references bottle '{bottle_ref}' which "
f"failed to load: {broken_bottles[bottle_ref]}"
)
continue
out[name] = agent
return out, broken
+1
View File
@@ -20,6 +20,7 @@ from bot_bottle.backend import ActiveAgent
def _make_manifest(agent_names: list[str]): def _make_manifest(agent_names: list[str]):
manifest = MagicMock() manifest = MagicMock()
manifest.agents = {name: MagicMock() for name in agent_names} manifest.agents = {name: MagicMock() for name in agent_names}
manifest.all_agent_names = sorted(agent_names)
return manifest return manifest
+8 -2
View File
@@ -226,10 +226,16 @@ class TestAgentGitUserMdLoader(unittest.TestCase):
m.git_identity_summary("impl"), m.git_identity_summary("impl"),
) )
def test_md_agent_repos_dies(self): def test_md_agent_repos_deferred(self):
"""git-gate.repos on an agent is an error, but deferred into
broken_agents rather than raised at resolve time, so other agents
remain accessible."""
self._write("bottles/dev.md", _BOTTLE_DEV) self._write("bottles/dev.md", _BOTTLE_DEV)
self._write("agents/impl.md", _AGENT_WITH_REPOS) self._write("agents/impl.md", _AGENT_WITH_REPOS)
msg = _error_message(Manifest.resolve, str(self.home)) m = Manifest.resolve(str(self.home))
self.assertNotIn("impl", m.agents)
self.assertIn("impl", m.broken_agents)
msg = str(m.broken_agents["impl"])
self.assertIn("git-gate.repos", msg) self.assertIn("git-gate.repos", msg)
self.assertIn("bottle-only", msg) self.assertIn("bottle-only", msg)
+82 -22
View File
@@ -294,14 +294,15 @@ class TestManifestEntryPointParity(_ResolveCase):
self.assertEqual("dev", manifest.agents["implementer"].bottle) self.assertEqual("dev", manifest.agents["implementer"].bottle)
class TestUnknownAgentKeyDies(_ResolveCase): class TestUnknownAgentKeyDefersToBroken(_ResolveCase):
"""A typo'd / unknown frontmatter key on an agent file dies """A typo'd / unknown frontmatter key on an agent file is deferred
rather than silently ignoring.""" into broken_agents rather than crashing the whole manifest load.
The error surfaces when that specific agent is selected for launch."""
def test_dies(self): def test_broken_agent_deferred(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write( _write(
self.home_cb / "agents" / "implementer.md", self.home_cb / "agents" / "bad.md",
""" """
--- ---
bottle: dev bottle: dev
@@ -311,17 +312,58 @@ class TestUnknownAgentKeyDies(_ResolveCase):
... ...
""", """,
) )
with self.assertRaises(ManifestError): _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
self.resolve() m = self.resolve()
# The broken agent is NOT in the valid agents dict…
self.assertNotIn("bad", m.agents)
# …but it IS captured in broken_agents.
self.assertIn("bad", m.broken_agents)
self.assertIsInstance(m.broken_agents["bad"], ManifestError)
# Unrelated agent still loads fine.
self.assertIn("implementer", m.agents)
def test_broken_agent_appears_in_all_agent_names(self):
class TestUnknownBottleKeyDies(_ResolveCase): _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
"""A typo'd / unknown frontmatter key on a bottle file dies
rather than silently ignoring."""
def test_dies(self):
_write( _write(
self.home_cb / "bottles" / "dev.md", self.home_cb / "agents" / "bad.md",
"""
---
bottle: dev
skillz: [init-prd]
---
""",
)
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
m = self.resolve()
self.assertIn("bad", m.all_agent_names)
self.assertIn("implementer", m.all_agent_names)
def test_broken_agent_raises_at_preflight(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(
self.home_cb / "agents" / "bad.md",
"""
---
bottle: dev
skillz: [init-prd]
---
""",
)
m = self.resolve()
with self.assertRaises(ManifestError):
m.require_agent("bad")
with self.assertRaises(ManifestError):
m.bottle_for("bad")
class TestUnknownBottleKeyDefersToBroken(_ResolveCase):
"""A typo'd / unknown frontmatter key on a bottle file is deferred
into broken_agents for agents referencing that bottle, rather than
crashing the whole manifest load."""
def test_broken_bottle_defers_agent(self):
_write(
self.home_cb / "bottles" / "bad.md",
""" """
--- ---
credproxy: credproxy:
@@ -329,9 +371,24 @@ class TestUnknownBottleKeyDies(_ResolveCase):
--- ---
""", """,
) )
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL) _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
with self.assertRaises(ManifestError): _write(
self.resolve() self.home_cb / "agents" / "broken-agent.md",
"""
---
bottle: bad
---
""",
)
m = self.resolve()
# Good bottle and agent still load.
self.assertIn("dev", m.bottles)
self.assertIn("implementer", m.agents)
# Broken bottle's agent is deferred.
self.assertNotIn("bad", m.bottles)
self.assertNotIn("broken-agent", m.agents)
self.assertIn("broken-agent", m.broken_agents)
class TestStaleJsonDies(_ResolveCase): class TestStaleJsonDies(_ResolveCase):
@@ -359,11 +416,11 @@ class TestNoManifestDies(_ResolveCase):
self.assertEqual({}, dict(m.agents)) self.assertEqual({}, dict(m.agents))
class TestUnknownBottleReferenceDies(_ResolveCase): class TestUnknownBottleReferenceDefersToBroken(_ResolveCase):
"""An agent file naming a bottle that doesn't exist on disk """An agent file naming a bottle that doesn't exist on disk is
dies with the existing "bottle not defined" error.""" deferred into broken_agents; other agents still load."""
def test_dies(self): def test_stray_bottle_reference_deferred(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV) _write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write( _write(
self.home_cb / "agents" / "stray.md", self.home_cb / "agents" / "stray.md",
@@ -373,8 +430,11 @@ class TestUnknownBottleReferenceDies(_ResolveCase):
--- ---
""", """,
) )
with self.assertRaises(ManifestError): _write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
self.resolve() m = self.resolve()
self.assertNotIn("stray", m.agents)
self.assertIn("stray", m.broken_agents)
self.assertIn("implementer", m.agents)
class TestFilenameValidation(_ResolveCase): class TestFilenameValidation(_ResolveCase):