feat: defer broken manifest parse errors to preflight

Broken bottle/agent files no longer block the agent selector or prevent
unrelated agents from loading. Per-file parse errors are collected in
`Manifest.broken_agents`; the CLI selector includes them via
`all_agent_names`, and the error surfaces only when the specific agent
is selected and launch is attempted (in `require_agent`/`bottle_for`).

Closes #236
This commit is contained in:
2026-06-20 02:02:28 +00:00
committed by didericis
parent c9842ce831
commit 3375df3f52
7 changed files with 277 additions and 62 deletions
+1 -1
View File
@@ -67,7 +67,7 @@ def cmd_start(argv: list[str]) -> int:
agent_name: str | None = args.name
if agent_name is None:
agent_name = tui.filter_select(
sorted(manifest.agents.keys()),
manifest.all_agent_names,
title="Select agent",
)
if agent_name is None:
+33 -6
View File
@@ -193,6 +193,9 @@ class ManifestBottle:
class Manifest:
bottles: Mapping[str, ManifestBottle]
agents: Mapping[str, ManifestAgent]
# Agents (and agents referencing broken bottles) that failed to load.
# Their errors are deferred to preflight rather than raised at load time.
broken_agents: Mapping[str, ManifestError] = field(default_factory=dict)
@classmethod
def resolve(cls, cwd: str, *, missing_ok: bool = False) -> "Manifest":
@@ -256,16 +259,23 @@ class Manifest:
name collision. A `bottles/` subdir under `cwd_dir` is
logged as a warning and ignored.
Per-file parse errors are deferred into `broken_agents` rather
than raised, so a broken bottle or agent only fails at preflight
when that specific agent is selected for launch.
Used by tests to build a Manifest from fixture directories
without touching `os.environ`."""
bottles_dir = home_dir / "bottles"
from .manifest_loader import load_agents_from_dir, load_bottles_from_dir
bottles = load_bottles_from_dir(bottles_dir)
bottles, broken_bottle_errors = load_bottles_from_dir(bottles_dir)
bottle_names = set(bottles.keys())
agents_dir = home_dir / "agents"
agents = load_agents_from_dir(agents_dir, bottle_names, source="$HOME")
agents, broken_agents = load_agents_from_dir(
agents_dir, bottle_names, source="$HOME",
broken_bottle_errors=broken_bottle_errors,
)
if cwd_dir is not None:
stale_bottles = cwd_dir / "bottles"
@@ -281,12 +291,14 @@ class Manifest:
f"(PRD 0011). Move them or delete."
)
cwd_agents_dir = cwd_dir / "agents"
cwd_agents = load_agents_from_dir(
cwd_agents_dir, bottle_names, source="$CWD"
cwd_agents, cwd_broken = load_agents_from_dir(
cwd_agents_dir, bottle_names, source="$CWD",
broken_bottle_errors=broken_bottle_errors,
)
agents = {**agents, **cwd_agents}
broken_agents = {**broken_agents, **cwd_broken}
return cls(bottles=bottles, agents=agents)
return cls(bottles=bottles, agents=agents, broken_agents=broken_agents)
@classmethod
def from_json_obj(cls, obj: object) -> "Manifest":
@@ -311,10 +323,21 @@ class Manifest:
}
return cls(bottles=bottles, agents=agents)
@property
def all_agent_names(self) -> list[str]:
"""Sorted list of all agent names, including broken ones.
Broken agents appear in the CLI selector so users can select any
agent — the error surfaces only at preflight when launch is
attempted."""
return sorted(set(self.agents.keys()) | set(self.broken_agents.keys()))
def has_agent(self, name: str) -> bool:
return name in self.agents
def require_agent(self, name: str) -> None:
if name in self.broken_agents:
raise self.broken_agents[name]
if self.has_agent(name):
return
available = ", ".join(self.agents.keys())
@@ -361,7 +384,11 @@ class Manifest:
The overlay lives here, the single point both backends call to
resolve an agent's bottle, so the docker / smolmachines git
provisioners pick up the merged identity unchanged."""
provisioners pick up the merged identity unchanged.
Raises the stored ManifestError for agents that failed to load."""
if agent_name in self.broken_agents:
raise self.broken_agents[agent_name]
bottle = self.bottles[self.agents[agent_name].bottle]
merged = self._effective_git_user(agent_name)
if merged == bottle.git_user:
+83
View File
@@ -22,6 +22,25 @@ def resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, ManifestBot
return cache
def resolve_bottles_partial(
raws: dict[str, dict[str, object]],
) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]:
"""Apply `extends:` chains and return `(good, broken)`.
Bottles that fail validation (schema errors, bad extends, cycles) are
collected in `broken` rather than raising, so unrelated bottles remain
usable. Errors for parent bottles propagate to all children that extend
them."""
from .manifest import ManifestError
cache: dict[str, ManifestBottle] = {}
broken: dict[str, ManifestError] = {}
for name in raws:
if name not in cache and name not in broken:
_resolve_one_bottle_partial(name, raws, cache, broken, ())
return cache, broken
def _resolve_one_bottle(
name: str,
raws: dict[str, dict[str, object]],
@@ -210,3 +229,67 @@ def _merge_egress(
routes = parent.routes + child.routes
log = child.Log if "log" in child_egress_raw else parent.Log
return ManifestEgressConfig(routes=routes, Log=log)
def _resolve_one_bottle_partial(
name: str,
raws: dict[str, dict[str, object]],
cache: dict[str, ManifestBottle],
broken: dict[str, ManifestError],
seen: tuple[str, ...],
) -> None:
"""Error-tolerant variant: on failure, adds to `broken` instead of raising."""
from .manifest import ManifestBottle, ManifestError
if name in cache or name in broken:
return
if name in seen:
chain = " -> ".join(seen + (name,))
broken[name] = ManifestError(
f"bottle '{name}' is in an extends cycle: {chain}"
)
return
raw = raws[name]
parent_name_raw = raw.get("extends")
child_raw = {k: v for k, v in raw.items() if k != "extends"}
try:
if parent_name_raw is None:
cache[name] = ManifestBottle.from_dict(name, child_raw)
return
if not isinstance(parent_name_raw, str):
broken[name] = ManifestError(
f"bottle '{name}' extends must be a string "
f"(was {type(parent_name_raw).__name__})"
)
return
parent_name: str = parent_name_raw
if parent_name == name:
broken[name] = ManifestError(
f"bottle '{name}' extends itself; remove the self-reference"
)
return
if parent_name not in raws:
avail = ", ".join(sorted(raws.keys())) or "(none)"
broken[name] = ManifestError(
f"bottle '{name}' extends '{parent_name}' which is not "
f"defined. Available bottles: {avail}"
)
return
_resolve_one_bottle_partial(parent_name, raws, cache, broken, seen + (name,))
if parent_name in broken:
broken[name] = ManifestError(
f"bottle '{name}' extends '{parent_name}' which failed to load: "
f"{broken[parent_name]}"
)
return
parent = cache[parent_name]
cache[name] = _merge_bottles(parent, child_raw, name)
except ManifestError as exc:
broken[name] = exc
+69 -31
View File
@@ -11,6 +11,7 @@ from .manifest_schema import (
validate_agent_frontmatter_keys,
validate_bottle_frontmatter_keys,
)
from .manifest_util import ManifestError
from .yaml_subset import YamlSubsetError, parse_frontmatter
if TYPE_CHECKING:
@@ -21,8 +22,6 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None:
"""Die if `<dir_path>/bot-bottle.json` exists but `md_dir` does
not. The manifest format changed in PRD 0011 and we do not want
to silently leave the JSON content unused."""
from .manifest import ManifestError
legacy = dir_path / "bot-bottle.json"
if legacy.is_file() and not md_dir.exists():
raise ManifestError(
@@ -34,15 +33,20 @@ def check_stale_json(dir_path: Path, md_dir: Path, label: str) -> None:
)
def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]:
def load_bottles_from_dir(
bottles_dir: Path,
) -> tuple[dict[str, ManifestBottle], dict[str, ManifestError]]:
"""Walk `<bottles_dir>/*.md`, parse each as a bottle, and return
`{name: Bottle}`. Missing dir returns an empty dict."""
from .manifest import ManifestError
from .manifest_extends import resolve_bottles
`({name: Bottle}, {name: error})`. Missing dir returns empty dicts.
Per-file errors are collected in the second dict rather than raised,
so an invalid bottle file does not block unrelated bottles or agents."""
from .manifest_extends import resolve_bottles_partial
raws: dict[str, dict[str, object]] = {}
broken: dict[str, ManifestError] = {}
if not bottles_dir.is_dir():
return {}
return {}, {}
for path in sorted(bottles_dir.glob("*.md")):
name = entity_name_from_path(path)
if name is None:
@@ -54,12 +58,21 @@ def load_bottles_from_dir(bottles_dir: Path) -> dict[str, ManifestBottle]:
try:
fm, _body = parse_frontmatter(path.read_text())
except OSError as e:
raise ManifestError(f"could not read {path}: {e}") from e
broken[name] = ManifestError(f"could not read {path}: {e}")
continue
except YamlSubsetError as e:
raise ManifestError(f"{path}: {e}") from e
validate_bottle_frontmatter_keys(path, fm.keys())
broken[name] = ManifestError(f"{path}: {e}")
continue
try:
validate_bottle_frontmatter_keys(path, fm.keys())
except ManifestError as e:
broken[name] = e
continue
raws[name] = fm
return resolve_bottles(raws)
good, resolve_broken = resolve_bottles_partial(raws)
broken.update(resolve_broken)
return good, broken
def load_agents_from_dir(
@@ -67,15 +80,26 @@ def load_agents_from_dir(
bottle_names: set[str],
*,
source: str, # noqa: F841 — unused, but required by interface
) -> dict[str, ManifestAgent]:
broken_bottle_errors: dict[str, ManifestError] | None = None,
) -> tuple[dict[str, ManifestAgent], dict[str, ManifestError]]:
"""Walk `<agents_dir>/*.md`, parse each as an agent, and return
`{name: Agent}`. The Markdown body becomes the agent's prompt.
Missing dir returns an empty dict."""
from .manifest import ManifestAgent, ManifestError
`({name: Agent}, {name: error})`. The Markdown body becomes the
agent's prompt. Missing dir returns empty dicts.
Per-file errors are collected in the second dict rather than raised.
Agents referencing a broken bottle are also moved to the error dict
so their error surfaces at preflight rather than manifest load time."""
from .manifest import ManifestAgent
broken_bottles = broken_bottle_errors or {}
# Agents may reference bottles that failed to resolve; accept those names
# during structural parsing so we can detect the broken-bottle case below.
all_known_bottles = bottle_names | set(broken_bottles.keys())
out: dict[str, ManifestAgent] = {}
broken: dict[str, ManifestError] = {}
if not agents_dir.is_dir():
return out
return out, broken
for path in sorted(agents_dir.glob("*.md")):
name = entity_name_from_path(path)
if name is None:
@@ -87,19 +111,33 @@ def load_agents_from_dir(
try:
fm, body = parse_frontmatter(path.read_text())
except OSError as e:
raise ManifestError(f"could not read {path}: {e}") from e
broken[name] = ManifestError(f"could not read {path}: {e}")
continue
except YamlSubsetError as e:
raise ManifestError(f"{path}: {e}") from e
validate_agent_frontmatter_keys(path, fm.keys())
# Build the dict Agent.from_dict expects. The body becomes
# prompt; Claude Code passthrough fields stay in fm and get
# ignored by Agent.from_dict (reads bottle/skills/git-gate/prompt).
agent_dict: dict[str, object] = {
"bottle": fm.get("bottle"),
"skills": fm.get("skills", []),
"prompt": body.strip(),
}
if "git-gate" in fm:
agent_dict["git-gate"] = fm["git-gate"]
out[name] = ManifestAgent.from_dict(name, agent_dict, bottle_names)
return out
broken[name] = ManifestError(f"{path}: {e}")
continue
try:
validate_agent_frontmatter_keys(path, fm.keys())
agent_dict: dict[str, object] = {
"bottle": fm.get("bottle"),
"skills": fm.get("skills", []),
"prompt": body.strip(),
}
if "git-gate" in fm:
agent_dict["git-gate"] = fm["git-gate"]
agent = ManifestAgent.from_dict(name, agent_dict, all_known_bottles)
except ManifestError as e:
broken[name] = e
continue
# Agent parsed fine but its bottle may have failed to resolve.
bottle_ref = agent.bottle
if bottle_ref in broken_bottles:
broken[name] = ManifestError(
f"agent '{name}' references bottle '{bottle_ref}' which "
f"failed to load: {broken_bottles[bottle_ref]}"
)
continue
out[name] = agent
return out, broken
+1
View File
@@ -20,6 +20,7 @@ from bot_bottle.backend import ActiveAgent
def _make_manifest(agent_names: list[str]):
manifest = MagicMock()
manifest.agents = {name: MagicMock() for name in agent_names}
manifest.all_agent_names = sorted(agent_names)
return manifest
+8 -2
View File
@@ -226,10 +226,16 @@ class TestAgentGitUserMdLoader(unittest.TestCase):
m.git_identity_summary("impl"),
)
def test_md_agent_repos_dies(self):
def test_md_agent_repos_deferred(self):
"""git-gate.repos on an agent is an error, but deferred into
broken_agents rather than raised at resolve time, so other agents
remain accessible."""
self._write("bottles/dev.md", _BOTTLE_DEV)
self._write("agents/impl.md", _AGENT_WITH_REPOS)
msg = _error_message(Manifest.resolve, str(self.home))
m = Manifest.resolve(str(self.home))
self.assertNotIn("impl", m.agents)
self.assertIn("impl", m.broken_agents)
msg = str(m.broken_agents["impl"])
self.assertIn("git-gate.repos", msg)
self.assertIn("bottle-only", msg)
+82 -22
View File
@@ -294,14 +294,15 @@ class TestManifestEntryPointParity(_ResolveCase):
self.assertEqual("dev", manifest.agents["implementer"].bottle)
class TestUnknownAgentKeyDies(_ResolveCase):
"""A typo'd / unknown frontmatter key on an agent file dies
rather than silently ignoring."""
class TestUnknownAgentKeyDefersToBroken(_ResolveCase):
"""A typo'd / unknown frontmatter key on an agent file is deferred
into broken_agents rather than crashing the whole manifest load.
The error surfaces when that specific agent is selected for launch."""
def test_dies(self):
def test_broken_agent_deferred(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(
self.home_cb / "agents" / "implementer.md",
self.home_cb / "agents" / "bad.md",
"""
---
bottle: dev
@@ -311,17 +312,58 @@ class TestUnknownAgentKeyDies(_ResolveCase):
...
""",
)
with self.assertRaises(ManifestError):
self.resolve()
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
m = self.resolve()
# The broken agent is NOT in the valid agents dict…
self.assertNotIn("bad", m.agents)
# …but it IS captured in broken_agents.
self.assertIn("bad", m.broken_agents)
self.assertIsInstance(m.broken_agents["bad"], ManifestError)
# Unrelated agent still loads fine.
self.assertIn("implementer", m.agents)
class TestUnknownBottleKeyDies(_ResolveCase):
"""A typo'd / unknown frontmatter key on a bottle file dies
rather than silently ignoring."""
def test_dies(self):
def test_broken_agent_appears_in_all_agent_names(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(
self.home_cb / "bottles" / "dev.md",
self.home_cb / "agents" / "bad.md",
"""
---
bottle: dev
skillz: [init-prd]
---
""",
)
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
m = self.resolve()
self.assertIn("bad", m.all_agent_names)
self.assertIn("implementer", m.all_agent_names)
def test_broken_agent_raises_at_preflight(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(
self.home_cb / "agents" / "bad.md",
"""
---
bottle: dev
skillz: [init-prd]
---
""",
)
m = self.resolve()
with self.assertRaises(ManifestError):
m.require_agent("bad")
with self.assertRaises(ManifestError):
m.bottle_for("bad")
class TestUnknownBottleKeyDefersToBroken(_ResolveCase):
"""A typo'd / unknown frontmatter key on a bottle file is deferred
into broken_agents for agents referencing that bottle, rather than
crashing the whole manifest load."""
def test_broken_bottle_defers_agent(self):
_write(
self.home_cb / "bottles" / "bad.md",
"""
---
credproxy:
@@ -329,9 +371,24 @@ class TestUnknownBottleKeyDies(_ResolveCase):
---
""",
)
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
with self.assertRaises(ManifestError):
self.resolve()
_write(
self.home_cb / "agents" / "broken-agent.md",
"""
---
bottle: bad
---
""",
)
m = self.resolve()
# Good bottle and agent still load.
self.assertIn("dev", m.bottles)
self.assertIn("implementer", m.agents)
# Broken bottle's agent is deferred.
self.assertNotIn("bad", m.bottles)
self.assertNotIn("broken-agent", m.agents)
self.assertIn("broken-agent", m.broken_agents)
class TestStaleJsonDies(_ResolveCase):
@@ -359,11 +416,11 @@ class TestNoManifestDies(_ResolveCase):
self.assertEqual({}, dict(m.agents))
class TestUnknownBottleReferenceDies(_ResolveCase):
"""An agent file naming a bottle that doesn't exist on disk
dies with the existing "bottle not defined" error."""
class TestUnknownBottleReferenceDefersToBroken(_ResolveCase):
"""An agent file naming a bottle that doesn't exist on disk is
deferred into broken_agents; other agents still load."""
def test_dies(self):
def test_stray_bottle_reference_deferred(self):
_write(self.home_cb / "bottles" / "dev.md", _BOTTLE_DEV)
_write(
self.home_cb / "agents" / "stray.md",
@@ -373,8 +430,11 @@ class TestUnknownBottleReferenceDies(_ResolveCase):
---
""",
)
with self.assertRaises(ManifestError):
self.resolve()
_write(self.home_cb / "agents" / "implementer.md", _AGENT_IMPL)
m = self.resolve()
self.assertNotIn("stray", m.agents)
self.assertIn("stray", m.broken_agents)
self.assertIn("implementer", m.agents)
class TestFilenameValidation(_ResolveCase):