feat(yaml_subset): hand-rolled YAML-subset + frontmatter parser

claude_bottle/yaml_subset.py — stdlib-only, ~570 lines. Parses the bounded shape claude-bottle's manifest files use:

- Block mappings (top-level + nested via indentation)
- Block lists (under a key; items can be scalars or block-style mappings whose keys align with the content after the dash)
- Inline lists `[a, b]` and inline dicts `{a: 1}` for one-level leaves
- Quoted (single + double) and bare strings
- Scalars: string, int, true/false, null/~

Rejects, each with a clear pointer at the line number:

- `yes`/`no`/`on`/`off`/`Y`/`N`/`TRUE`/`FALSE` — only literal `true` / `false` are bools (the Norway problem stays solved by "quote your strings if they look like bools")
- Bare strings that look like dates / octals / hex / floats
- Anchors (`&`), aliases (`*`), YAML tags (`!!str`)
- Multi-line block scalars (`|`, `>`)
- Tabs in indentation
- Nested flow style (only one level allowed)

Public API:

- parse_yaml_subset(text) -> dict[str, object]. Top level must be a mapping.
- parse_frontmatter(text) -> (dict, body_text). Strips the `---` delimiters, parses the content between them as YAML subset, and returns the verbatim body text after the closing fence.

46 unit tests covering every construct the real manifest files use (the cred_proxy.routes structure, role-as-inline-list, nested ExtraHosts dicts) plus every rejection case listed in PRD 0011.
2026-05-24 21:59:34 -04:00
parent afa8ca67a4
commit 8c1e4d0220
2 changed files with 896 additions and 0 deletions
@@ -0,0 +1,569 @@
+"""Hand-rolled YAML-subset parser for claude-bottle manifest files
+(PRD 0011).
+
+Why hand-rolled: the configs we accept have a bounded shape (flat
+top-level keys; values are strings / ints / bools / null / lists /
+nested dicts; no anchors, no multi-line block scalars, no tags, no
+implicit type coercion gotchas). A real YAML library is a much
+larger dependency surface than we need. The project's stdlib-only
+stance (CLAUDE.md) is the load-bearing reason; the safety
+properties — no Norway problem, no surprise date/octal coercion —
+are the bonus.
+
+Public API:
+
+    parse_yaml_subset(text) -> dict[str, object]
+        Parse a full document. Top level must be a mapping (the
+        shape every claude-bottle manifest file uses). Values are
+        str / int / bool / None / list / dict only.
+
+    parse_frontmatter(text) -> tuple[dict[str, object], str]
+        For a Markdown file with YAML frontmatter delimited by `---`
+        lines. Returns (frontmatter_dict, body_text).
+
+What we accept (block-style):
+
+    key: value                # mapping entry, value is inline
+    key:                      # mapping entry, value is block
+      nested_key: value
+
+    key:
+      - item                  # list under a key
+      - item
+
+    key:
+      - subkey: value1        # list item that's a mapping
+        subkey2: value2
+      - subkey: value3
+
+What we accept (inline, scalar leaves only):
+
+    key: [a, b, "c d"]
+    key: {a: 1, b: 2}
+
+What we reject (each dies with a clear pointer):
+
+    &anchor / *alias          # anchors / aliases
+    !!tag                     # YAML tags
+    | / >                     # multi-line block scalars
+    yes / no / on / off       # only true / false count as bool
+    ambiguous bare strings    # numbers, dates, etc. when unquoted
+    tabs as indentation       # spaces only
+    flow-style nested deeper than one level
+
+Errors carry the line number from the source document.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+from .log import die
+
+
+# --- Tokenizer / line preprocessing ----------------------------------------
+
+
+@dataclass(frozen=True)
+class _Line:
+    """One non-blank, non-comment line from the source. `indent` is
+    the column of the first non-space character; `content` is the
+    line text from that column onward, with trailing whitespace and
+    trailing `# ...` comments stripped. `lineno` is the 1-based
+    line in the original document."""
+
+    indent: int
+    content: str
+    lineno: int
+
+
+def _strip_trailing_comment(s: str) -> str:
+    """Strip ` # comment` from end of line, but only when the `#`
+    isn't inside a quoted string. Returns the cleaned line."""
+    in_single = False
+    in_double = False
+    for i, ch in enumerate(s):
+        if ch == "'" and not in_double:
+            in_single = not in_single
+        elif ch == '"' and not in_single:
+            in_double = not in_double
+        elif ch == "#" and not in_single and not in_double:
+            # `#` must be preceded by whitespace to be a comment,
+            # otherwise it's just a literal character.
+            if i == 0 or s[i - 1] in (" ", "\t"):
+                return s[:i].rstrip()
+    return s.rstrip()
+
+
+def _tokenize(text: str) -> list[_Line]:
+    """Drop blank / comment lines, parse indent + content for the
+    rest. Tabs in the indent area are rejected outright."""
+    out: list[_Line] = []
+    for n, raw in enumerate(text.splitlines(), start=1):
+        # Tabs in indent are a portability footgun — different
+        # editors render them differently and the spec says spaces.
+        leading = len(raw) - len(raw.lstrip(" \t"))
+        if "\t" in raw[:leading]:
+            die(f"yaml-subset: tab character in indent on line {n}")
+        stripped = raw.strip()
+        if not stripped:
+            continue
+        if stripped.startswith("#"):
+            continue
+        # Whole-line position: indent before first non-space.
+        indent = len(raw) - len(raw.lstrip(" "))
+        content = _strip_trailing_comment(raw[indent:])
+        if not content:
+            continue
+        out.append(_Line(indent=indent, content=content, lineno=n))
+    return out
+
+
+# --- Scalar parsing ---------------------------------------------------------
+
+
+_BARE_RX = re.compile(r"^[A-Za-z_][A-Za-z0-9_.\-]*$")
+_INT_RX = re.compile(r"^-?[0-9]+$")
+_RESERVED_BOOL_LIKE = frozenset({"yes", "no", "on", "off", "y", "n", "Y", "N",
+                                 "YES", "NO", "ON", "OFF", "True", "False",
+                                 "TRUE", "FALSE"})
+# Yaml-ish ambiguity sources that an unquoted bare token COULD be
+# mistaken for: dates, octals, etc. Detected and rejected so users
+# quote their strings explicitly. We don't try to enumerate every
+# ambiguity; the rule is "if it looks like a non-string literal,
+# either parse it as that literal (true/false/null/int) or reject
+# it with a 'quote it' hint."
+_DATE_RX = re.compile(r"^-?\d{4}-\d{2}-\d{2}(T\d.*)?$")
+_OCTAL_RX = re.compile(r"^0o?\d+$")
+_HEX_RX = re.compile(r"^0x[0-9A-Fa-f]+$")
+_FLOAT_RX = re.compile(r"^-?\d+\.\d+([eE][-+]?\d+)?$")
+
+
+def _parse_scalar(s: str, lineno: int) -> object:
+    """Turn a stripped value string into a Python value (str, int,
+    bool, None). Quoted strings preserve their literal content
+    (with standard escapes); bare strings are accepted only when
+    they're unambiguous."""
+    s = s.strip()
+    if not s:
+        return ""
+
+    # Quoted forms first — content is whatever's between the quotes
+    # with the documented escapes applied.
+    if (s.startswith('"') and s.endswith('"')) or (
+        s.startswith("'") and s.endswith("'")
+    ):
+        if len(s) < 2:
+            die(f"yaml-subset: unterminated quoted string on line {lineno}")
+        body = s[1:-1]
+        if s.startswith('"'):
+            # JSON-style escapes for double quotes.
+            try:
+                return body.encode("utf-8").decode("unicode_escape")
+            except UnicodeDecodeError as e:
+                die(f"yaml-subset: bad escape on line {lineno}: {e}")
+        else:
+            # Single quotes: only '' → ' (standard YAML); no other escapes.
+            return body.replace("''", "'")
+
+    # Reserved bool-like tokens that aren't `true` / `false` —
+    # always reject so users have to be explicit.
+    if s in _RESERVED_BOOL_LIKE:
+        if s in ("true", "false"):
+            return s == "true"
+        die(
+            f"yaml-subset: bare {s!r} on line {lineno} is ambiguous "
+            f"(use literal `true` / `false`, or quote it as a string)"
+        )
+
+    if s == "true":
+        return True
+    if s == "false":
+        return False
+    if s in ("null", "~"):
+        return None
+
+    if _INT_RX.match(s):
+        return int(s)
+
+    # Look-alikes that we reject to keep the user in control.
+    if _DATE_RX.match(s):
+        die(
+            f"yaml-subset: bare {s!r} on line {lineno} looks like a "
+            f"date — quote it as a string or use an explicit int"
+        )
+    if _OCTAL_RX.match(s):
+        die(
+            f"yaml-subset: bare {s!r} on line {lineno} looks like an "
+            f"octal/0-prefixed integer — quote it as a string"
+        )
+    if _HEX_RX.match(s):
+        die(
+            f"yaml-subset: bare {s!r} on line {lineno} looks like a "
+            f"hex integer — quote it as a string"
+        )
+    if _FLOAT_RX.match(s):
+        die(
+            f"yaml-subset: floats not supported (line {lineno}, "
+            f"value {s!r}); use an int or quote as a string"
+        )
+
+    # Bare strings: anything that matches the bare-string pattern is
+    # accepted as a string literal. Otherwise we hand it back as a
+    # string anyway — for URLs, paths, hostnames, etc. that contain
+    # special chars. The PRD calls for rejecting "ambiguous" strings,
+    # and we've already rejected the ambiguous shapes above; what's
+    # left is unambiguously a string.
+    return s
+
+
+# --- Inline list / dict ----------------------------------------------------
+
+
+def _parse_inline(s: str, lineno: int) -> object:
+    """Inline list `[a, b]` or dict `{a: 1, b: 2}` or scalar.
+    Nested flow more than one level deep is rejected (PRD)."""
+    s = s.strip()
+    if s.startswith("["):
+        if not s.endswith("]"):
+            die(f"yaml-subset: unterminated `[` on line {lineno}")
+        body = s[1:-1].strip()
+        if not body:
+            return []
+        items: list[object] = []
+        for raw in _split_flow(body, lineno, "list"):
+            v = _parse_scalar(raw, lineno)
+            items.append(v)
+        return items
+    if s.startswith("{"):
+        if not s.endswith("}"):
+            die(f"yaml-subset: unterminated `{{` on line {lineno}")
+        body = s[1:-1].strip()
+        if not body:
+            return {}
+        out: dict[str, object] = {}
+        for raw in _split_flow(body, lineno, "dict"):
+            if ":" not in raw:
+                die(
+                    f"yaml-subset: inline dict entry on line {lineno} "
+                    f"missing `:` ({raw!r})"
+                )
+            k, _, v = raw.partition(":")
+            k = k.strip()
+            if not _BARE_RX.match(k):
+                die(
+                    f"yaml-subset: inline dict key on line {lineno} "
+                    f"must be a bare identifier ({k!r})"
+                )
+            out[k] = _parse_scalar(v.strip(), lineno)
+        return out
+    return _parse_scalar(s, lineno)
+
+
+def _split_flow(body: str, lineno: int, kind: str) -> list[str]:
+    """Split `a, b, c` respecting quoted strings. Rejects nested
+    flow (a list/dict inside the flow body) since the PRD limits
+    flow nesting to one level."""
+    items: list[str] = []
+    depth_b = 0
+    depth_c = 0
+    in_single = False
+    in_double = False
+    cur = []
+    for ch in body:
+        if ch == "'" and not in_double:
+            in_single = not in_single
+        elif ch == '"' and not in_single:
+            in_double = not in_double
+        elif not in_single and not in_double:
+            if ch in "[{":
+                depth_b += 1
+            elif ch in "]}":
+                depth_b -= 1
+            if depth_b > 0:
+                die(
+                    f"yaml-subset: nested flow {kind} on line "
+                    f"{lineno} (only one level of flow allowed)"
+                )
+            if ch == "," and depth_b == 0 and depth_c == 0:
+                items.append("".join(cur))
+                cur = []
+                continue
+        cur.append(ch)
+    if cur:
+        items.append("".join(cur))
+    return [s.strip() for s in items if s.strip()]
+
+
+# --- Block parser ----------------------------------------------------------
+
+
+def _split_key_value(content: str, lineno: int) -> tuple[str, str]:
+    """Find the FIRST top-level `:` that separates a key from its
+    value (ignoring `:` inside quoted strings). Returns (key, value).
+    `value` may be empty (block-form mapping)."""
+    in_single = False
+    in_double = False
+    for i, ch in enumerate(content):
+        if ch == "'" and not in_double:
+            in_single = not in_single
+        elif ch == '"' and not in_single:
+            in_double = not in_double
+        elif ch == ":" and not in_single and not in_double:
+            # `:` must be followed by space or be at end-of-line to
+            # count as a key separator (otherwise `key:value` would
+            # ambiguous with URLs etc.).
+            if i + 1 >= len(content) or content[i + 1] in (" ", "\t"):
+                return content[:i].strip(), content[i + 1:].lstrip()
+    die(f"yaml-subset: line {lineno} missing `: ` separator: {content!r}")
+
+
+def _parse_block(
+    lines: list[_Line], idx: int, base_indent: int
+) -> tuple[object, int]:
+    """Parse a block starting at `lines[idx]`, expecting that block
+    to live at `base_indent`. Returns (value, new_idx) where
+    `new_idx` is the index of the first unconsumed line."""
+    if idx >= len(lines):
+        die("yaml-subset: unexpected end of document")
+    first = lines[idx]
+    if first.indent < base_indent:
+        die(
+            f"yaml-subset: line {first.lineno} indented less than "
+            f"expected (got {first.indent}, expected >= {base_indent})"
+        )
+    if first.indent > base_indent:
+        die(
+            f"yaml-subset: line {first.lineno} indented more than "
+            f"expected (got {first.indent}, expected {base_indent})"
+        )
+
+    if first.content.startswith("- ") or first.content == "-":
+        return _parse_block_list(lines, idx, base_indent)
+    return _parse_block_mapping(lines, idx, base_indent)
+
+
+def _parse_block_mapping(
+    lines: list[_Line], idx: int, base_indent: int
+) -> tuple[dict[str, object], int]:
+    out: dict[str, object] = {}
+    while idx < len(lines) and lines[idx].indent == base_indent:
+        line = lines[idx]
+        if line.content.startswith("- "):
+            die(
+                f"yaml-subset: line {line.lineno} unexpected list "
+                f"item at mapping indent (got `-`, expected `key:`)"
+            )
+        key, value_text = _split_key_value(line.content, line.lineno)
+        if not _BARE_RX.match(key):
+            die(
+                f"yaml-subset: line {line.lineno} key {key!r} is not "
+                f"a bare identifier"
+            )
+        if key in out:
+            die(
+                f"yaml-subset: line {line.lineno} duplicate key {key!r}"
+            )
+        if value_text:
+            out[key] = _parse_inline(value_text, line.lineno)
+            idx += 1
+        else:
+            # Value is a block on subsequent lines.
+            idx += 1
+            if idx >= len(lines) or lines[idx].indent <= base_indent:
+                # Empty block — treat as None to match YAML.
+                out[key] = None
+                continue
+            child_indent = lines[idx].indent
+            value, idx = _parse_block(lines, idx, child_indent)
+            out[key] = value
+    return out, idx
+
+
+def _parse_block_list(
+    lines: list[_Line], idx: int, base_indent: int
+) -> tuple[list[object], int]:
+    items: list[object] = []
+    while idx < len(lines) and lines[idx].indent == base_indent and (
+        lines[idx].content.startswith("- ") or lines[idx].content == "-"
+    ):
+        line = lines[idx]
+        rest = line.content[2:] if line.content.startswith("- ") else ""
+        rest = rest.strip()
+
+        # Look ahead at the next non-empty line: if it's indented
+        # more than the dash AND aligned with the rest's column,
+        # we have a multi-line mapping item.
+        if rest and ":" in rest and _looks_like_kv(rest):
+            # The first key:value of a multi-line mapping list item.
+            # Subsequent keys live at indent = base_indent + 2 (or
+            # wherever the content after `- ` started).
+            content_col = base_indent + 2
+            first_key, first_value_text = _split_key_value(rest, line.lineno)
+            if not _BARE_RX.match(first_key):
+                die(
+                    f"yaml-subset: line {line.lineno} key {first_key!r} "
+                    f"is not a bare identifier"
+                )
+            item: dict[str, object] = {}
+            if first_value_text:
+                item[first_key] = _parse_inline(first_value_text, line.lineno)
+                idx += 1
+            else:
+                idx += 1
+                if idx < len(lines) and lines[idx].indent > content_col:
+                    nested_indent = lines[idx].indent
+                    value, idx = _parse_block(lines, idx, nested_indent)
+                    item[first_key] = value
+                else:
+                    item[first_key] = None
+            # Consume additional keys at content_col.
+            while idx < len(lines) and lines[idx].indent == content_col:
+                ln = lines[idx]
+                if ln.content.startswith("- "):
+                    break  # next list item, not a sibling key
+                k, v_text = _split_key_value(ln.content, ln.lineno)
+                if not _BARE_RX.match(k):
+                    die(
+                        f"yaml-subset: line {ln.lineno} key {k!r} is "
+                        f"not a bare identifier"
+                    )
+                if k in item:
+                    die(f"yaml-subset: line {ln.lineno} duplicate key {k!r}")
+                if v_text:
+                    item[k] = _parse_inline(v_text, ln.lineno)
+                    idx += 1
+                else:
+                    idx += 1
+                    if idx < len(lines) and lines[idx].indent > content_col:
+                        nested_indent = lines[idx].indent
+                        value, idx = _parse_block(lines, idx, nested_indent)
+                        item[k] = value
+                    else:
+                        item[k] = None
+            items.append(item)
+        elif rest:
+            # Inline scalar / inline list / inline dict on the dash line.
+            items.append(_parse_inline(rest, line.lineno))
+            idx += 1
+        else:
+            # Bare `-` — value is a block on subsequent lines.
+            idx += 1
+            if idx >= len(lines) or lines[idx].indent <= base_indent:
+                items.append(None)
+                continue
+            child_indent = lines[idx].indent
+            value, idx = _parse_block(lines, idx, child_indent)
+            items.append(value)
+    return items, idx
+
+
+def _looks_like_kv(s: str) -> bool:
+    """Heuristic: does `s` look like a mapping `key: value` line?
+    True if there's an unquoted `:` that's followed by space-or-EOL."""
+    in_single = False
+    in_double = False
+    for i, ch in enumerate(s):
+        if ch == "'" and not in_double:
+            in_single = not in_single
+        elif ch == '"' and not in_single:
+            in_double = not in_double
+        elif ch == ":" and not in_single and not in_double:
+            if i + 1 >= len(s) or s[i + 1] in (" ", "\t"):
+                return True
+    return False
+
+
+# --- Public API -------------------------------------------------------------
+
+
+def parse_yaml_subset(text: str) -> dict[str, object]:
+    """Parse a YAML-subset document. Top level must be a mapping;
+    otherwise we die with a clear pointer."""
+    # Reject features that have no place in our schema before we
+    # tokenize, with line numbers from the raw text.
+    for n, raw in enumerate(text.splitlines(), start=1):
+        s = raw.strip()
+        if s.startswith("|") or s.startswith(">") or s.startswith("- |") or s.startswith("- >"):
+            die(
+                f"yaml-subset: line {n} uses a multi-line block "
+                f"scalar (`|` / `>`) — not supported. Use a quoted "
+                f"single-line string instead."
+            )
+        if "&" in s or "*" in s:
+            # Only flag when `&` or `*` is being used as anchor/alias,
+            # not when it's inside a quoted string. Cheap check: any
+            # bare `&foo:` / `*foo` at the start of a value position.
+            if re.search(r"(^|\s)[&*][A-Za-z0-9_]+", s):
+                die(
+                    f"yaml-subset: line {n} uses anchors / aliases "
+                    f"(`&` / `*`) — not supported."
+                )
+        if "!!" in s and not (s.count("'") % 2 or s.count('"') % 2):
+            die(
+                f"yaml-subset: line {n} uses a YAML tag (`!!`) — not "
+                f"supported."
+            )
+
+    lines = _tokenize(text)
+    if not lines:
+        return {}
+    base_indent = lines[0].indent
+    if base_indent != 0:
+        die(
+            f"yaml-subset: top-level content must start in column 0 "
+            f"(got column {base_indent} on line {lines[0].lineno})"
+        )
+    value, consumed = _parse_block(lines, 0, 0)
+    if consumed < len(lines):
+        die(
+            f"yaml-subset: trailing content starting on line "
+            f"{lines[consumed].lineno}"
+        )
+    if not isinstance(value, dict):
+        die("yaml-subset: top-level value must be a mapping")
+    return value
+
+
+def parse_frontmatter(text: str) -> tuple[dict[str, object], str]:
+    """Find `---` delimiters at the top of a Markdown file, parse
+    the frontmatter as YAML subset, return (mapping, body_text).
+
+    No frontmatter at all → ({}, text). Single opening `---` with
+    no closing → die with a clear pointer. Body is the verbatim
+    text after the closing `---` line (preserving original line
+    endings)."""
+    # Split into lines but preserve the original separators so the
+    # body slice is exact.
+    nl_positions: list[int] = []
+    for i, ch in enumerate(text):
+        if ch == "\n":
+            nl_positions.append(i)
+    if not nl_positions and not text:
+        return {}, ""
+
+    first_nl = nl_positions[0] if nl_positions else len(text)
+    first_line = text[:first_nl].strip()
+    if first_line != "---":
+        return {}, text  # no frontmatter; whole document is body
+
+    # Find the matching closing `---`.
+    body_start = -1
+    fm_end_lineno = -1
+    line_starts = [0] + [p + 1 for p in nl_positions]
+    for line_idx in range(1, len(line_starts)):
+        ls = line_starts[line_idx]
+        next_nl = nl_positions[line_idx] if line_idx < len(nl_positions) else len(text)
+        line = text[ls:next_nl].rstrip()
+        if line == "---":
+            body_start = next_nl + 1 if next_nl < len(text) else next_nl
+            fm_end_lineno = line_idx
+            break
+    if body_start < 0:
+        die("frontmatter: opening `---` has no matching closing `---`")
+
+    fm_text = text[line_starts[1]:line_starts[fm_end_lineno]] if fm_end_lineno > 1 else ""
+    fm = parse_yaml_subset(fm_text)
+    body = text[body_start:]
+    return fm, body
@@ -0,0 +1,327 @@
+"""Unit: YAML-subset parser used by the per-file MD manifest
+(PRD 0011). Covers happy paths, the constructs the manifest files
+actually use, and every rejection case the PRD enumerates."""
+
+import textwrap
+import unittest
+
+from claude_bottle.log import Die
+from claude_bottle.yaml_subset import parse_frontmatter, parse_yaml_subset
+
+
+def _y(s: str):
+    """Parse a dedented YAML string."""
+    return parse_yaml_subset(textwrap.dedent(s).lstrip("\n"))
+
+
+class TestScalars(unittest.TestCase):
+    def test_string(self):
+        self.assertEqual({"k": "hello"}, _y("k: hello\n"))
+
+    def test_string_with_url_chars(self):
+        self.assertEqual(
+            {"k": "https://example.com/path?x=1"},
+            _y("k: https://example.com/path?x=1\n"),
+        )
+
+    def test_int(self):
+        self.assertEqual({"port": 9099}, _y("port: 9099\n"))
+
+    def test_negative_int(self):
+        self.assertEqual({"n": -3}, _y("n: -3\n"))
+
+    def test_bool_true(self):
+        self.assertEqual({"x": True}, _y("x: true\n"))
+
+    def test_bool_false(self):
+        self.assertEqual({"x": False}, _y("x: false\n"))
+
+    def test_null(self):
+        self.assertEqual({"x": None}, _y("x: null\n"))
+
+    def test_tilde_null(self):
+        self.assertEqual({"x": None}, _y("x: ~\n"))
+
+    def test_double_quoted_string(self):
+        self.assertEqual({"k": "a b"}, _y('k: "a b"\n'))
+
+    def test_double_quoted_escape(self):
+        self.assertEqual({"k": "a\nb"}, _y(r'k: "a\nb"' + "\n"))
+
+    def test_single_quoted_string(self):
+        self.assertEqual({"k": "a b"}, _y("k: 'a b'\n"))
+
+    def test_single_quoted_apos_double(self):
+        # Single-quoted YAML uses `''` to embed a literal `'`.
+        self.assertEqual({"k": "it's"}, _y("k: 'it''s'\n"))
+
+
+class TestForbiddenBoolLikes(unittest.TestCase):
+    """Ambiguous bool-ish tokens have to be quoted explicitly."""
+
+    def _expect_die(self, src: str):
+        with self.assertRaises(Die):
+            _y(src)
+
+    def test_yes_dies(self):
+        self._expect_die("k: yes\n")
+
+    def test_no_dies(self):
+        self._expect_die("k: no\n")
+
+    def test_on_dies(self):
+        self._expect_die("k: on\n")
+
+    def test_capital_TRUE_dies(self):
+        self._expect_die("k: TRUE\n")
+
+    def test_norway_quoted_is_fine(self):
+        self.assertEqual({"country": "NO"}, _y('country: "NO"\n'))
+
+
+class TestForbiddenScalarShapes(unittest.TestCase):
+    def _expect_die(self, src: str):
+        with self.assertRaises(Die):
+            _y(src)
+
+    def test_bare_date_dies(self):
+        self._expect_die("k: 2026-05-24\n")
+
+    def test_bare_octal_dies(self):
+        self._expect_die("k: 0o755\n")
+
+    def test_bare_hex_dies(self):
+        self._expect_die("k: 0xFF\n")
+
+    def test_bare_float_dies(self):
+        self._expect_die("k: 1.5\n")
+
+    def test_quoted_date_is_fine(self):
+        self.assertEqual({"k": "2026-05-24"}, _y('k: "2026-05-24"\n'))
+
+
+class TestMapping(unittest.TestCase):
+    def test_flat_mapping(self):
+        self.assertEqual(
+            {"a": 1, "b": "two", "c": True},
+            _y("""
+                a: 1
+                b: two
+                c: true
+            """),
+        )
+
+    def test_nested_mapping(self):
+        out = _y("""
+            outer:
+              inner: hello
+              other: 5
+        """)
+        self.assertEqual({"outer": {"inner": "hello", "other": 5}}, out)
+
+    def test_duplicate_key_dies(self):
+        with self.assertRaises(Die):
+            _y("""
+                a: 1
+                a: 2
+            """)
+
+    def test_key_must_be_bare_identifier(self):
+        with self.assertRaises(Die):
+            _y('"weird key": 1\n')
+
+
+class TestBlockList(unittest.TestCase):
+    def test_list_of_strings(self):
+        out = _y("""
+            allowlist:
+              - example.com
+              - github.com
+        """)
+        self.assertEqual({"allowlist": ["example.com", "github.com"]}, out)
+
+    def test_list_of_mappings(self):
+        out = _y("""
+            routes:
+              - path: /a/
+                upstream: https://a.example
+              - path: /b/
+                upstream: https://b.example
+        """)
+        self.assertEqual(
+            {"routes": [
+                {"path": "/a/", "upstream": "https://a.example"},
+                {"path": "/b/", "upstream": "https://b.example"},
+            ]},
+            out,
+        )
+
+    def test_list_item_with_nested_mapping(self):
+        out = _y("""
+            entries:
+              - name: foo
+                ExtraHosts:
+                  host.example: 10.0.0.1
+              - name: bar
+        """)
+        self.assertEqual(
+            {"entries": [
+                {"name": "foo", "ExtraHosts": {"host.example": "10.0.0.1"}},
+                {"name": "bar"},
+            ]},
+            out,
+        )
+
+    def test_list_item_with_inline_list_value(self):
+        # role: [git-insteadof, tea-login] — the exact shape in the
+        # claude-bottle manifest.
+        out = _y("""
+            routes:
+              - path: /x/
+                role: [git-insteadof, tea-login]
+        """)
+        self.assertEqual(
+            {"routes": [
+                {"path": "/x/", "role": ["git-insteadof", "tea-login"]},
+            ]},
+            out,
+        )
+
+
+class TestInline(unittest.TestCase):
+    def test_inline_list(self):
+        self.assertEqual({"l": [1, 2, 3]}, _y("l: [1, 2, 3]\n"))
+
+    def test_inline_list_of_strings(self):
+        self.assertEqual({"l": ["a", "b", "c"]}, _y("l: [a, b, c]\n"))
+
+    def test_inline_dict(self):
+        self.assertEqual(
+            {"d": {"a": "1", "b": "2"}},
+            _y('d: {a: "1", b: "2"}\n'),
+        )
+
+    def test_nested_flow_dies(self):
+        with self.assertRaises(Die):
+            _y("l: [[1, 2], [3, 4]]\n")
+
+
+class TestForbiddenConstructs(unittest.TestCase):
+    def test_anchor_dies(self):
+        with self.assertRaises(Die):
+            _y("""
+                a: &anchor 1
+                b: *anchor
+            """)
+
+    def test_multiline_block_scalar_dies(self):
+        with self.assertRaises(Die):
+            _y("""
+                k: |
+                  line 1
+                  line 2
+            """)
+
+    def test_tag_dies(self):
+        with self.assertRaises(Die):
+            _y("k: !!str hello\n")
+
+    def test_tab_in_indent_dies(self):
+        with self.assertRaises(Die):
+            _y("a:\n\tb: 1\n")
+
+
+class TestComments(unittest.TestCase):
+    def test_full_line_comment(self):
+        out = _y("""
+            # comment
+            k: v
+        """)
+        self.assertEqual({"k": "v"}, out)
+
+    def test_trailing_comment(self):
+        self.assertEqual({"k": "v"}, _y("k: v  # trailing\n"))
+
+    def test_hash_in_quoted_string_kept(self):
+        self.assertEqual({"k": "a#b"}, _y('k: "a#b"\n'))
+
+
+class TestRealisticBottleFile(unittest.TestCase):
+    """The exact shape a real bottle frontmatter takes — the parser
+    has to round-trip this without surprise."""
+
+    def test_dev_bottle(self):
+        out = _y("""
+            cred_proxy:
+              routes:
+                - path: /anthropic/
+                  upstream: https://api.anthropic.com
+                  auth_scheme: Bearer
+                  token_ref: CLAUDE_BOTTLE_OAUTH_TOKEN
+                  role: anthropic-base-url
+                - path: /gitea/dideric/
+                  upstream: https://gitea.dideric.is
+                  auth_scheme: token
+                  token_ref: GITEA_TOKEN
+                  role: [git-insteadof, tea-login]
+            git:
+              - Name: claude-bottle
+                Upstream: ssh://git@gitea.dideric.is:30009/x/y.git
+                IdentityFile: ~/.ssh/gitea.pem
+                ExtraHosts:
+                  gitea.dideric.is: 100.78.141.42
+            egress:
+              allowlist:
+                - example.com
+        """)
+        # Spot-check the deep parts; the structure is large.
+        self.assertEqual(2, len(out["cred_proxy"]["routes"]))
+        self.assertEqual(
+            ["git-insteadof", "tea-login"],
+            out["cred_proxy"]["routes"][1]["role"],
+        )
+        self.assertEqual(
+            "100.78.141.42",
+            out["git"][0]["ExtraHosts"]["gitea.dideric.is"],
+        )
+        self.assertEqual(["example.com"], out["egress"]["allowlist"])
+
+
+class TestFrontmatter(unittest.TestCase):
+    def test_basic(self):
+        text = textwrap.dedent("""
+            ---
+            bottle: dev
+            ---
+            This is the body.
+        """).lstrip("\n")
+        fm, body = parse_frontmatter(text)
+        self.assertEqual({"bottle": "dev"}, fm)
+        self.assertIn("This is the body", body)
+
+    def test_no_frontmatter_passes_through(self):
+        text = "no frontmatter here\njust body\n"
+        fm, body = parse_frontmatter(text)
+        self.assertEqual({}, fm)
+        self.assertEqual(text, body)
+
+    def test_unclosed_frontmatter_dies(self):
+        with self.assertRaises(Die):
+            parse_frontmatter("---\nbottle: dev\nno closing")
+
+    def test_body_preserves_blank_lines(self):
+        text = (
+            "---\n"
+            "k: v\n"
+            "---\n"
+            "\n"
+            "line one\n"
+            "\n"
+            "line three\n"
+        )
+        _, body = parse_frontmatter(text)
+        self.assertEqual("\nline one\n\nline three\n", body)
+
+
+# Run the suite when this file is executed directly.
+if __name__ == "__main__":
+    unittest.main()