diff --git a/claude_bottle/yaml_subset.py b/claude_bottle/yaml_subset.py new file mode 100644 index 0000000..4267dc8 --- /dev/null +++ b/claude_bottle/yaml_subset.py @@ -0,0 +1,569 @@ +"""Hand-rolled YAML-subset parser for claude-bottle manifest files +(PRD 0011). + +Why hand-rolled: the configs we accept have a bounded shape (flat +top-level keys; values are strings / ints / bools / null / lists / +nested dicts; no anchors, no multi-line block scalars, no tags, no +implicit type coercion gotchas). A real YAML library is a much +larger dependency surface than we need. The project's stdlib-only +stance (CLAUDE.md) is the load-bearing reason; the safety +properties — no Norway problem, no surprise date/octal coercion — +are the bonus. + +Public API: + + parse_yaml_subset(text) -> dict[str, object] + Parse a full document. Top level must be a mapping (the + shape every claude-bottle manifest file uses). Values are + str / int / bool / None / list / dict only. + + parse_frontmatter(text) -> tuple[dict[str, object], str] + For a Markdown file with YAML frontmatter delimited by `---` + lines. Returns (frontmatter_dict, body_text). + +What we accept (block-style): + + key: value # mapping entry, value is inline + key: # mapping entry, value is block + nested_key: value + + key: + - item # list under a key + - item + + key: + - subkey: value1 # list item that's a mapping + subkey2: value2 + - subkey: value3 + +What we accept (inline, scalar leaves only): + + key: [a, b, "c d"] + key: {a: 1, b: 2} + +What we reject (each dies with a clear pointer): + + &anchor / *alias # anchors / aliases + !!tag # YAML tags + | / > # multi-line block scalars + yes / no / on / off # only true / false count as bool + ambiguous bare strings # numbers, dates, etc. when unquoted + tabs as indentation # spaces only + flow-style nested deeper than one level + +Errors carry the line number from the source document. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +from .log import die + + +# --- Tokenizer / line preprocessing ---------------------------------------- + + +@dataclass(frozen=True) +class _Line: + """One non-blank, non-comment line from the source. `indent` is + the column of the first non-space character; `content` is the + line text from that column onward, with trailing whitespace and + trailing `# ...` comments stripped. `lineno` is the 1-based + line in the original document.""" + + indent: int + content: str + lineno: int + + +def _strip_trailing_comment(s: str) -> str: + """Strip ` # comment` from end of line, but only when the `#` + isn't inside a quoted string. Returns the cleaned line.""" + in_single = False + in_double = False + for i, ch in enumerate(s): + if ch == "'" and not in_double: + in_single = not in_single + elif ch == '"' and not in_single: + in_double = not in_double + elif ch == "#" and not in_single and not in_double: + # `#` must be preceded by whitespace to be a comment, + # otherwise it's just a literal character. + if i == 0 or s[i - 1] in (" ", "\t"): + return s[:i].rstrip() + return s.rstrip() + + +def _tokenize(text: str) -> list[_Line]: + """Drop blank / comment lines, parse indent + content for the + rest. Tabs in the indent area are rejected outright.""" + out: list[_Line] = [] + for n, raw in enumerate(text.splitlines(), start=1): + # Tabs in indent are a portability footgun — different + # editors render them differently and the spec says spaces. + leading = len(raw) - len(raw.lstrip(" \t")) + if "\t" in raw[:leading]: + die(f"yaml-subset: tab character in indent on line {n}") + stripped = raw.strip() + if not stripped: + continue + if stripped.startswith("#"): + continue + # Whole-line position: indent before first non-space. 
+ indent = len(raw) - len(raw.lstrip(" ")) + content = _strip_trailing_comment(raw[indent:]) + if not content: + continue + out.append(_Line(indent=indent, content=content, lineno=n)) + return out + + +# --- Scalar parsing --------------------------------------------------------- + + +_BARE_RX = re.compile(r"^[A-Za-z_][A-Za-z0-9_.\-]*$") +_INT_RX = re.compile(r"^-?[0-9]+$") +_RESERVED_BOOL_LIKE = frozenset({"yes", "no", "on", "off", "y", "n", "Y", "N", + "YES", "NO", "ON", "OFF", "True", "False", + "TRUE", "FALSE"}) +# Yaml-ish ambiguity sources that an unquoted bare token COULD be +# mistaken for: dates, octals, etc. Detected and rejected so users +# quote their strings explicitly. We don't try to enumerate every +# ambiguity; the rule is "if it looks like a non-string literal, +# either parse it as that literal (true/false/null/int) or reject +# it with a 'quote it' hint." +_DATE_RX = re.compile(r"^-?\d{4}-\d{2}-\d{2}(T\d.*)?$") +_OCTAL_RX = re.compile(r"^0o?\d+$") +_HEX_RX = re.compile(r"^0x[0-9A-Fa-f]+$") +_FLOAT_RX = re.compile(r"^-?\d+\.\d+([eE][-+]?\d+)?$") + + +def _parse_scalar(s: str, lineno: int) -> object: + """Turn a stripped value string into a Python value (str, int, + bool, None). Quoted strings preserve their literal content + (with standard escapes); bare strings are accepted only when + they're unambiguous.""" + s = s.strip() + if not s: + return "" + + # Quoted forms first — content is whatever's between the quotes + # with the documented escapes applied. + if (s.startswith('"') and s.endswith('"')) or ( + s.startswith("'") and s.endswith("'") + ): + if len(s) < 2: + die(f"yaml-subset: unterminated quoted string on line {lineno}") + body = s[1:-1] + if s.startswith('"'): + # JSON-style escapes for double quotes. + try: + return body.encode("utf-8").decode("unicode_escape") + except UnicodeDecodeError as e: + die(f"yaml-subset: bad escape on line {lineno}: {e}") + else: + # Single quotes: only '' → ' (standard YAML); no other escapes. 
+ return body.replace("''", "'") + + # Reserved bool-like tokens that aren't `true` / `false` — + # always reject so users have to be explicit. + if s in _RESERVED_BOOL_LIKE: + if s in ("true", "false"): + return s == "true" + die( + f"yaml-subset: bare {s!r} on line {lineno} is ambiguous " + f"(use literal `true` / `false`, or quote it as a string)" + ) + + if s == "true": + return True + if s == "false": + return False + if s in ("null", "~"): + return None + + if _INT_RX.match(s): + return int(s) + + # Look-alikes that we reject to keep the user in control. + if _DATE_RX.match(s): + die( + f"yaml-subset: bare {s!r} on line {lineno} looks like a " + f"date — quote it as a string or use an explicit int" + ) + if _OCTAL_RX.match(s): + die( + f"yaml-subset: bare {s!r} on line {lineno} looks like an " + f"octal/0-prefixed integer — quote it as a string" + ) + if _HEX_RX.match(s): + die( + f"yaml-subset: bare {s!r} on line {lineno} looks like a " + f"hex integer — quote it as a string" + ) + if _FLOAT_RX.match(s): + die( + f"yaml-subset: floats not supported (line {lineno}, " + f"value {s!r}); use an int or quote as a string" + ) + + # Bare strings: anything that matches the bare-string pattern is + # accepted as a string literal. Otherwise we hand it back as a + # string anyway — for URLs, paths, hostnames, etc. that contain + # special chars. The PRD calls for rejecting "ambiguous" strings, + # and we've already rejected the ambiguous shapes above; what's + # left is unambiguously a string. + return s + + +# --- Inline list / dict ---------------------------------------------------- + + +def _parse_inline(s: str, lineno: int) -> object: + """Inline list `[a, b]` or dict `{a: 1, b: 2}` or scalar. 
+ Nested flow more than one level deep is rejected (PRD).""" + s = s.strip() + if s.startswith("["): + if not s.endswith("]"): + die(f"yaml-subset: unterminated `[` on line {lineno}") + body = s[1:-1].strip() + if not body: + return [] + items: list[object] = [] + for raw in _split_flow(body, lineno, "list"): + v = _parse_scalar(raw, lineno) + items.append(v) + return items + if s.startswith("{"): + if not s.endswith("}"): + die(f"yaml-subset: unterminated `{{` on line {lineno}") + body = s[1:-1].strip() + if not body: + return {} + out: dict[str, object] = {} + for raw in _split_flow(body, lineno, "dict"): + if ":" not in raw: + die( + f"yaml-subset: inline dict entry on line {lineno} " + f"missing `:` ({raw!r})" + ) + k, _, v = raw.partition(":") + k = k.strip() + if not _BARE_RX.match(k): + die( + f"yaml-subset: inline dict key on line {lineno} " + f"must be a bare identifier ({k!r})" + ) + out[k] = _parse_scalar(v.strip(), lineno) + return out + return _parse_scalar(s, lineno) + + +def _split_flow(body: str, lineno: int, kind: str) -> list[str]: + """Split `a, b, c` respecting quoted strings. 
Rejects nested + flow (a list/dict inside the flow body) since the PRD limits + flow nesting to one level.""" + items: list[str] = [] + depth_b = 0 + depth_c = 0 + in_single = False + in_double = False + cur = [] + for ch in body: + if ch == "'" and not in_double: + in_single = not in_single + elif ch == '"' and not in_single: + in_double = not in_double + elif not in_single and not in_double: + if ch in "[{": + depth_b += 1 + elif ch in "]}": + depth_b -= 1 + if depth_b > 0: + die( + f"yaml-subset: nested flow {kind} on line " + f"{lineno} (only one level of flow allowed)" + ) + if ch == "," and depth_b == 0 and depth_c == 0: + items.append("".join(cur)) + cur = [] + continue + cur.append(ch) + if cur: + items.append("".join(cur)) + return [s.strip() for s in items if s.strip()] + + +# --- Block parser ---------------------------------------------------------- + + +def _split_key_value(content: str, lineno: int) -> tuple[str, str]: + """Find the FIRST top-level `:` that separates a key from its + value (ignoring `:` inside quoted strings). Returns (key, value). + `value` may be empty (block-form mapping).""" + in_single = False + in_double = False + for i, ch in enumerate(content): + if ch == "'" and not in_double: + in_single = not in_single + elif ch == '"' and not in_single: + in_double = not in_double + elif ch == ":" and not in_single and not in_double: + # `:` must be followed by space or be at end-of-line to + # count as a key separator (otherwise `key:value` would + # ambiguous with URLs etc.). + if i + 1 >= len(content) or content[i + 1] in (" ", "\t"): + return content[:i].strip(), content[i + 1:].lstrip() + die(f"yaml-subset: line {lineno} missing `: ` separator: {content!r}") + + +def _parse_block( + lines: list[_Line], idx: int, base_indent: int +) -> tuple[object, int]: + """Parse a block starting at `lines[idx]`, expecting that block + to live at `base_indent`. 
def _parse_block_mapping(
    lines: list[_Line], idx: int, base_indent: int
) -> tuple[dict[str, object], int]:
    """Parse consecutive `key: value` lines at `base_indent` into a dict.

    Args:
        lines: All tokenized lines of the document.
        idx: Index of the first line of this mapping.
        base_indent: Indent column every key of this mapping sits at.

    Returns:
        `(mapping, new_idx)` where `new_idx` is the index of the first
        line that does not belong to this mapping (its indent differs
        from `base_indent`, or the document ended).

    A key with nothing after the `:` takes its value from the more
    deeply indented block that follows; when no such block exists the
    value is `None`.

    Dies (via `die`) on a list item at mapping indent, on a key that is
    not a bare identifier, and on a duplicate key."""
    out: dict[str, object] = {}
    while idx < len(lines) and lines[idx].indent == base_indent:
        line = lines[idx]
        # Match the list-item test used by `_parse_block` /
        # `_parse_block_list`, which also treat a bare `-` as a list
        # item; otherwise a lone `-` is reported as a missing `: `.
        if line.content == "-" or line.content.startswith("- "):
            die(
                f"yaml-subset: line {line.lineno} unexpected list "
                f"item at mapping indent (got `-`, expected `key:`)"
            )
        key, value_text = _split_key_value(line.content, line.lineno)
        if not _BARE_RX.match(key):
            die(
                f"yaml-subset: line {line.lineno} key {key!r} is not "
                f"a bare identifier"
            )
        if key in out:
            die(
                f"yaml-subset: line {line.lineno} duplicate key {key!r}"
            )
        idx += 1
        if value_text:
            # Inline value on the same line as the key.
            out[key] = _parse_inline(value_text, line.lineno)
            continue
        if idx >= len(lines) or lines[idx].indent <= base_indent:
            # Empty block — treat as None to match YAML.
            out[key] = None
            continue
        # Value is a block on the following, more deeply indented lines;
        # its own indent is whatever its first line uses.
        value, idx = _parse_block(lines, idx, lines[idx].indent)
        out[key] = value
    return out, idx
# An anchor (`&name`) or alias (`*name`) at the start of the line or
# right after whitespace.
_ANCHOR_ALIAS_RX = re.compile(r"(^|\s)[&*][A-Za-z0-9_]+")
# A block-scalar header (`|` / `>` plus optional chomping / indentation
# indicators) in value position, i.e. as the whole value after `key: `.
_BLOCK_SCALAR_VALUE_RX = re.compile(r":[ \t]+[|>][-+0-9]*$")


def _structural_view(raw: str) -> str:
    """Return `raw` with its trailing comment dropped and every quoted
    span (quotes included) blanked out with spaces, then stripped.

    Used by the forbidden-feature pre-scan so that characters inside
    comments or quoted strings are never mistaken for YAML syntax. The
    comment rule is the tokenizer's: an unquoted `#` at the start of
    the line or preceded by a space / tab."""
    kept: list[str] = []
    open_quote = ""  # the quote character we are currently inside, if any
    for i, ch in enumerate(raw):
        if open_quote:
            if ch == open_quote:
                open_quote = ""
            kept.append(" ")
        elif ch in ("'", '"'):
            open_quote = ch
            kept.append(" ")
        elif ch == "#" and (i == 0 or raw[i - 1] in (" ", "\t")):
            break
        else:
            kept.append(ch)
    return "".join(kept).strip()


def parse_yaml_subset(text: str) -> dict[str, object]:
    """Parse a YAML-subset document whose top level is a mapping.

    Args:
        text: The full document text.

    Returns:
        The parsed mapping; `{}` when the document has no content
        lines (only blanks / comments).

    Dies (via `die`) when the document uses a multi-line block scalar,
    an anchor / alias or a `!!` tag, when the first content line is
    not in column 0, when content is left over after the top-level
    block, when the top-level value is not a mapping, and on any error
    raised while tokenizing or parsing the block structure."""
    # Reject features that have no place in our schema before we
    # tokenize, with line numbers from the raw text. The scan looks at
    # each line with comments and quoted strings masked out, so
    # `# use *args` or `k: "wow!!"` are not false positives.
    for n, raw in enumerate(text.splitlines(), start=1):
        s = _structural_view(raw)
        if not s:
            continue
        # Catch the indicator both at the start of a line / list item
        # and in value position (`key: |`, `- key: >-`); the latter is
        # the common way block scalars are actually written.
        if s.startswith(("|", ">", "- |", "- >")) or _BLOCK_SCALAR_VALUE_RX.search(s):
            die(
                f"yaml-subset: line {n} uses a multi-line block "
                f"scalar (`|` / `>`) — not supported. Use a quoted "
                f"single-line string instead."
            )
        if _ANCHOR_ALIAS_RX.search(s):
            die(
                f"yaml-subset: line {n} uses anchors / aliases "
                f"(`&` / `*`) — not supported."
            )
        if "!!" in s:
            die(
                f"yaml-subset: line {n} uses a YAML tag (`!!`) — not "
                f"supported."
            )

    lines = _tokenize(text)
    if not lines:
        return {}
    base_indent = lines[0].indent
    if base_indent != 0:
        die(
            f"yaml-subset: top-level content must start in column 0 "
            f"(got column {base_indent} on line {lines[0].lineno})"
        )
    value, consumed = _parse_block(lines, 0, 0)
    if consumed < len(lines):
        # The block parser stopped early, e.g. on a line indented
        # deeper than anything that could own it.
        die(
            f"yaml-subset: trailing content starting on line "
            f"{lines[consumed].lineno}"
        )
    if not isinstance(value, dict):
        die("yaml-subset: top-level value must be a mapping")
    return value
Body is the verbatim + text after the closing `---` line (preserving original line + endings).""" + # Split into lines but preserve the original separators so the + # body slice is exact. + nl_positions: list[int] = [] + for i, ch in enumerate(text): + if ch == "\n": + nl_positions.append(i) + if not nl_positions and not text: + return {}, "" + + first_nl = nl_positions[0] if nl_positions else len(text) + first_line = text[:first_nl].strip() + if first_line != "---": + return {}, text # no frontmatter; whole document is body + + # Find the matching closing `---`. + body_start = -1 + fm_end_lineno = -1 + line_starts = [0] + [p + 1 for p in nl_positions] + for line_idx in range(1, len(line_starts)): + ls = line_starts[line_idx] + next_nl = nl_positions[line_idx] if line_idx < len(nl_positions) else len(text) + line = text[ls:next_nl].rstrip() + if line == "---": + body_start = next_nl + 1 if next_nl < len(text) else next_nl + fm_end_lineno = line_idx + break + if body_start < 0: + die("frontmatter: opening `---` has no matching closing `---`") + + fm_text = text[line_starts[1]:line_starts[fm_end_lineno]] if fm_end_lineno > 1 else "" + fm = parse_yaml_subset(fm_text) + body = text[body_start:] + return fm, body diff --git a/tests/unit/test_yaml_subset.py b/tests/unit/test_yaml_subset.py new file mode 100644 index 0000000..5c5ecd1 --- /dev/null +++ b/tests/unit/test_yaml_subset.py @@ -0,0 +1,327 @@ +"""Unit: YAML-subset parser used by the per-file MD manifest +(PRD 0011). 
class TestScalars(unittest.TestCase):
    """Scalar values: bare strings, ints, bools, nulls and both quote
    styles, each parsed as the value of a single top-level key."""

    def _check(self, source, expected):
        """Parse `source` and compare the resulting mapping to `expected`."""
        self.assertEqual(expected, _y(source))

    def test_string(self):
        self._check("k: hello\n", {"k": "hello"})

    def test_string_with_url_chars(self):
        url = "https://example.com/path?x=1"
        self._check("k: " + url + "\n", {"k": url})

    def test_int(self):
        self._check("port: 9099\n", {"port": 9099})

    def test_negative_int(self):
        self._check("n: -3\n", {"n": -3})

    def test_bool_true(self):
        self._check("x: true\n", {"x": True})

    def test_bool_false(self):
        self._check("x: false\n", {"x": False})

    def test_null(self):
        self._check("x: null\n", {"x": None})

    def test_tilde_null(self):
        self._check("x: ~\n", {"x": None})

    def test_double_quoted_string(self):
        self._check('k: "a b"\n', {"k": "a b"})

    def test_double_quoted_escape(self):
        # The document contains a literal backslash-n; the parser is
        # expected to turn it into a real newline.
        self._check('k: "a\\nb"\n', {"k": "a\nb"})

    def test_single_quoted_string(self):
        self._check("k: 'a b'\n", {"k": "a b"})

    def test_single_quoted_apos_double(self):
        # Single-quoted YAML uses `''` to embed a literal `'`.
        self._check("k: 'it''s'\n", {"k": "it's"})
class TestForbiddenConstructs(unittest.TestCase):
    """YAML features outside the subset must make the parser die."""

    def _assert_rejected(self, source):
        """Assert that parsing `source` raises `Die`."""
        self.assertRaises(Die, _y, source)

    def test_anchor_dies(self):
        self._assert_rejected("a: &anchor 1\nb: *anchor\n")

    def test_multiline_block_scalar_dies(self):
        self._assert_rejected("k: |\n  line 1\n  line 2\n")

    def test_tag_dies(self):
        self._assert_rejected("k: !!str hello\n")

    def test_tab_in_indent_dies(self):
        self._assert_rejected("a:\n\tb: 1\n")
class TestFrontmatter(unittest.TestCase):
    """`parse_frontmatter`: splitting a Markdown file into its `---`
    delimited frontmatter mapping and the body text after it."""

    def test_basic(self):
        text = "\n".join(["---", "bottle: dev", "---", "This is the body.", ""])
        frontmatter, body = parse_frontmatter(text)
        self.assertEqual({"bottle": "dev"}, frontmatter)
        self.assertIn("This is the body", body)

    def test_no_frontmatter_passes_through(self):
        text = "no frontmatter here\njust body\n"
        frontmatter, body = parse_frontmatter(text)
        self.assertEqual({}, frontmatter)
        self.assertEqual(text, body)

    def test_unclosed_frontmatter_dies(self):
        self.assertRaises(
            Die, parse_frontmatter, "---\nbottle: dev\nno closing"
        )

    def test_body_preserves_blank_lines(self):
        header = "---\nk: v\n---\n"
        expected_body = "\nline one\n\nline three\n"
        result = parse_frontmatter(header + expected_body)
        self.assertEqual(expected_body, result[1])