feat(yaml_subset): hand-rolled YAML-subset + frontmatter parser
claude_bottle/yaml_subset.py — stdlib-only, ~570 lines. Parses the
bounded shape claude-bottle's manifest files use:
- Block mappings (top-level + nested via indentation)
- Block lists (under a key, items can be scalars or block-style
mappings whose keys align with the rest after the dash)
- Inline lists `[a, b]` and inline dicts `{a: 1}` for one-level
leaves
- Quoted (single + double) and bare strings
- Scalars: string, int, true/false, null/~
Rejects, each with a clear pointer at the line number:
- `yes`/`no`/`on`/`off`/`Y`/`N`/`TRUE`/`FALSE` — only literal
`true` / `false` are bools (the Norway problem stays solved by
"quote your strings if they look like bools")
- Bare strings that look like dates / octals / hex / floats
- Anchors (`&`/`*`), aliases, YAML tags (`!!str`)
- Multi-line block scalars (`|`, `>`)
- Tabs in indentation
- Nested flow style (only one level allowed)
Public API:
parse_yaml_subset(text) -> dict[str, object]
Top level must be a mapping.
parse_frontmatter(text) -> (dict, body_text)
Strips `---` delimiters, parses content as YAML subset, returns
the verbatim body text after the closing fence.
46 unit tests covering every construct the real manifest files use
(the cred_proxy.routes structure, role-as-inline-list, nested
ExtraHosts dicts) plus every rejection case listed in PRD 0011.
This commit is contained in:
@@ -0,0 +1,569 @@
|
||||
"""Hand-rolled YAML-subset parser for claude-bottle manifest files
|
||||
(PRD 0011).
|
||||
|
||||
Why hand-rolled: the configs we accept have a bounded shape (flat
|
||||
top-level keys; values are strings / ints / bools / null / lists /
|
||||
nested dicts; no anchors, no multi-line block scalars, no tags, no
|
||||
implicit type coercion gotchas). A real YAML library is a much
|
||||
larger dependency surface than we need. The project's stdlib-only
|
||||
stance (CLAUDE.md) is the load-bearing reason; the safety
|
||||
properties — no Norway problem, no surprise date/octal coercion —
|
||||
are the bonus.
|
||||
|
||||
Public API:
|
||||
|
||||
parse_yaml_subset(text) -> dict[str, object]
|
||||
Parse a full document. Top level must be a mapping (the
|
||||
shape every claude-bottle manifest file uses). Values are
|
||||
str / int / bool / None / list / dict only.
|
||||
|
||||
parse_frontmatter(text) -> tuple[dict[str, object], str]
|
||||
For a Markdown file with YAML frontmatter delimited by `---`
|
||||
lines. Returns (frontmatter_dict, body_text).
|
||||
|
||||
What we accept (block-style):
|
||||
|
||||
key: value # mapping entry, value is inline
|
||||
key: # mapping entry, value is block
|
||||
nested_key: value
|
||||
|
||||
key:
|
||||
- item # list under a key
|
||||
- item
|
||||
|
||||
key:
|
||||
- subkey: value1 # list item that's a mapping
|
||||
subkey2: value2
|
||||
- subkey: value3
|
||||
|
||||
What we accept (inline, scalar leaves only):
|
||||
|
||||
key: [a, b, "c d"]
|
||||
key: {a: 1, b: 2}
|
||||
|
||||
What we reject (each dies with a clear pointer):
|
||||
|
||||
&anchor / *alias # anchors / aliases
|
||||
!!tag # YAML tags
|
||||
| / > # multi-line block scalars
|
||||
yes / no / on / off # only true / false count as bool
|
||||
ambiguous bare strings # numbers, dates, etc. when unquoted
|
||||
tabs as indentation # spaces only
|
||||
flow-style nested deeper than one level
|
||||
|
||||
Errors carry the line number from the source document.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .log import die
|
||||
|
||||
|
||||
# --- Tokenizer / line preprocessing ----------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _Line:
|
||||
"""One non-blank, non-comment line from the source. `indent` is
|
||||
the column of the first non-space character; `content` is the
|
||||
line text from that column onward, with trailing whitespace and
|
||||
trailing `# ...` comments stripped. `lineno` is the 1-based
|
||||
line in the original document."""
|
||||
|
||||
indent: int
|
||||
content: str
|
||||
lineno: int
|
||||
|
||||
|
||||
def _strip_trailing_comment(s: str) -> str:
|
||||
"""Strip ` # comment` from end of line, but only when the `#`
|
||||
isn't inside a quoted string. Returns the cleaned line."""
|
||||
in_single = False
|
||||
in_double = False
|
||||
for i, ch in enumerate(s):
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
elif ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
elif ch == "#" and not in_single and not in_double:
|
||||
# `#` must be preceded by whitespace to be a comment,
|
||||
# otherwise it's just a literal character.
|
||||
if i == 0 or s[i - 1] in (" ", "\t"):
|
||||
return s[:i].rstrip()
|
||||
return s.rstrip()
|
||||
|
||||
|
||||
def _tokenize(text: str) -> list[_Line]:
|
||||
"""Drop blank / comment lines, parse indent + content for the
|
||||
rest. Tabs in the indent area are rejected outright."""
|
||||
out: list[_Line] = []
|
||||
for n, raw in enumerate(text.splitlines(), start=1):
|
||||
# Tabs in indent are a portability footgun — different
|
||||
# editors render them differently and the spec says spaces.
|
||||
leading = len(raw) - len(raw.lstrip(" \t"))
|
||||
if "\t" in raw[:leading]:
|
||||
die(f"yaml-subset: tab character in indent on line {n}")
|
||||
stripped = raw.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
if stripped.startswith("#"):
|
||||
continue
|
||||
# Whole-line position: indent before first non-space.
|
||||
indent = len(raw) - len(raw.lstrip(" "))
|
||||
content = _strip_trailing_comment(raw[indent:])
|
||||
if not content:
|
||||
continue
|
||||
out.append(_Line(indent=indent, content=content, lineno=n))
|
||||
return out
|
||||
|
||||
|
||||
# --- Scalar parsing ---------------------------------------------------------
|
||||
|
||||
|
||||
_BARE_RX = re.compile(r"^[A-Za-z_][A-Za-z0-9_.\-]*$")
|
||||
_INT_RX = re.compile(r"^-?[0-9]+$")
|
||||
_RESERVED_BOOL_LIKE = frozenset({"yes", "no", "on", "off", "y", "n", "Y", "N",
|
||||
"YES", "NO", "ON", "OFF", "True", "False",
|
||||
"TRUE", "FALSE"})
|
||||
# Yaml-ish ambiguity sources that an unquoted bare token COULD be
|
||||
# mistaken for: dates, octals, etc. Detected and rejected so users
|
||||
# quote their strings explicitly. We don't try to enumerate every
|
||||
# ambiguity; the rule is "if it looks like a non-string literal,
|
||||
# either parse it as that literal (true/false/null/int) or reject
|
||||
# it with a 'quote it' hint."
|
||||
_DATE_RX = re.compile(r"^-?\d{4}-\d{2}-\d{2}(T\d.*)?$")
|
||||
_OCTAL_RX = re.compile(r"^0o?\d+$")
|
||||
_HEX_RX = re.compile(r"^0x[0-9A-Fa-f]+$")
|
||||
_FLOAT_RX = re.compile(r"^-?\d+\.\d+([eE][-+]?\d+)?$")
|
||||
|
||||
|
||||
def _parse_scalar(s: str, lineno: int) -> object:
|
||||
"""Turn a stripped value string into a Python value (str, int,
|
||||
bool, None). Quoted strings preserve their literal content
|
||||
(with standard escapes); bare strings are accepted only when
|
||||
they're unambiguous."""
|
||||
s = s.strip()
|
||||
if not s:
|
||||
return ""
|
||||
|
||||
# Quoted forms first — content is whatever's between the quotes
|
||||
# with the documented escapes applied.
|
||||
if (s.startswith('"') and s.endswith('"')) or (
|
||||
s.startswith("'") and s.endswith("'")
|
||||
):
|
||||
if len(s) < 2:
|
||||
die(f"yaml-subset: unterminated quoted string on line {lineno}")
|
||||
body = s[1:-1]
|
||||
if s.startswith('"'):
|
||||
# JSON-style escapes for double quotes.
|
||||
try:
|
||||
return body.encode("utf-8").decode("unicode_escape")
|
||||
except UnicodeDecodeError as e:
|
||||
die(f"yaml-subset: bad escape on line {lineno}: {e}")
|
||||
else:
|
||||
# Single quotes: only '' → ' (standard YAML); no other escapes.
|
||||
return body.replace("''", "'")
|
||||
|
||||
# Reserved bool-like tokens that aren't `true` / `false` —
|
||||
# always reject so users have to be explicit.
|
||||
if s in _RESERVED_BOOL_LIKE:
|
||||
if s in ("true", "false"):
|
||||
return s == "true"
|
||||
die(
|
||||
f"yaml-subset: bare {s!r} on line {lineno} is ambiguous "
|
||||
f"(use literal `true` / `false`, or quote it as a string)"
|
||||
)
|
||||
|
||||
if s == "true":
|
||||
return True
|
||||
if s == "false":
|
||||
return False
|
||||
if s in ("null", "~"):
|
||||
return None
|
||||
|
||||
if _INT_RX.match(s):
|
||||
return int(s)
|
||||
|
||||
# Look-alikes that we reject to keep the user in control.
|
||||
if _DATE_RX.match(s):
|
||||
die(
|
||||
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
||||
f"date — quote it as a string or use an explicit int"
|
||||
)
|
||||
if _OCTAL_RX.match(s):
|
||||
die(
|
||||
f"yaml-subset: bare {s!r} on line {lineno} looks like an "
|
||||
f"octal/0-prefixed integer — quote it as a string"
|
||||
)
|
||||
if _HEX_RX.match(s):
|
||||
die(
|
||||
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
||||
f"hex integer — quote it as a string"
|
||||
)
|
||||
if _FLOAT_RX.match(s):
|
||||
die(
|
||||
f"yaml-subset: floats not supported (line {lineno}, "
|
||||
f"value {s!r}); use an int or quote as a string"
|
||||
)
|
||||
|
||||
# Bare strings: anything that matches the bare-string pattern is
|
||||
# accepted as a string literal. Otherwise we hand it back as a
|
||||
# string anyway — for URLs, paths, hostnames, etc. that contain
|
||||
# special chars. The PRD calls for rejecting "ambiguous" strings,
|
||||
# and we've already rejected the ambiguous shapes above; what's
|
||||
# left is unambiguously a string.
|
||||
return s
|
||||
|
||||
|
||||
# --- Inline list / dict ----------------------------------------------------
|
||||
|
||||
|
||||
def _parse_inline(s: str, lineno: int) -> object:
|
||||
"""Inline list `[a, b]` or dict `{a: 1, b: 2}` or scalar.
|
||||
Nested flow more than one level deep is rejected (PRD)."""
|
||||
s = s.strip()
|
||||
if s.startswith("["):
|
||||
if not s.endswith("]"):
|
||||
die(f"yaml-subset: unterminated `[` on line {lineno}")
|
||||
body = s[1:-1].strip()
|
||||
if not body:
|
||||
return []
|
||||
items: list[object] = []
|
||||
for raw in _split_flow(body, lineno, "list"):
|
||||
v = _parse_scalar(raw, lineno)
|
||||
items.append(v)
|
||||
return items
|
||||
if s.startswith("{"):
|
||||
if not s.endswith("}"):
|
||||
die(f"yaml-subset: unterminated `{{` on line {lineno}")
|
||||
body = s[1:-1].strip()
|
||||
if not body:
|
||||
return {}
|
||||
out: dict[str, object] = {}
|
||||
for raw in _split_flow(body, lineno, "dict"):
|
||||
if ":" not in raw:
|
||||
die(
|
||||
f"yaml-subset: inline dict entry on line {lineno} "
|
||||
f"missing `:` ({raw!r})"
|
||||
)
|
||||
k, _, v = raw.partition(":")
|
||||
k = k.strip()
|
||||
if not _BARE_RX.match(k):
|
||||
die(
|
||||
f"yaml-subset: inline dict key on line {lineno} "
|
||||
f"must be a bare identifier ({k!r})"
|
||||
)
|
||||
out[k] = _parse_scalar(v.strip(), lineno)
|
||||
return out
|
||||
return _parse_scalar(s, lineno)
|
||||
|
||||
|
||||
def _split_flow(body: str, lineno: int, kind: str) -> list[str]:
|
||||
"""Split `a, b, c` respecting quoted strings. Rejects nested
|
||||
flow (a list/dict inside the flow body) since the PRD limits
|
||||
flow nesting to one level."""
|
||||
items: list[str] = []
|
||||
depth_b = 0
|
||||
depth_c = 0
|
||||
in_single = False
|
||||
in_double = False
|
||||
cur = []
|
||||
for ch in body:
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
elif ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
elif not in_single and not in_double:
|
||||
if ch in "[{":
|
||||
depth_b += 1
|
||||
elif ch in "]}":
|
||||
depth_b -= 1
|
||||
if depth_b > 0:
|
||||
die(
|
||||
f"yaml-subset: nested flow {kind} on line "
|
||||
f"{lineno} (only one level of flow allowed)"
|
||||
)
|
||||
if ch == "," and depth_b == 0 and depth_c == 0:
|
||||
items.append("".join(cur))
|
||||
cur = []
|
||||
continue
|
||||
cur.append(ch)
|
||||
if cur:
|
||||
items.append("".join(cur))
|
||||
return [s.strip() for s in items if s.strip()]
|
||||
|
||||
|
||||
# --- Block parser ----------------------------------------------------------
|
||||
|
||||
|
||||
def _split_key_value(content: str, lineno: int) -> tuple[str, str]:
|
||||
"""Find the FIRST top-level `:` that separates a key from its
|
||||
value (ignoring `:` inside quoted strings). Returns (key, value).
|
||||
`value` may be empty (block-form mapping)."""
|
||||
in_single = False
|
||||
in_double = False
|
||||
for i, ch in enumerate(content):
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
elif ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
elif ch == ":" and not in_single and not in_double:
|
||||
# `:` must be followed by space or be at end-of-line to
|
||||
# count as a key separator (otherwise `key:value` would
|
||||
# ambiguous with URLs etc.).
|
||||
if i + 1 >= len(content) or content[i + 1] in (" ", "\t"):
|
||||
return content[:i].strip(), content[i + 1:].lstrip()
|
||||
die(f"yaml-subset: line {lineno} missing `: ` separator: {content!r}")
|
||||
|
||||
|
||||
def _parse_block(
    lines: list[_Line], idx: int, base_indent: int
) -> tuple[object, int]:
    """Parse the block that begins at `lines[idx]`.

    The first line must sit exactly at `base_indent`; anything else
    is fatal. A first line that is a dash item (`- ...` or a lone
    `-`) selects the list parser, everything else the mapping parser.
    Returns `(value, new_idx)`, `new_idx` being the index of the
    first line that was not consumed.
    """
    if idx >= len(lines):
        die("yaml-subset: unexpected end of document")

    head = lines[idx]
    if head.indent < base_indent:
        die(
            f"yaml-subset: line {head.lineno} indented less than "
            f"expected (got {head.indent}, expected >= {base_indent})"
        )
    if head.indent > base_indent:
        die(
            f"yaml-subset: line {head.lineno} indented more than "
            f"expected (got {head.indent}, expected {base_indent})"
        )

    starts_list = head.content == "-" or head.content.startswith("- ")
    parser = _parse_block_list if starts_list else _parse_block_mapping
    return parser(lines, idx, base_indent)
|
||||
|
||||
|
||||
def _parse_block_mapping(
|
||||
lines: list[_Line], idx: int, base_indent: int
|
||||
) -> tuple[dict[str, object], int]:
|
||||
out: dict[str, object] = {}
|
||||
while idx < len(lines) and lines[idx].indent == base_indent:
|
||||
line = lines[idx]
|
||||
if line.content.startswith("- "):
|
||||
die(
|
||||
f"yaml-subset: line {line.lineno} unexpected list "
|
||||
f"item at mapping indent (got `-`, expected `key:`)"
|
||||
)
|
||||
key, value_text = _split_key_value(line.content, line.lineno)
|
||||
if not _BARE_RX.match(key):
|
||||
die(
|
||||
f"yaml-subset: line {line.lineno} key {key!r} is not "
|
||||
f"a bare identifier"
|
||||
)
|
||||
if key in out:
|
||||
die(
|
||||
f"yaml-subset: line {line.lineno} duplicate key {key!r}"
|
||||
)
|
||||
if value_text:
|
||||
out[key] = _parse_inline(value_text, line.lineno)
|
||||
idx += 1
|
||||
else:
|
||||
# Value is a block on subsequent lines.
|
||||
idx += 1
|
||||
if idx >= len(lines) or lines[idx].indent <= base_indent:
|
||||
# Empty block — treat as None to match YAML.
|
||||
out[key] = None
|
||||
continue
|
||||
child_indent = lines[idx].indent
|
||||
value, idx = _parse_block(lines, idx, child_indent)
|
||||
out[key] = value
|
||||
return out, idx
|
||||
|
||||
|
||||
def _parse_block_list(
|
||||
lines: list[_Line], idx: int, base_indent: int
|
||||
) -> tuple[list[object], int]:
|
||||
items: list[object] = []
|
||||
while idx < len(lines) and lines[idx].indent == base_indent and (
|
||||
lines[idx].content.startswith("- ") or lines[idx].content == "-"
|
||||
):
|
||||
line = lines[idx]
|
||||
rest = line.content[2:] if line.content.startswith("- ") else ""
|
||||
rest = rest.strip()
|
||||
|
||||
# Look ahead at the next non-empty line: if it's indented
|
||||
# more than the dash AND aligned with the rest's column,
|
||||
# we have a multi-line mapping item.
|
||||
if rest and ":" in rest and _looks_like_kv(rest):
|
||||
# The first key:value of a multi-line mapping list item.
|
||||
# Subsequent keys live at indent = base_indent + 2 (or
|
||||
# wherever the content after `- ` started).
|
||||
content_col = base_indent + 2
|
||||
first_key, first_value_text = _split_key_value(rest, line.lineno)
|
||||
if not _BARE_RX.match(first_key):
|
||||
die(
|
||||
f"yaml-subset: line {line.lineno} key {first_key!r} "
|
||||
f"is not a bare identifier"
|
||||
)
|
||||
item: dict[str, object] = {}
|
||||
if first_value_text:
|
||||
item[first_key] = _parse_inline(first_value_text, line.lineno)
|
||||
idx += 1
|
||||
else:
|
||||
idx += 1
|
||||
if idx < len(lines) and lines[idx].indent > content_col:
|
||||
nested_indent = lines[idx].indent
|
||||
value, idx = _parse_block(lines, idx, nested_indent)
|
||||
item[first_key] = value
|
||||
else:
|
||||
item[first_key] = None
|
||||
# Consume additional keys at content_col.
|
||||
while idx < len(lines) and lines[idx].indent == content_col:
|
||||
ln = lines[idx]
|
||||
if ln.content.startswith("- "):
|
||||
break # next list item, not a sibling key
|
||||
k, v_text = _split_key_value(ln.content, ln.lineno)
|
||||
if not _BARE_RX.match(k):
|
||||
die(
|
||||
f"yaml-subset: line {ln.lineno} key {k!r} is "
|
||||
f"not a bare identifier"
|
||||
)
|
||||
if k in item:
|
||||
die(f"yaml-subset: line {ln.lineno} duplicate key {k!r}")
|
||||
if v_text:
|
||||
item[k] = _parse_inline(v_text, ln.lineno)
|
||||
idx += 1
|
||||
else:
|
||||
idx += 1
|
||||
if idx < len(lines) and lines[idx].indent > content_col:
|
||||
nested_indent = lines[idx].indent
|
||||
value, idx = _parse_block(lines, idx, nested_indent)
|
||||
item[k] = value
|
||||
else:
|
||||
item[k] = None
|
||||
items.append(item)
|
||||
elif rest:
|
||||
# Inline scalar / inline list / inline dict on the dash line.
|
||||
items.append(_parse_inline(rest, line.lineno))
|
||||
idx += 1
|
||||
else:
|
||||
# Bare `-` — value is a block on subsequent lines.
|
||||
idx += 1
|
||||
if idx >= len(lines) or lines[idx].indent <= base_indent:
|
||||
items.append(None)
|
||||
continue
|
||||
child_indent = lines[idx].indent
|
||||
value, idx = _parse_block(lines, idx, child_indent)
|
||||
items.append(value)
|
||||
return items, idx
|
||||
|
||||
|
||||
def _looks_like_kv(s: str) -> bool:
|
||||
"""Heuristic: does `s` look like a mapping `key: value` line?
|
||||
True if there's an unquoted `:` that's followed by space-or-EOL."""
|
||||
in_single = False
|
||||
in_double = False
|
||||
for i, ch in enumerate(s):
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
elif ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
elif ch == ":" and not in_single and not in_double:
|
||||
if i + 1 >= len(s) or s[i + 1] in (" ", "\t"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# --- Public API -------------------------------------------------------------
|
||||
|
||||
|
||||
def _mask_quoted(s: str) -> str:
    """Return `s` with the inside of every quoted string replaced by
    `x`, so structural checks cannot be fooled by quoted content.
    The quote characters themselves are kept. A backslash inside a
    double-quoted string escapes the following character."""
    out: list[str] = []
    quote = ""
    escaped = False
    for ch in s:
        if not quote:
            if ch in ("'", '"'):
                quote = ch
            out.append(ch)
        elif escaped:
            escaped = False
            out.append("x")
        elif ch == "\\" and quote == '"':
            escaped = True
            out.append("x")
        elif ch == quote:
            quote = ""
            out.append(ch)
        else:
            out.append("x")
    return "".join(out)


def parse_yaml_subset(text: str) -> dict[str, object]:
    """Parse a YAML-subset document and return its top-level mapping.

    An empty document (only blanks / comments) yields `{}`. Dies with
    a line-numbered message when the document uses an unsupported
    feature (block scalars, anchors / aliases, `!!` tags), when the
    first content line is not in column 0, when content is left over
    after the top-level block, or when the top level is not a mapping.
    """
    # Reject features that have no place in our schema before we
    # tokenize, with line numbers from the raw text. The checks run on
    # a copy with quoted content masked out and trailing comments
    # removed, so `"a *b"` or `# see !!tag` are not flagged.
    for n, raw in enumerate(text.splitlines(), start=1):
        s = _strip_trailing_comment(_mask_quoted(raw.strip()))
        # Block-scalar indicator either opening the line or sitting in
        # value position (`key: |`, `-  >-`).
        if s.startswith(("|", ">", "- |", "- >")) or re.search(
            r"(?:^-|:)[ \t]+[|>][+\-0-9]*$", s
        ):
            die(
                f"yaml-subset: line {n} uses a multi-line block "
                f"scalar (`|` / `>`) — not supported. Use a quoted "
                f"single-line string instead."
            )
        if re.search(r"(^|\s)[&*][A-Za-z0-9_]+", s):
            die(
                f"yaml-subset: line {n} uses anchors / aliases "
                f"(`&` / `*`) — not supported."
            )
        if "!!" in s:
            die(
                f"yaml-subset: line {n} uses a YAML tag (`!!`) — not "
                f"supported."
            )

    lines = _tokenize(text)
    if not lines:
        return {}
    base_indent = lines[0].indent
    if base_indent != 0:
        die(
            f"yaml-subset: top-level content must start in column 0 "
            f"(got column {base_indent} on line {lines[0].lineno})"
        )
    value, consumed = _parse_block(lines, 0, 0)
    if consumed < len(lines):
        die(
            f"yaml-subset: trailing content starting on line "
            f"{lines[consumed].lineno}"
        )
    if not isinstance(value, dict):
        die("yaml-subset: top-level value must be a mapping")
    return value
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> tuple[dict[str, object], str]:
    """Split a Markdown document into (frontmatter mapping, body).

    The frontmatter is the text between a first line that is `---`
    and the next line that is `---` (trailing whitespace ignored); it
    is parsed with `parse_yaml_subset`. The body is the verbatim text
    after the closing fence line, original line endings included.

    If the first line is not `---` the document has no frontmatter
    and `({}, text)` is returned. An opening fence without a closing
    one is fatal.
    """
    if not text:
        return {}, ""

    first_line, newline, _ = text.partition("\n")
    if first_line.strip() != "---":
        return {}, text  # no frontmatter; whole document is body

    if newline:
        fm_start = pos = len(first_line) + 1
        # Walk line by line (splitting on "\n" only, so slices of
        # `text` stay exact) until the closing fence.
        while True:
            nl = text.find("\n", pos)
            line_end = len(text) if nl == -1 else nl
            if text[pos:line_end].rstrip() == "---":
                # The leading "\n" stands in for the opening fence so
                # that line numbers in parse errors match the file.
                fm = parse_yaml_subset("\n" + text[fm_start:pos])
                body = "" if nl == -1 else text[nl + 1:]
                return fm, body
            if nl == -1:
                break
            pos = nl + 1

    die("frontmatter: opening `---` has no matching closing `---`")
|
||||
@@ -0,0 +1,327 @@
|
||||
"""Unit: YAML-subset parser used by the per-file MD manifest
|
||||
(PRD 0011). Covers happy paths, the constructs the manifest files
|
||||
actually use, and every rejection case the PRD enumerates."""
|
||||
|
||||
import textwrap
|
||||
import unittest
|
||||
|
||||
from claude_bottle.log import Die
|
||||
from claude_bottle.yaml_subset import parse_frontmatter, parse_yaml_subset
|
||||
|
||||
|
||||
def _y(s: str):
    """Dedent `s`, drop its leading newlines, and parse the result
    with `parse_yaml_subset` — lets tests use indented triple-quoted
    YAML literals."""
    document = textwrap.dedent(s).lstrip("\n")
    return parse_yaml_subset(document)
|
||||
|
||||
|
||||
class TestScalars(unittest.TestCase):
    """Every scalar form the subset accepts, one value per test."""

    def test_string(self):
        parsed = _y("k: hello\n")
        self.assertEqual({"k": "hello"}, parsed)

    def test_string_with_url_chars(self):
        # `:` not followed by a space must not split the value.
        parsed = _y("k: https://example.com/path?x=1\n")
        self.assertEqual({"k": "https://example.com/path?x=1"}, parsed)

    def test_int(self):
        parsed = _y("port: 9099\n")
        self.assertEqual({"port": 9099}, parsed)

    def test_negative_int(self):
        parsed = _y("n: -3\n")
        self.assertEqual({"n": -3}, parsed)

    def test_bool_true(self):
        parsed = _y("x: true\n")
        self.assertEqual({"x": True}, parsed)

    def test_bool_false(self):
        parsed = _y("x: false\n")
        self.assertEqual({"x": False}, parsed)

    def test_null(self):
        parsed = _y("x: null\n")
        self.assertEqual({"x": None}, parsed)

    def test_tilde_null(self):
        parsed = _y("x: ~\n")
        self.assertEqual({"x": None}, parsed)

    def test_double_quoted_string(self):
        parsed = _y('k: "a b"\n')
        self.assertEqual({"k": "a b"}, parsed)

    def test_double_quoted_escape(self):
        # The document holds a literal backslash-n; the parser must
        # turn it into a newline.
        parsed = _y(r'k: "a\nb"' + "\n")
        self.assertEqual({"k": "a\nb"}, parsed)

    def test_single_quoted_string(self):
        parsed = _y("k: 'a b'\n")
        self.assertEqual({"k": "a b"}, parsed)

    def test_single_quoted_apos_double(self):
        # Single-quoted YAML uses `''` to embed a literal `'`.
        parsed = _y("k: 'it''s'\n")
        self.assertEqual({"k": "it's"}, parsed)
|
||||
|
||||
|
||||
class TestForbiddenBoolLikes(unittest.TestCase):
    """Ambiguous bool-ish tokens have to be quoted explicitly."""

    def _expect_die(self, src: str):
        """Assert that parsing `src` raises `Die`."""
        self.assertRaises(Die, _y, src)

    def test_yes_dies(self):
        self._expect_die("k: yes\n")

    def test_no_dies(self):
        self._expect_die("k: no\n")

    def test_on_dies(self):
        self._expect_die("k: on\n")

    def test_capital_TRUE_dies(self):
        self._expect_die("k: TRUE\n")

    def test_norway_quoted_is_fine(self):
        # Quoting is the documented escape hatch for the Norway problem.
        parsed = _y('country: "NO"\n')
        self.assertEqual({"country": "NO"}, parsed)
|
||||
|
||||
|
||||
class TestForbiddenScalarShapes(unittest.TestCase):
    """Bare tokens that look like non-string literals are rejected."""

    def _expect_die(self, src: str):
        """Assert that parsing `src` raises `Die`."""
        self.assertRaises(Die, _y, src)

    def test_bare_date_dies(self):
        self._expect_die("k: 2026-05-24\n")

    def test_bare_octal_dies(self):
        self._expect_die("k: 0o755\n")

    def test_bare_hex_dies(self):
        self._expect_die("k: 0xFF\n")

    def test_bare_float_dies(self):
        self._expect_die("k: 1.5\n")

    def test_quoted_date_is_fine(self):
        # The same shape is accepted once quoted.
        parsed = _y('k: "2026-05-24"\n')
        self.assertEqual({"k": "2026-05-24"}, parsed)
|
||||
|
||||
|
||||
class TestMapping(unittest.TestCase):
    """Block mappings: flat, nested, and the key rules."""

    def test_flat_mapping(self):
        self.assertEqual(
            {"a": 1, "b": "two", "c": True},
            _y("""
                a: 1
                b: two
                c: true
            """),
        )

    def test_nested_mapping(self):
        # The nested keys must be indented under `outer` for the
        # expected structure to come out.
        out = _y("""
            outer:
              inner: hello
              other: 5
        """)
        self.assertEqual({"outer": {"inner": "hello", "other": 5}}, out)

    def test_duplicate_key_dies(self):
        with self.assertRaises(Die):
            _y("""
                a: 1
                a: 2
            """)

    def test_key_must_be_bare_identifier(self):
        with self.assertRaises(Die):
            _y('"weird key": 1\n')
|
||||
|
||||
|
||||
class TestBlockList(unittest.TestCase):
    """Block lists under a key: scalar items and mapping items."""

    def test_list_of_strings(self):
        out = _y("""
            allowlist:
              - example.com
              - github.com
        """)
        self.assertEqual({"allowlist": ["example.com", "github.com"]}, out)

    def test_list_of_mappings(self):
        # Sibling keys align with the first key after the dash.
        out = _y("""
            routes:
              - path: /a/
                upstream: https://a.example
              - path: /b/
                upstream: https://b.example
        """)
        self.assertEqual(
            {"routes": [
                {"path": "/a/", "upstream": "https://a.example"},
                {"path": "/b/", "upstream": "https://b.example"},
            ]},
            out,
        )

    def test_list_item_with_nested_mapping(self):
        out = _y("""
            entries:
              - name: foo
                ExtraHosts:
                  host.example: 10.0.0.1
              - name: bar
        """)
        self.assertEqual(
            {"entries": [
                {"name": "foo", "ExtraHosts": {"host.example": "10.0.0.1"}},
                {"name": "bar"},
            ]},
            out,
        )

    def test_list_item_with_inline_list_value(self):
        # role: [git-insteadof, tea-login] — the exact shape in the
        # claude-bottle manifest.
        out = _y("""
            routes:
              - path: /x/
                role: [git-insteadof, tea-login]
        """)
        self.assertEqual(
            {"routes": [
                {"path": "/x/", "role": ["git-insteadof", "tea-login"]},
            ]},
            out,
        )
|
||||
|
||||
|
||||
class TestInline(unittest.TestCase):
    """One-level flow collections used as values."""

    def test_inline_list(self):
        parsed = _y("l: [1, 2, 3]\n")
        self.assertEqual({"l": [1, 2, 3]}, parsed)

    def test_inline_list_of_strings(self):
        parsed = _y("l: [a, b, c]\n")
        self.assertEqual({"l": ["a", "b", "c"]}, parsed)

    def test_inline_dict(self):
        # Quoted digits stay strings.
        parsed = _y('d: {a: "1", b: "2"}\n')
        self.assertEqual({"d": {"a": "1", "b": "2"}}, parsed)

    def test_nested_flow_dies(self):
        self.assertRaises(Die, _y, "l: [[1, 2], [3, 4]]\n")
|
||||
|
||||
|
||||
class TestForbiddenConstructs(unittest.TestCase):
    """YAML features outside the subset must be rejected."""

    def test_anchor_dies(self):
        with self.assertRaises(Die):
            _y("""
                a: &anchor 1
                b: *anchor
            """)

    def test_multiline_block_scalar_dies(self):
        # Continuation lines are indented under the key, as a real
        # block scalar would be.
        with self.assertRaises(Die):
            _y("""
                k: |
                  line 1
                  line 2
            """)

    def test_tag_dies(self):
        with self.assertRaises(Die):
            _y("k: !!str hello\n")

    def test_tab_in_indent_dies(self):
        with self.assertRaises(Die):
            _y("a:\n\tb: 1\n")
|
||||
|
||||
|
||||
class TestComments(unittest.TestCase):
    """`#` comments: whole-line, trailing, and `#` that is not one."""

    def test_full_line_comment(self):
        parsed = _y("""
            # comment
            k: v
        """)
        self.assertEqual({"k": "v"}, parsed)

    def test_trailing_comment(self):
        parsed = _y("k: v # trailing\n")
        self.assertEqual({"k": "v"}, parsed)

    def test_hash_in_quoted_string_kept(self):
        # Inside quotes `#` is ordinary content.
        parsed = _y('k: "a#b"\n')
        self.assertEqual({"k": "a#b"}, parsed)
|
||||
|
||||
|
||||
class TestRealisticBottleFile(unittest.TestCase):
    """The exact shape a real bottle frontmatter takes — the parser
    has to round-trip this without surprise."""

    def test_dev_bottle(self):
        # `cred_proxy`, `git` and `egress` are top-level keys; the
        # assertions below index into each of them.
        out = _y("""
            cred_proxy:
              routes:
                - path: /anthropic/
                  upstream: https://api.anthropic.com
                  auth_scheme: Bearer
                  token_ref: CLAUDE_BOTTLE_OAUTH_TOKEN
                  role: anthropic-base-url
                - path: /gitea/dideric/
                  upstream: https://gitea.dideric.is
                  auth_scheme: token
                  token_ref: GITEA_TOKEN
                  role: [git-insteadof, tea-login]
            git:
              - Name: claude-bottle
                Upstream: ssh://git@gitea.dideric.is:30009/x/y.git
                IdentityFile: ~/.ssh/gitea.pem
                ExtraHosts:
                  gitea.dideric.is: 100.78.141.42
            egress:
              allowlist:
                - example.com
        """)
        # Spot-check the deep parts; the structure is large.
        self.assertEqual(2, len(out["cred_proxy"]["routes"]))
        self.assertEqual(
            ["git-insteadof", "tea-login"],
            out["cred_proxy"]["routes"][1]["role"],
        )
        self.assertEqual(
            "100.78.141.42",
            out["git"][0]["ExtraHosts"]["gitea.dideric.is"],
        )
        self.assertEqual(["example.com"], out["egress"]["allowlist"])
|
||||
|
||||
|
||||
class TestFrontmatter(unittest.TestCase):
    """`parse_frontmatter`: fence detection and verbatim body."""

    def test_basic(self):
        text = textwrap.dedent("""
            ---
            bottle: dev
            ---
            This is the body.
        """).lstrip("\n")
        fm, body = parse_frontmatter(text)
        self.assertEqual({"bottle": "dev"}, fm)
        # The body is everything after the closing fence, verbatim.
        self.assertEqual("This is the body.\n", body)

    def test_no_frontmatter_passes_through(self):
        text = "no frontmatter here\njust body\n"
        fm, body = parse_frontmatter(text)
        self.assertEqual({}, fm)
        self.assertEqual(text, body)

    def test_unclosed_frontmatter_dies(self):
        with self.assertRaises(Die):
            parse_frontmatter("---\nbottle: dev\nno closing")

    def test_body_preserves_blank_lines(self):
        text = (
            "---\n"
            "k: v\n"
            "---\n"
            "\n"
            "line one\n"
            "\n"
            "line three\n"
        )
        _, body = parse_frontmatter(text)
        self.assertEqual("\nline one\n\nline three\n", body)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user