Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3ceff1ac4f | |||
| a397d37bbe | |||
| 37a780acf6 | |||
| e2514b3885 | |||
| a002d32779 | |||
| e8d8cf8a64 | |||
| 9470b8f955 | |||
| 249169eca1 | |||
| dede230c4a | |||
| c39d5dc63f | |||
| 4359bd6099 | |||
| f95eabeb86 | |||
| b872985a65 | |||
| a4e12855df | |||
| e0ecb7ceb1 | |||
| 41590ede1f | |||
| 963a178b20 | |||
| e9adcdd91d |
+36
-30
@@ -69,6 +69,12 @@ class YamlSubsetError(ValueError):
|
|||||||
egress sidecar's addon) handle it as a normal exception."""
|
egress sidecar's addon) handle it as a normal exception."""
|
||||||
|
|
||||||
|
|
||||||
|
def die(msg: str) -> None:
|
||||||
|
"""Module-local helper so the parser body reads cleanly. Just
|
||||||
|
raises YamlSubsetError — the `bot-bottle: error: ` prefix
|
||||||
|
is added by the boundary `die` in `bot_bottle.log`."""
|
||||||
|
raise YamlSubsetError(msg)
|
||||||
|
|
||||||
|
|
||||||
# --- Tokenizer / line preprocessing ----------------------------------------
|
# --- Tokenizer / line preprocessing ----------------------------------------
|
||||||
|
|
||||||
@@ -113,7 +119,7 @@ def _tokenize(text: str) -> list[_Line]:
|
|||||||
# editors render them differently and the spec says spaces.
|
# editors render them differently and the spec says spaces.
|
||||||
leading = len(raw) - len(raw.lstrip(" \t"))
|
leading = len(raw) - len(raw.lstrip(" \t"))
|
||||||
if "\t" in raw[:leading]:
|
if "\t" in raw[:leading]:
|
||||||
raise YamlSubsetError(f"yaml-subset: tab character in indent on line {n}")
|
die(f"yaml-subset: tab character in indent on line {n}")
|
||||||
stripped = raw.strip()
|
stripped = raw.strip()
|
||||||
if not stripped:
|
if not stripped:
|
||||||
continue
|
continue
|
||||||
@@ -163,14 +169,14 @@ def _parse_scalar(s: str, lineno: int) -> object:
|
|||||||
s.startswith("'") and s.endswith("'")
|
s.startswith("'") and s.endswith("'")
|
||||||
):
|
):
|
||||||
if len(s) < 2:
|
if len(s) < 2:
|
||||||
raise YamlSubsetError(f"yaml-subset: unterminated quoted string on line {lineno}")
|
die(f"yaml-subset: unterminated quoted string on line {lineno}")
|
||||||
body = s[1:-1]
|
body = s[1:-1]
|
||||||
if s.startswith('"'):
|
if s.startswith('"'):
|
||||||
# JSON-style escapes for double quotes.
|
# JSON-style escapes for double quotes.
|
||||||
try:
|
try:
|
||||||
return body.encode("utf-8").decode("unicode_escape")
|
return body.encode("utf-8").decode("unicode_escape")
|
||||||
except UnicodeDecodeError as e:
|
except UnicodeDecodeError as e:
|
||||||
raise YamlSubsetError(f"yaml-subset: bad escape on line {lineno}: {e}")
|
die(f"yaml-subset: bad escape on line {lineno}: {e}")
|
||||||
else:
|
else:
|
||||||
# Single quotes: only '' → ' (standard YAML); no other escapes.
|
# Single quotes: only '' → ' (standard YAML); no other escapes.
|
||||||
return body.replace("''", "'")
|
return body.replace("''", "'")
|
||||||
@@ -180,7 +186,7 @@ def _parse_scalar(s: str, lineno: int) -> object:
|
|||||||
if s in _RESERVED_BOOL_LIKE:
|
if s in _RESERVED_BOOL_LIKE:
|
||||||
if s in ("true", "false"):
|
if s in ("true", "false"):
|
||||||
return s == "true"
|
return s == "true"
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: bare {s!r} on line {lineno} is ambiguous "
|
f"yaml-subset: bare {s!r} on line {lineno} is ambiguous "
|
||||||
f"(use literal `true` / `false`, or quote it as a string)"
|
f"(use literal `true` / `false`, or quote it as a string)"
|
||||||
)
|
)
|
||||||
@@ -197,22 +203,22 @@ def _parse_scalar(s: str, lineno: int) -> object:
|
|||||||
|
|
||||||
# Look-alikes that we reject to keep the user in control.
|
# Look-alikes that we reject to keep the user in control.
|
||||||
if _DATE_RX.match(s):
|
if _DATE_RX.match(s):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
||||||
f"date — quote it as a string or use an explicit int"
|
f"date — quote it as a string or use an explicit int"
|
||||||
)
|
)
|
||||||
if _OCTAL_RX.match(s):
|
if _OCTAL_RX.match(s):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: bare {s!r} on line {lineno} looks like an "
|
f"yaml-subset: bare {s!r} on line {lineno} looks like an "
|
||||||
f"octal/0-prefixed integer — quote it as a string"
|
f"octal/0-prefixed integer — quote it as a string"
|
||||||
)
|
)
|
||||||
if _HEX_RX.match(s):
|
if _HEX_RX.match(s):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
f"yaml-subset: bare {s!r} on line {lineno} looks like a "
|
||||||
f"hex integer — quote it as a string"
|
f"hex integer — quote it as a string"
|
||||||
)
|
)
|
||||||
if _FLOAT_RX.match(s):
|
if _FLOAT_RX.match(s):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: floats not supported (line {lineno}, "
|
f"yaml-subset: floats not supported (line {lineno}, "
|
||||||
f"value {s!r}); use an int or quote as a string"
|
f"value {s!r}); use an int or quote as a string"
|
||||||
)
|
)
|
||||||
@@ -235,7 +241,7 @@ def _parse_inline(s: str, lineno: int) -> object:
|
|||||||
s = s.strip()
|
s = s.strip()
|
||||||
if s.startswith("["):
|
if s.startswith("["):
|
||||||
if not s.endswith("]"):
|
if not s.endswith("]"):
|
||||||
raise YamlSubsetError(f"yaml-subset: unterminated `[` on line {lineno}")
|
die(f"yaml-subset: unterminated `[` on line {lineno}")
|
||||||
body = s[1:-1].strip()
|
body = s[1:-1].strip()
|
||||||
if not body:
|
if not body:
|
||||||
return []
|
return []
|
||||||
@@ -246,21 +252,21 @@ def _parse_inline(s: str, lineno: int) -> object:
|
|||||||
return items
|
return items
|
||||||
if s.startswith("{"):
|
if s.startswith("{"):
|
||||||
if not s.endswith("}"):
|
if not s.endswith("}"):
|
||||||
raise YamlSubsetError(f"yaml-subset: unterminated `{{` on line {lineno}")
|
die(f"yaml-subset: unterminated `{{` on line {lineno}")
|
||||||
body = s[1:-1].strip()
|
body = s[1:-1].strip()
|
||||||
if not body:
|
if not body:
|
||||||
return {}
|
return {}
|
||||||
out: dict[str, object] = {}
|
out: dict[str, object] = {}
|
||||||
for raw in _split_flow(body, lineno, "dict"):
|
for raw in _split_flow(body, lineno, "dict"):
|
||||||
if ":" not in raw:
|
if ":" not in raw:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: inline dict entry on line {lineno} "
|
f"yaml-subset: inline dict entry on line {lineno} "
|
||||||
f"missing `:` ({raw!r})"
|
f"missing `:` ({raw!r})"
|
||||||
)
|
)
|
||||||
k, _, v = raw.partition(":")
|
k, _, v = raw.partition(":")
|
||||||
k = k.strip()
|
k = k.strip()
|
||||||
if not _BARE_RX.match(k):
|
if not _BARE_RX.match(k):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: inline dict key on line {lineno} "
|
f"yaml-subset: inline dict key on line {lineno} "
|
||||||
f"must be a bare identifier ({k!r})"
|
f"must be a bare identifier ({k!r})"
|
||||||
)
|
)
|
||||||
@@ -290,7 +296,7 @@ def _split_flow(body: str, lineno: int, kind: str) -> list[str]:
|
|||||||
elif ch in "]}":
|
elif ch in "]}":
|
||||||
depth_b -= 1
|
depth_b -= 1
|
||||||
if depth_b > 0:
|
if depth_b > 0:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: nested flow {kind} on line "
|
f"yaml-subset: nested flow {kind} on line "
|
||||||
f"{lineno} (only one level of flow allowed)"
|
f"{lineno} (only one level of flow allowed)"
|
||||||
)
|
)
|
||||||
@@ -324,7 +330,7 @@ def _split_key_value(content: str, lineno: int) -> tuple[str, str]:
|
|||||||
# ambiguous with URLs etc.).
|
# ambiguous with URLs etc.).
|
||||||
if i + 1 >= len(content) or content[i + 1] in (" ", "\t"):
|
if i + 1 >= len(content) or content[i + 1] in (" ", "\t"):
|
||||||
return content[:i].strip(), content[i + 1:].lstrip()
|
return content[:i].strip(), content[i + 1:].lstrip()
|
||||||
raise YamlSubsetError(f"yaml-subset: line {lineno} missing `: ` separator: {content!r}")
|
die(f"yaml-subset: line {lineno} missing `: ` separator: {content!r}")
|
||||||
return "", "" # unreachable, but needed for type checker
|
return "", "" # unreachable, but needed for type checker
|
||||||
|
|
||||||
|
|
||||||
@@ -335,15 +341,15 @@ def _parse_block(
|
|||||||
to live at `base_indent`. Returns (value, new_idx) where
|
to live at `base_indent`. Returns (value, new_idx) where
|
||||||
`new_idx` is the index of the first unconsumed line."""
|
`new_idx` is the index of the first unconsumed line."""
|
||||||
if idx >= len(lines):
|
if idx >= len(lines):
|
||||||
raise YamlSubsetError("yaml-subset: unexpected end of document")
|
die("yaml-subset: unexpected end of document")
|
||||||
first = lines[idx]
|
first = lines[idx]
|
||||||
if first.indent < base_indent:
|
if first.indent < base_indent:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {first.lineno} indented less than "
|
f"yaml-subset: line {first.lineno} indented less than "
|
||||||
f"expected (got {first.indent}, expected >= {base_indent})"
|
f"expected (got {first.indent}, expected >= {base_indent})"
|
||||||
)
|
)
|
||||||
if first.indent > base_indent:
|
if first.indent > base_indent:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {first.lineno} indented more than "
|
f"yaml-subset: line {first.lineno} indented more than "
|
||||||
f"expected (got {first.indent}, expected {base_indent})"
|
f"expected (got {first.indent}, expected {base_indent})"
|
||||||
)
|
)
|
||||||
@@ -360,18 +366,18 @@ def _parse_block_mapping(
|
|||||||
while idx < len(lines) and lines[idx].indent == base_indent:
|
while idx < len(lines) and lines[idx].indent == base_indent:
|
||||||
line = lines[idx]
|
line = lines[idx]
|
||||||
if line.content.startswith("- "):
|
if line.content.startswith("- "):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {line.lineno} unexpected list "
|
f"yaml-subset: line {line.lineno} unexpected list "
|
||||||
f"item at mapping indent (got `-`, expected `key:`)"
|
f"item at mapping indent (got `-`, expected `key:`)"
|
||||||
)
|
)
|
||||||
key, value_text = _split_key_value(line.content, line.lineno)
|
key, value_text = _split_key_value(line.content, line.lineno)
|
||||||
if not _BARE_RX.match(key):
|
if not _BARE_RX.match(key):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {line.lineno} key {key!r} is not "
|
f"yaml-subset: line {line.lineno} key {key!r} is not "
|
||||||
f"a bare identifier"
|
f"a bare identifier"
|
||||||
)
|
)
|
||||||
if key in out:
|
if key in out:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {line.lineno} duplicate key {key!r}"
|
f"yaml-subset: line {line.lineno} duplicate key {key!r}"
|
||||||
)
|
)
|
||||||
if value_text:
|
if value_text:
|
||||||
@@ -411,7 +417,7 @@ def _parse_block_list(
|
|||||||
content_col = base_indent + 2
|
content_col = base_indent + 2
|
||||||
first_key, first_value_text = _split_key_value(rest, line.lineno)
|
first_key, first_value_text = _split_key_value(rest, line.lineno)
|
||||||
if not _BARE_RX.match(first_key):
|
if not _BARE_RX.match(first_key):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {line.lineno} key {first_key!r} "
|
f"yaml-subset: line {line.lineno} key {first_key!r} "
|
||||||
f"is not a bare identifier"
|
f"is not a bare identifier"
|
||||||
)
|
)
|
||||||
@@ -434,12 +440,12 @@ def _parse_block_list(
|
|||||||
break # next list item, not a sibling key
|
break # next list item, not a sibling key
|
||||||
k, v_text = _split_key_value(ln.content, ln.lineno)
|
k, v_text = _split_key_value(ln.content, ln.lineno)
|
||||||
if not _BARE_RX.match(k):
|
if not _BARE_RX.match(k):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {ln.lineno} key {k!r} is "
|
f"yaml-subset: line {ln.lineno} key {k!r} is "
|
||||||
f"not a bare identifier"
|
f"not a bare identifier"
|
||||||
)
|
)
|
||||||
if k in item:
|
if k in item:
|
||||||
raise YamlSubsetError(f"yaml-subset: line {ln.lineno} duplicate key {k!r}")
|
die(f"yaml-subset: line {ln.lineno} duplicate key {k!r}")
|
||||||
if v_text:
|
if v_text:
|
||||||
item[k] = _parse_inline(v_text, ln.lineno)
|
item[k] = _parse_inline(v_text, ln.lineno)
|
||||||
idx += 1
|
idx += 1
|
||||||
@@ -495,7 +501,7 @@ def parse_yaml_subset(text: str) -> dict[str, object]:
|
|||||||
for n, raw in enumerate(text.splitlines(), start=1):
|
for n, raw in enumerate(text.splitlines(), start=1):
|
||||||
s = raw.strip()
|
s = raw.strip()
|
||||||
if s.startswith("|") or s.startswith(">") or s.startswith("- |") or s.startswith("- >"):
|
if s.startswith("|") or s.startswith(">") or s.startswith("- |") or s.startswith("- >"):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {n} uses a multi-line block "
|
f"yaml-subset: line {n} uses a multi-line block "
|
||||||
f"scalar (`|` / `>`) — not supported. Use a quoted "
|
f"scalar (`|` / `>`) — not supported. Use a quoted "
|
||||||
f"single-line string instead."
|
f"single-line string instead."
|
||||||
@@ -505,12 +511,12 @@ def parse_yaml_subset(text: str) -> dict[str, object]:
|
|||||||
# not when it's inside a quoted string. Cheap check: any
|
# not when it's inside a quoted string. Cheap check: any
|
||||||
# bare `&foo:` / `*foo` at the start of a value position.
|
# bare `&foo:` / `*foo` at the start of a value position.
|
||||||
if re.search(r"(^|\s)[&*][A-Za-z0-9_]+", s):
|
if re.search(r"(^|\s)[&*][A-Za-z0-9_]+", s):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {n} uses anchors / aliases "
|
f"yaml-subset: line {n} uses anchors / aliases "
|
||||||
f"(`&` / `*`) — not supported."
|
f"(`&` / `*`) — not supported."
|
||||||
)
|
)
|
||||||
if "!!" in s and not (s.count("'") % 2 or s.count('"') % 2):
|
if "!!" in s and not (s.count("'") % 2 or s.count('"') % 2):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: line {n} uses a YAML tag (`!!`) — not "
|
f"yaml-subset: line {n} uses a YAML tag (`!!`) — not "
|
||||||
f"supported."
|
f"supported."
|
||||||
)
|
)
|
||||||
@@ -520,18 +526,18 @@ def parse_yaml_subset(text: str) -> dict[str, object]:
|
|||||||
return {}
|
return {}
|
||||||
base_indent = lines[0].indent
|
base_indent = lines[0].indent
|
||||||
if base_indent != 0:
|
if base_indent != 0:
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: top-level content must start in column 0 "
|
f"yaml-subset: top-level content must start in column 0 "
|
||||||
f"(got column {base_indent} on line {lines[0].lineno})"
|
f"(got column {base_indent} on line {lines[0].lineno})"
|
||||||
)
|
)
|
||||||
value, consumed = _parse_block(lines, 0, 0)
|
value, consumed = _parse_block(lines, 0, 0)
|
||||||
if consumed < len(lines):
|
if consumed < len(lines):
|
||||||
raise YamlSubsetError(
|
die(
|
||||||
f"yaml-subset: trailing content starting on line "
|
f"yaml-subset: trailing content starting on line "
|
||||||
f"{lines[consumed].lineno}"
|
f"{lines[consumed].lineno}"
|
||||||
)
|
)
|
||||||
if not isinstance(value, dict):
|
if not isinstance(value, dict):
|
||||||
raise YamlSubsetError("yaml-subset: top-level value must be a mapping")
|
die("yaml-subset: top-level value must be a mapping")
|
||||||
return cast(dict[str, object], value)
|
return cast(dict[str, object], value)
|
||||||
|
|
||||||
|
|
||||||
@@ -570,7 +576,7 @@ def parse_frontmatter(text: str) -> tuple[dict[str, object], str]:
|
|||||||
fm_end_lineno = line_idx
|
fm_end_lineno = line_idx
|
||||||
break
|
break
|
||||||
if body_start < 0:
|
if body_start < 0:
|
||||||
raise YamlSubsetError("frontmatter: opening `---` has no matching closing `---`")
|
die("frontmatter: opening `---` has no matching closing `---`")
|
||||||
|
|
||||||
fm_text = text[line_starts[1]:line_starts[fm_end_lineno]] if fm_end_lineno > 1 else ""
|
fm_text = text[line_starts[1]:line_starts[fm_end_lineno]] if fm_end_lineno > 1 else ""
|
||||||
fm = parse_yaml_subset(fm_text)
|
fm = parse_yaml_subset(fm_text)
|
||||||
|
|||||||
Reference in New Issue
Block a user