fix(pipelock): validate yaml render config

This commit is contained in:
2026-06-02 08:14:48 +00:00
parent 9185c145a1
commit 6e954da9b7
3 changed files with 312 additions and 13 deletions
+195 -10
View File
@@ -19,7 +19,6 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import cast
from .egress import EGRESS_HOSTNAME, EgressRoute, egress_routes_for_bottle from .egress import EGRESS_HOSTNAME, EgressRoute, egress_routes_for_bottle
from .supervise import SUPERVISE_HOSTNAME from .supervise import SUPERVISE_HOSTNAME
@@ -223,6 +222,180 @@ def pipelock_build_config(
return cfg return cfg
_PIPELOCK_TOP_LEVEL_KEYS = {
"version",
"mode",
"enforce",
"api_allowlist",
"seed_phrase_detection",
"forward_proxy",
"dlp",
"request_body_scanning",
"tls_interception",
"ssrf",
}
def _pipelock_render_error(section: str, key: str, expected: str) -> ValueError:
return ValueError(
f"pipelock_render_yaml: {section}.{key} must be {expected}"
)
def _reject_unknown_keys(
section: str,
obj: dict[str, object],
allowed: set[str],
) -> None:
for key in sorted(set(obj) - allowed):
raise ValueError(f"pipelock_render_yaml: {section}.{key} is unsupported")
def _required_dict(
obj: dict[str, object],
section: str,
key: str,
) -> dict[str, object]:
value = obj.get(key)
if not isinstance(value, dict):
raise _pipelock_render_error(section, key, "a mapping")
return value
def _required_bool(obj: dict[str, object], section: str, key: str) -> bool:
value = obj.get(key)
if not isinstance(value, bool):
raise _pipelock_render_error(section, key, "a boolean")
return value
def _required_int(obj: dict[str, object], section: str, key: str) -> int:
value = obj.get(key)
if isinstance(value, bool) or not isinstance(value, int):
raise _pipelock_render_error(section, key, "an integer")
return value
def _required_str(obj: dict[str, object], section: str, key: str) -> str:
value = obj.get(key)
if not isinstance(value, str):
raise _pipelock_render_error(section, key, "a string")
return value
def _required_str_list(
obj: dict[str, object],
section: str,
key: str,
) -> list[str]:
value = obj.get(key)
if not isinstance(value, list) or not all(isinstance(v, str) for v in value):
raise _pipelock_render_error(section, key, "a list of strings")
return value
def _optional_str_list(
obj: dict[str, object],
section: str,
key: str,
) -> list[str]:
if key not in obj:
return []
return _required_str_list(obj, section, key)
def _optional_bool(
obj: dict[str, object],
section: str,
key: str,
) -> bool | None:
if key not in obj:
return None
return _required_bool(obj, section, key)
def _optional_str(
obj: dict[str, object],
section: str,
key: str,
) -> str | None:
if key not in obj:
return None
return _required_str(obj, section, key)
def _validate_pipelock_render_config(cfg: dict[str, object]) -> dict[str, object]:
_reject_unknown_keys("config", cfg, _PIPELOCK_TOP_LEVEL_KEYS)
normalized: dict[str, object] = {
"version": _required_int(cfg, "config", "version"),
"mode": _required_str(cfg, "config", "mode"),
"enforce": _required_bool(cfg, "config", "enforce"),
"api_allowlist": _required_str_list(cfg, "config", "api_allowlist"),
}
if "seed_phrase_detection" in cfg:
spd = _required_dict(cfg, "config", "seed_phrase_detection")
_reject_unknown_keys("seed_phrase_detection", spd, {"enabled"})
normalized["seed_phrase_detection"] = {
"enabled": _required_bool(spd, "seed_phrase_detection", "enabled"),
}
fp = _required_dict(cfg, "config", "forward_proxy")
_reject_unknown_keys("forward_proxy", fp, {"enabled"})
normalized["forward_proxy"] = {
"enabled": _required_bool(fp, "forward_proxy", "enabled"),
}
dlp = _required_dict(cfg, "config", "dlp")
_reject_unknown_keys("dlp", dlp, {"include_defaults", "scan_env"})
normalized["dlp"] = {
"include_defaults": _required_bool(dlp, "dlp", "include_defaults"),
"scan_env": _required_bool(dlp, "dlp", "scan_env"),
}
rbs = _required_dict(cfg, "config", "request_body_scanning")
_reject_unknown_keys(
"request_body_scanning",
rbs,
{"action", "scan_headers", "header_mode"},
)
normalized_rbs: dict[str, object] = {
"action": _required_str(rbs, "request_body_scanning", "action"),
}
scan_headers = _optional_bool(rbs, "request_body_scanning", "scan_headers")
if scan_headers is not None:
normalized_rbs["scan_headers"] = scan_headers
header_mode = _optional_str(rbs, "request_body_scanning", "header_mode")
if header_mode is not None:
normalized_rbs["header_mode"] = header_mode
normalized["request_body_scanning"] = normalized_rbs
if "tls_interception" in cfg:
tls = _required_dict(cfg, "config", "tls_interception")
_reject_unknown_keys(
"tls_interception",
tls,
{"enabled", "ca_cert", "ca_key", "passthrough_domains"},
)
normalized["tls_interception"] = {
"enabled": _required_bool(tls, "tls_interception", "enabled"),
"ca_cert": _required_str(tls, "tls_interception", "ca_cert"),
"ca_key": _required_str(tls, "tls_interception", "ca_key"),
"passthrough_domains": _optional_str_list(
tls, "tls_interception", "passthrough_domains",
),
}
if "ssrf" in cfg:
ssrf = _required_dict(cfg, "config", "ssrf")
_reject_unknown_keys("ssrf", ssrf, {"ip_allowlist"})
normalized["ssrf"] = {
"ip_allowlist": _required_str_list(ssrf, "ssrf", "ip_allowlist"),
}
return normalized
def pipelock_render_yaml(cfg: dict[str, object]) -> str: def pipelock_render_yaml(cfg: dict[str, object]) -> str:
"""Render a pipelock config dict (as produced by """Render a pipelock config dict (as produced by
`pipelock_build_config`) as YAML. Hand-rolled so we don't take a `pipelock_build_config`) as YAML. Hand-rolled so we don't take a
@@ -230,31 +403,38 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
def _bool(b: object) -> str: def _bool(b: object) -> str:
return "true" if b else "false" return "true" if b else "false"
cfg = _validate_pipelock_render_config(cfg)
lines: list[str] = [] lines: list[str] = []
lines.append(f"version: {cfg['version']}") lines.append(f"version: {cfg['version']}")
lines.append(f"mode: {cfg['mode']}") lines.append(f"mode: {cfg['mode']}")
lines.append(f"enforce: {_bool(cfg['enforce'])}") lines.append(f"enforce: {_bool(cfg['enforce'])}")
lines.append("") lines.append("")
lines.append("api_allowlist:") lines.append("api_allowlist:")
for h in cast(list[str], cfg["api_allowlist"]): api_allowlist = cfg["api_allowlist"]
assert isinstance(api_allowlist, list)
for h in api_allowlist:
lines.append(f' - "{h}"') lines.append(f' - "{h}"')
lines.append("") lines.append("")
if "seed_phrase_detection" in cfg: if "seed_phrase_detection" in cfg:
lines.append("seed_phrase_detection:") lines.append("seed_phrase_detection:")
spd = cast(dict[str, object], cfg["seed_phrase_detection"]) spd = cfg["seed_phrase_detection"]
assert isinstance(spd, dict)
lines.append(f" enabled: {_bool(spd['enabled'])}") lines.append(f" enabled: {_bool(spd['enabled'])}")
lines.append("") lines.append("")
lines.append("forward_proxy:") lines.append("forward_proxy:")
fp = cast(dict[str, object], cfg["forward_proxy"]) fp = cfg["forward_proxy"]
assert isinstance(fp, dict)
lines.append(f" enabled: {_bool(fp['enabled'])}") lines.append(f" enabled: {_bool(fp['enabled'])}")
lines.append("") lines.append("")
lines.append("dlp:") lines.append("dlp:")
dlp = cast(dict[str, object], cfg["dlp"]) dlp = cfg["dlp"]
assert isinstance(dlp, dict)
lines.append(f" include_defaults: {_bool(dlp['include_defaults'])}") lines.append(f" include_defaults: {_bool(dlp['include_defaults'])}")
lines.append(f" scan_env: {_bool(dlp['scan_env'])}") lines.append(f" scan_env: {_bool(dlp['scan_env'])}")
lines.append("") lines.append("")
lines.append("request_body_scanning:") lines.append("request_body_scanning:")
rbs = cast(dict[str, object], cfg["request_body_scanning"]) rbs = cfg["request_body_scanning"]
assert isinstance(rbs, dict)
lines.append(f' action: "{rbs["action"]}"') lines.append(f' action: "{rbs["action"]}"')
if "scan_headers" in rbs: if "scan_headers" in rbs:
lines.append(f" scan_headers: {_bool(rbs['scan_headers'])}") lines.append(f" scan_headers: {_bool(rbs['scan_headers'])}")
@@ -263,11 +443,13 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
if "tls_interception" in cfg: if "tls_interception" in cfg:
lines.append("") lines.append("")
lines.append("tls_interception:") lines.append("tls_interception:")
tls = cast(dict[str, object], cfg["tls_interception"]) tls = cfg["tls_interception"]
assert isinstance(tls, dict)
lines.append(f" enabled: {_bool(tls['enabled'])}") lines.append(f" enabled: {_bool(tls['enabled'])}")
lines.append(f' ca_cert: "{tls["ca_cert"]}"') lines.append(f' ca_cert: "{tls["ca_cert"]}"')
lines.append(f' ca_key: "{tls["ca_key"]}"') lines.append(f' ca_key: "{tls["ca_key"]}"')
passthrough = cast(list[str], tls.get("passthrough_domains", [])) passthrough = tls["passthrough_domains"]
assert isinstance(passthrough, list)
if passthrough: if passthrough:
lines.append(" passthrough_domains:") lines.append(" passthrough_domains:")
for d in passthrough: for d in passthrough:
@@ -275,9 +457,12 @@ def pipelock_render_yaml(cfg: dict[str, object]) -> str:
if "ssrf" in cfg: if "ssrf" in cfg:
lines.append("") lines.append("")
lines.append("ssrf:") lines.append("ssrf:")
ssrf = cast(dict[str, object], cfg["ssrf"]) ssrf = cfg["ssrf"]
assert isinstance(ssrf, dict)
lines.append(" ip_allowlist:") lines.append(" ip_allowlist:")
for ip in cast(list[str], ssrf["ip_allowlist"]): ip_allowlist = ssrf["ip_allowlist"]
assert isinstance(ip_allowlist, list)
for ip in ip_allowlist:
lines.append(f' - "{ip}"') lines.append(f' - "{ip}"')
return "\n".join(lines) + "\n" return "\n".join(lines) + "\n"
@@ -73,6 +73,15 @@ rendering a section, validate that required keys exist with the expected
primitive/list/dict types. Missing or unsupported shapes should raise a clear primitive/list/dict types. Missing or unsupported shapes should raise a clear
`ValueError` naming the section and key. `ValueError` naming the section and key.
The supported top-level shape is `version`, `mode`, `enforce`,
`api_allowlist`, `seed_phrase_detection`, `forward_proxy`, `dlp`,
`request_body_scanning`, `tls_interception`, and `ssrf`. Required sections are
validated before rendering; optional sections keep the current omission
behavior. `request_body_scanning.scan_headers`,
`request_body_scanning.header_mode`, and
`tls_interception.passthrough_domains` remain optional for compatibility with
parsed running configs that only contain the older rendered subset.
Tests should cover both normal output and failure cases. Because the project is Tests should cover both normal output and failure cases. Because the project is
stdlib-only, semantic tests can use a small purpose-built parser for the exact stdlib-only, semantic tests can use a small purpose-built parser for the exact
rendered shape or compare rendered lines to values from the structured config rendered shape or compare rendered lines to values from the structured config
@@ -101,6 +110,4 @@ Run:
## Open Questions ## Open Questions
- Should malformed config errors be `ValueError`, matching current None.
`pipelock_build_config` validation, or a new internal exception type? Prefer
`ValueError` unless a caller needs to distinguish serializer errors.
+107
View File
@@ -19,6 +19,7 @@ from bot_bottle.pipelock import (
pipelock_build_config, pipelock_build_config,
pipelock_render_yaml, pipelock_render_yaml,
) )
from bot_bottle.yaml_subset import parse_yaml_subset
from tests.fixtures import fixture_minimal from tests.fixtures import fixture_minimal
@@ -158,6 +159,51 @@ class TestRenderAndWrite(unittest.TestCase):
import shutil import shutil
shutil.rmtree(self.out_dir, ignore_errors=True) shutil.rmtree(self.out_dir, ignore_errors=True)
def assert_render_semantics_match(self, cfg: dict[str, object]) -> None:
parsed = parse_yaml_subset(pipelock_render_yaml(cfg))
self.assertEqual(cfg["version"], parsed["version"])
self.assertEqual(cfg["mode"], parsed["mode"])
self.assertEqual(cfg["enforce"], parsed["enforce"])
parsed_allowlist = parsed["api_allowlist"]
if cfg["api_allowlist"] == [] and parsed_allowlist is None:
parsed_allowlist = []
self.assertEqual(cfg["api_allowlist"], parsed_allowlist)
self.assertEqual(cfg["forward_proxy"], parsed["forward_proxy"])
self.assertEqual(cfg["dlp"], parsed["dlp"])
self.assertEqual(
cfg["request_body_scanning"],
parsed["request_body_scanning"],
)
if "seed_phrase_detection" in cfg:
self.assertEqual(
cfg["seed_phrase_detection"],
parsed["seed_phrase_detection"],
)
else:
self.assertNotIn("seed_phrase_detection", parsed)
if "tls_interception" in cfg:
expected_tls = cast(dict[str, object], cfg["tls_interception"])
actual_tls = cast(dict[str, object], parsed["tls_interception"])
self.assertEqual(expected_tls["enabled"], actual_tls["enabled"])
self.assertEqual(expected_tls["ca_cert"], actual_tls["ca_cert"])
self.assertEqual(expected_tls["ca_key"], actual_tls["ca_key"])
expected_passthrough = expected_tls["passthrough_domains"]
if expected_passthrough:
self.assertEqual(
expected_passthrough,
actual_tls["passthrough_domains"],
)
else:
self.assertNotIn("passthrough_domains", actual_tls)
else:
self.assertNotIn("tls_interception", parsed)
if "ssrf" in cfg:
self.assertEqual(cfg["ssrf"], parsed["ssrf"])
else:
self.assertNotIn("ssrf", parsed)
def test_render_emits_required_top_level_keys(self): def test_render_emits_required_top_level_keys(self):
"""One render-level smoke check: the serialized YAML is plausibly """One render-level smoke check: the serialized YAML is plausibly
the shape pipelock expects. We don't grep every key here — that's the shape pipelock expects. We don't grep every key here — that's
@@ -175,6 +221,67 @@ class TestRenderAndWrite(unittest.TestCase):
self.assertNotIn("trusted_domains:", text) self.assertNotIn("trusted_domains:", text)
self.assertNotIn("ssrf:", text) self.assertNotIn("ssrf:", text)
def test_render_semantics_match_minimal_config(self):
cfg = pipelock_build_config(fixture_minimal().bottles["dev"])
self.assert_render_semantics_match(cfg)
def test_render_semantics_match_tls_with_empty_passthrough(self):
cfg = pipelock_build_config(
fixture_minimal().bottles["dev"],
ca_cert_path="/etc/pipelock-ca.pem",
ca_key_path="/etc/pipelock-ca-key.pem",
)
self.assert_render_semantics_match(cfg)
def test_render_semantics_match_all_optional_sections(self):
bottle = Manifest.from_json_obj({
"bottles": {"dev": {"egress": {"routes": [
{"host": "api.openai.com",
"pipelock": {"tls_passthrough": True}},
{"host": "gitea.dideric.is",
"pipelock": {"ssrf_ip_allowlist": ["100.78.141.42/32"]}},
]}}},
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
}).bottles["dev"]
cfg = pipelock_build_config(
bottle,
ca_cert_path="/etc/pipelock-ca.pem",
ca_key_path="/etc/pipelock-ca-key.pem",
ssrf_ip_allowlist=("172.20.0.0/16",),
)
self.assert_render_semantics_match(cfg)
def test_render_rejects_missing_required_key(self):
cfg = pipelock_build_config(fixture_minimal().bottles["dev"])
del cfg["mode"]
with self.assertRaisesRegex(ValueError, r"config\.mode"):
pipelock_render_yaml(cfg)
def test_render_rejects_wrong_section_type(self):
cfg = pipelock_build_config(fixture_minimal().bottles["dev"])
cfg["dlp"] = []
with self.assertRaisesRegex(ValueError, r"config\.dlp.*mapping"):
pipelock_render_yaml(cfg)
def test_render_rejects_wrong_list_item_type(self):
cfg = pipelock_build_config(
fixture_minimal().bottles["dev"],
ca_cert_path="/etc/pipelock-ca.pem",
ca_key_path="/etc/pipelock-ca-key.pem",
)
tls = cast(dict[str, object], cfg["tls_interception"])
tls["passthrough_domains"] = ["api.openai.com", 3]
with self.assertRaisesRegex(
ValueError, r"tls_interception\.passthrough_domains",
):
pipelock_render_yaml(cfg)
def test_render_rejects_unsupported_top_level_section(self):
cfg = pipelock_build_config(fixture_minimal().bottles["dev"])
cfg["trusted_domains"] = []
with self.assertRaisesRegex(ValueError, r"config\.trusted_domains"):
pipelock_render_yaml(cfg)
def test_prepare_writes_file_at_mode_600(self): def test_prepare_writes_file_at_mode_600(self):
plan = PipelockProxy().prepare( plan = PipelockProxy().prepare(
fixture_minimal().bottles["dev"], "demo", self.out_dir fixture_minimal().bottles["dev"], "demo", self.out_dir