feat(egress): implement PRD 0053 — DLP addon with Gateway API matches
Replace path_allowlist with Gateway API HTTPRoute match vocabulary (paths, methods, headers with AND/OR semantics) and add DLP scanning to the egress proxy: - Token pattern detection (AWS, GitHub, Anthropic, OpenAI, Stripe, JWT) - Known secret detection (EGRESS_TOKEN_* with base64/URL/hex variants) - Naive prompt injection detection (disclosure + credential, jailbreak) - Per-route DLP configuration via manifest dlp block - Inbound response scanning with block/warn severity Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,9 @@
|
||||
"""Host-side helper to apply a routes.yaml change to a running
|
||||
egress sidecar (PRD 0014 retargeted by PRD 0017 chunk 3).
|
||||
egress sidecar (PRD 0014 retargeted by PRD 0017 chunk 3, PRD 0053).
|
||||
|
||||
Used by the supervise dashboard when the operator approves an
|
||||
egress-block proposal (or runs the operator-initiated
|
||||
`routes edit <bottle>` verb). Fetches the current routes.yaml via
|
||||
`docker exec cat`, validates the new content, writes it into the
|
||||
sidecar via `docker cp`, then `docker kill --signal HUP` to make
|
||||
the addon reload without dropping connections.
|
||||
|
||||
Raises EgressApplyError on any failure — the dashboard
|
||||
surfaces the message and keeps the proposal pending so the
|
||||
operator can retry.
|
||||
egress-block proposal. Fetches current routes.yaml, validates,
|
||||
writes into the sidecar, then SIGHUPs to reload.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -29,9 +22,7 @@ from .sidecar_bundle import sidecar_bundle_container_name
|
||||
|
||||
def _render_routes_payload(routes_list: list[dict[str, object]]) -> str:
|
||||
"""Render a list-of-dicts routes payload as YAML matching the
|
||||
shape `egress_render_routes` produces. The apply path
|
||||
round-trips current routes.yaml through this so the file the
|
||||
sidecar sees stays in the YAML format the addon expects."""
|
||||
shape `egress_render_routes` produces."""
|
||||
if not routes_list:
|
||||
return "routes: []\n"
|
||||
lines: list[str] = ["routes:"]
|
||||
@@ -43,31 +34,42 @@ def _render_routes_payload(routes_list: list[dict[str, object]]) -> str:
|
||||
if auth_scheme and token_env:
|
||||
lines.append(f' auth_scheme: "{auth_scheme}"')
|
||||
lines.append(f' token_env: "{token_env}"')
|
||||
paths_obj = entry.get("path_allowlist")
|
||||
paths = cast(list[str], paths_obj) if isinstance(paths_obj, list) else []
|
||||
if paths:
|
||||
lines.append(" path_allowlist:")
|
||||
for p in paths:
|
||||
lines.append(f' - "{p}"')
|
||||
matches_obj = entry.get("matches")
|
||||
if isinstance(matches_obj, list) and matches_obj:
|
||||
lines.append(" matches:")
|
||||
for match_entry in matches_obj:
|
||||
me = cast(dict[str, object], match_entry)
|
||||
first_key = True
|
||||
if "paths" in me:
|
||||
lines.append(" - paths:")
|
||||
first_key = False
|
||||
for pd in cast(list[dict[str, str]], me["paths"]):
|
||||
if "type" in pd:
|
||||
lines.append(f' - type: "{pd["type"]}"')
|
||||
lines.append(f' value: "{pd["value"]}"')
|
||||
else:
|
||||
lines.append(f' - value: "{pd["value"]}"')
|
||||
if "methods" in me:
|
||||
methods_str = ", ".join(
|
||||
f'"{m}"' for m in cast(list[str], me["methods"])
|
||||
)
|
||||
prefix = " - " if first_key else " "
|
||||
lines.append(f'{prefix}methods: [{methods_str}]')
|
||||
first_key = False
|
||||
if first_key:
|
||||
lines.append(" - {}")
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def _egress_routes_host_path(slug: str) -> Path:
|
||||
"""The bind-mount source for the egress sidecar's routes.yaml.
|
||||
Must match what egress.prepare wrote at chunk-2 paths."""
|
||||
return egress_state_dir(slug) / "egress_routes.yaml"
|
||||
|
||||
|
||||
class EgressApplyError(RuntimeError):
|
||||
"""Raised when fetch / apply fails. Caller renders to the
|
||||
operator; does not crash the dashboard."""
|
||||
pass
|
||||
|
||||
|
||||
def fetch_current_routes(slug: str) -> str:
|
||||
"""Read the live routes.yaml from the running egress sidecar
|
||||
for `slug`. Returns the file content as a string. Raises
|
||||
EgressApplyError if the sidecar isn't reachable or the read
|
||||
fails."""
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container, "cat", EGRESS_ROUTES_IN_CONTAINER],
|
||||
@@ -82,9 +84,6 @@ def fetch_current_routes(slug: str) -> str:
|
||||
|
||||
|
||||
def validate_routes_content(content: str) -> None:
|
||||
"""Syntactic check before SIGHUP — the addon's reload also
|
||||
validates, but failing here keeps the old routes live and gives
|
||||
the operator a clearer error than the addon's stderr line."""
|
||||
try:
|
||||
load_routes(content)
|
||||
except ValueError as e:
|
||||
@@ -94,29 +93,10 @@ def validate_routes_content(content: str) -> None:
|
||||
|
||||
|
||||
def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
|
||||
"""Apply `new_content` to the egress sidecar for `slug`:
|
||||
1. Fetch current routes.yaml (for the before-diff).
|
||||
2. Validate the new content via the addon's own parser.
|
||||
3. Write to the bind-mount source path.
|
||||
4. `docker kill --signal HUP` so the addon reloads.
|
||||
|
||||
Returns (before, after) where `after` == `new_content`. Raises
|
||||
EgressApplyError on any step."""
|
||||
container = sidecar_bundle_container_name(slug)
|
||||
before = fetch_current_routes(slug)
|
||||
validate_routes_content(new_content)
|
||||
|
||||
# routes.yaml is bind-mounted into the egress container as a
|
||||
# SINGLE FILE. Docker single-file bind mounts pin the source
|
||||
# inode at mount time; write-temp-then-rename swaps the inode
|
||||
# on the host, which leaves the container's mount pointing at
|
||||
# the now-orphaned old inode (so the SIGHUP'd reload re-reads
|
||||
# unchanged content). Write in-place instead. Lose file-level
|
||||
# atomicity, but the apply path issues SIGHUP only AFTER the
|
||||
# write returns, and the addon's `load_routes` raises
|
||||
# `ValueError` on a partial read and keeps the previous
|
||||
# in-memory routes — so a SIGHUP that hypothetically raced an
|
||||
# in-flight write is non-disruptive.
|
||||
target = _egress_routes_host_path(slug)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_text(new_content)
|
||||
@@ -137,22 +117,12 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
|
||||
def _merge_single_route(
|
||||
current_yaml: str, new_route: dict[str, object],
|
||||
) -> str:
|
||||
"""Merge a single proposed route into the current routes.yaml
|
||||
content, returning the merged YAML string.
|
||||
"""Merge a single proposed route into the current routes.yaml.
|
||||
|
||||
Behavior:
|
||||
- If `new_route['host']` is NOT in the current routes →
|
||||
append the route.
|
||||
- If the host IS already present → union the path_allowlist
|
||||
entries (proposed ∪ existing). The existing `auth_scheme`
|
||||
and `token_env` are preserved — agent-proposed auth changes
|
||||
on an existing host are ignored, matching the tool's
|
||||
documented semantics.
|
||||
|
||||
Round-trips the file through `yaml_subset` (the same parser
|
||||
the addon uses), so the merged output is in the YAML format
|
||||
the sidecar reads. Token VALUES never appear here; the routes
|
||||
file carries only env-var slot NAMES."""
|
||||
- Host absent → append the route.
|
||||
- Host present → union the match paths (proposed ∪ existing).
|
||||
Auth is preserved from existing route.
|
||||
"""
|
||||
try:
|
||||
cfg = parse_yaml_subset(current_yaml)
|
||||
except YamlSubsetError as e:
|
||||
@@ -172,37 +142,58 @@ def _merge_single_route(
|
||||
"proposed route is missing 'host'"
|
||||
)
|
||||
|
||||
proposed_paths_obj = new_route.get("path_allowlist")
|
||||
proposed_paths = cast(list[str], proposed_paths_obj) if isinstance(proposed_paths_obj, list) else []
|
||||
# Build proposed matches from the input
|
||||
proposed_matches = new_route.get("matches")
|
||||
if proposed_matches is None:
|
||||
# Accept legacy path_allowlist from agent proposals and convert
|
||||
proposed_paths = new_route.get("path_allowlist")
|
||||
if isinstance(proposed_paths, list) and proposed_paths:
|
||||
proposed_matches = [{"paths": [{"value": p} for p in proposed_paths]}]
|
||||
|
||||
# Look for an existing entry with the same host (case-insensitive).
|
||||
for entry in routes_typed:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
entry_typed = cast(dict[str, object], entry)
|
||||
if str(entry_typed.get("host", "")).lower() == new_host:
|
||||
# Merge path_allowlist: union proposed + existing, ordered
|
||||
# by first-seen so existing paths stay in original order.
|
||||
existing_paths_obj = entry_typed.get("path_allowlist")
|
||||
existing_paths = cast(list[str], existing_paths_obj) if isinstance(existing_paths_obj, list) else []
|
||||
seen = {p: None for p in existing_paths}
|
||||
for p in proposed_paths:
|
||||
seen.setdefault(p, None)
|
||||
merged_paths = list(seen.keys())
|
||||
if merged_paths:
|
||||
entry_typed["path_allowlist"] = merged_paths
|
||||
# Preserve existing auth — tool description says agent-
|
||||
# proposed auth on an existing host is ignored.
|
||||
# Merge matches: union path values from proposed into existing
|
||||
if isinstance(proposed_matches, list) and proposed_matches:
|
||||
existing_matches = entry_typed.get("matches")
|
||||
if not isinstance(existing_matches, list):
|
||||
existing_matches = []
|
||||
# Simple merge: collect all existing path values, add new ones
|
||||
existing_paths: set[str] = set()
|
||||
for me in existing_matches:
|
||||
me_typed = cast(dict[str, object], me) if isinstance(me, dict) else {}
|
||||
paths = me_typed.get("paths")
|
||||
if isinstance(paths, list):
|
||||
for p in paths:
|
||||
p_typed = cast(dict[str, object], p) if isinstance(p, dict) else {}
|
||||
val = p_typed.get("value")
|
||||
if isinstance(val, str):
|
||||
existing_paths.add(val)
|
||||
new_paths: list[str] = []
|
||||
for me in proposed_matches:
|
||||
me_typed = cast(dict[str, object], me) if isinstance(me, dict) else {}
|
||||
paths = me_typed.get("paths")
|
||||
if isinstance(paths, list):
|
||||
for p in paths:
|
||||
p_typed = cast(dict[str, object], p) if isinstance(p, dict) else {}
|
||||
val = p_typed.get("value")
|
||||
if isinstance(val, str) and val not in existing_paths:
|
||||
new_paths.append(val)
|
||||
existing_paths.add(val)
|
||||
if new_paths:
|
||||
if not isinstance(existing_matches, list):
|
||||
existing_matches = []
|
||||
existing_matches.append(
|
||||
{"paths": [{"value": p} for p in new_paths]}
|
||||
)
|
||||
entry_typed["matches"] = existing_matches
|
||||
break
|
||||
else:
|
||||
# Host not present; build a new route entry from the
|
||||
# proposed fields. Need to assign a token_env slot if
|
||||
# `auth` was proposed (otherwise the addon's parser rejects
|
||||
# a half-set auth pair). Slots: count existing slots, pick
|
||||
# the next free index.
|
||||
entry_typed: dict[str, object] = {"host": new_route.get("host")} # type: ignore
|
||||
if proposed_paths:
|
||||
entry_typed["path_allowlist"] = proposed_paths
|
||||
if isinstance(proposed_matches, list) and proposed_matches:
|
||||
entry_typed["matches"] = proposed_matches
|
||||
auth = new_route.get("auth")
|
||||
if isinstance(auth, dict) and auth.get("scheme") and auth.get("token_ref"): # type: ignore
|
||||
auth_typed = cast(dict[str, object], auth)
|
||||
@@ -222,10 +213,6 @@ def _merge_single_route(
|
||||
|
||||
|
||||
def add_route(slug: str, proposed_route_json: str) -> tuple[str, str]:
|
||||
"""Apply a single-route addition to the egress. Parses the
|
||||
agent's proposed route, fetches the current routes file, merges,
|
||||
and applies via `apply_routes_change`. Returns (before, after)
|
||||
full-file content for the audit log."""
|
||||
try:
|
||||
proposed = json.loads(proposed_route_json)
|
||||
except json.JSONDecodeError as e:
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
"""DLP detectors for the egress proxy (PRD 0053).
|
||||
|
||||
Pure Python, no mitmproxy dependency. Each detector is a module-level
|
||||
function returning `ScanResult | None`.
|
||||
|
||||
Ships flat into the sidecar bundle image alongside
|
||||
`egress_addon_core.py` — both this file and the package source use
|
||||
the same try/except import shim pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import re
|
||||
import typing
|
||||
from urllib.parse import quote as url_quote
|
||||
|
||||
try:
|
||||
from egress_addon_core import ScanResult # type: ignore[import-not-found]
|
||||
except ImportError: # pragma: no cover - host-side path
|
||||
from .egress_addon_core import ScanResult
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Token patterns detector (Phase 1a)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TOKEN_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
|
||||
("AWS access key", re.compile(r"AKIA[0-9A-Z]{16}")),
|
||||
("GitHub token (classic)", re.compile(r"ghp_[A-Za-z0-9_]{36}")),
|
||||
("GitHub fine-grained token", re.compile(r"github_pat_[A-Za-z0-9_]{82}")),
|
||||
("Anthropic API key", re.compile(r"sk-ant-[A-Za-z0-9\-_]{93}")),
|
||||
("OpenAI API key", re.compile(r"sk-[A-Za-z0-9]{48}")),
|
||||
("Stripe live key", re.compile(r"sk_live_[A-Za-z0-9]{24}")),
|
||||
("Generic Bearer JWT", re.compile(r"Bearer\s+[A-Za-z0-9._\-]{50,}")),
|
||||
)
|
||||
|
||||
|
||||
def scan_token_patterns(text: str) -> ScanResult | None:
|
||||
for name, pattern in TOKEN_PATTERNS:
|
||||
if pattern.search(text):
|
||||
return ScanResult(
|
||||
severity="block",
|
||||
reason=f"outbound request contains {name}",
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Known secrets detector (Phase 1b)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _encoded_variants(secret: str) -> list[str]:
|
||||
"""Return the secret plus base64, URL-encoded, and hex variants."""
|
||||
variants = [secret]
|
||||
secret_bytes = secret.encode("utf-8")
|
||||
b64 = base64.b64encode(secret_bytes).decode("ascii")
|
||||
if b64 != secret:
|
||||
variants.append(b64)
|
||||
url_enc = url_quote(secret, safe="")
|
||||
if url_enc != secret:
|
||||
variants.append(url_enc)
|
||||
hex_enc = secret_bytes.hex()
|
||||
if hex_enc != secret:
|
||||
variants.append(hex_enc)
|
||||
return variants
|
||||
|
||||
|
||||
def scan_known_secrets(
|
||||
text: str,
|
||||
*,
|
||||
env: typing.Mapping[str, str] | None = None,
|
||||
) -> ScanResult | None:
|
||||
if env is None:
|
||||
return None
|
||||
for key, value in env.items():
|
||||
if not key.startswith("EGRESS_TOKEN_") or not value:
|
||||
continue
|
||||
for variant in _encoded_variants(value):
|
||||
if variant in text:
|
||||
return ScanResult(
|
||||
severity="block",
|
||||
reason=(
|
||||
f"outbound request contains provisioned secret "
|
||||
f"from {key}"
|
||||
),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Naive prompt injection detector (Phase 2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DISCLOSURE_PHRASES: tuple[re.Pattern[str], ...] = (
|
||||
re.compile(r"(?i)system\s+prompt"),
|
||||
re.compile(r"(?i)my\s+instructions\s+are"),
|
||||
re.compile(r"(?i)original\s+instructions"),
|
||||
re.compile(r"(?i)secret\s+instructions"),
|
||||
re.compile(r"(?i)hidden\s+rules"),
|
||||
)
|
||||
|
||||
JAILBREAK_PHRASES: tuple[re.Pattern[str], ...] = (
|
||||
re.compile(r"(?i)ignore\s+previous"),
|
||||
re.compile(r"(?i)forget\s+everything"),
|
||||
re.compile(r"(?i)disregard\s+(?:all\s+)?(?:previous|prior)"),
|
||||
re.compile(r"(?i)pretend\s+you\s+are"),
|
||||
re.compile(r"(?i)act\s+as\s+(?:if|though)"),
|
||||
)
|
||||
|
||||
|
||||
def scan_naive_injection(text: str) -> ScanResult | None:
|
||||
disclosure = any(p.search(text) for p in DISCLOSURE_PHRASES)
|
||||
token = scan_token_patterns(text) is not None
|
||||
|
||||
# Tier 1: credential + disclosure = BLOCK
|
||||
if disclosure and token:
|
||||
return ScanResult(
|
||||
severity="block",
|
||||
reason="prompt disclosure with embedded credential in response",
|
||||
)
|
||||
|
||||
# Tier 2: multiple jailbreak phrases = WARN
|
||||
jailbreak_count = sum(1 for p in JAILBREAK_PHRASES if p.search(text))
|
||||
if jailbreak_count >= 2:
|
||||
return ScanResult(
|
||||
severity="warn",
|
||||
reason=f"{jailbreak_count} jailbreak phrases detected in response",
|
||||
)
|
||||
|
||||
# Tier 2b: explicit prompt disclosure without credential = WARN
|
||||
if disclosure and "system prompt:" in text.lower():
|
||||
return ScanResult(
|
||||
severity="warn",
|
||||
reason="explicit system prompt disclosure in response",
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"TOKEN_PATTERNS",
|
||||
"scan_known_secrets",
|
||||
"scan_naive_injection",
|
||||
"scan_token_patterns",
|
||||
]
|
||||
+113
-118
@@ -1,24 +1,10 @@
|
||||
"""Per-bottle egress proxy (PRD 0017).
|
||||
|
||||
Replaces the cred-proxy sidecar (PRD 0010) with a mitmproxy-based
|
||||
sidecar that becomes the agent's `HTTP_PROXY` / `HTTPS_PROXY`. It
|
||||
owns three jobs:
|
||||
|
||||
1. MITM the agent's HTTPS with the per-bottle CA.
|
||||
2. Enforce manifest-declared `path_allowlist` per route.
|
||||
3. Inject `Authorization` headers for routes that declare an
|
||||
`auth` block, the same way cred-proxy does today.
|
||||
"""Per-bottle egress proxy (PRD 0017, PRD 0053).
|
||||
|
||||
This module defines the abstract proxy (`Egress`), its plan
|
||||
dataclass (`EgressPlan`), and the resolved per-route shape
|
||||
(`EgressRoute`). The sidecar's start/stop lifecycle is backend-
|
||||
specific and lives on concrete subclasses (see
|
||||
`bot_bottle/backend/docker/egress.py`).
|
||||
|
||||
Chunks 1+2 of the PRD: this module + the mitmproxy addon + the Docker
|
||||
lifecycle are wired into the agent's `HTTP_PROXY` path; cred-proxy
|
||||
has been removed. Chunk 3 retargets the cred-proxy-block remediation
|
||||
flow (PRD 0014) at egress and renames the MCP tool.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -37,18 +23,8 @@ if TYPE_CHECKING:
|
||||
|
||||
CODEX_HOST_CREDENTIAL_TOKEN_REF = "BOT_BOTTLE_CODEX_HOST_ACCESS_TOKEN"
|
||||
|
||||
|
||||
# DNS name agents will dial for the per-bottle egress sidecar.
|
||||
# Backend-agnostic by contract: every concrete backend (Docker today,
|
||||
# others later) attaches this name to its sidecar on the bottle's
|
||||
# internal network. The agent's `HTTP_PROXY` env var resolves to
|
||||
# `http://egress:<port>` once chunk 2 cuts over.
|
||||
EGRESS_HOSTNAME = "egress"
|
||||
|
||||
# In-container path the addon reads. Pre-created in
|
||||
# `Dockerfile.sidecars` so the host bind-mount can drop the file
|
||||
# directly. Content is YAML (hand-rolled by `egress_render_routes`,
|
||||
# parsed by `yaml_subset` inside the addon).
|
||||
EGRESS_ROUTES_IN_CONTAINER = "/etc/egress/routes.yaml"
|
||||
|
||||
|
||||
@@ -56,17 +32,13 @@ EGRESS_ROUTES_IN_CONTAINER = "/etc/egress/routes.yaml"
|
||||
class EgressRoute(Route):
|
||||
"""Host-side extension of the addon's `Route`.
|
||||
|
||||
Inherits `host`, `path_allowlist`, `auth_scheme`, and `token_env`
|
||||
Inherits `host`, `matches`, `auth_scheme`, and `token_env`
|
||||
from `egress_addon_core.Route` — those are the fields that cross the
|
||||
YAML wire into the sidecar. The three fields below are host-only and
|
||||
YAML wire into the sidecar. The fields below are host-only and
|
||||
are never serialised to the addon.
|
||||
|
||||
`token_ref` is the host env var the CLI reads at launch and forwards
|
||||
into the container's environ under `token_env`. Routes that share a
|
||||
`token_ref` coalesce to one `token_env` slot.
|
||||
|
||||
`roles` carries the manifest route's role tuple (reserved for
|
||||
future use; always empty today).
|
||||
into the container's environ under `token_env`.
|
||||
|
||||
`roles` carries the manifest route's role tuple (reserved for
|
||||
future use; always empty today)."""
|
||||
@@ -77,33 +49,6 @@ class EgressRoute(Route):
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EgressPlan:
|
||||
"""Output of Egress.prepare; consumed by .start.
|
||||
|
||||
The slug + routes_path + routes + token_env_map fields are
|
||||
filled at prepare time (host-side, side-effect-free on docker).
|
||||
The network + CA fields are populated by the backend's launch step
|
||||
via `dataclasses.replace` once those resources exist. Empty defaults
|
||||
are sentinels meaning "not yet set"; `.start` validates that they are
|
||||
populated.
|
||||
|
||||
`token_env_map` is `{<token_env in container>: <token_ref on host>}`.
|
||||
The backend's start step reads `os.environ[token_ref]` and
|
||||
forwards the value into the egress container's environ
|
||||
under `token_env`. The plan itself never holds token values —
|
||||
secrets never land in a dataclass that might be logged.
|
||||
|
||||
`mitmproxy_ca_host_path` is the host path of the per-bottle
|
||||
egress CA (single PEM with cert+key concatenated) minted
|
||||
by `egress_tls_init`. `.start` docker-cps it into the
|
||||
sidecar at `~/.mitmproxy/mitmproxy-ca.pem` — mitmproxy reads
|
||||
that file at boot to mint per-host leaf certs.
|
||||
|
||||
`mitmproxy_ca_cert_only_host_path` is the cert-only PEM (no
|
||||
key) for installing into the agent's trust store via
|
||||
`provision_ca`. Separate file rather than re-parsing the
|
||||
concat so secrets and trust artefacts stay on distinct paths.
|
||||
"""
|
||||
|
||||
slug: str
|
||||
routes_path: Path
|
||||
routes: tuple[EgressRoute, ...]
|
||||
@@ -117,18 +62,34 @@ class EgressPlan:
|
||||
def egress_manifest_routes(
|
||||
bottle: Bottle,
|
||||
) -> tuple[EgressRoute, ...]:
|
||||
"""Lift each `bottle.egress.routes[]` manifest entry into an EgressRoute.
|
||||
Order is preserved. Token slots are not assigned here — slot assignment
|
||||
is a final step in `egress_routes_for_bottle` after provider and manifest
|
||||
routes are merged."""
|
||||
from .egress_addon_core import MatchEntry as CoreMatchEntry
|
||||
from .egress_addon_core import PathMatch as CorePathMatch
|
||||
from .egress_addon_core import HeaderMatch as CoreHeaderMatch
|
||||
out: list[EgressRoute] = []
|
||||
for r in bottle.egress.routes:
|
||||
core_matches: list[CoreMatchEntry] = []
|
||||
for m in r.Matches:
|
||||
core_paths = tuple(
|
||||
CorePathMatch(type=p.Type, value=p.Value)
|
||||
for p in m.Paths
|
||||
)
|
||||
core_headers = tuple(
|
||||
CoreHeaderMatch(name=h.Name, value=h.Value, type=h.Type)
|
||||
for h in m.Headers
|
||||
)
|
||||
core_matches.append(CoreMatchEntry(
|
||||
paths=core_paths,
|
||||
methods=m.Methods,
|
||||
headers=core_headers,
|
||||
))
|
||||
out.append(EgressRoute(
|
||||
host=r.Host,
|
||||
path_allowlist=r.PathAllowlist,
|
||||
matches=tuple(core_matches),
|
||||
auth_scheme=r.AuthScheme,
|
||||
token_ref=r.TokenRef,
|
||||
roles=r.Role,
|
||||
outbound_detectors=r.OutboundDetectors,
|
||||
inbound_detectors=r.InboundDetectors,
|
||||
))
|
||||
return tuple(out)
|
||||
|
||||
@@ -137,12 +98,6 @@ def egress_routes_for_bottle(
|
||||
bottle: Bottle,
|
||||
provider_routes: tuple[EgressRoute, ...] = (),
|
||||
) -> tuple[EgressRoute, ...]:
|
||||
"""Effective egress routes for the agent.
|
||||
|
||||
Provider routes own their hosts outright; manifest routes for hosts
|
||||
not claimed by any provider are appended. Token slots are assigned
|
||||
in a final pass over the merged list in order, so provisioned routes
|
||||
get the lower slot numbers."""
|
||||
manifest = egress_manifest_routes(bottle)
|
||||
provisioned_hosts = {pr.host.lower() for pr in provider_routes}
|
||||
merged = list(provider_routes) + [
|
||||
@@ -154,10 +109,6 @@ def egress_routes_for_bottle(
|
||||
def _assign_token_slots(
|
||||
routes: list[EgressRoute],
|
||||
) -> tuple[EgressRoute, ...]:
|
||||
"""Assign EGRESS_TOKEN_N slots to authenticated routes in order.
|
||||
|
||||
Routes sharing a token_ref share a slot. Unauthenticated routes
|
||||
(no auth_scheme / token_ref) keep token_env empty."""
|
||||
slot_for_ref: dict[str, str] = {}
|
||||
out: list[EgressRoute] = []
|
||||
for r in routes:
|
||||
@@ -175,13 +126,6 @@ def _assign_token_slots(
|
||||
def egress_token_env_map(
|
||||
routes: tuple[EgressRoute, ...],
|
||||
) -> dict[str, str]:
|
||||
"""Collapse the route list into `{token_env: token_ref}` for the
|
||||
authenticated routes. Routes without `auth` contribute no entry.
|
||||
|
||||
Conflict detection: two routes that share a `token_env` slot but
|
||||
name different `token_ref` host vars is a programming error in
|
||||
`egress_routes_for_bottle`; surface it as a die rather than
|
||||
silently picking one."""
|
||||
out: dict[str, str] = {}
|
||||
for r in routes:
|
||||
if not (r.auth_scheme and r.token_ref and r.token_env):
|
||||
@@ -198,29 +142,61 @@ def egress_token_env_map(
|
||||
|
||||
|
||||
def _route_to_yaml_fields(r: Route) -> dict[str, object]:
|
||||
"""Return the addon-visible fields for one route.
|
||||
|
||||
Single authoritative mapping between EgressRoute (host-side) and
|
||||
egress_addon_core.Route (sidecar-side). When a field is added to
|
||||
the addon's Route that must appear in the YAML, add it here and
|
||||
in egress_addon_core._parse_one together."""
|
||||
fields: dict[str, object] = {"host": r.host}
|
||||
if r.auth_scheme and r.token_env:
|
||||
fields["auth_scheme"] = r.auth_scheme
|
||||
fields["token_env"] = r.token_env
|
||||
if r.path_allowlist:
|
||||
fields["path_allowlist"] = list(r.path_allowlist)
|
||||
if r.matches:
|
||||
matches_data: list[dict[str, object]] = []
|
||||
for entry in r.matches:
|
||||
entry_data: dict[str, object] = {}
|
||||
if entry.paths:
|
||||
paths_data: list[dict[str, str]] = []
|
||||
for pm in entry.paths:
|
||||
pd: dict[str, str] = {"value": pm.value}
|
||||
if pm.type != "prefix":
|
||||
pd["type"] = pm.type
|
||||
paths_data.append(pd)
|
||||
entry_data["paths"] = paths_data
|
||||
if entry.methods:
|
||||
entry_data["methods"] = list(entry.methods)
|
||||
if entry.headers:
|
||||
headers_data: list[dict[str, str]] = []
|
||||
for hm in entry.headers:
|
||||
hd: dict[str, str] = {"name": hm.name, "value": hm.value}
|
||||
if hm.type != "exact":
|
||||
hd["type"] = hm.type
|
||||
headers_data.append(hd)
|
||||
entry_data["headers"] = headers_data
|
||||
matches_data.append(entry_data)
|
||||
fields["matches"] = matches_data
|
||||
if r.outbound_detectors is not None or r.inbound_detectors is not None:
|
||||
dlp: dict[str, object] = {}
|
||||
if r.outbound_detectors is not None:
|
||||
dlp["outbound_detectors"] = (
|
||||
False if not r.outbound_detectors
|
||||
else list(r.outbound_detectors)
|
||||
)
|
||||
if r.inbound_detectors is not None:
|
||||
dlp["inbound_detectors"] = (
|
||||
False if not r.inbound_detectors
|
||||
else list(r.inbound_detectors)
|
||||
)
|
||||
fields["dlp"] = dlp
|
||||
return fields
|
||||
|
||||
|
||||
def _yaml_scalar(v: object) -> str:
|
||||
if isinstance(v, bool):
|
||||
return "true" if v else "false"
|
||||
if isinstance(v, str):
|
||||
return f'"{v}"'
|
||||
return str(v)
|
||||
|
||||
|
||||
def egress_render_routes(
|
||||
routes: tuple[EgressRoute, ...],
|
||||
) -> str:
|
||||
"""Serialize the route table for the addon to read.
|
||||
|
||||
YAML content — no token values, no host env-var names. Fields are
|
||||
determined by `_route_to_yaml_fields`, which is the single point of
|
||||
truth for the EgressRoute → egress_addon_core.Route mapping."""
|
||||
lines: list[str] = ["routes:"]
|
||||
if not routes:
|
||||
lines[0] = "routes: []"
|
||||
@@ -231,10 +207,49 @@ def egress_render_routes(
|
||||
if "auth_scheme" in f:
|
||||
lines.append(f' auth_scheme: "{f["auth_scheme"]}"')
|
||||
lines.append(f' token_env: "{f["token_env"]}"')
|
||||
if "path_allowlist" in f:
|
||||
lines.append(" path_allowlist:")
|
||||
for p in f["path_allowlist"]: # type: ignore
|
||||
lines.append(f' - "{p}"')
|
||||
if "matches" in f:
|
||||
lines.append(" matches:")
|
||||
for entry in f["matches"]: # type: ignore
|
||||
entry_dict: dict[str, object] = entry # type: ignore
|
||||
first_key = True
|
||||
if "paths" in entry_dict:
|
||||
lines.append(" - paths:")
|
||||
first_key = False
|
||||
for pd in entry_dict["paths"]: # type: ignore
|
||||
pd_dict: dict[str, str] = pd # type: ignore
|
||||
if "type" in pd_dict:
|
||||
lines.append(f' - type: "{pd_dict["type"]}"')
|
||||
lines.append(f' value: "{pd_dict["value"]}"')
|
||||
else:
|
||||
lines.append(f' - value: "{pd_dict["value"]}"')
|
||||
if "methods" in entry_dict:
|
||||
methods_str = ", ".join(
|
||||
f'"{m}"' for m in entry_dict["methods"] # type: ignore
|
||||
)
|
||||
prefix = " - " if first_key else " "
|
||||
lines.append(f'{prefix}methods: [{methods_str}]')
|
||||
first_key = False
|
||||
if "headers" in entry_dict:
|
||||
prefix = " - " if first_key else " "
|
||||
lines.append(f"{prefix}headers:")
|
||||
first_key = False
|
||||
for hd in entry_dict["headers"]: # type: ignore
|
||||
hd_dict: dict[str, str] = hd # type: ignore
|
||||
lines.append(f' - name: "{hd_dict["name"]}"')
|
||||
lines.append(f' value: "{hd_dict["value"]}"')
|
||||
if "type" in hd_dict:
|
||||
lines.append(f' type: "{hd_dict["type"]}"')
|
||||
if first_key:
|
||||
lines.append(" - {}")
|
||||
if "dlp" in f:
|
||||
dlp_dict: dict[str, object] = f["dlp"] # type: ignore
|
||||
lines.append(" dlp:")
|
||||
for dk, dv in dlp_dict.items():
|
||||
if dv is False:
|
||||
lines.append(f" {dk}: false")
|
||||
elif isinstance(dv, list):
|
||||
items_str = ", ".join(f'"{x}"' for x in dv)
|
||||
lines.append(f" {dk}: [{items_str}]")
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
@@ -242,12 +257,6 @@ def egress_resolve_token_values(
|
||||
token_env_map: dict[str, str],
|
||||
host_env: dict[str, str],
|
||||
) -> dict[str, str]:
|
||||
"""Read `host_env[TokenRef]` for each entry in `token_env_map` and
|
||||
return `{token_env: <value>}`. Dies (with a pointer at the missing
|
||||
var name) if any TokenRef is unset.
|
||||
|
||||
Pure function: takes the host env as an argument so tests can pass
|
||||
a sealed mapping without touching `os.environ`."""
|
||||
out: dict[str, str] = {}
|
||||
for token_env, token_ref in token_env_map.items():
|
||||
value = host_env.get(token_ref)
|
||||
@@ -268,11 +277,6 @@ def egress_resolve_token_values(
|
||||
|
||||
|
||||
class Egress(ABC):
|
||||
"""The per-bottle egress proxy. Encapsulates the host-side prepare
|
||||
(route lift + routes.yaml render + token-env-map derivation); the
|
||||
sidecar's start/stop lifecycle is backend-specific and lives on
|
||||
concrete subclasses."""
|
||||
|
||||
def prepare(
|
||||
self,
|
||||
bottle: Bottle,
|
||||
@@ -280,15 +284,6 @@ class Egress(ABC):
|
||||
stage_dir: Path,
|
||||
provider_routes: tuple[EgressRoute, ...] = (),
|
||||
) -> EgressPlan:
|
||||
"""Lift `bottle.egress.routes` + `provider_routes` into resolved
|
||||
routes, render the routes file (mode 600) under `stage_dir`, and
|
||||
return the plan. Pure host-side, no docker subprocess. The
|
||||
token-env map records the mapping the launch step uses to
|
||||
forward values from the host's environ into the sidecar's environ.
|
||||
|
||||
Returned plan is incomplete: the launch step must fill
|
||||
`internal_network` / `egress_network`
|
||||
via `dataclasses.replace` before passing it to `.start`."""
|
||||
routes = egress_routes_for_bottle(bottle, provider_routes)
|
||||
routes_path = stage_dir / "egress_routes.yaml"
|
||||
routes_path.write_text(egress_render_routes(routes))
|
||||
|
||||
+51
-63
@@ -1,28 +1,7 @@
|
||||
"""mitmproxy addon entrypoint for the egress sidecar (PRD 0017).
|
||||
"""mitmproxy addon entrypoint for the egress sidecar (PRD 0017, PRD 0053).
|
||||
|
||||
Loaded by `mitmdump -s /app/egress_addon.py` inside the
|
||||
egress container. Wraps the pure logic from
|
||||
`egress_addon_core` with mitmproxy's HTTPFlow API:
|
||||
|
||||
- At startup, read `EGRESS_ROUTES` (default
|
||||
`/etc/egress/routes.yaml`, JSON content) → routes table.
|
||||
- SIGHUP re-reads the file and atomically swaps the in-memory
|
||||
table. A parse error keeps the old table in place — better to
|
||||
keep serving the old config than to leave the proxy with no
|
||||
routes after a typo.
|
||||
- On each `request`: strip the inbound Authorization header, then
|
||||
consult `decide()` for forward / block / inject-auth and apply
|
||||
the decision to the flow.
|
||||
|
||||
This file imports `mitmproxy` and is never imported on the host —
|
||||
mitmproxy is a container-only dependency. The host's tests target
|
||||
`egress_addon_core`.
|
||||
|
||||
Dockerfile.sidecars copies both this file and
|
||||
`egress_addon_core.py` flat into `/app/`; the absolute import
|
||||
below works because mitmdump runs with `/app` on its sys.path. The
|
||||
parallel file in the package source tree (bot_bottle/) is the
|
||||
build input — not a module the host imports."""
|
||||
egress container."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -35,35 +14,23 @@ from pathlib import Path
|
||||
|
||||
from mitmproxy import http # type: ignore[import-not-found]
|
||||
|
||||
# Absolute import (NOT `from .egress_addon_core`) — the
|
||||
# container drops both files flat into /app/ so they are sibling
|
||||
# top-level modules to mitmdump's loader, not a package.
|
||||
from egress_addon_core import ( # type: ignore[import-not-found]
|
||||
Route,
|
||||
decide,
|
||||
is_git_push_request,
|
||||
load_routes,
|
||||
match_route,
|
||||
scan_inbound,
|
||||
scan_outbound,
|
||||
)
|
||||
|
||||
|
||||
DEFAULT_ROUTES_PATH = "/etc/egress/routes.yaml"
|
||||
|
||||
# Magic hostname the addon recognises as an introspection target.
|
||||
# Requests through the proxy for `_egress.local/<path>` are
|
||||
# intercepted and answered with synthetic responses (the addon's
|
||||
# `request` hook sets `flow.response` before any upstream connection).
|
||||
# The hostname is not in DNS — only clients dialing through this
|
||||
# specific egress can reach it, and only via HTTP (no TLS).
|
||||
# Used by the supervise sidecar's `list-egress-routes` MCP
|
||||
# tool to surface the live route table to the agent.
|
||||
INTROSPECT_HOST = "_egress.local"
|
||||
|
||||
|
||||
class EgressAddon:
|
||||
"""The mitmproxy addon. One instance per `mitmdump` process; the
|
||||
request hook is invoked on every CONNECT-decapsulated HTTP/HTTPS
|
||||
request the agent makes."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.routes_path = os.environ.get("EGRESS_ROUTES", DEFAULT_ROUTES_PATH)
|
||||
self.routes: tuple[Route, ...] = ()
|
||||
@@ -80,9 +47,6 @@ class EgressAddon:
|
||||
f"egress: {tag} load failed: {e}\n"
|
||||
)
|
||||
if initial:
|
||||
# No baseline to fall back on; serve nothing rather
|
||||
# than masquerade as a proxy with a route table the
|
||||
# operator never declared.
|
||||
self.routes = ()
|
||||
return
|
||||
self.routes = new_routes
|
||||
@@ -102,11 +66,6 @@ class EgressAddon:
|
||||
signal.signal(signal.SIGHUP, handler)
|
||||
|
||||
def _serve_introspection(self, flow: http.HTTPFlow, path: str) -> None:
|
||||
"""Synthesize a response for `_egress.local` requests.
|
||||
Currently supports `/allowlist` which returns the in-memory
|
||||
route table as JSON (host, path_allowlist, auth_scheme,
|
||||
token_env per route — no token VALUES, those live in the
|
||||
container's environ)."""
|
||||
if path == "/allowlist":
|
||||
payload = json.dumps(
|
||||
{"routes": [dataclasses.asdict(r) for r in self.routes]},
|
||||
@@ -123,32 +82,34 @@ class EgressAddon:
|
||||
{"Content-Type": "text/plain; charset=utf-8"},
|
||||
)
|
||||
|
||||
# mitmproxy's addon API: this method name + signature is how
|
||||
# mitmdump discovers the request hook.
|
||||
def request(self, flow: http.HTTPFlow) -> None:
|
||||
request_path, _, query = flow.request.path.partition("?")
|
||||
|
||||
# Introspection: requests to the magic `_egress.local`
|
||||
# host are answered locally with a synthetic response. Check
|
||||
# before the strip-auth + route logic — these requests aren't
|
||||
# real upstream traffic, the agent isn't injecting auth, and
|
||||
# the addon's own decide() would 403 the magic host (it's
|
||||
# never in the routes table).
|
||||
if flow.request.pretty_host == INTROSPECT_HOST:
|
||||
self._serve_introspection(flow, request_path)
|
||||
return
|
||||
|
||||
# Inbound Authorization is always stripped — the agent cannot
|
||||
# smuggle a stolen token through the proxy. If the matched
|
||||
# route declares an auth pair, a fresh header is injected
|
||||
# below.
|
||||
# DLP outbound scan BEFORE stripping auth — catches tokens the
|
||||
# agent tried to smuggle in the Authorization header.
|
||||
route = match_route(self.routes, flow.request.pretty_host)
|
||||
if route is not None:
|
||||
body = flow.request.get_text(strict=False) or ""
|
||||
auth_header = flow.request.headers.get("authorization", "")
|
||||
scan_text = body
|
||||
if auth_header:
|
||||
scan_text = auth_header + "\n" + body
|
||||
dlp_result = scan_outbound(route, scan_text, os.environ)
|
||||
if dlp_result is not None and dlp_result.severity == "block":
|
||||
flow.response = http.Response.make(
|
||||
403,
|
||||
f"egress DLP: {dlp_result.reason}".encode("utf-8"),
|
||||
{"Content-Type": "text/plain; charset=utf-8"},
|
||||
)
|
||||
return
|
||||
|
||||
# Strip inbound Authorization — agent cannot smuggle tokens.
|
||||
flow.request.headers.pop("authorization", None)
|
||||
|
||||
# Universal HTTPS git-push block. Defense-in-depth: git-gate
|
||||
# (PRD 0008) is the only sanctioned outbound path for git
|
||||
# writes — its pre-receive runs gitleaks. Letting HTTPS push
|
||||
# through egress + auth injection would route around
|
||||
# that scan, so we 403 before any route logic.
|
||||
if is_git_push_request(request_path, query):
|
||||
flow.response = http.Response.make(
|
||||
403,
|
||||
@@ -161,11 +122,16 @@ class EgressAddon:
|
||||
)
|
||||
return
|
||||
|
||||
# Build headers mapping for match evaluation
|
||||
req_headers = {k.lower(): v for k, v in flow.request.headers.items()}
|
||||
|
||||
decision = decide(
|
||||
self.routes,
|
||||
flow.request.pretty_host,
|
||||
request_path,
|
||||
os.environ,
|
||||
request_method=flow.request.method,
|
||||
request_headers=req_headers,
|
||||
)
|
||||
|
||||
if decision.action == "block":
|
||||
@@ -179,5 +145,27 @@ class EgressAddon:
|
||||
if decision.inject_authorization is not None:
|
||||
flow.request.headers["authorization"] = decision.inject_authorization
|
||||
|
||||
def response(self, flow: http.HTTPFlow) -> None:
|
||||
"""DLP inbound scan on response bodies (PRD 0053)."""
|
||||
route = match_route(self.routes, flow.request.pretty_host)
|
||||
if route is None:
|
||||
return
|
||||
if flow.response is None:
|
||||
return
|
||||
body = flow.response.get_text(strict=False) or ""
|
||||
if not body:
|
||||
return
|
||||
result = scan_inbound(route, body)
|
||||
if result is None:
|
||||
return
|
||||
if result.severity == "block":
|
||||
flow.response = http.Response.make(
|
||||
403,
|
||||
f"egress DLP: {result.reason}".encode("utf-8"),
|
||||
{"Content-Type": "text/plain; charset=utf-8"},
|
||||
)
|
||||
elif result.severity == "warn":
|
||||
sys.stderr.write(f"egress DLP warn: {result.reason}\n")
|
||||
|
||||
|
||||
addons = [EgressAddon()]
|
||||
|
||||
+398
-118
@@ -1,4 +1,4 @@
|
||||
"""Pure logic for the egress mitmproxy addon (PRD 0017).
|
||||
"""Pure logic for the egress mitmproxy addon (PRD 0017, PRD 0053).
|
||||
|
||||
Split out of `egress_addon.py` so the host's unit tests can
|
||||
exercise the parse + decision functions without depending on the
|
||||
@@ -8,74 +8,254 @@ container.
|
||||
|
||||
Imports: stdlib + `yaml_subset` (which is itself stdlib-only and
|
||||
ships flat into the sidecar bundle image alongside this file —
|
||||
see `Dockerfile.sidecars`).
|
||||
"""
|
||||
see `Dockerfile.sidecars`)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import typing
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Absolute import — `yaml_subset.py` is copied flat into the bundle
|
||||
# image's `/app/` next to this file (via `Dockerfile.sidecars`).
|
||||
# The host-side unit tests run with the repo on sys.path, where the
|
||||
# import resolves under the `bot_bottle` package. The try/except
|
||||
# shim picks whichever import works.
|
||||
try:
|
||||
from yaml_subset import YamlSubsetError, parse_yaml_subset # type: ignore[import-not-found]
|
||||
except ImportError: # pragma: no cover - host-side path
|
||||
from .yaml_subset import YamlSubsetError, parse_yaml_subset
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Match types (Gateway API HTTPRoute vocabulary, PRD 0053)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PATH_MATCH_TYPES = ("exact", "prefix", "regex")
|
||||
HEADER_MATCH_TYPES = ("exact", "regex")
|
||||
|
||||
VALID_METHODS = frozenset({
|
||||
"GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "TRACE",
|
||||
"CONNECT",
|
||||
})
|
||||
|
||||
OUTBOUND_DETECTOR_NAMES = frozenset({"token_patterns", "known_secrets"})
|
||||
INBOUND_DETECTOR_NAMES = frozenset({"naive_injection_detection"})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathMatch:
|
||||
type: str # "exact" | "prefix" | "regex"
|
||||
value: str
|
||||
compiled: re.Pattern[str] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HeaderMatch:
|
||||
name: str
|
||||
value: str
|
||||
type: str = "exact" # "exact" | "regex"
|
||||
compiled: re.Pattern[str] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MatchEntry:
|
||||
paths: tuple[PathMatch, ...] = ()
|
||||
methods: tuple[str, ...] = ()
|
||||
headers: tuple[HeaderMatch, ...] = ()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Route:
|
||||
"""One row of the egress route table.
|
||||
|
||||
`host` is the request's `Host` header (or SNI hostname) to match
|
||||
against. `path_allowlist` is an optional tuple of absolute path
|
||||
prefixes the request path must start with; empty tuple means no
|
||||
path constraint. `auth_scheme` and `token_env` together form the
|
||||
credential-injection pair (both set or both empty); a non-empty
|
||||
pair tells the addon to overwrite the inbound Authorization with
|
||||
`<auth_scheme> <value-of-environ[token_env]>`.
|
||||
"""
|
||||
|
||||
host: str
|
||||
path_allowlist: tuple[str, ...] = ()
|
||||
matches: tuple[MatchEntry, ...] = ()
|
||||
auth_scheme: str = ""
|
||||
token_env: str = ""
|
||||
outbound_detectors: tuple[str, ...] | None = None
|
||||
inbound_detectors: tuple[str, ...] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Decision:
|
||||
"""The result of `decide()`. Either forward (with optional
|
||||
`inject_authorization` header) or block (with a `reason` to surface
|
||||
to the agent)."""
|
||||
|
||||
action: str # "forward" or "block"
|
||||
reason: str = ""
|
||||
inject_authorization: str | None = None
|
||||
|
||||
|
||||
def parse_routes(payload: object) -> tuple[Route, ...]:
|
||||
"""Parse the routes-file payload (already JSON-decoded) into a
|
||||
tuple of `Route`s. Raises `ValueError` on any malformed entry —
|
||||
the caller decides whether to keep the old table or refuse to
|
||||
start.
|
||||
@dataclass(frozen=True)
|
||||
class ScanResult:
|
||||
severity: str # "block" or "warn"
|
||||
reason: str
|
||||
|
||||
Schema:
|
||||
{
|
||||
"routes": [
|
||||
{
|
||||
"host": "api.github.com",
|
||||
"path_allowlist": ["/repos/x/", "/users/x"], # optional
|
||||
"auth_scheme": "Bearer", # optional
|
||||
"token_env": "EGRESS_TOKEN_0" # optional
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_path_match(idx: int, j: int, raw: object) -> PathMatch:
|
||||
label = f"route[{idx}] matches paths[{j}]"
|
||||
if not isinstance(raw, dict):
|
||||
raise ValueError(f"{label}: must be an object")
|
||||
raw_dict: dict[str, object] = typing.cast(dict[str, object], raw)
|
||||
ptype = raw_dict.get("type", "prefix")
|
||||
if not isinstance(ptype, str) or ptype not in PATH_MATCH_TYPES:
|
||||
raise ValueError(
|
||||
f"{label}: 'type' must be one of {', '.join(PATH_MATCH_TYPES)} "
|
||||
f"(got {ptype!r})"
|
||||
)
|
||||
value = raw_dict.get("value")
|
||||
if not isinstance(value, str) or not value:
|
||||
raise ValueError(f"{label}: 'value' must be a non-empty string")
|
||||
if ptype in ("exact", "prefix") and not value.startswith("/"):
|
||||
raise ValueError(
|
||||
f"{label}: value {value!r} must start with '/' for "
|
||||
f"type {ptype!r}"
|
||||
)
|
||||
compiled: re.Pattern[str] | None = None
|
||||
if ptype == "regex":
|
||||
try:
|
||||
compiled = re.compile(value)
|
||||
except re.error as e:
|
||||
raise ValueError(
|
||||
f"{label}: regex {value!r} failed to compile: {e}"
|
||||
) from e
|
||||
for k in raw_dict:
|
||||
if k not in ("type", "value"):
|
||||
raise ValueError(f"{label}: unknown key {k!r}")
|
||||
return PathMatch(type=ptype, value=value, compiled=compiled)
|
||||
|
||||
|
||||
def _parse_header_match(idx: int, j: int, raw: object) -> HeaderMatch:
|
||||
label = f"route[{idx}] matches headers[{j}]"
|
||||
if not isinstance(raw, dict):
|
||||
raise ValueError(f"{label}: must be an object")
|
||||
raw_dict: dict[str, object] = typing.cast(dict[str, object], raw)
|
||||
name = raw_dict.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
raise ValueError(f"{label}: 'name' must be a non-empty string")
|
||||
value = raw_dict.get("value")
|
||||
if not isinstance(value, str):
|
||||
raise ValueError(f"{label}: 'value' must be a string")
|
||||
htype = raw_dict.get("type", "exact")
|
||||
if not isinstance(htype, str) or htype not in HEADER_MATCH_TYPES:
|
||||
raise ValueError(
|
||||
f"{label}: 'type' must be one of {', '.join(HEADER_MATCH_TYPES)} "
|
||||
f"(got {htype!r})"
|
||||
)
|
||||
compiled: re.Pattern[str] | None = None
|
||||
if htype == "regex":
|
||||
try:
|
||||
compiled = re.compile(value)
|
||||
except re.error as e:
|
||||
raise ValueError(
|
||||
f"{label}: regex {value!r} failed to compile: {e}"
|
||||
) from e
|
||||
for k in raw_dict:
|
||||
if k not in ("name", "value", "type"):
|
||||
raise ValueError(f"{label}: unknown key {k!r}")
|
||||
return HeaderMatch(name=name, value=value, type=htype, compiled=compiled)
|
||||
|
||||
|
||||
def _parse_match_entry(idx: int, k: int, raw: object) -> MatchEntry:
|
||||
label = f"route[{idx}] matches[{k}]"
|
||||
if not isinstance(raw, dict):
|
||||
raise ValueError(f"{label}: must be an object")
|
||||
raw_dict: dict[str, object] = typing.cast(dict[str, object], raw)
|
||||
|
||||
paths: tuple[PathMatch, ...] = ()
|
||||
paths_raw = raw_dict.get("paths")
|
||||
if paths_raw is not None:
|
||||
if not isinstance(paths_raw, list):
|
||||
raise ValueError(f"{label}: 'paths' must be a list")
|
||||
paths_list = typing.cast(list[object], paths_raw)
|
||||
paths = tuple(_parse_path_match(idx, j, p) for j, p in enumerate(paths_list))
|
||||
|
||||
methods: tuple[str, ...] = ()
|
||||
methods_raw = raw_dict.get("methods")
|
||||
if methods_raw is not None:
|
||||
if not isinstance(methods_raw, list):
|
||||
raise ValueError(f"{label}: 'methods' must be a list")
|
||||
methods_list = typing.cast(list[object], methods_raw)
|
||||
normalised: list[str] = []
|
||||
for j, m in enumerate(methods_list):
|
||||
if not isinstance(m, str):
|
||||
raise ValueError(f"{label}: methods[{j}] must be a string")
|
||||
upper = m.upper()
|
||||
if upper not in VALID_METHODS:
|
||||
raise ValueError(
|
||||
f"{label}: methods[{j}] {m!r} is not a valid HTTP method"
|
||||
)
|
||||
normalised.append(upper)
|
||||
methods = tuple(normalised)
|
||||
|
||||
headers: tuple[HeaderMatch, ...] = ()
|
||||
headers_raw = raw_dict.get("headers")
|
||||
if headers_raw is not None:
|
||||
if not isinstance(headers_raw, list):
|
||||
raise ValueError(f"{label}: 'headers' must be a list")
|
||||
headers_list = typing.cast(list[object], headers_raw)
|
||||
headers = tuple(
|
||||
_parse_header_match(idx, j, h) for j, h in enumerate(headers_list)
|
||||
)
|
||||
|
||||
for key in raw_dict:
|
||||
if key not in ("paths", "methods", "headers"):
|
||||
raise ValueError(f"{label}: unknown key {key!r}")
|
||||
|
||||
return MatchEntry(paths=paths, methods=methods, headers=headers)
|
||||
|
||||
|
||||
def _parse_detectors(
|
||||
idx: int,
|
||||
host: str,
|
||||
raw_dict: dict[str, object],
|
||||
) -> tuple[tuple[str, ...] | None, tuple[str, ...] | None]:
|
||||
"""Parse the optional `dlp` block on a route, returning
|
||||
(outbound_detectors, inbound_detectors)."""
|
||||
dlp_raw = raw_dict.get("dlp")
|
||||
if dlp_raw is None:
|
||||
return None, None
|
||||
label = f"route[{idx}] ({host})"
|
||||
if not isinstance(dlp_raw, dict):
|
||||
raise ValueError(f"{label}: 'dlp' must be an object")
|
||||
dlp = typing.cast(dict[str, object], dlp_raw)
|
||||
|
||||
def _parse_detector_field(
|
||||
field: str,
|
||||
valid_names: frozenset[str],
|
||||
) -> tuple[str, ...] | None:
|
||||
val = dlp.get(field)
|
||||
if val is None:
|
||||
return None
|
||||
if val is False:
|
||||
return ()
|
||||
if not isinstance(val, list):
|
||||
raise ValueError(
|
||||
f"{label}: dlp.{field} must be false, a list, or omitted"
|
||||
)
|
||||
items = typing.cast(list[object], val)
|
||||
names: list[str] = []
|
||||
for j, item in enumerate(items):
|
||||
if not isinstance(item, str):
|
||||
raise ValueError(
|
||||
f"{label}: dlp.{field}[{j}] must be a string"
|
||||
)
|
||||
if item not in valid_names:
|
||||
raise ValueError(
|
||||
f"{label}: dlp.{field}[{j}] {item!r} is not a valid "
|
||||
f"detector name; valid names: {', '.join(sorted(valid_names))}"
|
||||
)
|
||||
names.append(item)
|
||||
return tuple(names)
|
||||
|
||||
outbound = _parse_detector_field("outbound_detectors", OUTBOUND_DETECTOR_NAMES)
|
||||
inbound = _parse_detector_field("inbound_detectors", INBOUND_DETECTOR_NAMES)
|
||||
|
||||
for k in dlp:
|
||||
if k not in ("outbound_detectors", "inbound_detectors"):
|
||||
raise ValueError(
|
||||
f"{label}: dlp has unknown key {k!r}; accepted keys "
|
||||
f"are 'outbound_detectors', 'inbound_detectors'"
|
||||
)
|
||||
return outbound, inbound
|
||||
|
||||
|
||||
def parse_routes(payload: object) -> tuple[Route, ...]:
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("routes payload: top-level must be an object")
|
||||
payload_dict: dict[str, object] = typing.cast(dict[str, object], payload)
|
||||
@@ -98,32 +278,24 @@ def _parse_one(idx: int, raw: object) -> Route:
|
||||
if not isinstance(host, str) or not host:
|
||||
raise ValueError(f"{label}: 'host' must be a non-empty string")
|
||||
|
||||
path_allow_raw: object = raw_dict.get("path_allowlist", [])
|
||||
if not isinstance(path_allow_raw, list):
|
||||
raise ValueError(f"{label} ({host}): 'path_allowlist' must be a list")
|
||||
path_allow_list: list[object] = typing.cast(list[object], path_allow_raw)
|
||||
prefixes: list[str] = []
|
||||
for j, p in enumerate(path_allow_list):
|
||||
if not isinstance(p, str):
|
||||
raise ValueError(
|
||||
f"{label} ({host}): path_allowlist[{j}] must be a string"
|
||||
)
|
||||
if not p.startswith("/"):
|
||||
raise ValueError(
|
||||
f"{label} ({host}): path_allowlist[{j}] {p!r} must be an "
|
||||
f"absolute path prefix starting with '/'"
|
||||
)
|
||||
prefixes.append(p)
|
||||
# matches
|
||||
matches: tuple[MatchEntry, ...] = ()
|
||||
matches_raw = raw_dict.get("matches")
|
||||
if matches_raw is not None:
|
||||
if not isinstance(matches_raw, list):
|
||||
raise ValueError(f"{label} ({host}): 'matches' must be a list")
|
||||
matches_list = typing.cast(list[object], matches_raw)
|
||||
matches = tuple(
|
||||
_parse_match_entry(idx, k, m) for k, m in enumerate(matches_list)
|
||||
)
|
||||
|
||||
# auth (unchanged wire format)
|
||||
auth_scheme: object = raw_dict.get("auth_scheme", "")
|
||||
token_env: object = raw_dict.get("token_env", "")
|
||||
if not isinstance(auth_scheme, str):
|
||||
raise ValueError(f"{label} ({host}): 'auth_scheme' must be a string")
|
||||
if not isinstance(token_env, str):
|
||||
raise ValueError(f"{label} ({host}): 'token_env' must be a string")
|
||||
# Both-or-neither: 'auth' on the manifest side renders to this
|
||||
# pair atomically. A partial pair here means the renderer or a
|
||||
# hand-edited file is broken.
|
||||
if bool(auth_scheme) != bool(token_env):
|
||||
raise ValueError(
|
||||
f"{label} ({host}): 'auth_scheme' and 'token_env' must be both "
|
||||
@@ -131,19 +303,30 @@ def _parse_one(idx: int, raw: object) -> Route:
|
||||
f"token_env={token_env!r})"
|
||||
)
|
||||
|
||||
# dlp detectors
|
||||
outbound_detectors, inbound_detectors = _parse_detectors(
|
||||
idx, host, raw_dict,
|
||||
)
|
||||
|
||||
for k in raw_dict:
|
||||
if k not in ("host", "matches", "auth_scheme", "token_env", "dlp"):
|
||||
raise ValueError(
|
||||
f"{label} ({host}): unknown key {k!r}; accepted keys "
|
||||
f"are 'host', 'matches', 'auth_scheme', 'token_env', 'dlp'"
|
||||
)
|
||||
|
||||
return Route(
|
||||
host=host,
|
||||
path_allowlist=tuple(prefixes),
|
||||
matches=matches,
|
||||
auth_scheme=auth_scheme,
|
||||
token_env=token_env,
|
||||
outbound_detectors=outbound_detectors,
|
||||
inbound_detectors=inbound_detectors,
|
||||
)
|
||||
|
||||
|
||||
def load_routes(text: str) -> tuple[Route, ...]:
|
||||
"""Parse YAML text → routes. Raises `ValueError` for both
|
||||
decode and shape errors so callers handle them uniformly.
|
||||
`YamlSubsetError` from the parser is a `ValueError` subclass so
|
||||
it already satisfies the same surface; we let it propagate."""
|
||||
"""Parse YAML text → routes."""
|
||||
try:
|
||||
payload = parse_yaml_subset(text)
|
||||
except YamlSubsetError as e:
|
||||
@@ -151,29 +334,76 @@ def load_routes(text: str) -> tuple[Route, ...]:
|
||||
return parse_routes(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Match evaluation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _path_matches(pm: PathMatch, request_path: str) -> bool:
|
||||
if pm.type == "exact":
|
||||
return request_path == pm.value
|
||||
if pm.type == "prefix":
|
||||
if request_path == pm.value:
|
||||
return True
|
||||
if not pm.value.endswith("/"):
|
||||
return request_path.startswith(pm.value + "/")
|
||||
return request_path.startswith(pm.value)
|
||||
if pm.type == "regex" and pm.compiled is not None:
|
||||
return pm.compiled.search(request_path) is not None
|
||||
return False
|
||||
|
||||
|
||||
def _entry_matches(
|
||||
entry: MatchEntry,
|
||||
request_path: str,
|
||||
request_method: str,
|
||||
request_headers: typing.Mapping[str, str],
|
||||
) -> bool:
|
||||
"""All predicates within a MatchEntry are ANDed."""
|
||||
if entry.paths:
|
||||
if not any(_path_matches(pm, request_path) for pm in entry.paths):
|
||||
return False
|
||||
if entry.methods:
|
||||
if request_method.upper() not in entry.methods:
|
||||
return False
|
||||
if entry.headers:
|
||||
for hm in entry.headers:
|
||||
header_val = request_headers.get(hm.name.lower())
|
||||
if header_val is None:
|
||||
return False
|
||||
if hm.type == "exact":
|
||||
if header_val != hm.value:
|
||||
return False
|
||||
elif hm.type == "regex" and hm.compiled is not None:
|
||||
if not hm.compiled.search(header_val):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def evaluate_matches(
|
||||
route: Route,
|
||||
request_path: str,
|
||||
request_method: str = "GET",
|
||||
request_headers: typing.Mapping[str, str] | None = None,
|
||||
) -> bool:
|
||||
"""Return True if the request matches this route's match entries.
|
||||
Empty matches tuple means all requests match (bare-pass route)."""
|
||||
if not route.matches:
|
||||
return True
|
||||
hdrs: typing.Mapping[str, str] = request_headers or {}
|
||||
return any(
|
||||
_entry_matches(entry, request_path, request_method, hdrs)
|
||||
for entry in route.matches
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Git push detection (unchanged)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def is_git_push_request(path: str, query: str) -> bool:
|
||||
"""Return True if the request is a git smart-HTTP push.
|
||||
|
||||
git push over HTTPS hits two endpoints:
|
||||
GET <repo>/info/refs?service=git-receive-pack (capabilities)
|
||||
POST <repo>/git-receive-pack (the push)
|
||||
|
||||
Fetches use `service=git-upload-pack` / `/git-upload-pack` and
|
||||
are unaffected. Egress-proxy refuses HTTPS push because git-gate's
|
||||
pre-receive gitleaks scan is the gate for outbound git data;
|
||||
routing push through egress would bypass that. Use the
|
||||
bottle.git SSH path if you need to push.
|
||||
|
||||
Universal across routes — the block fires even when no
|
||||
egress route matches the host. A bare-pass route (host with
|
||||
no auth, no path_allowlist) would otherwise let push through to
|
||||
the upstream untouched.
|
||||
"""
|
||||
if path.endswith("/git-receive-pack"):
|
||||
return True
|
||||
if path.endswith("/info/refs"):
|
||||
# Query string is parsed leniently — `service=git-receive-pack`
|
||||
# may appear with other params in any order.
|
||||
for pair in query.split("&"):
|
||||
k, _, v = pair.partition("=")
|
||||
if k == "service" and v == "git-receive-pack":
|
||||
@@ -181,18 +411,14 @@ def is_git_push_request(path: str, query: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Route lookup + decision
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def match_route(
|
||||
routes: typing.Sequence[Route],
|
||||
request_host: str,
|
||||
) -> Route | None:
|
||||
"""Return the first route whose `host` matches `request_host`
|
||||
exactly (case-insensitive). DNS names are case-insensitive.
|
||||
|
||||
Wildcard hosts (`*.foo.com`) are NOT supported — they caused
|
||||
too many edge cases (apex match? cert validation?) for too
|
||||
little payoff. Operators that need
|
||||
multiple subdomains declare them individually (or one common
|
||||
parent host as a bare-pass route)."""
|
||||
target = request_host.lower()
|
||||
for r in routes:
|
||||
if r.host.lower() == target:
|
||||
@@ -205,23 +431,9 @@ def decide(
|
||||
request_host: str,
|
||||
request_path: str,
|
||||
environ: typing.Mapping[str, str],
|
||||
request_method: str = "GET",
|
||||
request_headers: typing.Mapping[str, str] | None = None,
|
||||
) -> Decision:
|
||||
"""Pure decision: given a route table + request host + path + env,
|
||||
return what the addon should do with the request.
|
||||
|
||||
- No matching route → BLOCK. The route table is the bottle's
|
||||
egress allowlist. A bottle that wants a
|
||||
host reachable from the agent must declare a route for it
|
||||
(bare-pass route — no `auth`, no `path_allowlist` — is fine
|
||||
for hosts that just need passthrough).
|
||||
- Matching route with `path_allowlist` set, request path doesn't
|
||||
start with any of the allowed prefixes → block with a clear
|
||||
reason.
|
||||
- Matching route with an auth pair → forward + inject
|
||||
Authorization. Token comes from `environ[route.token_env]`;
|
||||
missing/empty values block (route declared auth but the secret
|
||||
isn't here — operator misconfig).
|
||||
"""
|
||||
route = match_route(routes, request_host)
|
||||
if route is None:
|
||||
return Decision(
|
||||
@@ -233,15 +445,15 @@ def decide(
|
||||
),
|
||||
)
|
||||
|
||||
if route.path_allowlist:
|
||||
if not any(request_path.startswith(p) for p in route.path_allowlist):
|
||||
return Decision(
|
||||
action="block",
|
||||
reason=(
|
||||
f"egress: path {request_path!r} not in "
|
||||
f"path_allowlist for {route.host!r}"
|
||||
),
|
||||
)
|
||||
if not evaluate_matches(route, request_path, request_method, request_headers):
|
||||
return Decision(
|
||||
action="block",
|
||||
reason=(
|
||||
f"egress: request {request_method} {request_path!r} "
|
||||
f"does not match any entry in matches for "
|
||||
f"{route.host!r}"
|
||||
),
|
||||
)
|
||||
|
||||
if route.auth_scheme and route.token_env:
|
||||
token = environ.get(route.token_env, "")
|
||||
@@ -261,12 +473,80 @@ def decide(
|
||||
return Decision(action="forward")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DLP scan dispatch (PRD 0053)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detector_enabled(
|
||||
configured: tuple[str, ...] | None,
|
||||
name: str,
|
||||
) -> bool:
|
||||
"""Check if a named detector is enabled for a route direction.
|
||||
None means all enabled; empty tuple means all disabled."""
|
||||
if configured is None:
|
||||
return True
|
||||
return name in configured
|
||||
|
||||
|
||||
def scan_outbound(
|
||||
route: Route,
|
||||
body: str | bytes,
|
||||
environ: typing.Mapping[str, str],
|
||||
) -> ScanResult | None:
|
||||
# Lazy import to avoid circular deps and keep dlp_detectors optional
|
||||
# at import time (the sidecar copies it flat alongside this file).
|
||||
try:
|
||||
from dlp_detectors import scan_token_patterns, scan_known_secrets # type: ignore[import-not-found]
|
||||
except ImportError: # pragma: no cover - host-side path
|
||||
from .dlp_detectors import scan_token_patterns, scan_known_secrets # type: ignore[import-not-found]
|
||||
|
||||
text = body if isinstance(body, str) else body.decode("utf-8", errors="replace")
|
||||
|
||||
if _detector_enabled(route.outbound_detectors, "token_patterns"):
|
||||
result = scan_token_patterns(text)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
if _detector_enabled(route.outbound_detectors, "known_secrets"):
|
||||
result = scan_known_secrets(text, env=environ)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def scan_inbound(
|
||||
route: Route,
|
||||
body: str | bytes,
|
||||
) -> ScanResult | None:
|
||||
try:
|
||||
from dlp_detectors import scan_naive_injection # type: ignore[import-not-found]
|
||||
except ImportError: # pragma: no cover - host-side path
|
||||
from .dlp_detectors import scan_naive_injection # type: ignore[import-not-found]
|
||||
|
||||
text = body if isinstance(body, str) else body.decode("utf-8", errors="replace")
|
||||
|
||||
if _detector_enabled(route.inbound_detectors, "naive_injection_detection"):
|
||||
result = scan_naive_injection(text)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"Decision",
|
||||
"HeaderMatch",
|
||||
"MatchEntry",
|
||||
"PathMatch",
|
||||
"Route",
|
||||
"ScanResult",
|
||||
"decide",
|
||||
"evaluate_matches",
|
||||
"is_git_push_request",
|
||||
"load_routes",
|
||||
"match_route",
|
||||
"parse_routes",
|
||||
"scan_inbound",
|
||||
"scan_outbound",
|
||||
]
|
||||
|
||||
@@ -18,7 +18,7 @@ Bottle schema (frontmatter):
|
||||
user: { name: <str>, email: <str> } # optional
|
||||
repos: { <name>: <git-gate-entry>, ... } # optional
|
||||
egress: { routes: [ <egress-route>, ... ] }
|
||||
# route keys: host, path_allowlist, auth, role
|
||||
# route keys: host, matches, auth, role, dlp
|
||||
supervise: <bool> # optional
|
||||
|
||||
Agent schema (frontmatter):
|
||||
|
||||
+228
-68
@@ -1,32 +1,31 @@
|
||||
"""Egress routing manifest dataclasses and helpers."""
|
||||
"""Egress routing manifest dataclasses and helpers (PRD 0017, PRD 0053)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import cast
|
||||
|
||||
from .manifest_util import ManifestError, as_json_object
|
||||
|
||||
|
||||
# Auth schemes for the egress route's optional `auth` block.
|
||||
# Same values cred-proxy accepts today; `token` sidesteps the Gitea
|
||||
# token-not-Bearer quirk (go-gitea/gitea#16734).
|
||||
EGRESS_AUTH_SCHEMES = ("Bearer", "token")
|
||||
|
||||
PATH_MATCH_TYPES = ("exact", "prefix", "regex")
|
||||
HEADER_MATCH_TYPES = ("exact", "regex")
|
||||
|
||||
VALID_METHODS = frozenset({
|
||||
"GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "TRACE",
|
||||
"CONNECT",
|
||||
})
|
||||
|
||||
OUTBOUND_DETECTOR_NAMES = frozenset({"token_patterns", "known_secrets"})
|
||||
INBOUND_DETECTOR_NAMES = frozenset({"naive_injection_detection"})
|
||||
|
||||
|
||||
def validate_egress_routes(
|
||||
bottle_name: str,
|
||||
routes: tuple[EgressRoute, ...],
|
||||
) -> None:
|
||||
"""Cross-validation for `bottle.egress.routes`: hosts must be unique.
|
||||
|
||||
The proxy matches by exact-host (v1); duplicate hosts leave the
|
||||
route choice ambiguous so we reject them up front.
|
||||
|
||||
No cross-validation against `bottle.git-gate.repos` is performed.
|
||||
git-gate (SSH push/fetch) and egress (HTTPS) broker different
|
||||
protocols; declaring both for the same host is a legitimate dev
|
||||
setup."""
|
||||
seen_hosts: dict[str, None] = {}
|
||||
for r in routes:
|
||||
key = r.Host.lower()
|
||||
@@ -38,37 +37,35 @@ def validate_egress_routes(
|
||||
seen_hosts[key] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathMatch:
|
||||
Type: str = "prefix"
|
||||
Value: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HeaderMatch:
|
||||
Name: str = ""
|
||||
Value: str = ""
|
||||
Type: str = "exact"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MatchEntry:
|
||||
Paths: tuple[PathMatch, ...] = ()
|
||||
Methods: tuple[str, ...] = ()
|
||||
Headers: tuple[HeaderMatch, ...] = ()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EgressRoute:
|
||||
"""One route on the per-bottle egress sidecar (PRD 0017).
|
||||
|
||||
`Host` matches the request's hostname (case-insensitive). The
|
||||
optional `PathAllowlist` constrains the URL path to a set of
|
||||
prefixes; empty tuple means no path-level filtering. The optional
|
||||
`AuthScheme` / `TokenRef` pair drives credential injection:
|
||||
when set, the proxy strips any inbound Authorization and injects
|
||||
`<AuthScheme> <value-of-host-env-named-by-TokenRef>`. When the
|
||||
manifest's `auth` block is omitted both fields are empty strings —
|
||||
no Authorization is written, no token forwarded.
|
||||
|
||||
`Role` is reserved for future use; all role strings are currently
|
||||
rejected by the validator.
|
||||
|
||||
Validation rules (enforced in `from_dict`):
|
||||
- `host` required, non-empty.
|
||||
- `path_allowlist` optional, list of absolute path prefixes.
|
||||
- `auth` optional. If present, MUST carry both `scheme` and
|
||||
`token_ref` as non-empty strings; an empty `auth: {}` is an
|
||||
error rather than a synonym for "no auth" (omit `auth` for
|
||||
that case).
|
||||
- `role` optional, reserved — any non-empty value is rejected.
|
||||
"""
|
||||
|
||||
Host: str
|
||||
PathAllowlist: tuple[str, ...] = ()
|
||||
Matches: tuple[MatchEntry, ...] = ()
|
||||
AuthScheme: str = ""
|
||||
TokenRef: str = ""
|
||||
Role: tuple[str, ...] = ()
|
||||
OutboundDetectors: tuple[str, ...] | None = None
|
||||
InboundDetectors: tuple[str, ...] | None = None
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, bottle_name: str, idx: int, raw: object) -> "EgressRoute":
|
||||
@@ -78,30 +75,24 @@ class EgressRoute:
|
||||
if not isinstance(host, str) or not host:
|
||||
raise ManifestError(f"{label} missing required string field 'host'")
|
||||
|
||||
path_allow_raw = d.get("path_allowlist")
|
||||
prefixes: tuple[str, ...] = ()
|
||||
if path_allow_raw is not None:
|
||||
if not isinstance(path_allow_raw, list):
|
||||
# --- matches ---
|
||||
matches: tuple[MatchEntry, ...] = ()
|
||||
matches_raw = d.get("matches")
|
||||
if matches_raw is not None:
|
||||
if not isinstance(matches_raw, list):
|
||||
raise ManifestError(
|
||||
f"{label} path_allowlist must be an array "
|
||||
f"(was {type(path_allow_raw).__name__})"
|
||||
f"{label} matches must be an array "
|
||||
f"(was {type(matches_raw).__name__})"
|
||||
)
|
||||
path_list = cast(list[object], path_allow_raw)
|
||||
collected: list[str] = []
|
||||
for j, p in enumerate(path_list):
|
||||
if not isinstance(p, str):
|
||||
raise ManifestError(
|
||||
f"{label} path_allowlist[{j}] must be a string "
|
||||
f"(was {type(p).__name__})"
|
||||
)
|
||||
if not p.startswith("/"):
|
||||
raise ManifestError(
|
||||
f"{label} path_allowlist[{j}] {p!r} must be an "
|
||||
f"absolute path prefix starting with '/'"
|
||||
)
|
||||
collected.append(p)
|
||||
prefixes = tuple(collected)
|
||||
matches_list = cast(list[object], matches_raw)
|
||||
entries: list[MatchEntry] = []
|
||||
for k, entry_raw in enumerate(matches_list):
|
||||
entries.append(
|
||||
_parse_match_entry(label, k, entry_raw)
|
||||
)
|
||||
matches = tuple(entries)
|
||||
|
||||
# --- auth ---
|
||||
auth_scheme = ""
|
||||
token_ref = ""
|
||||
if "auth" in d:
|
||||
@@ -139,6 +130,7 @@ class EgressRoute:
|
||||
auth_scheme = auth_scheme_raw
|
||||
token_ref = token_ref_raw
|
||||
|
||||
# --- role (reserved) ---
|
||||
role_raw = d.get("role")
|
||||
roles: tuple[str, ...] = ()
|
||||
if role_raw is None:
|
||||
@@ -165,29 +157,197 @@ class EgressRoute:
|
||||
f"the 'role' field is reserved for future use"
|
||||
)
|
||||
|
||||
# --- dlp ---
|
||||
outbound_detectors: tuple[str, ...] | None = None
|
||||
inbound_detectors: tuple[str, ...] | None = None
|
||||
if "dlp" in d:
|
||||
outbound_detectors, inbound_detectors = _parse_dlp_block(
|
||||
label, d.get("dlp"),
|
||||
)
|
||||
|
||||
for k in d:
|
||||
if k not in ("host", "path_allowlist", "auth", "role"):
|
||||
if k not in ("host", "matches", "auth", "role", "dlp"):
|
||||
raise ManifestError(
|
||||
f"{label} has unknown key {k!r}; accepted keys are "
|
||||
f"'host', 'path_allowlist', 'auth', 'role'"
|
||||
f"'host', 'matches', 'auth', 'role', 'dlp'"
|
||||
)
|
||||
|
||||
return cls(
|
||||
Host=host,
|
||||
PathAllowlist=prefixes,
|
||||
Matches=matches,
|
||||
AuthScheme=auth_scheme,
|
||||
TokenRef=token_ref,
|
||||
Role=roles,
|
||||
OutboundDetectors=outbound_detectors,
|
||||
InboundDetectors=inbound_detectors,
|
||||
)
|
||||
|
||||
|
||||
def _parse_match_entry(
|
||||
route_label: str, k: int, raw: object,
|
||||
) -> MatchEntry:
|
||||
label = f"{route_label} matches[{k}]"
|
||||
d = as_json_object(raw, label)
|
||||
|
||||
paths: tuple[PathMatch, ...] = ()
|
||||
paths_raw = d.get("paths")
|
||||
if paths_raw is not None:
|
||||
if not isinstance(paths_raw, list):
|
||||
raise ManifestError(f"{label} paths must be an array")
|
||||
paths_list = cast(list[object], paths_raw)
|
||||
parsed_paths: list[PathMatch] = []
|
||||
for j, p_raw in enumerate(paths_list):
|
||||
parsed_paths.append(_parse_path_match(label, j, p_raw))
|
||||
paths = tuple(parsed_paths)
|
||||
|
||||
methods: tuple[str, ...] = ()
|
||||
methods_raw = d.get("methods")
|
||||
if methods_raw is not None:
|
||||
if not isinstance(methods_raw, list):
|
||||
raise ManifestError(f"{label} methods must be an array")
|
||||
methods_list = cast(list[object], methods_raw)
|
||||
normalised: list[str] = []
|
||||
for j, m in enumerate(methods_list):
|
||||
if not isinstance(m, str):
|
||||
raise ManifestError(
|
||||
f"{label} methods[{j}] must be a string"
|
||||
)
|
||||
upper = m.upper()
|
||||
if upper not in VALID_METHODS:
|
||||
raise ManifestError(
|
||||
f"{label} methods[{j}] {m!r} is not a valid HTTP method"
|
||||
)
|
||||
normalised.append(upper)
|
||||
methods = tuple(normalised)
|
||||
|
||||
headers: tuple[HeaderMatch, ...] = ()
|
||||
headers_raw = d.get("headers")
|
||||
if headers_raw is not None:
|
||||
if not isinstance(headers_raw, list):
|
||||
raise ManifestError(f"{label} headers must be an array")
|
||||
headers_list = cast(list[object], headers_raw)
|
||||
parsed_headers: list[HeaderMatch] = []
|
||||
for j, h_raw in enumerate(headers_list):
|
||||
parsed_headers.append(_parse_header_match(label, j, h_raw))
|
||||
headers = tuple(parsed_headers)
|
||||
|
||||
for key in d:
|
||||
if key not in ("paths", "methods", "headers"):
|
||||
raise ManifestError(f"{label} has unknown key {key!r}")
|
||||
|
||||
return MatchEntry(Paths=paths, Methods=methods, Headers=headers)
|
||||
|
||||
|
||||
def _parse_path_match(
|
||||
entry_label: str, j: int, raw: object,
|
||||
) -> PathMatch:
|
||||
label = f"{entry_label} paths[{j}]"
|
||||
d = as_json_object(raw, label)
|
||||
ptype = d.get("type", "prefix")
|
||||
if not isinstance(ptype, str) or ptype not in PATH_MATCH_TYPES:
|
||||
raise ManifestError(
|
||||
f"{label} type must be one of {', '.join(PATH_MATCH_TYPES)} "
|
||||
f"(got {ptype!r})"
|
||||
)
|
||||
value = d.get("value")
|
||||
if not isinstance(value, str) or not value:
|
||||
raise ManifestError(f"{label} value must be a non-empty string")
|
||||
if ptype in ("exact", "prefix") and not value.startswith("/"):
|
||||
raise ManifestError(
|
||||
f"{label} value {value!r} must start with '/' for type {ptype!r}"
|
||||
)
|
||||
if ptype == "regex":
|
||||
try:
|
||||
re.compile(value)
|
||||
except re.error as e:
|
||||
raise ManifestError(
|
||||
f"{label} regex {value!r} failed to compile: {e}"
|
||||
) from e
|
||||
for k in d:
|
||||
if k not in ("type", "value"):
|
||||
raise ManifestError(f"{label} has unknown key {k!r}")
|
||||
return PathMatch(Type=ptype, Value=value)
|
||||
|
||||
|
||||
def _parse_header_match(
|
||||
entry_label: str, j: int, raw: object,
|
||||
) -> HeaderMatch:
|
||||
label = f"{entry_label} headers[{j}]"
|
||||
d = as_json_object(raw, label)
|
||||
name = d.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
raise ManifestError(f"{label} name must be a non-empty string")
|
||||
value = d.get("value")
|
||||
if not isinstance(value, str):
|
||||
raise ManifestError(f"{label} value must be a string")
|
||||
htype = d.get("type", "exact")
|
||||
if not isinstance(htype, str) or htype not in HEADER_MATCH_TYPES:
|
||||
raise ManifestError(
|
||||
f"{label} type must be one of {', '.join(HEADER_MATCH_TYPES)} "
|
||||
f"(got {htype!r})"
|
||||
)
|
||||
if htype == "regex":
|
||||
try:
|
||||
re.compile(value)
|
||||
except re.error as e:
|
||||
raise ManifestError(
|
||||
f"{label} regex {value!r} failed to compile: {e}"
|
||||
) from e
|
||||
for k in d:
|
||||
if k not in ("name", "value", "type"):
|
||||
raise ManifestError(f"{label} has unknown key {k!r}")
|
||||
return HeaderMatch(Name=name, Value=value, Type=htype)
|
||||
|
||||
|
||||
def _parse_dlp_block(
|
||||
route_label: str,
|
||||
raw: object,
|
||||
) -> tuple[tuple[str, ...] | None, tuple[str, ...] | None]:
|
||||
label = f"{route_label} dlp"
|
||||
d = as_json_object(raw, label)
|
||||
|
||||
def _parse_field(
|
||||
field: str,
|
||||
valid_names: frozenset[str],
|
||||
) -> tuple[str, ...] | None:
|
||||
val = d.get(field)
|
||||
if val is None:
|
||||
return None
|
||||
if val is False:
|
||||
return ()
|
||||
if not isinstance(val, list):
|
||||
raise ManifestError(
|
||||
f"{label} {field} must be false, a list, or omitted"
|
||||
)
|
||||
items = cast(list[object], val)
|
||||
names: list[str] = []
|
||||
for j, item in enumerate(items):
|
||||
if not isinstance(item, str):
|
||||
raise ManifestError(
|
||||
f"{label} {field}[{j}] must be a string"
|
||||
)
|
||||
if item not in valid_names:
|
||||
raise ManifestError(
|
||||
f"{label} {field}[{j}] {item!r} is not a valid "
|
||||
f"detector; valid: {', '.join(sorted(valid_names))}"
|
||||
)
|
||||
names.append(item)
|
||||
return tuple(names)
|
||||
|
||||
outbound = _parse_field("outbound_detectors", OUTBOUND_DETECTOR_NAMES)
|
||||
inbound = _parse_field("inbound_detectors", INBOUND_DETECTOR_NAMES)
|
||||
|
||||
for k in d:
|
||||
if k not in ("outbound_detectors", "inbound_detectors"):
|
||||
raise ManifestError(
|
||||
f"{label} has unknown key {k!r}; accepted keys are "
|
||||
f"'outbound_detectors', 'inbound_detectors'"
|
||||
)
|
||||
return outbound, inbound
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EgressConfig:
|
||||
"""Per-bottle egress configuration. Today this is just the
|
||||
route table; the nesting under `egress:` leaves room for
|
||||
per-bottle proxy settings (port override, log level, etc.) in
|
||||
follow-ups."""
|
||||
|
||||
routes: tuple[EgressRoute, ...] = ()
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -137,21 +137,18 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
|
||||
"name": _sv.TOOL_EGRESS_BLOCK,
|
||||
"description": (
|
||||
"Call when egress refused your HTTPS request — host "
|
||||
"without a matching route, or a path outside the route's "
|
||||
"path_allowlist (typically a 403 from the proxy). Propose "
|
||||
"a SINGLE route to add: the host you need + (optionally) "
|
||||
"a path_allowlist + (optionally) an auth block. The "
|
||||
"supervisor merges the route into the live table at "
|
||||
"approval time — you do NOT need to see or reproduce the "
|
||||
"existing routes, and you do not pass a full routes file. "
|
||||
"If the host already has a route, the proposed "
|
||||
"path_allowlist entries are unioned with the existing "
|
||||
"ones (host stays single-route). The operator approves "
|
||||
"or rejects in the supervise TUI. On approval the "
|
||||
"supervisor writes the merged routes.yaml, SIGHUPs "
|
||||
"egress (atomic swap, no dropped connections), and "
|
||||
"writes the merged routes.yaml and SIGHUPs egress "
|
||||
"(atomic swap, no dropped connections)."
|
||||
"without a matching route, or a request that did not match "
|
||||
"the route's matches rules (typically a 403 from the "
|
||||
"proxy). Propose a SINGLE route to add: the host you "
|
||||
"need + (optionally) a path_allowlist of path prefixes + "
|
||||
"(optionally) an auth block. The supervisor merges the "
|
||||
"route into the live table at approval time — you do NOT "
|
||||
"need to see or reproduce the existing routes. If the "
|
||||
"host already has a route, the proposed paths are unioned "
|
||||
"with the existing ones (host stays single-route). The "
|
||||
"operator approves or rejects in the supervise TUI. On "
|
||||
"approval the supervisor writes the merged routes.yaml "
|
||||
"and SIGHUPs egress (no dropped connections)."
|
||||
),
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
@@ -169,7 +166,8 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
|
||||
"description": (
|
||||
"Optional URL path prefixes the route permits. "
|
||||
"Each must start with '/'. Omit to allow all "
|
||||
"paths under this host (bare-pass route)."
|
||||
"paths under this host (bare-pass route). "
|
||||
"Internally converted to matches entries."
|
||||
),
|
||||
},
|
||||
"auth": {
|
||||
@@ -203,7 +201,7 @@ TOOL_DEFINITIONS: list[dict[str, object]] = [
|
||||
"description": (
|
||||
"List the current egress route table — the bottle's "
|
||||
"allowlist. Returns JSON with one entry per allowed host, "
|
||||
"each carrying its path_allowlist (if any) and whether "
|
||||
"each carrying its matches rules (if any) and whether "
|
||||
"the proxy injects Authorization for the route. Use this "
|
||||
"before composing an `egress-block` proposal so the new "
|
||||
"routes file extends the live one rather than replacing it."
|
||||
|
||||
Reference in New Issue
Block a user