feat(egress-proxy): retarget remediation flow (PRD 0017 chunk 3) #30
@@ -90,34 +90,20 @@ def _hosts_in_routes(content: str) -> list[str]:
|
|||||||
|
|
||||||
|
|
||||||
# Pipelock's allowlist parser accepts only literal hostnames:
|
# Pipelock's allowlist parser accepts only literal hostnames:
|
||||||
# `[A-Za-z0-9_.-]+`. Wildcard hosts (e.g. `*.example.com`) that
|
# `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals,
|
||||||
# egress-proxy's route table accepts get normalised here by
|
# stray characters) is silently dropped from the mirror so the
|
||||||
# stripping the leading `*.` (so `*.example.com` → `example.com`)
|
# pipelock apply doesn't fail at parse time before the new YAML is even
|
||||||
# — egress-proxy retains the wildcard for its own host matching,
|
# written. The dropped hosts stay on egress-proxy's route table —
|
||||||
# and pipelock's allowlist gets the suffix, which still permits
|
# but the addon does exact-host match only, so they'll never
|
||||||
# the wildcard-matched upstream connections without expanding to
|
# match anything either. (Wildcard host matching was removed —
|
||||||
# arbitrary subdomains. Hosts that still don't fit the pipelock
|
# see `match_route` in egress_proxy_addon_core for the rationale.)
|
||||||
# charset after normalisation (bare `*`, IPv6 literals, weird
|
|
||||||
# chars) are silently skipped.
|
|
||||||
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")
|
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")
|
||||||
|
|
||||||
|
|
||||||
def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
|
def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
|
||||||
"""Normalise hosts for pipelock's allowlist: strip leading
|
"""Drop any host pipelock's allowlist parser would reject.
|
||||||
`*.` from wildcards, drop anything that still doesn't match
|
Order preserved."""
|
||||||
pipelock's allowed charset. Order preserved; duplicates that
|
return [h for h in hosts if _PIPELOCK_HOST_RE.match(h)]
|
||||||
arise from normalisation are de-duped (first-seen wins)."""
|
|
||||||
out: list[str] = []
|
|
||||||
seen: set[str] = set()
|
|
||||||
for h in hosts:
|
|
||||||
candidate = h[2:] if h.startswith("*.") else h
|
|
||||||
if not candidate or not _PIPELOCK_HOST_RE.match(candidate):
|
|
||||||
continue
|
|
||||||
if candidate in seen:
|
|
||||||
continue
|
|
||||||
seen.add(candidate)
|
|
||||||
out.append(candidate)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
|
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
|
||||||
|
|||||||
@@ -169,37 +169,17 @@ def match_route(
|
|||||||
routes: typing.Sequence[Route],
|
routes: typing.Sequence[Route],
|
||||||
request_host: str,
|
request_host: str,
|
||||||
) -> Route | None:
|
) -> Route | None:
|
||||||
"""Return the route whose `host` matches `request_host`.
|
"""Return the first route whose `host` matches `request_host`
|
||||||
|
exactly (case-insensitive). DNS names are case-insensitive.
|
||||||
|
|
||||||
Match precedence:
|
Wildcard hosts (`*.foo.com`) are NOT supported — they caused
|
||||||
1. Exact (case-insensitive) match on the literal hostname.
|
too many edge cases (apex match? cert validation? pipelock
|
||||||
2. Wildcard match: a route whose host starts with `*.` is a
|
mirror mismatch?) for too little payoff. Operators who need
|
||||||
suffix pattern that covers the apex AND every subdomain.
|
multiple subdomains declare them individually (or one common
|
||||||
`*.example.com` matches `example.com`, `foo.example.com`,
|
parent host as a bare-pass route)."""
|
||||||
and `a.b.example.com`, but NOT `barexample.com` (the
|
|
||||||
label boundary `.` is required when matching a
|
|
||||||
subdomain). This is intentionally more permissive than
|
|
||||||
RFC 6125 TLS-wildcard semantics — an allowlist's natural
|
|
||||||
reading of `*.example.com` is "all of example.com",
|
|
||||||
apex included, and matches what the pipelock mirror does
|
|
||||||
(strips `*.example.com` → `example.com`).
|
|
||||||
|
|
||||||
Exact match wins over wildcard so an operator can declare a
|
|
||||||
specific route on top of a broader wildcard (e.g. a
|
|
||||||
`*.github.com` bare-pass + an `api.github.com` route with
|
|
||||||
auth). DNS names are case-insensitive."""
|
|
||||||
target = request_host.lower()
|
target = request_host.lower()
|
||||||
# Pass 1: exact, literal hostname match.
|
|
||||||
for r in routes:
|
for r in routes:
|
||||||
host = r.host.lower()
|
if r.host.lower() == target:
|
||||||
if not host.startswith("*.") and host == target:
|
|
||||||
return r
|
|
||||||
# Pass 2: wildcard match — apex + every subdomain.
|
|
||||||
for r in routes:
|
|
||||||
host = r.host.lower()
|
|
||||||
if host.startswith("*."):
|
|
||||||
suffix = host[2:] # strip the `*.`
|
|
||||||
if target == suffix or target.endswith("." + suffix):
|
|
||||||
return r
|
return r
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
@@ -147,67 +147,13 @@ class TestMatchRoute(unittest.TestCase):
|
|||||||
# other-host shouldn't be matched via a "ends with" check.
|
# other-host shouldn't be matched via a "ends with" check.
|
||||||
self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com"))
|
self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com"))
|
||||||
|
|
||||||
|
def test_wildcard_hosts_not_supported(self):
|
||||||
class TestMatchRouteWildcards(unittest.TestCase):
|
# `*.example.com` is treated as a literal host string by
|
||||||
"""Wildcard host patterns: `*.foo.com` matches any host that
|
# the exact-only matcher. Removed from the design after
|
||||||
ends with `.foo.com` (subdomains, one level or more)."""
|
# the apex/RFC-6125/pipelock-mirror edge cases stacked up.
|
||||||
|
|
||||||
def test_wildcard_matches_direct_subdomain(self):
|
|
||||||
routes = (Route(host="*.example.com"),)
|
routes = (Route(host="*.example.com"),)
|
||||||
r = match_route(routes, "foo.example.com")
|
self.assertIsNone(match_route(routes, "foo.example.com"))
|
||||||
self.assertIsNotNone(r)
|
self.assertIsNone(match_route(routes, "example.com"))
|
||||||
self.assertEqual("*.example.com", r.host)
|
|
||||||
|
|
||||||
def test_wildcard_matches_nested_subdomain(self):
|
|
||||||
routes = (Route(host="*.example.com"),)
|
|
||||||
self.assertIsNotNone(match_route(routes, "a.b.example.com"))
|
|
||||||
|
|
||||||
def test_wildcard_matches_apex(self):
|
|
||||||
# Allowlist semantics: `*.example.com` covers
|
|
||||||
# `example.com` itself + every subdomain. Matches what
|
|
||||||
# the pipelock mirror does (strips `*.example.com` →
|
|
||||||
# `example.com`) so the two layers agree.
|
|
||||||
routes = (Route(host="*.example.com"),)
|
|
||||||
self.assertIsNotNone(match_route(routes, "example.com"))
|
|
||||||
|
|
||||||
def test_wildcard_does_not_match_overlapping_suffix(self):
|
|
||||||
# `*.example.com` shouldn't match `barexample.com` — the
|
|
||||||
# match requires `.` before the suffix.
|
|
||||||
routes = (Route(host="*.example.com"),)
|
|
||||||
self.assertIsNone(match_route(routes, "barexample.com"))
|
|
||||||
|
|
||||||
def test_wildcard_case_insensitive(self):
|
|
||||||
routes = (Route(host="*.example.com"),)
|
|
||||||
self.assertIsNotNone(match_route(routes, "FOO.Example.COM"))
|
|
||||||
|
|
||||||
def test_exact_match_wins_over_wildcard(self):
|
|
||||||
# A specific route declared alongside a broader wildcard
|
|
||||||
# should take precedence — operators stack a per-host
|
|
||||||
# config on top of a permissive wildcard this way.
|
|
||||||
routes = (
|
|
||||||
Route(host="*.github.com"),
|
|
||||||
Route(host="api.github.com", auth_scheme="Bearer",
|
|
||||||
token_env="EGRESS_PROXY_TOKEN_0"),
|
|
||||||
)
|
|
||||||
r = match_route(routes, "api.github.com")
|
|
||||||
self.assertIsNotNone(r)
|
|
||||||
self.assertEqual("api.github.com", r.host)
|
|
||||||
self.assertEqual("Bearer", r.auth_scheme)
|
|
||||||
|
|
||||||
def test_exact_wins_regardless_of_route_order(self):
|
|
||||||
# Same as above but with wildcard declared AFTER exact —
|
|
||||||
# exact wins because pass 1 finds it before pass 2 runs.
|
|
||||||
routes = (
|
|
||||||
Route(host="api.github.com", auth_scheme="Bearer",
|
|
||||||
token_env="EGRESS_PROXY_TOKEN_0"),
|
|
||||||
Route(host="*.github.com"),
|
|
||||||
)
|
|
||||||
r = match_route(routes, "api.github.com")
|
|
||||||
self.assertEqual("api.github.com", r.host)
|
|
||||||
|
|
||||||
def test_no_match_falls_through(self):
|
|
||||||
routes = (Route(host="*.example.com"),)
|
|
||||||
self.assertIsNone(match_route(routes, "elsewhere.org"))
|
|
||||||
|
|
||||||
|
|
||||||
# --- decide --------------------------------------------------------------
|
# --- decide --------------------------------------------------------------
|
||||||
|
|||||||
@@ -197,45 +197,30 @@ class TestPipelockSafeHosts(unittest.TestCase):
|
|||||||
_pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]),
|
_pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_strips_wildcard_prefix(self):
|
def test_drops_wildcards(self):
|
||||||
# `*.example.com` becomes `example.com` — pipelock pins the
|
# Wildcard host matching was removed from egress-proxy too,
|
||||||
# suffix, egress-proxy keeps the wildcard on its side.
|
# so a `*.foo.com` route is dead weight anyway; we drop it
|
||||||
|
# entirely from the pipelock mirror so the apply doesn't
|
||||||
|
# fail at parse time.
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
["example.com", "api.github.com"],
|
["api.github.com"],
|
||||||
_pipelock_safe_hosts(["*.example.com", "api.github.com"]),
|
_pipelock_safe_hosts(["*.example.com", "api.github.com"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_wildcard_strips_one_label_not_recursive(self):
|
|
||||||
# `*.foo.bar.com` → `foo.bar.com` (one strip of `*.`).
|
|
||||||
self.assertEqual(
|
|
||||||
["foo.bar.com"],
|
|
||||||
_pipelock_safe_hosts(["*.foo.bar.com"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_drops_bare_wildcard(self):
|
def test_drops_bare_wildcard(self):
|
||||||
# `*` alone would normalise to empty; nothing useful to send
|
|
||||||
# to pipelock.
|
|
||||||
self.assertEqual([], _pipelock_safe_hosts(["*"]))
|
self.assertEqual([], _pipelock_safe_hosts(["*"]))
|
||||||
|
|
||||||
def test_strips_ipv6_literals(self):
|
def test_drops_ipv6_literals(self):
|
||||||
# Brackets aren't in pipelock's allowed charset either.
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
["api.example.com"],
|
["api.example.com"],
|
||||||
_pipelock_safe_hosts(["[::1]", "api.example.com"]),
|
_pipelock_safe_hosts(["[::1]", "api.example.com"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_dedupes_after_normalisation(self):
|
|
||||||
# `*.example.com` + `example.com` both yield `example.com`.
|
|
||||||
self.assertEqual(
|
|
||||||
["example.com"],
|
|
||||||
_pipelock_safe_hosts(["*.example.com", "example.com"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_preserves_order(self):
|
def test_preserves_order(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
["a.example", "b.example", "c.example"],
|
["a.example", "b.example", "c.example"],
|
||||||
_pipelock_safe_hosts([
|
_pipelock_safe_hosts([
|
||||||
"a.example", "weird host", "b.example", "*", "c.example",
|
"a.example", "*.junk", "b.example", "weird host", "c.example",
|
||||||
]),
|
]),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user