revert(egress-proxy): drop wildcard host support entirely
test / unit (pull_request) Successful in 17s
test / integration (pull_request) Successful in 1m3s

The apex-vs-subdomain question, the cert/SNI mismatch when
pipelock-passthrough hosts have wildcard certs, and the
mirror-divergence corner cases stacked up faster than the feature
earned its keep. Going back to exact-host match only.

Addon (`match_route`): single pass, case-insensitive exact match.
`*.foo.com` in a route table is now a literal string that won't
match any real hostname — operators who want subdomains must
declare them individually.

Pipelock mirror (`_pipelock_safe_hosts`): silently drops hosts
that don't fit pipelock's `[A-Za-z0-9_.-]+` charset (wildcards,
IPv6 literals, stray chars). Previously normalised wildcards to
their suffix; now just drops them, which matches egress-proxy's
behavior of not matching them either.

8 wildcard test cases removed; 2 lightweight "wildcards are not
supported" assertions retained as documentation. 386 unit tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 19:48:35 -04:00
parent 6177c0518e
commit 6c886200d9
4 changed files with 32 additions and 135 deletions
@@ -90,34 +90,20 @@ def _hosts_in_routes(content: str) -> list[str]:
# Pipelock's allowlist parser accepts only literal hostnames: # Pipelock's allowlist parser accepts only literal hostnames:
# `[A-Za-z0-9_.-]+`. Wildcard hosts (e.g. `*.example.com`) that # `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals,
# egress-proxy's route table accepts get normalised here by # stray characters) is silently dropped from the mirror so the
# stripping the leading `*.` (so `*.example.com` → `example.com`) # pipelock apply doesn't fail parse before the new yaml is even
# — egress-proxy retains the wildcard for its own host matching, # written. The dropped hosts stay on egress-proxy's route table —
# and pipelock's allowlist gets the suffix, which still permits # but the addon does exact-host match only, so they'll never
# the wildcard-matched upstream connections without expanding to # match anything either. (Wildcard host matching was removed —
# arbitrary subdomains. Hosts that still don't fit the pipelock # see `match_route` in egress_proxy_addon_core for the rationale.)
# charset after normalisation (bare `*`, IPv6 literals, weird
# chars) are silently skipped.
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")


def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
    """Drop any host pipelock's allowlist parser would reject.

    A host is kept only when the *entire* string is made of characters
    from `[A-Za-z0-9_.-]`. Wildcards (`*.example.com`, bare `*`),
    bracketed IPv6 literals, empty strings and anything else outside
    that charset are dropped. Order is preserved; no normalisation or
    de-duplication is applied.

    Args:
        hosts: Candidate hostnames, in the order they should appear.

    Returns:
        A new list with the surviving hosts in their original order.
    """
    # `fullmatch`, not `match`: with `match`, the pattern's trailing `$`
    # also succeeds just before a final newline, so "host\n" would slip
    # through the filter and end up in the mirrored allowlist.
    return [h for h in hosts if _PIPELOCK_HOST_RE.fullmatch(h)]
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None: def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
+8 -28
View File
def match_route(
    routes: typing.Sequence[Route],
    request_host: str,
) -> Route | None:
    """Look up the route for `request_host` by exact hostname.

    Both sides are lower-cased before comparison, so the match is
    case-insensitive. Routes are scanned in order and the first one
    whose `host` equals the requested host is returned; `None` is
    returned when no route matches.

    Wildcard hosts (`*.foo.com`) are NOT supported: such a route's
    host is compared as a literal string like any other. Operators
    that need several subdomains declare each one individually.
    """
    wanted = request_host.lower()
    return next(
        (route for route in routes if route.host.lower() == wanted),
        None,
    )
+6 -60
View File
@@ -147,67 +147,13 @@ class TestMatchRoute(unittest.TestCase):
# other-host shouldn't be matched via a "ends with" check. # other-host shouldn't be matched via a "ends with" check.
self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com")) self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com"))
def test_wildcard_hosts_not_supported(self):
    # The matcher compares hostnames exactly, so `*.example.com`
    # is only a literal string: neither a subdomain nor the apex
    # resolves to the wildcard route.
    wildcard_only = (Route(host="*.example.com"),)
    for requested in ("foo.example.com", "example.com"):
        self.assertIsNone(match_route(wildcard_only, requested))
# --- decide -------------------------------------------------------------- # --- decide --------------------------------------------------------------
+8 -23
View File
@@ -197,45 +197,30 @@ class TestPipelockSafeHosts(unittest.TestCase):
_pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]), _pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]),
) )
def test_drops_wildcards(self):
    # A wildcard entry is removed from the mirrored host list
    # outright (no suffix normalisation); the plain hostname
    # next to it is kept.
    mirrored = _pipelock_safe_hosts(["*.example.com", "api.github.com"])
    self.assertEqual(["api.github.com"], mirrored)
def test_drops_bare_wildcard(self):
    # A lone `*` is filtered out, leaving an empty list.
    mirrored = _pipelock_safe_hosts(["*"])
    self.assertEqual([], mirrored)
def test_drops_ipv6_literals(self):
    # A bracketed IPv6 literal is filtered out; the ordinary
    # hostname alongside it survives.
    mirrored = _pipelock_safe_hosts(["[::1]", "api.example.com"])
    self.assertEqual(["api.example.com"], mirrored)
def test_preserves_order(self):
    # Rejected entries are removed in place; the survivors keep
    # their relative order from the input.
    candidates = [
        "a.example", "*.junk", "b.example", "weird host", "c.example",
    ]
    expected = ["a.example", "b.example", "c.example"]
    self.assertEqual(expected, _pipelock_safe_hosts(candidates))