revert(egress-proxy): drop wildcard host support entirely
test / unit (pull_request) Successful in 17s
test / integration (pull_request) Successful in 1m3s

The apex-vs-subdomain question, the cert/SNI mismatch when
pipelock-passthrough hosts have wildcard certs, and the
mirror-divergence corner cases stacked up faster than the feature
earned its keep. Going back to exact-host match only.

Addon (`match_route`): single pass, case-insensitive exact match.
`*.foo.com` in a route table is now treated as a literal string, so
it will not match any real subdomain — operators who want subdomains
must declare them individually.

Pipelock mirror (`_pipelock_safe_hosts`): silently drops hosts
that don't fit pipelock's `[A-Za-z0-9_.-]+` charset (wildcards,
IPv6 literals, stray chars). Previously normalised wildcards to
their suffix; now just drops them, which matches egress-proxy's
behavior of not matching them either.

8 wildcard test cases removed; 2 lightweight "wildcards are not
supported" assertions retained as documentation. 386 unit tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 19:48:35 -04:00
parent 6177c0518e
commit 6c886200d9
4 changed files with 32 additions and 135 deletions
@@ -90,34 +90,20 @@ def _hosts_in_routes(content: str) -> list[str]:
# Pipelock's allowlist parser accepts only literal hostnames:
# `[A-Za-z0-9_.-]+`. Wildcard hosts (e.g. `*.example.com`) that
# egress-proxy's route table accepts get normalised here by
# stripping the leading `*.` (so `*.example.com` → `example.com`)
# — egress-proxy retains the wildcard for its own host matching,
# and pipelock's allowlist gets the suffix, which still permits
# the wildcard-matched upstream connections without expanding to
# arbitrary subdomains. Hosts that still don't fit the pipelock
# charset after normalisation (bare `*`, IPv6 literals, weird
# chars) are silently skipped.
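# `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals,
# stray characters) is silently dropped from the mirror so the
# pipelock apply doesn't fail to parse before the new yaml is even
# written. Dropped wildcard hosts stay on egress-proxy's route
# table, but the addon does exact-host match only, so they are
# compared as literal strings and never match a real subdomain.
# (Wildcard host matching was removed — see `match_route` in
# egress_proxy_addon_core for the rationale.)
# NOTE(review): non-wildcard entries dropped here (e.g. IPv6
# literals) could still exact-match on the addon side — confirm
# that divergence from the pipelock allowlist is intended.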
_PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")


def _pipelock_safe_hosts(hosts: list[str]) -> list[str]:
    """Drop any host pipelock's allowlist parser would reject.

    A host is kept only when the entire string consists of characters
    from `[A-Za-z0-9_.-]`. Wildcards (`*.foo.com`, bare `*`), IPv6
    literals, empty strings and hosts containing whitespace are
    removed. No normalisation or de-duplication is performed: input
    order is preserved and repeated hosts are kept as given.
    """
    # `fullmatch`, not `match`: with `match`, the trailing `$` also
    # succeeds just before a final newline, so "host\n" would slip
    # through even though "\n" is outside the allowed charset.
    return [h for h in hosts if _PIPELOCK_HOST_RE.fullmatch(h)]
def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None:
+8 -28
View File
@@ -169,38 +169,18 @@ def match_route(
routes: typing.Sequence[Route],
request_host: str,
) -> Route | None:
"""Return the route whose `host` matches `request_host`.
"""Return the first route whose `host` matches `request_host`
exactly (case-insensitive). DNS names are case-insensitive.
Match precedence:
1. Exact (case-insensitive) match on the literal hostname.
2. Wildcard match: a route whose host starts with `*.` is a
suffix pattern that covers the apex AND every subdomain.
`*.example.com` matches `example.com`, `foo.example.com`,
and `a.b.example.com`, but NOT `barexample.com` (the
label boundary `.` is required when matching a
subdomain). This is intentionally more permissive than
RFC 6125 TLS-wildcard semantics — an allowlist's natural
reading of `*.example.com` is "all of example.com",
apex included, and matches what the pipelock mirror does
(strips `*.example.com` → `example.com`).
Exact match wins over wildcard so an operator can declare a
specific route on top of a broader wildcard (e.g. a
`*.github.com` bare-pass + an `api.github.com` route with
auth). DNS names are case-insensitive."""
Wildcard hosts (`*.foo.com`) are NOT supported — they caused
too many edge cases (apex match? cert validation? pipelock
mirror mismatch?) for too little payoff. Operators that need
multiple subdomains declare them individually (or one common
parent host as a bare-pass route)."""
target = request_host.lower()
# Pass 1: exact, literal hostname match.
for r in routes:
host = r.host.lower()
if not host.startswith("*.") and host == target:
if r.host.lower() == target:
return r
# Pass 2: wildcard match — apex + every subdomain.
for r in routes:
host = r.host.lower()
if host.startswith("*."):
suffix = host[2:] # strip the `*.`
if target == suffix or target.endswith("." + suffix):
return r
return None
+6 -60
View File
@@ -147,67 +147,13 @@ class TestMatchRoute(unittest.TestCase):
# other-host shouldn't be matched via a "ends with" check.
self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com"))
class TestMatchRouteWildcards(unittest.TestCase):
"""Wildcard host patterns: `*.foo.com` matches any host that
ends with `.foo.com` (subdomains, one level or more)."""
def test_wildcard_matches_direct_subdomain(self):
def test_wildcard_hosts_not_supported(self):
# `*.example.com` is treated as a literal host string by
# the exact-only matcher. Removed from the design after
# the apex/RFC-6125/pipelock-mirror edge cases stacked up.
routes = (Route(host="*.example.com"),)
r = match_route(routes, "foo.example.com")
self.assertIsNotNone(r)
self.assertEqual("*.example.com", r.host)
def test_wildcard_matches_nested_subdomain(self):
routes = (Route(host="*.example.com"),)
self.assertIsNotNone(match_route(routes, "a.b.example.com"))
def test_wildcard_matches_apex(self):
# Allowlist semantics: `*.example.com` covers
# `example.com` itself + every subdomain. Matches what
# the pipelock mirror does (strips `*.example.com` →
# `example.com`) so the two layers agree.
routes = (Route(host="*.example.com"),)
self.assertIsNotNone(match_route(routes, "example.com"))
def test_wildcard_does_not_match_overlapping_suffix(self):
# `*.example.com` shouldn't match `barexample.com` — the
# match requires `.` before the suffix.
routes = (Route(host="*.example.com"),)
self.assertIsNone(match_route(routes, "barexample.com"))
def test_wildcard_case_insensitive(self):
routes = (Route(host="*.example.com"),)
self.assertIsNotNone(match_route(routes, "FOO.Example.COM"))
def test_exact_match_wins_over_wildcard(self):
# A specific route declared alongside a broader wildcard
# should take precedence — operators stack a per-host
# config on top of a permissive wildcard this way.
routes = (
Route(host="*.github.com"),
Route(host="api.github.com", auth_scheme="Bearer",
token_env="EGRESS_PROXY_TOKEN_0"),
)
r = match_route(routes, "api.github.com")
self.assertIsNotNone(r)
self.assertEqual("api.github.com", r.host)
self.assertEqual("Bearer", r.auth_scheme)
def test_exact_wins_regardless_of_route_order(self):
# Same as above but with wildcard declared AFTER exact —
# exact wins because pass 1 finds it before pass 2 runs.
routes = (
Route(host="api.github.com", auth_scheme="Bearer",
token_env="EGRESS_PROXY_TOKEN_0"),
Route(host="*.github.com"),
)
r = match_route(routes, "api.github.com")
self.assertEqual("api.github.com", r.host)
def test_no_match_falls_through(self):
routes = (Route(host="*.example.com"),)
self.assertIsNone(match_route(routes, "elsewhere.org"))
self.assertIsNone(match_route(routes, "foo.example.com"))
self.assertIsNone(match_route(routes, "example.com"))
# --- decide --------------------------------------------------------------
+8 -23
View File
@@ -197,45 +197,30 @@ class TestPipelockSafeHosts(unittest.TestCase):
_pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]),
)
def test_strips_wildcard_prefix(self):
# `*.example.com` becomes `example.com` — pipelock pins the
# suffix, egress-proxy keeps the wildcard on its side.
def test_drops_wildcards(self):
# Wildcard host matching was removed from egress-proxy too,
# so a `*.foo.com` route is dead weight anyway; we drop it
# entirely from the pipelock mirror so the apply doesn't
# fail parse.
self.assertEqual(
["example.com", "api.github.com"],
["api.github.com"],
_pipelock_safe_hosts(["*.example.com", "api.github.com"]),
)
def test_wildcard_strips_one_label_not_recursive(self):
# `*.foo.bar.com` → `foo.bar.com` (one strip of `*.`).
self.assertEqual(
["foo.bar.com"],
_pipelock_safe_hosts(["*.foo.bar.com"]),
)
def test_drops_bare_wildcard(self):
# `*` alone would normalise to empty; nothing useful to send
# to pipelock.
self.assertEqual([], _pipelock_safe_hosts(["*"]))
def test_strips_ipv6_literals(self):
# Brackets aren't in pipelock's allowed charset either.
def test_drops_ipv6_literals(self):
self.assertEqual(
["api.example.com"],
_pipelock_safe_hosts(["[::1]", "api.example.com"]),
)
def test_dedupes_after_normalisation(self):
# `*.example.com` + `example.com` both yield `example.com`.
self.assertEqual(
["example.com"],
_pipelock_safe_hosts(["*.example.com", "example.com"]),
)
def test_preserves_order(self):
self.assertEqual(
["a.example", "b.example", "c.example"],
_pipelock_safe_hosts([
"a.example", "weird host", "b.example", "*", "c.example",
"a.example", "*.junk", "b.example", "weird host", "c.example",
]),
)