From 6c886200d9b7fcb47b884eb708dbb0d7109a803a Mon Sep 17 00:00:00 2001 From: didericis Date: Mon, 25 May 2026 19:48:35 -0400 Subject: [PATCH] revert(egress-proxy): drop wildcard host support entirely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The apex-vs-subdomain question, the cert/SNI mismatch when pipelock-passthrough hosts have wildcard certs, and the mirror-divergence corner cases stacked up faster than the feature earned its keep. Going back to exact-host match only. Addon (`match_route`): single pass, case-insensitive exact match. `*.foo.com` in a route table is now a literal string that won't match anything — operators that want subdomains declare them individually. Pipelock mirror (`_pipelock_safe_hosts`): silently drops hosts that don't fit pipelock's `[A-Za-z0-9_.-]+` charset (wildcards, IPv6 literals, stray chars). Previously normalised wildcards to their suffix; now just drops them, which matches egress-proxy's behavior of not matching them either. 8 wildcard test cases removed; 2 lightweight "wildcards are not supported" assertions retained as documentation. 386 unit pass. Co-Authored-By: Claude Opus 4.7 --- .../backend/docker/egress_proxy_apply.py | 34 +++------- claude_bottle/egress_proxy_addon_core.py | 36 +++------- tests/unit/test_egress_proxy_addon_core.py | 66 ++----------------- tests/unit/test_egress_proxy_apply.py | 31 +++------ 4 files changed, 32 insertions(+), 135 deletions(-) diff --git a/claude_bottle/backend/docker/egress_proxy_apply.py b/claude_bottle/backend/docker/egress_proxy_apply.py index a7f3996..e73c620 100644 --- a/claude_bottle/backend/docker/egress_proxy_apply.py +++ b/claude_bottle/backend/docker/egress_proxy_apply.py @@ -90,34 +90,20 @@ def _hosts_in_routes(content: str) -> list[str]: # Pipelock's allowlist parser accepts only literal hostnames: -# `[A-Za-z0-9_.-]+`. Wildcard hosts (e.g. 
`*.example.com`) that -# egress-proxy's route table accepts get normalised here by -# stripping the leading `*.` (so `*.example.com` → `example.com`) -# — egress-proxy retains the wildcard for its own host matching, -# and pipelock's allowlist gets the suffix, which still permits -# the wildcard-matched upstream connections without expanding to -# arbitrary subdomains. Hosts that still don't fit the pipelock -# charset after normalisation (bare `*`, IPv6 literals, weird -# chars) are silently skipped. +# `[A-Za-z0-9_.-]+`. Anything else (wildcards, IPv6 literals, +# stray characters) is silently dropped from the mirror so the +# pipelock apply doesn't fail parse before the new yaml is even +# written. The dropped hosts stay on egress-proxy's route table — +# but the addon does exact-host match only, so they'll never +# match anything either. (Wildcard host matching was removed — +# see `match_route` in egress_proxy_addon_core for the rationale.) _PIPELOCK_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$") def _pipelock_safe_hosts(hosts: list[str]) -> list[str]: - """Normalise hosts for pipelock's allowlist: strip leading - `*.` from wildcards, drop anything that still doesn't match - pipelock's allowed charset. Order preserved; duplicates that - arise from normalisation are de-duped (first-seen wins).""" - out: list[str] = [] - seen: set[str] = set() - for h in hosts: - candidate = h[2:] if h.startswith("*.") else h - if not candidate or not _PIPELOCK_HOST_RE.match(candidate): - continue - if candidate in seen: - continue - seen.add(candidate) - out.append(candidate) - return out + """Drop any host pipelock's allowlist parser would reject. 
+ Order preserved.""" + return [h for h in hosts if _PIPELOCK_HOST_RE.match(h)] def _mirror_hosts_to_pipelock(slug: str, hosts: list[str]) -> None: diff --git a/claude_bottle/egress_proxy_addon_core.py b/claude_bottle/egress_proxy_addon_core.py index 967ea06..5e4c854 100644 --- a/claude_bottle/egress_proxy_addon_core.py +++ b/claude_bottle/egress_proxy_addon_core.py @@ -169,38 +169,18 @@ def match_route( routes: typing.Sequence[Route], request_host: str, ) -> Route | None: - """Return the route whose `host` matches `request_host`. + """Return the first route whose `host` matches `request_host` + exactly (case-insensitive). DNS names are case-insensitive. - Match precedence: - 1. Exact (case-insensitive) match on the literal hostname. - 2. Wildcard match: a route whose host starts with `*.` is a - suffix pattern that covers the apex AND every subdomain. - `*.example.com` matches `example.com`, `foo.example.com`, - and `a.b.example.com`, but NOT `barexample.com` (the - label boundary `.` is required when matching a - subdomain). This is intentionally more permissive than - RFC 6125 TLS-wildcard semantics — an allowlist's natural - reading of `*.example.com` is "all of example.com", - apex included, and matches what the pipelock mirror does - (strips `*.example.com` → `example.com`). - - Exact match wins over wildcard so an operator can declare a - specific route on top of a broader wildcard (e.g. a - `*.github.com` bare-pass + an `api.github.com` route with - auth). DNS names are case-insensitive.""" + Wildcard hosts (`*.foo.com`) are NOT supported — they caused + too many edge cases (apex match? cert validation? pipelock + mirror mismatch?) for too little payoff. Operators that need + multiple subdomains declare each one individually; a route for + a parent host does NOT cover its subdomains.""" target = request_host.lower() - # Pass 1: exact, literal hostname match. 
for r in routes: - host = r.host.lower() - if not host.startswith("*.") and host == target: + if r.host.lower() == target: return r - # Pass 2: wildcard match — apex + every subdomain. - for r in routes: - host = r.host.lower() - if host.startswith("*."): - suffix = host[2:] # strip the `*.` - if target == suffix or target.endswith("." + suffix): - return r return None diff --git a/tests/unit/test_egress_proxy_addon_core.py b/tests/unit/test_egress_proxy_addon_core.py index cba1edd..833feac 100644 --- a/tests/unit/test_egress_proxy_addon_core.py +++ b/tests/unit/test_egress_proxy_addon_core.py @@ -147,67 +147,13 @@ class TestMatchRoute(unittest.TestCase): # other-host shouldn't be matched via a "ends with" check. self.assertIsNone(match_route(self.ROUTES, "evil.api.github.com")) - -class TestMatchRouteWildcards(unittest.TestCase): - """Wildcard host patterns: `*.foo.com` matches any host that - ends with `.foo.com` (subdomains, one level or more).""" - - def test_wildcard_matches_direct_subdomain(self): + def test_wildcard_hosts_not_supported(self): + # `*.example.com` is treated as a literal host string by + # the exact-only matcher. Removed from the design after + # the apex/RFC-6125/pipelock-mirror edge cases stacked up. routes = (Route(host="*.example.com"),) - r = match_route(routes, "foo.example.com") - self.assertIsNotNone(r) - self.assertEqual("*.example.com", r.host) - - def test_wildcard_matches_nested_subdomain(self): - routes = (Route(host="*.example.com"),) - self.assertIsNotNone(match_route(routes, "a.b.example.com")) - - def test_wildcard_matches_apex(self): - # Allowlist semantics: `*.example.com` covers - # `example.com` itself + every subdomain. Matches what - # the pipelock mirror does (strips `*.example.com` → - # `example.com`) so the two layers agree. 
- routes = (Route(host="*.example.com"),) - self.assertIsNotNone(match_route(routes, "example.com")) - - def test_wildcard_does_not_match_overlapping_suffix(self): - # `*.example.com` shouldn't match `barexample.com` — the - # match requires `.` before the suffix. - routes = (Route(host="*.example.com"),) - self.assertIsNone(match_route(routes, "barexample.com")) - - def test_wildcard_case_insensitive(self): - routes = (Route(host="*.example.com"),) - self.assertIsNotNone(match_route(routes, "FOO.Example.COM")) - - def test_exact_match_wins_over_wildcard(self): - # A specific route declared alongside a broader wildcard - # should take precedence — operators stack a per-host - # config on top of a permissive wildcard this way. - routes = ( - Route(host="*.github.com"), - Route(host="api.github.com", auth_scheme="Bearer", - token_env="EGRESS_PROXY_TOKEN_0"), - ) - r = match_route(routes, "api.github.com") - self.assertIsNotNone(r) - self.assertEqual("api.github.com", r.host) - self.assertEqual("Bearer", r.auth_scheme) - - def test_exact_wins_regardless_of_route_order(self): - # Same as above but with wildcard declared AFTER exact — - # exact wins because pass 1 finds it before pass 2 runs. 
- routes = ( - Route(host="api.github.com", auth_scheme="Bearer", - token_env="EGRESS_PROXY_TOKEN_0"), - Route(host="*.github.com"), - ) - r = match_route(routes, "api.github.com") - self.assertEqual("api.github.com", r.host) - - def test_no_match_falls_through(self): - routes = (Route(host="*.example.com"),) - self.assertIsNone(match_route(routes, "elsewhere.org")) + self.assertIsNone(match_route(routes, "foo.example.com")) + self.assertIsNone(match_route(routes, "example.com")) # --- decide -------------------------------------------------------------- diff --git a/tests/unit/test_egress_proxy_apply.py b/tests/unit/test_egress_proxy_apply.py index 47272b2..8e2f5a4 100644 --- a/tests/unit/test_egress_proxy_apply.py +++ b/tests/unit/test_egress_proxy_apply.py @@ -197,45 +197,30 @@ class TestPipelockSafeHosts(unittest.TestCase): _pipelock_safe_hosts(["api.github.com", "registry.npmjs.org"]), ) - def test_strips_wildcard_prefix(self): - # `*.example.com` becomes `example.com` — pipelock pins the - # suffix, egress-proxy keeps the wildcard on its side. + def test_drops_wildcards(self): + # Wildcard host matching was removed from egress-proxy too, + # so a `*.foo.com` route is dead weight anyway; we drop it + # entirely from the pipelock mirror so the apply doesn't + # fail parse. self.assertEqual( - ["example.com", "api.github.com"], + ["api.github.com"], _pipelock_safe_hosts(["*.example.com", "api.github.com"]), ) - def test_wildcard_strips_one_label_not_recursive(self): - # `*.foo.bar.com` → `foo.bar.com` (one strip of `*.`). - self.assertEqual( - ["foo.bar.com"], - _pipelock_safe_hosts(["*.foo.bar.com"]), - ) - def test_drops_bare_wildcard(self): - # `*` alone would normalise to empty; nothing useful to send - # to pipelock. self.assertEqual([], _pipelock_safe_hosts(["*"])) - def test_strips_ipv6_literals(self): - # Brackets aren't in pipelock's allowed charset either. 
+ def test_drops_ipv6_literals(self): self.assertEqual( ["api.example.com"], _pipelock_safe_hosts(["[::1]", "api.example.com"]), ) - def test_dedupes_after_normalisation(self): - # `*.example.com` + `example.com` both yield `example.com`. - self.assertEqual( - ["example.com"], - _pipelock_safe_hosts(["*.example.com", "example.com"]), - ) - def test_preserves_order(self): self.assertEqual( ["a.example", "b.example", "c.example"], _pipelock_safe_hosts([ - "a.example", "weird host", "b.example", "*", "c.example", + "a.example", "*.junk", "b.example", "weird host", "c.example", ]), )