From 3f045672901e75e43c998f14a76c3a302dd567dd Mon Sep 17 00:00:00 2001 From: codex Date: Wed, 10 Jun 2026 07:00:01 +0000 Subject: [PATCH] egress: require opt-in for HTTPS git fetch --- bot_bottle/egress.py | 8 +++ bot_bottle/egress_addon.py | 14 +++++ bot_bottle/egress_addon_core.py | 56 ++++++++++++++++++- bot_bottle/manifest_egress.py | 25 ++++++++- docs/prds/0052-egress-dlp-addon.md | 21 ++++++++ tests/unit/test_egress.py | 14 +++++ tests/unit/test_egress_addon_core.py | 80 +++++++++++++++++++++++++++- tests/unit/test_manifest_egress.py | 29 +++++++++- 8 files changed, 240 insertions(+), 7 deletions(-) diff --git a/bot_bottle/egress.py b/bot_bottle/egress.py index 527279e..66b728d 100644 --- a/bot_bottle/egress.py +++ b/bot_bottle/egress.py @@ -91,6 +91,7 @@ def egress_manifest_routes( auth_scheme=r.AuthScheme, token_ref=r.TokenRef, roles=r.Role, + git_fetch=r.GitFetch, outbound_detectors=r.OutboundDetectors, inbound_detectors=r.InboundDetectors, )) @@ -173,6 +174,8 @@ def _route_to_yaml_fields(r: Route) -> dict[str, object]: entry_data["headers"] = headers_data matches_data.append(entry_data) fields["matches"] = matches_data + if r.git_fetch: + fields["git"] = {"fetch": True} if r.outbound_detectors is not None or r.inbound_detectors is not None: dlp: dict[str, object] = {} if r.outbound_detectors is not None: @@ -242,6 +245,11 @@ def egress_render_routes( lines.append(" matches:") for entry in f["matches"]: # type: ignore[union-attr] lines.extend(_render_match_entry(entry)) # type: ignore[arg-type] + if "git" in f: + git_dict: dict[str, object] = f["git"] # type: ignore + lines.append(" git:") + if git_dict.get("fetch") is True: + lines.append(" fetch: true") if "dlp" in f: dlp_dict: dict[str, object] = f["dlp"] # type: ignore lines.append(" dlp:") diff --git a/bot_bottle/egress_addon.py b/bot_bottle/egress_addon.py index 8e57b69..fd51fe9 100644 --- a/bot_bottle/egress_addon.py +++ b/bot_bottle/egress_addon.py @@ -21,6 +21,8 @@ from egress_addon_core import ( 
# type: ignore[import-not-found] # pylint: dis build_inbound_scan_text, build_outbound_scan_text, decide, + decide_git_fetch, + is_git_fetch_request, is_git_push_request, load_config, match_route, @@ -181,6 +183,18 @@ class EgressAddon: ) return + if is_git_fetch_request(request_path, query): + git_decision = decide_git_fetch( + self.config.routes, flow.request.pretty_host, + ) + if git_decision.action == "block": + self._block( + flow, + git_decision.reason, + ctx=self._req_ctx(flow), + ) + return + # Strip agent-set Authorization after DLP scan so smuggled tokens # are caught above; the route may inject sidecar-owned auth below. flow.request.headers.pop("authorization", None) diff --git a/bot_bottle/egress_addon_core.py b/bot_bottle/egress_addon_core.py index 4327948..65f86c7 100644 --- a/bot_bottle/egress_addon_core.py +++ b/bot_bottle/egress_addon_core.py @@ -66,6 +66,7 @@ class Route: matches: tuple[MatchEntry, ...] = () auth_scheme: str = "" token_env: str = "" + git_fetch: bool = False outbound_detectors: tuple[str, ...] | None = None inbound_detectors: tuple[str, ...] 
def is_git_fetch_request(path: str, query: str) -> bool:
    """Return True when a request is a smart-HTTP git fetch/clone call.

    Two request shapes are recognised:

    * any path ending in ``/git-upload-pack`` (the pack negotiation
      endpoint), and
    * a path ending in ``/info/refs`` whose query string carries
      ``service=git-upload-pack`` (ref advertisement for a fetch).

    Both ``path`` and ``query`` are percent-decoded before matching.
    Matching the raw strings would let ``service=git%2Dupload%2Dpack`` or
    ``/git%2Dupload%2Dpack`` slip past this check, while an origin server
    may treat those spellings as equivalent to the plain ones (RFC 3986
    says percent-encoded unreserved characters are equivalent to their
    literal form).

    Args:
        path: Request path without the query string.
        query: Raw query string without the leading ``?``; may be empty.

    Returns:
        True for a git-upload-pack request, False otherwise (including
        git-receive-pack endpoints and ``/info/refs`` with no matching
        ``service`` parameter).
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level import list is not visible from here.
    from urllib.parse import parse_qsl, unquote

    # NOTE(review): is_git_push_request is not visible here; if it matches
    # raw strings the same way it presumably needs the same decoding.
    decoded_path = unquote(path)
    if decoded_path.endswith("/git-upload-pack"):
        return True
    if not decoded_path.endswith("/info/refs"):
        return False
    # parse_qsl splits on "&" and percent-decodes each key and value.
    return any(
        key == "service" and value == "git-upload-pack"
        for key, value in parse_qsl(query, keep_blank_values=True)
    )
def decide_git_fetch(
    routes: typing.Sequence[Route],
    request_host: str,
) -> Decision:
    """Decide whether an HTTPS git fetch/clone request may be forwarded.

    The route for ``request_host`` is looked up with ``match_route``. The
    request is forwarded only when a route is found and its ``git_fetch``
    flag is set; in every other case a ``block`` decision is returned with
    a reason that points at git-gate or the ``git.fetch`` opt-in.

    Args:
        routes: Configured egress routes to search.
        request_host: Host of the request being evaluated.

    Returns:
        ``Decision(action="forward")`` when the matched route opted in,
        otherwise ``Decision(action="block", reason=...)``.
    """
    matched = match_route(routes, request_host)
    opted_in = matched is not None and matched.git_fetch
    if not opted_in:
        # Covers both "no route for this host" and "route did not opt in".
        blocked_reason = (
            "egress: git fetch/clone over HTTPS is not allowed by default; "
            "use git-gate for declared repos or set "
            "egress.routes[].git.fetch=true for explicit read-only "
            "HTTPS Git access."
        )
        return Decision(action="block", reason=blocked_reason)
    return Decision(action="forward")
| None = None @@ -165,11 +166,30 @@ class ManifestEgressRoute: label, d.get("dlp"), ) + # --- git-over-HTTPS policy --- + git_fetch = False + if "git" in d: + git_d = as_json_object(d.get("git"), f"{label} git") + raw_fetch = git_d.get("fetch", False) + if isinstance(raw_fetch, bool): + git_fetch = raw_fetch + else: + raise ManifestError( + f"{label} git.fetch must be a boolean " + f"(was {type(raw_fetch).__name__})" + ) + for k in git_d: + if k != "fetch": + raise ManifestError( + f"{label} git has unknown key {k!r}; " + f"only 'fetch' is accepted" + ) + for k in d: - if k not in ("host", "matches", "auth", "role", "dlp"): + if k not in ("host", "matches", "auth", "role", "dlp", "git"): raise ManifestError( f"{label} has unknown key {k!r}; accepted keys are " - f"'host', 'matches', 'auth', 'role', 'dlp'" + f"'host', 'matches', 'auth', 'role', 'dlp', 'git'" ) return cls( @@ -178,6 +198,7 @@ class ManifestEgressRoute: AuthScheme=auth_scheme, TokenRef=token_ref, Role=roles, + GitFetch=git_fetch, OutboundDetectors=outbound_detectors, InboundDetectors=inbound_detectors, ) diff --git a/docs/prds/0052-egress-dlp-addon.md b/docs/prds/0052-egress-dlp-addon.md index 8321342..ba5b4c7 100644 --- a/docs/prds/0052-egress-dlp-addon.md +++ b/docs/prds/0052-egress-dlp-addon.md @@ -199,6 +199,25 @@ Named inbound detectors: `naive_injection_detection`. The manifest parser (`manifest_egress.py`) validates the `dlp` block and rejects unknown detector names. +### Manifest schema — `git` block + +HTTPS Git clone/fetch traffic is not implied by a host-level egress route. +Smart HTTP Git fetch uses `git-upload-pack`, which can transfer large repo +packfiles and bypass the git-gate mirror path. It is therefore blocked by +default and must be explicitly enabled per route: + +```yaml +egress: + routes: + - host: github.com + git: + fetch: true +``` + +`git.fetch: true` permits read-only smart HTTP clone/fetch requests +(`git-upload-pack`) after the normal host and `matches` checks pass. 
HTTPS +Git push (`git-receive-pack`) remains blocked by the egress addon. + ### `EgressRoute` changes `EgressRoute` replaces `PathAllowlist` with `Matches` and gains two new @@ -232,6 +251,7 @@ class EgressRoute: AuthScheme: str = "" TokenRef: str = "" Role: tuple[str, ...] = () + GitFetch: bool = False OutboundDetectors: tuple[str, ...] | None = None # None = all enabled InboundDetectors: tuple[str, ...] | None = None # None = all enabled ``` @@ -252,6 +272,7 @@ class Route: matches: tuple[MatchEntry, ...] = () auth_scheme: str = "" token_env: str = "" + git_fetch: bool = False outbound_detectors: tuple[str, ...] | None = None inbound_detectors: tuple[str, ...] | None = None ``` diff --git a/tests/unit/test_egress.py b/tests/unit/test_egress.py index 37af5d3..36c7bb9 100644 --- a/tests/unit/test_egress.py +++ b/tests/unit/test_egress.py @@ -86,6 +86,11 @@ class TestManifestRouteLift(unittest.TestCase): self.assertEqual(("token_patterns",), r.outbound_detectors) self.assertEqual((), r.inbound_detectors) + def test_git_fetch_policy_lifted(self): + b = _bottle([{"host": "github.com", "git": {"fetch": True}}]) + routes = egress_manifest_routes(b) + self.assertTrue(routes[0].git_fetch) + class TestSlotAssignment(unittest.TestCase): """Slot assignment happens in egress_routes_for_bottle.""" @@ -324,6 +329,15 @@ class TestRenderRoutes(unittest.TestCase): self.assertEqual(("token_patterns",), addon_routes[0].outbound_detectors) self.assertEqual((), addon_routes[0].inbound_detectors) + def test_git_fetch_policy_round_trips(self): + from bot_bottle.egress_addon_core import load_routes + b = _bottle([{"host": "github.com", "git": {"fetch": True}}]) + routes = egress_routes_for_bottle(b) + rendered = egress_render_routes(routes) + self.assertEqual({"fetch": True}, self._parsed(routes)[0]["git"]) + addon_routes = load_routes(rendered) + self.assertTrue(addon_routes[0].git_fetch) + def test_log_zero_omitted_from_render(self): b = _bottle([{"host": "x.example"}]) routes = 
egress_routes_for_bottle(b) diff --git a/tests/unit/test_egress_addon_core.py b/tests/unit/test_egress_addon_core.py index 85f44a9..09d79e3 100644 --- a/tests/unit/test_egress_addon_core.py +++ b/tests/unit/test_egress_addon_core.py @@ -25,7 +25,9 @@ from bot_bottle.egress_addon_core import ( build_inbound_scan_text, build_outbound_scan_text, decide, + decide_git_fetch, evaluate_matches, + is_git_fetch_request, is_git_push_request, load_config, load_routes, @@ -67,6 +69,31 @@ class TestParseRoutes(unittest.TestCase): self.assertEqual("Bearer", r.auth_scheme) self.assertEqual("EGRESS_TOKEN_0", r.token_env) + def test_git_fetch_defaults_false(self): + routes = parse_routes({"routes": [{"host": "github.com"}]}) + self.assertFalse(routes[0].git_fetch) + + def test_git_fetch_true(self): + routes = parse_routes({"routes": [{ + "host": "github.com", + "git": {"fetch": True}, + }]}) + self.assertTrue(routes[0].git_fetch) + + def test_git_fetch_must_be_boolean(self): + with self.assertRaises(ValueError): + parse_routes({"routes": [{ + "host": "github.com", + "git": {"fetch": "yes"}, + }]}) + + def test_unknown_git_key_rejected(self): + with self.assertRaises(ValueError): + parse_routes({"routes": [{ + "host": "github.com", + "git": {"push": True}, + }]}) + def test_order_preserved(self): routes = parse_routes({"routes": [ {"host": "a.example"}, @@ -604,6 +631,24 @@ class TestDecisionDefaults(unittest.TestCase): self.assertIsNone(d.inject_authorization) +class TestDecideGitFetch(unittest.TestCase): + def test_blocks_when_host_not_allowlisted(self): + d = decide_git_fetch((), "github.com") + self.assertEqual("block", d.action) + self.assertIn("git fetch/clone over HTTPS", d.reason) + + def test_blocks_when_route_does_not_opt_in(self): + d = decide_git_fetch((Route(host="github.com"),), "github.com") + self.assertEqual("block", d.action) + + def test_forwards_when_route_opts_in(self): + d = decide_git_fetch( + (Route(host="github.com", git_fetch=True),), + "github.com", + ) + 
class TestIsGitFetchRequest(unittest.TestCase):
    """Recognition of smart-HTTP fetch (``git-upload-pack``) requests."""

    def test_post_git_upload_pack_endpoint(self):
        # The pack endpoint is identified by path alone; query is empty.
        detected = is_git_fetch_request("/owner/repo.git/git-upload-pack", "")
        self.assertTrue(detected)

    def test_info_refs_with_upload_pack_service(self):
        path = "/owner/repo.git/info/refs"
        query = "service=git-upload-pack"
        self.assertTrue(is_git_fetch_request(path, query))

    def test_info_refs_with_extra_query_params(self):
        # The service parameter need not be first or last in the query.
        path = "/owner/repo.git/info/refs"
        query = "foo=bar&service=git-upload-pack&z=1"
        self.assertTrue(is_git_fetch_request(path, query))

    def test_push_endpoints_are_not_fetch(self):
        push_requests = (
            ("/owner/repo.git/info/refs", "service=git-receive-pack"),
            ("/owner/repo.git/git-receive-pack", ""),
        )
        for path, query in push_requests:
            self.assertFalse(is_git_fetch_request(path, query))

    def test_unrelated_paths_not_fetch(self):
        for path in ("/repos/owner/repo", "/v1/messages", "/"):
            self.assertFalse(is_git_fetch_request(path, ""))
class TestGitPolicy(unittest.TestCase):
    """Manifest-level validation of the per-route ``git`` block."""

    def test_omitted_means_https_git_fetch_disabled(self):
        # No ``git`` key at all: the flag keeps its False default.
        first_route = _bottle([{"host": "github.com"}]).egress.routes[0]
        self.assertFalse(first_route.GitFetch)

    def test_fetch_true_allowed(self):
        spec = [{"host": "github.com", "git": {"fetch": True}}]
        first_route = _bottle(spec).egress.routes[0]
        self.assertTrue(first_route.GitFetch)

    def test_fetch_false_allowed(self):
        spec = [{"host": "github.com", "git": {"fetch": False}}]
        first_route = _bottle(spec).egress.routes[0]
        self.assertFalse(first_route.GitFetch)

    def test_git_must_be_object(self):
        # A bare boolean in place of the ``git`` mapping is rejected.
        spec = [{"host": "github.com", "git": True}]
        with self.assertRaises(ManifestError):
            _bottle(spec)

    def test_fetch_must_be_boolean(self):
        spec = [{"host": "github.com", "git": {"fetch": "yes"}}]
        with self.assertRaises(ManifestError):
            _bottle(spec)

    def test_unknown_git_key_rejected(self):
        # Only ``fetch`` is accepted under ``git``.
        spec = [{"host": "github.com", "git": {"push": True}}]
        with self.assertRaises(ManifestError):
            _bottle(spec)