egress: require opt-in for HTTPS git fetch
test / unit (pull_request) Successful in 42s
test / integration (pull_request) Successful in 27s
lint / lint (push) Successful in 1m53s
test / unit (push) Successful in 41s
test / integration (push) Successful in 23s
Update Quality Badges / update-badges (push) Successful in 1m35s

This commit was merged in pull request #227.
This commit is contained in:
2026-06-10 07:00:01 +00:00
parent acb9cd67c6
commit 3f04567290
8 changed files with 240 additions and 7 deletions
+8
View File
@@ -91,6 +91,7 @@ def egress_manifest_routes(
auth_scheme=r.AuthScheme, auth_scheme=r.AuthScheme,
token_ref=r.TokenRef, token_ref=r.TokenRef,
roles=r.Role, roles=r.Role,
git_fetch=r.GitFetch,
outbound_detectors=r.OutboundDetectors, outbound_detectors=r.OutboundDetectors,
inbound_detectors=r.InboundDetectors, inbound_detectors=r.InboundDetectors,
)) ))
@@ -173,6 +174,8 @@ def _route_to_yaml_fields(r: Route) -> dict[str, object]:
entry_data["headers"] = headers_data entry_data["headers"] = headers_data
matches_data.append(entry_data) matches_data.append(entry_data)
fields["matches"] = matches_data fields["matches"] = matches_data
if r.git_fetch:
fields["git"] = {"fetch": True}
if r.outbound_detectors is not None or r.inbound_detectors is not None: if r.outbound_detectors is not None or r.inbound_detectors is not None:
dlp: dict[str, object] = {} dlp: dict[str, object] = {}
if r.outbound_detectors is not None: if r.outbound_detectors is not None:
@@ -242,6 +245,11 @@ def egress_render_routes(
lines.append(" matches:") lines.append(" matches:")
for entry in f["matches"]: # type: ignore[union-attr] for entry in f["matches"]: # type: ignore[union-attr]
lines.extend(_render_match_entry(entry)) # type: ignore[arg-type] lines.extend(_render_match_entry(entry)) # type: ignore[arg-type]
if "git" in f:
git_dict: dict[str, object] = f["git"] # type: ignore
lines.append(" git:")
if git_dict.get("fetch") is True:
lines.append(" fetch: true")
if "dlp" in f: if "dlp" in f:
dlp_dict: dict[str, object] = f["dlp"] # type: ignore dlp_dict: dict[str, object] = f["dlp"] # type: ignore
lines.append(" dlp:") lines.append(" dlp:")
+14
View File
@@ -21,6 +21,8 @@ from egress_addon_core import ( # type: ignore[import-not-found] # pylint: dis
build_inbound_scan_text, build_inbound_scan_text,
build_outbound_scan_text, build_outbound_scan_text,
decide, decide,
decide_git_fetch,
is_git_fetch_request,
is_git_push_request, is_git_push_request,
load_config, load_config,
match_route, match_route,
@@ -181,6 +183,18 @@ class EgressAddon:
) )
return return
if is_git_fetch_request(request_path, query):
git_decision = decide_git_fetch(
self.config.routes, flow.request.pretty_host,
)
if git_decision.action == "block":
self._block(
flow,
git_decision.reason,
ctx=self._req_ctx(flow),
)
return
# Strip agent-set Authorization after DLP scan so smuggled tokens # Strip agent-set Authorization after DLP scan so smuggled tokens
# are caught above; the route may inject sidecar-owned auth below. # are caught above; the route may inject sidecar-owned auth below.
flow.request.headers.pop("authorization", None) flow.request.headers.pop("authorization", None)
+54 -2
View File
@@ -66,6 +66,7 @@ class Route:
matches: tuple[MatchEntry, ...] = () matches: tuple[MatchEntry, ...] = ()
auth_scheme: str = "" auth_scheme: str = ""
token_env: str = "" token_env: str = ""
git_fetch: bool = False
outbound_detectors: tuple[str, ...] | None = None outbound_detectors: tuple[str, ...] | None = None
inbound_detectors: tuple[str, ...] | None = None inbound_detectors: tuple[str, ...] | None = None
@@ -316,16 +317,35 @@ def _parse_one(idx: int, raw: object) -> Route:
f"token_env={token_env!r})" f"token_env={token_env!r})"
) )
# git-over-HTTPS policy
git_fetch = False
git_raw = raw_dict.get("git")
if git_raw is not None:
if not isinstance(git_raw, dict):
raise ValueError(f"{label} ({host}): 'git' must be an object")
git_dict: dict[str, object] = typing.cast(dict[str, object], git_raw)
fetch_raw = git_dict.get("fetch", False)
if fetch_raw is True or fetch_raw is False:
git_fetch = fetch_raw
else:
raise ValueError(f"{label} ({host}): 'git.fetch' must be a boolean")
for k in git_dict:
if k != "fetch":
raise ValueError(
f"{label} ({host}): git has unknown key {k!r}; "
"accepted key is 'fetch'"
)
# dlp detectors # dlp detectors
outbound_detectors, inbound_detectors = _parse_detectors( outbound_detectors, inbound_detectors = _parse_detectors(
idx, host, raw_dict, idx, host, raw_dict,
) )
for k in raw_dict: for k in raw_dict:
if k not in ("host", "matches", "auth_scheme", "token_env", "dlp"): if k not in ("host", "matches", "auth_scheme", "token_env", "dlp", "git"):
raise ValueError( raise ValueError(
f"{label} ({host}): unknown key {k!r}; accepted keys " f"{label} ({host}): unknown key {k!r}; accepted keys "
f"are 'host', 'matches', 'auth_scheme', 'token_env', 'dlp'" f"are 'host', 'matches', 'auth_scheme', 'token_env', 'dlp', 'git'"
) )
return Route( return Route(
@@ -333,6 +353,7 @@ def _parse_one(idx: int, raw: object) -> Route:
matches=matches, matches=matches,
auth_scheme=auth_scheme, auth_scheme=auth_scheme,
token_env=token_env, token_env=token_env,
git_fetch=git_fetch,
outbound_detectors=outbound_detectors, outbound_detectors=outbound_detectors,
inbound_detectors=inbound_detectors, inbound_detectors=inbound_detectors,
) )
@@ -450,6 +471,17 @@ def is_git_push_request(path: str, query: str) -> bool:
return False return False
def is_git_fetch_request(path: str, query: str) -> bool:
    """Return True when a request targets a smart-HTTP Git fetch endpoint.

    Two request shapes are recognised:

    * a path ending in ``/git-upload-pack`` (the pack negotiation POST);
    * a path ending in ``/info/refs`` whose query string carries
      ``service=git-upload-pack`` (the ref advertisement GET).

    Both the path and the query are compared in raw and percent-decoded
    form. The path and query come from the client, and a literal string
    comparison alone would miss spellings such as
    ``service=git%2Dupload%2Dpack`` that a server which URL-decodes its
    input would still treat as a fetch.

    Args:
        path: Request path without the query string.
        query: Raw query string without the leading ``?``; may be empty.

    Returns:
        True if the request looks like a Git fetch/clone, else False.
    """
    # Function-scope import: keeps the block self-contained without
    # touching the module's import section.
    from urllib.parse import parse_qsl, unquote

    service = "git-upload-pack"
    # Every path the raw comparison accepted is still accepted; the decoded
    # form only adds percent-encoded spellings of the same endpoints.
    path_forms = (path, unquote(path))

    if any(form.endswith("/" + service) for form in path_forms):
        return True

    if any(form.endswith("/info/refs") for form in path_forms):
        # parse_qsl splits on '&', splits each pair on the first '=', and
        # percent-decodes both the name and the value.
        return any(
            name == "service" and value == service
            for name, value in parse_qsl(query, keep_blank_values=True)
        )

    return False
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Route lookup + decision # Route lookup + decision
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -513,6 +545,24 @@ def decide(
return Decision(action="forward") return Decision(action="forward")
def decide_git_fetch(
    routes: typing.Sequence[Route],
    request_host: str,
) -> Decision:
    """Decide whether an HTTPS Git fetch to ``request_host`` may proceed.

    The request is forwarded only when ``match_route`` returns a route for
    the host and that route has ``git_fetch`` set. In every other case
    (no route, or a route that has not opted in) the request is blocked
    with a reason describing the opt-in.

    Args:
        routes: Configured egress routes, passed through to ``match_route``.
        request_host: Host of the outgoing request.

    Returns:
        A ``Decision`` with action ``"forward"`` or ``"block"``.
    """
    matched = match_route(routes, request_host)
    opted_in = matched is not None and matched.git_fetch
    if opted_in:
        return Decision(action="forward")

    # Default-deny: the message names both supported alternatives.
    message = (
        "egress: git fetch/clone over HTTPS is not allowed by default; "
        "use git-gate for declared repos or set "
        "egress.routes[].git.fetch=true for explicit read-only "
        "HTTPS Git access."
    )
    return Decision(action="block", reason=message)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# DLP scan dispatch (PRD 0053) # DLP scan dispatch (PRD 0053)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -660,8 +710,10 @@ __all__ = [
"build_inbound_scan_text", "build_inbound_scan_text",
"build_outbound_scan_text", "build_outbound_scan_text",
"decide", "decide",
"decide_git_fetch",
"evaluate_matches", "evaluate_matches",
"is_git_push_request", "is_git_push_request",
"is_git_fetch_request",
"load_config", "load_config",
"load_routes", "load_routes",
"match_route", "match_route",
+23 -2
View File
@@ -64,6 +64,7 @@ class ManifestEgressRoute:
AuthScheme: str = "" AuthScheme: str = ""
TokenRef: str = "" TokenRef: str = ""
Role: tuple[str, ...] = () Role: tuple[str, ...] = ()
GitFetch: bool = False
OutboundDetectors: tuple[str, ...] | None = None OutboundDetectors: tuple[str, ...] | None = None
InboundDetectors: tuple[str, ...] | None = None InboundDetectors: tuple[str, ...] | None = None
@@ -165,11 +166,30 @@ class ManifestEgressRoute:
label, d.get("dlp"), label, d.get("dlp"),
) )
# --- git-over-HTTPS policy ---
git_fetch = False
if "git" in d:
git_d = as_json_object(d.get("git"), f"{label} git")
raw_fetch = git_d.get("fetch", False)
if isinstance(raw_fetch, bool):
git_fetch = raw_fetch
else:
raise ManifestError(
f"{label} git.fetch must be a boolean "
f"(was {type(raw_fetch).__name__})"
)
for k in git_d:
if k != "fetch":
raise ManifestError(
f"{label} git has unknown key {k!r}; "
f"only 'fetch' is accepted"
)
for k in d: for k in d:
if k not in ("host", "matches", "auth", "role", "dlp"): if k not in ("host", "matches", "auth", "role", "dlp", "git"):
raise ManifestError( raise ManifestError(
f"{label} has unknown key {k!r}; accepted keys are " f"{label} has unknown key {k!r}; accepted keys are "
f"'host', 'matches', 'auth', 'role', 'dlp'" f"'host', 'matches', 'auth', 'role', 'dlp', 'git'"
) )
return cls( return cls(
@@ -178,6 +198,7 @@ class ManifestEgressRoute:
AuthScheme=auth_scheme, AuthScheme=auth_scheme,
TokenRef=token_ref, TokenRef=token_ref,
Role=roles, Role=roles,
GitFetch=git_fetch,
OutboundDetectors=outbound_detectors, OutboundDetectors=outbound_detectors,
InboundDetectors=inbound_detectors, InboundDetectors=inbound_detectors,
) )
+21
View File
@@ -199,6 +199,25 @@ Named inbound detectors: `naive_injection_detection`.
The manifest parser (`manifest_egress.py`) validates the `dlp` block and The manifest parser (`manifest_egress.py`) validates the `dlp` block and
rejects unknown detector names. rejects unknown detector names.
### Manifest schema — `git` block
HTTPS Git clone/fetch traffic is not implied by a host-level egress route.
Smart HTTP Git fetch uses `git-upload-pack`, which can transfer large repo
packfiles and bypass the git-gate mirror path. HTTPS Git fetch is therefore
blocked by default and must be explicitly enabled per route:
```yaml
egress:
routes:
- host: github.com
git:
fetch: true
```
`git.fetch: true` permits read-only smart HTTP clone/fetch requests
(`git-upload-pack`) after the normal host and `matches` checks pass. HTTPS
Git push (`git-receive-pack`) remains blocked by the egress addon.
### `EgressRoute` changes ### `EgressRoute` changes
`EgressRoute` replaces `PathAllowlist` with `Matches` and gains two new `EgressRoute` replaces `PathAllowlist` with `Matches` and gains two new
@@ -232,6 +251,7 @@ class EgressRoute:
AuthScheme: str = "" AuthScheme: str = ""
TokenRef: str = "" TokenRef: str = ""
Role: tuple[str, ...] = () Role: tuple[str, ...] = ()
GitFetch: bool = False
OutboundDetectors: tuple[str, ...] | None = None # None = all enabled OutboundDetectors: tuple[str, ...] | None = None # None = all enabled
InboundDetectors: tuple[str, ...] | None = None # None = all enabled InboundDetectors: tuple[str, ...] | None = None # None = all enabled
``` ```
@@ -252,6 +272,7 @@ class Route:
matches: tuple[MatchEntry, ...] = () matches: tuple[MatchEntry, ...] = ()
auth_scheme: str = "" auth_scheme: str = ""
token_env: str = "" token_env: str = ""
git_fetch: bool = False
outbound_detectors: tuple[str, ...] | None = None outbound_detectors: tuple[str, ...] | None = None
inbound_detectors: tuple[str, ...] | None = None inbound_detectors: tuple[str, ...] | None = None
``` ```
+14
View File
@@ -86,6 +86,11 @@ class TestManifestRouteLift(unittest.TestCase):
self.assertEqual(("token_patterns",), r.outbound_detectors) self.assertEqual(("token_patterns",), r.outbound_detectors)
self.assertEqual((), r.inbound_detectors) self.assertEqual((), r.inbound_detectors)
def test_git_fetch_policy_lifted(self):
    """A manifest route with ``git: {fetch: true}`` lifts to ``git_fetch``."""
    manifest_routes = [{"host": "github.com", "git": {"fetch": True}}]
    lifted = egress_manifest_routes(_bottle(manifest_routes))
    first_route = lifted[0]
    self.assertTrue(first_route.git_fetch)
class TestSlotAssignment(unittest.TestCase): class TestSlotAssignment(unittest.TestCase):
"""Slot assignment happens in egress_routes_for_bottle.""" """Slot assignment happens in egress_routes_for_bottle."""
@@ -324,6 +329,15 @@ class TestRenderRoutes(unittest.TestCase):
self.assertEqual(("token_patterns",), addon_routes[0].outbound_detectors) self.assertEqual(("token_patterns",), addon_routes[0].outbound_detectors)
self.assertEqual((), addon_routes[0].inbound_detectors) self.assertEqual((), addon_routes[0].inbound_detectors)
def test_git_fetch_policy_round_trips(self):
    """The git fetch opt-in survives render -> addon ``load_routes``."""
    from bot_bottle.egress_addon_core import load_routes

    bottle = _bottle([{"host": "github.com", "git": {"fetch": True}}])
    bottle_routes = egress_routes_for_bottle(bottle)
    rendered_text = egress_render_routes(bottle_routes)

    # The rendered document must carry the nested git block...
    parsed_first = self._parsed(bottle_routes)[0]
    self.assertEqual({"fetch": True}, parsed_first["git"])

    # ...and the addon-side loader must read it back as git_fetch=True.
    reloaded = load_routes(rendered_text)
    self.assertTrue(reloaded[0].git_fetch)
def test_log_zero_omitted_from_render(self): def test_log_zero_omitted_from_render(self):
b = _bottle([{"host": "x.example"}]) b = _bottle([{"host": "x.example"}])
routes = egress_routes_for_bottle(b) routes = egress_routes_for_bottle(b)
+78 -2
View File
@@ -25,7 +25,9 @@ from bot_bottle.egress_addon_core import (
build_inbound_scan_text, build_inbound_scan_text,
build_outbound_scan_text, build_outbound_scan_text,
decide, decide,
decide_git_fetch,
evaluate_matches, evaluate_matches,
is_git_fetch_request,
is_git_push_request, is_git_push_request,
load_config, load_config,
load_routes, load_routes,
@@ -67,6 +69,31 @@ class TestParseRoutes(unittest.TestCase):
self.assertEqual("Bearer", r.auth_scheme) self.assertEqual("Bearer", r.auth_scheme)
self.assertEqual("EGRESS_TOKEN_0", r.token_env) self.assertEqual("EGRESS_TOKEN_0", r.token_env)
def test_git_fetch_defaults_false(self):
    """A route without a ``git`` block parses with ``git_fetch`` off."""
    config = {"routes": [{"host": "github.com"}]}
    parsed = parse_routes(config)
    self.assertFalse(parsed[0].git_fetch)
def test_git_fetch_true(self):
    """``git: {fetch: true}`` parses to ``git_fetch=True``."""
    route_cfg = {"host": "github.com", "git": {"fetch": True}}
    parsed = parse_routes({"routes": [route_cfg]})
    self.assertTrue(parsed[0].git_fetch)
def test_git_fetch_must_be_boolean(self):
    """A non-boolean ``git.fetch`` value is rejected with ValueError."""
    route_cfg = {"host": "github.com", "git": {"fetch": "yes"}}
    config = {"routes": [route_cfg]}
    self.assertRaises(ValueError, parse_routes, config)
def test_unknown_git_key_rejected(self):
    """Keys other than ``fetch`` inside ``git`` are rejected with ValueError."""
    route_cfg = {"host": "github.com", "git": {"push": True}}
    config = {"routes": [route_cfg]}
    self.assertRaises(ValueError, parse_routes, config)
def test_order_preserved(self): def test_order_preserved(self):
routes = parse_routes({"routes": [ routes = parse_routes({"routes": [
{"host": "a.example"}, {"host": "a.example"},
@@ -604,6 +631,24 @@ class TestDecisionDefaults(unittest.TestCase):
self.assertIsNone(d.inject_authorization) self.assertIsNone(d.inject_authorization)
class TestDecideGitFetch(unittest.TestCase):
    """Checks the default-deny / opt-in behaviour of ``decide_git_fetch``."""

    HOST = "github.com"

    def test_blocks_when_host_not_allowlisted(self):
        # No routes at all: the request must be blocked with the git reason.
        decision = decide_git_fetch((), self.HOST)
        self.assertEqual("block", decision.action)
        self.assertIn("git fetch/clone over HTTPS", decision.reason)

    def test_blocks_when_route_does_not_opt_in(self):
        # A route for the host exists but leaves git_fetch at its default.
        plain_route = Route(host="github.com")
        decision = decide_git_fetch((plain_route,), self.HOST)
        self.assertEqual("block", decision.action)

    def test_forwards_when_route_opts_in(self):
        opted_in_route = Route(host="github.com", git_fetch=True)
        decision = decide_git_fetch((opted_in_route,), self.HOST)
        self.assertEqual("forward", decision.action)
# --- scan_outbound ------------------------------------------------------- # --- scan_outbound -------------------------------------------------------
@@ -620,7 +665,7 @@ class TestScanOutboundBody(unittest.TestCase):
self.assertIn("OpenAI API key", result.reason) self.assertIn("OpenAI API key", result.reason)
# --- is_git_push_request ------------------------------------------------ # --- HTTPS Git request detection ----------------------------------------
class TestIsGitPushRequest(unittest.TestCase): class TestIsGitPushRequest(unittest.TestCase):
@@ -643,7 +688,7 @@ class TestIsGitPushRequest(unittest.TestCase):
"service=git-receive-pack&foo=bar", "service=git-receive-pack&foo=bar",
)) ))
def test_fetch_endpoints_not_blocked(self): def test_fetch_endpoints_are_not_push(self):
self.assertFalse(is_git_push_request( self.assertFalse(is_git_push_request(
"/owner/repo.git/info/refs", "/owner/repo.git/info/refs",
"service=git-upload-pack", "service=git-upload-pack",
@@ -661,6 +706,37 @@ class TestIsGitPushRequest(unittest.TestCase):
self.assertFalse(is_git_push_request("/", "")) self.assertFalse(is_git_push_request("/", ""))
class TestIsGitFetchRequest(unittest.TestCase):
    """Checks which path/query pairs ``is_git_fetch_request`` recognises."""

    INFO_REFS = "/owner/repo.git/info/refs"

    def test_post_git_upload_pack_endpoint(self):
        detected = is_git_fetch_request("/owner/repo.git/git-upload-pack", "")
        self.assertTrue(detected)

    def test_info_refs_with_upload_pack_service(self):
        detected = is_git_fetch_request(
            self.INFO_REFS, "service=git-upload-pack",
        )
        self.assertTrue(detected)

    def test_info_refs_with_extra_query_params(self):
        # The service parameter is found regardless of its position.
        detected = is_git_fetch_request(
            self.INFO_REFS, "foo=bar&service=git-upload-pack&z=1",
        )
        self.assertTrue(detected)

    def test_push_endpoints_are_not_fetch(self):
        push_requests = (
            (self.INFO_REFS, "service=git-receive-pack"),
            ("/owner/repo.git/git-receive-pack", ""),
        )
        for req_path, req_query in push_requests:
            self.assertFalse(is_git_fetch_request(req_path, req_query))

    def test_unrelated_paths_not_fetch(self):
        for req_path in ("/repos/owner/repo", "/v1/messages", "/"):
            self.assertFalse(is_git_fetch_request(req_path, ""))
class TestGitPushBlockFailFast(unittest.TestCase): class TestGitPushBlockFailFast(unittest.TestCase):
def test_real_git_push_fails_fast_when_egress_blocks_receive_pack(self): def test_real_git_push_fails_fast_when_egress_blocks_receive_pack(self):
seen_paths: list[str] = [] seen_paths: list[str] = []
+28 -1
View File
@@ -2,7 +2,8 @@
The route shape uses Gateway API HTTPRoute match vocabulary: The route shape uses Gateway API HTTPRoute match vocabulary:
`host` (required), optional `matches` (paths/methods/headers), `host` (required), optional `matches` (paths/methods/headers),
optional nested `auth: { scheme, token_ref }`, optional `dlp`. optional nested `auth: { scheme, token_ref }`, optional `dlp`,
optional `git: { fetch: true }`.
Validation rules per PRD 0017/0053: empty `auth: {}` is an error, Validation rules per PRD 0017/0053: empty `auth: {}` is an error,
partial `auth` is an error, auth omission means unauthenticated.""" partial `auth` is an error, auth omission means unauthenticated."""
@@ -302,6 +303,32 @@ class TestDlp(unittest.TestCase):
}}]) }}])
class TestGitPolicy(unittest.TestCase):
    """Manifest-level validation of the per-route ``git`` block."""

    @staticmethod
    def _first_route(route_cfg):
        # Build a bottle from a single route dict and return the parsed route.
        return _bottle([route_cfg]).egress.routes[0]

    def test_omitted_means_https_git_fetch_disabled(self):
        route = self._first_route({"host": "github.com"})
        self.assertFalse(route.GitFetch)

    def test_fetch_true_allowed(self):
        route = self._first_route(
            {"host": "github.com", "git": {"fetch": True}},
        )
        self.assertTrue(route.GitFetch)

    def test_fetch_false_allowed(self):
        route = self._first_route(
            {"host": "github.com", "git": {"fetch": False}},
        )
        self.assertFalse(route.GitFetch)

    def test_git_must_be_object(self):
        bad_routes = [{"host": "github.com", "git": True}]
        self.assertRaises(ManifestError, _bottle, bad_routes)

    def test_fetch_must_be_boolean(self):
        bad_routes = [{"host": "github.com", "git": {"fetch": "yes"}}]
        self.assertRaises(ManifestError, _bottle, bad_routes)

    def test_unknown_git_key_rejected(self):
        bad_routes = [{"host": "github.com", "git": {"push": True}}]
        self.assertRaises(ManifestError, _bottle, bad_routes)
class TestAuth(unittest.TestCase): class TestAuth(unittest.TestCase):
def test_omitted_means_no_auth(self): def test_omitted_means_no_auth(self):
b = _bottle([{"host": "github.com"}]) b = _bottle([{"host": "github.com"}])