feat(egress): extend outbound DLP scan to headers, query params, path, and hostname (PRD 0053)
lint / lint (push) Failing after 1m30s
test / unit (pull_request) Successful in 35s
test / integration (pull_request) Successful in 43s

This commit is contained in:
2026-06-06 17:43:55 +00:00
parent 9f3991164c
commit 4515c9e8ad
4 changed files with 221 additions and 6 deletions
+10 -5
View File
@@ -16,6 +16,7 @@ from mitmproxy import http # type: ignore[import-not-found]
from egress_addon_core import ( # type: ignore[import-not-found]
Route,
build_outbound_scan_text,
decide,
is_git_push_request,
load_routes,
@@ -98,14 +99,18 @@ class EgressAddon:
return
# DLP outbound scan BEFORE stripping auth — catches tokens the
# agent tried to smuggle in the Authorization header.
# agent tried to smuggle in any header, path, query param, or body.
# Hostname is included to catch DNS-tunnelling exfiltration attempts.
route = match_route(self.routes, flow.request.pretty_host)
if route is not None:
body = flow.request.get_text(strict=False) or ""
auth_header = flow.request.headers.get("authorization", "")
scan_text = body
if auth_header:
scan_text = auth_header + "\n" + body
scan_text = build_outbound_scan_text(
flow.request.pretty_host,
request_path,
query,
dict(flow.request.headers),
body,
)
dlp_result = scan_outbound(route, scan_text, os.environ)
if dlp_result is not None and dlp_result.severity == "block":
self._block(flow, f"egress DLP: {dlp_result.reason}")
+22
View File
@@ -477,6 +477,27 @@ def decide(
# DLP scan dispatch (PRD 0053)
# ---------------------------------------------------------------------------
def build_outbound_scan_text(
    host: str,
    path: str,
    query: str,
    headers: typing.Mapping[str, str],
    body: str,
) -> str:
    """Assemble all outbound request surfaces into one string for DLP scanning.

    The surfaces are joined with newlines in a fixed order: hostname, path,
    query string, one ``name: value`` line per header, then the body. The
    hostname is included so that data encoded into DNS labels is scanned too.

    Percent-encoding is transparent to the receiving server, so a secret
    written as ``%67hp_...`` in the path or query would slip past detectors
    that match literal text. Whenever decoding changes the path or the query,
    the decoded form is added directly after the raw one; requests without
    percent-escapes produce exactly the same text as they did before.

    Args:
        host: Request hostname.
        path: Request path.
        query: Query string; skipped when empty.
        headers: Header names mapped to their values.
        body: Request body text; skipped when empty.

    Returns:
        The newline-joined scan text.
    """
    # Function-scope import so the block does not depend on a new
    # module-level import being added elsewhere in the file.
    from urllib.parse import unquote

    parts: list[str] = [host, path]

    # Scan the decoded path too, but only when it differs from the raw one,
    # so un-escaped requests are not scanned twice.
    decoded_path = unquote(path)
    if decoded_path != path:
        parts.append(decoded_path)

    if query:
        parts.append(query)
        # ``unquote`` rather than ``unquote_plus``: a literal "+" can be part
        # of a base64-style token and must not be turned into a space.
        decoded_query = unquote(query)
        if decoded_query != query:
            parts.append(decoded_query)

    parts.extend(f"{name}: {value}" for name, value in headers.items())

    if body:
        parts.append(body)
    return "\n".join(parts)
def _detector_enabled(
configured: tuple[str, ...] | None,
name: str,
@@ -541,6 +562,7 @@ __all__ = [
"PathMatch",
"Route",
"ScanResult",
"build_outbound_scan_text",
"decide",
"evaluate_matches",
"is_git_push_request",