feat(egress): extend outbound DLP scan to headers, query params, path, and hostname (PRD 0053)
This commit is contained in:
@@ -18,6 +18,7 @@ from egress_addon_core import ( # type: ignore[import-not-found] # pylint: dis
|
|||||||
LOG_BLOCKS,
|
LOG_BLOCKS,
|
||||||
LOG_FULL,
|
LOG_FULL,
|
||||||
Config,
|
Config,
|
||||||
|
build_outbound_scan_text,
|
||||||
decide,
|
decide,
|
||||||
is_git_push_request,
|
is_git_push_request,
|
||||||
load_config,
|
load_config,
|
||||||
@@ -147,16 +148,20 @@ class EgressAddon:
|
|||||||
self._serve_introspection(flow, request_path)
|
self._serve_introspection(flow, request_path)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Strip inbound Authorization before DLP and matching; the agent cannot
|
# DLP outbound scan BEFORE stripping auth — catches tokens the
|
||||||
# smuggle tokens, and the route may inject sidecar-owned auth later.
|
# agent tried to smuggle in any header, path, query param, or body.
|
||||||
flow.request.headers.pop("authorization", None)
|
# Hostname is included to catch DNS-tunnelling exfiltration attempts.
|
||||||
|
|
||||||
# DLP outbound scan after auth stripping so placeholder or attempted
|
|
||||||
# agent auth headers do not become part of the scanned payload.
|
|
||||||
route = match_route(self.config.routes, flow.request.pretty_host)
|
route = match_route(self.config.routes, flow.request.pretty_host)
|
||||||
if route is not None:
|
if route is not None:
|
||||||
body = flow.request.get_text(strict=False) or ""
|
body = flow.request.get_text(strict=False) or ""
|
||||||
dlp_result = scan_outbound(route, body, os.environ)
|
scan_text = build_outbound_scan_text(
|
||||||
|
flow.request.pretty_host,
|
||||||
|
request_path,
|
||||||
|
query,
|
||||||
|
dict(flow.request.headers),
|
||||||
|
body,
|
||||||
|
)
|
||||||
|
dlp_result = scan_outbound(route, scan_text, os.environ)
|
||||||
if dlp_result is not None and dlp_result.severity == "block":
|
if dlp_result is not None and dlp_result.severity == "block":
|
||||||
ctx = self._req_ctx(flow)
|
ctx = self._req_ctx(flow)
|
||||||
if dlp_result.context:
|
if dlp_result.context:
|
||||||
@@ -174,6 +179,10 @@ class EgressAddon:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Strip agent-set Authorization after DLP scan so smuggled tokens
|
||||||
|
# are caught above; the route may inject sidecar-owned auth below.
|
||||||
|
flow.request.headers.pop("authorization", None)
|
||||||
|
|
||||||
# Build headers mapping for match evaluation
|
# Build headers mapping for match evaluation
|
||||||
req_headers = {k.lower(): v for k, v in flow.request.headers.items()}
|
req_headers = {k.lower(): v for k, v in flow.request.headers.items()}
|
||||||
|
|
||||||
|
|||||||
@@ -517,6 +517,27 @@ def decide(
|
|||||||
# DLP scan dispatch (PRD 0053)
|
# DLP scan dispatch (PRD 0053)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_outbound_scan_text(
    host: str,
    path: str,
    query: str,
    headers: typing.Mapping[str, str],
    body: str,
) -> str:
    """Flatten every outbound request surface into one string for DLP scanning.

    The result is newline-separated, in this order: host, path, query
    (skipped when empty), one ``name: value`` line per header in mapping
    order, and body (skipped when empty).  Host and path are always
    emitted, even when they are empty strings.

    Including the host lets the scan see secrets encoded in a hostname
    label (DNS tunnelling).
    """
    header_lines = (f"{name}: {value}" for name, value in headers.items())
    segments = [
        host,
        path,
        # Optional surfaces contribute nothing (not even a blank line) when empty.
        *([query] if query else []),
        *header_lines,
        *([body] if body else []),
    ]
    return "\n".join(segments)
|
||||||
|
|
||||||
|
|
||||||
def _detector_enabled(
|
def _detector_enabled(
|
||||||
configured: tuple[str, ...] | None,
|
configured: tuple[str, ...] | None,
|
||||||
name: str,
|
name: str,
|
||||||
@@ -589,6 +610,7 @@ __all__ = [
|
|||||||
"PathMatch",
|
"PathMatch",
|
||||||
"Route",
|
"Route",
|
||||||
"ScanResult",
|
"ScanResult",
|
||||||
|
"build_outbound_scan_text",
|
||||||
"decide",
|
"decide",
|
||||||
"evaluate_matches",
|
"evaluate_matches",
|
||||||
"is_git_push_request",
|
"is_git_push_request",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# PRD 0053: Extended outbound DLP scan surfaces
|
# PRD 0053: Extended outbound DLP scan surfaces
|
||||||
|
|
||||||
- **Status:** Draft
|
- **Status:** Active
|
||||||
- **Author:** claude
|
- **Author:** claude
|
||||||
- **Created:** 2026-06-06
|
- **Created:** 2026-06-06
|
||||||
- **Issue:** #204
|
- **Issue:** #204
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ from bot_bottle.egress_addon_core import (
|
|||||||
MatchEntry,
|
MatchEntry,
|
||||||
PathMatch,
|
PathMatch,
|
||||||
Route,
|
Route,
|
||||||
|
ScanResult,
|
||||||
|
build_outbound_scan_text,
|
||||||
decide,
|
decide,
|
||||||
evaluate_matches,
|
evaluate_matches,
|
||||||
is_git_push_request,
|
is_git_push_request,
|
||||||
@@ -733,5 +735,190 @@ class TestGitPushBlockFailFast(unittest.TestCase):
|
|||||||
self.assertIn("403", result.stderr)
|
self.assertIn("403", result.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
# --- build_outbound_scan_text -------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildOutboundScanText(unittest.TestCase):
    """Check which request surfaces end up in the assembled DLP scan text."""

    def _build(self, **kwargs):
        # Baseline request with no query, headers, or body; each test
        # overrides only the surface it exercises.
        params = {
            "host": "api.example.com",
            "path": "/v1/data",
            "query": "",
            "headers": {},
            "body": "",
            **kwargs,
        }
        return build_outbound_scan_text(**params)

    def test_host_appears(self):
        self.assertIn("secret.attacker.com", self._build(host="secret.attacker.com"))

    def test_path_appears(self):
        self.assertIn("/api/token-in-path", self._build(path="/api/token-in-path"))

    def test_query_appears(self):
        self.assertIn("api_key=abc123", self._build(query="api_key=abc123"))

    def test_empty_query_omitted(self):
        # Only host and path remain, so exactly one separator is expected.
        separator_count = self._build(query="").count("\n")
        self.assertEqual(1, separator_count)

    def test_headers_appear(self):
        text = self._build(headers={"x-api-key": "tok", "accept": "application/json"})
        for expected_line in ("x-api-key: tok", "accept: application/json"):
            self.assertIn(expected_line, text)

    def test_body_appears(self):
        self.assertIn("hello world", self._build(body="hello world"))

    def test_empty_body_omitted(self):
        # An empty body must not leave a blank trailing segment behind.
        self.assertNotIn("\n\n", self._build(body=""))

    def test_all_surfaces_present(self):
        text = build_outbound_scan_text(
            host="h.example",
            path="/p",
            query="q=1",
            headers={"x-h": "v"},
            body="body",
        )
        expected_fragments = ("h.example", "/p", "q=1", "x-h: v", "body")
        for fragment in expected_fragments:
            self.assertIn(fragment, text)
|
||||||
|
|
||||||
|
|
||||||
|
# --- scan_outbound -------------------------------------------------------
|
||||||
|
|
||||||
|
_AWS_KEY = "AKIAIOSFODNN7EXAMPLE"
|
||||||
|
_ROUTE = Route(host="api.example.com")
|
||||||
|
|
||||||
|
|
||||||
|
class TestScanOutbound(unittest.TestCase):
    """Run scan_outbound over text assembled from each request surface."""

    def _scan(self, env, *, host="api.example.com", path, query="", headers=None, body=""):
        # Assemble the scan text for one request and run it through the
        # scanner against the shared test route.
        text = build_outbound_scan_text(
            host=host,
            path=path,
            query=query,
            headers={} if headers is None else headers,
            body=body,
        )
        return scan_outbound(_ROUTE, text, env)

    def _assert_blocked(self, env, **request):
        # Shared expectation: the scan yields a result with "block" severity.
        result = self._scan(env, **request)
        self.assertIsNotNone(result)
        assert result is not None  # narrows the Optional for type checkers
        self.assertEqual("block", result.severity)

    def test_clean_request_returns_none(self):
        result = self._scan(
            {},
            path="/v1/data",
            query="limit=10",
            headers={"content-type": "application/json"},
            body='{"msg": "hello"}',
        )
        self.assertIsNone(result)

    def test_token_in_body_blocked(self):
        self._assert_blocked({}, path="/v1/data", body=f"key={_AWS_KEY}")

    def test_token_in_path_blocked(self):
        self._assert_blocked({}, path=f"/proxy/{_AWS_KEY}/resource")

    def test_token_in_query_param_blocked(self):
        self._assert_blocked({}, path="/search", query=f"aws_key={_AWS_KEY}")

    def test_token_in_non_auth_header_blocked(self):
        self._assert_blocked({}, path="/v1/data", headers={"x-aws-key": _AWS_KEY})

    def test_token_in_hostname_blocked(self):
        # DNS-tunnelling: secret encoded in subdomain label
        self._assert_blocked({}, host=f"{_AWS_KEY}.attacker.com", path="/")

    def test_known_secret_in_query_param_blocked(self):
        secret = "my-provisioned-secret"
        self._assert_blocked(
            {"EGRESS_TOKEN_0": secret},
            path="/data",
            query=f"token={secret}",
        )

    def test_known_secret_in_path_blocked(self):
        secret = "my-provisioned-secret"
        self._assert_blocked(
            {"EGRESS_TOKEN_0": secret},
            path=f"/proxy/{secret}/resource",
        )

    def test_known_secret_in_custom_header_blocked(self):
        secret = "my-provisioned-secret"
        self._assert_blocked(
            {"EGRESS_TOKEN_0": secret},
            path="/data",
            headers={"x-secret": secret},
        )
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user