From 12805444e258be59d2f66dc4b25681b3ed239e3a Mon Sep 17 00:00:00 2001
From: claude
Date: Fri, 5 Jun 2026 21:05:52 +0000
Subject: [PATCH] revert: restore simple O(n*m) proximity check

The input lists are bounded by the number of phrase patterns (5+5), so
the sorted-scan optimization added complexity for no practical benefit.

Co-Authored-By: Claude Sonnet 4.6
---
 bot_bottle/dlp_detectors.py | 27 ++++++---------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/bot_bottle/dlp_detectors.py b/bot_bottle/dlp_detectors.py
index 35d9312..a19ff28 100644
--- a/bot_bottle/dlp_detectors.py
+++ b/bot_bottle/dlp_detectors.py
@@ -112,33 +112,18 @@ JAILBREAK_PHRASES: tuple[re.Pattern[str], ...] = (
 PROXIMITY_CHARS = 500
 
 
-def _nearby(
+def _min_distance(
     a_matches: list[re.Match[str]],
     b_matches: list[re.Match[str]],
-    threshold: int,
 ) -> int | None:
-    """Return the smallest char gap between any a–b pair, or None if
-    both lists are empty. O(n log n) via sort + linear scan."""
     if not a_matches or not b_matches:
         return None
-    events = sorted(
-        [(m.start(), m.end(), "a") for m in a_matches]
-        + [(m.start(), m.end(), "b") for m in b_matches],
-    )
-    best: int | None = None
-    prev_end: int | None = None
-    prev_tag: str | None = None
-    for start, end, tag in events:
-        if prev_tag is not None and prev_tag != tag and prev_end is not None:
-            gap = max(0, start - prev_end)
+    best = None
+    for a in a_matches:
+        for b in b_matches:
+            gap = max(0, max(a.start(), b.start()) - min(a.end(), b.end()))
             if best is None or gap < best:
                 best = gap
-                if best == 0:
-                    return 0
-                if best <= threshold:
-                    return best
-        prev_end = end if prev_end is None else max(prev_end, end)
-        prev_tag = tag
     return best
 
 
@@ -147,7 +132,7 @@ def scan_naive_injection(text: str) -> ScanResult | None:
     jailbreak_hits = [m for p in JAILBREAK_PHRASES for m in p.finditer(text)]
 
     if disclosure_hits and jailbreak_hits:
-        dist = _nearby(disclosure_hits, jailbreak_hits, PROXIMITY_CHARS)
+        dist = _min_distance(disclosure_hits, jailbreak_hits)
         if dist is not None and dist <= PROXIMITY_CHARS:
             return ScanResult(
                 severity="block",