Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d89d389bef | |||
| 8caa79ee76 | |||
| 74060192e0 |
@@ -54,11 +54,23 @@ jobs:
|
||||
echo "percent=$PERCENT" >> $GITHUB_OUTPUT
|
||||
echo "Coverage: $PERCENT%"
|
||||
|
||||
- name: Extract core (critical-module) coverage percentage
  id: core_coverage
  run: |
    # Reuses the .coverage data from the previous step. The core list is
    # the single source of truth in scripts/critical-modules.txt; every
    # core module is unit-tested, so the unit-only run is accurate for it.
    # Strip blank lines and `#` comments, then comma-join for --include.
    INCLUDE=$(grep -vE '^[[:space:]]*(#|$)' scripts/critical-modules.txt | paste -sd, -)
    # An empty --include is treated by coverage as "no filter", which would
    # publish the global total under the "core coverage" label. Fail loudly
    # instead of emitting a number that silently measures the wrong files.
    if [ -z "$INCLUDE" ]; then
      echo "error: scripts/critical-modules.txt is missing or lists no modules" >&2
      exit 1
    fi
    # Take the integer in front of the '%' on the TOTAL row of the report.
    PERCENT=$(python -m coverage report --include="$INCLUDE" 2>/dev/null | grep '^TOTAL' | grep -oP '\d+(?=%)' | tail -1)
    echo "percent=$PERCENT" >> "$GITHUB_OUTPUT"
    echo "Core coverage: $PERCENT%"
|
||||
|
||||
- name: Update badges in README
|
||||
run: |
|
||||
PYLINT_SCORE="${{ steps.pylint.outputs.score }}"
|
||||
PYRIGHT_ERRORS="${{ steps.pyright.outputs.errors }}"
|
||||
COVERAGE_PERCENT="${{ steps.coverage.outputs.percent }}"
|
||||
CORE_COVERAGE_PERCENT="${{ steps.core_coverage.outputs.percent }}"
|
||||
|
||||
PYLINT_SCORE_ENCODED=$(echo "$PYLINT_SCORE" | sed 's|/|%2F|g')
|
||||
|
||||
@@ -71,9 +83,12 @@ jobs:
|
||||
if [ -n "$COVERAGE_PERCENT" ]; then
|
||||
sed -i "s|/badge/coverage-[^)]*|/badge/coverage-${COVERAGE_PERCENT}%25-brightgreen|" README.md
|
||||
fi
|
||||
if [ -n "$CORE_COVERAGE_PERCENT" ]; then
|
||||
sed -i "s|/badge/core%20coverage-[^)]*|/badge/core%20coverage-${CORE_COVERAGE_PERCENT}%25-brightgreen|" README.md
|
||||
fi
|
||||
|
||||
echo "Updated badges:"
|
||||
grep -E "pylint|pyright|coverage" README.md | head -3
|
||||
grep -E "pylint|pyright|coverage" README.md | head -4
|
||||
|
||||
- name: Commit and push badge updates
|
||||
run: |
|
||||
@@ -86,7 +101,7 @@ jobs:
|
||||
else
|
||||
echo "Badge changes detected, committing..."
|
||||
git add README.md
|
||||
MSG="chore: update quality badges"$'\n\n'"- Pylint: ${{ steps.pylint.outputs.score }}"$'\n'"- Pyright: ${{ steps.pyright.outputs.errors }} errors"$'\n'"- Coverage: ${{ steps.coverage.outputs.percent }}%"$'\n\n'"[skip ci]"
|
||||
MSG="chore: update quality badges"$'\n\n'"- Pylint: ${{ steps.pylint.outputs.score }}"$'\n'"- Pyright: ${{ steps.pyright.outputs.errors }} errors"$'\n'"- Coverage: ${{ steps.coverage.outputs.percent }}%"$'\n'"- Core coverage: ${{ steps.core_coverage.outputs.percent }}%"$'\n\n'"[skip ci]"
|
||||
git commit -m "$MSG"
|
||||
git push
|
||||
fi
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
[](https://github.com/PyCQA/pylint)
|
||||
[](https://github.com/microsoft/pyright)
|
||||
[](https://coverage.readthedocs.io/)
|
||||
[](https://gitea.dideric.is/didericis/bot-bottle/src/branch/main/docs/decisions/0004-coverage-policy.md)
|
||||
|
||||
**Problem:** A developer wants to run a coding agent without supervision, but they don't want a prompt-injected or misbehaving agent wrecking their environment or exfiltrating sensitive data.
|
||||
|
||||
|
||||
@@ -88,3 +88,9 @@ omit list.
|
||||
- PRs #290 (cover the egress adapter), and the coverage-policy PR that
|
||||
introduces this record.
|
||||
- `.coveragerc`, `scripts/coverage.sh`, `scripts/diff_coverage.py`.
|
||||
- `scripts/critical-modules.txt` — the single source of truth for the
|
||||
core-module list; read by both `scripts/coverage.sh` and the
|
||||
`update-badges.yml` "core coverage" badge so they cannot drift.
|
||||
- The README carries a `core coverage` badge (auto-updated from that
|
||||
list) — the headline number, distinct from the informational global
|
||||
`coverage` badge.
|
||||
|
||||
+4
-7
@@ -16,13 +16,10 @@ cd "$(dirname "$0")/.."
|
||||
|
||||
PY="${PYTHON:-python3}"
|
||||
|
||||
# Critical security/logic core held to the high bar by ADR 0004.
|
||||
CRITICAL="bot_bottle/egress_addon.py,bot_bottle/egress_addon_core.py,\
|
||||
bot_bottle/dlp_detectors.py,bot_bottle/egress.py,bot_bottle/manifest.py,\
|
||||
bot_bottle/manifest_egress.py,bot_bottle/manifest_agent.py,\
|
||||
bot_bottle/manifest_schema.py,bot_bottle/git_gate.py,\
|
||||
bot_bottle/git_http_backend.py,bot_bottle/supervise.py,\
|
||||
bot_bottle/yaml_subset.py,bot_bottle/bottle_state.py"
|
||||
# Critical security/logic core held to the high bar by ADR 0004. The list
# lives in one place (scripts/critical-modules.txt) so this report and the
# README "core coverage" badge can't drift; comma-join it for --include.
# Blank lines and `#` comments in the list file are dropped before joining.
CRITICAL=$(grep -vE '^[[:space:]]*(#|$)' scripts/critical-modules.txt | paste -sd, -)
# A missing or empty list yields an empty string; an empty --include pattern
# would no longer restrict the report to the core modules, so stop here
# rather than report a number that measures the wrong files.
if [ -z "$CRITICAL" ]; then
    echo "error: scripts/critical-modules.txt is missing or lists no modules" >&2
    exit 1
fi
|
||||
|
||||
rm -f .coverage
|
||||
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# Critical security/logic core held to the >=90% coverage bar by
|
||||
# docs/decisions/0004-coverage-policy.md.
|
||||
#
|
||||
# SINGLE SOURCE OF TRUTH: scripts/coverage.sh (the `critical` report) and
|
||||
# .gitea/workflows/update-badges.yml (the "core coverage" badge) both read
|
||||
# this file. Add a module here when it becomes part of the core; a coverage
|
||||
# number that silently stops measuring a module is worse than no badge.
|
||||
#
|
||||
# One module path per line, relative to the repo root. Blank lines and
|
||||
# `#` comments are ignored.
|
||||
bot_bottle/egress_addon.py
|
||||
bot_bottle/egress_addon_core.py
|
||||
bot_bottle/dlp_detectors.py
|
||||
bot_bottle/egress.py
|
||||
bot_bottle/manifest.py
|
||||
bot_bottle/manifest_egress.py
|
||||
bot_bottle/manifest_agent.py
|
||||
bot_bottle/manifest_schema.py
|
||||
bot_bottle/git_gate.py
|
||||
bot_bottle/git_http_backend.py
|
||||
bot_bottle/supervise.py
|
||||
bot_bottle/yaml_subset.py
|
||||
bot_bottle/bottle_state.py
|
||||
@@ -0,0 +1,226 @@
|
||||
"""Unit: manifest + manifest_agent validation error/edge branches
|
||||
(coverage ratchet, ADR 0004).
|
||||
|
||||
Drives ManifestBottle / ManifestAgentProvider / ManifestAgent / the
|
||||
provider-settings parser and the eager ManifestIndex lookup methods
|
||||
through their rejection and edge paths."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
|
||||
from bot_bottle.manifest import ManifestBottle, ManifestIndex
|
||||
from bot_bottle.manifest_agent import (
|
||||
ManifestAgent,
|
||||
ManifestAgentProvider,
|
||||
_parse_provider_settings,
|
||||
)
|
||||
from bot_bottle.manifest_util import ManifestError
|
||||
|
||||
|
||||
def _idx(obj: dict[str, object]) -> ManifestIndex:
    """Build a ``ManifestIndex`` from an in-memory manifest mapping."""
    index = ManifestIndex.from_json_obj(obj)
    return index
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ManifestBottle.from_dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBottleValidation(unittest.TestCase):
    """``ManifestBottle.from_dict``: bad mappings raise ``ManifestError``,
    a minimal well-formed mapping parses."""

    def test_unknown_key(self) -> None:
        self.assertRaises(ManifestError, ManifestBottle.from_dict, "b", {"bogus": 1})

    def test_env_value_not_string(self) -> None:
        raw = {"env": {"X": 5}}
        self.assertRaises(ManifestError, ManifestBottle.from_dict, "b", raw)

    def test_supervise_not_bool(self) -> None:
        raw = {"supervise": "yes"}
        self.assertRaises(ManifestError, ManifestBottle.from_dict, "b", raw)

    def test_removed_runtime_field(self) -> None:
        # A bottle that still carries a `runtime` key is expected to be refused.
        raw = {"runtime": "runsc"}
        self.assertRaises(ManifestError, ManifestBottle.from_dict, "b", raw)

    def test_valid_minimal(self) -> None:
        bottle = ManifestBottle.from_dict(
            "b",
            {"supervise": False, "env": {"X": "1"}},
        )
        self.assertFalse(bottle.supervise)
        self.assertEqual({"X": "1"}, dict(bottle.env))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ManifestAgentProvider.from_dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAgentProviderValidation(unittest.TestCase):
    """``ManifestAgentProvider.from_dict``: rejection paths plus one
    accepted claude/auth_token combination."""

    def test_unknown_key(self) -> None:
        raw = {"bogus": 1}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_empty_template(self) -> None:
        raw = {"template": ""}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_dockerfile_not_string(self) -> None:
        raw = {"dockerfile": 5}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_auth_token_unknown_template(self) -> None:
        raw = {"auth_token": "x", "template": "weird"}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_auth_token_non_claude_template(self) -> None:
        raw = {"auth_token": "x", "template": "codex"}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_forward_creds_unknown_template(self) -> None:
        raw = {"forward_host_credentials": True, "template": "weird"}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_forward_creds_non_codex_template(self) -> None:
        raw = {"forward_host_credentials": True, "template": "claude"}
        self.assertRaises(ManifestError, ManifestAgentProvider.from_dict, "b", raw)

    def test_valid_claude_auth_token(self) -> None:
        provider = ManifestAgentProvider.from_dict(
            "b",
            {"template": "claude", "auth_token": "T"},
        )
        self.assertEqual("T", provider.auth_token)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse_provider_settings
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestProviderSettings(unittest.TestCase):
    """``_parse_provider_settings``: pass-through for an unknown template,
    rejections for claude/pi settings, and one accepted pi mapping."""

    def test_unknown_template_passes_settings_through(self) -> None:
        parsed = _parse_provider_settings("b", "weird", {"anything": 1})
        self.assertEqual({"anything": 1}, parsed)

    def test_startup_args_not_list(self) -> None:
        settings = {"startup_args": "x"}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "claude", settings)

    def test_startup_args_empty_item(self) -> None:
        settings = {"startup_args": [""]}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "claude", settings)

    def test_pi_string_field_empty(self) -> None:
        settings = {"provider": ""}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_max_tokens_field_invalid(self) -> None:
        settings = {"max_tokens_field": "bogus"}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_api_key_and_env_conflict(self) -> None:
        # Both an inline key and an env-var reference are supplied together.
        settings = {"api_key": "k", "api_key_env": "E"}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_models_item_not_string(self) -> None:
        settings = {"models": [5]}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_bool_field_not_bool(self) -> None:
        settings = {"supports_developer_role": "yes"}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_context_window_not_positive(self) -> None:
        settings = {"context_window": -1}
        self.assertRaises(ManifestError, _parse_provider_settings, "b", "pi", settings)

    def test_pi_valid_settings(self) -> None:
        settings = {"provider": "openai", "models": ["gpt"], "context_window": 8000}
        parsed = _parse_provider_settings("b", "pi", settings)
        self.assertEqual("openai", parsed["provider"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ManifestAgent.from_dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAgentValidation(unittest.TestCase):
    """``ManifestAgent.from_dict`` called with an empty set of known
    bottle names: rejection paths plus the empty ``git-gate`` case."""

    def test_bottle_empty_string(self) -> None:
        raw = {"bottle": ""}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_bottle_undefined(self) -> None:
        # "x" is not among the (empty) set of bottle names passed in.
        raw = {"bottle": "x"}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_skills_not_list(self) -> None:
        raw = {"skills": "x"}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_skill_item_not_string(self) -> None:
        raw = {"skills": [5]}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_prompt_not_string(self) -> None:
        raw = {"prompt": 5}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_git_gate_repos_rejected_at_agent_level(self) -> None:
        raw = {"git-gate": {"repos": {}}}
        self.assertRaises(ManifestError, ManifestAgent.from_dict, "a", raw, set())

    def test_git_gate_empty_is_allowed(self) -> None:
        parsed = ManifestAgent.from_dict("a", {"git-gate": {}}, set())
        self.assertTrue(parsed.git_user.is_empty())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Eager ManifestIndex lookup methods
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEagerIndexLookups(unittest.TestCase):
    """Lookup methods on a ``ManifestIndex`` built from in-memory mappings."""

    def _idx(self) -> ManifestIndex:
        """Return an index with one bottle ``b`` (carrying a git-gate user)
        and one agent ``a`` bound to it."""
        return _idx({
            "bottles": {"b": {"git-gate": {"user": {"name": "Bot", "email": "b@x"}}}},
            "agents": {"a": {"bottle": "b"}},
        })

    def test_unknown_bottle_section_is_empty(self) -> None:
        # no "bottles" key -> _section_dict(None) path
        idx = _idx({"agents": {"a": {}}})
        self.assertEqual(["a"], idx.all_agent_names)

    def test_load_unknown_agent_raises(self) -> None:
        with self.assertRaises(ManifestError):
            self._idx().load_for_agent("nope")

    def test_has_agent(self) -> None:
        idx = self._idx()
        self.assertTrue(idx.has_agent("a"))
        self.assertFalse(idx.has_agent("nope"))

    def test_require_agent_known_and_unknown(self) -> None:
        idx = self._idx()
        idx.require_agent("a")  # no raise
        with self.assertRaises(ManifestError):
            idx.require_agent("nope")

    def test_git_identity_summary(self) -> None:
        m = self._idx().load_for_agent("a")
        summary = m.git_identity_summary()
        # Real check: reported as a unittest failure and not stripped by -O.
        self.assertIsNotNone(summary)
        # Kept only so static type checkers narrow away None below.
        assert summary is not None
        self.assertIn("name=Bot", summary)
        self.assertIn("email=b@x", summary)

    def test_git_identity_summary_none_when_empty(self) -> None:
        m = _idx({"bottles": {"b": {}}, "agents": {"a": {"bottle": "b"}}}).load_for_agent("a")
        self.assertIsNone(m.git_identity_summary())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,132 @@
|
||||
"""Unit: supervise queue/audit error + edge branches (coverage ratchet,
|
||||
ADR 0004). Complements test_supervise.py with the malformed-input and
|
||||
fallback paths."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from bot_bottle import supervise
|
||||
from bot_bottle.supervise import (
|
||||
Proposal,
|
||||
TOOL_EGRESS_ALLOW,
|
||||
list_pending_proposals,
|
||||
read_audit_entries,
|
||||
read_proposal,
|
||||
read_response,
|
||||
wait_for_response,
|
||||
)
|
||||
|
||||
|
||||
def _proposal() -> Proposal:
    """Return a new egress-allow ``Proposal`` filled with placeholder values."""
    sample = Proposal.new(
        tool=TOOL_EGRESS_ALLOW,
        bottle_slug="slug",
        current_file_hash="h",
        proposed_file="x",
        justification="j",
    )
    return sample
|
||||
|
||||
|
||||
class TestPathHelpers(unittest.TestCase):
    """Path and filename helpers exposed by the ``supervise`` module."""

    def test_bot_bottle_root(self) -> None:
        root_text = str(supervise.bot_bottle_root())
        self.assertTrue(root_text.endswith(".bot-bottle"))

    def test_queue_dir_for_slug(self) -> None:
        queue_text = str(supervise.queue_dir_for_slug("slug"))
        self.assertIn("slug", queue_text)

    def test_id_from_non_proposal_filename(self) -> None:
        # A response file name is not a proposal file name.
        response_name = Path("x.response.json")
        self.assertIsNone(supervise._id_from_proposal_filename(response_name))
|
||||
|
||||
|
||||
class TestReadMalformed(unittest.TestCase):
    """Queue readers fed files that are not valid proposal/response JSON."""

    def test_read_proposal_non_dict(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            queue.joinpath("p.proposal.json").write_text("[]")
            self.assertRaises(ValueError, read_proposal, queue, "p")

    def test_read_response_non_dict(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            queue.joinpath("p.response.json").write_text("[]")
            self.assertRaises(ValueError, read_response, queue, "p")

    def test_list_pending_skips_malformed(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            queue.joinpath("bad.proposal.json").write_text("{ not json")
            queue.joinpath("arr.proposal.json").write_text("[]")
            # An empty object parses as JSON but from_dict raises on it.
            queue.joinpath("incomplete.proposal.json").write_text("{}")
            # The only well-formed proposal in the directory.
            supervise.write_proposal(queue, _proposal())
            found = list_pending_proposals(queue)
            self.assertEqual(1, len(found))
            self.assertEqual("slug", found[0].bottle_slug)

    def test_list_pending_skips_when_response_present(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            proposal = _proposal()
            supervise.write_proposal(queue, proposal)
            # A response file for the same id exists, so the proposal is skipped.
            queue.joinpath(f"{proposal.id}.response.json").write_text("{}")
            self.assertEqual([], list_pending_proposals(queue))
|
||||
|
||||
|
||||
class TestWaitForResponse(unittest.TestCase):
    """``wait_for_response`` with an unusable response file and a deadline
    of "now": the call is expected to end in ``TimeoutError``."""

    def test_malformed_response_then_timeout(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            queue.joinpath("p.response.json").write_text("{ not json")
            self.assertRaises(
                TimeoutError,
                wait_for_response,
                queue,
                "p",
                deadline=time.monotonic(),
            )

    def test_incomplete_response_then_timeout(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            queue = Path(tmp)
            # A JSON object, but from_dict raises on it.
            queue.joinpath("p.response.json").write_text("{}")
            self.assertRaises(
                TimeoutError,
                wait_for_response,
                queue,
                "p",
                deadline=time.monotonic(),
            )
|
||||
|
||||
|
||||
class TestReadAuditEntries(unittest.TestCase):
    """``read_audit_entries`` with the HOME environment variable patched to
    a scratch directory."""

    def test_missing_log_returns_empty(self) -> None:
        with tempfile.TemporaryDirectory() as home:
            with patch.dict("os.environ", {"HOME": home}):
                self.assertEqual([], read_audit_entries("egress", "nope"))

    def test_skips_malformed_lines(self) -> None:
        well_formed = (
            '{"timestamp": "t", "bottle_slug": "slug", "component": "egress",'
            ' "operator_action": "approve", "operator_notes": "",'
            ' "justification": "", "diff": ""}'
        )
        log_lines = [
            "",            # blank line skipped
            "{ not json",  # JSONDecodeError skipped
            "[]",          # not a dict skipped
            "{}",          # missing fields -> ValueError skipped
            well_formed,
        ]
        with tempfile.TemporaryDirectory() as home:
            with patch.dict("os.environ", {"HOME": home}):
                log_path = supervise.audit_log_path("egress", "slug")
                log_path.parent.mkdir(parents=True, exist_ok=True)
                # Every line, including the last, is newline-terminated.
                log_path.write_text("".join(f"{line}\n" for line in log_lines))
                entries = read_audit_entries("egress", "slug")
                self.assertEqual(1, len(entries))
                self.assertEqual("approve", entries[0].operator_action)
|
||||
|
||||
|
||||
class TestFlockFallback(unittest.TestCase):
    """The flock helpers must not raise when the lock call itself fails."""

    def test_flock_on_closed_fd_is_swallowed(self) -> None:
        # flock on a closed fd raises OSError(EBADF), which the helpers swallow.
        stale_fd = os.open(os.devnull, os.O_RDONLY)
        os.close(stale_fd)
        for helper in (supervise._try_flock, supervise._try_funlock):
            helper(stale_fd)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user