feat: fold bot-bottle-orchestrator into bot_bottle/orchestrator subpackage
Moves the orchestrator into bot_bottle/orchestrator/ so one install gets everything. Entry point is now `python -m bot_bottle.orchestrator run`. - Add bot_bottle/orchestrator/ with all 14 modules (verbatim move; internal imports were already relative, so no changes inside orchestrator modules) - Rewrite bootstrap.py: remove the lazy bot_bottle import guard, use direct relative imports from ..contrib.* - Add bot_bottle/contrib/forge/base.py: ScopedForge (read-anywhere / write-scoped) - Add bot_bottle/contrib/gitea/client.py: GiteaClient + GiteaForge (urllib.request only) - Add bot_bottle/contrib/gitea/forge_state.py: ForgeState + SqliteForgeStateStore - Add tests/unit/orchestrator/ (82 tests: 63 migrated + 19 new for contrib modules) Closes #321
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
"""Watchdog: freeze runs whose agent exited without signalling done.
|
||||
|
||||
`sweep(now)` is the pure, testable core: any `running` record whose
|
||||
`last_checkin_at` is older than the timeout is frozen as
|
||||
done-without-self-report and returned so provenance can flag it.
|
||||
`Watchdog.start()` runs `sweep` on a daemon thread once a minute.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from .model import STATUS_FROZEN, STATUS_RUNNING, RunRecord
|
||||
from .runner import BottleRunner
|
||||
from .store import StateStore
|
||||
|
||||
_TICK_SECS = 60.0
|
||||
|
||||
|
||||
def _parse(ts: str) -> datetime | None:
|
||||
try:
|
||||
return datetime.fromisoformat(ts)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
class Watchdog:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
store: StateStore,
|
||||
runner: BottleRunner,
|
||||
timeout_secs: int,
|
||||
) -> None:
|
||||
self._store = store
|
||||
self._runner = runner
|
||||
self._timeout = timedelta(seconds=timeout_secs)
|
||||
self._stop = threading.Event()
|
||||
self._thread: threading.Thread | None = None
|
||||
|
||||
def sweep(self, now: datetime) -> list[RunRecord]:
|
||||
"""Freeze stale running records. Returns the ones fired."""
|
||||
fired: list[RunRecord] = []
|
||||
for record in self._store.all():
|
||||
if record.status != STATUS_RUNNING:
|
||||
continue
|
||||
checkin = _parse(record.last_checkin_at)
|
||||
if checkin is None or now - checkin <= self._timeout:
|
||||
continue
|
||||
self._runner.freeze(record.slug)
|
||||
record.status = STATUS_FROZEN
|
||||
self._store.upsert(record)
|
||||
fired.append(record)
|
||||
return fired
|
||||
|
||||
def start(self) -> None:
|
||||
self._thread = threading.Thread(target=self._loop, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
def stop(self) -> None:
|
||||
self._stop.set()
|
||||
if self._thread is not None:
|
||||
self._thread.join(timeout=_TICK_SECS)
|
||||
|
||||
def _loop(self) -> None:
|
||||
while not self._stop.wait(_TICK_SECS):
|
||||
self.sweep(datetime.now().astimezone())
|
||||
Reference in New Issue
Block a user