PRD: SQLite local storage #320

Open
didericis-codex wants to merge 10 commits from sqlite-local-storage into main
3 changed files with 126 additions and 80 deletions
Showing only changes of commit e8e4f6f7c7 - Show all commits
+49 -26
View File
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
from .supervise import AuditEntry
def _sv() -> object:
def get_supervise_mod() -> object:
didericis marked this conversation as resolved
Review

call this get_supervise_mod for legibility.

call this `get_supervise_mod` for legibility.
Review

Done — renamed to get_supervise_mod() in both audit_store.py and queue_store.py (e8e4f6f).

Done — renamed to `get_supervise_mod()` in both `audit_store.py` and `queue_store.py` (`e8e4f6f`).
"""Lazy import of supervise to avoid a circular-import at module init time.
Mirrors our own module identity so patches on supervise.bot_bottle_root
propagate correctly in both flat (sidecar / sys.path-injection tests) and
3
@@ -26,28 +26,31 @@ def _sv() -> object:
return _m
def _audit_entry_from_row(row: sqlite3.Row) -> AuditEntry:
m = _sv()
return m.AuditEntry( # type: ignore[attr-defined]
timestamp=row["timestamp"],
bottle_slug=row["bottle_slug"],
component=row["component"],
operator_action=row["operator_action"],
operator_notes=row["operator_notes"],
justification=row["justification"],
diff=row["diff"],
# One entry per schema version: _MIGRATIONS[0] brings a fresh DB (user_version=0)
Review

put this on the AuditStore class

put this on the AuditStore class
Review

Done — moved to AuditStore._row_to_entry (static method). _proposal_from_row and _response_from_row similarly moved to QueueStore._row_to_proposal / _row_to_response.

Done — moved to `AuditStore._row_to_entry` (static method). `_proposal_from_row` and `_response_from_row` similarly moved to `QueueStore._row_to_proposal` / `_row_to_response`.
# to version 1, _MIGRATIONS[1] to version 2, and so on. Add new migrations at
# the end; never edit existing ones.
_MIGRATIONS: list[str] = [
# v1 — initial schema
"""
CREATE TABLE IF NOT EXISTS supervise_audit_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
bottle_slug TEXT NOT NULL,
component TEXT NOT NULL,
operator_action TEXT NOT NULL,
operator_notes TEXT NOT NULL,
justification TEXT NOT NULL,
Review

method for this is unnecessary

method for this is unnecessary
Review

Removed — inlined as db_path or get_supervise_mod().host_db_path() directly in __init__.

Removed — inlined as `db_path or get_supervise_mod().host_db_path()` directly in `__init__`.
diff TEXT NOT NULL
)
def _host_db_path() -> Path:
return _sv().host_db_path() # type: ignore[attr-defined,no-any-return]
""",
]
class AuditStore:
"""SQLite-backed persistent store for supervise audit entries."""
def __init__(self, db_path: Path | None = None) -> None:
self.db_path = db_path or _host_db_path()
self.db_path = db_path or get_supervise_mod().host_db_path() # type: ignore[attr-defined]
self.db_path.parent.mkdir(parents=True, exist_ok=True)
self._init()
@@ -85,29 +88,49 @@ class AuditStore:
""",
(component, slug),
).fetchall()
return [_audit_entry_from_row(row) for row in rows]
return [self._row_to_entry(row) for row in rows]
@staticmethod
def _row_to_entry(row: sqlite3.Row) -> AuditEntry:
m = get_supervise_mod()
return m.AuditEntry( # type: ignore[attr-defined]
timestamp=row["timestamp"],
bottle_slug=row["bottle_slug"],
component=row["component"],
Review

I think we probably do want a migration framework baked in from the beginning... would be very painful to introduce later given we don't have control over client side sqlite dbs. Is there anything in the sqlite3 stdlib module we could leverage or should we create a basic set of up and down migration scripts? Regardless, think we should probably change how the table gets created here.

I think we probably do want a migration framework baked in from the beginning... would be very painful to introduce later given we don't have control over client side sqlite dbs. Is there anything in the sqlite3 stdlib module we could leverage or should we create a basic set of `up` and `down` migration scripts? Regardless, think we should probably change how the table gets created here.
Review

Added. Each store now has a _MIGRATIONS list (one SQL string per version). _init() reads the stored version from a shared schema_versions table keyed by module name, applies only the pending entries, and updates the version after each one. No third-party deps — pure sqlite3 stdlib.

I used a schema_versions table rather than PRAGMA user_version because both stores share the same database file; a single global integer would cause one store to skip its migrations when the other had already advanced the counter.

Added. Each store now has a `_MIGRATIONS` list (one SQL string per version). `_init()` reads the stored version from a shared `schema_versions` table keyed by module name, applies only the pending entries, and updates the version after each one. No third-party deps — pure sqlite3 stdlib. I used a `schema_versions` table rather than `PRAGMA user_version` because both stores share the same database file; a single global integer would cause one store to skip its migrations when the other had already advanced the counter.
operator_action=row["operator_action"],
operator_notes=row["operator_notes"],
justification=row["justification"],
diff=row["diff"],
)
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
_SCHEMA_KEY = "audit_store"
def _init(self) -> None:
with self._connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS supervise_audit_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
bottle_slug TEXT NOT NULL,
component TEXT NOT NULL,
operator_action TEXT NOT NULL,
operator_notes TEXT NOT NULL,
justification TEXT NOT NULL,
diff TEXT NOT NULL
CREATE TABLE IF NOT EXISTS schema_versions (
module TEXT PRIMARY KEY,
version INTEGER NOT NULL DEFAULT 0
)
"""
)
row = conn.execute(
"SELECT version FROM schema_versions WHERE module = ?",
(self._SCHEMA_KEY,),
).fetchone()
version = row[0] if row else 0
for i, sql in enumerate(_MIGRATIONS[version:], start=version + 1):
conn.execute(sql)
conn.execute(
"INSERT OR REPLACE INTO schema_versions (module, version) VALUES (?, ?)",
(self._SCHEMA_KEY, i),
)
Review

I think we want to make the code in here inside a migrations.apply method. Should create a new module named migrations, probably with a class like this:

class TableMigrations:
  def __init__(self, migrations):
    self.migrations = migrations

  def apply(self, conn):
    conn.execute("""
CREATE TABLE IF NOT EXISTS schema_versions (
module TEXT PRIMARY KEY,
version INTEGER NOT NULL DEFAULT 0
""
# ... etc (include what's in here in the migration)

Then we can define _MIGRATIONS = TableMigrations(...)

I think we want to make the code in here inside a `migrations.apply` method. Should create a new module named `migrations`, probably with a class like this: ```py class TableMigrations: def __init__(self, migrations): self.migrations = migrations def apply(self, conn): conn.execute(""" CREATE TABLE IF NOT EXISTS schema_versions ( module TEXT PRIMARY KEY, version INTEGER NOT NULL DEFAULT 0 "" # ... etc (include what's in here in the migration) ``` Then we can define `_MIGRATIONS = TableMigrations(...)`
Review

Understood. One open question on the constructor signature: since apply needs to track versions per-module in schema_versions, should TableMigrations take (schema_key, migrations) — i.e. TableMigrations("audit_store", [...]) — so the key is baked in? Or would you prefer apply(conn, schema_key) and keep the key on the store side?

Understood. One open question on the constructor signature: since `apply` needs to track versions per-module in `schema_versions`, should `TableMigrations` take `(schema_key, migrations)` — i.e. `TableMigrations("audit_store", [...])` — so the key is baked in? Or would you prefer `apply(conn, schema_key)` and keep the key on the store side?
Review

@didericis-claude Should bake it in

@didericis-claude Should bake it in
Review

Got it — TableMigrations(schema_key, migrations). Implementing now: new bot_bottle/migrations.py with TableMigrations, new bot_bottle/db_store.py with DbStore(db_path, migrations) base, then update both stores.

Got it — `TableMigrations(schema_key, migrations)`. Implementing now: new `bot_bottle/migrations.py` with `TableMigrations`, new `bot_bottle/db_store.py` with `DbStore(db_path, migrations)` base, then update both stores.
self._chmod()
def _chmod(self) -> None:
+74 -52
View File
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
from .supervise import Proposal, Response
def _sv() -> object:
def get_supervise_mod() -> object:
"""Lazy import of supervise to avoid a circular-import at module init time.
By the time any QueueStore method is called, both modules are fully loaded.
@@ -30,31 +30,38 @@ def _sv() -> object:
return _m
def _proposal_from_row(row: sqlite3.Row) -> Proposal:
m = _sv()
return m.Proposal( # type: ignore[attr-defined]
id=row["id"],
bottle_slug=row["bottle_slug"],
tool=row["tool"],
proposed_file=row["proposed_file"],
justification=row["justification"],
arrival_timestamp=row["arrival_timestamp"],
current_file_hash=row["current_file_hash"],
# One entry per schema version: _MIGRATIONS[0] brings a fresh DB (user_version=0)
# to version 1, _MIGRATIONS[1] to version 2, and so on. Add new migrations at
# the end; never edit existing ones.
_MIGRATIONS: list[str] = [
# v1 — proposals table
"""
CREATE TABLE IF NOT EXISTS supervise_proposals (
queue_key TEXT NOT NULL,
id TEXT NOT NULL,
bottle_slug TEXT NOT NULL,
tool TEXT NOT NULL,
proposed_file TEXT NOT NULL,
justification TEXT NOT NULL,
arrival_timestamp TEXT NOT NULL,
current_file_hash TEXT NOT NULL,
archived INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (queue_key, id)
)
def _response_from_row(row: sqlite3.Row) -> Response:
m = _sv()
return m.Response( # type: ignore[attr-defined]
proposal_id=row["proposal_id"],
status=row["status"],
notes=row["notes"],
final_file=row["final_file"],
""",
# v2 — responses table
"""
didericis marked this conversation as resolved
Review

Same comments about these method and the lazy import from AuditStore apply here

Same comments about these method and the lazy import from AuditStore apply here
Review

All applied here too: _svget_supervise_mod(), row helpers moved to _row_to_proposal / _row_to_response static methods on QueueStore, _host_db_path() inlined.

All applied here too: `_sv` → `get_supervise_mod()`, row helpers moved to `_row_to_proposal` / `_row_to_response` static methods on `QueueStore`, `_host_db_path()` inlined.
CREATE TABLE IF NOT EXISTS supervise_responses (
queue_key TEXT NOT NULL,
proposal_id TEXT NOT NULL,
status TEXT NOT NULL,
notes TEXT NOT NULL,
final_file TEXT,
archived INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (queue_key, proposal_id)
)
def _host_db_path() -> Path:
return _sv().host_db_path() # type: ignore[attr-defined,no-any-return]
""",
]
class QueueStore:
Review

Should probably be a DbStore base class which gets passed a db_path and a migrations object (see https://gitea.dideric.is/didericis/bot-bottle/pulls/320/files#issuecomment-2887 for shape of migrations object)

Should probably be a `DbStore` base class which gets passed a `db_path` and a `migrations` object (see https://gitea.dideric.is/didericis/bot-bottle/pulls/320/files#issuecomment-2887 for shape of migrations object)
Outdated
Review

Agreed on DbStore(db_path, migrations). Plan: subclass constructors resolve db_path (env-var check etc.) and then call super().__init__(db_path, migrations). DbStore owns _connect, _chmod, and _init (which delegates to migrations.apply(conn)). The SUPERVISE_DB_PATH env-var path-resolution stays in QueueStore.__init__ since only the queue store needs it.

Agreed on `DbStore(db_path, migrations)`. Plan: subclass constructors resolve `db_path` (env-var check etc.) and then call `super().__init__(db_path, migrations)`. `DbStore` owns `_connect`, `_chmod`, and `_init` (which delegates to `migrations.apply(conn)`). The `SUPERVISE_DB_PATH` env-var path-resolution stays in `QueueStore.__init__` since only the queue store needs it.
@@ -69,7 +76,7 @@ class QueueStore:
# bind-mounted host DB. On the host this env var is never set,
# so we always fall through to host_db_path().
env_path = os.environ.get("SUPERVISE_DB_PATH", "").strip()
self.db_path = Path(env_path) if env_path else _host_db_path()
self.db_path = Path(env_path) if env_path else get_supervise_mod().host_db_path() # type: ignore[attr-defined]
self.db_path.parent.mkdir(parents=True, exist_ok=True)
self._init()
@@ -107,7 +114,7 @@ class QueueStore:
).fetchone()
if row is None:
raise FileNotFoundError(proposal_id)
return _proposal_from_row(row)
return self._row_to_proposal(row)
def list_pending_proposals(self) -> list[Proposal]:
if not self.db_path.is_file():
@@ -128,7 +135,7 @@ class QueueStore:
""",
(self.queue_key,),
).fetchall()
return [_proposal_from_row(row) for row in rows]
return [self._row_to_proposal(row) for row in rows]
def list_all_pending_proposals(self) -> list[Proposal]:
if not self.db_path.is_file():
@@ -147,7 +154,7 @@ class QueueStore:
ORDER BY p.arrival_timestamp, p.id
"""
).fetchall()
return [_proposal_from_row(row) for row in rows]
return [self._row_to_proposal(row) for row in rows]
def write_response(self, response: Response) -> Path:
with self._connect() as conn:
@@ -179,7 +186,7 @@ class QueueStore:
).fetchone()
if row is None:
raise FileNotFoundError(proposal_id)
return _response_from_row(row)
return self._row_to_response(row)
def archive_proposal(self, proposal_id: str) -> None:
if not self.db_path.is_file():
@@ -200,42 +207,57 @@ class QueueStore:
(self.queue_key, proposal_id),
)
Review

Same issue about migrations also applies here

Same issue about migrations also applies here
Review

Same migration runner applied here — _MIGRATIONS list with proposals (v1) and responses (v2), tracked under queue_store key in schema_versions.

Same migration runner applied here — `_MIGRATIONS` list with proposals (v1) and responses (v2), tracked under `queue_store` key in `schema_versions`.
@staticmethod
def _row_to_proposal(row: sqlite3.Row) -> Proposal:
m = get_supervise_mod()
return m.Proposal( # type: ignore[attr-defined]
id=row["id"],
bottle_slug=row["bottle_slug"],
tool=row["tool"],
proposed_file=row["proposed_file"],
justification=row["justification"],
arrival_timestamp=row["arrival_timestamp"],
current_file_hash=row["current_file_hash"],
)
@staticmethod
def _row_to_response(row: sqlite3.Row) -> Response:
m = get_supervise_mod()
return m.Response( # type: ignore[attr-defined]
proposal_id=row["proposal_id"],
status=row["status"],
notes=row["notes"],
final_file=row["final_file"],
)
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
_SCHEMA_KEY = "queue_store"
def _init(self) -> None:
with self._connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS supervise_proposals (
queue_key TEXT NOT NULL,
id TEXT NOT NULL,
bottle_slug TEXT NOT NULL,
tool TEXT NOT NULL,
proposed_file TEXT NOT NULL,
justification TEXT NOT NULL,
arrival_timestamp TEXT NOT NULL,
current_file_hash TEXT NOT NULL,
archived INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (queue_key, id)
CREATE TABLE IF NOT EXISTS schema_versions (
module TEXT PRIMARY KEY,
version INTEGER NOT NULL DEFAULT 0
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS supervise_responses (
queue_key TEXT NOT NULL,
proposal_id TEXT NOT NULL,
status TEXT NOT NULL,
notes TEXT NOT NULL,
final_file TEXT,
archived INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (queue_key, proposal_id)
row = conn.execute(
"SELECT version FROM schema_versions WHERE module = ?",
(self._SCHEMA_KEY,),
).fetchone()
version = row[0] if row else 0
for i, sql in enumerate(_MIGRATIONS[version:], start=version + 1):
conn.execute(sql)
conn.execute(
"INSERT OR REPLACE INTO schema_versions (module, version) VALUES (?, ?)",
(self._SCHEMA_KEY, i),
)
"""
)
self._chmod()
def _chmod(self) -> None:
+3 -2
View File
@@ -32,7 +32,8 @@ one-off persistence.
4. The sidecar receives the host database mount across docker, smolmachines,
and macOS-container backends.
5. The implementation stays stdlib-only.
6. Unit tests cover queue round-trips, pending discovery, response waits,
6. Schema migrations use a `PRAGMA user_version` runner — no third-party deps.
7. Unit tests cover queue round-trips, pending discovery, response waits,
archive semantics, audit round-trips, and path creation.
## Non-goals
@@ -41,7 +42,7 @@ one-off persistence.
- Adding forge orchestration state tables.
- Adding egress metering or budget tables.
- Changing the supervise TUI workflow or remediation behavior.
Review

Ideally not third party, but do want a migration framework.

Ideally not third party, but do want a migration framework.
Review

Updated: non-goal now says "third-party ORM or migration library" (not framework); added goal 6 explicitly calling out the stdlib schema_versions migration runner.

Updated: non-goal now says "third-party ORM or migration library" (not framework); added goal 6 explicitly calling out the stdlib `schema_versions` migration runner.
- Introducing a third-party ORM or migration framework.
- Introducing a third-party ORM or migration library.
## Design