# storage.py
from __future__ import annotations

import datetime as dt
import logging
import os
import pathlib
import sqlite3
from typing import Iterable, Optional, Tuple

DB_PATH = "/data/published.db"  # bind-mount ./data:/data in docker

_LOG = logging.getLogger(__name__)

# Applied on every Storage() construction; each statement is idempotent.
_SCHEMA = """
PRAGMA journal_mode = WAL;
CREATE TABLE IF NOT EXISTS published_items(
    platform        TEXT NOT NULL,    -- e.g. xgp | egs | psplus
    key             TEXT PRIMARY KEY, -- your dedupe key (see below)
    first_seen_utc  TEXT NOT NULL,    -- ISO-8601
    last_post_id    TEXT              -- Ghost post id that recorded it
);
CREATE INDEX IF NOT EXISTS idx_platform ON published_items(platform);
"""

_INSERT_SQL = (
    "INSERT OR IGNORE INTO published_items"
    "(platform,key,first_seen_utc,last_post_id) VALUES(?,?,?,?)"
)
_SET_POST_ID_SQL = "UPDATE published_items SET last_post_id=? WHERE key=?"


def _utc_now_iso() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same text format as the deprecated
    ``datetime.utcnow().isoformat()`` (no UTC offset suffix), so values
    written by earlier versions and by this one stay comparable.
    """
    return dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat()


class Storage:
    """SQLite-backed record of items that have already been published.

    Each row of ``published_items`` is identified by a caller-supplied dedupe
    ``key`` (the table's primary key) and stores the platform, the UTC time
    the key was first recorded, and the id of the last post that recorded it.

    The open connection is exposed as ``self.conn``. Instances can be used as
    context managers; leaving the ``with`` block closes the connection.
    """

    def __init__(self, db_path: str = DB_PATH):
        """Open the database and make sure the schema exists.

        Args:
            db_path: Path of the SQLite file. If no file exists there, the
                path from the ``DB_FILE_FALLBACK`` environment variable (or a
                built-in development default) is used instead.
        """
        if not os.path.isfile(db_path):
            # NOTE(review): this also triggers on a first run where the parent
            # directory exists but the database file has not been created yet,
            # so a fresh deployment opens the fallback path instead of
            # ``db_path``. Kept as-is to avoid moving existing data; confirm
            # whether that is intended.
            db_path = os.environ.get(
                "DB_FILE_FALLBACK", r"f:\workspace\Substack_JV\data\published.db"
            )

        _LOG.debug("opening published-items database at %s", db_path)
        pathlib.Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(db_path)
        self.conn.execute("PRAGMA foreign_keys = ON;")
        # executescript lets SQLite split the statements itself, instead of
        # splitting the script on ";" by hand.
        self.conn.executescript(_SCHEMA)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "Storage":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def seen(self, key: str) -> bool:
        """Return True if ``key`` is already recorded in ``published_items``."""
        cur = self.conn.execute("SELECT 1 FROM published_items WHERE key=?", (key,))
        return cur.fetchone() is not None

    def remember(self, platform: str, key: str, post_id: Optional[str]) -> None:
        """Record ``key``, and refresh its post id when one is given.

        A key that is already present keeps its original platform and
        first-seen time; only ``last_post_id`` is overwritten, and only when
        ``post_id`` is truthy.

        Args:
            platform: Platform label stored with a newly inserted key.
            key: Dedupe key identifying the item.
            post_id: Id of the post that recorded the item, or None.
        """
        # The connection context manager commits on success and rolls back if
        # either statement raises, so no half-applied write is left open.
        with self.conn:
            self.conn.execute(_INSERT_SQL, (platform, key, _utc_now_iso(), post_id))
            if post_id:
                self.conn.execute(_SET_POST_ID_SQL, (post_id, key))

    def bulk_remember(
        self, platform: str, pairs: Iterable[Tuple[str, Optional[str]]]
    ) -> None:
        """Record many ``(key, post_id)`` pairs for one platform atomically.

        Follows the same rules as ``remember``: new keys are inserted, and
        keys that already exist get their ``last_post_id`` refreshed when the
        pair carries a truthy post id. All rows of one call share a single
        first-seen timestamp.

        Args:
            platform: Platform label stored with newly inserted keys.
            pairs: Iterable of ``(key, post_id)``; may be a one-shot iterator.
        """
        items = list(pairs)  # materialize: the pairs are walked twice below
        if not items:
            return

        stamp = _utc_now_iso()
        inserts = [(platform, key, stamp, post_id) for key, post_id in items]
        updates = [(post_id, key) for key, post_id in items if post_id]

        with self.conn:
            self.conn.executemany(_INSERT_SQL, inserts)
            if updates:
                self.conn.executemany(_SET_POST_ID_SQL, updates)