adding caching to presque gratos
This commit is contained in:
55
storage.py
Normal file
55
storage.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# storage.py
|
||||
from __future__ import annotations
|
||||
import sqlite3, pathlib, datetime as dt
|
||||
from typing import Optional, Iterable, Tuple
|
||||
import os
|
||||
DB_PATH = "/data/published.db"  # bind-mount ./data:/data in docker

# Executed once per connection; every statement is idempotent
# (IF NOT EXISTS), so re-opening an existing database is safe.
_SCHEMA = """
PRAGMA journal_mode = WAL;
CREATE TABLE IF NOT EXISTS published_items(
  platform TEXT NOT NULL, -- e.g. xgp | egs | psplus
  key TEXT PRIMARY KEY, -- your dedupe key (see below)
  first_seen_utc TEXT NOT NULL, -- ISO-8601
  last_post_id TEXT -- Ghost post id that recorded it
);
CREATE INDEX IF NOT EXISTS idx_platform ON published_items(platform);
"""

_INSERT_SQL = (
    "INSERT OR IGNORE INTO published_items(platform,key,first_seen_utc,last_post_id) VALUES(?,?,?,?)"
)
_UPDATE_POST_SQL = "UPDATE published_items SET last_post_id=? WHERE key=?"


def _utc_now_iso() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same text format as the deprecated
    ``datetime.utcnow().isoformat()`` (no UTC offset suffix), so values
    already stored in ``first_seen_utc`` stay comparable with new ones.
    """
    return dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat()


class Storage:
    """SQLite-backed record of items that have already been published.

    Each row of ``published_items`` is identified by ``key`` (the table's
    primary key) and stores the platform, the first time the key was
    recorded, and the id of the last post associated with it.

    Instances own a ``sqlite3`` connection (``self.conn``). Call
    :meth:`close` when done, or use the instance as a context manager.
    """

    def __init__(self, db_path: str = DB_PATH):
        """Open (creating if necessary) the database at ``db_path``.

        Args:
            db_path: Location of the SQLite file. When its parent directory
                does not exist, the path from the ``DB_FILE_FALLBACK``
                environment variable (or a built-in development default)
                is used instead.
        """
        # Fall back only when the target *directory* is missing. Testing for
        # the file itself would redirect every first run (when the database
        # has not been created yet) away from the requested location.
        parent_dir = os.path.dirname(os.path.abspath(db_path))
        if not os.path.isdir(parent_dir):
            db_path = os.environ.get("DB_FILE_FALLBACK", r"f:\workspace\Substack_JV\data\published.db")

        print(db_path)
        pathlib.Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(db_path)
        self.conn.execute("PRAGMA foreign_keys = ON;")
        # executescript parses the statements itself, so no manual
        # splitting of the script on ";" is needed.
        self.conn.executescript(_SCHEMA)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "Storage":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def seen(self, key: str) -> bool:
        """Return True when ``key`` is already present in ``published_items``."""
        row = self.conn.execute("SELECT 1 FROM published_items WHERE key=?", (key,)).fetchone()
        return row is not None

    def remember(self, platform: str, key: str, post_id: Optional[str]):
        """Record ``key`` and, when ``post_id`` is truthy, store it as the last post id.

        An existing row keeps its original ``platform`` and
        ``first_seen_utc``; only ``last_post_id`` is refreshed.
        """
        # The connection context manager commits on success and rolls back
        # if either statement raises, so a failure cannot leave a
        # half-applied change pending.
        with self.conn:
            self.conn.execute(_INSERT_SQL, (platform, key, _utc_now_iso(), post_id))
            if post_id:
                self.conn.execute(_UPDATE_POST_SQL, (post_id, key))

    def bulk_remember(self, platform: str, pairs: Iterable[Tuple[str, Optional[str]]]):
        """Record many ``(key, post_id)`` pairs for ``platform`` in one transaction.

        Follows the same rules as :meth:`remember`: new keys are inserted,
        and keys that already exist get ``last_post_id`` refreshed when a
        truthy post id is supplied.
        """
        # Materialise once: ``pairs`` may be a one-shot iterator and is
        # traversed twice below.
        items = list(pairs)
        if not items:
            return

        stamp = _utc_now_iso()
        with self.conn:
            self.conn.executemany(_INSERT_SQL, [(platform, k, stamp, pid) for k, pid in items])
            self.conn.executemany(_UPDATE_POST_SQL, [(pid, k) for k, pid in items if pid])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user