Dictionary Schema V2
Extends dictionary_terms with significance, source tracking, provenance, and confidence columns.
"""Extend dictionary_terms schema with new columns for enrichment pipeline.

Adds columns for:
- significance fields (HP-specific and scholarship-wide)
- source document tracking
- source method and notes
- confidence level

Idempotent: safe to run multiple times.
"""

import sqlite3
from contextlib import closing
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent
DB_PATH = BASE_DIR / "db" / "hp.db"

# (column name, SQLite column definition) pairs appended to dictionary_terms.
# Each definition is interpolated verbatim into an ALTER TABLE statement, so
# this list must only ever contain trusted, hard-coded identifiers.
NEW_COLUMNS = [
    ("significance_to_hp", "TEXT"),
    ("significance_to_scholarship", "TEXT"),
    ("related_scholars", "TEXT"),
    ("see_also_text", "TEXT"),
    ("source_documents", "TEXT"),
    ("source_page_refs", "TEXT"),
    ("source_quotes_short", "TEXT"),
    ("source_method", "TEXT"),
    ("confidence", "TEXT DEFAULT 'MEDIUM'"),
    ("notes", "TEXT"),
]


def main(db_path=None) -> None:
    """Add every column in NEW_COLUMNS that dictionary_terms does not have yet.

    Columns already present (as reported by ``PRAGMA table_info``) are left
    untouched, which is what makes repeated runs safe. One progress line is
    printed per column, followed by a summary of how many were added.

    Args:
        db_path: Path of the SQLite database to migrate. When ``None`` (the
            default) the module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: Propagated unchanged from the sqlite3 module, e.g. when
            an ALTER TABLE statement fails.
    """
    target = DB_PATH if db_path is None else db_path

    # sqlite3's own connection context manager only manages transactions and
    # does not close the handle; closing() guarantees the connection is
    # released even when a statement below raises.
    with closing(sqlite3.connect(target)) as conn:
        cur = conn.cursor()

        # Column names live at index 1 of each PRAGMA table_info row.
        cur.execute("PRAGMA table_info(dictionary_terms)")
        existing = {row[1] for row in cur.fetchall()}

        added = 0
        for col_name, col_type in NEW_COLUMNS:
            if col_name in existing:
                print(f" Column exists: {col_name}")
                continue
            # SQLite cannot bind identifiers as parameters; the values come
            # from the hard-coded NEW_COLUMNS list above.
            cur.execute(
                f"ALTER TABLE dictionary_terms ADD COLUMN {col_name} {col_type}"
            )
            print(f" Added column: {col_name} ({col_type})")
            added += 1

        conn.commit()

    print(f"\nDone. Added {added} new columns.")


if __name__ == "__main__":
    main()