Alchemical Hands in the Hypnerotomachia Poliphili

Marginalia, Scholarship & Reception

← All Scripts

Build Reading Packets

build_reading_packets.py — 158 lines

Assembles structured research packets from corpus search for dictionary enrichment.

"""Build structured reading packets for dictionary term enrichment.

For each dictionary term (or a specified subset), this script:
1. Searches the chunk corpus for relevant passages
2. Assembles a structured packet with full provenance
3. Writes packets to /staging/packets/[slug].json

Packets contain ONLY retrieved evidence. No generated interpretations.
Downstream enrichment scripts use packets as input.
"""
11
import sqlite3
import json
import sys
from pathlib import Path

# BASE_DIR is the parent of the directory that contains this file; every other
# path used by the script is derived from it.
BASE_DIR = Path(__file__).resolve().parent.parent
DB_PATH = BASE_DIR / "db" / "hp.db"
STAGING_DIR = BASE_DIR / "staging"
PACKETS_DIR = STAGING_DIR / "packets"

# corpus_search is imported as a top-level module from <BASE_DIR>/scripts, so
# that directory must be on sys.path before the import statement below runs.
sys.path.insert(0, str(BASE_DIR / "scripts"))
from corpus_search import search_by_term, search_chunks
25
26
# Synonyms / alternate forms for better search coverage.
# Keys are dictionary term slugs; build_packet() looks a term up by its slug
# and passes the matching list to search_by_term() as extra search terms.
# A slug with no entry here is searched by its label alone.
TERM_SYNONYMS = {
    'signature': ['signature mark', 'sig.', 'quire mark'],
    'quire': ['gathering', 'quaternion'],
    'folio': ['leaf', 'fol.'],
    'marginalia': ['marginal note', 'annotation', 'margin'],
    'annotator-hand': ['hand A', 'hand B', 'hand C', 'hand D', 'hand E',
                        'annotator', 'handwriting'],
    'alchemical-allegory': ['alchemical reading', 'alchemical interpretation',
                            'alchemist'],
    'master-mercury': ['mercury', 'Mercurii', 'quicksilver', "d'Espagnet"],
    'sol-luna': ['Sol and Luna', 'sun and moon', 'gold and silver'],
    'chemical-wedding': ['chemical marriage', 'chymische Hochzeit',
                         'hermaphrodite'],
    'prisca-sapientia': ['ancient wisdom', 'prisca theologia',
                         'Hermes Trismegistus'],
    'woodcut': ['illustration', 'woodblock', 'woodcuts'],
    'acrostic': ['POLIAM FRATER', 'chapter initials'],
    'hieroglyph': ['hieroglyphic', 'Horapollo', 'pseudo-Egyptian'],
    'emblem': ['emblem book', 'Alciato', 'pictura'],
    'ekphrasis': ['ekphrastic', 'verbal description'],
    'incunabulum': ['incunabula', 'ISTC', 'fifteenth-century printing'],
    'aldus-manutius': ['Aldus', 'Manutius', 'Aldine'],
    'authorship-debate': ['Francesco Colonna', 'Alberti', 'authorship'],
    'dream-narrative': ['dream', 'Poliphilo falls asleep', 'dream-within'],
    'elephant-obelisk': ['elephant', 'obelisk', 'Bernini', 'b6v', 'b7r'],
    'ideogram': ['alchemical symbol', 'alchemical sign', 'ideogram'],
    'activity-book': ['activity book', 'humanistic activity'],
    'inventio': ['invention', 'rhetorical invention'],
    'ingegno': ['ingenium', 'wit', 'ingegno'],
    'acutezze': ['acutezza', 'wit', 'Alexander VII', 'Chigi'],
    'cythera': ['Cythera', 'island of Venus', 'circular garden'],
    'reception-history': ['reception', 'readership', 'readers'],
    'antiquarianism': ['antiquarian', 'Cyriacus', 'ancient monuments'],
    'vernacular-poetics': ['Petrarchan', 'vernacular', 'Italian poetry'],
    'collation': ['collation formula', 'a-z8', 'bibliographic structure'],
    'apparatus': ['critical edition', 'textual notes', 'apparatus criticus'],
    'commentary': ['commentator', 'gloss', 'interpretation'],
    'allegory': ['allegorical', 'allegory of love'],
    'architectural-body': ['architectural body', 'Lefaivre', 'embodied'],
    'recto': ['recto'],
    'verso': ['verso'],
    'gathering': ['gathering', 'quaternion'],
}
71
72
def build_packet(term_slug, term_label, category, current_status):
    """Build a reading packet for a single dictionary term.

    The corpus is searched for ``term_label`` together with any alternate
    forms registered for ``term_slug`` in ``TERM_SYNONYMS``. Each search hit
    is reduced to its text and provenance fields; nothing is generated or
    interpreted here.

    Args:
        term_slug: Slug of the term; used for the synonym lookup and stored
            in the packet under ``'slug'``.
        term_label: Label of the term; used as the primary search term and
            stored under ``'term'``.
        category: Stored unchanged under ``'category'``.
        current_status: Stored unchanged under ``'current_review_status'``.

    Returns:
        A dict with the keys ``term``, ``slug``, ``category``,
        ``current_review_status``, ``passage_count``, ``passages``,
        ``search_terms_used`` and ``source_method`` (always
        ``'CORPUS_EXTRACTION'``).
    """
    # Copy the list: search_by_term lives in another module, and the entry in
    # TERM_SYNONYMS is shared by every call, so it must not be handed out
    # where it could be modified in place.
    synonyms = list(TERM_SYNONYMS.get(term_slug, []))

    # Search using term label + synonyms
    hits = search_by_term(term_label, synonyms=synonyms)

    # Each hit is indexed by these six keys; a hit lacking one of them makes
    # the lookup fail rather than producing an incomplete passage record.
    passages = [
        {
            'text': hit['matched_text'],
            'source_doc': hit['source_doc'],
            'chunk_path': hit['chunk_path'],
            'section': hit['section'],
            'page_refs': hit['page_refs'],
            'relevance_score': hit['relevance_score'],
        }
        for hit in hits
    ]

    return {
        'term': term_label,
        'slug': term_slug,
        'category': category,
        'current_review_status': current_status,
        'passage_count': len(passages),
        'passages': passages,
        'search_terms_used': [term_label] + synonyms,
        'source_method': 'CORPUS_EXTRACTION',
    }
104
105
def build_all_packets(filter_status=None, filter_slugs=None):
    """Build reading packets for all (or filtered) dictionary terms.

    Terms are read from the ``dictionary_terms`` table of the database at
    ``DB_PATH`` in slug order. One JSON file per selected term is written to
    ``PACKETS_DIR`` as ``<slug>.json`` (the directory is created if needed),
    and progress is printed to stdout.

    Args:
        filter_status: Only build for terms with this review_status
            (e.g. 'DRAFT'). A falsy value disables the status filter.
        filter_slugs: Only build for these specific slugs. May be any
            iterable of slugs; a single string is treated as one slug.
            A falsy value (``None``, empty list) disables the slug filter.

    Returns:
        The number of packets written.
    """
    query = "SELECT slug, label, category, review_status FROM dictionary_terms"
    params = []
    if filter_status:
        query += " WHERE review_status = ?"
        params.append(filter_status)
    query += " ORDER BY slug"

    conn = sqlite3.connect(DB_PATH)
    try:
        terms = conn.execute(query, params).fetchall()
    finally:
        # Close even when the query fails (e.g. missing table) so the
        # connection is never leaked.
        conn.close()

    # Normalise the slug filter to a set: exact-match membership, and safe for
    # one-shot iterables. A bare string would otherwise be matched by
    # substring, so wrap it as a single slug.
    if isinstance(filter_slugs, str):
        filter_slugs = [filter_slugs]
    wanted_slugs = set(filter_slugs) if filter_slugs else None

    PACKETS_DIR.mkdir(parents=True, exist_ok=True)

    built = 0
    for slug, label, category, status in terms:
        if wanted_slugs is not None and slug not in wanted_slugs:
            continue

        print(f"  Building packet: {slug} ({category})")
        packet = build_packet(slug, label, category, status)

        packet_path = PACKETS_DIR / f"{slug}.json"
        with open(packet_path, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII passage text readable on disk.
            json.dump(packet, f, indent=2, ensure_ascii=False)

        built += 1
        print(f"    -> {packet['passage_count']} passages found")

    print(f"\nBuilt {built} reading packets in {PACKETS_DIR}")
    return built
146
147
if __name__ == "__main__":
    import sys

    print("=== Building Reading Packets ===\n")

    # Slugs named on the command line select exactly those terms; with no
    # arguments, packets are built for every term whose status is DRAFT.
    requested_slugs = sys.argv[1:]
    if requested_slugs:
        build_all_packets(filter_slugs=requested_slugs)
    else:
        build_all_packets(filter_status='DRAFT')