Build Scholar Profiles (Legacy)
Original scholar page generator from summaries.json. Superseded by build_site.py.
"""Build scholar profile pages and paper summary pages from summaries JSON.

Reads ``scholars/summaries.json`` (iterated as a sequence of paper dicts) and
writes three kinds of output:

* ``scholars/<scholar-slug>/profile.md`` plus one Markdown file per paper;
* ``site/scholar/<scholar-slug>.html`` for every scholar;
* ``site/scholars.html``, the directory page that links to all of them.
"""

import json
import re
import os
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent
SCHOLARS_DIR = BASE_DIR / "scholars"
SUMMARIES_PATH = BASE_DIR / "scholars" / "summaries.json"
SITE_DIR = BASE_DIR / "site"


def slugify(name):
    """Create a URL-safe slug from a name.

    The name is lower-cased and stripped, apostrophes (ASCII and U+2019) are
    dropped, every remaining character that is not a word character,
    whitespace or hyphen is removed, and whitespace runs become one hyphen.
    """
    slug = name.lower().strip()
    slug = re.sub(r"['\u2019]", '', slug)
    slug = re.sub(r'[^\w\s-]', '', slug)
    slug = re.sub(r'[\s]+', '-', slug)
    return slug.strip('-')


def paper_slug(title):
    """Create a short slug from the first six words of a paper title."""
    words = re.sub(r'[^\w\s]', '', title.lower()).split()
    return '-'.join(words[:6])


def group_by_scholar(summaries):
    """Group paper summaries by author.

    Returns a dict mapping each ``author`` value to the list of its papers,
    in input order. Papers without an ``author`` key go under ``'Unknown'``.
    """
    scholars = {}
    for paper in summaries:
        scholars.setdefault(paper.get('author', 'Unknown'), []).append(paper)
    return scholars


def _sorted_by_year(papers):
    """Return ``papers`` ordered by ``year``; missing or falsy years sort first."""
    return sorted(papers, key=lambda paper: paper.get('year', 0) or 0)


def _yaml_quoted(value):
    """Render ``value`` as a double-quoted YAML scalar.

    A JSON string literal is also a valid YAML double-quoted scalar, so
    ``json.dumps`` supplies the escaping for quotes, backslashes and control
    characters. ``ensure_ascii=False`` keeps non-ASCII text readable.
    """
    return json.dumps(str(value), ensure_ascii=False)


def write_scholar_profile(scholar_name, papers, scholar_dir):
    """Write a scholar's ``profile.md`` and one Markdown file per paper.

    Files are written to ``scholar_dir / slugify(scholar_name)``, which is
    created if needed. Each paper file is named ``paper_slug(title) + '.md'``,
    so two papers whose titles share their first six words map to the same
    file and the later one overwrites the earlier.

    Returns the scholar's slug.
    """
    slug = slugify(scholar_name)
    profile_dir = scholar_dir / slug
    profile_dir.mkdir(parents=True, exist_ok=True)

    paper_list = []
    for p in _sorted_by_year(papers):
        year = p.get('year', '?')
        journal = p.get('journal', '')
        p_slug = paper_slug(p['title'])
        paper_list.append(f"- [{p['title']}]({p_slug}.md) ({journal} {year})")
    paper_lines = "\n".join(paper_list)

    # Sorted so the front matter is identical from run to run; key=str keeps
    # the sort from failing if a cluster value is not a string.
    topics = sorted(
        {p.get('topic_cluster', 'unknown') for p in papers}, key=str
    )

    content = f"""---
name: {_yaml_quoted(scholar_name)}
slug: "{slug}"
paper_count: {len(papers)}
topic_clusters: {json.dumps(topics)}
---

# {scholar_name}

## Papers in the HP Corpus

{paper_lines}
"""

    (profile_dir / "profile.md").write_text(content, encoding='utf-8')

    # Write individual paper summary files
    for p in papers:
        p_slug = paper_slug(p['title'])
        year = p.get('year', '?')
        journal = p.get('journal', '')
        topic = p.get('topic_cluster', 'unknown')
        summary = p.get('summary', 'Summary pending.')
        # A missing/unknown year becomes YAML null rather than a quoted '?'.
        yaml_year = year if year and year != '?' else 'null'

        paper_content = f"""---
title: {_yaml_quoted(p['title'])}
author: {_yaml_quoted(scholar_name)}
year: {yaml_year}
journal: {_yaml_quoted(journal)}
topic_cluster: {_yaml_quoted(topic)}
source_pdf: {_yaml_quoted(p.get('filename', ''))}
---

# {p['title']}

**{scholar_name}** | {journal} {year}

## Summary

{summary}

## Topic

{topic.replace('_', ' ').title()}
"""
        (profile_dir / f"{p_slug}.md").write_text(paper_content, encoding='utf-8')

    return slug


def generate_scholars_html(scholars_data, papers_data):
    """Generate the scholar cards for the scholars directory page.

    ``scholars_data`` maps scholar name to that scholar's list of papers.
    ``papers_data`` is accepted for interface compatibility and is not used.

    Returns a list of HTML fragments, one per scholar, ordered by name.
    """
    cards = []
    for name, papers in sorted(scholars_data.items()):
        slug = slugify(name)
        topics = set(p.get('topic_cluster', '') for p in papers)
        topic_badges = ' '.join(
            f'<span class="topic-badge topic-{escape_html(t)}">'
            f'{escape_html(t.replace("_", " ").title())}</span>'
            for t in sorted(topics) if t
        )
        paper_count = len(papers)

        # Index card summaries for each paper
        paper_cards = []
        for p in _sorted_by_year(papers):
            year = p.get('year', '?')
            journal = p.get('journal', '')
            summary = p.get('summary', 'Summary pending.')
            p_slug = paper_slug(p['title'])

            paper_cards.append(f"""
                <div class="paper-card">
                    <h4><a href="scholar/{slug}.html#{p_slug}">{escape_html(p['title'])}</a></h4>
                    <div class="paper-meta">{escape_html(journal)} {escape_html(str(year))}</div>
                    <p class="paper-summary">{escape_html(summary)}</p>
                </div>""")

        cards.append(f"""
        <div class="scholar-card" id="{slug}">
            <h3><a href="scholar/{slug}.html">{escape_html(name)}</a></h3>
            <div class="scholar-meta">{paper_count} paper{'s' if paper_count != 1 else ''} {topic_badges}</div>
            <div class="scholar-papers">
                {''.join(paper_cards)}
            </div>
        </div>""")

    return cards


def generate_scholar_page_html(name, papers):
    """Generate an individual scholar's HTML page.

    Each paper becomes an ``<article>`` whose ``id`` is ``paper_slug(title)``,
    the anchor that the directory page links to. Returns the full HTML
    document as a string.
    """
    paper_sections = []
    for p in _sorted_by_year(papers):
        year = p.get('year', '?')
        journal = p.get('journal', '')
        summary = p.get('summary', 'Summary pending.')
        topic = p.get('topic_cluster', 'unknown')
        p_slug = paper_slug(p['title'])

        paper_sections.append(f"""
        <article class="paper-detail" id="{p_slug}">
            <h3>{escape_html(p['title'])}</h3>
            <div class="paper-meta">
                {escape_html(journal)} {escape_html(str(year))}
                <span class="topic-badge topic-{escape_html(topic)}">{escape_html(topic.replace('_', ' ').title())}</span>
            </div>
            <div class="paper-summary-full">
                <p>{escape_html(summary)}</p>
            </div>
        </article>""")

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{escape_html(name)} — HP Scholarship</title>
    <link rel="stylesheet" href="../style.css">
    <link rel="stylesheet" href="../scholars.css">
</head>
<body>
    <header>
        <div class="header-content">
            <h1>{escape_html(name)}</h1>
            <p class="subtitle"><a href="../scholars.html">← All Scholars</a></p>
        </div>
    </header>
    <main>
        <section class="scholar-detail">
            <h2>Papers in the HP Corpus ({len(papers)})</h2>
            {''.join(paper_sections)}
        </section>
    </main>
    <footer>
        <div class="footer-content">
            <div class="footer-section">
                <h4>HP Scholarship Database</h4>
                <p>Part of the <a href="../index.html">Hypnerotomachia Poliphili</a> digital humanities project.</p>
            </div>
        </div>
    </footer>
</body>
</html>"""


def escape_html(text):
    """Escape HTML special characters.

    Replaces ``&``, ``<``, ``>`` and ``"`` with their character references.
    ``&`` is handled first so the ampersands introduced by the other
    replacements are not escaped a second time. Falsy input (``None``,
    ``''``) yields an empty string.
    """
    if not text:
        return ''
    return (text.replace('&', '&amp;')
            .replace('<', '&lt;')
            .replace('>', '&gt;')
            .replace('"', '&quot;'))


def main():
    """Load ``summaries.json`` and write all Markdown and HTML outputs.

    Prints an error and returns without writing anything when the summaries
    file does not exist.
    """
    if not SUMMARIES_PATH.exists():
        print(f"ERROR: {SUMMARIES_PATH} not found. Create summaries.json first.")
        return

    with open(SUMMARIES_PATH, 'r', encoding='utf-8') as f:
        summaries = json.load(f)

    print(f"Loaded {len(summaries)} paper summaries")

    scholars = group_by_scholar(summaries)
    print(f"Found {len(scholars)} unique scholars")

    # Write scholar profile markdown files
    for name, papers in scholars.items():
        slug = write_scholar_profile(name, papers, SCHOLARS_DIR)
        print(f"  {name} ({slug}): {len(papers)} papers")

    # Generate HTML pages
    scholar_page_dir = SITE_DIR / "scholar"
    scholar_page_dir.mkdir(parents=True, exist_ok=True)

    # Individual scholar pages
    for name, papers in scholars.items():
        slug = slugify(name)
        html = generate_scholar_page_html(name, papers)
        (scholar_page_dir / f"{slug}.html").write_text(html, encoding='utf-8')

    # Scholars directory page
    cards = generate_scholars_html(scholars, summaries)
    scholars_html = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Scholars — Hypnerotomachia Poliphili</title>
    <link rel="stylesheet" href="style.css">
    <link rel="stylesheet" href="scholars.css">
</head>
<body>
    <header>
        <div class="header-content">
            <h1>HP Scholarship</h1>
            <p class="subtitle">Scholars and Their Contributions</p>
            <p class="attribution"><a href="index.html">← Back to Marginalia</a></p>
        </div>
    </header>
    <main>
        <section class="intro">
            <div class="intro-content">
                <p>Five centuries of scholarship on the <em>Hypnerotomachia Poliphili</em>, organized by author. Each scholar's profile links to summaries of their contributions to the field.</p>
            </div>
        </section>
        <section class="scholars-grid">
            {''.join(cards)}
        </section>
    </main>
    <footer>
        <div class="footer-content">
            <div class="footer-section">
                <h4>HP Scholarship Database</h4>
                <p>Part of the <a href="index.html">Hypnerotomachia Poliphili</a> digital humanities project.</p>
            </div>
        </div>
    </footer>
</body>
</html>"""

    (SITE_DIR / "scholars.html").write_text(scholars_html, encoding='utf-8')
    print(f"\nGenerated {len(scholars)} scholar pages + scholars.html")


if __name__ == "__main__":
    main()