#!/usr/bin/env python3
"""
fetch_portraits.py
Download portrait images for every person in people.json → ./images/people/…

How it works
------------
1. Wikipedia API  : get the Wikidata Q-id for the page about <name>
2. Wikidata API   : fetch property P18 (image filename) for that Q-id
3. Commons file   : resolve File:… to a real image URL via Special:FilePath
4. Save locally   : mkdir -p images/people && write binary data

If anything fails we log and continue.
"""

import json
import os
import pathlib
import sys
import time
from urllib.parse import quote_plus

import requests
from tqdm import tqdm

# MediaWiki Action API endpoint of the English Wikipedia.
WIKI_API    = "https://en.wikipedia.org/w/api.php"
# Per-entity JSON export on Wikidata; "{}" is filled with the Q-id via str.format.
WIKIDATA_API = "https://www.wikidata.org/wiki/Special:EntityData/{}.json"
# Special:FilePath URL template on Commons; "{}" takes the (URL-encoded) file name.
COMMONS_FILE = "https://commons.wikimedia.org/wiki/Special:FilePath/{}"

# Passed as headers= to every requests.get call in this file to identify the client.
HEADERS = {"User-Agent": "ShrineTimelineBot/0.1 (https://github.com/shrine)"}

# --------------------------------------------------------------------------- #
# Helper functions
# --------------------------------------------------------------------------- #
def wikipedia_to_qid(title: str) -> str | None:
    """Return the Wikidata Q-identifier for a Wikipedia page title.

    Queries the English Wikipedia Action API for the page's ``pageprops``
    (following redirects) and extracts ``wikibase_item``.

    Args:
        title: Wikipedia page title to look up.

    Returns:
        The Q-id (e.g. ``"Q937"``), or ``None`` when the response contains no
        page carrying a ``wikibase_item`` property.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
    """
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "pageprops",
        "redirects": 1,
    }
    r = requests.get(WIKI_API, params=params, headers=HEADERS, timeout=15)
    r.raise_for_status()

    # The API can answer HTTP 200 without a "query"/"pages" object (e.g. an
    # error payload), so never index those keys unconditionally.
    pages = r.json().get("query", {}).get("pages", {})
    for page in pages.values():
        qid = page.get("pageprops", {}).get("wikibase_item")  # e.g. "Q937"
        if qid:
            return qid
    return None


def qid_to_image_filename(qid: str) -> str | None:
    """Return the Commons file name (e.g. 'Albert Einstein Head.jpg').

    Fetches the entity JSON for ``qid`` from Wikidata and returns the value of
    the first P18 (image) statement that actually carries a value.

    Args:
        qid: Wikidata item identifier such as ``"Q937"``.

    Returns:
        The file name stored in P18, or ``None`` when the entity has no usable
        P18 statement.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
    """
    r = requests.get(WIKIDATA_API.format(qid), headers=HEADERS, timeout=15)
    r.raise_for_status()

    entities = r.json().get("entities", {})
    # The payload may be keyed by a different id than the one requested
    # (e.g. when qid is a redirect to another item); fall back to whatever
    # single entity was returned instead of raising KeyError.
    entity = entities.get(qid) or next(iter(entities.values()), None)
    if not entity:
        return None

    # Statements whose snak is "novalue"/"somevalue" have no "datavalue", so
    # walk the P18 statements and take the first one that has a real value.
    for claim in entity.get("claims", {}).get("P18", []):
        value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
        if value:
            return value
    return None

from urllib.parse import quote  # swap quote_plus → quote

def download_commons_file(filename: str, dest_path: pathlib.Path) -> bool:
    """
    Resolve image via Special:FilePath and stream it to dest_path.

    The data is written to a sibling "<name>.part" file first and renamed to
    dest_path only after the transfer finished, so an interrupted download
    never leaves a truncated image that a later run would skip as
    "already on disk".

    Args:
        filename: Commons file name, with spaces or underscores.
        dest_path: Where to store the image; parent directories are created.

    Returns:
        True if saved, False if already on disk.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        OSError: if the file cannot be written or renamed.
    """
    if dest_path.exists():
        return False

    # 1. MediaWiki expects underscores, not spaces
    fname = filename.replace(" ", "_")

    # 2. URL-encode *except* underscores, parentheses, apostrophes, dots, dashes
    #    (keeps filenames readable and avoids double-encoding)
    safe = "_().'-"
    url = COMMONS_FILE.format(quote(fname, safe=safe))

    tmp_path = dest_path.with_name(dest_path.name + ".part")
    try:
        with requests.get(url, headers=HEADERS, stream=True, timeout=30) as r:
            r.raise_for_status()
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_path, "wb") as fp:
                for chunk in r.iter_content(chunk_size=8192):
                    fp.write(chunk)
        # Publish the finished file in one step.
        tmp_path.replace(dest_path)
    except BaseException:
        # Also covers Ctrl-C: drop the partial file, then let the error propagate.
        tmp_path.unlink(missing_ok=True)
        raise
    return True


# --------------------------------------------------------------------------- #
# Main script
# --------------------------------------------------------------------------- #
def main(json_path="people.json"):
    """Download a portrait for every entry of the JSON file at json_path.

    Each entry is expected to provide a "name" (used as the Wikipedia page
    title) and an "image" (destination path; leading slashes are dropped so
    it is resolved relative to the current directory). A failure on one entry
    is logged and counted, then processing continues with the next entry.
    A summary line is printed at the end.

    Args:
        json_path: Path of the JSON file holding the list of people.
    """
    # Explicit UTF-8 so non-ASCII names load the same way on every platform.
    people = json.loads(pathlib.Path(json_path).read_text(encoding="utf-8"))
    saved, skipped, err = 0, 0, 0

    for person in tqdm(people, unit="person"):
        title = "<unknown>"
        contacted_api = False
        try:
            # Read the record inside the try so one malformed entry is
            # reported instead of aborting the whole run.
            title = person["name"]
            out_path = pathlib.Path(person["image"].lstrip("/"))

            # Nothing to do for files we already have: skip the API round trips.
            if out_path.exists():
                skipped += 1
                continue

            contacted_api = True
            qid = wikipedia_to_qid(title)
            if not qid:
                tqdm.write(f"⚠️  {title}: no Wikidata Q-id")
                err += 1
                continue

            filename = qid_to_image_filename(qid)
            if not filename:
                tqdm.write(f"⚠️  {title}: no P18 image")
                err += 1
                continue

            if download_commons_file(filename, out_path):
                saved += 1
            else:
                skipped += 1  # already existed
        except Exception as e:
            tqdm.write(f"❌ {title}: {e}")
            err += 1
        finally:
            # Be polite to the APIs after every entry that hit the network,
            # including the ones that failed.
            if contacted_api:
                time.sleep(0.3)

    print(f"\nDone: {saved} downloaded, {skipped} skipped, {err} errors.")


if __name__ == "__main__":
    # Bail out with a readable message (non-zero exit) rather than a traceback
    # when the input file is not in the working directory.
    input_file = pathlib.Path("people.json")
    if input_file.exists():
        main("people.json")
    else:
        sys.exit("people.json not found in current directory.")