import json
# Load the article records. JSON text is UTF-8, so do not rely on the
# platform's default encoding when decoding it.
with open("qpsr.json", encoding="utf-8") as f:
    data = json.load(f)

# Maps "<year>_<volume>_<edition>" to the list of page ranges
# ({"start": ..., "end": ...}, still strings at this point) of its articles.
res = {}
for item in data:
    # Missing fields are reported and replaced by placeholders so that the
    # remaining records are still processed.
    if "pages" not in item:
        print(f"Missing pages: {item['title']} ({item['year']})")
        pages = "?-?"
    else:
        pages = item["pages"]
    year = item["year"]
    if "volume" not in item:
        print(f"Missing volume: {item['title']} ({item['year']})")
        vol = "??"
    else:
        vol = item["volume"]
    if "edition" not in item:
        print(f"Missing edition: {item['title']} ({item['year']})")
        ed = "??"
    else:
        ed = item["edition"]

    # Build the key before parsing the pages: the error message below needs
    # the key of the *current* record (it used to be read before assignment).
    key = f"{year}_{vol}_{ed}"

    if "-" not in pages:
        if pages.isdigit():
            # A single page number: the article starts and ends on it.
            start = end = pages
        else:
            raise IOError(f"No '-' in pages: {pages} ({key})")
    else:
        start, end = pages.split("-")

    # Records whose title is just "." are reported by their PDF reference.
    if item["title"] == ".":
        print(item["pdf"])

    res.setdefault(key, []).append({"start": start, "end": end})
from functools import cmp_to_key

def compare(item1, item2):
    """Three-way comparison of two page ranges for use with cmp_to_key.

    Each argument is a mapping with "start" and "end" entries.  Returns -1
    when item1 ends before item2 starts, 1 when item1 starts after item2
    ends, and 0 otherwise (the ranges touch or overlap).
    """
    ends_before = item1["end"] < item2["start"]
    if ends_before:
        return -1
    starts_after = item1["start"] > item2["end"]
    return 1 if starts_after else 0

def same_or_next(a, b):
    """Return True when a equals b or is exactly one less than b."""
    if a == b:
        return True
    return a == b - 1

# For every issue key in res, work out which pages are not covered by any
# article and remember the highest page number any article reaches.
output = {}      # issue key -> list of gaps: {"start", "end"} or {"page"}
last_pages = {}  # issue key -> highest "end" page among its articles
for item, ranges in res.items():
    numeric = []
    for subitem in ranges:
        try:
            first, last = int(subitem["start"]), int(subitem["end"])
        except ValueError:
            # Placeholder ("?") or otherwise non-numeric pages cannot take
            # part in the gap analysis; report them instead of crashing.
            print(f"Skipping non-numeric pages: {subitem['start']}-{subitem['end']} ({item})")
            continue
        # Convert in place, as before, so res holds integers afterwards.
        subitem["start"], subitem["end"] = first, last
        numeric.append(subitem)

    if not numeric:
        # Nothing usable for this issue: no gaps can be derived and there is
        # no known last page.
        output[item] = []
        continue

    # Order by start page (then end page). Unlike an "is entirely before"
    # comparator, this stays a consistent ordering when ranges overlap.
    tmp = sorted(numeric, key=lambda rng: (rng["start"], rng["end"]))
    last_pages[item] = max(rng["end"] for rng in tmp)

    outtmp = []
    covered = 0  # pages 1..covered are accounted for so far
    for rng in tmp:
        # A range starting on the covered page or right after it leaves no
        # gap; anything later leaves covered+1 .. start-1 unaccounted for.
        if rng["start"] > covered + 1:
            gap_start, gap_end = covered + 1, rng["start"] - 1
            if gap_start == gap_end:
                outtmp.append({"page": gap_start})
            else:
                outtmp.append({"start": gap_start, "end": gap_end})
        # Track the furthest page reached so overlapping or nested ranges
        # never produce a reversed (start > end) gap.
        covered = max(covered, rng["end"])
    output[item] = outtmp
# Write one tab-separated line per issue: the issue key (underscores shown
# as spaces) followed by its last page.
with open("pages.txt", "w") as pages_file:
    for issue_key, final_page in last_pages.items():
        label = issue_key.replace("_", " ")
        pages_file.write(f"{label}\t{final_page}\n")
def _single_page(page):
    if "page" in page:
        return str(page["page"])
    else:
        return f"{page['start']}-{page['end']}"

def _join_pages(pagelist):
    """Render every gap entry in pagelist and join them with ", "."""
    rendered = map(_single_page, pagelist)
    return ", ".join(rendered)

# Render each issue's gap list as a single comma-separated string.
merged = {}
for issue_key, gaps in output.items():
    merged[issue_key] = _join_pages(gaps)

# Only issues that actually have gaps get a line in missing-pages.txt.
with open("missing-pages.txt", "w") as gaps_file:
    for issue_key, gap_text in merged.items():
        if not gap_text:
            continue
        gaps_file.write(f"{issue_key}\t{gap_text}\n")

# One line per issue with the number of articles grouped under it.
with open("number-of-articles.txt", "w") as counts_file:
    for issue_key, articles in res.items():
        counts_file.write(f"{issue_key}\t{len(articles)}\n")
import json
# Dump the per-issue gap lists as indented JSON.
with open("gaps.json", "w") as gaps_json:
    payload = json.dumps(output, indent=4)
    gaps_json.write(payload)