# QPSR gap checking
# Check each issue (year/volume/edition) for pages not covered by any article.
import json
# Load the article records and group their page ranges by issue.
# Every record is read for "title" and "year"; "pages", "volume" and
# "edition" are optional, and a missing one is reported on stdout and
# replaced by a "?" placeholder.
with open("qpsr.json") as f:
    data = json.load(f)

# Maps "<year>_<volume>_<edition>" to a list of {"start": ..., "end": ...}
# dicts. The values are still the raw strings taken from the "pages" field.
res = {}
for item in data:
    if "pages" not in item:
        print(f"Missing pages: {item['title']} ({item['year']})")
        pages = "?-?"
    else:
        pages = item["pages"]
    year = item["year"]
    if "volume" not in item:
        print(f"Missing volume: {item['title']} ({item['year']})")
        vol = "??"
    else:
        vol = item["volume"]
    if "edition" not in item:
        print(f"Missing edition: {item['title']} ({item['year']})")
        ed = "??"
    else:
        ed = item["edition"]

    # Build the issue key *before* parsing the pages: the error message
    # below names the issue, so the key must already exist at that point.
    key = f"{year}_{vol}_{ed}"

    if "-" not in pages:
        if pages.isdigit():
            # A single page number: the article starts and ends on it.
            start = end = pages
        else:
            raise IOError(f"No '-' in pages: {pages} ({key})")
    else:
        start, end = pages.split("-")

    # Records whose title is just "." are listed by their "pdf" value.
    if item["title"] == ".":
        print(item["pdf"])

    res.setdefault(key, []).append({"start": start, "end": end})
from functools import cmp_to_key
def compare(item1, item2):
    """Three-way comparison of two page ranges.

    Both arguments are mappings with "start" and "end" entries.

    Returns:
        -1 if ``item1`` ends before ``item2`` starts, 1 if ``item1`` starts
        after ``item2`` ends, and 0 in every other case.
    """
    ends_before = item1["end"] < item2["start"]
    if ends_before:
        return -1
    starts_after = item1["start"] > item2["end"]
    return 1 if starts_after else 0
def same_or_next(a, b):
    """Return True when ``a`` equals ``b`` or is exactly one less than ``b``."""
    if a == b:
        return True
    return a == b - 1
# For every issue in ``res``, work out which pages are not covered by any
# recorded page range and which page is the highest one recorded.
#   output[issue]     -> list of gaps, each {"page": n} or {"start": a, "end": b}
#   last_pages[issue] -> highest "end" page seen for that issue
output = {}
last_pages = {}


def _gap_entry(first, last):
    """Describe the uncovered pages first..last as a single page or a range."""
    if first == last:
        return {"page": first}
    return {"start": first, "end": last}


for item in res:
    ranges = []
    for subitem in res[item]:
        try:
            first, last = int(subitem["start"]), int(subitem["end"])
        except ValueError:
            # Non-numeric page values (e.g. the "?" placeholders stored for
            # records without page information) cannot take part in the
            # gap computation; leave them untouched and move on.
            continue
        # Keep the converted integers on the stored entry as well.
        subitem["start"], subitem["end"] = first, last
        ranges.append(subitem)

    if not ranges:
        print(f"No usable page ranges: {item}")
        continue

    tmp = sorted(ranges, key=lambda r: (r["start"], r["end"]))

    outtmp = []
    # Highest page accounted for so far. Starting at 0 makes a first range
    # that begins after page 1 show up as a leading gap. Tracking the
    # running maximum (rather than the previous range's end) keeps
    # overlapping or nested ranges from producing inverted or false gaps.
    covered = 0
    for entry in tmp:
        if entry["start"] > covered + 1:
            outtmp.append(_gap_entry(covered + 1, entry["start"] - 1))
        covered = max(covered, entry["end"])

    last_pages[item] = covered
    output[item] = outtmp
# Write one tab-separated line per issue: the issue key with underscores
# shown as spaces, followed by the last page recorded for that issue.
with open("pages.txt", "w") as f:
    for ed, page in last_pages.items():
        label = " ".join(ed.split("_"))
        print(label, page, sep="\t", file=f)
def _single_page(page):
if "page" in page:
return str(page["page"])
else:
return f"{page['start']}-{page['end']}"
def _join_pages(pagelist):
return ", ".join([_single_page(a) for a in pagelist])
# Turn each issue's gap list into one comma-separated string, then write a
# tab-separated line for every issue whose string is non-empty.
merged = {issue: _join_pages(gaps) for (issue, gaps) in output.items()}
with open("missing-pages.txt", "w") as f:
    for a, b in merged.items():
        if not b:
            continue
        print(a, b, sep="\t", file=f)
# Write one tab-separated line per issue: the issue key and the number of
# entries stored for it in ``res``.
with open("number-of-articles.txt", "w") as f:
    for a, b in res.items():
        print(a, len(b), sep="\t", file=f)
import json
# Save the per-issue gap lists as JSON indented by four spaces.
with open("gaps.json", "w") as out:
    serialized = json.dumps(output, indent=4)
    out.write(serialized)