Quigging conversion
First attempt
# Parsed entries, keyed by section number. It is rebound to a fresh dict
# again further down, right before the page is parsed.
DATA = {}
# Abbreviations as they appear at the start of a reference in the page text,
# mapped to identifier strings. get_sources() tests the longest keys first,
# which is what lets "O.Ir. acc. pl." coexist with its prefix "O.Ir.".
# NOTE(review): the values look like normalised names of the cited works and
# language stages -- confirm against the bibliography of the source text.
SOURCES = {
"Atk.": "atkinsons",
"Cl. S.": "claidheamh_soluis",
"D. P.": "derry_people",
"Di.": "dinneen",
"Diss.": "die_lautliche_geltung",
"Finck": "finck",
"G. J.": "gaelic_journal",
"Henebry": "henebry",
"Hogan": "hogan",
"Macbain": "macbain",
"Meyer": "meyer",
"Molloy": "molloy",
"Pedersen": "pedersen",
"Rhys": "rhys",
"Sg. Fearn.": "sgeulaidhe_fearnmhuighe",
"Spir. Rose": "spiritual_rose",
"O’R.": "oreilly",
"Wi.": "windisch",
"O.Ir. acc. pl.": "old_irish_accusative_plural",
"M.Ir.": "middle_irish",
"O.Ir.": "old_irish",
}
# Bare expression: its value is discarded when the file runs as a script
# (presumably left over from echoing DATA in a notebook cell).
DATA
# Section number used for lines that appear before the first "§ N." heading;
# the parsing loop rebinds it whenever it reads a heading.
section = 1
# Raw text of the page being converted. The parsing loop expects each
# non-empty line to start with a "§ N. " heading and to list its examples
# after "e.g.", separated by ";".
PAGE = """
§ 3. This sound frequently represents O.Ir. a in accented syllables before non-palatal consonants, e.g. αrəm, ‘army’, O.Ir. arm; αt, ‘swelling’, O.Ir. att; fαnαχt ‘to stay, remain’, O.Ir. anaim; kαpəL, ‘mare’, M.Ir. capall; mαk, ‘son’, O.Ir. macc; mαLαχt, ‘curse’, O.Ir. maldacht; tαχtuw, ‘to choke’, O.Ir. tachtad; tαrt, ‘thirst’, O.Ir. tart; tαruw, ‘bull’, M.Ir. tarb.
§ 4. O.Ir. e before non-palatal consonants in accented syllables usually gives α, e.g. αχ, ‘steed’, O.Ir. ech; αlə, ‘swan’, M.Ir. ela; αŋ, ‘splice, strip’; αŋαχ, ‘fisherman’s net’, M.Ir. eng; dʹrʹαm, ‘crowd’, M.Ir. dremm; dʹαrəg, ‘red’, O.Ir. derg; fʹαr, ‘man’, O.Ir. fer; gʹαl, ‘white’, M.Ir. gel; kʹαχtər, ‘either’, O.Ir. cechtar; Lʹαnuw, ‘child’, M.Ir. lenab; Nʹαd, ‘nest’, M.Ir. net; pʹαkuw, ‘sin’, O.Ir. peccad; ʃαsuw, ‘to stand’, M.Ir. sessom; tʹαχ, ‘house’, O.Ir. tech.
"""
# Page number recorded in the "page" field of every parsed entry.
PAGE_NUM = 5
# Non-empty lines of PAGE, with soft hyphens (U+00AD) removed first.
page_lines = list(filter(None, PAGE.replace("\u00ad", "").split("\n")))
# Start from an empty result mapping before parsing.
DATA = {}
def extend_trans(item, trans):
    """Add the transcription ``trans`` to ``item`` in place.

    Args:
        item: Entry dict to update.
        trans: Transcription to record.

    The "transcription" value is stored as follows:

    * key missing: it is set to ``trans`` itself;
    * key holds a single value: it becomes ``[existing, trans]``;
    * key already holds a list: ``trans`` is appended to that list.

    Returns:
        None. ``item`` is modified in place.
    """
    if "transcription" not in item:
        item["transcription"] = trans
        # Stop here: falling through would immediately wrap the value just
        # stored into ``[trans, trans]``.
        return
    existing = item["transcription"]
    if isinstance(existing, list):
        # Keep the list flat instead of nesting it as ``[[a, b], c]``.
        existing.append(trans)
    else:
        item["transcription"] = [existing, trans]
import re
# Matches "<transcription>[,] ‘<gloss>’" at the start of an entry.
#   * The comma before the opening quote is optional, so entries such as
#     "fαnαχt ‘to stay, remain’" are still recognised.
#   * The gloss ends at the first ’ that is NOT followed by a word character,
#     so an apostrophe inside the gloss ("fisherman’s net") does not end it.
_BASIC = r"^([^‘]+?),?\s*‘(.+?)’(?!\w)"
BASIC = re.compile(_BASIC)


def get_basic(text):
    """Split an entry into transcription, English gloss and remainder.

    Args:
        text: One entry, e.g. "αrəm, ‘army’, O.Ir. arm".

    Returns:
        A tuple ``(transcription, english, rest)``. ``rest`` is whatever
        follows the closing quote, with surrounding whitespace and one
        leading comma removed ("" when nothing follows). When ``text`` does
        not start with a transcription followed by a quoted gloss, the result
        is ``("", "", text)`` with ``text`` unchanged.
    """
    m = BASIC.match(text)
    if not m:
        return ("", "", text)
    t = m.group(1).strip()
    e = m.group(2).strip()
    rest = text[m.end():].strip()
    if rest.startswith(","):
        rest = rest[1:].strip()
    return (t, e, rest)
def get_sources(text, sources=None):
    """Identify the source abbreviation at the start of ``text``.

    Args:
        text: Reference text, optionally prefixed with "cp.".
        sources: Mapping of abbreviation to identifier. Defaults to the
            module-level ``SOURCES`` table.

    Returns:
        A tuple ``(compare, source, rest)``:

        * ``compare`` is True when ``text`` started with "cp.";
        * ``source`` is the identifier of the longest abbreviation that
          ``text`` (after any "cp.") starts with, or "" when none matches;
        * ``rest`` is the stripped text after that abbreviation, or the text
          after any "cp." when nothing matched.
    """
    if sources is None:
        sources = SOURCES
    compare = text.startswith("cp.")
    if compare:
        text = text[3:].strip()
    # Longest abbreviations first, and stop at the first hit: otherwise
    # "O.Ir." would override the more specific "O.Ir. acc. pl.".
    for abbrev in sorted(sources, key=len, reverse=True):
        if text.startswith(abbrev):
            return (compare, sources[abbrev], text[len(abbrev):].strip())
    return (compare, "", text)
# Parse every line of the page into entry dicts, grouped by section in DATA.
# A line starting with "§ N. " switches the current section to N; any other
# line is filed under the section that is current at that point.
for line in page_lines:
    if line.startswith("§ "):
        # The section number sits between the "§ " prefix and the first ". ".
        dot = line.find(". ")
        if dot == -1:
            # No ". " at all: slicing with -1 would silently drop the last
            # character and could produce a wrong section number.
            continue
        pn = line[2:dot]
        try:
            section = int(pn)
        except ValueError:
            # The heading does not carry a plain integer: skip the line.
            continue
    if section not in DATA:
        DATA[section] = []
    # Continue numbering after the entries already stored for this section,
    # so a section spread over several lines never repeats an id.
    counter = len(DATA[section]) + 1
    if "e.g." in line:
        linep = line.split("e.g.")
        if len(linep) != 2:
            # More than one "e.g.": only the text between the first and the
            # second is parsed, so print the line for manual checking.
            print(line)
        parts = [x.strip() for x in linep[1].split(";")]
        for part in parts:
            # Drop the full stop that closes the last example of a paragraph.
            if part.endswith("."):
                part = part[:-1]
            # r (the text after the gloss) is not stored yet.
            t, e, r = get_basic(part)
            current = {
                "page": PAGE_NUM,
                "section": section,
                "id": f"{section}_{counter}",
                "transcription": t,
                "english": e
            }
            DATA[section].append(current)
            counter += 1
# NOTE(review): scratch expression whose value is discarded. In the visible
# code `m` is only assigned inside get_basic(), so at module level this line
# presumably raises NameError -- confirm and remove.
part[m.span()[1]:]