from lxml import etree

class BuNaMoWrongDocument(Exception):
    """Signal that a parsed XML document has the wrong root element.

    Attributes:
        expected: Root tag name the reader required.
        got: Root tag name that was actually found.
        message: Human-readable description; also passed to ``Exception``.
    """

    def __init__(self, expected, got):
        description = f"Expected root element <{expected}> but got <{got}>"
        super().__init__(description)
        self.expected, self.got = expected, got
        self.message = description

The functions below each read one of the types of XML file. The open parts of speech (noun, adjective, verb) can have multiple forms, so those functions return the attributes (a dictionary) and the forms (a list of dictionaries) separately.

Closed parts of speech (possessives and prepositions) are simpler, and most of the attributes are unnecessary, so those functions return a single dictionary containing the forms.

def read_adjective(file):
    """Read an adjective XML document into its attributes and forms.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A tuple ``(attribs, forms)``. ``attribs`` maps ``'default'``,
        ``'declension'``, ``'disambig'`` and ``'isPre'`` to the values of the
        same-named attributes on the root element. ``forms`` holds one
        dictionary per child element, in document order, with ``'props'``
        (the child's tag name) and ``'form'`` (its ``default`` attribute).

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<adjective>``.
        ValueError: If a child element has a tag that is not a known
            adjective form.
    """
    valid_tags = ('sgNom', 'sgGenMasc', 'sgGenFem', 'plNom', 'graded',
                  'abstractNoun', 'sgVocMasc', 'sgVocFem')
    root = etree.parse(file).getroot()
    if root.tag != 'adjective':
        raise BuNaMoWrongDocument('adjective', root.tag)
    attribs = {
        name: root.get(name)
        for name in ('default', 'declension', 'disambig', 'isPre')
    }
    forms = []
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        forms.append({'props': child.tag, 'form': child.get('default')})
    return attribs, forms
def read_noun(file):
    """Read a noun XML document into its attributes and forms.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A tuple ``(attribs, forms)``. ``attribs`` maps ``'default'``,
        ``'declension'``, ``'disambig'``, ``'isProper'``, ``'isDefinite'`` and
        ``'allowArticledGenitive'`` to the values of the same-named
        attributes on the root element. ``forms`` holds one dictionary per
        child element, in document order, with ``'props'`` (the child's tag
        name), ``'form'`` (its ``default`` attribute), ``'gender'`` and
        ``'strength'``.

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<noun>``.
        ValueError: If a child element has a tag that is not a known noun
            form.
    """
    valid_tags = ('sgNom', 'sgGen', 'plNom', 'plGen', 'count', 'sgDat')
    root = etree.parse(file).getroot()
    if root.tag != 'noun':
        raise BuNaMoWrongDocument('noun', root.tag)
    attribs = {
        name: root.get(name)
        for name in ('default', 'declension', 'disambig', 'isProper',
                     'isDefinite', 'allowArticledGenitive')
    }
    forms = []
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        forms.append({
            'props': child.tag,
            'form': child.get('default'),
            'gender': child.get('gender'),
            'strength': child.get('strength'),
        })
    return attribs, forms
def read_verb(file):
    """Read a verb XML document into its attributes and forms.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A tuple ``(attribs, forms)``. ``attribs`` maps ``'default'`` and
        ``'disambig'`` to the values of the same-named attributes on the root
        element. ``forms`` holds one dictionary per child element, in
        document order, with ``'props'`` (the child's tag name), ``'form'``
        (its ``default`` attribute), ``'tense'``, ``'mood'``,
        ``'dependency'`` and ``'person'``.

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<verb>``.
        ValueError: If a child element has a tag that is not a known verb
            form.
    """
    valid_tags = ('verbalNoun', 'verbalAdjective', 'tenseForm', 'moodForm')
    root = etree.parse(file).getroot()
    if root.tag != 'verb':
        raise BuNaMoWrongDocument('verb', root.tag)
    attribs = {name: root.get(name) for name in ('default', 'disambig')}
    forms = []
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        forms.append({
            'props': child.tag,
            'form': child.get('default'),
            'tense': child.get('tense'),
            'mood': child.get('mood'),
            'dependency': child.get('dependency'),
            'person': child.get('person'),
        })
    return attribs, forms
def read_nounphrase(file):
    """Read a noun-phrase XML document into its attributes and forms.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A tuple ``(attribs, forms)``. ``attribs`` maps ``'default'``,
        ``'declension'``, ``'disambig'``, ``'isProper'``, ``'isDefinite'``,
        ``'allowArticledGenitive'`` and ``'forceNominative'`` to the values
        of the same-named attributes on the root element. ``forms`` holds one
        dictionary per child element, in document order, with ``'props'``
        (the child's tag name), ``'form'`` (its ``default`` attribute),
        ``'gender'`` and ``'strength'``.

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<nounPhrase>``.
        ValueError: If a child element has a tag that is not a known
            noun-phrase form.
    """
    valid_tags = ('sgNom', 'sgGen', 'plNom', 'plGen',
                  'sgNomArt', 'sgGenArt', 'plNomArt', 'plGenArt')
    root = etree.parse(file).getroot()
    if root.tag != 'nounPhrase':
        raise BuNaMoWrongDocument('nounPhrase', root.tag)
    attribs = {
        name: root.get(name)
        for name in ('default', 'declension', 'disambig', 'isProper',
                     'isDefinite', 'allowArticledGenitive', 'forceNominative')
    }
    forms = []
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        forms.append({
            'props': child.tag,
            'form': child.get('default'),
            'gender': child.get('gender'),
            'strength': child.get('strength'),
        })
    return attribs, forms
def read_possessive(file):
    """Read a possessive XML document into a single dictionary.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A dictionary mapping ``'default'``, ``'disambig'`` and ``'mutation'``
        to the values of the same-named attributes on the root element. When
        an ``<apos>`` child is present, its ``default`` attribute is stored
        under ``'apos'``.

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<possessive>``.
        ValueError: If a child element is neither ``<full>`` nor ``<apos>``.
    """
    valid_tags = ('full', 'apos')
    root = etree.parse(file).getroot()
    if root.tag != 'possessive':
        raise BuNaMoWrongDocument('possessive', root.tag)
    attribs = {
        name: root.get(name) for name in ('default', 'disambig', 'mutation')
    }
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        # <full> children are validated but not stored; only <apos> is kept.
        if child.tag == 'apos':
            attribs['apos'] = child.get('default')
    return attribs
def read_preposition(file):
    """Read a preposition XML document into a single dictionary.

    Args:
        file: Passed straight to ``etree.parse`` (callers in this file pass
            a filename).

    Returns:
        A dictionary holding the root element's ``default`` attribute under
        ``'default'``, plus one entry per child element mapping the child's
        tag name to its ``default`` attribute. A tag that occurs more than
        once keeps the value of its last occurrence.

    Raises:
        BuNaMoWrongDocument: If the root element is not ``<preposition>``.
        ValueError: If a child element has a tag that is not a known
            preposition form.
    """
    valid_tags = ('sg1', 'sg2', 'sg3Masc', 'sg3Fem', 'pl1', 'pl2', 'pl3')
    root = etree.parse(file).getroot()
    if root.tag != 'preposition':
        raise BuNaMoWrongDocument('preposition', root.tag)
    attribs = {'default': root.get('default')}
    for child in root:
        # lxml also yields comments and processing instructions here, with a
        # non-string tag; they carry no form data, so skip them instead of
        # failing on the string concatenation below.
        if not isinstance(child.tag, str):
            continue
        if child.tag not in valid_tags:
            raise ValueError('Unexpected tag ' + child.tag)
        attribs[child.tag] = child.get('default')
    return attribs

import glob
import json
import os

# Convert every adjective XML file into one entry of adjectives.json, keyed
# by the file's base name without its extension.
adjectives = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/adjective/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs, forms = read_adjective(x)
    adjectives[fname] = {'attributes': attribs, 'forms': forms}
with open('adjectives.json', 'w') as outfile:
    json.dump(adjectives, outfile)

# Convert every noun XML file into one entry of nouns.json, keyed by the
# file's base name without its extension.
nouns = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/noun/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs, forms = read_noun(x)
    nouns[fname] = {'attributes': attribs, 'forms': forms}
with open('nouns.json', 'w') as outfile:
    json.dump(nouns, outfile)

# Convert every noun-phrase XML file into one entry of nounphrases.json,
# keyed by the file's base name without its extension.
nounphrases = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/nounPhrase/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs, forms = read_nounphrase(x)
    nounphrases[fname] = {'attributes': attribs, 'forms': forms}
with open('nounphrases.json', 'w') as outfile:
    json.dump(nounphrases, outfile)

# Convert every verb XML file into one entry of verbs.json, keyed by the
# file's base name without its extension.
verbs = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/verb/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs, forms = read_verb(x)
    verbs[fname] = {'attributes': attribs, 'forms': forms}
with open('verbs.json', 'w') as outfile:
    json.dump(verbs, outfile)

# Convert every preposition XML file into one entry of prepositions.json,
# keyed by the file's base name without its extension.
preposition = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/preposition/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs = read_preposition(x)
    preposition[fname] = {'attributes': attribs}
with open('prepositions.json', 'w') as outfile:
    json.dump(preposition, outfile)

# Convert every possessive XML file into one entry of possessives.json,
# keyed by the file's base name without its extension.
possessive = {}
for x in glob.glob('../input/bunamo-bunachar-naisiunta-moirfeolaiochta/possessive/*.xml'):
    # basename/splitext remove only the directory part and the final
    # extension, whatever the path separator; replace('.xml', '') would also
    # delete '.xml' from the middle of a name.
    fname = os.path.splitext(os.path.basename(x))[0]
    attribs = read_possessive(x)
    possessive[fname] = {'attributes': attribs}
with open('possessives.json', 'w') as outfile:
    json.dump(possessive, outfile)

# Bare expression: evaluating it interactively shows the dictionary; it has no
# effect when the file is run as a script.
possessive

# NOTE(review): the literal below looks like the captured output of the
# expression above (presumably from a notebook cell) — confirm before treating
# it as code. As written it is an expression statement whose value is discarded.
{'ár_poss': {'attributes': {'default': 'ár',
   'disambig': '',
   'mutation': 'ecl1'}},
 'a_poss_masc': {'attributes': {'default': 'a',
   'disambig': 'masc',
   'mutation': 'len1'}},
 'a_poss_fem': {'attributes': {'default': 'a',
   'disambig': 'fem',
   'mutation': 'prefH'}},
 'do_poss': {'attributes': {'default': 'do',
   'disambig': '',
   'mutation': 'len1',
   'apos': "d'"}},
 'a_poss_pl': {'attributes': {'default': 'a',
   'disambig': 'pl',
   'mutation': 'ecl1'}},
 'mo_poss': {'attributes': {'default': 'mo',
   'disambig': '',
   'mutation': 'len1',
   'apos': "m'"}},
 'bhur_poss': {'attributes': {'default': 'bhur',
   'disambig': '',
   'mutation': 'ecl1'}}}