# Small sample document used by the demo at the bottom of this file: a
# <corpus> root holding one <text>, which holds one <sentence> of two <w>
# tokens.  The backslash after the opening quotes keeps the string from
# starting with a newline.  Note the trailing space in the `publisher`
# value; the attribute readers below strip it.
example = """\
<corpus id="1960-0000">
<text date="1965-02-14" datefrom="19650214" dateto="19650214" genre="news" publisher="Stockholms Tidningen " timefrom="000000" timeto="235959" topic="Politik och samhällsfrågor" year="1965">
<sentence id="aa9c2ac8-ae5dd1a1">
<w dephead="4" deprel="RA" lemma="|i|" lex="|i..pp.1|" msd="PP" pos="PP" prefix="|" ref="1" saldo="|i..2|" suffix="|">I</w>
<w dephead="3" deprel="DT" lemma="|" lex="|" msd="HD.UTR.SIN.IND" pos="HD" prefix="|" ref="2" saldo="|" suffix="|">vilken</w>
</sentence>
</text>
</corpus>
"""
import xml.etree.ElementTree as ET


def _attrib(node, attrib: str) -> str:
    """Return the XML attribute *attrib* of *node*, whitespace-stripped.

    An attribute that is not present on the element yields the empty
    string.
    """
    return node.attrib.get(attrib, "").strip()


def _iattrib(node, attrib: str) -> int:
    """Return the XML attribute *attrib* of *node* parsed as an integer.

    Args:
        node: Element whose ``attrib`` mapping is consulted.
        attrib: Name of the attribute to read.

    Returns:
        The attribute value converted with ``int()`` after stripping
        surrounding whitespace, or ``0`` when the attribute is absent or
        its value is not a valid integer literal.
    """
    raw = node.attrib.get(attrib)
    if raw is None:
        return 0
    try:
        return int(raw.strip())
    except ValueError:
        # Non-numeric (or empty) values fall back to 0, exactly like a
        # missing attribute.
        return 0


class Corpus:
    """Root of a parsed corpus document.

    Attributes set by the constructor:
        id: ``id`` attribute of the document's root element ("" if absent).
        texts: one ``Text`` per ``<text>`` element directly under the
            root, in document order.
    """

    def __init__(self, source):
        """Parse *source* and build the object tree.

        *source* is handed unchanged to ``ET.parse``, so it may be a
        file name or an open file-like object.
        """
        top = ET.parse(source).getroot()
        self.id = _attrib(top, 'id')
        self.texts = [Text(child) for child in top.findall('./text')]


class Text:
    """One ``<text>`` element: its metadata attributes and its sentences.

    ``date``, ``genre``, ``publisher`` and ``topic`` are stored as
    whitespace-stripped strings; ``datefrom``, ``dateto``, ``timefrom``,
    ``timeto`` and ``year`` are stored as integers (0 when missing or
    not numeric).  ``sentences`` holds one ``Sentence`` per direct
    ``<sentence>`` child, in document order.
    """

    def __init__(self, node):
        # (attribute name, reader) pairs.  The order here is the order in
        # which the instance attributes are created, and therefore the
        # order they appear in ``__dict__``.
        readers = (
            ('date', _attrib),
            ('datefrom', _iattrib),
            ('dateto', _iattrib),
            ('genre', _attrib),
            ('publisher', _attrib),
            ('timefrom', _iattrib),
            ('timeto', _iattrib),
            ('topic', _attrib),
            ('year', _iattrib),
        )
        for field, read in readers:
            setattr(self, field, read(node, field))

        self.sentences = [
            Sentence(child) for child in node.findall('./sentence')
        ]


class Sentence:
    """One ``<sentence>`` element: its ``id`` and the words it contains."""

    def __init__(self, node):
        self.id = _attrib(node, 'id')
        # Only direct <w> children are collected, in document order.
        self.words = [Word(child) for child in node.findall('./w')]


class Word:
    """One ``<w>`` token: its annotation attributes and its surface form.

    Every annotation attribute (``dephead``, ``deprel``, ``lemma``,
    ``lex``, ``msd``, ``pos``, ``prefix``, ``ref``, ``saldo``,
    ``suffix``) is stored as a whitespace-stripped string, "" when the
    attribute is absent.  ``word`` is the element's text content,
    whitespace-stripped, and "" when the element has no text.
    """

    def __init__(self, node):
        self.dephead = _attrib(node, 'dephead')
        self.deprel = _attrib(node, 'deprel')
        self.lemma = _attrib(node, 'lemma')
        self.lex = _attrib(node, 'lex')
        self.msd = _attrib(node, 'msd')
        self.pos = _attrib(node, 'pos')
        self.prefix = _attrib(node, 'prefix')
        self.ref = _attrib(node, 'ref')
        self.saldo = _attrib(node, 'saldo')
        self.suffix = _attrib(node, 'suffix')
        # Element.text is None for an element without text content
        # (e.g. <w .../>); fall back to "" instead of raising
        # AttributeError on None.strip().
        self.word = (node.text or "").strip()
import io

# Demo: wrap the sample string in a file-like object, because Corpus passes
# its argument straight to ET.parse().
sio = io.StringIO(example)
corp = Corpus(sio)
import json
# Serialise the whole object tree; objects json cannot encode natively are
# replaced by their instance __dict__.  The returned string is not stored
# or printed here.
json.dumps(corp, default=lambda o: o.__dict__)
# The bare string literal below appears to be the recorded result of the
# json.dumps call above (likely pasted from an interactive session).  As an
# expression statement it has no effect when the file is executed.
'{"id": "1960-0000", "texts": [{"date": "1965-02-14", "datefrom": 19650214, "dateto": 19650214, "genre": "news", "publisher": "Stockholms Tidningen", "timefrom": 0, "timeto": 235959, "topic": "Politik och samh\\u00e4llsfr\\u00e5gor", "year": 1965, "sentences": [{"id": "aa9c2ac8-ae5dd1a1", "words": [{"dephead": "4", "deprel": "RA", "lemma": "|i|", "lex": "|i..pp.1|", "msd": "PP", "pos": "PP", "prefix": "|", "ref": "1", "saldo": "|i..2|", "suffix": "|", "word": "I"}, {"dephead": "3", "deprel": "DT", "lemma": "|", "lex": "|", "msd": "HD.UTR.SIN.IND", "pos": "HD", "prefix": "|", "ref": "2", "saldo": "|", "suffix": "|", "word": "vilken"}]}]}]}'