Utterance XML to JSON
Dataset
# Minimal utterance document used as example input: one sentence holding a
# single silence token/word whose only syllable contains one phoneme.
sample = """
<?xml version="1.0" encoding="utf-8"?>
<utterance input_string="">
<sentence input_string="">
<token input_string="SILENCE_TOKEN">
<word input_string="SILENCE_TOKEN" trans_source="src" trans_output_format="final">
<syllable >
<phoneme symbol="sil" end="1.19"/>
</syllable>
</word>
</token>
</sentence>
</utterance>
"""
import xml.etree.ElementTree as ET
class Utterance:
    """Top-level node of the parsed tree.

    Attributes:
        input: Value stored as the utterance's input string.
        sentences: Collection of child sentence objects.
    """

    def __init__(self, input, sentences):
        # Assigned left to right, so ``input`` is stored before ``sentences``.
        self.input, self.sentences = input, sentences
class Sentence:
    """A sentence node holding its input string and child tokens.

    Attributes:
        input: Value stored as the sentence's input string.
        tokens: Collection of child token objects.
    """

    def __init__(self, input, tokens):
        # Assigned left to right, so ``input`` is stored before ``tokens``.
        self.input, self.tokens = input, tokens
class Token:
    """A token node holding its input string and child words.

    Attributes:
        input: Value stored as the token's input string.
        words: Collection of child word objects.
    """

    def __init__(self, input, words):
        # Assigned left to right, so ``input`` is stored before ``words``.
        self.input, self.words = input, words
class Word:
    """A word node holding its input string, source tag and syllables.

    Attributes:
        input: Value stored as the word's input string.
        source: Value stored as the word's transcription source.
        syllables: List of child syllable objects; never ``None``.
    """

    def __init__(self, input, source, syllables=None):
        """Create a word.

        Args:
            input: Input string for the word.
            source: Transcription source for the word.
            syllables: Child syllables. May be omitted or ``None``, in which
                case a fresh empty list is used for this instance.
        """
        self.input = input
        self.source = source
        # A new list per instance avoids sharing one mutable default.
        self.syllables = [] if syllables is None else syllables
class Syllable:
    """A syllable node holding a stress value and its phonemes.

    Attributes:
        stress: Stress value of the syllable (defaults to 0).
        phonemes: List of child phoneme objects; never ``None``.
    """

    def __init__(self, stress: int = 0, phonemes = None):
        self.stress = stress
        # Fall back to a fresh list when no phoneme collection was supplied.
        self.phonemes = [] if phonemes is None else phonemes
class Phoneme:
    """A phoneme leaf node.

    Attributes:
        symbol: Phoneme symbol (defaults to the empty string).
        end: End value of the phoneme as a float (defaults to 0.0).
    """

    def __init__(self, symbol: str = "", end: float = 0.0):
        # Assigned left to right, so ``symbol`` is stored before ``end``.
        self.symbol, self.end = symbol, end
import io
# Wrap the sample text in an in-memory file-like object. The literal begins
# with a newline; strip() removes it so the XML declaration is the very first
# text in the stream (presumably required by the parser -- confirm).
sio = io.StringIO(sample.strip())
def from_xml(source):
    """Parse an utterance XML document into an ``Utterance`` object tree.

    The document root is used as the utterance; its direct ``sentence``
    children, their ``token`` children, and so on down through ``word``,
    ``syllable`` and ``phoneme`` are converted to the matching classes.
    Missing attributes fall back to defaults: ``''`` for ``input_string``,
    ``trans_source`` and ``symbol``, ``0`` for ``stress`` and ``0.0`` for
    ``end``.

    Args:
        source: File name or file object handed to ``ET.parse``.

    Returns:
        An ``Utterance`` whose ``input`` is the root's own ``input_string``.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not well-formed.
        ValueError: If a ``stress`` or ``end`` attribute is not numeric.
    """
    root = ET.parse(source).getroot()

    # Each level reads attributes from its own element variable so that an
    # inner node's ``input_string`` can never leak into an outer node.
    sentences = []
    for sentence_el in root.findall('./sentence'):
        tokens = []
        for token_el in sentence_el.findall('./token'):
            words = []
            for word_el in token_el.findall('./word'):
                syllables = []
                for syllable_el in word_el.findall('./syllable'):
                    stress = int(syllable_el.get('stress', 0))
                    phonemes = [
                        Phoneme(
                            phoneme_el.get('symbol', ''),
                            # A missing ``end`` defaults to 0.0 and leaves
                            # the symbol untouched.
                            float(phoneme_el.get('end', 0.0)),
                        )
                        for phoneme_el in syllable_el.findall('./phoneme')
                    ]
                    syllables.append(Syllable(stress, phonemes))
                words.append(
                    Word(
                        word_el.get('input_string', ''),
                        word_el.get('trans_source', ''),
                        syllables,
                    )
                )
            tokens.append(Token(token_el.get('input_string', ''), words))
        sentences.append(
            Sentence(sentence_el.get('input_string', ''), tokens)
        )
    return Utterance(root.get('input_string', ''), sentences)
# Parse the in-memory sample into an Utterance object tree.
utt = from_xml(sio)
import json
# Serialise the tree to a JSON string; any object json cannot encode natively
# is replaced by its attribute dict. The resulting string is not assigned.
json.dumps(utt, default=lambda o: o.__dict__)