# Convert Azure ASR's JSONL output to JSON.
# Only the 1-best hypothesis of each result is kept.
import glob
import json
import os
def json_output_name(path):
    """Return the output file name for the input JSONL file at *path*.

    The directory part is dropped (the result is written to the current
    working directory) and only the file extension is swapped for
    ``.json``; any other occurrence of "jsonl" in the name is left alone.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    return stem + '.json'


def jsonl_to_segments(lines):
    """Convert an iterable of JSONL lines into a list of segment dicts.

    Each non-blank line is parsed as one JSON object, from which the
    ``Offset`` and ``Duration`` values are copied unchanged into ``start``
    and ``duration``, and the ``Lexical`` field of the first ``NBest``
    entry (the 1-best hypothesis) into ``text``.

    Blank lines (e.g. a trailing newline at the end of the file) are
    skipped. A record missing any of the expected keys, or with an empty
    ``NBest`` list, still raises ``KeyError`` / ``IndexError`` so that
    malformed input is not silently dropped.
    """
    segments = []
    for line in lines:
        if not line.strip():
            continue
        json_data = json.loads(line)
        segments.append({
            'start': json_data['Offset'],
            'duration': json_data['Duration'],
            # Only the top-ranked hypothesis is kept.
            'text': json_data['NBest'][0]['Lexical'],
        })
    return segments


for i in glob.glob('../input/mo-sgeal-fein-wikisource-azure-asr-output/*.jsonl'):
    outf = json_output_name(i)
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(i, encoding='utf-8') as f:
        # Iterate the file object directly instead of materialising all lines.
        curfile = jsonl_to_segments(f)
    with open(outf, 'w', encoding='utf-8') as of:
        json.dump(curfile, of)