Check X-TTS output against original sentences
X-TTS is not exactly great at fidelity
• 21 min read
from pathlib import Path
%cd /Users/joregan/Playing/cvpr-control
/Users/joregan/Playing/cvpr-control
# Load the reference sentences, one per line.
# Blank lines are kept on purpose: the position in this list is used as the
# sentence index when the TSV file names are resolved further down, so
# dropping entries would shift every index.
# The encoding is explicit because the text contains non-ASCII characters
# (curly quotes, dashes, accented letters) and the platform default is not
# guaranteed to be UTF-8.  NOTE(review): assumes the file is UTF-8 - confirm.
with open("input_sentences.txt", encoding="utf-8") as f:
    sentences = [line.strip() for line in f]
# Collect, for every TSV file found under tsv/, the recognised text and the
# sentence it is compared against:
#   items[ident]["rec"]      - last column of every row, joined with spaces
#   items[ident]["original"] - entry of `sentences` selected by the integer
#                              after the final "_" in the file stem
items = {}
for tsvfile in Path("tsv").glob("**/*.tsv"):
    # Files with ".interloctr." anywhere in their path are skipped.
    if ".interloctr." in str(tsvfile):
        continue
    ident = tsvfile.stem
    # NOTE(review): assumes the TSV files are UTF-8 - confirm.
    with open(tsvfile, encoding="utf-8") as inf:
        # Blank rows are ignored: they would otherwise contribute empty
        # "words" (and therefore doubled spaces) to the joined transcript,
        # which makes the later string comparisons fail spuriously.
        rows = [line.strip().split("\t") for line in inf if line.strip()]
    items[ident] = {
        "rec": " ".join(row[-1] for row in rows),
        "original": sentences[int(ident.split("_")[-1])],
    }
from string import punctuation
PUNCT = set(punctuation)

# string.punctuation only covers ASCII, so typographic characters are first
# mapped onto an ASCII equivalent (quotes) or onto a space (dashes).  Without
# this, words such as "it’s" never compare equal to "it's".
_TYPOGRAPHIC = str.maketrans({
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark / typographic apostrophe
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2013": " ",  # en dash
    "\u2014": " ",  # em dash
})


def clean_sentence(text):
    """Normalise a sentence for word-level comparison.

    The text is split on whitespace; each token is then processed as follows:

    * tokens that start with "[" and end with "]" are dropped entirely;
    * leading and trailing punctuation is removed (punctuation inside a
      word, such as the hyphen in "non-stop", is kept);
    * the token is lower-cased;
    * tokens that end up empty are discarded.

    Typographic quotes are treated like their ASCII counterparts and
    en/em dashes like a space.

    Args:
        text: The sentence to normalise.

    Returns:
        The remaining tokens joined by single spaces.
    """
    strip_chars = "".join(PUNCT)
    words = []
    for word in text.translate(_TYPOGRAPHIC).split():
        if word.startswith("[") and word.endswith("]"):
            continue
        word = word.strip(strip_chars)
        # A token made only of punctuation is empty by now; keeping it would
        # put a doubled space into the result.
        if word:
            words.append(word.lower())
    return " ".join(words)
def prune_fillers(text, fillers=("uh", "um")):
    """Remove filler words from a space-separated string.

    Matching is exact and case-sensitive, on whole tokens only.

    Args:
        text: Space-separated words.
        fillers: Tokens to remove.  Defaults to "uh" and "um".

    Returns:
        The remaining tokens joined by single spaces.
    """
    filler_set = frozenset(fillers)
    return " ".join(word for word in text.split(" ") if word not in filler_set)
# Label each item with the first comparison under which the original sentence
# equals the recognised text:
#   "exact"              - raw strings are identical
#   "clean"              - identical after clean_sentence() on the original
#   "clean,fillers"      - ... and after removing fillers from the original
#   "clean,fillers_both" - ... and after removing fillers from both sides
# Items for which none of these hold get no "match" key and go into `rest`.
rest = {}
for ident, entry in items.items():
    source_text = entry["original"]
    recognised = entry["rec"]
    cleaned = clean_sentence(source_text)

    if source_text == recognised:
        label = "exact"
    elif cleaned == recognised:
        label = "clean"
    elif prune_fillers(cleaned) == recognised:
        label = "clean,fillers"
    elif prune_fillers(cleaned) == prune_fillers(recognised):
        label = "clean,fillers_both"
    else:
        label = None

    if label is None:
        rest[ident] = entry
    else:
        entry["match"] = label
%pip install jiwer
from jiwer import wer
# Store the word error rate for every item that did not get a "match" label.
# The cleaned original is the reference, the recognised text the hypothesis.
for ident, entry in items.items():
    if "match" in entry:
        continue
    entry["wer"] = wer(clean_sentence(entry["original"]), entry["rec"])
from difflib import SequenceMatcher
# Word-level diff of every unmatched item (cleaned original vs. recognised
# text).  Each non-equal opcode is counted in the matching dictionary:
#   replacements[(sent_id, original words, recognised words)]
#   deletions[(sent_id, original words)]
#   insertions[(sent_id, recognised words)]
# where sent_id is the part of the item name after the final "_".
# trace_replace additionally lists every replacement with the full item name.
insertions = {}
deletions = {}
replacements = {}
trace_replace = []
for ident, entry in items.items():
    if "match" in entry:
        continue
    sent_id = ident.split("_")[-1]
    ref_words = clean_sentence(entry["original"]).split(" ")
    rec_words = entry["rec"].split(" ")
    matcher = SequenceMatcher(None, ref_words, rec_words)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        ref_span = " ".join(ref_words[i1:i2])
        rec_span = " ".join(rec_words[j1:j2])
        if tag == "replace":
            key = (sent_id, ref_span, rec_span)
            trace_replace.append((ident, ref_span, rec_span))
            replacements[key] = replacements.get(key, 0) + 1
        elif tag == "delete":
            key = (sent_id, ref_span)
            deletions[key] = deletions.get(key, 0) + 1
        elif tag == "insert":
            key = (sent_id, rec_span)
            insertions[key] = insertions.get(key, 0) + 1
trace_replace
[('spkp264_243', 'really', 'real'),
('spkp237_605', '“you’d', "you'd"),
('spkp237_605', 'instrument,”', 'instrument'),
('spkp271_306', 'i i', 'it'),
('spkp271_306', 'feel', 'feels'),
('spkp282_186', 'gotta', 'got to'),
('spkp282_192', 'uh', 'ah'),
('spkp318_115', 'earbuds', '<unk>'),
('spkp364_156', 'unhear', '<unk>'),
('spkp374_156', 'unhear', '<unk>'),
('spkp318_129', 'gotta', 'got to'),
('spkp317_543', 'heartwarming', '<unk>'),
('spkp317_543', 'workouts', '<unk>'),
('spkp228_535', 'cause i', 'because'),
('spkp317_225', 'patience', 'patients'),
('spkp304_494', 'uh', 'ah'),
('spkp282_145', 'uh', 'err'),
('spkp282_151', 'off-putting', 'off putting'),
('spkp317_231', 'curveball', '<unk>'),
('spkp339_369', 'cause', 'because'),
('spkp339_341', 'panicking', '<unk>'),
('spkp317_219', 'can not', 'cannot'),
('spkp237_188', 'uh', 'ah'),
('spkp237_188', 'open-minded', 'open minded'),
('spkp339_433', 'sibling', 'siblings'),
('spkp339_355', "it's", 'it'),
('spkp237_361', 'panicking', '<unk>'),
('spkp232_556', 'replaying', '<unk>'),
('spkp232_556', 'heartwarming', '<unk>'),
('spkp271_57', 'crafting', '<unk>'),
('spkp314_127', 'there’s', "there's"),
('spkp318_465', 'right', 'write'),
('spkp226_166', 'scrunches', '<unk>'),
('spkp237_407', 'tv', '<unk>'),
('spkp304_127', 'there’s', "there's"),
('spkp282_390', 'cause', 'because'),
('spkp237_16', 'uh kinda', 'kind of'),
('spkp364_432', 'shake', 'shape'),
('spkp304_4', 'realize', 'realise'),
('spkp271_138', 'yuck', "<unk> i'm sorry"),
('spkp232_218', 'uh', '<unk>'),
('spkp232_218', 'uh', '<unk>'),
('spkp282_347', 'cause', 'because'),
('spk4_19', "should've", 'should have'),
('spkp232_595', '20/20', '<unk>'),
('spkp282_421', 'worst-case', 'worst case'),
('spkp282_353', 'health', 'hell'),
('spkp364_397', 'and', 'on'),
('spkp374_397', 'and', 'in'),
('spkp374_208', 'this', 'the'),
('spkp318_511', 'videos', '<unk>'),
('spkp282_596', 'gps', '<unk>'),
('spkp264_135', 'i am overthinking', "i'm <unk>"),
('spkp264_121', 'it’s', "it's"),
('spkp264_121', 'i’m', "i'm"),
('spkp264_121', 'i’m 16', "i'm <unk>"),
('spkp264_121', 'i’d', "i'd"),
('spkp318_505', 'bioluminescent', '<unk>'),
('spkp228_42', 'adrenaline', '<unk>'),
('spkp237_201', 'gotta', 'got to'),
('spkp282_97', 'just heartwarming', '<unk>'),
('spkp264_109', 'kinda', 'kind of'),
('spkp360_604', 'there’s', "there's"),
('spkp360_604', '“this', 'this'),
('spkp360_604', 'join,” i’m', "join i'm"),
('spkp232_378', 'panicking', '<unk>'),
('spkp226_6', 'uh', 'er'),
('spkp282_83', 'just heartwarming', '<unk>'),
('spkp232_26', "it's just mind-blowing", 'mind blowing'),
('spkp228_125', 'it’s', "it's"),
('spkp228_125', 'world’s', "world's"),
('spk1_15', 'uh', 'err'),
('spk1_15', 'uh', 'err'),
('spkp317_153', 'they', 'i'),
('spkp232_387', 'cause', 'because'),
('spkp282_541', 'veggie', '<unk>'),
('spkp225_106', 'kinda', 'kind of'),
('spkp374_98', 'replay', '<unk>'),
('spkp374_98', 'resonates', '<unk>'),
('spkp374_98', 'soundtrack', 'sound'),
('spkp228_119', 'vibe', '<unk>'),
('spkp318_288', 'mid-laugh', 'mid laugh'),
('spkp232_146', 'ugh', 'uh'),
('spkp225_16', 'uh kinda', 'kind of'),
('spkp226_210', 'realize', 'realise'),
('spkp226_210', "i'm", 'i am'),
('spkp226_210', 'uh', 'er'),
('spkp226_210', 'realization moving on', 'realisation'),
('spkp304_523', 'scrolling', '<unk>'),
('spkp304_523', 'emails', '<unk>'),
('spkp304_523', 'skeptical', 'sceptical'),
('spkp317_392', '20', '<unk>'),
('spkp304_537', 'energized', 'energised'),
('spkp226_562', 'cinematography', '<unk>'),
('spkp314_523', 'scrolling', '<unk>'),
('spkp314_523', 'emails', '<unk>'),
('spkp314_523', 'this', 'the'),
('spkp360_406', "at the spinning wheel waiting hoping and it's", "you're"),
('spkp360_406', 'just so incredibly', 'staring credibly'),
('spkp271_528', 'café', 'cafe'),
('spkp232_608', '“expand', 'expand'),
('spkp232_608', 'horizons”', 'horizons'),
('spk2_93', 'in', 'and'),
('spkp228_441', 'realize', 'realise'),
('spkp317_437', 'uh', 'ah'),
('spkp264_486', "i've", 'i'),
('spkp228_333', 'onto', 'on to'),
('spkp228_333', 'despair', 'to spare'),
('spkp228_455', 'and', 'in'),
('spkp317_423', 'backlash', '<unk>'),
('spkp317_423', 'the', 'a'),
('spkp339_553', 'heartwarming', '<unk>'),
('spkp339_547', 'ambiance', '<unk>'),
('spkp304_26', "it's just mind-blowing", 'mind blowing'),
('spkp317_378', 'panicking', '<unk>'),
('spkp225_339', 'look', 'looked'),
('spkp225_463', "i'm at", 'i met'),
('spkp225_463', "it's", 'its'),
('spkp232_190', 'uh', 'er'),
('spkp304_287', 'uh', 'ah'),
('spkp225_477', 'uh', 'ah'),
('spkp225_477', 'for getting', 'forgetting'),
('spkp264_493', 'um', 'and'),
('spkp264_478', 'staying', 'stay'),
('spk2_92', 'soundtrack', '<unk>'),
('spkp360_413', 'backlash', '<unk>'),
('spkp360_413', 'second-guess', 'second guess'),
('spkp360_413', 'negativity', '<unk>'),
('spkp360_407', 'tv', '<unk>'),
('spk2_86', 'uh', 'ah'),
('spk2_86', 'self-connection of introspection', 'self connection'),
('spkp225_488', 'telemarketer', '<unk>'),
('spkp225_488', 'cause i', 'because'),
('spkp271_529', "i'm", 'am'),
('spkp360_361', 'panicking', '<unk>'),
('spkp317_387', 'cause', 'cuz'),
('spkp314_522', 'paths', 'path'),
('spkp226_211', 'judgment', 'judgement'),
('spkp226_211', 'judgment', 'judgement'),
('spkp314_536', "i'm", 'am'),
('spkp282_41', 'colors', 'colours'),
('spkp282_41', 'peace', 'piece'),
('spkp282_41', 'peace', 'piece'),
('spkp317_608', '“expand', 'expand'),
('spkp317_608', 'horizons”', 'horizons'),
('spkp232_392', '20', '<unk>'),
('spkp225_107', "um it's kinda", 'kind of'),
('spkp282_540', 'game-changer', 'game changer'),
('spkp282_226', 'notices', 'noticed'),
('spkp228_80', 'awe-inspiring', 'awe inspiring'),
('spkp228_80', 'chaos and', "case isn't"),
('spkp360_188', 'open-minded', 'open minded'),
('spkp228_130', 'okay', 'ok'),
('spkp282_69', 'crafting', '<unk>'),
('spk1_14', 'kinda', 'kind of'),
('spkp228_124', 'it’s', "it's"),
('spkp228_124', 'non-stop', '<unk>'),
('spkp228_124', 'i’m', "i'm"),
('spkp228_124', '– kinda', 'kind of'),
('spkp282_232', 'open-minded', 'open minded'),
('spkp282_554', 'favorite', 'favourite'),
('spkp374_547', 'ambiance', '<unk>'),
('spkp364_553', 'heartwarming', '<unk>'),
('spkp374_235', 'just uh', '<unk>'),
('spkp374_553', 'so heartwarming', '<unk>'),
('spkp360_605', '“you’d', "you'd"),
('spkp360_605', 'instrument,”', 'instrument'),
('spkp364_221', 'uh a thousand', '<unk> <unk>'),
('spkp264_120', 'hadn’t', "hadn't"),
('spkp264_120', '90s', '<unk>'),
('spkp264_120', '‘why not?’', 'why not'),
('spkp228_57', 'uh envelop', 'envelope'),
('spkp228_57', 'crafting', '<unk>'),
('spkp317_191', 'the', 'this'),
('spkp232_423', 'backlash', '<unk>'),
('spkp264_134', 'overthinking', '<unk>'),
('spkp264_134', 'jewelry’s', "jewelry's"),
('spkp264_134', 'it’s', "it's"),
('spkp282_408', 'scrambling', '<unk>'),
('spkp339_156', 'could unhear', "couldn't hear"),
('spk4_18', 'colorful', 'colourful'),
('spk4_18', 'uhm', 'um'),
('spkp282_346', 'cause', 'because'),
('spkp364_341', 'panicking', '<unk>'),
('spkp374_433', 'sibling', 'siblings'),
('spkp237_348', 'right', 'write'),
('spkp304_5', 'uh', 'a'),
('spkp364_433', 'sibling', 'siblings'),
('spkp374_341', 'panicking', '<unk>'),
('spkp318_458', 'armrest just', '<unk>'),
('spkp304_126', 'spent', '<unk>'),
('spkp271_111', 'kinda', 'kind of'),
('spkp226_173', 'uh', 'err'),
('spkp232_225', 'patience', 'patients'),
('spkp314_132', 'tutorials', '<unk>'),
('spkp314_132', '‘how', 'how'),
('spkp314_132', 'be?’', 'be'),
('spkp232_543', 'heartwarming', '<unk>'),
('spkp232_543', 'workouts', '<unk>'),
('spkp318_464', 'break-ins', 'break ins'),
('spkp318_464', 'double-checking', 'double checking'),
('spkp364_369', 'cause', 'because'),
('spkp232_231', 'curveball', '<unk>'),
('spkp271_105', 'decide', 'decided'),
('spkp271_105', "sky's", 'sky is'),
('spkp304_132', 'tutorials', '<unk>'),
('spkp304_132', '‘how', 'how'),
('spkp304_132', 'be?’', 'be'),
('spkp225_517', "you'll make in", "you're making"),
('spkp225_517', 'impactful', '<unk>'),
('spkp304_481', "it's just", 'it'),
('spkp225_271', 'their', "they're"),
('spkp228_520', 'heartwarming', '<unk>'),
('spkp314_495', 'snorkeling', '<unk>'),
('spkp317_556', 'replaying', '<unk>'),
('spkp317_556', 'heartwarming', '<unk>'),
('spkp314_481', 'uh', 'ah'),
('spkp304_495', 'snorkeling', '<unk>'),
('spkp304_495', "we're", "you're"),
('spkp228_252', 'anew', 'and you'),
('spkp318_128', 'full-on', 'full on'),
('spkp318_128', 'kinda', 'kind of'),
('spkp264_518', 'like-minded', 'like minded'),
('spkp304_442', 'laundromat', '<unk>'),
('spkp314_330', 'uh', 'oh'),
('spkp317_595', '20/20', '<unk>'),
('spkp226_371', 'what-ifs', 'what ifs'),
('spkp304_324', 'timelines', '<unk>'),
('spkp271_313', 'onto', 'on to'),
('spkp271_313', 'i', "i'd"),
('spkp318_114', 'today’s', 'today is'),
('spkp314_442', 'laundromat', '<unk>'),
('spkp237_604', 'there’s', "there's"),
('spkp237_604', '“this', 'this'),
('spkp237_604', 'join,” i’m just', "join i'm"),
('spkp314_324', 'timelines', '<unk>'),
('spkp282_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'),
('spkp226_403', 'checkout', '<unk>'),
('spkp318_116', 'wound', 'went'),
('spkp226_373', 'i have', 'just had'),
('spkp374_169', 'chalkboard', '<unk>'),
('spkp226_415', 'tv', '<unk>'),
('spkp304_440', 'uh', 'ah'),
('spkp271_463', "it's", 'its'),
('spkp364_169', 'chalkboard', '<unk>'),
('spkp237_606', 'there’s', "there's"),
('spkp237_606', '“come', 'come'),
('spkp237_606', 'it’ll', "it'll"),
('spkp237_606', 'you’ll', "you'll"),
('spkp237_606', 'it,”', 'it'),
('spkp237_606', 'i’m', "i'm"),
('spkp318_102', 'kinda', 'kind of'),
('spkp364_141', 'ew', 'you'),
('spkp360_565', 'theatre', 'theater'),
('spkp360_565', 'impactful', '<unk>'),
('spkp339_395', 'right', 'write'),
('spkp339_395', 'tv', '<unk>'),
('spkp339_395', 'glitch', '<unk>'),
('spkp374_141', 'ew', 'you'),
('spkp271_339', 'look', 'looked'),
('spkp339_381', 'just frozen', 'chosen'),
('spkp225_298', 'favorite', 'favourite'),
('spkp360_571', 'heartwarming', '<unk>'),
('spkp304_483', 'stop', 'stopped'),
('spkp304_483', 'cause', 'because'),
('spkp317_232', 'open-minded', 'open minded'),
('spkp339_418', 'what-ifs', 'what ifs'),
('spkp317_540', 'game-changer', 'game changer'),
('spkp228_536', "i'm", 'am'),
('spkp282_608', '“expand', 'expand'),
('spkp282_608', 'horizons”', 'horizons'),
('spkp271_488', 'telemarketer', '<unk>'),
('spkp271_488', 'cause i', 'because'),
('spkp225_529', 'had', 'have'),
('spkp364_196', 'times', 'time'),
('spkp339_342', 'grab', 'grabbed'),
('spkp339_342', 'armrests', '<unk>'),
('spkp232_541', 'veggie', '<unk>'),
('spkp314_130', 'clearer kinda', 'claire'),
('spkp314_130', 'okay', 'ok'),
('spkp314_130', "they're", 'are'),
('spkp226_171', 'too', 'to'),
('spkp304_124', 'it’s', "it's"),
('spkp304_124', 'non-stop', 'non stop'),
('spkp304_124', 'i’m', "i'm"),
('spkp304_124', '– kinda', 'kind of'),
('spkp304_130', 'clearer kinda', '<unk> kind of'),
('spkp304_130', 'okay', 'ok'),
('spkp318_300', 'uh', 'oh'),
('spkp314_124', 'it’s', "it's"),
('spkp314_124', 'non-stop', '<unk>'),
('spkp314_124', 'i’m', "i'm"),
('spkp314_124', '– kinda', 'kind of'),
('spkp282_387', 'cause', 'because'),
('spkp364_425', 'health', 'house'),
('spkp364_357', 'uh roller coaster', '<unk>'),
('spkp226_159', "must've", 'must have'),
('spkp232_569', 'recognized', 'recognised'),
('spkp339_168', 'uh', 'oh'),
('spkp339_168', 'uh', 'so i'),
('spkp318_35', 'this', 'the'),
('spkp282_422', 'as', 'is'),
('spkp232_596', 'recognize', 'recognise'),
('spkp232_596', 'gps', '<unk>'),
('spkp374_394', 'emails', '<unk>'),
('spkp364_394', 'emails', '<unk>'),
('spkp282_378', 'panicking', '<unk>'),
('spkp318_499', 'paddleboarding', '<unk>'),
('spkp318_499', 'adrenaline', '<unk>'),
('spkp232_19', "should've", 'should have'),
('spkp228_41', "you're", 'here'),
('spkp228_41', 'uh', 'oh'),
('spkp228_41', 'peace', 'piece'),
('spkp228_41', 'peace', 'piece'),
('spkp317_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'),
('spkp264_122', 'there’s', "there's"),
('spkp282_595', '20/20', '<unk>'),
('spkp264_136', 'the', 'a'),
('spkp228_55', 'uh', 'oh'),
('spkp228_55', 'lostness', '<unk>'),
('spkp232_421', 'worst-case', 'worst case'),
('spkp237_216', 'uh', 'ah'),
('spkp237_216', 'unsee unknow', '<unk> oh no'),
('spkp364_579', 'a maze', 'domains'),
('spkp282_80', 'just awe-inspiring', 'awe inspiring'),
('spkp282_80', 'chaos and calm', 'cases come'),
('spkp374_545', 'impacting', '<unk>'),
('spkp237_558', 'oh', 'owe'),
('spkp360_607', '“you’re', "you're"),
('spkp360_607', 'don’t', "don't"),
('spkp360_607', 'once,”', 'once'),
('spkp360_607', 'i’m', "i'm"),
('spkp228_69', 'um', 'and'),
('spkp228_69', 'crafting', '<unk>'),
('spkp364_545', 'impacting', '<unk>'),
('spkp228_132', 'tutorials', '<unk>'),
('spkp228_132', '‘how', 'how'),
('spkp228_132', 'be?’', 'be'),
('spkp282_224', 'cause', 'because'),
('spkp225_105', "there's", "it's"),
('spkp225_105', 'sunrise', 'sun rise'),
('spkp225_105', "sky's", 'sky is'),
('spkp282_556', 'replaying', '<unk>'),
('spkp225_111', 'kinda', 'kind of'),
('spk1_16', 'uh kinda', 'a kind of'),
('spkp364_592', 'regroup', '<unk>'),
('spkp374_592', 'regroup', '<unk>'),
('spkp374_592', "a bundle of nerves and it's it's", "just <unk> i'm just"),
('spkp314_21', 'um', 'and'),
('spkp314_21', 'uh', 'ah'),
('spkp282_57', 'uh envelop', 'envelope'),
('spkp282_57', 'crafting', '<unk>'),
('spkp314_520', 'heartwarming', '<unk>'),
('spkp232_151', 'off-putting', 'off putting'),
('spkp228_495', 'snorkeling', '<unk>'),
('spkp304_534', 'uh', 'ah'),
('spkp304_534', 'reflective', 'reflected'),
('spkp226_561', 'uh the', 'a'),
('spkp271_517', 'impactful', '<unk>'),
('spkp304_520', 'heartwarming', '<unk>'),
('spkp226_575', 'exhilarating', 'ex so'),
('spkp314_252', 'anew', 'and you'),
('spkp304_508', "i'm just", 'i'),
('spkp364_61', 'awe-inspiring', 'awe inspiring'),
('spkp232_192', 'uh', 'oh'),
('spkp225_461', 'even', 'the'),
('spkp317_346', 'cause', 'because'),
('spkp360_388', 'gps', '<unk>'),
('spkp360_388', 'rerouting', '<unk>'),
('spkp360_388', 'stop-and-go', 'stop and go'),
('spkp264_485', 'cause', 'because'),
('spkp314_291', "it'll", 'it will'),
('spkp228_324', 'timelines', '<unk>'),
('spkp228_442', 'laundromat', '<unk>'),
('spkp225_448', 'glitch or', '<unk> all'),
('spkp225_448', "machine's", 'machines'),
('spkp339_545', 'impacting', '<unk>'),
('spkp317_353', "can't", 'just just'),
('spkp232_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'),
('spkp317_71', 'uh', 'ah'),
('spkp317_71', "flame's", 'flames'),
('spkp317_71', 'mesmerizing', '<unk>'),
('spkp317_421', 'uh', 'ah'),
('spkp317_421', 'worst-case', 'worst case'),
('spkp304_19', "should've", 'should have'),
('spkp317_347', 'cause i', 'because'),
('spkp317_347', 'leaves me alone', 'just leo lom'),
('spkp339_592', 'regroup', '<unk>'),
('spkp364_60', "there's", 'it was'),
('spkp314_509', 'um and then', "i'm a nen"),
('spk2_85', 'just reveling', 'revelling'),
('spkp314_253', 'uh', 'ah'),
('spkp317_390', 'cause', 'because'),
('spkp304_521', 'um so', '<unk>'),
('spkp226_212', 'uh', 'err'),
('spkp225_14', 'kinda', 'kinder'),
('spkp264_335', 'not-funny', 'not funny'),
('spkp314_535', 'cause i', 'because'),
('spkp232_150', 'realizing', 'realising'),
('spkp304_253', 'uh', 'oh'),
('spkp304_535', 'cause i', 'because'),
('spkp282_56', "you're", 'you'),
('spkp317_179', 'i mean', '<unk>'),
('spkp282_219', 'can not', 'cannot'),
('spkp225_138', 'yuck', '<unk>'),
('spkp232_385', 'scared', 'dared'),
('spkp317_151', 'off-putting', 'off putting'),
('spkp228_97', 'heartwarming', '<unk>'),
('spkp228_127', 'there’s', "there's"),
('spkp282_231', 'curveball', 'curve ball'),
('spkp317_145', 'looked', 'looks'),
('spkp225_104', 'surreal', '<unk>'),
('spkp282_543', 'heartwarming', '<unk>'),
('spkp282_543', 'workouts', '<unk>'),
('spkp360_606', 'there’s', "there's"),
('spkp360_606', '“come', 'come'),
('spkp360_606', 'it’ll', "it'll"),
('spkp360_606', 'you’ll', "you'll"),
('spkp360_606', 'it,”', 'it'),
('spkp360_606', 'i’m', "i'm"),
('spkp228_68', 'just heartwarming', '<unk>'),
('spkp282_95', "you're", 'you are'),
('spkp282_95', 'just savoring', 'savouring'),
('spkp237_571', 'heartwarming', '<unk>'),
('spkp232_18', 'colorful', 'colourful'),
('spkp237_565', 'theatre', 'theater'),
('spkp237_565', 'so impactful', '<unk>'),
('spkp364_395', 'tv', '<unk>'),
('spkp364_395', 'glitch', '<unk>'),
('spkp339_141', 'ew', 'you'),
('spkp374_381', "can't", 'just look down and i i'),
('spkp237_388', 'gps', '<unk>'),
('spkp237_388', 'rerouting', '<unk>'),
('spkp237_388', 'stop-and-go', 'stop and go'),
('spkp374_395', 'tv', '<unk>'),
('spkp374_395', 'glitch', '<unk>'),
('spkp282_423', 'backlash', '<unk>'),
('spkp339_169', 'chalkboard', '<unk>'),
('spkp237_439', 'gonna', 'going to'),
('spkp304_6', 'uh', 'ah'),
('spkp364_342', 'armrests', '<unk>'),
('spkp304_119', 'vibe', '<unk>'),
('spkp339_196', 'times', 'time'),
('spkp374_430', 'scared of', 'dare to'),
('spkp237_405', 'um', "i'm"),
('spkp237_405', 'uh', 'ah'),
('spkp237_405', 'uh', 'ah'),
('spkp232_232', 'open-minded', 'open minded'),
('spkp232_232', 'uh realizing', 'er realising'),
('spkp374_418', 'what-ifs', 'what ifs'),
('spkp339_39', 'mesmerizing', '<unk> no <unk>'),
('spkp226_602', 'diy', '<unk>'),
('spkp314_125', 'it’s', "it's"),
('spkp314_125', 'world’s', "world's"),
('spkp232_554', 'and oh', 'i know'),
('spkp271_41', 'peace', 'piece'),
('spkp271_41', 'peace', 'piece'),
('spkp237_14', 'kinda', 'kind of'),
('spkp232_540', 'game-changer', 'game changer'),
('spkp232_540', 'weight', 'white'),
('spkp364_418', 'what-ifs', 'what ifs'),
('spkp282_392', '20', '<unk>'),
('spkp318_473', "it's daunting just", 'jaunting'),
('spkp304_125', 'pane', 'paine'),
('spkp304_125', 'it’s', "it's"),
('spkp304_125', 'world’s', "world's"),
('spkp271_112', 'it makes', "and it's"),
('spkp226_170', 'cause', 'because'),
('spkp339_357', 'roller coaster', '<unk>'),
('spkp225_528', 'café', 'cafe'),
('spkp225_528', 'cause i', 'because'),
('spkp228_251', 'old', 'all'),
('spkp317_541', 'veggie', '<unk>'),
('spkp228_523', 'scrolling', '<unk>'),
('spkp228_523', 'emails', '<unk>'),
('spkp317_555', 'uh', 'ah'),
('spkp304_469', "there's", 'there is'),
('spkp304_469', "can't help", "just it's not helped"),
('spkp360_216', 'unsee unknow', '<unk> <unk>'),
('spkp339_380', 'relax', 'feel relaxed'),
('spkp339_394', 'emails', '<unk>'),
('spkp360_564', 'theatre', 'theater'),
('spkp264_269', "can't", 'count'),
('spkp226_400', 'um to fantasize', '<unk>'),
('spkp304_333', 'onto', 'on to'),
('spkp318_103', 'kinda', 'kind of'),
('spkp237_607', '“you’re', "you're"),
('spkp237_607', 'don’t', "don't"),
('spkp237_607', 'once,”', 'once'),
('spkp237_607', 'i’m', "i'm"),
('spkp364_168', "couldn't", 'couldn t'),
('spkp271_310', 'uh', 'oh'),
('spkp318_117', 'texted', '<unk>'),
('spkp226_414', 'paralyzed', 'paralysed'),
('spkp264_533', 'cause i', 'because'),
('spkp360_560', 'theatre', 'theater'),
('spkp339_390', 'cause', 'because'),
('spkp314_479', 'uh', 'ah'),
('spkp237_159', "must've", 'must have'),
('spkp304_445', 'neighbor uh', 'neighbour'),
('spkp317_592', 'a', 'the'),
('spkp317_592', 'regroup', '<unk>'),
('spkp237_603', 'resonates', '<unk>'),
('spkp314_445', 'um', 'and'),
('spkp360_548', 'favorite', 'favourite'),
('spkp360_548', 'flavor', 'flavour'),
('spkp318_107', "um it's kinda", 'kind of'),
('spkp282_180', "they're explaining", 'there explain'),
('spkp264_523', 'scrolling', '<unk>'),
('spkp264_523', 'emails', '<unk>'),
('spkp226_389', 'service', 'serviced'),
('spkp339_353', "can't", "didn't chant didn't work"),
('spkp364_187', 'unsee unhear un-experience it', '<unk> <unk> or <unk> at'),
('spkp339_421', 'worst-case', 'worst case'),
('spkp374_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'),
('spkp304_486', 'uh', 'ah'),
('spkp304_486', "i've", 'i'),
('spkp314_492', 'a bit mystified by the discovery', 'oh man'),
('spkp225_262', 'uh', 'oh'),
('spkp314_486', "i've", 'i'),
('spkp314_486', 'gasp', 'gasped'),
('spkp314_486', 'caught', 'coat'),
('spkp364_346', 'cause', 'because'),
('spkp237_429', 'googling', '<unk>'),
('spkp317_8', 'uh', 'ah'),
('spkp314_109', 'kinda', 'kind of'),
('spkp374_346', 'cause', 'because'),
('spkp237_415', 'tv', '<unk>'),
('spkp271_116', 'wound', 'went'),
('spkp304_121', 'it’s', "it's"),
('spkp304_121', 'i’m', "i'm"),
('spkp304_121', 'i’m 16', "i'm <unk>"),
('spkp304_121', 'i’d forgotten', "i'd forgot"),
('spkp237_373', "can't watch my heart just just racing too much", 'be like'),
('spkp314_135', 'i am overthinking', "i'm <unk>"),
('spkp237_367', "it's", 'it'),
('spkp364_408', 'um', 'and'),
('spkp314_121', 'it’s', "it's"),
('spkp314_121', 'i’m', "i'm"),
('spkp314_121', 'i’m 16', "i'm <unk>"),
('spkp314_121', 'i’d', "i'd"),
('spkp226_606', 'there’s', "there's"),
('spkp226_606', '“come', 'come'),
('spkp226_606', 'it’ll', "it'll"),
('spkp226_606', 'you’ll', "you'll"),
('spkp226_606', 'it,”', 'it'),
('spkp226_606', 'i’m', "i'm"),
('spkp304_135', 'overthinking', '<unk>'),
('spkp271_102', 'kinda', 'kind of'),
('spkp339_151', 'off-putting', 'off putting'),
('spkp282_369', 'cause', 'because'),
('spkp318_488', 'telemarketer', '<unk>'),
('spkp318_488', 'cause i', 'because'),
('spkp237_398', 'uh', "you're just"),
('spkp282_355', "it's", 'it'),
('spkp282_341', 'panicking', '<unk>'),
('spk3_16', 'uh kinda', 'kind of'),
('spkp339_179', 'try', 'tried'),
('spkp374_540', 'game-changer', 'game changer'),
('spkp364_232', 'open-minded', 'open minded'),
('spkp226_55', "same time and it's like", 'slight'),
('spkp226_55', 'lostness', '<unk>'),
('spkp374_232', 'open-minded', 'open minded'),
('spkp232_418', 'what-ifs', 'what ifs'),
('spkp364_540', 'game-changer', 'game changer'),
('spkp364_226', 'nope', 'no'),
('spkp360_602', 'diy', 'di'),
('spkp360_602', "i'm", 'and'),
('spkp264_127', 'there’s', "there's"),
('spkp226_69', 'um', "i'm"),
('spkp226_69', 'crafting', '<unk>'),
('spkp360_158', 'this', 'the'),
('spkp228_50', 'with', 'of'),
('spkp228_50', 'just perfect', 'purse puffed'),
('spkp232_342', 'armrests', '<unk>'),
('spkp282_52', 'freeing', 'free'),
('spkp317_169', 'chalkboard', '<unk>'),
('spkp282_209', 'uh', 'are'),
('spkp225_128', 'full-on', 'full on'),
('spkp225_128', 'kinda', 'kind of'),
('spkp232_395', 'uh', 'er'),
('spkp232_395', 'tv', '<unk>'),
('spkp232_395', 'glitch', '<unk>'),
('spkp314_18', 'uhm', 'um'),
('spkp225_114', 'today’s', "today's"),
('spkp225_114', 'made', 'make'),
('spkp232_168', 'like', 'light'),
('spkp237_8', 'uh', 'ah'),
('spkp360_400', 'fantasize', '<unk>'),
('spkp304_519', 'um', "i'm"),
('spkp304_519', 'right', 'write'),
('spkp304_519', 'just', 'so so'),
('spkp339_596', 'gps', '<unk>'),
('spkp226_202', 'uh', 'er'),
('spkp314_525', 'groundbreaking', '<unk>'),
('spkp228_484', 'get-together', 'get together'),
('spkp226_216', 'unsee unknow', '<unk> i know'),
('spkp317_394', 'emails', '<unk>'),
('spkp317_394', 'rechecking', 're checking'),
('spkp304_21', 'um', "i'm"),
('spk2_56', 'the sky is', 'this'),
('spkp339_555', 'uh', 'ah'),
('spkp225_464', 'break-ins', 'break ins'),
('spkp225_464', 'double-checking', 'double checking'),
('spkp228_335', 'not-funny', 'not funny'),
('spkp317_61', "it's", 'is'),
('spkp317_61', 'just awe-inspiring', 'awe inspiring'),
('spkp314_294', 'want to', 'wanna'),
('spkp317_357', "i'm", 'am'),
('spkp317_342', 'armrests', '<unk>'),
('spkp225_303', 'into', 'to'),
('spkp228_452', 'googling', '<unk>'),
('spkp228_452', "i can't calm", 'count'),
('spkp339_232', 'open-minded', 'open minded'),
('spkp339_540', 'game-changer', 'game changer'),
('spkp225_459', 'just becomes', 'comes'),
('spkp317_418', 'what-ifs', 'what ifs'),
('spk2_57', 'envelop', 'envelope'),
('spk2_57', 'crafting', '<unk>'),
('spkp271_275', 'uh', 'well'),
('spkp228_485', 'cause', 'because'),
('spkp317_395', 'right', 'write'),
('spkp317_395', 'tv', '<unk>'),
('spkp317_395', "there's", "it's"),
('spkp317_395', 'glitch', '<unk>'),
('spkp226_571', 'heartwarming seeing', '<unk> seen'),
('spkp226_571', 'realize', 'realise'),
('spkp304_524', 'cause i', 'because'),
('spkp271_513', "you've um you've fantasized", 'you <unk>'),
('spkp271_513', "happening and it's just just so surreal", 'all'),
('spkp226_565', 'impactful', '<unk>'),
('spkp264_59', 'um', 'and'),
('spkp264_442', 'uh', 'ah'),
('spkp264_442', 'laundromat', '<unk>'),
('spkp228_491', 'cause i', 'because'),
('spkp264_324', 'timelines', '<unk>'),
('spkp314_524', "i'm", 'am'),
('spkp314_524', 'cause i', 'because'),
('spkp271_261', 'do', 'did'),
('spkp226_203', 'apologizing', 'apologising'),
('spkp304_518', 'like-minded', 'like minded'),
('spkp364_71', "flame's", 'flames'),
('spkp314_518', 'like-minded', 'like minded'),
('spkp232_169', 'chalkboard', '<unk>'),
('spkp360_415', 'tv just', '<unk>'),
('spkp314_19', "should've", 'should have'),
('spkp228_122', 'there’s', "there's"),
('spkp225_115', 'earbuds', '<unk>'),
('spkp232_394', 'emails', '<unk>'),
('spkp282_546', "it's", 'is'),
('spkp364_596', 'gps', '<unk>'),
('spkp364_596', 'just puzzled', 'tousled'),
('spkp360_159', "must've", 'must have'),
('spkp264_132', 'tutorials', '<unk>'),
('spkp264_132', '‘how', 'how'),
('spkp264_132', 'be?’', 'be'),
('spkp374_569', 'it was just', 'is'),
('spkp232_357', 'roller coaster', '<unk>'),
('spkp364_541', 'this', 'the'),
('spkp364_541', 'veggie', '<unk>'),
('spkp374_233', 'trying to', 'try and'),
('spkp360_603', 'resonates', '<unk>'),
('spkp374_541', 'veggie', '<unk>'),
('spkp232_21', 'uh', 'ah'),
('spkp232_592', 'regroup', '<unk>'),
('spkp271_87', 'this', 'the'),
('spk4_22', 'how', 'her'),
('spkp237_399', 'um', "i'm"),
('spkp364_390', 'cause', 'because'),
('spkp318_19', "should've", 'should have'),
('spkp374_390', 'cause', 'because'),
('spkp314_120', 'hadn’t', "hadn't"),
('spkp314_120', '90s', '<unk>'),
('spkp314_120', '‘why not?’', 'why not'),
('spkp237_400', 'fantasize', '<unk>'),
('spkp271_103', 'kinda', 'kind of'),
('spkp304_134', 'overthinking', '<unk>'),
('spkp304_134', 'jewelry’s', "jewellery's"),
('spkp304_134', 'it’s', "it's"),
('spkp304_120', 'hadn’t', "hadn't"),
('spkp304_120', '90s', '<unk>'),
('spkp304_120', '‘why not?’', 'why not'),
('spkp271_117', 'texted', '<unk>'),
('spkp226_175', 'cause', 'because'),
('spkp232_545', 'impacting', '<unk>'),
('spkp314_134', 'overthinking', '<unk>'),
('spkp314_134', 'jewelry’s', "jewelry's"),
('spkp314_134', 'it’s', "it's"),
('spkp237_372', 'uh', 'ah'),
('spkp304_3', 'uh', 'ah'),
('spkp304_3', 'like', 'that'),
('spkp374_347', 'cause', 'because'),
('spkp318_338', 'this', 'the'),
('spkp339_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'),
('spkp374_421', 'worst-case', 'worst case'),
('spkp364_347', 'cause', 'because'),
('spkp314_108', 'cause uh', 'because'),
('spkp314_108', 'kinda', 'kind of'),
('spkp364_421', 'worst-case', 'worst case'),
('spkp264_287', 'just feel', 'still'),
('spkp314_487', 'cause i', 'because'),
('spkp225_511', 'videos', '<unk>'),
('spkp282_156', 'unhear', '<unk>'),
('spkp339_434', 'about', 'that'),
('spkp226_388', 'gps', '<unk>'),
('spkp226_388', 'rerouting', '<unk>'),
('spkp226_388', 'stop-and-go', 'stop and go'),
('spkp318_106', 'kinda', 'kind of'),
('spkp314_444', 'squishy', '<unk>'),
('spkp264_244', 'out for', 'up from'),
('spkp264_244', "they're not there anymore and", 'then'),
('spkp237_602', 'diy', '<unk>'),
('spkp237_164', 'uh there', 'i'),
('spkp304_450', 'uh', 'a'),
('spkp226_405', 'um so', '<unk>'),
('spkp226_405',
'drilling into your head making it impossible to uh to find any peace',
'just'),
('spkp374_179', 'try', 'tried'),
('spkp374_179', 'uh', 'a'),
('spkp304_444', 'squishy', '<unk>'),
('spkp314_336', "i'm", 'am'),
('spkp271_315', 'uh', 'oh'),
('spkp364_151', 'off-putting', 'off putting'),
('spkp304_478', 'braving', 'breathing'),
('spkp374_151', 'off-putting', 'off putting'),
('spkp225_288', 'mid-laugh', 'mid laugh'),
('spkp271_329', "they're just", "there's"),
('spkp364_145', 'looked', 'looks'),
('spkp318_138', 'yuck', '<unk>'),
('spkp364_153', 'they', 'i'),
('spkp339_393', 'uh', 'ah'),
('spkp304_452', 'googling', '<unk>'),
('spkp264_520', 'this', 'the'),
('spkp264_520', 'heartwarming', '<unk>'),
('spkp237_166', 'scrunches', '<unk>'),
('spkp237_600', 'salsa', '<unk>'),
('spkp318_104', 'surreal', '<unk>'),
('spkp226_361', 'panicking', '<unk>'),
('spkp304_320', 'uh', 'ah'),
('spkp374_609', 'forms', 'forums'),
('spkp264_252', 'anew', 'and you'),
('spkp314_452', 'googling', '<unk>'),
('spkp304_446', 'um', "you're just"),
('spkp339_422', 'as', 'is'),
('spkp364_190', 'the', 'a'),
('spkp304_491', 'cause i', 'because'),
('spkp225_507', 'travelers', 'travellers'),
('spkp339_378', 'panicking', '<unk>'),
('spkp314_485', 'cause', 'because'),
('spkp228_524', 'cause i', 'cuz'),
('spkp225_275', 'uh', 'ah'),
('spkp304_485', 'cause', 'because'),
('spkp228_242', 'uh', 'oh'),
('spkp374_423', 'backlash', '<unk>'),
('spkp237_358', 'tossing', '<unk>'),
('spkp304_1', 'um', "i'm"),
('spkp364_423', 'backlash', '<unk>'),
('spkp318_448', 'glitch', '<unk>'),
('spkp339_16', 'uh kinda', 'kind of'),
('spkp237_364', 'second-guess', 'second guess'),
('spkp314_122', 'there’s', "there's"),
('spkp226_605', '“you’d', "you'd"),
('spkp226_605', 'instrument,”', 'instrument'),
('spkp282_395', 'uh', 'are'),
('spkp282_395', 'right', 'write'),
('spkp282_395', 'tv', '<unk>'),
('spkp282_395', 'glitch', '<unk>'),
('spkp237_370', 'but', 'it'),
('spkp232_547', 'favorite', 'favourite'),
('spkp232_547', 'ambiance', '<unk>'),
('spkp364_379', "there's this", 'there is the'),
('spkp237_416', 'right', 'write'),
('spkp271_115', 'earbuds', '<unk>'),
('spkp304_122', 'there’s', "there's"),
('spkp282_418', 'what-ifs', 'what ifs'),
('spkp364_392', '20', '<unk>'),
('spkp374_392', '20', '<unk>'),
('spkp226_188', 'open-minded', 'open minded'),
('spkp282_342', 'armrests', '<unk>'),
('spkp339_608', '“expand', 'expand'),
('spkp339_608', 'horizons”', 'horizons'),
('spkp364_225', 'patience', 'patients'),
('spkp374_231', 'curveball', 'curve ball'),
('spkp318_528', 'café', 'cafe'),
('spkp318_528', 'cause i', 'because'),
('spkp364_543', 'heartwarming', '<unk>'),
('spkp364_543', 'workouts', '<unk>'),
('spkp374_225', 'patience', 'patients'),
('spkp374_543', 'heartwarming', '<unk>'),
('spkp374_543', 'workouts', '<unk>'),
('spkp364_231', 'curveball', 'curve ball'),
('spkp282_593', 'daunting', 'so'),
('spkp232_341', 'panicking', '<unk>'),
('spkp232_427', 'i i', "i'd"),
('spkp228_53', 'uh', 'oh'),
('spkp374_219', 'can not', 'cannot'),
('spkp237_562', 'cinematography', '<unk>'),
('spkp264_124', 'it’s', "it's"),
('spkp264_124', 'non-stop', 'non stop'),
('spkp264_124', 'i’m', "i'm"),
('spkp264_124', '– kinda', 'kind of'),
('spkp374_594', "i'm just", 'not'),
('spkp364_580', "it's", 'is'),
('spkp228_108', 'danced', 'dance'),
('spk6_19', "should've", 'should have'),
('spkp225_117', 'texted', '<unk>'),
('spkp228_120', 'hadn’t', "hadn't"),
('spkp228_120', '90s', '<unk>'),
('spkp228_120', '‘why not?’', 'why not'),
('spkp317_156', 'unhear', '<unk>'),
('spkp228_134', 'overthinking', '<unk>'),
('spkp228_134', 'jewelry’s', "jewelry's"),
('spkp228_134', 'it’s', "it's"),
('spkp225_103', 'realize', 'realise'),
('spkp314_268', 'and i', 'an eye'),
('spkp339_595', '20/20', '<unk>'),
('spkp317_89', 'self-care', 'self care'),
('spkp360_417', 'this', 'the'),
('spkp339_581', "there's", 'there is'),
('spkp360_371', 'what-ifs', 'what ifs'),
('spkp271_511', 'um', "i'm"),
('spkp271_511', 'videos', '<unk>'),
('spkp304_526', 'cause i', 'because'),
('spkp228_487', 'cause', 'because'),
('spkp304_240', 'uh', 'ah'),
('spkp271_505', 'um', "i'm"),
('spkp271_505', 'bioluminescent', '<unk>'),
('spkp339_556', 'replaying', '<unk>'),
('spkp339_556', 'heartwarming', '<unk>'),
('spkp228_478', 'staying in', 'stay again'),
('spkp271_288', 'mid-laugh', 'mid laugh'),
('spkp339_224', 'cause', 'because'),
('spk2_55', "you're", 'you all'),
('spk2_55', 'uh', 'are'),
('spk2_55', 'lostness', '<unk>'),
('spkp364_98', 'replay', '<unk>'),
('spkp364_98', 'resonates', '<unk>'),
('spkp364_98', 'soundtrack', '<unk>'),
('spkp228_444', 'squishy', '<unk>'),
('spkp232_180', 'uh', 'ah'),
('spkp317_62', "you're", 'you are'),
('spk2_69', 'crafting', '<unk>'),
('spkp228_336', 'lift', 'live'),
('spkp339_219', 'i can', 'and'),
('spkp304_296', 'sadder', 'sad'),
('spkp317_63', 'uh', 'ah'),
('spkp225_466', "just can't", "don't"),
('spkp264_496', "um i'm just", 'uh'),
('spkp264_496', 'trek', 'trick'),
('spkp264_496', 'just thinking', '<unk>'),
('spkp317_433', 'sibling', 'siblings'),
('spkp364_99', 'colors', 'colours'),
('spkp317_77', 'past', 'pass'),
('spkp339_225', 'patience', 'patients'),
('spkp339_543', 'heartwarming', '<unk>'),
('spkp339_543', 'workouts', '<unk>'),
('spkp339_231', 'curveball', 'curve ball'),
('spkp232_156', 'unhear', '<unk>'),
('spkp226_200', 'uh', 'ah'),
('spkp271_504', "i'm just", 'i'),
('spkp317_382', 'jolts', '<unk>'),
('spkp317_382', 'but', 'i'),
('spkp360_358', 'tossing', '<unk>'),
('spkp228_486', "i've", 'i'),
('spkp314_533', 'cause i', 'because'),
('spkp304_269', 'realization', 'realisation'),
('spkp225_499', 'paddleboarding', '<unk>'),
('spkp225_499', 'adrenaline', '<unk>'),
('spkp360_364', 'second-guess', 'second guess'),
('spkp226_228', 'uh', 'a'),
('spkp228_135', 'i am overthinking', "i'm <unk>"),
('spkp282_545', 'impacting', '<unk>'),
('spkp225_102', 'kinda', 'kind of'),
('spkp225_116', 'wound', 'went'),
('spk6_18', 'colorful', 'colourful'),
('spk6_18', 'uhm', 'erm'),
('spkp228_121', 'it’s', "it's"),
('spkp228_121', 'i’m', "i'm"),
('spkp228_121', 'i’m 16', "i'm <unk>"),
('spkp228_121', 'i’d', "i'd"),
('spkp228_109', 'kinda', 'kind of'),
('spkp364_595', '20/20', '<unk>'),
('spkp374_595', '20/20', '<unk>'),
('spkp314_26', "it's just mind-blowing", 'mind blowing'),
('spkp364_581', "there's", 'there is'),
('spkp364_218', 'this', 'a'),
('spkp282_592', 'regroup', '<unk>'),
('spkp318_515', "you're", 'you were'),
('spkp317_194', 'uh', "i'm"),
('spkp374_224', 'cause', 'cuz'),
('spkp226_57', 'um', "i'm"),
('spkp226_57', 'envelop', 'envelope'),
('spkp226_57', 'crafting', '<unk>'),
('spkp226_57', 'act of creating of bringing joy through food', 'acts'),
('spkp364_556', 'replaying', '<unk>'),
('spkp364_556', 'so heartwarming', '<unk>'),
('spkp282_87', 'this', 'the'),
('spkp264_119', 'vibe', '<unk>'),
('spkp374_556', 'um', 'then'),
('spkp374_556', 'replaying', '<unk>'),
('spkp374_556', "brings a smile to your face it's like you're", '<unk>'),
('spkp374_556',
"so thankful for those moments for that perfect date and it's it's just so so heartwarming",
'<unk>'),
('spkp364_224', 'cause', 'because'),
('spkp360_166', 'scrunches', '<unk>'),
('spkp318_529', "i'm", 'am'),
('spkp318_529', 'cause i', 'because'),
('spkp282_357', 'uh roller coaster', '<unk>'),
('spkp282_357', "i'm", 'am'),
('spkp282_431', 'just imagining', 'imagine'),
('spkp318_32', 'um', 'and'),
('spkp339_153', 'tune out', 'two now'),
('spkp364_387', 'cause', 'because'),
('spkp364_393', 'uh', 'us'),
('spk4_21', 'um', "i'm"),
('spkp374_387', 'cause', 'because'),
('spkp237_371', 'what-ifs', 'what ifs'),
('spkp282_394', 'emails', '<unk>'),
('spkp282_394', 'rechecking', "we're checking"),
('spkp271_114', 'today’s', "today's"),
('spkp364_378', 'panicking', '<unk>'),
('spkp237_403', 'checkout', '<unk>'),
('spkp237_403', 'dragging', 'jogging'),
('spkp232_234', 'uh', '<unk>'),
('spkp318_307', 'this', 'the'),
('spkp226_604', 'there’s', "there's"),
('spkp226_604', '“this', 'this'),
('spkp226_604', 'join,” i’m just', "join i'm"),
('spkp374_378', 'panicking', '<unk>'),
('spkp237_365', 'obsessively', '<unk>'),
('spkp364_422',
"take a break because it's it's too much just just too real",
'ugh'),
('spkp271_128', 'full-on', 'full on'),
('spkp232_208', 'uh', 'ah'),
('spkp364_436', 'in the', 'a'),
('spkp228_525', 'groundbreaking', '<unk>'),
('spkp317_553', 'heartwarming', '<unk>'),
('spkp314_490', 'cause i', 'because like'),
('spkp282_155', 'the uh', 'that'),
('spkp304_484', 'get-together', 'get together'),
('spkp304_490', 'cause', 'because'),
('spkp317_221', 'uh a thousand', '<unk> <unk>'),
('spkp314_484', 'get-together', 'get together'),
('spkp314_484', 'just shocked', 'shock'),
('spkp282_169', 'chalkboard', '<unk>'),
('spkp339_423', 'backlash', '<unk>'),
('spkp339_423', 'the', 'a'),
('spkp374_608', '“expand', 'expand'),
('spkp374_608', 'horizons”', 'horizons'),
('spkp318_111', 'kinda', 'kind of'),
('spkp304_447', 'update', '<unk>'),
('spkp264_535',
'surprised and excited cause i i never meet anyone',
'surprise sighted one'),
('spkp314_335', 'not-funny', 'not funny'),
('spkp228_294', 'letting go', 'lingo'),
('spkp364_608', '“expand', 'expand'),
('spkp364_608', 'horizons”', 'horizons'),
('spkp271_464', 'break-ins', 'break ins'),
('spkp271_464', 'double-checking', 'double checking'),
('spkp226_406',
"staring at the spinning wheel waiting hoping and it's",
'just just'),
('spkp318_105', 'decide', 'decided'),
('spkp304_335', 'not-funny', 'not funny'),
('spkp339_392', '20', '<unk>'),
('spkp360_562', 'cinematography', '<unk>'),
('spkp271_458', 'armrest', '<unk>'),
('spkp271_458', 'praying', 'trying'),
('spkp374_146', 'ugh', 'ah'),
('spkp271_403', 'checkout', '<unk>'),
('spkp318_604', 'there’s', "there's"),
('spkp318_604', '“this', 'this'),
('spkp318_604', 'join,” i’m just', "join i'm"),
('spkp271_365', 'obsessively', '<unk>'),
('spkp364_109', 'kinda', 'kind of'),
('spkp228_595', '20/20', '<unk>'),
('spkp304_346', 'brace', 'braced'),
('spkp271_371', 'what-ifs', 'what ifs'),
('spkp237_114', 'today’s', "today's"),
('spkp374_121', 'it’s', "it's"),
('spkp374_121', 'i’m', "i'm"),
('spkp374_121', 'i’m 16', "i'm <unk>"),
('spkp374_121', 'i’d', "i'd"),
('spkp364_135', 'i am overthinking', "i'm <unk>"),
('spkp360_511', 'um so', '<unk>'),
('spkp360_511', 'videos', '<unk>'),
('spkp264_208', 'this', 'a'),
('spkp364_121', 'it’s', "it's"),
('spkp364_121', 'i’m', "i'm"),
('spkp364_121', 'i’m 16', "i'm <unk>"),
('spkp364_121', 'i’d', "i'd"),
('spkp360_505', 'bioluminescent', '<unk>'),
('spkp237_128', 'full-on', 'full on'),
('spkp237_128', 'but', 'that'),
('spkp339_493', 'um', "i'm"),
('spkp374_135', 'overthinking', '<unk>'),
('spkp360_288', 'mid-laugh', 'mid laugh'),
('spkp228_556', 'replaying', '<unk>'),
('spkp228_556', 'so heartwarming', '<unk>'),
('spkp314_391', 'halt', 'hold'),
('spkp282_132', 'tutorials', '<unk>'),
('spkp282_132', '‘how', 'how'),
('spkp282_132', 'be?’', 'be'),
('spkp339_444', 'squishy', '<unk>'),
('spkp237_464', 'break-ins', 'break ins'),
('spkp237_464', 'double-checking', 'double checking'),
...]