Check X-TTS output against original sentences
X-TTS is not exactly great at fidelity
• 21 min read
from pathlib import Path
%cd /Users/joregan/Playing/cvpr-control
/Users/joregan/Playing/cvpr-control
# Load the reference sentences, one per line, with surrounding whitespace
# removed.  The encoding is given explicitly because the sentences contain
# non-ASCII characters (typographic quotes, accented letters) and the
# platform default encoding is not guaranteed to be UTF-8.
with open("input_sentences.txt", encoding="utf-8") as f:
    sentences = [line.strip() for line in f]
# Map each recording id (the TSV file stem) to its recognised text ("rec")
# and to the reference sentence it was generated from ("original").
items = {}
for tsvfile in Path("tsv").glob("**/*.tsv"):
    # Files with ".interloctr." anywhere in their path are not processed.
    if ".interloctr." in str(tsvfile):
        continue
    ident = tsvfile.stem
    with open(tsvfile, encoding="utf-8") as inf:
        # Skip blank lines: they would otherwise contribute empty tokens and
        # leave stray spaces in "rec", breaking exact string comparison.
        rows = [line.strip().split("\t") for line in inf if line.strip()]
    items[ident] = {
        # The last tab-separated field of every row is the recognised token.
        "rec": " ".join(row[-1] for row in rows),
        # The number after the final underscore of the stem indexes into
        # the list of reference sentences.
        "original": sentences[int(ident.split("_")[-1])],
    }
from string import punctuation

# ASCII punctuation plus the typographic quotes, dashes and ellipsis that
# occur in the reference sentences; string.punctuation alone leaves tokens
# such as “you’d or instrument,” untouched.
PUNCT = set(punctuation) | set("“”‘’–—…")


def clean_sentence(text):
    """Normalise a reference sentence for comparison with recognised text.

    The sentence is split on whitespace; tokens wrapped entirely in square
    brackets (e.g. ``[laughs]``) are dropped, leading and trailing
    punctuation is stripped from every remaining token, and the result is
    lower-cased and re-joined with single spaces.

    Args:
        text: The original sentence.

    Returns:
        The cleaned, lower-cased sentence.  Tokens that consist only of
        punctuation are removed rather than kept as empty words.
    """
    # Em and en dashes act as word separators.  Curly apostrophes are folded
    # to ASCII so contractions ("it’s") compare equal to "it's".
    text = text.replace("—", " ").replace("–", " ").replace("’", "'")
    strip_chars = "".join(PUNCT)
    words = []
    for word in text.split():
        # The bracket check runs before stripping, as in the original.
        if word.startswith("[") and word.endswith("]"):
            continue
        word = word.strip(strip_chars)
        if not word:
            # Pure punctuation: nothing left to compare.
            continue
        words.append(word.lower())
    return " ".join(words)
def prune_fillers(text, fillers=("uh", "um")):
    """Remove filler words from a space-separated sentence.

    Args:
        text: Sentence whose words are separated by single spaces.
        fillers: Words to drop.  Defaults to ``("uh", "um")``; pass a
            different iterable to prune other hesitation spellings.

    Returns:
        The sentence with every word found in ``fillers`` removed, joined
        with single spaces.
    """
    filler_set = set(fillers)
    return " ".join(word for word in text.split(" ") if word not in filler_set)
# Label every item with the weakest normalisation under which the recognised
# text equals the reference; items that never match are collected in `rest`.
rest = {}
for item, entry in items.items():
    original = entry["original"]
    rec = entry["rec"]
    cleaned = clean_sentence(original)
    pruned = prune_fillers(cleaned)

    if original == rec:
        label = "exact"
    elif cleaned == rec:
        label = "clean"
    elif pruned == rec:
        label = "clean,fillers"
    elif pruned == prune_fillers(rec):
        label = "clean,fillers_both"
    else:
        label = None

    if label is None:
        rest[item] = entry
    else:
        entry["match"] = label
%pip install jiwer
from jiwer import wer
# Attach a word error rate to every item that did not get a "match" label,
# scoring the recognised text against the cleaned reference sentence.
for item, entry in items.items():
    if "match" in entry:
        continue
    entry["wer"] = wer(clean_sentence(entry["original"]), entry["rec"])
from difflib import SequenceMatcher
# Word-level diff of cleaned reference vs. recognised text for every item
# without a "match" label.  Each edit is counted per sentence id (the part of
# the item id after the last underscore); replacements are additionally
# logged per item in `trace_replace`.
insertions = {}
deletions = {}
replacements = {}
trace_replace = []
for item, entry in items.items():
    if "match" in entry:
        continue
    sent_id = item.split("_")[-1]
    a = clean_sentence(entry["original"]).split(" ")
    b = entry["rec"].split(" ")
    for tag, i1, i2, j1, j2 in SequenceMatcher(None, a, b).get_opcodes():
        ref_span = " ".join(a[i1:i2])
        rec_span = " ".join(b[j1:j2])
        if tag == "replace":
            trace_replace.append((item, ref_span, rec_span))
            pair = (sent_id, ref_span, rec_span)
            replacements[pair] = replacements.get(pair, 0) + 1
        elif tag == "delete":
            pair = (sent_id, ref_span)
            deletions[pair] = deletions.get(pair, 0) + 1
        elif tag == "insert":
            pair = (sent_id, rec_span)
            insertions[pair] = insertions.get(pair, 0) + 1
        # "equal" spans need no bookkeeping.
trace_replace
[('spkp264_243', 'really', 'real'), ('spkp237_605', '“you’d', "you'd"), ('spkp237_605', 'instrument,”', 'instrument'), ('spkp271_306', 'i i', 'it'), ('spkp271_306', 'feel', 'feels'), ('spkp282_186', 'gotta', 'got to'), ('spkp282_192', 'uh', 'ah'), ('spkp318_115', 'earbuds', '<unk>'), ('spkp364_156', 'unhear', '<unk>'), ('spkp374_156', 'unhear', '<unk>'), ('spkp318_129', 'gotta', 'got to'), ('spkp317_543', 'heartwarming', '<unk>'), ('spkp317_543', 'workouts', '<unk>'), ('spkp228_535', 'cause i', 'because'), ('spkp317_225', 'patience', 'patients'), ('spkp304_494', 'uh', 'ah'), ('spkp282_145', 'uh', 'err'), ('spkp282_151', 'off-putting', 'off putting'), ('spkp317_231', 'curveball', '<unk>'), ('spkp339_369', 'cause', 'because'), ('spkp339_341', 'panicking', '<unk>'), ('spkp317_219', 'can not', 'cannot'), ('spkp237_188', 'uh', 'ah'), ('spkp237_188', 'open-minded', 'open minded'), ('spkp339_433', 'sibling', 'siblings'), ('spkp339_355', "it's", 'it'), ('spkp237_361', 'panicking', '<unk>'), ('spkp232_556', 'replaying', '<unk>'), ('spkp232_556', 'heartwarming', '<unk>'), ('spkp271_57', 'crafting', '<unk>'), ('spkp314_127', 'there’s', "there's"), ('spkp318_465', 'right', 'write'), ('spkp226_166', 'scrunches', '<unk>'), ('spkp237_407', 'tv', '<unk>'), ('spkp304_127', 'there’s', "there's"), ('spkp282_390', 'cause', 'because'), ('spkp237_16', 'uh kinda', 'kind of'), ('spkp364_432', 'shake', 'shape'), ('spkp304_4', 'realize', 'realise'), ('spkp271_138', 'yuck', "<unk> i'm sorry"), ('spkp232_218', 'uh', '<unk>'), ('spkp232_218', 'uh', '<unk>'), ('spkp282_347', 'cause', 'because'), ('spk4_19', "should've", 'should have'), ('spkp232_595', '20/20', '<unk>'), ('spkp282_421', 'worst-case', 'worst case'), ('spkp282_353', 'health', 'hell'), ('spkp364_397', 'and', 'on'), ('spkp374_397', 'and', 'in'), ('spkp374_208', 'this', 'the'), ('spkp318_511', 'videos', '<unk>'), ('spkp282_596', 'gps', '<unk>'), ('spkp264_135', 'i am overthinking', "i'm <unk>"), ('spkp264_121', 'it’s', "it's"), 
('spkp264_121', 'i’m', "i'm"), ('spkp264_121', 'i’m 16', "i'm <unk>"), ('spkp264_121', 'i’d', "i'd"), ('spkp318_505', 'bioluminescent', '<unk>'), ('spkp228_42', 'adrenaline', '<unk>'), ('spkp237_201', 'gotta', 'got to'), ('spkp282_97', 'just heartwarming', '<unk>'), ('spkp264_109', 'kinda', 'kind of'), ('spkp360_604', 'there’s', "there's"), ('spkp360_604', '“this', 'this'), ('spkp360_604', 'join,” i’m', "join i'm"), ('spkp232_378', 'panicking', '<unk>'), ('spkp226_6', 'uh', 'er'), ('spkp282_83', 'just heartwarming', '<unk>'), ('spkp232_26', "it's just mind-blowing", 'mind blowing'), ('spkp228_125', 'it’s', "it's"), ('spkp228_125', 'world’s', "world's"), ('spk1_15', 'uh', 'err'), ('spk1_15', 'uh', 'err'), ('spkp317_153', 'they', 'i'), ('spkp232_387', 'cause', 'because'), ('spkp282_541', 'veggie', '<unk>'), ('spkp225_106', 'kinda', 'kind of'), ('spkp374_98', 'replay', '<unk>'), ('spkp374_98', 'resonates', '<unk>'), ('spkp374_98', 'soundtrack', 'sound'), ('spkp228_119', 'vibe', '<unk>'), ('spkp318_288', 'mid-laugh', 'mid laugh'), ('spkp232_146', 'ugh', 'uh'), ('spkp225_16', 'uh kinda', 'kind of'), ('spkp226_210', 'realize', 'realise'), ('spkp226_210', "i'm", 'i am'), ('spkp226_210', 'uh', 'er'), ('spkp226_210', 'realization moving on', 'realisation'), ('spkp304_523', 'scrolling', '<unk>'), ('spkp304_523', 'emails', '<unk>'), ('spkp304_523', 'skeptical', 'sceptical'), ('spkp317_392', '20', '<unk>'), ('spkp304_537', 'energized', 'energised'), ('spkp226_562', 'cinematography', '<unk>'), ('spkp314_523', 'scrolling', '<unk>'), ('spkp314_523', 'emails', '<unk>'), ('spkp314_523', 'this', 'the'), ('spkp360_406', "at the spinning wheel waiting hoping and it's", "you're"), ('spkp360_406', 'just so incredibly', 'staring credibly'), ('spkp271_528', 'café', 'cafe'), ('spkp232_608', '“expand', 'expand'), ('spkp232_608', 'horizons”', 'horizons'), ('spk2_93', 'in', 'and'), ('spkp228_441', 'realize', 'realise'), ('spkp317_437', 'uh', 'ah'), ('spkp264_486', "i've", 'i'), 
('spkp228_333', 'onto', 'on to'), ('spkp228_333', 'despair', 'to spare'), ('spkp228_455', 'and', 'in'), ('spkp317_423', 'backlash', '<unk>'), ('spkp317_423', 'the', 'a'), ('spkp339_553', 'heartwarming', '<unk>'), ('spkp339_547', 'ambiance', '<unk>'), ('spkp304_26', "it's just mind-blowing", 'mind blowing'), ('spkp317_378', 'panicking', '<unk>'), ('spkp225_339', 'look', 'looked'), ('spkp225_463', "i'm at", 'i met'), ('spkp225_463', "it's", 'its'), ('spkp232_190', 'uh', 'er'), ('spkp304_287', 'uh', 'ah'), ('spkp225_477', 'uh', 'ah'), ('spkp225_477', 'for getting', 'forgetting'), ('spkp264_493', 'um', 'and'), ('spkp264_478', 'staying', 'stay'), ('spk2_92', 'soundtrack', '<unk>'), ('spkp360_413', 'backlash', '<unk>'), ('spkp360_413', 'second-guess', 'second guess'), ('spkp360_413', 'negativity', '<unk>'), ('spkp360_407', 'tv', '<unk>'), ('spk2_86', 'uh', 'ah'), ('spk2_86', 'self-connection of introspection', 'self connection'), ('spkp225_488', 'telemarketer', '<unk>'), ('spkp225_488', 'cause i', 'because'), ('spkp271_529', "i'm", 'am'), ('spkp360_361', 'panicking', '<unk>'), ('spkp317_387', 'cause', 'cuz'), ('spkp314_522', 'paths', 'path'), ('spkp226_211', 'judgment', 'judgement'), ('spkp226_211', 'judgment', 'judgement'), ('spkp314_536', "i'm", 'am'), ('spkp282_41', 'colors', 'colours'), ('spkp282_41', 'peace', 'piece'), ('spkp282_41', 'peace', 'piece'), ('spkp317_608', '“expand', 'expand'), ('spkp317_608', 'horizons”', 'horizons'), ('spkp232_392', '20', '<unk>'), ('spkp225_107', "um it's kinda", 'kind of'), ('spkp282_540', 'game-changer', 'game changer'), ('spkp282_226', 'notices', 'noticed'), ('spkp228_80', 'awe-inspiring', 'awe inspiring'), ('spkp228_80', 'chaos and', "case isn't"), ('spkp360_188', 'open-minded', 'open minded'), ('spkp228_130', 'okay', 'ok'), ('spkp282_69', 'crafting', '<unk>'), ('spk1_14', 'kinda', 'kind of'), ('spkp228_124', 'it’s', "it's"), ('spkp228_124', 'non-stop', '<unk>'), ('spkp228_124', 'i’m', "i'm"), ('spkp228_124', '– kinda', 'kind 
of'), ('spkp282_232', 'open-minded', 'open minded'), ('spkp282_554', 'favorite', 'favourite'), ('spkp374_547', 'ambiance', '<unk>'), ('spkp364_553', 'heartwarming', '<unk>'), ('spkp374_235', 'just uh', '<unk>'), ('spkp374_553', 'so heartwarming', '<unk>'), ('spkp360_605', '“you’d', "you'd"), ('spkp360_605', 'instrument,”', 'instrument'), ('spkp364_221', 'uh a thousand', '<unk> <unk>'), ('spkp264_120', 'hadn’t', "hadn't"), ('spkp264_120', '90s', '<unk>'), ('spkp264_120', '‘why not?’', 'why not'), ('spkp228_57', 'uh envelop', 'envelope'), ('spkp228_57', 'crafting', '<unk>'), ('spkp317_191', 'the', 'this'), ('spkp232_423', 'backlash', '<unk>'), ('spkp264_134', 'overthinking', '<unk>'), ('spkp264_134', 'jewelry’s', "jewelry's"), ('spkp264_134', 'it’s', "it's"), ('spkp282_408', 'scrambling', '<unk>'), ('spkp339_156', 'could unhear', "couldn't hear"), ('spk4_18', 'colorful', 'colourful'), ('spk4_18', 'uhm', 'um'), ('spkp282_346', 'cause', 'because'), ('spkp364_341', 'panicking', '<unk>'), ('spkp374_433', 'sibling', 'siblings'), ('spkp237_348', 'right', 'write'), ('spkp304_5', 'uh', 'a'), ('spkp364_433', 'sibling', 'siblings'), ('spkp374_341', 'panicking', '<unk>'), ('spkp318_458', 'armrest just', '<unk>'), ('spkp304_126', 'spent', '<unk>'), ('spkp271_111', 'kinda', 'kind of'), ('spkp226_173', 'uh', 'err'), ('spkp232_225', 'patience', 'patients'), ('spkp314_132', 'tutorials', '<unk>'), ('spkp314_132', '‘how', 'how'), ('spkp314_132', 'be?’', 'be'), ('spkp232_543', 'heartwarming', '<unk>'), ('spkp232_543', 'workouts', '<unk>'), ('spkp318_464', 'break-ins', 'break ins'), ('spkp318_464', 'double-checking', 'double checking'), ('spkp364_369', 'cause', 'because'), ('spkp232_231', 'curveball', '<unk>'), ('spkp271_105', 'decide', 'decided'), ('spkp271_105', "sky's", 'sky is'), ('spkp304_132', 'tutorials', '<unk>'), ('spkp304_132', '‘how', 'how'), ('spkp304_132', 'be?’', 'be'), ('spkp225_517', "you'll make in", "you're making"), ('spkp225_517', 'impactful', '<unk>'), 
('spkp304_481', "it's just", 'it'), ('spkp225_271', 'their', "they're"), ('spkp228_520', 'heartwarming', '<unk>'), ('spkp314_495', 'snorkeling', '<unk>'), ('spkp317_556', 'replaying', '<unk>'), ('spkp317_556', 'heartwarming', '<unk>'), ('spkp314_481', 'uh', 'ah'), ('spkp304_495', 'snorkeling', '<unk>'), ('spkp304_495', "we're", "you're"), ('spkp228_252', 'anew', 'and you'), ('spkp318_128', 'full-on', 'full on'), ('spkp318_128', 'kinda', 'kind of'), ('spkp264_518', 'like-minded', 'like minded'), ('spkp304_442', 'laundromat', '<unk>'), ('spkp314_330', 'uh', 'oh'), ('spkp317_595', '20/20', '<unk>'), ('spkp226_371', 'what-ifs', 'what ifs'), ('spkp304_324', 'timelines', '<unk>'), ('spkp271_313', 'onto', 'on to'), ('spkp271_313', 'i', "i'd"), ('spkp318_114', 'today’s', 'today is'), ('spkp314_442', 'laundromat', '<unk>'), ('spkp237_604', 'there’s', "there's"), ('spkp237_604', '“this', 'this'), ('spkp237_604', 'join,” i’m just', "join i'm"), ('spkp314_324', 'timelines', '<unk>'), ('spkp282_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'), ('spkp226_403', 'checkout', '<unk>'), ('spkp318_116', 'wound', 'went'), ('spkp226_373', 'i have', 'just had'), ('spkp374_169', 'chalkboard', '<unk>'), ('spkp226_415', 'tv', '<unk>'), ('spkp304_440', 'uh', 'ah'), ('spkp271_463', "it's", 'its'), ('spkp364_169', 'chalkboard', '<unk>'), ('spkp237_606', 'there’s', "there's"), ('spkp237_606', '“come', 'come'), ('spkp237_606', 'it’ll', "it'll"), ('spkp237_606', 'you’ll', "you'll"), ('spkp237_606', 'it,”', 'it'), ('spkp237_606', 'i’m', "i'm"), ('spkp318_102', 'kinda', 'kind of'), ('spkp364_141', 'ew', 'you'), ('spkp360_565', 'theatre', 'theater'), ('spkp360_565', 'impactful', '<unk>'), ('spkp339_395', 'right', 'write'), ('spkp339_395', 'tv', '<unk>'), ('spkp339_395', 'glitch', '<unk>'), ('spkp374_141', 'ew', 'you'), ('spkp271_339', 'look', 'looked'), ('spkp339_381', 'just frozen', 'chosen'), ('spkp225_298', 'favorite', 'favourite'), ('spkp360_571', 'heartwarming', '<unk>'), 
('spkp304_483', 'stop', 'stopped'), ('spkp304_483', 'cause', 'because'), ('spkp317_232', 'open-minded', 'open minded'), ('spkp339_418', 'what-ifs', 'what ifs'), ('spkp317_540', 'game-changer', 'game changer'), ('spkp228_536', "i'm", 'am'), ('spkp282_608', '“expand', 'expand'), ('spkp282_608', 'horizons”', 'horizons'), ('spkp271_488', 'telemarketer', '<unk>'), ('spkp271_488', 'cause i', 'because'), ('spkp225_529', 'had', 'have'), ('spkp364_196', 'times', 'time'), ('spkp339_342', 'grab', 'grabbed'), ('spkp339_342', 'armrests', '<unk>'), ('spkp232_541', 'veggie', '<unk>'), ('spkp314_130', 'clearer kinda', 'claire'), ('spkp314_130', 'okay', 'ok'), ('spkp314_130', "they're", 'are'), ('spkp226_171', 'too', 'to'), ('spkp304_124', 'it’s', "it's"), ('spkp304_124', 'non-stop', 'non stop'), ('spkp304_124', 'i’m', "i'm"), ('spkp304_124', '– kinda', 'kind of'), ('spkp304_130', 'clearer kinda', '<unk> kind of'), ('spkp304_130', 'okay', 'ok'), ('spkp318_300', 'uh', 'oh'), ('spkp314_124', 'it’s', "it's"), ('spkp314_124', 'non-stop', '<unk>'), ('spkp314_124', 'i’m', "i'm"), ('spkp314_124', '– kinda', 'kind of'), ('spkp282_387', 'cause', 'because'), ('spkp364_425', 'health', 'house'), ('spkp364_357', 'uh roller coaster', '<unk>'), ('spkp226_159', "must've", 'must have'), ('spkp232_569', 'recognized', 'recognised'), ('spkp339_168', 'uh', 'oh'), ('spkp339_168', 'uh', 'so i'), ('spkp318_35', 'this', 'the'), ('spkp282_422', 'as', 'is'), ('spkp232_596', 'recognize', 'recognise'), ('spkp232_596', 'gps', '<unk>'), ('spkp374_394', 'emails', '<unk>'), ('spkp364_394', 'emails', '<unk>'), ('spkp282_378', 'panicking', '<unk>'), ('spkp318_499', 'paddleboarding', '<unk>'), ('spkp318_499', 'adrenaline', '<unk>'), ('spkp232_19', "should've", 'should have'), ('spkp228_41', "you're", 'here'), ('spkp228_41', 'uh', 'oh'), ('spkp228_41', 'peace', 'piece'), ('spkp228_41', 'peace', 'piece'), ('spkp317_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'), ('spkp264_122', 'there’s', "there's"), 
('spkp282_595', '20/20', '<unk>'), ('spkp264_136', 'the', 'a'), ('spkp228_55', 'uh', 'oh'), ('spkp228_55', 'lostness', '<unk>'), ('spkp232_421', 'worst-case', 'worst case'), ('spkp237_216', 'uh', 'ah'), ('spkp237_216', 'unsee unknow', '<unk> oh no'), ('spkp364_579', 'a maze', 'domains'), ('spkp282_80', 'just awe-inspiring', 'awe inspiring'), ('spkp282_80', 'chaos and calm', 'cases come'), ('spkp374_545', 'impacting', '<unk>'), ('spkp237_558', 'oh', 'owe'), ('spkp360_607', '“you’re', "you're"), ('spkp360_607', 'don’t', "don't"), ('spkp360_607', 'once,”', 'once'), ('spkp360_607', 'i’m', "i'm"), ('spkp228_69', 'um', 'and'), ('spkp228_69', 'crafting', '<unk>'), ('spkp364_545', 'impacting', '<unk>'), ('spkp228_132', 'tutorials', '<unk>'), ('spkp228_132', '‘how', 'how'), ('spkp228_132', 'be?’', 'be'), ('spkp282_224', 'cause', 'because'), ('spkp225_105', "there's", "it's"), ('spkp225_105', 'sunrise', 'sun rise'), ('spkp225_105', "sky's", 'sky is'), ('spkp282_556', 'replaying', '<unk>'), ('spkp225_111', 'kinda', 'kind of'), ('spk1_16', 'uh kinda', 'a kind of'), ('spkp364_592', 'regroup', '<unk>'), ('spkp374_592', 'regroup', '<unk>'), ('spkp374_592', "a bundle of nerves and it's it's", "just <unk> i'm just"), ('spkp314_21', 'um', 'and'), ('spkp314_21', 'uh', 'ah'), ('spkp282_57', 'uh envelop', 'envelope'), ('spkp282_57', 'crafting', '<unk>'), ('spkp314_520', 'heartwarming', '<unk>'), ('spkp232_151', 'off-putting', 'off putting'), ('spkp228_495', 'snorkeling', '<unk>'), ('spkp304_534', 'uh', 'ah'), ('spkp304_534', 'reflective', 'reflected'), ('spkp226_561', 'uh the', 'a'), ('spkp271_517', 'impactful', '<unk>'), ('spkp304_520', 'heartwarming', '<unk>'), ('spkp226_575', 'exhilarating', 'ex so'), ('spkp314_252', 'anew', 'and you'), ('spkp304_508', "i'm just", 'i'), ('spkp364_61', 'awe-inspiring', 'awe inspiring'), ('spkp232_192', 'uh', 'oh'), ('spkp225_461', 'even', 'the'), ('spkp317_346', 'cause', 'because'), ('spkp360_388', 'gps', '<unk>'), ('spkp360_388', 'rerouting', 
'<unk>'), ('spkp360_388', 'stop-and-go', 'stop and go'), ('spkp264_485', 'cause', 'because'), ('spkp314_291', "it'll", 'it will'), ('spkp228_324', 'timelines', '<unk>'), ('spkp228_442', 'laundromat', '<unk>'), ('spkp225_448', 'glitch or', '<unk> all'), ('spkp225_448', "machine's", 'machines'), ('spkp339_545', 'impacting', '<unk>'), ('spkp317_353', "can't", 'just just'), ('spkp232_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'), ('spkp317_71', 'uh', 'ah'), ('spkp317_71', "flame's", 'flames'), ('spkp317_71', 'mesmerizing', '<unk>'), ('spkp317_421', 'uh', 'ah'), ('spkp317_421', 'worst-case', 'worst case'), ('spkp304_19', "should've", 'should have'), ('spkp317_347', 'cause i', 'because'), ('spkp317_347', 'leaves me alone', 'just leo lom'), ('spkp339_592', 'regroup', '<unk>'), ('spkp364_60', "there's", 'it was'), ('spkp314_509', 'um and then', "i'm a nen"), ('spk2_85', 'just reveling', 'revelling'), ('spkp314_253', 'uh', 'ah'), ('spkp317_390', 'cause', 'because'), ('spkp304_521', 'um so', '<unk>'), ('spkp226_212', 'uh', 'err'), ('spkp225_14', 'kinda', 'kinder'), ('spkp264_335', 'not-funny', 'not funny'), ('spkp314_535', 'cause i', 'because'), ('spkp232_150', 'realizing', 'realising'), ('spkp304_253', 'uh', 'oh'), ('spkp304_535', 'cause i', 'because'), ('spkp282_56', "you're", 'you'), ('spkp317_179', 'i mean', '<unk>'), ('spkp282_219', 'can not', 'cannot'), ('spkp225_138', 'yuck', '<unk>'), ('spkp232_385', 'scared', 'dared'), ('spkp317_151', 'off-putting', 'off putting'), ('spkp228_97', 'heartwarming', '<unk>'), ('spkp228_127', 'there’s', "there's"), ('spkp282_231', 'curveball', 'curve ball'), ('spkp317_145', 'looked', 'looks'), ('spkp225_104', 'surreal', '<unk>'), ('spkp282_543', 'heartwarming', '<unk>'), ('spkp282_543', 'workouts', '<unk>'), ('spkp360_606', 'there’s', "there's"), ('spkp360_606', '“come', 'come'), ('spkp360_606', 'it’ll', "it'll"), ('spkp360_606', 'you’ll', "you'll"), ('spkp360_606', 'it,”', 'it'), ('spkp360_606', 'i’m', "i'm"), ('spkp228_68', 
'just heartwarming', '<unk>'), ('spkp282_95', "you're", 'you are'), ('spkp282_95', 'just savoring', 'savouring'), ('spkp237_571', 'heartwarming', '<unk>'), ('spkp232_18', 'colorful', 'colourful'), ('spkp237_565', 'theatre', 'theater'), ('spkp237_565', 'so impactful', '<unk>'), ('spkp364_395', 'tv', '<unk>'), ('spkp364_395', 'glitch', '<unk>'), ('spkp339_141', 'ew', 'you'), ('spkp374_381', "can't", 'just look down and i i'), ('spkp237_388', 'gps', '<unk>'), ('spkp237_388', 'rerouting', '<unk>'), ('spkp237_388', 'stop-and-go', 'stop and go'), ('spkp374_395', 'tv', '<unk>'), ('spkp374_395', 'glitch', '<unk>'), ('spkp282_423', 'backlash', '<unk>'), ('spkp339_169', 'chalkboard', '<unk>'), ('spkp237_439', 'gonna', 'going to'), ('spkp304_6', 'uh', 'ah'), ('spkp364_342', 'armrests', '<unk>'), ('spkp304_119', 'vibe', '<unk>'), ('spkp339_196', 'times', 'time'), ('spkp374_430', 'scared of', 'dare to'), ('spkp237_405', 'um', "i'm"), ('spkp237_405', 'uh', 'ah'), ('spkp237_405', 'uh', 'ah'), ('spkp232_232', 'open-minded', 'open minded'), ('spkp232_232', 'uh realizing', 'er realising'), ('spkp374_418', 'what-ifs', 'what ifs'), ('spkp339_39', 'mesmerizing', '<unk> no <unk>'), ('spkp226_602', 'diy', '<unk>'), ('spkp314_125', 'it’s', "it's"), ('spkp314_125', 'world’s', "world's"), ('spkp232_554', 'and oh', 'i know'), ('spkp271_41', 'peace', 'piece'), ('spkp271_41', 'peace', 'piece'), ('spkp237_14', 'kinda', 'kind of'), ('spkp232_540', 'game-changer', 'game changer'), ('spkp232_540', 'weight', 'white'), ('spkp364_418', 'what-ifs', 'what ifs'), ('spkp282_392', '20', '<unk>'), ('spkp318_473', "it's daunting just", 'jaunting'), ('spkp304_125', 'pane', 'paine'), ('spkp304_125', 'it’s', "it's"), ('spkp304_125', 'world’s', "world's"), ('spkp271_112', 'it makes', "and it's"), ('spkp226_170', 'cause', 'because'), ('spkp339_357', 'roller coaster', '<unk>'), ('spkp225_528', 'café', 'cafe'), ('spkp225_528', 'cause i', 'because'), ('spkp228_251', 'old', 'all'), ('spkp317_541', 'veggie', 
'<unk>'), ('spkp228_523', 'scrolling', '<unk>'), ('spkp228_523', 'emails', '<unk>'), ('spkp317_555', 'uh', 'ah'), ('spkp304_469', "there's", 'there is'), ('spkp304_469', "can't help", "just it's not helped"), ('spkp360_216', 'unsee unknow', '<unk> <unk>'), ('spkp339_380', 'relax', 'feel relaxed'), ('spkp339_394', 'emails', '<unk>'), ('spkp360_564', 'theatre', 'theater'), ('spkp264_269', "can't", 'count'), ('spkp226_400', 'um to fantasize', '<unk>'), ('spkp304_333', 'onto', 'on to'), ('spkp318_103', 'kinda', 'kind of'), ('spkp237_607', '“you’re', "you're"), ('spkp237_607', 'don’t', "don't"), ('spkp237_607', 'once,”', 'once'), ('spkp237_607', 'i’m', "i'm"), ('spkp364_168', "couldn't", 'couldn t'), ('spkp271_310', 'uh', 'oh'), ('spkp318_117', 'texted', '<unk>'), ('spkp226_414', 'paralyzed', 'paralysed'), ('spkp264_533', 'cause i', 'because'), ('spkp360_560', 'theatre', 'theater'), ('spkp339_390', 'cause', 'because'), ('spkp314_479', 'uh', 'ah'), ('spkp237_159', "must've", 'must have'), ('spkp304_445', 'neighbor uh', 'neighbour'), ('spkp317_592', 'a', 'the'), ('spkp317_592', 'regroup', '<unk>'), ('spkp237_603', 'resonates', '<unk>'), ('spkp314_445', 'um', 'and'), ('spkp360_548', 'favorite', 'favourite'), ('spkp360_548', 'flavor', 'flavour'), ('spkp318_107', "um it's kinda", 'kind of'), ('spkp282_180', "they're explaining", 'there explain'), ('spkp264_523', 'scrolling', '<unk>'), ('spkp264_523', 'emails', '<unk>'), ('spkp226_389', 'service', 'serviced'), ('spkp339_353', "can't", "didn't chant didn't work"), ('spkp364_187', 'unsee unhear un-experience it', '<unk> <unk> or <unk> at'), ('spkp339_421', 'worst-case', 'worst case'), ('spkp374_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'), ('spkp304_486', 'uh', 'ah'), ('spkp304_486', "i've", 'i'), ('spkp314_492', 'a bit mystified by the discovery', 'oh man'), ('spkp225_262', 'uh', 'oh'), ('spkp314_486', "i've", 'i'), ('spkp314_486', 'gasp', 'gasped'), ('spkp314_486', 'caught', 'coat'), ('spkp364_346', 'cause', 
'because'), ('spkp237_429', 'googling', '<unk>'), ('spkp317_8', 'uh', 'ah'), ('spkp314_109', 'kinda', 'kind of'), ('spkp374_346', 'cause', 'because'), ('spkp237_415', 'tv', '<unk>'), ('spkp271_116', 'wound', 'went'), ('spkp304_121', 'it’s', "it's"), ('spkp304_121', 'i’m', "i'm"), ('spkp304_121', 'i’m 16', "i'm <unk>"), ('spkp304_121', 'i’d forgotten', "i'd forgot"), ('spkp237_373', "can't watch my heart just just racing too much", 'be like'), ('spkp314_135', 'i am overthinking', "i'm <unk>"), ('spkp237_367', "it's", 'it'), ('spkp364_408', 'um', 'and'), ('spkp314_121', 'it’s', "it's"), ('spkp314_121', 'i’m', "i'm"), ('spkp314_121', 'i’m 16', "i'm <unk>"), ('spkp314_121', 'i’d', "i'd"), ('spkp226_606', 'there’s', "there's"), ('spkp226_606', '“come', 'come'), ('spkp226_606', 'it’ll', "it'll"), ('spkp226_606', 'you’ll', "you'll"), ('spkp226_606', 'it,”', 'it'), ('spkp226_606', 'i’m', "i'm"), ('spkp304_135', 'overthinking', '<unk>'), ('spkp271_102', 'kinda', 'kind of'), ('spkp339_151', 'off-putting', 'off putting'), ('spkp282_369', 'cause', 'because'), ('spkp318_488', 'telemarketer', '<unk>'), ('spkp318_488', 'cause i', 'because'), ('spkp237_398', 'uh', "you're just"), ('spkp282_355', "it's", 'it'), ('spkp282_341', 'panicking', '<unk>'), ('spk3_16', 'uh kinda', 'kind of'), ('spkp339_179', 'try', 'tried'), ('spkp374_540', 'game-changer', 'game changer'), ('spkp364_232', 'open-minded', 'open minded'), ('spkp226_55', "same time and it's like", 'slight'), ('spkp226_55', 'lostness', '<unk>'), ('spkp374_232', 'open-minded', 'open minded'), ('spkp232_418', 'what-ifs', 'what ifs'), ('spkp364_540', 'game-changer', 'game changer'), ('spkp364_226', 'nope', 'no'), ('spkp360_602', 'diy', 'di'), ('spkp360_602', "i'm", 'and'), ('spkp264_127', 'there’s', "there's"), ('spkp226_69', 'um', "i'm"), ('spkp226_69', 'crafting', '<unk>'), ('spkp360_158', 'this', 'the'), ('spkp228_50', 'with', 'of'), ('spkp228_50', 'just perfect', 'purse puffed'), ('spkp232_342', 'armrests', '<unk>'), 
('spkp282_52', 'freeing', 'free'), ('spkp317_169', 'chalkboard', '<unk>'), ('spkp282_209', 'uh', 'are'), ('spkp225_128', 'full-on', 'full on'), ('spkp225_128', 'kinda', 'kind of'), ('spkp232_395', 'uh', 'er'), ('spkp232_395', 'tv', '<unk>'), ('spkp232_395', 'glitch', '<unk>'), ('spkp314_18', 'uhm', 'um'), ('spkp225_114', 'today’s', "today's"), ('spkp225_114', 'made', 'make'), ('spkp232_168', 'like', 'light'), ('spkp237_8', 'uh', 'ah'), ('spkp360_400', 'fantasize', '<unk>'), ('spkp304_519', 'um', "i'm"), ('spkp304_519', 'right', 'write'), ('spkp304_519', 'just', 'so so'), ('spkp339_596', 'gps', '<unk>'), ('spkp226_202', 'uh', 'er'), ('spkp314_525', 'groundbreaking', '<unk>'), ('spkp228_484', 'get-together', 'get together'), ('spkp226_216', 'unsee unknow', '<unk> i know'), ('spkp317_394', 'emails', '<unk>'), ('spkp317_394', 'rechecking', 're checking'), ('spkp304_21', 'um', "i'm"), ('spk2_56', 'the sky is', 'this'), ('spkp339_555', 'uh', 'ah'), ('spkp225_464', 'break-ins', 'break ins'), ('spkp225_464', 'double-checking', 'double checking'), ('spkp228_335', 'not-funny', 'not funny'), ('spkp317_61', "it's", 'is'), ('spkp317_61', 'just awe-inspiring', 'awe inspiring'), ('spkp314_294', 'want to', 'wanna'), ('spkp317_357', "i'm", 'am'), ('spkp317_342', 'armrests', '<unk>'), ('spkp225_303', 'into', 'to'), ('spkp228_452', 'googling', '<unk>'), ('spkp228_452', "i can't calm", 'count'), ('spkp339_232', 'open-minded', 'open minded'), ('spkp339_540', 'game-changer', 'game changer'), ('spkp225_459', 'just becomes', 'comes'), ('spkp317_418', 'what-ifs', 'what ifs'), ('spk2_57', 'envelop', 'envelope'), ('spk2_57', 'crafting', '<unk>'), ('spkp271_275', 'uh', 'well'), ('spkp228_485', 'cause', 'because'), ('spkp317_395', 'right', 'write'), ('spkp317_395', 'tv', '<unk>'), ('spkp317_395', "there's", "it's"), ('spkp317_395', 'glitch', '<unk>'), ('spkp226_571', 'heartwarming seeing', '<unk> seen'), ('spkp226_571', 'realize', 'realise'), ('spkp304_524', 'cause i', 'because'), 
('spkp271_513', "you've um you've fantasized", 'you <unk>'), ('spkp271_513', "happening and it's just just so surreal", 'all'), ('spkp226_565', 'impactful', '<unk>'), ('spkp264_59', 'um', 'and'), ('spkp264_442', 'uh', 'ah'), ('spkp264_442', 'laundromat', '<unk>'), ('spkp228_491', 'cause i', 'because'), ('spkp264_324', 'timelines', '<unk>'), ('spkp314_524', "i'm", 'am'), ('spkp314_524', 'cause i', 'because'), ('spkp271_261', 'do', 'did'), ('spkp226_203', 'apologizing', 'apologising'), ('spkp304_518', 'like-minded', 'like minded'), ('spkp364_71', "flame's", 'flames'), ('spkp314_518', 'like-minded', 'like minded'), ('spkp232_169', 'chalkboard', '<unk>'), ('spkp360_415', 'tv just', '<unk>'), ('spkp314_19', "should've", 'should have'), ('spkp228_122', 'there’s', "there's"), ('spkp225_115', 'earbuds', '<unk>'), ('spkp232_394', 'emails', '<unk>'), ('spkp282_546', "it's", 'is'), ('spkp364_596', 'gps', '<unk>'), ('spkp364_596', 'just puzzled', 'tousled'), ('spkp360_159', "must've", 'must have'), ('spkp264_132', 'tutorials', '<unk>'), ('spkp264_132', '‘how', 'how'), ('spkp264_132', 'be?’', 'be'), ('spkp374_569', 'it was just', 'is'), ('spkp232_357', 'roller coaster', '<unk>'), ('spkp364_541', 'this', 'the'), ('spkp364_541', 'veggie', '<unk>'), ('spkp374_233', 'trying to', 'try and'), ('spkp360_603', 'resonates', '<unk>'), ('spkp374_541', 'veggie', '<unk>'), ('spkp232_21', 'uh', 'ah'), ('spkp232_592', 'regroup', '<unk>'), ('spkp271_87', 'this', 'the'), ('spk4_22', 'how', 'her'), ('spkp237_399', 'um', "i'm"), ('spkp364_390', 'cause', 'because'), ('spkp318_19', "should've", 'should have'), ('spkp374_390', 'cause', 'because'), ('spkp314_120', 'hadn’t', "hadn't"), ('spkp314_120', '90s', '<unk>'), ('spkp314_120', '‘why not?’', 'why not'), ('spkp237_400', 'fantasize', '<unk>'), ('spkp271_103', 'kinda', 'kind of'), ('spkp304_134', 'overthinking', '<unk>'), ('spkp304_134', 'jewelry’s', "jewellery's"), ('spkp304_134', 'it’s', "it's"), ('spkp304_120', 'hadn’t', "hadn't"), 
('spkp304_120', '90s', '<unk>'), ('spkp304_120', '‘why not?’', 'why not'), ('spkp271_117', 'texted', '<unk>'), ('spkp226_175', 'cause', 'because'), ('spkp232_545', 'impacting', '<unk>'), ('spkp314_134', 'overthinking', '<unk>'), ('spkp314_134', 'jewelry’s', "jewelry's"), ('spkp314_134', 'it’s', "it's"), ('spkp237_372', 'uh', 'ah'), ('spkp304_3', 'uh', 'ah'), ('spkp304_3', 'like', 'that'), ('spkp374_347', 'cause', 'because'), ('spkp318_338', 'this', 'the'), ('spkp339_187', 'unsee unhear un-experience', '<unk> <unk> <unk>'), ('spkp374_421', 'worst-case', 'worst case'), ('spkp364_347', 'cause', 'because'), ('spkp314_108', 'cause uh', 'because'), ('spkp314_108', 'kinda', 'kind of'), ('spkp364_421', 'worst-case', 'worst case'), ('spkp264_287', 'just feel', 'still'), ('spkp314_487', 'cause i', 'because'), ('spkp225_511', 'videos', '<unk>'), ('spkp282_156', 'unhear', '<unk>'), ('spkp339_434', 'about', 'that'), ('spkp226_388', 'gps', '<unk>'), ('spkp226_388', 'rerouting', '<unk>'), ('spkp226_388', 'stop-and-go', 'stop and go'), ('spkp318_106', 'kinda', 'kind of'), ('spkp314_444', 'squishy', '<unk>'), ('spkp264_244', 'out for', 'up from'), ('spkp264_244', "they're not there anymore and", 'then'), ('spkp237_602', 'diy', '<unk>'), ('spkp237_164', 'uh there', 'i'), ('spkp304_450', 'uh', 'a'), ('spkp226_405', 'um so', '<unk>'), ('spkp226_405', 'drilling into your head making it impossible to uh to find any peace', 'just'), ('spkp374_179', 'try', 'tried'), ('spkp374_179', 'uh', 'a'), ('spkp304_444', 'squishy', '<unk>'), ('spkp314_336', "i'm", 'am'), ('spkp271_315', 'uh', 'oh'), ('spkp364_151', 'off-putting', 'off putting'), ('spkp304_478', 'braving', 'breathing'), ('spkp374_151', 'off-putting', 'off putting'), ('spkp225_288', 'mid-laugh', 'mid laugh'), ('spkp271_329', "they're just", "there's"), ('spkp364_145', 'looked', 'looks'), ('spkp318_138', 'yuck', '<unk>'), ('spkp364_153', 'they', 'i'), ('spkp339_393', 'uh', 'ah'), ('spkp304_452', 'googling', '<unk>'), ('spkp264_520', 
'this', 'the'), ('spkp264_520', 'heartwarming', '<unk>'), ('spkp237_166', 'scrunches', '<unk>'), ('spkp237_600', 'salsa', '<unk>'), ('spkp318_104', 'surreal', '<unk>'), ('spkp226_361', 'panicking', '<unk>'), ('spkp304_320', 'uh', 'ah'), ('spkp374_609', 'forms', 'forums'), ('spkp264_252', 'anew', 'and you'), ('spkp314_452', 'googling', '<unk>'), ('spkp304_446', 'um', "you're just"), ('spkp339_422', 'as', 'is'), ('spkp364_190', 'the', 'a'), ('spkp304_491', 'cause i', 'because'), ('spkp225_507', 'travelers', 'travellers'), ('spkp339_378', 'panicking', '<unk>'), ('spkp314_485', 'cause', 'because'), ('spkp228_524', 'cause i', 'cuz'), ('spkp225_275', 'uh', 'ah'), ('spkp304_485', 'cause', 'because'), ('spkp228_242', 'uh', 'oh'), ('spkp374_423', 'backlash', '<unk>'), ('spkp237_358', 'tossing', '<unk>'), ('spkp304_1', 'um', "i'm"), ('spkp364_423', 'backlash', '<unk>'), ('spkp318_448', 'glitch', '<unk>'), ('spkp339_16', 'uh kinda', 'kind of'), ('spkp237_364', 'second-guess', 'second guess'), ('spkp314_122', 'there’s', "there's"), ('spkp226_605', '“you’d', "you'd"), ('spkp226_605', 'instrument,”', 'instrument'), ('spkp282_395', 'uh', 'are'), ('spkp282_395', 'right', 'write'), ('spkp282_395', 'tv', '<unk>'), ('spkp282_395', 'glitch', '<unk>'), ('spkp237_370', 'but', 'it'), ('spkp232_547', 'favorite', 'favourite'), ('spkp232_547', 'ambiance', '<unk>'), ('spkp364_379', "there's this", 'there is the'), ('spkp237_416', 'right', 'write'), ('spkp271_115', 'earbuds', '<unk>'), ('spkp304_122', 'there’s', "there's"), ('spkp282_418', 'what-ifs', 'what ifs'), ('spkp364_392', '20', '<unk>'), ('spkp374_392', '20', '<unk>'), ('spkp226_188', 'open-minded', 'open minded'), ('spkp282_342', 'armrests', '<unk>'), ('spkp339_608', '“expand', 'expand'), ('spkp339_608', 'horizons”', 'horizons'), ('spkp364_225', 'patience', 'patients'), ('spkp374_231', 'curveball', 'curve ball'), ('spkp318_528', 'café', 'cafe'), ('spkp318_528', 'cause i', 'because'), ('spkp364_543', 'heartwarming', '<unk>'), 
('spkp364_543', 'workouts', '<unk>'), ('spkp374_225', 'patience', 'patients'), ('spkp374_543', 'heartwarming', '<unk>'), ('spkp374_543', 'workouts', '<unk>'), ('spkp364_231', 'curveball', 'curve ball'), ('spkp282_593', 'daunting', 'so'), ('spkp232_341', 'panicking', '<unk>'), ('spkp232_427', 'i i', "i'd"), ('spkp228_53', 'uh', 'oh'), ('spkp374_219', 'can not', 'cannot'), ('spkp237_562', 'cinematography', '<unk>'), ('spkp264_124', 'it’s', "it's"), ('spkp264_124', 'non-stop', 'non stop'), ('spkp264_124', 'i’m', "i'm"), ('spkp264_124', '– kinda', 'kind of'), ('spkp374_594', "i'm just", 'not'), ('spkp364_580', "it's", 'is'), ('spkp228_108', 'danced', 'dance'), ('spk6_19', "should've", 'should have'), ('spkp225_117', 'texted', '<unk>'), ('spkp228_120', 'hadn’t', "hadn't"), ('spkp228_120', '90s', '<unk>'), ('spkp228_120', '‘why not?’', 'why not'), ('spkp317_156', 'unhear', '<unk>'), ('spkp228_134', 'overthinking', '<unk>'), ('spkp228_134', 'jewelry’s', "jewelry's"), ('spkp228_134', 'it’s', "it's"), ('spkp225_103', 'realize', 'realise'), ('spkp314_268', 'and i', 'an eye'), ('spkp339_595', '20/20', '<unk>'), ('spkp317_89', 'self-care', 'self care'), ('spkp360_417', 'this', 'the'), ('spkp339_581', "there's", 'there is'), ('spkp360_371', 'what-ifs', 'what ifs'), ('spkp271_511', 'um', "i'm"), ('spkp271_511', 'videos', '<unk>'), ('spkp304_526', 'cause i', 'because'), ('spkp228_487', 'cause', 'because'), ('spkp304_240', 'uh', 'ah'), ('spkp271_505', 'um', "i'm"), ('spkp271_505', 'bioluminescent', '<unk>'), ('spkp339_556', 'replaying', '<unk>'), ('spkp339_556', 'heartwarming', '<unk>'), ('spkp228_478', 'staying in', 'stay again'), ('spkp271_288', 'mid-laugh', 'mid laugh'), ('spkp339_224', 'cause', 'because'), ('spk2_55', "you're", 'you all'), ('spk2_55', 'uh', 'are'), ('spk2_55', 'lostness', '<unk>'), ('spkp364_98', 'replay', '<unk>'), ('spkp364_98', 'resonates', '<unk>'), ('spkp364_98', 'soundtrack', '<unk>'), ('spkp228_444', 'squishy', '<unk>'), ('spkp232_180', 'uh', 'ah'), 
('spkp317_62', "you're", 'you are'), ('spk2_69', 'crafting', '<unk>'), ('spkp228_336', 'lift', 'live'), ('spkp339_219', 'i can', 'and'), ('spkp304_296', 'sadder', 'sad'), ('spkp317_63', 'uh', 'ah'), ('spkp225_466', "just can't", "don't"), ('spkp264_496', "um i'm just", 'uh'), ('spkp264_496', 'trek', 'trick'), ('spkp264_496', 'just thinking', '<unk>'), ('spkp317_433', 'sibling', 'siblings'), ('spkp364_99', 'colors', 'colours'), ('spkp317_77', 'past', 'pass'), ('spkp339_225', 'patience', 'patients'), ('spkp339_543', 'heartwarming', '<unk>'), ('spkp339_543', 'workouts', '<unk>'), ('spkp339_231', 'curveball', 'curve ball'), ('spkp232_156', 'unhear', '<unk>'), ('spkp226_200', 'uh', 'ah'), ('spkp271_504', "i'm just", 'i'), ('spkp317_382', 'jolts', '<unk>'), ('spkp317_382', 'but', 'i'), ('spkp360_358', 'tossing', '<unk>'), ('spkp228_486', "i've", 'i'), ('spkp314_533', 'cause i', 'because'), ('spkp304_269', 'realization', 'realisation'), ('spkp225_499', 'paddleboarding', '<unk>'), ('spkp225_499', 'adrenaline', '<unk>'), ('spkp360_364', 'second-guess', 'second guess'), ('spkp226_228', 'uh', 'a'), ('spkp228_135', 'i am overthinking', "i'm <unk>"), ('spkp282_545', 'impacting', '<unk>'), ('spkp225_102', 'kinda', 'kind of'), ('spkp225_116', 'wound', 'went'), ('spk6_18', 'colorful', 'colourful'), ('spk6_18', 'uhm', 'erm'), ('spkp228_121', 'it’s', "it's"), ('spkp228_121', 'i’m', "i'm"), ('spkp228_121', 'i’m 16', "i'm <unk>"), ('spkp228_121', 'i’d', "i'd"), ('spkp228_109', 'kinda', 'kind of'), ('spkp364_595', '20/20', '<unk>'), ('spkp374_595', '20/20', '<unk>'), ('spkp314_26', "it's just mind-blowing", 'mind blowing'), ('spkp364_581', "there's", 'there is'), ('spkp364_218', 'this', 'a'), ('spkp282_592', 'regroup', '<unk>'), ('spkp318_515', "you're", 'you were'), ('spkp317_194', 'uh', "i'm"), ('spkp374_224', 'cause', 'cuz'), ('spkp226_57', 'um', "i'm"), ('spkp226_57', 'envelop', 'envelope'), ('spkp226_57', 'crafting', '<unk>'), ('spkp226_57', 'act of creating of bringing joy 
through food', 'acts'), ('spkp364_556', 'replaying', '<unk>'), ('spkp364_556', 'so heartwarming', '<unk>'), ('spkp282_87', 'this', 'the'), ('spkp264_119', 'vibe', '<unk>'), ('spkp374_556', 'um', 'then'), ('spkp374_556', 'replaying', '<unk>'), ('spkp374_556', "brings a smile to your face it's like you're", '<unk>'), ('spkp374_556', "so thankful for those moments for that perfect date and it's it's just so so heartwarming", '<unk>'), ('spkp364_224', 'cause', 'because'), ('spkp360_166', 'scrunches', '<unk>'), ('spkp318_529', "i'm", 'am'), ('spkp318_529', 'cause i', 'because'), ('spkp282_357', 'uh roller coaster', '<unk>'), ('spkp282_357', "i'm", 'am'), ('spkp282_431', 'just imagining', 'imagine'), ('spkp318_32', 'um', 'and'), ('spkp339_153', 'tune out', 'two now'), ('spkp364_387', 'cause', 'because'), ('spkp364_393', 'uh', 'us'), ('spk4_21', 'um', "i'm"), ('spkp374_387', 'cause', 'because'), ('spkp237_371', 'what-ifs', 'what ifs'), ('spkp282_394', 'emails', '<unk>'), ('spkp282_394', 'rechecking', "we're checking"), ('spkp271_114', 'today’s', "today's"), ('spkp364_378', 'panicking', '<unk>'), ('spkp237_403', 'checkout', '<unk>'), ('spkp237_403', 'dragging', 'jogging'), ('spkp232_234', 'uh', '<unk>'), ('spkp318_307', 'this', 'the'), ('spkp226_604', 'there’s', "there's"), ('spkp226_604', '“this', 'this'), ('spkp226_604', 'join,” i’m just', "join i'm"), ('spkp374_378', 'panicking', '<unk>'), ('spkp237_365', 'obsessively', '<unk>'), ('spkp364_422', "take a break because it's it's too much just just too real", 'ugh'), ('spkp271_128', 'full-on', 'full on'), ('spkp232_208', 'uh', 'ah'), ('spkp364_436', 'in the', 'a'), ('spkp228_525', 'groundbreaking', '<unk>'), ('spkp317_553', 'heartwarming', '<unk>'), ('spkp314_490', 'cause i', 'because like'), ('spkp282_155', 'the uh', 'that'), ('spkp304_484', 'get-together', 'get together'), ('spkp304_490', 'cause', 'because'), ('spkp317_221', 'uh a thousand', '<unk> <unk>'), ('spkp314_484', 'get-together', 'get together'), ('spkp314_484', 
'just shocked', 'shock'), ('spkp282_169', 'chalkboard', '<unk>'), ('spkp339_423', 'backlash', '<unk>'), ('spkp339_423', 'the', 'a'), ('spkp374_608', '“expand', 'expand'), ('spkp374_608', 'horizons”', 'horizons'), ('spkp318_111', 'kinda', 'kind of'), ('spkp304_447', 'update', '<unk>'), ('spkp264_535', 'surprised and excited cause i i never meet anyone', 'surprise sighted one'), ('spkp314_335', 'not-funny', 'not funny'), ('spkp228_294', 'letting go', 'lingo'), ('spkp364_608', '“expand', 'expand'), ('spkp364_608', 'horizons”', 'horizons'), ('spkp271_464', 'break-ins', 'break ins'), ('spkp271_464', 'double-checking', 'double checking'), ('spkp226_406', "staring at the spinning wheel waiting hoping and it's", 'just just'), ('spkp318_105', 'decide', 'decided'), ('spkp304_335', 'not-funny', 'not funny'), ('spkp339_392', '20', '<unk>'), ('spkp360_562', 'cinematography', '<unk>'), ('spkp271_458', 'armrest', '<unk>'), ('spkp271_458', 'praying', 'trying'), ('spkp374_146', 'ugh', 'ah'), ('spkp271_403', 'checkout', '<unk>'), ('spkp318_604', 'there’s', "there's"), ('spkp318_604', '“this', 'this'), ('spkp318_604', 'join,” i’m just', "join i'm"), ('spkp271_365', 'obsessively', '<unk>'), ('spkp364_109', 'kinda', 'kind of'), ('spkp228_595', '20/20', '<unk>'), ('spkp304_346', 'brace', 'braced'), ('spkp271_371', 'what-ifs', 'what ifs'), ('spkp237_114', 'today’s', "today's"), ('spkp374_121', 'it’s', "it's"), ('spkp374_121', 'i’m', "i'm"), ('spkp374_121', 'i’m 16', "i'm <unk>"), ('spkp374_121', 'i’d', "i'd"), ('spkp364_135', 'i am overthinking', "i'm <unk>"), ('spkp360_511', 'um so', '<unk>'), ('spkp360_511', 'videos', '<unk>'), ('spkp264_208', 'this', 'a'), ('spkp364_121', 'it’s', "it's"), ('spkp364_121', 'i’m', "i'm"), ('spkp364_121', 'i’m 16', "i'm <unk>"), ('spkp364_121', 'i’d', "i'd"), ('spkp360_505', 'bioluminescent', '<unk>'), ('spkp237_128', 'full-on', 'full on'), ('spkp237_128', 'but', 'that'), ('spkp339_493', 'um', "i'm"), ('spkp374_135', 'overthinking', '<unk>'), 
('spkp360_288', 'mid-laugh', 'mid laugh'), ('spkp228_556', 'replaying', '<unk>'), ('spkp228_556', 'so heartwarming', '<unk>'), ('spkp314_391', 'halt', 'hold'), ('spkp282_132', 'tutorials', '<unk>'), ('spkp282_132', '‘how', 'how'), ('spkp282_132', 'be?’', 'be'), ('spkp339_444', 'squishy', '<unk>'), ('spkp237_464', 'break-ins', 'break ins'), ('spkp237_464', 'double-checking', 'double checking'), ...]