Continuation/tidying of this notebook

import json

# Load the processed synthesis metadata; keep only entries not marked discarded.
with open("/tmp/procced2.1.json") as inf:
    a = json.load(inf)
filtered1 = [x for x in a if not "discarded" in x]
# Collect fileids already assigned in a previous run (skip header and blanks).
already_done = []
with open("/tmp/assignment_short.csv") as tsvf:
    for line in tsvf:
        line = line.strip()
        if line.startswith("syntitem"):
            continue
        if not line:
            continue
        already_done.append(line.split(",")[0])
filtered = [x for x in filtered1 if not x["fileid"] in already_done]
# NOTE(review): the next line overwrites the `already_done` filtering above,
# so `filtered` ends up identical to `filtered1` (the (1412, 1412, 1405)
# output below confirms this). Confirm the re-assignment is intentional
# before relying on `filtered` excluding previously-assigned items.
filtered = [x for x in a if not "discarded" in x]
len(filtered), len(filtered1), len(already_done)
(1412, 1412, 1405)
import numpy as np

# Each archive entry maps a single "<path>.bvh" key to a frame count;
# convert to seconds at the capture framerate, keyed by the bare file stem.
L = np.load('bvh_pt_lengths.npy', allow_pickle=True)
framerate = 120
point_length = {}
for entry in L:
    path, n_frames = next(iter(entry.items()))
    stem = path.split('/')[-1].replace('.bvh', '')
    point_length[stem] = n_frames / framerate
# Duration of each synthesized clip, keyed by fileid.
synth_length = {item["fileid"]: item["duration"] for item in filtered}

synth_pre = {}
synth_post = {}
synth_data = {}
synth_times = []

# Timing of the demonstrative ("determiner") within each synthesized clip.
for entry in filtered:
    fid = entry["fileid"]
    dem_start = float(entry["determiner_start"])
    dem_end = float(entry["determiner_end"])
    duration = float(entry["duration"])

    synth_pre[fid] = dem_start  # time before the demonstrative
    # NOTE(review): this is the demonstrative's own length (end - start),
    # not the time remaining after it — unlike pt_post (total - start)
    # computed for the pointing clips.  Confirm the asymmetry is intended.
    synth_post[fid] = dem_end - dem_start
    synth_data[fid] = (dem_start, duration)
    synth_times.append((duration, dem_start))
import pandas as pd
from glob import glob

def find_demonstrative_index(expression, demonstratives):
    """Return the index of the first word of *expression* (split on single
    spaces) that appears in *demonstratives*, or -1 if none does."""
    return next(
        (idx for idx, token in enumerate(expression.split(' '))
         if token in demonstratives),
        -1,
    )
demonstratives = ['this','that','one','those','these','there','here']

files = glob('/tmp/tsv_pt_segments/*.tsv')
print(len(files))
# words_to_exclude = ['left', 'right', 'middle', 'back']
# files = [f for f in files if not any(word in f for word in words_to_exclude)]
pt_times = []
pt_names = []
pt_pre = {}
pt_post = {}
pt_data = {}
# For each pointing-clip transcript (tab-separated t0/t1/word rows), find
# when the first demonstrative is spoken and record pre/post timings.
for fn in files:
    temp_list = []
    item = fn.split('/')[-1].split('.tsv')[0]
    # BUG FIX: the file used to be opened twice via a redundant nested
    # `with open(fn) as f:` — one context manager is enough.
    with open(fn) as f:
        for line in f:
            t0, t1, wrd = line.strip().split('\t')
            t0, t1 = float(t0), float(t1)
            temp_list.append([t0, t1, wrd])
    df = pd.DataFrame(temp_list, columns=['t0','t1','wrd'])
    expr = ' '.join(df['wrd'])
    index = find_demonstrative_index(expr, demonstratives)
    # NOTE(review): when no demonstrative is found, index is -1 and
    # iloc[-1] silently picks the *last* word's start time — confirm
    # this fallback is intended rather than skipping the file.
    dem_time = df['t0'].iloc[index]
    total_time = point_length[item]
    pt_times.append((total_time, dem_time))
    pt_names.append(item)
    pt_pre[item] = dem_time
    pt_post[item] = point_length[item] - pt_pre[item]
    pt_data[item] = (dem_time, total_time)
1147
# Durations (first tuple element) of the synthesized and pointing clips.
synth_lengths_array = np.array([duration for duration, _ in synth_times])
print(synth_lengths_array)
lengths_array = np.array(synth_lengths_array)

lengths_array_pt = np.array([duration for duration, _ in pt_times])
# Summary statistics for the synthesized-clip durations.
mean_length = np.mean(lengths_array)
std_dev_length = np.std(lengths_array)
synth_lengths_max = np.max(lengths_array)
synth_lengths_min = np.min(lengths_array)

print("mean_length", mean_length)
print("std_dev_length", std_dev_length)
print("synth_lengths_max", synth_lengths_max)
print("synth_lengths_min", synth_lengths_min)

# Median (used as the short/long cutoff) and 75th percentile for each set.
median_length_syn = np.percentile(lengths_array, 50)
percentile_75_length = np.percentile(lengths_array, 75)
print("percentile_75_length", percentile_75_length)

cutoff_time_syn = median_length_syn
print("cutoff_time_syn", cutoff_time_syn)

median_length_pt = np.percentile(lengths_array_pt, 50)
percentile_75_length_pt = np.percentile(lengths_array_pt, 75)
print("percentile_75_length_pt", percentile_75_length_pt)

cutoff_time_pt = median_length_pt
print("cutoff_time_pt", cutoff_time_pt)

print("len(pt_data)", len(pt_data))
print("len(synth_data)", len(synth_data))

# Partition clips by duration relative to the median cutoff.
short_pt = {name: info for name, info in pt_data.items() if info[1] < cutoff_time_pt}
short_synth = {name: info for name, info in synth_data.items() if info[1] < cutoff_time_syn}
long_pt = {name: info for name, info in pt_data.items() if info[1] >= cutoff_time_pt}
long_synth = {name: info for name, info in synth_data.items() if info[1] >= cutoff_time_syn}

# Restrict the pre/post timing dicts to the short clips.
synth_pre_short = {name: t for name, t in synth_pre.items() if name in short_synth}
synth_post_short = {name: t for name, t in synth_post.items() if name in short_synth}
pt_pre_short = {name: t for name, t in pt_pre.items() if name in short_pt}
pt_post_short = {name: t for name, t in pt_post.items() if name in short_pt}
[4.576      3.50933333 2.82666667 ... 5.25866667 4.82133333 3.744     ]
mean_length 4.1298852691218135
std_dev_length 1.371061738198894
synth_lengths_max 14.506
synth_lengths_min 2.026666666666667
percentile_75_length 4.768
cutoff_time_syn 3.8186666666666667
percentile_75_length_pt 5.708333333333334
cutoff_time_pt 4.158333333333333
len(pt_data) 1147
len(synth_data) 1412
len(pt_pre)
573
D = []
# From here on, restrict the matching to the "short" clips only.
synth_pre = synth_pre_short
synth_post = synth_post_short
pt_pre = pt_pre_short
pt_post = pt_post_short

# Build the cost matrix for the Hungarian assignment: rows are synthesized
# clips, columns are pointing clips.  Cost measures misalignment of the
# demonstrative timings, with multiplicative penalties when the synthesized
# audio cannot fit inside / starts before / ends after the pointing clip.
# NOTE(review): synth_post is determiner_end - determiner_start while
# pt_post is total - dem_start; confirm the two "post" quantities are
# meant to be comparable.
for syntitem in synth_pre:
    # Hoist row-invariant lookups out of the inner loop (the original
    # re-read these dicts on every inner iteration).
    syntpre = synth_pre[syntitem]
    syntpost = synth_post[syntitem]
    syntlen = synth_length[syntitem]
    row = []
    for pointitem in pt_pre:
        pointpre = pt_pre[pointitem]
        pointpost = pt_post[pointitem]

        cost = abs(syntpre - pointpre) + abs(syntpost - pointpost)
        # Strongly penalize audio longer than the motion clip.
        if syntlen > point_length[pointitem]:
            cost *= 10
        # penalize if synt starts before or ends after point
        if syntpre > pointpre:
            cost *= 2
        if syntpost > pointpost:
            cost *= 2
        row.append(cost)

    D.append(row)
dd = np.array(D)
dd.shape
(700, 573)
!pip install munkres
Requirement already satisfied: munkres in /Users/joregan/opt/anaconda3/envs/nst-tts/lib/python3.10/site-packages (1.1.4)
from munkres import Munkres

m = Munkres()
# Optimal one-to-one assignment (Hungarian algorithm) minimizing total cost
# over the matrix built above.
assignment_re = m.compute(D)
# Hoist the key lists: the original rebuilt list(...) on every iteration,
# and left the output file unclosed (plain open() with a manual close).
synth_keys = list(synth_pre.keys())
pt_keys = list(pt_pre.keys())
with open('assignment_short.csv', 'w') as f:
    f.write('syntitem,pointitem,offset\n')
    for syntidx, pointidx in assignment_re:
        syntitem = synth_keys[syntidx]
        pointitem = pt_keys[pointidx]
        # offset = how much later the pointing demonstrative occurs than
        # the synthesized one.
        f.write('{},{},{}\n'.format(syntitem, pointitem, pt_pre[pointitem] - synth_pre[syntitem]))
# Re-read the assignment to learn which synthesized items were used ...
selected = []
with open("/tmp/assignment_short.csv") as f:
    for row in f:
        row = row.strip()
        if row.startswith("syntitem"):
            continue
        fields = row.split(",")
        selected.append(fields[0])

# ... then keep only the metadata entries for those items.
filtered = []
with open("/tmp/procced2.1.json") as inf:
    newdata = json.load(inf)
for entry in newdata:
    if entry["fileid"] in selected:
        filtered.append(entry)
# Map each pointing clip to the synthesized clip(s) assigned to it, with
# the audio offset (as a string) for each pairing.
assigned = {}
with open("/tmp/assignment_short.csv") as f:
    for line in f:
        line = line.strip()
        if line.startswith("syntitem"):
            continue
        parts = line.split(",")
        if not parts[1] in assigned:
            assigned[parts[1]] = {}
        # BUG FIX: the overwrite check used to test `parts[1] in assigned`
        # *after* the key had just been inserted, so it could never fire.
        # Warn before replacing an existing offset for the same pair.
        if parts[0] in assigned[parts[1]]:
            print(f"Warning: overwriting for {parts[1]}: {parts[0]}")
        assigned[parts[1]][parts[0]] = parts[2]
assigned["st1.8087_8452_0.97stretch"]
{'hsi_7_0719_227_002_main__ggpt__702': '0.009099999999999997',
 'hsi_4_0717_227_002_main__ggpt__626': '0.019100000000000006',
 'hsi_5_0718_209_001_main__ggpt_2__402': '0.0030999999999999917'}
from pathlib import Path

# Emit one ffmpeg command per rendered video, muxing in the assigned
# generated-speech audio shifted so the demonstratives line up.
with open("/tmp/ffmpeg-runner.sh", "w") as outf:
    for vid in Path("/tmp/renders_new").glob("*.mp4"):
        stem = vid.stem
        if stem.startswith("nonreferential"):
            continue
        # Stems split as "<vidname>_smplx_<audname>_<gender>".
        pieces = stem.split("_")
        gender = pieces[-1]
        p2 = stem.split("_smplx_")
        vidname = p2[0]
        audname = p2[1].replace(f"_{gender}", "")

        if not vidname in assigned:
            print(f"no assignment for {vidname}")
            continue
        if not audname in assigned[vidname]:
            print(f"no assignment for {vidname}: {audname}")
            continue
        time = float(assigned[vidname][audname])

        outf.write(f"ffmpeg -i /tmp/renders_new/{stem}.mp4 -i /tmp/groundinggpt-generated-speech/{audname}.wav")
        if time < 0.0:
            # BUG FIX: atrim's start must be a positive offset; the negative
            # value was previously passed through unchanged, so nothing was
            # trimmed.  Trim |time| seconds from the start of the audio.
            outf.write(f" -filter_complex \"[1:a]atrim=start={-time}[aud]\" -map 0:v -map \"[aud]\"")
        else:
            # Positive offsets delay the audio by `time` milliseconds on
            # both channels.
            itime = int(time * 1000.0)
            outf.write(f" -filter_complex \"[1:a]adelay={itime}|{itime}[aud]\" -map 0:v -map \"[aud]\"")
        outf.write(f" -c:v copy -c:a aac /tmp/output_aligned/{stem}.mp4\n")
# Same muxing commands, driven from the extended assignment CSV instead of
# the rendered-video directory listing.
with open("/tmp/assigned_extended_all.csv") as f, open("/tmp/ffmpeg-runner.sh", "w") as outf:
    for line in f:
        line = line.strip()
        if line.startswith("syntitem"):
            continue
        parts = line.split(",")

        outf.write(f"ffmpeg -i /tmp/output/{parts[1]}.mp4 -i /tmp/groundinggpt-generated-speech/{parts[0]}.wav")
        time = float(parts[2])
        if time < 0.0:
            # BUG FIX: atrim's start must be positive; pass |time| rather
            # than the raw negative offset (which trimmed nothing).
            outf.write(f" -filter_complex \"[1:a]atrim=start={-time}[aud]\" -map 0:v -map \"[aud]\"")
        else:
            itime = int(time * 1000.0)
            outf.write(f" -filter_complex \"[1:a]adelay={itime}|{itime}[aud]\" -map 0:v -map \"[aud]\"")
        outf.write(f" -c:v copy -c:a aac /tmp/output_minus/{parts[0]}.mp4\n")
filtered_dict = {x["fileid"]: x for x in filtered1}
# Use a set for O(1) duplicate checks (was a list with O(n) membership).
seen_motion = set()
filter_seen = True

# Extend each assignment row with person/room/topic metadata; when
# filter_seen is on, keep only the first pairing seen per motion clip.
with open("/tmp/assignment_short.csv") as f, open("/tmp/assigned_extended_all.csv", "w") as outf:
    for line in f:
        line = line.strip()
        if line.startswith("syntitem"):
            outf.write(line + ",person,room,topic\n")
            continue
        parts = line.split(",")
        cur = filtered_dict[parts[0]]
        if filter_seen and parts[1] in seen_motion:
            continue
        seen_motion.add(parts[1])
        outf.write(f"{line},{cur['person']},{cur['room']},{cur['topic']}\n")
with open("/tmp/aligned_motion_audio.tsv", "w") as outf:
    # NOTE(review): the file has a .tsv extension but rows are
    # comma-separated — confirm downstream consumers expect commas.
    outf.write("filename,person,room,topic\n")
    for videofile in Path("/tmp/output_aligned/").glob("**/*.mp4"):
        stem = videofile.stem

        # Stems split as "<vidname>_smplx_<audname>_<gender>".
        name_parts = stem.split("_")
        gender = name_parts[-1]
        halves = stem.split("_smplx_")
        vidname = halves[0]
        audname = halves[1].replace(f"_{gender}", "")

        print(vidname, audname)

        cur = filtered_dict[audname]
        outf.write(f"{stem}.mp4,{cur['person']},{cur['room']},{cur['topic']}\n")
# Print mux commands pairing each downloaded referential render with its
# cut reference audio, derived from the filename stem.
download_dir = Path("/Users/joregan/Downloads/drive-download-20250502T164348Z-1-001")
for new_video in download_dir.glob("*.mp4"):
    stem = new_video.stem
    audio_part = stem.split("_start")[0]
    audio_part = audio_part.replace("referential_", "").replace("_ref", "_")
    print(f"ffmpeg -i {new_video} -i /tmp/cut_wav/{audio_part}.wav -c:v copy -c:a aac /tmp/output_ref/{stem}.mp4")
ffmpeg -i /Users/joregan/Downloads/drive-download-20250502T164348Z-1-001/referential_hsi_7_0719_227_002_ref89_start15518_end15651_fps30_smplx.mp4 -i /tmp/cut_wav/hsi_7_0719_227_002_89.wav -c:v copy -c:a aac /tmp/output_ref/referential_hsi_7_0719_227_002_ref89_start15518_end15651_fps30_smplx.mp4
ffmpeg -i /Users/joregan/Downloads/drive-download-20250502T164348Z-1-001/referential_hsi_4_0717_209_001_ref8_start1113_end1249_fps30_smplx.mp4 -i /tmp/cut_wav/hsi_4_0717_209_001_8.wav -c:v copy -c:a aac /tmp/output_ref/referential_hsi_4_0717_209_001_ref8_start1113_end1249_fps30_smplx.mp4
ffmpeg -i /Users/joregan/Downloads/drive-download-20250502T164348Z-1-001/referential_hsi_5_0718_222_002_ref8_start936_end1069_fps30_smplx.mp4 -i /tmp/cut_wav/hsi_5_0718_222_002_8.wav -c:v copy -c:a aac /tmp/output_ref/referential_hsi_5_0718_222_002_ref8_start936_end1069_fps30_smplx.mp4
ffmpeg -i /Users/joregan/Downloads/drive-download-20250502T164348Z-1-001/referential_hsi_3_0715_209_006_ref33_start6967_end7097_fps30_smplx.mp4 -i /tmp/cut_wav/hsi_3_0715_209_006_33.wav -c:v copy -c:a aac /tmp/output_ref/referential_hsi_3_0715_209_006_ref33_start6967_end7097_fps30_smplx.mp4
ffmpeg -i /Users/joregan/Downloads/drive-download-20250502T164348Z-1-001/referential_hsi_6_0718_209_001_ref55_start5080_end5211_fps30_smplx.mp4 -i /tmp/cut_wav/hsi_6_0718_209_001_55.wav -c:v copy -c:a aac /tmp/output_ref/referential_hsi_6_0718_209_001_ref55_start5080_end5211_fps30_smplx.mp4