Extract Riksdag videos from API
Extracts the download URLs of speech videos (and the speaker metadata) from the Riksdag API
import requests
import json
# Fetch one sample record from the Riksdag video API so the helpers below
# can be tried out interactively.
# A timeout is set because requests otherwise waits indefinitely on a stalled
# connection; raise_for_status() surfaces HTTP errors before JSON parsing.
sample = requests.get("https://data.riksdagen.se/api/mhs-vodapi?H210308", timeout=30)
sample.raise_for_status()
data = json.loads(sample.text)
# Notebook-style peek at the first video entry (the value is discarded when
# this runs as a plain script).
data['videodata'][0]
def viddata_get_single_stream(videodata, hires=True):
    """Return the download URLs of one quality level from a single video entry.

    The entry is expected to have the shape
    ``{'streams': {'files': [{'bandwidth': [{'name': ..., 'downloadurl': ...}]}]}}``.
    Any part that is missing, null, or of the wrong type is skipped instead
    of raising, so one malformed record cannot abort a bulk extraction.

    Args:
        videodata: One video entry (a dict), or ``None``.
        hires: If true, pick the streams named ``'Hög kvalitet'``; otherwise
            pick the streams named ``'Låg kvalitet'``.

    Returns:
        A list of ``downloadurl`` values in the order they appear. Empty
        when nothing matches or the entry is malformed.
    """
    wanted = 'Hög kvalitet' if hires else 'Låg kvalitet'

    if not isinstance(videodata, dict):
        return []
    streams = videodata.get('streams')
    if not isinstance(streams, dict):
        return []
    files = streams.get('files')
    if not isinstance(files, list):
        return []

    videos = []
    for vfile in files:
        if not isinstance(vfile, dict):
            continue
        # 'bandwidth' may be absent or null; treat both as "no streams".
        for bw in vfile.get('bandwidth') or []:
            if not isinstance(bw, dict) or bw.get('name') != wanted:
                continue
            url = bw.get('downloadurl')
            # Skip entries without a URL so callers only ever get strings
            # they can write out or download.
            if url:
                videos.append(url)
    return videos
def viddata_get_streams(videodata, hires=True):
    """Collect the download URLs from every video entry of an API response.

    Args:
        videodata: A parsed API response, expected to be a dict with a
            ``'videodata'`` list. ``None``, non-dict values, and a missing
            or null ``'videodata'`` all yield an empty result.
        hires: Passed through to ``viddata_get_single_stream`` to choose
            between the high- and low-quality streams.

    Returns:
        A flat list with the URLs of all entries, in response order.
    """
    if not isinstance(videodata, dict):
        return []
    output = []
    # 'videodata' can be present but null; iterate over nothing in that case.
    for vdata in videodata.get('videodata') or []:
        output.extend(viddata_get_single_stream(vdata, hires))
    return output
def fix_speaker_name(name, party):
    """Strip a trailing `` (<party>)`` marker from a speaker's display name.

    Args:
        name: The speaker text, e.g. ``"Anna Svensson (S)"``. Non-string
            values (such as ``None``) are returned unchanged.
        party: The party abbreviation expected inside the parentheses.

    Returns:
        ``name`` without the trailing party marker, or ``name`` as given
        when it does not end with that marker.
    """
    if not isinstance(name, str):
        return name
    suffix = f" ({party})"
    if name.endswith(suffix):
        # Only the final occurrence is removed; earlier ones are kept.
        return name[:-len(suffix)]
    return name
def extract_speakers(data):
    """Return every speaker record in an API response, with names cleaned.

    Each speaker dict has the party marker removed from its ``'text'``
    field via ``fix_speaker_name``. The dicts are updated in place, so
    ``data`` itself is modified and the returned list shares its objects.

    Args:
        data: A parsed API response, expected to be a dict with a
            ``'videodata'`` list whose entries carry a ``'speakers'`` list.
            Missing or null levels are skipped.

    Returns:
        A flat list of the speaker dicts, in response order.
    """
    speakers = []
    if not isinstance(data, dict):
        return speakers
    for viddata in data.get('videodata') or []:
        if not isinstance(viddata, dict):
            continue
        for speaker in viddata.get('speakers') or []:
            name = speaker.get('text')
            # Only normalise real strings; a missing or null name is kept
            # as-is instead of aborting the whole extraction.
            if isinstance(name, str):
                speaker['text'] = fix_speaker_name(name, speaker.get('party'))
            speakers.append(speaker)
    return speakers
# Try the helpers on the sample response fetched above.
# (Uncomment to inspect the raw first entry: print(data['videodata'][0]))
speakers = extract_speakers(data)
# High-quality stream URLs, then low-quality ones; the results are only
# displayed when run as notebook cells.
viddata_get_streams(data, hires=True)
viddata_get_streams(data, hires=False)
from pathlib import Path
import glob
# Gather the high-quality video URLs from every saved API response whose
# file name starts with 'G' or 'H', then write them out one per line.
urls = []
for f in glob.glob('/Users/joregan/riksdag/riksdag-api-out/[GH]*'):
    fpath = Path(f)
    if not fpath.is_file():
        continue
    # Read as UTF-8 explicitly: the responses contain non-ASCII text and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(f, encoding='utf-8') as inf:
        data = json.load(inf)
    urls.extend(viddata_get_streams(data))

with open('/Users/joregan/riksdag/riksdag-api-out/video-urls.txt', 'w', encoding='utf-8') as outf:
    outf.writelines(url + "\n" for url in urls)