DICT="https://folkets-lexikon.csc.kth.se/folkets/folkets_sv_en_public.xml"

import requests

# Download the dictionary XML. A timeout is set so that an unresponsive
# server cannot block the script indefinitely.
req = requests.get(DICT, timeout=60)

# Validate with an explicit raise rather than ``assert``: assertions are
# stripped when Python runs with -O, which would let a failed download
# slip through to the XML parser.
if req.status_code != 200:
    raise RuntimeError(f"GET {DICT} returned HTTP {req.status_code}")

import xml.etree.ElementTree as ET

# Parse the downloaded document; ``tree`` is the root element.
tree = ET.fromstring(req.text)

# Build one plain dict per <word> element that is a direct child of the root.
# Keys are inserted in a fixed order: "word", then whichever of "comment",
# "lang" and "class" are present, then "soundfile" (optional) and
# "transcription" taken from the <phonetic> child, if there is one.
words = []
for entry in tree.findall("word"):
    attrs = entry.attrib
    record = {"word": attrs["value"]}
    record.update(
        (key, attrs[key]) for key in ("comment", "lang", "class") if key in attrs
    )

    phonetic = entry.find("phonetic")
    if phonetic is not None:
        sound = phonetic.attrib.get("soundFile")
        if sound is not None:
            record["soundfile"] = sound
        # A <phonetic> element is required to carry "value" (KeyError if not).
        record["transcription"] = phonetic.attrib["value"]

    words.append(record)

# Substitutions applied to sound-file names before they are turned into URLs.
# Each accented letter maps to its code point written as a zero-padded,
# four-digit octal number (e.g. "ä" is U+00E4 == 0o344 -> "0344").
CHAR_REPLACE = {letter: f"{ord(letter):04o}" for letter in "àéÖöÄäÅåê"}
# The HTML entity for an apostrophe is decoded to the literal character.
CHAR_REPLACE["&#39;"] = "'"

# Characters treated as "plain" when scanning sound-file names:
# ASCII letters, hyphen and space.
PLAINASCII = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "- "
)

# First pass over the sound-file references:
#   * entries that are already absolute http:// URLs are moved to "url"
#     and their "soundfile" key is removed;
#   * for the remaining entries, print (once each) every file name that
#     contains a character which is neither in PLAINASCII nor a key of
#     CHAR_REPLACE, so it can be inspected by hand.
# ``seen`` is a set: membership tests are O(1) and each name is stored once.
seen = set()
for word in words:
    if "soundfile" not in word:
        continue

    soundfile = word["soundfile"]
    if soundfile.startswith("http://"):
        if "sound/http://" in soundfile:
            # A second URL is embedded after a ".../sound/" prefix;
            # keep only the last embedded URL.
            word["url"] = soundfile[soundfile.rfind("http://"):]
        else:
            word["url"] = soundfile
        del word["soundfile"]
        continue

    sf = soundfile.replace(".swf", "")
    # Only single characters are checked, so the multi-character
    # "&#39;" key of CHAR_REPLACE never matches here.
    has_unmapped_char = any(
        char not in PLAINASCII and char not in CHAR_REPLACE for char in sf
    )
    if has_unmapped_char and sf not in seen:
        print(sf)
        seen.add(sf)

chargé-d&#39;affaires
d.v.s.
dvs.
handskas med2
inf246r
inför 050
kommando 050
kvällskvist(en)
os 050
satkär(r)ing
ut 050

# Sound-file names (after character replacement) for which no URL is
# generated. NOTE(review): presumably these files do not exist on the
# server -- confirm.
MISSING = [
    "arbetsmarknadsinstitut",
    "becquerel",
    "inner-",
    "j0344mn-",
    "j0344tte-",
    "kanon-",
    "svin-",
]

# Second pass: derive an .mp3 URL for every entry that still has a
# "soundfile" and has not already been given a "url".
for entry in words:
    if "soundfile" not in entry or "url" in entry:
        continue

    stem = entry["soundfile"].replace(".swf", "")
    # Apply the substitutions in the table's insertion order.
    for needle, substitute in CHAR_REPLACE.items():
        stem = stem.replace(needle, substitute)
    # Drop a single leading space, if any.
    stem = stem[1:] if stem.startswith(" ") else stem

    if stem not in MISSING:
        entry["url"] = f"http://lexin.nada.kth.se/sound/{stem}.mp3"

import json
# Write the complete word list as a single JSON document.
with open("/tmp/folkets-sv-en.json", "w") as outf:
    outf.write(json.dumps(words))

# Write one "word<TAB>transcription<TAB>url" line for every entry that has
# a URL, skipping exact duplicate lines while keeping first-seen order.
# The transcription column is empty when the entry has none.
#
# The file is opened explicitly as UTF-8: the words and transcriptions
# contain non-ASCII characters, and the platform default encoding may be
# unable to represent them. ``seen`` is a set so the duplicate check is
# O(1) per line rather than a scan of every line written so far.
seen = set()
with open("/tmp/folkets-sounds.tsv", "w", encoding="utf-8") as outf:
    for word in words:
        if "url" not in word:
            continue
        ts = word.get("transcription", "")
        line = f"{word['word']}\t{ts}\t{word['url']}"
        if line not in seen:
            outf.write(line + "\n")
            seen.add(line)

Through sheer laziness, the audio was downloaded like this:

cat /tmp/folkets-sounds.tsv | awk -F'\t' '{print $3}' > /tmp/folkets-urls
wget -x -c -i /tmp/folkets-urls -o /tmp/folkets-urls.log