Based on this

Set up field reading

import csv
!pip install pyicu
Requirement already satisfied: pyicu in /usr/local/lib/python3.12/dist-packages (2.15.3)
# Column layout of an NST pronunciation-lexicon row (semicolon-separated).
# Passed as `fieldnames` to csv.DictReader below, so order must match the
# files exactly. NOTE(review): names follow the NST documentation — confirm
# against the lexicon docs shipped inside the tarballs.
field_names = [
    "orthography",
    "extended_pos",
    "morphology",
    "decomp",                 # '+'-separated compound decomposition
    "decpos",
    "source",
    "language_code",
    "garbage",
    "domain",
    "abbr_acr",
    "expansion",
    # Up to four alternative transliterations, each with three metadata
    # columns; collapsed into a list by collapse_transliterations().
    "transliteration1",
    "certainty_trans_1",
    "status_trans_1",
    "language_code_trans_1",
    "transliteration2",
    "certainty_trans_2",
    "status_trans_2",
    "language_code_trans_2",
    "transliteration3",
    "certainty_trans_3",
    "status_trans_3",
    "language_code_trans_3",
    "transliteration4",
    "certainty_trans_4",
    "status_trans_4",
    "language_code_trans_4",
    "auto_gen_variants",
    "set_id",
    "set_name",
    "style_status",
    "inflector_role",
    "lemma",
    "inflection_rule",
    "morph_label",
    "compounder_code",
    "semantic_info",
    # Nine spare columns; collapsed into a list by collapse_available_fields().
    "available_field1",
    "available_field2",
    "available_field3",
    "available_field4",
    "available_field5",
    "available_field6",
    "available_field7",
    "available_field8",
    "available_field9",
    "frequency",
    "original_orthography",
    "comment_field",
    "update_info",
    "unique_id"
]

Get data

  1. Swedish
  2. Danish
  3. Norwegian (Bokmål)
!wget https://www.nb.no/sbfil/leksikalske_databaser/leksikon/sv.leksikon.tar.gz -O /tmp/sv.leksikon.tar.gz
!wget https://www.nb.no/sbfil/leksikalske_databaser/leksikon/da_leksikon.tar.gz -O /tmp/da_leksikon.tar.gz
!wget https://www.nb.no/sbfil/leksikalske_databaser/leksikon/no.leksikon.tar.gz -O /tmp/no.leksikon.tar.gz
--2025-10-24 16:51:27--  https://www.nb.no/sbfil/leksikalske_databaser/leksikon/sv.leksikon.tar.gz
Resolving www.nb.no (www.nb.no)... 158.39.129.53, 2001:700:f01:1071::53
Connecting to www.nb.no (www.nb.no)|158.39.129.53|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 22041470 (21M) [application/octet-stream]
Saving to: ‘/tmp/sv.leksikon.tar.gz’

/tmp/sv.leksikon.ta 100%[===================>]  21.02M  9.38MB/s    in 2.2s    

2025-10-24 16:51:30 (9.38 MB/s) - ‘/tmp/sv.leksikon.tar.gz’ saved [22041470/22041470]

--2025-10-24 16:51:30--  https://www.nb.no/sbfil/leksikalske_databaser/leksikon/da_leksikon.tar.gz
Resolving www.nb.no (www.nb.no)... 158.39.129.53, 2001:700:f01:1071::53
Connecting to www.nb.no (www.nb.no)|158.39.129.53|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5731447 (5.5M) [application/octet-stream]
Saving to: ‘/tmp/da_leksikon.tar.gz’

/tmp/da_leksikon.ta 100%[===================>]   5.47M  4.57MB/s    in 1.2s    

2025-10-24 16:51:32 (4.57 MB/s) - ‘/tmp/da_leksikon.tar.gz’ saved [5731447/5731447]

--2025-10-24 16:51:32--  https://www.nb.no/sbfil/leksikalske_databaser/leksikon/no.leksikon.tar.gz
Resolving www.nb.no (www.nb.no)... 158.39.129.53, 2001:700:f01:1071::53
Connecting to www.nb.no (www.nb.no)|158.39.129.53|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 24214255 (23M) [application/octet-stream]
Saving to: ‘/tmp/no.leksikon.tar.gz’

/tmp/no.leksikon.ta 100%[===================>]  23.09M  9.03MB/s    in 2.6s    

2025-10-24 16:51:36 (9.03 MB/s) - ‘/tmp/no.leksikon.tar.gz’ saved [24214255/24214255]

import tarfile

data = {}

# Each lexicon tarball contains one .pron file; map language code to
# (archive path, member path inside the archive).
_PRON_MEMBERS = {
    "sv": ("/tmp/sv.leksikon.tar.gz",
           "NST svensk leksikon/swe030224NST.pron/swe030224NST.pron"),
    "no": ("/tmp/no.leksikon.tar.gz",
           "NSTs norske leksikon/nor030224NST.pron/nor030224NST.pron"),
    "da": ("/tmp/da_leksikon.tar.gz",
           "dan030224NST.pron/dan030224NST.pron"),
}

# The .pron files are latin1-encoded; decode once at the boundary.
for _lang, (_archive, _member) in _PRON_MEMBERS.items():
    with tarfile.open(_archive) as tar:
        data[_lang] = tar.extractfile(_member).read().decode('latin1')

Set up transliterator

# ICU transliteration rules mapping the Swedish NST X-SAMPA-like notation to
# IPA. A raw string is used so the backslashes reach ICU verbatim — the
# original non-raw literal relied on invalid escape sequences ('\`', '\%',
# '\:', '\$', '\*'), which emit SyntaxWarning on Python 3.12 and are slated
# to become errors. The three spans the old literal *did* interpret are
# rewritten so the runtime value is byte-identical: '\u2040' → the literal
# tie bar ⁀, '\\\'' → \', and '\\\\' → \\ (ICU-escaped apostrophe/backslash).
TRANSLIT_SV = r"""
n\` → ɳ ;
s\` → ʂ ;
l\` → ɭ ;
t\` → ʈ ;
d\` → ɖ ;
A → ɑ ;
O → ɔ ;
I → ɪ ;
E \* U → e ⁀ ʊ ;
E → ɛ ;
U → ʊ ;
Y → ʏ ;
2 → ø ;
9 → ø ;
u 0 → ɵ ;
N → ŋ ;
'""' → ² ;
'"' → ˈ ;
\% → ˌ ;
\: → ː ;
\$ → \. ;
g → ɡ ;
s \' → ɕ ;
x \\ → ɧ ;
\* → ⁀ ;
"""
<>:2: SyntaxWarning: invalid escape sequence '\`'
<>:2: SyntaxWarning: invalid escape sequence '\`'
/tmp/ipython-input-4250576132.py:2: SyntaxWarning: invalid escape sequence '\`'
  n\` → ɳ ;
# Norwegian rules: run ICU's built-in XSampa→IPA transform first, then map
# the NST-specific symbols ($ → syllable dot, ? → ˀ, * → tie bar \u2040).
NST_TRANSLIT = r"""
::XSampa-IPA;

\$ → \. ;
\? → ˀ;
\* → \u2040 ;
"""
# Danish rules: ? and _ are rewritten *before* the XSampa pass (presumably so
# XSampa-IPA doesn't consume them first — TODO confirm), the rest after.
DA_TRANSLIT = r"""
\? → ˀ;
\_  → ' ';
::XSampa-IPA;
\* → \u2040 ;
\$ → \. ;
"""
import icu
def transliterator_from_rules(name, rules):
    """Compile ICU *rules*, register them under *name*, and return the
    registered transliterator instance."""
    compiled = icu.Transliterator.createFromRules(name, rules)
    # Registering makes the rules resolvable by name via createInstance.
    icu.Transliterator.registerInstance(compiled)
    return icu.Transliterator.createInstance(name)
# Build one transliterator per language; Swedish has its own rule set,
# Norwegian and Danish share the XSampa-based ones defined above.
swelex_trans = transliterator_from_rules("swelex_trans", TRANSLIT_SV)
nstlex_trans = {}
nstlex_trans["no"] = transliterator_from_rules("nst_trans", NST_TRANSLIT)
nstlex_trans["da"] = transliterator_from_rules("da_trans", DA_TRANSLIT)
# Smoke tests for the Swedish rules: stress marks, retroflexes, ɧ/ɕ, the
# tie-bar diphthong, and ɵ.
assert swelex_trans.transliterate('""bA:n`s`$%ma$man') == "²bɑːɳʂ.ˌma.man"
assert swelex_trans.transliterate('"b9r$mIN$ham') == "ˈbør.mɪŋ.ham"
assert swelex_trans.transliterate('"bI$rU') == "ˈbɪ.rʊ"
assert swelex_trans.transliterate('""bIsp$%go:$d`en') == "²bɪsp.ˌɡoː.ɖen"

assert swelex_trans.transliterate('"x\\A:l') == "ˈɧɑːl"
assert swelex_trans.transliterate("\"s'u:$lens") == "ˈɕuː.lens"
assert swelex_trans.transliterate('a$"lE*U$te$n`a') == 'a.ˈle⁀ʊ.te.ɳa'
assert swelex_trans.transliterate('"fu0l') == 'ˈfɵl'
def collapse_available_fields(data):
    """Fold the nine ``available_fieldN`` entries of *data* into a single
    ``available_fields`` list of the non-empty values, preserving field order.

    Mutates *data* in place and returns it.
    """
    keys = [f"available_field{n}" for n in range(1, 10)]
    values = [data.pop(key) for key in keys]
    data["available_fields"] = [v for v in values if v != ""]
    return data
def collapse_transliterations(data, transliterator):
    """Fold the four ``transliterationN`` column groups of *data* into a
    single ``transliterations`` list of dicts, adding an ``ipa`` rendering
    produced by *transliterator* for each non-empty transliteration.

    Mutates *data* in place and returns it.
    """
    collected = []
    for idx in range(1, 5):
        raw = data.pop(f"transliteration{idx}")
        certainty = data.pop(f"certainty_trans_{idx}")
        status = data.pop(f"status_trans_{idx}")
        lang_code = data.pop(f"language_code_trans_{idx}")
        if raw == "":
            continue  # unused transliteration slot
        collected.append({
            "transliteration": raw,
            "ipa": transliterator.transliterate(raw),
            "certainty": certainty,
            "status": status,
            "language_code": lang_code,
        })
    data["transliterations"] = collected
    return data
import json
import io

# Parse the Swedish lexicon and emit it as JSON Lines (one object per row).
with open("svlex.json", "w") as outf:
    reader = csv.DictReader(io.StringIO(data["sv"]), delimiter=';',
                            fieldnames=field_names, quoting=csv.QUOTE_NONE)
    for entry in reader:
        # '+'-separated compound decomposition; drop empty segments.
        entry["decomp"] = [part for part in entry["decomp"].split("+") if part != ""]
        entry = collapse_available_fields(entry)
        entry = collapse_transliterations(entry, swelex_trans)
        outf.write(json.dumps(entry) + "\n")
# The Danish dump contains carriage returns; strip them before CSV parsing.
data["da"] = data["da"].replace('\r', '')
# Emit the Norwegian and Danish lexica as JSON Lines, same shape as svlex.json.
for lang in ["no", "da"]:
    with open(f"{lang}lex.json", "w", newline='') as outf:
        reader = csv.DictReader(io.StringIO(data[lang]), delimiter=';',
                                fieldnames=field_names, quoting=csv.QUOTE_NONE)
        for entry in reader:
            entry["decomp"] = [part for part in entry["decomp"].split("+") if part != ""]
            entry = collapse_available_fields(entry)
            entry = collapse_transliterations(entry, nstlex_trans[lang])
            outf.write(json.dumps(entry) + "\n")