%pip install spacy_stanza
import stanza
import spacy_stanza

# Download the English Stanza models and load them behind spaCy's API.
stanza.download("en")
nlp = spacy_stanza.load_pipeline("en")

# Parse a short two-sentence sample, then print one row per token with its
# text, lemma, POS tag, dependency label and entity type.
doc = nlp("Barack Obama was born in Hawaii. He was elected president in 2008.")
for token in doc:
    print(
        *(
            getattr(token, field)
            for field in ("text", "lemma_", "pos_", "dep_", "ent_type_")
        )
    )
# Entity spans found in the whole document.
print(doc.ents)
# Download the Swedish Stanza models and load them as a spaCy-compatible
# pipeline; `nlp_sv` is used below to parse the sentences in SENTS.
stanza.download("sv")
nlp_sv = spacy_stanza.load_pipeline("sv")
# Swedish test inputs, laid out as adjacent pairs of closely related phrasings.
# NOTE(review): each pair looks like a verbatim (transcribed) phrase followed
# by an edited variant of it — confirm against the data source.
SENTS = [
    "dem som går igenom en subventionerad anställning",
    "dem som har haft en subventionerad anställning",

    # H101UbU5_0028
    "tidsanvändning så framkommer",
    "tidsanvändning framkommer",

    "arbetsuppgifter har tats bort",
    "arbetsuppgifter tagits bort",

    "administrativa uppgifterna har eller har ökat",
    "administrativa uppgifterna ökat",

    # more fun:
    "Och det tror jag inte det råder någon oenighet om här.",
    "Och jag tror inte att det här råder någon oenighet om det.",
]
# Parse every sentence in SENTS with the Swedish pipeline: echo the parsed
# document and keep the dict returned by its to_json() in `res`.
res = []
for sent in SENTS:
    doc = nlp_sv(sent)
    print(doc)
    res.append(doc.to_json())
dem som går igenom en subventionerad anställning
dem som har haft en subventionerad anställning
tidsanvändning så framkommer
tidsanvändning framkommer
arbetsuppgifter har tats bort
arbetsuppgifter tagits bort
administrativa uppgifterna har eller har ökat
administrativa uppgifterna ökat
Och det tror jag inte det råder någon oenighet om här.
Och jag tror inte att det här råder någon oenighet om det.
# Display the per-sentence dicts collected from doc.to_json() in the loop above.
res
[{'text': 'dem som går igenom en subventionerad anställning',
  'ents': [],
  'sents': [{'start': 0, 'end': 48}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 3,
    'tag': 'PN|UTR/NEU|PLU|DEF|OBJ',
    'pos': 'PRON',
    'morph': 'Case=Acc|Definite=Def|Number=Plur|PronType=Prs',
    'lemma': 'de',
    'dep': 'root',
    'head': 0},
   {'id': 1,
    'start': 4,
    'end': 7,
    'tag': 'HP|-|-|-',
    'pos': 'PRON',
    'morph': 'PronType=Rel',
    'lemma': 'som',
    'dep': 'nsubj',
    'head': 2},
   {'id': 2,
    'start': 8,
    'end': 11,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'gå',
    'dep': 'acl:relcl',
    'head': 0},
   {'id': 3,
    'start': 12,
    'end': 18,
    'tag': 'PL',
    'pos': 'ADP',
    'morph': '',
    'lemma': 'igenom',
    'dep': 'compound:prt',
    'head': 2},
   {'id': 4,
    'start': 19,
    'end': 21,
    'tag': 'DT|UTR|SIN|IND',
    'pos': 'DET',
    'morph': 'Definite=Ind|Gender=Com|Number=Sing|PronType=Art',
    'lemma': 'en',
    'dep': 'det',
    'head': 6},
   {'id': 5,
    'start': 22,
    'end': 36,
    'tag': 'PC|PRF|UTR|SIN|IND|NOM',
    'pos': 'ADJ',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing|Tense=Past|VerbForm=Part',
    'lemma': 'subventionerad',
    'dep': 'amod',
    'head': 6},
   {'id': 6,
    'start': 37,
    'end': 48,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'anställning',
    'dep': 'obj',
    'head': 2}]},
 {'text': 'dem som har haft en subventionerad anställning',
  'ents': [],
  'sents': [{'start': 0, 'end': 46}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 3,
    'tag': 'PN|UTR/NEU|PLU|DEF|OBJ',
    'pos': 'PRON',
    'morph': 'Case=Acc|Definite=Def|Number=Plur|PronType=Prs',
    'lemma': 'de',
    'dep': 'root',
    'head': 0},
   {'id': 1,
    'start': 4,
    'end': 7,
    'tag': 'HP|-|-|-',
    'pos': 'PRON',
    'morph': 'PronType=Rel',
    'lemma': 'som',
    'dep': 'nsubj',
    'head': 3},
   {'id': 2,
    'start': 8,
    'end': 11,
    'tag': 'VB|PRS|AKT',
    'pos': 'AUX',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'ha',
    'dep': 'aux',
    'head': 3},
   {'id': 3,
    'start': 12,
    'end': 16,
    'tag': 'VB|SUP|AKT',
    'pos': 'VERB',
    'morph': 'VerbForm=Sup|Voice=Act',
    'lemma': 'ha',
    'dep': 'acl:relcl',
    'head': 0},
   {'id': 4,
    'start': 17,
    'end': 19,
    'tag': 'DT|UTR|SIN|IND',
    'pos': 'DET',
    'morph': 'Definite=Ind|Gender=Com|Number=Sing|PronType=Art',
    'lemma': 'en',
    'dep': 'det',
    'head': 6},
   {'id': 5,
    'start': 20,
    'end': 34,
    'tag': 'PC|PRF|UTR|SIN|IND|NOM',
    'pos': 'ADJ',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing|Tense=Past|VerbForm=Part',
    'lemma': 'subventionerad',
    'dep': 'amod',
    'head': 6},
   {'id': 6,
    'start': 35,
    'end': 46,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'anställning',
    'dep': 'obj',
    'head': 3}]},
 {'text': 'tidsanvändning så framkommer',
  'ents': [],
  'sents': [{'start': 0, 'end': 28}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 14,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'tidsanvändning',
    'dep': 'nsubj',
    'head': 2},
   {'id': 1,
    'start': 15,
    'end': 17,
    'tag': 'AB',
    'pos': 'ADV',
    'morph': '',
    'lemma': 'så',
    'dep': 'advmod',
    'head': 2},
   {'id': 2,
    'start': 18,
    'end': 28,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'framkomma',
    'dep': 'root',
    'head': 2}]},
 {'text': 'tidsanvändning framkommer',
  'ents': [],
  'sents': [{'start': 0, 'end': 25}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 14,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'tidsanvändning',
    'dep': 'nsubj',
    'head': 1},
   {'id': 1,
    'start': 15,
    'end': 25,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'framkomma',
    'dep': 'root',
    'head': 1}]},
 {'text': 'arbetsuppgifter har tats bort',
  'ents': [],
  'sents': [{'start': 0, 'end': 29}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 15,
    'tag': 'NN|UTR|PLU|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Plur',
    'lemma': 'arbetsuppgift',
    'dep': 'nsubj:pass',
    'head': 2},
   {'id': 1,
    'start': 16,
    'end': 19,
    'tag': 'VB|PRS|AKT',
    'pos': 'AUX',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'ha',
    'dep': 'aux',
    'head': 2},
   {'id': 2,
    'start': 20,
    'end': 24,
    'tag': 'VB|SUP|SFO',
    'pos': 'VERB',
    'morph': 'VerbForm=Sup|Voice=Pass',
    'lemma': 'ta',
    'dep': 'root',
    'head': 2},
   {'id': 3,
    'start': 25,
    'end': 29,
    'tag': 'PL',
    'pos': 'ADV',
    'morph': '',
    'lemma': 'bort',
    'dep': 'compound:prt',
    'head': 2}]},
 {'text': 'arbetsuppgifter tagits bort',
  'ents': [],
  'sents': [{'start': 0, 'end': 27}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 15,
    'tag': 'NN|UTR|PLU|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Plur',
    'lemma': 'arbetsuppgift',
    'dep': 'nsubj:pass',
    'head': 1},
   {'id': 1,
    'start': 16,
    'end': 22,
    'tag': 'VB|SUP|SFO',
    'pos': 'VERB',
    'morph': 'VerbForm=Sup|Voice=Pass',
    'lemma': 'ta',
    'dep': 'root',
    'head': 1},
   {'id': 2,
    'start': 23,
    'end': 27,
    'tag': 'PL',
    'pos': 'ADV',
    'morph': '',
    'lemma': 'bort',
    'dep': 'compound:prt',
    'head': 1}]},
 {'text': 'administrativa uppgifterna har eller har ökat',
  'ents': [],
  'sents': [{'start': 0, 'end': 45}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 14,
    'tag': 'JJ|POS|UTR/NEU|PLU|IND/DEF|NOM',
    'pos': 'ADJ',
    'morph': 'Case=Nom|Degree=Pos|Number=Plur',
    'lemma': 'administrativ',
    'dep': 'amod',
    'head': 1},
   {'id': 1,
    'start': 15,
    'end': 26,
    'tag': 'NN|UTR|PLU|DEF|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Def|Gender=Com|Number=Plur',
    'lemma': 'uppgift',
    'dep': 'nsubj',
    'head': 2},
   {'id': 2,
    'start': 27,
    'end': 30,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'ha',
    'dep': 'root',
    'head': 2},
   {'id': 3,
    'start': 31,
    'end': 36,
    'tag': 'KN',
    'pos': 'CCONJ',
    'morph': '',
    'lemma': 'eller',
    'dep': 'cc',
    'head': 5},
   {'id': 4,
    'start': 37,
    'end': 40,
    'tag': 'VB|PRS|AKT',
    'pos': 'AUX',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'ha',
    'dep': 'aux',
    'head': 5},
   {'id': 5,
    'start': 41,
    'end': 45,
    'tag': 'VB|SUP|AKT',
    'pos': 'VERB',
    'morph': 'VerbForm=Sup|Voice=Act',
    'lemma': 'öka',
    'dep': 'conj',
    'head': 2}]},
 {'text': 'administrativa uppgifterna ökat',
  'ents': [],
  'sents': [{'start': 0, 'end': 31}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 14,
    'tag': 'JJ|POS|UTR/NEU|PLU|IND/DEF|NOM',
    'pos': 'ADJ',
    'morph': 'Case=Nom|Degree=Pos|Number=Plur',
    'lemma': 'administrativ',
    'dep': 'amod',
    'head': 1},
   {'id': 1,
    'start': 15,
    'end': 26,
    'tag': 'NN|UTR|PLU|DEF|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Def|Gender=Com|Number=Plur',
    'lemma': 'uppgift',
    'dep': 'nsubj',
    'head': 2},
   {'id': 2,
    'start': 27,
    'end': 31,
    'tag': 'VB|SUP|AKT',
    'pos': 'VERB',
    'morph': 'VerbForm=Sup|Voice=Act',
    'lemma': 'öka',
    'dep': 'root',
    'head': 2}]},
 {'text': 'Och det tror jag inte det råder någon oenighet om här.',
  'ents': [],
  'sents': [{'start': 0, 'end': 54}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 3,
    'tag': 'KN',
    'pos': 'CCONJ',
    'morph': '',
    'lemma': 'och',
    'dep': 'cc',
    'head': 2},
   {'id': 1,
    'start': 4,
    'end': 7,
    'tag': 'PN|NEU|SIN|DEF|SUB/OBJ',
    'pos': 'PRON',
    'morph': 'Definite=Def|Gender=Neut|Number=Sing|PronType=Prs',
    'lemma': 'den',
    'dep': 'obj',
    'head': 2},
   {'id': 2,
    'start': 8,
    'end': 12,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'tro',
    'dep': 'root',
    'head': 2},
   {'id': 3,
    'start': 13,
    'end': 16,
    'tag': 'PN|UTR|SIN|DEF|SUB',
    'pos': 'PRON',
    'morph': 'Case=Nom|Definite=Def|Gender=Com|Number=Sing|PronType=Prs',
    'lemma': 'jag',
    'dep': 'nsubj',
    'head': 2},
   {'id': 4,
    'start': 17,
    'end': 21,
    'tag': 'AB',
    'pos': 'PART',
    'morph': 'Polarity=Neg',
    'lemma': 'inte',
    'dep': 'advmod',
    'head': 2},
   {'id': 5,
    'start': 22,
    'end': 25,
    'tag': 'PN|NEU|SIN|DEF|SUB/OBJ',
    'pos': 'PRON',
    'morph': 'Definite=Def|Gender=Neut|Number=Sing|PronType=Prs',
    'lemma': 'den',
    'dep': 'expl',
    'head': 6},
   {'id': 6,
    'start': 26,
    'end': 31,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'råda',
    'dep': 'ccomp',
    'head': 2},
   {'id': 7,
    'start': 32,
    'end': 37,
    'tag': 'DT|UTR|SIN|IND',
    'pos': 'DET',
    'morph': 'Definite=Ind|Gender=Com|Number=Sing|PronType=Ind',
    'lemma': 'någon',
    'dep': 'det',
    'head': 8},
   {'id': 8,
    'start': 38,
    'end': 46,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'oenighet',
    'dep': 'nsubj',
    'head': 6},
   {'id': 9,
    'start': 47,
    'end': 49,
    'tag': 'PP',
    'pos': 'ADP',
    'morph': '',
    'lemma': 'om',
    'dep': 'advmod',
    'head': 6},
   {'id': 10,
    'start': 50,
    'end': 53,
    'tag': 'AB',
    'pos': 'ADV',
    'morph': '',
    'lemma': 'här',
    'dep': 'fixed',
    'head': 9},
   {'id': 11,
    'start': 53,
    'end': 54,
    'tag': 'MAD',
    'pos': 'PUNCT',
    'morph': '',
    'lemma': '.',
    'dep': 'punct',
    'head': 2}]},
 {'text': 'Och jag tror inte att det här råder någon oenighet om det.',
  'ents': [],
  'sents': [{'start': 0, 'end': 58}],
  'tokens': [{'id': 0,
    'start': 0,
    'end': 3,
    'tag': 'KN',
    'pos': 'CCONJ',
    'morph': '',
    'lemma': 'och',
    'dep': 'cc',
    'head': 2},
   {'id': 1,
    'start': 4,
    'end': 7,
    'tag': 'PN|UTR|SIN|DEF|SUB',
    'pos': 'PRON',
    'morph': 'Case=Nom|Definite=Def|Gender=Com|Number=Sing|PronType=Prs',
    'lemma': 'jag',
    'dep': 'nsubj',
    'head': 2},
   {'id': 2,
    'start': 8,
    'end': 12,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'tro',
    'dep': 'root',
    'head': 2},
   {'id': 3,
    'start': 13,
    'end': 17,
    'tag': 'AB',
    'pos': 'PART',
    'morph': 'Polarity=Neg',
    'lemma': 'inte',
    'dep': 'advmod',
    'head': 2},
   {'id': 4,
    'start': 18,
    'end': 21,
    'tag': 'SN',
    'pos': 'SCONJ',
    'morph': '',
    'lemma': 'att',
    'dep': 'mark',
    'head': 7},
   {'id': 5,
    'start': 22,
    'end': 25,
    'tag': 'PN|NEU|SIN|DEF|SUB/OBJ',
    'pos': 'PRON',
    'morph': 'Definite=Def|Gender=Neut|Number=Sing|PronType=Prs',
    'lemma': 'den',
    'dep': 'expl',
    'head': 7},
   {'id': 6,
    'start': 26,
    'end': 29,
    'tag': 'AB',
    'pos': 'ADV',
    'morph': '',
    'lemma': 'här',
    'dep': 'fixed',
    'head': 5},
   {'id': 7,
    'start': 30,
    'end': 35,
    'tag': 'VB|PRS|AKT',
    'pos': 'VERB',
    'morph': 'Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act',
    'lemma': 'råda',
    'dep': 'ccomp',
    'head': 2},
   {'id': 8,
    'start': 36,
    'end': 41,
    'tag': 'DT|UTR|SIN|IND',
    'pos': 'DET',
    'morph': 'Definite=Ind|Gender=Com|Number=Sing|PronType=Ind',
    'lemma': 'någon',
    'dep': 'det',
    'head': 9},
   {'id': 9,
    'start': 42,
    'end': 50,
    'tag': 'NN|UTR|SIN|IND|NOM',
    'pos': 'NOUN',
    'morph': 'Case=Nom|Definite=Ind|Gender=Com|Number=Sing',
    'lemma': 'oenighet',
    'dep': 'nsubj',
    'head': 7},
   {'id': 10,
    'start': 51,
    'end': 53,
    'tag': 'PP',
    'pos': 'ADP',
    'morph': '',
    'lemma': 'om',
    'dep': 'case',
    'head': 11},
   {'id': 11,
    'start': 54,
    'end': 57,
    'tag': 'PN|NEU|SIN|DEF|SUB/OBJ',
    'pos': 'PRON',
    'morph': 'Definite=Def|Gender=Neut|Number=Sing|PronType=Prs',
    'lemma': 'den',
    'dep': 'obl',
    'head': 7},
   {'id': 12,
    'start': 57,
    'end': 58,
    'tag': 'MAD',
    'pos': 'PUNCT',
    'morph': '',
    'lemma': '.',
    'dep': 'punct',
    'head': 2}]}]
import stanza
from stanza.utils.conll import CoNLL

# Native Stanza pipeline for Swedish (no spaCy wrapper), limited to the
# processors named in the string below.
# NOTE: this rebinds `nlp` and `doc`, which earlier cells used for the
# spaCy-Stanza pipeline and its parsed documents.
nlp = stanza.Pipeline(
    lang='sv',
    processors='tokenize, pos, lemma, depparse',
)
# The result is a Stanza Document rather than a spaCy Doc.
doc = nlp('Du har mitt ord.')
# Write the parsed document to a CoNLL-U file in the working directory.
CoNLL.write_doc2conll(doc, "doc2.conllu")
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
INFO:stanza:Loading these models for language: sv (Swedish):
==================================
| Processor | Package            |
----------------------------------
| tokenize  | talbanken          |
| pos       | talbanken_charlm   |
| lemma     | talbanken_nocharlm |
| depparse  | talbanken_charlm   |
==================================

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
INFO:stanza:Loading: pos
INFO:stanza:Loading: lemma
/usr/local/lib/python3.10/dist-packages/stanza/models/lemma/trainer.py:227: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: depparse
/usr/local/lib/python3.10/dist-packages/stanza/models/depparse/trainer.py:103: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Done loading processors!
!cat doc2.conllu
# text = Du har mitt ord.
# sent_id = 0
1	Du	du	PRON	PN|UTR|SIN|DEF|SUB	Case=Nom|Definite=Def|Gender=Com|Number=Sing|PronType=Prs	2	nsubj	_	start_char=0|end_char=2
2	har	ha	VERB	VB|PRS|AKT	Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=3|end_char=6
3	mitt	jag	PRON	PS|NEU|SIN|DEF	Definite=Def|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs	4	nmod:poss	_	start_char=7|end_char=11
4	ord	ord	NOUN	NN|NEU|SIN|IND|NOM	Case=Nom|Definite=Ind|Gender=Neut|Number=Sing	2	obj	_	start_char=12|end_char=15
5	.	.	PUNCT	MAD	_	2	punct	_	start_char=15|end_char=16

import stanza
from stanza.utils.conll import CoNLL

# Native Stanza pipeline for Irish ('ga'), limited to the processors named
# in the string below.
nlp_ga = stanza.Pipeline(
    lang='ga',
    processors='tokenize, pos, lemma, depparse',
)
# Parse a short Irish phrase into a Stanza Document.
doc_ga = nlp_ga("áthas a bheith air")
# Write the parsed document to a CoNLL-U file in the working directory.
CoNLL.write_doc2conll(doc_ga, "docga.conllu")
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
INFO:stanza:Loading these models for language: ga (Irish):
============================
| Processor | Package      |
----------------------------
| tokenize  | idt          |
| pos       | idt_nocharlm |
| lemma     | idt_nocharlm |
| depparse  | idt_nocharlm |
============================

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
INFO:stanza:Loading: pos
INFO:stanza:Loading: lemma
INFO:stanza:Loading: depparse
INFO:stanza:Done loading processors!
!cat docga.conllu
# text = áthas a bheith air
# sent_id = 0
1	áthas	áthas	NOUN	Noun	Case=Nom|Gender=Masc|Number=Sing	3	obj	_	start_char=0|end_char=5
2	a	a	PART	Inf	PartType=Inf	3	mark	_	start_char=6|end_char=7
3	bheith	bheith	NOUN	Noun	Form=Len|VerbForm=Inf	0	root	_	start_char=8|end_char=14
4	air	ar	ADP	Prep	Gender=Masc|Number=Sing|Person=3	3	obl:prep	_	start_char=15|end_char=18

# Irish sample passage (item 26, "An Ghráinneóg agus na hAithreacha Nimhe"),
# spelled with "h" digraphs (bh, ch, th, ...) rather than dotted consonants.
# NOTE(review): appears to be a re-spelled version of SAMPLE_ORIG — confirm
# provenance. Not referenced by the code visible in this part of the file.
SAMPLE = """26.—AN GHRÁINNEÓG AGUS NA HAITHREACHA NIMHE.

Tháinig an ghráinneóg go doras pluaise na n-aithreach nimhe oíche sheaca, agus d’ iar sí bheith istigh orthu. Do ligeadar isteach í ó bhí an oíche chomh fuar. Shocruig sí í fein ar lár an urláir agus dhein sí liarthóid di fein, agus shín sí na deilgne amach mórthimpeall uirthi féin, i dtreo ná féadfadh aon duine dul na goire. Ní fhéadadh athair nimhe gabháil tháirse, síos ná suas, ná prioctí é.

“Féach!” ar siad, “tá an phluais seo beag ár ndóithin againne féin. Ní foláir duitse bheith ag imeacht.”

“Ambasa,” ar sise, ”’an té leis gur cúmhang fágadh!’ Níl aon locht agamsa ar an áit seo.”

An Múineadh.

“Ní haitheantas go haontígheas.”

Cheap na haithreacha nimhe ná bheadh aon bhac orthu an ghráinneóg do chur amach pé uair ba mhaith leo é, nó fiacal nimhe do chur inti. Níor chuímhnígheadar riamh ar na deilgníbh."""
# The same Irish passage as SAMPLE in a spelling that uses dotted consonants
# (ġ, ṫ, ċ, ...) where SAMPLE has "h" digraphs, and hyphenated prefixes
# (h-, t-).
# NOTE(review): presumably the original print orthography, going by the _ORIG
# suffix — confirm. Not referenced by the code visible in this part of the file.
SAMPLE_ORIG = """26.—AN ĠRÁINNEÓG AGUS NA H-AIṪREAĊA NÍṀE.

Ṫáinig an ġráinneóg go dorus pluaise na n-aiṫreaċ níṁe oíḋċe ṡeaca, agus d’ iar sí ḃeiṫ istiġ orṫa. Do leigeadar isteaċ í ó ḃí an oíḋċe ċóṁ fuar. Ṡocruig sí í fein ar lár an úrláir agus ḋein sí liarṫóid dí fein, agus ṡín sí na deilgne amaċ mór-ṫímpal uirṫi féin, i dtreó ná féadfaḋ aoinne dul na goire. Ní ḟéadaḋ aṫair níṁe gaḃáil ṫáirse, síos ná suas, ná prioctí é.

“Feuċ!” ar siad, “tá an ṗluais seo beag ár ndóiṫin againne féin. Ní foláir duitse ḃeiṫ ag imṫeaċt.”

“Ambasa,” ar sise, “‘an t-é leis gur cúṁang fágaḋ!’ Ní’l aon loċt agamsa ar an áit seo.”

An Múineaḋ.

“Ní h-aiṫeantas go h-aontíġeas.”

Ċeap na h-aiṫreaċa níṁe ná ḃéaḋ aon ḃac orṫa an ġráinneóg do ċur amaċ pé uair ba ṁaiṫ leo é, nó fiacal níṁe do ċur inti. Níor ċuíṁníġeadar riaṁ ar na deilgníḃ."""
# The same Irish passage as SAMPLE with further changes to spelling and
# wording (e.g. "nathracha" for "haithreacha", "rinne" for "dhein"), and with
# no blank lines between paragraphs.
# NOTE(review): the _CAI suffix presumably marks a standardised-spelling
# version — confirm. Not referenced by the code visible in this part of the file.
SAMPLE_CAI = """26. — AN GHRÁINNEOG AGUS NA NATHRACHA NIMHE.
Tháinig an ghráinneog go doras pluaise na nathrach nimhe oíche sheaca, agus d'iarr sí bheith istigh orthu. Ligeadar isteach í ó bhí an oíche chomh fuar. Shocraigh sí í féin ar lár an urláir agus rinne sí liarthóid di féin, agus shín sí na deilgne amach mórthimpeall uirthi féin, i dtreo nach bhféadfadh aon duine dul ina gaire. Ní fhéadadh nathair nimhe gabháil thairsti, síos ná suas, ná prioctí é.
“Féach!” ar siad, “tá an phluais seo beag ár ndóthain againne féin. Ní foláir duitse bheith ag imeacht.”
“Ambaiste,” ar sise, “‘an té leis gur cúng fágadh!’ Níl aon locht agamsa ar an áit seo.”
An Múineadh.
“Ní haitheantas go haontíos.”
Cheap na nathracha nimhe ná beadh aon bhac orthu an ghráinneog a chur amach pé uair ba mhaith leo é, nó fiacla nimhe a chur inti. Níor chuimhníodar riamh ar na deilgní."""
!pip install stanza
import stanza
from stanza.utils.conll import CoNLL

nlp_pl = stanza.Pipeline('pl', processors='tokenize, pos, lemma, depparse')
doc_pl = nlp_pl("Wolne Miasto Gdańsk (niem. Freie Stadt Danzig) – istniejące w okresie międzywojennym autonomiczne miasto-państwo, pod ochroną Ligi Narodów.")
CoNLL.write_doc2conll(doc_pl, "docpl.conllu")
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
WARNING:stanza:Language pl package default expects mwt, which has been added
INFO:stanza:Loading these models for language: pl (Polish):
============================
| Processor | Package      |
----------------------------
| tokenize  | pdb          |
| mwt       | pdb          |
| pos       | pdb_charlm   |
| lemma     | pdb_nocharlm |
| depparse  | pdb_charlm   |
============================

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
/usr/local/lib/python3.10/dist-packages/stanza/models/tokenization/trainer.py:82: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: mwt
/usr/local/lib/python3.10/dist-packages/stanza/models/mwt/trainer.py:201: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: pos
/usr/local/lib/python3.10/dist-packages/stanza/models/pos/trainer.py:139: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
/usr/local/lib/python3.10/dist-packages/stanza/models/common/pretrain.py:56: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  data = torch.load(self.filename, lambda storage, loc: storage)
/usr/local/lib/python3.10/dist-packages/stanza/models/common/char_model.py:271: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  state = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: lemma
/usr/local/lib/python3.10/dist-packages/stanza/models/lemma/trainer.py:239: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: depparse
/usr/local/lib/python3.10/dist-packages/stanza/models/depparse/trainer.py:194: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Done loading processors!
# Print the CoNLL-U file written by the preceding cell.
!cat docpl.conllu
# text = Wolne Miasto Gdańsk (niem. Freie Stadt Danzig) – istniejące w okresie międzywojennym autonomiczne miasto-państwo, pod ochroną Ligi Narodów.
# sent_id = 0
1	Wolne	wolny	ADJ	adj:sg:nom:n:pos	Case=Nom|Degree=Pos|Gender=Neut|Number=Sing	2	amod:flat	_	start_char=0|end_char=5
2	Miasto	miasto	NOUN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	0	root	_	start_char=6|end_char=12
3	Gdańsk	Gdańsk	PROPN	subst:sg:nom:m3	Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing	2	flat	_	start_char=13|end_char=19|SpacesAfter=\u00A0
4	(	(	PUNCT	interp	PunctSide=Ini|PunctType=Brck	5	punct	_	start_char=20|end_char=21|SpaceAfter=No
5	niem	niem	X	brev:pun	Abbr=Yes|Pun=Yes	2	nmod	_	start_char=21|end_char=25|SpaceAfter=No
6	.	.	PUNCT	interp	PunctType=Peri	5	punct	_	start_char=25|end_char=26|SpacesAfter=\u00A0
7	Freie	Frea	PROPN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	5	flat	_	start_char=27|end_char=32
8	Stadt	Stadt	PROPN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	7	flat	_	start_char=33|end_char=38
9	Danzig	Danzig	PROPN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	8	flat	_	start_char=39|end_char=45|SpaceAfter=No
10	)	)	PUNCT	interp	PunctSide=Fin|PunctType=Brck	5	punct	_	start_char=45|end_char=46
11	–	–	PUNCT	interp	PunctType=Dash	17	punct	_	start_char=47|end_char=48
12	istniejące	istnieć	ADJ	pact:sg:nom:n:imperf:aff	Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Act	17	acl	_	start_char=49|end_char=59
13	w	w	ADP	prep:loc:nwok	AdpType=Prep|Variant=Short	14	case	_	start_char=60|end_char=61|SpacesAfter=\u00A0
14	okresie	okres	NOUN	subst:sg:loc:m3	Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing	12	obl	_	start_char=62|end_char=69
15	międzywojennym	międzywojenny	ADJ	adj:sg:loc:m3:pos	Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing	14	amod	_	start_char=70|end_char=84|SpacesAfter=\u00A0
16	autonomiczne	autonomiczny	ADJ	adj:sg:nom:n:pos	Case=Nom|Degree=Pos|Gender=Neut|Number=Sing	17	amod	_	start_char=85|end_char=97|SpacesAfter=\u00A0
17	miasto	miasto	NOUN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	2	appos	_	start_char=98|end_char=104|SpaceAfter=No
18	-	-	PUNCT	interp	PunctType=Dash	19	punct	_	start_char=104|end_char=105|SpaceAfter=No
19	państwo	państwo	NOUN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	17	appos	_	start_char=105|end_char=112|SpaceAfter=No
20	,	,	PUNCT	interp	PunctType=Comm	22	punct	_	start_char=112|end_char=113
21	pod	pod	ADP	prep:inst:nwok	AdpType=Prep|Variant=Short	22	case	_	start_char=114|end_char=117
22	ochroną	ochrona	NOUN	subst:sg:inst:f	Case=Ins|Gender=Fem|Number=Sing	17	nmod	_	start_char=118|end_char=125|SpacesAfter=\u00A0
23	Ligi	liga	NOUN	subst:sg:gen:f	Case=Gen|Gender=Fem|Number=Sing	22	nmod:arg	_	start_char=126|end_char=130
24	Narodów	naród	NOUN	subst:pl:gen:m3	Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur	23	nmod:flat	_	start_char=131|end_char=138|SpaceAfter=No
25	.	.	PUNCT	interp	PunctType=Peri	2	punct	_	start_char=138|end_char=139|SpaceAfter=No

doc_pl = nlp_pl("Czy może to tak być? Albo biało, albo czarno. Czerwony jak krasnoludek. Lekki jak piórko. Jakim sposobem się tam znalazłeś? Czy byłeś w domu? Jest tata w domu? My jesteśmy Kaszubami. My jesteśmy Kaszubami.  język kaszubski. Wiem, że ona je teraz obiad. Ona jest Polką. On by nic nie robił, tylko jadł i spał. Chciałem do miasta jechać z rana, alem zaspał. Mam tylko wodę w tej butelce. Nie śpij teraz! Śpiący kot myszy nie łapie. Moniki kot rozbił dzbanek.")
CoNLL.write_doc2conll(doc_pl, "docpl.conllu")
!cat docpl.conllu
# text = Czy może to tak być?
# sent_id = 0
1	Czy	czy	PART	part	PartType=Int	2	advmod	_	start_char=0|end_char=3|SpacesAfter=\u00A0
2	może	móc	VERB	fin:sg:ter:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=4|end_char=8|SpacesAfter=\u00A0
3	to	to	PRON	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing|PronType=Dem	2	nsubj	_	start_char=9|end_char=11|SpacesAfter=\u00A0
4	tak	tak	ADV	adv:pos	Degree=Pos|PronType=Dem	5	advmod	_	start_char=12|end_char=15|SpacesAfter=\u00A0
5	być	być	VERB	inf:imperf	Aspect=Imp|VerbForm=Inf|Voice=Act	2	xcomp	_	start_char=16|end_char=19|SpaceAfter=No
6	?	?	PUNCT	interp	PunctType=Qest	2	punct	_	start_char=19|end_char=20

# text = Albo biało, albo czarno.
# sent_id = 1
1	Albo	albo	CCONJ	conj	_	2	cc	_	start_char=21|end_char=25|SpacesAfter=\u00A0
2	biało	biały	ADJ	adja	Hyph=Yes	0	root	_	start_char=26|end_char=31|SpaceAfter=No
3	,	,	PUNCT	interp	PunctType=Comm	5	punct	_	start_char=31|end_char=32|SpacesAfter=\u00A0
4	albo	albo	CCONJ	conj	_	5	cc	_	start_char=33|end_char=37|SpacesAfter=\u00A0
5	czarno	czarny	ADJ	adja	Hyph=Yes	2	conj	_	start_char=38|end_char=44|SpaceAfter=No
6	.	.	PUNCT	interp	PunctType=Peri	2	punct	_	start_char=44|end_char=45

# text = Czerwony jak krasnoludek.
# sent_id = 2
1	Czerwony	czerwony	ADJ	adj:sg:nom:m2:pos	Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing	0	root	_	start_char=46|end_char=54|SpacesAfter=\u00A0
2	jak	jak	SCONJ	comp	ConjType=Comp	3	mark	_	start_char=55|end_char=58|SpacesAfter=\u00A0
3	krasnoludek	krasnoludka	NOUN	subst:sg:nom:m2	Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing	1	obl:cmpr	_	start_char=59|end_char=70|SpaceAfter=No
4	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=70|end_char=71

# text = Lekki jak piórko.
# sent_id = 3
1	Lekki	lekki	ADJ	adj:sg:nom:m3:pos	Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing	0	root	_	start_char=72|end_char=77|SpacesAfter=\u00A0
2	jak	jak	SCONJ	comp	ConjType=Comp	3	mark	_	start_char=78|end_char=81|SpacesAfter=\u00A0
3	piórko	piórko	NOUN	subst:sg:nom:n:ncol	Case=Nom|Gender=Neut|Number=Sing	1	obl:cmpr	_	start_char=82|end_char=88|SpaceAfter=No
4	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=88|end_char=89

# text = Jakim sposobem się tam znalazłeś?
# sent_id = 4
1	Jakim	jaki	DET	adj:sg:inst:m3:pos	Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Int	2	det	_	start_char=90|end_char=95|SpacesAfter=\u00A0
2	sposobem	sposób	NOUN	subst:sg:inst:m3	Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing	5	obl	_	start_char=96|end_char=104|SpacesAfter=\u00A0
3	się	się	PRON	part	PronType=Prs|Reflex=Yes	5	expl:pv	_	start_char=105|end_char=108|SpacesAfter=\u00A0
4	tam	tam	ADV	adv	PronType=Dem	5	advmod	_	start_char=109|end_char=112|SpacesAfter=\u00A0
5-6	znalazłeś	_	_	_	_	_	_	_	start_char=113|end_char=122|SpaceAfter=No
5	znalazł	znaleźć	VERB	praet:sg:m1:perf	Animacy=Hum|Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	0	root	_	start_char=113|end_char=120
6	eś	być	AUX	aglt:sg:sec:imperf:wok	Aspect=Imp|Clitic=Yes|Number=Sing|Person=2|Variant=Long	5	aux:clitic	_	start_char=120|end_char=122
7	?	?	PUNCT	interp	PunctType=Qest	5	punct	_	start_char=122|end_char=123

# text = Czy byłeś w domu?
# sent_id = 5
1	Czy	czy	PART	part	PartType=Int	2	advmod	_	start_char=124|end_char=127|SpacesAfter=\u00A0
2-3	byłeś	_	_	_	_	_	_	_	start_char=128|end_char=133|SpacesAfter=\u00A0
2	był	być	VERB	praet:sg:m1:imperf	Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	0	root	_	start_char=128|end_char=131
3	eś	być	AUX	aglt:sg:sec:imperf:wok	Aspect=Imp|Clitic=Yes|Number=Sing|Person=2|Variant=Long	2	aux:clitic	_	start_char=131|end_char=133
4	w	w	ADP	prep:loc:nwok	AdpType=Prep|Variant=Short	5	case	_	start_char=134|end_char=135|SpacesAfter=\u00A0
5	domu	dom	NOUN	subst:sg:loc:m3	Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing	2	obl	_	start_char=136|end_char=140|SpaceAfter=No
6	?	?	PUNCT	interp	PunctType=Qest	2	punct	_	start_char=140|end_char=141

# text = Jest tata w domu?
# sent_id = 6
1	Jest	być	VERB	fin:sg:ter:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=142|end_char=146|SpacesAfter=\u00A0
2	tata	tata	NOUN	subst:sg:nom:m1	Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing	1	nsubj	_	start_char=147|end_char=151|SpacesAfter=\u00A0
3	w	w	ADP	prep:loc:nwok	AdpType=Prep|Variant=Short	4	case	_	start_char=152|end_char=153|SpacesAfter=\u00A0
4	domu	dom	NOUN	subst:sg:loc:m3	Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing	1	obl	_	start_char=154|end_char=158|SpaceAfter=No
5	?	?	PUNCT	interp	PunctType=Qest	1	punct	_	start_char=158|end_char=159

# text = My jesteśmy Kaszubami.
# sent_id = 7
1	My	my	PRON	ppron12:pl:nom:m1:pri	Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs	3	nsubj	_	start_char=160|end_char=162|SpacesAfter=\u00A0
2	jesteśmy	być	AUX	fin:pl:pri:imperf	Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act	3	cop	_	start_char=163|end_char=171|SpacesAfter=\u00A0
3	Kaszubami	Kaszuby	PROPN	subst:pl:inst:m1	Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur	0	root	_	start_char=172|end_char=181|SpaceAfter=No
4	.	.	PUNCT	interp	PunctType=Peri	3	punct	_	start_char=181|end_char=182

# text = My jesteśmy Kaszubami.
# sent_id = 8
1	My	my	PRON	ppron12:pl:nom:m1:pri	Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs	3	nsubj	_	start_char=183|end_char=185|SpacesAfter=\u00A0
2	jesteśmy	być	AUX	fin:pl:pri:imperf	Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act	3	cop	_	start_char=186|end_char=194|SpacesAfter=\u00A0
3	Kaszubami	Kaszuby	PROPN	subst:pl:inst:m1	Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur	0	root	_	start_char=195|end_char=204|SpaceAfter=No
4	.	.	PUNCT	interp	PunctType=Peri	3	punct	_	start_char=204|end_char=205|SpacesAfter=\s\u00A0

# text = język kaszubski.
# sent_id = 9
1	język	język	NOUN	subst:sg:nom:m3	Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing	0	root	_	start_char=207|end_char=212|SpacesAfter=\u00A0
2	kaszubski	kaszubski	ADJ	adj:sg:nom:m3:pos	Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing	1	amod	_	start_char=213|end_char=222|SpaceAfter=No
3	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=222|end_char=223

# text = Wiem, że ona je teraz obiad.
# sent_id = 10
1	Wiem	wiedzieć	VERB	fin:sg:pri:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=224|end_char=228|SpaceAfter=No
2	,	,	PUNCT	interp	PunctType=Comm	7	punct	_	start_char=228|end_char=229|SpacesAfter=\u00A0
3	że	że	SCONJ	comp	_	7	mark	_	start_char=230|end_char=232|SpacesAfter=\u00A0
4	ona	on	PRON	ppron3:sg:nom:f:ter:akc:npraep	Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long	7	nsubj	_	start_char=233|end_char=236|SpacesAfter=\u00A0
5	je	on	PRON	ppron3:pl:acc:f:ter:akc:npraep	Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long	7	obj	_	start_char=237|end_char=239|SpacesAfter=\u00A0
6	teraz	teraz	ADV	adv	_	7	advmod	_	start_char=240|end_char=245|SpacesAfter=\u00A0
7	obiad	obiad	NOUN	subst:sg:acc:m3	Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing	1	ccomp	_	start_char=246|end_char=251|SpaceAfter=No
8	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=251|end_char=252

# text = Ona jest Polką.
# sent_id = 11
1	Ona	on	PRON	ppron3:sg:nom:f:ter:akc:npraep	Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long	3	nsubj	_	start_char=253|end_char=256|SpacesAfter=\u00A0
2	jest	być	AUX	fin:sg:ter:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	3	cop	_	start_char=257|end_char=261|SpacesAfter=\u00A0
3	Polką	Polka	PROPN	subst:sg:inst:f	Case=Ins|Gender=Fem|Number=Sing	0	root	_	start_char=262|end_char=267|SpaceAfter=No
4	.	.	PUNCT	interp	PunctType=Peri	3	punct	_	start_char=267|end_char=268

# text = On by nic nie robił, tylko jadł i spał.
# sent_id = 12
1	On	on	PRON	ppron3:sg:nom:m1:ter:akc:npraep	Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long	5	nsubj	_	start_char=269|end_char=271|SpacesAfter=\u00A0
2	by	by	AUX	part	_	5	aux:cnd	_	start_char=272|end_char=274|SpacesAfter=\u00A0
3	nic	nic	PRON	subst:sg:gen:n:ncol	Case=Gen|Gender=Neut|Number=Sing|PronType=Neg	5	obj	_	start_char=275|end_char=278|SpacesAfter=\u00A0
4	nie	nie	PART	part	Polarity=Neg	5	advmod:neg	_	start_char=279|end_char=282|SpacesAfter=\u00A0
5	robił	robić	VERB	praet:sg:m1:imperf	Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	0	root	_	start_char=283|end_char=288|SpaceAfter=No
6	,	,	PUNCT	interp	PunctType=Comm	8	punct	_	start_char=288|end_char=289|SpacesAfter=\u00A0
7	tylko	tylko	CCONJ	conj	_	8	cc	_	start_char=290|end_char=295|SpacesAfter=\u00A0
8	jadł	jeść	VERB	praet:sg:m1:imperf	Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	5	conj	_	start_char=296|end_char=300|SpacesAfter=\u00A0
9	i	i	CCONJ	conj	_	10	cc	_	start_char=301|end_char=302|SpacesAfter=\u00A0
10	spał	spać	VERB	praet:sg:m1:imperf	Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	8	conj	_	start_char=303|end_char=307|SpaceAfter=No
11	.	.	PUNCT	interp	PunctType=Peri	5	punct	_	start_char=307|end_char=308

# text = Chciałem do miasta jechać z rana, alem zaspał.
# sent_id = 13
1-2	Chciałem	_	_	_	_	_	_	_	start_char=309|end_char=317|SpacesAfter=\u00A0
1	Chciał	chcieć	VERB	praet:sg:m1:imperf	Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	0	root	_	start_char=309|end_char=315
2	em	być	AUX	aglt:sg:pri:imperf:wok	Aspect=Imp|Clitic=Yes|Number=Sing|Person=1|Variant=Long	1	aux:clitic	_	start_char=315|end_char=317
3	do	do	ADP	prep:gen	AdpType=Prep	4	case	_	start_char=318|end_char=320|SpacesAfter=\u00A0
4	miasta	miasto	NOUN	subst:sg:gen:n:ncol	Case=Gen|Gender=Neut|Number=Sing	5	obl	_	start_char=321|end_char=327|SpacesAfter=\u00A0
5	jechać	jechać	VERB	inf:imperf	Aspect=Imp|VerbForm=Inf|Voice=Act	1	xcomp	_	start_char=328|end_char=334|SpacesAfter=\u00A0
6	z	z	ADP	prep:gen:nwok	AdpType=Prep|Variant=Short	7	case	_	start_char=335|end_char=336|SpacesAfter=\u00A0
7	rana	rano	NOUN	subst:sg:gen:n:ncol	Case=Gen|Gender=Neut|Number=Sing	5	obl	_	start_char=337|end_char=341|SpaceAfter=No
8	,	,	PUNCT	interp	PunctType=Comm	10	punct	_	start_char=341|end_char=342|SpacesAfter=\u00A0
9	alem	alem	CCONJ	conj	_	10	cc	_	start_char=343|end_char=347|SpacesAfter=\u00A0
10	zaspał	zaspać	VERB	praet:sg:m1:perf	Animacy=Hum|Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	1	conj	_	start_char=348|end_char=354|SpaceAfter=No
11	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=354|end_char=355

# text = Mam tylko wodę w tej butelce.
# sent_id = 14
1	Mam	mieć	VERB	fin:sg:pri:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=356|end_char=359|SpacesAfter=\u00A0
2	tylko	tylko	PART	part	_	3	advmod:emph	_	start_char=360|end_char=365|SpacesAfter=\u00A0
3	wodę	woda	NOUN	subst:sg:acc:f	Case=Acc|Gender=Fem|Number=Sing	1	iobj	_	start_char=366|end_char=370|SpacesAfter=\u00A0
4	w	w	ADP	prep:loc:nwok	AdpType=Prep|Variant=Short	6	case	_	start_char=371|end_char=372|SpacesAfter=\u00A0
5	tej	ten	DET	adj:sg:loc:f:pos	Case=Loc|Gender=Fem|Number=Sing|PronType=Dem	6	det	_	start_char=373|end_char=376|SpacesAfter=\u00A0
6	butelce	butelka	NOUN	subst:sg:loc:f	Case=Loc|Gender=Fem|Number=Sing	1	obl	_	start_char=377|end_char=384|SpaceAfter=No
7	.	.	PUNCT	interp	PunctType=Peri	1	punct	_	start_char=384|end_char=385

# text = Nie śpij teraz!
# sent_id = 15
1	Nie	nie	PART	part	Polarity=Neg	2	advmod:neg	_	start_char=386|end_char=389|SpacesAfter=\u00A0
2	śpij	śpić	VERB	impt:sg:sec:imperf	Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin|Voice=Act	0	root	_	start_char=390|end_char=394|SpacesAfter=\u00A0
3	teraz	teraz	ADV	adv	_	2	advmod	_	start_char=395|end_char=400|SpaceAfter=No
4	!	!	PUNCT	interp	PunctType=Excl	2	punct	_	start_char=400|end_char=401

# text = Śpiący kot myszy nie łapie.
# sent_id = 16
1	Śpiący	śpiący	ADJ	adj:sg:nom:m2:pos:aff	Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing	2	acl	_	start_char=402|end_char=408|SpacesAfter=\u00A0
2	kot	kot	NOUN	subst:sg:nom:m2	Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing	5	nsubj	_	start_char=409|end_char=412|SpacesAfter=\u00A0
3	myszy	mysz	NOUN	subst:sg:gen:f	Case=Gen|Gender=Fem|Number=Sing	2	nmod:arg	_	start_char=413|end_char=418|SpacesAfter=\u00A0
4	nie	nie	PART	part	Polarity=Neg	5	advmod:neg	_	start_char=419|end_char=422|SpacesAfter=\u00A0
5	łapie	łapać	VERB	fin:sg:ter:imperf	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	start_char=423|end_char=428|SpaceAfter=No
6	.	.	PUNCT	interp	PunctType=Peri	5	punct	_	start_char=428|end_char=429

# text = Moniki kot rozbił dzbanek.
# sent_id = 17
1	Moniki	Monika	PROPN	subst:sg:nom:f	Case=Nom|Gender=Fem|Number=Sing	3	nsubj	_	start_char=430|end_char=436|SpacesAfter=\u00A0
2	kot	kot	NOUN	subst:sg:nom:m2	Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing	1	flat	_	start_char=437|end_char=440|SpacesAfter=\u00A0
3	rozbił	rozbić	VERB	praet:sg:m3:perf	Animacy=Inan|Aspect=Perf|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act	0	root	_	start_char=441|end_char=447|SpacesAfter=\u00A0
4	dzbanek	dzbanek	NOUN	subst:sg:acc:m3	Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing	3	obj	_	start_char=448|end_char=455|SpaceAfter=No
5	.	.	PUNCT	interp	PunctType=Peri	3	punct	_	start_char=455|end_char=456|SpaceAfter=No

# Czech pipeline. "mwt" is listed explicitly: the default Czech package
# expects it, and Stanza otherwise adds it on its own with a warning.
nlp_cs = stanza.Pipeline('cs', processors='tokenize,mwt,pos,lemma,depparse')
doc_cs = nlp_cs("Moničina kočka rozbila džbán.")

# NOTE: the Czech parse is written to the same path the Polish cells used
# ("docpl.conllu"), overwriting it; the next cell reads from this path.
CoNLL.write_doc2conll(doc_cs, "docpl.conllu")
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
WARNING:stanza:Language cs package default expects mwt, which has been added
INFO:stanza:Loading these models for language: cs (Czech):
============================
| Processor | Package      |
----------------------------
| tokenize  | pdt          |
| mwt       | pdt          |
| pos       | pdt_nocharlm |
| lemma     | pdt_nocharlm |
| depparse  | pdt_nocharlm |
============================

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
/usr/local/lib/python3.10/dist-packages/stanza/models/tokenization/trainer.py:82: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: mwt
/usr/local/lib/python3.10/dist-packages/stanza/models/mwt/trainer.py:201: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: pos
/usr/local/lib/python3.10/dist-packages/stanza/models/pos/trainer.py:139: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
/usr/local/lib/python3.10/dist-packages/stanza/models/common/pretrain.py:56: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  data = torch.load(self.filename, lambda storage, loc: storage)
INFO:stanza:Loading: lemma
/usr/local/lib/python3.10/dist-packages/stanza/models/lemma/trainer.py:239: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: depparse
/usr/local/lib/python3.10/dist-packages/stanza/models/depparse/trainer.py:194: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Done loading processors!
!cat docpl.conllu
# text = Monikin džbán.
# sent_id = 0
1	Monikin	Monikin	PROPN	NNIS1-----A----	Animacy=Inan|Case=Nom|Gender=Masc|NameType=Oth|Number=Sing|Polarity=Pos	0	root	_	start_char=0|end_char=7
2	džbán	džbán	NOUN	NNIS1-----A----	Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos	1	nmod	_	start_char=8|end_char=13|SpaceAfter=No
3	.	.	PUNCT	Z:-------------	_	1	punct	_	start_char=13|end_char=14|SpaceAfter=No

# generator = UDPipe 2, https://lindat.mff.cuni.cz/services/udpipe
# udpipe_model = czech-pdt-ud-2.15-241121
# sent_id = 1
# text = Moničina kočka rozbila džbán.
1   Moničina    Moničin ADJ AUFS1M--------- Case=Nom|Gender=Fem|Gender[psor]=Masc|NameType=Giv|Number=Sing|Poss=Yes 2   amod    _   TokenRange=0:8
2   kočka   kočka   NOUN    NNFS1-----A---- Case=Nom|Gender=Fem|Number=Sing 3   nsubj   _   TokenRange=9:14
3   rozbila rozbít  VERB    VpQW----R-AAP-- Aspect=Perf|Gender=Fem,Neut|Number=Plur,Sing|Polarity=Pos|Tense=Past|VerbForm=Part|Voice=Act    0   root    _   TokenRange=15:22
4   džbán   džbán   NOUN    NNIS4-----A---- Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing   3   obj _   SpaceAfter=No|TokenRange=23:28
5   .   .   PUNCT   Z:------------- _   3   punct   _   SpaceAfter=No|TokenRange=28:29
# generator = UDPipe 2, https://lindat.mff.cuni.cz/services/udpipe
# udpipe_model = slovak-snk-ud-2.15-241121
# newdoc
# newpar
# sent_id = 1
# text = Moničina mačka rozbila džbán.
1   Moničina    moničin ADJ AFfs1x:r    Case=Nom|Degree=Pos|Gender=Fem|Number=Sing  2   amod    _   TokenRange=0:8
2   mačka   mačka   NOUN    SSfs1   Case=Nom|Gender=Fem|Number=Sing 3   nsubj   _   TokenRange=9:14
3   rozbila rozbiť  VERB    VLdscf+ Aspect=Perf|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Past|VerbForm=Part    0   root    _   TokenRange=15:22
4   džbán   džbán   NOUN    SSis4   Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing   3   obj _   SpaceAfter=No|TokenRange=23:28
5   .   .   PUNCT   Z   _   3   punct   _   SpaceAfter=No|TokenRange=28:29