Using IrishNLP's chunker with NLTK
Convert chunks to a tree
%%capture
!pip install nltk
%%capture
!pip install svgling
from nltk.tree import Tree
This is the output of the Irish chunker for its default sentence.
sample1 = """
[S
[V Rith ]
[NP siad NP]
[NP an rás NP] S]
"""

# Rewrite the chunker's bracket notation into the parenthesised form that
# Tree.fromstring parses: any token ending in "]" (e.g. "NP]", "S]", "]")
# becomes a bare ")", and an opening "[" becomes "(".
buf = [
    ")" if piece.endswith("]") else piece.replace("[", "(")
    for piece in sample1.replace("\n", " ").split(" ")
]
intree = " ".join(buf)
newt = Tree.fromstring(intree)
newt
sample2 = """
[S
[V Rith rith+Verb+VTI+PastInd+Len+@FMV ]
[NP siad siad+Pron+Pers+3P+Pl+Sbj+@SUBJ NP]
[NP an an+Art+Sg+Def+@>N rás rás+Noun+Masc+Com+Sg+DefArt+@OBJ NP] S]
"""

# Same conversion as above, but each word is now followed by a "+"-separated
# analysis string. The second field of that string (Verb, Pron, Art, Noun in
# this sample) becomes the label of a node wrapping the preceding word.
buf = []
last_word = ""
token_stream = (
    piece for row in sample2.split("\n") for piece in row.split(" ")
)
for tok in token_stream:
    if tok.startswith("["):
        # Chunk opener, e.g. "[NP" -> "(NP".
        buf.append(tok.replace("[", "("))
        continue
    if tok.endswith("]"):
        # Chunk closer, e.g. "NP]" or "]" -> ")".
        buf.append(")")
        continue
    if "+" not in tok:
        # A surface word: hold it until its analysis token arrives.
        last_word = tok
        continue
    # Analysis token: emit "(Tag word )" and forget the consumed word.
    buf.extend(["(" + tok.split("+")[1], last_word, ")"])
    last_word = ""
Tree.fromstring(" ".join(buf))
def fix_tree(tree_string):
    """Convert tagged chunker output into a parenthesised tree string.

    The input is whitespace-separated bracket notation in which every
    surface word is followed by a "+"-separated analysis token, e.g.
    ``[NP siad siad+Pron+Pers NP]``.  The result uses "(" / ")" brackets
    and is suitable for ``Tree.fromstring``.

    Conversion rules, applied per token in this order:

    * a token starting with "[" has its "[" replaced by "(";
    * a token ending with "]" (``]``, ``NP]``, ``S]``) becomes ")";
    * a token containing "+" becomes ``(<second field> <previous word> )``;
    * any other token is remembered as the previous word.

    Empty sentence chunks (``[S S]``, rendered as ``(S )``) are removed
    from the result.

    Args:
        tree_string: Chunker output.  Tokens may be separated by any run of
            whitespace (spaces, tabs, LF or CRLF line endings).

    Returns:
        The converted string, with tokens joined by single spaces.
    """
    buf = []
    last_word = ""
    # Split on any whitespace run rather than on single spaces/newlines:
    # an empty token would overwrite the pending word, and a trailing "\r"
    # would hide the closing "]" of a chunk.
    for tok in tree_string.split():
        if tok.startswith("["):
            buf.append(tok.replace("[", "("))
        elif tok.endswith("]"):
            buf.append(")")
        elif "+" in tok:
            # The second "+"-separated field labels the node for the word.
            tag = tok.split("+")[1]
            buf.extend(("(" + tag, last_word, ")"))
            last_word = ""
        else:
            last_word = tok
    # Drop empty sentence chunks such as a trailing "[S S]".
    return " ".join(buf).replace("(S )", "")
new_samp = """
[S
[COP An is+Cop+Pres+Q+@COP_WH ]
[PRED tusa tú+Pron+Pers+2P+Sg+Emph+@PRED ]
[V a a+Part+Vb+Rel+Direct+@>V chonaic feic+Verb+VTI+PastInd+Len+@FMV_REL ]
[NP é é+Pron+Pers+3P+Sg+Masc+@OBJ NP] ? ?+Punct+Fin+Q+ S]
[S S]
"""

# Convert the chunker output (including the empty trailing "[S S]" chunk,
# which fix_tree strips) and parse the result into an NLTK tree.
bracketed = fix_tree(new_samp)
Tree.fromstring(bracketed)