simalign
BERT-based word aligner
!pip install simalign
from simalign import SentenceAligner

# Pre-tokenised parallel sentence pair: source-side tokens and target-side tokens.
SRC = ["Chonaic", "mé", "é", "."]
TRG = ["I", "saw", "him", "."]

# Build the aligner once; it is then used to align the sentence pair below.
# NOTE(review): per the simalign README, each letter of matching_methods selects
# one method ("m" -> "mwmf", "a" -> "inter", "i" -> "itermax") -- confirm against
# the installed version.
myaligner = SentenceAligner(model="bert", token_type="bpe", matching_methods="mai")

# The result is indexed by matching-method name below, i.e. it is a mapping
# from method name to that method's alignment output.
alignments = myaligner.get_word_aligns(SRC, TRG)

# Show the raw alignment output of every matching method.
for matching_method, aligned_pairs in alignments.items():
    print(matching_method, ":", aligned_pairs)

# Render the "mwmf" alignment as words: each pair holds an index into SRC
# followed by an index into TRG.
for src_idx, trg_idx in alignments["mwmf"]:
    print(SRC[src_idx], ":", TRG[trg_idx])