!pip install simalign
# Example sentence pair, already split into word tokens (punctuation is its
# own token). SRC is the Irish sentence, TRG its English translation.
SRC = "Chonaic mé é .".split()
TRG = "I saw him .".split()
from simalign import SentenceAligner

# Aligner settings: "bert" model with BPE (sub-word) tokens. The three letters
# in "mai" request three matching methods, whose results come back under the
# keys "mwmf", "inter" and "itermax".
aligner_options = {
    "model": "bert",
    "token_type": "bpe",
    "matching_methods": "mai",
}
myaligner = SentenceAligner(**aligner_options)

# Mapping of matching-method name -> list of (source index, target index) pairs.
alignments = myaligner.get_word_aligns(SRC, TRG)

# Show the raw index pairs produced by each matching method.
for matching_method, index_pairs in alignments.items():
    print(matching_method, ":", index_pairs)
2024-11-17 20:33:38,545 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: bert-base-multilingual-cased
INFO:simalign.simalign:Initialized the EmbeddingLoader with model: bert-base-multilingual-cased
mwmf : [(0, 1), (1, 0), (2, 2), (3, 3)]
inter : [(0, 0), (3, 3)]
itermax : [(0, 0), (2, 2), (3, 3)]
# Turn each (source index, target index) pair of the "mwmf" alignment back
# into the two words it links, printed as "source : target".
for pair in alignments["mwmf"]:
    src_idx, trg_idx = pair
    print(SRC[src_idx], ":", TRG[trg_idx])
Chonaic : saw
mé : I
é : him
. : .