Пример #1
0
import sys
from lemmatizer import Lemmatizer
# Command-line interface: two positional arguments, the source and target
# language codes. Each code selects the lexicon file for its lemmatizer and is
# also passed to get_lang() in the main loop. Extra arguments are ignored.
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: <script> SRC_LANG TGT_LANG  (input is read from stdin)")
src = sys.argv[1]
tgt = sys.argv[2]

# All lemmatizer resources live in one directory; keep the location in a
# single place so it only has to be changed once.
_SCRIPTS_DIR = "/home/big_maggie/usr/nmt_scripts"
_LEMM_LIB = _SCRIPTS_DIR + "/liblemm.so"
_LEX_TEMPLATE = _SCRIPTS_DIR + "/lgmf_%s.lex"


def _make_lemmatizer(lang):
    """Return a Lemmatizer for *lang* using the shared library and lexicon.

    The lexicon path is derived from *lang* via ``_LEX_TEMPLATE``. The third
    positional argument ("il2") is passed through unchanged; it is presumably
    an encoding name -- TODO confirm against the Lemmatizer implementation.
    """
    return Lemmatizer(lang, _LEX_TEMPLATE % lang, "il2", path=_LEMM_LIB)


# NOTE: despite the names, these are simply the source- and target-language
# lemmatizers; the names are kept for compatibility.
lemm_cz = _make_lemmatizer(src)
lemm_en = _make_lemmatizer(tgt)

# TODO: for every sentence, replace the entities created by the tokenizer
# with the original tokens again.
for line in sys.stdin:
    # The same input line (trailing newline included) is given to both
    # lemmatizers. 0.5 is passed as get_lang's second argument; its meaning
    # is defined by Lemmatizer (looks like a threshold -- TODO confirm).
    src_result = lemm_cz.get_lang(line, 0.5, src)
    tgt_result = lemm_en.get_lang(line, 0.5, tgt)
    # One output row per input line: source result, TAB, target result.
    print('\t'.join((str(src_result), str(tgt_result))))