def get_settings(self):
    """Create a TranslationSettings object wired to the en-de files.

    The returned settings carry open file objects for ``../../en-de/in``
    (read) and ``../../en-de/out`` (write); both paths are resolved against
    the current working directory at call time. The files are NOT closed
    here -- they are handed back on the settings object.

    Returns:
        The configured ``TranslationSettings`` instance (single model
        ``model.npz``, beam size 12, normalization alpha 1.0).
    """
    settings = TranslationSettings()

    # Open the input before the output, so a missing input file fails
    # before the output file is created/truncated.
    source_stream = open('../../en-de/in')
    settings.input = source_stream
    target_stream = open('../../en-de/out', 'w')
    settings.output = target_stream

    # Remaining options, applied in a fixed order.
    for option, value in (
        ('models', ["model.npz"]),
        ('beam_size', 12),
        ('normalization_alpha', 1.0),
    ):
        setattr(settings, option, value)

    return settings
def get_settings(self):
    """Create a TranslationSettings object with this test's fixed options.

    Returns:
        A new ``TranslationSettings`` instance with ``models`` set to
        ``["model.npz"]``, ``num_processes`` 1, ``beam_width`` 12,
        ``normalization_alpha`` 1.0 and ``suppress_unk`` enabled.
    """
    settings = TranslationSettings()

    # (attribute, value) pairs, applied in this exact order.
    overrides = (
        ('models', ["model.npz"]),
        ('num_processes', 1),
        ('beam_width', 12),
        ('normalization_alpha', 1.0),
        ('suppress_unk', True),
    )
    for attribute, value in overrides:
        setattr(settings, attribute, value)

    return settings
def get_settings(model, beam):
    """Create a TranslationSettings object for one model and beam width.

    Args:
        model: Value stored as the only entry of ``models``.
        beam: Value stored as ``beam_width``.

    Returns:
        A new ``TranslationSettings`` instance that additionally has
        ``num_processes`` 1, ``normalization_alpha`` 1.0, ``suppress_unk``
        enabled and ``get_word_probs`` disabled.
    """
    cfg = TranslationSettings()

    # Caller-supplied values are interleaved with the fixed ones so the
    # attributes are still assigned in their established order.
    cfg.models = [model]
    cfg.num_processes = 1
    cfg.beam_width = beam
    cfg.normalization_alpha = 1.0

    # Boolean switches.
    cfg.suppress_unk = True
    cfg.get_word_probs = False

    return cfg
def test_ende(self):
    """Translate ``en-de/in`` into ``en-de/out`` and compare with ``en-de/ref2``.

    The input and output files are opened relative to the starting
    directory. The process then changes into ``models/en-de/`` for the
    duration of the ``translate`` call (presumably so the relative model
    path ``model.npz`` resolves there -- confirm against ``translate``).

    The starting directory is always restored, even when ``translate``
    raises, so a failure here cannot break the relative paths used by
    other tests running in the same process.
    """
    start_dir = os.getcwd()
    with open('en-de/in', 'r', encoding='utf-8') as in_file, \
            open('en-de/out', 'w', encoding='utf-8') as out_file:
        os.chdir('models/en-de/')
        try:
            settings = TranslationSettings()
            settings.input = in_file
            settings.output = out_file
            settings.models = ["model.npz"]
            settings.beam_size = 12
            settings.normalization_alpha = 1.0
            translate(settings=settings)
        finally:
            # Restore the absolute starting directory rather than relying
            # on a relative '../..' that is only reached on success.
            os.chdir(start_dir)
    # Compare only after the with-block has closed (and therefore flushed)
    # the output file.
    self.outputEqual('en-de/ref2', 'en-de/out')
def test_ende(self):
    """Run the en-de translation and check it against the reference output.

    ``en-de/in`` is read and ``en-de/out`` is written; both are opened
    relative to the directory the test starts in. ``translate`` itself runs
    with ``models/en-de/`` as the working directory (presumably so that the
    relative model path ``model.npz`` is found -- confirm against
    ``translate``).

    The original working directory is restored in a ``finally`` clause so
    that an exception inside ``translate`` does not leave the process in
    ``models/en-de/`` for subsequent tests.
    """
    original_cwd = os.getcwd()
    with open('en-de/in', 'r', encoding='utf-8') as in_file, \
            open('en-de/out', 'w', encoding='utf-8') as out_file:
        os.chdir('models/en-de/')
        try:
            settings = TranslationSettings()
            settings.input = in_file
            settings.output = out_file
            settings.models = ["model.npz"]
            settings.beam_size = 12
            settings.normalization_alpha = 1.0
            translate(settings=settings)
        finally:
            # Go back to the recorded absolute path; a relative '../..'
            # would be skipped on failure.
            os.chdir(original_cwd)
    # The output file is closed (and flushed) here, so the comparison sees
    # the complete translation.
    self.outputEqual('en-de/ref2', 'en-de/out')
def outputEqual(output1, output2):
    """Print the lines of two translation output files side by side.

    Despite its name, this helper currently performs no comparison: the
    equality assertion is commented out, so it only prints each pair of
    lines for manual inspection. Iteration stops at the end of the shorter
    file.

    Args:
        output1: Path of the first translation output file.
        output2: Path of the second translation output file.
    """
    # Context managers make sure both files are closed once the loop ends.
    with open(output1) as first_file, open(output2) as second_file:
        for i, (line, line2) in enumerate(zip(first_file, second_file)):
            # NOTE: the actual check is disabled.
            # assertEqual(line.strip(), line2.strip())
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("translate {}".format(i))
            print(line)
            print(line2)


# English-German WMT16 system, no dropout
# Initialize and customize settings.
translation_settings = TranslationSettings()
translation_settings.models = ["model_test/model.npz-80000"]
# translation_settings.num_processes = 8
translation_settings.beam_width = 10
translation_settings.normalization_alpha = 1.0
translation_settings.verbose = True
translation_settings.n_best = True
translation_settings.suppress_unk = True

# Close both files as soon as translation finishes, so the output is
# flushed to disk before success is reported.
with open('data/translated/fr.00') as input_file, \
        open('data/fr.00.8.out', 'w') as output_file:
    translate(input_file=input_file,
              output_file=output_file,
              translation_settings=translation_settings)

print("everyting ok")
# outputEqual('en-de/ref2','en-de/out')