def main(): english = tokenize("data/100ktok.low.en") spanish = tokenize("data/100ktok.low.es") training_set, held_out_set, test_set = get_datasets(english, spanish) translations = get_word_translations("100000_trans.txt") search = BeamSearch(training_set, held_out_set, translations) print search.translate(test_set[8])
def main(): english = tokenize("data/100ktok.low.en") spanish = tokenize("data/100ktok.low.es") training_set, test_set, translated_set = get_datasets(english, spanish) translations = get_word_translations("3000_trans.txt") print "Original Sentence:", ' '.join(test_set[0]) translator = DirectTrans(translations) print "Direct Translation:", ' '.join(translator.translate(test_set[0])) test_output = open('trans_beam.txt','w') true_output = open('trans_true.txt','w') search = BeamSearch(training_set, translations) print "Beam Translation:", ' '.join(search.translate(test_set[0])) print "True Translation:", ' '.join(translated_set[0])
def main(): english = tokenize("data/100ktok.low.en") spanish = tokenize("data/100ktok.low.es") training_set, test_set, translated_set = get_datasets(english, spanish) translations = get_word_translations("3000_trans.txt") search = BeamSearch(training_set, translations) test_output = open('trans_beam.txt','w') true_output = open('trans_true.txt','w') for i in range(len(test_set)): print "Translating sentence", i, "..." test_output.write(' '.join(search.translate(test_set[i])) + "\n") true_output.write(' '.join(translated_set[i]) + "\n") test_output.close() true_output.close()