lm_models= [] lm_model_params = [] ## loading the language models for i,(lm_type,path,order) in enumerate(\ zip(\ arguments['--lm_predictors'].split(','),\ arguments["--lm_path"].split(','),\ [int(o) for o in arguments["--lm_order"].split(',')]\ )): print '...Loading lm model {}'.format(i) lm_model_folder = check_path(path, 'LM_MODEL_FOLDER_{}'.format(i), is_data_path=False) if lm_type=="srilm_char": lm_model = SRILM_char_lm_loader(path, order) elif lm_type=="srilm_morph": lm_model = SRILM_morpheme_lm_loader(path,order) else: print "WARNING -- Could not load language model. Unknown type",lm_type,". Use 'srilm_char' or 'srilm_morph'" lm_models.append(lm_model) lm_number = len(lm_models) output_file_path = os.path.join(model_folder,arguments['--pred_path']) # save best dev model parameters and predictions print 'Evaluating on test..' t = time.clock() accuracy, test_results = evaluate_syncbeam(test_data.iter(indices=[0]), ed_models, lm_models, lm_weights, int(arguments['--beam'])) print 'Time: {}'.format(time.clock()-t) print 'accuracy: {}'.format(accuracy) write_pred_file(output_file_path, test_results) write_eval_file(output_file_path, accuracy, test_path)
# --- flattened extract; logical statements collapsed onto one line ---
# End of the training loop for the SoftAttention model, followed by the
# start of the evaluation-only branch. What the code visibly does, in order:
#   1. Early-stops when `patience` reaches train_hyperparams['PATIENCE']
#      (finish progress bar, break out of the epoch loop).
#   2. After training: reloads the best checkpoint via
#      SoftAttention(pc, model_hyperparams, best_model_path), evaluates on
#      dev with beam size int(arguments['--beam']), then writes params,
#      predictions and the eval file.
#      NOTE(review): it prints `dev_accuracy` from the reload but writes
#      `best_dev_accuracy` to the eval file — presumably these are equal
#      since the best checkpoint is reloaded, but confirm upstream.
#   3. `elif arguments['test']:` opens evaluation-only mode (its matching
#      `if` — presumably the train branch — is outside this chunk): checks
#      --test_path is given (NOTE(review): `assert x != None` is stripped
#      under -O; an explicit raise would be safer), loads the test set via
#      SoftDataSet.from_file with the --input_format column spec and
#      --lowercase flag, and collects all input/output symbols into `data`
#      for the special-symbol check that follows this chunk.
if patience == train_hyperparams['PATIENCE']: print 'out of patience after {} epochs'.format(epoch) train_progress_bar.finish() break # finished epoch train_progress_bar.update(epoch) print 'finished training.' ti = SoftAttention(pc, model_hyperparams, best_model_path) dev_accuracy, dev_results = ti.evaluate(dev_data.iter(), int(arguments['--beam'])) print 'Best dev accuracy: {}'.format(dev_accuracy) write_param_file(output_file_path, dict(model_hyperparams.items()+train_hyperparams.items())) write_pred_file(output_file_path, dev_results) write_eval_file(output_file_path, best_dev_accuracy, dev_path) elif arguments['test']: print '=========EVALUATION ONLY:=========' # requires test path, model path of pretrained path and results path where to write the results to assert arguments['--test_path']!=None print 'Loading data...' test_path = check_path(arguments['--test_path'], '--test_path') data_set = SoftDataSet input_format = [int(col) for col in arguments['--input_format'].split(',')] test_data = data_set.from_file(test_path,input_format, arguments['--lowercase']) print 'Test data has {} examples'.format(test_data.length) print 'Checking if any special symbols in data...' data = set(test_data.inputs + test_data.outputs)
# --- flattened extract; logical statements collapsed onto one line ---
# Counterpart of the SoftAttention chunk above, but for a language model
# tracked by perplexity. This chunk begins mid-statement: the leading
# `).format(...)` closes a multi-line epoch-summary string whose opening
# is outside this view. What the code visibly does, in order:
#   1. Formats the per-epoch summary (losses + perplexities) and logs
#      train/dev perplexity via log_to_file.
#   2. The patience-based early-stopping block is commented out here
#      (unlike the SoftAttention variant), so training always runs the
#      full epoch count.
#   3. After training, writes hyperparameters and the eval file, recording
#      best_dev_perplexity under the metric label 'Perplexity'. No
#      prediction file is written in this variant.
#   4. `elif arguments['test']:` opens evaluation-only mode (matching
#      `if` outside this chunk): requires --test_path (same
#      `assert != None` caveat as above), reads the --segments flag, and
#      selects an input column count from the mutually exclusive
#      --segformat (3) / --dictformat (1) flags, defaulting to 2.
).format(epoch, avg_train_loss, avg_dev_loss, train_perplexity, dev_perplexity, best_dev_perplexity) log_to_file(log_file_name, epoch, train_perplexity, dev_perplexity) # if patience == max_patience: # print 'out of patience after {} epochs'.format(epoch) # train_progress_bar.finish() # break # finished epoch train_progress_bar.update(epoch) print 'finished training.' # save best dev model parameters write_param_file(output_file_path, dict(model_hyperparams.items()+train_hyperparams.items())) write_eval_file(output_file_path, best_dev_perplexity, dev_path, 'Perplexity') elif arguments['test']: print '=========EVALUATION ONLY:=========' # requires test path, model path of pretrained path and results path where to write the results to assert arguments['--test_path']!=None print 'Loading data...' over_segs = arguments['--segments'] test_path = check_path(arguments['--test_path'], '--test_path') if arguments['--segformat']: col_format=3 elif arguments['--dictformat']: col_format=1 else: col_format=2