def predict_a_file(test_file, out_file, model, add_o_tag): assert os.path.isfile(test_file) model = Model(model_path=model) parameters = model.parameters # Data parameters lower = parameters['lower'] zeros = parameters['zeros'] tag_scheme = model.parameters['tag_scheme'] # Load reverse mappings word_to_id, char_to_id, tag_to_id = [{ v: k for k, v in x.items() } for x in [model.id_to_word, model.id_to_char, model.id_to_tag]] print 'Reloading previous model...' _, f_eval = model.build(training=False, **parameters) model.reload() test_sentences = loader.load_sentences(test_file, lower, zeros) update_tag_scheme(test_sentences, tag_scheme) test_data = prepare_dataset2(test_sentences, word_to_id, char_to_id, tag_to_id, model.feature_maps, lower) print "input: ", test_file, ":", len(test_sentences), len(test_data) print "output: ", out_file predict(parameters, f_eval, test_sentences, test_data, model.id_to_tag, out_file, add_O_tags=add_o_tag)
# --- Script fragment: report model size, then tag `opts.test_file` ---
# NOTE(review): relies on names bound earlier in the (unseen) enclosing
# scope: `model`, `opts`, `lower`, `zeros`, `tag_scheme`, `word_to_id`,
# `char_to_id`, `tag_to_id`, `parameters`, `f_eval`.
from tabulate import tabulate
# Print a table of parameter tensors (name/shape/size) for the
# embedding layers and the rest of the model separately.
embeddings_params, model_params = model.count_parameters()
print tabulate(embeddings_params, ["name", "shape", "size"])
print tabulate(model_params, ["name", "shape", "size"])
# Dump the full hyper-parameter dictionary, one "key = value" per line.
print "Parameters: \n *", "\n * ".join([str(k) + " = " + str(v) for k, v in model.parameters.items()])
# print "Parameters: \n", model.parameters
test_file = opts.test_file
out_file = opts.out_file
# Preprocess the test file with the same lower/zeros settings and tag
# scheme the model was trained with.
test_sentences = loader.load_sentences(test_file, lower, zeros)
update_tag_scheme(test_sentences, tag_scheme)
test_data = prepare_dataset2(
    test_sentences, word_to_id, char_to_id, tag_to_id, model.feature_maps, lower
)
print "input: ", test_file, ":", len(test_sentences), len(test_data)
print "output: ", out_file
import datetime
# Time the prediction pass end-to-end.
t1 = datetime.datetime.now()
predict(parameters, f_eval, test_sentences, test_data, model.id_to_tag, out_file, add_O_tags=opts.add_o_tag)
t2 = datetime.datetime.now()
print "done in ", (t2-t1).total_seconds(), "seconds"
print "finish !!!"
# --- Script fragment: build mappings for one extra feature `f`, then
# prepare all datasets and persist the mappings ---
# NOTE(review): `f` is bound by an enclosing loop over feature configs
# that is outside this view; `train_sentences`, `dev_sentences`,
# `test_sentences`, `word_to_id`, `char_to_id`, `tag_to_id`, `lower`,
# `id_to_word`, `id_to_char`, `id_to_tag`, `feature_maps`, and `model`
# are likewise bound earlier in the (unseen) enclosing scope.
print "--------------------"
# Build the dictionary and the two-way id mappings for this feature's
# tag values over the training sentences.
dico_ftag, ftag_to_id, id_to_ftag = feature_mapping(train_sentences, f)
print 'feature_name:', f['name']
print 'dico_ftag :', len(dico_ftag)
print 'ftag_to_id :', len(ftag_to_id)
print 'id_to_ftag :', len(id_to_ftag)
# Record everything needed to embed this feature at train/predict time.
feature_maps.append({
    'name': f['name'],
    'column': f['column'],  # presumably the token column holding this feature — TODO confirm
    'dim': f['dim'],
    'dico_ftag': dico_ftag,
    'ftag_to_id': ftag_to_id,
    'id_to_ftag': id_to_ftag
})
# Index all three splits with the shared word/char/tag/feature mappings.
train_data = prepare_dataset2(train_sentences, word_to_id, char_to_id, tag_to_id, feature_maps, lower)
dev_data = prepare_dataset2(dev_sentences, word_to_id, char_to_id, tag_to_id, feature_maps, lower)
test_data = prepare_dataset2(test_sentences, word_to_id, char_to_id, tag_to_id, feature_maps, lower)
print "%i / %i / %i sentences in train / dev / test." % (
    len(train_data), len(dev_data), len(test_data))
# Save the mappings to disk
print 'Saving the mappings to disk...'
model.save_mappings(id_to_word, id_to_char, id_to_tag, feature_maps)
# Build the model
# f_train, f_eval, f_test = model.build(**parameters)