def POST(self):
    web.header('Content-Type', 'application/json')
    post_data = web.input(_method='post')

    # Write the request text to a temporary input file so the loader
    # can read it like a regular dataset file.
    file_id = np.random.randint(1000000, 2000000)
    input_path = os.path.join("tmp", "user.%i.input" % file_id)
    with codecs.open(input_path, "w", "utf-8") as f:
        f.write(post_data["text"])

    # Load and preprocess the sentences with the same pipeline as training.
    test_sentences = loader.load_sentences(input_path, lower, zeros)
    update_tag_scheme(test_sentences, tag_scheme)
    test_data = prepare_dataset3(
        test_sentences, word_to_id, char_to_id,
        model.tag_maps, model.feature_maps, lower
    )

    # Run the tagger and rebuild the raw text from the token column.
    out_sentences = predict_multilayer(parameters, f_eval, test_sentences,
                                       test_data, model.tag_maps, None)
    text = " ".join([line[0] for s in out_sentences for line in s])

    data = {"sentences": out_sentences, "text": text}
    return json.dumps(data, indent=4, sort_keys=True, encoding="utf-8")
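# Example (hedged): a minimal client-side sketch of how this endpoint could
# be called. The route "http://localhost:8080/tag" and the use of the
# `requests` library are assumptions, not part of this repo; adjust to
# wherever the web.py application is actually mounted.
def example_tag_request():
    import requests  # assumed available on the client side
    resp = requests.post("http://localhost:8080/tag",  # hypothetical route
                         data={"text": u"Barack Obama visited Paris ."})
    return resp.json()  # {"sentences": [...], "text": "..."}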
def POST(self):
    post_data = web.input(_method='post')
    feature_type = post_data["feature"]
    f_eval, model, parameters, lower, zeros, tag_scheme, \
        word_to_id, char_to_id = web.pre_load_data[feature_type]
    format = post_data["format"] if "format" in post_data else "json"

    # Write the request text to a temporary input file.
    file_id = np.random.randint(1000000, 2000000)
    input_path = os.path.join("temp", "user.%i.input" % file_id)
    with codecs.open(input_path, "w", "utf-8") as f:
        f.write(post_data["text"])

    # Preprocess with the same multilayer pipeline as training.
    gold_columns = [int(x['column']) for x in model.tag_maps]
    test_sentences = loader.load_sentences(input_path, lower, zeros)
    update_tag_scheme_multilayer(test_sentences, gold_columns, tag_scheme)
    test_data = prepare_dataset3(
        test_sentences, word_to_id, char_to_id,
        model.tag_maps, model.feature_maps, lower
    )

    out_sentences = predict_multilayer(parameters, f_eval, test_sentences,
                                       test_data, model.tag_maps, None)
    results = extract_re_from_conll_data(out_sentences)

    if format == "json":
        web.header('Content-Type', 'application/json')
        data = {"conll_data": out_sentences, "text_data": results}
        return json.dumps(data, indent=4, sort_keys=True, encoding="utf-8")
    else:
        # Rebuild plain CoNLL text: tab-separated columns, one token per
        # line, blank line between sentences.
        conll_text = "\n\n".join(["\n".join(["\t".join(l) for l in item])
                                  for item in out_sentences])
        return conll_text
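# Example (hedged): calling the multilayer endpoint and asking for CoNLL
# output instead of JSON. The route, host/port, and the "pos" feature key
# are assumptions; "feature" must match a key in web.pre_load_data and
# "format" follows the handler above.
def example_conll_request():
    import requests  # assumed available on the client side
    resp = requests.post("http://localhost:8080/tag_multilayer",  # hypothetical route
                         data={"text": u"John lives in New York .",
                               "feature": "pos",   # hypothetical feature key
                               "format": "conll"})
    return resp.text  # tab-separated columns, blank line between sentences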
print 'Reloading previous model...'
_, f_eval = model.build(training=False, **parameters)
model.reload()

assert os.path.isfile(opts.test_file)
test_file = opts.test_file
out_txt = opts.out_txt
out_json = opts.out_json

# Load and preprocess the test set with the training-time settings.
test_sentences = loader.load_sentences(test_file, lower, zeros)
update_tag_scheme(test_sentences, tag_scheme)
test_data = prepare_dataset3(
    test_sentences, word_to_id, char_to_id,
    model.tag_maps, model.feature_maps, lower
)

print "input: ", test_file
from pprint import pprint
pprint(model.tag_maps)

test_score, iob_test_score, result_test, _ = evaluate_multilayer(
    parameters, f_eval, test_sentences, test_data, model.tag_maps)
print_evaluation_result(result_test)
print "OVERALL: %f" % test_score
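# A minimal sketch of the IOB -> IOBES conversion that update_tag_scheme is
# assumed to perform when tag_scheme == "iobes" (following the usual B-/I-/O
# convention used by CoNLL-style taggers). This function name is illustrative
# and not part of the repo.
def iob_to_iobes_sketch(tags):
    new_tags = []
    for i, tag in enumerate(tags):
        nxt = tags[i + 1] if i + 1 < len(tags) else "O"
        if tag == "O":
            new_tags.append(tag)
        elif tag.startswith("B-"):
            # B- stays B- if the entity continues, otherwise becomes S-
            new_tags.append(tag if nxt == "I-" + tag[2:] else "S-" + tag[2:])
        elif tag.startswith("I-"):
            # I- stays I- if the entity continues, otherwise becomes E-
            new_tags.append(tag if nxt == "I-" + tag[2:] else "E-" + tag[2:])
        else:
            raise ValueError("Invalid IOB tag: %s" % tag)
    return new_tags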
print "--------------------" dico_ftag, ftag_to_id, id_to_ftag = feature_mapping(train_sentences, f) print 'feature_name:', f['name'] print 'dico_ftag :', dico_ftag print 'ftag_to_id :', ftag_to_id print 'id_to_ftag :', id_to_ftag feature_maps.append({ 'name': f['name'], 'column': f['column'], 'dim': f['dim'], 'dico_ftag': dico_ftag, 'ftag_to_id': ftag_to_id, 'id_to_ftag': id_to_ftag }) train_data = prepare_dataset3(train_sentences, word_to_id, char_to_id, tag_maps, feature_maps, lower) dev_data = prepare_dataset3(dev_sentences, word_to_id, char_to_id, tag_maps, feature_maps, lower) test_data = prepare_dataset3(test_sentences, word_to_id, char_to_id, tag_maps, feature_maps, lower) print "%i / %i / %i sentences in train / dev / test." % ( len(train_data), len(dev_data), len(test_data)) print "----------------------A training instance -------------------------" print "{" for key in train_data[0].keys(): print "\t" + key, ":", train_data[0][key] print "}" print "-------------------------------------------------------------------"