def conlleval(mode, label_predict, result_file, negative_label, iob2iobes):
    """Score tagged sentences, or dump the predictions to a file in test mode.

    :param mode: any value other than ``'test'`` computes and returns the
        scores; ``'test'`` writes the predictions to ``result_file`` and then
        terminates the process through ``exit()``.
    :param label_predict: iterable of sentences, each an iterable of
        ``(char, gold_tag, predicted_tag)`` triples.
    :param result_file: path of the output file (only used in ``'test'`` mode).
    :param negative_label: tag that does not take part in the F1 computation;
        forwarded unchanged to ``BIO_F1score``.
    :param iob2iobes: when truthy, every predicted tag is passed through
        ``iobes_iob`` before it is scored or written.
    :return: ``(F1, precision, recall)`` from ``BIO_F1score`` when
        ``mode != 'test'``; in ``'test'`` mode the function never returns.
    """
    lines = []
    predicted_tags = []
    gold_tags = []
    for sent_result in label_predict:
        for char, tag, tag_ in sent_result:  # character, gold tag, predicted tag
            if iob2iobes:
                tag_ = iobes_iob([tag_])[0]
            lines.append("{} {}\n".format(char, tag_))  # character, predicted tag
            predicted_tags.append(tag_)
            gold_tags.append(tag)
        # A blank line separates consecutive sentences in the output.
        lines.append("\n")

    # Drop trailing separators.  The emptiness guard matters: without it an
    # empty input (or one made only of empty sentences) raised IndexError.
    while lines and lines[-1] == '\n':
        lines.pop()

    if mode != 'test':
        F1, precision, recall = BIO_F1score(predict=predicted_tags,
                                            target=gold_tags,
                                            negative_label=negative_label)
        return F1, precision, recall

    with open(result_file, "w", encoding='utf-8') as fw:
        print('结果保存在%s' % result_file)
        fw.writelines(lines)
    # Test mode ends the whole program once the predictions are on disk.
    exit()
def tags_to_labels(y_reals, y_preds, id_to_tag, iobes):
    """Flatten per-sentence tag ids into two parallel lists of tag names.

    Each sentence is mapped through ``id_to_tag`` (and through ``iobes_iob``
    when ``iobes`` is truthy), appended to the flat lists, and followed by a
    single ``'O'`` so consecutive sentences stay separated.

    :param y_reals: iterable of sentences of gold tag ids.
    :param y_preds: iterable of sentences of predicted tag ids, aligned with
        ``y_reals``.
    :param id_to_tag: mapping from tag id to tag name.
    :param iobes: when truthy, convert both tag sequences with ``iobes_iob``.
    :return: ``(gold_tag_names, predicted_tag_names)``.
    """
    y_prds = []
    y_rels = []
    for y_real, y_pred in zip(y_reals, y_preds):
        # Report (but tolerate) misaligned sentences.  An explicit check is
        # used instead of ``try: assert ... except:`` so the diagnostic still
        # fires under ``python -O`` and no unrelated exception is swallowed.
        if len(y_pred) != len(y_real):
            print(y_real, y_pred)
        p_tags = [id_to_tag[y] for y in y_pred]
        r_tags = [id_to_tag[y] for y in y_real]
        if iobes:
            p_tags = iobes_iob(p_tags)
            r_tags = iobes_iob(r_tags)
        y_prds.extend(p_tags)
        y_rels.extend(r_tags)
        # Sentence boundary marker.
        y_prds.append('O')
        y_rels.append('O')
    return y_rels, y_prds
def tag():
    """Tag the text of a POST request and return the result as JSON.

    The JSON payload is read for the keys ``text``, ``split_sentences`` and
    ``tokenize``.  The response is a JSON list with one entry per non-empty
    sentence, each entry a list of ``(word, tag)`` pairs.

    NOTE(review): for any method other than POST the function falls through
    and returns ``None``; confirm the route only accepts POST.
    NOTE(review): when ``tokenize`` is set without ``split_sentences``, ``text``
    is iterated directly, so it is presumably already a list of sentences.
    """
    if request.method == 'POST':
        data = request.get_json()
        text = data['text']
        if data['split_sentences']:
            sentences = split_sentences(text)
        else:
            sentences = text
        if data['tokenize'] or data['split_sentences']:
            tokenized_sentences = [tokenize(s) for s in sentences]
        else:
            tokenized_sentences = text

        count = 0
        output = []
        for words in tokenized_sentences:
            if len(words) == 0:
                continue
            # Normalize a copy of the tokens for the model and keep ``words``
            # untouched for the response.  The previous code normalized an
            # undefined name ``line`` and raised NameError whenever the
            # 'lower' or 'zeros' option was enabled.
            model_words = words
            # Lowercase sentence
            if model.parameters['lower']:
                model_words = [w.lower() for w in model_words]
            # Replace all digits with zeros
            if model.parameters['zeros']:
                model_words = [zero_digits(w) for w in model_words]
            # Prepare input
            sentence = prepare_sentence(model_words,
                                        word_to_id,
                                        char_to_id,
                                        lower=model.parameters['lower'])
            model_input = create_input(sentence, model.parameters, False)
            # Decoding
            if model.parameters['crf']:
                # The CRF output carries one extra element at each end.
                y_preds = np.array(f_eval(*model_input))[1:-1]
            else:
                y_preds = f_eval(*model_input).argmax(axis=1)
            y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            if model.parameters['tag_scheme'] == 'iobes':
                y_preds = iobes_iob(y_preds)
            # Write tags
            assert len(y_preds) == len(
                words
            ), "Predictions have different length than sentence. Something went wrong."
            output.append(list(zip(words, y_preds)))
            count += 1
            if count % 100 == 0:
                logging.info(count)
        return jsonify(output)
def evaluate(self, sess, data_manager, id_to_tag):
    """
    Decode every batch of ``data_manager`` and pair gold tags with predictions.

    :param sess: session to run the model
    :param data_manager: object whose ``iter_batch()`` yields batches; the
        first element of a batch holds the characters and the last one the
        gold tag ids
    :param id_to_tag: index to tag name
    :return: evaluate result -> one list per sentence, each item formatted as
        "char gold_tag predicted_tag"
    """
    results = []
    trans_matrix = self.trans.eval()
    for batch in data_manager.iter_batch():
        chars = batch[0]
        tag_idx = batch[-1]
        # ``is_train`` is passed positionally: the former call
        # ``run_step(sess, is_train=False, batch)`` was a SyntaxError
        # (positional argument after a keyword argument).
        # NOTE(review): assumes the signature is run_step(sess, is_train, batch).
        lengths, scores = self.run_step(sess, False, batch)
        batch_paths = self.decode(scores, lengths, trans_matrix)
        for i in range(len(chars)):  # len(chars) -> batch size
            length = lengths[i]
            string = chars[i][:length]  # characters within the sentence length
            gold_tags = utils.iobes_iob(
                [id_to_tag[int(x)] for x in tag_idx[i][:length]])
            pred_tags = utils.iobes_iob(
                [id_to_tag[int(x)] for x in batch_paths[i][:length]])
            # Distinct loop names: the old loop rebound ``gold``/``pred``
            # while iterating over the lists of the same names.
            result = [
                " ".join([char, gold, pred])
                for char, gold, pred in zip(string, gold_tags, pred_tags)
            ]
            results.append(result)
    return results
def tag(model, line):
    """Load the model stored at ``model`` and tag one whitespace-split line.

    :param model: path handed to ``Model(model_path=...)``.
    :param line: sentence to tag; it is split on whitespace.
    :return: the original words joined by spaces, each suffixed with ``__``
        and its predicted tag.
    """
    # Load existing model
    print("Loading model...")
    model = Model(model_path=model)
    parameters = model.parameters

    # Reverse mappings (item -> id), built in the order word, char, tag.
    reversed_maps = []
    for id_to_item in (model.id_to_word, model.id_to_char, model.id_to_tag):
        reversed_maps.append({item: idx for idx, item in id_to_item.items()})
    word_to_id, char_to_id, tag_to_id = reversed_maps

    # Build the evaluation function and load the trained weights.
    _, f_eval = model.build(training=False, **parameters)
    model.reload()

    start = time.time()
    print('Tagging...')

    # The untouched words are kept for the output string.
    words_ini = line.rstrip().split()
    # Replace all digits with zeros
    normalized_line = zero_digits(line) if parameters['zeros'] else line
    words = normalized_line.rstrip().split()

    # Prepare input
    sentence = prepare_sentence(words,
                                word_to_id,
                                char_to_id,
                                lower=parameters['lower'])
    model_input = create_input(sentence, parameters, False)

    # Decoding
    if parameters['crf']:
        tag_ids = np.array(f_eval(*model_input))[1:-1]
    else:
        tag_ids = f_eval(*model_input).argmax(axis=1)
    y_preds = [model.id_to_tag[tag_id] for tag_id in tag_ids]

    # Output tags in the IOB2 format
    if parameters['tag_scheme'] == 'iobes':
        y_preds = iobes_iob(y_preds)

    assert len(y_preds) == len(words)
    print('---- sentence tagged in %.4fs ----' % (time.time() - start))

    tagged_words = []
    for word, label in zip(words_ini, y_preds):
        tagged_words.append(word + '__' + str(label))
    return ' '.join(tagged_words)
def tag_document(doc, parameters, model, f_eval, word_to_id, char_to_id):
    """Tag every non-empty sentence of ``doc``.

    :param doc: object exposing ``sentences``; each sentence exposes
        ``tokens`` whose items carry the token text in ``orth_``.
    :param parameters: model parameters; the keys ``lower``, ``zeros``,
        ``crf`` and ``tag_scheme`` are read here.
    :param model: model object; only ``id_to_tag`` is used.
    :param f_eval: evaluation function called with the prepared input.
    :param word_to_id: word -> id mapping passed to ``prepare_sentence``.
    :param char_to_id: char -> id mapping passed to ``prepare_sentence``.
    :return: ``(all_ypreds, all_tokens)``: per sentence, the predicted labels
        with their IOB prefix stripped and the tokens as fed to the model
        (i.e. after the optional lowercasing / digit replacement).
    """
    count = 0
    all_ypreds = []
    all_tokens = []
    for line in doc.sentences:
        toks_text = [x.orth_ for x in line.tokens]
        if toks_text:  # WL edit: used to be 'if line', was crashing on '\n' lines
            # Lowercase sentence
            if parameters['lower']:
                toks_text = [tok.lower() for tok in toks_text]
            # Replace all digits with zeros
            if parameters['zeros']:
                toks_text = [zero_digits(tok) for tok in toks_text]
            # Prepare input
            sentence = prepare_sentence(toks_text,
                                        word_to_id,
                                        char_to_id,
                                        lower=parameters['lower'])
            model_input = create_input(sentence, parameters, False)
            # Decoding
            if parameters['crf']:
                y_preds = np.array(f_eval(*model_input))[1:-1]
            else:
                y_preds = f_eval(*model_input).argmax(axis=1)
            y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            if parameters['tag_scheme'] == 'iobes':
                y_preds = iobes_iob(y_preds)
            # Write tags
            assert len(y_preds) == len(toks_text)
            # strip IOB prefixes
            y_preds = [x.split('-')[-1] for x in y_preds]
            all_ypreds.append(y_preds)
            all_tokens.append(toks_text)
        count += 1
        if count % 100 == 0:
            # Function-call form: same output on Python 2, valid on Python 3.
            print(count)
    return (all_ypreds, all_tokens)
def predicts(self, line):
    """Tag ``line`` and return it with entities marked as ``<text:TYPE>``.

    :param line: raw input text; a falsy value yields ``None``.
    :return: the unigram text with every entity wrapped as ``<...:TYPE>``,
        or ``None`` when ``line`` is empty.
    """
    if not line:
        return None

    # Save original bigrams
    bigram_sent = self.to_bigram(line, 0).strip().split()
    # Replace all digits with zeros
    line = zero_digits(line)
    input_seq = self.to_bigram(line, 0).strip().split()
    # Prepare input
    sentence = prepare_sentence(input_seq,
                                self.word_to_id,
                                self.char_to_id,
                                lower=self.parameters['lower'])
    model_input = create_input(sentence, self.parameters, False)
    if self.parameters['crf']:
        y_preds = np.array(self.f_eval(*model_input))[1:-1]
    else:
        y_preds = self.f_eval(*model_input).argmax(axis=1)
    tags = [self.id_to_tag[y_pred] for y_pred in y_preds]
    # Output tags in the IOB2 format
    if self.parameters['tag_scheme'] == 'iobes':
        tags = iobes_iob(tags)
    print(tags)

    # Make output form
    unigram_sent = self.bigrams_to_unigrams(bigram_sent)
    pieces = []
    last_index = len(tags) - 1
    for i, tag in enumerate(tags):
        opens_entity = tag.startswith('B')
        if opens_entity:
            pieces.append('<')
        pieces.append(unigram_sent[i])
        if opens_entity or tag.startswith('I'):
            # An entity ends unless the next tag continues it.  The previous
            # code closed it only on an 'I' tag followed by exactly 'O' (or
            # at the end), so single-token entities and entities directly
            # followed by another 'B-' were left without ':TYPE>'.
            continues = i < last_index and tags[i + 1].startswith('I')
            if not continues:
                pieces.append(':' + tag[2:] + '>')
    return ''.join(pieces)
def demo_one(self, sess, sent):
    """Predict the tag sequence of the first sentence in ``sent``.

    :param sess: session forwarded to ``predict_one_batch``.
    :param sent: data forwarded to ``batch_yield``.
    :return: list of tag names for the first predicted sequence, converted
        with ``iobes_iob`` when ``self.iob2iobes`` is truthy.
    """
    from utils import iobes_iob

    predicted = []
    batches = batch_yield(sent,
                          self.batch_size,
                          self.vocab,
                          self.tag2label,
                          shuffle=False,
                          iob2iobes=self.iob2iobes)
    for seqs, _labels in batches:
        batch_labels, _ = self.predict_one_batch(sess, seqs)
        predicted.extend(batch_labels)

    # Invert tag -> label so that predicted label ids map back to tag names.
    label2tag = {label: name for name, label in self.tag2label.items()}
    tags = [label2tag[label] for label in predicted[0]]
    return iobes_iob(tags) if self.iob2iobes else tags
def evaluate(parameters, f_eval, raw_sentences, parsed_sentences, id_to_tag,
             input_file_path):
    """
    Tag every sentence and write the result as "<token> <tag>" lines.

    No scoring happens here: the predictions are only written to
    ``<input name without extension>_Tagged.txt`` inside ``opts.output``,
    with one empty line after each sentence.

    :param parameters: model parameters; ``crf`` and ``tag_scheme`` are read.
    :param f_eval: evaluation function called with the prepared input.
    :param raw_sentences: sentences whose items expose the token text at
        index 0.
    :param parsed_sentences: model-ready sentences aligned with
        ``raw_sentences``.
    :param id_to_tag: mapping from tag id to tag name.
    :param input_file_path: path of the file being tagged; only used to
        derive the output file name.
    """
    predictions = []
    for raw_sentence, data in zip(raw_sentences, parsed_sentences):
        model_input = create_input(data, parameters, False)
        if parameters['crf']:
            y_preds = np.array(f_eval(*model_input))[1:-1]
        else:
            y_preds = f_eval(*model_input).argmax(axis=1)
        p_tags = [id_to_tag[y_pred] for y_pred in y_preds]
        if parameters['tag_scheme'] == 'iobes':
            p_tags = iobes_iob(p_tags)
        for i, p_tag in enumerate(p_tags):
            predictions.append("%s %s" % (raw_sentence[i][0], p_tag))
        predictions.append("")

    # splitext drops the real extension; the former ``[:-4]`` slice only
    # worked for three-character extensions such as ".txt".
    base_name = os.path.splitext(input_file_path)[0] + "_Tagged.txt"
    output_path = os.path.join(opts.output, os.path.basename(base_name))
    with codecs.open(output_path, 'w', 'utf8') as f:
        f.write("\n".join(predictions))
def run_tagging(model,
                f_eval,
                parameters,
                word_to_id,
                char_to_id,
                tag_to_id,
                opts_input="",
                opts_output="",
                opts_delimiter="__",
                opts_outputFormat=""):
    """Tag ``opts_input`` line by line and write the result to ``opts_output``.

    :param model: model object; only ``id_to_tag`` is used.
    :param f_eval: evaluation function called with the prepared input.
    :param parameters: model parameters; ``lower``, ``zeros``, ``crf`` and
        ``tag_scheme`` are read.
    :param word_to_id: word -> id mapping passed to ``prepare_sentence``.
    :param char_to_id: char -> id mapping passed to ``prepare_sentence``.
    :param tag_to_id: unused here; kept for interface compatibility.
    :param opts_input: path of the UTF-8 input file, one sentence per line.
    :param opts_output: path of the UTF-8 output file.
    :param opts_delimiter: separator written between a word and its tag.
    :param opts_outputFormat: ``'json'`` writes one JSON object per sentence;
        any other value writes ``word<delimiter>tag`` tokens.
    :return: a confirmation message naming ``opts_output``.
    """
    # Check parameters validity
    assert opts_delimiter
    assert os.path.isfile(opts_input)

    start = time.time()
    logger.info('Tagging...')
    count = 0
    # Both files are managed by ``with`` so the output is closed (and
    # flushed) even when tagging a line raises.
    with codecs.open(opts_output, 'w', 'utf-8') as f_output, \
            codecs.open(opts_input, 'r', 'utf-8') as f_input:
        for line in f_input:
            words_ini = line.rstrip().split()
            # Test the split words, not the raw line: a line read from a
            # file always contains at least its newline, so ``if line`` was
            # always true and blank lines were sent to the tagger.
            if words_ini:
                # Lowercase sentence
                if parameters['lower']:
                    line = line.lower()
                # Replace all digits with zeros
                if parameters['zeros']:
                    line = zero_digits(line)
                words = line.rstrip().split()
                # Prepare input
                sentence = prepare_sentence(words,
                                            word_to_id,
                                            char_to_id,
                                            lower=parameters['lower'])
                model_input = create_input(sentence, parameters, False)
                # Decoding
                if parameters['crf']:
                    y_preds = np.array(f_eval(*model_input))[1:-1]
                else:
                    y_preds = f_eval(*model_input).argmax(axis=1)
                y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
                # Output tags in the IOB2 format
                if parameters['tag_scheme'] == 'iobes':
                    y_preds = iobes_iob(y_preds)
                # Write tags
                assert len(y_preds) == len(words)
                if opts_outputFormat == 'json':
                    f_output.write(
                        json.dumps({
                            "text": ' '.join(words),
                            "ranges": iob_ranges(y_preds)
                        }))
                else:
                    f_output.write('%s\n' % ' '.join(
                        '%s%s%s' % (w, opts_delimiter, y)
                        for w, y in zip(words_ini, y_preds)))
            else:
                # Blank input line -> blank output line.
                f_output.write('\n')
            count += 1

    logger.info('---- %i lines tagged in %.4fs ----' %
                (count, time.time() - start))
    logger.info(opts_output)
    logger.info("")
    return opts_output + " has been tagged!"


# def main():
#     logger.info( "executed"
# if __name__ == '__main__':
#     main()
def eval_with_specific_model(model,
                             epoch,
                             datasets_to_be_predicted,
                             return_datasets_with_predicted_labels=False):
    # type: (MainTaggerModel, int, dict, bool) -> object
    """Evaluate ``model`` on every dataset in ``datasets_to_be_predicted``.

    ``datasets_to_be_predicted`` is indexed as ``[label][purpose]`` and each
    entry is a list of sentence dicts.  For the label "ner" the predictions
    are written to a file under ``eval_logs_dir`` and scored with the Python
    conlleval implementation; for the label "md" the morphological
    disambiguation choices are counted against the gold indices.

    :param model: tagger exposing ``parameters``, ``id_to_tag`` and
        ``predict(sentence)``.
    :param epoch: number embedded in the output/score file names.
    :param datasets_to_be_predicted: dict of dicts of sentence lists.
    :param return_datasets_with_predicted_labels: when true, the prediction
        lines of each dataset are stored in the third return value.
    :return: ``(f_scores, disambiguation_accuracies,
        datasets_with_predicted_labels)``.
    """
    eval_dir = eval_logs_dir
    batch_size = 1  # model.parameters['batch_size'] TODO: switch back for new models.
    integration_mode = model.parameters['integration_mode']
    active_models = model.parameters['active_models']
    id_to_tag = model.id_to_tag
    tag_scheme = model.parameters['t_s']

    f_scores = {"ner": {}}
    # dataset_labels = ["dev", "test", "yuret"]
    # dataset_labels = [map(lambda purpose: label+"_"+purpose, datasets_to_be_predicted[label].keys())
    #                   for label in datasets_to_be_predicted.keys()]
    # total_correct_disambs = {dataset_label: 0 for dataset_label in dataset_labels}
    # total_disamb_targets = {dataset_label: 0 for dataset_label in dataset_labels}

    # Per-dataset counters, all indexed as [label][purpose].
    total_correct_disambs = {
        label: {
            purpose: 0
            for purpose in list(datasets_to_be_predicted[label].keys())
        }
        for label in list(datasets_to_be_predicted.keys())
    }
    total_disamb_targets = {
        label: {
            purpose: 0
            for purpose in list(datasets_to_be_predicted[label].keys())
        }
        for label in list(datasets_to_be_predicted.keys())
    }
    if active_models in [1, 2, 3]:
        # Counters keyed additionally by the number of candidate analyses.
        detailed_correct_disambs = {
            label: {
                purpose: dd(int)
                for purpose in list(datasets_to_be_predicted[label].keys())
            }
            for label in list(datasets_to_be_predicted.keys())
        }
        detailed_total_target_disambs = {
            label: {
                purpose: dd(int)
                for purpose in list(datasets_to_be_predicted[label].keys())
            }
            for label in list(datasets_to_be_predicted.keys())
        }
    datasets_with_predicted_labels = {
        label: {
            purpose: {}
            for purpose in list(datasets_to_be_predicted[label].keys())
        }
        for label in list(datasets_to_be_predicted.keys())
    }

    # for dataset_label, dataset_as_list in datasets_to_be_predicted:
    for label in list(datasets_to_be_predicted.keys()):
        for purpose in list(datasets_to_be_predicted[label].keys()):
            dataset_as_list = datasets_to_be_predicted[label][purpose]
            if len(dataset_as_list) == 0:
                print("Skipping to evaluate %s dataset as it is empty" % (label + "_" + purpose))
                # -1 / 1 makes the accuracy of an empty dataset come out as -1.
                total_correct_disambs[label][purpose] = -1
                total_disamb_targets[label][purpose] = 1
                continue
            print("Starting to evaluate %s dataset" % (label + "_" + purpose))
            predictions = []
            n_tags = len(id_to_tag)
            # Confusion counts indexed as [gold tag id, predicted tag id].
            count = np.zeros((n_tags, n_tags), dtype=np.int32)
            n_batches = int(math.ceil(
                float(len(dataset_as_list)) / batch_size))
            print("dataset_label: %s" % (label + "_" + purpose))
            print(("n_batches: %d" % n_batches))
            for batch_idx in range(n_batches):
                # print("batch_idx: %d" % batch_idx)
                sys.stdout.write(". ")
                sys.stdout.flush()
                sentences_in_the_batch = dataset_as_list[(
                    batch_idx * batch_size):((batch_idx + 1) * batch_size)]
                for sentence in sentences_in_the_batch:
                    dynet.renew_cg()
                    sentence_length = len(sentence['word_ids'])
                    # Which outputs of model.predict are kept depends on the
                    # active models and on the dataset label.
                    if active_models in [2, 3
                                         ] and label in "ner md".split(" "):
                        selected_morph_analyzes, decoded_tags = model.predict(
                            sentence)
                    elif active_models in [1] and label == "md":
                        selected_morph_analyzes, _ = model.predict(sentence)
                    elif active_models in [0] and label == "ner":
                        _, decoded_tags = model.predict(sentence)
                    if active_models in [
                            0, 2, 3
                    ] and label == "ner":  # i.e. except MD
                        p_tags = [id_to_tag[p_tag] for p_tag in decoded_tags]
                        r_tags = [
                            id_to_tag[r_tag] for r_tag in sentence['tag_ids']
                        ]
                        if tag_scheme == 'iobes':
                            p_tags = iobes_iob(p_tags)
                            r_tags = iobes_iob(r_tags)
                        # One "word gold predicted" line per token.
                        for i, (y_pred, y_real) in enumerate(
                                zip(decoded_tags, sentence['tag_ids'])):
                            new_line = " ".join([sentence['str_words'][i]] +
                                                [r_tags[i], p_tags[i]])
                            predictions.append(new_line)
                            count[y_real, y_pred] += 1
                        # Empty line separates sentences.
                        predictions.append("")
                    if active_models in [1, 2, 3] and label == "md":
                        # Only words with more than one candidate analysis
                        # count as disambiguation targets.
                        n_correct_morph_disambs = \
                            sum([x == y for x, y, z in zip(selected_morph_analyzes,
                                                        sentence['golden_morph_analysis_indices'],
                                                        sentence['morpho_analyzes_tags']) if len(z) > 1])
                        total_correct_disambs[label][
                            purpose] += n_correct_morph_disambs
                        total_disamb_targets[label][purpose] += sum([
                            1 for el in sentence['morpho_analyzes_tags']
                            if len(el) > 1
                        ])
                        for key, value in [
                            (len(el), x == y) for el, x, y in zip(
                                sentence['morpho_analyzes_tags'],
                                selected_morph_analyzes,
                                sentence['golden_morph_analysis_indices'])
                        ]:
                            if value:
                                detailed_correct_disambs[label][purpose][
                                    key] += 1
                            detailed_total_target_disambs[label][purpose][
                                key] += 1
                        # total_possible_analyzes += sum([len(el) for el in sentence['morpho_analyzes_tags'] if len(el) > 1])
            print("")
            if active_models in [0, 2, 3] and label == "ner":
                # Write predictions to disk and run CoNLL script externally
                # A random id keeps file names of separate runs apart.
                eval_id = np.random.randint(1000000, 2000000)
                output_path = os.path.join(
                    eval_dir, "%s.eval.%i.epoch-%04d.output" %
                    ((label + "_" + purpose), eval_id, epoch))
                scores_path = os.path.join(
                    eval_dir, "%s.eval.%i.epoch-%04d.scores" %
                    ((label + "_" + purpose), eval_id, epoch))
                with codecs.open(output_path, 'w', 'utf8') as f:
                    f.write("\n".join(predictions))
                # os.system(command_string)
                # sys.exit(0)
                # with open(output_path, "r", encoding="utf-8") as output_path_f:
                # try:
                from evaluation.conlleval import evaluate as conll_evaluate, report as conll_report
                with open(output_path, "r") as output_path_f, open(scores_path, "w") as scores_path_f:
                    print(
                        "Evaluating the %s dataset with conlleval script's Python implementation"
                        % (label + "_" + purpose))
                    counts = conll_evaluate(output_path_f)
                    eval_script_output = conll_report(counts, out=scores_path_f)
                    # print("Evaluating the %s dataset with conlleval script runner" % (label + "_" + purpose))
                    # command_string = "%s %s %s" % (eval_script, output_path, scores_path)
                    # print(command_string)
                    # print("Will timeout in 30 seconds and set the F1-score to 0 for this eval.")
                    # # eval_script_output = subprocess.check_output([eval_script], stdin=output_path_f, timeout=30)
                    # eval_script_output = subprocess.check_output([eval_script, output_path, scores_path], timeout=30)
                    # except subprocess.TimeoutExpired as e:
                    # print(e)
                    # eval_script_output = b"""processed 0 tokens with 0 phrases; found: 0 phrases; correct: 0.
                    # accuracy: 0.0%; precision: 0.0%; recall: 0.0%; FB1: 0.0
                    # """
                    # NOTE(review): assumes conll_report returns bytes - confirm.
                    eval_lines = [
                        x.rstrip()
                        for x in eval_script_output.decode("utf8").split("\n")
                    ]
                # CoNLL evaluation results
                # eval_lines = [l.rstrip() for l in codecs.open(scores_path, 'r', 'utf8')]
                for line in eval_lines:
                    print(line)
                # The score is the last space-separated field of the second line.
                f_scores["ner"][purpose] = float(eval_lines[1].split(" ")[-1])
            if active_models in [1, 2, 3]:
                # Correct/total counts per number of candidate analyses.
                for n_possible_analyzes in map(
                        int,
                        list(detailed_correct_disambs[label][purpose].keys())):
                    print("%s %d %d/%d" %
                          ((label + "_" + purpose), n_possible_analyzes,
                           detailed_correct_disambs[label][purpose]
                           [n_possible_analyzes],
                           detailed_total_target_disambs[label][purpose]
                           [n_possible_analyzes]))
            if return_datasets_with_predicted_labels:
                datasets_with_predicted_labels[label][purpose] = predictions
    disambiguation_accuracies = {
        label: {}
        for label in list(datasets_to_be_predicted.keys())
    }
    if active_models in [0]:
        pass
    else:
        for label in list(datasets_to_be_predicted.keys()):
            for purpose in list(datasets_to_be_predicted[label].keys()):
                # No targets at all: report -1 instead of dividing by zero.
                if total_disamb_targets[label][purpose] == 0:
                    total_correct_disambs[label][purpose] = -1
                    total_disamb_targets[label][purpose] = 1
                disambiguation_accuracies[label][purpose] = \
                    total_correct_disambs[label][purpose] / float(total_disamb_targets[label][purpose])
    return f_scores, disambiguation_accuracies, datasets_with_predicted_labels
if parameters['zeros']: line = zero_digits(line) words = line.rstrip().split() # Prepare input sentence = prepare_sentence(words, word_to_id, char_to_id, lower=parameters['lower']) input = create_input(sentence, parameters, False) # Decoding if parameters['crf']: y_preds = np.array(f_eval(*input))[1:-1] else: y_preds = f_eval(*input).argmax(axis=1) y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds] # Output tags in the IOB2 format if parameters['tag_scheme'] == 'iobes': y_preds = iobes_iob(y_preds) # Write tags assert len(y_preds) == len(words) if opts.outputFormat == 'json': f_output.write(json.dumps({ "text": ' '.join(words), "ranges": iob_ranges(y_preds) })) else: f_output.write('%s\n' % ' '.join('%s%s%s' % (w, opts.delimiter, y) for w, y in zip(words_ini, y_preds))) else: f_output.write('\n') count += 1 if count % 100 == 0: print count print '---- %i lines tagged in %.4fs ----' % (count, time.time() - start)
def eval_for_a_checkpoint(sess, model_checkpoint_path):
    """Restore a checkpoint and evaluate it on the dev and test buckets.

    For each dataset the predictions are written to a file under
    ``FLAGS.eval_dir``, the external ``eval_script`` is run on it through the
    shell, and the resulting score lines are printed.  Nothing is returned.

    :param sess: session used to restore the model and run the graph.
    :param model_checkpoint_path: checkpoint path ending in ``-<number>``;
        a falsy value prints a message and returns immediately.
    """
    if model_checkpoint_path:
        # Restores from checkpoint
        model.saver.restore(sess, model_checkpoint_path)
        print "Evaluating %s" % model_checkpoint_path
        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/cifar10_train/model.ckpt-0,
        # extract global_step from it.
        epoch = int(os.path.basename(model_checkpoint_path).split('-')[-1])
        # global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
        print('No checkpoint file found')
        return

    import threading
    from loader import _load_and_enqueue

    for dataset_label, dataset_buckets in [["dev", dev_buckets],
                                           ["test", test_buckets]]:
        print "Starting to evaluate %s dataset" % dataset_label
        predictions = []
        n_tags = len(id_to_tag)
        # Confusion counts indexed as [gold tag id, predicted tag id].
        count = np.zeros((n_tags, n_tags), dtype=np.int32)
        # permuted_bucket_ids = np.random.permutation(range(len(dataset_buckets)))
        for bucket_id in range(len(dataset_buckets)):
            # bucket_id = np.random.random_integers(0, len(train_bins)-1)
            bucket_data_dict = dataset_buckets[bucket_id][0]
            bucket_maxes = dataset_buckets[bucket_id][1]
            n_batches = int(
                math.ceil(
                    float(bucket_data_dict['sentence_lengths'].shape[0]) /
                    batch_size))
            print "dataset_label: %s" % dataset_label
            print("n_batches: %d" % n_batches)
            print("bucket_id: %d" % bucket_id)
            import Queue
            # Queue through which the loader thread hands over the words of
            # each batch.
            str_words_q = Queue.Queue()

            def load_and_enqueue():
                _load_and_enqueue(sess,
                                  bucket_data_dict,
                                  n_batches,
                                  batch_size,
                                  placeholders,
                                  enqueue_op,
                                  str_words_q,
                                  train=False)

            # Feed the batches from a background thread while this thread
            # consumes them below.
            t = threading.Thread(target=load_and_enqueue)
            t.start()
            for batch_idx in range(n_batches):
                # print("batch_idx: %d" % batch_idx)
                sys.stdout.write(". ")
                sys.stdout.flush()
                tag_scores_value, tag_ids_value, word_ids_value, sentence_lengths_value = \
                    sess.run([tag_scores, tag_ids, word_ids, sentence_lengths])
                # One queue item is taken per batch.
                str_words = str_words_q.get()
                for sentence_idx, (tag_scores_of_one_sentence, str_words_of_one_sentence) in \
                        enumerate(zip(tag_scores_value, str_words)):
                    sentence_length = sentence_lengths_value[sentence_idx]
                    # print sentence_idx
                    # print one_sentence[:sentence_length]
                    # Only the first sentence_length positions are decoded.
                    decoded_tags, _ = viterbi_decode(
                        tag_scores_of_one_sentence[:sentence_length],
                        crf_transition_params.eval())
                    p_tags = [id_to_tag[p_tag] for p_tag in decoded_tags]
                    r_tags = [
                        id_to_tag[p_tag] for p_tag in tag_ids_value[
                            sentence_idx, :sentence_length]
                    ]
                    if parameters['t_s'] == 'iobes':
                        p_tags = iobes_iob(p_tags)
                        r_tags = iobes_iob(r_tags)
                    # One "word gold predicted" line per token.
                    for i, (word_id, y_pred, y_real) in enumerate(
                            zip(word_ids_value[sentence_idx, :sentence_length],
                                decoded_tags, tag_ids_value[
                                    sentence_idx, :sentence_length])):
                        new_line = " ".join([str_words_of_one_sentence[i]] +
                                            [r_tags[i], p_tags[i]])
                        predictions.append(new_line)
                        count[y_real, y_pred] += 1
                    # Empty line separates sentences.
                    predictions.append("")
                str_words_q.task_done()
            # Wait until every queued item was processed and the loader
            # thread has finished before moving to the next bucket.
            str_words_q.join()
            t.join()
        # print predictions
        # Write predictions to disk and run CoNLL script externally
        # A random id keeps file names of separate runs apart.
        eval_id = np.random.randint(1000000, 2000000)
        output_path = os.path.join(
            FLAGS.eval_dir, "%s.eval.%i.epoch-%04d.output" %
            (dataset_label, eval_id, epoch))
        scores_path = os.path.join(
            FLAGS.eval_dir, "%s.eval.%i.epoch-%04d.scores" %
            (dataset_label, eval_id, epoch))
        with codecs.open(output_path, 'w', 'utf8') as f:
            f.write("\n".join(predictions))
        print "Evaluating the %s dataset with conlleval script" % dataset_label
        # NOTE(review): the command is built by string interpolation and run
        # through the shell; paths containing spaces would break it.
        os.system("%s < %s > %s" % (eval_script, output_path, scores_path))
        # CoNLL evaluation results
        # NOTE(review): the scores file is opened without being closed explicitly.
        eval_lines = [
            l.rstrip() for l in codecs.open(scores_path, 'r', 'utf8')
        ]
        for line in eval_lines:
            print line
def evaluate(parameters, f_eval, raw_sentences, parsed_sentences, id_to_tag,
             input_path, output_path):
    """
    Evaluate current model using CoNLL script.

    Writes "<token columns> <gold tag> <predicted tag>" lines to
    ``<output_path>/<basename of input_path>.output``, runs ``eval_script`` on
    that file, then prints the script output, a confusion matrix and the
    global accuracy.

    :param parameters: model parameters; ``crf`` and ``tag_scheme`` are read.
    :param f_eval: evaluation function called with the prepared input.
    :param raw_sentences: sentences as lists of token rows; the last column
        of each row is dropped and replaced by the gold and predicted tags.
    :param parsed_sentences: model-ready sentences (dicts with a ``tags``
        entry) aligned with ``raw_sentences``.
    :param id_to_tag: mapping from tag id to tag name.
    :param input_path: path of the evaluated file; names the output file.
    :param output_path: directory that receives the prediction file.
    :return: the last field of the second line of the script output, as float.
    """
    n_tags = len(id_to_tag)
    predictions = []
    # Confusion counts indexed as [gold tag id, predicted tag id].
    count = np.zeros((n_tags, n_tags), dtype=np.int32)

    # The former ``y_trues_tags`` / ``y_preds_tags`` accumulators were dropped:
    # they were never read and were filled with gold and predicted swapped.
    for raw_sentence, data in zip(raw_sentences, parsed_sentences):
        model_input = create_input(data, parameters, False)
        if parameters['crf']:
            y_preds = np.array(f_eval(*model_input))[1:-1]
        else:
            y_preds = f_eval(*model_input).argmax(axis=1)
        y_reals = np.array(data['tags']).astype(np.int32)
        assert len(y_preds) == len(y_reals)
        p_tags = [id_to_tag[y_pred] for y_pred in y_preds]
        r_tags = [id_to_tag[y_real] for y_real in y_reals]
        if parameters['tag_scheme'] == 'iobes':
            p_tags = iobes_iob(p_tags)
            r_tags = iobes_iob(r_tags)
        for i, (y_pred, y_real) in enumerate(zip(y_preds, y_reals)):
            new_line = " ".join(raw_sentence[i][:-1] + [r_tags[i], p_tags[i]])
            predictions.append(new_line)
            count[y_real, y_pred] += 1
        predictions.append("")

    # Write predictions to disk and run CoNLL script externally
    eval_id = np.random.randint(1000000, 2000000)
    output_path = os.path.join(output_path,
                               "%s.output" % os.path.basename(input_path))
    scores_path = os.path.join(eval_temp, "eval.%i.scores" % eval_id)
    with codecs.open(output_path, 'w', 'utf8') as f:
        f.write("\n".join(predictions))
    # NOTE(review): shell command built by interpolation; paths containing
    # spaces would break it.
    os.system("%s < %s > %s" % (eval_script, output_path, scores_path))

    # CoNLL evaluation results (the file is now closed after reading).
    with codecs.open(scores_path, 'r', 'utf8') as scores_file:
        eval_lines = [l.rstrip() for l in scores_file]
    for line in eval_lines:
        print(line)

    # Remove temp files
    # os.remove(output_path)
    # os.remove(scores_path)

    # Confusion matrix with accuracy for each tag
    confusion_matrix_head = "{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * n_tags)
    confusion_matrix_head = confusion_matrix_head.format(
        "ID", "NE", "Total",
        *([id_to_tag[i] for i in range(n_tags)] + ["Percent"]))
    print(confusion_matrix_head)
    for i in range(n_tags):
        confusion_matrix_content = "{: >2}{: >7}{: >7}%s{: >9}" % (
            "{: >7}" * n_tags)
        confusion_matrix_content = confusion_matrix_content.format(
            str(i), id_to_tag[i], str(count[i].sum()),
            *([count[i][j] for j in range(n_tags)] +
              ["%.3f" % (count[i][i] * 100. / max(1, count[i].sum()))]))
        print(confusion_matrix_content)

    print()
    print("Global accuracy")
    print("\t%i/%i (%.5f%%)" % (count.trace(), count.sum(),
                                100. * count.trace() / max(1, count.sum())))

    # F1 on all entities
    print("F1 on all entities")
    F1_all = float(eval_lines[1].strip().split()[-1])
    print("\t{}".format(F1_all))
    return F1_all
# forward outputs, loss = model.forward(inputs, seq_len, char_len, char_index_mapping) if parameters['crf']: preds = [outputs[seq_index_mapping[j]].data for j in range(len(outputs))] else: _, _preds = torch.max(outputs.data, 2) preds = [ _preds[seq_index_mapping[j]][:seq_len[seq_index_mapping[j]]] for j in range(len(seq_index_mapping)) ] for j, pred in enumerate(preds): pred = [mappings['id_to_tag'][p] for p in pred] # Output tags in the IOB2 format if parameters['tag_scheme'] == 'iobes': pred = iobes_iob(pred) # Write tags assert len(pred) == len(eval_sentences[i+j]) f_output.write('%s\n\n' % '\n'.join('%s%s%s' % (' '.join(w), ' ', z) for w, z in zip(eval_sentences[i+j], pred))) if (i + j + 1) % 500 == 0: print(i+j+1) end = time.time() # epoch end time print('time elapssed: %f seconds' % round( (end - since), 2))
def eval_with_specific_model(model, epoch, buckets_list, integration_mode, active_models, *args):  # FLAGS.eval_dir
    # type: (MainTaggerModel, int, list, object, object) -> object
    """Evaluate ``model`` on every ``(label, buckets)`` pair of ``buckets_list``.

    When tagging is active (``active_models`` in 0, 2, 3) the predictions are
    written under ``eval_dir`` and scored by the external ``eval_script``.
    When disambiguation is active (1, 2, 3) the selected morphological
    analyses are counted against the gold indices.

    :param model: tagger exposing ``predict(sentence)``.
    :param epoch: number embedded in the output/score file names.
    :param buckets_list: iterable of ``(dataset_label, dataset_buckets)``.
    :param integration_mode: not used in this function.
    :param active_models: selects which model outputs are evaluated.
    :param args: ``(id_to_tag, batch_size, eval_dir, tag_scheme)``.
    :return: ``(f_scores, {})`` when ``active_models`` is 0, otherwise
        ``(f_scores, accuracy per dataset label)``.
    """
    id_to_tag, batch_size, eval_dir, tag_scheme = args

    f_scores = {}
    dataset_labels = ["dev", "test", "yuret"]

    total_correct_disambs = {dataset_label: 0 for dataset_label in dataset_labels}
    total_disamb_targets = {dataset_label: 0 for dataset_label in dataset_labels}
    if active_models in [1, 2, 3]:
        # Counters keyed additionally by the number of candidate analyses.
        detailed_correct_disambs = {dataset_label: dd(int) for dataset_label in dataset_labels}
        detailed_total_target_disambs = {dataset_label: dd(int) for dataset_label in dataset_labels}

    for dataset_label, dataset_buckets in buckets_list:
        if len(dataset_buckets) == 0:
            print "Skipping to evaluate %s dataset as it is empty" % dataset_label
            # -1 / 1 makes the accuracy of an empty dataset come out as -1.
            total_correct_disambs[dataset_label] = -1
            total_disamb_targets[dataset_label] = 1
            continue
        print "Starting to evaluate %s dataset" % dataset_label
        predictions = []
        n_tags = len(id_to_tag)
        # Confusion counts indexed as [gold tag id, predicted tag id].
        count = np.zeros((n_tags, n_tags), dtype=np.int32)
        # permuted_bucket_ids = np.random.permutation(range(len(dataset_buckets)))
        for bucket_id in range(len(dataset_buckets)):
            # bucket_id = np.random.random_integers(0, len(train_bins)-1)
            bucket_data_dict = dataset_buckets[bucket_id]
            n_batches = int(math.ceil(float(len(bucket_data_dict)) / batch_size))
            print "dataset_label: %s" % dataset_label
            print ("n_batches: %d" % n_batches)
            print ("bucket_id: %d" % bucket_id)
            for batch_idx in range(n_batches):
                # print("batch_idx: %d" % batch_idx)
                sys.stdout.write(". ")
                sys.stdout.flush()
                sentences_in_the_batch = bucket_data_dict[
                                         (batch_idx * batch_size):((batch_idx + 1) * batch_size)]
                for sentence in sentences_in_the_batch:
                    dynet.renew_cg()
                    sentence_length = len(sentence['word_ids'])
                    # The shape of model.predict's result is unpacked
                    # differently for each active_models setting.
                    if active_models in [2, 3]:
                        selected_morph_analyzes, decoded_tags = model.predict(sentence)
                    elif active_models in [1]:
                        selected_morph_analyzes, _ = model.predict(sentence)
                    elif active_models in [0]:
                        decoded_tags = model.predict(sentence)
                    if active_models in [0, 2, 3]:  # i.e. not only MD
                        p_tags = [id_to_tag[p_tag] for p_tag in decoded_tags]
                        r_tags = [id_to_tag[p_tag] for p_tag in sentence['tag_ids']]
                        if tag_scheme == 'iobes':
                            p_tags = iobes_iob(p_tags)
                            r_tags = iobes_iob(r_tags)
                        # One "word gold predicted" line per token.
                        for i, (word_id, y_pred, y_real) in enumerate(
                                zip(sentence['word_ids'],
                                    decoded_tags,
                                    sentence['tag_ids'])):
                            new_line = " ".join([sentence['str_words'][i]] + [r_tags[i], p_tags[i]])
                            predictions.append(new_line)
                            count[y_real, y_pred] += 1
                        # Empty line separates sentences.
                        predictions.append("")
                    if active_models in [1, 2, 3]:
                        # Only words with more than one candidate analysis
                        # count as disambiguation targets.
                        n_correct_morph_disambs = \
                            sum([x == y for x, y, z in zip(selected_morph_analyzes,
                                                        sentence['golden_morph_analysis_indices'],
                                                        sentence['morpho_analyzes_tags']) if len(z) > 1])
                        total_correct_disambs[dataset_label] += n_correct_morph_disambs
                        total_disamb_targets[dataset_label] += sum([1 for el in sentence['morpho_analyzes_tags'] if len(el) > 1])
                        for key, value in [(len(el), x == y) for el, x, y in zip(sentence['morpho_analyzes_tags'],
                                                                                 selected_morph_analyzes,
                                                                                 sentence['golden_morph_analysis_indices'])]:
                            if value:
                                detailed_correct_disambs[dataset_label][key] += 1
                            detailed_total_target_disambs[dataset_label][key] += 1
                        # total_possible_analyzes += sum([len(el) for el in sentence['morpho_analyzes_tags'] if len(el) > 1])
            print ""
        if active_models in [0, 2, 3]:
            # Write predictions to disk and run CoNLL script externally
            # A random id keeps file names of separate runs apart.
            eval_id = np.random.randint(1000000, 2000000)
            output_path = os.path.join(eval_dir, "%s.eval.%i.epoch-%04d.output" % (
                dataset_label, eval_id, epoch))
            scores_path = os.path.join(eval_dir, "%s.eval.%i.epoch-%04d.scores" % (
                dataset_label, eval_id, epoch))
            with codecs.open(output_path, 'w', 'utf8') as f:
                f.write("\n".join(predictions))

            print "Evaluating the %s dataset with conlleval script" % dataset_label
            # The command string is only printed; the script itself is run
            # below with the prediction file as its standard input.
            command_string = "%s < %s > %s" % (eval_script, output_path, scores_path)
            print command_string
            # os.system(command_string)
            # sys.exit(0)
            with codecs.open(output_path, "r", encoding="utf-8") as output_path_f:
                eval_lines = [x.rstrip() for x in
                              subprocess.check_output([eval_script], stdin=output_path_f).split(
                                  "\n")]

                # CoNLL evaluation results
                # eval_lines = [l.rstrip() for l in codecs.open(scores_path, 'r', 'utf8')]
                for line in eval_lines:
                    print line
                # The score is the last space-separated field of the second line.
                f_scores[dataset_label] = float(eval_lines[1].split(" ")[-1])
        if active_models in [1, 2, 3]:
            # Correct/total counts per number of candidate analyses.
            for n_possible_analyzes in map(int, detailed_correct_disambs[dataset_label].keys()):
                print "%s %d %d/%d" % (dataset_label,
                                       n_possible_analyzes,
                                       detailed_correct_disambs[dataset_label][n_possible_analyzes],
                                       detailed_total_target_disambs[dataset_label][n_possible_analyzes])
    if active_models in [0]:
        return f_scores, {}
    else:
        result = {}
        for dataset_label in dataset_labels:
            # No targets at all: report -1 instead of dividing by zero.
            if total_disamb_targets[dataset_label] == 0:
                total_correct_disambs[dataset_label] = -1
                total_disamb_targets[dataset_label] = 1
            result[dataset_label] = \
                total_correct_disambs[dataset_label] / float(total_disamb_targets[dataset_label])
        return f_scores, result
def eval_with_specific_model(model, epoch, dev_buckets, test_buckets, *args):  # FLAGS.eval_dir
    """Evaluate ``model`` on the dev and test buckets with the CoNLL script.

    For each dataset the "word gold predicted" lines are written under
    ``eval_dir`` and piped into the external ``eval_script``; its output is
    printed and the score is taken from the second output line.

    :param model: tagger exposing ``calculate_tag_scores(sentence)`` and
        ``crf_module.viterbi_loss(tag_scores, tag_ids)``.
    :param epoch: number embedded in the output/score file names.
    :param dev_buckets: list of buckets, each a sliceable sequence of
        sentence dicts.
    :param test_buckets: same layout as ``dev_buckets``.
    :param args: ``(id_to_tag, batch_size, eval_dir, tag_scheme)``.
    :return: dict mapping ``"dev"`` / ``"test"`` to the parsed score.
    """
    id_to_tag, batch_size, eval_dir, tag_scheme = args

    f_scores = {}
    for dataset_label, dataset_buckets in [["dev", dev_buckets], ["test", test_buckets]]:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statements were a SyntaxError on Python 3.
        print("Starting to evaluate %s dataset" % dataset_label)
        predictions = []
        n_tags = len(id_to_tag)
        # Confusion counts indexed as [gold tag id, predicted tag id].
        count = np.zeros((n_tags, n_tags), dtype=np.int32)
        for bucket_id in range(len(dataset_buckets)):
            bucket_data_dict = dataset_buckets[bucket_id]
            n_batches = int(math.ceil(float(len(bucket_data_dict)) / batch_size))
            print("dataset_label: %s" % dataset_label)
            print("n_batches: %d" % n_batches)
            print("bucket_id: %d" % bucket_id)
            for batch_idx in range(n_batches):
                sys.stdout.write(". ")
                sys.stdout.flush()
                batch_start = batch_idx * batch_size
                sentences_in_the_batch = bucket_data_dict[batch_start:batch_start + batch_size]
                for sentence in sentences_in_the_batch:
                    dynet.renew_cg()
                    tag_scores = model.calculate_tag_scores(sentence)
                    _, decoded_tags = model.crf_module.viterbi_loss(tag_scores,
                                                                    sentence['tag_ids'])
                    p_tags = [id_to_tag[p_tag] for p_tag in decoded_tags]
                    r_tags = [id_to_tag[r_tag] for r_tag in sentence['tag_ids']]
                    if tag_scheme == 'iobes':
                        p_tags = iobes_iob(p_tags)
                        r_tags = iobes_iob(r_tags)
                    # 'word_ids' stays in the zip so the iteration length is
                    # bounded exactly as before.
                    for i, (_, y_pred, y_real) in enumerate(
                            zip(sentence['word_ids'],
                                decoded_tags,
                                sentence['tag_ids'])):
                        new_line = " ".join([sentence['str_words'][i]] + [r_tags[i], p_tags[i]])
                        predictions.append(new_line)
                        count[y_real, y_pred] += 1
                    # Empty line separates sentences.
                    predictions.append("")

        # Write predictions to disk and run CoNLL script externally
        eval_id = np.random.randint(1000000, 2000000)
        output_path = os.path.join(eval_dir,
                                   "%s.eval.%i.epoch-%04d.output" % (dataset_label, eval_id, epoch))
        scores_path = os.path.join(eval_dir,
                                   "%s.eval.%i.epoch-%04d.scores" % (dataset_label, eval_id, epoch))
        with codecs.open(output_path, 'w', 'utf8') as f:
            f.write("\n".join(predictions))

        print("Evaluating the %s dataset with conlleval script" % dataset_label)
        # Printed for reference only; the script is run without a shell below.
        command_string = "%s < %s > %s" % (eval_script, output_path, scores_path)
        print(command_string)
        with open(output_path, "rb") as output_path_f:
            # universal_newlines=True yields text on Python 2 and 3 alike;
            # splitting the raw bytes on "\n" raised TypeError on Python 3.
            script_output = subprocess.check_output([eval_script],
                                                    stdin=output_path_f,
                                                    universal_newlines=True)
        eval_lines = [x.rstrip() for x in script_output.split("\n")]

        # CoNLL evaluation results
        for line in eval_lines:
            print(line)
        # The score is the last space-separated field of the second line.
        f_scores[dataset_label] = float(eval_lines[1].split(" ")[-1])

    return f_scores
# Tag every sentence of ``test_data`` and write one "word<TAB>tag" line per
# token to ``f_output``, with an empty line after each sentence.
# print() with a single argument behaves the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print('Tagging...')
for line in test_data:
    # Prepare input
    model_input = create_input(line,
                               parameters,
                               False,
                               useAttend=parameters['useAttend'])
    words = line['str_words']
    # Decoding
    if parameters['crf']:
        # NOTE(review): unlike the other taggers in this code base the CRF
        # output is not sliced with [1:-1] here - confirm this is intended.
        y_preds = np.array(f_eval(*model_input))
    else:
        y_preds = f_eval(*model_input).argmax(axis=1)
    y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
    # Output tags in the IOB2 format
    if parameters['tag_scheme'] == 'iobes':
        y_preds = iobes_iob(y_preds)
    # Write tags
    assert len(y_preds) == len(words)
    for word, y_pred in zip(words, y_preds):
        f_output.write(word + '\t' + y_pred + '\n')
    f_output.write('\n')
    # for elem in xmlformat(words,y_preds):
    #     f_output.write(elem+" ");
    # f_output.write("\n");
print('---- lines tagged in %.4fs ----' % (time.time() - start))
f_output.close()
def ner():
    """Tag the words of the current JSON request and return the entities.

    The request JSON is read for ``model`` (key into ``models``), ``words``
    (list of tokens) and ``begin_end`` (one ``(begin, end)`` pair per token).
    The model is loaded on the first call and kept in module globals.

    NOTE(review): once a model is loaded, a later request naming a different
    ``model`` still uses the first one - confirm this is intended.

    :return: JSON string ``{"ents": [...]}`` with one entry
        (``start_char``, ``end_char``, ``label``) per token whose tag is
        not ``"O"``.
    """
    global model
    global f_eval
    global parameters
    global word_to_id
    global char_to_id
    global tag_to_id

    model_name = request.json["model"]
    words = request.json["words"]
    begin_end = request.json["begin_end"]

    if model is None:
        ## Model loading
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print("Loading model " + model_name + "..")
        model = Model(model_path="models/" + models[model_name])
        parameters = model.parameters

        # Load reverse mappings
        word_to_id, char_to_id, tag_to_id = [{
            v: k
            for k, v in x.items()
        } for x in [model.id_to_word, model.id_to_char, model.id_to_tag]]

        # Load the model
        _, f_eval = model.build(training=False, **parameters)
        model.reload()

    # Lowercase sentence
    if parameters['lower']:
        words = [w.lower() for w in words]
    # Replace all digits with zeros
    if parameters['zeros']:
        words = [zero_digits(w) for w in words]
    # Fully upper-case tokens are converted to title case.
    words = [w if not w.isupper() else w.title() for w in words]

    # Prepare input
    sentence = prepare_sentence(words,
                                word_to_id,
                                char_to_id,
                                lower=parameters['lower'])
    model_input = create_input(sentence, parameters, False)

    # Decoding
    if parameters['crf']:
        y_preds = np.array(f_eval(*model_input))[1:-1]
    else:
        y_preds = f_eval(*model_input).argmax(axis=1)
    y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]

    # Output tags in the IOB2 format
    if parameters['tag_scheme'] == 'iobes':
        y_preds = iobes_iob(y_preds)

    # Write tags
    assert len(y_preds) == len(words)  # TODO:remove assert?

    ents = [{
        "start_char": b,
        "end_char": e,
        "label": label
    } for (b, e), label in zip(begin_end, y_preds) if label != "O"]

    return json.dumps({"ents": ents})