def conlleval(label_predict, label_path, metric_path):
    """
    :param label_predict: list of sentences, each a list of (char, gold_tag, pred_tag) triples
    :param label_path: file to write the per-token predictions to
    :param metric_path: file to write the evaluation metrics to
    :return: precision, recall, f_measure
    """
    gold_all = []
    pred_all = []
    for sent_result in label_predict:
        gold_one = []
        pred_one = []
        for char, tag, tag_ in sent_result:
            # Gold tags come as 'TYPE-POSITION'; rewrite them as the
            # 'POSITION-TYPE' labels get_ner_fmeasure expects, mapping
            # the BMES middle marker 'M' to 'I'.
            if tag != 'O':
                t, head = tag.split('-', 1)
                if head == 'M':
                    head = 'I'
                tag = head + '-' + t
            gold_one.append(tag)
            # Predicted tags use the integer 0 as a stand-in for 'O'.
            if tag_ == 0:
                tag_ = 'O'
            else:
                t_, head_ = tag_.split('-', 1)
                if head_ == 'M':
                    head_ = 'I'
                tag_ = head_ + '-' + t_
            pred_one.append(tag_)
        gold_all.append(gold_one)
        pred_all.append(pred_one)
    # get_ner_fmeasure returns (accuracy, precision, recall, f_measure),
    # matching its use in the evaluate() variants below.
    acc, precision, recall, f_measure = get_ner_fmeasure(gold_all, pred_all, "BIOES")
    with open(label_path, "w") as fw:
        lines = []
        for sent_result in label_predict:
            for char, tag, tag_ in sent_result:
                tag = '0' if tag == 'O' else tag  # gold 'O' is written as '0'
                lines.append("{} {} {}\n".format(char, tag, tag_))
            lines.append("\n")
        fw.writelines(lines)
    with open(metric_path, 'w') as fw:
        fw.write('token_acc:' + '\t' + str(acc))
        fw.write('\npre:' + '\t' + str(precision))
        fw.write('\nrecall:' + '\t' + str(recall))
        fw.write('\nf1:' + '\t' + str(f_measure))
    return precision, recall, f_measure
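
# A minimal, self-contained sketch of the tag normalization performed in
# conlleval() above: in-house 'TYPE-POSITION' tags (with 'M' marking middle
# tokens) are rewritten as 'POSITION-TYPE' so get_ner_fmeasure can score them
# under its BIOES scheme. _normalize_tag and the sample tags are illustrative
# only, not part of the original code.
def _normalize_tag(tag):
    if tag == 'O' or tag == 0:           # the integer 0 doubles as 'O' in predictions
        return 'O'
    entity_type, position = tag.split('-', 1)
    if position == 'M':                  # BMES 'M' (middle) maps to BIOES 'I'
        position = 'I'
    return position + '-' + entity_type

assert _normalize_tag('PER-B') == 'B-PER'
assert _normalize_tag('LOC-M') == 'I-LOC'
assert _normalize_tag(0) == 'O'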
def do_pass(data, token_to_id, tag_to_id, id_to_tag, expressions, train, epo):
    model, optimizer = expressions
    model.to(device)

    # Loop over batches
    loss = 0
    gold_lists, pred_lists = [], []
    for start in range(0, len(data), BATCH_SIZE):
        batch = data[start:start + BATCH_SIZE]
        # Sort longest-first so batch[0][0] gives the padded length
        batch.sort(key=lambda x: -len(x[0]))

        # Prepare inputs: right-pad token and tag ids to the longest sentence
        # in the batch; the mask marks real (non-pad) positions
        cur_batch_size = len(batch)
        max_length = len(batch[0][0])
        lengths = [len(v[0]) for v in batch]
        input_array = torch.zeros((cur_batch_size, max_length)).long()
        mask_array = torch.zeros((cur_batch_size, max_length)).byte()
        output_array = torch.zeros((cur_batch_size, max_length)).long()
        for n, (tokens, tags) in enumerate(batch):
            # id 1 is the out-of-vocabulary token
            token_ids = [token_to_id.get(simplify_token(t), 1) for t in tokens]
            tag_ids = [tag_to_id[t] for t in tags]
            input_array[n, :len(tokens)] = torch.LongTensor(token_ids)
            mask_array[n, :len(tokens)] = 1
            output_array[n, :len(tags)] = torch.LongTensor(tag_ids)

        # Construct computation
        batch_loss, output = model(input_array.to(device), mask_array.to(device),
                                   output_array.to(device), lengths,
                                   cur_batch_size, epo)

        # Run computations
        if train:
            batch_loss.backward()
            optimizer.step()
            model.zero_grad()
        loss += batch_loss.item()

        # Collect gold/predicted tag sequences for scoring; zipping over the
        # gold sequence ignores padding positions in the prediction
        predicted = output.cpu().data.numpy()
        for (_, g), a in zip(batch, predicted):
            gold_list, pred_list = [], []
            for gt, at in zip(g, a):
                gold_list.append(gt)
                pred_list.append(id_to_tag[at])
            gold_lists.append(gold_list)
            pred_lists.append(pred_list)
    return loss, get_ner_fmeasure(gold_lists, pred_lists)[-1]
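
# Toy illustration of the batching done in do_pass(): sentences of unequal
# length are right-padded to the longest sentence in the batch, with a
# parallel 0/1 mask marking real tokens. The vocabulary ids here are invented
# for the example; the real code maps tokens via token_to_id (1 = OOV).
import torch

def _demo_pad_batch():
    sentences = [[4, 9, 7], [5, 2]]           # already sorted longest-first
    max_len = len(sentences[0])
    inputs = torch.zeros((len(sentences), max_len)).long()
    mask = torch.zeros((len(sentences), max_len)).byte()
    for n, ids in enumerate(sentences):
        inputs[n, :len(ids)] = torch.LongTensor(ids)
        mask[n, :len(ids)] = 1
    return inputs, mask   # inputs: [[4,9,7],[5,2,0]], mask: [[1,1,1],[1,1,0]]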
def evaluate(data, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        instances = []  # fall back to an empty list so the loop below is a no-op
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 1
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, \
            batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True)
        tag_seq = model(gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                        batch_charlen, batch_charrecover, mask)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / (decode_time + 1e-10)  # guard against zero decode time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results
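
# The batch arithmetic above (and in the evaluate() variants that follow)
# uses total_batch = train_num // batch_size + 1, which yields one extra,
# possibly empty, slice whenever train_num is a multiple of batch_size; the
# `if not instance: continue` guard skips it. A standalone illustration with
# invented numbers:
def _demo_batch_slices(train_num=4, batch_size=2):
    items = list(range(train_num))
    total_batch = train_num // batch_size + 1      # 3 slices, the last one empty
    slices = []
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = min((batch_id + 1) * batch_size, train_num)
        chunk = items[start:end]
        if not chunk:                              # the guard evaluate() relies on
            continue
        slices.append(chunk)
    return slices                                  # [[0, 1], [2, 3]]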
def evaluate(data, wordseq, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        instances = []  # fall back to an empty list so the loop below is a no-op
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set models in eval mode
    wordseq.eval()
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, \
            batch_charlen, batch_charrecover, batch_label, mask, _ = batchify_with_label(
                instance, data.HP_gpu, True)
        hidden = wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char,
                                 batch_charlen, batch_charrecover, None, None)
        if nbest:
            scores, nbest_tag_seq = model.decode_nbest(hidden, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet, batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            tag_seq = model(hidden, mask)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / (decode_time + 1e-10)  # guard against zero decode time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
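
# Sketch of the nbest selection above. Judging by the indexing in evaluate(),
# decode_nbest() returns tag sequences shaped (batch, seq_len, nbest) with
# candidates ordered best-first, so nbest_tag_seq[:, :, 0] recovers the
# single best path for scoring. The shape and values below are assumptions
# for illustration, not the real decoder output.
import torch

def _demo_select_best_path():
    nbest_tag_seq = torch.tensor([[[3, 1], [2, 2], [0, 4]]])  # (1, seq_len=3, nbest=2)
    best = nbest_tag_seq[:, :, 0]                             # (1, 3) -> [[3, 2, 0]]
    return best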
def evaluate(args, model, tokenizer, labels, pad_token_label_id, mode, prefix=""):
    eval_dataset = load_and_cache_examples(args, tokenizer, labels,
                                           pad_token_label_id, mode=mode)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 \
        else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running %s evaluation %s *****", mode, prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch in eval_dataloader:
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            if args.n_gpu > 1:
                # mean() to average on multi-gpu parallel evaluating
                tmp_eval_loss = tmp_eval_loss.mean()

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    # Drop padding positions and map ids back to label strings
    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])

    acc, p, r, f = get_ner_fmeasure(out_label_list, preds_list, 'BIO')
    results = {
        "loss": eval_loss,
        "precision": p,
        "recall": r,
        "f1": f,
        "acc": acc,
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list
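
# Minimal sketch of the label recovery done above: positions carrying
# pad_token_label_id (padding and subword continuations) are dropped, and the
# surviving ids on both the gold and predicted side are mapped back to tag
# strings. The label set and id matrices below are invented for illustration.
import numpy as np

def _demo_recover_labels():
    pad_id = -100
    labels = ["O", "B-PER", "I-PER"]
    label_map = {i: label for i, label in enumerate(labels)}
    out_label_ids = np.array([[1, 2, pad_id], [0, pad_id, pad_id]])
    preds = np.array([[1, 1, 0], [0, 2, 0]])
    gold = [[] for _ in range(out_label_ids.shape[0])]
    hyp = [[] for _ in range(out_label_ids.shape[0])]
    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_id:
                gold[i].append(label_map[out_label_ids[i][j]])
                hyp[i].append(label_map[preds[i][j]])
    return gold, hyp   # [['B-PER','I-PER'], ['O']], [['B-PER','B-PER'], ['O']]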
def evaluate(data, opt, model, name, bEval, nbest=0):
    if name == "train":
        instances = data.train_Ids
        instances_text = data.train_texts
    elif name == "dev":
        instances = data.dev_Ids
        instances_text = data.dev_texts
    elif name == 'test':
        instances = data.test_Ids
        instances_text = data.test_texts
    else:
        logging.error("wrong evaluate name, {}".format(name))
        instances, instances_text = [], []  # empty fallback so the loop is a no-op
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = opt.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if opt.elmo:
            instance_text = instances_text[start:end]
        else:
            instance_text = None
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
            batch_charrecover, batch_label, mask, batch_features, batch_text \
            = batchify_with_label(data, instance, instance_text, opt.gpu)
        if nbest > 0:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, mask, nbest, batch_features, batch_text)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet, batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            tag_seq = model(batch_word, batch_wordlen, batch_char, batch_charlen,
                            batch_charrecover, mask, batch_features, batch_text)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        # Gold labels are only accumulated when scoring is requested
        if bEval:
            gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / (decode_time + 1e-10)  # guard against zero decode time
    if bEval:
        acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, opt.schema)
    else:
        acc, p, r, f = None, None, None, None
    return speed, acc, p, r, f, pred_results, pred_scores
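
# All of the evaluate() variants above lean on recover_label() to turn padded
# tag-id tensors back into per-sentence string lists via the 0/1 mask. This
# is a simplified stand-in for illustration: the real helper also restores
# the original sentence order through batch_wordrecover and maps ids with
# data.label_alphabet, both omitted here; id_to_label below is invented.
import torch

def _demo_recover_label(tag_seq, mask, id_to_label):
    results = []
    for sent_tags, sent_mask in zip(tag_seq, mask):
        results.append([id_to_label[int(t)]
                        for t, m in zip(sent_tags, sent_mask) if int(m) == 1])
    return results

_tags = torch.tensor([[1, 2, 0], [0, 0, 0]])
_mask = torch.tensor([[1, 1, 1], [1, 0, 0]])
assert _demo_recover_label(_tags, _mask, {0: 'O', 1: 'B-LOC', 2: 'I-LOC'}) == \
    [['B-LOC', 'I-LOC', 'O'], ['O']]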