def eval_batch(data_iter, model, eval_instance, best_fscore, epoch, config, test=False):
    model.eval()  # switch to eval mode (disables dropout etc.)
    eval_acc = Eval()
    eval_PRF = EvalPRF()
    gold_labels = []
    predict_labels = []
    for batch_features in data_iter:
        logit = model(batch_features)
        for id_batch in range(batch_features.batch_length):
            inst = batch_features.inst[id_batch]
            # greedy argmax decoding: pick the highest-scoring label id per position
            maxId_batch = getMaxindex_batch(logit[id_batch])
            predict_label = []
            for id_word in range(inst.words_size):
                predict_label.append(config.create_alphabet.label_alphabet.from_id(maxId_batch[id_word]))
            gold_labels.append(inst.labels)
            predict_labels.append(predict_label)
    # accumulate precision/recall/F-score counts over all sentences
    for p_label, g_label in zip(predict_labels, gold_labels):
        eval_PRF.evalPRF(predict_labels=p_label, gold_labels=g_label, eval=eval_instance)
    if eval_acc.gold_num == 0:
        eval_acc.gold_num = 1  # guard against division by zero in acc()
    p, r, f = eval_instance.getFscore()
    test_flag = "Test"
    if test is False:
        print()
        test_flag = "Dev"
        # track the best dev score and remember the epoch it occurred on
        if f >= best_fscore.best_dev_fscore:
            best_fscore.best_dev_fscore = f
            best_fscore.best_epoch = epoch
            best_fscore.best_test = True
    if test is True and best_fscore.best_test is True:
        # record the test result corresponding to the best dev score
        best_fscore.p = p
        best_fscore.r = r
        best_fscore.f = f
    print("{} eval: precision = {:.6f}% recall = {:.6f}% , f-score = {:.6f}%, [TAG-ACC = {:.6f}%]".format(
        test_flag, p, r, f, eval_acc.acc()))
    if test is True:
        print("The Current Best Dev F-score: {:.6f}, Locate on {} Epoch.".format(
            best_fscore.best_dev_fscore, best_fscore.best_epoch))
        print("The Current Best Test Result: precision = {:.6f}% recall = {:.6f}% , f-score = {:.6f}%".format(
            best_fscore.p, best_fscore.r, best_fscore.f))
    if test is True:
        best_fscore.best_test = False
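# `getMaxindex_batch` is called above but not defined in this file. The sketch
# below is a hypothetical stand-in, assuming `logit[id_batch]` is a PyTorch
# tensor of shape (seq_len, label_size) for a single sentence; it returns the
# highest-scoring label id per word position. Treat it as an illustration of
# the expected contract, not the repository's actual implementation.
import torch


def getMaxindex_batch(logit):
    # logit: (seq_len, label_size) emission scores for one sentence
    # returns: list of predicted label ids, one per position
    return torch.argmax(logit, dim=1).tolist()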
def eval_batch(self, data_iter, model, eval_instance, best_score, epoch, config, test=False):
    """
    :param data_iter: eval batch data iterator
    :param model: eval model
    :param eval_instance: accumulator for precision/recall/F-score counts
    :param best_score: tracker for the best dev/test scores so far
    :param epoch: current epoch number
    :param config: config
    :param test: whether to test
    :return: None
    """
    model.eval()  # switch to eval mode (disables dropout etc.)
    eval_acc = Eval()
    eval_PRF = EvalPRF()
    gold_labels = []
    predict_labels = []
    for batch_features in data_iter:
        word, char, mask, sentence_length, tags = self._get_model_args(batch_features)
        logit = model(word, char, sentence_length, train=False)
        if self.use_crf is False:
            # greedy argmax decoding over the emission scores
            predict_ids = torch_max(logit)
            for id_batch in range(batch_features.batch_length):
                inst = batch_features.inst[id_batch]
                label_ids = predict_ids[id_batch]
                predict_label = []
                for id_word in range(inst.words_size):
                    predict_label.append(config.create_alphabet.label_alphabet.from_id(label_ids[id_word]))
                gold_labels.append(inst.labels)
                predict_labels.append(predict_label)
        else:
            # Viterbi decoding through the CRF layer
            path_score, best_paths = model.crf_layer(logit, mask)
            for id_batch in range(batch_features.batch_length):
                inst = batch_features.inst[id_batch]
                gold_labels.append(inst.labels)
                # truncate the padded best path to the real sentence length
                label_ids = best_paths[id_batch].cpu().data.numpy()[:inst.words_size]
                label = []
                for i in label_ids:
                    label.append(config.create_alphabet.label_alphabet.from_id(i))
                predict_labels.append(label)
    # accumulate precision/recall/F-score counts over all sentences
    for p_label, g_label in zip(predict_labels, gold_labels):
        eval_PRF.evalPRF(predict_labels=p_label, gold_labels=g_label, eval=eval_instance)
    if eval_acc.gold_num == 0:
        eval_acc.gold_num = 1  # guard against division by zero in acc()
    p, r, f = eval_instance.getFscore()
    # alternative entity-level scorers, kept for reference:
    # p, r, f = entity_evalPRF_exact(gold_labels=gold_labels, predict_labels=predict_labels)
    # p, r, f = entity_evalPRF_propor(gold_labels=gold_labels, predict_labels=predict_labels)
    # p, r, f = entity_evalPRF_binary(gold_labels=gold_labels, predict_labels=predict_labels)
    test_flag = "Test"
    if test is False:
        print()
        test_flag = "Dev"
        best_score.current_dev_score = f
        # track the best dev score and remember the epoch it occurred on
        if f >= best_score.best_dev_score:
            best_score.best_dev_score = f
            best_score.best_epoch = epoch
            best_score.best_test = True
    if test is True and best_score.best_test is True:
        # record the test result corresponding to the best dev score
        best_score.p = p
        best_score.r = r
        best_score.f = f
    # TAG-ACC is not computed in this variant; 0.0 is printed as a placeholder
    print("{} eval: precision = {:.6f}% recall = {:.6f}% , f-score = {:.6f}%, [TAG-ACC = {:.6f}%]".format(
        test_flag, p, r, f, 0.0000))
    if test is True:
        print("The Current Best Dev F-score: {:.6f}, Locate on {} Epoch.".format(
            best_score.best_dev_score, best_score.best_epoch))
        print("The Current Best Test Result: precision = {:.6f}% recall = {:.6f}% , f-score = {:.6f}%".format(
            best_score.p, best_score.r, best_score.f))
    if test is True:
        best_score.best_test = False
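# `torch_max` is imported from a helper module in the original project and is
# not defined here. The sketch below is an assumed version: it takes batched
# emission scores of shape (batch_size, seq_len, label_size) and returns the
# per-position argmax label ids as a NumPy array, matching how
# `predict_ids[id_batch][id_word]` is indexed above. An illustration under
# those assumptions, not the project's actual helper.
import torch


def torch_max(logit):
    # logit: (batch_size, seq_len, label_size) emission scores
    _, max_ids = torch.max(logit, dim=2)
    # move to CPU and detach so the ids can be indexed as plain integers
    return max_ids.detach().cpu().numpy()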