import numpy as np
from typing import List

# Project-specific names (Config, Instance, MT_LSTMCRF, NNCRF, eval) are
# assumed to be imported elsewhere in this repo.


def evaluate_model(config: Config, conf_conll, conf_ontonotes, model: MT_LSTMCRF,
                   batch_insts_ids, name: str, insts: List[Instance]):
    """Evaluate a multi-task LSTM-CRF on both the CoNLL and OntoNotes label sets.

    Returns [precision_conll, recall_conll, fscore_conll,
             precision_notes, recall_notes, fscore_notes].
    """
    # Each metrics array accumulates (num_correct, num_predicted, num_gold) entity counts.
    metrics_conll = np.asarray([0, 0, 0], dtype=int)
    metrics_notes = np.asarray([0, 0, 0], dtype=int)
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        # Batches were built in descending sentence length; mirror that order so
        # decoded sequences line up with the right instances.
        sorted_batch_insts = sorted(one_batch_insts,
                                    key=lambda inst: len(inst.input.words),
                                    reverse=True)
        # Decode both task heads; each yields best scores, predicted label ids and a mask.
        bestScores_conll, decodeIdx_conll, bestScores_notes, decodeIdx_notes, \
            mask_conll, mask_ontonotes = model.decode(batch)
        # batch[6]/batch[7] hold the gold CoNLL/OntoNotes label ids, batch[1] the lengths.
        metrics_conll += eval.evaluate_num(sorted_batch_insts, decodeIdx_conll,
                                           batch[6], batch[1],
                                           conf_conll.idx2labels, mask_conll)
        metrics_notes += eval.evaluate_num(sorted_batch_insts, decodeIdx_notes,
                                           batch[7], batch[1],
                                           conf_ontonotes.idx2labels, mask_ontonotes)
        batch_id += 1

    p_conll, total_predict_conll, total_entity_conll = metrics_conll
    precision_conll = p_conll * 100.0 / total_predict_conll if total_predict_conll != 0 else 0
    recall_conll = p_conll * 100.0 / total_entity_conll if total_entity_conll != 0 else 0
    fscore_conll = 2.0 * precision_conll * recall_conll / (precision_conll + recall_conll) \
        if precision_conll != 0 or recall_conll != 0 else 0
    print("[%s conll set] Precision: %.2f, Recall: %.2f, F1: %.2f"
          % (name, precision_conll, recall_conll, fscore_conll), flush=True)

    p_notes, total_predict_notes, total_entity_notes = metrics_notes
    precision_notes = p_notes * 100.0 / total_predict_notes if total_predict_notes != 0 else 0
    recall_notes = p_notes * 100.0 / total_entity_notes if total_entity_notes != 0 else 0
    fscore_notes = 2.0 * precision_notes * recall_notes / (precision_notes + recall_notes) \
        if precision_notes != 0 or recall_notes != 0 else 0
    print("[%s notes set] Precision: %.2f, Recall: %.2f, F1: %.2f"
          % (name, precision_notes, recall_notes, fscore_notes), flush=True)

    return [precision_conll, recall_conll, fscore_conll,
            precision_notes, recall_notes, fscore_notes]
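
# --- Usage sketch (illustrative only) ---
# A minimal sketch of how the multi-task evaluator above might be called during
# development evaluation. `dev_batches`, `dev_insts`, and the model-selection
# criterion are assumptions about the surrounding trainer, not part of this code.
#
#     dev_metrics = evaluate_model(config, conf_conll, conf_ontonotes, model,
#                                  dev_batches, "dev", dev_insts)
#     p_c, r_c, f_c, p_n, r_n, f_n = dev_metrics
#     # e.g. select the checkpoint by the mean of the two per-task F1 scores
#     combined_f1 = (f_c + f_n) / 2.0
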
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str,
                   insts: List[Instance]):
    """Single-task variant: evaluate an NNCRF model on one label set.

    Returns [precision, recall, fscore].
    """
    # Accumulates (num_correct, num_predicted, num_gold) entity counts.
    metrics = np.asarray([0, 0, 0], dtype=int)
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        # Keep the descending-length order used when the batch was built.
        sorted_batch_insts = sorted(one_batch_insts,
                                    key=lambda inst: len(inst.input.words),
                                    reverse=True)
        batch_max_scores, batch_max_ids = model.decode(batch)
        # batch[-1] holds the gold label ids, batch[1] the sequence lengths.
        metrics += eval.evaluate_num(sorted_batch_insts, batch_max_ids,
                                     batch[-1], batch[1], config.idx2labels)
        batch_id += 1

    p, total_predict, total_entity = metrics
    precision = p * 100.0 / total_predict if total_predict != 0 else 0
    recall = p * 100.0 / total_entity if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) \
        if precision != 0 or recall != 0 else 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f"
          % (name, precision, recall, fscore), flush=True)
    return [precision, recall, fscore]
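
# --- Usage sketch (illustrative only) ---
# A minimal sketch of calling the single-task evaluator from a training loop.
# The batching helper `batching_list_instances`, the `dev_insts` split, and the
# checkpoint path are assumptions about the surrounding trainer, not this file.
#
#     dev_batches = batching_list_instances(config, dev_insts)
#     precision, recall, fscore = evaluate_model(config, model, dev_batches,
#                                                "dev", dev_insts)
#     if fscore > best_dev_fscore:
#         best_dev_fscore = fscore
#         torch.save(model.state_dict(), "best_model.pt")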