def eval_model(model, data, metric_meta, use_cuda=True, with_label=True,
               label_mapper=None, task_type=TaskType.Classification):
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_info, batch_data in data:
        batch_info, batch_data = Collater.patch_data(use_cuda, batch_info, batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])
    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores, label_mapper)
    return metrics, predictions, scores, golds, ids
def eval_model(model, data, metric_meta, use_cuda=True, with_label=True,
               label_mapper=None):
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_info, batch_data in data:
        batch_info, batch_data = Collater.patch_data(use_cuda, batch_info, batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores, label_mapper)
    return metrics, predictions, scores, golds, ids
def eval_model(
    model,
    data,
    metric_meta,
    device,
    with_label=True,
    label_mapper=None,
    task_type=TaskType.Classification,
):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_info, batch_data in tqdm(data, total=len(data)):
        batch_info, batch_data = Collater.patch_data(device, batch_info, batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        scores = merge(score, scores)
        golds = merge(gold, golds)
        predictions = merge(pred, predictions)
        ids = merge(batch_info["uids"], ids)
    # Span postprocessing: version_2_with_negative follows the SQuAD v2
    # convention, where questions may be unanswerable.
    if task_type == TaskType.Span:
        predictions, golds = postprocess_qa_predictions(
            golds, scores, version_2_with_negative=False)
    elif task_type == TaskType.SpanYN:
        predictions, golds = postprocess_qa_predictions(
            golds, scores, version_2_with_negative=True)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores, label_mapper)
    return metrics, predictions, scores, golds, ids
def eval_model(model, data, metric_meta, device, with_label=True,
               label_mapper=None, task_type=TaskType.Classification):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_info, batch_data in data:
        batch_info, batch_data = Collater.patch_data(device, batch_info, batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])
    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores, label_mapper)
    return metrics, predictions, scores, golds, ids
def eval_model(model, data, metric_meta, use_cuda=True, with_label=True):
    data.reset()
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_meta, batch_data in data:
        score, pred, gold = model.predict(batch_meta, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_meta['uids'])
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores)
    return metrics, predictions, scores, golds, ids
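To make the contract these variants share concrete, here is a minimal, self-contained sketch that drives the simplest variant above: data is an iterable of (batch_meta, batch_data) pairs with a reset() method, and model.predict() returns flat per-class scores plus per-example predictions and golds. Everything named Stub* is an illustrative stand-in, not a class from the codebase; with_label=False sidesteps calc_metrics, whose internals are not shown here.

import random

class StubData:
    # Illustrative stand-in for the batch iterator eval_model consumes:
    # yields (batch_meta, batch_data) pairs and supports reset().
    def __init__(self, batches):
        self.batches = batches

    def reset(self):
        pass  # a real batch generator would rewind/reshuffle here

    def __iter__(self):
        return iter(self.batches)

class StubModel:
    # Illustrative stand-in for a model whose predict() returns, per batch:
    # a flat list of n_class scores per example, one predicted label per
    # example, and one gold label per example.
    def predict(self, batch_meta, batch_data):
        n = len(batch_meta['uids'])
        scores = [random.random() for _ in range(2 * n)]  # 2-class, flattened
        preds = [int(s0 < s1) for s0, s1 in zip(scores[0::2], scores[1::2])]
        return scores, preds, batch_meta['labels']

batches = [({'uids': [0, 1], 'labels': [0, 1]}, None),
           ({'uids': [2], 'labels': [1]}, None)]
metrics, predictions, scores, golds, ids = eval_model(
    StubModel(), StubData(batches), metric_meta=[],
    use_cuda=False, with_label=False)
print(ids, predictions)  # e.g. [0, 1, 2] [1, 0, 1]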
def eval_model(model, data, metric_meta, device, with_label=True,
               label_mapper=None, task_type=TaskType.Classification):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    print("****device={}".format(device))
    for batch_info, batch_data in data:
        batch_info, batch_data = Collater.patch_data(device, batch_info, batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])
    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores, label_mapper)
    # Debug dump of the first examples; the indexing assumes a 2-class task,
    # so the flat scores list holds scores[2 * i] and scores[2 * i + 1] for
    # example i.
    for i in range(min(len(ids), 10)):
        print("{}\t{}\t{}\t{}".format(ids[i], predictions[i],
                                      scores[2 * i], scores[2 * i + 1]))
    return metrics, predictions, scores, golds, ids
def generate_golds_predictions_scores(sample_id_2_pred_score_seg_dic, sample_objs):
    # sample_id_2_label_dic (uid -> gold label) is built from sample_objs in
    # code elided from this excerpt.
    golds = []
    predictions = []
    scores = []
    for sample_id, label in sample_id_2_label_dic.items():
        golds.append(label)
        pred, score_seg = sample_id_2_pred_score_seg_dic[sample_id]
        predictions.append(pred)
        scores.extend(score_seg)
    return golds, predictions, scores


args = parser.parse_args()
task_def_path = args.task_def
task = args.task
task_defs = TaskDefs(task_def_path)

n_class = task_defs.n_class_map[task]
sample_id_2_pred_score_seg_dic = load_score_file(args.score, n_class)

data_format = task_defs.data_format_map[task]
task_type = task_defs.task_type_map[task]
label_mapper = task_defs.global_map.get(task, None)
sample_objs = load_data(args.std_input, data_format, task_type, label_mapper)

golds, predictions, scores = generate_golds_predictions_scores(
    sample_id_2_pred_score_seg_dic, sample_objs)

metrics = calc_metrics(task_defs.metric_meta_map[task], golds, predictions, scores)
print(metrics)
def generate_golds_predictions_scores(sample_id_2_pred_score_seg_dic, sample_objs):
    # As above, sample_id_2_label_dic (uid -> gold label) is built from
    # sample_objs in code elided from this excerpt.
    golds = []
    predictions = []
    scores = []
    for sample_id, label in sample_id_2_label_dic.items():
        golds.append(label)
        pred, score_seg = sample_id_2_pred_score_seg_dic[sample_id]
        predictions.append(pred)
        scores.extend(score_seg)
    return golds, predictions, scores


args = parser.parse_args()
task_def_path = args.task_def
task_defs = TaskDefs(task_def_path)
task_def = task_defs.get_task_def(args.task)

n_class = task_def.n_class
sample_id_2_pred_score_seg_dic = load_score_file(args.score, n_class)

data_type = task_def.data_type
task_type = task_def.task_type
label_mapper = task_def.label_vocab
sample_objs = load_data(args.std_input, data_type, task_type, label_mapper)

golds, predictions, scores = generate_golds_predictions_scores(
    sample_id_2_pred_score_seg_dic, sample_objs)

metrics = calc_metrics(task_def.metric_meta, golds, predictions, scores)
print(metrics)
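For reference, the per-uid structure both script variants consume: load_score_file is expected to return a dict mapping each sample id to its predicted label and its n_class-long score segment, which is what the pred, score_seg unpacking above relies on. The values below are made up purely for illustration.

# uid -> (predicted_label, [score_class_0, ..., score_class_{n_class-1}])
sample_id_2_pred_score_seg_dic = {
    "uid-0": (1, [0.2, 0.8]),  # n_class == 2 here
    "uid-1": (0, [0.9, 0.1]),
}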