Exemplo n.º 1
0
def confirm_threshold(raw_data_file_path, pred_dir, pred_file_prefix,
                      num_thresholds=100):
    """Grid-search the prediction threshold that maximizes (F1, EM).

    Reads the reference answers once, then evaluates predictions at
    ``num_thresholds`` evenly spaced thresholds in [0, 1] and reports the
    best candidate.

    Args:
        raw_data_file_path: path to the raw MRC dataset file (reference
            answers).
        pred_dir: directory holding the model prediction files.
        pred_file_prefix: filename prefix of the prediction files.
        num_thresholds: number of evenly spaced candidate thresholds to
            evaluate (default 100, matching the previously hard-coded grid).

    Returns:
        Tuple ``(threshold, F1, EM)`` of the best candidate, where "best"
        is the lexicographic maximum over (F1, EM).
    """
    # Reference answers are loaded once and reused for every threshold.
    ref_ans = read_mrc_dataset(raw_data_file_path, tag=None)
    candidate_thresholds = np.linspace(0, 1, num_thresholds)
    ret_metrics = []
    for ind, candi_thresh in enumerate(candidate_thresholds):
        pred_ans = _pred_ans_by_thresh(pred_dir, pred_file_prefix,
                                       candi_thresh)
        F1, EM, _, _, _ = evaluate(ref_ans, pred_ans)
        ret_metrics.append((candi_thresh, F1, EM))
        # Periodic progress report every 20 candidates.
        if (ind + 1) % 20 == 0:
            print(
                f"now {ind + 1}/{len(candidate_thresholds)}, F1 is {F1}, EM is {EM}"
            )
    # Stable sort by (F1, EM); taking the last entry keeps the original
    # tie-breaking behavior (the latest-evaluated threshold wins ties).
    ret_metrics = sorted(ret_metrics, key=lambda x: (x[1], x[2]))
    print("the best metrics&threshold is ", ret_metrics[-1])
    return ret_metrics[-1]
Exemplo n.º 2
0
 def _evaluate(raw_data_path, pred_data_path, tag=None):
     """Score model predictions against the reference dataset and print metrics.

     Args:
         raw_data_path: path to the raw MRC dataset with reference answers.
         pred_data_path: path to the model prediction file.
         tag: optional tag used to filter reference samples; ``None`` keeps all.
     """
     references = read_mrc_dataset(raw_data_path, tag=tag)
     # Fail fast if the tag filtered out every sample.
     assert len(references) > 0, 'Find no sample with tag - {}'.format(tag)
     predictions = read_model_prediction(pred_data_path)
     metrics = evaluate(references, predictions, verbose=False)
     f1, em, ans_score, total, skipped = metrics
     print_metrics(f1, em, ans_score, total, skipped, tag)