def calculate_overlap_scores(self):
    for qa in self.original_qas:
        ans = qa['predicted_answer']
        gt_ans = qa['answers'][0]['text']
        # store all word overlap metrics between predicted answer and original answer
        qa['exact_match'] = exact_match_metric(ans, gt_ans)
        qa['f1_match'] = f1_metric(ans, gt_ans)
        qa['recall_match'] = recall_metric(ans, gt_ans)
        qa['precision_match'] = precision_metric(ans, gt_ans)
        qa['unanswerable'] = ans == ''
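
The metric helpers called above are not part of this listing. A minimal sketch of token-overlap versions is given below, assuming SQuAD-style whitespace tokenization; the actual helpers may normalize casing and punctuation before comparing.

import collections

def precision_metric(pred, gt):
    # fraction of predicted tokens that also appear in the ground-truth answer
    pred_tokens, gt_tokens = pred.split(), gt.split()
    if not pred_tokens:
        return 0.0
    common = collections.Counter(pred_tokens) & collections.Counter(gt_tokens)
    return sum(common.values()) / len(pred_tokens)

def recall_metric(pred, gt):
    # fraction of ground-truth tokens that also appear in the prediction
    pred_tokens, gt_tokens = pred.split(), gt.split()
    if not gt_tokens:
        return 0.0
    common = collections.Counter(pred_tokens) & collections.Counter(gt_tokens)
    return sum(common.values()) / len(gt_tokens)

def f1_metric(pred, gt):
    # harmonic mean of token-level precision and recall
    p, r = precision_metric(pred, gt), recall_metric(pred, gt)
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def exact_match_metric(pred, gt):
    # 1 if the strings match exactly (after stripping whitespace), else 0
    return int(pred.strip() == gt.strip())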
def find_parent(sq):
    # precision of the predicted answer against each general question's reference answer
    gq_precisions = [
        precision_metric(sq['predicted_answer'], gq['answers'][0]['text'])
        for gq in general_questions
    ]
    if np.max(gq_precisions) > 0.5:
        # precision is high enough: return the index of the best-matching general question
        return np.argmax(gq_precisions)
    else:
        # precision is too low: fall back to the answer-positioning heuristic
        current_pos = sq['answers'][0]['answer_start']
        for i, gq in enumerate(general_questions):
            if gq['answers'][0]['answer_start'] > current_pos:
                return i - 1
        return len(general_questions) - 1
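
A hypothetical usage sketch: find_parent assumes numpy is imported as np and that general_questions (the general-level QAs for the same passage, ordered by answer_start) is visible in the enclosing scope. The name specific_questions below is an assumed stand-in for the specific-level QAs being attached to their parent general questions.

import numpy as np

for sq in specific_questions:
    # record the index of the general question this specific question hangs off
    sq['parent_idx'] = find_parent(sq)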