def compute_scores(gold_list, pred):
    """Score one prediction against several gold answers, leave-one-out style.

    With more than one gold answer, every gold answer is held out in turn and
    the prediction is scored against the remaining ones; the best exact-match
    and best F1 of each round are summed and finally divided by
    ``len(gold_list)``.  With a single gold answer the prediction is scored
    against that answer directly.

    Args:
        gold_list: Sequence of reference answers.  It is sliced and
            concatenated, so it must support ``+`` (e.g. a list).
        pred: Predicted answer, passed unchanged to
            ``squad_metrics.compute_exact`` and ``squad_metrics.compute_f1``.

    Returns:
        dict: ``{'em': ..., 'f1': ...}``.  Both values are ``0.0`` when
        ``gold_list`` is empty.
    """
    gold_count = len(gold_list)
    # The original divided by max(1, len(gold_list)) but still crashed on an
    # empty list because max() was called on an empty sequence first.
    if gold_count == 0:
        return {'em': 0.0, 'f1': 0.0}

    if gold_count > 1:
        # One reference set per held-out gold answer.
        reference_sets = [
            gold_list[:held_out] + gold_list[held_out + 1:]
            for held_out, _ in enumerate(gold_list)
        ]
    else:
        reference_sets = [gold_list]

    em_sum = 0.0
    f1_sum = 0.0
    for gold_answers in reference_sets:
        # The prediction is compared against every remaining gold answer and
        # only the best value counts for this round.
        em_sum += max(
            squad_metrics.compute_exact(a, pred) for a in gold_answers)
        f1_sum += max(squad_metrics.compute_f1(a, pred) for a in gold_answers)

    return {'em': em_sum / gold_count, 'f1': f1_sum / gold_count}
def _strip_team_day_ampm(text):
    """Drop a leading "Team"/"Day" tag and a trailing "PM"/"AM" marker."""
    # The slices are one character longer than the tag itself, so the
    # character that follows "Team"/"Day" is removed as well
    # (presumably a separating space -- confirm against the data).
    if text.startswith("Team"):
        text = text[5:]
    if text.startswith("Day"):
        text = text[4:]
    if text.endswith("PM"):
        text = text[:-2].strip()
    if text.endswith("AM"):
        text = text[:-2].strip()
    return text


def comput_modified_f1(data, pred):
    """Average F1 / exact match after stripping Team/Day/AM/PM decorations.

    For every QA pair in the JSON file the first gold answer and the
    prediction ``pred[qa['id']]`` are both normalised with
    ``_strip_team_day_ampm`` and then scored with ``compute_f1`` and
    ``compute_exact`` (both defined elsewhere in this module).

    Args:
        data: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id`` and ``answers[0]['text']``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        tuple: ``(mean_f1, mean_exact_match)``; ``(0.0, 0.0)`` when the file
        contains no QA pairs (previously a ``ZeroDivisionError``).

    Raises:
        KeyError: If a QA id is missing from ``pred``.
    """
    f1_scores = []
    em_scores = []
    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                gold = _strip_team_day_ampm(qa['answers'][0]['text'])
                guess = _strip_team_day_ampm(pred[qa['id']])
                f1_scores.append(compute_f1(gold, guess))
                em_scores.append(compute_exact(gold, guess))

    if not f1_scores:
        return 0.0, 0.0
    return sum(f1_scores) / len(f1_scores), sum(em_scores) / len(em_scores)
def check_f1(data, pred):
    """Per-question F1 / exact match, skipping a fixed list of question ids.

    Args:
        data: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id`` and ``answers[0]['text']``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        tuple: ``(f1, em)`` -- two parallel lists with one score per question
        that was not excluded, as produced by ``compute_f1`` and
        ``compute_exact`` (defined elsewhere in this module).

    Raises:
        KeyError: If a non-excluded QA id is missing from ``pred``.
    """
    # Ids are kept as strings in a frozenset: O(1) lookups instead of
    # scanning a 78-element list for every question.
    excluded_ids = frozenset(str(i) for i in (
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769,
    ))

    f1 = []
    em = []
    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                # Compare as a string so that ids stored as JSON numbers are
                # filtered too; the raw id is still used to index ``pred``.
                if str(qas_id) in excluded_ids:
                    continue
                answer_text = qa['answers'][0]['text']
                predicted = pred[qas_id]
                f1.append(compute_f1(answer_text, predicted))
                em.append(compute_exact(answer_text, predicted))
    return f1, em
def compute_scores(gold_list, pred):
    """Best exact-match and F1 of ``pred`` over all gold answers, times 100.

    Args:
        gold_list: Iterable of reference answers.
        pred: Predicted answer, passed unchanged to
            ``squad_metrics.compute_exact`` and ``squad_metrics.compute_f1``.

    Returns:
        dict: ``{'em': best_exact * 100, 'f1': best_f1 * 100}``.

    Raises:
        ValueError: If ``gold_list`` is empty (``max`` of an empty sequence).
    """
    # Exact match is evaluated on every gold answer first, F1 afterwards.
    best_exact = max(
        squad_metrics.compute_exact(gold, pred) for gold in gold_list)
    best_overlap = max(
        squad_metrics.compute_f1(gold, pred) for gold in gold_list)

    scores = {}
    scores['em'] = best_exact * 100
    scores['f1'] = best_overlap * 100
    return scores
def read_predicted_file_comparative_nn(predict_file, pred):
    """F1 of ``pred`` on comparative questions that mention an antonym pair.

    A question is scored only when (a) its first gold answer is exactly one
    of the comparative answer words and (b) the lower-cased question contains
    both words of at least one antonym pair (e.g. "more" and "less").

    Args:
        predict_file: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question`` and ``answers[0]['text']``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        list: One ``compute_f1(gold, pred[id])`` value per selected question
        (``compute_f1`` is defined elsewhere in this module).
    """
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'harder',
        'easier', 'increasing', 'decreasing', 'larger', 'smaller', 'better',
        'worse', 'faster', 'slower', 'weaker', 'stronger', 'closer',
        'farther', 'louder', 'quieter', 'correctly', 'incorrectly', 'not',
        'yes', 'no',
    )
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
    }

    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                answer_text = qa['answers'][0]['text']
                if answer_text not in comparative_words:
                    continue
                # Substring test, so "more" also matches e.g. "anymore".
                question_lower = qa['question'].lower()
                mentions_pair = any(
                    key in question_lower and val in question_lower
                    for key, val in pairs.items())
                if mentions_pair:
                    pred_f1.append(compute_f1(answer_text, pred[qa['id']]))
    return pred_f1
def read_predicted_file_nn(predict_file, pred):
    """F1 of ``pred`` on object-type questions (which / who / what / ...).

    A question is scored when its first gold answer is *not* one of the
    comparative answer words and the lower-cased question starts with one of
    the object question prefixes.  Gold answer and prediction are both passed
    through ``remove_punc`` before ``compute_f1`` (both defined elsewhere in
    this module).

    Args:
        predict_file: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question`` and ``answers[0]['text']``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        tuple: ``(all_object_scores, record, pred_f1)``.  The first two are
        always an empty list and an empty dict -- the code that filled them is
        disabled -- and are returned only to keep the three-value interface.
    """
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
    )
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller', 'better', 'worse', 'faster', 'slower', 'weaker',
        'stronger', 'closer', 'farther', 'louder', 'quieter', 'correctly',
        'incorrectly', 'not', 'yes', 'no',
    )

    all_object_scores = []
    record = {}
    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                answer_text = qa['answers'][0]['text']
                if answer_text in comparative_words:
                    continue
                if not qa['question'].lower().startswith(
                        object_question_prefixes):
                    continue
                pred_f1.append(
                    compute_f1(remove_punc(answer_text),
                               remove_punc(pred[qa['id']])))
    return all_object_scores, record, pred_f1
def compute_metrics_from_nbest(quasar_dir, split, fname_nbest_preds):
    """Compute question-level F1 / EM from a per-paragraph n-best file.

    The n-best file maps ``"<qid>_<paragraph index>"`` to a list of
    candidates carrying ``text`` and ``probability``.  For every paragraph
    the first candidate with non-empty text is kept; for every question the
    kept candidate with the highest probability becomes the prediction.  Gold
    answers are read from ``<quasar_dir>/<split>_questions.json`` (one JSON
    object per line with ``uid`` and ``answer``).

    Progress information (the prediction dict, the two dict sizes and the
    number of predictions without a gold answer) is printed to stdout.

    Args:
        quasar_dir: Directory containing ``<split>_questions.json``.
        split: Split name used to build the gold file name.
        fname_nbest_preds: Path to the n-best predictions JSON file.

    Returns:
        dict: ``{"f1": ..., "em": ...}`` as percentages; both ``0.0`` when no
        predicted question has a gold answer.
    """
    with open(fname_nbest_preds, encoding="utf-8") as rf:
        preds = json.load(rf)

    qid2preds = collections.defaultdict(list)
    for uid, nbest in preds.items():
        # Split on the LAST underscore only, so question ids that themselves
        # contain "_" no longer break the unpacking.
        qid, _ = uid.rsplit("_", 1)
        # Candidates are taken in file order (presumably sorted by
        # probability -- confirm); keep the first non-empty one.
        best = next((p for p in nbest if p["text"]), None)
        if best is None:
            # Every candidate was empty: nothing usable for this paragraph.
            continue
        qid2preds[qid].append((best["text"], best["probability"]))

    # Best answer over all paragraphs of a question; on ties the first
    # paragraph encountered wins.
    preds_qid2ans = {
        qid: max(candidates, key=lambda pair: pair[1])[0]
        for qid, candidates in qid2preds.items()
    }
    print(preds_qid2ans)

    gold_qid2ans = dict()
    quasar_data = os.path.join(quasar_dir, split + "_questions.json")
    with open(quasar_data, 'r', encoding="utf-8") as qa_data:
        for line in qa_data:
            if not line.strip():
                continue  # tolerate blank lines in the JSON-lines file
            data = json.loads(line)
            gold_qid2ans[data['uid']] = data['answer']

    print(len(preds_qid2ans), len(gold_qid2ans))

    qid2f1 = dict()
    qid2em = dict()
    counter = 0  # predictions whose question id has no gold answer
    for qid, a_pred in preds_qid2ans.items():
        # Explicit membership test: a KeyError raised inside the metric
        # functions is no longer silently counted as "missing gold".
        if qid not in gold_qid2ans:
            counter += 1
            continue
        a_gold = gold_qid2ans[qid]
        qid2f1[qid] = compute_f1(a_gold, a_pred)
        qid2em[qid] = compute_exact(a_gold, a_pred)
    print(counter)

    if not qid2f1:
        return {"f1": 0.0, "em": 0.0}

    f1 = sum(qid2f1.values()) / len(qid2f1) * 100
    em = sum(qid2em.values()) / len(qid2em) * 100
    return {"f1": f1, "em": em}
def f1(truths, preds):
    """Mean ``compute_f1`` score over paired gold answers and predictions.

    ``truths`` and ``preds`` are walked in lockstep with ``zip``, so surplus
    items of the longer sequence are ignored.  ``mean`` and ``compute_f1``
    come from the enclosing module.
    """
    pair_scores = []
    for truth, pred in zip(truths, preds):
        pair_scores.append(compute_f1(truth, pred))
    return mean(pair_scores)
def make_find_answer_by_rule(no_label_synthetic, output):
    """Answer every question with hand-written rules and dump the answers.

    Three cases are distinguished per question:

    * Object questions (the lower-cased question starts with "which", "who",
      "what", ...): the answer is ``make_choice_pos(predicts)``, switched to
      ``make_choice_neg(predicts)`` when the question contains a negative
      word that does not occur in ``predicts['TP in back']``.
    * Comparative questions (the question contains both words of an antonym
      pair): one word of the pair is returned, chosen from the order in which
      ``object1`` / ``object2`` appear in the question and from
      ``predicts['TP_relevance']``.
    * Everything else: whichever of ``object1``, ``object2``, ``SP_object1``,
      ``SP_object2`` has the highest ``compute_f1`` against the question.

    ``make_choice_pos``, ``make_choice_neg``, ``remove_punc``, ``compute_f1``
    and ``compute_exact`` are defined elsewhere in this module.

    Args:
        no_label_synthetic: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question``, ``predicts`` and optionally ``answers``.
        output: Path of the JSON file that receives ``{qa id: answer}``.

    Returns:
        tuple: ``(final_answer, f1, exact)`` -- the id-to-answer dict and the
        per-question F1 / exact-match lists.
    """
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
        " which", 'when', 'at which', 'where', 'during which',
    )
    positive_words = [
        'more', 'higher', 'increase', 'high', 'harder', 'increasing', 'up',
        'larger', 'better', 'faster', 'stronger', 'closer', 'louder',
        'correctly'
    ]
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
        'increased': "reduced",
        "warmer": "colder",
        'high': 'low',
        'turn on': "turn off",
        'rise': 'fall',
        'up': 'down',
        'longer': 'shorter',
        'deeper': "shallower",
        'positively': 'negatively',
    }

    final_answer = {}
    f1 = []
    exact = []
    with open(no_label_synthetic, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                question_text = qa['question']
                question_lower = question_text.lower()
                try:
                    answer_text = qa['answers'][0]['text']
                except (KeyError, IndexError, TypeError):
                    # Unlabelled question: score against a placeholder.
                    answer_text = "null"
                predicts = qa['predicts']

                if question_lower.startswith(object_question_prefixes):
                    # Object type.
                    predicts_answer = make_choice_pos(predicts)
                    # NOTE(review): re-picking the positive choice is a no-op
                    # if make_choice_pos is deterministic; kept as in the
                    # original.
                    if any(word in question_lower for word in positive_words):
                        predicts_answer = make_choice_pos(predicts)
                    # NOTE(review): the scan stops at the first negative word
                    # that triggers the switch -- confirm intended nesting.
                    for word in negative_words:
                        if (word in question_lower and
                                word not in predicts['TP in back'].lower()):
                            predicts_answer = make_choice_neg(predicts)
                            break
                    f1.append(
                        compute_f1(remove_punc(answer_text),
                                   remove_punc(predicts_answer)))
                else:
                    # First antonym pair with both words in the question.
                    candidate = next(
                        ([key, val] for key, val in pairs.items()
                         if key in question_lower and val in question_lower),
                        [])
                    if candidate:
                        # Comparative type.
                        o1_ind = question_lower.find(
                            remove_punc(predicts["object1"].lower()))
                        o2_ind = question_lower.find(
                            remove_punc(predicts["object2"].lower()))
                        than_ind = question_lower.find("than")
                        # If only one object is found and it precedes "than",
                        # push the missing one behind it with a large index.
                        if o1_ind != -1 and o2_ind == -1:
                            if than_ind != -1 and o1_ind < than_ind:
                                o2_ind = 1000
                        elif o1_ind == -1 and o2_ind != -1:
                            if than_ind != -1 and o2_ind < than_ind:
                                o1_ind = 1000
                        # First word of the pair when "object1 comes first"
                        # and "TP_relevance == 0" agree, second otherwise.
                        object1_first = o1_ind < o2_ind
                        relevance_is_zero = int(predicts['TP_relevance']) == 0
                        if object1_first == relevance_is_zero:
                            predicts_answer = candidate[0]
                        else:
                            predicts_answer = candidate[1]
                        f1.append(compute_f1(answer_text, predicts_answer))
                    else:
                        # No rule applies: take the candidate span that
                        # overlaps most with the question (first on ties).
                        posssible_answer = [
                            predicts["object1"], predicts["object2"],
                            predicts["SP_object1"], predicts["SP_object2"]
                        ]
                        overlaps = [
                            compute_f1(question_text, option)
                            for option in posssible_answer
                        ]
                        predicts_answer = posssible_answer[overlaps.index(
                            max(overlaps))]
                        f1.append(
                            compute_f1(remove_punc(answer_text),
                                       predicts_answer))

                predicts_answer = remove_punc(predicts_answer)
                final_answer[qas_id] = predicts_answer
                exact.append(
                    compute_exact(remove_punc(answer_text),
                                  remove_punc(predicts_answer)))

    with open(output, "w", encoding="utf-8") as writer:
        writer.write(json.dumps(final_answer, indent=4) + "\n")
    return final_answer, f1, exact
def make_find_answer_by_rule_uncovered(no_label_synthetic, output):
    """Answer every question with hand-written rules and write a hyps file.

    Applies the same three rules as the JSON-writing variant:

    * Object questions (the lower-cased question starts with "which", "who",
      "what", ...): ``make_choice_pos(predicts)``, switched to
      ``make_choice_neg(predicts)`` when the question contains a negative
      word that does not occur in ``predicts['TP in back']``.
    * Comparative questions (the question contains both words of an antonym
      pair): one word of the pair, chosen from the order in which
      ``object1`` / ``object2`` appear in the question and from
      ``predicts['TP_relevance']``.
    * Everything else: whichever of ``object1``, ``object2``, ``SP_object1``,
      ``SP_object2`` has the highest ``compute_f1`` against the question.

    No question is skipped: the id filter that used to live here was already
    disabled, so its unused id list has been dropped.  ``make_choice_pos``,
    ``make_choice_neg``, ``remove_punc``, ``compute_f1`` and
    ``compute_exact`` are defined elsewhere in this module.

    Args:
        no_label_synthetic: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question``, ``predicts`` and optionally ``answers``.
        output: Existing directory; answers are written one per line to
            ``<output>/hyps_rule_based``.

    Returns:
        tuple: ``(final_answer, f1, exact)`` -- the id-to-answer dict and the
        per-question F1 / exact-match lists.
    """
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
        " which", 'when', 'at which', 'where', 'during which',
    )
    positive_words = [
        'more', 'higher', 'increase', 'high', 'harder', 'increasing', 'up',
        'larger', 'better', 'faster', 'stronger', 'closer', 'louder',
        'correctly'
    ]
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
        'increased': "reduced",
        "warmer": "colder",
        'high': 'low',
        'turn on': "turn off",
        'rise': 'fall',
        'up': 'down',
        'longer': 'shorter',
        'deeper': "shallower",
        'positively': 'negatively',
    }

    final_answer = {}
    f1 = []
    exact = []
    with open(no_label_synthetic, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    hyps_path = os.path.join(output, "hyps_rule_based")
    # The context manager closes the hypothesis file even if a rule raises;
    # the original leaked the handle in that case.
    with open(hyps_path, 'w', encoding='utf-8') as writer_hyps:
        for entry in input_data["data"]:
            for paragraph in entry["paragraphs"]:
                for qa in paragraph['qas']:
                    qas_id = qa['id']
                    question_text = qa['question']
                    question_lower = question_text.lower()
                    try:
                        answer_text = qa['answers'][0]['text']
                    except (KeyError, IndexError, TypeError):
                        # Unlabelled question: score against a placeholder.
                        answer_text = "null"
                    predicts = qa['predicts']

                    if question_lower.startswith(object_question_prefixes):
                        # Object type.
                        predicts_answer = make_choice_pos(predicts)
                        # NOTE(review): re-picking the positive choice is a
                        # no-op if make_choice_pos is deterministic; kept as
                        # in the original.
                        if any(word in question_lower
                               for word in positive_words):
                            predicts_answer = make_choice_pos(predicts)
                        # NOTE(review): the scan stops at the first negative
                        # word that triggers the switch -- confirm nesting.
                        for word in negative_words:
                            if (word in question_lower and word
                                    not in predicts['TP in back'].lower()):
                                predicts_answer = make_choice_neg(predicts)
                                break
                        f1.append(
                            compute_f1(remove_punc(answer_text),
                                       remove_punc(predicts_answer)))
                    else:
                        # First antonym pair with both words in the question.
                        candidate = next(
                            ([key, val] for key, val in pairs.items()
                             if key in question_lower
                             and val in question_lower), [])
                        if candidate:
                            # Comparative type.
                            o1_ind = question_lower.find(
                                remove_punc(predicts["object1"].lower()))
                            o2_ind = question_lower.find(
                                remove_punc(predicts["object2"].lower()))
                            than_ind = question_lower.find("than")
                            # If only one object is found and it precedes
                            # "than", push the missing one behind it.
                            if o1_ind != -1 and o2_ind == -1:
                                if than_ind != -1 and o1_ind < than_ind:
                                    o2_ind = 1000
                            elif o1_ind == -1 and o2_ind != -1:
                                if than_ind != -1 and o2_ind < than_ind:
                                    o1_ind = 1000
                            # First word of the pair when "object1 comes
                            # first" and "TP_relevance == 0" agree.
                            object1_first = o1_ind < o2_ind
                            relevance_is_zero = (
                                int(predicts['TP_relevance']) == 0)
                            if object1_first == relevance_is_zero:
                                predicts_answer = candidate[0]
                            else:
                                predicts_answer = candidate[1]
                            f1.append(compute_f1(answer_text, predicts_answer))
                        else:
                            # No rule applies: take the candidate span that
                            # overlaps most with the question.
                            posssible_answer = [
                                predicts["object1"], predicts["object2"],
                                predicts["SP_object1"], predicts["SP_object2"]
                            ]
                            overlaps = [
                                compute_f1(question_text, option)
                                for option in posssible_answer
                            ]
                            predicts_answer = posssible_answer[overlaps.index(
                                max(overlaps))]
                            f1.append(
                                compute_f1(remove_punc(answer_text),
                                           predicts_answer))

                    predicts_answer = remove_punc(predicts_answer)
                    writer_hyps.write(predicts_answer)
                    writer_hyps.write('\n')
                    final_answer[qas_id] = predicts_answer
                    exact.append(
                        compute_exact(remove_punc(answer_text),
                                      remove_punc(predicts_answer)))
    return final_answer, f1, exact
def fuzzy_f1(data):
    """Average slot-level F1 and hit rates of the predicted components.

    Every ``qa['predicts']`` entry maps the slots ``object1``, ``object2``,
    ``TP in back``, ``SP in back``, ``SP_object1`` and ``SP_object2`` to a
    dict with ``label`` and ``predict``; each slot is scored with
    ``compute_f1(label, predict)`` (defined elsewhere in this module).

    For ``object1`` / ``object2`` the score is the better of the straight
    match and the swapped match (label of one slot against the prediction of
    the other).  The averaged F1 of ``SP_object1`` / ``SP_object2`` uses the
    straight match only, while their hit rates also accept the swapped match.

    Args:
        data: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}``.

    Returns:
        tuple: Twelve floats -- mean F1 for (object1, object2, TP in back,
        SP in back, SP_object1, SP_object2) followed by the fraction of
        questions with non-zero F1 for the same six slots.  Every value is
        ``0.0`` when the file holds no questions (previously a
        ``ZeroDivisionError``).
    """

    def comput_f(a):
        return sum(a) / len(a) if a else 0.0

    def slot_f1(predict, label_slot, predict_slot):
        return compute_f1(predict[label_slot]['label'],
                          predict[predict_slot]['predict'])

    group1, group2, effect_B, cause_B, cause_G1, casue_G2 = (
        [], [], [], [], [], [])
    f_group1, f_group2, f_effect_B, f_cause_B, f_cause_G1, f_casue_G2 = (
        [], [], [], [], [], [])

    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                predict = qa['predicts']
                # Straight matches.
                g1 = slot_f1(predict, 'object1', 'object1')
                g2 = slot_f1(predict, 'object2', 'object2')
                effect = slot_f1(predict, 'TP in back', 'TP in back')
                cb = slot_f1(predict, 'SP in back', 'SP in back')
                cg1 = slot_f1(predict, 'SP_object1', 'SP_object1')
                cg2 = slot_f1(predict, 'SP_object2', 'SP_object2')
                # Swapped matches (label of one slot, prediction of the other).
                g12 = slot_f1(predict, 'object1', 'object2')
                g21 = slot_f1(predict, 'object2', 'object1')
                cg12 = slot_f1(predict, 'SP_object1', 'SP_object2')
                cg21 = slot_f1(predict, 'SP_object2', 'SP_object1')

                group1.append(max(g1, g12))
                group2.append(max(g2, g21))
                effect_B.append(effect)
                cause_B.append(cb)
                cause_G1.append(cg1)
                casue_G2.append(cg2)

                f_group1.append(int(g1 > 0 or g12 > 0))
                f_group2.append(int(g2 > 0 or g21 > 0))
                f_effect_B.append(int(effect > 0))
                f_cause_B.append(int(cb > 0))
                f_cause_G1.append(int(cg1 > 0 or cg12 > 0))
                f_casue_G2.append(int(cg2 > 0 or cg21 > 0))

    return (comput_f(group1), comput_f(group2), comput_f(effect_B),
            comput_f(cause_B), comput_f(cause_G1), comput_f(casue_G2),
            comput_f(f_group1), comput_f(f_group2), comput_f(f_effect_B),
            comput_f(f_cause_B), comput_f(f_cause_G1), comput_f(f_casue_G2))
def statistsic_step18(predict_path, multi_answer_path, output):
    """Count how well predicted object1/object2 match two candidate answers.

    For every question the predicted ``object1`` and ``object2`` are compared
    (``compute_f1``, defined elsewhere in this module) with the first two
    candidate answers listed for that question in ``multi_answer_path``.
    Questions whose first gold answer is a comparative word are only counted
    and otherwise skipped.  Each remaining question is written to ``output``
    as an indented JSON object with the four F1 values stored under
    ``qa['predicts']['f1']`` as ``[o1_can1, o2_can1, o1_can2, o2_can2]``.

    Two lines are printed to stdout: the counters ``cnt, both_correct,
    both_good, half_correct, half_good, both_wrong, both_bad`` and the number
    of skipped comparative questions.

    Args:
        predict_path: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"background", "situation",
            "qas": [...]}]}]}``.
        multi_answer_path: Path to a UTF-8 JSON-lines file; every line has
            ``id`` and ``answer_text`` (indexed at 0 and 1).
        output: Path of the report file to (over)write.

    Returns:
        None.
    """
    candidate_answer = {}
    with open(multi_answer_path, 'r', encoding='utf-8') as fin:
        for line in fin:
            item = json.loads(line)
            candidate_answer[item['id']] = item['answer_text']

    ignored_list = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller',
    )
    cnt = 0
    compare = 0
    both_correct = 0
    half_correct = 0
    half_good = 0
    both_good = 0
    both_wrong = 0
    both_bad = 0

    # The context manager closes the report even if a question raises; the
    # original leaked the handle in that case.
    with open(output, 'w', encoding='utf-8') as writer:
        with open(predict_path, "r", encoding="utf-8") as reader:
            input_data = json.load(reader)

        for entry in input_data["data"]:
            for paragraph in entry["paragraphs"]:
                background = paragraph['background']
                situation = paragraph['situation']
                for qa in paragraph['qas']:
                    qas_id = str(qa['id'])
                    predict = qa['predicts']
                    candidate = candidate_answer[qas_id]
                    first_can = candidate[0]
                    second_can = candidate[1]
                    o1_can1 = compute_f1(first_can, predict['object1'])
                    o1_can2 = compute_f1(second_can, predict['object1'])
                    o2_can1 = compute_f1(first_can, predict['object2'])
                    o2_can2 = compute_f1(second_can, predict['object2'])
                    cnt += 1
                    if qa["answers"][0]['text'] in ignored_list:
                        compare += 1
                        continue

                    # Exact matches, in either object/candidate pairing.
                    if (o1_can1 == 1 and o2_can2 == 1) or (o1_can2 == 1 and
                                                           o2_can1 == 1):
                        both_correct += 1
                    elif ((o1_can1 == 1 and o2_can2 == 0) or
                          (o1_can2 == 1 and o2_can1 == 0) or
                          (o1_can1 == 0 and o2_can2 == 1) or
                          (o1_can2 == 0 and o2_can1 == 1)):
                        half_correct += 1

                    # Same idea with a 0.5 F1 threshold instead of exactness.
                    if (o1_can1 >= 0.5 and o2_can2 >= 0.5) or (
                            o1_can2 >= 0.5 and o2_can1 >= 0.5):
                        both_good += 1
                    elif ((o1_can1 >= 0.5 and o2_can2 < 0.5) or
                          (o1_can2 >= 0.5 and o2_can1 < 0.5) or
                          (o2_can2 >= 0.5 and o1_can1 < 0.5) or
                          (o2_can1 >= 0.5 and o1_can2 < 0.5)):
                        half_good += 1

                    if (o1_can1 == 0 and o2_can2 == 0) and (o1_can2 == 0 and
                                                            o2_can1 == 0):
                        both_wrong += 1
                    if (o1_can1 < 0.5 and o2_can2 < 0.5) and (
                            o1_can2 < 0.5 and o2_can1 < 0.5):
                        both_bad += 1

                    qa['predicts']['f1'] = [o1_can1, o2_can1, o1_can2, o2_can2]
                    out = {
                        "background": background,
                        "situation": situation,
                        "qa": qa,
                    }
                    writer.write(json.dumps(out, indent=2))
                    writer.write("\n")

    print(cnt, both_correct, both_good, half_correct, half_good, both_wrong,
          both_bad)
    print(compare)
def comput_scores(predict_path, multi_answer_path, output):
    """Collect the questions whose predicted objects match no known answer.

    For every non-comparative question the predicted ``object1`` and
    ``object2`` are each scored (``compute_f1``, defined elsewhere in this
    module) against all candidate answers from ``multi_answer_path`` plus the
    first gold answer, keeping the best value per object.  A question is
    dropped when one object matches perfectly and the two predicted objects
    differ, or when both objects reach an F1 of at least 0.5.  The remaining
    questions are written to ``output`` as indented JSON objects and returned.

    Six counters are printed to stdout, one per line: skipped comparative
    questions, ``same``, ``both_correct``, ``half_correct``, ``both_good``,
    ``half_good``.

    Args:
        predict_path: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"background", "situation",
            "qas": [...]}]}]}``.
        multi_answer_path: Path to a UTF-8 JSON-lines file; every line has
            ``id`` and ``answer_text`` (indexed at 0 and 1).
        output: Path of the report file to (over)write.

    Returns:
        dict: ``{qa id (str): [o1_f1, o2_f1]}`` for every reported question.
    """
    candidate_answer = {}
    with open(multi_answer_path, 'r', encoding='utf-8') as fin:
        for line in fin:
            item = json.loads(line)
            candidate_answer[item['id']] = item['answer_text']

    ignored_list = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller',
    )
    final_score = {}
    cnt = 0
    both_correct = 0
    both_good = 0
    # Never updated; still printed so the six-line console output is kept.
    same = 0
    half_correct = 0
    half_good = 0

    # The context manager closes the report even if a question raises; the
    # original leaked the handle in that case.
    with open(output, 'w', encoding='utf-8') as writer:
        with open(predict_path, "r", encoding="utf-8") as reader:
            input_data = json.load(reader)

        for entry in input_data["data"]:
            for paragraph in entry["paragraphs"]:
                background = paragraph['background']
                situation = paragraph['situation']
                for qa in paragraph['qas']:
                    qas_id = str(qa['id'])
                    predict = qa['predicts']
                    candidate = candidate_answer[qas_id]
                    first_can = candidate[0]
                    second_can = candidate[1]
                    o1_can1 = compute_f1(first_can, predict['object1'])
                    o1_can2 = compute_f1(second_can, predict['object1'])
                    o2_can1 = compute_f1(first_can, predict['object2'])
                    o2_can2 = compute_f1(second_can, predict['object2'])
                    gold_text = qa["answers"][0]['text']
                    if gold_text in ignored_list:
                        cnt += 1
                        continue

                    # Both objects match, in either pairing.
                    if (o1_can1 == 1 and o2_can2 == 1) or (o1_can2 == 1 and
                                                           o2_can1 == 1):
                        both_correct += 1
                    if (o1_can1 >= 0.5 and o2_can2 >= 0.5) or (
                            o1_can2 >= 0.5 and o2_can1 >= 0.5):
                        both_good += 1

                    # Best F1 of each object over all candidates and the
                    # gold answer.
                    o1_f1 = max(
                        max(
                            compute_f1(answer, predict['object1'])
                            for answer in candidate),
                        compute_f1(gold_text, predict['object1']))
                    o2_f1 = max(
                        max(
                            compute_f1(answer, predict['object2'])
                            for answer in candidate),
                        compute_f1(gold_text, predict['object2']))

                    if ((o1_f1 == 1 or o2_f1 == 1) and
                            predict['object1'] != predict['object2']):
                        continue
                    if o1_f1 >= 0.5 and o2_f1 >= 0.5:
                        continue

                    final_score[qas_id] = [o1_f1, o2_f1]
                    qa['predicts']['f1'] = [o1_f1, o2_f1]
                    out = {
                        "background": background,
                        "situation": situation,
                        "qa": qa,
                    }
                    writer.write(json.dumps(out, indent=2))
                    writer.write("\n")

    print(cnt)
    print(same)
    print(both_correct)
    print(half_correct)
    print(both_good)
    print(half_good)
    return final_score
def read_predicted_file_comparative(predict_file, pred):
    """Compare the rule-based comparative answer with ``pred`` per question.

    A question is considered when its first gold answer is one of the
    comparative answer words, its id is not in the exclusion list, the
    lower-cased question contains both words of an antonym pair, and
    ``object1`` / ``object2`` are found at different positions in the
    question.  The rule then picks one word of the pair from the order of the
    two objects in the question and from ``predicts['TP_relevance']``.
    ``compute_f1`` and ``remove_punc`` are defined elsewhere in this module.

    Args:
        predict_file: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question``, ``answers[0]['text']`` and ``predicts``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        tuple: ``(pred_f1, f1, prediction)`` -- F1 of ``pred`` per considered
        question, F1 of the rule answer per considered question, and a dict
        ``{qa id: [rule_f1, rule_answer, [word, antonym],
        [o1_index, o2_index, than_index]]}``.
    """
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'harder',
        'easier', 'increasing', 'decreasing', 'larger', 'smaller', 'better',
        'worse', 'faster', 'slower', 'weaker', 'stronger', 'closer',
        'farther', 'louder', 'quieter', 'correctly', 'incorrectly', 'not',
        'yes', 'no',
    )
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
    }
    # Ids are kept as strings in a frozenset for O(1) lookups.
    excluded_ids = frozenset(str(i) for i in (
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769,
    ))

    prediction = {}
    f1 = []
    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                question_lower = qa['question'].lower()
                answer_text = qa['answers'][0]['text']
                predicts = qa['predicts']
                if answer_text not in comparative_words:
                    continue
                # Compare as a string so that ids stored as JSON numbers are
                # excluded too; the raw id is still used to index ``pred``.
                if str(qas_id) in excluded_ids:
                    continue

                # First antonym pair with both words in the question.
                candidate = next(
                    ([key, val] for key, val in pairs.items()
                     if key in question_lower and val in question_lower), [])
                if not candidate:
                    continue

                o1_ind = question_lower.find(
                    remove_punc(predicts["object1"].lower()))
                o2_ind = question_lower.find(
                    remove_punc(predicts["object2"].lower()))
                than_ind = question_lower.find("than")
                # Recorded before the sentinel adjustment below.
                indx = [o1_ind, o2_ind, than_ind]

                if o1_ind == o2_ind:
                    # Neither object found, or both at the same position:
                    # the word order carries no information.
                    continue
                # If only one object is found and it precedes "than", push
                # the missing one behind it with a large index.
                if o1_ind != -1 and o2_ind == -1:
                    if than_ind != -1 and o1_ind < than_ind:
                        o2_ind = 1000
                elif o1_ind == -1 and o2_ind != -1:
                    if than_ind != -1 and o2_ind < than_ind:
                        o1_ind = 1000

                # First word of the pair when "object1 comes first" and
                # "TP_relevance == 0" agree, second word otherwise.
                object1_first = o1_ind < o2_ind
                relevance_is_zero = int(predicts['TP_relevance']) == 0
                if object1_first == relevance_is_zero:
                    chosen = candidate[0]
                else:
                    chosen = candidate[1]
                rule_f1 = compute_f1(answer_text, chosen)
                prediction[qas_id] = [rule_f1, chosen, candidate, indx]
                f1.append(rule_f1)
                # NOTE(review): scored only for questions the rule handled --
                # confirm this matches the intended nesting.
                pred_f1.append(compute_f1(answer_text, pred[qas_id]))
    return pred_f1, f1, prediction
def read_predicted_file(predict_file, pred):
    """Compare the rule-based object answer with ``pred`` per question.

    A question is considered when its first gold answer is not a comparative
    word, its id is not in the exclusion list, the lower-cased question
    starts with an object question prefix ("which", "who", "what", ...), and
    at least one of ``predicts['object1']`` / ``predicts['object2']`` has a
    non-zero F1 with the gold answer.  The rule answer is
    ``make_choice_pos(predicts)``, switched to ``make_choice_neg(predicts)``
    when the question contains a negative word that does not occur in
    ``predicts['TP in back']``.  ``make_choice_pos``, ``make_choice_neg``,
    ``remove_punc`` and ``compute_f1`` are defined elsewhere in this module.

    Args:
        predict_file: Path to a UTF-8 JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each QA has
            ``id``, ``question``, ``answers[0]['text']`` and ``predicts``.
        pred: Mapping from QA id to predicted answer string.

    Returns:
        tuple: ``(all_object_scores, record, pred_f1)`` -- F1 of the rule
        answer per considered question, a dict
        ``{qa id: [rule_f1, gold_answer, rule_answer]}``, and F1 of ``pred``
        per considered question.
    """
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
    )
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller', 'better', 'worse', 'faster', 'slower', 'weaker',
        'stronger', 'closer', 'farther', 'louder', 'quieter', 'correctly',
        'incorrectly', 'not', 'yes', 'no',
    )
    positive_words = [
        'more', 'higher', 'increase', 'high', 'harder', 'increasing', 'up',
        'larger', 'better', 'faster', 'stronger', 'closer', 'louder',
        'correctly'
    ]
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    # Ids are kept as strings in a frozenset for O(1) lookups.
    excluded_ids = frozenset(str(i) for i in (
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769,
    ))

    all_object_scores = []
    record = {}
    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                question_lower = qa['question'].lower()
                answer_text = qa['answers'][0]['text']
                predicts = qa['predicts']
                if answer_text in comparative_words:
                    continue
                # Compare as a string so that ids stored as JSON numbers are
                # excluded too; the raw id is still used to index ``pred``.
                if str(qas_id) in excluded_ids:
                    continue
                if not question_lower.startswith(object_question_prefixes):
                    continue
                # Skip questions where neither extracted object overlaps the
                # gold answer at all.
                if max(compute_f1(answer_text, predicts['object1']),
                       compute_f1(answer_text, predicts['object2'])) == 0.0:
                    continue

                predicts_answer = make_choice_pos(predicts)
                # NOTE(review): re-picking the positive choice is a no-op if
                # make_choice_pos is deterministic; kept as in the original.
                if any(word in question_lower for word in positive_words):
                    predicts_answer = make_choice_pos(predicts)
                # NOTE(review): the scan stops at the first negative word
                # that triggers the switch -- confirm intended nesting.
                for word in negative_words:
                    if (word in question_lower
                            and word not in predicts['TP in back'].lower()):
                        predicts_answer = make_choice_neg(predicts)
                        break

                gold_clean = remove_punc(answer_text)
                f1 = compute_f1(gold_clean, remove_punc(predicts_answer))
                pred_f1.append(
                    compute_f1(remove_punc(answer_text),
                               remove_punc(pred[qas_id])))
                record[qas_id] = [f1, answer_text, predicts_answer]
                all_object_scores.append(f1)
    return all_object_scores, record, pred_f1