Python compute_f1示例，transformers.data.metrics.squad_metrics.compute_f1 Python示例

示例#1

0

显示文件

    def compute_scores(gold_list, pred):
        """Score one prediction against several gold answers, leave-one-out.

        With more than one gold answer, every gold is held out in turn and the
        prediction is scored by its best exact match / F1 against the
        remaining golds; the per-round maxima are summed and divided by
        ``len(gold_list)``. With a single gold answer the prediction is
        scored against that answer directly.

        Args:
            gold_list: Sliceable sequence (e.g. a list) of reference answers.
            pred: The predicted answer.

        Returns:
            A dict with keys ``'em'`` and ``'f1'``. An empty ``gold_list``
            yields 0.0 for both instead of raising ``ValueError``.
        """
        if len(gold_list) > 1:
            # One reference set per held-out gold answer.
            reference_sets = [
                gold_list[:held_out] + gold_list[held_out + 1:]
                for held_out in range(len(gold_list))
            ]
        else:
            reference_sets = [gold_list]

        em_sum = 0.0
        f1_sum = 0.0
        for gold_answers in reference_sets:
            # default=0 keeps an empty reference set from raising in max().
            em_sum += max(
                (squad_metrics.compute_exact(a, pred) for a in gold_answers),
                default=0)
            f1_sum += max(
                (squad_metrics.compute_f1(a, pred) for a in gold_answers),
                default=0)

        rounds = max(1, len(gold_list))
        return {'em': em_sum / rounds, 'f1': f1_sum / rounds}

示例#2

0

显示文件

def _strip_answer_affixes(text):
    """Drop a leading "Team"/"Day" label and a trailing "PM"/"AM" marker.

    The prefix cuts remove one character more than the label itself
    (5 for "Team", 4 for "Day") — presumably the separating space; confirm
    against the dataset.
    """
    if text.startswith("Team"):
        text = text[5:]
    if text.startswith("Day"):
        text = text[4:]
    if text.endswith("PM"):
        text = text[:-2].strip()
    if text.endswith("AM"):
        text = text[:-2].strip()
    return text


def comput_modified_f1(data, pred):
    """Average F1 and exact match after stripping Team/Day/AM/PM affixes.

    Args:
        data: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each qa
            has an ``id`` and at least one ``answers[0]["text"]``.
        pred: Mapping from question id to the predicted answer string.

    Returns:
        ``(mean_f1, mean_exact_match)`` over all questions, or
        ``(0.0, 0.0)`` when the file contains no questions.

    Raises:
        KeyError: If ``pred`` has no entry for a question id.
    """
    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    f1 = []
    em = []
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                # Only the first gold answer is used.
                answer_text = _strip_answer_affixes(qa['answers'][0]['text'])
                pred_ans = _strip_answer_affixes(pred[qa['id']])
                f1.append(compute_f1(answer_text, pred_ans))
                em.append(compute_exact(answer_text, pred_ans))

    if not f1:
        # Nothing was scored; avoid dividing by zero.
        return 0.0, 0.0
    return sum(f1) / len(f1), sum(em) / len(em)

示例#3

0

显示文件

def check_f1(data, pred):
    """Collect per-question F1 and exact-match scores, skipping filtered ids.

    Args:
        data: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}``.
        pred: Mapping from question id to the predicted answer.

    Returns:
        ``(f1, em)``: two parallel lists with one score per question that
        was not filtered out.

    Raises:
        KeyError: If ``pred`` has no entry for a non-filtered question id.
    """
    # Ids excluded from scoring, stored as strings because they are compared
    # with the ``id`` field read from the file. A frozenset gives
    # constant-time membership tests.
    filtered_id = frozenset(str(i) for i in (
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769,
    ))

    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    f1 = []
    em = []
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                if qas_id in filtered_id:
                    continue
                # Only the first gold answer is used.
                answer_text = qa['answers'][0]['text']
                f1.append(compute_f1(answer_text, pred[qas_id]))
                em.append(compute_exact(answer_text, pred[qas_id]))
    return f1, em

示例#4

0

显示文件

文件： nq_open.py 项目： yuvalkirstain/lm-evaluation-harness

    def compute_scores(gold_list, pred):
        """Return the best exact match and F1 of ``pred`` over all golds.

        Both maxima are scaled by 100 and returned under the keys ``'em'``
        and ``'f1'``.
        """
        # Exact match is evaluated for every gold answer first, then F1.
        exact_scores = [
            squad_metrics.compute_exact(gold, pred) for gold in gold_list
        ]
        best_em = max(exact_scores)

        overlap_scores = [
            squad_metrics.compute_f1(gold, pred) for gold in gold_list
        ]
        best_f1 = max(overlap_scores)

        return {'em': best_em * 100, 'f1': best_f1 * 100}

示例#5

0

显示文件

def read_predicted_file_comparative_nn(predict_file, pred):
    """F1 scores for questions whose gold answer is a comparative word.

    A question is scored only when (a) its first gold answer is exactly one
    of the comparative words below and (b) its lower-cased text contains
    both members of at least one antonym pair (substring match).

    Args:
        predict_file: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}``.
        pred: Mapping from question id to the predicted answer.

    Returns:
        List of ``compute_f1(gold, prediction)`` values, one per scored
        question.

    Raises:
        KeyError: If ``pred`` has no entry for a scored question id.
    """
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'harder',
        'easier', 'increasing', 'decreasing', 'larger', 'smaller', 'better',
        'worse', 'faster', 'slower', 'weaker', 'stronger', 'closer', 'farther',
        'louder', 'quieter', 'correctly', 'incorrectly', 'not', 'yes', 'no',
    )
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
    }

    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    pred_f1 = []
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                # Only the first gold answer is used.
                answer_text = qa['answers'][0]['text']
                if answer_text not in comparative_words:
                    continue
                # Lower-case once instead of once per pair member.
                question_lower = qa['question'].lower()
                mentions_both_sides = any(
                    key in question_lower and val in question_lower
                    for key, val in pairs.items())
                if mentions_both_sides:
                    pred_f1.append(compute_f1(answer_text, pred[qa['id']]))

    return pred_f1

示例#6

0

显示文件

def read_predicted_file_nn(predict_file, pred):
    """F1 scores for object-type questions with non-comparative answers.

    A question is scored only when its first gold answer is not one of the
    comparative words below and its lower-cased text starts with one of the
    object question prefixes ("which", "who", "what", ...).

    Args:
        predict_file: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}``.
        pred: Mapping from question id to the predicted answer.

    Returns:
        ``(all_object_scores, record, pred_f1)``. The first two are always an
        empty list and an empty dict; they are kept so the return shape stays
        unchanged for callers. ``pred_f1`` holds one
        ``compute_f1(remove_punc(gold), remove_punc(prediction))`` per scored
        question.

    Raises:
        KeyError: If ``pred`` has no entry for a scored question id.
    """
    # A tuple lets str.startswith test every prefix in one call.
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
    )
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller', 'better', 'worse', 'faster', 'slower', 'weaker',
        'stronger', 'closer', 'farther', 'louder', 'quieter', 'correctly',
        'incorrectly', 'not', 'yes', 'no',
    )

    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    all_object_scores = []
    record = {}
    pred_f1 = []
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                # Only the first gold answer is used.
                answer_text = qa['answers'][0]['text']
                if answer_text in comparative_words:
                    continue
                if not qa['question'].lower().startswith(
                        object_question_prefixes):
                    continue
                pred_f1.append(
                    compute_f1(remove_punc(answer_text),
                               remove_punc(pred[qa['id']])))
    return all_object_scores, record, pred_f1

示例#7

0

显示文件

文件： compute_metrics.py 项目： urpeter/ODQA_Bert_Project

def compute_metrics_from_nbest(quasar_dir, split, fname_nbest_preds):
    """Compute F1 and exact match (in percent) from an n-best prediction file.

    The n-best file maps ``"<qid>_<idx>"`` keys to lists of candidate dicts
    with ``"text"`` and ``"probability"`` fields. For each key the first
    candidate with non-empty text is kept; for each qid the kept candidate
    with the highest probability becomes the prediction. Gold answers are
    read from ``<quasar_dir>/<split>_questions.json``, one JSON object per
    line with ``"uid"`` and ``"answer"`` fields.

    Args:
        quasar_dir: Directory containing the gold question file.
        split: Prefix of the gold file name (``split + "_questions.json"``).
        fname_nbest_preds: Path to the n-best predictions JSON file.

    Returns:
        ``{"f1": ..., "em": ...}`` averaged over the predicted qids that have
        a gold answer and scaled by 100. Both values are 0.0 when no
        predicted qid has a gold answer.

    Raises:
        ValueError: If a prediction key does not contain exactly one ``_``.
    """
    qid2preds = collections.defaultdict(list)

    with open(fname_nbest_preds) as rf:
        preds = json.load(rf)
    for uid, nbest in preds.items():
        qid, _idx = uid.split("_")
        # Keep the first non-empty candidate; a key whose candidates are all
        # empty contributes nothing instead of raising IndexError.
        pred = next((p for p in nbest if p["text"]), None)
        if pred is None:
            continue
        qid2preds[qid].append((pred["text"], pred["probability"]))

    # Best answer across all paragraphs of a question. max() returns the
    # first of equally probable candidates, as the stable descending sort it
    # replaces did.
    preds_qid2ans = {
        qid: max(candidates, key=lambda x: x[1])[0]
        for qid, candidates in qid2preds.items()
    }

    print(preds_qid2ans)

    gold_qid2ans = dict()
    quasar_data = os.path.join(quasar_dir, split + "_questions.json")
    with open(quasar_data, 'r') as qa_data:
        for line in qa_data:
            if not line.strip():
                # Tolerate blank lines in the JSON-lines file.
                continue
            data = json.loads(line)
            gold_qid2ans[data['uid']] = data['answer']

    print(len(preds_qid2ans), len(gold_qid2ans))

    qid2f1 = dict()
    qid2em = dict()

    # Number of predicted qids that have no gold answer.
    counter = 0
    for qid, a_pred in preds_qid2ans.items():
        if qid not in gold_qid2ans:
            counter += 1
            continue
        a_gold = gold_qid2ans[qid]
        qid2f1[qid] = compute_f1(a_gold, a_pred)
        qid2em[qid] = compute_exact(a_gold, a_pred)

    print(counter)

    if not qid2f1:
        # Nothing could be scored; avoid dividing by zero.
        return {"f1": 0.0, "em": 0.0}

    f1 = sum(qid2f1.values()) / len(qid2f1) * 100
    em = sum(qid2em.values()) / len(qid2em) * 100

    return {"f1": f1, "em": em}

示例#8

0

显示文件

文件： test.py 项目： zolekode/simpletransformers

def f1(truths, preds):
    """Return the mean ``compute_f1`` score over paired truths and preds.

    Pairs are formed with ``zip``, so extra items in the longer input are
    ignored.
    """
    scores = []
    for truth, pred in zip(truths, preds):
        scores.append(compute_f1(truth, pred))
    return mean(scores)

示例#9

0

显示文件

def make_find_answer_by_rule(no_label_synthetic, output):
    """Answer every question with hand-written rules and score the answers.

    Each question is handled by one of three rules:

    * Object questions (lower-cased text starts with "which", "who", ...):
      answer with ``make_choice_pos(predicts)``, switching to
      ``make_choice_neg(predicts)`` when the question contains a negative
      word that is absent from ``predicts['TP in back']``.
    * Comparative questions (both members of an antonym pair occur in the
      question): pick one member of the pair from the order in which
      ``object1`` / ``object2`` appear in the question and from
      ``predicts['TP_relevance']``.
    * Anything else: pick whichever of ``object1``, ``object2``,
      ``SP_object1``, ``SP_object2`` has the highest F1 with the question.

    Args:
        no_label_synthetic: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each qa
            carries ``id``, ``question``, ``predicts`` and optionally
            ``answers``.
        output: Path of the JSON file that receives the id -> answer map.

    Returns:
        ``(final_answer, f1, exact)``: the id -> answer dict (punctuation
        removed) plus per-question F1 and exact-match lists.
    """
    # A tuple lets str.startswith test every prefix in one call.
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
        " which", 'when', 'at which', 'where', 'during which',
    )
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
        'increased': "reduced",
        "warmer": "colder",
        'high': 'low',
        'turn on': "turn off",
        'rise': 'fall',
        'up': 'down',
        'longer': 'shorter',
        'deeper': "shallower",
        'positively': 'negatively',
    }
    final_answer = {}
    f1 = []
    exact = []
    with open(no_label_synthetic, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                question_text = qa['question']
                question_lower = question_text.lower()
                try:
                    answer_text = qa['answers'][0]['text']
                except (KeyError, IndexError, TypeError):
                    # Unlabelled question: score against a placeholder.
                    answer_text = "null"
                predicts = qa['predicts']

                if question_lower.startswith(object_question_prefixes):
                    # Object type. NOTE(review): the former positive-word
                    # scan only re-assigned this same make_choice_pos result,
                    # so it is dropped; assumes make_choice_pos has no side
                    # effects — confirm.
                    predicts_answer = make_choice_pos(predicts)
                    for word in negative_words:
                        if (word in question_lower and word
                                not in predicts['TP in back'].lower()):
                            predicts_answer = make_choice_neg(predicts)
                            break
                    f1.append(
                        compute_f1(remove_punc(answer_text),
                                   remove_punc(predicts_answer)))
                else:
                    # First antonym pair with both members in the question.
                    candidate = next(
                        ((key, val) for key, val in pairs.items()
                         if key in question_lower and val in question_lower),
                        None)
                    if candidate is not None:
                        # Comparative type.
                        o1_ind = question_lower.find(
                            remove_punc(predicts["object1"].lower()))
                        o2_ind = question_lower.find(
                            remove_punc(predicts["object2"].lower()))
                        than_ind = question_lower.find("than")
                        # If only one object is found and it precedes "than",
                        # place the missing one after every real position.
                        # The question length replaces the former constant
                        # 1000, which broke on longer questions.
                        past_end = len(question_lower)
                        if o1_ind != -1 and o2_ind == -1:
                            if than_ind != -1 and o1_ind < than_ind:
                                o2_ind = past_end
                        elif o1_ind == -1 and o2_ind != -1:
                            if than_ind != -1 and o2_ind < than_ind:
                                o1_ind = past_end

                        object1_first = o1_ind < o2_ind
                        tp_is_zero = int(predicts['TP_relevance']) == 0
                        # First pair member when the two flags agree,
                        # second member otherwise.
                        if object1_first == tp_is_zero:
                            predicts_answer = candidate[0]
                        else:
                            predicts_answer = candidate[1]
                        f1.append(compute_f1(answer_text, predicts_answer))
                    else:
                        # Fallback: the predicted span that overlaps most
                        # with the question; ties go to the earliest option.
                        options = [
                            predicts["object1"], predicts["object2"],
                            predicts["SP_object1"], predicts["SP_object2"]
                        ]
                        overlaps = [
                            compute_f1(question_text, option)
                            for option in options
                        ]
                        predicts_answer = options[overlaps.index(
                            max(overlaps))]
                        f1.append(
                            compute_f1(remove_punc(answer_text),
                                       predicts_answer))
                predicts_answer = remove_punc(predicts_answer)
                final_answer[qas_id] = predicts_answer
                exact.append(
                    compute_exact(remove_punc(answer_text),
                                  remove_punc(predicts_answer)))
    with open(output, "w", encoding="utf-8") as writer:
        writer.write(json.dumps(final_answer, indent=4) + "\n")
    return final_answer, f1, exact

示例#10

0

显示文件

def make_find_answer_by_rule_uncovered(no_label_synthetic, output):
    """Answer every question with hand-written rules and dump the hypotheses.

    Each question is handled by one of three rules:

    * Object questions (lower-cased text starts with "which", "who", ...):
      answer with ``make_choice_pos(predicts)``, switching to
      ``make_choice_neg(predicts)`` when the question contains a negative
      word that is absent from ``predicts['TP in back']``.
    * Comparative questions (both members of an antonym pair occur in the
      question): pick one member of the pair from the order in which
      ``object1`` / ``object2`` appear in the question and from
      ``predicts['TP_relevance']``.
    * Anything else: pick whichever of ``object1``, ``object2``,
      ``SP_object1``, ``SP_object2`` has the highest F1 with the question.

    Args:
        no_label_synthetic: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}`` where each qa
            carries ``id``, ``question``, ``predicts`` and optionally
            ``answers``.
        output: Existing directory; the answers are written to
            ``<output>/hyps_rule_based``, one per line, in question order.

    Returns:
        ``(final_answer, f1, exact)``: the id -> answer dict (punctuation
        removed) plus per-question F1 and exact-match lists.
    """
    # A tuple lets str.startswith test every prefix in one call.
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which',
        " which", 'when', 'at which', 'where', 'during which',
    )
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
        'increased': "reduced",
        "warmer": "colder",
        'high': 'low',
        'turn on': "turn off",
        'rise': 'fall',
        'up': 'down',
        'longer': 'shorter',
        'deeper': "shallower",
        'positively': 'negatively',
    }
    final_answer = {}
    f1 = []
    exact = []
    # Hypotheses are buffered and written in one go at the end, so the file
    # handle cannot leak when scoring raises part-way through.
    hypotheses = []
    with open(no_label_synthetic, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                question_text = qa['question']
                question_lower = question_text.lower()
                try:
                    answer_text = qa['answers'][0]['text']
                except (KeyError, IndexError, TypeError):
                    # Unlabelled question: score against a placeholder.
                    answer_text = "null"
                predicts = qa['predicts']

                if question_lower.startswith(object_question_prefixes):
                    # Object type. NOTE(review): the former positive-word
                    # scan only re-assigned this same make_choice_pos result,
                    # so it is dropped; assumes make_choice_pos has no side
                    # effects — confirm.
                    predicts_answer = make_choice_pos(predicts)
                    for word in negative_words:
                        if (word in question_lower and word
                                not in predicts['TP in back'].lower()):
                            predicts_answer = make_choice_neg(predicts)
                            break
                    f1.append(
                        compute_f1(remove_punc(answer_text),
                                   remove_punc(predicts_answer)))
                else:
                    # First antonym pair with both members in the question.
                    candidate = next(
                        ((key, val) for key, val in pairs.items()
                         if key in question_lower and val in question_lower),
                        None)
                    if candidate is not None:
                        # Comparative type.
                        o1_ind = question_lower.find(
                            remove_punc(predicts["object1"].lower()))
                        o2_ind = question_lower.find(
                            remove_punc(predicts["object2"].lower()))
                        than_ind = question_lower.find("than")
                        # If only one object is found and it precedes "than",
                        # place the missing one after every real position.
                        # The question length replaces the former constant
                        # 1000, which broke on longer questions.
                        past_end = len(question_lower)
                        if o1_ind != -1 and o2_ind == -1:
                            if than_ind != -1 and o1_ind < than_ind:
                                o2_ind = past_end
                        elif o1_ind == -1 and o2_ind != -1:
                            if than_ind != -1 and o2_ind < than_ind:
                                o1_ind = past_end

                        object1_first = o1_ind < o2_ind
                        tp_is_zero = int(predicts['TP_relevance']) == 0
                        # First pair member when the two flags agree,
                        # second member otherwise.
                        if object1_first == tp_is_zero:
                            predicts_answer = candidate[0]
                        else:
                            predicts_answer = candidate[1]
                        f1.append(compute_f1(answer_text, predicts_answer))
                    else:
                        # Fallback: the predicted span that overlaps most
                        # with the question; ties go to the earliest option.
                        options = [
                            predicts["object1"], predicts["object2"],
                            predicts["SP_object1"], predicts["SP_object2"]
                        ]
                        overlaps = [
                            compute_f1(question_text, option)
                            for option in options
                        ]
                        predicts_answer = options[overlaps.index(
                            max(overlaps))]
                        f1.append(
                            compute_f1(remove_punc(answer_text),
                                       predicts_answer))
                predicts_answer = remove_punc(predicts_answer)
                hypotheses.append(predicts_answer)
                final_answer[qas_id] = predicts_answer
                exact.append(
                    compute_exact(remove_punc(answer_text),
                                  remove_punc(predicts_answer)))
    with open(os.path.join(output, "hyps_rule_based"), 'w',
              encoding='utf-8') as writer_hyps:
        writer_hyps.write("".join(hyp + '\n' for hyp in hypotheses))
    return final_answer, f1, exact

示例#11

0

显示文件

def fuzzy_f1(data):
    """Average F1 and hit rate of six predicted spans over a dataset.

    Every question carries a ``predicts`` dict whose entries (``object1``,
    ``object2``, ``TP in back``, ``SP in back``, ``SP_object1``,
    ``SP_object2``) each hold a ``label`` and a ``predict`` value.
    ``object1`` / ``object2`` are scored order-insensitively: each label is
    also compared with the other object's prediction and the better F1 is
    kept. The ``SP_object*`` F1 values are not swapped, but their hit flags
    do accept the swapped match.

    Args:
        data: Path to a JSON file laid out as
            ``{"data": [{"paragraphs": [{"qas": [...]}]}]}``.

    Returns:
        A 12-tuple: mean F1 for object1, object2, ``TP in back``,
        ``SP in back``, ``SP_object1``, ``SP_object2``, followed by the
        fraction of questions with a non-zero F1 for the same six spans, in
        the same order. Every entry is 0.0 when the file has no questions.
    """
    def comput_f(a):
        # An empty score list averages to 0.0 instead of dividing by zero.
        return sum(a) / len(a) if a else 0.0

    def span_f1(predict, label_field, predict_field):
        # F1 between one span's label and a (possibly different) span's
        # prediction.
        return compute_f1(predict[label_field]['label'],
                          predict[predict_field]['predict'])

    group1 = []
    group2 = []
    effect_B = []
    cause_B = []
    cause_G1 = []
    cause_G2 = []
    f_group1 = []
    f_group2 = []
    f_effect_B = []
    f_cause_B = []
    f_cause_G1 = []
    f_cause_G2 = []
    with open(data, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)
    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                predict = qa['predicts']
                g1 = span_f1(predict, 'object1', 'object1')
                g2 = span_f1(predict, 'object2', 'object2')
                effect = span_f1(predict, 'TP in back', 'TP in back')
                cb = span_f1(predict, 'SP in back', 'SP in back')
                cg1 = span_f1(predict, 'SP_object1', 'SP_object1')
                cg2 = span_f1(predict, 'SP_object2', 'SP_object2')

                # Swapped comparisons: label of one object against the
                # prediction of the other.
                g12 = span_f1(predict, 'object1', 'object2')
                g21 = span_f1(predict, 'object2', 'object1')
                cg12 = span_f1(predict, 'SP_object1', 'SP_object2')
                cg21 = span_f1(predict, 'SP_object2', 'SP_object1')

                group1.append(max(g1, g12))
                group2.append(max(g2, g21))
                effect_B.append(effect)
                cause_B.append(cb)
                cause_G1.append(cg1)
                cause_G2.append(cg2)

                # Hit flags: 1 when any accepted comparison overlaps at all.
                f_group1.append(int(g1 > 0 or g12 > 0))
                f_group2.append(int(g2 > 0 or g21 > 0))
                f_effect_B.append(int(effect > 0))
                f_cause_B.append(int(cb > 0))
                f_cause_G1.append(int(cg1 > 0 or cg12 > 0))
                f_cause_G2.append(int(cg2 > 0 or cg21 > 0))

    score_lists = (group1, group2, effect_B, cause_B, cause_G1, cause_G2,
                   f_group1, f_group2, f_effect_B, f_cause_B, f_cause_G1,
                   f_cause_G2)
    return tuple(comput_f(scores) for scores in score_lists)

示例#12

0

显示文件

def statistsic_step18(predict_path, multi_answer_path, output):
    """Tally how the two predicted objects match the two candidate answers.

    For every question whose gold answer is not one of the ignored
    comparative words, the predicted ``object1`` / ``object2`` are scored with
    ``compute_f1`` against the first two candidate answers, and the question
    is counted into the "correct" (F1 == 1/0), "good" (F1 >= 0.5), "wrong"
    and "bad" buckets. Questions where all four scores are below 0.5 are
    dumped to ``output``.

    Args:
        predict_path: JSON file shaped like
            ``{"data": [{"paragraphs": [{"background", "situation",
            "qas": [{"id", "answers", "predicts"}]}]}]}``.
        multi_answer_path: JSON-lines file; every line is an object with an
            ``id`` and an ``answer_text`` sequence holding at least two
            candidate answers. Ids are looked up as ``str(qa['id'])``, so they
            are presumably stored as strings -- verify against the data.
        output: Path of the file that receives one pretty-printed JSON object
            per "both bad" question.

    Returns:
        None. The counters are printed: first
        ``cnt, both_correct, both_good, half_correct, half_good, both_wrong,
        both_bad`` and then the number of ignored (comparative) questions.

    Raises:
        KeyError: if a question id has no entry in the multi-answer file.
    """
    candidate_answer = {}
    with open(multi_answer_path, 'r', encoding='utf-8') as fin:
        for raw_line in fin:
            record = json.loads(raw_line)
            candidate_answer[record['id']] = record['answer_text']

    ignored_list = [
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller'
    ]
    cnt = 0
    compare = 0
    both_correct = 0
    half_correct = 0
    half_good = 0
    both_good = 0
    both_wrong = 0
    both_bad = 0

    # Load the predictions before touching the output file, so a broken input
    # does not leave a freshly truncated output behind.
    with open(predict_path, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    # The context manager guarantees the output is flushed and closed even
    # when a lookup or scoring call raises half-way through.
    with open(output, 'w+', encoding='utf-8') as writer:
        for entry in input_data["data"]:
            for paragraph in entry["paragraphs"]:
                background = paragraph['background']
                situation = paragraph['situation']
                for qa in paragraph['qas']:
                    qas_id = str(qa['id'])
                    predict = qa['predicts']
                    candidate = candidate_answer[qas_id]
                    first_can = candidate[0]
                    second_can = candidate[1]

                    cnt += 1
                    if qa["answers"][0]['text'] in ignored_list:
                        compare += 1
                        continue

                    # Scores are only needed for questions that are kept.
                    o1_can1 = compute_f1(first_can, predict['object1'])
                    o1_can2 = compute_f1(second_can, predict['object1'])
                    o2_can1 = compute_f1(first_can, predict['object2'])
                    o2_can2 = compute_f1(second_can, predict['object2'])

                    # The two ways of assigning one candidate to each object:
                    # (object1 score, object2 score) per assignment.
                    pairings = ((o1_can1, o2_can2), (o1_can2, o2_can1))
                    scores = (o1_can1, o2_can1, o1_can2, o2_can2)

                    if any(a == 1 and b == 1 for a, b in pairings):
                        both_correct += 1
                    elif any((a == 1 and b == 0) or (a == 0 and b == 1)
                             for a, b in pairings):
                        half_correct += 1

                    if any(a >= 0.5 and b >= 0.5 for a, b in pairings):
                        both_good += 1
                    elif any((a >= 0.5 and b < 0.5) or (b >= 0.5 and a < 0.5)
                             for a, b in pairings):
                        half_good += 1

                    if all(score == 0 for score in scores):
                        both_wrong += 1

                    if all(score < 0.5 for score in scores):
                        both_bad += 1
                        # Attach the four scores so the dump is self-contained.
                        qa['predicts']['f1'] = list(scores)
                        out = {
                            "background": background,
                            "situation": situation,
                            "qa": qa,
                        }
                        writer.write(json.dumps(out, indent=2))
                        writer.write("\n")

    print(cnt, both_correct, both_good, half_correct, half_good, both_wrong,
          both_bad)
    print(compare)

示例#13

0

显示文件

def comput_scores(predict_path, multi_answer_path, output):
    """Score predicted objects against candidate answers and dump weak cases.

    For every question whose gold answer is not an ignored comparative word,
    each predicted object gets the best ``compute_f1`` score over all
    candidate answers and the gold answer. A question is considered fine (and
    skipped) when either object scores 1 while the two predicted objects
    differ, or when both objects score at least 0.5. Every other question is
    recorded in the returned mapping and written to ``output``.

    Args:
        predict_path: JSON file shaped like
            ``{"data": [{"paragraphs": [{"background", "situation",
            "qas": [{"id", "answers", "predicts"}]}]}]}``.
        multi_answer_path: JSON-lines file; every line is an object with an
            ``id`` and an ``answer_text`` sequence holding at least two
            candidate answers. Ids are looked up as ``str(qa['id'])``.
        output: Path of the file that receives one pretty-printed JSON object
            per recorded question.

    Returns:
        dict mapping ``str(qa['id'])`` to ``[object1_f1, object2_f1]`` for
        the recorded questions.

    Raises:
        KeyError: if a question id has no entry in the multi-answer file.
    """
    candidate_answer = {}
    with open(multi_answer_path, 'r', encoding='utf-8') as fin:
        for raw_line in fin:
            record = json.loads(raw_line)
            candidate_answer[record['id']] = record['answer_text']

    ignored_list = [
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller'
    ]
    final_score = {}
    cnt = 0  # number of ignored (comparative-answer) questions
    both_correct = 0
    both_good = 0
    # These three counters are never updated by this function; they are kept
    # only so the printed report keeps its six-line layout.
    same = 0
    half_correct = 0
    half_good = 0

    # Load the predictions before touching the output file, so a broken input
    # does not leave a freshly truncated output behind.
    with open(predict_path, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    # The context manager guarantees the output is flushed and closed even
    # when a lookup or scoring call raises half-way through.
    with open(output, 'w+', encoding='utf-8') as writer:
        for entry in input_data["data"]:
            for paragraph in entry["paragraphs"]:
                background = paragraph['background']
                situation = paragraph['situation']
                for qa in paragraph['qas']:
                    qas_id = str(qa['id'])
                    predict = qa['predicts']
                    candidate = candidate_answer[qas_id]
                    first_can = candidate[0]
                    second_can = candidate[1]
                    gold_text = qa["answers"][0]['text']

                    if gold_text in ignored_list:
                        cnt += 1
                        continue

                    # (object1 score, object2 score) for both ways of
                    # assigning the first two candidates to the two objects.
                    pairings = (
                        (compute_f1(first_can, predict['object1']),
                         compute_f1(second_can, predict['object2'])),
                        (compute_f1(second_can, predict['object1']),
                         compute_f1(first_can, predict['object2'])),
                    )
                    if any(a == 1 and b == 1 for a, b in pairings):
                        both_correct += 1
                    if any(a >= 0.5 and b >= 0.5 for a, b in pairings):
                        both_good += 1

                    # Best score per object over every candidate plus the
                    # gold answer (the candidate list may hold more than two).
                    o1_f1 = max(
                        max(
                            compute_f1(answer, predict['object1'])
                            for answer in candidate),
                        compute_f1(gold_text, predict['object1']))
                    o2_f1 = max(
                        max(
                            compute_f1(answer, predict['object2'])
                            for answer in candidate),
                        compute_f1(gold_text, predict['object2']))

                    if (o1_f1 == 1 or o2_f1 == 1) and \
                            predict['object1'] != predict['object2']:
                        continue
                    if o1_f1 >= 0.5 and o2_f1 >= 0.5:
                        continue

                    final_score[qas_id] = [o1_f1, o2_f1]
                    qa['predicts']['f1'] = [o1_f1, o2_f1]
                    out = {
                        "background": background,
                        "situation": situation,
                        "qa": qa,
                    }
                    writer.write(json.dumps(out, indent=2))
                    writer.write("\n")

    print(cnt)
    print(same)
    print(both_correct)
    print(half_correct)
    print(both_good)
    print(half_good)

    return final_score

示例#14

0

显示文件

def read_predicted_file_comparative(predict_file, pred):
    """Answer comparative questions from the predicted object order.

    Only questions whose gold answer is one of ``comparative_words`` and whose
    id is not blacklisted are considered. When the question text contains both
    words of an antonym pair (e.g. "more" / "less"), the function locates the
    two predicted objects in the question and, together with
    ``predicts['TP_relevance']``, picks one word of the pair as the answer.

    Args:
        predict_file: JSON file shaped like
            ``{"data": [{"paragraphs": [{"qas": [{"id", "question",
            "answers", "predicts"}]}]}]}`` where ``predicts`` holds
            ``object1``, ``object2`` and ``TP_relevance``.
        pred: Mapping from question id to a baseline predicted answer string.

    Returns:
        Tuple ``(pred_f1, f1, prediction)``:
        ``pred_f1`` -- ``compute_f1`` of the gold answer vs. ``pred[id]``;
        ``f1`` -- ``compute_f1`` of the gold answer vs. the chosen word;
        ``prediction`` -- id -> ``[score, chosen_word, [word, antonym],
        [object1_index, object2_index, than_index]]`` (indices are the raw
        ``str.find`` results, ``-1`` meaning "not found").

    Raises:
        KeyError: if a scored question id is missing from ``pred``.
    """
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'harder',
        'easier', 'increasing', 'decreasing', 'larger', 'smaller', 'better',
        'worse', 'faster', 'slower', 'weaker', 'stronger', 'closer', 'farther',
        'louder', 'quieter', 'correctly', 'incorrectly', 'not', 'yes', 'no',
    )
    pairs = {
        'more': 'less',
        'higher': 'lower',
        'increase': 'decrease',
        'harder': 'easier',
        'increasing': 'decreasing',
        'larger': 'smaller',
        'better': 'worse',
        'faster': 'slower',
        'stronger': 'weaker',
        'closer': 'farther',
        'louder': 'quieter',
        'correctly': 'incorrectly',
    }
    filtered = [
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769
    ]
    # Blacklisted question ids, held as strings in a set for O(1) lookups.
    filtered_ids = {str(i) for i in filtered}

    prediction = {}
    f1 = []
    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                answer_text = qa['answers'][0]['text']
                if answer_text not in comparative_words:
                    continue
                # Compare as strings so ids stored as JSON numbers are
                # filtered too (the blacklist itself is stringified).
                if str(qas_id) in filtered_ids:
                    continue

                predicts = qa['predicts']
                question = qa['question'].lower()

                # First antonym pair with both words occurring (as
                # substrings) in the question.
                candidate = next(
                    ([key, val] for key, val in pairs.items()
                     if key in question and val in question), None)
                if candidate is None:
                    continue

                o1_ind = question.find(remove_punc(predicts["object1"].lower()))
                o2_ind = question.find(remove_punc(predicts["object2"].lower()))
                than_ind = question.find("than")
                # Raw positions are reported as-is, before any adjustment.
                indx = [o1_ind, o2_ind, than_ind]

                # Equal positions (including both objects missing) give no
                # ordering information.
                if o1_ind == o2_ind:
                    continue

                # When only one object is found and it precedes "than", the
                # missing one is treated as coming after it. Any position
                # beyond the end of the question serves as "after".
                if than_ind != -1:
                    after_everything = len(question)
                    if o2_ind == -1 and o1_ind < than_ind:
                        o2_ind = after_everything
                    elif o1_ind == -1 and o2_ind < than_ind:
                        o1_ind = after_everything

                # object1 first with relevance 0, or object1 not first with a
                # non-zero relevance, selects the first word of the pair;
                # the other two combinations select its antonym.
                # NOTE(review): the meaning of TP_relevance is not visible
                # here -- confirm against the producer of the file.
                object1_first = o1_ind < o2_ind
                relevance_is_zero = int(predicts['TP_relevance']) == 0
                if object1_first == relevance_is_zero:
                    chosen = candidate[0]
                else:
                    chosen = candidate[1]

                score = compute_f1(answer_text, chosen)
                prediction[qas_id] = [score, chosen, candidate, indx]
                f1.append(score)
                pred_f1.append(compute_f1(answer_text, pred[qas_id]))

    return pred_f1, f1, prediction

示例#15

0

显示文件

def read_predicted_file(predict_file, pred):
    """Score object-type questions answered by choosing a predicted object.

    Only questions that start with an object question word ("which", "who",
    "what", ...), whose gold answer is not a comparative word, whose id is not
    blacklisted and whose gold answer overlaps at least one predicted object
    are scored. The answer is ``make_choice_pos(predicts)`` by default and
    ``make_choice_neg(predicts)`` when the question contains a negative word
    that does not occur in ``predicts['TP in back']``.

    Args:
        predict_file: JSON file shaped like
            ``{"data": [{"paragraphs": [{"qas": [{"id", "question",
            "answers", "predicts"}]}]}]}`` where ``predicts`` holds
            ``object1``, ``object2`` and ``TP in back``.
        pred: Mapping from question id to a baseline predicted answer string.

    Returns:
        Tuple ``(all_object_scores, record, pred_f1)``:
        ``all_object_scores`` -- ``compute_f1`` of the punctuation-stripped
        gold answer vs. the chosen answer, one per scored question;
        ``record`` -- id -> ``[f1, gold_answer, chosen_answer]``;
        ``pred_f1`` -- ``compute_f1`` of the gold answer vs. ``pred[id]``.

    Raises:
        KeyError: if a scored question id is missing from ``pred``.
    """
    # A tuple so that a single str.startswith call can test every prefix.
    object_question_prefixes = (
        "which", 'in which', 'whose', 'who', 'what', 'for which', 'on which'
    )
    comparative_words = (
        'more', 'less', 'higher', 'lower', 'increase', 'decrease', 'high',
        'low', 'harder', 'easier', 'increasing', 'decreasing', 'up', 'down',
        'larger', 'smaller', 'better', 'worse', 'faster', 'slower', 'weaker',
        'stronger', 'closer', 'farther', 'louder', 'quieter', 'correctly',
        'incorrectly', 'not', 'yes', 'no',
    )
    negative_words = [
        'less', 'lower', 'decrease', 'low', 'easier', 'decreasing', 'down',
        'smaller', 'worse', 'slower', 'weaker', 'farther', 'quieter',
        'incorrectly', 'fewer', 'not', 'avoid'
    ]
    filtered = [
        1912355095, 580926078, 2893035919, 3005625773, 4037359159, 1090395805,
        1111432861, 1772822810, 1779114266, 3003444033, 1687481522, 373839753,
        3209385804, 3128907286, 1746228971, 4088330372, 4227418477, 4047850835,
        302402461, 603409309, 2849858743, 2050057087, 336287454, 1787516699,
        184586157, 2999947384, 1202686909, 1223723965, 2335934912, 2677049981,
        1182285725, 430326667, 4183507762, 1091390005, 1076054581, 3847534556,
        4074158044, 955038082, 1423883267, 1626887498, 184177891, 1586058524,
        156980418, 1607423284, 1608996147, 3071497657, 3865282641, 1588430868,
        3876948090, 3344467660, 3346695902, 3600844723, 3227879116, 3484256179,
        2934519278, 3876075109, 2041918832, 1115960569, 1108882681, 2013214064,
        3895408229, 2975020526, 3885579536, 356328641, 985605428, 809437101,
        494407430, 3999682891, 1562329124, 1505378340, 2022868633, 2022147737,
        1729594789, 1730315685, 1082622173, 1205043665, 1081901277, 1204322769
    ]
    # Blacklisted question ids, held as strings in a set for O(1) lookups.
    filtered_ids = {str(i) for i in filtered}

    all_object_scores = []
    record = {}
    pred_f1 = []
    with open(predict_file, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)

    for entry in input_data["data"]:
        for paragraph in entry["paragraphs"]:
            for qa in paragraph['qas']:
                qas_id = qa['id']
                answer_text = qa['answers'][0]['text']
                if answer_text in comparative_words:
                    continue
                # Compare as strings so ids stored as JSON numbers are
                # filtered too (the blacklist itself is stringified).
                if str(qas_id) in filtered_ids:
                    continue

                question = qa['question'].lower()
                if not question.startswith(object_question_prefixes):
                    continue

                predicts = qa['predicts']
                # Skip questions whose gold answer overlaps neither object.
                if max(compute_f1(answer_text, predicts['object1']),
                       compute_f1(answer_text, predicts['object2'])) == 0.0:
                    continue

                # The positive choice is the default, so positive words in
                # the question need no special handling. Only a negative
                # word that is absent from the 'TP in back' text flips it.
                predicts_answer = make_choice_pos(predicts)
                for word in negative_words:
                    if word in question and \
                            word not in predicts['TP in back'].lower():
                        predicts_answer = make_choice_neg(predicts)
                        break

                gold = remove_punc(answer_text)
                f1 = compute_f1(gold, remove_punc(predicts_answer))
                pred_f1.append(compute_f1(gold, remove_punc(pred[qas_id])))
                record[qas_id] = [f1, answer_text, predicts_answer]
                all_object_scores.append(f1)

    return all_object_scores, record, pred_f1