def utest_for_ground_truth(d_list):
    """Attach gold evidence as predictions and report label counts.

    Mutates every item in-place: copies the first gold evidence set into
    'predicted_sentids' / 'predicted_evidence' and the gold label into
    'predicted_label'.  Prints (#SUPPORTS, #REFUTES, #NOT ENOUGH INFO).
    """
    label_counts = {'SUPPORTS': 0, 'REFUTES': 0, 'NOT ENOUGH INFO': 0}
    for item in tqdm(d_list):
        evidence_sets = check_sentences.check_and_clean_evidence(item)
        sent_ids = []
        gold_pairs = []
        if item['verifiable'] == "VERIFIABLE":
            # Only the first gold evidence set is used as the "prediction".
            for doc_id, line_num in list(evidence_sets)[0]:
                sent_ids.append(doc_id + c_scorer.SENT_LINE + str(line_num))
                gold_pairs.append([doc_id, line_num])

        item["predicted_sentids"] = sent_ids
        item['predicted_evidence'] = gold_pairs
        item['predicted_label'] = item["label"]

        if item["label"] in label_counts:
            label_counts[item["label"]] += 1

    print(label_counts['SUPPORTS'], label_counts['REFUTES'],
          label_counts['NOT ENOUGH INFO'])
예제 #2
0
def disabuigation_training_build_v0(item,
                                    cursor,
                                    contain_first_sentence=False,
                                    only_found=True):
    """Build disambiguation training examples for one item.

    Positives are gold-evidence page titles that contain a '-LRB-'
    disambiguation marker and whose tokenized form does not appear in the
    claim.  Negatives are drawn from 6-8 randomly sampled retrieved titles
    that contain '-LRB-' but are not gold evidence.

    Args:
        item: example dict with 'id', 'claim_tokens', 'prioritized_docids'
            and gold evidence annotations.
        cursor: DB cursor, passed through to make_examples.
        contain_first_sentence: forwarded to make_examples.
        only_found: if True, negatives are sampled only when at least one
            positive was found; if False, negatives are always sampled.

    Returns:
        The result of make_examples for the built positive/negative lists.
    """
    doc_t_list = [it[0] for it in item['prioritized_docids']]
    evidence_group = check_sentences.check_and_clean_evidence(item)
    all_true_t_list = set()
    t_claim = ' '.join(item['claim_tokens'])
    for ground_truth_evid in evidence_group:
        true_t_list = set([it[0] for it in ground_truth_evid])
        all_true_t_list = set.union(all_true_t_list, true_t_list)
    all_true_t_list = list(all_true_t_list)

    positive_list = []
    negative_list = []
    eid = item['id']
    found_pos = False

    for doc_id in all_true_t_list:
        # A '-LRB-' title whose tokenized text is absent from the claim is a
        # disambiguation page the claim does not literally mention.
        if '-LRB-' in doc_id and common.doc_id_to_tokenized_text(
                doc_id) not in t_claim:
            positive_list.append(doc_id)
            found_pos = True

    # The original two branches duplicated identical sampling code; negatives
    # are drawn exactly when (found_pos and only_found) or not only_found.
    if (found_pos and only_found) or not only_found:
        random.shuffle(doc_t_list)
        num_neg = random.randint(6, 8)
        for doc_id in doc_t_list[:num_neg]:
            if '-LRB-' in doc_id and doc_id not in all_true_t_list:
                negative_list.append(doc_id)

    return make_examples(eid,
                         positive_list,
                         negative_list,
                         t_claim,
                         cursor,
                         contain_first_sentence=contain_first_sentence)
예제 #3
0
def utest_for_sample():
    """Smoke test: sample additional evidence for every dev item, print flags.

    Loads the FEVER dev set and a saved sentence-retrieval result, indexes the
    retrieval output by item id, then runs sample_additional_data_for_item on
    each dev item and prints the per-item diagnostic flags.
    """
    # cursor = fever_db.get_cursor()
    d_list = load_data(config.FEVER_DEV_JSONL)
    retrieved = load_data(config.RESULT_PATH /
                          "sent_retri/2018_07_05_17:17:50_r/dev.jsonl")
    retrieved_by_id = {entry['id']: entry for entry in retrieved}

    for item in d_list:
        e_list = check_sentences.check_and_clean_evidence(item)
        _, flags = sample_additional_data_for_item(item, retrieved_by_id)
        print(flags)
def filter_contain_parenthese_valid(item):
    """Return True iff some gold-evidence page title contains '-LRB-', was
    also retrieved for this item, and its tokenized form is absent from the
    claim text."""
    retrieved_titles = [entry[0] for entry in item['prioritized_docids']]
    claim_text = ' '.join(item['claim_tokens'])
    gold_titles = set()
    for evid in check_sentences.check_and_clean_evidence(item):
        gold_titles |= {e[0] for e in evid}
    return any(
        '-LRB-' in title and title in retrieved_titles
        and common.doc_id_to_tokenized_text(title) not in claim_text
        for title in gold_titles)
예제 #5
0
def utest_for_ground_truth(d_list):
    """Debug variant: print each item's cleaned evidence, then copy the gold
    evidence into the prediction fields ('predicted_sentids',
    'predicted_evidence', 'predicted_label') in-place."""
    for item in tqdm(d_list):
        evidence_sets = check_sentences.check_and_clean_evidence(item)
        print(evidence_sets)
        sent_ids, gold_pairs = [], []
        if item['verifiable'] == "VERIFIABLE":
            # Only the first gold evidence set is used.
            for doc_id, line_num in list(evidence_sets)[0]:
                sent_ids.append(doc_id + c_scorer.SENT_LINE + str(line_num))
                gold_pairs.append([doc_id, line_num])

        item["predicted_sentids"] = sent_ids
        item['predicted_evidence'] = gold_pairs
        item['predicted_label'] = item["label"]
def disabuigation_training_build(item, cursor, contain_first_sentence=False):
    """Build disambiguation training examples for one item.

    Positives: gold-evidence titles containing '-LRB-' whose tokenized text
    is absent from the claim.  Negatives: every retrieved title containing
    '-LRB-' that is not gold evidence.  Feeds both lists to make_examples.
    """
    retrieved_titles = [entry[0] for entry in item['prioritized_docids']]
    claim_text = ' '.join(item['claim_tokens'])

    gold_titles = set()
    for evid in check_sentences.check_and_clean_evidence(item):
        gold_titles.update(e[0] for e in evid)
    gold_titles = list(gold_titles)

    positive_list = [
        title for title in gold_titles
        if '-LRB-' in title
        and common.doc_id_to_tokenized_text(title) not in claim_text
    ]
    negative_list = [
        title for title in retrieved_titles
        if '-LRB-' in title and title not in gold_titles
    ]

    return make_examples(item['id'],
                         positive_list,
                         negative_list,
                         claim_text,
                         cursor,
                         contain_first_sentence=contain_first_sentence)
예제 #7
0
def sample_additional_data_for_item_v1_0(item, additional_data_dictionary):
    """Pad each gold evidence set with retrieved sentences (up to 5 total).

    For VERIFIABLE items, each gold evidence set is deep-copied and, if it
    has fewer than 5 sentences, extended with a random number of randomly
    shuffled retrieved sentence ids.  For NOT VERIFIABLE items, one
    pseudo-evidence set of 2-5 random retrieved sentences is built (gold
    evidence must be empty).

    Args:
        item: FEVER example dict with 'id', 'verifiable' and 'label'.
        additional_data_dictionary: item id -> dict with 'predicted_sentids'
            (list of "<doc_id><SENT_LINE><line_number>" strings).

    Returns:
        (res_sentids_list, flags): evidence sets plus one diagnostic flag
        string per set.
    """
    res_sentids_list = []
    flags = []

    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] == 'SUPPORTS' or item['label'] == 'REFUTES'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        assert current_id in additional_data_dictionary
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # Removed: 'scored_sentids' lookup — the value was never used.

        for evidences in e_list:
            new_evidences = copy.deepcopy(evidences)
            n_e = len(evidences)
            if n_e < 5:
                # Top up with 0..(5 - n_e) randomly chosen retrieved ids.
                current_sample_num = random.randint(0, 5 - n_e)
                random.shuffle(additional_data)
                for sampled_e in additional_data[:current_sample_num]:
                    doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
                    ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
                    new_evidences.add_sent(doc_ids, ln)

            if new_evidences != evidences:
                flags.append(
                    f"verifiable.non_eq.{len(new_evidences) - len(evidences)}")
            else:
                flags.append("verifiable.eq.0")
            res_sentids_list.append(new_evidences)

        assert len(res_sentids_list) == len(e_list)

    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'

        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # Build one pseudo-evidence set of 2..5 random retrieved sentences.
        random.shuffle(additional_data)
        current_sample_num = random.randint(2, 5)
        raw_evidences_list = []
        for sampled_e in additional_data[:current_sample_num]:
            doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
            ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
            raw_evidences_list.append((doc_ids, ln))
        new_evidences = check_sentences.Evidences(raw_evidences_list)

        if len(new_evidences) == 0:
            # NOTE(review): this flag says "verifiable" inside the
            # NOT VERIFIABLE branch; kept byte-identical because downstream
            # code may match on it — confirm intent before renaming.
            flags.append("verifiable.eq.0")
        else:
            flags.append(f"not_verifiable.non_eq.{len(new_evidences)}")

        # NEI items must carry no gold evidence at all.
        assert all(len(e) == 0 for e in e_list)
        res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == 1

    assert len(res_sentids_list) == len(flags)

    return res_sentids_list, flags
예제 #8
0
def sample_additional_data_for_item(item, additional_data_dictionary):
    """Augment an item's gold evidence with retrieved sentences.

    For VERIFIABLE items, each gold evidence set is deep-copied and, when it
    has fewer than 5 sentences, padded with 0..(5 - len) randomly shuffled
    retrieved sentence ids.  For NOT VERIFIABLE items, a single
    pseudo-evidence set of 2-5 random retrieved sentences is built.

    Args:
        item: FEVER example dict with 'id', 'verifiable' and 'label'.
        additional_data_dictionary: maps item id -> dict containing
            'predicted_sentids' ("<doc_id><SENT_LINE><line_number>" strings).

    Returns:
        (res_sentids_list, flags): evidence sets plus one diagnostic flag
        string per set.
    """
    #TODO check this code, very messy!!!
    res_sentids_list = []
    flags = []
    # count = 0
    # total = 0
    # print(count, total)

    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] == 'SUPPORTS' or item['label'] == 'REFUTES'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        assert current_id in additional_data_dictionary
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']

        for evidences in e_list:
            # print(evidences)
            # Deep copy so the gold evidence set itself is not mutated.
            new_evidences = copy.deepcopy(evidences)
            n_e = len(evidences)
            if n_e < 5:
                # Pad with 0..(5 - n_e) randomly chosen retrieved sentences.
                current_sample_num = random.randint(0, 5 - n_e)
                random.shuffle(additional_data)
                for sampled_e in additional_data[:current_sample_num]:
                    # sentence id format: "<doc_id><SENT_LINE><line_number>"
                    doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
                    ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
                    new_evidences.add_sent(doc_ids, ln)

            if new_evidences != evidences:
                # NOTE(review): this flag says "eq" although the sets differ;
                # the v1_0 variant uses "non_eq" here — confirm which string
                # is intended before matching on these flags downstream.
                flag = f"verifiable.eq.{len(new_evidences) - len(evidences)}"
                flags.append(flag)
                # count += 1
                # print("Oh")
                # print(evidences)
                # print(new_evidences)
                pass
            else:
                flag = "verifiable.eq.0"
                flags.append(flag)
                pass
                # print("Yes")
            res_sentids_list.append(new_evidences)
            # total += 1
        assert len(res_sentids_list) == len(e_list)

    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'

        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # Build one pseudo-evidence set of 2..5 random retrieved sentences.
        random.shuffle(additional_data)
        current_sample_num = random.randint(2, 5)
        raw_evidences_list = []
        for sampled_e in additional_data[:current_sample_num]:
            doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
            ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
            raw_evidences_list.append((doc_ids, ln))
        new_evidences = check_sentences.Evidences(raw_evidences_list)

        # print(e_list)
        # print(new_evidences)

        if len(new_evidences) == 0:
            # NOTE(review): "verifiable" flag emitted in the NOT VERIFIABLE
            # branch — looks copy-pasted; confirm before relying on it.
            flag = f"verifiable.eq.0"
            flags.append(flag)
            pass
        else:
            flag = f"not_verifiable.eq.{len(new_evidences)}"
            flags.append(flag)


# print("Oh")
# global count
# count += 1
# print(additional_data)

        # NEI items must carry no gold evidence at all.
        assert all(len(e) == 0 for e in e_list)
        res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == 1

    assert len(res_sentids_list) == len(flags)

    return res_sentids_list, flags
def sample_additional_data_for_item_v1_1(item, additional_data_dictionary):
    """Pad gold evidence with retrieved sentences; for NEI items, always take
    the two highest-scored retrieved sentences plus up to two random ones.

    Returns (evidence_sets, flags) with one diagnostic flag per set.
    """
    collected = []
    flags = []

    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] in ('SUPPORTS', 'REFUTES')
        gold_sets = check_sentences.check_and_clean_evidence(item)
        item_id = item['id']
        assert item_id in additional_data_dictionary
        retrieved_sids = additional_data_dictionary[item_id][
            'predicted_sentids']

        for gold in gold_sets:
            # Deep copy so the gold evidence set itself is not mutated.
            padded = copy.deepcopy(gold)
            if len(gold) < 5:
                # Pad with 0..(5 - len) randomly chosen retrieved ids.
                sample_k = random.randint(0, 5 - len(gold))
                random.shuffle(retrieved_sids)
                for sid in retrieved_sids[:sample_k]:
                    page = sid.split(c_scorer.SENT_LINE)[0]
                    line_no = int(sid.split(c_scorer.SENT_LINE)[1])
                    padded.add_sent(page, line_no)

            if padded != gold:
                flags.append(f"verifiable.non_eq.{len(padded) - len(gold)}")
            else:
                flags.append("verifiable.eq.0")
            collected.append(padded)

        assert len(collected) == len(gold_sets)

    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'

        gold_sets = check_sentences.check_and_clean_evidence(item)
        item_id = item['id']

        retrieved_sids = additional_data_dictionary[item_id][
            'predicted_sentids']
        scored = additional_data_dictionary[item_id]['scored_sentids']

        # Entries look like (sid, score, prob); keep the two best-scored ids.
        certain_k = 2
        scored = sorted(scored, key=lambda entry: entry[1], reverse=True)
        top_sids = [entry[0] for entry in scored[:certain_k]]

        random.shuffle(retrieved_sids)
        extra_k = random.randint(0, 2)
        pairs = []
        for sid in top_sids + retrieved_sids[:extra_k]:
            page = sid.split(c_scorer.SENT_LINE)[0]
            line_no = int(sid.split(c_scorer.SENT_LINE)[1])
            pairs.append((page, line_no))
        pseudo_set = check_sentences.Evidences(pairs)

        if len(pseudo_set) == 0:
            flags.append("verifiable.eq.0")
        else:
            flags.append(f"not_verifiable.non_eq.{len(pseudo_set)}")

        # NEI items must carry no gold evidence at all.
        assert all(len(e) == 0 for e in gold_sets)
        collected.append(pseudo_set)
        assert len(collected) == 1

    assert len(collected) == len(flags)

    return collected, flags
예제 #10
0
    #
    #     ssid = (item_id, doc_id, ln)
    #     print(ssid)
    #     train_sent_id_dict.add(ssid)
    selection_dict = simi_sampler.paired_selection_score_dict(train_sent_list)
    selection_dict = simi_sampler.paired_selection_score_dict(
        remaining_sent_list, selection_dict)

    # for k, v in selection_dict.items():
    #     print(k, v)

    total = 0
    hit = 0

    for item in tqdm(training_list):
        item_id = int(item['id'])
        e_list = check_sentences.check_and_clean_evidence(item)
        for evidences in e_list:
            for doc_id, ln in evidences:
                ssid: Tuple[int, str, int] = (item_id, doc_id, ln)
                if ssid in selection_dict:
                    assert item['claim'] == selection_dict[ssid]['claim']
                    hit += 1
                total += 1

    print(hit, total, hit / total, total - hit)

    # for doc_id, ln in evidences:
    #     if (item_id, doc_id, ln) not in selection_dict:
    #         print(item)