Example #1
def precompute_forward_items_and_cache():
    # NOTE: three places below must be switched together when changing between the dev/train/test splits.

    is_training = False
    doc_results = common.load_json(
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_dev_doc_retrieval_v8_before_multihop_filtering.json")
        config.PRO_ROOT /
        "results/doc_retri_results/doc_retrieval_final_v8/hotpot_test_doc_retrieval_v8_before_multihop_filtering.json"
    )
    doc_results = results_multihop_filtering(doc_results,
                                             multihop_retrieval_top_k=3,
                                             strict_mode=True)

    # db_cursor = wiki_db_tool.get_cursor(config.WHOLE_WIKI_DB)

    t_db_cursor = wiki_db_tool.get_cursor(config.WHOLE_PROCESS_FOR_RINDEX_DB)

    # data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    data_list = common.load_json(config.TEST_FULLWIKI_FILE)
    # data_list = common.load_json(config.TRAIN_FILE)
    append_baseline_context(doc_results, data_list)

    fitem_list = build_full_wiki_document_forward_item(doc_results, data_list,
                                                       is_training,
                                                       t_db_cursor, True)

    print(len(fitem_list))
    common.save_jsonl(
        fitem_list, config.PDATA_ROOT / "content_selection_forward" /
        "hotpot_test_p_level_unlabeled.jsonl")
Example #2
def toy_init_results():
    dev_fullwiki_list = common.load_json(config.DEV_FULLWIKI_FILE)
    print(len(dev_fullwiki_list))

    # Load rindex file
    abs_rindexdb = IndexDB()
    abs_rindexdb.load_from_file(config.PDATA_ROOT / "reverse_indexing/abs_rindexdb")
    print("Number of terms:", len(abs_rindexdb.inverted_index.index))
    abs_rindexdb.inverted_index.build_Nt_table()
    abs_rindexdb.score_db['default-tf-idf'] = dict()
    load_from_file(abs_rindexdb.score_db['default-tf-idf'],
                   config.PDATA_ROOT / "reverse_indexing/abs_rindexdb/scored_db/default-tf-idf.score.txt")
    # Load rindex finished

    saved_items = []
    for item in tqdm(dev_fullwiki_list):
        saved_tfidf_item = dict()
        question = item['question']
        qid = item['_id']

        doc_list = get_top_ranked_tf_idf_doc(question, abs_rindexdb, top_k=50)
        saved_tfidf_item['question'] = question
        saved_tfidf_item['qid'] = qid
        saved_tfidf_item['doc_list'] = doc_list

        saved_items.append(saved_tfidf_item)

    common.save_jsonl(saved_items, config.RESULT_PATH / "doc_retri_results/term_based_methods_results/hotpot_tf_idf_dev.jsonl")
Example #3
def multi_process(start=0, end=None, tag='dev'):
    task_name = 'fever'
    debug = False
    top_k = 20
    num_process = 3
    query_fieldname = 'claim'
    id_fieldname = 'id'
    debug_name = 'debug' if debug else ""

    # print(multiprocessing.cpu_count())
    print("CPU Count:", multiprocessing.cpu_count())

    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag:{tag} not supported.")

    print("Total length:", len(d_list))
    # Important: set the start/end shard range here if it is not passed in as arguments.
    # start, end = 0, None

    print(f"Task:{task_name}, Tag:{tag}, TopK:{top_k}, Start/End:{start}/{end}")
    d_list = d_list[start:end]

    print("Data length:", len(d_list))
    if debug:
        d_list = d_list[:10]
        start, end = 0, 10
    print("Data length (Pos-filtering):", len(d_list))

    r_list = []

    incr_file = config.RESULT_PATH / f"doc_retri_results/term_based_methods_results/{task_name}_tf_idf_{tag}_incr_({start},{end})_{debug_name}.jsonl"
    if incr_file.is_file():
        print("Warning save file exists.")

    save_path: Path = config.RESULT_PATH / f"doc_retri_results/term_based_methods_results/{task_name}_tf_idf_{tag}_({start},{end})_{debug_name}.jsonl"
    if save_path.is_file():
        print("Warning save file exists.")

    with open(incr_file, mode='w', encoding='utf-8') as out_f:
        with Pool(processes=num_process, maxtasksperchild=1000) as pool:

            process_func = partial(process_fever_item_multiprocessing,
                                   top_k=top_k, query_field=query_fieldname, id_field=id_fieldname)

            p_item_list = pool.imap_unordered(process_func, d_list)
            for item in tqdm(p_item_list, total=len(d_list)):
                r_list.append(item)
                out_f.write(json.dumps(item) + '\n')
                out_f.flush()

    print(len(r_list))
    common.save_jsonl(r_list, save_path)
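
A hedged usage sketch for multi_process: the start/end arguments above are meant for sharding a large split across separate runs; the shard size below is an illustrative assumption.

if __name__ == '__main__':
    # Process the FEVER train split in shards of 20,000 claims (shard size is illustrative).
    total = len(common.load_jsonl(config.FEVER_TRAIN))
    shard_size = 20000
    for shard_start in range(0, total, shard_size):
        multi_process(start=shard_start, end=min(shard_start + shard_size, total), tag='train')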
Example #4
def single_process_fever_with_dict(start=0, end=None, tag='dev'):
    task_name = 'fever'
    debug = False
    top_k = 20

    query_fieldname = 'claim'
    id_fieldname = 'id'
    debug_name = 'debug' if debug else ""

    g_score_dict = dict()
    g_score_dict = load_from_file(g_score_dict,
                                  config.PDATA_ROOT / "reverse_indexing/abs_rindexdb/scored_db/default-tf-idf.score.txt")

    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag:{tag} not supported.")

    print("Total length:", len(d_list))
    # Important: set the start/end range here if it is not passed in as arguments.
    # start, end = 0, len(d_list)

    print(f"Task:{task_name}, Tag:{tag}, TopK:{top_k}, Start/End:{start}/{end}")
    d_list = d_list[start:end]

    print("Data length:", len(d_list))
    if debug:
        d_list = d_list[:10]
        start, end = 0, 10
    print("Data length (Pos-filtering):", len(d_list))

    r_item_list = []

    incr_file = config.RESULT_PATH / f"doc_retri_results/term_based_methods_results/{task_name}_tf_idf_{tag}_incr_({start},{end})_{debug_name}.jsonl"
    if incr_file.is_file():
        print("Warning save file exists.")

    save_path: Path = config.RESULT_PATH / f"doc_retri_results/term_based_methods_results/{task_name}_tf_idf_{tag}_({start},{end})_{debug_name}.jsonl"
    if save_path.is_file():
        print("Warning save file exists.")

    with open(incr_file, mode='w', encoding='utf-8') as out_f:
        process_func = partial(process_fever_item_with_score_dict,
                               top_k=top_k, query_field=query_fieldname, id_field=id_fieldname,
                               global_score_dict=g_score_dict)

        for item in tqdm(d_list, total=len(d_list)):
            r_item = process_func(item)
            r_item_list.append(r_item)
            out_f.write(json.dumps(r_item) + '\n')  # write the processed item so the incremental file matches the final results
            out_f.flush()

    print(len(r_item_list))
    common.save_jsonl(r_item_list, save_path)
Example #5
def prepare_data_only_page_view(tokenized_file, eval_file,
                                doc_retrieval_output_file):
    """
    Prepare document retrieval data using only page views.
    :return:
    """
    doc_retrieval_method = 'pageview'
    print("Method:", doc_retrieval_method)

    haonan_docretri_object = HAONAN_DOCRETRI_OBJECT()

    doc_retrieval_result_list = first_doc_retrieval(
        haonan_docretri_object,
        tokenized_file,
        method=doc_retrieval_method,
        top_k=100)
    eval_list = common.load_jsonl(eval_file)

    disamb.item_resorting(doc_retrieval_result_list)

    print("Evaluating 1st Doc Retrieval")
    eval_mode = {'check_doc_id_correct': True, 'standard': False}
    print(
        c_scorer.fever_score(doc_retrieval_result_list,
                             eval_list,
                             mode=eval_mode,
                             verbose=False))
    for max_doc_num in (5, 10, 15, 20):
        print(
            f"Max_doc_num_{max_doc_num}:",
            c_scorer.fever_doc_only(doc_retrieval_result_list,
                                    eval_list,
                                    max_evidence=max_doc_num))
    # First Document retrieval End.
    common.save_jsonl(doc_retrieval_result_list, doc_retrieval_output_file)
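
A hedged usage sketch; both result paths below are illustrative placeholders rather than paths confirmed by the original project.

if __name__ == '__main__':
    prepare_data_only_page_view(
        tokenized_file=config.RESULT_PATH / "pipeline_r/t_shared_task_dev.jsonl",     # placeholder path
        eval_file=config.T_FEVER_DEV_JSONL,                                           # dev file with gold labels
        doc_retrieval_output_file=config.RESULT_PATH / "doc_retri/docretri.pageview/dev.jsonl")  # placeholder path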
Example #6
def build_anli(path: Path, round=1, version='1.0'):
    data_root_path = (path / "anli")
    if not data_root_path.exists():
        data_root_path.mkdir()

    round_tag = str(round)

    o_train = common.load_jsonl(config.PRO_ROOT / f"data/anli_v{version}/R{round_tag}/train.jsonl")
    o_dev = common.load_jsonl(config.PRO_ROOT / f"data/anli_v{version}/R{round_tag}/dev.jsonl")
    o_test = common.load_jsonl(config.PRO_ROOT / f"data/anli_v{version}/R{round_tag}/test.jsonl")

    d_train = a_nli2std_format(o_train)
    d_dev = a_nli2std_format(o_dev)
    d_test = a_nli2std_format(o_test)

    print(f"ANLI (R{round_tag}) Train size:", len(d_trian))
    print(f"ANLI (R{round_tag}) Dev size:", len(d_dev))
    print(f"ANLI (R{round_tag}) Test size:", len(d_test))

    if not (data_root_path / f"r{round_tag}").exists():
        (data_root_path / f"r{round_tag}").mkdir()

    common.save_jsonl(d_train, data_root_path / f"r{round_tag}" / 'train.jsonl')
    common.save_jsonl(d_dev, data_root_path / f"r{round_tag}" / 'dev.jsonl')
    common.save_jsonl(d_test, data_root_path / f"r{round_tag}" / 'test.jsonl')
Example #7
def build_mnli(path: Path):
    data_root_path = (path / "mnli")
    if not data_root_path.exists():
        data_root_path.mkdir()
    o_train = common.load_jsonl(config.PRO_ROOT / "data/multinli_1.0/multinli_1.0_train.jsonl")
    o_mm_dev = common.load_jsonl(config.PRO_ROOT / "data/multinli_1.0/multinli_1.0_dev_mismatched.jsonl")
    o_m_dev = common.load_jsonl(config.PRO_ROOT / "data/multinli_1.0/multinli_1.0_dev_matched.jsonl")

    d_train = sm_nli2std_format(o_train)
    d_mm_dev = sm_nli2std_format(o_mm_dev)
    d_m_dev = sm_nli2std_format(o_m_dev)

    print("MNLI examples without gold label have been filtered.")
    print("MNLI Train size:", len(d_train))
    print("MNLI MisMatched Dev size:", len(d_mm_dev))
    print("MNLI Matched Dev size:", len(d_m_dev))

    common.save_jsonl(d_train, data_root_path / 'train.jsonl')
    common.save_jsonl(d_mm_dev, data_root_path / 'mm_dev.jsonl')
    common.save_jsonl(d_m_dev, data_root_path / 'm_dev.jsonl')
Example #8
def build_fever_nli(path: Path):
    data_root_path = (path / "fever_nli")
    if not data_root_path.exists():
        data_root_path.mkdir()

    o_train = common.load_jsonl(config.PRO_ROOT / "data/nli_fever/train_fitems.jsonl")
    o_dev = common.load_jsonl(config.PRO_ROOT / "data/nli_fever/dev_fitems.jsonl")
    o_test = common.load_jsonl(config.PRO_ROOT / "data/nli_fever/test_fitems.jsonl")

    d_train = fever_nli2std_format(o_train)
    d_dev = fever_nli2std_format(o_dev)
    d_test = fever_nli2std_format(o_test)

    print("FEVER-NLI Train size:", len(d_train))
    print("FEVER-NLI Dev size:", len(d_dev))
    print("FEVER-NLI Test size:", len(d_test))

    common.save_jsonl(d_train, data_root_path / 'train.jsonl')
    common.save_jsonl(d_dev, data_root_path / 'dev.jsonl')
    common.save_jsonl(d_test, data_root_path / 'test.jsonl')
Example #9
def build_snli(path: Path):
    snli_data_root_path = (path / "snli")
    if not snli_data_root_path.exists():
        snli_data_root_path.mkdir()
    o_train = common.load_jsonl(config.PRO_ROOT / "data/snli_1.0/snli_1.0_train.jsonl")
    o_dev = common.load_jsonl(config.PRO_ROOT / "data/snli_1.0/snli_1.0_dev.jsonl")
    o_test = common.load_jsonl(config.PRO_ROOT / "data/snli_1.0/snli_1.0_test.jsonl")

    d_train = sm_nli2std_format(o_train)
    d_dev = sm_nli2std_format(o_dev)
    d_test = sm_nli2std_format(o_test)

    print("SNLI examples without gold label have been filtered.")
    print("SNLI Train size:", len(d_train))
    print("SNLI Dev size:", len(d_dev))
    print("SNLI Test size:", len(d_test))

    common.save_jsonl(d_train, snli_data_root_path / 'train.jsonl')
    common.save_jsonl(d_dev, snli_data_root_path / 'dev.jsonl')
    common.save_jsonl(d_test, snli_data_root_path / 'test.jsonl')
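
A hedged driver sketch that combines the four builders above to materialize every NLI dataset under one root directory; the output root path is an assumption for illustration.

def build_all_nli_data(root: Path):
    # Build SNLI, MNLI, FEVER-NLI, and the three ANLI rounds under `root`.
    if not root.exists():
        root.mkdir(parents=True)
    build_snli(root)
    build_mnli(root)
    build_fever_nli(root)
    for r in (1, 2, 3):
        build_anli(root, round=r, version='1.0')


if __name__ == '__main__':
    build_all_nli_data(config.PRO_ROOT / "data/build_data")  # output root is illustrative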
Example #10
def pipeline(in_file,
             eval_file=None,
             model_path_dict=default_model_path_dict,
             steps=default_steps):
    """
    :param in_file: The raw input file.
    :param eval_file: Optional file with gold labels; when provided, evaluation is run after each stage.
    :return:
    """
    sentence_retri_1_scale_prob = 0.5
    sentence_retri_2_scale_prob = 0.9
    sent_retri_1_top_k = 5
    sent_retri_2_top_k = 1

    sent_prob_for_2doc = 0.1
    sent_topk_for_2doc = 5
    enhance_retri_1_scale_prob = -1

    build_submission = True

    doc_retrieval_method = 'word_freq'

    haonan_docretri_object = HAONAN_DOCRETRI_OBJECT()

    if not PIPELINE_DIR.exists():
        PIPELINE_DIR.mkdir()

    if steps['s1.tokenizing']['do']:
        time_stamp = utils.get_current_time_str()
        current_pipeline_dir = PIPELINE_DIR / f"{time_stamp}_r"
    else:
        current_pipeline_dir = steps['s1.tokenizing']['out_file'].parent

    print("Current Result Root:", current_pipeline_dir)

    if not current_pipeline_dir.exists():
        current_pipeline_dir.mkdir()

    eval_list = common.load_jsonl(eval_file) if eval_file is not None else None

    in_file_stem = in_file.stem
    tokenized_file = current_pipeline_dir / f"t_{in_file_stem}.jsonl"

    # Save code into directory
    script_name = os.path.basename(__file__)
    with open(os.path.join(str(current_pipeline_dir), script_name),
              'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()

    # Tokenizing.
    print("Step 1. Tokenizing.")
    if steps['s1.tokenizing']['do']:
        tokenized_claim(in_file, tokenized_file)  # Auto Saved
        print("Tokenized file saved to:", tokenized_file)
    else:
        tokenized_file = steps['s1.tokenizing']['out_file']
        print("Use preprocessed file:", tokenized_file)
    # Tokenizing End.

    # First Document retrieval.
    print("Step 2. First Document Retrieval")

    if steps['s2.1doc_retri']['do']:
        doc_retrieval_result_list = first_doc_retrieval(
            haonan_docretri_object,
            tokenized_file,
            method=doc_retrieval_method)
        doc_retrieval_file_1 = current_pipeline_dir / f"doc_retr_1_{in_file_stem}.jsonl"
        common.save_jsonl(doc_retrieval_result_list, doc_retrieval_file_1)
        print("First Document Retrieval file saved to:", doc_retrieval_file_1)
    else:
        doc_retrieval_file_1 = steps['s2.1doc_retri']['out_file']
        doc_retrieval_result_list = common.load_jsonl(doc_retrieval_file_1)
        print("Use preprocessed file:", doc_retrieval_file_1)

    if eval_list is not None:
        print("Evaluating 1st Doc Retrieval")
        eval_mode = {'check_doc_id_correct': True, 'standard': False}
        print(
            c_scorer.fever_score(doc_retrieval_result_list,
                                 eval_list,
                                 mode=eval_mode,
                                 verbose=False))
    # First Document retrieval End.

    # First Sentence Selection.
    print("Step 3. First Sentence Selection")
    if steps['s3.1sen_select']['do']:
        dev_sent_list_1_e0 = simple_nnmodel.pipeline_first_sent_selection(
            tokenized_file, doc_retrieval_file_1, model_path_dict['sselector'])
        dev_sent_file_1_e0 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}.jsonl"
        common.save_jsonl(dev_sent_list_1_e0, dev_sent_file_1_e0)

        # Manual setting, delete it later
        # dev_sent_file_1_e0 = None
        # dev_sent_list_1_e0 = common.load_jsonl("/home/easonnie/projects/FunEver/results/pipeline_r/2018_07_24_11:07:41_r(new_model_v1_2_for_realtest)_scaled_0.05_selector_em/dev_sent_score_1_shared_task_test.jsonl")
        # End

        if steps['s3.1sen_select']['ensemble']:
            print("Ensemble!")
            dev_sent_list_1_e1 = simple_nnmodel.pipeline_first_sent_selection(
                tokenized_file, doc_retrieval_file_1,
                model_path_dict['sselector_1'])
            dev_sent_file_1_e1 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_e1.jsonl"
            common.save_jsonl(dev_sent_list_1_e1, dev_sent_file_1_e1)
            # exit(0)
            # dev_sent_list_1_e1 = common.load_jsonl(dev_sent_file_1_e1)

            dev_sent_list_1_e2 = simple_nnmodel.pipeline_first_sent_selection(
                tokenized_file, doc_retrieval_file_1,
                model_path_dict['sselector_2'])
            dev_sent_file_1_e2 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_e2.jsonl"
            common.save_jsonl(dev_sent_list_1_e2, dev_sent_file_1_e2)
            # exit(0)
            # dev_sent_list_1_e2 = common.load_jsonl(dev_sent_file_1_e2)

            dev_sent_list_1 = merge_sent_results(
                [dev_sent_list_1_e0, dev_sent_list_1_e1, dev_sent_list_1_e2])
            dev_sent_file_1 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_ensembled.jsonl"
            common.save_jsonl(dev_sent_list_1, dev_sent_file_1)
            # exit(0)
        else:
            dev_sent_list_1 = dev_sent_list_1_e0
            dev_sent_file_1 = dev_sent_file_1_e0
        # Merging two results

        print("First Sentence Selection file saved to:", dev_sent_file_1)

    else:
        dev_sent_file_1 = steps['s3.1sen_select']['out_file']
        dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
        print("Use preprocessed file:", dev_sent_file_1)

    # exit(0)

    if eval_list is not None:
        print("Evaluating 1st Sentence Selection")
        # sent_select_results_list_1 = simi_sampler.threshold_sampler(tokenized_file, dev_sent_full_list,
        #                                                             sentence_retri_scale_prob, top_n=5)
        # additional_dev_sent_list = common.load_jsonl("/Users/Eason/RA/FunEver/results/sent_retri_nn/2018_07_20_15-17-59_r/dev_sent_2r.jsonl")
        # dev_sent_full_list = dev_sent_full_list + additional_dev_sent_list
        sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(
            tokenized_file,
            dev_sent_list_1,
            sentence_retri_1_scale_prob,
            top_n=sent_retri_1_top_k)
        # sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1,
        #                                                                                 additional_dev_sent_list,
        #                                                                                 sentence_retri_2_scale_prob,
        #                                                                                 top_n=5, add_n=1)

        eval_mode = {'check_sent_id_correct': True, 'standard': False}
        # for a, b in zip(eval_list, sent_select_results_list_1):
        #     b['predicted_label'] = a['label']
        print(
            c_scorer.fever_score(sent_select_results_list_1,
                                 eval_list,
                                 mode=eval_mode,
                                 verbose=False))

    print("Step 4. Second Document Retrieval")
    if steps['s4.2doc_retri']['do']:
        dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
        filtered_dev_instance_1_for_doc2 = simi_sampler.threshold_sampler_insure_unique(
            tokenized_file,
            dev_sent_list_1,
            sent_prob_for_2doc,
            top_n=sent_topk_for_2doc)
        filtered_dev_instance_1_for_doc2_file = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_scaled_for_doc2.jsonl"
        common.save_jsonl(filtered_dev_instance_1_for_doc2,
                          filtered_dev_instance_1_for_doc2_file)

        dev_sent_1_result = simi_sampler.threshold_sampler_insure_unique(
            doc_retrieval_file_1,  # Remember this name
            dev_sent_list_1,
            sentence_retri_1_scale_prob,
            top_n=sent_topk_for_2doc)

        dev_doc2_list = second_doc_retrieval(
            haonan_docretri_object, filtered_dev_instance_1_for_doc2_file,
            dev_sent_1_result)

        dev_doc2_file = current_pipeline_dir / f"doc_retr_2_{in_file_stem}.jsonl"
        common.save_jsonl(dev_doc2_list, dev_doc2_file)
        print("Second Document Retrieval File saved to:", dev_doc2_file)
    else:
        dev_doc2_file = steps['s4.2doc_retri']['out_file']
        # dev_doc2_list = common.load_jsonl(dev_doc2_file)
        print("Use preprocessed file:", dev_doc2_file)

    print("Step 5. Second Sentence Selection")
    if steps['s5.2sen_select']['do']:
        dev_sent_2_list = get_score_multihop(
            tokenized_file,
            dev_doc2_file,
            model_path=model_path_dict['sselector'])

        dev_sent_file_2 = current_pipeline_dir / f"dev_sent_score_2_{in_file_stem}.jsonl"
        common.save_jsonl(dev_sent_2_list, dev_sent_file_2)
        print("First Sentence Selection file saved to:", dev_sent_file_2)
    else:
        dev_sent_file_2 = steps['s5.2sen_select']['out_file']

    if eval_list is not None:
        print("Evaluating 1st Sentence Selection")
        dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
        dev_sent_list_2 = common.load_jsonl(dev_sent_file_2)
        sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(
            tokenized_file,
            dev_sent_list_1,
            sentence_retri_1_scale_prob,
            top_n=5)
        sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique_merge(
            sent_select_results_list_1,
            dev_sent_list_2,
            sentence_retri_2_scale_prob,
            top_n=5,
            add_n=sent_retri_2_top_k)
        eval_mode = {'check_sent_id_correct': True, 'standard': False}
        # for a, b in zip(eval_list, sent_select_results_list_1):
        #     b['predicted_label'] = a['label']
        print(
            c_scorer.fever_score(sent_select_results_list_1,
                                 eval_list,
                                 mode=eval_mode,
                                 verbose=False))

    # print("Step 6. NLI")
    # if steps['s6.nli']['do']:
    #     dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
    #     dev_sent_list_2 = common.load_jsonl(dev_sent_file_2)
    #     sentence_retri_1_scale_prob = 0.05
    #     print("Threshold:", sentence_retri_1_scale_prob)
    #     sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(tokenized_file, dev_sent_list_1,
    #                                                                               sentence_retri_1_scale_prob, top_n=5)
    #     # sent_select_results_list_2 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1,
    #     #                                                                                 dev_sent_list_2,
    #     #                                                                                 sentence_retri_2_scale_prob,
    #     #                                                                                 top_n=5,
    #     #                                                                                 add_n=sent_retri_2_top_k)
    #     nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run(tokenized_file,
    #                                                           sent_select_results_list_1,
    #                                                           [dev_sent_file_1, dev_sent_file_2],
    #                                                           model_path_dict['nli'],
    #                                                           with_logits=True,
    #                                                           with_probs=True)
    #
    #     nli_results_file = current_pipeline_dir / f"nli_r_{in_file_stem}.jsonl"
    #     common.save_jsonl(nli_results, nli_results_file)
    # else:
    #     nli_results_file = steps['s6.nli']['out_file']
    #     nli_results = common.load_jsonl(nli_results_file)

    # Ensemble code
    # dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
    # dev_sent_list_2 = common.load_jsonl(dev_sent_file_2)
    # sentence_retri_1_scale_prob = 0.05
    # print("NLI sentence threshold:", sentence_retri_1_scale_prob)
    # sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(tokenized_file, dev_sent_list_1,
    #                                                                           sentence_retri_1_scale_prob, top_n=5)
    #
    # # sent_select_results_list_2 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1,
    # #                                                                                 dev_sent_list_2,
    # #                                                                                 sentence_retri_2_scale_prob,
    # #                                                                                 top_n=5,
    # #                                                                                 add_n=sent_retri_2_top_k)
    # # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run(tokenized_file,
    # #                                                       sent_select_results_list_1,
    # #                                                       [dev_sent_file_1, dev_sent_file_2],
    # #                                                       model_path_dict['nli'], with_probs=True, with_logits=True)
    #
    # # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run_bigger(tokenized_file,
    # #                                                       sent_select_results_list_1,
    # #                                                       [dev_sent_file_1, dev_sent_file_2],
    # #                                                       model_path_dict['nli_2'],
    # #                                                              with_probs=True,
    # #                                                              with_logits=True)
    #
    # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run_bigger(tokenized_file,
    #                                                       sent_select_results_list_1,
    #                                                       [dev_sent_file_1, dev_sent_file_2],
    #                                                       model_path_dict['nli_4'],
    #                                                       with_probs=True,
    #                                                       with_logits=True)
    #
    # nli_results_file = current_pipeline_dir / f"nli_r_{in_file_stem}_withlb_e4.jsonl"
    # common.save_jsonl(nli_results, nli_results_file)
    # Ensemble code end
    # exit(0)

    nli_r_e0 = common.load_jsonl(current_pipeline_dir /
                                 "nli_r_shared_task_test_withlb_e0.jsonl")
    nli_r_e1 = common.load_jsonl(current_pipeline_dir /
                                 "nli_r_shared_task_test_withlb_e1.jsonl")
    nli_r_e2 = common.load_jsonl(current_pipeline_dir /
                                 "nli_r_shared_task_test_withlb_e2.jsonl")
    nli_r_e3 = common.load_jsonl(current_pipeline_dir /
                                 "nli_r_shared_task_test_withlb_e3.jsonl")
    nli_r_e4 = common.load_jsonl(current_pipeline_dir /
                                 "nli_r_shared_task_test_withlb_e4.jsonl")

    nli_results = merge_nli_results(
        [nli_r_e0, nli_r_e1, nli_r_e2, nli_r_e3, nli_r_e4])

    print("Post Processing enhancement")
    delete_unused_evidence(nli_results)
    print("Deleting Useless Evidence")

    dev_sent_list_1 = common.load_jsonl(dev_sent_file_1)
    dev_sent_list_2 = common.load_jsonl(dev_sent_file_2)

    print("Appending 1 of second Evidence")
    nli_results = simi_sampler.threshold_sampler_insure_unique_merge(
        nli_results,
        dev_sent_list_2,
        sentence_retri_2_scale_prob,
        top_n=5,
        add_n=sent_retri_2_top_k)
    delete_unused_evidence(nli_results)

    # High tolerance enhancement!
    print("Final High Tolerance Enhancement")
    print("Appending all of first Evidence")
    nli_results = simi_sampler.threshold_sampler_insure_unique_merge(
        nli_results,
        dev_sent_list_1,
        enhance_retri_1_scale_prob,
        top_n=100,
        add_n=100)
    delete_unused_evidence(nli_results)

    if build_submission:
        output_file = current_pipeline_dir / "predictions.jsonl"
        build_submission_file(nli_results, output_file)
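
A hedged usage sketch for the pipeline above; the choice of input file is illustrative (it mirrors the hard-coded shared-task NLI result names in the function), and default_model_path_dict / default_steps are assumed to be defined elsewhere in the module, as the signature suggests.

if __name__ == '__main__':
    pipeline(in_file=config.FEVER_TEST,      # shared-task test claims (illustrative choice)
             eval_file=None,                 # no gold labels for the test split
             model_path_dict=default_model_path_dict,
             steps=default_steps)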
Example #11
def model_eval(model_save_path):
    seed = 6
    bert_model_name = 'bert-base-uncased'
    lazy = False
    forward_size = 16
    batch_size = 32
    # dev_prob_threshold = 0.05
    dev_prob_threshold = 0.1

    num_class = 3

    # num_train_optimization_steps
    torch.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')
    # Finished building vocabulary.

    # Load standardized sentence file
    # dev_upstream_sent_list = common.load_jsonl(config.RESULT_PATH /
    #                                            "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/i(5000)|e(0)|s01(0.9170917091709171)|s05(0.8842384238423843)|seed(12)_dev_sent_results.json")

    # dev_upstream_sent_list = common.load_jsonl(config.DATA_ROOT /
    # "utest_data/dev_sent_score_2_shared_task_dev.jsonl")
    # "utest_data/dev_sent_score_1_shared_task_dev_docnum(10)_ensembled.jsonl")

    # dev_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT /
    #                                            "upstream_sentence_selection_Feb16/dev_sent_pred_scores.jsonl")

    dev_upstream_sent_list = common.load_jsonl(
        config.FEVER_DATA_ROOT /
        "upstream_sentence_selection_Feb16/4-15-dev_sent_pred_scores.jsonl")
    #
    # dev_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT /
    #                                            "upstream_sentence_selection_Feb16/4-15-test_sent_pred_scores.jsonl")

    # dev_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT /
    #                                            "upstream_sentence_selection_Feb16/n_dev_sent_pred_scores.jsonl")

    # dev_sent_after_threshold_filter = fever_ss_sampler.threshold_sampler_insure_unique_new_format(
    dev_sent_after_threshold_filter = fever_ss_sampler.threshold_sampler_insure_unique(
        config.FEVER_DEV,
        dev_upstream_sent_list,
        prob_threshold=dev_prob_threshold,
        top_n=5)

    dev_data_list = fever_nli_sampler.select_sent_with_prob_for_eval(
        config.FEVER_DEV,
        dev_sent_after_threshold_filter,
        None,
        tokenized=True)

    # dev_sent_after_threshold_filter = fever_ss_sampler.threshold_sampler_insure_unique(
    #     config.FEVER_TEST,
    #     dev_upstream_sent_list,
    #     prob_threshold=dev_prob_threshold, top_n=5)
    #
    # dev_data_list = fever_nli_sampler.select_sent_with_prob_for_eval(
    #     config.FEVER_TEST, dev_sent_after_threshold_filter,
    #     None, tokenized=True, pipeline=True)

    for item in dev_data_list:
        item['label'] = 'hidden'

    dev_list = common.load_jsonl(config.FEVER_DEV)

    # Oracle check: copy the gold labels as predictions so the score below reflects the
    # upper bound imposed by sentence selection alone.
    for a, b in zip(dev_list, dev_data_list):
        del b['label']
        b['predicted_label'] = a['label']

    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(
        dev_data_list, dev_list, mode=eval_mode, verbose=False)
    print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score,
          label_score, pr, rec, f1)
    print(f"Dev:{fever_score}/{label_score}")

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name,
                                                   do_lower_case=True)

    bert_fever_reader = BertReaderFeverNLI(bert_tokenizer, lazy=lazy)

    dev_instances = bert_fever_reader.read(dev_data_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    # print(list(mnli_dev_instances))

    # Load training model
    model_clf = BertForSequenceClassification.from_pretrained(
        bert_model_name, num_labels=num_class)

    model_clf.load_state_dict(torch.load(model_save_path))

    model_clf.to(device)

    model_clf.eval()

    if n_gpu > 1:
        model_clf = nn.DataParallel(model_clf)

    dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

    # for item in dev_data_list:

    dev_data_list = hidden_eval(model_clf, dev_iter, dev_data_list, device)

    common.save_jsonl(
        dev_data_list, config.PRO_ROOT /
        "data/fever/upstream_sentence_selection_Feb16/4-15-dev_nli_results.jsonl"
    )

    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(
        dev_data_list,
        common.load_jsonl(config.FEVER_DEV),
        mode=eval_mode,
        verbose=False)
    print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score,
          label_score, pr, rec, f1)

    print(f"Dev:{fever_score}/{label_score}")
Example #12
    retri_list = []
    for item in tqdm(fullwiki_list):
        saved_tfidf_item = dict()
        question = item['question']
        qid = item['_id']

        doc_list = lucene_retri_doc(question, top_k=50)
        saved_tfidf_item['question'] = question
        saved_tfidf_item['qid'] = qid
        saved_tfidf_item['doc_list'] = doc_list

        retri_list.append(saved_tfidf_item)
    return retri_list


if __name__ == '__main__':
    lucene_indexing()

    print("retrieve train set ...")
    saved_items = term_based_doc_retri(config.TRAIN_FILE)
    common.save_jsonl(saved_items, config.TRAIN_TERM_BASED)

    print("retrieve dev set ...")
    saved_items = term_based_doc_retri(config.DEV_FULLWIKI_FILE)
    common.save_jsonl(saved_items, config.DEV_TERM_BASED)

    print("retrieve test set ...")
    saved_items = term_based_doc_retri(config.TEST_FULLWIKI_FILE)
    common.save_jsonl(saved_items, config.TEST_TERM_BASED)
Example #13
        sent_level_results_list=train_sent_results_list,
        debug=debug_mode,
        sent_top_k=top_k_sent,
        sent_filter_value=train_sent_filtering_prob)
    test_fitems, test_list = get_nli_pair(
        'test',
        is_training=False,
        sent_level_results_list=test_sent_results_list,
        debug=debug_mode,
        sent_top_k=5,
        sent_filter_value=test_sent_filtering_prob)

    print(dev_fitems[0])

    common.save_jsonl(
        dev_fitems, config.PRO_ROOT /
        "data/p_fever/intermediate_sent_data/dev_fitems.jsonl")
    common.save_jsonl(
        dev_list,
        config.PRO_ROOT / "data/p_fever/intermediate_sent_data/dev_list.jsonl")

    common.save_jsonl(
        train_fitems, config.PRO_ROOT /
        "data/p_fever/intermediate_sent_data/train_fitems.jsonl")
    common.save_jsonl(
        train_list, config.PRO_ROOT /
        "data/p_fever/intermediate_sent_data/train_list.jsonl")

    common.save_jsonl(
        test_fitems, config.PRO_ROOT /
        "data/p_fever/intermediate_sent_data/test_fitems.jsonl")
Example #14
        # print(sfile)
        a = pattern.fullmatch(str(sfile.name))
        if a is None:
            continue
        file_list.append((int(a.group(1)), sfile))

    with open(filename, encoding='utf-8', mode='w') as out_f:
        for _, the_file in sorted(file_list, key=lambda x: x[0]):
            print(the_file)
            with open(the_file, encoding='utf-8', mode='r') as in_f:
                for line in in_f:
                    out_f.write(line)


if __name__ == '__main__':
    dev_d = common.load_jsonl(config.T_FEVER_DEV_JSONL)
    train_d = common.load_jsonl(config.T_FEVER_TRAIN_JSONL)
    dt_d = dev_d + train_d
    common.save_jsonl(dt_d, config.T_FEVER_DT_JSONL)

    # split_by_line("/Users/Eason/RA/FunEver/utest/utest_data/test_rand_data.txt",
    #               out_path="/Users/Eason/RA/FunEver/utest/utest_data/test_rand_data_1.txt.splits")
    #
    # merge_by_line('/Users/Eason/RA/FunEver/utest/utest_data/test_rand_data_1.txt')

    # merge_by_line('/Users/Eason/RA/FunEver/results/sent_retri/2018_07_05_17:17:50_r/train')

    # split_by_line("/Users/Eason/RA/FunEver/results/doc_retri/2018_07_04_21:56:49_r/train.jsonl",
    #               out_path="/Users/Eason/RA/FunEver/results/doc_retri/2018_07_04_21:56:49_r/o_train.splits",
    #               lines=20000)
Example #15
def fever_retrieval_v0(term_retrieval_top_k=3, match_filtering_k=2, tag='dev'):
    # term_retrieval_top_k = 20

    # term_retrieval_top_k = 3
    # match_filtering_k = 2

    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag:{tag} not supported.")

    d_tf_idf = common.load_jsonl(
        config.RESULT_PATH /
        f"doc_retri_results/term_based_methods_results/fever_tf_idf_{tag}.jsonl"
    )

    tf_idf_dict = list_dict_data_tool.list_to_dict(d_tf_idf, 'id')

    r_list = []

    ner_set = get_title_entity_set()

    g_score_dict = dict()
    load_from_file(
        g_score_dict, config.PDATA_ROOT /
        "reverse_indexing/abs_rindexdb/scored_db/default-tf-idf.score.txt")

    keyword_processor = KeywordProcessor(case_sensitive=True)
    keyword_processor_disamb = KeywordProcessor(case_sensitive=True)

    print("Build Processor")
    for kw in tqdm(ner_set):
        if filter_word(kw) or filter_document_id(kw):
            continue  # skip keywords rejected by the filters above (e.g., stopwords)
        else:
            # matched_key_word is the original matched span. we need to save it for group ordering.
            matched_obj = _MatchedObject(matched_key_word=kw,
                                         matched_keywords_info={kw: 'kwm'})
            keyword_processor.add_keyword(kw, matched_obj)

    for kw in wiki_util.title_entities_set.disambiguation_group:
        if filter_word(kw) or filter_document_id(kw):
            continue  # skip keywords rejected by the filters above (e.g., stopwords)
        else:
            if kw in keyword_processor:
                # if the kw existed in the kw_processor, we update its dict to add more disamb items
                existing_matched_obj: _MatchedObject = keyword_processor.get_keyword(
                    kw)
                for disamb_kw in wiki_util.title_entities_set.disambiguation_group[
                        kw]:
                    if filter_document_id(disamb_kw):
                        continue
                    if disamb_kw not in existing_matched_obj.matched_keywords_info:
                        existing_matched_obj.matched_keywords_info[
                            disamb_kw] = 'kwm_disamb'
            else:  # If not we add it to the keyword_processor_disamb, which is set to be lower priority
                # new_dict = dict()
                matched_obj = _MatchedObject(matched_key_word=kw,
                                             matched_keywords_info=dict())
                for disamb_kw in wiki_util.title_entities_set.disambiguation_group[
                        kw]:
                    if filter_document_id(disamb_kw):
                        continue
                    matched_obj.matched_keywords_info[disamb_kw] = 'kwm_disamb'
                    # new_dict[disamb_kw] = 'kwm_disamb'
                keyword_processor_disamb.add_keyword(kw, matched_obj)

    for item in tqdm(d_list):
        cur_id = str(item['id'])
        query = item['claim']

        query_terms = get_query_ngrams(query)
        valid_query_terms = [
            term for term in query_terms if term in g_score_dict
        ]

        retrieved_set = RetrievedSet()
        # print(tf_idf_doc_list)
        get_kw_matching_results(query, valid_query_terms, retrieved_set,
                                match_filtering_k, g_score_dict,
                                keyword_processor, keyword_processor_disamb)

        tf_idf_doc_list = tf_idf_dict[cur_id]['retrieved_list']
        added_count = 0
        for score, title in sorted(tf_idf_doc_list,
                                   key=lambda x: x[0],
                                   reverse=True)[:term_retrieval_top_k + 3]:
            if not filter_word(title) and not filter_document_id(
                    title) and not title.startswith('List of '):
                retrieved_set.add_item(RetrievedItem(title, 'tf-idf'))
                added_count += 1
                if term_retrieval_top_k is not None and added_count >= term_retrieval_top_k:
                    break

        predicted_docids = retrieved_set.to_id_list()
        # print(retrieved_set)
        # print(item['claim'], predicted_docids)

        r_item = dict()
        r_item['id'] = int(cur_id)
        r_item['claim'] = item['claim']
        r_item['predicted_docids'] = predicted_docids
        if tag != 'test':
            r_item['label'] = item['label']
        r_list.append(r_item)

    # r_list = common.load_jsonl('dev-debug.jsonl')

    # We need to modify the existing retrieved document for naming consistency
    for i, item in enumerate(r_list):
        predicted_docids = item['predicted_docids']
        modified_docids = []
        for docid in predicted_docids:
            docid = docid.replace(' ', '_')
            docid = reverse_convert_brc(docid)
            modified_docids.append(docid)
        item['predicted_docids'] = modified_docids
    # Modify finished

    # print(r_list[0:10])
    len_list = []
    for rset in r_list:
        len_list.append(len(rset['predicted_docids']))

    print(collections.Counter(len_list).most_common(10000))

    print(np.mean(len_list))
    print(np.std(len_list))
    print(np.max(len_list))
    print(np.min(len_list))

    common.save_jsonl(
        r_list, f'fever_term_based_retri_results_'
        f'{tag}_term_topk:{term_retrieval_top_k}_match_filtering_k:{match_filtering_k}.jsonl'
    )

    mode = {'standard': False, 'check_doc_id_correct': True}
    # fever_scorer.fever_score_analysis(r_list, d_list, mode=mode, max_evidence=None)
    fever_scorer.fever_score(r_list, d_list, mode=mode, max_evidence=None)
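
A hedged usage sketch; the arguments simply repeat the function's defaults, and only the labeled splits are run here because the final fever_score call needs gold labels.

if __name__ == '__main__':
    for split in ('dev', 'train'):
        fever_retrieval_v0(term_retrieval_top_k=3, match_filtering_k=2, tag=split)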
Example #16
def train_fever_std_ema_v1(resume_model=None, do_analysis=False):
    """
    Created on 26 Nov 2018 08:50 to train vc (claim verification) and ss (sentence selection) together.
    :param resume_model: optional checkpoint path to resume training from.
    :param do_analysis: whether to dump intermediate analysis files.
    :return:
    """

    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True
    train_prob_threshold = 0.02
    train_sample_top_k = 8
    dev_prob_threshold = 0.1
    dev_sample_top_k = 5
    top_k_doc = 5

    schedule_sample_dict = defaultdict(lambda: 0.1)

    ratio_ss_for_vc = 0.2

    schedule_sample_dict.update({
        0: 0.1,
        1: 0.1,  # 200k + 400K
        2: 0.1,
        3: 0.1,  # 200k + 200k ~ 200k + 100k
        4: 0.1,
        5: 0.1,  # 200k + 100k
        6: 0.1  # 20k + 20k
    })

    # Eval at beginning of the training.
    eval_full_epoch = 1
    eval_nei_epoches = [2, 3, 4, 5, 6, 7]

    neg_only = False
    debug = False

    experiment_name = f"vc_ss_v17_ratio_ss_for_vc:{ratio_ss_for_vc}|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}_scheduled_neg_sampler"
    # resume_model = None

    print("Do EMA:")

    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    # Get upstream sentence document retrieval data
    dev_doc_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/dev_doc.jsonl"
    train_doc_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/train_doc.jsonl"

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL,
                                               dev_doc_upstream_file,
                                               pred=True,
                                               top_k=top_k_doc)

    complete_upstream_train_data = get_full_list(config.T_FEVER_TRAIN_JSONL,
                                                 train_doc_upstream_file,
                                                 pred=False,
                                                 top_k=top_k_doc)
    if debug:
        complete_upstream_dev_data = complete_upstream_dev_data[:1000]
        complete_upstream_train_data = complete_upstream_train_data[:1000]

    print("Dev size:", len(complete_upstream_dev_data))
    print("Train size:", len(complete_upstream_train_data))

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    # Data Reader
    dev_fever_data_reader = VCSS_Reader(token_indexers=token_indexers,
                                        lazy=lazy,
                                        max_l=260)
    train_fever_data_reader = VCSS_Reader(token_indexers=token_indexers,
                                          lazy=lazy,
                                          max_l=260)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")

    vocab.add_token_to_namespace('true', namespace='labels')
    vocab.add_token_to_namespace('false', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    # Reader and prepare end

    vc_ss_training_sampler = VCSSTrainingSampler(complete_upstream_train_data)
    vc_ss_training_sampler.show_info()

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(rnn_size_in=(1024 + 300 + 1, 1024 + 450 + 1),
                  rnn_size_out=(450, 450),
                  weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  mlp_d=900,
                  embedding_dim=300,
                  max_l=300,
                  num_of_class=4)

    print("Model Max length:", model.max_l)
    if resume_model is not None:
        model.load_state_dict(torch.load(resume_model))
    model.display()
    model.to(device)

    cloned_empty_model = copy.deepcopy(model)
    ema: EMA = EMA(parameters=model.named_parameters())

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name),
              'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()

    analysis_dir = None
    if do_analysis:
        analysis_dir = Path(file_path_prefix) / "analysis_aux"
        analysis_dir.mkdir()
    # Save source code end.

    # Starting parameter setup
    best_dev = -1
    iteration = 0

    start_lr = 0.0001
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=start_lr)
    criterion = nn.CrossEntropyLoss()
    # parameter setup end

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resample candidate vc training data from the ss results; this has to be redone after each epoch.
        if i_epoch == eval_full_epoch:  # only eval at 1
            print("We now need to eval the whole training set.")
            print("Be patient and hope good luck!")
            load_ema_to_model(cloned_empty_model, ema)
            eval_sent_for_sampler(cloned_empty_model, token_indexers, vocab,
                                  vc_ss_training_sampler)

        elif i_epoch in eval_nei_epoches:  # at 2, 3, 4 eval for NEI
            print("We now need to eval the NEI training set.")
            print("Be patient and hope good luck!")
            load_ema_to_model(cloned_empty_model, ema)
            eval_sent_for_sampler(cloned_empty_model,
                                  token_indexers,
                                  vocab,
                                  vc_ss_training_sampler,
                                  nei_only=True)

        train_data_with_candidate_sample_list = vc_ss.data_wrangler.sample_sentences_for_vc_with_nei(
            config.T_FEVER_TRAIN_JSONL, vc_ss_training_sampler.sent_list,
            train_prob_threshold, train_sample_top_k)
        # The sentence probabilities are initialized so the sampler can work; the model still needs to be run on the dev data before evaluation.

        train_selection_dict = paired_selection_score_dict(
            vc_ss_training_sampler.sent_list)

        cur_train_vc_data = adv_simi_sample_with_prob_v1_1(
            config.T_FEVER_TRAIN_JSONL,
            train_data_with_candidate_sample_list,
            train_selection_dict,
            tokenized=True)

        if do_analysis:
            # Customized analysis output
            common.save_jsonl(
                vc_ss_training_sampler.sent_list, analysis_dir /
                f"E_{i_epoch}_whole_train_sent_{save_tool.get_cur_time_str()}.jsonl"
            )
            common.save_jsonl(
                train_data_with_candidate_sample_list, analysis_dir /
                f"E_{i_epoch}_sampled_train_sent_{save_tool.get_cur_time_str()}.jsonl"
            )
            common.save_jsonl(
                cur_train_vc_data, analysis_dir /
                f"E_{i_epoch}_train_vc_data_{save_tool.get_cur_time_str()}.jsonl"
            )

        print(f"E{i_epoch} VC_data:", len(cur_train_vc_data))

        # Sample negative candidate data for ss; the upper sampling rate follows the per-epoch schedule above.
        neg_sample_upper_prob = schedule_sample_dict[i_epoch]
        print("Neg Sampler upper rate:", neg_sample_upper_prob)
        # print("Rate decreasing")
        # neg_sample_upper_prob -= decay_r
        neg_sample_upper_prob = max(0.000, neg_sample_upper_prob)

        cur_train_ss_data = vc_ss_training_sampler.sample_for_ss(
            neg_only=neg_only, upper_prob=neg_sample_upper_prob)

        if i_epoch >= 1:  # from the second epoch on, balance positive and negative examples for selection
            # new_ss_data = []
            pos_ss_data = []
            neg_ss_data = []
            for item in cur_train_ss_data:
                if item['selection_label'] == 'true':
                    pos_ss_data.append(item)
                elif item['selection_label'] == 'false':
                    neg_ss_data.append(item)

            ss_sample_size = min(len(pos_ss_data), len(neg_ss_data))
            random.shuffle(pos_ss_data)
            random.shuffle(neg_ss_data)
            cur_train_ss_data = pos_ss_data[:int(
                ss_sample_size * 0.5)] + neg_ss_data[:ss_sample_size]
            random.shuffle(cur_train_ss_data)

        vc_ss_training_sampler.show_info(cur_train_ss_data)
        print(f"E{i_epoch} SS_data:", len(cur_train_ss_data))

        vc_ss.data_wrangler.assign_task_label(cur_train_ss_data, 'ss')
        vc_ss.data_wrangler.assign_task_label(cur_train_vc_data, 'vc')

        vs_ss_train_list = cur_train_ss_data + cur_train_vc_data
        random.shuffle(vs_ss_train_list)
        print(f"E{i_epoch} Total ss+vc:", len(vs_ss_train_list))
        vc_ss_instance = train_fever_data_reader.read(vs_ss_train_list)

        train_iter = biterator(vc_ss_instance, shuffle=True, num_epochs=1)

        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)

            if i_epoch >= 1:
                ratio_ss_for_vc = 0.8

            loss = compute_mixing_loss(
                model,
                out,
                batch,
                criterion,
                vc_ss_training_sampler,
                ss_for_vc_prob=ratio_ss_for_vc)  # Important change

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            # EMA update
            ema(model.named_parameters())

            if i_epoch < 9:
                mod = 10000
                # mod = 100
            else:
                mod = 2000

            if iteration % mod == 0:

                # This is the code for eval:
                load_ema_to_model(cloned_empty_model, ema)

                vc_ss.data_wrangler.assign_task_label(
                    complete_upstream_dev_data, 'ss')
                dev_ss_instance = dev_fever_data_reader.read(
                    complete_upstream_dev_data)
                eval_ss_iter = biterator(dev_ss_instance,
                                         num_epochs=1,
                                         shuffle=False)
                scored_dev_sent_data = hidden_eval_ss(
                    cloned_empty_model, eval_ss_iter,
                    complete_upstream_dev_data)

                # for vc
                filtered_dev_list = vc_ss.data_wrangler.sample_sentences_for_vc_with_nei(
                    config.T_FEVER_DEV_JSONL, scored_dev_sent_data,
                    dev_prob_threshold, dev_sample_top_k)

                dev_selection_dict = paired_selection_score_dict(
                    scored_dev_sent_data)
                ready_dev_list = select_sent_with_prob_for_eval(
                    config.T_FEVER_DEV_JSONL,
                    filtered_dev_list,
                    dev_selection_dict,
                    tokenized=True)

                vc_ss.data_wrangler.assign_task_label(ready_dev_list, 'vc')
                dev_vc_instance = dev_fever_data_reader.read(ready_dev_list)
                eval_vc_iter = biterator(dev_vc_instance,
                                         num_epochs=1,
                                         shuffle=False)
                eval_dev_result_list = hidden_eval_vc(cloned_empty_model,
                                                      eval_vc_iter,
                                                      ready_dev_list)

                # Scoring
                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                    eval_dev_result_list,
                    common.load_jsonl(config.T_FEVER_DEV_JSONL),
                    mode=eval_mode,
                    verbose=False)
                print("Fever Score(Strict/Acc./Precision/Recall/F1):",
                      strict_score, acc_score, pr, rec, f1)

                print(f"Dev:{strict_score}/{acc_score}")

                if do_analysis:
                    # Customized analysis output
                    common.save_jsonl(
                        scored_dev_sent_data, analysis_dir /
                        f"E_{i_epoch}_scored_dev_sent_{save_tool.get_cur_time_str()}.jsonl"
                    )
                    common.save_jsonl(
                        eval_dev_result_list, analysis_dir /
                        f"E_{i_epoch}_eval_vc_output_data_{save_tool.get_cur_time_str()}.jsonl"
                    )

                need_save = False
                if strict_score > best_dev:
                    best_dev = strict_score
                    need_save = True

                if need_save or i_epoch < 7:
                    # save_path = os.path.join(
                    #     file_path_prefix,
                    #     f'i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})'
                    # )

                    # torch.save(model.state_dict(), save_path)

                    ema_save_path = os.path.join(
                        file_path_prefix,
                        f'ema_i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_p({pr})_r({rec})_f1({f1})_seed({seed})'
                    )

                    save_ema_to_file(ema, ema_save_path)
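
The loop above maintains an exponential moving average (EMA) of the model weights through ema(model.named_parameters()), evaluates with the averaged weights via load_ema_to_model, and checkpoints them with save_ema_to_file. Those helpers are project-specific and not shown here; the sketch below is a minimal, hypothetical EMA helper illustrating the pattern (class name, decay value, and method names are assumptions, not the project's implementation).

# Minimal EMA sketch (hypothetical; not the project's ema/load_ema_to_model/save_ema_to_file).
import torch


class SimpleEMA:
    def __init__(self, decay=0.9999):
        self.decay = decay
        self.shadow = {}  # parameter name -> running average tensor

    def __call__(self, named_parameters):
        # Called after every optimizer step to update the running averages.
        for name, param in named_parameters:
            if not param.requires_grad:
                continue
            value = param.detach().clone()
            if name not in self.shadow:
                self.shadow[name] = value
            else:
                self.shadow[name].mul_(self.decay).add_(value, alpha=1 - self.decay)

    def copy_to(self, model):
        # Load the averaged weights into a cloned model before evaluation.
        with torch.no_grad():
            for name, param in model.named_parameters():
                if name in self.shadow:
                    param.copy_(self.shadow[name])
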
Example #17
0
    kw_terms = list(kw_terms)
    kw_terms_total_size = len(kw_terms)
    for start in range(0, kw_terms_total_size, chuck_size):
        print(start, start + chuck_size)
        current_kw_terms = kw_terms[start:start + chuck_size]
        keyword_processor = KeywordProcessor(case_sensitive=True)
        for word in tqdm(current_kw_terms):
            keyword_processor.add_keyword(word)

        for item in tqdm(d_list):
            query = item['question']
            terms = query_get_terms(query, keyword_processor)
            if 'kw_matches' not in item:
                item['kw_matches'] = []
            item['kw_matches'].extend(terms)

        del keyword_processor

    return d_list


if __name__ == '__main__':
    kw_terms = load_terms(config.PDATA_ROOT / "reverse_indexing/terms.txt")
    # d_list = common.load_json(config.DEV_FULLWIKI_FILE)
    d_list = common.load_json(config.TRAIN_FILE)
    d_list = get_kwterm_matching(kw_terms, d_list)
    # common.save_jsonl(d_list, config.RESULT_PATH / "kw_term_match_result/dev_term_match_result.jsonl")
    common.save_jsonl(
        d_list, config.RESULT_PATH /
        "kw_term_match_result/train_term_match_result.jsonl")
def eval_for_remaining():
    batch_size = 128
    lazy = True

    # SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-16-11:37:07_simple_nn/i(25000)_epoch(1)_(tra_score:0.8188318831883188|clf_acc:95.67680650034835|pr:0.7394326932693269|rec:0.7282478247824783|f1:0.7337976403219241|loss:0.11368581993118955)"
    SAVE_PATH = config.PRO_ROOT / "saved_models/saved_sselector/i(57167)_epoch(6)_(tra_score:0.8850885088508851|raw_acc:1.0|pr:0.3834395939593578|rec:0.8276327632763276|f1:0.5240763176570098)_epoch"
    # SAVE_PATH = config.PRO_ROOT / "saved_models/07-20-01:35:16_simple_nn_startkp_0.4_de_0.05/i(53810)_epoch(4)_(tra_score:0.8577357735773578|raw_acc:1.0|pr:0.671477147714762|rec:0.7866036603660366|f1:0.7244953493898653)_epoch"
    print("Model From:", SAVE_PATH)
    # dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    # dev_upstream_file = config.RESULT_PATH / "doc_retri/cn_util_Jul17_docretri.singularize/dev.jsonl"
    # dev_upstream_file = config.RESULT_PATH / "doc_retri/docretri.pageview/dev.jsonl"
    # dev_upstream_file = config.RESULT_PATH / "doc_retri/docretri.pageview/train.jsonl"
    #
    # SAVE_RESULT_TARGET_FOLDER.mkdir()

    incoming_data_file = config.RESULT_PATH / "sent_retri_nn/remaining_training_cache/dev_s.jsonl"
    incoming_data = common.load_jsonl(incoming_data_file)
    SAVE_RESULT_TARGET_FOLDER = config.RESULT_PATH / "sent_retri_nn/remaining_training_cache"

    # out_file_name = "dev_sent.jsonl"
    out_file_name = "remain_dev_sent.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers,
                                            lazy=lazy)

    # complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL, dev_upstream_file, pred=True)
    # complete_upstream_dev_data = get_full_list(config.T_FEVER_TRAIN_JSONL, dev_upstream_file, pred=True)
    print("Dev size:", len(incoming_data))
    dev_instances = dev_fever_data_reader.read(incoming_data)

    # Load Vocabulary
    dev_biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='selection_labels')
    # Label value
    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300,
                  max_l=300,
                  num_of_class=2)

    model.load_state_dict(torch.load(SAVE_PATH))
    model.display()
    model.to(device)

    eval_iter = dev_biterator(dev_instances,
                              shuffle=False,
                              num_epochs=1,
                              cuda_device=device_num)
    complete_upstream_dev_data = hidden_eval(model, eval_iter, incoming_data)

    common.save_jsonl(complete_upstream_dev_data,
                      SAVE_RESULT_TARGET_FOLDER / out_file_name)

    total = 0
    hit = 0

    for item in complete_upstream_dev_data:
        assert item['selection_label'] == 'true'
        if item['prob'] >= 0.5:
            hit += 1
        total += 1

    print(hit, total, hit / total)
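
The hit/total loop above reports the fraction of gold sentences (selection_label == 'true') that receive a probability of at least 0.5. The same check as a small reusable helper (a sketch; the field names follow the snippet above):

def recall_at_threshold(scored_items, threshold=0.5):
    # Fraction of gold items whose predicted probability clears the threshold.
    gold = [it for it in scored_items if it['selection_label'] == 'true']
    if not gold:
        return 0.0
    return sum(1 for it in gold if it['prob'] >= threshold) / len(gold)
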
Example #19
0
def evaluation():
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpu",
                        action="store_true",
                        help="If set, we only use CPU.")
    parser.add_argument("--model_class_name",
                        type=str,
                        help="Set the model class of the experiment.",
                        required=True)

    parser.add_argument("--model_checkpoint_path",
                        type=str,
                        help='Set the path to the model checkpoint.',
                        required=True)

    parser.add_argument("--output_prediction_path",
                        type=str,
                        default=None,
                        help='Set the path to save the prediction.')

    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=16,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )

    parser.add_argument("--max_length",
                        default=156,
                        type=int,
                        help="Max length of the sequences.")

    parser.add_argument("--eval_data",
                        type=str,
                        help="The training data used in the experiments.")

    args = parser.parse_args()

    if args.cpu:
        args.global_rank = -1
    else:
        args.global_rank = 0

    model_checkpoint_path = args.model_checkpoint_path
    num_labels = 3  # we are doing NLI, so num_labels = 3; change this value for other tasks.

    max_length = args.max_length

    model_class_item = MODEL_CLASSES[args.model_class_name]
    model_name = model_class_item['model_name']
    do_lower_case = model_class_item[
        'do_lower_case'] if 'do_lower_case' in model_class_item else False

    tokenizer = model_class_item['tokenizer'].from_pretrained(
        model_name,
        cache_dir=str(config.PRO_ROOT / "trans_cache"),
        do_lower_case=do_lower_case)

    model = model_class_item['sequence_classification'].from_pretrained(
        model_name,
        cache_dir=str(config.PRO_ROOT / "trans_cache"),
        num_labels=num_labels)

    model.load_state_dict(torch.load(model_checkpoint_path))

    padding_token_value = tokenizer.convert_tokens_to_ids(
        [tokenizer.pad_token])[0]
    padding_segement_value = model_class_item["padding_segement_value"]
    padding_att_value = model_class_item["padding_att_value"]
    left_pad = model_class_item[
        'left_pad'] if 'left_pad' in model_class_item else False

    batch_size_per_gpu_eval = args.per_gpu_eval_batch_size

    eval_data_str = args.eval_data
    eval_data_name = []
    eval_data_path = []
    eval_data_list = []

    eval_data_named_path = eval_data_str.split(',')

    for named_path in eval_data_named_path:
        ind = named_path.find(':')
        name = named_path[:ind]
        path = named_path[ind + 1:]
        if name in registered_path:
            d_list = common.load_jsonl(registered_path[name])
        else:
            d_list = common.load_jsonl(path)
        eval_data_name.append(name)
        eval_data_path.append(path)

        eval_data_list.append(d_list)

    batching_schema = {
        'uid':
        RawFlintField(),
        'y':
        LabelFlintField(),
        'input_ids':
        ArrayIndexFlintField(pad_idx=padding_token_value, left_pad=left_pad),
        'token_type_ids':
        ArrayIndexFlintField(pad_idx=padding_segement_value,
                             left_pad=left_pad),
        'attention_mask':
        ArrayIndexFlintField(pad_idx=padding_att_value, left_pad=left_pad),
    }

    data_transformer = NLITransform(model_name, tokenizer, max_length)
    eval_data_loaders = []
    for eval_d_list in eval_data_list:
        d_dataset, d_sampler, d_dataloader = build_eval_dataset_loader_and_sampler(
            eval_d_list, data_transformer, batching_schema,
            batch_size_per_gpu_eval)
        eval_data_loaders.append(d_dataloader)

    if not args.cpu:
        torch.cuda.set_device(0)
        model.cuda(0)

    r_dict = dict()
    # Eval loop:
    for i in range(len(eval_data_name)):
        cur_eval_data_name = eval_data_name[i]
        cur_eval_data_list = eval_data_list[i]
        cur_eval_dataloader = eval_data_loaders[i]
        # cur_eval_raw_data_list = eval_raw_data_list[i]

        evaluation_dataset(args,
                           cur_eval_dataloader,
                           cur_eval_data_list,
                           model,
                           r_dict,
                           eval_name=cur_eval_data_name)

    # save prediction:
    if args.output_prediction_path is not None:
        cur_results_path = Path(args.output_prediction_path)
        if not cur_results_path.exists():
            cur_results_path.mkdir(parents=True)
        for key, item in r_dict.items():
            common.save_jsonl(item['predictions'],
                              cur_results_path / f"{key}.jsonl")

        # avoid saving too many things
        for key, item in r_dict.items():
            del r_dict[key]['predictions']
        common.save_json(r_dict,
                         cur_results_path / "results_dict.json",
                         indent=2)
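
For reference, evaluation() reads --eval_data as comma-separated name:path pairs; a name found in registered_path takes priority over the path. A hypothetical invocation (script name, checkpoint path, and dataset names are placeholders, not the project's actual files):

python eval_script.py \
    --model_class_name roberta-large \
    --model_checkpoint_path saved_models/nli/model.pt \
    --output_prediction_path predictions/ \
    --per_gpu_eval_batch_size 32 \
    --max_length 156 \
    --eval_data mnli_m_dev:data/mnli_m_dev.jsonl
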
Example #20
0
def eval_model_for_downstream(model_saved_path):
    seed = 12
    torch.manual_seed(seed)
    bert_model_name = 'bert-base-uncased'
    # lazy = False
    lazy = True
    forward_size = 32
    # batch_size = 64
    batch_size = 128
    do_lower_case = True

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)

    dev_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    train_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level_labeled.jsonl")
    test_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_test_p_level_unlabeled.jsonl")

    if debug_mode:
        dev_list = dev_list[:10]
        dev_fitems_list = dev_fitems_list[:296]
        train_fitems_list = train_fitems_list[:300]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    train_o_dict = list_dict_data_tool.list_to_dict(train_list, '_id')

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(bert_tokenizer, lazy, is_paired=True,
                                                example_filter=lambda x: len(x['context']) == 0, max_l=286)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1,
                                            act_type='tanh', use_pretrained_pooler=True, use_sigmoid=True)

    model.load_state_dict(torch.load(model_saved_path))

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    #
    dev_instances = bert_cs_reader.read(dev_fitems_list)
    train_instance = bert_cs_reader.read(train_fitems_list)
    test_instances = bert_cs_reader.read(test_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    # train_iter = biterator(train_instance, num_epochs=1, shuffle=False)
    # dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)
    test_iter = biterator(test_instances, num_epochs=1, shuffle=False)

    print(len(dev_fitems_list))
    print(len(test_fitems_list))
    print(len(train_fitems_list))

    # cur_dev_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True, show_progress=True)
    # cur_train_eval_results_list = eval_model(model, train_iter, device_num, with_probs=True, show_progress=True)

    cur_test_eval_results_list = eval_model(model, test_iter, device_num, with_probs=True, show_progress=True)
    common.save_jsonl(cur_test_eval_results_list, "test_p_level_bert_v1_results.jsonl")

    print("Test write finished.")
    exit(0)

    # NOTE: everything below is unreachable because of exit(0) above; it relies on the
    # commented-out dev/train evaluation calls and is kept only for reference.
    copied_dev_o_dict = copy.deepcopy(dev_o_dict)

    list_dict_data_tool.append_subfield_from_list_to_dict(cur_dev_eval_results_list, copied_dev_o_dict,
                                                          'qid', 'fid', check=True)
    # Top_3
    cur_results_dict_top3 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=3)
    upperbound_results_dict_top3 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top3,
        dev_list)

    # Top_5
    cur_results_dict_top5 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=5)
    upperbound_results_dict_top5 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top5,
        dev_list)

    cur_results_dict_top10 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=10)
    upperbound_results_dict_top10 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top10,
        dev_list)

    _, metrics_top3 = ext_hotpot_eval.eval(cur_results_dict_top3, dev_list, verbose=False)
    _, metrics_top3_UB = ext_hotpot_eval.eval(upperbound_results_dict_top3, dev_list, verbose=False)

    _, metrics_top5 = ext_hotpot_eval.eval(cur_results_dict_top5, dev_list, verbose=False)
    _, metrics_top5_UB = ext_hotpot_eval.eval(upperbound_results_dict_top5, dev_list, verbose=False)

    _, metrics_top10 = ext_hotpot_eval.eval(cur_results_dict_top10, dev_list, verbose=False)
    _, metrics_top10_UB = ext_hotpot_eval.eval(upperbound_results_dict_top10, dev_list, verbose=False)

    logging_item = {
        'top3': metrics_top3,
        'top3_UB': metrics_top3_UB,
        'top5': metrics_top5,
        'top5_UB': metrics_top5_UB,
        'top10': metrics_top10,
        'top10_UB': metrics_top10_UB,
    }

    print(logging_item)

    common.save_jsonl(cur_train_eval_results_list, "train_p_level_bert_v1_results.jsonl")
    common.save_jsonl(cur_dev_eval_results_list, "dev_p_level_bert_v1_results.jsonl")
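
The tail of eval_model_for_downstream keeps the top 3/5/10 scored paragraphs per question with select_top_k_and_to_results_dict before computing upper bounds. That helper's implementation is not shown; the hypothetical sketch below only illustrates the top-k idea (the 'element' and 'prob' field names are assumptions):

def select_top_k_paragraphs(scored_items, top_k=3):
    # Hypothetical helper: keep the k highest-probability paragraph titles per question id.
    by_qid = {}
    for it in scored_items:
        by_qid.setdefault(it['qid'], []).append(it)
    return {
        qid: [it['element'] for it in
              sorted(items, key=lambda x: x['prob'], reverse=True)[:top_k]]
        for qid, items in by_qid.items()
    }
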
def analysis_model(model_path):
    batch_size = 32
    lazy = True
    train_prob_threshold = 0.02
    train_sample_top_k = 8
    dev_prob_threshold = 0.1
    dev_sample_top_k = 5

    neg_sample_upper_prob = 0.006
    decay_r = 0.002

    top_k_doc = 5
    dev_doc_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/dev_doc.jsonl"

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL,
                                               dev_doc_upstream_file,
                                               pred=True,
                                               top_k=top_k_doc)

    print("Dev size:", len(complete_upstream_dev_data))

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    # Data Reader
    dev_fever_data_reader = VCSS_Reader(token_indexers=token_indexers,
                                        lazy=lazy,
                                        max_l=260)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")

    vocab.add_token_to_namespace('true', namespace='labels')
    vocab.add_token_to_namespace('false', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    # Reader and prepare end

    # vc_ss_training_sampler = VCSSTrainingSampler(complete_upstream_train_data)
    # vc_ss_training_sampler.show_info()

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(rnn_size_in=(1024 + 300 + 1, 1024 + 450 + 1),
                  rnn_size_out=(450, 450),
                  weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  mlp_d=900,
                  embedding_dim=300,
                  max_l=300)

    print("Model Max length:", model.max_l)

    model.display()
    model.to(device)

    cloned_empty_model = copy.deepcopy(model)

    load_ema_to_model(cloned_empty_model, model_path)

    vc_ss.data_wrangler.assign_task_label(complete_upstream_dev_data, 'ss')
    dev_ss_instance = dev_fever_data_reader.read(complete_upstream_dev_data)
    eval_ss_iter = biterator(dev_ss_instance, num_epochs=1, shuffle=False)
    scored_dev_sent_data = hidden_eval_ss(cloned_empty_model, eval_ss_iter,
                                          complete_upstream_dev_data)

    common.save_jsonl(scored_dev_sent_data, "dev_scored_sent_data.jsonl")
    # for vc
    filtered_dev_list = vc_ss.data_wrangler.sample_sentences_for_vc_with_nei(
        config.T_FEVER_DEV_JSONL, scored_dev_sent_data, dev_prob_threshold,
        dev_sample_top_k)
    common.save_jsonl(filtered_dev_list,
                      "dev_scored_sent_data_after_sample.jsonl")

    dev_selection_dict = paired_selection_score_dict(scored_dev_sent_data)
    ready_dev_list = select_sent_with_prob_for_eval(config.T_FEVER_DEV_JSONL,
                                                    filtered_dev_list,
                                                    dev_selection_dict,
                                                    tokenized=True)

    vc_ss.data_wrangler.assign_task_label(ready_dev_list, 'vc')
    dev_vc_instance = dev_fever_data_reader.read(ready_dev_list)
    eval_vc_iter = biterator(dev_vc_instance, num_epochs=1, shuffle=False)
    eval_dev_result_list = hidden_eval_vc(cloned_empty_model, eval_vc_iter,
                                          ready_dev_list)

    common.save_jsonl(eval_dev_result_list, "dev_nli_results.jsonl")

    # Scoring
    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
        eval_dev_result_list,
        common.load_jsonl(config.T_FEVER_DEV_JSONL),
        mode=eval_mode,
        verbose=False)
    print("Fever Score(Strict/Acc./Precision/Recall/F1):", strict_score,
          acc_score, pr, rec, f1)

    print(f"Dev:{strict_score}/{acc_score}")
def hidden_eval_fever_adv_v1():
    batch_size = 64
    lazy = True
    dev_prob_threshold = 0.5

    SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-20-22:28:24_mesim_wn_450_adv_sample_v1_|t_prob:0.35|top_k:8/i(46000)_epoch(7)_dev(0.6405140514051405)_loss(1.0761665150348825)_seed(12)"

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/2018_07_20_15:17:59_r/dev_sent.jsonl")

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    p_dict = wn_persistent_api.persistence_load()

    upstream_dev_list = score_converter_scaled(config.T_FEVER_DEV_JSONL,
                                               dev_upstream_sent_list,
                                               scale_prob=dev_prob_threshold,
                                               delete_prob=False)

    dev_fever_data_reader = WNReader(token_indexers=token_indexers,
                                     lazy=lazy,
                                     wn_p_dict=p_dict,
                                     max_l=360)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL,
                                                 upstream_dev_list)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden',
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 450),
        rnn_size_out=(450, 450),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        mlp_d=900,
        embedding_dim=300,
        max_l=300)

    print("Model Max length:", model.max_l)
    model.load_state_dict(torch.load(SAVE_PATH))
    model.display()
    model.to(device)

    eval_iter = biterator(dev_instances,
                          shuffle=False,
                          num_epochs=1,
                          cuda_device=device_num)
    builded_dev_data = hidden_eval(model, eval_iter,
                                   complete_upstream_dev_data)

    eval_mode = {'check_sent_id_correct': True, 'standard': True}

    common.save_jsonl(
        builded_dev_data,
        config.RESULT_PATH / "nli_results" / "pipeline_results_1.jsonl")
    c_scorer.delete_label(builded_dev_data)
    print(
        c_scorer.fever_score(builded_dev_data,
                             common.load_jsonl(config.FEVER_DEV_JSONL),
                             mode=eval_mode))
Example #23
0
def merge_results_with_haonao_module(term_retrieval_top_k=3,
                                     match_filtering_k=2,
                                     haonan_topk=10,
                                     tag='dev',
                                     save=False):
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
        task_name = 'shared_task_dev'
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
        task_name = 'train'
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
        task_name = 'shared_task_test'
    else:
        raise ValueError(f"Tag:{tag} not supported.")

    # r_list = common.load_jsonl(config.RESULT_PATH / f'doc_retri_results/fever_results/standard_term_based_results/'
    # f'fever_term_based_retri_results_{tag}_term_topk:{term_retrieval_top_k}_match_filtering_k:{match_filtering_k}.jsonl')

    r_list = common.load_jsonl(
        config.RESULT_PATH /
        f'doc_retri_results/fever_results/standard_term_based_results/'
        f'fever_term_based_retri_results_{tag}_term_topk:{term_retrieval_top_k}_match_filtering_k:{match_filtering_k}.jsonl'
    )

    old_result_list = common.load_jsonl(
        config.RESULT_PATH /
        f"doc_retri_results/fever_results/haonans_results/dr_{tag}.jsonl")
    item_resorting(old_result_list, top_k=haonan_topk)

    old_result_dict = list_dict_data_tool.list_to_dict(old_result_list, 'id')

    for i, item in enumerate(r_list):
        predicted_docids = item['predicted_docids']
        modified_docids = []
        for docid in predicted_docids:
            docid = docid.replace(' ', '_')
            docid = reverse_convert_brc(docid)
            modified_docids.append(docid)
        item['predicted_docids'] = modified_docids
        # item['predicted_docids'] = []

    merged_result_list = []
    for item in tqdm(r_list):
        cur_id = int(item['id'])
        old_retrieval_doc = old_result_dict[cur_id]['predicted_docids']
        new_retrieval_doc = item['predicted_docids']
        m_predicted_docids = set.union(set(old_retrieval_doc),
                                       set(new_retrieval_doc))
        # print(m_predicted_docids)
        m_predicted_docids = [
            docid for docid in m_predicted_docids
            if not docid.startswith('List_of_')
        ]
        item['predicted_docids'] = list(m_predicted_docids)
        # print(item['predicted_docids'])

    mode = {'standard': False, 'check_doc_id_correct': True}
    if tag != 'test':
        fever_scorer.fever_score_analysis(r_list,
                                          d_list,
                                          mode=mode,
                                          max_evidence=None)

    if save:
        print("Saved to:")
        common.save_jsonl(
            r_list, config.RESULT_PATH /
            f"doc_retri_results/fever_results/merged_doc_results/m_doc_{tag}.jsonl"
        )

    # Stats: distribution of retrieved-document counts per claim.
    len_list = []
    for rset in r_list:
        len_list.append(len(rset['predicted_docids']))

    print(collections.Counter(len_list).most_common(10000))

    print(np.mean(len_list))
    print(np.std(len_list))
    print(np.max(len_list))
    print(np.min(len_list))
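
The merge step above replaces spaces with underscores and calls reverse_convert_brc to restore FEVER-style page ids. Assuming the usual FEVER title encoding, where brackets and colons appear as -LRB-/-RRB-/-COLON- tokens, a minimal sketch of such a conversion could look like this (the project's helper may cover more cases):

def reverse_convert_brc_sketch(docid):
    # Hedged sketch; assumes FEVER's -LRB-/-RRB-/-COLON- encoding for page titles.
    return (docid.replace('(', '-LRB-')
                 .replace(')', '-RRB-')
                 .replace(':', '-COLON-'))


# reverse_convert_brc_sketch('Savages_(2012_film)') -> 'Savages_-LRB-2012_film-RRB-'
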
Example #24
0
            del item['evidence']


if __name__ == '__main__':
    # IN_FILE = config.RESULT_PATH / "sent_retri_nn/2018_07_17_15-11-11_r/dev_sent.jsonl"
    # IN_FILE = config.RESULT_PATH / "sent_retri_nn/2018_07_17_15-52-19_r/dev_sent.jsonl"
    IN_FILE = config.RESULT_PATH / "sent_retri_nn/2018_07_20_15-17-59_r/dev_sent.jsonl"
    # IN_FILE = config.RESULT_PATH / "sent_retri_nn/2018_07_20_15-17-59_r/train_sent.jsonl"
    dev_sent_result_list = common.load_jsonl(IN_FILE)
    dev_results_list = score_converter_scaled(config.T_FEVER_DEV_JSONL,
                                              dev_sent_result_list,
                                              scale_prob=0.1)
    # dev_results_list = score_converter_scaled(config.T_FEVER_TRAIN_JSONL, dev_sent_result_list, scale_prob=0.1)

    common.save_jsonl(
        dev_results_list, config.RESULT_PATH /
        "sent_retri_nn/2018_07_20_15-17-59_r/dev_scale(0.1).jsonl")

    # for item in dev_results_list:
    #     print(item['scored_sentids'])

    # common.save_jsonl(dev_results_list, "/Users/Eason/RA/FunEver/results/sent_retri_nn/2018_07_17_16-34-19_r/dev_scale(0.1).jsonl")
    # common.save_jsonl(dev_results_list, "/Users/Eason/RA/FunEver/results/sent_retri_nn/2018_07_17_16-34-19_r/dev_scale(0.1).jsonl")

    # eval_mode = {'check_doc_id_correct': True, 'check_sent_id_correct': True, 'standard': True}
    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    # c_scorer.delete_label(dev_results_list)
    strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
        dev_results_list,
        common.load_jsonl(config.FEVER_DEV_UNLABELED_JSONL),
        mode=eval_mode,
        verbose=False)
    print("Fever Score(Strict/Acc./Precision/Recall/F1):", strict_score,
          acc_score, pr, rec, f1)


def eval_m_on_sselection(model_path):
    # This method was created on 25 Nov 2018 09:32 to use the claim verification model to score sentences for selection.
    batch_size = 32
    lazy = True
    top_k_doc = 5
    save_file_name = "/home/easonnie/projects/FunEver/results/sent_retri_nn/bert_verification_for_selection_probing_11_25_2018/dev_sent_scores.txt"

    dev_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/dev_doc.jsonl"
    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL,
                                               dev_upstream_file,
                                               pred=True,
                                               top_k=top_k_doc)

    debug = None

    bert_type_name = "bert-large-uncased"
    bert_servant = BertServant(bert_type_name=bert_type_name)
    # train_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/train_doc.jsonl"

    # train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=180)
    # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False)
    dev_fever_data_reader = BertSSReader(bert_servant, lazy=lazy, max_l=80)

    print("Dev size:", len(complete_upstream_dev_data))
    # dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)
    if debug is not None:
        complete_upstream_dev_data = complete_upstream_dev_data[:debug]

    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    unk_token_num = {'tokens': 2600}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')

    vocab.add_token_to_namespace('true', namespace='labels')
    vocab.add_token_to_namespace('false', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    print(vocab)

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0
    bert_servant.bert_model.to(device)

    # Init model here
    model = Model(
        bert_servant,
        bert_batch_size=1,
        rnn_size_in=(1024 + 2, 1024 + 2 + 300),  # probs + task indicator.
        rnn_size_out=(300, 300),
        max_l=250,
        mlp_d=300,
        num_of_class=3,
        drop_r=0.5,
        activation_type='gelu')

    model.load_state_dict(torch.load(model_path))
    model.to(device)

    eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1)
    dev_scored_data = hidden_eval_on_sselection(model, eval_iter,
                                                complete_upstream_dev_data)

    common.save_jsonl(dev_scored_data, save_file_name)
def eval_and_save_v2(model_path,
                     is_ema,
                     saving_dir,
                     save_train_data=True,
                     prob_thresholds=0.5):
    # This method was modified on 21 Nov 2018
    # for evaluating the balanced-trained selection model with different threshold values.
    # It will then be used for later verification.

    # Evaluate and save all the sentence-pair results to be used for downstream verification.
    # 03 Oct 2018 03:56:40.
    seed = 12
    batch_size = 128
    lazy = True
    torch.manual_seed(seed)
    keep_neg_sample_prob = 1
    top_k_doc = 5

    # sample_prob_decay = 0.05
    dev_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/dev_doc.jsonl"
    train_upstream_file = config.RESULT_PATH / "doc_retri/std_upstream_data_using_pageview/train_doc.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = SSelectorReader(token_indexers=token_indexers,
                                              lazy=lazy,
                                              max_l=180)
    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers,
                                            lazy=lazy,
                                            max_l=180)

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL,
                                               dev_upstream_file,
                                               pred=True,
                                               top_k=top_k_doc)

    complete_upstream_train_data = get_full_list(config.T_FEVER_TRAIN_JSONL,
                                                 train_upstream_file,
                                                 pred=False,
                                                 top_k=top_k_doc)

    print("Dev size:", len(complete_upstream_dev_data))
    print("Train size:", len(complete_upstream_train_data))
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)
    train_instances = train_fever_data_reader.read(
        complete_upstream_train_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    dev_biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='selection_labels')
    # Label value

    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300,
                  max_l=160,
                  num_of_class=2)

    if not is_ema:
        model.load_state_dict(torch.load(model_path))
    else:
        load_ema_to_model(model, model_path)

    model.display()
    model.to(device)

    dev_actual_list = common.load_jsonl(config.T_FEVER_DEV_JSONL)
    train_actual_list = common.load_jsonl(config.T_FEVER_TRAIN_JSONL)

    eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1)
    train_iter = biterator(train_instances, shuffle=False, num_epochs=1)

    complete_upstream_dev_data = hidden_eval(model, eval_iter,
                                             complete_upstream_dev_data)

    if save_train_data:
        complete_upstream_train_data = hidden_eval(
            model, train_iter, complete_upstream_train_data)
        common.save_jsonl(complete_upstream_train_data,
                          Path(str(saving_dir)) / "train_sent_scores.jsonl")
        common.save_jsonl(complete_upstream_dev_data,
                          Path(str(saving_dir)) / "dev_sent_pred_scores.jsonl")

    if not isinstance(prob_thresholds, list):
        prob_thresholds = [prob_thresholds]

    for scal_prob in prob_thresholds:
        print("Eval Dev Data prob_threshold:", scal_prob)

        dev_results_list = score_converter_v1(config.T_FEVER_DEV_JSONL,
                                              complete_upstream_dev_data,
                                              sent_retri_top_k=5,
                                              sent_retri_scal_prob=scal_prob)
        # This is only a wrapper for the simi_sampler

        eval_mode = {'check_sent_id_correct': True, 'standard': True}
        for a, b in zip(dev_actual_list, dev_results_list):
            b['predicted_label'] = a['label']
        strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
            dev_results_list, dev_actual_list, mode=eval_mode, verbose=False)
        tracking_score = strict_score
        print(f"Dev(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/")
        print("Strict score:", strict_score)
        print(f"Eval Tracking score:", f"{tracking_score}")

    if save_train_data:
        print("Build Train Data")
        train_results_list = score_converter_v1(
            config.T_FEVER_TRAIN_JSONL,
            complete_upstream_train_data,
            sent_retri_top_k=5,
            sent_retri_scal_prob=prob_thresholds[-1])

        # This is only a wrapper for the simi_sampler

        eval_mode = {'check_sent_id_correct': True, 'standard': True}
        for a, b in zip(train_actual_list, train_results_list):
            b['predicted_label'] = a['label']
        strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
            train_results_list,
            train_actual_list,
            mode=eval_mode,
            verbose=False)
        tracking_score = strict_score
        print(f"Train(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/")
        print("Strict score:", strict_score)
        print(f"Eval Tracking score:", f"{tracking_score}")
Example #27
0
def train(local_rank, args):
    # debug = False
    # print("GPU:", gpu)
    # world_size = args.world_size
    args.global_rank = args.node_rank * args.gpus_per_node + local_rank
    args.local_rank = local_rank
    # args.warmup_steps = 20
    debug_count = 1000
    num_epoch = args.epochs

    actual_train_batch_size = args.world_size * args.per_gpu_train_batch_size * args.gradient_accumulation_steps
    args.actual_train_batch_size = actual_train_batch_size

    set_seed(args.seed)
    num_labels = 3  # we are doing NLI, so num_labels = 3; change this value for other tasks.

    max_length = args.max_length

    model_class_item = MODEL_CLASSES[args.model_class_name]
    model_name = model_class_item['model_name']
    do_lower_case = model_class_item[
        'do_lower_case'] if 'do_lower_case' in model_class_item else False

    tokenizer = model_class_item['tokenizer'].from_pretrained(
        model_name,
        cache_dir=str(config.PRO_ROOT / "trans_cache"),
        do_lower_case=do_lower_case)

    model = model_class_item['sequence_classification'].from_pretrained(
        model_name,
        cache_dir=str(config.PRO_ROOT / "trans_cache"),
        num_labels=num_labels)

    padding_token_value = tokenizer.convert_tokens_to_ids(
        [tokenizer.pad_token])[0]
    padding_segement_value = model_class_item["padding_segement_value"]
    padding_att_value = model_class_item["padding_att_value"]
    left_pad = model_class_item[
        'left_pad'] if 'left_pad' in model_class_item else False

    batch_size_per_gpu_train = args.per_gpu_train_batch_size
    batch_size_per_gpu_eval = args.per_gpu_eval_batch_size

    if not args.cpu and not args.single_gpu:
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=args.world_size,
                                rank=args.global_rank)

    train_data_str = args.train_data
    train_data_weights_str = args.train_weights
    eval_data_str = args.eval_data

    train_data_name = []
    train_data_path = []
    train_data_list = []
    train_data_weights = []

    eval_data_name = []
    eval_data_path = []
    eval_data_list = []

    train_data_named_path = train_data_str.split(',')
    weights_str = train_data_weights_str.split(
        ',') if train_data_weights_str is not None else None

    eval_data_named_path = eval_data_str.split(',')

    for named_path in train_data_named_path:
        ind = named_path.find(':')
        name = named_path[:ind]
        path = named_path[ind + 1:]
        if name in registered_path:
            d_list = common.load_jsonl(registered_path[name])
        else:
            d_list = common.load_jsonl(path)

        train_data_name.append(name)
        train_data_path.append(path)

        train_data_list.append(d_list)

    if weights_str is not None:
        for weights in weights_str:
            train_data_weights.append(float(weights))
    else:
        for i in range(len(train_data_list)):
            train_data_weights.append(1)

    for named_path in eval_data_named_path:
        ind = named_path.find(':')
        name = named_path[:ind]
        path = named_path[ind + 1:]
        if name in registered_path:
            d_list = common.load_jsonl(registered_path[name])
        else:
            d_list = common.load_jsonl(path)
        eval_data_name.append(name)
        eval_data_path.append(path)

        eval_data_list.append(d_list)

    assert len(train_data_weights) == len(train_data_list)

    batching_schema = {
        'uid':
        RawFlintField(),
        'y':
        LabelFlintField(),
        'input_ids':
        ArrayIndexFlintField(pad_idx=padding_token_value, left_pad=left_pad),
        'token_type_ids':
        ArrayIndexFlintField(pad_idx=padding_segement_value,
                             left_pad=left_pad),
        'attention_mask':
        ArrayIndexFlintField(pad_idx=padding_att_value, left_pad=left_pad),
    }

    data_transformer = NLITransform(model_name, tokenizer, max_length)
    # data_transformer = NLITransform(model_name, tokenizer, max_length, with_element=True)

    eval_data_loaders = []
    for eval_d_list in eval_data_list:
        d_dataset, d_sampler, d_dataloader = build_eval_dataset_loader_and_sampler(
            eval_d_list, data_transformer, batching_schema,
            batch_size_per_gpu_eval)
        eval_data_loaders.append(d_dataloader)

    # Estimate the training size:
    training_list = []
    for i in range(len(train_data_list)):
        print("Build Training Data ...")
        train_d_list = train_data_list[i]
        train_d_name = train_data_name[i]
        train_d_weight = train_data_weights[i]
        cur_train_list = sample_data_list(
            train_d_list, train_d_weight
        )  # change later  # we can apply different sample strategy here.
        print(
            f"Data Name:{train_d_name}; Weight: {train_d_weight}; "
            f"Original Size: {len(train_d_list)}; Sampled Size: {len(cur_train_list)}"
        )
        training_list.extend(cur_train_list)
    estimated_training_size = len(training_list)
    print("Estimated training size:", estimated_training_size)
    # Estimate the training size ends:

    # t_total = estimated_training_size // args.gradient_accumulation_steps * num_epoch
    t_total = estimated_training_size * num_epoch // args.actual_train_batch_size
    if args.warmup_steps <= 0:  # default the warmup steps to 0.1 * total steps when no positive value is given.
        args.warmup_steps = int(t_total * 0.1)

    if not args.cpu:
        torch.cuda.set_device(args.local_rank)
        model.cuda(args.local_rank)

    no_decay = ["bias", "LayerNorm.weight"]
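    # Group parameters so bias and LayerNorm weights get no weight decay while everything
    # else uses args.weight_decay (standard transformer fine-tuning practice).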
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    if not args.cpu and not args.single_gpu:
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            find_unused_parameters=True)

    args_dict = dict(vars(args))
    file_path_prefix = '.'
    if args.global_rank in [-1, 0]:
        print("Total Steps:", t_total)
        args.total_step = t_total
        print("Warmup Steps:", args.warmup_steps)
        print("Actual Training Batch Size:", actual_train_batch_size)
        print("Arguments", pp.pprint(args))

    # Let's build the logger and log everything before the start of the first training epoch.
    if args.global_rank in [
            -1, 0
    ]:  # only do logging if we use cpu or global_rank=0
        if not args.debug_mode:
            file_path_prefix, date = save_tool.gen_file_prefix(
                f"{args.experiment_name}")
            # # # Create Log File
            # Save the source code.
            script_name = os.path.basename(__file__)
            with open(os.path.join(file_path_prefix, script_name),
                      'w') as out_f, open(__file__, 'r') as it:
                out_f.write(it.read())
                out_f.flush()

            # Save option file
            common.save_json(args_dict,
                             os.path.join(file_path_prefix, "args.json"))
            checkpoints_path = Path(file_path_prefix) / "checkpoints"
            if not checkpoints_path.exists():
                checkpoints_path.mkdir()
            prediction_path = Path(file_path_prefix) / "predictions"
            if not prediction_path.exists():
                prediction_path.mkdir()

    global_step = 0

    # print(f"Global Rank:{args.global_rank} ### ", 'Init!')

    for epoch in tqdm(range(num_epoch),
                      desc="Epoch",
                      disable=args.global_rank not in [-1, 0]):
        # Let's build up training dataset for this epoch
        training_list = []
        for i in range(len(train_data_list)):
            print("Build Training Data ...")
            train_d_list = train_data_list[i]
            train_d_name = train_data_name[i]
            train_d_weight = train_data_weights[i]
            cur_train_list = sample_data_list(
                train_d_list, train_d_weight
            )  # change later  # we can apply different sample strategy here.
            print(
                f"Data Name:{train_d_name}; Weight: {train_d_weight}; "
                f"Original Size: {len(train_d_list)}; Sampled Size: {len(cur_train_list)}"
            )
            training_list.extend(cur_train_list)

        random.shuffle(training_list)
        train_dataset = NLIDataset(training_list, data_transformer)

        train_sampler = SequentialSampler(train_dataset)
        if not args.cpu and not args.single_gpu:
            print("Use distributed sampler.")
            train_sampler = DistributedSampler(train_dataset,
                                               args.world_size,
                                               args.global_rank,
                                               shuffle=True)

        train_dataloader = DataLoader(
            dataset=train_dataset,
            batch_size=batch_size_per_gpu_train,
            shuffle=False,  #
            num_workers=0,
            pin_memory=True,
            sampler=train_sampler,
            collate_fn=BaseBatchBuilder(batching_schema))  #
        # training build finished.

        print(debug_node_info(args), "epoch: ", epoch)

        if not args.cpu and not args.single_gpu:
            train_sampler.set_epoch(
                epoch
            )  # setup the epoch to ensure random sampling at each epoch

        for forward_step, batch in enumerate(
                tqdm(train_dataloader,
                     desc="Iteration",
                     disable=args.global_rank not in [-1, 0]), 0):
            model.train()

            batch = move_to_device(batch, local_rank)
            # print(batch['input_ids'], batch['y'])
            if args.model_class_name in ["distilbert", "bart-large"]:
                outputs = model(batch['input_ids'],
                                attention_mask=batch['attention_mask'],
                                labels=batch['y'])
            else:
                outputs = model(batch['input_ids'],
                                attention_mask=batch['attention_mask'],
                                token_type_ids=batch['token_type_ids'],
                                labels=batch['y'])
            loss, logits = outputs[:2]
            # print(debug_node_info(args), loss, logits, batch['uid'])
            # print(debug_node_info(args), loss, batch['uid'])

            # Accumulated loss
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            # if this forward step need model updates
            # handle fp16
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Update the model only every gradient_accumulation_steps forward steps,
            # clipping gradients first when max_grad_norm > 0.
            if (forward_step + 1) % args.gradient_accumulation_steps == 0:
                if args.max_grad_norm > 0:
                    if args.fp16:
                        torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()

                global_step += 1

                if args.global_rank in [
                        -1, 0
                ] and args.eval_frequency > 0 and global_step % args.eval_frequency == 0:
                    r_dict = dict()
                    # Eval loop:
                    for i in range(len(eval_data_name)):
                        cur_eval_data_name = eval_data_name[i]
                        cur_eval_data_list = eval_data_list[i]
                        cur_eval_dataloader = eval_data_loaders[i]
                        # cur_eval_raw_data_list = eval_raw_data_list[i]

                        evaluation_dataset(args,
                                           cur_eval_dataloader,
                                           cur_eval_data_list,
                                           model,
                                           r_dict,
                                           eval_name=cur_eval_data_name)

                    # saving checkpoints
                    current_checkpoint_filename = \
                        f'e({epoch})|i({global_step})'

                    for i in range(len(eval_data_name)):
                        cur_eval_data_name = eval_data_name[i]
                        current_checkpoint_filename += \
                            f'|{cur_eval_data_name}#({round(r_dict[cur_eval_data_name]["acc"], 4)})'

                    if not args.debug_mode:
                        # save model:
                        model_output_dir = checkpoints_path / current_checkpoint_filename
                        if not model_output_dir.exists():
                            model_output_dir.mkdir()
                        model_to_save = (
                            model.module if hasattr(model, "module") else model
                        )  # Take care of distributed/parallel training

                        torch.save(model_to_save.state_dict(),
                                   str(model_output_dir / "model.pt"))
                        torch.save(optimizer.state_dict(),
                                   str(model_output_dir / "optimizer.pt"))
                        torch.save(scheduler.state_dict(),
                                   str(model_output_dir / "scheduler.pt"))

                    # save prediction:
                    if not args.debug_mode and args.save_prediction:
                        cur_results_path = prediction_path / current_checkpoint_filename
                        if not cur_results_path.exists():
                            cur_results_path.mkdir(parents=True)
                        for key, item in r_dict.items():
                            common.save_jsonl(
                                item['predictions'],
                                cur_results_path / f"{key}.jsonl")

                        # avoid saving too many things
                        for key, item in r_dict.items():
                            del r_dict[key]['predictions']
                        common.save_json(r_dict,
                                         cur_results_path /
                                         "results_dict.json",
                                         indent=2)

        # End of epoch evaluation.
        if args.global_rank in [-1, 0]:
            r_dict = dict()
            # Eval loop:
            for i in range(len(eval_data_name)):
                cur_eval_data_name = eval_data_name[i]
                cur_eval_data_list = eval_data_list[i]
                cur_eval_dataloader = eval_data_loaders[i]
                # cur_eval_raw_data_list = eval_raw_data_list[i]

                evaluation_dataset(args,
                                   cur_eval_dataloader,
                                   cur_eval_data_list,
                                   model,
                                   r_dict,
                                   eval_name=cur_eval_data_name)

            # saving checkpoints
            current_checkpoint_filename = \
                f'e({epoch})|i({global_step})'

            for i in range(len(eval_data_name)):
                cur_eval_data_name = eval_data_name[i]
                current_checkpoint_filename += \
                    f'|{cur_eval_data_name}#({round(r_dict[cur_eval_data_name]["acc"], 4)})'

            if not args.debug_mode:
                # save model:
                model_output_dir = checkpoints_path / current_checkpoint_filename
                if not model_output_dir.exists():
                    model_output_dir.mkdir()
                model_to_save = (
                    model.module if hasattr(model, "module") else model
                )  # Take care of distributed/parallel training

                torch.save(model_to_save.state_dict(),
                           str(model_output_dir / "model.pt"))
                torch.save(optimizer.state_dict(),
                           str(model_output_dir / "optimizer.pt"))
                torch.save(scheduler.state_dict(),
                           str(model_output_dir / "scheduler.pt"))

            # save prediction:
            if not args.debug_mode and args.save_prediction:
                cur_results_path = prediction_path / current_checkpoint_filename
                if not cur_results_path.exists():
                    cur_results_path.mkdir(parents=True)
                for key, item in r_dict.items():
                    common.save_jsonl(item['predictions'],
                                      cur_results_path / f"{key}.jsonl")

                # avoid saving too many things
                for key, item in r_dict.items():
                    del r_dict[key]['predictions']
                common.save_json(r_dict,
                                 cur_results_path / "results_dict.json",
                                 indent=2)
Example #28
0
def eval_model_for_downstream_ablation(model_saved_path,
                                       doc_top_k=2,
                                       tag='dev'):
    print(f"Run doc_top_k:{doc_top_k}")
    bert_pretrain_path = config.PRO_ROOT / '.pytorch_pretrained_bert'
    seed = 12
    torch.manual_seed(seed)
    bert_model_name = 'bert-base-uncased'
    # lazy = False
    lazy = True
    forward_size = 256
    # batch_size = 64
    batch_size = 128
    do_lower_case = True
    document_top_k = doc_top_k

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)
    test_list = common.load_json(config.TEST_FULLWIKI_FILE)

    # Load train eval results list
    # cur_train_eval_results_list = common.load_jsonl(
    #     config.PRO_ROOT / "data/p_hotpotqa/hotpotqa_paragraph_level/04-10-17:44:54_hotpot_v0_cs/"
    #                       "i(40000)|e(4)|t5_doc_recall(0.8793382849426064)|t5_sp_recall(0.879496479212887)|t10_doc_recall(0.888656313301823)|t5_sp_recall(0.8888325134240054)|seed(12)/train_p_level_bert_v1_results.jsonl")

    cur_dev_eval_results_list = common.load_jsonl(
        config.PRO_ROOT /
        "data/p_hotpotqa/hotpotqa_paragraph_level/04-10-17:44:54_hotpot_v0_cs/"
        "i(40000)|e(4)|t5_doc_recall(0.8793382849426064)|t5_sp_recall(0.879496479212887)|t10_doc_recall(0.888656313301823)|t5_sp_recall(0.8888325134240054)|seed(12)/dev_p_level_bert_v1_results.jsonl"
    )

    # cur_test_eval_results_list = common.load_jsonl(
    #     config.PRO_ROOT / "data/p_hotpotqa/hotpotqa_paragraph_level/04-10-17:44:54_hotpot_v0_cs/"
    #                       "i(40000)|e(4)|t5_doc_recall(0.8793382849426064)|t5_sp_recall(0.879496479212887)|t10_doc_recall(0.888656313301823)|t5_sp_recall(0.8888325134240054)|seed(12)/test_p_level_bert_v1_results.jsonl")

    # if tag == 'train':
    #     train_fitems = get_sentence_pair(document_top_k, train_list, cur_train_eval_results_list, is_training=True,
    #                                      debug_mode=debug_mode)
    if tag == 'dev':
        dev_fitems = get_sentence_pair(document_top_k,
                                       dev_list,
                                       cur_dev_eval_results_list,
                                       is_training=False,
                                       debug_mode=debug_mode)

    # elif tag == 'test':
    #     test_fitems = get_sentence_pair(document_top_k, test_list, cur_test_eval_results_list, is_training=False,
    #                                     debug_mode=debug_mode)

    if debug_mode:
        eval_frequency = 2

    #     dev_list = dev_list[:10]
    #     dev_fitems_list = dev_fitems_list[:296]
    #     train_fitems_list = train_fitems_list[:300]
    # print(dev_list[-1]['_id'])
    # exit(0)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    train_o_dict = list_dict_data_tool.list_to_dict(train_list, '_id')

    bert_tokenizer = BertTokenizer.from_pretrained(
        bert_model_name,
        do_lower_case=do_lower_case,
        cache_dir=bert_pretrain_path)
    bert_cs_reader = BertContentSelectionReader(
        bert_tokenizer,
        lazy,
        is_paired=True,
        example_filter=lambda x: len(x['context']) == 0,
        max_l=128,
        element_fieldname='element')

    bert_encoder = BertModel.from_pretrained(bert_model_name,
                                             cache_dir=bert_pretrain_path)
    model = BertMultiLayerSeqClassification(bert_encoder,
                                            num_labels=num_class,
                                            num_of_pooling_layer=1,
                                            act_type='tanh',
                                            use_pretrained_pooler=True,
                                            use_sigmoid=True)

    model.load_state_dict(torch.load(model_saved_path))

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # NOTE: the 'train' and 'test' branches below rely on train_fitems /
    # test_fitems, which are only built when the commented-out blocks above
    # are re-enabled; as written, only tag='dev' is fully runnable.
    if tag == 'train':
        train_instance = bert_cs_reader.read(train_fitems)
    elif tag == 'dev':
        dev_instances = bert_cs_reader.read(dev_fitems)
    elif tag == 'test':
        test_instances = bert_cs_reader.read(test_fitems)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    if tag == 'train':
        train_iter = biterator(train_instance, num_epochs=1, shuffle=False)
        print(len(train_fitems))
    elif tag == 'dev':
        dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)
        print(len(dev_fitems))
    elif tag == 'test':
        test_iter = biterator(test_instances, num_epochs=1, shuffle=False)
        print(len(test_fitems))

    print("Forward size:", forward_size)

    if tag == 'train':
        cur_train_eval_results_list_out = eval_model(model,
                                                     train_iter,
                                                     device_num,
                                                     with_probs=True,
                                                     show_progress=True)
        common.save_jsonl(
            cur_train_eval_results_list_out, config.PRO_ROOT /
            "data/p_hotpotqa/hotpotqa_sentence_level/04-19-02:17:11_hotpot_v0_slevel_retri_(doc_top_k:2)/i(12000)|e(2)|v02_f1(0.7153646038858843)|v02_recall(0.7114645831323757)|v05_f1(0.7153646038858843)|v05_recall(0.7114645831323757)|seed(12)/train_s_level_bert_v1_results.jsonl"
        )
    elif tag == 'dev':
        cur_dev_eval_results_list_out = eval_model(model,
                                                   dev_iter,
                                                   device_num,
                                                   with_probs=True,
                                                   show_progress=True)
        common.save_jsonl(
            cur_dev_eval_results_list_out,
            f"hotpot_s_level_{tag}_results_top_k_doc_{document_top_k}.jsonl")

    elif tag == 'test':
        cur_test_eval_results_list_out = eval_model(model,
                                                    test_iter,
                                                    device_num,
                                                    with_probs=True,
                                                    show_progress=True)
        common.save_jsonl(
            cur_test_eval_results_list_out, config.PRO_ROOT /
            "data/p_hotpotqa/hotpotqa_sentence_level/04-19-02:17:11_hotpot_v0_slevel_retri_(doc_top_k:2)/i(12000)|e(2)|v02_f1(0.7153646038858843)|v02_recall(0.7114645831323757)|v05_f1(0.7153646038858843)|v05_recall(0.7114645831323757)|seed(12)/test_s_level_bert_v1_results.jsonl"
        )

    if tag == 'train' or tag == 'test':
        exit(0)

    copied_dev_o_dict = copy.deepcopy(dev_o_dict)
    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_dev_eval_results_list_out,
        copied_dev_o_dict,
        'qid',
        'fid',
        check=True)
    # 0.5
    cur_results_dict_v05 = select_top_k_and_to_results_dict(
        copied_dev_o_dict,
        top_k=5,
        score_field_name='prob',
        filter_value=0.5,
        result_field='sp')

    cur_results_dict_v02 = select_top_k_and_to_results_dict(
        copied_dev_o_dict,
        top_k=5,
        score_field_name='prob',
        filter_value=0.2,
        result_field='sp')

    _, metrics_v5 = ext_hotpot_eval.eval(cur_results_dict_v05,
                                         dev_list,
                                         verbose=False)

    _, metrics_v2 = ext_hotpot_eval.eval(cur_results_dict_v02,
                                         dev_list,
                                         verbose=False)

    logging_item = {
        'v02': metrics_v2,
        'v05': metrics_v5,
    }

    print(logging_item)
    f1 = metrics_v5['sp_f1']
    em = metrics_v5['sp_em']
    pr = metrics_v5['sp_prec']
    rec = metrics_v5['sp_recall']
    common.save_json(
        logging_item,
        f"top_k_doc:{document_top_k}_em:{em}_pr:{pr}_rec:{rec}_f1:{f1}")
Example #29
0
def model_go_with_old_data():
    seed = 12
    torch.manual_seed(seed)
    # bert_model_name = 'bert-large-uncased'
    bert_model_name = 'bert-base-uncased'
    experiment_name = 'fever_v1_nli'
    lazy = False
    # lazy = True
    forward_size = 16
    # batch_size = 64
    # batch_size = 192
    batch_size = 32
    gradient_accumulate_step = int(batch_size / forward_size)
    warmup_proportion = 0.1
    learning_rate = 5e-5
    num_train_epochs = 3
    eval_frequency = 2000
    do_lower_case = True
    pair_order = 'cq'
    # debug_mode = True
    debug_mode = False
    # est_datasize = 900_000

    num_class = 3
    # num_train_optimization_steps

    train_sent_filtering_prob = 0.35
    dev_sent_filtering_prob = 0.1

    # dev_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/i(5000)|e(0)|s01(0.9170917091709171)|s05(0.8842384238423843)|seed(12)_dev_sent_results.json"
    # train_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/train_sent_results.jsonl"
    from utest.utest_format_converter_for_old_sent.tool import format_convert
    dev_sent_results_file = format_convert(
        config.PRO_ROOT /
        "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/4-15-dev_sent_pred_scores_old_format.jsonl"
    )
    train_sent_results_file = format_convert(
        config.PRO_ROOT /
        "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/train_sent_scores_old_format.jsonl"
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    # Load Dataset
    # train_fitems_list = get_inference_pair('train', True, train_sent_results_file, debug_mode, train_sent_filtering_prob)
    dev_debug_num = 2481 if debug_mode else None
    dev_fitems_list, dev_list = get_inference_pair('dev', False,
                                                   dev_sent_results_file,
                                                   dev_debug_num,
                                                   dev_sent_filtering_prob)
    # = common.load_jsonl(config.FEVER_DEV)

    if debug_mode:
        dev_list = dev_list[:50]
        eval_frequency = 1
        # print(dev_list[-1]['_id'])
        # exit(0)

    # sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
    train_debug_num = 2971 if debug_mode else None
    train_fitems_list, _ = get_inference_pair('train', True,
                                              train_sent_results_file,
                                              train_debug_num,
                                              train_sent_filtering_prob)
    est_datasize = len(train_fitems_list)

    # dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, 'id')
    # print(dev_o_dict)

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name,
                                                   do_lower_case=do_lower_case)
    bert_cs_reader = BertFeverNLIReader(bert_tokenizer,
                                        lazy,
                                        is_paired=True,
                                        query_l=64,
                                        example_filter=None,
                                        max_l=364,
                                        pair_order=pair_order)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder,
                                            num_labels=num_class,
                                            num_of_pooling_layer=1,
                                            act_type='tanh',
                                            use_pretrained_pooler=True,
                                            use_sigmoid=False)
    #
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
                                   num_train_epochs

    if debug_mode:
        num_train_optimization_steps = 100

    print("Estimated training size", est_datasize)
    print("Number of optimization steps:", num_train_optimization_steps)

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=learning_rate,
                         warmup=warmup_proportion,
                         t_total=num_train_optimization_steps)

    dev_instances = bert_cs_reader.read(dev_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    forbackward_step = 0
    update_step = 0

    logging_agent = save_tool.ScoreLogger({})

    file_path_prefix = '.'
    if not debug_mode:
        file_path_prefix, date = save_tool.gen_file_prefix(
            f"{experiment_name}")
        # # # Create Log File
        # Save the source code.
        script_name = os.path.basename(__file__)
        with open(os.path.join(file_path_prefix, script_name),
                  'w') as out_f, open(__file__, 'r') as it:
            out_f.write(it.read())
            out_f.flush()
        # # # Log File end

    for epoch_i in range(num_train_epochs):
        print("Epoch:", epoch_i)

        train_fitems_list, _ = get_inference_pair('train', True,
                                                  train_sent_results_file,
                                                  train_debug_num,
                                                  train_sent_filtering_prob)
        random.shuffle(train_fitems_list)
        train_instance = bert_cs_reader.read(train_fitems_list)
        train_iter = biterator(train_instance, num_epochs=1, shuffle=True)

        for batch in tqdm(train_iter):
            model.train()
            batch = move_to_device(batch, device_num)

            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            labels_ids = batch['label']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            s1_span = batch['bert_s1_span']
            s2_span = batch['bert_s2_span']

            loss = model(
                paired_sequence,
                token_type_ids=paired_segments_ids,
                attention_mask=att_mask,
                mode=BertMultiLayerSeqClassification.ForwardMode.TRAIN,
                labels=labels_ids)

            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.

            if gradient_accumulate_step > 1:
                loss = loss / gradient_accumulate_step

            loss.backward()
            forbackward_step += 1

            if forbackward_step % gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                update_step += 1

                if update_step % eval_frequency == 0:
                    print("Update steps:", update_step)
                    dev_iter = biterator(dev_instances,
                                         num_epochs=1,
                                         shuffle=False)

                    cur_eval_results_list = eval_model(model,
                                                       dev_iter,
                                                       device_num,
                                                       with_probs=True,
                                                       make_int=True)

                    results_dict = list_dict_data_tool.list_to_dict(
                        cur_eval_results_list, 'oid')
                    copied_dev_list = copy.deepcopy(dev_list)
                    list_dict_data_tool.append_item_from_dict_to_list(
                        copied_dev_list, results_dict, 'id', 'predicted_label')

                    mode = {'standard': True}
                    strict_score, acc_score, pr, rec, f1 = fever_scorer.fever_score(
                        copied_dev_list,
                        dev_fitems_list,
                        mode=mode,
                        max_evidence=5)
                    logging_item = {
                        'ss': strict_score,
                        'ac': acc_score,
                        'pr': pr,
                        'rec': rec,
                        'f1': f1,
                    }

                    save_file_name = f'i({update_step})|e({epoch_i})' \
                        f'|ss({strict_score})|ac({acc_score})|pr({pr})|rec({rec})|f1({f1})' \
                        f'|seed({seed})'

                    common.save_jsonl(
                        copied_dev_list,
                        Path(file_path_prefix) /
                        f"{save_file_name}_dev_nli_results.json")

                    # print(save_file_name)
                    logging_agent.incorporate_results({}, save_file_name,
                                                      logging_item)
                    logging_agent.logging_to_file(
                        Path(file_path_prefix) / "log.json")

                    model_to_save = model.module if hasattr(
                        model, 'module') else model
                    output_model_file = Path(file_path_prefix) / save_file_name
                    torch.save(model_to_save.state_dict(),
                               str(output_model_file))
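In the training loop above, the optimizer is stepped once every gradient_accumulate_step forward/backward passes, so the effective batch size is forward_size * gradient_accumulate_step. A minimal sketch of that arithmetic, using the defaults from this example:

# Batching arithmetic from this example (values are the defaults above,
# not new results).
forward_size = 16                                      # examples per forward/backward pass
batch_size = 32                                        # target effective batch size
gradient_accumulate_step = batch_size // forward_size  # -> 2 accumulation steps

effective_batch = forward_size * gradient_accumulate_step
assert effective_batch == batch_size  # 16 * 2 == 32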
Example #30
0
def eval_model_for_downstream(model_saved_path):
    bert_model_name = 'bert-base-uncased'
    lazy = True
    forward_size = 64
    # batch_size = 64
    batch_size = 128
    do_lower_case = True

    debug_mode = False
    max_l = 264
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps
    tag = 'test'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    # Load Dataset
    # train_ruleterm_doc_results = common.load_jsonl(
    #     config.PRO_ROOT / "results/doc_retri_results/fever_results/merged_doc_results/m_doc_train.jsonl")
    # dev_ruleterm_doc_results = train_ruleterm_doc_results
    if tag == 'dev':
        dev_ruleterm_doc_results = common.load_jsonl(
            config.PRO_ROOT /
            "results/doc_retri_results/fever_results/merged_doc_results/m_doc_dev.jsonl"
        )

        dev_list = common.load_jsonl(config.FEVER_DEV)

        dev_fitems = fever_p_level_sampler.get_paragraph_forward_pair(
            'dev',
            dev_ruleterm_doc_results,
            is_training=False,
            debug=debug_mode,
            ignore_non_verifiable=False)
    elif tag == 'train':
        dev_ruleterm_doc_results = common.load_jsonl(
            config.PRO_ROOT /
            "results/doc_retri_results/fever_results/merged_doc_results/m_doc_train.jsonl"
        )

        dev_list = common.load_jsonl(config.FEVER_TRAIN)

        dev_fitems = fever_p_level_sampler.get_paragraph_forward_pair(
            'train',
            dev_ruleterm_doc_results,
            is_training=True,
            debug=debug_mode,
            ignore_non_verifiable=False)
    elif tag == 'test':
        dev_ruleterm_doc_results = common.load_jsonl(
            config.PRO_ROOT /
            "results/doc_retri_results/fever_results/merged_doc_results/m_doc_test.jsonl"
        )

        dev_list = common.load_jsonl(config.FEVER_TEST)

        dev_fitems = fever_p_level_sampler.get_paragraph_forward_pair(
            'test',
            dev_ruleterm_doc_results,
            is_training=False,
            debug=debug_mode,
            ignore_non_verifiable=False)
    else:
        raise NotImplementedError(f"Tag:{tag} not supported.")

    # dev_fitems = fever_p_level_sampler.get_paragraph_forward_pair('train', dev_ruleterm_doc_results,
    #                                                               is_training=True, debug=debug_mode,
    #                                                               ignore_non_verifiable=False)

    # Called only to report the positive/negative sampling statistics;
    # the return value is intentionally discarded.
    fever_p_level_sampler.down_sample_neg(dev_fitems, None)

    if debug_mode:
        dev_list = dev_list[:100]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    # sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, 'id')
    # print(dev_o_dict)

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name,
                                                   do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(
        bert_tokenizer,
        lazy,
        is_paired=True,
        example_filter=lambda x: len(x['context']) == 0,
        max_l=max_l,
        element_fieldname='element')

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder,
                                            num_labels=num_class,
                                            num_of_pooling_layer=1,
                                            act_type='tanh',
                                            use_pretrained_pooler=True,
                                            use_sigmoid=True)

    model.load_state_dict(torch.load(model_saved_path))

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    #

    if debug_mode:
        num_train_optimization_steps = 100

    dev_instances = bert_cs_reader.read(dev_fitems)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

    cur_eval_results_list = eval_model(model,
                                       dev_iter,
                                       device_num,
                                       make_int=True,
                                       with_probs=True,
                                       show_progress=True)

    common.save_jsonl(cur_eval_results_list,
                      f"fever_p_level_{tag}_results.jsonl")

    if tag == 'test':
        exit(0)
    # common.save_jsonl(cur_eval_results_list, "fever_p_level_train_results_1.jsonl")

    copied_dev_o_dict = copy.deepcopy(dev_o_dict)
    copied_dev_d_list = copy.deepcopy(dev_list)
    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_eval_results_list, copied_dev_o_dict, 'qid', 'fid', check=True)

    cur_results_dict_th0_5 = select_top_k_and_to_results_dict(
        copied_dev_o_dict, score_field_name='prob', top_k=5, filter_value=0.5)

    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, cur_results_dict_th0_5, 'id', 'predicted_docids')
    # mode = {'standard': False, 'check_doc_id_correct': True}
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(copied_dev_d_list,
                                                            dev_list,
                                                            max_evidence=5)
    score_05 = {
        'ss': strict_score,
        'pr': pr,
        'rec': rec,
        'f1': f1,
    }

    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_eval_results_list, copied_dev_o_dict, 'qid', 'fid', check=True)

    cur_results_dict_th0_2 = select_top_k_and_to_results_dict(
        copied_dev_o_dict, score_field_name='prob', top_k=5, filter_value=0.2)

    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, cur_results_dict_th0_2, 'id', 'predicted_docids')
    # mode = {'standard': False, 'check_doc_id_correct': True}
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(copied_dev_d_list,
                                                            dev_list,
                                                            max_evidence=5)
    score_02 = {
        'ss': strict_score,
        'pr': pr,
        'rec': rec,
        'f1': f1,
    }

    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_eval_results_list, copied_dev_o_dict, 'qid', 'fid', check=True)

    cur_results_dict_th0_1 = select_top_k_and_to_results_dict(
        copied_dev_o_dict, score_field_name='prob', top_k=5, filter_value=0.1)

    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, cur_results_dict_th0_1, 'id', 'predicted_docids')
    # mode = {'standard': False, 'check_doc_id_correct': True}
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(copied_dev_d_list,
                                                            dev_list,
                                                            max_evidence=5)
    score_01 = {
        'ss': strict_score,
        'pr': pr,
        'rec': rec,
        'f1': f1,
    }

    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_eval_results_list, copied_dev_o_dict, 'qid', 'fid', check=True)

    cur_results_dict_th00_1 = select_top_k_and_to_results_dict(
        copied_dev_o_dict, score_field_name='prob', top_k=5, filter_value=0.01)

    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, cur_results_dict_th00_1, 'id', 'predicted_docids')
    # mode = {'standard': False, 'check_doc_id_correct': True}
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(copied_dev_d_list,
                                                            dev_list,
                                                            max_evidence=5)
    score_001 = {
        'ss': strict_score,
        'pr': pr,
        'rec': rec,
        'f1': f1,
    }

    list_dict_data_tool.append_subfield_from_list_to_dict(
        cur_eval_results_list, copied_dev_o_dict, 'qid', 'fid', check=True)

    cur_results_dict_th000_5 = select_top_k_and_to_results_dict(
        copied_dev_o_dict,
        score_field_name='prob',
        top_k=5,
        filter_value=0.005)

    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, cur_results_dict_th000_5, 'id', 'predicted_docids')
    # mode = {'standard': False, 'check_doc_id_correct': True}
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(copied_dev_d_list,
                                                            dev_list,
                                                            max_evidence=5)
    score_0005 = {
        'ss': strict_score,
        'pr': pr,
        'rec': rec,
        'f1': f1,
    }

    logging_item = {
        'score_0005': score_0005,
        'score_001': score_001,
        'score_01': score_01,
        'score_02': score_02,
        'score_05': score_05,
    }

    print(json.dumps(logging_item, indent=2))
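The five nearly identical threshold blocks above could be collapsed into a single loop. A minimal sketch, assuming the same helpers and variables used in this example (select_top_k_and_to_results_dict, list_dict_data_tool, fever_scorer, copied_dev_o_dict, copied_dev_d_list, dev_list); note that the subfield append from the original only needs to be performed once, before the loop.

# Consolidated version of the threshold sweep above (a sketch, not part of
# the original code); assumes the subfields have already been appended to
# copied_dev_o_dict exactly once.
logging_item = {}
for filter_value in [0.005, 0.01, 0.1, 0.2, 0.5]:
    results_dict = select_top_k_and_to_results_dict(
        copied_dev_o_dict,
        score_field_name='prob',
        top_k=5,
        filter_value=filter_value)
    # Overwrites 'predicted_docids' on the copied dev list for this threshold.
    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        copied_dev_d_list, results_dict, 'id', 'predicted_docids')
    strict_score, pr, rec, f1 = fever_scorer.fever_doc_only(
        copied_dev_d_list, dev_list, max_evidence=5)
    logging_item[f'score_{filter_value}'] = {
        'ss': strict_score, 'pr': pr, 'rec': rec, 'f1': f1,
    }

print(json.dumps(logging_item, indent=2))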