def main():
    """Launch the "qcknc3" instance-generation jobs for dev, train and val.

    The dev launch uses the "dev" eval candidates; the train and val launches
    both use the "train" eval candidates and the same train QK candidate name.
    """
    job_prefix = "qcknc3"

    def build_generator(candidate_split):
        # A new generator is built for every launch rather than shared.
        return QCKInstanceGenerator(get_eval_candidates_as_qck(candidate_split),
                                    is_correct_factory())

    dev_qk_name = "qk_candidate_msmarco_filtered_dev"
    start_generate_jobs_for_sub_split(build_generator("dev"), dev_qk_name, job_prefix, "dev")

    # Selected from doc_scorer_summarizer.py
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    start_generate_jobs_for_train(build_generator("train"), train_qk_name, job_prefix)
    start_generate_jobs_for_val(build_generator("train"), train_qk_name, job_prefix)
def qck_gen(job_name, qk_candidate_name,
            candidate_ranked_list_path, kdp_ranked_list_path, split):
    """Run QCK instance generation (with KDP ranked-list scores) for one split.

    Args:
        job_name: Prefix of the job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name passed to ``load_from_pickle`` to get
            the list of QK units.
        candidate_ranked_list_path: Ranked-list path from which the QCK
            candidates are built.
        kdp_ranked_list_path: Ranked-list path loaded with
            ``load_ranked_list_grouped`` and handed to the generator.
        split: Split name; selects the claim ids and the number of jobs.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    kdp_ranked_list: Dict[str, List[TrecRankedListEntry]] = \
        load_ranked_list_grouped(kdp_ranked_list_path)

    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)

    generator = QCKInstGenWScore(
        get_qck_candidate_from_ranked_list_path(candidate_ranked_list_path),
        is_correct_factory(),
        kdp_ranked_list)

    # Membership is tested once per QK unit; a set keeps each test O(1)
    # instead of scanning the whole id list every time.
    # Assumes query_id values are hashable (the ids here are strings).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_train, generator, out_dir)

    # The job count comes from the per-split lookup table, not from the
    # length of the filtered QK list.
    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
def make_test():
    """Launch the "qcknc3" instance-generation jobs for the test split."""
    split = "test"
    test_candidates = get_eval_candidates_as_qck(split)
    generator = QCKInstanceGenerator(test_candidates, is_correct_factory())
    start_generate_jobs_for_sub_split(
        generator,
        "qk_candidate_msmarco_filtered_{}".format(split),
        "qcknc3",
        split,
    )
def main():
    """Launch the "qck11" generation jobs for the dev and test splits."""
    for split in ("dev", "test"):
        candidates = get_eval_candidates_as_qck(split)
        generator = QCKInstanceGenerator(candidates, is_correct_factory())
        # Selected from doc_scorer_summarizer.py
        qk_candidate_name = "pc_qk2_{}_cpnc12_filtered".format(split)
        start_generate_jobs(generator, split, qk_candidate_name, "qck11")
def main():
    """Write query-candidate data for every split under ``<output_path>/pc_qc``.

    For each entry of ``splits`` the queries and eval candidates are fetched
    and passed to ``make_pc_qc`` together with a save path named after the
    split.
    """
    target_dir = os.path.join(output_path, "pc_qc")
    exist_or_mkdir(target_dir)

    def export_split(split):
        split_queries = get_qck_queries(split)
        split_candidates = get_eval_candidates_as_qck(split)
        make_pc_qc(split_queries,
                   split_candidates,
                   is_correct_factory(),
                   os.path.join(target_dir, split))

    for split in splits:
        export_split(split)
def main():
    """Run the "qcknc2" jobs on the dev sub-split with the stage-2 QK candidates."""
    sub_split = "dev"
    dev_candidates = get_eval_candidates_as_qck(sub_split)
    generator = QCKInstanceGenerator(dev_candidates, is_correct_factory())
    run_jobs_with_qk_candidate(generator, sub_split, "qk_stage2_dev_2", "qcknc2")
def main():
    """Write query-candidate data for every split under ``<output_path>/pc_qc4``.

    Query ids and candidates are looked up through the ``split_name2`` split
    definition; the query ids are converted to ints before the queries are
    fetched.
    """
    split_filename = split_name2
    out_root = os.path.join(output_path, "pc_qc4")
    exist_or_mkdir(out_root)

    for split in splits:
        split_qids: Iterable[str] = get_qids_for_split(split_filename, split)
        cid_list = lmap(int, split_qids)
        split_queries = get_qck_queries_from_cids(cid_list)
        split_candidates = get_qck_candidate_for_split(split_filename, split)
        make_pc_qc(split_queries,
                   split_candidates,
                   is_correct_factory(),
                   os.path.join(out_root, split))
def main():
    """Write query-candidate data for every split under ``<output_path>/pc_qc2``.

    Candidates come from the per-split ranked list file
    ``output/perspective_experiments/q_res_<split>.txt``.
    """
    out_root = os.path.join(output_path, "pc_qc2")
    exist_or_mkdir(out_root)

    def ranked_list_path(split):
        # NOTE(review): this is a relative path starting at "output", whereas
        # the save directory is built from output_path - confirm intended.
        file_name = "q_res_{}.txt".format(split)
        return os.path.join("output", "perspective_experiments", file_name)

    for split in splits:
        split_queries = get_qck_queries(split)
        split_candidates = get_qck_candidate_from_ranked_list_path(ranked_list_path(split))
        make_pc_qc(split_queries,
                   split_candidates,
                   is_correct_factory(),
                   os.path.join(out_root, split))
def qck_gen_w_ranked_list(job_name,
                          qk_candidates: List[QKUnit],
                          qck_candidates_dict: Dict[str, List[QCKCandidate]],
                          split):
    """Run QCK instance generation for one split from in-memory candidates.

    Args:
        job_name: Prefix of the job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidates: QK units; only those whose first element's
            ``query_id`` belongs to the split are used.
        qck_candidates_dict: Candidates handed to ``QCKInstanceGenerator``.
        split: Split name, resolved through ``split_name2``.
    """
    qids = list(get_qids_for_split(split_name2, split))
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(qck_candidates_dict, is_correct_factory())

    # Membership is tested once per QK unit; a set keeps each test O(1)
    # instead of scanning the whole id list every time.
    # Assumes query_id values are hashable, like the split's query ids.
    qid_set = set(qids)
    qk_candidates_for_split: List[QKUnit] = [
        qk for qk in qk_candidates if qk[0].query_id in qid_set
    ]
    print("{} of {} qk are used".format(len(qk_candidates_for_split), len(qk_candidates)))

    def worker_factory(out_dir):
        return QCKWorker(qk_candidates_for_split, generator, out_dir)

    # One job per query id of the split (counted on the list, not the set).
    num_jobs = len(qids)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
def main(config):
    """Launch the "qck_multi" multi-document generation jobs for dev, train and val.

    Args:
        config: Passed through unchanged to every ``MultiDocInstanceGenerator``.
    """
    job_name = "qck_multi"
    is_correct_fn = is_correct_factory()

    # (split of the eval candidates, QK candidate name, sub-split to launch)
    # The val launch uses the train candidates and the train QK candidate name.
    launch_plan = [
        ("dev", "qk_candidate_msmarco_filtered_dev", "dev"),
        ("train", "qk_candidate_msmarco_filtered_train", "train"),
        ("train", "qk_candidate_msmarco_filtered_train", "val"),
    ]
    for candidate_split, qk_candidate_name, sub_split in launch_plan:
        generator = MultiDocInstanceGenerator(get_eval_candidates_as_qck(candidate_split),
                                              is_correct_fn,
                                              config)
        start_generate_jobs_for_sub_split(generator, qk_candidate_name, job_name, sub_split)
def multi_qck_gen(job_name, qk_candidate_name, ranked_list_path, split, k_group_size):
    """Run grouped QCK instance generation for one split.

    Args:
        job_name: Prefix of the job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name passed to ``load_from_pickle`` to get
            the list of QK units.
        ranked_list_path: Ranked-list path from which the QCK candidates are
            built.
        split: Split name; selects the claim ids and the number of jobs.
        k_group_size: Forwarded to ``QCKGeneratorGrouped`` as its last
            argument.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)

    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)

    generator = QCKGeneratorGrouped(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory(),
        False,
        k_group_size)

    # Membership is tested once per QK unit; a set keeps each test O(1)
    # instead of scanning the whole id list every time.
    # Assumes query_id values are hashable (the ids here are strings).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_train, generator, out_dir)

    # The job count comes from the per-split lookup table, not from the
    # length of the filtered QK list.
    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
def qck_gen_w_ranked_list_multiple(job_name, qk_candidate_name, ranked_list_path,
                                   split, n_qk_per_job):
    """Run QCK instance generation for one split, several QK units per job.

    Args:
        job_name: Prefix of the job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name passed to ``load_from_pickle`` to get
            the list of QK units.
        ranked_list_path: Ranked-list path from which the QCK candidates are
            built.
        split: Split name; selects the claim ids and the QK count used to
            size the run.
        n_qk_per_job: Forwarded to ``QCKWorkerMultiple`` and used as the
            divisor when computing the number of jobs.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)

    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)

    generator = QCKInstanceGenerator(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory())

    # Membership is tested once per QK unit; a set keeps each test O(1)
    # instead of scanning the whole id list every time.
    # Assumes query_id values are hashable (the ids here are strings).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorkerMultiple(qk_candidate_train, generator, n_qk_per_job, out_dir)

    # The QK count comes from the per-split lookup table; the job count is
    # that count divided by n_qk_per_job via ceil_divide.
    num_qks = d_n_claims_per_split2[split]
    num_jobs = ceil_divide(num_qks, n_qk_per_job)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
def main():
    """Launch the "qcknc" train/val generation jobs from the train eval candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    instance_generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    start_generate_jobs_for_train_val(instance_generator, "qcknc")
def main():
    """Run the "qcknc" jobs on the dev sub-split with the stage-2 dev QK candidates."""
    dev_candidates = get_eval_candidates_as_qck("dev")
    instance_generator = QCKInstanceGenerator(dev_candidates, is_correct_factory())
    run_jobs_with_qk_candidate(
        instance_generator,
        "dev",
        "perspective_qk_stage2_dev",
        "qcknc",
    )
def main():
    """Launch the "qcknc_dense" val generation jobs from the train eval candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    instance_generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    # Selected from doc_scorer_summarizer.py
    start_generate_jobs_for_val(
        instance_generator,
        "perspective_qk_candidate_train_dense",
        "qcknc_dense",
    )