def main():
    """Build one Galago query per perspective cluster and save them to disk.

    For every cluster the query term weights are the claim's term counts
    combined (``sum_counters``) with the averaged term counts of the
    cluster's perspectives (``average_counters``).  The number of generated
    queries is printed, and the queries are written to
    ``<output_path>/perspective_query/pc_query_for_evidence.json``.
    """
    clusters: Iterable[PerspectiveCluster] = enum_perspective_clusters()
    galago_tokenizer = TokenizerForGalago()

    def term_counts(text: str) -> Counter:
        """Tokenize ``text`` and count the occurrences of each token."""
        return Counter(galago_tokenizer.tokenize(text))

    claims_by_id: Dict[int, str] = get_all_claim_d()
    perspectives_by_id: Dict[int, str] = get_perspective_dict()

    def build_query(cluster: PerspectiveCluster) -> DocQuery:
        """Convert one cluster into a weighted query."""
        # Query = [claim :: avg(perspective)]
        claim_text = claims_by_id[cluster.claim_id]
        perspective_texts = [
            perspectives_by_id[pid] for pid in cluster.perspective_ids
        ]
        query_id = get_pc_cluster_query_id(cluster)
        claim_tf: Counter = term_counts(claim_text)
        perspective_tf: Counter = average_counters(
            lmap(term_counts, perspective_texts))
        combined_tf = sum_counters([claim_tf, perspective_tf])
        return counter_to_galago_query(query_id, combined_tf)

    queries: List[DocQuery] = lmap(build_query, clusters)
    print(len(queries))
    save_queries_to_file(
        queries,
        os.path.join(output_path, "perspective_query",
                     "pc_query_for_evidence.json"),
    )
def main():
    """Print an error analysis of a ranked evidence list, query by query.

    The ranked list is loaded from the path given as the first command-line
    argument.  Query ids have the form ``<claim_id>_<perspective_id>``.  For
    each query:

    * queries without gold evidence, or whose gold evidence does not appear
      in the ranked list at all, are reported and skipped;
    * precision is computed over the first ``k`` ranked entries, where ``k``
      is the number of gold documents found anywhere in the ranked list;
    * if that precision exceeds 0.99 only "Good" is printed; otherwise the
      claim, the perspective, the gold entries, and the non-gold entries
      with ``rank < 3`` are printed.
    """
    claims_by_id: Dict[str, str] = dict_key_map(str, get_all_claim_d())
    evidence_by_id: Dict[str, str] = dict_key_map(str, load_evidence_dict())
    gold_by_query: Dict[str, List[int]] = evidence_gold_dict_str_qid()
    print("V2")

    def show_entry(entry):
        """Print rank, document id and evidence text of one ranked entry."""
        print("[{}] {}: {}".format(
            entry.rank, entry.doc_id, evidence_by_id[entry.doc_id]))

    grouped_rankings = load_ranked_list_grouped(sys.argv[1])
    for query, ranked_list in grouped_rankings.items():
        print()
        claim_id, perspective_id = query.split("_")

        # Gold ids are stored as ints; doc ids in the ranked list are
        # compared as strings.
        gold_ids: List[str] = lmap(str, gold_by_query[query])
        if not gold_ids:
            print("query {} has no gold".format(query))
            continue

        claim_text = claims_by_id[claim_id]
        perspective_text = perspective_getter(int(perspective_id))

        gold_hits = []
        top_non_gold = []
        for entry in ranked_list:
            if entry.doc_id in gold_ids:
                gold_hits.append(entry)
            elif entry.rank < 3:
                top_non_gold.append(entry)

        if not gold_hits:
            print("gold not in ranked list")
            continue

        cutoff = len(gold_hits)
        precision = average(
            [int(entry.doc_id in gold_ids) for entry in ranked_list[:cutoff]])
        if precision > 0.99:
            print("Good")
            continue

        print("precision at {}: {}".format(cutoff, precision))
        print("Claim: ", claim_text)
        print("perspective_text: ", perspective_text)
        print(" < GOLD >")
        foreach(show_entry, gold_hits)
        print(" < False Positive >")
        foreach(show_entry, top_non_gold)
def main(config):
    """Launch ``QKGenFromDB`` jobs for one data split through a ``JobRunner``.

    Args:
        config: mapping that provides ``'q_res_path'``, ``'split'`` and
            ``'job_name'``.

    The number of jobs is looked up in ``d_n_claims_per_split2`` by split.
    """
    resource_path = config['q_res_path']
    split_name = config['split']
    claims_by_id: Dict[int, str] = get_all_claim_d()

    def make_worker(out_dir):
        """Create the worker that writes its output under ``out_dir``."""
        return QKGenFromDB(resource_path, claims_by_id, out_dir)

    job_count = d_n_claims_per_split2[split_name]
    job_runner = JobRunner(
        job_man_dir, job_count, config['job_name'], make_worker)
    job_runner.auto_runner()
def get_qck_queries_all() -> List[QCKQuery]:
    """Build one ``QCKQuery`` for every perspective cluster.

    The query text is the claim text followed by a single space and the text
    of the perspective with the smallest id in the cluster.

    Returns:
        The queries, in the order the clusters are enumerated.
    """
    clusters = enum_perspective_clusters()
    claims_by_id: Dict[int, str] = get_all_claim_d()

    def to_query(cluster) -> QCKQuery:
        claim_text = claims_by_id[cluster.claim_id]
        # The smallest perspective id stands in for the whole cluster.
        perspective_text = perspective_getter(min(cluster.perspective_ids))
        query_text = claim_text + " " + perspective_text
        return QCKQuery(get_pc_cluster_query_id(cluster), query_text)

    return [to_query(cluster) for cluster in clusters]
def main():
    """Interactively print the gold evidence for claim/perspective pairs.

    Reads queries of the form ``<claim_id>_<perspective_id>`` from standard
    input, one per line.  For each query it prints the claim text, the
    perspective text, and the text of every gold evidence id stored under the
    ``(claim_id, perspective_id)`` key.

    The loop ends cleanly when standard input is exhausted.  A line that is
    not two integers separated by a single underscore is reported and
    skipped, so a typo does not end the session.  Lookups of ids that are
    not present are not handled here and propagate to the caller.
    """
    claim_text_d: Dict[int, str] = get_all_claim_d()
    evidence_d = load_evidence_dict()
    evidence_gold = evidence_gold_dict()

    while True:
        try:
            s = input()
        except EOFError:
            # End of input (Ctrl-D or exhausted pipe): stop without a traceback.
            break

        try:
            cid_str, pid_str = s.split("_")
            cid = int(cid_str)
            pid = int(pid_str)
        except ValueError:
            # Wrong number of "_"-separated parts, or a non-integer part.
            print("Expected <claim_id>_<perspective_id>, got: {!r}".format(s))
            continue

        print("Claim: ", claim_text_d[cid])
        print("Perspective: ", perspective_getter(pid))
        key = cid, pid
        for eid in evidence_gold[key]:
            print("Evidence: ", evidence_d[eid])
def get_qck_queries(split) -> List[QCKQuery]:
    """Build one ``QCKQuery`` per perspective cluster of the given split.

    Clusters whose claim id is not among the split's claim ids are ignored.
    The query text is the claim text followed by a single space and the text
    of the perspective with the smallest id in the cluster.

    Args:
        split: split identifier passed to ``load_claim_ids_for_split`` and
            ``enum_perspective_clusters_for_split``.

    Returns:
        The queries, in the order the clusters are enumerated.
    """
    split_claim_ids = set(load_claim_ids_for_split(split))
    clusters = enum_perspective_clusters_for_split(split)
    claims_by_id: Dict[int, str] = get_all_claim_d()

    queries = []
    for cluster in clusters:
        if cluster.claim_id not in split_claim_ids:
            continue
        claim_text = claims_by_id[cluster.claim_id]
        # The smallest perspective id stands in for the whole cluster.
        perspective_text = perspective_getter(min(cluster.perspective_ids))
        query_text = claim_text + " " + perspective_text
        queries.append(QCKQuery(get_pc_cluster_query_id(cluster), query_text))
    return queries