def allrank(args):
    """Brute-force rank every document against every query with dot-product
    scores, keep the top `args.hit` docs per query, and write score + rank
    files via generate_rank.

    Docs are scored in GPU-sized chunks of `args.per_gpu_doc_num`; per-query
    top-k state is maintained in a `heapq` min-heap (same ordering semantics
    as the original queue.PriorityQueue, but without its thread-safety
    locking overhead in this single-threaded hot loop).
    """
    import heapq  # local import: top-k maintenance below

    doc_embedding_memmap, doc_id_memmap = get_embed_memmap(
        args.doc_embedding_dir, args.embedding_dim)
    # Doc ids must be exactly 0..N-1 so a chunk's positional indices map
    # directly to doc ids.
    assert np.all(doc_id_memmap == list(range(len(doc_id_memmap))))

    query_embedding_memmap, query_id_memmap = get_embed_memmap(
        args.query_embedding_dir, args.embedding_dim)
    qid2pos = {identity: i for i, identity in enumerate(query_id_memmap)}

    # Per-query min-heap of (score, docid); heap[0] is the weakest current hit.
    results_dict = {qid: [] for qid in query_id_memmap}

    for doc_begin_index in tqdm(
            range(0, len(doc_id_memmap), args.per_gpu_doc_num), desc="doc"):
        doc_end_index = doc_begin_index + args.per_gpu_doc_num
        doc_ids = doc_id_memmap[doc_begin_index:doc_end_index]
        doc_embeddings = doc_embedding_memmap[doc_begin_index:doc_end_index]
        doc_embeddings = torch.from_numpy(doc_embeddings).to(args.device)
        for qid in tqdm(query_id_memmap, desc="query"):
            query_embedding = query_embedding_memmap[qid2pos[qid]]
            query_embedding = torch.from_numpy(query_embedding).to(args.device)
            # Inner-product relevance of this query against the whole chunk.
            all_scores = torch.sum(query_embedding * doc_embeddings, dim=-1)
            k = min(args.hit, len(doc_embeddings))
            top_scores, top_indices = torch.topk(
                all_scores, k, largest=True, sorted=True)
            top_scores, top_indices = top_scores.cpu(), top_indices.cpu()
            top_doc_ids = doc_ids[top_indices.numpy()]
            cur_heap = results_dict[qid]
            for score, docid in zip(top_scores, top_doc_ids):
                score, docid = score.item(), docid.item()
                if len(cur_heap) < args.hit:
                    heapq.heappush(cur_heap, (score, docid))
                elif score > cur_heap[0][0]:
                    # New hit beats the current weakest: swap it in.
                    heapq.heapreplace(cur_heap, (score, docid))
                else:
                    # Chunk hits arrive in descending score order, so no
                    # later hit from this chunk can enter the heap either.
                    break

    score_path = f"{args.output_path}.score"
    with open(score_path, 'w') as outputfile:
        for qid, cur_heap in results_dict.items():
            # Pop in ascending (score, docid) order — same drain order the
            # original PriorityQueue produced.
            while cur_heap:
                score, docid = heapq.heappop(cur_heap)
                outputfile.write(f"{qid}\t{docid}\t{score}\n")
    generate_rank(score_path, args.output_path)
def evaluate(args, model, tokenizer, prefix=""):
    """Score dev.small top-N query/passage pairs while masking post-softmax
    attention in the first `args.mask_layer_num` layers, then write scores,
    generate a ranking, report MRR, and record it as a symlink whose name is
    suffixed with the MRR value.

    Args:
        args: namespace with data paths, batch sizes, device, mask options.
        model: scoring model; outputs[0][:, 1] is used as the pair score.
        tokenizer: tokenizer passed through to TopNDataset.
        prefix: unused here; kept for signature compatibility with callers.
    """
    eval_dataset = TopNDataset(args.topN_file, tokenizer, "dev.small",
                               args.msmarco_dir, args.collection_memmap_dir,
                               args.tokenize_dir, args.max_query_length,
                               args.max_seq_length)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_dataloader = DataLoader(
        eval_dataset, batch_size=args.eval_batch_size,
        collate_fn=get_collate_function(args.mask_target))

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Layers 0 .. mask_layer_num-1 get their post-softmax attention masked.
    attention_mask_after_softmax_layer_set = set(range(args.mask_layer_num))
    logger.info("attention_mask_after_softmax_layer_set: {}".format(
        attention_mask_after_softmax_layer_set))

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    with open(args.output_score_path, 'w') as outputfile:
        for batch, qids, pids in tqdm(
                eval_dataloader,
                desc=f"{args.mask_target}@{args.mask_layer_num}"):
            model.eval()
            batch = {k: v.to(args.device) for k, v in batch.items()}
            batch['attention_mask_after_softmax_layer_set'] = \
                attention_mask_after_softmax_layer_set
            with torch.no_grad():
                outputs = model(**batch)
                scores = outputs[0].detach().cpu().numpy()
                # Column 1 — presumably the positive-class score; confirm
                # against the model head if this file is reused.
                for qid, pid, score in zip(qids, pids, scores[:, 1]):
                    outputfile.write(f"{qid}\t{pid}\t{score}\n")
    generate_rank(args.output_score_path, args.output_rank_path)
    mrr = eval_results(args.output_rank_path)
    abs_output_rank_path = os.path.abspath(args.output_rank_path)
    mrr_ln_path = f"{abs_output_rank_path}.{mrr:.3f}"
    # os.symlink replaces the external `ln -s` call: portable, no subprocess,
    # and still raises (FileExistsError) if the link already exists.
    os.symlink(abs_output_rank_path, mrr_ln_path)
    print(mrr)
def evaluate(args, model, mode, prefix):
    """Score an MSMARCO split end-to-end, write a `<prefix>.<mode>.score.tsv`
    file, build the corresponding rank file, and return MRR for the dev split.

    Args:
        args: namespace with paths, batch sizes, device, worker count.
        model: model whose outputs[0] diagonal holds per-pair scores.
        mode: split name (e.g. "dev"); also selects the collate function.
        prefix: tag used in log lines and output file names.

    Returns:
        float MRR when mode == "dev"; implicitly None for any other mode.
    """
    eval_output_dir = args.eval_save_dir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(eval_output_dir, exist_ok=True)
    eval_dataset = MSMARCODataset(mode, args.msmarco_dir,
                                  args.collection_memmap_dir,
                                  args.tokenize_dir, args.max_query_length,
                                  args.max_doc_length)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    collate_fn = get_collate_function(mode=mode)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.eval_batch_size,
                                 num_workers=args.data_num_workers,
                                 collate_fn=collate_fn)

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    output_file_path = f"{eval_output_dir}/{prefix}.{mode}.score.tsv"
    with open(output_file_path, 'w') as outputfile:
        for batch, qids, docids in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            with torch.no_grad():
                batch = {k: v.to(args.device) for k, v in batch.items()}
                outputs = model(**batch)
                # The diagonal pairs the i-th query with the i-th doc of the
                # batch-wise score matrix.
                scores = torch.diagonal(outputs[0]).detach().cpu().numpy()
                assert len(qids) == len(docids) == len(scores)
                for qid, docid, score in zip(qids, docids, scores):
                    outputfile.write(f"{qid}\t{docid}\t{score}\n")
    rank_output = f"{eval_output_dir}/{prefix}.{mode}.rank.tsv"
    generate_rank(output_file_path, rank_output)
    if mode == "dev":
        mrr = eval_results(rank_output)
        return mrr
def evaluate(args, model, mode, prefix, eval_dataset=None):
    """Score a CLEAR split, write `<prefix>.<mode>.score.tsv`, build the rank
    file, and return a rescaled MRR for the dev.small split.

    Args:
        args: namespace with paths, batch sizes, device, worker count and
            `num_eval_queries` for the MRR rescale.
        model: callable returning one score per (qid, pid) row.
        mode: split name; also selects the collate function.
        prefix: tag used in log lines and output file names.
        eval_dataset: optional pre-built dataset; constructed from `args`
            when omitted (backward-compatible default).

    Returns:
        float rescaled MRR when mode == "dev.small"; implicitly None otherwise.
    """
    eval_output_dir = args.eval_save_dir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(eval_output_dir, exist_ok=True)
    if eval_dataset is None:  # identity check, not `== None` (PEP 8)
        eval_dataset = CLEARDataset(mode=mode, args=args)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    collate_fn = get_collate_function(mode=mode)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.eval_batch_size,
                                 num_workers=args.data_num_workers,
                                 pin_memory=True,
                                 collate_fn=collate_fn)

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    output_file_path = f"{eval_output_dir}/{prefix}.{mode}.score.tsv"
    with open(output_file_path, 'w') as outputfile:
        for batch, qids, pids in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            with torch.no_grad():
                batch = {k: v.to(args.device) for k, v in batch.items()}
                scores = model(**batch)
                assert len(qids) == len(pids) == len(scores)
                for qid, pid, score in zip(qids, pids, scores):
                    outputfile.write(f"{qid}\t{pid}\t{score}\n")
    rank_output = f"{eval_output_dir}/{prefix}.{mode}.rank.tsv"
    generate_rank(output_file_path, rank_output)
    if mode == "dev.small":
        # 6980 is presumably the query count of the full dev.small set, so
        # this rescales MRR when only `num_eval_queries` were evaluated —
        # TODO confirm against the dataset definition.
        mrr = eval_results(rank_output) * 6980 / args.num_eval_queries
        return mrr
def evaluate(args, model):
    """Score dev.small probe embeddings for one layer, write per-layer score
    and rank TSVs, and record the resulting MRR as a symlink suffix.

    Args:
        args: namespace with embedding root, layer/key selection, batch size,
            device, worker count and output directory.
        model: probe model; outputs[0][:, 1] is used as the pair score.
    """
    eval_dataset = ProbDataset(
        f"{args.embd_root}/dev.small/{args.key}/{args.layer}",
        args.msmarco_dir, "dev.small", args.max_token_num)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 pin_memory=False,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=get_collate_function(),
                                 num_workers=args.data_num_workers)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    model.eval()
    output_score_path = f"{args.eval_output_dir}/layer_{args.layer}.score.tsv"
    output_rank_path = f"{args.eval_output_dir}/layer_{args.layer}.rank.tsv"
    with open(output_score_path, 'w') as outputfile:
        # Unused enumerate indices from the original were dropped.
        for batch, qids, pids in tqdm(eval_dataloader, desc="Evaluating"):
            # Labels are training-only; the probe is scored without them.
            del batch['labels']
            batch = {k: v.to(args.device) for k, v in batch.items()}
            with torch.no_grad():
                softmax_logits = model(**batch)[0].detach().cpu().numpy()
            # Column 1 — presumably the positive-class probability.
            scores = softmax_logits[:, 1]
            for qid, pid, score in zip(qids, pids, scores):
                outputfile.write(f"{qid}\t{pid}\t{score}\n")
    generate_rank(output_score_path, output_rank_path)
    mrr = eval_results(output_rank_path)
    abs_output_rank_path = os.path.abspath(output_rank_path)
    mrr_ln_path = f"{abs_output_rank_path}.{mrr:.3f}"
    # os.symlink replaces the external `ln -s` call: portable, no subprocess.
    os.symlink(abs_output_rank_path, mrr_ln_path)
parser.add_argument(
    "--mask_methods", type=str, nargs="+",
    default=["commas", "token_mask", "attention_mask", "None"])
parser.add_argument("--input_dir", type=str, default="./data/adversary")
parser.add_argument("--output_dir", type=str, default="./data/adversary")
args = parser.parse_args()

# Baseline scores from the unmasked ("None") run.  For each mask method,
# overwrite matching (qid, pid) entries with the masked score, rank the
# combined table, and report MRR@10.
# NOTE(review): origin_scores is mutated across loop iterations — if mask
# methods score *different* key subsets, later methods inherit earlier
# overwrites rather than the baseline; confirm this is intended.
origin_scores = read_scores(f"{args.input_dir}/None.score.tsv")
for mask_method in args.mask_methods:
    new_scores = read_scores(f"{args.input_dir}/{mask_method}.score.tsv")
    for key, score in new_scores.items():
        if key in origin_scores:
            origin_scores[key] = score
    temp_score_path = f"{args.output_dir}/temp.{mask_method}.score.tsv"
    # Mode "x" raises FileExistsError if the file already exists — unlike the
    # original bare `assert`, this survives `python -O`.
    with open(temp_score_path, "x") as outFile:
        for (qid, pid), score in origin_scores.items():
            outFile.write(f"{qid}\t{pid}\t{score}\n")
    output_rank_path = f"{args.output_dir}/{mask_method}.rank.tsv"
    generate_rank(temp_score_path, output_rank_path)
    os.remove(temp_score_path)  # replaces the external `rm` subprocess call
    mrr = eval_results(output_rank_path)
    abs_output_rank_path = os.path.abspath(output_rank_path)
    rank_with_mrr_path = f"{abs_output_rank_path}.{mrr:.3f}"
    if not os.path.exists(rank_with_mrr_path):
        # os.symlink replaces the external `ln -s` call: portable, no subprocess.
        os.symlink(abs_output_rank_path, rank_with_mrr_path)
    print(mask_method, "MRR@10:", mrr)
# Exact inner-product retrieval over all doc embeddings with FAISS:
# score query batches, dump (qid, pid, score) triples, then rank them.
assert query_embeddings.shape[1] == doc_embeddings.shape[1]
dim = query_embeddings.shape[1]

# faiss GPU
print("Initializing FAISS...")
index_flat = faiss.IndexFlatIP(dim)  # flat (exhaustive) inner-product index

# add base vectors
print("Adding docs...")
index_flat.add(doc_embeddings)

# search: fixed retrieval depth of 5000 hits per query, not the full corpus.
depth = 5000
steps = math.ceil(query_embeddings.shape[0] / args.search_batch)
score_path = f"{args.result_file}/rank_score.tsv"
with open(score_path, 'w') as outfile:
    for step in tqdm(range(steps), desc="steps"):
        begin = step * args.search_batch
        batch = query_embeddings[begin:begin + args.search_batch]
        # scores: (batch, depth) inner products; ids: doc positions, which
        # index both the FAISS store and the `pids` lookup table.
        scores, ids = index_flat.search(batch, depth)
        for row, index_pids in enumerate(ids):
            query_pos = begin + row  # position into `qids`, not a raw qid
            for col, index_pid in enumerate(index_pids):
                outfile.write(
                    f"{qids[query_pos]}\t{pids[index_pid]}\t"
                    f"{scores[row, col]}\n")
output_path = f"{args.result_file}/rank.tsv"
generate_rank(score_path, output_path)