Example #1
def allrank(args):
    doc_embedding_memmap, doc_id_memmap = get_embed_memmap(
        args.doc_embedding_dir, args.embedding_dim)
    # doc ids are assumed to be 0..N-1 so that a position in the memmap is also the doc id
    assert np.all(doc_id_memmap == list(range(len(doc_id_memmap))))

    query_embedding_memmap, query_id_memmap = get_embed_memmap(
        args.query_embedding_dir, args.embedding_dim)
    qid2pos = {identity: i for i, identity in enumerate(query_id_memmap)}
    results_dict = {
        qid: PriorityQueue(maxsize=args.hit)
        for qid in query_id_memmap
    }

    for doc_begin_index in tqdm(range(0, len(doc_id_memmap),
                                      args.per_gpu_doc_num),
                                desc="doc"):
        doc_end_index = doc_begin_index + args.per_gpu_doc_num
        doc_ids = doc_id_memmap[doc_begin_index:doc_end_index]
        doc_embeddings = doc_embedding_memmap[doc_begin_index:doc_end_index]
        doc_embeddings = torch.from_numpy(doc_embeddings).to(args.device)
        for qid in tqdm(query_id_memmap, desc="query"):
            query_embedding = query_embedding_memmap[qid2pos[qid]]
            query_embedding = torch.from_numpy(query_embedding)
            query_embedding = query_embedding.to(args.device)

            # inner-product relevance scores between this query and every doc in the chunk
            all_scores = torch.sum(query_embedding * doc_embeddings, dim=-1)

            k = min(args.hit, len(doc_embeddings))
            top_scores, top_indices = torch.topk(all_scores,
                                                 k,
                                                 largest=True,
                                                 sorted=True)
            top_scores, top_indices = top_scores.cpu(), top_indices.cpu()
            top_doc_ids = doc_ids[top_indices.numpy()]
            cur_q_queue = results_dict[qid]
            for score, docid in zip(top_scores, top_doc_ids):
                score, docid = score.item(), docid.item()
                if cur_q_queue.full():
                    # the queue keeps the current top-`hit` docs for this query,
                    # with the lowest-scoring doc at the front
                    lowest_score, lowest_docid = cur_q_queue.get_nowait()
                    if lowest_score >= score:
                        cur_q_queue.put_nowait((lowest_score, lowest_docid))
                        # top_scores is sorted in descending order, so no later
                        # doc in this chunk can enter the queue either
                        break
                    else:
                        cur_q_queue.put_nowait((score, docid))
                else:
                    cur_q_queue.put_nowait((score, docid))

    score_path = f"{args.output_path}.score"
    with open(score_path, 'w') as outputfile:
        for qid, docqueue in results_dict.items():
            while not docqueue.empty():
                score, docid = docqueue.get_nowait()
                outputfile.write(f"{qid}\t{docid}\t{score}\n")
    generate_rank(score_path, args.output_path)
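The allrank example above (and the FAISS example at the bottom of this page) loads its vectors through get_embed_memmap, which this page does not show. Below is a minimal sketch of what such a helper might return, assuming the ids and embeddings live in flat NumPy memmaps; the file names ids.memmap and embedding.memmap and the dtypes are assumptions, not the repository's actual layout.

import numpy as np

def get_embed_memmap(embedding_dir, embedding_dim):
    # Hypothetical layout: an int32 memmap of ids plus a float32 memmap holding
    # the (num_ids, embedding_dim) embedding matrix.
    id_memmap = np.memmap(f"{embedding_dir}/ids.memmap",
                          dtype="int32", mode="r")
    embedding_memmap = np.memmap(f"{embedding_dir}/embedding.memmap",
                                 dtype="float32", mode="r",
                                 shape=(len(id_memmap), embedding_dim))
    return embedding_memmap, id_memmap

Example #2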
def evaluate(args, model, tokenizer, prefix=""):
    eval_dataset = TopNDataset(args.topN_file, tokenizer, "dev.small",
                               args.msmarco_dir, args.collection_memmap_dir,
                               args.tokenize_dir, args.max_query_length,
                               args.max_seq_length)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=get_collate_function(
                                     args.mask_target))

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    attention_mask_after_softmax_layer_set = set(range(args.mask_layer_num))

    logger.info("attention_mask_after_softmax_layer_set: {}".format(
        attention_mask_after_softmax_layer_set))
    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    cnt = 0
    with open(args.output_score_path, 'w') as outputfile:
        for batch, qids, pids in tqdm(
                eval_dataloader,
                desc=f"{args.mask_target}@{args.mask_layer_num}"):
            model.eval()
            batch = {k: v.to(args.device) for k, v in batch.items()}
            batch['attention_mask_after_softmax_layer_set'] = attention_mask_after_softmax_layer_set
            with torch.no_grad():
                outputs = model(**batch)
                scores = outputs[0].detach().cpu().numpy()
                for qid, pid, score in zip(qids, pids, scores[:, 1]):
                    outputfile.write(f"{qid}\t{pid}\t{score}\n")
            cnt += 1
            # if cnt > 1000:
            #     break
    generate_rank(args.output_score_path, args.output_rank_path)
    mrr = eval_results(args.output_rank_path)
    abs_output_rank_path = os.path.abspath(args.output_rank_path)
    mrr_ln_path = f"{abs_output_rank_path}.{mrr:.3f}"
    subprocess.check_call(["ln", "-s", abs_output_rank_path, mrr_ln_path])
    print(mrr)
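Every example on this page funnels its raw scores through generate_rank before computing metrics; the helper itself is defined elsewhere in these repositories. A minimal sketch of its expected behaviour, assuming tab-separated qid, docid, score rows on the way in, tab-separated qid, docid, rank rows on the way out, and a hypothetical cutoff of 1000 documents per query:

from collections import defaultdict

def generate_rank(score_path, rank_path, hit=1000):
    # group scores by query, sort each group by descending score,
    # then write one (qid, docid, rank) row per kept document
    per_query = defaultdict(list)
    with open(score_path) as f:
        for line in f:
            qid, docid, score = line.rstrip("\n").split("\t")
            per_query[qid].append((float(score), docid))
    with open(rank_path, "w") as out:
        for qid, doc_scores in per_query.items():
            doc_scores.sort(key=lambda pair: pair[0], reverse=True)
            for rank, (_, docid) in enumerate(doc_scores[:hit], start=1):
                out.write(f"{qid}\t{docid}\t{rank}\n")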
Example #3
def evaluate(args, model, mode, prefix):
    eval_output_dir = args.eval_save_dir
    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    eval_dataset = MSMARCODataset(mode, args.msmarco_dir,
                                  args.collection_memmap_dir,
                                  args.tokenize_dir, args.max_query_length,
                                  args.max_doc_length)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    collate_fn = get_collate_function(mode=mode)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.eval_batch_size,
                                 num_workers=args.data_num_workers,
                                 collate_fn=collate_fn)

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    output_file_path = f"{eval_output_dir}/{prefix}.{mode}.score.tsv"
    with open(output_file_path, 'w') as outputfile:
        for batch, qids, docids in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            with torch.no_grad():
                batch = {k: v.to(args.device) for k, v in batch.items()}
                outputs = model(**batch)
                scores = torch.diagonal(outputs[0]).detach().cpu().numpy()
                assert len(qids) == len(docids) == len(scores)
                for qid, docid, score in zip(qids, docids, scores):
                    outputfile.write(f"{qid}\t{docid}\t{score}\n")

    rank_output = f"{eval_output_dir}/{prefix}.{mode}.rank.tsv"
    generate_rank(output_file_path, rank_output)

    if mode == "dev":
        mrr = eval_results(rank_output)
        return mrr
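eval_results is likewise not shown; from its usage it takes a rank file and returns an MRR on the MS MARCO dev queries. A minimal sketch under that assumption; the default qrels path is hypothetical, and the cutoff of 10 matches the "MRR@10" label printed in Example #6:

from collections import defaultdict

def eval_results(rank_path,
                 qrels_path="./data/msmarco/qrels.dev.small.tsv",  # hypothetical default
                 cutoff=10):
    # qrels maps each query id to its set of relevant passage ids
    qrels = defaultdict(set)
    with open(qrels_path) as f:
        for line in f:
            qid, _, docid, _ = line.split()
            qrels[qid].add(docid)
    # reciprocal rank of the first relevant passage within the cutoff, per query
    best_rr = defaultdict(float)
    with open(rank_path) as f:
        for line in f:
            qid, docid, rank = line.rstrip("\n").split("\t")
            rank = int(rank)
            best_rr[qid] = max(best_rr[qid],
                               1.0 / rank if rank <= cutoff and docid in qrels[qid] else 0.0)
    return sum(best_rr.values()) / max(1, len(best_rr))

Whether the mean is taken over the queries present in the rank file (as in this sketch) or over all 6980 dev.small queries is repository-specific; the 6980 rescaling in Example #4 suggests the latter in that project.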
Example #4
File: train.py  Project: KaishuaiXu/CLEAR
def evaluate(args, model, mode, prefix, eval_dataset=None):
    eval_output_dir = args.eval_save_dir
    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    if eval_dataset is None:
        eval_dataset = CLEARDataset(mode=mode, args=args)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    collate_fn = get_collate_function(mode=mode)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=args.eval_batch_size,
                                 num_workers=args.data_num_workers,
                                 pin_memory=True,
                                 collate_fn=collate_fn)

    # multi-gpu eval
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    output_file_path = f"{eval_output_dir}/{prefix}.{mode}.score.tsv"
    with open(output_file_path, 'w') as outputfile:
        for batch, qids, pids in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            with torch.no_grad():
                batch = {k: v.to(args.device) for k, v in batch.items()}
                scores = model(**batch)
                assert len(qids) == len(pids) == len(scores)
                for qid, pid, score in zip(qids, pids, scores):
                    outputfile.write(f"{qid}\t{pid}\t{score}\n")

    rank_output = f"{eval_output_dir}/{prefix}.{mode}.rank.tsv"
    generate_rank(output_file_path, rank_output)

    if mode == "dev.small":
        mrr = eval_results(rank_output) * 6980 / args.num_eval_queries
        return mrr
Example #5
def evaluate(args, model):
    eval_dataset = ProbDataset(
        f"{args.embd_root}/dev.small/{args.key}/{args.layer}",
        args.msmarco_dir, "dev.small", args.max_token_num)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 pin_memory=False,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=get_collate_function(),
                                 num_workers=args.data_num_workers)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    model.eval()
    output_score_path = f"{args.eval_output_dir}/layer_{args.layer}.score.tsv"
    output_rank_path = f"{args.eval_output_dir}/layer_{args.layer}.rank.tsv"
    with open(output_score_path, 'w') as outputfile:
        for batch, qids, pids in tqdm(eval_dataloader, desc="Evaluating"):
            del batch['labels']
            batch = {k: v.to(args.device) for k, v in batch.items()}
            with torch.no_grad():
                softmax_logits = model(**batch)[0].detach().cpu().numpy()
                scores = softmax_logits[:, 1]
                for qid, pid, score in zip(qids, pids, scores):
                    outputfile.write(f"{qid}\t{pid}\t{score}\n")
    generate_rank(output_score_path, output_rank_path)
    mrr = eval_results(output_rank_path)
    abs_output_rank_path = os.path.abspath(output_rank_path)
    mrr_ln_path = f"{abs_output_rank_path}.{mrr:.3f}"
    subprocess.check_call(["ln", "-s", abs_output_rank_path, mrr_ln_path])
Example #6
    parser.add_argument(
        "--mask_methods",
        type=str,
        nargs="+",
        default=["commas", "token_mask", "attention_mask", "None"])
    parser.add_argument("--input_dir", type=str, default="./data/adversary")
    parser.add_argument("--output_dir", type=str, default="./data/adversary")
    args = parser.parse_args()

    origin_scores = read_scores(f"{args.input_dir}/None.score.tsv")
    for mask_method in args.mask_methods:
        new_scores = read_scores(f"{args.input_dir}/{mask_method}.score.tsv")
        for key, score in new_scores.items():
            if key in origin_scores:
                origin_scores[key] = score
        temp_score_path = f"{args.output_dir}/temp.{mask_method}.score.tsv"
        assert not os.path.exists(temp_score_path)
        with open(temp_score_path, "w") as outFile:
            for (qid, pid), score in origin_scores.items():
                outFile.write(f"{qid}\t{pid}\t{score}\n")
        output_rank_path = f"{args.output_dir}/{mask_method}.rank.tsv"
        generate_rank(temp_score_path, output_rank_path)
        subprocess.check_call(["rm", temp_score_path])
        mrr = eval_results(output_rank_path)
        abs_output_rank_path = os.path.abspath(output_rank_path)
        rank_with_mrr_path = f"{abs_output_rank_path}.{mrr:.3f}"
        if not os.path.exists(rank_with_mrr_path):
            subprocess.check_call(
                ["ln", "-s", abs_output_rank_path, rank_with_mrr_path])
        print(mask_method, "MRR@10:", mrr)
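read_scores is another helper not shown on this page; the way its return value is keyed by (qid, pid) tuples suggests it simply parses a score TSV into a dict. A minimal sketch under that assumption:

def read_scores(path):
    # tab-separated qid, pid, score rows -> {(qid, pid): score}
    scores = {}
    with open(path) as f:
        for line in f:
            qid, pid, score = line.rstrip("\n").split("\t")
            scores[(qid, pid)] = float(score)
    return scores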
Example #7
    assert query_embeddings.shape[1] == doc_embeddings.shape[1]
    dim = query_embeddings.shape[1]

    # faiss GPU
    print("Initializing FAISS...")
    index_flat = faiss.IndexFlatIP(dim)

    # add base vectors
    print("Adding docs...")
    index_flat.add(doc_embeddings)

    # search: retrieve the top-`num` candidates per query
    # num = doc_embeddings.shape[0]
    num = 5000

    steps = math.ceil(query_embeddings.shape[0] / args.search_batch)
    score_path = args.result_file + "/rank_score.tsv"
    with open(score_path, 'w') as outfile:
        for i in tqdm(range(steps), desc="steps"):
            batch = query_embeddings[i * args.search_batch:(i + 1) *
                                     args.search_batch]
            D, I = index_flat.search(batch, num)
            for b_i, index_pids in enumerate(I):
                qid = i * args.search_batch + b_i
                for b_j, index_pid in enumerate(index_pids):
                    outfile.write(
                        f"{qids[qid]}\t{pids[index_pid]}\t{D[b_i, b_j]}\n")

    output_path = args.result_file + "/rank.tsv"
    generate_rank(score_path, output_path)
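Note that despite the "faiss GPU" comment, IndexFlatIP as written is an exact, CPU-side index. If the faiss-gpu build is installed, the same flat index can be cloned onto a GPU before the documents are added; a sketch reusing the variables from the example above (index_flat, doc_embeddings, query_embeddings, num, args.search_batch):

import faiss

# clone the CPU flat index onto GPU 0 (requires the faiss-gpu package)
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index_flat)
gpu_index.add(doc_embeddings)   # same add()/search() API as the CPU index
D, I = gpu_index.search(query_embeddings[:args.search_batch], num)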