# Run all queries against the sparse index and append TREC/MS MARCO result
# lines to `output_path`. Queries are answered one at a time when both
# batch_size and threads are <= 1; otherwise they are accumulated and flushed
# through batch_search() whenever a full batch is ready or the last query has
# been seen.
with open(output_path, 'w') as target_file:
    batch_topics = []
    batch_topic_ids = []
    # Materialize once: tqdm needs a sized iterable for its progress bar, and
    # we need the true query count to know when to flush the final
    # (possibly partial) batch. Keying the flush to len(queries) rather than
    # len(topics) stays correct even if the iterator yields a different
    # number of queries than there are topic keys.
    queries = list(query_iterator(topics, order))
    for index, (topic_id, text) in enumerate(tqdm(queries)):
        if args.batch_size <= 1 and args.threads <= 1:
            # Sequential path: search immediately, emit immediately.
            hits = searcher.search(text, args.hits)
            results = [(topic_id, hits)]
        else:
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.batch_size == 0 or \
                    index == len(queries) - 1:
                # Flush a full batch, or the final partial batch.
                results = searcher.batch_search(
                    batch_topics, batch_topic_ids, args.hits, args.threads)
                # batch_search returns a dict keyed by topic id; restore the
                # submission order before writing.
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                batch_topics.clear()
            else:
                # Batch not full yet and not the last query: keep buffering.
                continue

        for result in results:
            if args.max_passage:
                write_result_max_passage(target_file, result,
                                         args.max_passage_delimiter,
                                         args.max_passage_hits,
                                         args.msmarco, tag)
            else:
                # NOTE(review): the source chunk was truncated mid-call here;
                # the trailing `tag)` is reconstructed to mirror the
                # write_result_max_passage branch above — confirm upstream.
                write_result(target_file, result, args.hits, args.msmarco,
                             tag)
# Run all topics against the Faiss dense index and write TREC/MS MARCO
# result lines to args.output. When args.batch > 1, topics are searched in
# batches and the script terminates once the batch loop finishes; otherwise
# control falls through to the per-topic loop below.
output_path = args.output
print(f'Running {args.topics} topics, saving to {output_path}...')
tag = 'Faiss'

if args.batch > 1:
    with open(output_path, 'w') as target_file:
        topic_keys = sorted(topics.keys())
        for start in tqdm(range(0, len(topic_keys), args.batch)):
            topic_key_batch = topic_keys[start:start + args.batch]
            topic_batch = [
                topics[key].get('title').strip() for key in topic_key_batch
            ]
            # batch_search returns a dict mapping topic id -> ranked hits.
            hits = searcher.batch_search(topic_batch,
                                         topic_key_batch,
                                         k=args.hits,
                                         threads=args.threads)
            for topic in hits:
                # enumerate from 1: output ranks are 1-based.
                for rank, hit in enumerate(hits[topic], start=1):
                    if args.msmarco:
                        target_file.write(f'{topic}\t{hit.docid}\t{rank}\n')
                    else:
                        target_file.write(
                            f'{topic} Q0 {hit.docid} {rank} {hit.score:.6f} {tag}\n'
                        )
    # Batch mode handles everything; stop here. `raise SystemExit` replaces
    # the original bare exit(): exit() is a site-module convenience meant for
    # interactive sessions and is absent when Python runs with -S, whereas
    # SystemExit terminates identically without any import.
    raise SystemExit

# Sequential per-topic path (args.batch <= 1).
with open(output_path, 'w') as target_file:
    for index, topic in enumerate(tqdm(sorted(topics.keys()))):
        search = topics[topic].get('title').strip()
        hits = searcher.search(search, args.hits, threads=args.threads)
        # NOTE(review): the source chunk ends here — result writing for this
        # loop presumably continues past this view; confirm against the full
        # file before relying on this fragment alone.