# NOTE(review): whitespace-mangled fragment — a multi-statement span collapsed onto
# one physical line, so statement boundaries and indentation are lost and this is
# not valid Python as written. Left byte-identical; reformat against the original
# script to repair. What the visible text shows:
#   1) The tail of a per-hit loop that writes TREC-run-style lines
#      ('<topic> Q0 <docid> <rank> <score> <tag>'), increments `rank`, records
#      `docid` in `unique_docs`, and breaks once `rank` exceeds
#      args.max_passage_hits. The enclosing loop (defining `topic`, `docid`,
#      `hit`, `rank`) is outside this fragment.
#   2) A topic-processing loop: picks a pre-defined topic order from QUERY_IDS
#      when available, then either searches one query at a time
#      (batch_size/threads <= 1) or accumulates queries and flushes a
#      batch_search every args.batch_size queries or at the final topic.
# The trailing bare `else:` is cut off mid-construct — its body (presumably
# `continue`, as in the parallel fragments below) lies outside this fragment.
target_file.write( f'{topic} Q0 {docid} {rank} {hit.score:.6f} {tag}\n') rank = rank + 1 unique_docs.add(docid) if rank > args.max_passage_hits: break order = None if args.topics in QUERY_IDS: order = QUERY_IDS[args.topics] with open(output_path, 'w') as target_file: batch_topics = list() batch_topic_ids = list() for index, (topic_id, text) in enumerate( tqdm(list(query_iterator(topics, order)))): if args.batch_size <= 1 and args.threads <= 1: hits = searcher.search(text, args.hits) results = [(topic_id, hits)] else: batch_topic_ids.append(str(topic_id)) batch_topics.append(text) if (index + 1) % args.batch_size == 0 or \ index == len(topics.keys()) - 1: results = searcher.batch_search(batch_topics, batch_topic_ids, args.hits, args.threads) results = [(id_, results[id_]) for id_ in batch_topic_ids] batch_topic_ids.clear() batch_topics.clear() else:
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It is the run loop of a hybrid (sparse+dense fusion) search CLI: resolves the
# output run file, optionally applies a pre-registered topic ordering, then
# retrieves hits per topic either one query at a time or in batches.

# build output path
output_path = args.run.output
print(f'Running {args.run.topics} topics, saving to {output_path}...')
tag = 'hybrid'

# Use a pre-defined topic ordering when one is registered for this topic set.
order = None
if args.run.topics in QUERY_IDS:
    print(f'Using pre-defined topic order for {args.run.topics}')
    order = QUERY_IDS[args.run.topics]

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(tqdm(list(query_iterator(topics, order)))):
        if args.run.batch_size <= 1 and args.run.threads <= 1:
            # Sequential path: one fused query at a time.
            hits = hsearcher.search(text, args.run.hits, args.fusion.alpha)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.run.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = hsearcher.batch_search(
                    batch_topics, batch_topic_ids, args.run.hits,
                    args.run.threads, args.fusion.alpha)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                batch_topics.clear()
            else:
                # Batch not full yet and not the last topic: nothing to write.
                continue
        # NOTE(review): the code that consumes `results` (writing run lines to
        # target_file) follows outside this fragment.
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It is the run loop of a dense (FAISS) search CLI.

# create searcher from prebuilt index name
searcher = SimpleDenseSearcher.from_prebuilt_index(args.index, query_encoder)
if not searcher:
    exit()

# build output path
output_path = args.output
print(f'Running {args.topics} topics, saving to {output_path}...')
tag = 'Faiss'

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(
            tqdm(list(query_iterator(topics, args.topics)))):
        if args.batch_size <= 1 and args.threads <= 1:
            # Sequential path: one query at a time.
            hits = searcher.search(text, args.hits)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = searcher.batch_search(
                    batch_topics, batch_topic_ids, args.hits, args.threads)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                batch_topics.clear()
            else:
                # Batch not full yet and not the last topic: nothing to write.
                continue
        # NOTE(review): the code that consumes `results` (writing run lines to
        # target_file) follows outside this fragment.
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It builds a hybrid searcher from a dense and a sparse searcher, then runs the
# per-topic retrieval loop.

hsearcher = HybridSearcher(dsearcher, ssearcher)
if not hsearcher:
    exit()

# build output path
output_path = args.run.output
print(f'Running {args.run.topics} topics, saving to {output_path}...')
tag = 'hybrid'

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(
            tqdm(list(query_iterator(topics, args.topics)))):
        if args.run.batch_size <= 1 and args.run.threads <= 1:
            # Sequential path: one fused query at a time.
            hits = hsearcher.search(text, args.run.hits, args.fusion.alpha)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.run.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = hsearcher.batch_search(
                    batch_topics, batch_topic_ids, args.run.hits,
                    args.run.threads, args.fusion.alpha)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                # NOTE(review): the fragment is truncated here — the parallel
                # fragments also clear batch_topics and `continue` on non-flush
                # iterations; confirm against the full script.
# NOTE(review): whitespace-mangled fragment — a multi-statement span collapsed onto
# one physical line, and additionally cut mid-expression at the start (the opening
# f-string is the severed argument of a write call — presumably
# `target_file.write(`, as in the first fragment above; confirm). Not valid Python
# as written; left byte-identical. What the visible text shows:
#   1) The tail of a per-hit loop emitting TREC-run-style lines
#      ('<topic> Q0 <docid> <rank> <score> <tag>'), incrementing `rank`, recording
#      `docid` in `unique_docs`, and breaking after args.max_passage_hits hits.
#      The enclosing loop (defining `topic`, `docid`, `hit`, `rank`) is outside
#      this fragment.
#   2) A topic-processing loop: picks a pre-defined topic order from QUERY_IDS
#      when available, then searches one query at a time when
#      batch_size/threads <= 1, otherwise batches queries and flushes a
#      batch_search every args.batch_size queries or at the final topic,
#      continuing to the next topic when the batch is not yet full.
f'{topic} Q0 {docid} {rank} {hit.score:.6f} {tag}\n') rank = rank + 1 unique_docs.add(docid) if rank > args.max_passage_hits: break order = None if args.topics in QUERY_IDS: order = QUERY_IDS[args.topics] with open(output_path, 'w') as target_file: batch_topics = list() batch_topic_ids = list() for index, (topic_id, text) in enumerate(tqdm(list(query_iterator(topics, order)))): if args.batch_size <= 1 and args.threads <= 1: hits = searcher.search(text, args.hits) results = [(topic_id, hits)] else: batch_topic_ids.append(str(topic_id)) batch_topics.append(text) if (index + 1) % args.batch_size == 0 or \ index == len(topics.keys()) - 1: results = searcher.batch_search(batch_topics, batch_topic_ids, args.hits, args.threads) results = [(id_, results[id_]) for id_ in batch_topic_ids] batch_topic_ids.clear() batch_topics.clear() else: continue