# NOTE(review): whitespace-mangled fragment — a multi-statement span collapsed onto
# one physical line, so statement boundaries and indentation are lost and this is
# not valid Python as written. Left byte-identical; reformat against the original
# script to repair. What the visible text shows:
#   1) The tail of a per-hit loop that writes TREC-run-style lines
#      ('<topic> Q0 <docid> <rank> <score> <tag>'), increments `rank`, records
#      `docid` in `unique_docs`, and breaks once `rank` exceeds
#      args.max_passage_hits. The enclosing loop (defining `topic`, `docid`,
#      `hit`, `rank`) is outside this fragment.
#   2) A topic-processing loop: picks a pre-defined topic order from QUERY_IDS
#      when available, then either searches one query at a time
#      (batch_size/threads <= 1) or accumulates queries and flushes a
#      batch_search every args.batch_size queries or at the final topic.
# The trailing bare `else:` is cut off mid-construct — its body (presumably
# `continue`, as in the parallel fragments below) lies outside this fragment.
target_file.write( f'{topic} Q0 {docid} {rank} {hit.score:.6f} {tag}\n') rank = rank + 1 unique_docs.add(docid) if rank > args.max_passage_hits: break order = None if args.topics in QUERY_IDS: order = QUERY_IDS[args.topics] with open(output_path, 'w') as target_file: batch_topics = list() batch_topic_ids = list() for index, (topic_id, text) in enumerate( tqdm(list(query_iterator(topics, order)))): if args.batch_size <= 1 and args.threads <= 1: hits = searcher.search(text, args.hits) results = [(topic_id, hits)] else: batch_topic_ids.append(str(topic_id)) batch_topics.append(text) if (index + 1) % args.batch_size == 0 or \ index == len(topics.keys()) - 1: results = searcher.batch_search(batch_topics, batch_topic_ids, args.hits, args.threads) results = [(id_, results[id_]) for id_ in batch_topic_ids] batch_topic_ids.clear() batch_topics.clear() else:
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It is the run loop of a hybrid (sparse+dense fusion) search CLI: resolves the
# output run file, optionally applies a pre-registered topic ordering, then
# retrieves hits per topic either one query at a time or in batches.

# build output path
output_path = args.run.output
print(f'Running {args.run.topics} topics, saving to {output_path}...')
tag = 'hybrid'

# Use a pre-defined topic ordering when one is registered for this topic set.
order = None
if args.run.topics in QUERY_IDS:
    print(f'Using pre-defined topic order for {args.run.topics}')
    order = QUERY_IDS[args.run.topics]

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(tqdm(list(query_iterator(topics, order)))):
        if args.run.batch_size <= 1 and args.run.threads <= 1:
            # Sequential path: one fused query at a time.
            hits = hsearcher.search(text, args.run.hits, args.fusion.alpha)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.run.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = hsearcher.batch_search(
                    batch_topics, batch_topic_ids, args.run.hits,
                    args.run.threads, args.fusion.alpha)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                batch_topics.clear()
            else:
                # Batch not full yet and not the last topic: nothing to write.
                continue
        # NOTE(review): the code that consumes `results` (writing run lines to
        # target_file) follows outside this fragment.
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It is the run loop of a dense (FAISS) search CLI.

# create searcher from prebuilt index name
searcher = SimpleDenseSearcher.from_prebuilt_index(args.index, query_encoder)
if not searcher:
    exit()

# build output path
output_path = args.output
print(f'Running {args.topics} topics, saving to {output_path}...')
tag = 'Faiss'

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(
            tqdm(list(query_iterator(topics, args.topics)))):
        if args.batch_size <= 1 and args.threads <= 1:
            # Sequential path: one query at a time.
            hits = searcher.search(text, args.hits)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = searcher.batch_search(
                    batch_topics, batch_topic_ids, args.hits, args.threads)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                batch_topics.clear()
            else:
                # Batch not full yet and not the last topic: nothing to write.
                continue
        # NOTE(review): the code that consumes `results` (writing run lines to
        # target_file) follows outside this fragment.
# NOTE(review): this unit was collapsed onto a single physical line (whitespace
# mangled) and was not valid Python as written; reconstructed here into
# conventional formatting with the visible tokens preserved. Structure inferred
# from the parallel fragments in this file — confirm against the original script.
# It builds a hybrid searcher from a dense and a sparse searcher, then runs the
# per-topic retrieval loop.

hsearcher = HybridSearcher(dsearcher, ssearcher)
if not hsearcher:
    exit()

# build output path
output_path = args.run.output
print(f'Running {args.run.topics} topics, saving to {output_path}...')
tag = 'hybrid'

with open(output_path, 'w') as target_file:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(
            tqdm(list(query_iterator(topics, args.topics)))):
        if args.run.batch_size <= 1 and args.run.threads <= 1:
            # Sequential path: one fused query at a time.
            hits = hsearcher.search(text, args.run.hits, args.fusion.alpha)
            results = [(topic_id, hits)]
        else:
            # Batched path: accumulate queries and flush a batch_search every
            # batch_size queries, or on the final topic.
            batch_topic_ids.append(str(topic_id))
            batch_topics.append(text)
            if (index + 1) % args.run.batch_size == 0 or \
                    index == len(topics.keys()) - 1:
                results = hsearcher.batch_search(
                    batch_topics, batch_topic_ids, args.run.hits,
                    args.run.threads, args.fusion.alpha)
                results = [(id_, results[id_]) for id_ in batch_topic_ids]
                batch_topic_ids.clear()
                # NOTE(review): the fragment is truncated here — the parallel
                # fragments also clear batch_topics and `continue` on non-flush
                # iterations; confirm against the full script.
# NOTE(review): whitespace-mangled fragment — a multi-statement span collapsed onto
# one physical line, and additionally cut mid-expression at the start (the opening
# f-string is the severed argument of a write call — presumably
# `target_file.write(`, as in the first fragment above; confirm). Not valid Python
# as written; left byte-identical. What the visible text shows:
#   1) The tail of a per-hit loop emitting TREC-run-style lines
#      ('<topic> Q0 <docid> <rank> <score> <tag>'), incrementing `rank`, recording
#      `docid` in `unique_docs`, and breaking after args.max_passage_hits hits.
#      The enclosing loop (defining `topic`, `docid`, `hit`, `rank`) is outside
#      this fragment.
#   2) A topic-processing loop: picks a pre-defined topic order from QUERY_IDS
#      when available, then searches one query at a time when
#      batch_size/threads <= 1, otherwise batches queries and flushes a
#      batch_search every args.batch_size queries or at the final topic,
#      continuing to the next topic when the batch is not yet full.
f'{topic} Q0 {docid} {rank} {hit.score:.6f} {tag}\n') rank = rank + 1 unique_docs.add(docid) if rank > args.max_passage_hits: break order = None if args.topics in QUERY_IDS: order = QUERY_IDS[args.topics] with open(output_path, 'w') as target_file: batch_topics = list() batch_topic_ids = list() for index, (topic_id, text) in enumerate(tqdm(list(query_iterator(topics, order)))): if args.batch_size <= 1 and args.threads <= 1: hits = searcher.search(text, args.hits) results = [(topic_id, hits)] else: batch_topic_ids.append(str(topic_id)) batch_topics.append(text) if (index + 1) % args.batch_size == 0 or \ index == len(topics.keys()) - 1: results = searcher.batch_search(batch_topics, batch_topic_ids, args.hits, args.threads) results = [(id_, results[id_]) for id_ in batch_topic_ids] batch_topic_ids.clear() batch_topics.clear() else: continue