# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line); it has been re-flowed for readability only -- no tokens were changed.
# The fragment starts inside the body of an `if` whose header lies before this
# chunk (its matching `else:` appears below), and it ends mid-statement, so the
# indentation chosen here is best-effort -- confirm against the original file.

# Compose an output filename encoding the classifier rankers (joined with '+'
# and prefixed 'prcl_'), the topics/search rankers, and the vectorizer plus
# the r/n/a setting strings (r_str/n_str/a_str are built before this chunk --
# presumably preformatted parameter tags; verify against the caller).
clf_str = 'prcl_' + '+'.join(clf_rankers)
tokens1 = ['run', args.topics, '+'.join(search_rankers)]
tokens2 = [args.vectorizer, clf_str, r_str, n_str, a_str]
output_path = '.'.join(tokens1) + '-' + '-'.join(tokens2) + ".txt"
else:
    # No-classifier case: filename is just run.<topics>.<search_rankers>.txt
    tokens = ['run', args.topics, '+'.join(search_rankers), 'txt']
    output_path = '.'.join(tokens)
print(f'Running {args.topics} topics, saving to {output_path}...')
# Run tag recorded in the output file: the filename stem when no explicit
# output path was supplied, otherwise the fixed tag 'Anserini'.
tag = output_path[:-4] if args.output is None else 'Anserini'
output_writer = get_output_writer(output_path, OutputFormat(args.output_format), 'w',
                                  max_hits=args.hits, tag=tag, topics=topics,
                                  use_max_passage=args.max_passage,
                                  max_passage_delimiter=args.max_passage_delimiter,
                                  max_passage_hits=args.max_passage_hits)

with output_writer:
    batch_topics = list()
    batch_topic_ids = list()
    for index, (topic_id, text) in enumerate(
            tqdm(query_iterator, total=len(topics.keys()))):
        # Optionally re-tokenize the query text and rejoin the tokens with
        # single spaces.
        # NOTE(review): `!= None` should idiomatically be `is not None`
        # (left unchanged -- documentation-only pass).
        if (args.tokenizer != None):
            toks = tokenizer.tokenize(text)
            text = ' '
            text = text.join(toks)
        # Sequential (non-batched) search path; its body is truncated at the
        # end of this chunk.
        if args.batch_size <= 1 and args.threads <= 1:
# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line); it has been re-flowed for readability only -- no tokens were changed.
# It begins mid-scope (`topic_f`, `topic_ids`, `topic_vectors`, and `searcher`
# are defined before this chunk) and ends on a backslash-continued condition,
# so the indentation chosen here is best-effort -- confirm against the
# original file.

# Load one pre-encoded topic per JSON line: an 'id' plus its dense 'vector'.
for line in topic_f:
    info = json.loads(line)
    topic_ids.append(info['id'])
    topic_vectors.append(info['vector'])

# Bail out if no searcher could be constructed earlier.
if not searcher:
    exit()

# build output path
output_path = args.output
print(f'Running {args.topics} topics, saving to {output_path}...')
tag = 'HNSW'
# support trec and msmarco format only for now
output_writer = get_output_writer(output_path, OutputFormat(args.output_format),
                                  max_hits=args.hits, tag=tag)

# Accumulates wall-clock seconds spent inside searcher.search (sequential
# path only, as far as this chunk shows).
search_time = 0
with output_writer:
    batch_topic_vectors = list()
    batch_topic_ids = list()
    for index, (topic_id, vec) in enumerate(tqdm(zip(topic_ids, topic_vectors))):
        if args.batch_size <= 1 and args.threads <= 1:
            # Sequential path: search one vector at a time, timing each call.
            start = time.time()
            hits = searcher.search(vec, args.hits)
            search_time += time.time() - start
            results = [(topic_id, hits)]
        else:
            # Batched path: buffer ids and vectors until a full batch is
            # reached. The flush condition is truncated at the end of this
            # chunk (continuation line lies past it).
            batch_topic_ids.append(str(topic_id))
            batch_topic_vectors.append(vec)
            if (index + 1) % args.batch_size == 0 or \