Exemplos de MsMarcoDataset em Python

Linguagem de programação: Python

Espaço de nomes / nome do pacote: pygaggle.data

Classe / Tipo: MsMarcoDataset

Exemplos em hotexamples.com: 2

MsMarcoDataset em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de pygaggle.data.MsMarcoDataset em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

from_folder(2)

Métodos Frequentes

from_folder (2)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: evaluate_passage_ranker.py Projeto: kairanithin021/pygaggle

def main():
    """Run passage-ranking evaluation from the command line.

    Parses the CLI options, loads the dataset folder given by ``--dataset``
    into relevance examples (using ``--index-dir``), constructs the reranker
    selected by ``--method``, and logs every metric the evaluator yields.
    When ``--method`` is ``duo_t5`` the constructed reranker is indexed as a
    (mono, duo) pair and evaluated with ``DuoRerankerEvaluator``; every other
    method goes through ``RerankerEvaluator``.
    """
    builder = ArgumentParserBuilder()
    cli_options = [
        opt('--task', type=str, default='msmarco'),
        opt('--dataset', type=Path, required=True),
        opt('--index-dir', type=Path, required=True),
        opt('--method', required=True, type=str, choices=METHOD_CHOICES),
        opt('--model',
            required=True,
            type=str,
            help='Path to pre-trained model or huggingface model name'),
        opt('--duo_model',
            type=str,
            help='Path to pre-trained model or huggingface model name'),
        opt('--mono_hits',
            type=int,
            default=50,
            help='Top k candidates from mono for duo reranking'),
        opt('--output-file', type=Path, default='.'),
        opt('--overwrite-output', action='store_true'),
        opt('--split', type=str, default='dev', choices=('dev', 'eval')),
        opt('--batch-size', '-bsz', type=int, default=96),
        opt('--device', type=str, default='cuda:0'),
        opt('--is-duo', action='store_true'),
        opt('--from-tf', action='store_true'),
        opt('--metrics',
            type=str,
            nargs='+',
            default=metric_names(),
            choices=metric_names()),
        opt('--model-type', type=str),
        opt('--tokenizer-name', type=str),
    ]
    builder.add_opts(*cli_options)
    args = builder.parser.parse_args()
    options = PassageRankingEvaluationOptions(**vars(args))

    logging.info("Preprocessing Queries & Passages:")
    dataset = MsMarcoDataset.from_folder(str(options.dataset),
                                         split=options.split,
                                         is_duo=options.is_duo)
    examples = dataset.to_relevance_examples(str(options.index_dir),
                                             is_duo=options.is_duo)

    logging.info("Loading Ranker & Tokenizer:")
    # Dispatch table: method name -> factory taking the options object.
    construct_map = {
        'transformer': construct_transformer,
        'bm25': construct_bm25,
        't5': construct_t5,
        'duo_t5': construct_duo_t5,
        'seq_class_transformer': construct_seq_class_transformer,
        'random': lambda _: RandomReranker(),
    }
    build_reranker = construct_map[options.method]
    reranker = build_reranker(options)
    writer = MsMarcoWriter(args.output_file, args.overwrite_output)

    if options.method != 'duo_t5':
        evaluator = RerankerEvaluator(reranker, options.metrics, writer=writer)
    else:
        # For duo_t5 the factory result is indexed as [mono, duo].
        evaluator = DuoRerankerEvaluator(mono_reranker=reranker[0],
                                         duo_reranker=reranker[1],
                                         metric_names=options.metrics,
                                         mono_hits=options.mono_hits,
                                         writer=writer)

    # Pad metric names to the longest requested name plus one space.
    width = 1 + max(len(name) for name in args.metrics)
    logging.info("Reranking:")
    for metric in evaluator.evaluate(examples):
        logging.info(f'{metric.name:<{width}}{metric.value:.5}')

Exemplo n.º 2

0

Exibir arquivo

def main():
    """Evaluate a reranker and print its metrics as tab-separated lines.

    Parses the CLI options, loads the dataset folder given by ``--data-dir``
    into relevance examples, constructs the reranker selected by
    ``--method``, then logs each metric and finally prints all
    ``name<TAB>value`` pairs to standard output in one ``print`` call.
    """
    builder = ArgumentParserBuilder()
    cli_options = [
        opt('--dataset', type=str, default='msmarco'),
        opt('--data-dir', type=Path, default='/content/data/msmarco'),
        opt('--method', required=True, type=str, choices=METHOD_CHOICES),
        opt('--model-name-or-path', type=str),
        opt('--output-file', type=Path, default='.'),
        opt('--overwrite-output', action='store_true'),
        opt('--split', type=str, default='dev', choices=('dev', 'eval')),
        opt('--batch-size', '-bsz', type=int, default=96),
        opt('--device', type=str, default='cuda:0'),
        opt('--is-duo', action='store_true'),
        opt('--metrics',
            type=str,
            nargs='+',
            default=metric_names(),
            choices=metric_names()),
        opt('--model-type', type=str, default='bert-base'),
        opt('--tokenizer-name', type=str),
        opt('--index-dir', type=Path),
    ]
    builder.add_opts(*cli_options)
    args = builder.parser.parse_args()
    options = PassageRankingEvaluationOptions(**vars(args))

    dataset = MsMarcoDataset.from_folder(str(options.data_dir),
                                         split=options.split,
                                         is_duo=options.is_duo)
    # NOTE(review): --index-dir is parsed above, but the index path used here
    # comes from SETTINGS.msmarco_index_path -- confirm this is intended.
    examples = dataset.to_relevance_examples(SETTINGS.msmarco_index_path,
                                             is_duo=options.is_duo)

    # Dispatch table: method name -> factory taking the options object.
    construct_map = {
        'transformer': construct_transformer,
        'bm25': construct_bm25,
        't5': construct_t5,
        'seq_class_transformer': construct_seq_class_transformer,
        'random': lambda _: RandomReranker(),
    }
    build_reranker = construct_map[options.method]
    reranker = build_reranker(options)
    writer = MsMarcoWriter(args.output_file, args.overwrite_output)
    evaluator = RerankerEvaluator(reranker, options.metrics, writer=writer)

    # Pad metric names to the longest requested name plus one space.
    width = 1 + max(len(name) for name in args.metrics)
    report_lines = []
    for metric in evaluator.evaluate(examples):
        logging.info(f'{metric.name:<{width}}{metric.value:.5}')
        report_lines.append(f'{metric.name}\t{metric.value}')
    print('\n'.join(report_lines))