Python GNMT.to примеры использования

Язык программирования: Python

Пространство имен/Пакет: seq2seq.models.gnmt

Класс/Тип: GNMT

Метод/Функция: to

Примеров на hotexamples.com: 2

Python GNMT.to — найдено 2 примера. Это лучшие примеры кода на Python для seq2seq.models.gnmt.GNMT.to, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

GNMT(12)

eval(6)

load_state_dict(6)

type(6)

parameters(5)

cuda(4)

to(2)

state_dict(1)

Пример #1

Показать файл

def main():
    """
    Launches translation (inference).

    Inference is executed on a single GPU, implementation supports beam search
    with length normalization and coverage penalty.

    The checkpoint given by ``args.model`` is loaded onto the CPU, the
    tokenizer and the GNMT model are rebuilt from it, and the translator is
    then run once for every combination of ``args.math``, ``args.batch_size``
    and ``args.beam_size``. Latency, throughput and BLEU results of every run
    are collected into tables, which are written out when ``args.tables`` is
    set.

    :returns: the value returned by ``utils.benchmark`` for the BLEU score and
        tokens-per-second of the last configuration that was run, compared
        against ``args.target_bleu`` and ``args.target_perf``
    :raises RuntimeError: if neither ``args.input`` nor ``args.input_text``
        is set
    """
    args = parse_args()
    device = utils.set_device(args.cuda, args.local_rank)
    utils.init_distributed(args.cuda)
    args.rank = utils.get_rank()
    utils.setup_logging()

    if args.env:
        utils.log_env_info()

    logging.info(f'Run arguments: {args}')

    if not args.cuda and torch.cuda.is_available():
        warnings.warn('cuda is available but not enabled')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    # load checkpoint and deserialize to CPU (to save GPU memory);
    # map_location='cpu' remaps storages saved from any device, whereas a
    # {'cuda:0': 'cpu'} mapping would only remap those saved from cuda:0
    checkpoint = torch.load(args.model, map_location='cpu')

    # build GNMT model
    tokenizer = Tokenizer()
    tokenizer.set_state(checkpoint['tokenizer'])
    model_config = checkpoint['model_config']
    # batch layout and vocabulary size come from the current run, not from
    # the values stored in the checkpoint
    model_config['batch_first'] = args.batch_first
    model_config['vocab_size'] = tokenizer.vocab_size
    model = GNMT(**model_config)
    model.load_state_dict(checkpoint['state_dict'])

    # construct the dataset
    if args.input:
        data = RawTextDataset(
            raw_datafile=args.input,
            tokenizer=tokenizer,
            sort=args.sort,
        )
    elif args.input_text:
        data = RawTextDataset(
            raw_data=args.input_text,
            tokenizer=tokenizer,
            sort=args.sort,
        )
    else:
        # fail with a clear message instead of an UnboundLocalError on
        # `data` further down
        raise RuntimeError(
            'No input data: specify either an input file or input text')

    latency_table = tables.LatencyTable(args.percentiles)
    throughput_table = tables.ThroughputTable(args.percentiles)
    accuracy_table = tables.AccuracyTable('BLEU')

    # tensor type the model is cast to for each supported math mode
    dtype = {'fp32': torch.FloatTensor, 'fp16': torch.HalfTensor}

    # `math_mode` rather than `math`, so the stdlib module name is not
    # shadowed
    for (math_mode, batch_size, beam_size) in product(args.math,
                                                      args.batch_size,
                                                      args.beam_size):
        logging.info(f'math: {math_mode}, batch size: {batch_size}, '
                     f'beam size: {beam_size}')

        # cast the model for this math mode, move it to the target device
        # and switch it to evaluation mode
        model.type(dtype[math_mode])
        model = model.to(device)
        model.eval()

        # build the data loader
        loader = data.get_loader(
            batch_size=batch_size,
            batch_first=args.batch_first,
            pad=True,
            repeat=args.repeat[batch_size],
            num_workers=0,
        )

        # build the translator object
        translator = Translator(
            model=model,
            tokenizer=tokenizer,
            loader=loader,
            beam_size=beam_size,
            max_seq_len=args.max_seq_len,
            len_norm_factor=args.len_norm_factor,
            len_norm_const=args.len_norm_const,
            cov_penalty_factor=args.cov_penalty_factor,
            print_freq=args.print_freq,
        )

        # execute the inference
        output, stats = translator.run(
            calc_bleu=args.bleu,
            eval_path=args.output,
            summary=True,
            warmup=args.warmup,
            reference_path=args.reference,
        )

        # print translated outputs (only on rank 0, and only when they are
        # not written to args.output)
        if not args.output and args.rank == 0:
            logging.info('Translated output:')
            for out in output:
                print(out)

        # results are keyed by (batch size, beam size) and, within a key,
        # by math mode
        key = (batch_size, beam_size)
        latency_table.add(key, {math_mode: stats['runtimes']})
        throughput_table.add(key, {math_mode: stats['throughputs']})
        accuracy_table.add(key, {math_mode: stats['bleu']})

    if args.tables:
        accuracy_table.write('Inference accuracy', args.math)

        # fp16 results are reported relative to fp32 only when both modes
        # were run
        if 'fp16' in args.math and 'fp32' in args.math:
            relative = 'fp32'
        else:
            relative = None

        if 'fp32' in args.math:
            throughput_table.write('Inference throughput', 'fp32')
        if 'fp16' in args.math:
            throughput_table.write('Inference throughput',
                                   'fp16',
                                   relative=relative)

        if 'fp32' in args.math:
            latency_table.write('Inference latency', 'fp32')
        if 'fp16' in args.math:
            # NOTE(review): reverse_speedup is presumably set because lower
            # latency is better - confirm against tables.LatencyTable
            latency_table.write('Inference latency',
                                'fp16',
                                relative=relative,
                                reverse_speedup=True)

    # `stats` still refers to the results of the last configuration that was
    # run in the loop above
    passed = utils.benchmark(stats['bleu'], args.target_bleu,
                             stats['tokens_per_sec'], args.target_perf)
    return passed

Пример #2

Показать файл

Файл: translate.py Проект: yuanzhedong/DeepLearningExamples

def main():
    """
    Launches translation (inference).

    Inference is executed on a single GPU, implementation supports beam search
    with length normalization and coverage penalty.

    The model is either restored from the checkpoint given by ``args.model``
    (loaded onto the CPU first) or, with ``args.synthetic``, freshly
    constructed without a tokenizer. The translator is then run once for
    every combination of ``args.math``, ``args.batch_size`` and
    ``args.beam_size``. Latency, throughput and BLEU results of every run are
    collected into tables, which are written out when ``args.tables`` is set,
    and a summary of the last run is passed to ``dllogger``.

    :returns: the value returned by ``utils.benchmark`` for the BLEU score and
        tokens-per-second of the last configuration that was run, compared
        against ``args.target_bleu`` and ``args.target_perf``
    :raises RuntimeError: if neither ``args.model`` nor ``args.synthetic`` is
        set, or if none of ``args.input``, ``args.input_text`` and
        ``args.synthetic`` is set
    """
    args = parse_args()
    if args.affinity != 'disabled':
        nproc_per_node = torch.cuda.device_count()
        affinity = gpu_affinity.set_affinity(args.local_rank, nproc_per_node,
                                             args.affinity)
        print(f'{args.local_rank}: thread affinity: {affinity}')
    device = utils.set_device(args.cuda, args.local_rank)
    utils.init_distributed(args.cuda)
    args.rank = utils.get_rank()
    os.makedirs(args.save_dir, exist_ok=True)
    utils.setup_logging()

    dllog_file = os.path.join(args.save_dir, args.dllog_file)
    utils.setup_dllogger(enabled=True, filename=dllog_file)

    if args.profile:
        # the name `pyprof` may be undefined here; in that case profiling
        # setup is skipped with a warning instead of aborting the run
        try:
            pyprof.init(enable_function_stack=True)
        except NameError:
            warnings.warn('Called pyprof.init() but pyprof is not available')

    if args.env:
        utils.log_env_info()

    logging.info(f'Run arguments: {args}')
    dllogger.log(step='PARAMETER', data=vars(args))

    if not args.cuda and torch.cuda.is_available():
        warnings.warn('cuda is available but not enabled')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    if args.model:
        # load checkpoint and deserialize to CPU (to save GPU memory);
        # map_location='cpu' remaps storages saved from any device, whereas
        # a {'cuda:0': 'cpu'} mapping would only remap those saved from
        # cuda:0
        checkpoint = torch.load(args.model, map_location='cpu')

        # build GNMT model
        tokenizer = Tokenizer()
        tokenizer.set_state(checkpoint['tokenizer'])
        model_config = checkpoint['model_config']
        # batch layout and vocabulary size come from the current run, not
        # from the values stored in the checkpoint
        model_config['batch_first'] = args.batch_first
        model_config['vocab_size'] = tokenizer.vocab_size
        model = GNMT(**model_config)
        model.load_state_dict(checkpoint['state_dict'])
    elif args.synthetic:
        # no checkpoint: build the model directly and run without a
        # tokenizer
        model = GNMT(args.synthetic_vocab, batch_first=args.batch_first)
        tokenizer = None
    else:
        raise RuntimeError(
            'Specify model either with --synthetic or with --model flag')

    # construct the dataset
    if args.input:
        data = RawTextDataset(
            raw_datafile=args.input,
            tokenizer=tokenizer,
            sort=args.sort,
        )
    elif args.input_text:
        data = RawTextDataset(
            raw_data=args.input_text,
            tokenizer=tokenizer,
            sort=args.sort,
        )
    elif args.synthetic:
        data = SyntheticDataset(args.synthetic_vocab, args.synthetic_len,
                                args.batch_size[0] * args.synthetic_batches)
    else:
        # fail with a clear message instead of an UnboundLocalError on
        # `data` further down
        raise RuntimeError(
            'No input data: specify an input file, input text or --synthetic')

    latency_table = tables.LatencyTable(args.percentiles)
    throughput_table = tables.ThroughputTable(args.percentiles)
    accuracy_table = tables.AccuracyTable('BLEU')

    # tensor type the model is cast to for each supported math mode
    # ('tf32' uses the same tensor type as 'fp32')
    dtype = {
        'fp32': torch.FloatTensor,
        'tf32': torch.FloatTensor,
        'fp16': torch.HalfTensor
    }

    # `math_mode` rather than `math`, so the stdlib module name is not
    # shadowed
    for (math_mode, batch_size, beam_size) in product(args.math,
                                                      args.batch_size,
                                                      args.beam_size):
        logging.info(f'math: {math_mode}, batch size: {batch_size}, '
                     f'beam size: {beam_size}')

        # cast the model for this math mode, move it to the target device
        # and switch it to evaluation mode
        model.type(dtype[math_mode])
        model = model.to(device)
        model.eval()

        # build the data loader
        loader = data.get_loader(
            batch_size=batch_size,
            batch_first=args.batch_first,
            pad=True,
            repeat=args.repeat[batch_size],
            num_workers=0,
        )

        # build the translator object
        translator = Translator(
            model=model,
            tokenizer=tokenizer,
            loader=loader,
            beam_size=beam_size,
            max_seq_len=args.max_seq_len,
            len_norm_factor=args.len_norm_factor,
            len_norm_const=args.len_norm_const,
            cov_penalty_factor=args.cov_penalty_factor,
            print_freq=args.print_freq,
        )

        # execute the inference (NVTX ranges are emitted only when
        # profiling is requested)
        with torch.autograd.profiler.emit_nvtx(enabled=args.profile):
            output, stats = translator.run(
                calc_bleu=args.bleu,
                eval_path=args.output,
                summary=True,
                warmup=args.warmup,
                reference_path=args.reference,
            )

        # print translated outputs (never for synthetic runs; otherwise only
        # on rank 0 and only when they are not written to args.output)
        if not args.synthetic and (not args.output and args.rank == 0):
            logging.info('Translated output:')
            for out in output:
                print(out)

        # results are keyed by (batch size, beam size) and, within a key,
        # by math mode
        key = (batch_size, beam_size)
        latency_table.add(key, {math_mode: stats['runtimes']})
        throughput_table.add(key, {math_mode: stats['throughputs']})
        accuracy_table.add(key, {math_mode: stats['bleu']})

    if args.tables:
        accuracy_table.write('Inference accuracy', args.math)

        # fp16 results are reported relative to fp32 when both were run,
        # otherwise relative to tf32 when that was run alongside fp16
        if 'fp16' in args.math and 'fp32' in args.math:
            relative = 'fp32'
        elif 'fp16' in args.math and 'tf32' in args.math:
            relative = 'tf32'
        else:
            relative = None

        if 'fp32' in args.math:
            throughput_table.write('Inference throughput', 'fp32')
        if 'tf32' in args.math:
            throughput_table.write('Inference throughput', 'tf32')
        if 'fp16' in args.math:
            throughput_table.write('Inference throughput',
                                   'fp16',
                                   relative=relative)

        if 'fp32' in args.math:
            latency_table.write('Inference latency', 'fp32')
        if 'tf32' in args.math:
            latency_table.write('Inference latency', 'tf32')
        if 'fp16' in args.math:
            # NOTE(review): reverse_speedup is presumably set because lower
            # latency is better - confirm against tables.LatencyTable
            latency_table.write('Inference latency',
                                'fp16',
                                relative=relative,
                                reverse_speedup=True)

    # `stats` still refers to the results of the last configuration that was
    # run in the loop above, so the summary describes that run only
    avg_throughput = np.array(stats['throughputs']).mean()
    avg_latency = np.array(stats['runtimes']).mean()
    summary = {
        'eval_throughput': avg_throughput,
        'eval_bleu': stats['bleu'],
        'eval_avg_latency': avg_latency,
    }
    for p in args.percentiles:
        summary[f'eval_{p}%_latency'] = np.percentile(stats['runtimes'], p)

    dllogger.log(step=tuple(), data=summary)

    passed = utils.benchmark(stats['bleu'], args.target_bleu,
                             stats['tokens_per_sec'], args.target_perf)
    return passed