Example #1
0
        args.emsize,
        args.nhid,
        args.nhidlast,
        args.dropout,
        args.dropouth,
        args.dropoutx,
        args.dropouti,
        args.dropoute,
    )

# Log the model's total parameter count and the starting architecture
# genotype before the search begins.
size = sum(p.nelement() for p in model.parameters())
logging.info("param size: {}".format(size))
logging.info("initial genotype:")
logging.info(model.genotype())


# Move the model to the target device when CUDA is available; otherwise
# keep the CPU model as-is. (A multi-GPU nn.DataParallel path existed here
# previously but was disabled.)
parallel_model = model.to(device) if torch.cuda.is_available() else model

# The architect drives the architecture-weight optimisation step.
architect = Architect(parallel_model, args)

total_params = sum(p.data.nelement() for p in model.parameters())
logging.info("Args: {}".format(args))
Example #2
0
# Batch the held-out test split at evaluation batch size.
test_data = batchify(corpus.test, test_batch_size, args)

ntokens = len(corpus.dictionary)
if args.continue_train:
    # Resume the search from a previously saved checkpoint.
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    # NOTE(review): `model` on the right is presumably the imported module;
    # this assignment rebinds the name to the instance and shadows the
    # module from here on — confirm nothing later needs the module.
    model = model.RNNModelSearch(
        ntokens, args.emsize, args.nhid, args.nhidlast,
        args.dropout, args.dropouth, args.dropoutx,
        args.dropouti, args.dropoute,
    )

# Log the model's total parameter count and the initial genotype
# (the starting architecture encoding) before training.
size = sum(p.nelement() for p in model.parameters())
logging.info('param size: {}'.format(size))
logging.info('initial genotype:')
logging.info(model.genotype())

# Place the model: CPU when CUDA is disabled, a plain .cuda() copy for a
# single GPU, or nn.DataParallel split along dim=1 (the batch dimension
# for seq-first RNN inputs) across multiple GPUs.
if not args.cuda:
    parallel_model = model
elif args.single_gpu:
    parallel_model = model.cuda()
else:
    parallel_model = nn.DataParallel(model, dim=1).cuda()
# The architect drives the architecture-weight optimisation step.
architect = Architect(parallel_model, args)

# Summarise the run configuration and the model size in the log.
total_params = sum(p.data.nelement() for p in model.parameters())
logging.info('Args: {}'.format(args))
logging.info('Model total parameters: {}'.format(total_params))