Example No. 1
embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                               args.word_vectors_file,
                                               dictionary.word2idx)
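# load_word_embeddings presumably returns a dict mapping each vocabulary word to its
# pretrained vector, so the difference printed below is the number of vocabulary
# entries with no pretrained embedding (the OOV words)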
print('number of OOV words = ', len(dictionary) - len(embeddings_index))

###############################################################################
# Build the model
###############################################################################

model = NSRF(dictionary, embeddings_index, args)
print(model)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       args.lr)
best_loss = sys.maxsize

param_dict = helper.count_parameters(model)
print('number of trainable parameters = ',
      numpy.sum(list(param_dict.values())))

if args.cuda:
    model = model.cuda()

if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = helper.load_checkpoint(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_loss = checkpoint['best_loss']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
Example No. 2
                                       seq_length=seq_length,
                                       device=device)

    # and make our data loaders
    # DataLoader's default batch size of 1 (one character per batch) is exactly what we need
    train_loader = DataLoader(train_data)
    validation_loader = DataLoader(validation_data)

    # Part 3: modelling
    # we create our model
    model = CharRNN(num_chars).to(device)
    # and the initial hidden state (a tensor of zeros)
    initial_state = model.init_hidden(batch_size, device)

    # we gauge the capacity of our model:
    # a character-to-parameter ratio approaching 1 is considered a good balance;
    # too many parameters and the model may overfit,
    # too few and it may underfit
    char_param_ratio = len(text) / count_parameters(model)
    print("Character to model parameter ratio: %f\n" % char_param_ratio)

    # Part 4: training
    train(model,
          initial_state,
          train_loader=train_loader,
          validation_loader=validation_loader,
          epochs=100)

    # Part 5: evaluation
    print(sample(model, char2int))
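count_parameters is called above but not defined in this excerpt; a common PyTorch idiom, assumed here, is:

def count_parameters(model):
    # total number of trainable parameters in a torch.nn.Module
    return sum(p.numel() for p in model.parameters() if p.requires_grad)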
Example No. 3
model = LSTM(dictionary, embeddings_index, args)
selector = Selector(dictionary, embeddings_index, args)

print(selector)
print(model)
optim_fn_selector, optim_params_selector = helper.get_optimizer(args.optimizer)
optimizer_selector = optim_fn_selector(
    filter(lambda p: p.requires_grad, selector.parameters()),
    **optim_params_selector)
optim_fn, optim_params = helper.get_optimizer(args.optimizer)
optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                     **optim_params)
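# helper.get_optimizer is not part of this excerpt; judging from the call shape it
# returns an optimizer constructor plus its keyword arguments. A stand-in with the
# same interface might look like the following (the name-to-settings mapping is an
# assumption, not the project's actual helper):
#
#     def get_optimizer(name):
#         table = {'adam': (optim.Adam, {'lr': 0.001}),
#                  'sgd': (optim.SGD, {'lr': 0.1, 'momentum': 0.9})}
#         return table[name]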

best_acc = 0
param_dict_selector = helper.count_parameters(selector)
param_dict = helper.count_parameters(model)
n_params_selector = numpy.sum(list(param_dict_selector.values()))
n_params_model = numpy.sum(list(param_dict.values()))
print('number of trainable parameters = ', n_params_selector, n_params_model,
      n_params_selector + n_params_model)

if args.cuda:
    torch.cuda.set_device(args.gpu)
    selector = selector.cuda()
    model = model.cuda()

if args.load_model == 0 or args.load_model == 2:
    print('loading selector')
    for param in model.parameters():
        param.requires_grad = False

    if model_to_train == "ResNet152":
        params_to_optimize_in_top = list(model.fc.parameters())
    elif model_to_train == "nViewNet" or model_to_train == "nViewNet_resume":
        params_to_optimize_in_top = list(model.collapse.parameters()) + list(
            model.fc.parameters())

    for param in params_to_optimize_in_top:
        param.requires_grad = True

    optimizer_top = optim.Adam(params_to_optimize_in_top, lr=lr_top)
    #lr_scheduler_top = lr_scheduler.StepLR(optimizer_top, step_size=20, gamma=0.8)
    lr_scheduler_top = None
    print("Training Top:", count_parameters(model), "Parameters")

    b_acc = train(model,
                  dataloaders_top,
                  criterion,
                  optimizer_top,
                  epochs_top,
                  scheduler=lr_scheduler_top,
                  best_acc=0)
    print('Finished training top, best acc {:.4f}'.format(b_acc))
    # Set all parameters to train (require gradient)
    for param in model.parameters():
        param.requires_grad = True
    print("Training All:", count_parameters(model), "Parameters")

    # Optimizer for Entire Network
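    # The excerpt is cut off here; a plausible continuation mirroring the top-only
    # stage above (lr_all, epochs_all and dataloaders_all are assumed names, not
    # taken from the original code):
    optimizer_all = optim.Adam(model.parameters(), lr=lr_all)
    b_acc = train(model,
                  dataloaders_all,
                  criterion,
                  optimizer_all,
                  epochs_all,
                  scheduler=None,
                  best_acc=b_acc)
    print('Finished training all, best acc {:.4f}'.format(b_acc))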