Example #1
 def test_asgd(self):
     self._test_basic_cases(
         lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100))
     self._test_basic_cases(lambda weight, bias: optim.ASGD(
         self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3, t0=100))
     with self.assertRaisesRegex(ValueError,
                                 "Invalid weight_decay value: -0.5"):
         optim.ASGD(None, lr=1e-2, weight_decay=-0.5)
Example #2
 def test_asgd(self):
     self._test_rosenbrock(lambda params: optim.ASGD(params, lr=1e-3),
                           wrap_old_fn(old_optim.asgd, eta0=1e-3))
     self._test_rosenbrock(
         lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
         wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8))
     self._test_rosenbrock(
         lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
         wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3))
     self._test_basic_cases(
         lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100))
     self._test_basic_cases(lambda weight, bias: optim.ASGD(
         self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3, t0=100))
Example #3
def train_iter(inputs, targets, iters=100):
    # print(inputs,targets)
    encoder = Encoder(input_size=1, hidden_size=3, output_size=7).to(device)
    decoder = Decoder(hidden_size=3, output_size=1).to(device)
    encoder_optim = optim.ASGD(encoder.parameters(), lr=0.01)
    decoder_optim = optim.ASGD(decoder.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    for i in range(iters):
        input = inputs[i].view(-1)
        target = targets[i].view(-1)
        # print(input,target)
        loss = train(encoder, decoder, criterion, encoder_optim, decoder_optim, input, target)
        if i % 10 == 0:
            print(i, loss)
Example #4
 def build_optimizer(self, optimizer, model, lr, l2_weight):
     if optimizer == 'Adam':
         return optim.Adam(model.parameters(),
                           lr=lr,
                           amsgrad=False,
                           weight_decay=l2_weight)
     elif optimizer == 'AdamW':
         return optim.AdamW(model.parameters(),
                            lr=lr,
                            amsgrad=False,
                            weight_decay=l2_weight)
     elif optimizer == 'RMSprop':
         return optim.RMSprop(model.parameters(),
                              lr=lr,
                              weight_decay=l2_weight)
     elif optimizer == 'SGD':
         return optim.SGD(model.parameters(),
                          nesterov=True,
                          lr=lr,
                          momentum=0.9,
                          weight_decay=l2_weight)
         #return optim.SGD(model.parameters(), lr=lr, weight_decay=l2_weight)
     elif optimizer == 'ASGD':
         return optim.ASGD(model.parameters(),
                           lr=lr,
                           weight_decay=l2_weight)
     else:
         raise NotImplementedError('not implemented')
Example #5
 def __get_optimizer(self, optim_str, lr, momentum):
     if optim_str == 'sgd':
         optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                      self.model.parameters()),
                               lr=lr,
                               momentum=momentum)
     elif optim_str == 'adam':
         optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                       self.model.parameters()),
                                lr=lr)
     elif optim_str == 'adagrad':
         optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                          self.model.parameters()),
                                   lr=lr)
     elif optim_str == 'adadelta':
         optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                           self.model.parameters()),
                                    lr=lr)
     elif optim_str == 'adamax':
         optimizer = optim.Adamax(filter(lambda p: p.requires_grad,
                                         self.model.parameters()),
                                  lr=lr)
     elif optim_str == 'asgd':
         optimizer = optim.ASGD(filter(lambda p: p.requires_grad,
                                       self.model.parameters()),
                                lr=lr)
     else:
         raise Exception('Wrong optimizer')
     return optimizer
Example #6
 def __init__(self, lrate, loss_fn, in_size,out_size):
     """
     Initialize the layers of your neural network
     @param lrate: The learning rate for the model.
     @param loss_fn: The loss function
     @param in_size: Dimension of input
     @param out_size: Dimension of output
     """
     super(NeuralNet, self).__init__()
     """
     1) DO NOT change the name of self.encoder & self.decoder
     2) Both of them need to be subclass of torch.nn.Module and callable, like
        output = self.encoder(input)
     3) Use 2d conv for extra credit part.
        self.encoder should be able to take tensor of shape [batch_size, 1, 28, 28] as input.
        self.decoder output tensor should have shape [batch_size, 1, 28, 28].
     """
     self.encoder = nn.Sequential(
         nn.Conv2d(1, 16, 3, 1, 1),
         nn.ReLU()
     )
     self.decoder = nn.Sequential(
         nn.ConvTranspose2d(16, 1, 3, 1, 1),
         nn.ReLU()
     )
     self.lrate = 0.1
     self.loss_fn = loss_fn
     self.optim = optim.ASGD(self.get_parameters(), lr=lrate, weight_decay=1e-3)
Example #7
def averaged_sgd(model, params):
    return optim.ASGD(
        model.parameters(),
        lr=params[Constants.LEARNING_RATE][Constants.VALUE],
        weight_decay=params[Constants.WEIGHT_DECAY][Constants.VALUE],
        alpha=params[Constants.ALPHA][Constants.VALUE],
        lambd=params[Constants.LAMBD][Constants.VALUE])
Example #8
 def _makeOptimizer(self):
     optimiserArgs = dict(self.optimiserArgs)
     optimiserArgs.update({'lr': self.lr})
     if self.method == 'sgd':
         self.optimizer = optim.SGD(self.params, **optimiserArgs)
     elif self.method == 'asgd':
         self.optimizer = optim.ASGD(self.params, **optimiserArgs)
     elif self.method == 'adagrad':
         self.optimizer = optim.Adagrad(self.params, **optimiserArgs)
     elif self.method == 'adadelta':
         self.optimizer = optim.Adadelta(self.params, **optimiserArgs)
     elif self.method == 'adam':
         self.optimizer = optim.Adam(self.params, **optimiserArgs)
     elif self.method == 'adamw':
         self.optimizer = optim.AdamW(self.params, **optimiserArgs)
     elif self.method == 'adamax':
         self.optimizer = optim.Adamax(self.params, **optimiserArgs)
     elif self.method == 'rmsprop':
         self.optimizer = optim.RMSprop(self.params, **optimiserArgs)
     elif self.method == 'rprop':
         self.optimizer = optim.Rprop(self.params, **optimiserArgs)
     elif self.method == 'lbfgs':
         self.use_shrinkage = False
         self.optimizer = optim.LBFGS(self.params, **optimiserArgs)
     else:
         raise RuntimeError("Invalid optim method: " + self.method)
Example #9
    def set_optimizer(self, opt_type, args):
        """
        - set the specified optimizer

        inputs:
            opt_type: (str) optimizer type
            args: (dict) contains learning rate and other necessary info

        """
        lr = float(args["lr"])
        if opt_type == "SGD":
            self.optimizer = optim.SGD(self.model.parameters(),
                                       lr=lr,
                                       momentum=float(args["momentum"]))
        elif opt_type == "ASGD":
            self.optimizer = optim.ASGD(self.model.parameters(), lr=lr)
        elif opt_type == "ADAM":
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=lr,
                                        weight_decay=args["weight_decay"],
                                        amsgrad=args["amsgrad"])
        elif opt_type == "ADAGRAD":
            self.optimizer = optim.Adagrad(self.model.parameters(),
                                           lr=lr,
                                           lr_decay=args["lr_decay"],
                                           weight_decay=args["weight_decay"])
        elif opt_type == "RMSPROP":
            self.optimizer = optim.RMSprop(self.model.parameters(), lr=lr)
Example #10
def get_optimizer(optimizer, lr, params):

	if optimizer == 'adagrad':
		optimizer = torch.optim.Adagrad(params, lr=lr*5, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
	elif optimizer == 'adadelta':
		optimizer = optim.Adadelta(params, lr=lr*100*5, rho=0.9, eps=1e-06, weight_decay=0)
	elif optimizer == 'adam':
		optimizer = optim.Adam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
	elif optimizer == 'adaw':
		optimizer = optim.AdamW(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
	elif optimizer == 'sparseadam':
		optimizer = optim.SparseAdam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08)
	elif optimizer == 'ASGD':
		optimizer = optim.ASGD(params, lr=lr*5, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
	elif optimizer == 'LBFGS':
		optimizer = optim.LBFGS(params, lr=lr*100*5) 
	elif optimizer == 'RMSprop':
		optimizer = optim.RMSprop(params, lr=lr*5)
	elif optimizer == 'rprop':
		optimizer = optim.Rprop(params, lr=lr*5)
	elif optimizer == 'SGD':
		optimizer = optim.SGD(params, lr=lr*5, momentum=0, dampening=0, weight_decay=0, nesterov=False)
	elif optimizer == 'adamax': #standard: adamax
		optimizer = optim.Adamax(params, lr=lr) # best lr=0.01, standard is lr=0.002, multiply every other by factor 5 as well
	else:
		raise Exception("Optimizer not supported. Please change it!")

	return optimizer
Example #11
def train(epoch):
    global train_loss
    global train_correct
    global train_total
    global optimizer
    net.train()
    train_loss = 0
    train_correct = 0
    train_total = 0
    optimizer = optim.ASGD(net.parameters(), lr=lrt, weight_decay=5e-4)

    print('Sqnet_1x_v1.0_ASGD Training Epoch: #%d, LR: %.4f' % (epoch, lrt))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predict = torch.max(outputs, 1)
        train_total += labels.size(0)
        train_correct += predict.eq(labels).cpu().sum().double()

        sys.stdout.write('\r')
        sys.stdout.write(
            '[%s] Training Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Tr_Acc: %.3f'
            % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(
                time.time())), epoch, num_epochs, idx,
               len(train_dataset) // batch_size, train_loss /
               (batch_size * (idx + 1)), train_correct / train_total))
        sys.stdout.flush()
Example #12
 def fit(self, lr=1e-3, nb_epoch = 20, iter_per_epoch=40, batch_size=100, optimizer="Adam"):
     self.fit_first_stage()
     if self.GPU:
         self.cuda()
     self.__build_dataloader__(batch_size=batch_size)
     
     loss = nn.CrossEntropyLoss()
     if optimizer=="ASGD":
         optimizer = optim.ASGD(self.parameters(),
                                lr=lr,
                                t0=200)
     elif optimizer=="Adam":
         optimizer = optim.Adam(self.parameters(),
                                lr=lr)
     for i in range(nb_epoch):
         train_loss, train_accuracy = self.__train_epoch__(loss=loss,
                                                           optimizer=optimizer,
                                                           iter_per_epoch=iter_per_epoch)
         val_loss, val_accuracy = self.__val_epoch__(loss,
                                                     iter_per_epoch=iter_per_epoch)
         print("Epoch num {}:\n \t train_loss: {},\n \t acc overall: {},\n \t acc class 0: {},\n \t acc class 1: {},\n \t acc class 2: {},\n \t acc class 3: {},\n \t acc class 4: {}, \n \n \t val_loss: {},\n \t acc overall: {},\n \t acc class 0: {},\n \t acc class 1: {},\n \t acc class 2: {},\n \t acc class 3: {},\n \t acc class 4: {}\n\n\n".format(i+1,
                                                                                           np.round(train_loss, 8), np.round(train_accuracy[0], 8),
                                                                                           np.round(train_accuracy[1], 8), np.round(train_accuracy[2], 8),
                                                                                           np.round(train_accuracy[3], 8), np.round(train_accuracy[4], 8), np.round(train_accuracy[5], 8),
                                                                                      np.round(val_loss, 8), np.round(val_accuracy[0], 8),
                                                                                           np.round(val_accuracy[1], 8), np.round(val_accuracy[2], 8),
                                                                                           np.round(val_accuracy[3], 8), np.round(val_accuracy[4], 8), np.round(val_accuracy[5], 8)))
Example #13
    def setup_optims(self):
        """
        Optimisers used during training. By default, the learning rate is also decayed whenever validation performance
        plateaus. See the YAML file for more details on the configuration.
        """
        lr = self.config.hp.initial_lr

        if self.config.hp.optim == 'adam':

            self.optim = optim.Adam(self.model.parameters(),
                                    lr=lr,
                                    weight_decay=self.config.hp.weight_decay,
                                    eps=self.config.hp.adam_eps)
        elif self.config.hp.optim == 'sgd':
            self.optim = optim.ASGD(self.model.parameters(),
                                    lr=lr,
                                    weight_decay=self.config.hp.weight_decay)

        self.optim_lr_decay = ReduceLROnPlateau(
            self.optim,
            factor=self.config.hp.decay_lr,
            verbose=True,
            patience=self.config.hp.lr_patience_decay,
            min_lr=self.config.hp.minimal_lr)
        self.initial_lr_decay = self.optim_lr_decay.state_dict()
        self.initial_optim = self.optim.state_dict()
Example #14
def get_optimizer(name, model):
    "Just a switch"
    if name == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               weight_decay=lr_factor)
    elif name == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=lr,
                                  weight_decay=lr_factor)
    elif name == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=lr,
                                   weight_decay=lr_factor)
    elif name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=lr,
                                  weight_decay=lr_factor)
    elif name == 'asgd':
        optimizer = optim.ASGD(model.parameters(),
                               lr=lr,
                               t0=0,
                               lambd=0.,
                               weight_decay=lr_factor)
    else:
        print('Defaulting to vanilla SGD')
        optimizer = optim.SGD(model.parameters(), lr=lr)
    return optimizer
Example #15
 def prep_optim(self):
     if self.args.optim == "adam":
         self.optimizer = optim.Adam(self.model.parameters(),
                                     lr=self.args.lr)
     elif self.args.optim == "adabound":
         self.optimizer = adabound.AdaBound(self.model.parameters(),
                                            lr=self.args.lr)
     elif self.args.optim == "rmsprop":
         self.optimizer = optim.RMSprop(self.model.parameters(),
                                        lr=self.args.lr)
     elif self.args.optim == "sgd":
         self.optimizer = optim.SGD(self.model.parameters(),
                                    lr=self.args.lr)
     elif self.args.optim == "bfgs":
         self.optimizer = optim.LBFGS(self.model.parameters(),
                                      lr=self.args.lr)
     elif self.args.optim == "adamw":
         self.optimizer = optim.AdamW(self.model.parameters(),
                                      lr=self.args.lr)
     elif self.args.optim == "asgd":
         self.optimizer = optim.ASGD(self.model.parameters(),
                                     lr=self.args.lr)
     else:
         print("Invalid optimizer chosen")
         raise ValueError("Invalid optimizer chosen")
Example #16
File: etm.py  Project: adjidieng/ETM
    def get_optimizer(self, args):
        """
        Get the model's default optimizer.

        Args:
            args: namespace with `optimizer`, `lr` and `wdecay` attributes.
        """
        if args.optimizer == 'adam':
            optimizer = optim.Adam(self.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.wdecay)
        elif args.optimizer == 'adagrad':
            optimizer = optim.Adagrad(self.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.wdecay)
        elif args.optimizer == 'adadelta':
            optimizer = optim.Adadelta(self.parameters(),
                                       lr=args.lr,
                                       weight_decay=args.wdecay)
        elif args.optimizer == 'rmsprop':
            optimizer = optim.RMSprop(self.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.wdecay)
        elif args.optimizer == 'asgd':
            optimizer = optim.ASGD(self.parameters(),
                                   lr=args.lr,
                                   t0=0,
                                   lambd=0.,
                                   weight_decay=args.wdecay)
        else:
            print('Defaulting to vanilla SGD')
            optimizer = optim.SGD(self.parameters(), lr=args.lr)
        self.optimizer = optimizer
        return optimizer
Example #17
def get_optim(config, model):
    if config.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SparseAdam':
        optimizer = optim.SparseAdam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adamax':
        optimizer = optim.Adamax(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'LBFGS':
        optimizer = optim.LBFGS(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Rprop':
        optimizer = optim.Rprop(model.parameters(), lr=float(config.lr))
    else:
        raise ValueError('unsupported optimizer: {}'.format(config.optimizer))
    print('\noptimizer :', optimizer, '\n')
    return optimizer
Example #18
def train(x_val, y_val, niter, eta, k, lr=0.1):
    torch.manual_seed(0)
    n_examples, n_features = x_val.shape
    n_classes = len(np.unique(y_val))
    model = build_model(n_features)
    optimizer = optim.ASGD(model.parameters(), lr=lr)
    loss = DRO_loss(eta, k)
    x = torch.FloatTensor(x_val)
    y = torch.FloatTensor(y_val.astype(float))[:, None]
    cost_list = []
    for t in range(niter):
        # Reset gradient
        optimizer.zero_grad()
        # Forward
        fx = model.forward(x)
        output = loss.forward(fx, y)
        # Backward
        output.backward()
        cost_list.append(output.item())
        # Update parameters
        optimizer.step()
        z = dump_model(model)
        scalar = np.sqrt(np.sum(z[0]**2.0))
        for param in model.parameters():
            param.data = param.data / float(scalar)
    return model, cost_list
Example #19
def _set_optimizer():
    if config_dict['optimization_params']['optimizer'] == 'adam':
        selected_optimizer = optim.Adam(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'adagrad':
        selected_optimizer = optim.Adagrad(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'adadelta':
        selected_optimizer = optim.Adadelta(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'rmsprop':
        selected_optimizer = optim.RMSprop(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'asgd':
        selected_optimizer = optim.ASGD(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            t0=0,
            lambd=0.,
            weight_decay=config_dict['optimization_params']['wdecay'])
    else:
        print('Defaulting to vanilla SGD')
        selected_optimizer = optim.SGD(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'])
    return selected_optimizer
Example #20
def get_optimiser(name, net_params, optim_params):
    lr = optim_params['learning_rate']
    momentum = optim_params['momentum']
    weight_decay = optim_params['weight_decay']
    if(name == "SGD"):
        return optim.SGD(net_params, lr, 
            momentum = momentum, weight_decay = weight_decay)
    elif(name == "Adam"):
        return optim.Adam(net_params, lr, weight_decay = 1e-5)
    elif(name == "SparseAdam"):
        return optim.SparseAdam(net_params, lr)
    elif(name == "Adadelta"):
        return optim.Adadelta(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adagrad"):
        return optim.Adagrad(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adamax"):
        return optim.Adamax(net_params, lr, weight_decay = weight_decay)
    elif(name == "ASGD"):
        return optim.ASGD(net_params, lr, weight_decay = weight_decay)
    elif(name == "LBFGS"):
        return optim.LBFGS(net_params, lr)
    elif(name == "RMSprop"):
        return optim.RMSprop(net_params, lr, momentum = momentum,
            weight_decay = weight_decay)
    elif(name == "Rprop"):
        return optim.Rprop(net_params, lr)
    else:
        raise ValueError("unsupported optimizer {0:}".format(name))
Example #21
    def train(self):
        if self.optimizer == 'adam':
            optimizer = optim.Adam(self.skip_gram_model.parameters(),
                                   lr=self.initial_lr,
                                   **self.optimizer_kwargs)
        elif self.optimizer == 'sparse_adam':
            optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                         lr=self.initial_lr,
                                         **self.optimizer_kwargs)
        elif self.optimizer == 'sgd':
            optimizer = optim.SGD(self.skip_gram_model.parameters(),
                                  lr=self.initial_lr,
                                  **self.optimizer_kwargs)
        elif self.optimizer == 'asgd':
            optimizer = optim.ASGD(self.skip_gram_model.parameters(),
                                   lr=self.initial_lr,
                                   **self.optimizer_kwargs)
        elif self.optimizer == 'adagrad':
            optimizer = optim.Adagrad(self.skip_gram_model.parameters(),
                                      lr=self.initial_lr,
                                      **self.optimizer_kwargs)
        else:
            raise Exception('Unknown optimizer!')

        for iteration in range(self.iterations):

            print("\n\n\nIteration: " + str(iteration + 1))

            if self.lr_schedule:
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, len(self.dataloader))
            running_loss = 0.0
            iprint = len(self.dataloader) // 20
            for i, sample_batched in enumerate(tqdm(self.dataloader)):

                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()
                    if self.lr_schedule:
                        scheduler.step()

                    running_loss = running_loss * (
                        1 - 5 / iprint) + loss.item() * (5 / iprint)
                    if i > 0 and i % iprint == 0:
                        print(" Loss: " + str(running_loss) + ' lr: ' + str([
                            param_group['lr']
                            for param_group in optimizer.param_groups
                        ]))
            print(" Loss: " + str(running_loss))

            self.skip_gram_model.save_embedding(self.data.id2word,
                                                self.output_file_name)
Example #22
    def objective(self, trial):
        model = self.model()

        optimizer = trial.suggest_categorical('optimizer',
                                              ['Adam', 'AdamW', 'ASGD', 'SGD'])
        lr = trial.suggest_loguniform('lr', 1e-7, 1e-3)

        if optimizer == 'Adam':
            beta1 = trial.suggest_float('beta1', 0.7, 1)
            beta2 = trial.suggest_float('beta2', 0.7, 1)
            weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
            epsilon = trial.suggest_float('epsilon', 0, 1e-5)
            optimizer = optim.Adam(model.parameters(),
                                   lr=lr,
                                   betas=(beta1, beta2),
                                   eps=epsilon,
                                   weight_decay=weight_decay)
        elif optimizer == 'AdamW':
            beta1 = trial.suggest_float('beta1', 0.7, 1)
            beta2 = trial.suggest_float('beta2', 0.7, 1)
            epsilon = trial.suggest_float('epsilon', 0, 1e-5)
            weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
            optimizer = optim.AdamW(model.parameters(),
                                    lr=lr,
                                    betas=(beta1, beta2),
                                    eps=epsilon,
                                    weight_decay=weight_decay)
        elif optimizer == 'ASGD':
            lambd = trial.suggest_float('lambd', 0, 1e-6)
            alpha = trial.suggest_float('alpha', 0.5, 1)
            t0 = trial.suggest_float('t0', 0, 1e-4)
            weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
            optimizer = optim.ASGD(model.parameters(),
                                   lr=lr,
                                   lambd=lambd,
                                   alpha=alpha,
                                   t0=t0,
                                   weight_decay=weight_decay)
        elif optimizer == 'SGD':
            momentum = trial.suggest_float('momentum', 0.7, 1)
            weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
            dampening = trial.suggest_float('dampening', 0, 1e-1)
            optimizer = optim.SGD(model.parameters(),
                                  lr=lr,
                                  momentum=momentum,
                                  dampening=dampening,
                                  weight_decay=weight_decay)

        accuracy = model.fit(self.train_loader,
                             optimizer,
                             self.epochs,
                             device=self.device,
                             valid_loader=self.vald_loader,
                             verbose=0)
        del model

        return accuracy
Example #23
 def configure_optimizers(self):
     if self.hparams.use_asgd:
         return optim.ASGD(self.parameters(),
                           lr=self.hparams.lr,
                           lambd=0.0001,
                           alpha=0.75,
                           t0=1000000.0,
                           weight_decay=0)
     return optim.Adam(self.parameters(), lr=self.hparams.lr)
Example #24
def test_AveragedModel(d=10):
    torch.manual_seed(1)
    model = nn.Linear(d, 1, bias=False)
    optimizer = optim.ASGD(model.parameters(),
                           lr=.05,
                           lambd=0,
                           alpha=0,
                           t0=0,
                           weight_decay=0)

    print(next(model.parameters()))
    xeval = torch.randn(100, d)
    yeval = xeval.mean(dim=1, keepdim=True)

    trajectory = defaultdict(list)
    for t in range(100):
        # evaluate
        with torch.no_grad():
            trajectory['error'] += [
                float(torch.mean((model(xeval) - yeval)**2))
            ]
            trajectory['pdist'] += [
                float(
                    torch.norm((next(model.parameters()).data -
                                1 / d * torch.ones(d))**2))
            ]
            with AveragedModel(model, optimizer):
                trajectory['aerror'] += [
                    float(torch.mean((model(xeval) - yeval)**2))
                ]
                trajectory['apdist'] += [
                    float(
                        torch.norm((next(model.parameters()).data -
                                    1 / d * torch.ones(d))**2))
                ]

        # train
        optimizer.zero_grad()
        x = torch.randn(1, d)
        y = torch.mean(x, dim=1, keepdim=True)
        loss = torch.mean((y - model(x))**2)
        loss.backward()
        optimizer.step()

    print(next(model.parameters()).data)

    import matplotlib.pyplot as plt
    # plt.scatter(error, aerror, c=np.arange(len(error)))
    plt.plot(trajectory['error'], alpha=.5, label='MSE')
    plt.plot(trajectory['aerror'], alpha=.5, label='average MSE')
    plt.plot(trajectory['pdist'], alpha=.5, label='pdist')
    plt.plot(trajectory['apdist'], alpha=.5, label='average pdist')
    plt.yscale('log')
    plt.legend()
    plt.grid()
    plt.show()
Example #25
    def configure_optimizer(self, options):
        initial_rate = options.learning_rate
        oo = options.optimizer_options

        if options.optimizer == "Adadelta":
            optimizer = optim.Adadelta(self.parameters(),
                                       lr=initial_rate,
                                       weight_decay=oo.weight_decay,
                                       rho=oo.rho,
                                       eps=oo.eps)
        elif options.optimizer == "Adagrad":
            optimizer = optim.Adagrad(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      lr_decay=oo.lr_decay)
        elif options.optimizer == "Adam":
            optimizer = optim.Adam(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   betas=oo.betas,
                                   eps=oo.eps)
        elif options.optimizer == "Adamax":
            optimizer = optim.Adamax(self.parameters(),
                                     lr=initial_rate,
                                     weight_decay=oo.weight_decay,
                                     betas=oo.betas,
                                     eps=oo.eps)
        elif options.optimizer == "ASGD":
            optimizer = optim.ASGD(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   lambd=oo.lambd,
                                   alpha=oo.alpha,
                                   t0=oo.t0)
        elif options.optimizer == "RMSprop":
            optimizer = optim.RMSprop(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      eps=oo.eps,
                                      alpha=oo.alpha,
                                      momentum=oo.momentum,
                                      centered=oo.centered)
        elif options.optimizer == "Rprop":
            optimizer = optim.Rprop(self.parameters(),
                                    lr=initial_rate,
                                    etas=oo.etas,
                                    step_sizes=oo.step_sizes)
        elif options.optimizer == "SGD":
            optimizer = optim.SGD(self.parameters(),
                                  lr=initial_rate,
                                  weight_decay=oo.weight_decay,
                                  momentum=oo.momentum,
                                  dampening=oo.dampening,
                                  nesterov=oo.nesterov)
        return optimizer
Example #26
def make_optimizer(model):
    if config.PARAM['optimizer_name'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=config.PARAM['lr'], weight_decay=config.PARAM['weight_decay'])
    elif config.PARAM['optimizer_name'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=config.PARAM['lr'], momentum=0.9,
            weight_decay=config.PARAM['weight_decay'])
    elif config.PARAM['optimizer_name'] == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=config.PARAM['lr'], weight_decay=config.PARAM['weight_decay'])
    else:
        raise ValueError('Not valid optimizer name')
    return optimizer
Example #27
 def build_optimizers(self):
     self.optimizers = {
         'sgd':
         optim.SGD(self.net.parameters(), lr=0.01, momentum=0),
         'sgd_with_momentum':
         optim.SGD(self.net.parameters(), lr=0.01, momentum=0.9),
         'adadelta':
         optim.Adadelta(self.net.parameters(),
                        lr=1.0,
                        rho=0.9,
                        eps=1e-06,
                        weight_decay=0),
         'adagrad':
         optim.Adagrad(self.net.parameters(),
                       lr=0.01,
                       lr_decay=0,
                       weight_decay=0,
                       initial_accumulator_value=0),
         'adam':
         optim.Adam(self.net.parameters(),
                    lr=0.001,
                    betas=(0.9, 0.999),
                    eps=1e-08,
                    weight_decay=0,
                    amsgrad=False),
         'adamax':
         optim.Adamax(self.net.parameters(),
                      lr=0.002,
                      betas=(0.9, 0.999),
                      eps=1e-08,
                      weight_decay=0),
         'asgd':
         optim.ASGD(self.net.parameters(),
                    lr=0.01,
                    lambd=0.0001,
                    alpha=0.75,
                    t0=1000000.0,
                    weight_decay=0),
         'rmsprop':
         optim.RMSprop(self.net.parameters(),
                       lr=0.01,
                       alpha=0.99,
                       eps=1e-08,
                       weight_decay=0,
                       momentum=0,
                       centered=False),
         'rprop':
         optim.Rprop(self.net.parameters(),
                     lr=0.01,
                     etas=(0.5, 1.2),
                     step_sizes=(1e-06, 50))
     }
     print('optimizers are built.\n')
Example #28
    def _optim(self):
        self.params = list(self.encoder.parameters()) + list(
            self.decoder.parameters())

        if self.config.opt == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.config.lr)
        elif self.config.opt == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.config.lr)
        elif self.config.opt == 'asgd':
            self.optimizer = optim.ASGD(self.params, lr=self.config.lr)
        else:
            self.optimizer = optim.SGD(self.params, lr=self.config.lr)
Example #29
def optimizer_creator(model, config):
    optim_name = config["optimizer"]

    if optim_name == "adam":
        return optim.Adam(model.parameters(), lr=config["lr"])
    elif optim_name == "adamw":
        return optim.AdamW(model.parameters(), lr=config["lr"])
    elif optim_name == "asgd":
        return optim.ASGD(model.parameters(), lr=config["lr"], t0=config["t0"])
    elif optim_name == "sgd":
        return optim.SGD(model.parameters(), lr=config["lr"])
    else:
        raise Exception(f"unsupported optimizer {optim_name}")
Example #30
def get_optimizer(model, hparams):
    """
    Parameters
    ----------
    model : torch.nn.Module
        PyTorch model

    hparams : OptimizerHyperparams
        Hyperparameters specifying the optimizer

    """

    try:

        if hparams.name == 'Adadelta':
            return optim.Adadelta(model.parameters(), **hparams.hparams)

        elif hparams.name == 'Adagrad':
            return optim.Adagrad(model.parameters(), **hparams.hparams)

        elif hparams.name == 'Adam':
            return optim.Adam(model.parameters(), **hparams.hparams)

        elif hparams.name == 'AdamW':
            return optim.AdamW(model.parameters(), **hparams.hparams)

        elif hparams.name == 'SparseAdam':
            return optim.SparseAdam(model.parameters(), **hparams.hparams)

        elif hparams.name == 'Adamax':
            return optim.Adamax(model.parameters(), **hparams.hparams)

        elif hparams.name == 'ASGD':
            return optim.ASGD(model.parameters(), **hparams.hparams)

        elif hparams.name == 'LBFGS':
            return optim.LBFGS(model.parameters(), **hparams.hparams)

        elif hparams.name == 'RMSprop':
            return optim.RMSprop(model.parameters(), **hparams.hparams)

        elif hparams.name == 'Rprop':
            return optim.Rprop(model.parameters(), **hparams.hparams)

        elif hparams.name == 'SGD':
            return optim.SGD(model.parameters(), **hparams.hparams)

    except TypeError as e:
        raise Exception(f'Invalid parameter in hparams: {hparams.hparams}'
                        f' for optimizer {hparams.name}.\nSee PyTorch docs.') from e
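For quick reference, here is a minimal self-contained sketch of a training loop with optim.ASGD, spelling out the default hyperparameters that several examples above (#10, #23, #27) pass explicitly (lr, lambd, alpha, t0, weight_decay). The linear model and random data below are placeholders for illustration only and are not taken from any example on this page.

import torch
import torch.nn as nn
import torch.optim as optim

# Toy regression setup (placeholder model and data).
model = nn.Linear(10, 1)
criterion = nn.MSELoss()
inputs = torch.randn(32, 10)
targets = torch.randn(32, 1)

# ASGD with its defaults written out, as in Examples #10/#23/#27:
# lambd is the decay term, alpha the power for the eta update,
# and t0 the point at which weight averaging starts.
optimizer = optim.ASGD(model.parameters(),
                       lr=0.01,
                       lambd=0.0001,
                       alpha=0.75,
                       t0=1000000.0,
                       weight_decay=0)

for step in range(100):
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()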