def test_asgd(self):
    self._test_basic_cases(
        lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100))
    self._test_basic_cases(
        lambda weight, bias: optim.ASGD(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3, t0=100))
    with self.assertRaisesRegex(ValueError, "Invalid weight_decay value: -0.5"):
        optim.ASGD(None, lr=1e-2, weight_decay=-0.5)

def test_asgd(self):
    self._test_rosenbrock(
        lambda params: optim.ASGD(params, lr=1e-3),
        wrap_old_fn(old_optim.asgd, eta0=1e-3))
    self._test_rosenbrock(
        lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
        wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8))
    self._test_rosenbrock(
        lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
        wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3))
    self._test_basic_cases(
        lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100))
    self._test_basic_cases(
        lambda weight, bias: optim.ASGD(
            self._build_params_dict(weight, bias, lr=1e-2),
            lr=1e-3, t0=100))

def train_iter(inputs, targets, iters=100):
    encoder = Encoder(input_size=1, hidden_size=3, output_size=7).to(device)
    decoder = Decoder(hidden_size=3, output_size=1).to(device)
    encoder_optim = optim.ASGD(encoder.parameters(), lr=0.01)
    decoder_optim = optim.ASGD(decoder.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    for i in range(iters):
        input = inputs[i].view(-1)
        target = targets[i].view(-1)
        loss = train(encoder, decoder, criterion, encoder_optim, decoder_optim,
                     input, target)
        if i % 10 == 0:
            print(i, loss)

def build_optimizer(self, optimizer, model, lr, l2_weight):
    if optimizer == 'Adam':
        return optim.Adam(model.parameters(), lr=lr, amsgrad=False,
                          weight_decay=l2_weight)
    elif optimizer == 'AdamW':
        return optim.AdamW(model.parameters(), lr=lr, amsgrad=False,
                           weight_decay=l2_weight)
    elif optimizer == 'RMSprop':
        return optim.RMSprop(model.parameters(), lr=lr, weight_decay=l2_weight)
    elif optimizer == 'SGD':
        return optim.SGD(model.parameters(), nesterov=True, lr=lr, momentum=0.9,
                         weight_decay=l2_weight)
        # return optim.SGD(model.parameters(), lr=lr, weight_decay=l2_weight)
    elif optimizer == 'ASGD':
        return optim.ASGD(model.parameters(), lr=lr, weight_decay=l2_weight)
    else:
        # Raising a bare string is invalid in Python 3; raise a proper exception.
        raise NotImplementedError('optimizer {} not implemented'.format(optimizer))

def __get_optimizer(self, optim_str, lr, momentum):
    if optim_str == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()),
                              lr=lr, momentum=momentum)
    elif optim_str == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()),
                               lr=lr)
    elif optim_str == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, self.model.parameters()),
                                  lr=lr)
    elif optim_str == 'adadelta':
        optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, self.model.parameters()),
                                   lr=lr)
    elif optim_str == 'adamax':
        optimizer = optim.Adamax(filter(lambda p: p.requires_grad, self.model.parameters()),
                                 lr=lr)
    elif optim_str == 'asgd':
        optimizer = optim.ASGD(filter(lambda p: p.requires_grad, self.model.parameters()),
                               lr=lr)
    else:
        raise Exception('Wrong optimizer')
    return optimizer

def __init__(self, lrate, loss_fn, in_size, out_size):
    """
    Initialize the layers of your neural network

    @param lrate: The learning rate for the model.
    @param loss_fn: The loss function
    @param in_size: Dimension of input
    @param out_size: Dimension of output
    """
    super(NeuralNet, self).__init__()
    """
    1) DO NOT change the name of self.encoder & self.decoder
    2) Both of them need to be subclasses of torch.nn.Module and callable,
       like output = self.encoder(input)
    3) Use 2d conv for the extra credit part.
       self.encoder should be able to take a tensor of shape [batch_size, 1, 28, 28] as input.
       self.decoder output tensor should have shape [batch_size, 1, 28, 28].
    """
    self.encoder = nn.Sequential(
        nn.Conv2d(1, 16, 3, 1, 1),
        nn.ReLU()
    )
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(16, 1, 3, 1, 1),
        nn.ReLU()
    )
    self.lrate = 0.1
    self.loss_fn = loss_fn
    self.optim = optim.ASGD(self.get_parameters(), lr=lrate, weight_decay=1e-3)

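# A quick shape check for the encoder/decoder defined above, rebuilt standalone
# so it runs without the rest of the class; the batch size of 4 is arbitrary.
import torch
import torch.nn as nn

encoder = nn.Sequential(nn.Conv2d(1, 16, 3, 1, 1), nn.ReLU())
decoder = nn.Sequential(nn.ConvTranspose2d(16, 1, 3, 1, 1), nn.ReLU())
x = torch.zeros(4, 1, 28, 28)
print(encoder(x).shape)           # torch.Size([4, 16, 28, 28])
print(decoder(encoder(x)).shape)  # torch.Size([4, 1, 28, 28])
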
def averaged_sgd(model, params):
    return optim.ASGD(
        model.parameters(),
        lr=params[Constants.LEARNING_RATE][Constants.VALUE],
        weight_decay=params[Constants.WEIGHT_DECAY][Constants.VALUE],
        alpha=params[Constants.ALPHA][Constants.VALUE],
        lambd=params[Constants.LAMBD][Constants.VALUE])

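# A minimal usage sketch for averaged_sgd above. The Constants attributes and
# the nested {VALUE: ...} layout are assumptions made for illustration, not the
# project's actual schema.
import torch.nn as nn
import torch.optim as optim


class Constants:
    LEARNING_RATE = "learning_rate"
    WEIGHT_DECAY = "weight_decay"
    ALPHA = "alpha"
    LAMBD = "lambd"
    VALUE = "value"


params = {
    Constants.LEARNING_RATE: {Constants.VALUE: 1e-2},
    Constants.WEIGHT_DECAY: {Constants.VALUE: 1e-4},
    Constants.ALPHA: {Constants.VALUE: 0.75},
    Constants.LAMBD: {Constants.VALUE: 1e-4},
}
model = nn.Linear(10, 1)
optimizer = averaged_sgd(model, params)  # returns optim.ASGD with those hyperparameters
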
def _makeOptimizer(self):
    optimiserArgs = dict(self.optimiserArgs)
    optimiserArgs.update({'lr': self.lr})
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, **optimiserArgs)
    elif self.method == 'asgd':
        self.optimizer = optim.ASGD(self.params, **optimiserArgs)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, **optimiserArgs)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, **optimiserArgs)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, **optimiserArgs)
    elif self.method == 'adamw':
        self.optimizer = optim.AdamW(self.params, **optimiserArgs)
    elif self.method == 'adamax':
        self.optimizer = optim.Adamax(self.params, **optimiserArgs)
    elif self.method == 'rmsprop':
        self.optimizer = optim.RMSprop(self.params, **optimiserArgs)
    elif self.method == 'rprop':
        self.optimizer = optim.Rprop(self.params, **optimiserArgs)
    elif self.method == 'lbfgs':
        self.use_shrinkage = False
        self.optimizer = optim.LBFGS(self.params, **optimiserArgs)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

def set_optimizer(self, opt_type, args):
    """
    Set the specified optimizer.

    inputs:
        opt_type: (str) optimizer type
        args: (dict) contains the learning rate and other necessary info
    """
    lr = float(args["lr"])
    if opt_type == "SGD":
        self.optimizer = optim.SGD(self.model.parameters(), lr=lr,
                                   momentum=float(args["momentum"]))
    elif opt_type == "ASGD":
        self.optimizer = optim.ASGD(self.model.parameters(), lr=lr)
    elif opt_type == "ADAM":
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr,
                                    weight_decay=args["weight_decay"],
                                    amsgrad=args["amsgrad"])
    elif opt_type == "ADAGRAD":
        self.optimizer = optim.Adagrad(self.model.parameters(), lr=lr,
                                       lr_decay=args["lr_decay"],
                                       weight_decay=args["weight_decay"])
    elif opt_type == "RMSPROP":
        self.optimizer = optim.RMSprop(self.model.parameters(), lr=lr)

def get_optimizer(optimizer, lr, params):
    if optimizer == 'adagrad':
        optimizer = torch.optim.Adagrad(params, lr=lr*5, lr_decay=0, weight_decay=0,
                                        initial_accumulator_value=0, eps=1e-10)
    elif optimizer == 'adadelta':
        optimizer = optim.Adadelta(params, lr=lr*100*5, rho=0.9, eps=1e-06, weight_decay=0)
    elif optimizer == 'adam':
        optimizer = optim.Adam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08,
                               weight_decay=0, amsgrad=False)
    elif optimizer == 'adaw':
        optimizer = optim.AdamW(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08,
                                weight_decay=0.01, amsgrad=False)
    elif optimizer == 'sparseadam':
        # The scaling belongs on the learning rate, not on the parameters.
        optimizer = optim.SparseAdam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08)
    elif optimizer == 'ASGD':
        optimizer = optim.ASGD(params, lr=lr*5, lambd=0.0001, alpha=0.75,
                               t0=1000000.0, weight_decay=0)
    elif optimizer == 'LBFGS':
        optimizer = optim.LBFGS(params, lr=lr*100*5)
    elif optimizer == 'RMSprop':
        optimizer = optim.RMSprop(params, lr=lr*5)
    elif optimizer == 'rprop':
        optimizer = optim.Rprop(params, lr=lr*5)
    elif optimizer == 'SGD':
        optimizer = optim.SGD(params, lr=lr*5, momentum=0, dampening=0,
                              weight_decay=0, nesterov=False)
    elif optimizer == 'adamax':  # standard: adamax
        # best lr=0.01, standard is lr=0.002; multiply every other lr by a factor of 5 as well
        optimizer = optim.Adamax(params, lr=lr)
    else:
        raise Exception("Optimizer not supported. Please change it!")
    return optimizer

def train(epoch):
    global train_loss
    global train_correct
    global train_total
    global optimizer

    net.train()
    train_loss = 0
    train_correct = 0
    train_total = 0
    # The optimizer is rebuilt every epoch so the current value of the global lrt
    # is picked up; note this also resets ASGD's internal averaging state.
    optimizer = optim.ASGD(net.parameters(), lr=lrt, weight_decay=5e-4)
    print('Sqnet_1x_v1.0_ASGD Training Epoch: #%d, LR: %.4f' % (epoch, lrt))
    for idx, (inputs, labels) in enumerate(train_loader):
        if is_use_cuda:
            inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predict = torch.max(outputs, 1)
        train_total += labels.size(0)
        train_correct += predict.eq(labels).cpu().sum().double()

        sys.stdout.write('\r')
        sys.stdout.write(
            '[%s] Training Epoch [%d/%d] Iter[%d/%d]\t\tLoss: %.4f Tr_Acc: %.3f'
            % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
               epoch, num_epochs, idx, len(train_dataset) // batch_size,
               train_loss / (batch_size * (idx + 1)), train_correct / train_total))
        sys.stdout.flush()

def fit(self, lr=1e-3, nb_epoch=20, iter_per_epoch=40, batch_size=100, optimizer="Adam"):
    self.fit_first_stage()
    if self.GPU:
        self.cuda()
    self.__build_dataloader__(batch_size=batch_size)
    loss = nn.CrossEntropyLoss()
    if optimizer == "ASGD":
        optimizer = optim.ASGD(self.parameters(), lr=lr, t0=200)
    elif optimizer == "Adam":
        optimizer = optim.Adam(self.parameters(), lr=lr)
    for i in range(nb_epoch):
        train_loss, train_accuracy = self.__train_epoch__(
            loss=loss, optimizer=optimizer, iter_per_epoch=iter_per_epoch)
        val_loss, val_accuracy = self.__val_epoch__(loss, iter_per_epoch=iter_per_epoch)
        print(
            "Epoch num {}:\n"
            " \t train_loss: {},\n"
            " \t acc overall: {},\n"
            " \t acc class 0: {},\n"
            " \t acc class 1: {},\n"
            " \t acc class 2: {},\n"
            " \t acc class 3: {},\n"
            " \t acc class 4: {}, \n"
            " \n"
            " \t val_loss: {},\n"
            " \t acc overall: {},\n"
            " \t acc class 0: {},\n"
            " \t acc class 1: {},\n"
            " \t acc class 2: {},\n"
            " \t acc class 3: {},\n"
            " \t acc class 4: {}\n\n\n".format(
                i + 1,
                np.round(train_loss, 8),
                np.round(train_accuracy[0], 8), np.round(train_accuracy[1], 8),
                np.round(train_accuracy[2], 8), np.round(train_accuracy[3], 8),
                np.round(train_accuracy[4], 8), np.round(train_accuracy[5], 8),
                np.round(val_loss, 8),
                np.round(val_accuracy[0], 8), np.round(val_accuracy[1], 8),
                np.round(val_accuracy[2], 8), np.round(val_accuracy[3], 8),
                np.round(val_accuracy[4], 8), np.round(val_accuracy[5], 8)))

def setup_optims(self):
    """
    Optimisers used during training.

    By default, the learning rate is also decayed every time your validation
    performance plateaus. See the Yaml file for more details on the configuration.
    """
    lr = self.config.hp.initial_lr
    if self.config.hp.optim == 'adam':
        self.optim = optim.Adam(self.model.parameters(), lr=lr,
                                weight_decay=self.config.hp.weight_decay,
                                eps=self.config.hp.adam_eps)
    elif self.config.hp.optim == 'sgd':
        # Note: the 'sgd' option actually builds an averaged-SGD (ASGD) optimizer here.
        self.optim = optim.ASGD(self.model.parameters(), lr=lr,
                                weight_decay=self.config.hp.weight_decay)

    self.optim_lr_decay = ReduceLROnPlateau(
        self.optim,
        factor=self.config.hp.decay_lr,
        verbose=True,
        patience=self.config.hp.lr_patience_decay,
        min_lr=self.config.hp.minimal_lr)
    self.initial_lr_decay = self.optim_lr_decay.state_dict()
    self.initial_optim = self.optim.state_dict()

def get_optimizer(name, model):
    "Just a switch"
    if name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=lr_factor)
    elif name == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=lr, weight_decay=lr_factor)
    elif name == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=lr_factor)
    elif name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=lr_factor)
    elif name == 'asgd':
        optimizer = optim.ASGD(model.parameters(), lr=lr, t0=0, lambd=0.,
                               weight_decay=lr_factor)
    else:
        print('Defaulting to vanilla SGD')
        optimizer = optim.SGD(model.parameters(), lr=lr)
    return optimizer

def prep_optim(self):
    if self.args.optim == "adam":
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "adabound":
        self.optimizer = adabound.AdaBound(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "rmsprop":
        self.optimizer = optim.RMSprop(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "sgd":
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "bfgs":
        self.optimizer = optim.LBFGS(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "adamw":
        self.optimizer = optim.AdamW(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "asgd":
        self.optimizer = optim.ASGD(self.model.parameters(), lr=self.args.lr)
    else:
        # A bare `raise` outside an except block fails; raise an explicit error instead.
        raise ValueError("Invalid optimizer chosen")

def get_optimizer(self, args):
    """
    Get the model's default optimizer.

    Args:
        args: namespace holding the optimizer name (args.optimizer), learning
            rate (args.lr) and weight decay (args.wdecay).
    """
    if args.optimizer == 'adam':
        optimizer = optim.Adam(self.parameters(), lr=args.lr, weight_decay=args.wdecay)
    elif args.optimizer == 'adagrad':
        optimizer = optim.Adagrad(self.parameters(), lr=args.lr, weight_decay=args.wdecay)
    elif args.optimizer == 'adadelta':
        optimizer = optim.Adadelta(self.parameters(), lr=args.lr, weight_decay=args.wdecay)
    elif args.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(self.parameters(), lr=args.lr, weight_decay=args.wdecay)
    elif args.optimizer == 'asgd':
        optimizer = optim.ASGD(self.parameters(), lr=args.lr, t0=0, lambd=0.,
                               weight_decay=args.wdecay)
    else:
        print('Defaulting to vanilla SGD')
        optimizer = optim.SGD(self.parameters(), lr=args.lr)
    self.optimizer = optimizer
    return optimizer

def get_optim(config, model):
    if config.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SparseAdam':
        optimizer = optim.SparseAdam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adamax':
        optimizer = optim.Adamax(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'LBFGS':
        optimizer = optim.LBFGS(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Rprop':
        optimizer = optim.Rprop(model.parameters(), lr=float(config.lr))
    print('\noptimizer :', optimizer, '\n')
    return optimizer

def train(x_val, y_val, niter, eta, k, lr=0.1):
    torch.manual_seed(0)
    n_examples, n_features = x_val.shape
    n_classes = len(np.unique(y_val))

    model = build_model(n_features)
    optimizer = optim.ASGD(model.parameters(), lr=lr)
    loss = DRO_loss(eta, k)

    x = torch.FloatTensor(x_val)
    y = torch.FloatTensor(y_val.astype(float))[:, None]

    cost_list = []
    for t in range(niter):
        # Reset gradient
        optimizer.zero_grad()

        # Forward
        fx = model.forward(x)
        output = loss.forward(fx, y)

        # Backward
        output.backward()
        cost_list.append(output.item())

        # Update parameters
        optimizer.step()

    # Normalise the parameters by the L2 norm of the first weight vector.
    z = dump_model(model)
    scalar = np.sqrt(np.sum(z[0]**2.0))
    for param in model.parameters():
        param.data = param.data / float(scalar)

    return model, cost_list

def _set_optimizer():
    if config_dict['optimization_params']['optimizer'] == 'adam':
        selected_optimizer = optim.Adam(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'adagrad':
        selected_optimizer = optim.Adagrad(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'adadelta':
        selected_optimizer = optim.Adadelta(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'rmsprop':
        selected_optimizer = optim.RMSprop(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            weight_decay=config_dict['optimization_params']['wdecay'])
    elif config_dict['optimization_params']['optimizer'] == 'asgd':
        selected_optimizer = optim.ASGD(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'],
            t0=0,
            lambd=0.,
            weight_decay=config_dict['optimization_params']['wdecay'])
    else:
        print('Defaulting to vanilla SGD')
        selected_optimizer = optim.SGD(
            etm_model.parameters(),
            lr=config_dict['optimization_params']['lr'])
    return selected_optimizer

def get_optimiser(name, net_params, optim_params):
    lr = optim_params['learning_rate']
    momentum = optim_params['momentum']
    weight_decay = optim_params['weight_decay']
    if name == "SGD":
        return optim.SGD(net_params, lr, momentum=momentum, weight_decay=weight_decay)
    elif name == "Adam":
        return optim.Adam(net_params, lr, weight_decay=1e-5)
    elif name == "SparseAdam":
        return optim.SparseAdam(net_params, lr)
    elif name == "Adadelta":
        return optim.Adadelta(net_params, lr, weight_decay=weight_decay)
    elif name == "Adagrad":
        return optim.Adagrad(net_params, lr, weight_decay=weight_decay)
    elif name == "Adamax":
        return optim.Adamax(net_params, lr, weight_decay=weight_decay)
    elif name == "ASGD":
        return optim.ASGD(net_params, lr, weight_decay=weight_decay)
    elif name == "LBFGS":
        return optim.LBFGS(net_params, lr)
    elif name == "RMSprop":
        return optim.RMSprop(net_params, lr, momentum=momentum, weight_decay=weight_decay)
    elif name == "Rprop":
        return optim.Rprop(net_params, lr)
    else:
        raise ValueError("unsupported optimizer {0:}".format(name))

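# A minimal usage sketch for get_optimiser above; the dictionary keys follow the
# lookups inside the function, while the concrete values are illustrative only.
import torch.nn as nn

net = nn.Linear(8, 2)
optim_params = {'learning_rate': 1e-2, 'momentum': 0.9, 'weight_decay': 1e-4}
asgd = get_optimiser("ASGD", net.parameters(), optim_params)
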
def train(self):
    if self.optimizer == 'adam':
        optimizer = optim.Adam(self.skip_gram_model.parameters(),
                               lr=self.initial_lr, **self.optimizer_kwargs)
    elif self.optimizer == 'sparse_adam':
        optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                     lr=self.initial_lr, **self.optimizer_kwargs)
    elif self.optimizer == 'sgd':
        optimizer = optim.SGD(self.skip_gram_model.parameters(),
                              lr=self.initial_lr, **self.optimizer_kwargs)
    elif self.optimizer == 'asgd':
        optimizer = optim.ASGD(self.skip_gram_model.parameters(),
                               lr=self.initial_lr, **self.optimizer_kwargs)
    elif self.optimizer == 'adagrad':
        optimizer = optim.Adagrad(self.skip_gram_model.parameters(),
                                  lr=self.initial_lr, **self.optimizer_kwargs)
    else:
        raise Exception('Unknown optimizer!')

    for iteration in range(self.iterations):
        print("\n\n\nIteration: " + str(iteration + 1))
        if self.lr_schedule:
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, len(self.dataloader))
        running_loss = 0.0
        iprint = len(self.dataloader) // 20

        for i, sample_batched in enumerate(tqdm(self.dataloader)):
            if len(sample_batched[0]) > 1:
                pos_u = sample_batched[0].to(self.device)
                pos_v = sample_batched[1].to(self.device)
                neg_v = sample_batched[2].to(self.device)

                optimizer.zero_grad()
                loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                loss.backward()
                optimizer.step()
                if self.lr_schedule:
                    scheduler.step()

                running_loss = running_loss * (1 - 5 / iprint) + loss.item() * (5 / iprint)
                if i > 0 and i % iprint == 0:
                    print(" Loss: " + str(running_loss) + ' lr: ' + str(
                        [param_group['lr'] for param_group in optimizer.param_groups]))

        print(" Loss: " + str(running_loss))
        self.skip_gram_model.save_embedding(self.data.id2word, self.output_file_name)

def objective(self, trial):
    model = self.model()
    optimizer = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'ASGD', 'SGD'])
    lr = trial.suggest_loguniform('lr', 1e-7, 1e-3)
    if optimizer == 'Adam':
        beta1 = trial.suggest_float('beta1', 0.7, 1)
        beta2 = trial.suggest_float('beta2', 0.7, 1)
        weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
        epsilon = trial.suggest_float('epsilon', 0, 1e-5)
        optimizer = optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2),
                               eps=epsilon, weight_decay=weight_decay)
    elif optimizer == 'AdamW':
        beta1 = trial.suggest_float('beta1', 0.7, 1)
        beta2 = trial.suggest_float('beta2', 0.7, 1)
        epsilon = trial.suggest_float('epsilon', 0, 1e-5)
        weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
        optimizer = optim.AdamW(model.parameters(), lr=lr, betas=(beta1, beta2),
                                eps=epsilon, weight_decay=weight_decay)
    elif optimizer == 'ASGD':
        lambd = trial.suggest_float('lambd', 0, 1e-6)
        alpha = trial.suggest_float('alpha', 0.5, 1)
        t0 = trial.suggest_float('t0', 0, 1e-4)
        weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
        optimizer = optim.ASGD(model.parameters(), lr=lr, lambd=lambd, alpha=alpha,
                               t0=t0, weight_decay=weight_decay)
    elif optimizer == 'SGD':
        momentum = trial.suggest_float('momentum', 0.7, 1)
        weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
        dampening = trial.suggest_float('dampening', 0, 1e-1)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                              dampening=dampening, weight_decay=weight_decay)
    accuracy = model.fit(self.train_loader, optimizer, self.epochs, device=self.device,
                         valid_loader=self.vald_loader, verbose=0)
    del model
    return accuracy

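# A self-contained sketch of how such an Optuna objective is driven, with a toy
# objective in place of the class method above; the search space mirrors the
# ASGD branch and the returned metric is a placeholder.
import optuna
import torch.nn as nn
import torch.optim as optim


def toy_objective(trial):
    model = nn.Linear(4, 1)
    lr = trial.suggest_float('lr', 1e-7, 1e-3, log=True)
    lambd = trial.suggest_float('lambd', 0, 1e-6)
    alpha = trial.suggest_float('alpha', 0.5, 1)
    optimizer = optim.ASGD(model.parameters(), lr=lr, lambd=lambd, alpha=alpha)
    # ... train with `optimizer` and evaluate here; return a validation metric.
    return 0.0


study = optuna.create_study(direction='maximize')  # the objective returns accuracy
study.optimize(toy_objective, n_trials=10)
print(study.best_params)
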
def configure_optimizers(self):
    if self.hparams.use_asgd:
        return optim.ASGD(self.parameters(), lr=self.hparams.lr, lambd=0.0001,
                          alpha=0.75, t0=1000000.0, weight_decay=0)
    return optim.Adam(self.parameters(), lr=self.hparams.lr)

def test_AveragedModel(d=10):
    torch.manual_seed(1)
    model = nn.Linear(d, 1, bias=False)
    optimizer = optim.ASGD(model.parameters(), lr=.05, lambd=0, alpha=0, t0=0,
                           weight_decay=0)
    print(next(model.parameters()))
    xeval = torch.randn(100, d)
    yeval = xeval.mean(dim=1, keepdim=True)
    trajectory = defaultdict(list)
    for t in range(100):
        # evaluate
        with torch.no_grad():
            trajectory['error'] += [float(torch.mean((model(xeval) - yeval)**2))]
            trajectory['pdist'] += [
                float(torch.norm((next(model.parameters()).data - 1 / d * torch.ones(d))**2))
            ]
            with AveragedModel(model, optimizer):
                trajectory['aerror'] += [float(torch.mean((model(xeval) - yeval)**2))]
                trajectory['apdist'] += [
                    float(torch.norm((next(model.parameters()).data - 1 / d * torch.ones(d))**2))
                ]
        # train
        optimizer.zero_grad()
        x = torch.randn(1, d)
        y = torch.mean(x, dim=1, keepdim=True)
        loss = torch.mean((y - model(x))**2)
        loss.backward()
        optimizer.step()

    print(next(model.parameters()).data)

    import matplotlib.pyplot as plt
    # plt.scatter(error, aerror, c=np.arange(len(error)))
    plt.plot(trajectory['error'], alpha=.5, label='MSE')
    plt.plot(trajectory['aerror'], alpha=.5, label='average MSE')
    plt.plot(trajectory['pdist'], alpha=.5, label='pdist')
    plt.plot(trajectory['apdist'], alpha=.5, label='average pdist')
    plt.yscale('log')
    plt.legend()
    plt.grid()
    plt.show()

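# AveragedModel is not defined in the snippet above. A minimal sketch of what a
# stand-in (named averaged_model here) could look like, assuming torch.optim.ASGD
# keeps its running parameter average under the 'ax' key of the per-parameter
# optimizer state; this is an illustrative helper, not the author's implementation.
import contextlib
import torch


@contextlib.contextmanager
def averaged_model(model, optimizer):
    """Temporarily swap the model's parameters for ASGD's averaged ones."""
    backup = [p.detach().clone() for p in model.parameters()]
    for p in model.parameters():
        state = optimizer.state[p]
        if 'ax' in state:  # 'ax' only exists after at least one optimizer.step()
            p.data.copy_(state['ax'])
    try:
        yield model
    finally:
        for p, saved in zip(model.parameters(), backup):
            p.data.copy_(saved)
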
def configure_optimizer(self, options):
    initial_rate = options.learning_rate
    oo = options.optimizer_options
    if options.optimizer == "Adadelta":
        optimizer = optim.Adadelta(self.parameters(), lr=initial_rate,
                                   weight_decay=oo.weight_decay, rho=oo.rho, eps=oo.eps)
    elif options.optimizer == "Adagrad":
        optimizer = optim.Adagrad(self.parameters(), lr=initial_rate,
                                  weight_decay=oo.weight_decay, lr_decay=oo.lr_decay)
    elif options.optimizer == "Adam":
        optimizer = optim.Adam(self.parameters(), lr=initial_rate,
                               weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps)
    elif options.optimizer == "Adamax":
        optimizer = optim.Adamax(self.parameters(), lr=initial_rate,
                                 weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps)
    elif options.optimizer == "ASGD":
        optimizer = optim.ASGD(self.parameters(), lr=initial_rate,
                               weight_decay=oo.weight_decay, lambd=oo.lambd,
                               alpha=oo.alpha, t0=oo.t0)
    elif options.optimizer == "RMSprop":
        optimizer = optim.RMSprop(self.parameters(), lr=initial_rate,
                                  weight_decay=oo.weight_decay, eps=oo.eps,
                                  alpha=oo.alpha, momentum=oo.momentum,
                                  centered=oo.centered)
    elif options.optimizer == "Rprop":
        optimizer = optim.Rprop(self.parameters(), lr=initial_rate, etas=oo.etas,
                                step_sizes=oo.step_sizes)
    elif options.optimizer == "SGD":
        optimizer = optim.SGD(self.parameters(), lr=initial_rate,
                              weight_decay=oo.weight_decay, momentum=oo.momentum,
                              dampening=oo.dampening, nesterov=oo.nesterov)
    return optimizer

def make_optimizer(model):
    if config.PARAM['optimizer_name'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=config.PARAM['lr'],
                               weight_decay=config.PARAM['weight_decay'])
    elif config.PARAM['optimizer_name'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=config.PARAM['lr'], momentum=0.9,
                              weight_decay=config.PARAM['weight_decay'])
    elif config.PARAM['optimizer_name'] == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=config.PARAM['lr'],
                               weight_decay=config.PARAM['weight_decay'])
    else:
        raise ValueError('Not valid optimizer name')
    return optimizer

def build_optimizers(self):
    self.optimizers = {
        'sgd': optim.SGD(self.net.parameters(), lr=0.01, momentum=0),
        'sgd_with_momentum': optim.SGD(self.net.parameters(), lr=0.01, momentum=0.9),
        'adadelta': optim.Adadelta(self.net.parameters(), lr=1.0, rho=0.9, eps=1e-06,
                                   weight_decay=0),
        'adagrad': optim.Adagrad(self.net.parameters(), lr=0.01, lr_decay=0,
                                 weight_decay=0, initial_accumulator_value=0),
        'adam': optim.Adam(self.net.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0, amsgrad=False),
        'adamax': optim.Adamax(self.net.parameters(), lr=0.002, betas=(0.9, 0.999),
                               eps=1e-08, weight_decay=0),
        'asgd': optim.ASGD(self.net.parameters(), lr=0.01, lambd=0.0001, alpha=0.75,
                           t0=1000000.0, weight_decay=0),
        'rmsprop': optim.RMSprop(self.net.parameters(), lr=0.01, alpha=0.99, eps=1e-08,
                                 weight_decay=0, momentum=0, centered=False),
        'rprop': optim.Rprop(self.net.parameters(), lr=0.01, etas=(0.5, 1.2),
                             step_sizes=(1e-06, 50)),
    }
    print('optimizers are built.\n')

def _optim(self):
    self.params = list(self.encoder.parameters()) + list(self.decoder.parameters())
    if self.config.opt == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.config.lr)
    elif self.config.opt == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.config.lr)
    elif self.config.opt == 'asgd':
        self.optimizer = optim.ASGD(self.params, lr=self.config.lr)
    else:
        self.optimizer = optim.SGD(self.params, lr=self.config.lr)

def optimizer_creator(model, config):
    optim_name = config["optimizer"]
    if optim_name == "adam":
        return optim.Adam(model.parameters(), lr=config["lr"])
    elif optim_name == "adamw":
        return optim.AdamW(model.parameters(), lr=config["lr"])
    elif optim_name == "asgd":
        return optim.ASGD(model.parameters(), lr=config["lr"], t0=config["t0"])
    elif optim_name == "sgd":
        return optim.SGD(model.parameters(), lr=config["lr"])
    else:
        raise Exception(f"unsupported optimizer {optim_name}")

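# A minimal usage sketch for optimizer_creator above; the model and the config
# values are placeholders chosen for illustration.
import torch.nn as nn

model = nn.Linear(16, 4)
config = {"optimizer": "asgd", "lr": 1e-2, "t0": 1e6}
optimizer = optimizer_creator(model, config)  # -> optim.ASGD(..., lr=0.01, t0=1e6)
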
def get_optimizer(model, hparams):
    """
    Parameters
    ----------
    model : torch.nn.Module
        PyTorch model
    hparams : OptimizerHyperparams
        Hyperparameters specifying the optimizer
    """
    try:
        # Comparing strings with `is` is unreliable; use `==` instead.
        if hparams.name == 'Adadelta':
            return optim.Adadelta(model.parameters(), **hparams.hparams)
        elif hparams.name == 'Adagrad':
            return optim.Adagrad(model.parameters(), **hparams.hparams)
        elif hparams.name == 'Adam':
            return optim.Adam(model.parameters(), **hparams.hparams)
        elif hparams.name == 'AdamW':
            return optim.AdamW(model.parameters(), **hparams.hparams)
        elif hparams.name == 'SparseAdam':
            return optim.SparseAdam(model.parameters(), **hparams.hparams)
        elif hparams.name == 'Adamax':
            return optim.Adamax(model.parameters(), **hparams.hparams)
        elif hparams.name == 'ASGD':
            return optim.ASGD(model.parameters(), **hparams.hparams)
        elif hparams.name == 'LBFGS':
            return optim.LBFGS(model.parameters(), **hparams.hparams)
        elif hparams.name == 'RMSprop':
            return optim.RMSprop(model.parameters(), **hparams.hparams)
        elif hparams.name == 'Rprop':
            return optim.Rprop(model.parameters(), **hparams.hparams)
        elif hparams.name == 'SGD':
            return optim.SGD(model.parameters(), **hparams.hparams)
    except TypeError:
        raise Exception(f'Invalid parameter in hparams: {hparams.hparams}'
                        f' for optimizer {hparams.name}.\nSee PyTorch docs.')