Example No. 1
def test_update_function():
    # value_net, policy_net and batch are module-level fixtures defined elsewhere in the test suite.
    target_value_net = recnn.nn.Critic(1290, 128, 256)
    target_policy_net = recnn.nn.Actor(1290, 128, 256)

    target_policy_net.eval()
    target_value_net.eval()

    # soft update
    recnn.utils.soft_update(value_net, target_value_net, soft_tau=1.0)
    recnn.utils.soft_update(policy_net, target_policy_net, soft_tau=1.0)

    # define optimizers
    value_optimizer = optim.RAdam(value_net.parameters(),
                                  lr=1e-5,
                                  weight_decay=1e-2)
    policy_optimizer = optim.RAdam(policy_net.parameters(),
                                   lr=1e-5,
                                   weight_decay=1e-2)

    nets = {
        "value_net": value_net,
        "target_value_net": target_value_net,
        "policy_net": policy_net,
        "target_policy_net": target_policy_net,
    }

    optimizer = {
        "policy_optimizer": policy_optimizer,
        "value_optimizer": value_optimizer,
    }

    debug = {}
    writer = recnn.utils.misc.DummyWriter()

    step = 0
    params = {
        "gamma": 0.99,
        "min_value": -10,
        "max_value": 10,
        "policy_step": 10,
        "soft_tau": 0.001,
    }

    loss = recnn.nn.update.ddpg_update(batch,
                                       params,
                                       nets,
                                       optimizer,
                                       torch.device("cpu"),
                                       debug,
                                       writer,
                                       step=step)

    check_loss_and_networks(loss, nets)
Example No. 2
def get_optimizer(net_conf, model):
    if net_conf["optimizer"] == "adam":
        Gopt = optim.Adam(model["G"].parameters(), lr=net_conf["lr"])
    elif net_conf["optimizer"] == "radam":
        Gopt = toptim.RAdam(model["G"].parameters(), lr=net_conf["lr"])
    elif net_conf["optimizer"] == "lamb":
        Gopt = Lamb(
            model["G"].parameters(),
            lr=net_conf["lr"],
            weight_decay=0.01,
            betas=(0.9, 0.999),
            adam=False,
        )
    optimizer = {"G": Gopt}

    if "D" in model:
        if net_conf["optimizer"] == "adam":
            Dopt = optim.Adam(model["D"].parameters(),
                              lr=net_conf["discriminator_lr"])
        elif net_conf["optimizer"] == "radam":
            Dopt = toptim.RAdam(model["D"].parameters(),
                                lr=net_conf["discriminator_lr"])
        elif net_conf["optimizer"] == "lamb":
            Dopt = Lamb(
                model["D"].parameters(),
                lr=net_conf["lr"],
                weight_decay=0.01,
                betas=(0.9, 0.999),
                adam=False,
            )
        optimizer.update({"D": Dopt})

    if "SPKRADV" in model:
        if net_conf["optimizer"] == "adam":
            SPKRADVopt = optim.Adam(model["SPKRADV"].parameters(),
                                    lr=net_conf["spkradv_lr"])
        elif net_conf["optimizer"] == "radam":
            SPKRADVopt = toptim.RAdam(model["SPKRADV"].parameters(),
                                      lr=net_conf["spkradv_lr"])
        elif net_conf["optimizer"] == "lamb":
            SPKRADVopt = Lamb(
                model["SPKRADV"].parameters(),
                lr=net_conf["spkradv_lr"],
                weight_decay=0.01,
                betas=(0.9, 0.999),
                adam=False,
            )
        optimizer.update({"SPKRADV": SPKRADVopt})

    return optimizer
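A minimal usage sketch (the net_conf and model dicts below are made-up placeholders, and torch / toptim are assumed to be imported as in the snippet): the helper returns one optimizer per sub-network present in model.

# Hypothetical inputs, for illustration only.
net_conf = {"optimizer": "radam", "lr": 1e-4, "discriminator_lr": 5e-5}
model = {"G": torch.nn.Linear(16, 16), "D": torch.nn.Linear(16, 1)}
optimizers = get_optimizer(net_conf, model)  # -> {"G": RAdam(...), "D": RAdam(...)}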
Example No. 3
def main(args):

    train_cfg = config_from_json(args.train_cfg)
    model_cfg = config_from_json(args.model_cfg)
    model_cfg.block_size = model_cfg.max_len // model_cfg.n_blocks

    set_seeds(train_cfg.seed)

    print("Loading dataset")
    loader = PreTrainDataset(args.data_file, train_cfg, model_cfg)
    model = BertInnerPreTrain(model_cfg)

    if train_cfg.optimizer == "lamb":
        optimizer = torch_optimizer.Lamb(model.parameters(),
                                         lr=train_cfg.lr,
                                         weight_decay=train_cfg.weigth_decay)
    elif train_cfg.optimizer == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(),
                                          lr=train_cfg.lr,
                                          weight_decay=train_cfg.weigth_decay)
    else:
        optimizer = optim4GPU(train_cfg, model)

    trainer = Trainer(loader, model, optimizer, args.save_dir, get_device(),
                      train_cfg.parallel)

    if args.load_dir != "":
        print("Loading checkpoint")
        trainer.load_model(args.load_dir, args.load_dataset_state)

    trainer.train(train_cfg)
Example No. 4
def get_optimizer(hparams, models):
    eps = 1e-8
    parameters = get_parameters(models)
    if hparams.optimizer == 'sgd':
        optimizer = SGD(parameters,
                        lr=hparams.lr,
                        momentum=hparams.momentum,
                        weight_decay=hparams.weight_decay)
    elif hparams.optimizer == 'adam':
        optimizer = Adam(parameters,
                         lr=hparams.lr,
                         eps=eps,
                         weight_decay=hparams.weight_decay)
    elif hparams.optimizer == 'radam':
        optimizer = torch_optimizer.RAdam(parameters,
                                          lr=hparams.lr,
                                          eps=eps,
                                          weight_decay=hparams.weight_decay)
    elif hparams.optimizer == 'ranger':
        optimizer = torch_optimizer.Ranger(parameters,
                                           lr=hparams.lr,
                                           eps=eps,
                                           weight_decay=hparams.weight_decay)
    else:
        raise ValueError('optimizer not recognized!')

    return optimizer
Example No. 5
    def configure_optimizers(self):
        param_sets = [
            {'params': self.encoder.parameters()},
            {'params': self.decoder.parameters(), 'lr': self.lr * self.args.pcae.decoder.lr_coeff}
        ]
        if self.args.pcae.optimizer == 'sgd':
            opt = torch.optim.SGD(param_sets, lr=self.lr, weight_decay=self.weight_decay)
        elif self.args.pcae.optimizer == 'radam':
            opt = optim.RAdam(param_sets, lr=self.lr, weight_decay=self.weight_decay)
        else:
            raise NotImplementedError()

        if self.args.pcae.lr_scheduler == 'exp':
            scheduler_step = 'epoch'
            lr_sched = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=self.lr_decay)
        elif self.args.pcae.lr_scheduler == 'cosrestarts':
            scheduler_step = 'step'
            lr_sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, 469*8)  # TODO scale by batch num
        else:
            raise NotImplementedError

        return [opt], [{
            'scheduler': lr_sched,
            'interval': scheduler_step,
            'name': 'pcae'
        }]
Example No. 6
    def retrain(self,
                dataset,
                max_epoch=10000,
                stopping_criterion=1e-3,
                lr=1e-3):
        logger.info("[+ +] Re-training starts...")
        loss_fun = nn.MSELoss()
        # optimizer = optim.SGD(self.model.parameters(), lr=lr, momentum=0.9)
        optimizer = optim.RAdam(self._model.parameters(),
                                lr=lr,
                                weight_decay=1e-4)
        X_train, Y_train = self._process_data(dataset)
        for epoch in range(max_epoch):
            ### Training ###
            self._model.train()
            optimizer.zero_grad()
            Y_prediction = self._model(X_train)
            obj_train = loss_fun(Y_prediction, Y_train)
            obj_train.backward()
            optimizer.step()
            if obj_train.item() < stopping_criterion or epoch % 100 == 0:  # Check stopping criterion
                logger.info("[+ +] Epoch: %5d   Train Obj: %.5e" %
                            (epoch + 1, obj_train.item()))
                if obj_train.item() < stopping_criterion:
                    logger.info("[+ +] Re-training finished!")
                    self._model.eval()
                    return
        raise Exception("Maximum epoch in the retraining is reached!")
Example No. 7
    def __init__(self):
        super(Beta, self).__init__()
        # num_items is assumed to be defined at module scope.
        self.net = nn.Sequential(nn.Linear(1024, num_items), nn.Softmax())
        self.optim = optim.RAdam(self.net.parameters(),
                                 lr=1e-5,
                                 weight_decay=1e-5)
        self.criterion = nn.CrossEntropyLoss()
Example No. 8
def main(args):

    train_cfg = config_from_json(args.train_cfg)
    model_cfg = config_from_json(args.model_cfg)
    model_cfg.block_size = model_cfg.max_len // model_cfg.n_blocks
    set_seeds(train_cfg.seed)

    print("Loading dataset")
    loader = PreTrainDataset(args.data_file, train_cfg, model_cfg)

    model = BertInnerForMaskedLM(model_cfg)

    if train_cfg.optimizer == "lamb":
        if train_cfg.opt_level != "" and train_cfg.opt_level is not None:
            optimizer = apex.optimizers.FusedLAMB(
                model.parameters(), **train_cfg.optimizer_parameters)
        else:
            optimizer = torch_optimizer.Lamb(model.parameters(),
                                             **train_cfg.optimizer_parameters)

    elif train_cfg.optimizer == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(),
                                          **train_cfg.optimizer_parameters)
    else:
        optimizer = optim4GPU(train_cfg, model)

    trainer = Trainer(loader, model, optimizer, args.save_dir, get_device(),
                      train_cfg.parallel, train_cfg.opt_level)

    if args.load_model != "":
        print("Loading checkpoint")
        trainer.load_model(args.load_model, args.load_dataset_state)

    trainer.train(train_cfg)
Example No. 9
def select_optimizer(opt_name, lr, model, sched_name="cos"):
    if opt_name == "adam":
        opt = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
    elif opt_name == "radam":
        opt = torch_optimizer.RAdam(model.parameters(),
                                    lr=lr,
                                    weight_decay=0.00001)
    elif opt_name == "sgd":
        opt = optim.SGD(model.parameters(),
                        lr=lr,
                        momentum=0.9,
                        nesterov=True,
                        weight_decay=1e-4)
    else:
        raise NotImplementedError("Please select the opt_name [adam, radam, sgd]")

    if sched_name == "cos":
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(opt,
                                                                   T_0=1,
                                                                   T_mult=2,
                                                                   eta_min=lr *
                                                                   0.01)
    elif sched_name == "anneal":
        scheduler = optim.lr_scheduler.ExponentialLR(opt,
                                                     1 / 1.1,
                                                     last_epoch=-1)
    elif sched_name == "multistep":
        scheduler = optim.lr_scheduler.MultiStepLR(opt,
                                                   milestones=[30, 60, 80, 90],
                                                   gamma=0.1)
    else:
        raise NotImplementedError(
            "Please select the sched_name [cos, anneal, multistep]")

    return opt, scheduler
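A minimal usage sketch (placeholder model; optim is torch.optim and torch_optimizer is imported as in the snippet): the helper returns both the optimizer and its learning-rate scheduler.

model = torch.nn.Linear(32, 10)  # placeholder model
opt, scheduler = select_optimizer("radam", 1e-3, model, sched_name="cos")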
Example No. 10
def get_optimizer(model, config):
    if config.use_adam:
        if config.use_transformer:
            no_decay = ['bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [{
                'params': [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                0.01
            }, {
                'params': [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                0.0
            }]

            optimizer = optim.AdamW(
                optimizer_grouped_parameters,
                lr=config.lr,
            )
        else:  # case of rnn based seq2seq.
            optimizer = optim.Adam(model.parameters(), lr=config.lr)
    elif config.use_radam:
        assert not config.use_noam_decay, "You need to turn-off noam decay, when you use RAdam."

        optimizer = custom_optim.RAdam(model.parameters(), lr=config.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr)

    return optimizer
Example No. 11
def get_optimizer(optimizer: str, model, optimizer_args):
    if optimizer == "sgd":
        return torch.optim.SGD(model.parameters(), **optimizer_args)
    elif optimizer == "adam":
        return torch.optim.Adam(model.parameters(), **optimizer_args)
    elif optimizer == "yogi":
        return optim.Yogi(model.parameters(), **optimizer_args)
    elif optimizer == "shampoo":
        return optim.Shampoo(model.parameters(), **optimizer_args)
    elif optimizer == "swats":
        return optim.SWATS(model.parameters(), **optimizer_args)
    elif optimizer == "sgdw":
        return optim.SGDW(model.parameters(), **optimizer_args)
    elif optimizer == "sgdp":
        return optim.SGDP(model.parameters(), **optimizer_args)
    elif optimizer == "rangerva":
        return optim.RangerVA(model.parameters(), **optimizer_args)
    elif optimizer == "rangerqh":
        return optim.RangerQH(model.parameters(), **optimizer_args)
    elif optimizer == "ranger":
        return optim.Ranger(model.parameters(), **optimizer_args)
    elif optimizer == "radam":
        return optim.RAdam(model.parameters(), **optimizer_args)
    elif optimizer == "qhm":
        return optim.QHM(model.parameters(), **optimizer_args)
    elif optimizer == "qhadam":
        return optim.QHAdam(model.parameters(), **optimizer_args)
    elif optimizer == "pid":
        return optim.PID(model.parameters(), **optimizer_args)
    elif optimizer == "novograd":
        return optim.NovoGrad(model.parameters(), **optimizer_args)
    elif optimizer == "lamb":
        return optim.Lamb(model.parameters(), **optimizer_args)
    elif optimizer == "diffgrad":
        return optim.DiffGrad(model.parameters(), **optimizer_args)
    elif optimizer == "apollo":
        return optim.Apollo(model.parameters(), **optimizer_args)
    elif optimizer == "aggmo":
        return optim.AggMo(model.parameters(), **optimizer_args)
    elif optimizer == "adamp":
        return optim.AdamP(model.parameters(), **optimizer_args)
    elif optimizer == "adafactor":
        return optim.Adafactor(model.parameters(), **optimizer_args)
    elif optimizer == "adamod":
        return optim.AdaMod(model.parameters(), **optimizer_args)
    elif optimizer == "adabound":
        return optim.AdaBound(model.parameters(), **optimizer_args)
    elif optimizer == "adabelief":
        return optim.AdaBelief(model.parameters(), **optimizer_args)
    elif optimizer == "accsgd":
        return optim.AccSGD(model.parameters(), **optimizer_args)
    elif optimizer == "a2graduni":
        return optim.A2GradUni(model.parameters(), **optimizer_args)
    elif optimizer == "a2gradinc":
        return optim.A2GradInc(model.parameters(), **optimizer_args)
    elif optimizer == "a2gradexp":
        return optim.A2GradExp(model.parameters(), **optimizer_args)
    else:
        raise Exception(f"Optimizer '{optimizer}' does not exist!")
Example No. 12
def radam(parameters, lr=1e-3, betas=(0.9, 0.999), eps=1e-3, weight_decay=0):
    if isinstance(betas, str):
        betas = eval(betas)
    return torch_optimizer.RAdam(parameters,
                                 lr=lr,
                                 betas=betas,
                                 eps=eps,
                                 weight_decay=weight_decay)
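A minimal usage sketch (placeholder model, assuming torch and torch_optimizer are imported): the isinstance(betas, str) branch lets betas arrive as a string, e.g. read straight from a config file.

model = torch.nn.Linear(8, 2)  # placeholder model
optimizer = radam(model.parameters(), lr=3e-4, betas="(0.9, 0.999)")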
Example No. 13
    def return_optim(model, optim_type, lr):
        if optim_type == "adam":
            return optim.Adam(model.parameters(), lr=lr)
        elif optim_type == "radam":
            return toptim.RAdam(model.parameters(), lr=lr)
        elif optim_type == "lamb":
            return Lamb(model.parameters(), lr=lr)
        else:
            raise ValueError("Invalid optimizer type")
Example No. 14
def LookaheadRAdam(params,
                   lr=1e-3,
                   betas=(0.9, 0.999),
                   eps=1e-8,
                   weight_decay=0,
                   lalpha=0.5,
                   k=6):
    return Lookahead(
        torch_optimizer.RAdam(params, lr, betas, eps, weight_decay), lalpha, k)
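A minimal usage sketch (placeholder model; Lookahead is whatever implementation the original module imports): the wrapped optimizer is then driven like any other torch optimizer via zero_grad()/step().

model = torch.nn.Linear(8, 2)  # placeholder model
optimizer = LookaheadRAdam(model.parameters(), lr=1e-3, k=5)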
Example No. 15
def main(args):

    train_cfg = config_from_json(args.train_cfg)
    model_cfg = config_from_json(args.model_cfg)
    model_cfg.block_size = model_cfg.max_len // model_cfg.n_blocks
    set_seeds(train_cfg.seed)

    if model_cfg.projection not in ["dense", "cnn"]:
        if args.max_len == 0:
            model_cfg.reduced_max_len = model_cfg.max_len
        else:
            model_cfg.reduced_max_len = args.max_len
        if args.reduce_block_size:
            assert model_cfg.reduced_max_len % model_cfg.n_blocks == 0, "Reduced len cannot be divided by n_blocks"
            model_cfg.block_size = model_cfg.reduced_max_len // model_cfg.n_blocks
        else:
            assert model_cfg.reduced_max_len % model_cfg.block_size == 0, "Reduced len cannot be divided by initial block_size"
            model_cfg.n_blocks = model_cfg.reduced_max_len // model_cfg.block_size
        print("max_len:", model_cfg.reduced_max_len, "block_size:", model_cfg.block_size, "n_blocks:", model_cfg.n_blocks)
    else:
        if args.max_len != 0:
            warnings.warn("Projection is incompatible with a reduced max len, using default max_len")

    
    print("Loading dataset")
    (data, labels), criterion = get_data_and_optimizer_from_dataset(args.data_file, train_cfg.task)

    loader = GlueDataset(data, labels, train_cfg, model_cfg)
    model = BertInnerForSequenceClassification(model_cfg, loader.get_n_labels(), criterion)

    if train_cfg.optimizer == "lamb":
        if train_cfg.opt_level != "" and train_cfg.opt_level is not None:
            optimizer = apex.optimizers.FusedLAMB(model.parameters(), **train_cfg.optimizer_parameters)
        else:
            optimizer = torch_optimizer.Lamb(model.parameters(), **train_cfg.optimizer_parameters)

    elif train_cfg.optimizer == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(), **train_cfg.optimizer_parameters)
    elif train_cfg.optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), **train_cfg.optimizer_parameters)
    else:
        optimizer = optim4GPU(train_cfg, model)

    trainer = GlueTrainer(loader, model, optimizer, args.save_dir, get_device(), train_cfg.parallel)

    if args.load_model != "":
        print("Loading checkpoint")
        trainer.load_model(args.load_model, args.load_dataset_state)

    if not args.eval:
        trainer.train(train_cfg)
    else:
        trainer.eval(train_cfg)
Example No. 16
def get_optimizer(net_conf, model):
    if net_conf["optimizer"] == "adam":
        optimizer = {
            "generator":
            optim.Adam(model["G"].parameters(), lr=net_conf["lr"]),
            "discriminator":
            optim.Adam(model["D"].parameters(),
                       lr=net_conf["discriminator_lr"]),
        }
    elif net_conf["optimizer"] == "radam":
        optimizer = {
            "generator":
            toptim.RAdam(model["G"].parameters(), lr=net_conf["lr"]),
            "discriminator":
            toptim.RAdam(model["D"].parameters(),
                         lr=net_conf["discriminator_lr"]),
        }
    elif net_conf["optimizer"] == "lamb":
        optimizer = {
            "generator":
            Lamb(
                model["G"].parameters(),
                lr=net_conf["lr"],
                weight_decay=0.01,
                betas=(0.9, 0.999),
                adam=False,
            ),
            "discriminator":
            Lamb(
                model["D"].parameters(),
                lr=net_conf["lr"],
                weight_decay=0.01,
                betas=(0.9, 0.999),
                adam=False,
            ),
        }
    else:
        raise ValueError("optimizer must be [adam, radam, lamb]")
    return optimizer
Example No. 17
def get_optimizer(model, config):
    if config.use_adam:
        if config.use_transformer:
            optimizer = optim.Adam(model.parameters(),
                                   lr=config.lr,
                                   betas=(.9, .98))
        else:  # case of rnn based seq2seq.
            optimizer = optim.Adam(model.parameters(), lr=config.lr)
    elif config.use_radam:
        optimizer = custom_optim.RAdam(model.parameters(), lr=config.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr)

    return optimizer
Example No. 18
    def set_model(self):
        self.asr_model = Transformer(self.id2ch, self.config['asr_model']).cuda()
        self.asr_opt = optim.RAdam(self.asr_model.parameters(), betas=(0.9, 0.98), eps=1e-9)
        # self.asr_opt = TransformerOptimizer(
        #     torch.optim.Adam(self.asr_model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        #     optim.RAdam(self.asr_model.parameters())
        #     self.config['asr_model']['optimizer_opt']['k'],
        #     self.config['asr_model']['encoder']['d_model'],
        #     self.config['asr_model']['optimizer_opt']['warmup_steps']
        # )
        self.label_smoothing = self.config['solver']['label_smoothing']
        self.sos_id = self.asr_model.sos_id
        self.eos_id = self.asr_model.eos_id

        super().load_model()
Example No. 19
def make_optimizer(config_dict: Dict[str, Any], model: nn.Module):
    cp: Dict[str, Any] = deepcopy(config_dict)
    n = cp.pop("name").lower()

    optimizer: Optimizer
    if n == "adam":
        optimizer = optim.Adam(model.parameters(), **cp)
    elif n == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(), **cp)
    elif n == "ranger":
        optimizer = torch_optimizer.Ranger(model.parameters(), **cp)
    elif n == "sgd":
        optimizer = optim.SGD(model.parameters(), **cp)
    else:
        raise ValueError(n)

    return optimizer
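A minimal usage sketch (hypothetical config dict, placeholder model): the "name" key selects the branch and every remaining key is forwarded to the optimizer constructor.

config_dict = {"name": "RAdam", "lr": 1e-3, "weight_decay": 1e-2}
model = nn.Linear(8, 2)  # placeholder model
optimizer = make_optimizer(config_dict, model)  # torch_optimizer.RAdam with lr=1e-3, weight_decay=1e-2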
Example No. 20
    def configure_optimizers(self):
        optimizer = {
            "sgd":
            torch.optim.SGD(self.parameters(),
                            lr=self.learning_rate,
                            momentum=self.args.momentum),
            "adam":
            torch.optim.Adam(self.parameters(),
                             lr=self.learning_rate,
                             weight_decay=self.args.weight_decay),
            "adamw":
            torch.optim.AdamW(self.parameters(),
                              lr=self.learning_rate,
                              weight_decay=self.args.weight_decay),
            "radam":
            optim.RAdam(self.parameters(),
                        lr=self.learning_rate,
                        weight_decay=self.args.weight_decay),
            "fused_adam":
            apex.optimizers.FusedAdam(self.parameters(),
                                      lr=self.learning_rate,
                                      weight_decay=self.args.weight_decay),
        }[self.args.optimizer.lower()]

        scheduler = {
            "none":
            None,
            "multistep":
            torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 self.args.steps,
                                                 gamma=self.args.factor),
            "cosine":
            torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       self.args.max_epochs),
            "plateau":
            torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                factor=self.args.factor,
                patience=self.args.lr_patience),
        }[self.args.scheduler.lower()]

        opt_dict = {"optimizer": optimizer, "monitor": "val_loss"}
        if scheduler is not None:
            opt_dict.update({"lr_scheduler": scheduler})
        return opt_dict
Example No. 21
def radam(parameters, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
    """
    The chosen optimizer - RAdam
    Info: https://pytorch-optimizer.readthedocs.io/en/latest/api.html#radam
    Paper: https://arxiv.org/abs/1908.03265
    Param:
        parameters: the chosen model parameters
        lr (float), betas (tuple), eps (float), weight_decay (float): learning parameters
    Return:
        RAdam optimizer instance with the given parameters
    """
    if isinstance(betas, str):
        betas = eval(betas)
    return optim.RAdam(parameters,
                       lr=lr,
                       betas=betas,
                       eps=eps,
                       weight_decay=weight_decay)
Example No. 22
    def configure_optimizers(self):
        print(f"Initial Learning Rate: {self.hparams.learning_rate:.6f}")
        #         optimizer = optim.Adam(self.parameters(),
        #                                lr=self.hparams.learning_rate,
        #                                weight_decay=weight_decay)
        #         optimizer = torch.optim.SGD(self.parameters(),
        #                                     lr=self.hparams.learning_rate,
        #                                     momentum=0.9,
        #                                     dampening=0,
        #                                     weight_decay=weight_decay,
        #                                     nesterov=False)

        optimizer = torch_optimizer.RAdam(
            self.parameters(),
            lr=self.hparams.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=weight_decay,
        )

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=T_max,
                                                         eta_min=0,
                                                         last_epoch=-1)

        #         scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        #             optimizer,
        #             T_0=T_0,
        #             T_mult=1,
        #             eta_min=0,
        #             last_epoch=-1)

        #         scheduler = optim.lr_scheduler.OneCycleLR(
        #             optimizer=optimizer,
        #             pct_start=0.1,
        #             div_factor=1e3,
        #             max_lr=1e-1,
        #             # max_lr=1e-2,
        #             epochs=epochs,
        #             steps_per_epoch=len(self.train_images) // batch_size)

        return [optimizer], [scheduler]
Example No. 23
def get_optimizer(hparams, optimizer_grouped_parameters):
    if hparams.optimizer_type == "ranger":
        optimizer = torch_optimizer.Ranger(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            k=hparams.ranger_k,
            eps=hparams.adam_epsilon,
        )
    elif hparams.optimizer_type == "qhadam":
        optimizer = torch_optimizer.QHAdam(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            nus=(0.1, 1.0),
            betas=(0.9, 0.999),
            eps=hparams.adam_epsilon,
        )
    elif hparams.optimizer_type == "radam":
        optimizer = torch_optimizer.RAdam(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            betas=(0.9, 0.999),
            eps=hparams.adam_epsilon,
        )
    elif hparams.optimizer_type == "adabound":
        optimizer = torch_optimizer.AdaBound(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            betas=(0.9, 0.999),
            final_lr=0.1,
            gamma=1e-3,
            eps=hparams.adam_epsilon,
            amsbound=False,
        )
    else:
        optimizer = torch.optim.AdamW(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            eps=hparams.adam_epsilon,
        )

    return optimizer
Example No. 24
def select_optimizer(optimizer, net, learning_rate):
    global adam_beta1, adam_beta2, weight_decay, rmsprop_alpha, momentum, rmsprop_centered, \
        adam_amsgrad, nesterov, dampening

    if optimizer == 'adamax':
        opt = optim.Adamax(filter(lambda p: p.requires_grad, net.parameters()),
                           lr=learning_rate,
                           betas=(adam_beta1, adam_beta2),
                           weight_decay=weight_decay)
    elif optimizer == 'rmsprop':
        opt = optim.RMSprop(filter(lambda p: p.requires_grad,
                                   net.parameters()),
                            lr=learning_rate,
                            alpha=rmsprop_alpha,
                            weight_decay=weight_decay,
                            momentum=momentum,
                            centered=rmsprop_centered)
    elif optimizer == 'adam':
        opt = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                         lr=learning_rate,
                         betas=(adam_beta1, adam_beta2),
                         weight_decay=weight_decay,
                         amsgrad=adam_amsgrad,
                         eps=eps)
    elif optimizer == 'radam':
        opt = new_optim.RAdam(filter(lambda p: p.requires_grad,
                                     net.parameters()),
                              lr=learning_rate,
                              betas=(adam_beta1, adam_beta2),
                              eps=eps,
                              weight_decay=weight_decay)
    else:  # sgd
        opt = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                        lr=learning_rate,
                        momentum=momentum,
                        dampening=dampening,
                        weight_decay=weight_decay,
                        nesterov=nesterov)
    return opt
Example No. 25
def create_optimizer(optimizer_config: Dict[str, Any], model: nn.Module):
    cp: Dict[str, Any] = copy(optimizer_config)
    n = cp.pop("name").lower()

    if n == "adam":
        optimizer: Optimizer = optim.Adam(model.parameters(), **cp)
    elif n == "sgd":
        optimizer = optim.SGD(model.parameters(), **cp)
    elif n == "adabound":
        optimizer = torch_optimizer.AdaBound(model.parameters(), **cp)
    elif n == "diffgrad":
        optimizer = torch_optimizer.DiffGrad(model.parameters(), **cp)
    elif n == "qhadam":
        optimizer = torch_optimizer.QHAdam(model.parameters(), **cp)
    elif n == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(), **cp)
    elif n == "yogi":
        optimizer = torch_optimizer.Yogi(model.parameters(), **cp)
    else:
        raise ValueError(n)

    return optimizer
Example No. 26
    def optimizer_chosen(self, model_param):
        try:
            optimizer_dict = {
                'sgd':
                optim.SGD(params=model_param,
                          lr=self.config.LEARNING_RATE,
                          momentum=self.config.LEARNING_MOMENTUM,
                          nesterov=True),
                'adam':
                optim.Adam(params=model_param, lr=self.config.LEARNING_RATE),
                'adadelta':
                optim.Adadelta(params=model_param,
                               lr=self.config.LEARNING_RATE),
                'adagrad':
                optim.Adagrad(params=model_param,
                              lr=self.config.LEARNING_RATE),
                'adamax':
                optim.Adamax(params=model_param, lr=self.config.LEARNING_RATE),
                'adamw':
                optim.AdamW(params=model_param, lr=self.config.LEARNING_RATE),
                'asgd':
                optim.ASGD(params=model_param, lr=self.config.LEARNING_RATE),
                'rmsprop':
                optim.RMSprop(params=model_param,
                              lr=self.config.LEARNING_RATE),
                'radam':
                torch_optimizer.RAdam(params=model_param,
                                      lr=self.config.LEARNING_RATE),
                'ranger':
                torch_optimizer.Ranger(params=model_param,
                                       lr=self.config.LEARNING_RATE)
            }[self.config.OPTIMIZER.lower()]

            return optimizer_dict

        except Exception as e:
            message = f"Invalid optimizers {e}"
            raise Exception(message)
Example No. 27
    def __create_optimizer(self, model):
        opt_parameters = []
        named_parameters = list(model.named_parameters())

        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        set_2 = ["layer.4", "layer.5", "layer.6", "layer.7"]
        set_3 = ["layer.8", "layer.9", "layer.10", "layer.11"]
        init_lr = self.config.lr

        for i, (name, params) in enumerate(named_parameters):

            weight_decay = 0.0 if any(p in name for p in no_decay) else 0.01

            if name.startswith("roberta.embeddings") or name.startswith(
                    "roberta.encoder"):
                lr = init_lr
                lr = init_lr * 1.75 if any(p in name for p in set_2) else lr
                lr = init_lr * 3.5 if any(p in name for p in set_3) else lr

                opt_parameters.append({
                    "params": params,
                    "weight_decay": weight_decay,
                    "lr": lr
                })

            if name.startswith("classifier"):
                lr = init_lr * 3.6

                opt_parameters.append({
                    "params": params,
                    "weight_decay": weight_decay,
                    "lr": lr
                })

        if self.config.optimizer_type != OptimizerType.RADAM:
            return AdamW(opt_parameters, lr=init_lr)

        return torch_optimizer.RAdam(opt_parameters, lr=init_lr)
Example No. 28
    def optimizer_chosen(self, model_param):
        try:
            optimizer_dict = {
                'sgd':
                optim.SGD(params=model_param,
                          lr=self.config.LEARNING_RATE,
                          momentum=0.9,
                          nesterov=True),
                'adam':
                optim.Adam(params=model_param, lr=self.config.LEARNING_RATE),
                'adadelta':
                optim.Adadelta(params=model_param,
                               lr=self.config.LEARNING_RATE),
                'adagrad':
                optim.Adagrad(params=model_param,
                              lr=self.config.LEARNING_RATE),
                'adamax':
                optim.Adamax(params=model_param, lr=self.config.LEARNING_RATE),
                'adamw':
                optim.AdamW(params=model_param, lr=self.config.LEARNING_RATE),
                'asgd':
                optim.ASGD(params=model_param, lr=self.config.LEARNING_RATE),
                'rmsprop':
                optim.RMSprop(params=model_param,
                              lr=self.config.LEARNING_RATE),
                'radam':
                torch_optimizer.RAdam(params=model_param,
                                      lr=self.config.LEARNING_RATE),
                'ranger':
                torch_optimizer.Ranger(params=model_param,
                                       lr=self.config.LEARNING_RATE)
            }[self.config.OPTIMIZER.lower()]

            return optimizer_dict
        except KeyError:
            print("Invalid optimizers")
Example No. 29
def main(opt):
    train_data, valid_data = get_train_valid_split_data_names(opt.img_folder, opt.ano_folder, valid_size=1/8)

    # Load the data
    print("load data")
    train_dataset = Phase1Dataset(train_data, load_size=(640, 640), augment=True, limit=opt.limit)
    print("train data length : %d" % (len(train_dataset)))
    valid_dataset = Phase1Dataset(valid_data, load_size=(640, 640), augment=False, limit=opt.limit)
    print("valid data length : %d" % (len(valid_dataset)))
    # Create the DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    # Set up the device (in PyTorch the device must be specified explicitly)
    device = torch.device('cuda' if opt.gpus > 0 else 'cpu')

    # Build the model
    heads = {'hm': 1}
    model = get_pose_net(18, heads, 256).to(device)

    # Define the optimizer
    if opt.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr)  # , momentum=m, dampening=d, weight_decay=w, nesterov=n)
    elif opt.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    elif opt.optimizer == "RAdam":
        optimizer = optim.RAdam(model.parameters(), lr=opt.lr)

    start_epoch = 0
    # Resume from a checkpoint; the optimizer must already exist before it can be restored.
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, optimizer)

    # Define the loss function
    criterion = HMLoss()
    # Define the learning-rate schedule
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=0.00001)

    best_validation_loss = 1e10
    # Create the output directories
    os.makedirs(os.path.join(opt.save_dir, opt.task, 'visualized'), exist_ok=True)

    # Training loop. TODO: evaluate on the validation data and save the model at the end of every epoch
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print("learning rate : %f" % scheduler.get_last_lr()[0])
        train(train_loader, model, optimizer, criterion, device, opt.num_epochs, epoch)
        if opt.optimizer == "SGD":
            scheduler.step()

        # Save the latest model
        save_model(os.path.join(opt.save_dir, opt.task, 'model_last.pth'),
                   epoch, model, optimizer, scheduler)

        # Evaluate on the validation data
        validation_loss, accumulate_datas = valid(valid_loader, model, criterion, device)
        # Save the model whenever the best score improves
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_model(os.path.join(opt.save_dir, opt.task, 'model_best.pth'),
                       epoch, model, optimizer, scheduler)
            print("saved best model")
            visualization(os.path.join(opt.save_dir, opt.task, 'visualized'),
                        accumulate_datas)
Example No. 30
x, x_noisy, y = datasets['train'][0]
args.input_size = x.size()

dataloaders = OrderedDict({
    'train': DataLoader(datasets['train'], shuffle=True, batch_size=args.batch_size),
    'test': DataLoader(datasets['test'], shuffle=False, batch_size=args.batch_size)
})
model = Model(args).to(args.device)


# https://pypi.org/project/torch-optimizer/#radam
if args.optimizer == 'radam':
    optimizer = optim.RAdam(
        model.parameters(),
        lr=args.learning_rate,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
    )

def dict_list_append(d, key, value):
    if key not in d:
        d[key] = []
    d[key].append(value)

metrics_best = {
    'best_test_loss': float('Inf'),
    'best_test_loss_dir': -1
}

count_batches = 0