Exemplo n.º 1
0
 def __init__(self,
              max_lr: Union[float, List[float]],
              total_steps: Optional[int] = None,
              epochs: Optional[int] = None,
              steps_per_epoch: Optional[int] = None,
              pct_start: float = 0.3,
              anneal_strategy: str = 'cos',
              cycle_momentum: bool = True,
              base_momentum: Union[float, List[float]] = 0.85,
              max_momentum: Union[float, List[float]] = 0.95,
              div_factor: float = 25.,
              final_div_factor: float = 1e4,
              last_epoch: int = -1):
     """Wrap torch's ``OneCycleLR`` behind a lazily-built factory.

     The actual scheduler is constructed only once an optimizer is
     available; the factory is handed to the base class together with
     ``step_on_iteration=True`` so it is stepped per iteration.

     Raises:
         ImportError: if the installed torch predates 1.3.0, where
             ``OneCycleLR`` was introduced.
     """
     from distutils.version import LooseVersion

     # OneCycleLR only exists in torch >= 1.3.0; bail out early otherwise.
     if LooseVersion(torch.__version__) < LooseVersion("1.3.0"):
         raise ImportError("Update torch>=1.3.0 to use 'OneCycleLR'")

     def scheduler_factory(opt):
         # Defer construction until the optimizer exists.
         return _scheduler.OneCycleLR(
             opt,
             max_lr,
             total_steps=total_steps,
             epochs=epochs,
             steps_per_epoch=steps_per_epoch,
             pct_start=pct_start,
             anneal_strategy=anneal_strategy,
             cycle_momentum=cycle_momentum,
             base_momentum=base_momentum,
             max_momentum=max_momentum,
             div_factor=div_factor,
             final_div_factor=final_div_factor,
             last_epoch=last_epoch)

     super().__init__(scheduler_factory, step_on_iteration=True)
Exemplo n.º 2
0
 def __init__(
     self,
     optimizer: Optimizer,
     max_lr: Union[float, List[float]],
     total_steps: Optional[int] = None,
     epochs: Optional[int] = None,
     steps_per_epoch: Optional[int] = None,
     pct_start: float = 0.3,
     anneal_strategy: str = "cos",
     cycle_momentum: bool = True,
     base_momentum: float = 0.85,
     max_momentum: float = 0.95,
     div_factor: float = 25.0,
     final_div_factor: float = 10000.0,
     last_epoch: int = -1,
     step_duration: int = 1,
 ):
     """Build a ``OneCycleLR`` scheduler and register it with the wrapper.

     All arguments mirror ``torch.optim.lr_scheduler.OneCycleLR``;
     ``step_duration`` is forwarded to the base class and controls how
     many wrapper steps correspond to one scheduler step.
     """
     # Pass everything after max_lr by keyword: newer torch versions insert
     # a `three_phase` parameter before `last_epoch`, so purely positional
     # passing would silently bind `last_epoch` to `three_phase`.
     scheduler = lr_scheduler.OneCycleLR(
         optimizer,
         max_lr,
         total_steps=total_steps,
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         pct_start=pct_start,
         anneal_strategy=anneal_strategy,
         cycle_momentum=cycle_momentum,
         base_momentum=base_momentum,
         max_momentum=max_momentum,
         div_factor=div_factor,
         final_div_factor=final_div_factor,
         last_epoch=last_epoch,
     )
     super().__init__(scheduler, step_duration)
Exemplo n.º 3
0
    def _train_model(self) -> nn.Module:
        """Train the model described by the phenotype.

        Builds a training DataLoader, an AdamW optimizer, and a OneCycleLR
        schedule, runs a fixed number of gradient steps while tracking a
        rolling log-likelihood estimate, then validates the trained model.

        Returns:
            The trained (and validated) model.

        Raises:
            GradientsError: if the rolling LLH is not above LOW_LLH
                (this also catches NaN).
        """
        phenotype = self._phenotype

        loader = data_loader.DescribedDataLoader(
            self._tickers, self._end, phenotype["data"], data_params.TrainParams
        )

        model = self._make_untrained_model(loader)
        optimizer = optim.AdamW(model.parameters(), **phenotype["optimizer"])

        steps_per_epoch = len(loader)
        scheduler_params = dict(phenotype["scheduler"])
        epochs = scheduler_params.pop("epochs")
        # "+1" presumably keeps the scheduler from overflowing on the final
        # step — TODO confirm against OneCycleLR's step-count check.
        total_steps = 1 + int(steps_per_epoch * epochs)
        scheduler_params["total_steps"] = total_steps
        scheduler = lr_scheduler.OneCycleLR(optimizer, **scheduler_params)

        print(f"Epochs - {epochs:.2f}")
        print(f"Train size - {len(loader.dataset)}")

        # Rolling window (length ~ sqrt(total_steps)) of LLH values and
        # their weights; the deques drop the element leaving the window.
        len_deque = int(total_steps ** 0.5)
        llh_sum = 0.0
        llh_deque = collections.deque([0], maxlen=len_deque)
        weight_sum = 0.0
        weight_deque = collections.deque([0], maxlen=len_deque)
        loss_fn = normal_llh

        # Cycle through the loader endlessly, then cut at total_steps batches.
        loader = itertools.repeat(loader)
        loader = itertools.chain.from_iterable(loader)
        loader = itertools.islice(loader, total_steps)

        model.train()
        bar = tqdm.tqdm(loader, file=sys.stdout, total=total_steps, desc="~~> Train")
        for batch in bar:
            optimizer.zero_grad()
            output = model(batch)

            loss, weight = loss_fn(output, batch)

            # Add the new value and subtract the one falling out of the window.
            llh_sum += -loss.item() - llh_deque[0]
            llh_deque.append(-loss.item())

            weight_sum += weight - weight_deque[0]
            weight_deque.append(weight)

            loss.backward()
            optimizer.step()
            scheduler.step()

            llh = llh_sum / weight_sum
            bar.set_postfix_str(f"{llh:.5f}")

            # This condition also filters out NaN values.
            if not (llh > LOW_LLH):
                raise GradientsError(llh)

        self._validate(model)

        return model
Exemplo n.º 4
0
def get_optimization(cfg, model):
    """Build the optimizer/scheduler pair selected by ``cfg.optimization``.

    Also propagates the optimizer's learning rate into the one-cycle
    scheduler config so ``max_lr`` always matches the optimizer.

    Args:
        cfg: config object with an ``optimization`` section providing
            ``optimizer``, ``scheduler`` and the matching parameter groups.
        model: model whose parameters the optimizer should manage.

    Returns:
        (optimizer, scheduler) tuple.

    Raises:
        ValueError: for an unrecognized optimizer or scheduler name
            (previously these fell through and crashed with NameError).
    """
    if cfg.optimization.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     **cfg.optimization.adam_param)
        cfg.optimization.onecycle_scheduler.max_lr = cfg.optimization.adam_param.lr
    elif cfg.optimization.optimizer == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(),
                                      **cfg.optimization.adam_param)
        cfg.optimization.onecycle_scheduler.max_lr = cfg.optimization.adam_param.lr
    elif cfg.optimization.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    **cfg.optimization.sgd_param)
        cfg.optimization.onecycle_scheduler.max_lr = cfg.optimization.sgd_param.lr
    else:
        raise ValueError(
            'unknown optimizer: {}'.format(cfg.optimization.optimizer))

    if cfg.optimization.scheduler == 'exp':
        scheduler = lr_scheduler.ExponentialLR(
            optimizer, **cfg.optimization.exp_scheduler)
    elif cfg.optimization.scheduler == 'step':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             **cfg.optimization.step_scheduler)
    elif cfg.optimization.scheduler == 'onecycle':
        scheduler = lr_scheduler.OneCycleLR(
            optimizer, **cfg.optimization.onecycle_scheduler)
    elif cfg.optimization.scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer, **cfg.optimization.cosine_scheduler)
    else:
        raise ValueError(
            'unknown scheduler: {}'.format(cfg.optimization.scheduler))
    return optimizer, scheduler
Exemplo n.º 5
0
 def __init__(
     self,
     max_lr: Union[float, List[float]],
     total_steps: Optional[int] = None,
     epochs: Optional[int] = None,
     steps_per_epoch: Optional[int] = None,
     pct_start: float = 0.3,
     anneal_strategy: str = "cos",
     cycle_momentum: bool = True,
     base_momentum: Union[float, List[float]] = 0.85,
     max_momentum: Union[float, List[float]] = 0.95,
     div_factor: float = 25.0,
     final_div_factor: float = 1e4,
     last_epoch: int = -1,
 ):
     """Constructor for OneCycleLR."""
     # Collect every scheduler option once; the factory below closes
     # over this dict so the real scheduler can be built lazily.
     scheduler_kwargs = dict(
         total_steps=total_steps,
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         pct_start=pct_start,
         anneal_strategy=anneal_strategy,
         cycle_momentum=cycle_momentum,
         base_momentum=base_momentum,
         max_momentum=max_momentum,
         div_factor=div_factor,
         final_div_factor=final_div_factor,
         last_epoch=last_epoch,
     )

     def make_scheduler(opt):
         # Instantiate only once an optimizer is available.
         return _schedulers.OneCycleLR(opt, max_lr, **scheduler_kwargs)

     super().__init__(make_scheduler, step_on_batch=True)
Exemplo n.º 6
0
def init_scheduler(args, optimizer):
    """Create the LR scheduler (plus SGD-restart schedule) from CLI args.

    Args:
        args: namespace with lr_init, lr_final, lr_decay, num_epoch,
            lr_minibatch, num_train, batch_size, sgd_restart, lr_decay_type.
        optimizer: optimizer the scheduler will drive.

    Returns:
        (scheduler, restart_epochs) where restart_epochs lists the epochs
        at which SGD warm restarts should occur (empty if disabled).

    Raises:
        ValueError: for an unrecognized ``lr_decay_type``.
    """
    lr_init, lr_final = args.lr_init, args.lr_final
    lr_decay = min(args.lr_decay, args.num_epoch)

    minibatch_per_epoch = ceil(args.num_train / args.batch_size)
    if args.lr_minibatch:
        # Interpret the decay horizon in minibatches instead of epochs.
        lr_decay = lr_decay * minibatch_per_epoch

    lr_ratio = lr_final / lr_init

    # Clamp a multiplicative LR factor into [lr_min, 1].
    lr_bounds = lambda lr, lr_min: min(1, max(lr_min, lr))

    if args.sgd_restart > 0:
        # Restarts at epochs 1, 3, 7, 15, ... (2^k - 1).
        restart_epochs = [(2**k - 1) for k in range(1, ceil(log2(args.num_epoch)) + 1)]
        lr_hold = restart_epochs[0]
        if args.lr_minibatch:
            lr_hold *= minibatch_per_epoch
        logger.info('SGD Restart epochs: {}'.format(restart_epochs))
    else:
        restart_epochs = []
        lr_hold = args.num_epoch
        if args.lr_minibatch:
            lr_hold *= minibatch_per_epoch

    if args.lr_decay_type.startswith('cos'):
        scheduler = sched.CosineAnnealingLR(optimizer, lr_hold, eta_min=lr_final)
    elif args.lr_decay_type.startswith('exp'):
        lr_lambda = lambda epoch: lr_bounds(exp(epoch / lr_decay * log(lr_ratio)), lr_ratio)
        scheduler = sched.LambdaLR(optimizer, lr_lambda)
    elif args.lr_decay_type.startswith('one'):
        # BUG FIX: this branch used to assign the scheduler to `lr_lambda`
        # (leaving `scheduler` unbound) and referenced an undefined
        # `num_epoch` instead of `args.num_epoch`.
        scheduler = sched.OneCycleLR(optimizer, 10 * lr_init,
                                     epochs=args.num_epoch,
                                     steps_per_epoch=100)
    else:
        raise ValueError('Incorrect choice for lr_decay_type!')

    return scheduler, restart_epochs
Exemplo n.º 7
0
 def build(optimizer, cfg, **kwargs):
     """Build a ``OneCycleLR`` scheduler from the solver section of ``cfg``.

     Extra keyword arguments are accepted (for factory compatibility)
     but ignored.
     """
     solver_cfg = cfg.SOLVER.LR_SCHEDULER
     return lr_scheduler.OneCycleLR(
         optimizer,
         solver_cfg.MAX_LR,
         total_steps=solver_cfg.MAX_ITER,
         pct_start=solver_cfg.PCT_START,
         base_momentum=solver_cfg.BASE_MOM,
         max_momentum=solver_cfg.MAX_MOM,
         div_factor=solver_cfg.DIV_FACTOR)
Exemplo n.º 8
0
    def _init_optimizers_schedulers(self, max_lr, epochs, div_factor=1.5):
        """
        Function for creating dictionary of optimizers for different branches and common part.
        Optimizer have differential learning rate determined by the div_factor.
        The OneCycleLR schedulers are also defined at the end.

        Args:
            max_lr: peak learning rate for the least-discounted layer groups.
            epochs: number of epochs each OneCycleLR schedule spans.
            div_factor: ratio between successive layer-group learning rates.
        """

        len_dynamic_layers = len(self.half_dynamic)
        len_half_second_layers = len(self.half_second)

        # Optimizer for the common part of the model
        # Earlier layers get progressively smaller LRs (discriminative LRs).
        self.opt_static = optim.AdamW([
                            {'params': self.half_second[:len_half_second_layers//2].parameters(), 'lr': max_lr / div_factor**2},
                            {'params': self.half_second[len_half_second_layers//2:].parameters(), 'lr': max_lr / div_factor},
                            {'params': self.fc1.parameters(), 'lr': max_lr / div_factor},
                            {'params': self.bn1.parameters(), 'lr': max_lr},
                            {'params': self.fc2.parameters(), 'lr': max_lr},
                       ], lr=max_lr)
        
        # Per-param-group peak LRs, in the same order as the groups above.
        list_lrs = [
                    max_lr / div_factor**2,
                    max_lr / div_factor,
                    max_lr / div_factor,
                    max_lr,
                    max_lr
                  ]
        # Scheduler for the common part of the model
        self.sched_static = lr_scheduler.OneCycleLR(self.opt_static, max_lr=list_lrs, epochs=epochs, steps_per_epoch=len(self.data["train"]), div_factor=9)

        # Creating dictionary of optimizers and schedulers for the different branches
        for domain in self.list_domains:
            # NOTE(review): default lr here is div_factor**3 itself, not
            # max_lr / div_factor**3 like the param groups above — looks
            # suspicious; confirm whether this is intended.
            self.dict_opt_dynamic[domain] = optim.AdamW([{'params': self.dynamic_extractors[domain][:len_dynamic_layers//2].parameters(), 'lr': max_lr / div_factor**4},
                                                          {'params': self.dynamic_extractors[domain][len_dynamic_layers//2:].parameters(), 'lr': max_lr / div_factor**3}], 
                                                         lr=div_factor**3)
            list_lrs = [
                    max_lr / div_factor**4,
                    max_lr / div_factor**3
                ]
            self.dict_sched_dynamic[domain] = lr_scheduler.OneCycleLR(self.dict_opt_dynamic[domain], 
                                                                       max_lr=list_lrs, 
                                                                       epochs=epochs, 
                                                                       steps_per_epoch=len(self.data["train"]), 
                                                                       div_factor=9)
Exemplo n.º 9
0
def create_lr_scheduler(
        conf_lrs: Config, epochs: int, optimizer: Optimizer,
        steps_per_epoch: Optional[int]) -> Tuple[Optional[_LRScheduler], bool]:
    """Build the LR scheduler described by ``conf_lrs``.

    Returns:
        ``(scheduler, epoch_or_step)`` — ``epoch_or_step`` is True when the
        scheduler should be stepped once per epoch and False for per-step
        schedulers (currently only one_cycle). ``scheduler`` may be None
        when ``conf_lrs`` is None or its type is empty.
    """

    # epoch_or_step - apply every epoch or every step
    scheduler, epoch_or_step = None, True

    if conf_lrs is not None:
        lr_scheduler_type = conf_lrs['type']  # TODO: default should be none?
        if lr_scheduler_type == 'cosine':
            # adjust max epochs for warmup
            # TODO: shouldn't we be increasing epochs or schedule lr only after warmup?
            if conf_lrs.get('warmup', None):
                epochs -= conf_lrs['warmup']['epochs']
            scheduler = lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=epochs, eta_min=conf_lrs['min_lr'])
        elif lr_scheduler_type == 'resnet':
            scheduler = _adjust_learning_rate_resnet(optimizer, epochs)
        elif lr_scheduler_type == 'pyramid':
            scheduler = _adjust_learning_rate_pyramid(optimizer, epochs,
                                                      get_optim_lr(optimizer))
        elif lr_scheduler_type == 'step':
            decay_period = conf_lrs['decay_period']
            gamma = conf_lrs['gamma']
            scheduler = lr_scheduler.StepLR(optimizer,
                                            decay_period,
                                            gamma=gamma)
        elif lr_scheduler_type == 'one_cycle':
            # OneCycleLR needs the exact per-epoch step count up front.
            assert steps_per_epoch is not None
            ensure_pytorch_ver('1.3.0',
                               'LR scheduler OneCycleLR is not available.')
            max_lr = conf_lrs['max_lr']
            epoch_or_step = False  # one-cycle is stepped per batch
            scheduler = lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=max_lr,
                epochs=epochs,
                steps_per_epoch=steps_per_epoch,
            )  # TODO: other params
        elif not lr_scheduler_type:
            scheduler = None  # TODO: check support for this or use StepLR
        else:
            raise ValueError('invalid lr_schduler=%s' % lr_scheduler_type)

        # select warmup for LR schedule
        # Warmup wraps whatever scheduler was chosen above (possibly None).
        if conf_lrs.get('warmup', None):
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=conf_lrs['warmup']['multiplier'],
                total_epoch=conf_lrs['warmup']['epochs'],
                after_scheduler=scheduler)

    return scheduler, epoch_or_step
Exemplo n.º 10
0
def run_model(model, train_loader, test_loader, epochs, device, learning_rate,
              **regularization):
    """Train and evaluate ``model`` for ``epochs`` epochs.

    Uses SGD with momentum plus L2 weight decay, L1 regularization inside
    ``train``, and a OneCycleLR schedule stepped once per epoch.

    Args:
        model: the network to train (already on ``device``).
        train_loader / test_loader: data loaders for the two phases.
        epochs: number of epochs to run.
        device: torch device passed through to ``train``/``test``.
        learning_rate: peak LR of the one-cycle schedule.
        **regularization: must contain ``l1_factor`` and ``l2_factor``.

    Returns:
        (model, train_trackers, test_trackers, incorrect_samples).
    """
    criterion = nn.CrossEntropyLoss()

    l2_factor = regularization['l2_factor']
    l1_factor = regularization['l1_factor']

    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          momentum=0.9,
                          weight_decay=l2_factor)

    # BUG FIX: scheduler.step() is called once per EPOCH below, so the
    # one-cycle schedule must span exactly `epochs` steps.  The previous
    # hard-coded epochs=24 / steps_per_epoch=64 assumed per-batch stepping
    # and never matched the actual number of step() calls.
    scheduler = lr_scheduler.OneCycleLR(optimizer,
                                        learning_rate,
                                        total_steps=epochs,
                                        pct_start=0.2)

    ## TRACKERS
    train_losses = []
    train_acc = []
    train_trackers = {'train_acc': train_acc, 'train_losses': train_losses}

    test_acc = []
    test_losses = []
    test_trackers = {'test_acc': test_acc, 'test_losses': test_losses}

    incorrect_samples = []

    ## Model RUN!
    for epoch in range(1, epochs + 1):
        print(f'\nEpoch {epoch}:')
        train(model,
              train_loader,
              criterion,
              optimizer,
              device,
              l1_factor=l1_factor,
              **train_trackers)
        scheduler.step()
        test(model, test_loader, criterion, device, incorrect_samples,
             **test_trackers)

    return model, train_trackers, test_trackers, incorrect_samples
Exemplo n.º 11
0
def get_scheduler(optimizer, opt):
    """Return the LR scheduler selected by ``opt.lr_policy``.

    Args:
        optimizer: optimizer the scheduler will drive.
        opt: options namespace; which attributes are read depends on the
            chosen policy (e.g. ``lr_decay_iters``, ``niter``, ``n_epochs``).

    Returns:
        The constructed scheduler.

    Raises:
        NotImplementedError: for an unrecognized ``opt.lr_policy``.
    """
    print('opt.lr_policy = [{}]'.format(opt.lr_policy))
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            # Linear decay from 1 to 0 over the final `niter_decay` epochs.
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.5)
    elif opt.lr_policy == 'step2':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif opt.lr_policy == 'onecyclelr':
        # TODO: Need to set automatically!
        scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, max_lr=1e-4, steps_per_epoch=192, epochs=opt.n_epochs)
    elif opt.lr_policy == 'plateau':
        print('schedular=plateau')
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, threshold=0.01, patience=5)
    elif opt.lr_policy == 'plateau2':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
    elif opt.lr_policy == 'step_warmstart':
        def lambda_rule(epoch):
            # Warm start: low LR for 5 epochs, then full LR, then step decay.
            if epoch < 5:
                lr_l = 0.1
            elif 5 <= epoch < 100:
                lr_l = 1
            elif 100 <= epoch < 200:
                lr_l = 0.1
            elif 200 <= epoch:
                lr_l = 0.01
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step_warmstart2':
        def lambda_rule(epoch):
            # Same warm start on a shorter schedule.
            if epoch < 5:
                lr_l = 0.1
            elif 5 <= epoch < 50:
                lr_l = 1
            elif 50 <= epoch < 100:
                lr_l = 0.1
            elif 100 <= epoch:
                lr_l = 0.01
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    else:
        # BUG FIX: previously this *returned* an un-raised
        # NotImplementedError instance with an unformatted message.
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
def one_cycle_lr(optimizer,
                 last_epoch,
                 max_lr,
                 pct_start,
                 epochs,
                 steps_per_epoch,
                 anneal_strategy='cos',
                 **_):
    """Thin factory around ``lr_scheduler.OneCycleLR``.

    Extra keyword arguments are accepted (for config-driven construction)
    but ignored.
    """
    cycle_kwargs = {
        'max_lr': max_lr,
        'epochs': epochs,
        'steps_per_epoch': steps_per_epoch,
        'pct_start': pct_start,
        'anneal_strategy': anneal_strategy,
        'last_epoch': last_epoch,
    }
    return lr_scheduler.OneCycleLR(optimizer, **cycle_kwargs)
Exemplo n.º 13
0
 def configure_optimizers(self):
     """Set up AdamW plus a per-step OneCycleLR schedule for Lightning."""
     adamw = optim.AdamW(self.parameters(),
                         lr=self.lr,
                         weight_decay=0.01)
     one_cycle = lr_scheduler.OneCycleLR(adamw,
                                         max_lr=1e-5,
                                         epochs=self.max_epochs,
                                         steps_per_epoch=1338)
     # 'interval': 'step' tells Lightning to step the scheduler per batch.
     scheduler_config = {
         'scheduler': one_cycle,
         'interval': 'step'
     }
     return {
         'optimizer': adamw,
         'interval': 'step',
         'lr_scheduler': scheduler_config
     }
Exemplo n.º 14
0
    def choose_scheduler(self, optimizer):
        """Pick an LR scheduler based on ``self.hparams['lr_scheduler']``.

        Returns a bare scheduler, a Lightning-style dict (for per-step
        schedulers), or None when no optimizer is given or the name is
        unknown.
        """
        if optimizer is None:
            return None

        from torch.optim import lr_scheduler

        name = self.hparams['lr_scheduler']

        # All MultiStepLR flavors differ only in their milestones.
        multistep_milestones = {
            'MultiStepLR': [70, 140, 190],
            'MultiStepLR_CRD': [150, 180, 210],
            'MultiStepLR_NN': [100, 140, 150],
            'MultiStepLR_NN_50': [25, 40],
            'MultiStepLR_NN_70_Adam': [50, 65],
        }

        if name == 'ExpLR':
            return lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
        if name == 'CosLR':
            cos = lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=20 * self.steps_per_epoch + 1, eta_min=0)
            # Dict form: step this scheduler every batch, not every epoch.
            return {'scheduler': cos, 'interval': 'step'}
        if name == 'StepLR':
            return lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
        if name == 'OneCycLR':
            # + 1 to avoid over flow in steps() when there's totally 800 steps specified and 801 steps called
            # there will be such errors.
            cyc = lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.hparams["max_lr"],
                steps_per_epoch=self.steps_per_epoch + 1,
                epochs=self.hparams["num_epochs"])
            return {'scheduler': cyc, 'interval': 'step'}
        if name in multistep_milestones:
            return lr_scheduler.MultiStepLR(optimizer,
                                            milestones=multistep_milestones[name],
                                            gamma=0.1)
        return None
Exemplo n.º 15
0
def train_21k(model, train_loader, val_loader, optimizer, args):
    """Train ``model`` with mixed precision and validate after every epoch.

    Uses label-smoothed cross entropy, a per-batch OneCycleLR schedule and
    GradScaler-based AMP. ``args`` must provide ``label_smooth``, ``lr``,
    ``epochs`` and ``batch_size``.
    """
    # set loss
    loss_fn = CrossEntropyLS(args.label_smooth)

    # set scheduler
    # Stepped once per batch, so the cycle spans steps_per_epoch * epochs.
    scheduler = lr_scheduler.OneCycleLR(optimizer,
                                        max_lr=args.lr,
                                        steps_per_epoch=len(train_loader),
                                        epochs=args.epochs,
                                        pct_start=0.1,
                                        cycle_momentum=False,
                                        div_factor=20)

    # set scaler (for mixed-precision gradient scaling)
    scaler = GradScaler()

    # training loop
    for epoch in range(args.epochs):
        # Reshuffle differently per epoch across distributed workers.
        if num_distrib() > 1:
            train_loader.sampler.set_epoch(epoch)

        # train epoch
        print_at_master("\nEpoch {}".format(epoch))
        epoch_start_time = time.time()
        for i, (input, target) in enumerate(train_loader):
            with autocast():  # mixed precision
                output = model(input)
                loss = loss_fn(output, target)  # note - loss also in fp16
            model.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

        epoch_time = time.time() - epoch_start_time
        print_at_master(
            "\nFinished Epoch, Training Rate: {:.1f} [img/sec]".format(
                len(train_loader) * args.batch_size / epoch_time *
                max(num_distrib(), 1)))

        # validation epoch
        validate_21k(val_loader, model)
Exemplo n.º 16
0
    def configure_optimizers(self):
        """Create the optimizer (and optionally a scheduler) for Lightning.

        Reads ``self.hparams``: ``optimizer_type`` ('SGD' or 'ADAM'),
        ``lr``, ``weight_decay``, ``scheduler_type`` (None, 'plateu' or
        'one_cycle') and, for one_cycle, ``max_epochs``/``steps_per_epoch``.

        Returns:
            ``[optimizer]`` when no scheduler is requested, otherwise
            ``([optimizer], [scheduler])``.

        Raises:
            ValueError: for an unknown optimizer or scheduler type.
        """
        optimizer_type = self.hparams["optimizer_type"]
        if optimizer_type == "SGD":
            optimizer = optim.SGD(
                self.parameters(),
                lr=self.hparams["lr"],
                weight_decay=self.hparams["weight_decay"],
            )
        elif optimizer_type == "ADAM":
            optimizer = optim.Adam(
                self.parameters(),
                lr=self.hparams["lr"],
                weight_decay=self.hparams["weight_decay"],
            )
        else:
            # Previously an unknown type crashed later with
            # UnboundLocalError; fail fast with a clear message.
            raise ValueError("Unspecified optimizer type: {}".format(optimizer_type))

        if self.hparams["scheduler_type"] is None:
            return [optimizer]
        else:
            if self.hparams["scheduler_type"] == "plateu":
                scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="min",
                                                           patience=5)
            elif self.hparams["scheduler_type"] == "one_cycle":
                # BUG FIX: was `self.optimizer`, an attribute that is never
                # set — use the local optimizer built above.
                scheduler = lr_scheduler.OneCycleLR(
                    optimizer,
                    max_lr=self.hparams["lr"],
                    epochs=self.hparams["max_epochs"],
                    steps_per_epoch=self.hparams["steps_per_epoch"],
                )
            else:
                raise ValueError("Unspecified scheduler type: {}".format(
                    self.hparams["scheduler_type"]))

            return [optimizer], [scheduler]
Exemplo n.º 17
0
def get_scheduler(optimizer, lr_policy, args):
    """Return the LR scheduler named by ``lr_policy``.

    Args:
        optimizer: optimizer the scheduler will drive.
        lr_policy: 'step', 'plateau', 'cosine' or 'one_cylce' (sic — the
            misspelling is kept because callers pass that exact string).
        args: dict of scheduler-specific parameters.

    Raises:
        NotImplementedError: for an unrecognized ``lr_policy``.
    """
    if lr_policy == 'step':
        # BUG FIX: StepLR requires step_size; it was omitted, so this call
        # always raised TypeError.  NOTE(review): confirm callers supply
        # args['step_size'].
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args['step_size'],
                                        gamma=args['gamma'])  # 0.1
    elif lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode=args['mode'],
                                                   factor=args['factor'],
                                                   threshold=args['threshold'],
                                                   patience=args['patience'])
        # optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
    elif lr_policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   T_max=args['T_max'])  # 200
    elif lr_policy == 'one_cylce':
        scheduler = lr_scheduler.OneCycleLR(optimizer,
                                            max_lr=args['max_lr'],
                                            steps_per_epoch=len(
                                                args['data_loader']),
                                            epochs=args['epochs'])
    else:
        # BUG FIX: previously *returned* an un-raised NotImplementedError
        # instance with an unformatted message.
        raise NotImplementedError(
            'learning rate policy [%s] is not implemented' % lr_policy)
    return scheduler
def lr_scheduler_factory(optimizer, hparams, data_loader):
    """Create the LR scheduler named by ``hparams.sched``.

    Supports 'plateau' (per-epoch, maximizing a monitored metric) and
    'onecycle' (per-batch, spanning len(data_loader) * epochs steps).

    Raises:
        ValueError: for any other scheduler name.
    """
    sched_name = hparams.sched
    if sched_name == "plateau":
        return lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="max",
            patience=2,
            threshold=0.01,
            factor=0.1,
            verbose=True,
        )
    if sched_name == "onecycle":
        return lr_scheduler.OneCycleLR(
            optimizer=optimizer,
            max_lr=hparams.lr,
            cycle_momentum=True,
            pct_start=0.25,
            div_factor=25.0,
            final_div_factor=100000.0,
            steps_per_epoch=len(data_loader),
            epochs=hparams.epochs,
        )
    raise ValueError("Learning rate scheduler not supported yet.")
Exemplo n.º 19
0
def get_optimizer(policy, args):
    """Build the optimizer and (optional) LR scheduler for a policy net.

    Args:
        policy: module whose parameters are optimized.
        args: namespace with ``optimizer`` ('adam'/'sgd'/'rmsprop'), ``lr``,
            ``opt_schedule`` and the schedule-specific fields.

    Returns:
        (optimizer, scheduler).  For an unrecognized ``opt_schedule`` the
        raw string is returned as the scheduler (pre-existing behavior).

    Raises:
        ValueError: for an unknown ``args.optimizer``.
    """
    if args.optimizer == "adam":
        optimizer = optim.Adam(policy.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(policy.parameters(), lr=args.lr)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(policy.parameters(), lr=args.lr)
    else:
        # BUG FIX: an unknown optimizer previously fell through and crashed
        # later with UnboundLocalError; fail fast with a clear message.
        raise ValueError("Unknown optimizer: {}".format(args.optimizer))
    scheduler = args.opt_schedule
    if scheduler == "cyclic":
        scheduler = lr_scheduler.OneCycleLR(
            optimizer=optimizer,
            max_lr=args.div_factor * args.lr,
            total_steps=args.num_episodes_train)
    elif scheduler == "cyclic_multi":
        scheduler = lr_scheduler.CyclicLR(optimizer=optimizer,
                                          base_lr=args.lr,
                                          max_lr=args.div_factor * args.lr)
    elif scheduler == "WR":
        T_0 = max(1, int(args.num_episodes_train / 1000))
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=optimizer, T_0=T_0)

    # NOTE(review): any other opt_schedule value is returned unchanged as a
    # string — callers appear to rely on this fallthrough; confirm.
    return optimizer, scheduler
    def fit(self, dataloader, lr, epochs, weight_decay=0, print_steps=200):
        """Train ``self.model`` with SGD + a per-batch OneCycleLR schedule.

        Loss values are recorded every ``print_steps`` steps.

        Returns:
            (history_steps, history_loss): global step indices and the
            loss values recorded at them.
        """
        self.model.train()

        criterion = nn.CrossEntropyLoss()
        sgd = optim.SGD(self.model.parameters(),
                        lr,
                        momentum=0.9,
                        weight_decay=weight_decay,
                        nesterov=False)
        one_cycle = lr_scheduler.OneCycleLR(sgd,
                                            lr,
                                            epochs=epochs,
                                            steps_per_epoch=len(dataloader))

        history_loss = []
        history_steps = []
        steps_per_epoch = len(dataloader)
        for epoch in range(epochs):
            for step, (imgs, labels) in enumerate(dataloader):
                if torch.cuda.is_available():
                    imgs, labels = imgs.to('cuda'), labels.to('cuda')

                predictions = self.model(imgs)
                loss = criterion(predictions, labels)
                sgd.zero_grad()
                loss.backward()
                sgd.step()
                one_cycle.step()

                # Log every `print_steps`-th step (1-based).
                if (step + 1) % print_steps == 0:
                    history_loss.append(loss.item())
                    history_steps.append(epoch * steps_per_epoch + step + 1)
                    print(
                        f"epoch: {epoch + 1}    \tstep: {step + 1}    \tloss: {loss:.4f}"
                    )

        return history_steps, history_loss
Exemplo n.º 21
0
def main():
    """Train a multi-label classifier on COCO, validating periodically.

    Loads COCO train/val sets, builds the model plus an EMA shadow copy,
    trains with Adam + OneCycleLR and AsymmetricLoss, logs progress to
    'info_train.txt', and checkpoints the best-mAP model.
    """
    args = parser.parse_args()
    args.batch_size = args.batch_size

    # setup model
    print('creating model...')
    #state = torch.load(args.model_path, map_location='cpu')
    #args.num_classes = state['num_classes']
    args.do_bottleneck_head = True
    model = create_model(args).cuda()

    # Exponential moving average of model weights (decay 0.999).
    ema = EMA(model, 0.999)
    ema.register()

    #model.load_state_dict(state['model'], strict=True)
    #model.train()
    classes_list = np.array(list(idx_to_class.values()))
    print('done\n')

    # Data loading code
    normalize = transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1])

    instances_path_val = os.path.join(args.data,
                                      'annotations/instances_val2017.json')
    #instances_path_train = os.path.join(args.data, 'annotations/instances_val2017.json')#temprarily use val as train
    instances_path_train = os.path.join(
        args.data, 'annotations/instances_train2017.json')

    data_path_val = os.path.join(args.data, 'val2017')
    #data_path_train = os.path.join(args.data, 'val2017')#temporarily use val as train
    data_path_train = os.path.join(args.data, 'train2017')

    val_dataset = CocoDetection(
        data_path_val, instances_path_val,
        transforms.Compose([
            transforms.Resize((args.image_size, args.image_size)),
            transforms.ToTensor(),
            normalize,
        ]))
    train_dataset = CocoDetection(
        data_path_train, instances_path_train,
        transforms.Compose([
            transforms.Resize((args.image_size, args.image_size)),
            transforms.ToTensor(),
            normalize,
        ]))

    print("len(val_dataset)): ", len(val_dataset))
    print("len(train_dataset)): ", len(train_dataset))

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False)

    criterion = AsymmetricLoss()
    params = model.parameters()
    optimizer = torch.optim.Adam(params, lr=0.0002,
                                 weight_decay=0.0001)  # try the new optimizer
    total_step = len(train_loader)
    # NOTE(review): total_steps is one epoch's worth of batches, but the
    # loop below runs 5 epochs and calls scheduler.step() once per batch,
    # so the scheduler is stepped well past total_steps (newer torch raises
    # here); `epochs=25` is ignored when total_steps is given — confirm.
    scheduler = lr_scheduler.OneCycleLR(optimizer,
                                        max_lr=0.0002,
                                        total_steps=total_step,
                                        epochs=25)
    #total_step = len(train_loader)

    highest_mAP = 0
    trainInfoList = []
    Sig = torch.nn.Sigmoid()

    #f=open('info_train.txt', 'a')

    for epoch in range(5):
        for i, (inputData, target) in enumerate(train_loader):
            f = open('info_train.txt', 'a')
            #model.train()
            inputData = inputData.cuda()
            target = target.cuda()
            target = target.max(dim=1)[0]
            #Sig = torch.nn.Sigmoid()
            output = Sig(model(inputData))
            #output[output<args.thre] = 0
            #output[output>=args.thre]=1
            #print(output.shape) #(batchsize, channel, imhsize, imgsize)
            #print(inputData.shape) #(batchsize, numclasses)
            #print(output[0])
            #print(target[0])

            loss = criterion(output, target)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            ema.update()
            #store information
            if i % 10 == 0:
                trainInfoList.append([epoch, i, loss.item()])
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, 5, i, total_step, loss.item()))

                f.write('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}\n'.format(
                    epoch, 5, i, total_step, loss.item()))

            if (i + 1) % 400 == 0:
                # save a checkpoint for this iteration
                torch.save(
                    model.state_dict(),
                    os.path.join('models/',
                                 'model-{}-{}.ckpt'.format(epoch + 1, i + 1)))
                #modelName = 'models/' + 'decoder-{}-{}.ckpt'.format(epoch+1, i+1)
                mAP_score = validate_multi(val_loader, model, args, ema)
                #model.train()
                if mAP_score > highest_mAP:
                    highest_mAP = mAP_score
                    print('current highest_mAP = ', highest_mAP)
                    f.write('current highest_mAP = {}\n'.format(highest_mAP))

                    torch.save(model.state_dict(),
                               os.path.join('models/', 'model-highest.ckpt'))
            f.close()
            scheduler.step()  # update the learning rate (per-batch step)
Exemplo n.º 22
0
def onecycle(optimizer, n_examples, cfg):
    """Build a one-cycle LR scheduler sized from the dataset.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        n_examples: number of training examples, forwarded to ``cfg.n_steps``
            to compute the total step count of the cycle.
        cfg: config object exposing ``learning_rate`` (peak LR) and a
            ``n_steps(n_examples)`` callable.

    Returns:
        A ``lr_scheduler.OneCycleLR`` spanning ``cfg.n_steps(n_examples)``
        steps with peak learning rate ``cfg.learning_rate``.
    """
    peak_lr = cfg.learning_rate
    return lr_scheduler.OneCycleLR(
        optimizer, peak_lr, total_steps=cfg.n_steps(n_examples))
Exemplo n.º 23
0
Arquivo: train.py Projeto: inkyusa/ASL
def train_multi_label_coco(model, train_loader, val_loader, lr):
    """Train a multi-label classifier on COCO with ASL loss, AMP, EMA and OneCycleLR.

    Args:
        model: multi-label classification network (already on CUDA; batches
            are moved to ``.cuda()`` here).
        train_loader: training DataLoader yielding ``(images, targets)``.
        val_loader: validation DataLoader passed through to ``validate_multi``.
        lr: peak learning rate for the one-cycle schedule.
    """
    ema = ModelEma(model, 0.9997)  # 0.9997^641=0.82

    # set optimizer
    Epochs = 80
    Stop_epoch = 40  # training actually stops after epoch index Stop_epoch
    weight_decay = 1e-4
    criterion = AsymmetricLoss(gamma_neg=4, gamma_pos=0, clip=0.05, disable_torch_grad_focal_loss=True)
    parameters = add_weight_decay(model, weight_decay)
    optimizer = torch.optim.Adam(params=parameters, lr=lr, weight_decay=0)  # true wd, filter_bias_and_bn
    steps_per_epoch = len(train_loader)
    # Schedule is sized for the full Epochs horizon even though the loop may
    # break early at Stop_epoch.
    scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=steps_per_epoch, epochs=Epochs,
                                        pct_start=0.2)

    highest_mAP = 0
    trainInfoList = []
    scaler = GradScaler()  # mixed-precision loss scaling
    for epoch in range(Epochs):
        if epoch > Stop_epoch:
            break
        for i, (inputData, target) in enumerate(train_loader):
            inputData = inputData.cuda()
            target = target.cuda()  # (batch,3,num_classes)
            target = target.max(dim=1)[0]
            with autocast():  # mixed precision
                output = model(inputData).float()  # sigmoid will be done in loss !
            loss = criterion(output, target)
            model.zero_grad()

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            scheduler.step()  # one-cycle steps once per batch

            ema.update(model)
            # store information
            if i % 100 == 0:
                trainInfoList.append([epoch, i, loss.item()])
                print('Epoch [{}/{}], Step [{}/{}], LR {:.1e}, Loss: {:.1f}'
                      .format(epoch, Epochs, str(i).zfill(3), str(steps_per_epoch).zfill(3),
                              scheduler.get_last_lr()[0],
                              loss.item()))

        # Best-effort per-epoch checkpoint: do not abort training when the
        # save fails (e.g. 'models/' missing), but report it and never
        # swallow KeyboardInterrupt/SystemExit (the old bare `except: pass`
        # caught everything silently).
        try:
            torch.save(model.state_dict(), os.path.join(
                'models/', 'model-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        except Exception as e:
            print('checkpoint save failed: {}'.format(e))

        model.eval()
        mAP_score = validate_multi(val_loader, model, ema)
        model.train()
        if mAP_score > highest_mAP:
            highest_mAP = mAP_score
            try:
                torch.save(model.state_dict(), os.path.join(
                    'models/', 'model-highest.ckpt'))
            except Exception as e:
                print('best-checkpoint save failed: {}'.format(e))
        print('current_mAP = {:.2f}, highest_mAP = {:.2f}\n'.format(mAP_score, highest_mAP))
Exemplo n.º 24
0
def main():
    """Train a bird-call classifier (ResNet-50) with ASL loss, AMP and EMA.

    Hyper-parameters come from ./config/train.yml; the model is checkpointed
    whenever the running-average training loss improves.
    """
    args = parse_arguments()

    with open('./config/train.yml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    epochs = args.epochs  # NOTE(review): overwritten below by conf['train']['epochs']

    if not os.path.exists(conf['train']['saved_model']) and args.saved:
        raise FileNotFoundError('No such saved model {}'.format(
            conf['train']['saved_model']))

    if not os.path.exists(conf['train']['saved_model']):
        os.makedirs(conf['train']['saved_model'])

    #model = Model(conf['model'])

    # init model
    #model = resnet152(num_classes=conf['train']['num_classes'])
    model = resnet50(num_classes=conf['train']['num_classes'])
    model.to(device)
    ema = ModelEma(model, 0.9997)  # exponential moving average of the weights

    # get parameter index with grad
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))

    # training parameter assign
    learning_rate = conf['train']['learning_rate']
    batch_size = conf['train']['batch_size']  # NOTE(review): unused; dataloader reads conf directly
    epochs = conf['train']['epochs']

    # prepare datasets
    dataset = BirdCallDataset(conf['data_folder'])
    dataloader = DataLoader(dataset,
                            batch_size=conf['train']['batch_size'],
                            shuffle=True)

    # init utility classes
    loss_avg = Averager()
    criterion = AsymmetricLoss(gamma_neg=4,
                               gamma_pos=0,
                               clip=0.05,
                               disable_torch_grad_focal_loss=True)
    scaler = GradScaler()
    optimizer = optim.Adam(filtered_parameters,
                           lr=learning_rate,
                           betas=(0.9, 0.999))
    # NOTE(review): steps_per_epoch is the number of *samples*
    # (len(dataset)), not batches (len(dataloader)); the one-cycle schedule
    # is therefore stretched by a factor of batch_size — confirm intent.
    scheduler = lr_scheduler.OneCycleLR(optimizer,
                                        max_lr=learning_rate,
                                        steps_per_epoch=len(dataset),
                                        epochs=epochs,
                                        pct_start=0.2)

    best_loss = 100  # sentinel: any real averaged loss below this wins
    cur_time = time.time()
    # Run Training Session
    print(len(dataset))
    with tqdm(range(epochs), unit="epoch") as tepoch:
        for epoch in tepoch:
            model.train()
            for batch, data in enumerate(dataloader):
                tepoch.set_description(f" Epoch {epoch+1}/{batch} ")

                wav, bird = data

                wav = wav.to(torch.float32)
                wav = wav.to(device)
                # Label smoothing: positives -> 0.995, negatives -> 0.0025.
                bird_smooth = np.where(bird == 1, 0.995, 0.0025)
                bird_smooth = torch.from_numpy(bird_smooth).to(device)

                with autocast():  # mixed precision
                    output = model(
                        wav).float()  # sigmoid will be done in loss !

                loss = criterion(output, bird_smooth)
                loss_avg.add(loss)
                model.zero_grad()

                scaler.scale(loss).backward()
                # loss.backward()

                scaler.step(optimizer)
                scaler.update()
                # optimizer.step()

                scheduler.step()
                ema.update(model)

                # Binarize predictions against the configured threshold for
                # the progress-bar F1 readout (first sample of the batch only).
                pred_score = torch.where(
                    F.softmax(output, dim=1) > conf['train']['threshold'], 1,
                    0)
                t1 = pred_score.cpu().detach().numpy()[0]
                t2 = bird.cpu().detach().numpy()[0]

                # NOTE(review): clipping here has no effect on this step's
                # update — the gradients were already applied by
                # scaler.step() above; move before scaler.step() (after
                # scaler.unscale_) if clipping is intended. TODO confirm.
                torch.nn.utils.clip_grad_norm_(
                    model.parameters(),
                    5)  # gradient clipping with 5 (Default)
                #https://kh-kim.gitbook.io/natural-language-processing-with-pytorch/00-cover-6/05-gradient-clipping
                tepoch.set_postfix(loss=loss_avg.val().item(),
                                   f1_score=f1_score(t1, t2))

                del wav, bird, loss, output, t1, t2, pred_score

            # Checkpoint whenever the running-average loss improves.
            if loss_avg.val().item() < best_loss:
                best_loss = loss_avg.val().item()
                torch.save(
                    model.state_dict(),
                    os.path.join(conf['train']['save_folder'],
                                 f'{args.file_name}.pth'))
            # validation section, ToDo.
            '''
Exemplo n.º 25
0
def get_scheduler(optimizer, opt, **kwargs):
    """Return the LR scheduler selected by ``opt.lr_policy``.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        opt: options namespace; ``opt.lr_policy`` picks the policy and the
            other attributes read below depend on that choice.
        **kwargs: extras for the 'one_cycle' policy: 'max_lr', 'len_train'
            (steps per epoch), 'division_factor' and 'last_epoch'.

    Returns:
        A configured ``torch.optim.lr_scheduler`` instance.

    Raises:
        NotImplementedError: if ``opt.lr_policy`` is not a known policy.
    """
    print('opt.lr_policy = [{}]'.format(opt.lr_policy))
    if opt.lr_policy == 'lambda':

        def lambda_rule(epoch):
            # Linear decay to 0 over the final ``opt.niter_decay`` epochs.
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count -
                             opt.niter) / float(opt.niter_decay + 1)
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=opt.lr_decay_iters,
                                        gamma=0.5)
    elif opt.lr_policy == 'step2':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=opt.lr_decay_iters,
                                        gamma=0.1)
    elif opt.lr_policy == 'plateau':
        print('schedular=plateau')
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.1,
                                                   threshold=0.01,
                                                   patience=5)
    elif opt.lr_policy == 'plateau2':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.2,
                                                   threshold=0.01,
                                                   patience=5)
    elif opt.lr_policy == 'step_warmstart':

        def lambda_rule(epoch):
            # Warm start at 10% for 5 epochs, full LR until 100, then decay.
            if epoch < 5:
                lr_l = 0.1
            elif 5 <= epoch < 100:
                lr_l = 1
            elif 100 <= epoch < 200:
                lr_l = 0.1
            elif 200 <= epoch:
                lr_l = 0.01
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step_warmstart2':

        def lambda_rule(epoch):
            # Same shape as 'step_warmstart' but with a shorter plateau.
            if epoch < 5:
                lr_l = 0.1
            elif 5 <= epoch < 50:
                lr_l = 1
            elif 50 <= epoch < 100:
                lr_l = 0.1
            elif 100 <= epoch:
                lr_l = 0.01
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'one_cycle':
        print("Using one-cycle scheduler")

        scheduler = lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=kwargs['max_lr'],
            steps_per_epoch=kwargs['len_train'],
            epochs=opt.n_epochs,
            cycle_momentum=True,
            div_factor=kwargs['division_factor'])

        print(f"Scheduler: last epoch: {kwargs['last_epoch']}")
        # NOTE(review): overwriting last_epoch after construction does not
        # recompute the LR for that step; verify this resume logic.
        scheduler.last_epoch = kwargs[
            'last_epoch'] if kwargs['last_epoch'] > 0 else -1

    else:
        # Bug fix: this used to *return* a NotImplementedError instance
        # (with printf-style args left unformatted) instead of raising it,
        # so callers received an exception object as their "scheduler".
        raise NotImplementedError(
            'learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
Exemplo n.º 26
0
            # sanity check
            for param in filter(lambda p: p.requires_grad, modelVars['model'].parameters()):
                print(param.name,param.shape)
        else:
            modelVars['optimizer'] = optim.AdamW([
                                                {'params': filter(lambda p: not p.is_cnn_param, modelVars['model'].parameters()), 'lr': params['learning_rate_meta']},
                                                {'params': filter(lambda p: p.is_cnn_param, modelVars['model'].parameters()), 'lr': params['learning_rate']}
                                                ], lr=params['learning_rate'])
    else:
        modelVars['optimizer'] = optim.AdamW(modelVars['model'].parameters(), lr=params['learning_rate'])

    # Decay LR by a factor of 0.1 every 7 epochs
#     modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=params['lowerLRAfter'], gamma=1/np.float32(params['LRstep']))
    
    modelVars['scheduler'] = lr_scheduler.OneCycleLR(modelVars['optimizer'], max_lr=params['learning_rate'],
                                                     epochs=params['training_steps'],
                                                     steps_per_epoch=len(dataset_train)//params['batchSize'])


    # Define softmax
    modelVars['softmax'] = nn.Softmax(dim=1)

    # Set up training
    # loading from checkpoint
    if load_old:
        # Find last, not last best checkpoint
        files = glob(params['saveDir']+'/*')
        global_steps = np.zeros([len(files)])
        for i in range(len(files)):
            # Use meta files to find the highest index
            if 'best' in files[i]:
Exemplo n.º 27
0
    def test_OneCycleLR(self, debug=True):
        """
    Usage:
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/pypi/torch1_7_0 -d /cache/pypi -t copytree
        for filename in /cache/pypi/*.whl; do
            pip install $filename
        done
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree -b /cache/$proj_root/code.zip
        cd /cache/$proj_root
        pip install -r requirements.txt

        export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
        export TIME_STR=1
        export PYTHONPATH=./exp:./stylegan2-pytorch:./
        python 	-c "from exp.tests.test_styleganv2 import Testing_stylegan2;\
          Testing_stylegan2().test_train_ffhq_128()"

    :return:
    """
        # Default the runtime environment when not set by the launcher.
        if 'CUDA_VISIBLE_DEVICES' not in os.environ:
            os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        if 'TIME_STR' not in os.environ:
            # NOTE(review): both branches assign '0', so the is_debugging()
            # check is a no-op; presumably one branch was meant to be '1'.
            os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
        from template_lib.v2.config_cfgnode.argparser import \
          (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)

        # Forward everything after --tl_opts to the config system.
        tl_opts = ' '.join(sys.argv[sys.argv.index('--tl_opts') +
                                    1:]) if '--tl_opts' in sys.argv else ''
        print(f'tl_opts:\n {tl_opts}')

        command, outdir = get_command_and_outdir(
            self, func_name=sys._getframe().f_code.co_name, file=__file__)
        argv_str = f"""
                --tl_config_file none
                --tl_command none
                --tl_outdir {outdir}
                """
        args = setup_outdir_and_yaml(argv_str, return_cfg=True)

        import torch.nn as nn
        from torch.optim import lr_scheduler
        from matplotlib import pyplot as plt

        model = nn.Linear(3, 64)  # dummy module supplying parameters

        def create_optimizer():
            # SGD with momentum so OneCycleLR can cycle momentum too.
            return SGD(model.parameters(),
                       lr=0.1,
                       momentum=0.9,
                       weight_decay=1e-4)

        def plot_lr(scheduler, title='', labels=['base'], nrof_epoch=100):
            # Step the scheduler nrof_epoch times, recording the LR of each
            # param group, then plot one curve per label.
            lr_li = [[] for _ in range(len(labels))]
            epoch_li = list(range(nrof_epoch))
            for epoch in epoch_li:
                scheduler.step()  # compute and apply this epoch's LR to the optimizer's param groups
                lr = scheduler.get_last_lr()  # LR(s) most recently applied
                for i in range(len(labels)):
                    lr_li[i].append(lr[i])
            for lr, label in zip(lr_li, labels):
                plt.plot(epoch_li, lr, label=label)
            plt.grid()
            plt.xlabel('epoch')
            plt.ylabel('lr')
            plt.title(title)
            plt.legend()
            plt.show()

        optimizer = create_optimizer()
        # NOTE(review): total_steps=100 and 100 extra step() calls after the
        # initial one taken at construction may overrun the cycle on newer
        # torch versions (ValueError) — confirm against the pinned torch.
        scheduler = lr_scheduler.OneCycleLR(optimizer, 0.1, total_steps=100)
        plot_lr(scheduler, title='OneCycleLR')
        pass
Exemplo n.º 28
0
def train_ensemble(models,
                   num_epochs,
                   train_loader,
                   test_loader,
                   train_func,
                   test_func,
                   torch_device,
                   loss_pos_weight,
                   pos_label,
                   lr,
                   clip,
                   save_model_path_func=None,
                   start_idx=0):
    """Train every model of an ensemble and keep each one's best weights.

    For each model an AdamW optimizer, a one-cycle LR schedule and an AMP
    gradient scaler are created, then ``train_func`` runs for ``num_epochs``
    epochs. After every epoch ``test_func`` is evaluated and the state dict
    with the best ``(ROC AUC, APS)`` pair — compared after rounding to two
    decimals — is remembered. Each model is finally restored to its best
    state and optionally saved via ``save_model_path_func(idx + start_idx)``.

    Returns:
        ``(best_aps, best_roc_auc)`` when the ensemble holds exactly one
        model (useful for cross-validation); ``None`` otherwise.
    """
    for model_idx, net in enumerate(models):
        print()
        print("Training model " + str(model_idx) + " of " + str(len(models) - 1))

        # Optimizer plus a one-cycle schedule spanning the whole run.
        opt = AdamW(net.parameters(), lr=lr)
        sched = lr_scheduler.OneCycleLR(optimizer=opt,
                                        max_lr=lr,
                                        epochs=num_epochs,
                                        steps_per_epoch=len(train_loader))

        amp_scaler = GradScaler()  # mixed-precision gradient scaling

        # Weighted BCE over logits; baseline scores before any training.
        bce = nn.BCEWithLogitsLoss(pos_weight=loss_pos_weight)
        best_state = deepcopy(net.state_dict())
        best_aps, best_roc_auc = test_func(net, test_loader, torch_device,
                                           pos_label)
        print("Initial results for model: APS=" + str(best_aps) + " ROC AUC=" +
              str(best_roc_auc))
        print()
        best_epoch = 0

        for epoch in range(num_epochs):
            print(str(epoch) + " of " + str(num_epochs - 1))
            train_func(net, train_loader, bce, torch_device, opt,
                       sched, amp_scaler, clip)
            aps, roc_auc = test_func(net, test_loader, torch_device,
                                     pos_label)
            print("APS=" + str(aps) + " ROC AUC=" + str(roc_auc))

            # Lexicographic comparison on (ROC AUC, APS), both rounded to
            # two decimals — equivalent to the chained >/==/and condition.
            if ((round(roc_auc, 2), round(aps, 2))
                    > (round(best_roc_auc, 2), round(best_aps, 2))):
                best_roc_auc = roc_auc
                best_aps = aps
                best_epoch = epoch
                best_state = deepcopy(net.state_dict())

        # Restore the best snapshot and persist it when a path is provided.
        net.load_state_dict(best_state)
        if save_model_path_func is not None:
            torch.save(best_state,
                       save_model_path_func(model_idx + start_idx))

        print("Best epoch for model: " + str(best_epoch))

    # Return best results for cross-validation using just one model.
    if len(models) == 1:
        return best_aps, best_roc_auc
Exemplo n.º 29
0
def create_lr_scheduler(
        conf_lrs: Config, epochs: int, optimizer: Optimizer,
        steps_per_epoch: Optional[int]) -> Tuple[Optional[_LRScheduler], bool]:
    """Build the LR scheduler described by ``conf_lrs``.

    Args:
        conf_lrs: scheduler config; may be None, in which case no scheduler
            is created.
        epochs: total training epochs (warmup epochs are subtracted for the
            schedules that need a horizon).
        optimizer: optimizer the schedule is attached to.
        steps_per_epoch: batches per epoch; required by 'one_cycle'.

    Returns:
        ``(scheduler, epoch_or_step)`` where the flag is True when the
        scheduler should be stepped once per epoch and False when it must
        be stepped every batch ('one_cycle').

    Raises:
        ValueError: for an unrecognized ``conf_lrs['type']``.
    """
    # epoch_or_step - apply every epoch or every step
    scheduler, epoch_or_step = None, True  # by default sched step on epoch

    if conf_lrs is not None:
        # Bug fix: warmup used to be read from conf_lrs *before* the None
        # check above, crashing whenever conf_lrs was None.
        conf_warmup = conf_lrs.get_val('warmup', None)
        warmup_epochs = 0
        if conf_warmup is not None and 'epochs' in conf_warmup:
            warmup_epochs = conf_warmup['epochs']

        lr_scheduler_type = conf_lrs['type']  # TODO: default should be none?

        if lr_scheduler_type == 'cosine':
            scheduler = lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=epochs - warmup_epochs,
                eta_min=conf_lrs['min_lr'])
        elif lr_scheduler_type == 'multi_step':
            scheduler = lr_scheduler.MultiStepLR(
                optimizer,
                milestones=conf_lrs['milestones'],
                gamma=conf_lrs['gamma'])
        elif lr_scheduler_type == 'pyramid':
            scheduler = _adjust_learning_rate_pyramid(optimizer,
                                                      epochs - warmup_epochs,
                                                      get_optim_lr(optimizer))
        elif lr_scheduler_type == 'step':
            decay_period = conf_lrs['decay_period']
            gamma = conf_lrs['gamma']
            scheduler = lr_scheduler.StepLR(optimizer,
                                            decay_period,
                                            gamma=gamma)
        elif lr_scheduler_type == 'one_cycle':
            assert steps_per_epoch is not None
            ensure_pytorch_ver('1.3.0',
                               'LR scheduler OneCycleLR is not available.')
            max_lr = conf_lrs['max_lr']
            epoch_or_step = False  # one_cycle must be stepped per batch
            scheduler = lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=max_lr,
                epochs=epochs - warmup_epochs,
                steps_per_epoch=steps_per_epoch,
            )  # TODO: other params
        elif not lr_scheduler_type:
            scheduler = None
        else:
            raise ValueError('invalid lr_schduler=%s' % lr_scheduler_type)

        # select warmup for LR schedule
        if warmup_epochs:
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=conf_lrs['warmup'].get_val('multiplier', 1.0),
                total_epoch=warmup_epochs,
                after_scheduler=scheduler)

    return scheduler, epoch_or_step
Exemplo n.º 30
0
    def __init__(
        self,
        max_lr: Union[float, List[float]],
        total_steps: Optional[int] = None,
        epochs: Optional[int] = None,
        steps_per_epoch: Optional[int] = None,
        pct_start: float = 0.3,
        anneal_strategy: str = "cos",
        cycle_momentum: bool = True,
        base_momentum: Union[float, List[float]] = 0.85,
        max_momentum: Union[float, List[float]] = 0.95,
        div_factor: float = 25.0,
        final_div_factor: float = 1e4,
        last_epoch: int = -1,
    ):
        """Constructor for OneCycleLR.

        Wraps ``torch.optim.lr_scheduler.OneCycleLR`` behind a deferred
        factory: the underlying scheduler is only instantiated once the
        optimizer is supplied, and it is stepped on every batch.

        Args:
            max_lr (float or list of float): Upper learning rate boundary in
                the cycle, per parameter group.
            total_steps (int): Total number of steps in the cycle; when not
                given it is inferred from ``epochs * steps_per_epoch``.
                Defaults to None.
            epochs (int): Number of training epochs, used together with
                ``steps_per_epoch`` when ``total_steps`` is absent.
                Defaults to None.
            steps_per_epoch (int): Steps per epoch, used together with
                ``epochs`` when ``total_steps`` is absent. Defaults to None.
            pct_start (float): Fraction of the cycle spent increasing the
                learning rate. Defaults to 0.3.
            anneal_strategy (str): {'cos', 'linear'} — cosine or linear
                annealing. Defaults to 'cos'.
            cycle_momentum (bool): If ``True``, momentum is cycled inversely
                to the learning rate between ``base_momentum`` and
                ``max_momentum``. Defaults to True.
            base_momentum (float or list of float): Lower momentum boundary
                per parameter group (reached at peak learning rate).
                Defaults to 0.85.
            max_momentum (float or list of float): Upper momentum boundary
                per parameter group (held at the start of the cycle).
                Defaults to 0.95.
            div_factor (float): initial_lr = max_lr / div_factor.
                Defaults to 25.
            final_div_factor (float): min_lr = initial_lr / final_div_factor.
                Defaults to 1e4.
            last_epoch (int): Index of the last epoch. Default: -1.
        """

        def _build(optimizer):
            # Deferred factory: capture all hyper-parameters now, create the
            # torch scheduler once the optimizer exists.
            return _schedulers.OneCycleLR(
                optimizer,
                max_lr,
                total_steps=total_steps,
                epochs=epochs,
                steps_per_epoch=steps_per_epoch,
                pct_start=pct_start,
                anneal_strategy=anneal_strategy,
                cycle_momentum=cycle_momentum,
                base_momentum=base_momentum,
                max_momentum=max_momentum,
                div_factor=div_factor,
                final_div_factor=final_div_factor,
                last_epoch=last_epoch,
            )

        super().__init__(_build, step_on_batch=True)