def __init__(
    self,
    base_lr: float,
    max_lr: float,
    step_size_up: int = 2000,
    step_size_down: Optional[int] = None,
    mode: str = "triangular",
    gamma: float = 1.0,
    scale_fn: Optional[Callable[[float], float]] = None,
    scale_mode: str = "cycle",
    cycle_momentum: bool = True,
    base_momentum: float = 0.8,
    max_momentum: float = 0.9,
    last_epoch: int = -1,
    step_on_batch: bool = True,
):
    """Constructor for CyclicLR."""
    super().__init__(
        lambda opt: _schedulers.CyclicLR(
            opt,
            base_lr,
            max_lr,
            step_size_up=step_size_up,
            step_size_down=step_size_down,
            mode=mode,
            gamma=gamma,
            scale_fn=scale_fn,
            scale_mode=scale_mode,
            cycle_momentum=cycle_momentum,
            base_momentum=base_momentum,
            max_momentum=max_momentum,
            last_epoch=last_epoch,
        ),
        step_on_batch=step_on_batch,
    )
def __init__(
    self,
    optimizer: Optimizer,
    base_lr: Union[float, List[float]],
    max_lr: Union[float, List[float]],
    step_size_up: int = 2000,
    step_size_down: Optional[int] = None,
    mode: str = "triangular",
    gamma: float = 1.0,
    scale_fn: Optional[Callable[[int], float]] = None,
    scale_mode: str = "cycle",
    cycle_momentum: bool = True,
    base_momentum: float = 0.8,
    max_momentum: float = 0.9,
    last_epoch: int = -1,
    step_duration: int = 1,
):
    scheduler = lr_scheduler.CyclicLR(
        optimizer,
        base_lr,
        max_lr,
        step_size_up,
        step_size_down,
        mode,
        gamma,
        scale_fn,
        scale_mode,
        cycle_momentum,
        base_momentum,
        max_momentum,
        last_epoch,
    )
    super().__init__(scheduler, step_duration)
def __init__(self, base_lr, max_lr, step_size_up=2000, step_size_down=None,
             mode='triangular', gamma=1., scale_fn=None, scale_mode='cycle',
             cycle_momentum=True, base_momentum=0.8, max_momentum=0.9):
    try:
        from torch.optim.lr_scheduler import CyclicLR
    except ImportError:
        raise ImportError("Update torch>=1.1.0 to use 'CyclicLR'")
    super().__init__(
        lambda opt: _scheduler.CyclicLR(opt, base_lr, max_lr,
                                        step_size_up=step_size_up,
                                        step_size_down=step_size_down,
                                        mode=mode,
                                        gamma=gamma,
                                        scale_fn=scale_fn,
                                        scale_mode=scale_mode,
                                        cycle_momentum=cycle_momentum,
                                        base_momentum=base_momentum,
                                        max_momentum=max_momentum)
    )
def cyclic_lr(optimizer, last_epoch, base_lr=0.001, max_lr=0.01, epochs_up=1, epochs_down=None,
              epoch_size=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle',
              cycle_momentum=False, base_momentum=0.8, max_momentum=0.9, **_) -> Any:
    def exp_range_scale_fn(x):
        res = gamma ** (x - 1)
        return res

    # note: the incoming last_epoch is overridden here, so the schedule always restarts from -1
    last_epoch = -1
    step_size_up = epochs_up * epoch_size
    step_size_down = step_size_up if epochs_down is None else epochs_down * epoch_size
    return lr_sched.CyclicLR(optimizer,
                             base_lr=base_lr,
                             max_lr=max_lr,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             mode=mode,
                             scale_fn=exp_range_scale_fn,
                             scale_mode=scale_mode,
                             cycle_momentum=cycle_momentum,
                             base_momentum=base_momentum,
                             max_momentum=max_momentum,
                             last_epoch=last_epoch)
def cyclic_lr(optimizer, last_epoch, base_lr=0.001, max_lr=0.01, step_size_up=2000,
              step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None,
              scale_mode='cycle', cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, **_):
    return lr_scheduler.CyclicLR(optimizer,
                                 base_lr=base_lr,
                                 max_lr=max_lr,
                                 step_size_up=step_size_up,
                                 step_size_down=step_size_down,
                                 mode=mode,
                                 gamma=gamma,
                                 scale_mode=scale_mode,
                                 cycle_momentum=cycle_momentum,
                                 base_momentum=base_momentum,
                                 max_momentum=max_momentum,
                                 last_epoch=last_epoch)
def __init__(self, optimizer, base_lr, max_lr, step_size_up, step_size_down,
             cycle_momentum, base_momentum, max_momentum, post_decay):
    # cyclic params
    self.optimizer = optimizer
    self.initial_lr = base_lr
    self.max_lr = max_lr
    self.step_size_up = step_size_up
    self.step_size_down = step_size_down
    self.cycle_momentum = cycle_momentum
    self.base_momentum = base_momentum
    self.max_momentum = max_momentum
    self.post_decay = post_decay

    # cap to one
    if self.step_size_up < 1:
        self.step_size_up = 1
    if self.step_size_down < 1:
        self.step_size_down = 1

    # cyclic lr
    self.initial_scheduler = toptim.CyclicLR(self.optimizer,
                                             base_lr=self.initial_lr,
                                             max_lr=self.max_lr,
                                             step_size_up=self.step_size_up,
                                             step_size_down=self.step_size_down,
                                             cycle_momentum=self.cycle_momentum,
                                             base_momentum=self.base_momentum,
                                             max_momentum=self.max_momentum)

    # our params
    self.oneshot_n = self.step_size_up + self.step_size_down  # steps to warm up for
    self.finished = False  # am i done
    super().__init__(optimizer)
def make_scheduler_with_cfg(optimizer, total_num, scheduler_cfg: dict):
    lr_strategy = scheduler_cfg["lr_strategy"]
    chosen_scheduler_cfg = scheduler_cfg[lr_strategy]
    if lr_strategy == "clr":
        # # cycle_id is the index of the current cycle, counted from 1
        # # step_size is the number of iterations in half a cycle
        # cycle_id = np.floor(1 + curr_epoch / (2 * step_size))
        # # x maps the position within the cycle to an lr scale: a ^-shaped (triangular) piecewise-linear curve
        # x = 1 - np.abs(curr_epoch / step_size - 2 * cycle_id + 1)
        # lr = base_lr + (max_lr - base_lr) * np.maximum(0, x)
        scheduler = lr_scheduler.CyclicLR(
            optimizer=optimizer,
            base_lr=chosen_scheduler_cfg["min_lr"],
            max_lr=chosen_scheduler_cfg["max_lr"],
            step_size_up=chosen_scheduler_cfg["step_size"],
            scale_mode=chosen_scheduler_cfg["mode"],
        )
    elif lr_strategy == 'step':
        scheduler = lr_scheduler.MultiStepLR(
            optimizer=optimizer,
            milestones=chosen_scheduler_cfg['milestones'],
            gamma=chosen_scheduler_cfg['gamma']
        )
    else:
        lr_func = partial(_get_lr_coefficient, total_num=total_num,
                          lr_strategy=lr_strategy, scheduler_cfg=chosen_scheduler_cfg)
        scheduler = lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_func)
    return scheduler
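The commented-out formula above is the triangular rule from the CLR paper. A minimal standalone sketch of that rule, with illustrative values (base_lr, max_lr, and step_size are assumptions, not taken from the config above):

import numpy as np

base_lr, max_lr, step_size = 1e-4, 1e-2, 100  # illustrative values

def triangular_lr(step):
    cycle_id = np.floor(1 + step / (2 * step_size))      # which cycle we are in, counted from 1
    x = 1 - np.abs(step / step_size - 2 * cycle_id + 1)  # ^-shaped position within the cycle, in [0, 1]
    return base_lr + (max_lr - base_lr) * np.maximum(0, x)

for step in (0, 50, 100, 150, 200):
    print(step, triangular_lr(step))
# steps 0 and 200 give base_lr, step 100 gives max_lr: one triangle every 2 * step_size steps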
def __init__(self, lr, weight_decay, class_weight, init_type, gpu_ids, dataset_size,
             view, alpha, network):
    super(Net, self).__init__()
    self.view = view
    self.gpu_ids = gpu_ids
    self.alpha = alpha
    self.network = network
    self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
    self.model = networks.define_X(init_type, gpu_ids, view, network)
    self.criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.DoubleTensor(class_weight).cuda(gpu_ids[0]))
    self.optimizer = optim.AdamW(self.model.parameters(), lr=lr, weight_decay=weight_decay)
    self.scheduler = lr_scheduler.CyclicLR(self.optimizer,
                                           base_lr=lr,
                                           max_lr=10 * lr,
                                           step_size_up=dataset_size // 2,
                                           step_size_down=dataset_size - dataset_size // 2,
                                           cycle_momentum=False,
                                           mode='triangular2')
    self.softmax = nn.Softmax()
def __init__(self, optimizer, lr, warmup_steps, momentum, decay):
    # cyclic params
    self.optimizer = optimizer
    self.lr = lr
    self.warmup_steps = warmup_steps
    self.momentum = momentum
    self.decay = decay

    # cap to one
    if self.warmup_steps < 1:
        self.warmup_steps = 1

    # cyclic lr
    self.initial_scheduler = toptim.CyclicLR(self.optimizer,
                                             base_lr=0,
                                             max_lr=self.lr,
                                             step_size_up=self.warmup_steps,
                                             step_size_down=self.warmup_steps,
                                             cycle_momentum=False,
                                             base_momentum=self.momentum,
                                             max_momentum=self.momentum)

    # second optimizer
    # self.final_scheduler = toptim.ReduceLROnPlateau(self.optimizer, factor=0.9, mode='min', patience=self.warmup_steps / 5)
    # self.final_scheduler = toptim.ExponentialLR(self.optimizer, gamma=0.99997)

    # our params
    # self.last_epoch = -1  # fix for pytorch 1.1 and below
    self.finished = False  # am i done
    super().__init__(optimizer)
def __init__(self, optimizer, lr, warmup_steps, momentum, decay):
    # cyclic params
    self.optimizer = optimizer
    self.lr = lr
    self.warmup_steps = warmup_steps
    self.momentum = momentum
    self.decay = decay

    # cap to one
    if self.warmup_steps < 1:
        self.warmup_steps = 1

    # cyclic lr
    self.initial_scheduler = toptim.CyclicLR(self.optimizer,
                                             base_lr=0,
                                             max_lr=self.lr,
                                             step_size_up=self.warmup_steps,
                                             step_size_down=self.warmup_steps,
                                             cycle_momentum=False,
                                             base_momentum=self.momentum,
                                             max_momentum=self.momentum)

    # our params
    self.last_epoch = -1  # fix for pytorch 1.1 and below
    self.finished = False  # am i done
    self.is_start = False
    super().__init__(optimizer)
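Warm-up constructors like the two above only set up the ramp; the step/get_lr logic that hands control over once warm-up finishes is not shown here. A minimal sketch of that hand-over pattern, assuming the scheduler is stepped once per batch and switches to an exponential decay afterwards (the class and parameter names are hypothetical, not the original code):

from torch import nn, optim
from torch.optim import lr_scheduler as sched


class WarmupThenDecay:
    """Hypothetical wrapper: ramp 0 -> lr with CyclicLR, then decay exponentially."""

    def __init__(self, optimizer, lr, warmup_steps, decay):
        self.optimizer = optimizer
        self.decay = decay
        self.warmup_steps = max(1, warmup_steps)
        self.warmup = sched.CyclicLR(optimizer, base_lr=0.0, max_lr=lr,
                                     step_size_up=self.warmup_steps,
                                     step_size_down=self.warmup_steps,
                                     cycle_momentum=False)
        self.post = None
        self.steps = 0

    def step(self):
        self.steps += 1
        if self.steps <= self.warmup_steps:
            self.warmup.step()          # still ramping up
        else:
            if self.post is None:       # hand over: decay from wherever the ramp ended
                self.post = sched.ExponentialLR(self.optimizer, gamma=self.decay)
            self.post.step()


# usage: one scheduler step per batch
model = nn.Linear(4, 1)
optimizer = optim.SGD(model.parameters(), lr=0.1)
warmup = WarmupThenDecay(optimizer, lr=0.1, warmup_steps=100, decay=0.999)
for _ in range(200):
    optimizer.step()
    warmup.step()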
def __init__(self, base_lr: float, max_lr: float, step_size_up: int = 2000,
             step_size_down: Optional[int] = None, mode: str = 'triangular',
             gamma: float = 1., scale_fn: Optional[Callable[[float], float]] = None,
             scale_mode: str = 'cycle', cycle_momentum: bool = True,
             base_momentum: float = 0.8, max_momentum: float = 0.9,
             last_epoch: int = -1, step_on_iteration: bool = True):
    super().__init__(
        lambda opt: _scheduler.CyclicLR(opt, base_lr, max_lr,
                                        step_size_up=step_size_up,
                                        step_size_down=step_size_down,
                                        mode=mode,
                                        gamma=gamma,
                                        scale_fn=scale_fn,
                                        scale_mode=scale_mode,
                                        cycle_momentum=cycle_momentum,
                                        base_momentum=base_momentum,
                                        max_momentum=max_momentum,
                                        last_epoch=last_epoch),
        step_on_iteration=step_on_iteration)
def bigcycle(optimizer, last_epoch, base_lr=1e-4, max_lr=1e-2):
    print(" cycle LR ")
    scheduler = lr_scheduler.CyclicLR(optimizer, base_lr, max_lr,
                                      step_size_up=300, step_size_down=300,
                                      cycle_momentum=False)
    return scheduler
def get_warmup_scheduler(config, optimizer, epoch_size) -> Any:
    return lr_sched.CyclicLR(optimizer,
                             base_lr=0,
                             max_lr=config.optimizer.params.lr,
                             step_size_up=config.train.warmup.epochs * epoch_size,
                             step_size_down=0,
                             cycle_momentum=False,
                             mode='triangular')
def get_scheduler(optimizer, scheduler_type, **kwargs):
    """
    Return a learning rate scheduler. Implements three types of schedulers.

    Parameters
    ----------
    optimizer: torch.optim
        Optimizer picked for training.
    scheduler_type: str
        Scheduler type:
        'step' - decrease the learning rate by a factor of 10 every step.
        'cos' - decrease the learning rate using a cosine annealing schedule.
        'warmup' - increase the learning rate from zero to the initial value.
    **kwargs : dict
        learning_rate: float
            Initial learning rate.
        step_size: int
            Number of epochs between 10x learning rate decays.
            Use with the 'step' scheduler type only.
        cycle_len: int
            Number of epochs over which the learning rate decays from the initial value to zero.
            Use with the 'cos' scheduler type only.
        batch_per_epoch: int
            Number of batches in the dataset.
        warmup_epoch: int
            Number of epochs over which the learning rate rises from zero to the initial value.

    Returns
    -------
    scheduler: torch.optim.lr_scheduler

    See Also
    --------
    torch.optim.lr_scheduler.StepLR
    torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
    torch.optim.lr_scheduler.CyclicLR
    """
    if scheduler_type == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=kwargs['step_size'], gamma=0.1)
    elif scheduler_type == 'cos':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=kwargs['cycle_len'], eta_min=0)
    elif scheduler_type == 'warmup':
        scheduler = lr_scheduler.CyclicLR(
            optimizer,
            base_lr=kwargs['learning_rate'] / (kwargs['batch_per_epoch'] * kwargs['warmup_epoch']),
            max_lr=kwargs['learning_rate'],
            step_size_up=(kwargs['batch_per_epoch'] + 1) * kwargs['warmup_epoch'],
            step_size_down=0,
            cycle_momentum=False
        )
    return scheduler
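A minimal usage sketch for the 'warmup' branch of get_scheduler above, stepped once per batch; the model, loader size, and learning rate are illustrative assumptions:

from torch import nn, optim

model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)

# warm up over 5 epochs of 100 batches, stepping the scheduler once per batch
scheduler = get_scheduler(optimizer, 'warmup',
                          learning_rate=0.1, batch_per_epoch=100, warmup_epoch=5)
for _ in range(5 * 100):
    optimizer.step()
    scheduler.step()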
def create_lr_scheduler(self, lr_scheduler_type, optimizer, step_size=None,
                        restart_step=None, multi_step=None):
    """Create a learning rate scheduler.

    Args:
        lr_scheduler_type: scheduler type
        optimizer: optimizer
        step_size: required when using StepLR
    Return:
        my_lr_scheduler: learning rate scheduler
    """
    print('Creating lr scheduler: %s' % lr_scheduler_type)
    if lr_scheduler_type == 'StepLR':
        if not step_size:
            raise ValueError('You must specify step_size when you are using StepLR.')
        my_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.1)
    elif lr_scheduler_type == 'CosineLR':
        if not restart_step:
            raise ValueError('You must specify restart_step when you are using CosineLR.')
        my_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, restart_step)
    elif lr_scheduler_type == 'MultiStepLR':
        if not multi_step:
            raise ValueError('You must specify multi_step when you are using MultiStepLR.')
        my_lr_scheduler = lr_scheduler.MultiStepLR(optimizer, multi_step)
    elif lr_scheduler_type == 'ReduceLR':
        my_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.7,
                                                         patience=3, verbose=True)
    elif lr_scheduler_type == 'CyclicLR':
        # When using Adam, cycle_momentum must be set to False (it defaults to True);
        # the CLR author suggests step_size_up = (2-8) x (training iterations per epoch).
        my_lr_scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=1e-4, max_lr=2.6e-3,
                                                step_size_up=1805, cycle_momentum=False)
    elif lr_scheduler_type == 'Flat_CosAnneal':
        from torchtools.lr_scheduler import DelayerScheduler, DelayedCosineAnnealingLR
        my_lr_scheduler = DelayedCosineAnnealingLR(optimizer, 30, 80)
    return my_lr_scheduler
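For the CyclicLR branch above, a minimal sketch of the step_size_up heuristic with Adam; the model and the iterations-per-epoch count are illustrative assumptions:

from torch import nn, optim
from torch.optim import lr_scheduler

model = nn.Linear(10, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

iters_per_epoch = 450                 # assumed len(train_loader)
step_size_up = 4 * iters_per_epoch    # within the suggested 2-8 x iterations per epoch

# cycle_momentum=False because Adam's param groups have no 'momentum' entry to cycle
scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=1e-4, max_lr=2.6e-3,
                                  step_size_up=step_size_up, cycle_momentum=False)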
def cyclic_lr(optimizer, last_epoch, base_lr=0.001, max_lr=0.01, step_size_up=2000,
              step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None,
              scale_mode='cycle', cycle_momentum=False, base_momentum=0.8,
              max_momentum=0.9, coeff=1, **_) -> Any:
    def exp_range_scale_fn(x):
        res = gamma ** (x - 1)
        return res

    return lr_sched.CyclicLR(optimizer,
                             base_lr=base_lr * coeff,
                             max_lr=max_lr * coeff,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             mode=mode,
                             scale_fn=exp_range_scale_fn,
                             scale_mode=scale_mode,
                             cycle_momentum=cycle_momentum,
                             base_momentum=base_momentum,
                             max_momentum=max_momentum,
                             last_epoch=last_epoch)
def str2sched(scheduler: Schedulerlike, optimiser: Optimiser, dataloader: DataLoader,
              epochs: Numeric, patience: Numeric) -> Scheduler:
    if not isinstance(scheduler, str):
        return scheduler
    elif scheduler == 'reduce_on_plateau':
        if not isinstance(patience, int):
            patience = 20
        return sched.ReduceLROnPlateau(optimiser, patience=patience // 2)
    elif scheduler == 'cyclic':
        return sched.CyclicLR(optimiser, base_lr=1e-4, max_lr=1.)
    elif scheduler == 'step':
        return sched.StepLR(optimiser, step_size=5)
    elif scheduler == 'exp':
        return sched.ExponentialLR(optimiser, gamma=0.1)
    else:
        raise RuntimeError(f'Scheduler {scheduler} not found.')
def prep_scheduler(self):
    if self.args.scheduler == "step":
        self.scheduler = scheduler.StepLR(self.optimizer, step_size=50)
    elif self.args.scheduler == "exp":
        self.scheduler = scheduler.ExponentialLR(self.optimizer, gamma=0.999)
    elif self.args.scheduler == "cyclic":
        self.scheduler = scheduler.CyclicLR(self.optimizer, step_size_up=5000,
                                            base_lr=0.1 * self.args.lr,
                                            max_lr=self.args.lr)  # Originally step_size_up: 2000
    elif self.args.scheduler == "plateau":
        self.scheduler = scheduler.ReduceLROnPlateau(self.optimizer)
    else:
        raise ValueError("Scheduler not available: {}".format(self.args.scheduler))
def get_scheduler(optimizer, opt):
    """Return a learning rate scheduler

    Parameters:
        optimizer          -- the optimizer of the network
        opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.
                              opt.lr_policy is the tag of learning rate policy: linear | step | plateau | cosine | cyclic

    For 'linear', we keep the same learning rate for the first <opt.n_epochs> epochs
    and linearly decay the rate to zero over the next <opt.n_epochs_decay> epochs.
    For the other schedulers (step, plateau, cosine, and cyclic), we use the default PyTorch schedulers.
    See https://pytorch.org/docs/stable/optim.html for more details.
    """
    if opt.lr_policy == 'linear':
        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + opt.epoch_start - opt.n_epochs) / float(opt.n_epochs_decay + 2)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
    elif opt.lr_policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer, opt.lr, max_lr=opt.lr * 1.1, cycle_momentum=False)
    else:
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
def get_schedule(opt, optimizer, train_loader_len=None):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 100, 130], gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len * 4
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer, step_size_up=step_size,
                                          base_lr=opt.lr / 100, max_lr=opt.lr,
                                          cycle_momentum=False)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(optimizer,
                                      milestones=[step * 30, step * 60, step * 100, step * 130],
                                      gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, train_loader_len * 5, eta_min=1e-8)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer, train_loader_len * 5, eta_min=1e-8)
    elif opt.scheduler == 'sgdr':
        scheduler = CosineAnnealingWithRestartsLR(optimizer, train_loader_len * 5, eta_min=1e-10, T_mult=1.1)
    elif opt.scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    elif opt.scheduler == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
    else:
        scheduler = None
    return scheduler
def get_schedule(opt, optimizer, train_loader_len):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[25, 45, 70], gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len * 6
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer, step_size_up=step_size,
                                          base_lr=opt.lr / 100, max_lr=opt.lr)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(optimizer, milestones=[step * 25, step * 70, step * 90], gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, train_loader_len * 3, eta_min=opt.lr / 1000)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer, train_loader_len * 4, eta_min=1e-8)
    else:
        scheduler = None
    return scheduler
def get_scheduler(optimizer, opt):
    '''
    Rules for how to adjust the learning rate.

    Lambda: custom method to change the learning rate.
    StepLR: learning rate decays by gamma every step_size epochs.
    Plateau: reduce once the monitored quantity has stopped decreasing.
    '''
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.9,
                                                   threshold=0.01, patience=opt.patience)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=opt.lr, max_lr=opt.lr_max,
                                          step_size_up=opt.lr_step_size, cycle_momentum=False)
    elif opt.lr_policy == 'none':
        def lambda_rule(epoch):
            return 1.0
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    else:
        raise NotImplementedError('learning rate policy [{}] is not implemented'.format(opt.lr_policy))
    return scheduler
def get_scheduler(optimizer, opt):
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.5)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8,
                                                   threshold=0.01, patience=2, min_lr=opt.min_lr)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer, opt.min_lr, opt.lr, step_size_up=5,
                                          step_size_down=None, gamma=0.99, mode='exp_range',
                                          cycle_momentum=False)
    elif opt.lr_policy == 'cosine_restarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, opt.lr_decay_iters,
                                                             T_mult=1, eta_min=0)
    else:
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
def get_scheduler(args, optimizer, last_epoch, train_loader):
    assert args.scheduler_name in ['multistep', 'linear_warmup', 'onecycle',
                                   'snapshot_ensemble_scheduler', 'cyclic', None]
    if args.scheduler_name is None:
        return None
    if args.scheduler_name == 'multistep':
        return lr_scheduler.MultiStepLR(optimizer, milestones=[5, 10, 20, 30],
                                        gamma=0.1, last_epoch=last_epoch)
    if args.scheduler_name == 'linear_warmup':
        # Total number of training steps is [number of batches] x [number of epochs].
        total_steps = len(train_loader) * args.num_epochs
        warmup_frac = 0.3
        return get_linear_schedule_with_warmup(optimizer,
                                               num_warmup_steps=int(total_steps * warmup_frac),
                                               num_training_steps=total_steps)
    if args.scheduler_name == 'onecycle':
        return OneCycleLR(optimizer, n_epochs=args.num_epochs, n_batches=len(train_loader))
    if args.scheduler_name == 'snapshot_ensemble_scheduler':
        nb_cycles = 2
        return snapshot_ensemble_scheduler(optimizer, args.lr, args.num_epochs, nb_cycles, train_loader)
    if args.scheduler_name == 'cyclic':
        return lr_scheduler.CyclicLR(optimizer, 0.05, 0.01)
def get_optimizer(policy, args):
    if args.optimizer == "adam":
        optimizer = optim.Adam(policy.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(policy.parameters(), lr=args.lr)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(policy.parameters(), lr=args.lr)

    scheduler = args.opt_schedule
    if scheduler == "cyclic":
        scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer,
                                            max_lr=args.div_factor * args.lr,
                                            total_steps=args.num_episodes_train)
    elif scheduler == "cyclic_multi":
        scheduler = lr_scheduler.CyclicLR(optimizer=optimizer,
                                          base_lr=args.lr,
                                          max_lr=args.div_factor * args.lr)
    elif scheduler == "WR":
        T_0 = max(1, int(args.num_episodes_train / 1000))
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=T_0)
    return optimizer, scheduler
def test_CyclicLR(self, debug=True):
    """
    Usage:
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/pypi/torch1_7_0 -d /cache/pypi -t copytree
        for filename in /cache/pypi/*.whl; do
            pip install $filename
        done
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree -b /cache/$proj_root/code.zip
        cd /cache/$proj_root
        pip install -r requirements.txt

        export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
        export TIME_STR=1
        export PYTHONPATH=./exp:./stylegan2-pytorch:./
        python -c "from exp.tests.test_styleganv2 import Testing_stylegan2;\
          Testing_stylegan2().test_train_ffhq_128()"
    :return:
    """
    if 'CUDA_VISIBLE_DEVICES' not in os.environ:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    if 'TIME_STR' not in os.environ:
        os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
    from template_lib.v2.config_cfgnode.argparser import \
        (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)

    tl_opts = ' '.join(sys.argv[sys.argv.index('--tl_opts') + 1:]) if '--tl_opts' in sys.argv else ''
    print(f'tl_opts:\n {tl_opts}')

    command, outdir = get_command_and_outdir(self, func_name=sys._getframe().f_code.co_name, file=__file__)
    argv_str = f"""
                --tl_config_file none
                --tl_command none
                --tl_outdir {outdir}
                """
    args = setup_outdir_and_yaml(argv_str, return_cfg=True)

    import torch.nn as nn
    from torch.optim import SGD, lr_scheduler
    from matplotlib import pyplot as plt

    model = nn.Linear(3, 64)

    def create_optimizer():
        return SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

    def plot_lr(scheduler, title='', labels=['base'], nrof_epoch=100):
        lr_li = [[] for _ in range(len(labels))]
        epoch_li = list(range(nrof_epoch))
        for epoch in epoch_li:
            scheduler.step()  # step() computes the learning rate for the current epoch and applies it to the optimizer's param groups
            lr = scheduler.get_last_lr()  # learning rate of the current epoch
            for i in range(len(labels)):
                lr_li[i].append(lr[i])
        for lr, label in zip(lr_li, labels):
            plt.plot(epoch_li, lr, label=label)
        plt.grid()
        plt.xlabel('epoch')
        plt.ylabel('lr')
        plt.title(title)
        plt.legend()
        plt.show()

    optimizer = create_optimizer()
    scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=0.01, max_lr=0.1,
                                      step_size_up=25, step_size_down=10)
    plot_lr(scheduler, title='CyclicLR')
    pass
def __init__(self, opt, **kwargs):
    super(ClassificationTask, self).__init__(opt, kwargs["comm"], kwargs["device"])
    train_opt = opt['train']
    self.ran_cl = opt['rancl']
    self.num_classes = opt['datasets']['train']['num_classes']
    self.kd_transfer = opt['kd_transfer']
    self.att_transfer = opt['att_transfer']
    self.fsp_transfer = opt['fsp_transfer']
    self.w_transfer = opt['w_transfer']
    self.ws_transfer = opt['ws_transfer']
    self.replace_classifier = opt['varyOnData']
    # self.device = kwargs['device']
    self.device = torch.device("cuda:{}".format(kwargs['device'])
                               if torch.cuda.is_available() else "cpu")
    # self.logger.info(self.device)

    # -----early stopping-------
    self.best_weights = None
    self.best_metric = None
    self.wait = 0
    self.stop_training = False
    self.patience = opt['patience']

    # -----prepare for transfer-------------
    self.most_related_task = -1
    if self.fsp_transfer or self.att_transfer:
        self.activation = OrderedDict()

    # -----define network and load pretrained tasks-----
    data_name, model_name = opt['network'].split('-')
    self.model_name = model_name
    if data_name.lower() == 'mnist':
        self.network = getattr(mnist, model_name)(num_classes=self.num_classes).to(self.device)
    elif data_name.lower() == 'cifar':
        self.network = getattr(cifar, model_name)(num_classes=self.num_classes).to(self.device)
        if self.att_transfer and 'resnet' in model_name.lower():
            self.network.layer1[-1].register_forward_hook(self.get_activation('b1_out'))
            self.network.layer2[-1].register_forward_hook(self.get_activation('b2_out'))
            self.network.layer3[-1].register_forward_hook(self.get_activation('b3_out'))
            # self.network.layer4[-1].register_forward_hook(self.get_activation('b4_out'))
    elif data_name.lower() == 'imagenet':
        if opt['imagenet_pretrained']:
            self.network = getattr(imagenet, model_name)(pretrained=True)
            if opt['train_lastlayer']:
                for param in self.network.parameters():
                    param.requires_grad = False
            if 'resnet' in model_name or 'inception' in model_name:
                self.network.fc = nn.Linear(self.network.fc.in_features, self.num_classes)
            elif 'vgg' in model_name or 'alex' in model_name:
                self.network.classifier[6] = nn.Linear(4096, self.num_classes)
            elif 'squeeze' in self.model_name:
                self.network.num_classes = self.num_classes
                self.network.classifier[1] = nn.Conv2d(512, self.num_classes, kernel_size=1)
            elif 'dense' in self.model_name:
                num_features = self.network.classifier.in_features
                self.network.classifier = nn.Linear(num_features, self.num_classes)
            elif 'mobile' in self.model_name:
                num_features = self.network.classifier[-1].in_features
                self.network.classifier[-1] = nn.Linear(num_features, self.num_classes)
            self.network = self.network.to(self.device)
        else:
            self.network = getattr(imagenet, model_name)(num_classes=self.num_classes).to(self.device)
        if self.att_transfer:
            if 'resnet' in self.model_name.lower():
                self.network.layer1[-1].register_forward_hook(self.get_activation('b1_out'))
                self.network.layer2[-1].register_forward_hook(self.get_activation('b2_out'))
                self.network.layer3[-1].register_forward_hook(self.get_activation('b3_out'))
                # self.network.layer4[-1].register_forward_hook(self.get_activation('b4_out'))
            # elif 'dense' in self.model_name.lower():
    else:
        raise NotImplementedError('Network [{:s}, {:s}] is not defined.'.format(data_name, model_name))

    # make starts the same
    # if USE_HVD:
    #     hvd.broadcast_parameters(self.network.state_dict(), root_rank=0)
    # test if different task has the same initialization under same seed
    # for name, param in self.network.named_parameters():
    #     print(param[0])

    # load pretrained model if exists
    if self._is_solver():
        # init_weights(self.network)
        self.load()
    # print network
    # self.print_network()

    # -----define loss function------
    self.one_hot = False
    self.prob_est = False
    loss_type = train_opt['loss']
    if loss_type == 'l1':
        self.loss_func = nn.L1Loss().to(self.device)
        self.one_hot = True
    elif loss_type == 'l2':
        self.loss_func = nn.MSELoss().to(self.device)
        self.one_hot = True
    elif loss_type == 'l1_pro':
        self.loss_func = nn.L1Loss().to(self.device)
        self.prob_est = True
        self.one_hot = True
    elif loss_type == 'l2_pro':
        self.loss_func = nn.MSELoss().to(self.device)
        self.prob_est = True
        self.one_hot = True
    elif loss_type == 'cross_entropy':
        self.loss_func = nn.CrossEntropyLoss().to(self.device)
    elif loss_type == 'marginloss':
        self.loss_func = nn.MultiMarginLoss().to(self.device)
    else:
        raise NotImplementedError(
            'Loss type [{:s}] is not recognized. Please specify it from the following options:'.format(loss_type))

    if self.is_train:
        self.network.train()
        self.logits_loss = nn.KLDivLoss(reduction='batchmean').to(self.device)
        self.norm_loss = nn.MSELoss(reduction='batchmean').to(self.device)
        self.at_weight = train_opt['at_weight']
        self.kd_weight = train_opt['kd_weight']
        self.ws_weight = train_opt['ws_weight']

        # -----define optimizers-----
        optim_type = train_opt['optim']
        self.optimizer = getattr(optim, optim_type)(
            self.network.parameters(), **opt['train']['optimizer_param'])
        self.optimizers.append(self.optimizer)
        # self.lr = opt['train']['optimizer_param']['lr']

        # -----define schedulers-----
        for optimizer in self.optimizers:
            if train_opt['lr_scheme'] == 'MultiStepLR':
                scheduler = lr_scheduler.MultiStepLR(optimizer, **opt['train']['lr_scheme_param'])
            elif train_opt['lr_scheme'] == 'CycleLR':
                scheduler = lr_scheduler.CyclicLR(optimizer, **opt['train']['lr_scheme_param'])
            elif train_opt['lr_scheme'] == 'ReduceLROnPlateau':
                scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, **opt['train']['lr_scheme_param'])
            elif train_opt['lr_scheme'] is None:
                scheduler = None
            else:
                raise NotImplementedError('{} is not implemented!'.format(train_opt['lr_scheme']))
            self.schedulers.append(scheduler)

        # # -----register gradient clipping-----
        # for param in self.network.parameters():
        #     param.register_hook(lambda grad: torch.clamp(grad, -0.2, 0.2))

        # -----define log_dict-----
        self.log_dict = OrderedDict()

    self.transfer_count = 0
    # -----prepare for transfer-----
    if self.kd_transfer or self.att_transfer:
        # set seeds of all tasks the same to ensure the dataloader is in the same order
        torch.manual_seed(0)
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import resnet18
import matplotlib.pyplot as plt

model = resnet18(num_classes=2)

base_lr = 1e-4
max_lr = 0.1
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                                  step_size_up=20, step_size_down=20)  # 100 iterations

plt.figure()
x = list(range(100))
y = []
for epoch in range(100):
    scheduler.step()
    lr = scheduler.get_last_lr()  # get_lr() warns when called outside step(); use get_last_lr()
    # print(epoch, lr[0])
    y.append(lr[0])
plt.plot(x, y)
plt.savefig('lr_cyclic.png')
os.path.join(test_case_place, 'messages_{}'.format(target)))
writer = SummaryWriter(log_dir=os.path.join(test_case_place, 'eval_{}'.format(target)))
logger.info(config)
sources = list(filter(lambda e: e != target, datasets))
logger.info("Selected sources: {}".format(str(sources)))
logger.info("Selected target: {}".format(target))
logger.info("=" * 100)

## ==========================
# Initialize MDAN model
## ==========================
mdan = load_model('mdan', class_number, len(sources), extractor).to(device)
# optimizer = optim.Adadelta(mdan.parameters(), lr=learning_rate)
optimizer = optim.SGD(mdan.parameters(), lr=learning_rate, momentum=0.9)
scheduler = lr_scheduler.CyclicLR(optimizer, base_lr=learning_rate, max_lr=0.009)
# Decay LR by a factor of 0.1 every 7 epochs
# scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

resume_epoch = 0
if constant.resume_train is True:
    resume_epoch, model_state_dict, optimizer_state_dict = resume_checkpoint(
        test_case_place, file_name='best_model.pt')
    mdan.load_state_dict(model_state_dict)
    optimizer.load_state_dict(optimizer_state_dict)
    mdan.eval()
    logger.info("Resume training from epoch {}".format(resume_epoch))
else:
    mdan.train()
# scheduler_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    # args.gpu=gpu
    # if args.gpu is not None:
    #     print("Use GPU: {} for training".format(args.gpu))
    # For multiprocessing distributed training, rank needs to be the
    # global rank among all the processes
    ## args.rank=args.rank*ngpus_per_node+gpu
    ## dist.init_process_group(backend=args.dist_backend,init_method="env://",#args.dist_url,
    ##                         world_size=args.world_size,rank=args.rank)

    ## read the matlab matrix as Xvar and ResponseVar
    inputfile = args.inputfile
    f = h5py.File(inputdir + inputfile, 'r')
    data = f.get('inputstore')
    Xvar = np.array(data).transpose()
    data = f.get('outputstore')
    ResponseVar = np.array(data).transpose()
    data = f.get('samplevec')
    samplevec = np.array(data)
    samplevec = np.squeeze(samplevec.astype(int) - 1)  ## block index
    data = f.get('parastore')
    parastore = np.array(data)  ## omega normalizer
    data = f.get('nthetaset')
    nthetaset = int(np.array(data)[0][0])  ## block number
    data = f.get('ntime')
    ntimetotal = int(np.array(data)[0][0])  ## time seq including [training part, extrapolation part]
    f.close()
    ntime = args.timetrainlen
    # ResponseVarnorm=(ResponseVar-ResponseVar.mean(axis=0))/ResponseVar.std(axis=0)
    ResponseVarnorm = ResponseVar  ## the response variable was originally scaled by omega (scaling) but not centered; no further normalization will be done

    ## separation of train and test set
    nsample = Xvar.shape[0]
    ntheta = Xvar.shape[1]
    nspec = ResponseVarnorm.shape[1]
    simusamplevec = np.unique(samplevec)
    separation = ['train', 'validate', 'test']
    numsamptest_validate = math.floor(len(simusamplevec) * args.test_validate_ratio / 2)
    sampleind = set(range(0, nsample))
    simusampeind = set(range(0, nthetaset))
    ## a preset whole time range for test, validation (groups)
    simusamplevec_test = random.sample(simusampeind, numsamptest_validate)
    simusamplevec_validate = random.sample(simusampeind.difference(set(simusamplevec_test)), numsamptest_validate)
    ## index of training, testing, and validation
    testind = np.sort(np.where(np.isin(samplevec, simusamplevec_test)))[0]
    validateind = np.sort(np.where(np.isin(samplevec, simusamplevec_validate)))[0]
    testvalidte_ind_union = set(testind)
    testvalidte_ind_union = testvalidte_ind_union.union(set(validateind))
    trainind = np.sort(np.array(list(sampleind.difference(testvalidte_ind_union))))  # index for training set
    ntrainset = nthetaset - numsamptest_validate * 2
    sizeset = {"train": ntrainset, "validate": numsamptest_validate, "test": numsamptest_validate}
    ind_separa = {"train": trainind, "validate": validateind, "test": testind}
    ## training block index (time range) kept within the training time range
    timeind = {x: np.tile(np.concatenate((np.repeat(1, ntime), np.repeat(0, ntimetotal - ntime))), sizeset[x])
               for x in separation}
    time_in_ind = {}
    time_extr_ind = {}
    for x in separation:
        tempind = ind_separa[x]
        time_in_ind[x] = tempind[timeind[x] == 1]
        time_extr_ind[x] = tempind[timeind[x] == 0]
    ## train | validate | test "block" indices
    samplevec_separa = {x: samplevec[time_in_ind[x]] for x in separation}
    Xvar_separa = {x: Xvar[list(ind_separa[x]), :] for x in separation}
    Xvarnorm = np.empty_like(Xvar)
    # Xvar_norm_separa={}
    if args.normalize_flag == 'Y':
        ## normalization, if any, should happen after the train/test separation to prevent leaking
        ## normalization (X - mean) / sd
        ## normalization includes time; train and test need at least the same range or the same mean & sd for time
        del Xvar
        for x in separation:
            Xvartemp = Xvar_separa[x]
            meanvec = Xvartemp.mean(axis=0)
            stdvec = Xvartemp.std(axis=0)
            for coli in range(0, len(meanvec)):
                Xvartemp[:, coli] = (Xvartemp[:, coli] - meanvec[coli]) / stdvec[coli]
            # Xvar_norm_separa[x]=copy.deepcopy(temp_norm_mat)
            Xvarnorm[list(ind_separa[x]), :] = copy.deepcopy(Xvartemp)
    else:
        # Xvar_norm_separa={x: Xvar_separa[x] for x in separation}
        Xvarnorm = np.copy(Xvar)
        del Xvar

    # samplevecXX: repeat id vector, XXind: index vector
    inputwrap = {"Xvarnorm": Xvarnorm,
                 "ResponseVar": ResponseVar,
                 "trainind": trainind,
                 "testind": testind,
                 "validateind": validateind,
                 "ind_separa": ind_separa,
                 "time_in_ind": time_in_ind,
                 "time_extr_ind": time_extr_ind,
                 "samplevec": samplevec,
                 # "samplewholeselec": (samplewholeselec),
                 "samplevec_separa": samplevec_separa,
                 # "Xvarmean": (Xvarmean),  ## these two values, Xvarmean and Xvarstd, can be used for "new" test data not used in the original normalization
                 # "Xvarstd": (Xvarstd),
                 "inputfile": inputfile,
                 "ngpus_per_node": ngpus_per_node,  ## number of gpus
                 "numsamptest_validate": numsamptest_validate,  # number of testing samples
                 "timeind": timeind}
    with open("pickle_inputwrap.dat", "wb") as f1:
        pickle.dump(inputwrap, f1, protocol=4)  ## protocol=4 avoids "cannot serialize a bytes object larger than 4 GiB"
    del inputwrap
    Xtensor = {x: torch.Tensor(Xvarnorm[list(time_in_ind[x]), :]) for x in separation}
    Resptensor = {x: torch.Tensor(ResponseVar[list(time_in_ind[x]), :]) for x in separation}
    Dataset = {x: utils.TensorDataset(Xtensor[x], Resptensor[x]) for x in separation}
    # train_sampler=torch.utils.data.distributed.DistributedSampler(traindataset)
    nblock = int(args.batch_size / ntime)
    # nblocktest=int(args.test_batch_size/ntime)
    # traindataloader=utils.DataLoader(traindataset,batch_size=args.batch_size,
    #     shuffle=(train_sampler is None),num_workers=args.workers,pin_memory=True,sampler=train_sampler)
    # testdataloader=utils.DataLoader(testdataset,batch_size=args.test_batch_size,
    #     shuffle=False,num_workers=args.workers,pin_memory=True,sampler=test_sampler)
    if args.sampler == "block":
        # block sampler
        sampler = {x: batch_sampler_block(Dataset[x], samplevec_separa[x], nblock=nblock) for x in separation}
        dataloader = {x: utils.DataLoader(Dataset[x], num_workers=args.workers, pin_memory=True,
                                          batch_sampler=sampler[x]) for x in separation}
    elif args.sampler == "individual":
        # individual random sampler
        dataloader = {x: utils.DataLoader(Dataset[x], batch_size=args.batch_size, shuffle=True,
                                          num_workers=args.workers, pin_memory=True) for x in separation}
    args.mintime = np.min(Xvarnorm[:, -1])
    ninnersize = int(args.layersize_ratio * ntheta)

    ## store data
    with open("pickle_dataloader.dat", "wb") as f1:
        pickle.dump(dataloader, f1, protocol=4)
    dimdict = {"nsample": (nsample, int),
               "ntheta": (ntheta, int),
               "nspec": (nspec, int),
               "ninnersize": (ninnersize, int)}
    args.nsample = nsample
    args.ntheta = ntheta
    args.nspec = nspec
    with open("pickle_dimdata.dat", "wb") as f3:
        pickle.dump(dimdict, f3, protocol=4)
    ## free up some space (not currently set)

    ## create model
    if bool(re.search("[rR]es[Nn]et", args.net_struct)):
        model = models.__dict__[args.net_struct](ninput=ntheta, num_response=nspec, p=args.p,
                                                 ncellscale=args.layersize_ratio)
    elif args.rnn_struct == 1:
        model = models.__dict__[args.net_struct](ntheta=ntheta, nspec=nspec, num_layer=args.num_layer,
                                                 ncellscale=args.layersize_ratio, p=args.p)
    else:
        model = models.__dict__[args.net_struct](ninput=ntheta, num_response=nspec, nlayer=args.num_layer,
                                                 p=args.p, ncellscale=args.layersize_ratio,
                                                 batchnorm_flag=(args.batchnorm_flag == 'Y'))
    # model.eval()
    # if args.gpu is not None:
    #     torch.cuda.set_device(args.gpu)
    #     model.cuda(args.gpu)
    #     # When using a single GPU per process and per
    #     # DistributedDataParallel, we need to divide the batch size
    #     # ourselves based on the total number of GPUs we have
    #     args.batch_size=int(args.batch_size/ngpus_per_node)
    #     args.workers=int((args.workers+ngpus_per_node-1)/ngpus_per_node)
    #     model=torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    # else:
    #     # DistributedDataParallel will divide and allocate batch_size to all
    #     # available GPUs if device_ids are not set
    #     model=torch.nn.DataParallel(model).cuda()
    model = torch.nn.DataParallel(model)
    if args.gpu_use == 1:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    model.to(device)

    if args.optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum)
    elif args.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    elif args.optimizer == "nesterov_momentum":
        optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, nesterov=True)

    if args.scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.5)
    elif args.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5)
    elif args.scheduler == 'cyclelr':
        scheduler = lr_scheduler.CyclicLR(optimizer, args.learning_rate / 100, args.learning_rate,
                                          step_size_up=1000, cycle_momentum=False, mode="triangular2")
    else:
        scheduler = None
    cudnn.benchmark = True

    ## model training
    for epoch in range(1, args.epochs + 1):
        msetr = train(args, model, dataloader["train"], optimizer, epoch, device, ntime, scheduler)
        msevalidate = test(args, model, dataloader["validate"], device, ntime)
        if scheduler is not None:
            if args.scheduler == 'step':
                scheduler.step()
            elif args.scheduler == 'plateau':
                scheduler.step(msevalidate)  ## based on the validation set; fine because we use a train|validate|test separation
        if epoch == 1:
            best_msevalidate = msevalidate
            best_train_mse = msetr
        # is_best=acc1>best_acc1
        is_best = msevalidate < best_msevalidate
        is_best_train = msetr < best_train_mse
        best_msevalidate = min(msevalidate, best_msevalidate)
        best_train_mse = min(msetr, best_train_mse)
        save_checkpoint({
            'epoch': epoch,
            'arch': args.net_struct,
            'state_dict': model.state_dict(),
            'best_acc1': best_msevalidate,
            'best_acctr': best_train_mse,
            'optimizer': optimizer.state_dict(),
            'args_input': args,
        }, is_best, is_best_train)

    print('\nFinal test MSE\n')
    acctest = test(args, model, dataloader["test"], device, ntime)