def test():
    from torch.optim import SGD

    from argument import get_args
    from model import Efficientnet_Bifpn_ATSS
    from scheduler import GluonLRScheduler  # NOTE: assumed import path; GluonLRScheduler is used below but was not imported in the original

    args = get_args()
    model = Efficientnet_Bifpn_ATSS(args, load_backboe_weight=False)

    # Two parameter groups (backbone net and BiFPN), both starting at lr=0 so the
    # warmup scheduler can ramp them up to their respective target learning rates.
    optimizer = SGD(
        model.backbone.backbone_net.parameters(),
        lr=0,
        momentum=0.9,
        weight_decay=0.0001,
        nesterov=True,
    )
    optimizer.add_param_group({
        'params': list(model.backbone.bifpn.parameters()),
        'lr': 0,
        'momentum': 0.9,
        'weight_decay': 0.0001,
        'nesterov': True,
    })

    niters = 1200

    # One epoch of linear warmup, then cosine decay starting from the warmup targets.
    warmup_scheduler = GluonLRScheduler(
        optimizer, mode='linear', nepochs=1, iters_per_epoch=50, target_lr=[1e-4, 1e-3]
    )
    scheduler = GluonLRScheduler(optimizer, mode='cosine', nepochs=24, iters_per_epoch=50)
    scheduler.set_baselrs(warmup_scheduler.target_lr)

    initial_scheduler = False  # unused
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

    # Record the learning rate of each group after every step.
    lrs_1 = [optimizer.param_groups[0]['lr']]
    lrs_2 = [optimizer.param_groups[1]['lr']]
    for i in range(niters):
        optimizer.step()
        if i < warmup_scheduler.niters:
            warmup_scheduler.step()
        else:
            scheduler.step()
        lrs_1.append(optimizer.param_groups[0]['lr'])
        lrs_2.append(optimizer.param_groups[1]['lr'])

    # Plot the learning-rate curves of both parameter groups side by side.
    from matplotlib import pyplot as plt

    steps = list(range(niters + 1))
    fig = plt.figure()
    fig.tight_layout()
    ax_1 = fig.add_axes([0.1, 0.2, 0.35, 0.35])
    ax_2 = fig.add_axes([0.6, 0.2, 0.35, 0.35])
    line_1, = ax_1.plot(steps, lrs_1)
    line_2, = ax_2.plot(steps, lrs_2)
    line_1.set_label('learning rate group_1')
    line_2.set_label('learning rate group_2')
    ax_1.legend()
    ax_2.legend()
    plt.show()
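
# Optional entry point for the visual check above. A minimal sketch: it assumes this
# module is executed directly as a script and that matplotlib has a working display
# backend in the current environment.
if __name__ == "__main__":
    test()
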
# Imports required by the type annotations in get_optimizer below.
from torch import nn
from torch.optim import Optimizer


def get_optimizer(
    model: nn.Module,
    optimizer_name: str,
    learning_rate: float,
    weight_decay: float = 1e-5,
    no_weight_decay_on_bias: bool = False,
    eps: float = 1e-5,
    **kwargs,
) -> Optimizer:
    """
    Construct an optimizer for a given model.

    Args:
        model: Model to optimize. Only parameters that require grad will be used.
        optimizer_name: Name of the optimizer. Case-insensitive.
        learning_rate: Target learning rate (regardless of the scheduler).
        weight_decay: Target weight decay.
        no_weight_decay_on_bias: Whether to disable weight decay on bias parameters.
        eps: Default epsilon for Adam-like optimizers.
        **kwargs: Additional parameters for the optimizer.

    Returns:
        The constructed Optimizer instance.
    """
    from torch.optim import ASGD, SGD, Adam, AdamW, RMSprop
    from torch_optimizer import DiffGrad, Lamb, NovoGrad, RAdam, Ranger

    # Split parameters into two groups (biases and everything else) so that
    # weight decay can optionally be disabled on the biases.
    default_pg, biases_pg = [], []
    for k, v in model.named_parameters():
        if v.requires_grad:
            if k.endswith(".bias"):
                biases_pg.append(v)  # biases
            else:
                default_pg.append(v)  # all else

    if no_weight_decay_on_bias:
        parameters = default_pg
    else:
        parameters = default_pg + biases_pg

    optimizer: Optimizer
    if optimizer_name.lower() == "sgd":
        optimizer = SGD(
            parameters,
            lr=learning_rate,
            momentum=0.9,
            nesterov=True,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "asgd":
        optimizer = ASGD(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "adam":
        optimizer = Adam(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "rms":
        optimizer = RMSprop(parameters, lr=learning_rate, weight_decay=weight_decay, **kwargs)
    elif optimizer_name.lower() == "adamw":
        optimizer = AdamW(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "radam":
        optimizer = RAdam(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "ranger":
        optimizer = Ranger(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "lamb":
        optimizer = Lamb(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "diffgrad":
        optimizer = DiffGrad(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "novograd":
        optimizer = NovoGrad(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "fused_lamb":
        from apex.optimizers import FusedLAMB

        optimizer = FusedLAMB(parameters, learning_rate, eps=eps, weight_decay=weight_decay, **kwargs)
    elif optimizer_name.lower() == "fused_sgd":
        from apex.optimizers import FusedSGD

        optimizer = FusedSGD(parameters, learning_rate, momentum=0.9, nesterov=True, weight_decay=weight_decay, **kwargs)
    elif optimizer_name.lower() == "fused_adam":
        from apex.optimizers import FusedAdam

        optimizer = FusedAdam(parameters, learning_rate, eps=eps, weight_decay=weight_decay, adam_w_mode=True, **kwargs)
    else:
        raise KeyError(f"Cannot get optimizer by name {optimizer_name}")

    # Bias parameters get their own group with weight decay disabled.
    # Currently either no_wd or per-group lr is supported, not both.
    if no_weight_decay_on_bias:
        optimizer.add_param_group({"params": biases_pg, "weight_decay": 0})

    return optimizer
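
# Hedged usage sketch for get_optimizer (same dependencies as the function itself).
# `TinyNet` is a hypothetical toy module used only for illustration; any nn.Module works.
# With no_weight_decay_on_bias=True the returned optimizer ends up with two param groups:
# the default group with the requested weight decay and a bias-only group with weight_decay=0.
def _demo_get_optimizer():
    class TinyNet(nn.Module):  # hypothetical model, not part of this repository
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(8, 2)

        def forward(self, x):
            return self.fc(x)

    opt = get_optimizer(TinyNet(), "adamw", learning_rate=1e-3, no_weight_decay_on_bias=True)
    for i, group in enumerate(opt.param_groups):
        print(f"group {i}: {len(group['params'])} tensors, weight_decay={group['weight_decay']}")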