def test():

    from torch.optim import SGD
    from argument import get_args
    from model import Efficientnet_Bifpn_ATSS
    args = get_args()
    model = Efficientnet_Bifpn_ATSS(args, load_backboe_weight=False)
    optimizer = SGD(
        model.backbone.backbone_net.parameters(),
        lr=0,
        momentum=0.9,
        weight_decay=0.0001,
        nesterov=True,
    )
    optimizer.add_param_group({
        'params': list(model.backbone.bifpn.parameters()),
        'lr': 0,
        'momentum': 0.9,
        'weight_decay': 0.0001,
        'nesterov': True
    })

    niters = 1200
    # GluonLRScheduler is assumed to be defined in (or imported by) this module
    warmup_scheduler = GluonLRScheduler(optimizer,
                                        mode='linear',
                                        nepochs=1,
                                        iters_per_epoch=50,
                                        target_lr=[1e-4, 1e-3])
    scheduler = GluonLRScheduler(optimizer,
                                 mode='cosine',
                                 nepochs=24,
                                 iters_per_epoch=50)
    scheduler.set_baselrs(warmup_scheduler.target_lr)
    initial_scheduler = False
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=10)
    lrs_1 = []
    lrs_2 = []
    lrs_1.append(optimizer.param_groups[0]['lr'])
    lrs_2.append(optimizer.param_groups[1]['lr'])
    for i in range(niters):
        optimizer.step()
        if i < warmup_scheduler.niters:
            warmup_scheduler.step()
        else:
            scheduler.step()
        lrs_1.append(optimizer.param_groups[0]['lr'])
        lrs_2.append(optimizer.param_groups[1]['lr'])

    from matplotlib import pyplot as plt
    steps = list(range(niters + 1))
    fig = plt.figure()
    ax_1 = fig.add_axes([0.1, 0.2, 0.35, 0.35])
    ax_2 = fig.add_axes([0.6, 0.2, 0.35, 0.35])
    ax_1.plot(steps, lrs_1, label='learning rate group_1')
    ax_2.plot(steps, lrs_2, label='learning rate group_2')
    ax_1.legend()
    ax_2.legend()
    plt.show()
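
For comparison, the same warmup-then-cosine shape can be sketched with PyTorch's built-in schedulers. This is only an illustrative sketch (the helper name test_builtin_schedulers is hypothetical, and torch >= 1.10 is assumed); it is not part of the original GluonLRScheduler example, but mirrors its settings (50 warmup iterations, 1200 iterations total).

def test_builtin_schedulers():
    import torch
    from torch.optim import SGD
    from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR

    # A single dummy parameter is enough to drive the schedulers
    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = SGD(params, lr=1e-3, momentum=0.9)

    warmup_iters = 50      # one warmup "epoch" of 50 iterations, as above
    total_iters = 1200

    scheduler = SequentialLR(
        optimizer,
        schedulers=[
            # linear warmup from 1% of the base lr up to the base lr
            LinearLR(optimizer, start_factor=0.01, total_iters=warmup_iters),
            # cosine decay over the remaining iterations
            CosineAnnealingLR(optimizer, T_max=total_iters - warmup_iters),
        ],
        milestones=[warmup_iters],
    )

    lrs = [optimizer.param_groups[0]['lr']]
    for _ in range(total_iters):
        optimizer.step()
        scheduler.step()
        lrs.append(optimizer.param_groups[0]['lr'])
    return lrs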
Example #2
from torch import nn
from torch.optim import Optimizer


def get_optimizer(
    model: nn.Module,
    optimizer_name: str,
    learning_rate: float,
    weight_decay: float = 1e-5,
    no_weight_decay_on_bias: bool = False,
    eps: float = 1e-5,
    **kwargs,
) -> Optimizer:
    """
    Construct an Optimizer for given model
    Args:
        model: Model to optimize. Only parameters that require_grad will be used
        optimizer_name: Name of the optimizer. Case-insensitive
        learning_rate: Target learning rate (regardless of the scheduler)
        weight_decay: Target weight decay
        no_weight_decay_on_bias: Whether to disable weight decay on bias parameters
        eps: Default epsilon for Adam-like optimizers.
        **kwargs: Additional parameters for optimizer

    Returns:

    """
    from torch.optim import ASGD, SGD, Adam, RMSprop, AdamW
    from torch_optimizer import RAdam, Lamb, DiffGrad, NovoGrad, Ranger

    # Optimizer parameter groups
    default_pg, biases_pg = [], []

    for k, v in model.named_parameters():
        if v.requires_grad:
            if k.endswith(".bias"):
                biases_pg.append(v)  # biases
            else:
                default_pg.append(v)  # all else

    if no_weight_decay_on_bias:
        parameters = default_pg
    else:
        parameters = default_pg + biases_pg

    optimizer: Optimizer = None

    if optimizer_name.lower() == "sgd":
        optimizer = SGD(
            parameters,
            lr=learning_rate,
            momentum=0.9,
            nesterov=True,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "asgd":
        optimizer = ASGD(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "adam":
        optimizer = Adam(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "rms":
        optimizer = RMSprop(parameters,
                            learning_rate,
                            weight_decay=weight_decay,
                            **kwargs)
    elif optimizer_name.lower() == "adamw":
        optimizer = AdamW(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "radam":
        optimizer = RAdam(
            parameters,
            lr=learning_rate,
            weight_decay=weight_decay,
            eps=eps,
            **kwargs,
        )
    elif optimizer_name.lower() == "ranger":
        optimizer = Ranger(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "lamb":
        optimizer = Lamb(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "diffgrad":
        optimizer = DiffGrad(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "novograd":
        optimizer = NovoGrad(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "fused_lamb":
        from apex.optimizers import FusedLAMB

        optimizer = FusedLAMB(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "fused_sgd":
        from apex.optimizers import FusedSGD

        optimizer = FusedSGD(
            parameters,
            lr=learning_rate,
            momentum=0.9,
            nesterov=True,
            weight_decay=weight_decay,
            **kwargs,
        )
    elif optimizer_name.lower() == "fused_adam":
        from apex.optimizers import FusedAdam

        optimizer = FusedAdam(
            parameters,
            lr=learning_rate,
            eps=eps,
            weight_decay=weight_decay,
            adam_w_mode=True,
            **kwargs,
        )
    else:
        raise KeyError(f"Cannot get optimizer by name {optimizer_name}")

    # Currently supports either no weight decay on biases or per-group learning rates, not both at once
    if no_weight_decay_on_bias:
        optimizer.add_param_group({"params": biases_pg, "weight_decay": 0})

    return optimizer
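
A short usage sketch follows; the tiny model below is purely illustrative and not part of the original snippet.

if __name__ == "__main__":
    # Hypothetical usage: a toy model just to exercise get_optimizer
    model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
    optimizer = get_optimizer(
        model,
        optimizer_name="adamw",
        learning_rate=3e-4,
        weight_decay=1e-5,
        no_weight_decay_on_bias=True,
    )
    # Biases land in a second param group with weight_decay == 0
    print([group["weight_decay"] for group in optimizer.param_groups])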