Example #1
def get_optimizer(
    name: str,
    model_params: Iterable,
    lr: float = 1e-3,
    wd: float = 0,
    lookahead: bool = False,
):
    if name == "adam":
        base_optimizer = optim.Adam(model_params, lr=lr, weight_decay=wd)
    elif name == "sgd":
        base_optimizer = optim.SGD(model_params,
                                   lr=lr,
                                   weight_decay=wd,
                                   momentum=0.9,
                                   nesterov=True)
    elif name == "radam":
        base_optimizer = RAdam(model_params, lr=lr, weight_decay=wd)
    elif name == "ralamb":
        base_optimizer = Ralamb(model_params, lr=lr, weight_decay=wd)
    elif name == "adabelief":
        base_optimizer = AdaBelief(model_params, lr=lr, weight_decay=wd)
    else:
        raise ValueError(f"Unknown optimizer: {name}")

    # Use lookahead
    if lookahead:
        optimizer = Lookahead(base_optimizer)
    else:
        optimizer = base_optimizer

    return optimizer
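A minimal usage sketch for this factory (not from the original source; it assumes a `model` is already built and the optimizer classes above are imported):

# build an RAdam optimizer wrapped in Lookahead for all model parameters
optimizer = get_optimizer("radam", model.parameters(), lr=3e-4, wd=1e-2, lookahead=True)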
Example #2
def create_optimizer(args, model, filter_bias_and_bn=True):
    opt_lower = args.opt.lower()
    weight_decay = args.weight_decay

    layerwise_params = {
        "encoder*": dict(lr=args.lr * args.ev, weight_decay=args.weight_decay)
    }
    parameters = process_model_params(model, layerwise_params=layerwise_params)

    opt_args = dict(lr=args.lr, weight_decay=weight_decay)
    if hasattr(args, 'opt_eps') and args.opt_eps is not None:
        opt_args['eps'] = args.opt_eps
    if hasattr(args, 'opt_betas') and args.opt_betas is not None:
        opt_args['betas'] = args.opt_betas

    opt_split = opt_lower.split('_')
    opt_lower = opt_split[-1]
    if opt_lower == 'sgd' or opt_lower == 'nesterov':
        opt_args.pop('eps', None)
        optimizer = SGD(parameters,
                        momentum=args.momentum,
                        nesterov=True,
                        **opt_args)
    elif opt_lower == 'momentum':
        opt_args.pop('eps', None)
        optimizer = SGD(parameters,
                        momentum=args.momentum,
                        nesterov=False,
                        **opt_args)
    elif opt_lower == 'adam':
        optimizer = Adam(parameters, **opt_args)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters, **opt_args)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters, **opt_args)
    else:
        raise ValueError(f"Invalid optimizer: {opt_lower}")

    if len(opt_split) > 1:
        if opt_split[0] == 'lookahead':
            _logger.info('Using lookahead')
            optimizer = Lookahead(optimizer)

    return optimizer
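A minimal usage sketch for the factory above (an assumption, not part of the original source; it presumes `model` is already built and that `process_model_params` matches the `encoder*` prefix against the model's parameter names):

from types import SimpleNamespace

# the "lookahead_" prefix is split off first, so "lookahead_radam" -> Lookahead(RAdam(...))
args = SimpleNamespace(opt="lookahead_radam", lr=3e-4, weight_decay=1e-2,
                       ev=0.1, momentum=0.9, opt_eps=None, opt_betas=None)
optimizer = create_optimizer(args, model)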
Example #3
    ])
    ),
    batch_size=1, shuffle=False, num_workers=args.nw)
print(len(train_data))
print(len(val_data))

SEED = 2020
utils.set_global_seed(SEED)
utils.prepare_cudnn(deterministic=True)
loaders = {'train': train_data,
           'valid': val_data}
criterion = nn.CrossEntropyLoss()

model = ENet('efficientnet-b0')
print(model)
optimizer = Lookahead(RAdam(
    model.parameters(), lr=args.lr, weight_decay=args.wd))
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.25, patience=3)
num_epochs = args.e
logdir = "./logs/effnet-b0"
fp16_params = None  # dict(opt_level="O1")
runner = SupervisedRunner(device='cuda')


runner.train(
    model=model,
    criterion=criterion,
    scheduler=scheduler,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=fp16_params,
    verbose=True,
)
Example #4
base_optimizer = RAdam([
    {
        'params': model.decoder.parameters(),
        'lr': learning_rate
    },
    {
        'params': model.encoder.parameters(),
        'lr': 1e-4
    },
    {
        'params': model.segmentation_head.parameters(),
        'lr': learning_rate
    },
], weight_decay=0.0003)
# simpler alternative: a single parameter group
# base_optimizer = RAdam(model.parameters(), weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)
criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": BCEWithLogitsLoss()  # FocalLossBinary()
}
runner = SupervisedRunner(device='cuda',
                          input_key="image",
                          input_target_key="mask")
scheduler = OneCycleLR(optimizer,
                       max_lr=0.0016,
                       steps_per_epoch=1,
                       epochs=num_epochs)
# scheduler = OneCycleLRWithWarmup(
#     optimizer,
#     num_steps=num_epochs,
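These snippets use Lookahead with its defaults; a sketch of passing the hyperparameters explicitly is shown below. The `k`/`alpha` argument names follow the reference Lookahead implementation and are an assumption about the particular port imported here.

base_optimizer = RAdam(model.parameters(), lr=1e-3, weight_decay=0.0003)
# assumed arg names: sync the slow weights every k steps, interpolating with factor alpha
optimizer = Lookahead(base_optimizer, k=5, alpha=0.5)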
Example #5
def smart_way():
    args = parse_arguments()
    SEED = args.seed
    ROOT = Path(args.dataset)

    img_paths, targets = retrieve_dataset(ROOT)

    train_transforms = compose(
        [resize_transforms(),
         hard_transforms(),
         post_transforms()])
    valid_transforms = compose([pre_transforms(), post_transforms()])
    loaders = get_loaders(
        img_paths=img_paths,
        targets=targets,
        random_state=SEED,
        batch_size=8,
        train_transforms_fn=train_transforms,
        valid_transforms_fn=valid_transforms,
    )

    logdir = './table_recognition/nn/regression/logs6/'

    model = torch.load(
        './table_recognition/nn/segmentation/logs/resnet18_PSPNet/save/best_model.pth'
    )
    model: RegressionFromSegmentation = RegressionFromSegmentation(model)
    model.to(utils.get_device())

    learning_rate = 0.001
    encoder_learning_rate = 0.0005

    layerwise_params = {
        "encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)
    }
    model_params = utils.process_model_params(
        model, layerwise_params=layerwise_params)
    base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
    optimizer = Lookahead(base_optimizer)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.25,
                                                     patience=2)

    device = utils.get_device()

    runner = CustomRunner2(device=device)
    runner.train(model=model,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=logdir,
                 num_epochs=1000,
                 verbose=True,
                 load_best_on_end=True,
                 main_metric='loss')

    best_model_save_dir = os.path.join(logdir, 'save')
    os.makedirs(best_model_save_dir, exist_ok=True)
    torch.save(model, os.path.join(
        best_model_save_dir,
        'best_model.pth'))  # save best model (by valid loss)
    batch = next(iter(loaders["valid"]))
    try:
        runner.trace(
            model=model, batch=batch, logdir=logdir,
            fp16=False)  # optimized version (not all models can be traced)
    except Exception:
        pass
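Because the whole module is saved with `torch.save`, it can be restored later without rebuilding the architecture; a short sketch reusing the paths above (the model's class definitions must still be importable):

model = torch.load(os.path.join(logdir, 'save', 'best_model.pth'))
model.eval()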
Example #6
def main():
    train_dataset = dataset.SentimentDataset(
        texts=df_train['sentences'].values.tolist(),
        labels=df_train['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    valid_dataset = dataset.SentimentDataset(
        texts=df_valid['sentences'].values.tolist(),
        labels=df_valid['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    train_val_loaders = {
        "train":
        DataLoader(dataset=train_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=True,
                   num_workers=2,
                   pin_memory=True),
        "valid":
        DataLoader(dataset=valid_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=False,
                   num_workers=2,
                   pin_memory=True)
    }

    dBert = model.DistilBert()

    param_optim = list(dBert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    criterion = nn.CrossEntropyLoss()

    base_optimizer = RAdam([
        {
            'params': [p for n, p in param_optim
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': config.WEIGHT_DECAY
        },
        {
            'params': [p for n, p in param_optim
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        },
    ])
    optimizer = Lookahead(base_optimizer)
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=config.NUM_EPOCHS,
        lr_range=(config.LEARNING_RATE, 1e-8),
        init_lr=config.LEARNING_RATE,
        warmup_steps=0,
    )
    runner = SupervisedRunner(input_key=("input_ids", "attention_mask"))
    # model training
    runner.train(model=dBert,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=train_val_loaders,
                 callbacks=[
                     AccuracyCallback(num_classes=2),
                     OptimizerCallback(accumulation_steps=config.ACCUM_STEPS),
                 ],
                 fp16=config.FP_16,
                 logdir=config.LOG_DIR,
                 num_epochs=config.NUM_EPOCHS,
                 verbose=True)
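The bias/LayerNorm split above is a common weight-decay pattern; a small reusable sketch of the same grouping logic (hypothetical helper, not part of the original code):

def split_weight_decay_params(named_params, weight_decay,
                              no_decay=('bias', 'LayerNorm.bias', 'LayerNorm.weight')):
    # parameters whose names match no_decay get weight_decay=0.0
    decay = [p for n, p in named_params if not any(nd in n for nd in no_decay)]
    skip = [p for n, p in named_params if any(nd in n for nd in no_decay)]
    return [{'params': decay, 'weight_decay': weight_decay},
            {'params': skip, 'weight_decay': 0.0}]

base_optimizer = RAdam(
    split_weight_decay_params(list(dBert.named_parameters()), config.WEIGHT_DECAY))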
Example #7
def train(config, config_save_name):
    # reproducibility
    seed = config.get("_SEED", 42)
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    model_config = config["_MODEL_CONFIG"]
    train_dataloader_config = config["_TRAIN_DATALOADER_CONFIG"]
    val_dataloader_config = config["_VAL_DATALOADER_CONFIG"]
    loss_metric_config = config["_LOSSES_METRICS_CONFIG"]
    experiment_dir = config["_EXPERIMENT_DIR"]
    checkpoints_dir = os.path.join(experiment_dir, CHECKPOINTS)
    results_dir = os.path.join(experiment_dir, RESULTS)
    tb_logs_dir_train = os.path.join(experiment_dir, TB_LOGS, "train")
    tb_logs_dir_val = os.path.join(experiment_dir, TB_LOGS, "val")
    config_out = os.path.join(experiment_dir, config_save_name)

    saved_checkpoint = config["_MODEL_CHECKPOINT"]
    checkpoint_format = config["_NEW_CKP_FORMAT"]
    loss_key = config["_OPTIMIZATION_LOSS"]
    optim_config = config["_OPTIMIZER"]
    lookahead_config = config["_LOOKAHEAD_OPTIM"]
    lr_scheduler_config = config["_LR_SCHEDULER"]
    experiment_data = config["_EXPERIMENT_DATA"]
    val_plotting_dict = config.get("_VAL_PLOTTING")

    model = get_object_instance(model_config)()
    global_step = 0
    if saved_checkpoint is not None:
        global_step = load_model_data(saved_checkpoint,
                                      model,
                                      new_format=checkpoint_format)
    train_loader = get_object_instance(train_dataloader_config)()
    val_loader = get_object_instance(val_dataloader_config)()

    print("Train dataset length: {}".format(len(train_loader.dataset)))
    print("Validation dataset length: {}".format(len(val_loader.dataset)))
    print("Valiation dataset patients:\n{}".format(
        val_loader.dataset.patients))

    loss_metric = get_object_instance(loss_metric_config)()
    optimizer_getter = get_object_instance(optim_config)
    lr_scheduler_getter = get_object_instance(lr_scheduler_config)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    os.makedirs(checkpoints_dir)
    os.makedirs(results_dir)
    os.makedirs(tb_logs_dir_train)
    os.makedirs(tb_logs_dir_val)
    with open(config_out, "w") as f:
        yaml.dump(config, f, default_flow_style=False)

    # create configs for val and test
    val_config, val_out_dir = create_config(config, "val")
    test_config, test_out_dir = create_config(config, "test")
    os.makedirs(val_out_dir)
    os.makedirs(test_out_dir)

    val_path = os.path.join(val_out_dir, "val.yaml")
    print("Creating evaluation config for val: {}".format(val_path))
    with open(val_path, "w") as f:
        yaml.dump(val_config, f, default_flow_style=False)

    test_path = os.path.join(test_out_dir, "test.yaml")
    print("Creating evaluation config for test: {}".format(test_path))
    with open(test_path, "w") as f:
        yaml.dump(test_config, f, default_flow_style=False)

    train_writer = SummaryWriter(tb_logs_dir_train)
    val_writer = SummaryWriter(tb_logs_dir_val)

    model_params = model.parameters()
    if config.get("_MODEL_PARAM_PREP") is not None:
        model_prep = get_object_instance(config.get("_MODEL_PARAM_PREP"))
        model_params = model_prep(model)

    optimizer = optimizer_getter(model_params)
    if lookahead_config["use_lookahead"]:
        optimizer = Lookahead(optimizer, **lookahead_config["params"])
    lr_scheduler = lr_scheduler_getter(optimizer)

    model = model.to(device)
    model.train()
    num_epochs = experiment_data["num_epochs"]
    batch_log_interval = experiment_data["batch_log_interval"]
    # "low" or "high"
    best_metric_type = experiment_data["best_metric_type"]
    saving_metric = experiment_data["saving_metric"]
    previous = float("inf") if best_metric_type == "low" else float("-inf")

    output_example_idx = (hasattr(train_loader.dataset, "output_idx")
                          and train_loader.dataset.output_idx)

    for epoch in range(num_epochs):
        for output in train_loader:
            if output_example_idx:
                x_batch, y_batch, index = output
                extra_dict = train_loader.dataset.get_extra_dict(index)
                extra_dict = tensor_dict_to_device(extra_dict, device)
            else:
                x_batch, y_batch = output
                extra_dict = None

            optimizer.zero_grad()
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            y_batch_hat = model(x_batch)
            losses_and_metrics = loss_metric(y_batch_hat, y_batch, extra_dict)
            loss = losses_and_metrics[loss_key]
            loss.backward()
            optimizer.step()
            global_step += 1
            if global_step % batch_log_interval == 0:
                print("TRAIN:", get_losses_str(losses_and_metrics))
                tb_log_metrics(train_writer, losses_and_metrics, global_step)
                # TODO: add support for softmax processing
                prediction = torch.sigmoid(y_batch_hat)
                plot_fig_from_batch(
                    train_writer,
                    x_batch,
                    prediction,
                    y_batch,
                    global_step,
                )
            # lr change after each batch
            if lr_scheduler_getter.step_type == "after_batch":
                lr_scheduler.step()

        # done with one epoch
        # let's validate (use code from the validation script)
        model.eval()
        all_losses_and_metrics = validate(
            val_loader,
            model,
            loss_metric,
            device,
            plotting_func=plot_fig_from_batch,
            plotting_dict=val_plotting_dict,
            writer=val_writer,
            global_step=global_step,
            val_metric_to_check=saving_metric,
            output_losses_list=False,
        )

        print("Validation results for epoch {}".format(epoch))
        print("VAL:", get_losses_str(all_losses_and_metrics, tensors=False))
        model.train()

        current = all_losses_and_metrics[saving_metric]
        if is_better(current, previous, best_metric_type):
            print("Validation metric improved "
                  "at the end of epoch {}".format(epoch))
            previous = current
            save_val_metrics(all_losses_and_metrics, results_dir, epoch,
                             global_step)
            out_path = os.path.join(checkpoints_dir, "best_val_checkpoint.pth")
            save_model_data(out_path, model, global_step)

        tb_log_metrics(val_writer, all_losses_and_metrics, global_step)

        # learning rate schedule step at the end of epoch
        if lr_scheduler_getter.step_type != "after_batch":
            if lr_scheduler_getter.step_type == "use_val":
                lr_scheduler.step(all_losses_and_metrics[loss_key])
            elif lr_scheduler_getter.step_type == "use_epoch":
                lr_scheduler.step(epoch)
            else:
                lr_scheduler.step()

        # plot distinct learning rates in order they appear in the optimizer
        lr_dict = OrderedDict()
        for param_group in optimizer.param_groups:
            lr = param_group.get("lr")
            lr_dict[lr] = None
        for idx, lr in enumerate(lr_dict):
            tb_log_metrics(val_writer, {"lr_{}".format(idx): lr}, global_step)
            tb_log_metrics(train_writer, {"lr_{}".format(idx): lr},
                           global_step)

    train_writer.close()
    val_writer.close()
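The checkpointing logic above calls an `is_better` helper together with `best_metric_type`; a minimal sketch of what that comparison could look like (an assumption, not the project's actual implementation):

def is_better(current, previous, best_metric_type):
    # "low": lower is better (e.g. a loss); "high": higher is better (e.g. Dice/IoU)
    return current < previous if best_metric_type == "low" else current > previous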
Example #8
def main():

    train_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {
            'params': model.MODEL.decoder.parameters(),
            'lr': config.LEARNING_RATE
        },
        {
            'params': model.MODEL.encoder.parameters(),
            'lr': 1e-4
        },
        {
            'params': model.MODEL.segmentation_head.parameters(),
            'lr': config.LEARNING_RATE
        },
    ])
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
Example #9
def train_segmentation_model(
        model: torch.nn.Module,
        logdir: str,
        num_epochs: int,
        loaders: Dict[str, DataLoader]
):
    criterion = {
        "dice": DiceLoss(),
        "iou": IoULoss(),
        "bce": nn.BCEWithLogitsLoss()
    }

    learning_rate = 0.001
    encoder_learning_rate = 0.0005

    layerwise_params = {"encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)}
    model_params = utils.process_model_params(model, layerwise_params=layerwise_params)
    base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
    optimizer = Lookahead(base_optimizer)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2)

    device = utils.get_device()
    runner = SupervisedRunner(device=device, input_key='image', input_target_key='mask')

    callbacks = [
        CriterionCallback(
            input_key="mask",
            prefix="loss_dice",
            criterion_key="dice"
        ),
        CriterionCallback(
            input_key="mask",
            prefix="loss_iou",
            criterion_key="iou"
        ),
        CriterionCallback(
            input_key="mask",
            prefix="loss_bce",
            criterion_key="bce"
        ),

        MetricAggregationCallback(
            prefix="loss",
            mode="weighted_sum",
            metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
        ),

        # metrics
        DiceCallback(input_key='mask'),
        IouCallback(input_key='mask'),
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=logdir,
        num_epochs=num_epochs,
        main_metric="iou",
        minimize_metric=False,
        verbose=True,
        load_best_on_end=True,
    )
    best_model_save_dir = os.path.join(logdir, 'save')
    os.makedirs(best_model_save_dir, exist_ok=True)
    torch.save(model, os.path.join(best_model_save_dir, 'best_model.pth'))   # save best model (by valid IoU, see main_metric)
    batch = next(iter(loaders["valid"]))
    try:
        runner.trace(model=model, batch=batch, logdir=logdir, fp16=False)  # optimized version (not all models can be traced)
    except Exception:
        pass