Exemplo n.º 1
0
def get_test_dataset(dataset: str, prepare, test_or_train='test'):
    """Build a mask-less ``D.ImageAndMaskDataset`` for the test or train ids.

    Args:
        dataset: Dataset mode name. The body does not use it; the parameter
            is kept so existing callers keep working.
        prepare: Forwarded to ``D.ImageAndMaskDataset`` as ``prepare_fn``.
        test_or_train: ``'test'`` reads the test ids and images; any other
            value reads the train ids and images.

    Returns:
        The ``D.ImageAndMaskDataset`` built from the ids, images, ``None``
        in place of masks, and the depths read for those ids.
    """
    if test_or_train == 'test':
        ids = D.all_test_ids()
        images = D.read_test_images(ids)
    else:
        ids = D.all_train_ids()
        images = D.read_train_images(ids)

    depths = D.read_depths(ids)

    # Masks are passed as None for both the test and the train branch.
    return D.ImageAndMaskDataset(ids, images, None, depths,
                                 prepare_fn=prepare)
Exemplo n.º 2
0
def test_folds_coverage():
    """Plot per-fold histograms of mask coverage for a 'coverage' fold split.

    Reads all train ids, depths, images and masks, asks
    ``D.get_folds_vector`` for a 10-fold assignment stratified by
    ``'coverage'``, and for every fold draws the coverage histogram of the
    samples outside the fold (left axis) and inside the fold (right axis).
    The function makes no assertions; the figure is only shown.
    """
    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    n_folds = 10
    # Coverage = number of non-zero pixels in each mask.
    # The `np.int` alias was removed from NumPy (deprecated in 1.20, removed
    # in 1.24); the builtin `int` selects the same dtype.
    coverage = np.array([cv2.countNonZero(x) for x in masks], dtype=int)
    folds_d = D.get_folds_vector('coverage',
                                 images,
                                 masks,
                                 depths,
                                 n_folds=n_folds)

    f, ax = plt.subplots(1, 2)

    for fold in range(n_folds):
        # folds_d is compared element-wise against the fold number, so it is
        # used as a per-sample fold label aligned with `coverage`.
        train = coverage[folds_d != fold]
        val = coverage[folds_d == fold]

        ax[0].hist(train, label=f'Fold {fold}')
        ax[1].hist(val, label=f'Fold {fold}')

    f.show()
Exemplo n.º 3
0
def main():
    """Run a three-phase training session and generate submissions.

    Steps, in order:

    1. Parse command-line arguments, seed the RNGs and create the
       experiment directory.
    2. Load train ids, depths, images and masks; optionally fix masks and
       record the changed ids in ``fixed_masks.txt``.
    3. Split into train/validation (by fold when ``args.fold`` is given,
       otherwise an unshuffled 80/20 split) and filter the training part
       with ``D.drop_some``.
    4. Build datasets, loaders and the model (moved to CUDA).
    5. Train in three phases - warmup, main, and five cosine-annealing
       snapshot cycles - saving checkpoints along the way.
    6. Write ``train_history.csv`` and call ``generate_model_submission``
       for the best checkpoint and every snapshot.

    Raises:
        RuntimeError: If the train or validation ids are not sorted.
        ValueError: If both ``args.fine_tune`` and a positive
            ``args.freeze_encoder`` are requested.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates to
    # train_session_args are also visible through `args`.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        # Keep a record of which samples had their masks altered.
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        # NOTE(review): with shuffle=False the split is presumably
        # deterministic regardless of random_state - confirm against the
        # scikit-learn documentation.
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    # Here we can exclude some images from training, but keep in validation
    train_mask = D.drop_some(img_train,
                             mask_train,
                             drop_black=True,
                             drop_vstrips=args.drop_vstrips,
                             drop_few=args.drop_few)
    ids_train = ids_train[train_mask]
    img_train = img_train[train_mask]
    mask_train = mask_train[train_mask]
    depth_train = depth_train[train_mask]

    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test is not sorted")

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE
    # target_size = D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    # Both pipelines start with the prepare transform (when there is one);
    # only the training pipeline additionally gets the augmentation.
    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(ids_train), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(ids_test), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Optimizer      :', args.optimizer, 'wd', args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): best_score is only printed; best_metric_val stays 0
        # after a resume. Confirm whether that is intended.
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    # Persist the session configuration; it is handed to
    # generate_model_submission at the end.
    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    # Weights and losses are keyed by 'mask', 'class' and 'dsv'; both dicts
    # are passed to train() as-is.
    weights = {
        'mask': 1.0,
        'class': 0.05,
        'dsv': 0.1,
    }

    bce = U.get_loss('bce')
    bce_lovasz = U.get_loss('bce_lovasz')
    bce_jaccard = U.get_loss('bce_jaccard')

    # Only the 'mask' loss differs between the three phases.
    losses = {
        'warmup': {
            'mask': bce,
            'class': bce,
            'dsv': bce,
        },
        'main': {
            'mask': bce_jaccard,
            'class': bce,
            'dsv': bce,
        },
        'annealing': {
            'mask': bce_lovasz,
            'class': bce,
            'dsv': bce,
        }
    }

    epochs = {'warmup': 50, 'main': 250, 'annealing': 50}

    if args.fast:
        # Fast mode: a single epoch per phase.
        for key in epochs:
            epochs[key] = 1

    learning_rates = {
        'warmup': args.learning_rate,
        'main': 1e-3,
        'annealing': 1e-2
    }

    # Warmup phase
    if epochs['warmup']:
        print(torch.cuda.max_memory_allocated(),
              torch.cuda.max_memory_cached())
        trainable_parameters = filter(lambda p: p.requires_grad,
                                      model.parameters())
        optimizer = U.get_optimizer(args.optimizer,
                                    trainable_parameters,
                                    learning_rates['warmup'],
                                    weight_decay=args.weight_decay)
        scheduler = None  # StepLR(optimizer, gamma=0.5, step_size=50)

        train_history, best_metric_val, start_epoch = train(
            model,
            losses['warmup'],
            weights,
            optimizer,
            scheduler,
            trainloader,
            validloader,
            writer,
            start_epoch,
            epochs=epochs['warmup'],
            early_stopping=args.early_stopping,
            train_history=train_history,
            experiment_dir=exp_dir,
            target_metric=target_metric,
            best_metric_val=best_metric_val,
            target_metric_mode=target_metric_mode,
            checkpoint_filename=best_lb_checkpoint)
        U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_warmup.pth'),
                          model,
                          start_epoch,
                          train_history,
                          metric_name=target_metric,
                          metric_score=best_metric_val)

        # Drop the phase's optimizer state before the next phase builds its
        # own, then release cached CUDA memory.
        del trainable_parameters, optimizer, scheduler
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        print('Finished warmup phase. Main train loop.')

    # Main training phase
    print(torch.cuda.max_memory_allocated(), torch.cuda.max_memory_cached())
    trainable_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
    optimizer = U.get_optimizer(args.optimizer,
                                trainable_parameters,
                                learning_rates['main'],
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='max',
                                  patience=50,
                                  factor=0.5,
                                  min_lr=1e-5)

    train_history, best_metric_val, start_epoch = train(
        model,
        losses['main'],
        weights,
        optimizer,
        scheduler,
        trainloader,
        validloader,
        writer,
        start_epoch,
        epochs=epochs['main'],
        early_stopping=args.early_stopping,
        train_history=train_history,
        experiment_dir=exp_dir,
        target_metric=target_metric,
        best_metric_val=best_metric_val,
        target_metric_mode=target_metric_mode,
        checkpoint_filename=best_lb_checkpoint)
    del trainable_parameters, optimizer, scheduler
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    # Checkpoint files that will each get a submission at the end.
    snapshots = [best_lb_checkpoint]

    U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_main.pth'),
                      model,
                      start_epoch,
                      train_history,
                      metric_name=target_metric,
                      metric_score=best_metric_val)

    print('Finished train phase.')

    # Cosine annealing
    if epochs['annealing']:

        for snapshot in range(5):
            print(f'Starting annealing phase {snapshot}')
            print(torch.cuda.max_memory_allocated(),
                  torch.cuda.max_memory_cached())
            # model.set_fine_tune(True)
            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            # Each cycle starts from a fresh SGD optimizer and scheduler.
            optimizer = U.get_optimizer('sgd',
                                        trainable_parameters,
                                        learning_rates['annealing'],
                                        weight_decay=args.weight_decay)
            scheduler = CosineAnnealingLR(optimizer,
                                          epochs['annealing'],
                                          eta_min=1e-7)

            snapshot_name = os.path.join(
                exp_dir, f'{prefix}_{target_metric}_snapshot_{snapshot}.pth')
            snapshots.append(snapshot_name)
            # best_metric_val is passed as 0 here (not the running best),
            # and each cycle has its own checkpoint file.
            train_history, best_metric_val, start_epoch = train(
                model,
                losses['annealing'],
                weights,
                optimizer,
                scheduler,
                trainloader,
                validloader,
                writer,
                start_epoch,
                epochs=epochs['annealing'],
                early_stopping=args.early_stopping,
                train_history=train_history,
                experiment_dir=exp_dir,
                target_metric=target_metric,
                best_metric_val=0,
                target_metric_mode=target_metric_mode,
                checkpoint_filename=snapshot_name)
            del trainable_parameters, optimizer, scheduler
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    print('Training finished')
    train_history.to_csv(os.path.join(exp_dir, 'train_history.csv'),
                         index=False)

    for snapshot_file in snapshots:
        generate_model_submission(snapshot_file,
                                  config_fname,
                                  mine_on_val=True)
Exemplo n.º 4
0
def main():
    """Run a single-loop training session and generate a submission.

    Steps, in order:

    1. Parse command-line arguments, seed the RNGs and create the
       experiment directory.
    2. Load the (pre-filtered) train ids, depths, images and masks;
       optionally fix masks and record the changed ids in
       ``fixed_masks.txt``.
    3. Split into train/validation (by fold when ``args.fold`` is given,
       otherwise an unshuffled 80/20 split).
    4. Build datasets, loaders and the model (moved to CUDA).
    5. Train for up to ``args.epochs`` epochs, rebuilding the optimizer
       (and scheduler) on restart boundaries, checkpointing whenever the
       target metric improves and stopping early when requested.
    6. Call ``generate_model_submission`` for the best checkpoint.

    Raises:
        RuntimeError: If the train or validation ids are not sorted.
        ValueError: If both ``args.fine_tune`` and a positive
            ``args.freeze_encoder`` are requested.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates to
    # train_session_args are also visible through `args`.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.get_train_ids(drop_black=True,
                                drop_vstrips=args.drop_vstrips,
                                drop_empty=args.drop_empty,
                                drop_few=args.drop_few,
                                fast=args.fast)
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        # Keep a record of which samples had their masks altered.
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        # NOTE(review): with shuffle=False the split is presumably
        # deterministic regardless of random_state - confirm against the
        # scikit-learn documentation.
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test is not sorted")

    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE
    # target_size = D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    # Both pipelines start with the prepare transform (when there is one);
    # only the training pipeline additionally gets the augmentation.
    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()
    scheduler = None
    optimizer = None

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    # NOTE: len(loader) is the number of batches, not the number of samples.
    print('Train set size :', len(trainloader), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(validloader), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Criterion      :', args.loss)
    print('Optimizer      :', args.optimizer, args.learning_rate,
          args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): best_score is only printed; best_metric_val stays 0
        # after a resume. Confirm whether that is intended.
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    segmentation_loss = U.get_loss(args.loss)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    # Persist the session configuration; it is handed to
    # generate_model_submission at the end.
    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    # prepare_fn is treated as optional everywhere above, so guard the
    # attribute access instead of failing with AttributeError on None.
    # NOTE(review): assumes process_epoch accepts mask_postprocess=None -
    # confirm against its signature.
    mask_postprocess = prepare_fn.backward if prepare_fn is not None else None

    # Start training loop
    no_improvement_epochs = 0

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # On Epoch begin
        if U.should_quit(exp_dir) or (
                args.early_stopping is not None
                and no_improvement_epochs > args.early_stopping):
            break

        epochs_trained = epoch - start_epoch
        # Rebuild the optimizer on the first iteration, on every
        # `restart_every` boundary, and at the epoch where the encoder
        # freeze period ends.
        should_restart_optimizer = (
            args.restart_every > 0 and epochs_trained % args.restart_every
            == 0) or (epochs_trained
                      == args.freeze_encoder) or optimizer is None

        if should_restart_optimizer:
            del optimizer
            if args.fine_tune:
                model.set_fine_tune(args.fine_tune)
            else:
                model.set_encoder_training_enabled(
                    epochs_trained >= args.freeze_encoder)

            # Only parameters that currently require gradients are optimized.
            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            optimizer = U.get_optimizer(args.optimizer,
                                        trainable_parameters,
                                        args.learning_rate,
                                        weight_decay=args.weight_decay)

            print('Restarting optimizer state', epoch, count_parameters(model))

            if args.lr_scheduler:
                scheduler = U.get_lr_scheduler(args.lr_scheduler, optimizer,
                                               args.epochs)

        # ReduceLROnPlateau is stepped after validation (it needs the
        # metric); every other scheduler is stepped at epoch start.
        if scheduler is not None and not isinstance(scheduler,
                                                    ReduceLROnPlateau):
            scheduler.step(epochs_trained)

        U.log_learning_rate(writer, optimizer, epoch)

        # Epoch
        train_metrics = process_epoch(model,
                                      segmentation_loss,
                                      optimizer,
                                      trainloader,
                                      epoch,
                                      True,
                                      writer,
                                      mask_postprocess=mask_postprocess)
        valid_metrics = process_epoch(model,
                                      segmentation_loss,
                                      None,
                                      validloader,
                                      epoch,
                                      False,
                                      writer,
                                      mask_postprocess=mask_postprocess)

        all_metrics = {}
        all_metrics.update(train_metrics)
        all_metrics.update(valid_metrics)

        # On Epoch End
        summary = {
            'epoch': [int(epoch)],
            'lr': [float(optimizer.param_groups[0]['lr'])]
        }
        for k, v in all_metrics.items():
            summary[k] = [v]

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to add the row.
        train_history = pd.concat(
            [train_history, pd.DataFrame.from_dict(summary)],
            ignore_index=True)
        print(epoch, summary)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(all_metrics[target_metric], epochs_trained)

        if U.is_better(all_metrics[target_metric], best_metric_val,
                       target_metric_mode):
            best_metric_val = all_metrics[target_metric]
            U.save_checkpoint(best_lb_checkpoint,
                              model,
                              epoch,
                              train_history,
                              metric_name=target_metric,
                              metric_score=best_metric_val)
            print('Checkpoint saved', epoch, best_metric_val,
                  best_lb_checkpoint)
            no_improvement_epochs = 0
        else:
            no_improvement_epochs += 1

    print('Training finished')

    generate_model_submission(best_lb_checkpoint,
                              config_fname,
                              mine_on_val=True)
Exemplo n.º 5
0
from lib import dataset as D
import pandas as pd

if __name__ == '__main__':
    train_ids = D.get_train_ids(drop_black=False,
                                drop_vstrips=False,
                                drop_empty=False,
                                drop_few=False)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)
    depths = D.read_depths(train_ids)

    folds_by_salt = D.get_folds_vector('coverage',
                                       images,
                                       masks,
                                       depths,
                                       n_folds=5)
    folds_by_depth = D.get_folds_vector('depth',
                                        images,
                                        masks,
                                        depths,
                                        n_folds=5)
    folds_by_rnd = D.get_folds_vector(None,
                                      images,
                                      masks,
                                      depths,
                                      n_folds=5,
                                      random_state=42)

    pd.DataFrame.from_dict({
        'id': train_ids,