Пример #1
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None

        val_dataset = SeverDatasetTest(val_df,
                                       IMG_DIR,
                                       IMG_SIZE,
                                       N_CLASSES,
                                       id_colname=ID_COLUMNS,
                                       transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)
        models.append(model)

    with timer('predict'):
        rles, sub_ids = predict(models, val_loader, device)
        sub_df = pd.DataFrame({
            'ImageId_ClassId': sub_ids,
            'EncodedPixels': rles
        })
        LOGGER.info(sub_df.head())

        sub_df.to_csv('{}_{}.csv'.format(EXP_ID, FOLD_ID), index=False)
Пример #2
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        df_ = pd.read_csv(SOFT_PATH)
        df = df[[ID_COLUMNS, "fold_id"]].merge(df_, how="left", on=ID_COLUMNS)
        df = df.append(pd.read_csv(PSEUDO_PATH)).reset_index(drop=True)
        for c in [
                "EncodedPixels_1", "EncodedPixels_2", "EncodedPixels_3",
                "EncodedPixels_4"
        ]:
            df[c] = df[c].astype(str)
        df["fold_id"] = df["fold_id"].fillna(FOLD_ID + 1)
        y = (df.sum_target != 0).astype("float32").values

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0,
                                     class_y=y_train)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {
                'params': model.decoder.parameters(),
                'lr': 3e-3
            },
            {
                'params': model.encoder.parameters(),
                'lr': 3e-4
            },
        ])
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_score = validate(model,
                                             val_loader,
                                             criterion,
                                             device,
                                             classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            LOGGER.info('Mean valid score: {}'.format(round(val_score, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Пример #3
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        if N_CLASSES == 3:
            df.drop("EncodedPixels_2", axis=1, inplace=True)
            df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
            df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        model.eval()

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion, device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        min_sizes = [300, 0, 600, 1600]
        for i in range(N_CLASSES):
            if i == 1:
                continue
            best = 0
            count = 0
            min_size = min_sizes[i]
            for th in [0.7+i*0.01 for i in range(30)]:
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th)
                    score = dice(y_val_, y_pred_)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), th))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    count = 0
                else:
                    count += 1
                if count == 3:
                    break
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
                         classification=CLASSIFICATION,
                         attention_type="cbam",
                         center=True)
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        models.append(model)
        del model
        torch.cuda.empty_cache()

    for model_path in model_pathes2:
        model = smp_old.Unet('resnet34',
                             encoder_weights=None,
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        models.append(model)
        del model
        torch.cuda.empty_cache()

with timer('predict'):
    rles, sub_ids = predict_dsv(models, test_loader, device)
    sub_df_ = pd.DataFrame({'ImageId_ClassId': sub_ids, 'EncodedPixels': rles})
    LOGGER.info(len(sub_df_))
Пример #5
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        for p in base_model_res:
            model = smp.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION,
                             attention_type="cbam",
                             center=True)
            model.load_state_dict(torch.load(p))
            model.to(device)
            model.eval()
            models.append(model)

        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model_res_old))
        model.to(device)
        model.eval()
        models.append(model)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(
            models,
            val_loader,
            criterion,
            device,
            classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        for i, th in enumerate(ths):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)

            best = 0
            for n_th, remove_mask_pixel in enumerate(
                [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_),
                                                   remove_mask_pixel))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                all_scores_.append(np.mean(scores_))
            scores.append(np.mean(scores_))
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
        np.save("all_scores_fold{}.npy".format(FOLD_ID), np.array(all_scores))
Пример #6
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y = (df.sum_target != 0).astype("float32").values

    with timer('preprocessing'):
        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ],
                  p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0,
                                     class_y=y)
        train_sampler = MaskProbSampler(df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)

        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {
                'params': model.decoder.parameters(),
                'lr': 3e-3
            },
            {
                'params': model.encoder.parameters(),
                'lr': 3e-4
            },
        ])
        #if base_model is None:
        #    scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
        #    scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine)
        #else:
        #    scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        for epoch in range(71, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            #scheduler.step()

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(),
                           'models/{}_latest.pth'.format(EXP_ID))

            gc.collect()