Example #1
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    load_best_on_end=True,
)

# # Setup 9 - predict_loader

# In[ ]:

runner_out = runner.predict_loader(
    model=model,
    loader=loaders["valid"],
)

# In[ ]:

next(runner_out)[runner.output_key].shape

# # Setup 10 - predict batch

# In[ ]:

features, targets = next(iter(loaders["valid"]))

# In[ ]:

features.shape
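
Example #1 stops before the batch is actually pushed through the model. With Catalyst's SupervisedRunner, the step this "predict batch" setup is building toward is predict_batch; a minimal sketch, assuming the runner's default "features" input key:

prediction = runner.predict_batch({"features": features})
prediction[runner.output_key].shape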
Example #2
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
    load_best_on_end=True,
)

# # Inference

# In[ ]:

runner_out = runner.predict_loader(loader=loaders["valid"])

# # Predictions visualization

# In[ ]:

import matplotlib.pyplot as plt

plt.style.use("ggplot")

# In[ ]:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

for i, (inputs, output) in enumerate(zip(valid_data, runner_out)):
    image, mask = inputs
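
The snippet is truncated at this point in the source. A sketch of how such a visualization loop is commonly finished, under the assumptions that the loader uses batch size 1 (so each yielded output aligns with one sample), that predictions sit under runner.output_key, and that the sigmoid output is thresholded at 0.5:

for i, (inputs, output) in enumerate(zip(valid_data, runner_out)):
    image, mask = inputs
    logits = output[runner.output_key][0].cpu().numpy()
    probability = sigmoid(logits)
    # Ground truth on the left, thresholded prediction on the right.
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.imshow(mask.squeeze())
    plt.subplot(1, 2, 2)
    plt.imshow(probability.squeeze() > 0.5)
    plt.show()
    if i >= 2:  # inspect only the first few samples
        break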
Example #3
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# # Inference

# In[ ]:

runner_out = runner.predict_loader(model,
                                   loaders["valid"],
                                   resume=f"{logdir}/checkpoints/best.pth")

# # Predictions visualization

# In[ ]:

import matplotlib.pyplot as plt

plt.style.use("ggplot")

# In[ ]:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

for i, (inputs, output) in enumerate(zip(valid_data, runner_out)):
    image, mask = inputs

Example #4
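
This fuller pipeline arrives without its imports or helper definitions. A plausible import block, assuming a Kaggle "Understanding Clouds" style project (project-local helpers such as get_model, get_ref_model, CloudDataset, get_validation_augmentation, get_preprocessing, post_process, dice, mask2rle and get_black_mask live elsewhere in that repository and are not reproduced here):

import gc
import os

import cv2
import numpy as np
import pandas as pd
import torch
import tqdm
from torch.utils.data import DataLoader

from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import CheckpointCallback, InferCallback
from segmentation_models_pytorch import encoders
# Assumption: convert_model is the Synchronized-BatchNorm-PyTorch helper.
from sync_batchnorm import convert_model


def sigmoid(x):
    # Same numpy sigmoid as in the examples above.
    return 1 / (1 + np.exp(-x))
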
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    sub = pd.read_csv(f'{path}/sample_submission.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
    sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])
    # Per-class and per-image counts of non-empty masks (computed but not used).
    train.loc[train['EncodedPixels'].notnull(),
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    valid_ids = pd.read_csv("csvs/valid_threshold.csv")["img_id"].values
    test_ids = sub['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    #     print(valid_ids)
    ENCODER = opts.backborn
    ENCODER_WEIGHTS = opts.encoder_weights
    DEVICE = 'cuda'

    ACTIVATION = None
    model = get_model(model_type=opts.model_type,
                      encoder=ENCODER,
                      encoder_weights=ENCODER_WEIGHTS,
                      activation=ACTIVATION,
                      n_classes=opts.class_num,
                      task=opts.task,
                      attention_type=opts.attention_type,
                      head='simple',
                      center=opts.center,
                      tta=opts.tta)
    if opts.refine:
        model = get_ref_model(infer_model=model,
                              encoder=opts.ref_backborn,
                              encoder_weights=ENCODER_WEIGHTS,
                              activation=ACTIVATION,
                              n_classes=opts.class_num,
                              preprocess=opts.preprocess,
                              tta=opts.tta)
    model = convert_model(model)
    preprocessing_fn = encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    encoded_pixels = []
    runner = SupervisedRunner()
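    # Sigmoid probabilities accumulated over the folds: one 350x525 map per
    # (validation image, class) pair, indexed below as i * 4 + j.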
    probabilities = np.zeros((2220, 350, 525))

    for fold in range(opts.fold_max):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{fold}"
        else:
            logdir = f"{opts.logdir}/fold{fold}"
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=opts.batchsize,
                                  shuffle=False,
                                  num_workers=opts.num_workers)
        loaders = {"infer": valid_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )
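        # InferCallback has collected the raw model outputs for this fold;
        # they are read back below via runner.callbacks[0].predictions["logits"].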
        valid_masks = []
        for i, (batch, output) in enumerate(
                tqdm.tqdm(
                    zip(valid_dataset,
                        runner.callbacks[0].predictions["logits"]))):
            image, mask = batch
            for m in mask:
                if m.shape != (350, 525):
                    m = cv2.resize(m,
                                   dsize=(525, 350),
                                   interpolation=cv2.INTER_LINEAR)
                valid_masks.append(m)

            for j, probability in enumerate(output):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i * 4 + j, :, :] += sigmoid(probability)

    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_valid.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_valid.npy',
            probabilities)

    torch.cuda.empty_cache()
    gc.collect()

    class_params = {}
    cv_d = []
    for class_id in tqdm.trange(opts.class_num, desc='class_id', leave=False):
        #         print(class_id)
        attempts = []
        for tt in tqdm.trange(0, 100, 10, desc='top_threshold', leave=False):
            tt /= 100
            for bt in tqdm.trange(0,
                                  100,
                                  10,
                                  desc='bot_threshold',
                                  leave=False):
                bt /= 100
                for ms in tqdm.tqdm([
                        0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                        18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                        50000
                ],
                                    desc='min_size',
                                    leave=False):
                    masks = []
                    for i in range(class_id, len(probabilities), 4):
                        probability = probabilities[i]
                        predict, num_predict = post_process(
                            probability, tt, ms, bt)

                        masks.append(predict)

                    d = []
                    for pred, true in zip(masks, valid_masks[class_id::4]):
                        if (pred.sum() == 0) and (true.sum() == 0):
                            d.append(1)
                        else:
                            d.append(dice(pred, true))
                    attempts.append((tt, ms, bt, np.mean(d)))

        attempts_df = pd.DataFrame(
            attempts,
            columns=['top_threshold', 'size', 'bottom_threshold', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        cv_d.append(attempts_df['dice'].values[0])
        best_top_threshold = attempts_df['top_threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        best_bottom_threshold = attempts_df['bottom_threshold'].values[0]

        class_params[class_id] = (best_top_threshold, best_size,
                                  best_bottom_threshold)
    cv_d = np.array(cv_d)
    print("CV Dice:", np.mean(cv_d))
    pathlist = [
        "../input/test_images/" + i.split("_")[0] for i in sub['Image_Label']
    ]

    del masks
    del valid_masks
    del probabilities
    gc.collect()

    ############# predict ###################
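    # Test-time probabilities: one 350x525 map per image and per class,
    # again averaged over the folds.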
    probabilities = np.zeros((n_test, 4, 350, 525))
    for fold in tqdm.trange(opts.fold_max, desc='fold loop'):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{fold}"
        else:
            logdir = f"{opts.logdir}/fold{fold}"

        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=opts.batchsize,
                                 shuffle=False,
                                 num_workers=opts.num_workers)
        runner_out = runner.predict_loader(
            model,
            test_loader,
            resume=f"{logdir}/checkpoints/best.pth",
            verbose=True)
        for i, batch in enumerate(
                tqdm.tqdm(runner_out, desc='probability loop')):
            for j, probability in enumerate(batch):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i, j, :, :] += sigmoid(probability)
        gc.collect()
    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_test.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_test.npy',
            probabilities)
    image_id = 0
    print("##################### start post_process #####################")
    for i in tqdm.trange(n_test, desc='post process loop'):
        for probability in probabilities[i]:
            predict, num_predict = post_process(probability,
                                                class_params[image_id % 4][0],
                                                class_params[image_id % 4][1],
                                                class_params[image_id % 4][2])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                black_mask = get_black_mask(pathlist[image_id])
                predict = np.multiply(predict, black_mask)
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
        gc.collect()
    print("##################### Finish post_process #####################")
    #######################################
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv(
        f'submissions/submission_{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}.csv',
        columns=['Image_Label', 'EncodedPixels'],
        index=False)
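
The pipeline leans on project-local helpers (post_process, dice, mask2rle, get_black_mask) that are not part of this listing. For orientation, here is a sketch of the shape these helpers commonly take in public kernels for this competition; it is an illustration, not this repository's actual code. The two-threshold post_process is sketched as a hysteresis threshold, which matches how top_threshold and bottom_threshold are tuned above:

import cv2
import numpy as np


def post_process(probability, top_threshold, min_size, bottom_threshold):
    # Components are taken from the loose bottom-threshold mask, but kept
    # only if they are larger than min_size and contain at least one pixel
    # above the strict top threshold (hysteresis thresholding).
    seeds = probability > top_threshold
    grown = (probability > bottom_threshold).astype(np.uint8)
    num_component, component = cv2.connectedComponents(grown)
    predictions = np.zeros((350, 525), np.float32)
    num = 0
    for c in range(1, num_component):
        p = component == c
        if p.sum() > min_size and seeds[p].any():
            predictions[p] = 1
            num += 1
    return predictions, num


def dice(img1, img2):
    # Dice coefficient between two binary masks; callers above guard the
    # both-empty case, so no zero-division check is needed here.
    img1 = np.asarray(img1, dtype=bool)
    img2 = np.asarray(img2, dtype=bool)
    intersection = np.logical_and(img1, img2).sum()
    return 2.0 * intersection / (img1.sum() + img2.sum())


def mask2rle(img):
    # Standard Kaggle run-length encoding (column-major order).
    pixels = np.asarray(img).T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)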