Example #1
def build_rle_dict(mask_dict):
    # Resize each predicted mask to the 350x525 submission size, threshold and
    # size-filter it, then RLE-encode it; despite the name, this returns a list
    # of encoded strings in mask_dict's iteration order.
    encoded_pixels = []
    for name, mask in tqdm(mask_dict.items()):
        if mask.shape != (350, 525):
            mask = cv2.resize(mask,
                              dsize=(525, 350),
                              interpolation=cv2.INTER_LINEAR)
        predict, num_predict = post_process(mask, 0.7, 10000)
        if num_predict == 0:
            encoded_pixels.append("")
        else:
            r = mask2rle(predict)
            encoded_pixels.append(r)
    return encoded_pixels
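
All of the snippets on this page lean on shared helpers from the surrounding repository (mask2rle, post_process, make_mask, ...) that are not reproduced here. As a rough orientation, the following is a minimal sketch of what mask2rle and post_process usually look like in Understanding Clouds kernels; the repository's own versions may differ (Examples #4 and #5, for instance, call post_process with extra height/width keyword arguments).

import cv2
import numpy as np

def mask2rle(img):
    # Run-length encode a binary mask in column-major order (Kaggle submission format).
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def post_process(probability, threshold, min_size):
    # Threshold a probability map and keep only connected components larger than min_size.
    mask = (probability > threshold).astype(np.uint8)
    num_components, components = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    for c in range(1, num_components):
        region = components == c
        if region.sum() > min_size:
            predictions[region] = 1
            num += 1
    return predictions, num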
Example #2
def postprocess_shape(file, mode):
    sub = pd.read_csv(file)
    name = file.split('/')[-1].split('.')[0]

    # mode = 'convex'  # choose from 'rect', 'min', 'convex' and 'approx'
    model_class_names = ['Fish', 'Flower', 'Gravel', 'Sugar']
    min_size = [25000, 15000, 22500, 10000]

    img_label_list = []
    enc_pixels_list = []
    test_imgs = os.listdir('../../dados/test_images/')
    for test_img_i, test_img in enumerate(tqdm(test_imgs)):
        for class_i, class_name in enumerate(model_class_names):

            path = os.path.join('../../dados/test_images/', test_img)
            img = cv2.imread(path).astype(np.float32)  # use already-resized ryches' dataset
            img = img / 255.
            img = np_resize(img, (350, 525))
            img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            img_label_list.append(f'{test_img}_{class_name}')

            mask = make_mask(sub, test_img + '_' + class_name, shape=(350, 525))
            # Post-process every class; replace `if True` with e.g.
            # `if class_name in ('Flower', 'Sugar')` to restrict it to certain classes.
            if True:
                mask = draw_convex_hull(mask.astype(np.uint8), mode=mode)
            mask[img2 <= 2 / 255.] = 0
            mask = post_process_minsize(mask, min_size[class_i])

            if mask.sum() == 0:
                enc_pixels_list.append(np.nan)
            else:
                mask = np.where(mask > 0.5, 1.0, 0.0)
                enc_pixels_list.append(mask2rle(mask))

    name = name + '_convex.csv'
    submission_df = pd.DataFrame({'Image_Label': img_label_list, 'EncodedPixels': enc_pixels_list})
    submission_df.to_csv(name, index=False)
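
Example #2 additionally uses draw_convex_hull and post_process_minsize, which are defined elsewhere in the repository. As an assumed sketch (not the repository's exact code), draw_convex_hull typically redraws each external contour of the mask as a filled bounding rectangle, minimum-area rectangle, convex hull, or approximated polygon, depending on mode:

import cv2
import numpy as np

def draw_convex_hull(mask, mode='convex'):
    # Redraw every external contour of a binary uint8 mask as a filled shape.
    img = np.zeros(mask.shape, np.uint8)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for c in contours:
        if mode == 'rect':      # axis-aligned bounding rectangle
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(img, (x, y), (x + w, y + h), 255, -1)
        elif mode == 'min':     # rotated minimum-area rectangle
            box = cv2.boxPoints(cv2.minAreaRect(c)).astype(np.int32)
            cv2.drawContours(img, [box], 0, 255, -1)
        elif mode == 'approx':  # simplified polygon
            approx = cv2.approxPolyDP(c, 0.02 * cv2.arcLength(c, True), True)
            cv2.drawContours(img, [approx], 0, 255, -1)
        else:                   # 'convex': convex hull of the contour
            hull = cv2.convexHull(c)
            cv2.drawContours(img, [hull], 0, 255, -1)
    return img / 255.

This sketch assumes OpenCV 4, where findContours returns two values rather than three.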
Example #3
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)

    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path='cls_preds.csv',
                                 phase='filtered_test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))
    else:
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path=config.data.sample_submission_path,
                                 phase='test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))

    model.to(config.device)
    model.eval()

    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                if config.data.num_classes == 4:
                    for cls in range(preds.shape[0]):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls + 1}"
                        predictions.append([name, rle])
                else:  # == 5
                    for cls in range(1, 5):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls}"
                        predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions,
                      columns=['ImageId_ClassId', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
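
predict_batch, used here and in the later examples, is also not shown. Below is a minimal sketch under the assumption that the model returns raw logits and that TTA means averaging with a horizontally flipped pass; the repository's version may do more.

import torch

def predict_batch(model, images, tta=False):
    # Sigmoid probabilities for a batch, optionally averaged with a horizontal-flip pass.
    preds = torch.sigmoid(model(images))
    if tta:
        flipped = torch.flip(images, dims=[-1])
        preds_flipped = torch.flip(torch.sigmoid(model(flipped)), dims=[-1])
        preds = (preds + preds_flipped) / 2
    return preds.cpu().numpy()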
Example #4
def ensemble():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # parameters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    config_paths384 = [
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold0.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold1.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold2.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold3.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold4.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold0.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold1.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold2.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold3.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold4.yml',
    ]
    LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    MIN_SIZES = [7500, 10000, 7500, 7500]
    WEIGHTS = [0.5, 0.5]
    # ------------------------------------------------------------------------------------------------------------
    #
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    def get_model_and_loader(config_paths):
        config = load_config(config_paths[0])

        models = []
        for c in config_paths:
            models.append(load_model(c))

        model = MultiSegModels(models)

        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)
    model384, loader384 = get_model_and_loader(config_paths384)

    predictions = []
    with torch.no_grad():
        for (batch_fnames320,
             batch_images320), (batch_fnames384, batch_images384) in tqdm(
                 zip(loader320, loader384)):
            batch_images320 = batch_images320.to(config.device)
            batch_images384 = batch_images384.to(config.device)

            batch_preds320 = predict_batch(model320,
                                           batch_images320,
                                           tta=config.test.tta)
            batch_preds384 = predict_batch(model384,
                                           batch_images384,
                                           tta=config.test.tta)

            batch_preds320 = resize_batch_images(batch_preds320, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds384 = resize_batch_images(batch_preds384, SUB_HEIGHT,
                                                 SUB_WIDTH)
            batch_preds = batch_preds320 * \
                WEIGHTS[0] + batch_preds384 * WEIGHTS[1]

            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds320)),
                1).view(batch_preds320.shape[0], -1)
            batch_labels384 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds384)),
                1).view(batch_preds384.shape[0], -1)
            batch_labels = batch_labels320 * \
                WEIGHTS[0] + batch_labels384 * WEIGHTS[1]

            for fname, preds, labels in zip(batch_fnames320, batch_preds,
                                            batch_labels):
                for cls in range(4):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        pred = np.zeros(preds[cls, :, :].shape)
                    else:
                        pred, _ = post_process(preds[cls, :, :],
                                               MASK_THRESHOLDS[cls],
                                               MIN_SIZES[cls],
                                               height=SUB_HEIGHT,
                                               width=SUB_WIDTH)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['Image_Label', 'EncodedPixels'])

    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission,
                         sub_df,
                         on='Image_Label',
                         how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['Image_Label', 'EncodedPixels']]

    df_merged.to_csv("submission.csv", index=False)

    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/'
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/'
    else:
        config.work_dir = '.'
    df_merged.to_csv(config.work_dir + '/submission.csv', index=False)
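
MultiSegModels and resize_batch_images are repository helpers. Plausible minimal versions, assumed here rather than copied from the source, are an output-averaging wrapper and a per-channel cv2 resize:

import cv2
import numpy as np
import torch
import torch.nn as nn

class MultiSegModels(nn.Module):
    # Wrap several fold models and average their raw outputs.
    def __init__(self, models):
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        return torch.stack([m(x) for m in self.models]).mean(dim=0)

def resize_batch_images(preds, height, width):
    # Resize a (N, C, H, W) array of predictions to (N, C, height, width).
    n, c = preds.shape[:2]
    out = np.zeros((n, c, height, width), np.float32)
    for i in range(n):
        for j in range(c):
            out[i, j] = cv2.resize(preds[i, j], (width, height))
    return out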
Example #5
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    model = load_model(config_file_seg)

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    pred, _ = post_process(
                        preds[cls, :, :],
                        config.test.best_threshold,
                        min_sizes[cls],
                        height=config.transforms.test.Resize.height,
                        width=config.transforms.test.Resize.width)
                    pred = cv2.resize(pred, (SUB_WIDTH, SUB_HEIGHT))
                    pred = (pred > 0.5).astype(int)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions, columns=['Image_Label', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
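
Examples #4 and #5 also reference the module-level constants SUB_HEIGHT, SUB_WIDTH and INV_CLASSES. Judging from the 350x525 submission size and the class names used in Examples #1 and #2, they would plausibly be defined as follows (assumed, not taken from the repository):

SUB_HEIGHT, SUB_WIDTH = 350, 525  # submission mask size used throughout these examples
INV_CLASSES = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}  # class index -> label name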
Example #6
def run_seg(config_dir):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config_root = Path(config_dir) / 'seg'
    config_paths = [config_root / p for p in os.listdir(config_root)]
    base_config_paths = [
        Path(config_dir) / p for p in os.listdir(config_dir) if 'yml' in p
    ]
    config = load_config(base_config_paths[0])

    models = []
    for c in config_paths:
        models.append(load_model(c))

    model = MultiSegModels(models)

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path='cls_preds.csv',
                                 phase='filtered_test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))
    else:
        testloader = make_loader(data_folder=config.data.test_dir,
                                 df_path=config.data.sample_submission_path,
                                 phase='test',
                                 batch_size=config.test.batch_size,
                                 num_workers=config.num_workers,
                                 transforms=get_transforms(
                                     config.transforms.test))

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model,
                                        batch_images,
                                        tta=config.test.tta)

            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    mask = preds[cls, :, :]
                    mask, num = post_process(mask, config.test.best_threshold,
                                             min_sizes[cls])
                    rle = mask2rle(mask)
                    name = fname + f"_{cls + 1}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['ImageId_ClassId', 'EncodedPixels'])

    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission,
                         sub_df,
                         on='ImageId_ClassId',
                         how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['ImageId_ClassId', 'EncodedPixels']]

    df_merged.to_csv("submission.csv", index=False)
    df_merged.to_csv(KAGGLE_WORK_DIR + "/submission.csv", index=False)
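
Examples #4 through #6 assemble their ensembles with load_model(config_path). Following the pattern written out in Example #3 (build the segmentation_models_pytorch architecture from the config, then restore the best checkpoint), a hypothetical implementation could look like the sketch below; it reuses load_config, load_checkpoint and smp exactly as Example #3 does, and is not the repository's exact code.

def load_model(config_path):
    # Build the smp architecture named in the config and restore its best checkpoint,
    # mirroring the explicit steps in Example #3.
    config = load_config(config_path)
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=None,
        classes=config.data.num_classes,
        activation=None,
    )
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(config.device)
    model.eval()
    return model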