# Example 1 (scraped marker — original text: "Exemplo n.º 1", votes: 0)
def load_data(fold: int, params: Dict[str, Any]) -> Any:
    """Build the train/val/test ``DataLoader`` triple for one CV fold.

    Reads the train CSV, splits it into train/val by ``fold`` via
    ``train_val_split``, reads the test CSV, and wraps each split in a
    ``Dataset`` with the appropriate albumentations pipeline.

    Args:
        fold: cross-validation fold index forwarded to ``train_val_split``.
        params: hyper-parameter dict; unused in this variant but kept so the
            signature matches the tunable version of ``load_data``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True  # fixed input size, so autotuned conv algos pay off

    logger.info('Options:')
    logger.info(pprint.pformat(opt))

    full_df = pd.read_csv(opt.TRAIN.CSV)
    logger.info('full_df %s', full_df.shape)  # was print(); keep logging uniform
    train_df, val_df = train_val_split(full_df, fold)
    logger.info('train_df %s val_df %s', train_df.shape, val_df.shape)
    test_df = pd.read_csv(opt.TEST.CSV)

    size = opt.MODEL.INPUT_SIZE
    # Shared pad/crop ops; the same construction was repeated three times.
    pad = albu.PadIfNeeded(size, size)
    random_crop = albu.RandomCrop(height=size, width=size)

    transform_train = albu.Compose([
        pad,
        random_crop,
        albu.HorizontalFlip(.5),
    ])

    if opt.TEST.NUM_TTAS > 1:
        # TTA mode: stochastic crop + unconditional flip so each pass differs.
        transform_test = albu.Compose([
            pad,
            random_crop,
            albu.HorizontalFlip(),
        ])
    else:
        # Single deterministic pass: center crop, no flip.
        transform_test = albu.Compose([
            pad,
            albu.CenterCrop(height=size, width=size),
        ])

    train_dataset = Dataset(train_df, path=opt.TRAIN.PATH, mode='train',
                            num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                            augmentor=transform_train)

    val_dataset = Dataset(val_df, path=opt.TRAIN.PATH, mode='val',
                          num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                          num_tta=1,  # validation is never TTA-averaged
                          augmentor=transform_test)
    test_dataset = Dataset(test_df, path=opt.TEST.PATH, mode='test',
                           num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                           num_tta=opt.TEST.NUM_TTAS,
                           augmentor=transform_test)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=True,
        num_workers=opt.TRAIN.WORKERS)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)

    return train_loader, val_loader, test_loader
# Example 2 (scraped marker — original text: "Exemplo n.º 2", votes: 0)
def _build_train_augs(params: Dict[str, Any]) -> list:
    """Translate the hyper-parameter dict into a list of albumentations ops.

    Recognized keys: ``vflip``, ``rotate90`` (0/1 flags), ``affine``
    ('soft'/'medium'/'hard'), and ``noise``/``blur``/``distortion``/``color``
    (float probabilities; a group is enabled only when its value > 0.1).
    A horizontal flip is always included.
    """
    augs = [albu.HorizontalFlip(.5)]
    if int(params['vflip']):
        augs.append(albu.VerticalFlip(.5))
    if int(params['rotate90']):
        augs.append(albu.RandomRotate90())

    # Three affine presets of increasing aggressiveness; any other value
    # (e.g. 'none') adds no affine transform.
    if params['affine'] == 'soft':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.075,
                                          scale_limit=0.15,
                                          rotate_limit=10,
                                          p=.75))
    elif params['affine'] == 'medium':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625,
                                          scale_limit=0.2,
                                          rotate_limit=45,
                                          p=0.2))
    elif params['affine'] == 'hard':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625,
                                          scale_limit=0.50,
                                          rotate_limit=45,
                                          p=.75))

    # NOTE(review): the IAA* transforms were removed in newer albumentations
    # releases — confirm the pinned version still provides them.
    if float(params['noise']) > 0.1:
        augs.append(albu.OneOf([
            albu.IAAAdditiveGaussianNoise(),
            albu.GaussNoise(),
        ], p=float(params['noise'])))

    if float(params['blur']) > 0.1:
        augs.append(albu.OneOf([
            albu.MotionBlur(p=.2),
            albu.MedianBlur(blur_limit=3, p=0.1),
            albu.Blur(blur_limit=3, p=0.1),
        ], p=float(params['blur'])))

    if float(params['distortion']) > 0.1:
        augs.append(albu.OneOf([
            albu.OpticalDistortion(p=0.3),
            albu.GridDistortion(p=.1),
            albu.IAAPiecewiseAffine(p=0.3),
        ], p=float(params['distortion'])))

    if float(params['color']) > 0.1:
        augs.append(albu.OneOf([
            albu.CLAHE(clip_limit=2),
            albu.IAASharpen(),
            albu.IAAEmboss(),
            albu.RandomBrightnessContrast(),
        ], p=float(params['color'])))

    return augs


def load_data(fold: int, params: Dict[str, Any]) -> Any:
    """Build the train/val/test ``DataLoader`` triple for one CV fold.

    Like the fixed-augmentation variant, but the training augmentation
    pipeline is driven by ``params`` (see ``_build_train_augs``).

    Args:
        fold: cross-validation fold index forwarded to ``train_val_split``.
        params: hyper-parameter dict controlling the augmentations; must also
            contain ``aug_global_prob``, the probability of applying the
            whole augmentation group.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True  # fixed input size, so autotuned conv algos pay off

    logger.info('Options:')
    logger.info(pprint.pformat(opt))

    full_df = pd.read_csv(opt.TRAIN.CSV)
    logger.info('full_df %s', full_df.shape)  # was print(); keep logging uniform
    train_df, val_df = train_val_split(full_df, fold)
    logger.info('train_df %s val_df %s', train_df.shape, val_df.shape)
    test_df = pd.read_csv(opt.TEST.CSV)

    transform_train = albu.Compose([
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE,
                        width=opt.MODEL.INPUT_SIZE),
        # The whole tunable group fires with a single global probability.
        albu.Compose(_build_train_augs(params),
                     p=float(params['aug_global_prob'])),
    ])

    transform_test = albu.Compose([
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE,
                        width=opt.MODEL.INPUT_SIZE),
        albu.HorizontalFlip(),
    ])

    train_dataset = Dataset(train_df,
                            path=opt.TRAIN.PATH,
                            mode='train',
                            num_classes=opt.MODEL.NUM_CLASSES,
                            resize=False,
                            augmentor=transform_train)

    val_dataset = Dataset(
        val_df,
        path=opt.TRAIN.PATH,
        mode='val',
        num_classes=opt.MODEL.NUM_CLASSES,
        resize=False,
        num_tta=1,  # validation is never TTA-averaged
        augmentor=transform_test)
    test_dataset = Dataset(
        test_df,
        path=opt.TEST.PATH,
        mode='test',
        num_classes=opt.MODEL.NUM_CLASSES,
        resize=False,
        num_tta=opt.TEST.NUM_TTAS,
        augmentor=transform_test)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.TRAIN.BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=opt.TRAIN.WORKERS)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=opt.TRAIN.BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=opt.TRAIN.WORKERS)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=opt.TRAIN.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=opt.TRAIN.WORKERS)

    return train_loader, val_loader, test_loader