Example #1
def getDataLoader(df, image_folder, process, batch_size = 64, image_size = 224, train_weights = None, replacement = True, option_da = []):
    if process == 'train':      
        trnsfms = albumentations.Compose([
                    albumentations.Resize(height=image_size, width=image_size),
                    RGBToGray(always_apply = True) if 'gray' in option_da else Empty(),
                    albumentations.HorizontalFlip(),
                    albumentations.OneOf([
                        albumentations.RandomContrast(),
                        albumentations.RandomBrightness(),
                    ]),
                    albumentations.ShiftScaleRotate(rotate_limit=20, scale_limit=0.2),
                    albumentations.JpegCompression(80),
                    albumentations.HueSaturationValue(),
                    albumentations.Normalize(),
                    AT.ToTensor()
                ])
    elif process == 'val' or process =='test':
        trnsfms = albumentations.Compose([
                    albumentations.Resize(height=image_size, width=image_size),
                    RGBToGray(always_apply = True) if 'gray' in option_da else Empty(),
                    albumentations.Normalize(),
                    AT.ToTensor()
                ])
            
    dataset = WhaleDataLoader(image_folder = image_folder, process=process, df=df, transform=trnsfms)
    if process == 'train':  
        tr_ind = np.arange(0, df.shape[0], 1)
        train_sampler = WeightedSubsetRandomSampler(tr_ind, train_weights, replacement = replacement)
        loader  = DataLoader(dataset, batch_size=batch_size, num_workers=4, pin_memory=True, sampler=train_sampler)
    else:
        loader  = DataLoader(dataset, batch_size=batch_size, num_workers=4, pin_memory=True, shuffle=False)
    
    return loader
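
A minimal, hypothetical call of the helper above; the CSV file, image folder and per-sample weights are placeholders, not part of the original listing:

# Hypothetical usage sketch: paths, DataFrame layout and weights are assumptions.
import numpy as np
import pandas as pd

df_train = pd.read_csv("train.csv")                  # assumed training metadata
weights = np.ones(df_train.shape[0])                 # uniform per-sample weights
train_loader = getDataLoader(df_train, "train_images/", process='train',
                             batch_size=32, train_weights=weights,
                             option_da=['gray'])
val_loader = getDataLoader(df_train, "train_images/", process='val')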
Example #2
def get_training_augmentation(img):
    # img is unused here; the helper builds and returns a callable that is applied later
    train_transform = [
        albumentations.Resize(32, 32),
        AT.ToTensor()
    ]
    transforms = albumentations.Compose(train_transform)
    return lambda img: transforms(image=np.array(img))['image']
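
Albumentations pipelines are called with named arguments and return a dict, which is why the returned lambda unwraps the 'image' key. A quick, hypothetical check of the helper above (the random array stands in for a real PIL image):

# Hypothetical check; the random array is a stand-in for a PIL image.
import numpy as np
to_tensor = get_training_augmentation(None)       # the img argument is unused by the helper
dummy = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)
tensor = to_tensor(dummy)                         # CHW torch.Tensor of shape (3, 32, 32)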
Example #3
def data_loader_mask():
    """
    Converting the images for PILImage to tensor,
    so they can be accepted as the input to the network
    :return :
    """
    print("Loading Dataset")

    default_transform = albu.Compose(
        [PadDifferentlyIfNeeded(512, 512, mask_value=0),
         AT.ToTensor()])

    transform = albu.Compose([
        albu.RandomRotate90(p=1.0),
        albu.HorizontalFlip(p=0.5),
        PadDifferentlyIfNeeded(512, 512, mask_value=0),
        AT.ToTensor()
    ])

    testset_gt = ImageDataset(root=TEST_ENHANCED_IMG_DIR,
                              transform=default_transform)

    trainset_2_gt = ImageDataset(root=ENHANCED2_IMG_DIR, transform=transform)

    testset_inp = ImageDataset(root=TEST_INPUT_IMG_DIR,
                               transform=default_transform)
    trainset_1_inp = ImageDataset(root=INPUT_IMG_DIR, transform=transform)

    train_loader_cross = torch.utils.data.DataLoader(
        ConcatDataset(trainset_1_inp, trainset_2_gt),
        num_workers=NUM_WORKERS,
        batch_size=BATCH_SIZE *
        GPUS_NUM,  # Enlarge batch_size by a factor of len(device_ids)
        shuffle=True,
    )

    test_loader = torch.utils.data.DataLoader(
        ConcatDataset(testset_inp, testset_gt),
        num_workers=NUM_WORKERS,
        batch_size=BATCH_SIZE *
        GPUS_NUM,  # Enlarge batch_size by a factor of len(device_ids)
        shuffle=False)
    print("Finished loading dataset")

    return train_loader_cross, test_loader
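
A hypothetical call of data_loader_mask; the directory constants (TEST_ENHANCED_IMG_DIR and the others) must already be defined in the module:

# Hypothetical usage; the exact batch structure depends on ConcatDataset/ImageDataset.
train_loader, test_loader = data_loader_mask()
first_batch = next(iter(train_loader))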
Example #4
    def __init__(self, split):
        self.split = split
        self.aug = albumentations.Compose([
            albumentations.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            AT.ToTensor()
        ])

        if self.split == 'train':
            self.aug = albumentations.Compose([
                albumentations.HorizontalFlip(),
                albumentations.RandomBrightness(),
                #albumentations.ShiftScaleRotate(rotate_limit=15, scale_limit=0.10),
                #albumentations.HueSaturationValue(),
                albumentations.Cutout(num_holes=1, max_h_size=4, max_w_size=4, p=0.5),
                #albumentations.GaussNoise(),
                #albumentations.ElasticTransform(),
                albumentations.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                AT.ToTensor()
            ])
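
A hypothetical __getitem__ that could accompany the __init__ above; the image loading helper and the label store are placeholders, not part of the original listing:

    # Hypothetical companion method; load_image and labels are assumed attributes.
    def __getitem__(self, idx):
        image = self.load_image(idx)            # assumed helper returning an HWC uint8 array
        label = self.labels[idx]                # assumed label container
        image = self.aug(image=image)['image']  # albumentations pipelines take named arguments
        return image, label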
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        help='Settings file')
    args = parser.parse_args()
    with open(args.s) as config_file:
        config = json.load(config_file)

    path_to_out = config.get('out_path')
    is_available_cuda = config.get('cuda', False)

    test_ds = CancerDataset(csv_file=config.get('data_csv'),
                            root_dir=config.get('data'),
                            transform_image=albumentations.Compose([
                                albumentations.Resize(int(config.get('width')),
                                                      int(config.get('height'))),
                                albumentations.Normalize(),
                                AT.ToTensor()
                            ]))

    loader_test = DataLoader(test_ds, batch_size=config.get('batch_size'),
                             num_workers=1)

    submission_names = test_ds.get_train_img_names()
    model = fm.get_dense_net_121(pretrained=False)

    model.load_state_dict(torch.load(config.get('model')))
    model.eval()

    if is_available_cuda:
        model.cuda()

    predicted_labels = []
    pbar = tqdm(loader_test)
    for batch_idx, data in enumerate(pbar):
        with torch.no_grad():

            if is_available_cuda:
                data = Variable(data[0].cuda(), requires_grad=False)
            else:
                data = Variable(data[0], requires_grad=False)

            y_predicted = model(data)
            y_predicted = torch.sigmoid(y_predicted)

            for predicted in y_predicted:

                predicted_labels.append(predicted.cpu().numpy()[0])

            del data
            del y_predicted

    predicted_labels = numpy.array(predicted_labels)
    utils.generate_submission(submission_names,
                              predicted_labels, path_to_out)
Example #6
 def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.array = None,
              transforms = albu.Compose([albu.HorizontalFlip(),AT.ToTensor()]),
             preprocessing=None):
     self.df = df
     if datatype != 'test':
         self.data_folder = f"{settings.DATA_DIR}/train"
     else:
         self.data_folder = f"{settings.DATA_DIR}/test"
     self.img_ids = img_ids
     self.transforms = transforms
     self.preprocessing = preprocessing
Example #7
 def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.array = None,
              # AT has been replaced with torch
              transforms = albu.Compose([albu.HorizontalFlip(), AT.ToTensor()]),
             preprocessing=None):
     self.df = df
     if datatype != 'test':
         self.data_folder = f"{path}/train_images"
     else:
         self.data_folder = f"{path}/test_images"
     self.img_ids = img_ids
     self.transforms = transforms
     self.preprocessing = preprocessing
Example #8
 def __init__(self,
              path: str,
              df: pd.DataFrame = None,
              datatype: str = "train",
              im_ids: np.array = None,
              transforms=albu.Compose(
                  [albu.HorizontalFlip(),
                   AT.ToTensor()]),
              preprocessing=None):
     df["hasMask"] = ~df["EncodedPixels"].isna()
     self.df = df
     if datatype != "test":
         self.data_folder = os.path.join(path, "train_images")
     else:
         self.data_folder = os.path.join(path, "test_images")
     self.img_ids = im_ids
     self.transforms = transforms
     self.preprocessing = preprocessing
Example #9
 def __init__(self,
              path: str,
              df: pd.DataFrame = None,
              datatype: str = "train",
              im_ids: np.array = None,
              transforms=albu.Compose(
                  [albu.HorizontalFlip(),
                   AT.ToTensor()]),
              preprocessing=None,
              use_resized_dataset=False):
     self.df = df
     if datatype != "test":
         self.data_folder = os.path.join(path, "train_images")
     else:
         self.data_folder = os.path.join(path, "test_images")
     self.masks_folder = os.path.join(
         path, "masks")  # only when use_resized_dataset=True
     self.use_resized_dataset = use_resized_dataset
     self.img_ids = im_ids
     self.transforms = transforms
     self.preprocessing = preprocessing
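
A hypothetical instantiation of the dataset above; the class name SegmentationDataset, the CSV layout and the id column are placeholders for whatever wraps this __init__:

# Hypothetical instantiation; names, paths and columns are assumptions.
import pandas as pd
df = pd.read_csv("train.csv")
dataset = SegmentationDataset(path="data", df=df, datatype="train",
                              im_ids=df["ImageId"].values,
                              transforms=albu.Compose([albu.HorizontalFlip(p=0.5),
                                                       AT.ToTensor()]))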
Example #10
def main(params):
    wd = 4e-4

    train_transforms = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.RandomRotate90(p=0.5),
        albumentations.Transpose(p=0.5),
        albumentations.Flip(p=0.5),
        albumentations.OneOf([
            albumentations.CLAHE(clip_limit=2),
            albumentations.IAASharpen(),
            albumentations.IAAEmboss(),
            albumentations.RandomBrightness(),
            albumentations.RandomContrast(),
            albumentations.JpegCompression(),
            albumentations.Blur(),
            albumentations.GaussNoise()
        ],
                             p=0.5),
        albumentations.HueSaturationValue(p=0.5),
        albumentations.ShiftScaleRotate(shift_limit=0.15,
                                        scale_limit=0.15,
                                        rotate_limit=45,
                                        p=0.5),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    valid_transforms = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    sgd = partial(optim.Adam, lr=params.base_lr)  # note: Adam is used despite the variable name

    writer = SummaryWriter(params.model_dir + "/log")
    model = Model().cuda()

    trn_dl, val_dl = get_train_and_valid_dataload(params.data_dir,
                                                  train_transforms,
                                                  valid_transforms)

    loss_fn = nn.BCEWithLogitsLoss()
    metric = BCA()
    learner = Learner(model=model,
                      trn_dl=trn_dl,
                      val_dl=val_dl,
                      optim_fn=sgd,
                      metrics=[metric],
                      loss_fn=loss_fn,
                      callbacks=[],
                      writer=writer)
    #to_fp16(learner, 512)
    learner.callbacks.append(
        SaveBestModel(learner,
                      small_better=False,
                      name='best.pth',
                      model_dir=params.model_dir))

    epoches = params.num_epoches
    warmup_batches = 2 * len(trn_dl)
    lr1 = np.linspace(params.base_lr / 25,
                      params.base_lr,
                      num=warmup_batches,
                      endpoint=False)
    lr2 = np.linspace(params.base_lr,
                      params.base_lr / 25,
                      num=epoches * len(trn_dl) - warmup_batches)
    lrs = np.concatenate((lr1, lr2))

    lr_sched = LrScheduler(learner.optimizer, lrs)
    learner.fit(epoches, lr_sched)
Example #11
RESIZE_H = 160
RESIZE_W = 320

data_transforms = albumentations.Compose([
    albumentations.Resize(RESIZE_H, RESIZE_W),
    albumentations.HorizontalFlip(),
    albumentations.OneOf([
        albumentations.RandomContrast(),
        albumentations.RandomBrightness(),
    ]),
    albumentations.ShiftScaleRotate(rotate_limit=10, scale_limit=0.15),
    albumentations.JpegCompression(80),
    albumentations.HueSaturationValue(),
    albumentations.Normalize(),
    AT.ToTensor()
])

data_transforms_test = albumentations.Compose([
    albumentations.Resize(RESIZE_H, RESIZE_W),
    albumentations.Normalize(),
    AT.ToTensor()
])
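
A quick, hypothetical sanity check of the test pipeline above; the random array stands in for a real image:

# Hypothetical check of data_transforms_test; the input array is a stand-in image.
import numpy as np
dummy = (np.random.rand(2 * RESIZE_H, 2 * RESIZE_W, 3) * 255).astype(np.uint8)
out = data_transforms_test(image=dummy)
print(out['image'].shape)   # expected: torch.Size([3, 160, 320]) after Resize + ToTensor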


def prepare_labels(y):
    # From here: https://www.kaggle.com/pestipeti/keras-cnn-starter
    values = np.array(y)
Example #12
def get_transforms_test():
    transforms_test_list = []

    if config['transforms'] == 'pytorch':
        if config['pytorch']['resize']['test']:
            transforms_test_list.append(
                transforms.Resize(
                    size=(config['pytorch']['resize']['test_size'],
                          config['pytorch']['resize']['test_size'])))
        if config['pytorch']['centerCrop']['test']:
            transforms_test_list.append(
                transforms.CenterCrop(
                    size=(config['pytorch']['centerCrop']['test_size'],
                          config['pytorch']['centerCrop']['test_size'])))
        if config['pytorch']['randomHorizontalFlip']['test']:
            transforms_test_list.append(transforms.RandomHorizontalFlip())
        if config['pytorch']['randomRotation']['test']:
            transforms_test_list.append(
                transforms.RandomRotation(
                    degrees=config['pytorch']['randomRotation']['degrees']))
        if config['pytorch']['toTensor']['test']:
            transforms_test_list.append(transforms.ToTensor())
        if config['pytorch']['normalize']['test']:
            transforms_test_list.append(
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]))
        test_transforms = transforms.Compose(transforms_test_list)
    else:
        if config['albumentations']['resize']['test']:
            transforms_test_list.append(
                A.Resize(config['albumentations']['resize']['test_size'],
                         config['albumentations']['resize']['test_size']))
        if config['albumentations']['centerCrop']['test']:
            transforms_test_list.append(
                A.CenterCrop(
                    config['albumentations']['centerCrop']['test_size'],
                    config['albumentations']['centerCrop']['test_size']))
        if config['albumentations']['horizontalFlip']['test']:
            transforms_test_list.append(A.HorizontalFlip())
        if config['albumentations']['rotate']['test']:
            transforms_test_list.append(
                A.Rotate(config['albumentations']['rotate']['limit']))
        if config['albumentations']['clahe']['test']:
            transforms_test_list.append(A.CLAHE())
        if config['albumentations']['gaussNoise']['test']:
            transforms_test_list.append(A.GaussNoise())
        if config['albumentations']['randomBrightness']['test']:
            transforms_test_list.append(A.RandomBrightness())
        if config['albumentations']['randomContrast']['test']:
            transforms_test_list.append(A.RandomContrast())
        if config['albumentations']['randomBrightnrssContrast']['test']:
            transforms_test_list.append(A.RandomBrightnessContrast())
        if config['albumentations']['hueSaturationValue']['test']:
            transforms_test_list.append(A.HueSaturationValue())
        if config['albumentations']['normalize']['test']:
            transforms_test_list.append(
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
        if config['albumentations']['toTensor']['test']:
            transforms_test_list.append(AT.ToTensor())
        test_transforms = A.Compose(transforms_test_list)

    return test_transforms
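
get_transforms_test reads everything from a module-level config; a minimal dict accepted by the albumentations branch might look like the following (keys inferred from the lookups above, values purely illustrative):

# Illustrative config fragment; all values are assumptions, keys mirror the lookups above.
config = {
    'transforms': 'albumentations',
    'albumentations': {
        'resize': {'test': True, 'test_size': 224},
        'centerCrop': {'test': False, 'test_size': 224},
        'horizontalFlip': {'test': False},
        'rotate': {'test': False, 'limit': 15},
        'clahe': {'test': False},
        'gaussNoise': {'test': False},
        'randomBrightness': {'test': False},
        'randomContrast': {'test': False},
        'randomBrightnrssContrast': {'test': False},  # key spelled as in the source
        'hueSaturationValue': {'test': False},
        'normalize': {'test': True},
        'toTensor': {'test': True},
    },
}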
Example #13
def main(model_dir, data_dir):

    data_transforms_test = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    data_transforms_tta0 = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.RandomRotate90(p=0.5),
        albumentations.Transpose(p=0.5),
        albumentations.Flip(p=0.5),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    data_transforms_tta1 = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.RandomRotate90(p=1),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    data_transforms_tta2 = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.Transpose(p=1),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    data_transforms_tta3 = albumentations.Compose([
        #albumentations.CenterCrop(64, 64),
        albumentations.Flip(p=1),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    best_train_result_path = os.path.join(model_dir, "best.pth")
    checkpoint = torch.load(best_train_result_path)

    model = Model()
    model.load_state_dict(checkpoint["model"])
    model.cuda()
    model.eval()
    NUM_TTA = 8

    sigmoid = lambda x: scipy.special.expit(x)
    test_data_dir = os.path.join(data_dir, "test")
    submit_sample_csv = os.path.join(data_dir, "sample_submission.csv")
    result_csv = os.path.join(data_dir, "sub_tta.csv")
    for num_tta in range(NUM_TTA):
        if num_tta == 0:
            test_set = CancerDataset(datafolder=test_data_dir,
                                     datatype='test',
                                     transform=data_transforms_test)
            test_loader = torch.utils.data.DataLoader(test_set,
                                                      batch_size=batch_size,
                                                      num_workers=num_workers)
        elif num_tta == 1:
            test_set = CancerDataset(datafolder=test_data_dir,
                                     datatype='test',
                                     transform=data_transforms_tta1)
            test_loader = torch.utils.data.DataLoader(test_set,
                                                      batch_size=batch_size,
                                                      num_workers=num_workers)
        elif num_tta == 2:
            test_set = CancerDataset(datafolder=test_data_dir,
                                     datatype='test',
                                     transform=data_transforms_tta2)
            test_loader = torch.utils.data.DataLoader(test_set,
                                                      batch_size=batch_size,
                                                      num_workers=num_workers)
        elif num_tta == 3:
            test_set = CancerDataset(datafolder=test_data_dir,
                                     datatype='test',
                                     transform=data_transforms_tta3)
            test_loader = torch.utils.data.DataLoader(test_set,
                                                      batch_size=batch_size,
                                                      num_workers=num_workers)
        else:
            test_set = CancerDataset(datafolder=test_data_dir,
                                     datatype='test',
                                     transform=data_transforms_tta0)
            test_loader = torch.utils.data.DataLoader(test_set,
                                                      batch_size=batch_size,
                                                      num_workers=num_workers)

        preds = []
        for batch_i, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            output = model(data).detach()
            pr = output[:, 0].cpu().numpy()
            for i in pr:
                preds.append(sigmoid(i) / NUM_TTA)
        if num_tta == 0:
            test_preds = pd.DataFrame({
                'imgs': test_set.image_files_list,
                'preds': preds
            })
            test_preds['imgs'] = test_preds['imgs'].apply(
                lambda x: x.split('.')[0])
        else:
            test_preds['preds'] += np.array(preds)

    sub = pd.read_csv(submit_sample_csv)
    sub = pd.merge(sub, test_preds, left_on='id', right_on='imgs')
    sub = sub[['id', 'preds']]
    sub.columns = ['id', 'label']
    sub.to_csv(result_csv, index=False)
Example #14
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-s', help='Settings file')
    args = parser.parse_args()
    with open(args.s) as config_file:
        config = json.load(config_file)

    best_accuracy = -1.0
    save_path = config.get('dest_model')

    model = fm.get_dense_net_121(pretrained=True)

    src_path = config.get('src_model')
    if src_path is not None:
        model.load_state_dict(torch.load(src_path))

    is_available_cuda = config.get('cuda', False)
    if is_available_cuda:
        model.cuda()

    data_transforms = albumentations.Compose([
        albumentations.Resize(int(config.get('width')),
                              int(config.get('height'))),
        albumentations.RandomRotate90(p=0.5),
        albumentations.Transpose(p=0.5),
        albumentations.Flip(p=0.5),
        albumentations.OneOf([
            albumentations.CLAHE(clip_limit=2),
            albumentations.IAASharpen(),
            albumentations.IAAEmboss(),
            albumentations.RandomBrightness(),
            albumentations.RandomContrast(),
            albumentations.JpegCompression(),
            albumentations.Blur(),
            albumentations.GaussNoise()
        ],
                             p=0.5),
        albumentations.HueSaturationValue(p=0.5),
        albumentations.ShiftScaleRotate(shift_limit=0.15,
                                        scale_limit=0.15,
                                        rotate_limit=45,
                                        p=0.5),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    data_transforms_valid = albumentations.Compose([
        albumentations.Resize(int(config.get('width')),
                              int(config.get('height'))),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    optimizer = optim.Adam(model.parameters(), lr=0.00001)

    writer = SummaryWriter()
    train_ds = CancerDataset(
        csv_file=config.get('train_csv'),
        root_dir=config.get('data_train'),
        transform_image=data_transforms,
    )
    valid_ds = CancerDataset(csv_file=config.get('valid_csv'),
                             root_dir=config.get('data_validate'),
                             transform_image=data_transforms_valid)

    loader_validate = DataLoader(valid_ds,
                                 batch_size=config.get('batch_size'),
                                 num_workers=1)
    loader_train = DataLoader(train_ds,
                              batch_size=config.get('batch_size'),
                              num_workers=1)

    criterion = nn.BCEWithLogitsLoss()
    start_epoch = config.get('st_epoch', 0)
    for epoch in range(config.get('do_epoch', 1000)):

        utils.train(model, writer, is_available_cuda, loader_train, criterion,
                    optimizer, epoch + start_epoch)
        accuracy = utils.validate(model, writer, is_available_cuda,
                                  loader_validate, criterion,
                                  epoch + start_epoch)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = copy.deepcopy(model)
            best_model.cpu()
            pth_model = '{0}_{1}.pt'.format(save_path, epoch + start_epoch)
            torch.save(best_model.state_dict(), pth_model)
Example #15
def get_transforms_train():
    transforms_train_list = []

    if config['transforms'] == 'pytorch':
        if config['pytorch']['resize']['train']:
            transforms_train_list.append(
                transforms.Resize(
                    size=(config['pytorch']['resize']['train_size'],
                          config['pytorch']['resize']['train_size'])))
        if config['pytorch']['centerCrop']['train']:
            transforms_train_list.append(
                transforms.CenterCrop(
                    size=(config['pytorch']['centerCrop']['train_size'],
                          config['pytorch']['centerCrop']['train_size'])))
        if config['pytorch']['colorJitter']['train']:
            transforms_train_list.append(
                transforms.RandomApply([
                    transforms.ColorJitter(
                        config['pytorch']['colorJitter']['brightness'])
                ],
                                       p=0.75))
        if config['pytorch']['randomCrop']['train']:
            transforms_train_list.append(
                transforms.RandomCrop(
                    size=config['pytorch']['randomCrop']['train_size'],
                    padding=config['pytorch']['randomCrop']['padding']))
        if config['pytorch']['randomResizedCrop']['train']:
            transforms_train_list.append(
                transforms.RandomResizedCrop(
                    size=config['pytorch']['randomResizedCrop']['train_size']))
        if config['pytorch']['randomHorizontalFlip']['train']:
            transforms_train_list.append(transforms.RandomHorizontalFlip())
        if config['pytorch']['randomAffine']['train']:
            transforms_train_list.append(
                transforms.RandomAffine(
                    degrees=config['pytorch']['randomAffine']['degrees'],
                    scale=config['pytorch']['randomAffine']['scale']))
        if config['pytorch']['randomRotation']['train']:
            transforms_train_list.append(
                transforms.RandomRotation(
                    degrees=config['pytorch']['randomRotation']['degrees']))
        if config['pytorch']['toTensor']['train']:
            transforms_train_list.append(transforms.ToTensor())
        if config['pytorch']['normalize']['train']:
            transforms_train_list.append(
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]))
        if config['pytorch']['randomErasing']['train']:
            transforms_train_list.append(
                transforms.RandomErasing(
                    p=config['pytorch']['randomErasing']['p'],
                    value=config['pytorch']['randomErasing']['value']))
        train_transforms = transforms.Compose(transforms_train_list)
    else:
        if config['albumentations']['resize']['train']:
            transforms_train_list.append(
                A.Resize(config['albumentations']['resize']['train_size'],
                         config['albumentations']['resize']['train_size']))
        if config['albumentations']['centerCrop']['train']:
            transforms_train_list.append(
                A.CenterCrop(
                    config['albumentations']['centerCrop']['train_size'],
                    config['albumentations']['centerCrop']['train_size']))
        if config['albumentations']['horizontalFlip']['train']:
            transforms_train_list.append(A.HorizontalFlip())
        if config['albumentations']['rotate']['train']:
            transforms_train_list.append(
                A.Rotate(config['albumentations']['rotate']['limit']))
        if config['albumentations']['clahe']['train']:
            transforms_train_list.append(A.CLAHE())
        if config['albumentations']['gaussNoise']['train']:
            transforms_train_list.append(A.GaussNoise())
        if config['albumentations']['randomBrightness']['train']:
            transforms_train_list.append(A.RandomBrightness())
        if config['albumentations']['randomContrast']['train']:
            transforms_train_list.append(A.RandomContrast())
        if config['albumentations']['randomBrightnrssContrast']['train']:
            transforms_train_list.append(A.RandomBrightnessContrast())
        if config['albumentations']['hueSaturationValue']['train']:
            transforms_train_list.append(A.HueSaturationValue())
        if config['albumentations']['normalize']['train']:
            transforms_train_list.append(
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
        if config['albumentations']['toTensor']['train']:
            transforms_train_list.append(AT.ToTensor())
        train_transforms = A.Compose(transforms_train_list)

    return train_transforms
Example #16
def main():
    train = pd.read_csv('C:/Data/slepota_data/train.csv')
    test = pd.read_csv('C:/Data/slepota_data/test.csv')
    sample_submission = pd.read_csv(
        'C:/Data/slepota_data/sample_submission.csv')
    y, le = prepare_labels(train['diagnosis'])

    data_transforms = albumentations.Compose([
        albumentations.Resize(224, 224),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.RandomBrightness(),
        albumentations.ShiftScaleRotate(rotate_limit=15, scale_limit=0.10),
        albumentations.JpegCompression(80),
        albumentations.HueSaturationValue(),
        albumentations.Normalize(),
        AT.ToTensor()
    ])

    test_transforms = albumentations.Compose([
        albumentations.Resize(224, 224),
        albumentations.HorizontalFlip(),
        albumentations.Normalize(),
        AT.ToTensor(),
    ])  # more test-time augmentation could be added here

    train_set = GlassDataset(df=train,
                             datatype='train',
                             transform=data_transforms,
                             y=y)
    test_set = GlassDataset(df=test,
                            datatype='test',
                            transform=test_transforms)

    tr, val = train_test_split(train.diagnosis,
                               stratify=train.diagnosis,
                               test_size=0.1)

    train_sampler = SubsetRandomSampler(list(tr.index))
    validation_sampler = SubsetRandomSampler(list(val.index))

    batch_size = 64
    num_workers = 0
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=num_workers)
    valid_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              sampler=validation_sampler,
                              num_workers=num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=batch_size,
                             num_workers=num_workers)

    model = pretrainedmodels.__dict__['resnet101'](pretrained=None)

    model.avg_pool = nn.AdaptiveAvgPool2d(1)
    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(2048,
                       eps=1e-05,
                       momentum=0.1,
                       affine=True,
                       track_running_stats=True),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=2048, out_features=2048, bias=True),
        nn.ReLU(),
        nn.BatchNorm1d(2048,
                       eps=1e-05,
                       momentum=0.1,
                       affine=True,
                       track_running_stats=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=2048, out_features=1, bias=True),
    )
    model.load_state_dict(torch.load("../input/mmmodel/model.bin"))
    model = model.to(device)

    for param in model.parameters():
        param.requires_grad = False

    model.eval()

    valid_predictions = np.zeros((len(val), 1))
    tk_valid = tqdm(valid_loader)
    for i, x_batch in enumerate(tk_valid):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        valid_predictions[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    # note: this torchvision pipeline is defined here but never used below
    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    test_preds1 = np.zeros((len(test_set), 1))
    tk0 = tqdm(test_loader)
    for i, x_batch in enumerate(tk0):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        test_preds1[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    test_preds2 = np.zeros((len(test_set), 1))
    tk0 = tqdm(test_loader)
    for i, x_batch in enumerate(tk0):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        test_preds2[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    test_preds3 = np.zeros((len(test_set), 1))
    tk0 = tqdm(test_loader)
    for i, x_batch in enumerate(tk0):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        test_preds3[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    test_preds4 = np.zeros((len(test_set), 1))
    tk0 = tqdm(test_loader)
    for i, x_batch in enumerate(tk0):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        test_preds4[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    test_preds5 = np.zeros((len(test_set), 1))
    tk0 = tqdm(test_loader)
    for i, x_batch in enumerate(tk0):
        x_batch = x_batch["image"]
        pred = model(x_batch.to(device))
        test_preds5[i * batch_size:(i + 1) * batch_size] = \
            pred.detach().cpu().squeeze().numpy().ravel().reshape(-1, 1)

    test_predictions = (test_preds1 + test_preds2 + test_preds3 + test_preds4 +
                        test_preds5) / 5.0

    optR = kappa_optimizer.OptimizerRounder()
    optR.fit(valid_predictions, y)
    coefficients = optR.coefficient()
    valid_predictions = optR.predict(valid_predictions, coefficients)
    test_predictions = optR.predict(test_predictions, coefficients)

    sample_submission.diagnosis = test_predictions.astype(int)
    sample_submission.to_csv("submission.csv", index=False)
    print('Validation prediction: ', valid_predictions)
    print('Validation score: ',
          metrics.cohen_kappa_score(y, valid_predictions, weights='quadratic'))