def main(args):
    i_path = args.input_path
    m_path = args.mask_path
    bg_path = args.bg_path
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    camouflage_dir = args.output_dir
    os.makedirs(camouflage_dir, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    VGG = models.vgg19(pretrained=True).features
    VGG.to(device)

    for parameter in VGG.parameters():
        parameter.requires_grad_(False)

    style_net = HRNet.HRNet()
    style_net.to(device)

    transform = Compose([
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

    # Give the foreground content layers more weight so that more detail is preserved in the output image
    style_weights = args.style_weight_dic

    mask = cv2.imread(m_path, 0)
    mask = scaling(mask, scale=args.mask_scale)

    if args.crop:
        idx_y, idx_x = np.where(mask > 0)
        x1_m, y1_m = np.min(idx_x), np.min(idx_y)
        x2_m, y2_m = np.max(idx_x), np.max(idx_y)
    else:
        x1_m, y1_m = 0, 0
        y2_m, x2_m = mask.shape
        x2_m, y2_m = 8 * (x2_m // 8), 8 * (y2_m // 8)

    x1_m = 8 * (x1_m // 8)
    x2_m = 8 * (x2_m // 8)
    y1_m = 8 * (y1_m // 8)
    y2_m = 8 * (y2_m // 8)

    fore_origin = cv2.cvtColor(cv2.imread(i_path), cv2.COLOR_BGR2RGB)
    fore_origin = scaling(fore_origin, scale=args.mask_scale)
    fore = fore_origin[y1_m:y2_m, x1_m:x2_m]

    mask_crop = mask[y1_m:y2_m, x1_m:x2_m]
    mask_crop = np.where(mask_crop > 0, 255, 0).astype(np.uint8)
    kernel = np.ones((15, 15), np.uint8)
    mask_dilated = cv2.dilate(mask_crop, kernel, iterations=1)
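    # the dilated mask minus the original mask gives a thin boundary band
    # around the object; that band is later kept from the background so the
    # pasted region blends in (the "boundary conceal" step in the training loop)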

    origin = cv2.cvtColor(cv2.imread(bg_path), cv2.COLOR_BGR2RGB)
    h_origin, w_origin, _ = origin.shape
    h, w = mask_dilated.shape
    assert h < h_origin, "mask height must be smaller than the background height; lower the mask_scale parameter"
    assert w < w_origin, "mask width must be smaller than the background width; lower the mask_scale parameter"

    print("mask size,height:{},width:{}".format(h, w))
    if args.hidden_selected is None:
        y_start, x_start = recommend(origin, fore, mask_dilated)
    else:
        y_start, x_start = args.hidden_selected

    x1, y1 = x_start + x1_m, y_start + y1_m
    x2, y2 = x1 + w, y1 + h
    if y2 > h_origin:
        y1 -= (y2 - h_origin)
        y2 = h_origin
    if x2 > w_origin:
        x1 -= (x2 - w_origin)
        x2 = w_origin

    print("hidden region...,height-{}:{},width-{}:{}".format(y1, y2, x1, x2))
    mat_dilated = fore * np.expand_dims(
        mask_crop / 255, axis=-1) + origin[y1:y2, x1:x2] * np.expand_dims(
            (mask_dilated - mask_crop) / 255, axis=-1)
    bg = origin.copy()
    bg[y1:y2,
       x1:x2] = fore * np.expand_dims(mask_crop / 255, axis=-1) + origin[
           y1:y2, x1:x2] * np.expand_dims(1 - mask_crop / 255, axis=-1)

    content_image = transform(image=mat_dilated)["image"].unsqueeze(0)
    style_image = transform(image=origin[y1:y2, x1:x2])["image"].unsqueeze(0)
    content_image = content_image.to(device)
    style_image = style_image.to(device)

    style_features = get_features(style_image, VGG, mode="style")
    if args.style_all:
        style_image_all = transform(
            image=origin)["image"].unsqueeze(0).to(device)
        style_features = get_features(style_image_all, VGG, mode="style")

    style_gram_matrixs = {}
    style_index = {}
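    # NOTE: gram_matrix_slice is a helper defined elsewhere in this repo.
    # Judging from how it is called below, it presumably flattens the feature
    # map to (C, H*W), keeps only the masked spatial positions, and returns
    # the C x C Gram matrix, roughly:
    #   f = sf.view(sf.shape[1], -1)[:, sf_idxes]
    #   gram = torch.mm(f, f.t())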
    for layer in style_features:
        sf = style_features[layer]
        _, _, h_sf, w_sf = sf.shape
        mask_sf = (cv2.resize(mask_dilated, (w_sf, h_sf))).flatten()
        sf_idxes = np.where(mask_sf > 0)[0]
        gram_matrix = gram_matrix_slice(sf, sf_idxes)
        style_gram_matrixs[layer] = gram_matrix
        style_index[layer] = sf_idxes

    target = content_image.clone().requires_grad_(True).to(device)

    foreground_features = get_features(content_image, VGG, mode="camouflage")
    target_features = foreground_features.copy()
    attention_layers = [
        "conv3_1",
        "conv3_2",
        "conv3_3",
        "conv3_4",
        "conv4_1",
        "conv4_2",
        "conv4_3",
        "conv4_4",
    ]

    for u, layer in enumerate(attention_layers):
        # content image's feature map after this layer
        target_feature = target_features[layer].detach().cpu().numpy()
        attention = attention_map_cv(target_feature)
        h, w = attention.shape
        if "conv3" in layer:
            attention = cv2.resize(attention, (w // 2, h // 2)) * 1 / 4
        if u == 0:
            all_attention = attention
        else:
            all_attention += attention
    all_attention /= 5
    max_att, min_att = np.max(all_attention), np.min(all_attention)
    all_attention = (all_attention - min_att) / (max_att - min_att)
    if args.erode_border:
        h, w = all_attention.shape
        mask_erode = cv2.erode(mask_crop, kernel, iterations=3)
        mask_erode = cv2.resize(mask_erode, (w, h))
        mask_erode = np.where(mask_erode > 0, 1, 0)
        all_attention = all_attention * mask_erode

    foreground_attention = torch.from_numpy(all_attention.astype(
        np.float32)).clone().to(device).unsqueeze(0).unsqueeze(0)
    b, ch, h, w = foreground_features["conv4_1"].shape
    mask_f = cv2.resize(mask_dilated, (w, h)) / 255
    idx = np.where(mask_f > 0)
    size = len(idx[0])
    mask_f = torch.from_numpy(mask_f.astype(
        np.float32)).clone().to(device).unsqueeze(0).unsqueeze(0)

    foreground_chi = foreground_features["conv4_1"] * foreground_attention
    foreground_chi = foreground_chi.detach().cpu().numpy()[0].transpose(
        1, 2, 0)
    foreground_cosine = cosine_distances(foreground_chi[idx])

    background_features = get_features(style_image, VGG, mode="camouflage")

    idxes = np.where(mask_dilated > 0)
    n_neighbors, n_jobs, reg = 7, None, 1e-3
    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
    X_origin = origin[y1:y2, x1:x2][idxes] / 255
    nbrs.fit(X_origin)
    X = nbrs._fit_X
    Weight_Matrix = barycenter_kneighbors_graph(nbrs,
                                                n_neighbors=n_neighbors,
                                                reg=reg,
                                                n_jobs=n_jobs)
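    # barycenter_kneighbors_graph (LLE-style barycenter weights) computes, for
    # each masked background pixel, the weights that best reconstruct its RGB
    # value from its k nearest neighbours; Weight_Matrix is reused in the
    # regularization loss below to keep the generated colours locally
    # consistent in the same way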

    idx_new = np.where(idxes[0] < (y2 - y1 - 1))
    idxes_h = (idxes[0][idx_new], idxes[1][idx_new])
    idx_new = np.where(idxes[1] < (x2 - x1 - 1))
    idxes_w = (idxes[0][idx_new], idxes[1][idx_new])

    mask_norm = mask_crop / 255.
    mask_norm_torch = torch.from_numpy(
        (mask_norm).astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)
    boundary = (mask_dilated - mask_crop) / 255
    boundary = torch.from_numpy(
        (boundary).astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)

    content_loss_epoch = []
    style_loss_epoch = []
    total_loss_epoch = []
    time_start = datetime.datetime.now()
    epoch = 0
    show_every = args.show_every
    optimizer = optim.Adam(style_net.parameters(), lr=args.lr)
    steps = args.epoch
    mse = nn.MSELoss()
    while epoch <= steps:
        #############################
        ### boundary conceal ########
        #############################
        target = style_net(content_image).to(device)
        target = content_image * boundary + target * mask_norm_torch
        target.requires_grad_(True)

        target_features = get_features(
            target, VGG)  # extract output image's all feature maps

        #############################
        ### content loss    #########
        #############################
        target_features_content = get_features(target, VGG, mode="content")
        content_loss = torch.sum((target_features_content['conv4_2'] -
                                  foreground_features['conv4_2'])**2) / 2
        content_loss *= args.lambda_weights["content"]

        #############################
        ### style loss      #########
        #############################
        style_loss = 0

        # compute each layer's style loss and add them
        for layer in style_weights:
            target_feature = target_features[
                layer]  # output image's feature map after layer
            #target_gram_matrix = get_gram_matrix(target_feature)
            target_gram_matrix = gram_matrix_slice(target_feature,
                                                   style_index[layer])
            style_gram_matrix = style_gram_matrixs[layer]
            b, c, h, w = target_feature.shape
            layer_style_loss = style_weights[layer] * torch.sum(
                (target_gram_matrix - style_gram_matrix)**2) / (
                    (2 * c * w * h)**2)
            #layer_style_loss = style_weights[layer] * torch.mean((target_gram_matrix - style_gram_matrix) ** 2)
            style_loss += layer_style_loss

        style_loss *= args.lambda_weights["style"]

        #############################
        ### camouflage loss #########
        #############################
        target_chi = target_features["conv4_1"] * foreground_attention
        target_chi = target_chi.detach().cpu().numpy()[0].transpose(1, 2, 0)
        target_cosine = cosine_distances(target_chi[idx])

        leave_loss = (np.mean(np.abs(target_cosine - foreground_cosine)) / 2)
        leave_loss = torch.Tensor([leave_loss]).to(device)
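        # leave_loss preserves the pairwise (cosine-distance) structure of the
        # attended foreground features; remove_loss below pushes the
        # non-attended foreground features toward the background features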

        remove_matrix = (1.0 - foreground_attention) * mask_f * (
            target_features["conv4_1"] - background_features["conv4_1"])
        r_min, r_max = torch.min(remove_matrix), torch.max(remove_matrix)
        remove_matrix = (remove_matrix - r_min) / (r_max - r_min)
        remove_loss = (torch.mean(remove_matrix**2) / 2).to(device)

        camouflage_loss = leave_loss + args.mu * remove_loss
        camouflage_loss *= args.lambda_weights["cam"]

        #############################
        ### regularization loss #####
        #############################

        target_renormalize = target.detach().cpu().numpy()[0, :].transpose(
            1, 2, 0)
        target_renormalize = target_renormalize * np.array(
            (0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
        target_renormalize = target_renormalize.clip(0, 1)[idxes]
        target_reconst = torch.from_numpy(
            (Weight_Matrix * target_renormalize).astype(np.float32))
        target_renormalize = torch.from_numpy(
            target_renormalize.astype(np.float32))
        reg_loss = mse(target_renormalize, target_reconst).to(device)
        reg_loss *= args.lambda_weights["reg"]

        #############################
        ### total variation loss ####
        #############################
        tv_h = torch.pow(target[:, :, 1:, :] - target[:, :, :-1, :],
                         2).detach().cpu().numpy()[0].transpose(1, 2, 0)
        tv_w = torch.pow(target[:, :, :, 1:] - target[:, :, :, :-1],
                         2).detach().cpu().numpy()[0].transpose(1, 2, 0)
        tv_h_mask = (tv_h[:, :, 0][idxes_h] + tv_h[:, :, 1][idxes_h] +
                     tv_h[:, :, 2][idxes_h])
        tv_w_mask = (tv_w[:, :, 0][idxes_w] + tv_w[:, :, 1][idxes_w] +
                     tv_w[:, :, 2][idxes_w])
        tv_loss = torch.from_numpy(
            (np.array(np.mean(np.concatenate([tv_h_mask,
                                              tv_w_mask]))))).to(device)
        tv_loss *= args.lambda_weights["tv"]

        total_loss = content_loss + style_loss + camouflage_loss + reg_loss + tv_loss
        total_loss_epoch.append(total_loss)

        style_loss_epoch.append(style_loss)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if epoch % show_every == 0:
            print("After %d criterions:" % epoch)
            print('Total loss: ', total_loss.item())
            print('Style loss: ', style_loss.item())
            print('camouflage loss: ', camouflage_loss.item())
            print('camouflage loss leave: ', leave_loss.item())
            print('camouflage loss remove: ', remove_loss.item())
            print('regularization loss: ', reg_loss.item())
            print('total variation loss: ', tv_loss.item())
            print('content loss: ', content_loss.item())
            print("elapsed time:{}".format(datetime.datetime.now() -
                                           time_start))
            canvas = origin.copy()
            fore_gen = im_convert(target) * 255.
            sub_canvas = np.vstack(
                [mat_dilated, fore_gen, origin[y1:y2, x1:x2]])
            canvas[y1:y2, x1:x2] = fore_gen * np.expand_dims(
                mask_norm, axis=-1) + origin[y1:y2, x1:x2] * np.expand_dims(
                    1.0 - mask_norm, axis=-1)
            canvas = canvas.astype(np.uint8)
            if args.save_process:
                new_path = os.path.join(
                    camouflage_dir, "{}_epoch{}.png".format(args.name, epoch))
                cv2.imwrite(new_path, cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
            cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 10)
            cv2.rectangle(canvas, (x1 - x1_m, y1 - y1_m), (x2, y2),
                          (255, 255, 0), 10)
            canvas = np.vstack([canvas, bg])
            h_c, w_c, _ = canvas.shape
            h_s, w_s, _ = sub_canvas.shape
            sub_canvas = cv2.resize(sub_canvas, (int(w_s * (h_c / h_s)), h_c))
            canvas = np.hstack([sub_canvas, canvas])
            canvas = canvas.astype(np.uint8)
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)
            h_show, w_show, c = canvas.shape
            cv2.imshow(
                "now camouflage...",
                cv2.resize(
                    canvas,
                    (w_show // args.show_comp, h_show // args.show_comp)))

        epoch += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    time_end = datetime.datetime.now()
    print('totally cost:{}'.format(time_end - time_start))
    new_path = os.path.join(camouflage_dir, "{}.png".format(args.name))
    canvas = origin.copy()
    fore_gen = im_convert(target) * 255.
    canvas[y1:y2,
           x1:x2] = fore_gen * np.expand_dims(mask_norm, axis=-1) + origin[
               y1:y2, x1:x2] * np.expand_dims(1.0 - mask_norm, axis=-1)
    canvas = canvas.astype(np.uint8)
    canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)
    cv2.imwrite(new_path, canvas)
Example #2
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.df = dataframe
        self.image_ids = dataframe['image_id'].unique()
        self.image_ids = shuffle(self.image_ids)
        self.labels = [np.zeros(
            (0, 5), dtype=np.float32)] * len(self.image_ids)
        self.img_size = 1024
        im_w = 1024
        im_h = 1024
        for i, img_id in enumerate(self.image_ids):
            records = self.df[self.df['image_id'] == img_id]
            boxes = records[['x', 'y', 'w', 'h']].values
            boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
            boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
            boxesyolo = []
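            # convert [x1, y1, x2, y2] pixel boxes to normalized YOLO boxes:
            # (class, x_center, y_center, width, height)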
            for box in boxes:
                x1, y1, x2, y2 = box
                xc = 0.5 * x1 / im_w + 0.5 * x2 / im_w
                yc = 0.5 * y1 / im_h + 0.5 * y2 / im_h
                w = abs(x2 / im_w - x1 / im_w)
                h = abs(y2 / im_h - y1 / im_h)
                boxesyolo.append([1, xc, yc, w, h])
            self.labels[i] = np.array(boxesyolo)

        self.image_dir = image_dir
        self.transforms = transforms

        self.mosaic = False
        self.augment = True

        self.aug = A.Compose(
            [
                A.Resize(config.CROP_SIZE, config.CROP_SIZE,
                         always_apply=True),
                A.OneOf([
                    A.RandomBrightnessContrast(brightness_limit=0.4,
                                               contrast_limit=0.4),
                    A.RandomGamma(gamma_limit=(50, 150)),
                    A.NoOp()
                ]),
                A.OneOf([
                    A.RGBShift(
                        r_shift_limit=20, b_shift_limit=15, g_shift_limit=15),
                    A.HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5),
                    A.NoOp()
                ]),
                A.OneOf([A.ChannelShuffle(),
                         A.CLAHE(clip_limit=4),
                         A.NoOp()]),
                A.OneOf([A.JpegCompression(),
                         A.Blur(blur_limit=4),
                         A.NoOp()]),
                A.OneOf([A.ToGray(), A.ToSepia(),
                         A.NoOp()], p=0.2),
                A.GaussNoise(),
                A.Cutout(num_holes=8,
                         max_h_size=64,
                         max_w_size=64,
                         fill_value=0,
                         p=0.5),
                A.Normalize(
                    config.MODEL_MEAN, config.MODEL_STD, always_apply=True),
                ToTensorV2(p=1.0)
            ],
            bbox_params={
                'format': config.DATA_FMT,
                'min_area': 1,
                'min_visibility': 0.5,
                'label_fields': ['labels']
            },
            p=1.0)
Example #3
def main(cfg: DictConfig):

    # This is here to collapse the code in VS Code
    if True:

        # Setup
        print = logging.getLogger(__name__).info
        print(OmegaConf.to_yaml(cfg))
        pl.seed_everything(cfg.seed)

        # Create validation and test segmentation datasets
        # NOTE: The batch size must be 1 for test because the masks are different sizes,
        # and evaluation should be done using the mask at the original resolution.
        val_dataloaders = []
        test_dataloaders = []
        for _cfg in cfg.data_seg.data:
            kwargs = dict(images_dir=_cfg.images_dir,
                          labels_dir=_cfg.labels_dir,
                          image_size=cfg.data_seg.image_size)
            val_dataset = SegmentationDataset(**kwargs, crop=True)
            test_dataset = SegmentationDataset(**kwargs,
                                               crop=_cfg.crop,
                                               resize_mask=False)
            val_dataloaders.append(DataLoader(val_dataset, **cfg.dataloader))
            test_dataloaders.append(
                DataLoader(test_dataset, **{
                    **cfg.dataloader, 'batch_size': 1
                }))

    # Evaluate only
    if not cfg.train:
        assert cfg.eval_checkpoint is not None

        # Print dataset info
        for i, dataloader in enumerate(test_dataloaders):
            dataset = dataloader.dataset
            print(
                f'Test dataset / dataloader size [{i}]: {len(dataset)} / {len(dataloader)}'
            )

        # Create trainer
        trainer = pl.Trainer(**cfg.trainer)

        # Load checkpoint(s)
        net = UNet().eval()
        checkpoint = torch.load(cfg.eval_checkpoint, map_location='cpu')
        state_dict = {
            k.replace('net.', ''): v
            for k, v in checkpoint["state_dict"].items()
        }
        net.load_state_dict(state_dict)
        print(f'Loaded checkpoint from {cfg.eval_checkpoint}')

        # Create module
        module = SementationModule(net, cfg).eval()

        # Compute test results
        trainer.test(module, test_dataloaders=test_dataloaders)

        # Pretty print results
        table = utils.get_metrics_as_table(trainer.callback_metrics)
        print('\n' + str(table.round(decimals=3)))

    # Train
    else:

        # Generated images: load from disk
        if cfg.data_gen.load_from_disk:
            print('Loading images from disk')

            # Transforms
            train_transform = val_transform = A.Compose([
                A.Resize(cfg.data_gen.image_size, cfg.data_gen.image_size),
                A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
                ToTensorV2()
            ])

            # Loaders
            gan_train_dataloader, gan_val_dataloader = create_train_and_val_dataloaders(
                cfg,
                train_transform=train_transform,
                val_transform=val_transform)

        # Generated images: generate on the fly
        else:
            print('Loading images on-the-fly')

            # Create GAN dataset
            gan_train_dataset = create_gan_dataset(cfg.data_gen)

            # GAN training dataloader
            # NOTE: Only 1 process (num_workers=0) supported
            gan_train_dataloader = DataLoader(gan_train_dataset, batch_size=1)

            # Load or create GAN validation batches
            print('Creating new GAN validation set.')
            num_batches = max(
                1, cfg.data_gen.val_images // cfg.data_gen.kwargs.batch_size)
            gan_val_batches = utils.get_subset_of_dataset(
                dataset=gan_train_dataset, num_batches=num_batches)
            gan_val_dataset = TensorDataset(*gan_val_batches)

            # Save example images from GAN validation dataset
            fname = 'generated-val-examples.png'
            utils.save_overlayed_images(gan_val_batches,
                                        filename=fname,
                                        is_mask=True)
            print(f'Saved visualization images to {fname}')

            # Validation dataloader
            gan_val_dataloader = DataLoader(gan_val_dataset, **cfg.dataloader)

        # Summary of dataset/dataloader sizes
        print(f'Generated train {utils.get_dl_size(gan_train_dataloader)}')
        print(f'Generated val {utils.get_dl_size(gan_val_dataloader)}')
        for i, dl in enumerate(val_dataloaders):
            print(f'Seg val [{i}] {utils.get_dl_size(dl)}')

        # Validation dataloaders
        val_dataloaders = [gan_val_dataloader, *val_dataloaders]

        # Checkpointer
        callbacks = [
            pl.callbacks.ModelCheckpoint(monitor='train_loss',
                                         save_top_k=20,
                                         save_last=True,
                                         verbose=True),
            pl.callbacks.LearningRateMonitor('step')
        ]

        # Logging
        logger = pl.loggers.WandbLogger(name=cfg.name) if cfg.wandb else True

        # Trainer
        trainer = pl.Trainer(logger=logger, callbacks=callbacks, **cfg.trainer)

        # Lightning
        net = UNet().train()
        module = SementationModule(net, cfg)

        # Train
        trainer.fit(module,
                    train_dataloader=gan_train_dataloader,
                    val_dataloaders=val_dataloaders)

        # Test
        trainer.test(module, test_dataloaders=test_dataloaders)

        # Pretty print results
        table = utils.get_metrics_as_table(trainer.callback_metrics)
        print('\n' + str(table.round(decimals=3)))
Example #4
VAL_IMG_DIR = "data/val/"
VAL_MASK_DIR = "data/val_masks/"
CHECKPOINT_PTH = "my_checkpoint.pth.tar"
SAVE_IMAGES = "saved_images/"

train_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

val_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)
Example #5
    val_labels = labels[val_indices]

    #augmentations are carefully chosen such that the amount of distortion would not
    #transform an otherwise "informative" patch into an "uninformative" patch (for example, by making it low contrast)
    imsize = 224
    normalize = Normalize()  #default is imagenet normalization
    tfs = Compose([
        Resize(imsize, imsize),
        RandomBrightnessContrast(brightness_limit=0.2,
                                 contrast_limit=0.2,
                                 p=0.5),
        GaussNoise(var_limit=(40, 100.0), p=0.5),
        GaussianBlur(blur_limit=5, p=0.5),
        HorizontalFlip(),
        VerticalFlip(), normalize,
        ToTensorV2()
    ])

    eval_tfs = Compose([Resize(imsize, imsize), normalize, ToTensorV2()])

    #make a basic dataset class for loading and augmenting images
    class SimpleDataset(Dataset):
        def __init__(self, imfiles, labels, tfs=None):
            super(SimpleDataset, self).__init__()
            self.imfiles = imfiles
            self.labels = labels
            self.tfs = tfs

        def __len__(self):
            return len(self.imfiles)
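
        # NOTE: the snippet is cut off here; a typical __getitem__ for such a
        # dataset (a sketch, not part of the original) might look like:
        #
        #   def __getitem__(self, idx):
        #       image = cv2.imread(self.imfiles[idx])
        #       image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        #       if self.tfs is not None:
        #           image = self.tfs(image=image)["image"]
        #       return image, self.labels[idx]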
import albumentations as A
from albumentations.pytorch import ToTensorV2

train_transformation = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(rotate_limit=(-15, -15), border_mode=1, p=0.5),
    A.OneOf([
        A.RandomBrightnessContrast(p=0.5,
                                   brightness_limit=(-0.1, 0.1),
                                   contrast_limit=(-0.1, 0.1),
                                   brightness_by_max=True),
        A.Equalize(p=0.5, mode='cv', by_channels=True),
    ],
            p=0.5),
    A.RandomCrop(224, 224, p=1.0),
    ToTensorV2(p=1.0)
])

test_transformation = A.Compose([A.Resize(224, 224, p=1.0), ToTensorV2(p=1.0)])
train_transforms = A.Compose([
    A.RandomResizedCrop(train_img_size,
                        train_img_size,
                        scale=(0.7, 1.0),
                        ratio=(0.9, 1.1)),
    A.OneOf([
        A.RandomRotate90(),
        A.Flip(),
        A.ShiftScaleRotate(shift_limit=0.0625,
                           scale_limit=0.0,
                           rotate_limit=45,
                           interpolation=1)
    ]),
    A.CoarseDropout(max_holes=4, max_height=64, max_width=64),
    A.Normalize(mean=mean, std=std, max_pixel_value=max_value),
    ToTensorV2()
])

val_transforms = A.Compose(
    [A.Normalize(mean=mean, std=std, max_pixel_value=max_value),
     ToTensorV2()])

train_loader, val_loader, train_eval_loader = get_train_val_loaders(
    train_ds,
    val_ds,
    train_transforms=train_transforms,
    val_transforms=val_transforms,
    batch_size=batch_size,
    num_workers=num_workers,
    val_batch_size=val_batch_size,
    pin_memory=True,
Example #8
def train_model(args, device, parallel):
    # TODO more options of network
    model = StackMTLNet.StackHourglassNetMTL(args['task1_classes'],
                                             args['task2_classes'],
                                             args['backbone'])
    log_dir = os.path.join(args['trainer']['save_dir'], 'log')
    writer = SummaryWriter(log_dir=log_dir)
    try:
        writer.add_graph(
            model, torch.rand(1, 3, *eval(args['dataset']['input_size'])))
    except (RuntimeError, TypeError):
        print(
            'Warning: could not write graph to tensorboard, this might be a bug in tensorboardX'
        )
    if parallel:
        model.encoder = nn.DataParallel(
            model.encoder,
            device_ids=[a for a in range(len(args['gpu'].split(',')))])
        model.decoder = nn.DataParallel(
            model.decoder,
            device_ids=[a for a in range(len(args['gpu'].split(',')))])

    start_epoch = 0
    if args['resume_dir'] != 'None':
        print('Resume training from {}'.format(args['resume_dir']))
        ckpt = torch.load(args['resume_dir'])
        start_epoch = ckpt['epoch']
        network_utils.load(model, args['resume_dir'], disable_parallel=True)
    elif args['finetune_dir'] != 'None':
        print('Finetune model from {}'.format(args['finetune_dir']))
        network_utils.load(model, args['finetune_dir'], disable_parallel=True)

    model.to(device)

    # make optimizer
    train_params = [{
        'params': model.encoder.parameters(),
        'lr': args['optimizer']['e_lr']
    }, {
        'params': model.decoder.parameters(),
        'lr': args['optimizer']['d_lr']
    }]
    optm = optim.SGD(train_params,
                     lr=args['optimizer']['e_lr'],
                     momentum=0.9,
                     weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optm,
        milestones=eval(args['optimizer']['lr_drop_epoch']),
        gamma=args['optimizer']['lr_step'])
    angle_weights = torch.ones(args['task2_classes']).to(device)
    road_weights = torch.tensor(
        [1 - args['task1_classes'], args['task1_classes']],
        dtype=torch.float).to(device)
    angle_loss = metric_utils.CrossEntropyLoss2d(
        weight=angle_weights).to(device)
    road_loss = metric_utils.mIoULoss(weight=road_weights).to(device)
    iou_loss = metric_utils.IoU().to(device)

    # prepare training
    print('Total params: {:.2f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # make data loader
    mean = eval(args['dataset']['mean'])
    std = eval(args['dataset']['std'])
    tsfm_train = A.Compose([
        A.Flip(),
        A.RandomRotate90(),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])
    tsfm_valid = A.Compose([
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])
    train_loader = DataLoader(loader.TransmissionDataLoader(
        args['dataset']['data_dir'],
        args['dataset']['train_file'],
        transforms=tsfm_train),
                              batch_size=args['dataset']['batch_size'],
                              shuffle=True,
                              num_workers=args['dataset']['workers'])
    valid_loader = DataLoader(loader.TransmissionDataLoader(
        args['dataset']['data_dir'],
        args['dataset']['valid_file'],
        transforms=tsfm_valid),
                              batch_size=args['dataset']['batch_size'],
                              shuffle=False,
                              num_workers=args['dataset']['workers'])
    print('Start training model')
    train_val_loaders = {'train': train_loader, 'valid': valid_loader}

    # train the model
    for epoch in range(start_epoch, args['trainer']['total_epochs']):
        for phase in ['train', 'valid']:
            start_time = timeit.default_timer()
            if phase == 'train':
                model.train()
                scheduler.step()
            else:
                model.eval()

            loss_dict = model.step(train_val_loaders[phase], device, optm,
                                   phase, road_loss, angle_loss, iou_loss,
                                   True, mean, std)
            misc_utils.write_and_print(writer, phase, epoch,
                                       args['trainer']['total_epochs'],
                                       loss_dict, start_time)

        # save the model
        if epoch % args['trainer']['save_epoch'] == (
                args['trainer']['save_epoch'] - 1):
            save_name = os.path.join(args['trainer']['save_dir'],
                                     'epoch-{}.pth.tar'.format(epoch))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optm.state_dict(),
                    'loss': loss_dict,
                }, save_name)
            print('Saved model at {}'.format(save_name))
    writer.close()
Example #9
def get_transform_imagenet(use_albu_aug):
    if use_albu_aug:
        train_transform = al.Compose([
            # al.Flip(p=0.5),
            al.Resize(256, 256, interpolation=2),
            al.RandomResizedCrop(224,
                                 224,
                                 scale=(0.08, 1.0),
                                 ratio=(3. / 4., 4. / 3.),
                                 interpolation=2),
            al.HorizontalFlip(),
            al.OneOf(
                [
                    al.OneOf(
                        [
                            al.ShiftScaleRotate(
                                border_mode=cv2.BORDER_CONSTANT,
                                rotate_limit=30),  # , p=0.05),
                            al.OpticalDistortion(
                                border_mode=cv2.BORDER_CONSTANT,
                                distort_limit=5.0,
                                shift_limit=0.1),
                            # , p=0.05),
                            al.GridDistortion(border_mode=cv2.BORDER_CONSTANT
                                              ),  # , p=0.05),
                            al.ElasticTransform(
                                border_mode=cv2.BORDER_CONSTANT,
                                alpha_affine=15),  # , p=0.05),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            al.RandomGamma(),  # p=0.05),
                            al.HueSaturationValue(),  # p=0.05),
                            al.RGBShift(),  # p=0.05),
                            al.CLAHE(),  # p=0.05),
                            al.ChannelShuffle(),  # p=0.05),
                            al.InvertImg(),  # p=0.05),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            al.RandomSnow(),  # p=0.05),
                            al.RandomRain(),  # p=0.05),
                            al.RandomFog(),  # p=0.05),
                            al.RandomSunFlare(num_flare_circles_lower=1,
                                              num_flare_circles_upper=2,
                                              src_radius=110),
                            # p=0.05, ),
                            al.RandomShadow(),  # p=0.05),
                        ],
                        p=0.1),
                    al.RandomBrightnessContrast(p=0.1),
                    al.OneOf(
                        [
                            al.GaussNoise(),  # p=0.05),
                            al.ISONoise(),  # p=0.05),
                            al.MultiplicativeNoise(),  # p=0.05),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            al.ToGray(),  # p=0.05),
                            al.ToSepia(),  # p=0.05),
                            al.Solarize(),  # p=0.05),
                            al.Equalize(),  # p=0.05),
                            al.Posterize(),  # p=0.05),
                            al.FancyPCA(),  # p=0.05),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            # al.MotionBlur(blur_limit=1),
                            al.Blur(blur_limit=[3, 5]),
                            al.MedianBlur(blur_limit=[3, 5]),
                            al.GaussianBlur(blur_limit=[3, 5]),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            al.CoarseDropout(),  # p=0.05),
                            al.Cutout(),  # p=0.05),
                            al.GridDropout(),  # p=0.05),
                            al.ChannelDropout(),  # p=0.05),
                            al.RandomGridShuffle(),  # p=0.05),
                        ],
                        p=0.1),
                    al.OneOf(
                        [
                            al.Downscale(),  # p=0.1),
                            al.ImageCompression(quality_lower=60),  # , p=0.1),
                        ],
                        p=0.1),
                ],
                p=0.5),
            al.Normalize(),
            ToTensorV2()
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    if use_albu_aug:
        train_transform = MultiDataTransformAlbu(train_transform)
    else:
        train_transform = MultiDataTransform(train_transform)

    return train_transform, test_transform
Example #11
def main():
    # Define the augmentation pipeline
    augmentation_pipeline_train = A.Compose(
        [
            A.Resize(width=512, height=512),
            A.HorizontalFlip(p=0.5),  # apply horizontal flip to 50% of images
            A.Rotate(limit=90,
                     p=0.5),  # apply random with limit of 90° to 50% of images
            A.OneOf(
                [
                    # apply one of transforms to 30% of images
                    A.RandomBrightnessContrast(
                    ),  # apply random contrast & brightness
                    A.RandomGamma(),  # apply random gamma
                ],
                p=0.3,
            ),
            A.OneOf(
                [
                    # apply one of transforms to 30% images
                    A.ElasticTransform(
                        alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                    A.GridDistortion(),
                    A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
                ],
                p=0.3,
            ),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()  # convert the image to PyTorch tensor
        ],
        p=1,
    )

    # Define the transformation pipeline for test
    tranformation_pipeline_test = A.Compose(
        [
            A.Resize(width=512, height=512),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()  # convert the image to PyTorch tensor
        ],
        p=1,
    )

    IMG_ANNOTATIONS_PATH = Path("references/img_annotations.json")
    LABEL_MAPPING_PATH = Path("references/label_mapping.csv")
    FOLDER_IMGS = Path("assignment_imgs/")
    (
        img_annotations_train,
        img_annotations_test,
        img_annotations_valid,
    ) = split_train_test_valid_json(IMG_ANNOTATIONS_PATH,
                                    random_seed=42,
                                    split_size=[0.65, 0.25, 0.1])

    # Build dataset
    food_dataset_train = FoodVisorDataset(
        json_annotations=img_annotations_train,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=augmentation_pipeline_train,
    )
    food_dataset_test = FoodVisorDataset(
        json_annotations=img_annotations_test,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=tranformation_pipeline_test,
    )
    food_dataset_valid = FoodVisorDataset(
        json_annotations=img_annotations_valid,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=tranformation_pipeline_test,
    )

    params_loader = {
        "batch_size": 32,
        "validation_split": 0.2,
        "shuffle_dataset": True,
        "random_seed": 42
    }
Example #12
def main():
    train_transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            # A.Rotate(limit=35, p=1.0),
            # A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.1),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    val_transforms = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(
            val_loader, model, folder="saved_images/", device=DEVICE
        )
Example #13
def get_transforms(*, data):

    if data == 'train':
        return Compose(
            [
                #Resize(CFG.size, CFG.size),
                RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
                HorizontalFlip(p=0.5),
                RandomBrightnessContrast(p=0.2,
                                         brightness_limit=(-0.2, 0.2),
                                         contrast_limit=(-0.2, 0.2)),
                HueSaturationValue(p=0.2,
                                   hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2),
                ShiftScaleRotate(p=0.2,
                                 shift_limit=0.0625,
                                 scale_limit=0.2,
                                 rotate_limit=20),
                CoarseDropout(p=0.2),
                Cutout(p=0.2,
                       max_h_size=16,
                       max_w_size=16,
                       fill_value=(0., 0., 0.),
                       num_holes=16),
                Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
                ToTensorV2(),
            ],
            additional_targets={'image_annot': 'image'})

    elif data == 'check':
        return Compose(
            [
                #Resize(CFG.size, CFG.size),
                RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
                HorizontalFlip(p=0.5),
                RandomBrightnessContrast(p=0.2,
                                         brightness_limit=(-0.2, 0.2),
                                         contrast_limit=(-0.2, 0.2)),
                HueSaturationValue(p=0.2,
                                   hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2),
                ShiftScaleRotate(p=0.2,
                                 shift_limit=0.0625,
                                 scale_limit=0.2,
                                 rotate_limit=20),
                CoarseDropout(p=0.2),
                Cutout(p=0.2,
                       max_h_size=16,
                       max_w_size=16,
                       fill_value=(0., 0., 0.),
                       num_holes=16),
                #Normalize(
                #    mean=[0.485, 0.456, 0.406],
                #    std=[0.229, 0.224, 0.225],
                #),
                ToTensorV2(),
            ],
            additional_targets={'image_annot': 'image'})

    elif data == 'valid':
        return Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
Example #14
def main():
    check_path(SAVE_PATH)
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.OneOf([
            A.Rotate(limit=35, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.Transpose(p=0.5)
        ]),
        A.Normalize(mean=[0.625, 0.448, 0.688],
                    std=[0.131, 0.177, 0.101],
                    max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = get_model(data_channel=CHANNEL_NUM,
                      encoder=ENCODER,
                      encoder_weight=ENCODER_WEIGHT).to(device=DEVICE)
    print(model)
    loss_fn = nn.CrossEntropyLoss().to(device=DEVICE)
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    ##plot
    plot_train_loss = []
    plot_val_loss = []
    plot_dice = []
    plot_miou = []
    learning_lr = []

    # Define Scheduler
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,factor=0.1, patience=10,
    #                                                   verbose=True)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                  cycle_momentum=False,
                                                  base_lr=1.25e-4,
                                                  max_lr=0.001,
                                                  step_size_up=2000,
                                                  mode="triangular2",
                                                  verbose=False)
    best_iou = 0
    best_dice = 0

    train_loader, val_loader = get_loaders(train_dir=TRAIN_IMG_DIR,
                                           train_maskdir=TRAIN_MASK_DIR,
                                           batch_size=BATCH_SIZE,
                                           train_transform=train_transform,
                                           num_workers=NUM_WORKS,
                                           pin_memory=PIN_MEMORY)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):
        epoch_loss, current_lr = train_fn(train_loader,
                                          model,
                                          optimizer,
                                          scheduler,
                                          loss_fn,
                                          scaler,
                                          epoch,
                                          aux_loss='lovasz_softmax')
        plot_train_loss.append(epoch_loss)
        print(epoch_loss)
        learning_lr.append(current_lr)

        #save_checkpoint(check_point, filename=f"/data3/mry/results/best_checkpoint_{flod_idx}fold_{epoch}epoch.pth.tar")
        ##check valid metric
        m_dice, miou, val_loss = check_valid_metric(val_loader,
                                                    model,
                                                    device=DEVICE,
                                                    loss_fn=loss_fn,
                                                    aux_loss='lovasz_softmax',
                                                    channel_nums=CHANNEL_NUM)
        plot_val_loss.append(val_loss if val_loss < 100 else 100)
        plot_dice.append(m_dice)
        plot_miou.append(miou)

        if best_iou < miou:
            best_iou = miou
            ##save model
            check_point = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            save_checkpoint(check_point,
                            filename=f"{SAVE_PATH}{epoch}epoch.pth.tar")

        ##plot metric and save
        fig = plt.figure(figsize=(24, 12))
        x = [i for i in range(epoch + 1)]

        ax = fig.add_subplot(2, 3, 1)
        ax.plot(x, plot_train_loss, label='train loss')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('train loss')
        ax.grid(True)
        ax = fig.add_subplot(2, 3, 2)
        ax.plot(x, plot_val_loss, label='val loss')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('val loss')
        ax.grid(True)

        ax = fig.add_subplot(2, 3, 3)
        ax.plot(x, learning_lr, label='Learning Rate')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Learning Rate')
        ax.grid(True)

        ax = fig.add_subplot(2, 3, 4)
        ax.plot(x, plot_miou, label='mIOU')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('mIOU')
        ax.grid(True)

        ax = fig.add_subplot(2, 3, 5)
        ax.plot(x, plot_dice, label='mDICE')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('mDICE')
        ax.grid(True)

        fig.savefig(PLOT_PATH)
        plt.show()
Example #15
def get_dataloader_single_folder(data_dir,
                                 mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225),
                                 imageFolder='photos',
                                 maskFolder='matrixes',
                                 fraction=0.2,
                                 batch_size=BATCH_SIZE):
    '''
    Make iterable PyTorch DataLoader using instances of SegDataset class
    :param data_dir: The base folder containing the whole dataset
    :param mean: Parameter used in Normalization transform, set to imagenet mean by default
    :param std: Parameter used in Normalization transform, set to imagenet std by default
    :param imageFolder: Photos subfolder
    :param maskFolder: Masks subfolder
    :param fraction: Train split fraction (the rest is used for the validation and test splits)
    :param batch_size: Number of photo-mask pairs in one batch
    '''
    data_transforms = {
        'Train':
        albu.Compose([
            resize_transforms(),
            pixelwise_transforms(),
            albu.Normalize(mean, std),
            ToTensorV2()
        ]),
        'Valid':
        albu.Compose(
            [resize_transforms(),
             albu.Normalize(mean, std),
             ToTensorV2()]),
        'Test':
        albu.Compose(
            [resize_transforms(),
             albu.Normalize(mean, std),
             ToTensorV2()])
    }
    image_datasets = {
        x: SegDataset(data_dir,
                      imageFolder=imageFolder,
                      maskFolder=maskFolder,
                      seed=100,
                      fraction=fraction,
                      subset=x,
                      transform=data_transforms[x])
        for x in ['Train', 'Valid']
    }
    dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=4)
        for x in ['Train', 'Valid']
    }
    dataloaders['Test'] = DataLoader(SegDataset(
        data_dir,
        imageFolder=imageFolder,
        maskFolder=maskFolder,
        seed=100,
        fraction=fraction,
        subset='Test',
        transform=data_transforms['Test']),
                                     batch_size=1,
                                     shuffle=True,
                                     num_workers=4)
    return dataloaders
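
# Example usage (a sketch; folder names follow the defaults documented above):
# dataloaders = get_dataloader_single_folder('dataset_root', fraction=0.2)
# train_loader, val_loader = dataloaders['Train'], dataloaders['Valid']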
Example #16
def main():
    # TODO: Might be worth trying the normalization from assignment 2
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ], )

    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ], )

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    """
    We're using with logitsLoss because we're not using sigmoid on the,
    final output layer.
    If we wanted to have several output channels, we'd change the loss_fn
    to a cross entropy loss instead.
    """
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
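
    # A multi-class variant (a sketch following the docstring's suggestion,
    # not part of the original) would widen the output and swap the loss:
    #   model = UNET(in_channels=3, out_channels=NUM_CLASSES).to(DEVICE)
    #   loss_fn = nn.CrossEntropyLoss()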

    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    # Scales the gradients to avoid underflow. Requires a GPU
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader,
                                 model,
                                 folder="saved_images/",
                                 device=DEVICE)
Example #17
        return img


# Albumentations Transformations
transform_train_albu = Compose([
    RandomCrop(height=32, width=32),  #, always_apply=True
    HorizontalFlip(p=0.2),
    VerticalFlip(p=0.0),
    GaussianBlur(p=0.0),
    Rotate(limit=20),
    #ToTensor(),
    Normalize(mean=(0.4914, 0.4822, 0.4465),
              std=(0.2023, 0.1994, 0.2010),
              always_apply=True),
    Cutout(num_holes=1,
           max_h_size=8,
           max_w_size=8,
           fill_value=[0.4914, 0.4822, 0.4465],
           p=0.3),
    ToTensorV2(always_apply=True)
])

transform_test_albu = Compose([
    #ToTensor(),
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
    ToTensorV2(always_apply=True)
])

transform_test_albu = AlbuCompose(transform_test_albu)
transform_train_albu = AlbuCompose(transform_train_albu)
Example #18
def get_train_test_valid_dataloaders(data_path, test_data_path, seed, image_size, batch_size):
    """
    Utility function that builds the train, test, and validation dataloaders for the model.
    """
    def build_data(data_path):
        content_list = []
        labels_list = []

        for image in tqdm(os.listdir(data_path)):
            if ".jpg" in image:
                content = cv2.imread(data_path + image)
                content_list.append(content)
            elif ".txt" in image:
                with open(data_path + image, "r") as f:
                    labels = f.read()
                labels = np.array(labels.split(" "), dtype=int)
                labels[0] = 0 if labels[0] == 1 else 1
                labels = np.roll(labels, -1)
                labels_list.append(labels)
        data = np.array([list(a) for a in zip(content_list, labels_list)])

        return data

    train_data = build_data(data_path=data_path)
    test_data = build_data(data_path=test_data_path)

    train_data, valid_data = train_test_split(train_data, shuffle=True, test_size=0.1, random_state=seed)

    train_clf_labels = [a[-1] for a in train_data[:, 1]]

    transform = Compose(
        [
            Resize(width=image_size, height=image_size),
            HorizontalFlip(p=0.4),
            # ShiftScaleRotate(p=0.3),
            MedianBlur(blur_limit=7, always_apply=False, p=0.3),
            IAAAdditiveGaussianNoise(scale=(0, 0.15 * 255), p=0.5),
            HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.4),
            RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            # in this implementation imagenet normalization is used
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            Cutout(p=0.4),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format="pascal_voc"),
    )

    test_transform = Compose(
        [
            # only resize and normalization is used for testing
            # no TTA is implemented in this solution
            Resize(width=image_size, height=image_size),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format="pascal_voc"),
    )

    train_dataset = Dataset(train_data, transforms=transform)
    valid_dataset = Dataset(valid_data, transforms=transform)
    test_dataset = Dataset(test_data, transforms=test_transform)

    train_dataloader = DataLoader(
        train_dataset,
        # balanced sampler is used to minimize harmful effects of dataset not being fully balanced
        sampler=BalanceClassSampler(labels=train_clf_labels, mode="upsampling"),
        batch_size=batch_size,
    )
    test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=1)
    valid_dataloader = DataLoader(valid_dataset, sampler=SequentialSampler(valid_dataset), batch_size=batch_size)

    return train_dataloader, test_dataloader, valid_dataloader
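
Because both pipelines above declare bbox_params, Albumentations expects bounding boxes
with every call; the wrapped Dataset presumably invokes them along these lines. A
self-contained sketch of the calling convention (demo_transform and the values are
illustrative; with no label_fields, pascal_voc boxes carry the class label as a fifth
element):

import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2

demo_transform = A.Compose(
    [A.Resize(224, 224), A.Normalize(), ToTensorV2()],
    bbox_params=A.BboxParams(format="pascal_voc"),
)
image = np.zeros((256, 256, 3), dtype=np.uint8)
out = demo_transform(
    image=image,
    bboxes=[[10, 20, 100, 120, 1]],  # x_min, y_min, x_max, y_max, class label
)
tensor, boxes = out["image"], out["bboxes"]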
Example #19
def get_inference_transforms(input_shape, way="pad", crop_rate=1.0):
    if way == "pad":
        return Compose(
            [
                PadIfNeeded(input_shape[0], input_shape[1]),
                Resize(input_shape[0], input_shape[1]),
                HorizontalFlip(p=0.5),
                ToGray(p=0.5),
                VerticalFlip(p=0.5),
                ShiftScaleRotate(scale_limit=0.0, p=0.5),
                HueSaturationValue(hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2,
                                   p=0.5),
                RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1),
                                         p=0.5),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225],
                          max_pixel_value=255.0,
                          p=1.0),
                #CoarseDropout(p=0.5),
                Cutout(p=0.5),
                ToTensorV2(p=1.0),
            ],
            p=1.)
    elif way == "resize":
        return Compose(
            [
                RandomResizedCrop(input_shape[0], input_shape[1]),
                HorizontalFlip(p=0.5),
                ToGray(p=0.5),
                VerticalFlip(p=0.5),
                ShiftScaleRotate(scale_limit=0.0, p=0.5),
                HueSaturationValue(hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2,
                                   p=0.5),
                RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1),
                                         p=0.5),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225],
                          max_pixel_value=255.0,
                          p=1.0),
                #CoarseDropout(p=0.5),
                Cutout(p=0.5),
                ToTensorV2(p=1.0),
            ],
            p=1.)
    elif way == "center":
        return Compose(
            [
                Resize(input_shape[0], input_shape[1]),
                HorizontalFlip(p=0.5),
                ToGray(p=0.5),
                VerticalFlip(p=0.5),
                ShiftScaleRotate(scale_limit=0.0, p=0.5),
                HueSaturationValue(hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2,
                                   p=0.5),
                RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1),
                                         p=0.5),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225],
                          max_pixel_value=255.0,
                          p=1.0),
                #CoarseDropout(p=0.5),
                Cutout(p=0.5),
                ToTensorV2(p=1.0),
            ],
            p=1.)
    elif way == "crop":
        return Compose(
            [
                Resize(input_shape[0], input_shape[1]),
                CenterCrop(int(input_shape[0] * crop_rate),
                           int(input_shape[1] * crop_rate)),
                HorizontalFlip(p=0.5),
                ToGray(p=0.5),
                VerticalFlip(p=0.5),
                ShiftScaleRotate(scale_limit=0.0, p=0.5),
                HueSaturationValue(hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2,
                                   p=0.5),
                RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1),
                                         p=0.5),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225],
                          max_pixel_value=255.0,
                          p=1.0),
                #CoarseDropout(p=0.5),
                Cutout(p=0.5),
                ToTensorV2(p=1.0),
            ],
            p=1.)
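
A brief usage sketch of the factory above (dummy image; the "crop" branch resizes to
input_shape and then center-crops by crop_rate):

import numpy as np

tta = get_inference_transforms(input_shape=(384, 384), way="crop", crop_rate=0.9)
dummy = np.random.randint(0, 256, (500, 400, 3), dtype=np.uint8)
augmented = tta(image=dummy)["image"]  # CHW float tensor after Normalize + ToTensorV2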
Example #20
    num_epochs = 20
    learning_rate = 0.0001
    weight_decay = 1e-6
    val_every = 1
    # model
    model_path = './saved/fpn_b16_e20.pt'

    model = get_smp_model('FPN', 'efficientnet-b0')

    category_names = [
        'Backgroud', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack',
        'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery',
        'Clothing'
    ]
    # dataset
    test_transform = A.Compose([ToTensorV2()])
    test_dataset = COCODataLoader(data_dir=test_path,
                                  dataset_path=dataset_path,
                                  mode='test',
                                  category_names=category_names,
                                  transform=test_transform)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              num_workers=4,
                                              collate_fn=collate_fn)
    train_transform = A.Compose([ToTensorV2()])

    train_dataset = COCODataLoader(data_dir=train_path,
                                   dataset_path=dataset_path,
                                   mode='train',
                                   category_names=category_names,
Example #21
def run_train():
    df = pd.read_csv(args.train_csv)

    labelencoder = LabelEncoder()
    df['label_group'] = labelencoder.fit_transform(df['label_group'])

    # Augmentation
    train_transform = A.Compose([
        A.Resize(args.image_size, args.image_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=120, p=0.8),
        #A.Cutout(p=0.5),
        #A.OneOf([
        #    A.HueSaturationValue(),
        #    A.ShiftScaleRotate()
        #], p=1),
        A.RandomBrightness(limit=(0.09, 0.6), p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(p=1.0),
    ])

    test_transform = A.Compose([
        A.Resize(args.image_size, args.image_size),
        #A.CenterCrop(args.image_size, args.image_size, p=1.),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ])

    # Dataset, Dataloader
    train_dataset = ShopeeDataset(df,
                                  data_dir=args.train_dir,
                                  transforms=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              pin_memory=True,
                              shuffle=True,
                              drop_last=True)

    model = ShopeeModel(
        model_name=args.model_name,
        n_classes=args.n_classes,
        fc_dim=args.feat_dim,
        scale=args.s,
        margin=args.m,
        #crit=args.crit,
        use_fc=args.use_fc,
        pretrained=args.pretrained)
    model.cuda()

    existing_layer = torch.nn.SiLU
    new_layer = Mish()
    # in eca_nfnet_l0 SiLU() is used, but it will be replace by Mish()
    model = replace_activations(model, existing_layer, new_layer)
    if args.resume is not None:
        model.load_state_dict(
            torch.load(os.path.join(args.model_dir, args.resume)))

    optimizer = Ranger(model.parameters(), lr=scheduler_params['lr_start'])
    scheduler = ShopeeScheduler(optimizer, **scheduler_params)

    for i in range(args.epochs):
        avg_loss_train = train(model, train_loader, optimizer, scheduler, i)
        torch.save(
            model.state_dict(),
            os.path.join(args.model_dir,
                         f'arcface_512x512_{args.model_name}_epoch{i+1}.pt'))
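
replace_activations is a project helper rather than a torch built-in; assuming it
recursively swaps every instance of the existing activation class for the new module,
a minimal sketch would be:

import torch.nn as nn


def replace_activations(module: nn.Module, existing_layer, new_layer) -> nn.Module:
    """Recursively replace every instance of existing_layer with new_layer (sketch)."""
    for name, child in module.named_children():
        if isinstance(child, existing_layer):
            setattr(module, name, new_layer)
        else:
            replace_activations(child, existing_layer, new_layer)
    return module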
Example #22
def get_augmentations(name, img_size):

    if name == 'training_none':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_dropout':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=20,
                            p=0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_1':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ],
                    p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ],
                    p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ],
                    p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_2':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ],
                    p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ],
                    p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ],
                    p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_2_bis':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(rotate_limit=30, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur()
            ],
                    p=0.3),
            #A.OneOf([A.OpticalDistortion(distort_limit=1.0), A.GridDistortion(num_steps=5, distort_limit=1.),
            #         A.ElasticTransform(alpha=3)], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15)
            ],
                    p=0.2),
            #A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_3':
        aug = A.Compose([
            A.Rotate(limit=5),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.15,
                                       contrast_limit=0.15,
                                       p=0.5),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=10,
                            p=0.5),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_4':
        aug = A.Compose([
            A.Rotate(limit=5, p=1),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.15, +0.25),
                                       contrast_limit=(-0.15, +0.25),
                                       p=1),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=(10, 50)),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ],
                    p=1),
            A.IAASharpen(p=0.3),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=20,
                            p=0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'validation':
        aug = A.Compose(
            [A.Resize(img_size, img_size),
             A.Normalize(),
             ToTensorV2()])
    elif name == 'none':
        aug = A.Compose([A.Resize(img_size, img_size)])
    else:
        raise ValueError(f"{name} is not a valid augmentations name")

    return aug
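
A short usage sketch of the factory above (values chosen for illustration):

import numpy as np

train_aug = get_augmentations("training_1", img_size=384)
sample = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
batch_ready = train_aug(image=sample)["image"]  # normalized CHW tensor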
Example #23
def post_transforms():
    # we use ImageNet image normalization
    # and convert it to torch.Tensor
    return [albu.Normalize(), ToTensorV2()]
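
The returned list is presumably spliced onto the tail of a task-specific pipeline, for
example (pre-processing step chosen arbitrarily):

import albumentations as albu

train_tfs = albu.Compose([albu.HorizontalFlip(p=0.5), *post_transforms()])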
Example #24
def main():
    
    # Define the main paths
    data_path = './data'
    train_dir = Path(data_path, 'images/train_imgs')
    
    # Load the config file.
    args = parse_args()
    update_config(cfg, args)

    lr = cfg.TRAIN.LR
    lamb = cfg.LAMB
    test_option = eval(cfg.test_option)
    
    input_w = cfg.MODEL.IMAGE_SIZE[1]
    input_h = cfg.MODEL.IMAGE_SIZE[0]
    
    # Minimize sources of randomness as much as possible
    RANDOM_SEED = int(cfg.RANDOMSEED)
    np.random.seed(RANDOM_SEED) # cpu vars
    torch.manual_seed(RANDOM_SEED) # cpu  vars
    random.seed(RANDOM_SEED) # Python
    os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED) # Python hash buildin
    torch.backends.cudnn.deterministic = True  #needed
    torch.backends.cudnn.benchmark = False
    torch.cuda.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED) # if use multi-GPU

    
    # Create the logger and the final output directory.
    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, f'lr_{str(lr)}', 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)
    
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK

    # Create the annotation file if it does not already exist.
    if os.path.isfile(data_path+'/annotations/train_annotation.pkl') == False :
        make_annotations(data_path)
    
    # Load the model to train.
    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )
    
    # Modify and initialize the head of the model.
    model = initialize_model(model, cfg)
    
    
    # Copy the model file and train.py to the output directory.
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    
    shutil.copy2(
        os.path.join(this_dir, '../tools', 'train.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }
    
    
    # Move the model to the CUDA device if a GPU is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    # Define the loss.
    criterion = nn.MSELoss().cuda()

    # Define the data augmentation.
    A_transforms = {
        
        'val':
            A.Compose([
                A.Resize(input_h, input_w, always_apply=True),
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
                ToTensorV2()
            ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05, label_fields=['class_labels'])),
        
        'test':
            A.Compose([
                A.Resize(input_h, input_w, always_apply=True),
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        }
        
    if input_h == input_w :
        
        A_transforms['train'] = A.Compose([
                A.Resize(input_h, input_w, always_apply=True),
                A.OneOf([A.HorizontalFlip(p=1),
                         A.VerticalFlip(p=1),
                         A.Rotate(p=1),
                         A.RandomRotate90(p=1)
                ], p=0.5),
                A.OneOf([A.MotionBlur(p=1),
                         A.GaussNoise(p=1),
                         A.ColorJitter(p=1)
                ], p=0.5),

                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
                ToTensorV2()
            ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05, label_fields=['class_labels']))
        
    else :
        A_transforms['train'] = A.Compose([
                A.Resize(input_h, input_w, always_apply=True),
                A.OneOf([A.HorizontalFlip(p=1),
                         A.VerticalFlip(p=1),
                         A.Rotate(p=1),
                ], p=0.5),
                A.OneOf([A.MotionBlur(p=1),
                         A.GaussNoise(p=1)
                         
                ], p=0.5),
                A.OneOf([A.CropAndPad(percent=0.1, p=1),
                         A.CropAndPad(percent=0.2, p=1),
                         A.CropAndPad(percent=0.3, p=1)
                ], p=0.5),

                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
                ToTensorV2()
            ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05, label_fields=['class_labels']))
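    # With bbox_params and label_fields=['class_labels'], every call to the 'train'
    # and 'val' pipelines must pass boxes and labels as well, e.g. (hypothetical names):
    #   out = A_transforms['train'](image=img, bboxes=bbox, class_labels=labels)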
    

    # Set the training parameters.
    batch_size = int(cfg.TRAIN.BATCH_SIZE_PER_GPU)
    test_ratio = float(cfg.TEST_RATIO)
    num_epochs = cfg.TRAIN.END_EPOCH
    
    # Patience value used for early stopping.
    num_earlystop = num_epochs
    
    # Build the datasets used by torch.
    imgs, bbox, class_labels = make_train_data(data_path)

    since = time.time()
    
    """
    # test_option : decides whether to hold out test data when splitting into train/valid.
        * If True, 10% of the files are set aside as a test set.
        * If False, no test files are held out.
    """
    if test_option == True :
        X_train, X_test, y_train, y_test = train_test_split(imgs, bbox, test_size=0.1, random_state=RANDOM_SEED)
        test_dataset = [X_test, y_test]
        with open(final_output_dir+'/test_dataset.pkl', 'wb') as f:
            pickle.dump(test_dataset, f)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=test_ratio, random_state=RANDOM_SEED)
        test_data = Dataset(train_dir, X_test, y_test, data_transforms=A_transforms, class_labels=class_labels, phase='val')
        test_loader = data_utils.DataLoader(test_data, batch_size=batch_size, shuffle=False)
    
    else :
        X_train, X_val, y_train, y_val = train_test_split(imgs, bbox, test_size=test_ratio, random_state=RANDOM_SEED)
        
    train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
    
    val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
    train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=False)
    
    
    # Initialize variables used to track the best loss
    best_perf = 10000000000
    test_loss = None
    best_model = False
    
    # Define the optimizer
    optimizer = optim.Adam(
        model.parameters(),
        lr=lr
    )
    
    # If a partially trained checkpoint exists, resume from that epoch.
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth'
    )
    
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        num_epochs = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))
    
    # Define the lr_scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=-1
    )
    
    # Counter used for early stopping
    count = 0
    val_losses = []
    train_losses = []
    
    # Start training
    for epoch in range(begin_epoch, num_epochs):
        epoch_since = time.time()
        
        lr_scheduler.step()
        
        # train for one epoch
        train_loss = train(cfg, device, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict, lamb=lamb)

        
        # evaluate on validation set
        perf_indicator = validate(
            cfg, device, val_loader, val_data, model, criterion,
            final_output_dir, tb_log_dir, writer_dict, lamb=lamb
        )
        
        # Decide whether this epoch produced the best model, based on the validation loss.
        if perf_indicator <= best_perf:
            best_perf = perf_indicator
            best_model = True
            count = 0
            
        else:
            best_model = False
            count +=1
            
        
        
        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)
        
        # Store the losses.
        val_losses.append(perf_indicator)
        train_losses.append(train_loss)
        if count == num_earlystop :
            break
        
        
        epoch_time_elapsed = time.time() - epoch_since
        print(f'epoch : {epoch}'
              f' train loss : {round(train_loss, 3)}'
              f' valid loss : {round(perf_indicator, 3)}'
              f' Elapsed time: {int(epoch_time_elapsed // 60)}m {int(epoch_time_elapsed % 60)}s')
        
    # Save the final model state, log files, etc.
    final_model_state_file = os.path.join(
        final_output_dir, 'final_state.pth'
    )
    logger.info('=> saving final model state to {}'.format(
        final_model_state_file)
    )
    torch.save(model.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best validation loss: {:4f}\n'.format(best_perf))
    
    # If test_option is True, evaluate the trained model on the 10% of data that was held out.
    if test_option == True :
        # test data
        model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
            cfg, is_train=True)
        
        model = initialize_model(model, cfg)
        parameters = f'{final_output_dir}/model_best.pth'
        
        model = model.to(device)
        model.load_state_dict(torch.load(parameters))
        
        test_loss = validate(
                cfg, device, test_loader, test_data, model, criterion,
                final_output_dir, tb_log_dir, writer_dict, lamb=lamb
            )
    
    print(f'test loss : {test_loss}')
    
    # Save the loss results separately as a pickle file.
    result_dict = {}
    result_dict['val_loss'] = val_losses
    result_dict['train_loss'] = train_losses
    result_dict['best_loss'] = best_perf
    result_dict['test_loss'] = test_loss
    result_dict['lr'] = lr
    with open(final_output_dir+'/result.pkl', 'wb') as f:
        pickle.dump(result_dict, f)
Example #25
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = PixPro(models.__dict__[args.arch], args.pixpro_mom,
                   args.ppm_layers, args.ppm_gamma)

    if args.distributed:
        #hopefully this is the right place to do this:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)

    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    #define loss criterion and optimizer
    criterion = ConsistencyLoss(distance_thr=args.pixpro_t).cuda(args.gpu)

    optimizer = configure_optimizer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    #physical space only
    space_tfs = A.Compose([A.RandomResizedCrop(224, 224),
                           A.HorizontalFlip()],
                          additional_targets={
                              'grid_y': 'image',
                              'grid_x': 'image'
                          })
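    # additional_targets applies the same sampled crop/flip to the coordinate grids
    # as to the image, which keeps pixel correspondences between the two views valid,
    # e.g. (hypothetical arrays of matching HxW shape):
    #   out = space_tfs(image=img, grid_y=grid_y, grid_x=grid_x)
    #   img_t, gy_t, gx_t = out['image'], out['grid_y'], out['grid_x']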

    #could work for both views
    view1_color_tfs = A.Compose([
        A.ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        A.ToGray(p=0.2),
        A.GaussianBlur(blur_limit=23, sigma_limit=(0.1, 2.0), p=1.0),
        A.Normalize(),
        ToTensorV2()
    ])

    #technically optional, but used in the BYOL paper
    view2_color_tfs = A.Compose([
        A.ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        A.ToGray(p=0.2),
        A.GaussianBlur(blur_limit=23, sigma_limit=(0.1, 2.0), p=0.1),
        A.Solarize(p=0.2),
        A.Normalize(),
        ToTensorV2()
    ])

    train_dataset = ContrastData(args.data, space_tfs, view1_color_tfs,
                                 view2_color_tfs)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    #encoder momentum is updated by STEP and not EPOCH
    args.train_steps = args.epochs * len(train_loader)
    args.current_step = args.start_epoch * len(train_loader)

    if args.fp16:
        scaler = GradScaler()
    else:
        scaler = None

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, scaler, epoch, args)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=os.path.join(
                    args.model_dir, 'checkpoint_{:04d}.pth.tar'.format(epoch)))
Example #26
pre_model = LitModel.load_from_checkpoint(
    checkpoint_path=best_checkpoints).to("cuda")

pre_model.eval()
pre_model.freeze()

transforms = A.Compose(
    [
        A.CenterCrop(img_size, img_size, p=1.0),
        A.Resize(img_size, img_size),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
)

test_img = transforms(image=cv2.imread(
    "/media/hdd/Datasets/asl/asl_alphabet_test/asl_alphabet_test/C_test.jpg"))

y_hat = pre_model(test_img["image"].unsqueeze(0).to("cuda"))

label_map

label_map[int(torch.argmax(y_hat, dim=1))]
Example #27
def train_function(gpu, world_size, node_rank, gpus):
    import torch.multiprocessing
    torch.multiprocessing.set_sharing_strategy('file_system')

    torch.manual_seed(25)
    np.random.seed(25)

    rank = node_rank * gpus + gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=world_size,
        rank=rank
    )

    width_size = 512
    batch_size = 32
    accumulation_step = 5
    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")

    if rank == 0:
        wandb.init(project='inception_v3', group=wandb.util.generate_id())
        wandb.config.width_size = width_size
        wandb.config.aspect_rate = 1
        wandb.config.batch_size = batch_size
        wandb.config.accumulation_step = accumulation_step

        shutil.rmtree('tensorboard_runs', ignore_errors=True)
        writer = SummaryWriter(log_dir='tensorboard_runs', filename_suffix=str(time.time()))

    ranzcr_df = pd.read_csv('train_folds.csv')
    ranzcr_train_df = ranzcr_df[ranzcr_df['fold'] != 1]

    chestx_df = pd.read_csv('chestx_pseudolabeled_data_lazy_balancing.csv')
    train_image_transforms = alb.Compose([
        alb.ImageCompression(quality_lower=65, p=0.5),
        alb.HorizontalFlip(p=0.5),
        alb.CLAHE(p=0.5),
        alb.OneOf([
            alb.GridDistortion(
                num_steps=8,
                distort_limit=0.5,
                p=1.0
            ),
            alb.OpticalDistortion(
                distort_limit=0.5,
                shift_limit=0.5,
                p=1.0,
            ),
            alb.ElasticTransform(alpha=3, p=1.0)],
            p=0.7
        ),
        alb.RandomResizedCrop(
            height=width_size,
            width=width_size,
            scale=(0.8, 1.2),
            p=0.7
        ),
        alb.RGBShift(p=0.5),
        alb.RandomSunFlare(p=0.5),
        alb.RandomFog(p=0.5),
        alb.RandomBrightnessContrast(p=0.5),
        alb.HueSaturationValue(
            hue_shift_limit=20,
            sat_shift_limit=20,
            val_shift_limit=20,
            p=0.5
        ),
        alb.ShiftScaleRotate(shift_limit=0.025, scale_limit=0.1, rotate_limit=20, p=0.5),
        alb.CoarseDropout(
            max_holes=12,
            min_holes=6,
            max_height=int(width_size / 6),
            max_width=int(width_size / 6),
            min_height=int(width_size / 6),
            min_width=int(width_size / 20),
            p=0.5
        ),
        alb.IAAAdditiveGaussianNoise(loc=0, scale=(2.5500000000000003, 12.75), per_channel=False, p=0.5),
        alb.IAAAffine(scale=1.0, translate_percent=None, translate_px=None, rotate=0.0, shear=0.0, order=1, cval=0,
                      mode='reflect', p=0.5),
        alb.IAAAffine(rotate=90., p=0.5),
        alb.IAAAffine(rotate=180., p=0.5),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_set = NoisyStudentDataset(ranzcr_train_df, chestx_df, train_image_transforms,
                                    '../ranzcr/train', '../data', width_size=width_size)
    train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False, num_workers=4, sampler=train_sampler)

    ranzcr_valid_df = ranzcr_df[ranzcr_df['fold'] == 1]
    valid_image_transforms = alb.Compose([
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    valid_set = ImageDataset(ranzcr_valid_df, valid_image_transforms, '../ranzcr/train', width_size=width_size)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=4, pin_memory=False, drop_last=False)

    # ranzcr_valid_df = ranzcr_df[ranzcr_df['fold'] == 1]
    # valid_image_transforms = alb.Compose([
    #     alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    #     ToTensorV2()
    # ])
    # valid_set = ImageDataset(ranzcr_valid_df, valid_image_transforms, '../ranzcr/train', width_size=width_size)
    # valid_sampler = DistributedSampler(valid_set, num_replicas=world_size, rank=rank)
    # valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=4, sampler=valid_sampler)

    checkpoints_dir_name = 'inception_v3_noisy_student_{}'.format(width_size)
    os.makedirs(checkpoints_dir_name, exist_ok=True)

    # model = EfficientNetNoisyStudent(11, pretrained_backbone=True,
    #                                  mixed_precision=True, model_name='tf_efficientnet_b7_ns')
    model = Inception(11, pretrained_backbone=True, mixed_precision=False, model_name='inception_v3')
    model = SyncBatchNorm.convert_sync_batchnorm(model)
    model.to(device)
    model = DistributedDataParallel(model, device_ids=[gpu])

    # class_weights = [354.625, 23.73913043478261, 2.777105767812362, 110.32608695652173,
    #                  52.679245283018865, 9.152656621728786, 4.7851333032083145,
    #                  8.437891632878731, 2.4620064899945917, 0.4034751151063363, 31.534942820838626]
    class_names = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                   'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
                   'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']
    scaler = GradScaler()
    criterion = torch.nn.BCEWithLogitsLoss()

    lr_start = 1e-4
    lr_end = 1e-6
    weight_decay = 0
    epoch_num = 20
    if rank == 0:
        wandb.config.model_name = checkpoints_dir_name
        wandb.config.lr_start = lr_start
        wandb.config.lr_end = lr_end
        wandb.config.weight_decay = weight_decay
        wandb.config.epoch_num = epoch_num
        wandb.config.optimizer = 'adam'
        wandb.config.scheduler = 'CosineAnnealingLR'
        wandb.config.is_loss_weights = 'no'

    optimizer = Adam(model.parameters(), lr=lr_start, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=epoch_num, eta_min=lr_end, last_epoch=-1)

    max_val_auc = 0

    for epoch in range(epoch_num):
        train_loss, train_avg_auc, train_auc, train_rocs, train_data_pr, train_duration = one_epoch_train(
            model, train_loader, optimizer, criterion, device, scaler,
            iters_to_accumulate=accumulation_step, clip_grads=False)
        scheduler.step()

        if rank == 0:
            val_loss, val_avg_auc, val_auc, val_rocs, val_data_pr, val_duration = eval_model(
                model, valid_loader, device, criterion, scaler)

            wandb.log({'train_loss': train_loss, 'val_loss': val_loss,
                       'train_auc': train_avg_auc, 'val_auc': val_avg_auc, 'epoch': epoch})
            for class_name, auc1, auc2 in zip(class_names, train_auc, val_auc):
                wandb.log({'{} train auc'.format(class_name): auc1,
                           '{} val auc'.format(class_name): auc2, 'epoch': epoch})

            if val_avg_auc > max_val_auc:
                max_val_auc = val_avg_auc
                wandb.run.summary["best_accuracy"] = val_avg_auc

            print('EPOCH %d:\tTRAIN [duration %.3f sec, loss: %.3f, avg auc: %.3f]\t\t'
                  'VAL [duration %.3f sec, loss: %.3f, avg auc: %.3f]\tCurrent time %s' %
                  (epoch + 1, train_duration, train_loss, train_avg_auc,
                   val_duration, val_loss, val_avg_auc, str(datetime.now(timezone('Europe/Moscow')))))

            torch.save(model.module.state_dict(),
                       os.path.join(checkpoints_dir_name, '{}_epoch{}_val_auc{}_loss{}_train_auc{}_loss{}.pth'.format(
                           checkpoints_dir_name, epoch + 1, round(val_avg_auc, 3), round(val_loss, 3),
                           round(train_avg_auc, 3), round(train_loss, 3))))
    if rank == 0:
        wandb.finish()
Example #28
def get_loaders(stage: str, train_bs: int = 32, valid_bs: int = 64) -> tuple:
    """Prepare loaders for a stage.

    Args:
        stage (str): stage name
        train_bs (int, optional): batch size for training dataset.
            Default is `32`.
        valid_bs (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """

    train_valid = ps.read_pickle(TRAIN_VALID_FILE)
    train = train_valid[train_valid["is_valid"] == False]
    valid = train_valid[train_valid["is_valid"] == True]

    landmark_map = {
        landmark: idx
        for idx, landmark in enumerate(sorted(set(train_valid["landmark_id"].values)))
    }

    train_augs = albu.Compose(
        [
            albu.RandomResizedCrop(224, 224, scale=(0.6, 1.0)),
            albu.HorizontalFlip(p=0.5),
            albu.JpegCompression(p=0.5),
            albu.Normalize(),
            ToTensorV2(),
        ]
    )

    train_set = FolderDataset(
        train["id"].values,
        train["landmark_id"].values,
        landmark_map,
        transforms=train_augs,
        data_dir=IMAGES_DIR,
    )
    train_loader = DataLoader(
        dataset=train_set,
        batch_size=train_bs,
        num_workers=NUM_WORKERS,
        sampler=LimitedClassSampler(
            targets=train["landmark_id"].values, max_samples=MAX_SAMPLES_PER_CLASS
        ),
    )
    print(
        f" * Num records in train dataset - {len(train_set)}, batches - {len(train_loader)}"
    )

    valid_set = FolderDataset(
        valid["id"].values,
        valid["landmark_id"].values,
        landmark_map,
        data_dir=IMAGES_DIR,
    )
    valid_loader = DataLoader(
        dataset=valid_set, batch_size=valid_bs, num_workers=NUM_WORKERS
    )
    print(
        f" * Num records in valid dataset - {len(valid_set)}, batches - {len(valid_loader)}"
    )

    return train_loader, valid_loader
Example #29
import albumentations
from albumentations.pytorch import ToTensorV2

input_dir='/ssd_data/720p_CDJ'
data_transform=transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

data_transfrom_albumentation=albumentations.Compose([
    albumentations.Resize(256, 256),
    albumentations.RandomCrop(224, 224),
    albumentations.HorizontalFlip(),
    ToTensorV2()
    # albumentations.pytorch.transforms.ToTensor()
    
])
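
# Note: unlike torchvision's transforms.ToTensor(), ToTensorV2 only reorders the array
# from HWC to CHW and does not divide by 255, so the two pipelines above produce
# tensors on different scales; add albumentations.Normalize() (or a manual /255) if
# the benchmark should compare like with like.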


# dataset=Retina_dataset(input_dir, data_transform)
dataset_albumentation=Retina_dataset_albumentation(input_dir, data_transfrom_albumentation)

total_time=0
for i in range(100):
    # trans_img, time=dataset[0]
    trans_img, time=dataset_albumentation[0]
    total_time+=time

print(f'Time consumption is {total_time}')
Example #30
    dataset = Xview2(
        r'D:\DATA\xView2\train\images',
        r'D:\DATA\xView2\train\labels',
        transforms=Compose([
            OneOf([
                HorizontalFlip(True),
                VerticalFlip(True),
                RandomRotate90(True)
            ],
                  p=0.5),
            # RandomDiscreteScale([0.75, 1.25, 1.5], p=0.5),
            RandomCrop(640, 640, True),
            Normalize(mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406),
                      std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225),
                      max_pixel_value=255),
            ToTensorV2(True),
        ]),
        include=('pre', 'post')).pairwise_mode()

    print(len(dataset))
    a = dataset[1]
    print()
    # img, mask = dataset[4]
    # print(np.unique(mask))
    # for e in tqdm(dataset):
    #     pass
    # viz_img = Xview2.viz_image_mask(img, mask)
    #
    # plt.imshow(viz_img)
    # plt.show()