Example #1
    def __data_pipline(self, img, ldmarks):
        transform = None
        if self.mode == 'train':
            transform = A.Compose(
                [
                    A.Resize(height=self.output_size[0],
                             width=self.output_size[1],
                             p=1),
                    A.Crop(x_min=40,
                           y_min=0,
                           x_max=self.output_size[1] - 76,
                           y_max=self.output_size[0],
                           p=1),
                    A.HorizontalFlip(p=0.5),
                    A.VerticalFlip(p=0.5),
                    A.ToFloat(p=1),
                ],
                keypoint_params=A.KeypointParams(format='xy'))
        elif self.mode == 'test':
            transform = A.Compose(
                [
                    A.Resize(height=self.output_size[0],
                             width=self.output_size[1],
                             p=1),
                    A.Crop(x_min=40,
                           y_min=0,
                           x_max=self.output_size[1] - 76,
                           y_max=self.output_size[0],
                           p=1),
                    A.ToFloat(p=1),
                ],
                keypoint_params=A.KeypointParams(format='xy'))
        transformed = transform(image=img, keypoints=ldmarks)

        return transformed
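A minimal standalone sketch of how this pipeline behaves (the output_size value, image, and landmarks below are invented for illustration and are not part of the original class): the Compose call returns a dict whose 'image' and 'keypoints' entries hold the transformed data, and keypoints that fall outside the crop window are dropped by default.

import numpy as np
import albumentations as A

output_size = (356, 536)  # (height, width), assumed for illustration
transform = A.Compose(
    [
        A.Resize(height=output_size[0], width=output_size[1], p=1),
        A.Crop(x_min=40, y_min=0,
               x_max=output_size[1] - 76, y_max=output_size[0], p=1),
        A.ToFloat(p=1),
    ],
    keypoint_params=A.KeypointParams(format='xy'))

img = np.random.randint(0, 256, (720, 1280, 3), np.uint8)  # dummy image
ldmarks = [(100.0, 200.0), (400.0, 300.0)]                 # dummy landmarks
out = transform(image=img, keypoints=ldmarks)
print(out['image'].shape, len(out['keypoints']))           # (356, 420, 3) 2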
Example #2
    def attention(self, frame, objects, index):
        # image augmentation helper function
        def augment(aug, image):
            return aug(image=image)['image']

        # convert box float vals to ints for cropping faces
        box_ints = objects[index]['box'].astype(int)
        box_half_width = (box_ints[2] - box_ints[0]) // 2
        box_mid_width = (box_ints[0] + box_ints[2]) // 2
        # crop only bounding box
        crop_left = albu.Crop((box_mid_width - box_half_width), box_ints[1],
                              box_mid_width, box_ints[3])
        crop_right = albu.Crop(box_mid_width, box_ints[1],
                               (box_mid_width + box_half_width), box_ints[3])
        cropped_l = augment(crop_left, frame)
        cropped_r = augment(crop_right, frame)
        # increase brightness and decrease contrast for lessened lighting effect
        lighting_aug = albu.RandomBrightnessContrast(
            brightness_limit=(0.3, 0.299),
            contrast_limit=(-0.2, -0.199),
            p=1)
        cropped_l = augment(lighting_aug, cropped_l)
        cropped_r = augment(lighting_aug, cropped_r)
        # flip left side of cropped image horizontally
        flip_aug = albu.HorizontalFlip(p=1)
        flipped_l = augment(flip_aug, cropped_l)
        # convert to tensor and flatten
        cropped_r = torch.flatten(transforms.ToTensor()(cropped_r))
        flipped_l = torch.flatten(transforms.ToTensor()(flipped_l))
        # compare right side of face with flipped left side of face
        cos = torch.nn.CosineSimilarity(dim=0)
        output = cos(cropped_r, flipped_l)
        if output < 0.985:
            attn = 0
        else:
            attn = 1
        # create dictionary of attention flags for each ID
        self.attn_age += 1
        self.attn_dict.setdefault(objects[index]['id'], []).append(attn)
        self.attn_age_dict.update({objects[index]['id']: self.attn_age})

        for v in self.attn_dict.values():
            if len(v) > self.error_time:  # history of X frames per ID
                v.pop(0)  # remove oldest attention flag for ID
            if len(v) > 2:  # record mode of last X frames
                objects[index].update(
                    {'forward_gaze': max(set(v), key=v.count)})
            else:  # record current flag
                objects[index].update({'forward_gaze': attn})

        for k, v in self.attn_age_dict.items():
            if (self.attn_age - v) >= 10:
                self.attn_dict.pop(k, None)
        if self.attn_age % 30 == 0:
            self.attn_age_dict.clear()
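The core of the gaze check above, reduced to a standalone sketch (dummy frame and face box; the 0.985 threshold is copied from the example): crop the left and right halves of the face box, mirror the left half, and compare the two with cosine similarity.

import numpy as np
import torch
import albumentations as albu
from torchvision import transforms

frame = np.random.randint(0, 256, (480, 640, 3), np.uint8)  # dummy frame
x0, y0, x1, y1 = 200, 100, 320, 260                         # dummy face box
mid = (x0 + x1) // 2

left = albu.Crop(x0, y0, mid, y1)(image=frame)['image']
right = albu.Crop(mid, y0, x1, y1)(image=frame)['image']
flipped_left = albu.HorizontalFlip(p=1)(image=left)['image']

cos = torch.nn.CosineSimilarity(dim=0)
score = cos(torch.flatten(transforms.ToTensor()(right)),
            torch.flatten(transforms.ToTensor()(flipped_left)))
attn = 1 if score >= 0.985 else 0   # same threshold as in the example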
Example #3
def test_crop_keypoints():
    image = np.random.randint(0, 256, (100, 100), np.uint8)
    keypoints = [(50, 50, 0, 0)]

    aug = A.Crop(0, 0, 80, 80, p=1)
    result = aug(image=image, keypoints=keypoints)
    assert result["keypoints"] == keypoints

    aug = A.Crop(50, 50, 100, 100, p=1)
    result = aug(image=image, keypoints=keypoints)
    assert result["keypoints"] == [(0, 0, 0, 0)]
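A related sketch (an addition for illustration, not part of the original test): when the crop is wrapped in A.Compose with KeypointParams, keypoints that end up outside the crop window are removed by default (remove_invisible=True).

import numpy as np
import albumentations as A

image = np.random.randint(0, 256, (100, 100), np.uint8)
aug = A.Compose([A.Crop(60, 60, 100, 100, p=1)],
                keypoint_params=A.KeypointParams(format='xy'))
result = aug(image=image, keypoints=[(50, 50)])
assert len(result["keypoints"]) == 0   # (50, 50) lies outside the 60..100 window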
Example #4
def get_train_transforms(bb, format=det_config.IMAGE_FORMAT):
    return A.Compose(
        [A.Crop(*bb),
         A.Resize(height=512, width=512, p=1),
         ToTensorV2(p=1.0)],
        p=1.0,
        bbox_params=A.BboxParams(format=format, label_fields=['labels']))
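A hypothetical call of get_train_transforms above. The crop window, image, and box are invented for illustration, and the bbox format is passed explicitly rather than relying on det_config.IMAGE_FORMAT.

import numpy as np

bb = (100, 50, 900, 700)                       # x_min, y_min, x_max, y_max
tfm = get_train_transforms(bb, format='pascal_voc')

image = np.random.randint(0, 256, (720, 1280, 3), np.uint8)
boxes = [(150, 100, 300, 200)]                 # pascal_voc boxes
sample = tfm(image=image, bboxes=boxes, labels=[1])
print(sample['image'].shape)                   # torch.Size([3, 512, 512])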
Example #5
def true_center_by_face(image: torch.Tensor, landmarks: torch.Tensor):
    image, landmarks = np.transpose(image.numpy(),
                                    (1, 2, 0)), landmarks.numpy()
    # y_center = int(landmarks[36][0] + landmarks[45][0]) // 2
    # x_center = int(landmarks[:,1].mean().item())
    y, x = landmarks[:, 0], landmarks[:, 1]
    keypoints_landmarks = [x, y, 0, 1]
    # H, W, C = image.shape
    # W_max = min(x_center, W - x_center)
    # H_max = min(y_center, H - y_center)
    # radius = min(W_max, H_max)
    # y_max, y_min = min(H, y_center + H//2), max(0, y_center - H//2)
    # x_max, x_min = min(W, x_center + W//2), max(0, x_center - W//2)
    H, W, C = image.shape
    H09 = int(H * 0.9)
    rh = max(int(270 * H09 / W), 270)
    rw = max(int(270 * W / H09), 270)
    transforms = albumentations.Compose([
        albumentations.Crop(x_min=0, y_min=0, x_max=W, y_max=H09),
        albumentations.Resize(rh, rw),
        albumentations.CenterCrop(256, 256),
        # albumentations.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    data_dict = transforms(image=image, keypoints=[keypoints_landmarks])
    image_new = torch.tensor(np.transpose(data_dict['image'], (2, 0, 1)))
    kp_x, kp_y = data_dict['keypoints'][0][0], data_dict['keypoints'][0][1]
    keypoints_new = torch.cat(
        [torch.tensor(kp_x)[..., None],
         torch.tensor(kp_y)[..., None]], dim=1)
    return image_new, keypoints_new
Example #6
    def crop_aug(self, img, boxes, visualise=False):
        index = random.randint(0, len(boxes) - 1)
        window = deepcopy(boxes[index])
        window[:2] -= 404
        window[2:4] += 404
        self.check_bboxes(img, [window])
        window = list(map(int, window))
        aug = A.Compose([
            A.Crop(x_min=window[0], x_max=window[2], y_min=window[1], y_max=window[3], p=1.0),
            A.Resize(808, 808, p=1.0),
            A.RandomCrop(608, 608, p=1.0)
        ], p=1.0, bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
        ))
        sample = aug(image=img, bboxes=boxes)
        res_img, res_boxes = sample['image'], sample['bboxes']

        if visualise:
            for box in res_boxes:
                box = list(map(int, box))
                cv2.rectangle(res_img, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 1)
            cv2.imwrite('temp.jpg', res_img)
        return res_img, res_boxes
Example #7
 def crop_image_train_new(self, img_path, annotations, crop_h, crop_w,
                          json_name, im_fold):
     # import pdb
     # pdb.set_trace()
     save_path = osp.dirname(img_path)
     json_path = osp.join(save_path, json_name)
     save_path = osp.join(save_path, im_fold)
     if not osp.exists(save_path):
         os.makedirs(save_path)
     imgs = os.listdir(img_path)
     # crop = A.Compose(
     #     [A.RandomCrop(crop_h, crop_w)],
     #     bbox_params=A.BboxParams(format='coco')
     # )
     # image = cv2.imread(osp.join(img_path, imgs[0]))
     new_annotations = {}
     id = 0
     for im in tqdm(imgs):
         path = osp.join(img_path, im)
         fname, _ = osp.splitext(im)
         image = cv2.imread(path)
         # print(annotations[im])
         bboxs = [[box[1][0], box[1][1], box[1][2], box[1][3], box[0]]
                  for box in annotations[im]]
         H, W, C = image.shape
         # for i,bbox in enumerate(bboxs):
         for x in range(0, W - crop_w, 200):
             for y in range(0, H - crop_h, 200):
                 x1 = int(x)
                 x2 = int(x + crop_w)
                 y1 = int(y)
                 y2 = int(y + crop_h)
                 # x1 = x1 if x1>0 else 0
                 # x2 = x2 if x2<W else W
                 # y1 = y1 if y1>0 else 0
                 # y2 = y2 if y2<H else H
                 crop = A.Compose([A.Crop(x1, y1, x2, y2)],
                                  bbox_params=A.BboxParams(format='coco'))
                 # import pdb
                 # pdb.set_trace()
                 transformed = crop(image=image, bboxes=bboxs)
                 new_image = transformed['image']
                 new_bbox = transformed['bboxes']
                 # import pdb
                 # pdb.set_trace()
                 # pixle_value = new_image.sum()
                 # print(pixle_value)
                 if new_bbox:
                     new_name = fname + '_' + str(id) + '.png'
                     new_box = [[box[4], box[:4]] for box in new_bbox]
                     new_path = osp.join(save_path, new_name)
                     new_im_info = [id, crop_h, crop_w]
                     all_info = [new_box, new_im_info]
                     new_annotations[new_name] = all_info
                     cv2.imwrite(new_path, new_image)
                     id += 1
                 else:
                     continue
     self.convert2coco(new_annotations, json_path)
Example #8
    def __data_pipline(self, img, ldmarks):
        # Convert RGB to BGR
        transform = None
        if self.mode == 'train':
            transform = A.Compose(
                [
                    A.Resize(height=self.output_size[0],
                             width=self.output_size[1],
                             p=1),  # /8--->(356, 536)
                    A.Crop(x_min=40,
                           y_min=0,
                           x_max=self.output_size[1] - 76,
                           y_max=self.output_size[0],
                           p=1),
                    # A.CLAHE(p=1),
                    A.HorizontalFlip(p=0.5),
                    A.VerticalFlip(p=0.5),
                    A.ToFloat(p=1),  # (0 ~ 1)
                    # A.Normalize(max_pixel_value=1, p=1)
                ],
                keypoint_params=A.KeypointParams(format='xy'))
        elif self.mode == 'test':
            # import random
            # random.seed(2020)
            transform = A.Compose(
                [
                    A.Resize(height=self.output_size[0],
                             width=self.output_size[1],
                             p=1),  # /8--->(356, 536)
                    A.Crop(x_min=40,
                           y_min=0,
                           x_max=self.output_size[1] - 76,
                           y_max=self.output_size[0],
                           p=1),
                    # (356, 460)
                    # A.CLAHE(p=1),
                    A.ToFloat(p=1),  # (0 ~ 1)
                    # A.Normalize(max_pixel_value=1, p=1)
                ],
                keypoint_params=A.KeypointParams(format='xy'))
        transformed = transform(image=img, keypoints=ldmarks)

        return transformed
Example #9
    def forward(self, x: List[torch.Tensor]) -> List[List[ClassifiedBBox]]:
        results = []

        bx_raw = self.detector(x)
        for i in range(len(x)):
            x_i = x[i].cpu()
            bx_raw_i = {k: v.cpu() for k, v in bx_raw[i].items()}

            bboxes = convert_bboxes(boxes=bx_raw_i['boxes'],
                                    labels=bx_raw_i['labels'],
                                    scores=bx_raw_i['scores'])
            bboxes = filter_bboxes(bboxes,
                                   min_score=self.min_score,
                                   categories=self.airplane_categories)
            bboxes = non_maximum_suppression(bboxes, self.nms_threshold,
                                             self.nms_ranking,
                                             self.nms_suppression)
            bboxes = bboxes[:self.top_bboxes]

            image = self.tensor_to_image(x_i)
            w, h = image.size
            image_rect = Rectangle(pt_from=(0, 0), pt_to=(w - 1, h - 1))

            classified_bboxes = []
            if len(bboxes) > 0:
                cropped_tensors = []
                for bbox in bboxes:
                    bbox = bbox.expand(self.expand_coeff)
                    bbox = bbox.intersection(image_rect)

                    crop = AlbumentationsTransform(
                        albu.Crop(x_min=bbox.x0,
                                  y_min=bbox.y0,
                                  x_max=bbox.x1,
                                  y_max=bbox.y1,
                                  always_apply=True))
                    cropped_image = crop(image)

                    cropped_tensor = self.classifier_transform(cropped_image)
                    cropped_tensors.append(cropped_tensor)

                crops_i = torch.stack(cropped_tensors).to(x[i])
                z_i = self.classifier(crops_i)
                probs_i_hat = torch.exp(log_softmax(z_i, dim=1)).cpu().numpy()

                for j in range(len(bboxes)):
                    classified_bboxes.append(
                        ClassifiedBBox(frame=bboxes[j],
                                       classes=dict(
                                           zip(self.classifier.target_classes,
                                               probs_i_hat[j].tolist()))))

            results.append(classified_bboxes)

        return results
Example #10
 def cropIfNeed(self, img):
     img = np.array(img)
     height, width, _ = img.shape
     cropped_height, cropped_width, _ = img.shape
     if cropped_height % 32 != 0:
         cropped_height = (cropped_height // 32) * 32
     if cropped_width % 32 != 0:
         cropped_width = (cropped_width // 32) * 32
     crop = albu.Crop(x_min=0, y_min=0, x_max=cropped_width, y_max=cropped_height)
     img = crop(image=img)["image"]
     return img, crop
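A quick standalone check of the idea above (dummy image; sizes chosen so the trim is visible): the crop trims height and width down to the nearest multiple of 32, which many encoder-decoder networks require.

import numpy as np
import albumentations as albu

img = np.random.randint(0, 256, (487, 641, 3), np.uint8)
h, w = (487 // 32) * 32, (641 // 32) * 32          # 480, 640
crop = albu.Crop(x_min=0, y_min=0, x_max=w, y_max=h)
out = crop(image=img)["image"]
print(out.shape)                                    # (480, 640, 3)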
Example #11
def crop_by_bboxes(image_dir_in,
                   image_dir_out,
                   bboxes_path: str,
                   expand_coeff: float = 0.25,
                   min_score: float = 0.7,
                   nms_threshold: float = 0.5,
                   nms_ranking: str = 'score_sqrt_area',
                   nms_suppression: str = 'overlap',
                   show_progress: bool = True):

    pathlib.Path(image_dir_out).mkdir(parents=True, exist_ok=False)

    paths = list(os.listdir(image_dir_in))

    with LMDBDict(bboxes_path) as lmdb_dict:
        bboxes_db = BoundingBoxesLMDBDict(lmdb_dict)

        with tqdm(total=len(paths), disable=not show_progress,
                  file=sys.stdout) as progress_bar:
            progress_bar.set_description(
                f'Cropping images {image_dir_in} => {image_dir_out}')

            for path in paths:
                img = Image.open(os.path.join(image_dir_in, path))

                w, h = img.size
                img_rect = Rectangle(pt_from=(0, 0), pt_to=(w - 1, h - 1))

                bboxes = bboxes_db[path]
                bboxes = filter_bboxes(bboxes, min_score=min_score)
                bboxes = non_maximum_suppression(bboxes, nms_threshold,
                                                 nms_ranking, nms_suppression)

                if len(bboxes) > 0:
                    bbox = bboxes[0]
                    bbox = bbox.expand(expand_coeff)
                    bbox = bbox.intersection(img_rect)

                    transform = albu.Crop(x_min=bbox.x0,
                                          y_min=bbox.y0,
                                          x_max=bbox.x1,
                                          y_max=bbox.y1,
                                          always_apply=True)
                else:
                    transform = albu_ext.Identity()

                augmented = transform(image=np.array(img))
                img_aug = Image.fromarray(augmented['image'])

                img_aug.save(os.path.join(image_dir_out, path))

                progress_bar.update()
Example #12
 def crop_image_train(self, img_path, annotations, crop_h, crop_w,
                      json_name, im_fold):
     # import pdb
     # pdb.set_trace()
     save_path = osp.dirname(img_path)
     json_path = osp.join(save_path, json_name)
     save_path = osp.join(save_path, im_fold)
     if not osp.exists(save_path):
         os.makedirs(save_path)
     imgs = os.listdir(img_path)
     # crop = A.Compose(
     #     [A.RandomCrop(crop_h, crop_w)],
     #     bbox_params=A.BboxParams(format='coco')
     # )
     # image = cv2.imread(osp.join(img_path, imgs[0]))
     new_annotations = {}
     id = 0
     for im in tqdm(imgs):
         path = osp.join(img_path, im)
         fname, _ = osp.splitext(im)
         image = cv2.imread(path)
         # print(annotations[im])
         bboxs = [[box[1][0], box[1][1], box[1][2], box[1][3], box[0]]
                  for box in annotations[im]]
         H, W, C = image.shape
         for i, bbox in enumerate(bboxs):
             center_point = [bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2]
             x1 = int(center_point[0] - crop_h // 2)
             x2 = int(center_point[0] + crop_h // 2)
             y1 = int(center_point[1] - crop_w // 2)
             y2 = int(center_point[1] + crop_w // 2)
             x1 = x1 if x1 > 0 else 0
             x2 = x2 if x2 < W else W
             y1 = y1 if y1 > 0 else 0
             y2 = y2 if y2 < H else H
             crop = A.Compose([A.Crop(x1, y1, x2, y2)],
                              bbox_params=A.BboxParams(format='coco'))
             # import pdb
             # pdb.set_trace()
             transformed = crop(image=image, bboxes=bboxs)
             new_image = transformed['image']
             new_bbox = transformed['bboxes']
             new_name = fname + '_' + str(i) + '.png'
             new_box = [[box[4], box[:4]] for box in new_bbox]
             new_path = osp.join(save_path, new_name)
             new_im_info = [id, crop_h, crop_w]
             all_info = [new_box, new_im_info]
             new_annotations[new_name] = all_info
             cv2.imwrite(new_path, new_image)
             id += 1
     self.convert2coco(new_annotations, json_path)
Example #13
def test(resize_size=256, crop_size=224):
    start_center = int(round((resize_size - crop_size - 1) / 2))
    return albumentations.Compose([
        albumentations.Resize(resize_size, resize_size),
        albumentations.Crop(
            start_center,
            start_center,
            start_center + crop_size,
            start_center + crop_size,
        ),
        albumentations.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])
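A hypothetical call of test() above (dummy image; assumes the listing's albumentations and ToTensorV2 imports): the image is resized to 256x256, a 224x224 window at offset 16 is cropped out, then normalized and converted to a CHW tensor.

import numpy as np

tfm = test(resize_size=256, crop_size=224)
img = np.random.randint(0, 256, (333, 500, 3), np.uint8)
out = tfm(image=img)['image']
print(out.shape)        # torch.Size([3, 224, 224])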
Example #14
    def __getitem__(self, index):
        """Return a data point and its metadata information.

        Parameters:
            index - - a random integer for data indexing

        Returns a dictionary that contains A, B, A_paths and B_paths
            A (tensor) - - an image in the input domain
            B (tensor) - - its corresponding image in the target domain
            A_paths (str) - - image paths
            B_paths (str) - - image paths (same as A_paths)
        """
        # read a image given a random integer index
        AB_path = self.AB_paths[index]
        AB = Image.open(AB_path).convert('RGB')
        # split AB image into A and B
        w, h = AB.size
        w2 = int(w / 2)
        A = AB.crop((0, 0, w2, h))
        B = AB.crop((w2, 0, w, h))

        # apply the same transform to both A and B
        # transform_params = get_params(self.opt, A.size)
        transformOutput = Al.Compose([
            Al.Crop(x_max=256, y_max=256),
            Al.RandomRotate90(),
            Al.Flip(),
        ])
        transformInput = Al.Compose([
            Al.Blur(blur_limit=3),
            Al.Equalize(always_apply=True),
            Al.RandomBrightnessContrast(),
            transformOutput,
        ])
        random.seed(42)
        A = transformInput(image=np.array(A))['image']
        B = transformOutput(image=np.array(B))['image']

        # A_transform = get_transform(self.opt, transform_params, grayscale=(self.input_nc == 1))
        # B_transform = get_transform(self.opt, transform_params, grayscale=(self.output_nc == 1))

        # A = A_transform(A)
        # B = B_transform(B)

        return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
Example #15
def detect(data_path, model):
    #change
    th = (200, 300, 1100, 900)
    transform_A = A.Compose([
        A.Crop(x_min=th[0],
               y_min=th[1],
               x_max=th[2],
               y_max=th[3],
               always_apply=True)
    ])
    data_frame = pd.DataFrame(columns=['image_name', 'x1', 'y1', 'x2', 'y2'])
    k = 0
    l = 0
    for img_nm in os.listdir(data_path):
        k += 1
        image = data_path + img_nm
        image = io.imread(image)
        image = transform_A(image=image)['image']
        #print(image)
        image_for_model = FT.to_tensor(image)
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        image_for_model = FT.normalize(image_for_model, mean=mean, std=std)
        image_for_model = image_for_model.to(device)
        preds = model([image_for_model])[0]
        nms_preds = torchvision.ops.nms(preds['boxes'],
                                        preds['scores'],
                                        iou_threshold=0.2)
        result = {
            'boxes': preds['boxes'][nms_preds].to('cpu').tolist(),
            'labels': preds['labels'][nms_preds].to('cpu').tolist()
        }
        for bbox, lbl in zip(result['boxes'], result['labels']):
            if lbl == 2:
                l += 1
                df = pd.Series({
                    'image_name': img_nm,
                    'x1': bbox[0] + th[0],
                    'y1': bbox[1] + th[1],
                    'x2': bbox[2] + th[0],
                    'y2': bbox[3] + th[1]
                })
                data_frame = pd.concat([data_frame, df.to_frame().T],
                                       ignore_index=True)
    data_frame.to_csv('bboxes.csv')
Example #16
    def test_augment(image, eigenvectors, eigenvalues, pixel_avg, stdev):
        """
        Create ten different images per input image, all
        the different augmentations specified on the paper
        for test time.
        """
        height, width = image.shape[0], image.shape[1]

        crop_width = min(224, width)
        crop_height = min(224, height)

        crops = [albumentations.CenterCrop(crop_height, crop_width)]
        flipper = albumentations.HorizontalFlip()

        for corner in ['ur', 'lr', 'll', 'ul']:
            if corner[0] == 'u':
                y_max = height
                y_min = height - crop_height
            else:
                y_max = crop_height
                y_min = 0

            if corner[1] == 'r':
                x_max = width
                x_min = width - crop_width
            else:
                x_max = crop_width
                x_min = 0

            crops.append(albumentations.Crop(x_min, y_min, x_max, y_max))

        output_images = []
        for cropper in crops:
            for is_reflected in [True, False]:

                augmentations = [cropper, flipper] if is_reflected else [cropper]
                augmenter = albumentations.Compose(augmentations)
                output_image = ((augmenter(image=image)["image"] - pixel_avg) /
                                255) / stdev
                output_images.append(ImagenetSequence.pad_image(output_image))

        return output_images
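The same ten-view recipe written as a standalone sketch (dummy image, no class involved): four corner crops plus a center crop, each taken with and without a horizontal flip.

import numpy as np
import albumentations

image = np.random.randint(0, 256, (300, 400, 3), np.uint8)
h, w = image.shape[:2]
ch, cw = min(224, h), min(224, w)

crops = [albumentations.CenterCrop(ch, cw)]
for x_min, y_min in [(0, 0), (w - cw, 0), (0, h - ch), (w - cw, h - ch)]:
    crops.append(albumentations.Crop(x_min, y_min, x_min + cw, y_min + ch))

views = []
for cropper in crops:
    for flip in (False, True):
        aug = [cropper, albumentations.HorizontalFlip(p=1)] if flip else [cropper]
        views.append(albumentations.Compose(aug)(image=image)['image'])
print(len(views), views[0].shape)   # 10 (224, 224, 3)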
Example #17
    def expression(self, frame, objects, index):
        # image augmentation helper function
        def augment(aug, image):
            return aug(image=image)['image']

        # convert box float vals to ints for cropping faces
        box_ints = objects[index]['box'].astype(int)
        # crop bounding box, resize to 48x48, convert to 1-channel tensor
        crop_aug = albu.Crop(box_ints[0], box_ints[1], box_ints[2],
                             box_ints[3])
        cropped = augment(crop_aug, frame)
        size_aug = albu.Resize(48, 48)
        cropped = augment(size_aug, cropped)
        pil = transforms.ToPILImage()(cropped)
        grayed = transforms.Grayscale()(pil)
        input_tens = transforms.ToTensor()(grayed)

        classes = [
            'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprised', 'Neutral'
        ]
        # define and load network
        net = model.Model(num_classes=len(classes))
        checkpoint = torch.load(
            '../pytorchfer/trained/private_model_291_60.t7',
            map_location=torch.device(
                "cuda" if torch.cuda.is_available() else "cpu"))
        net.load_state_dict(checkpoint['net'])
        net.eval()
        # backprop net - order of predicted classes
        img = torch.stack([input_tens])
        bp = BackPropagation(model=net)
        prob, emo = bp.forward(img)
        gcam = GradCAM(model=net)
        _ = gcam.forward(img)
        gbp = GuidedBackPropagation(model=net)
        _ = gbp.forward(img)
        # most probable class
        actual_emotion = emo[:, 0]
        gbp.backward(ids=actual_emotion.reshape(1, 1))
        gcam.backward(ids=actual_emotion.reshape(1, 1))

        print(classes[actual_emotion.data], (prob.data[:, 0] * 100))
Example #18
    def _configure_slicing(self):
        """Configure the transformation for the slicing of images."""
        slice_index = self.index % self.parent.num_slices_per_image
        slice_index_x = slice_index // self.parent.num_slices_per_axis
        slice_index_y = slice_index % self.parent.num_slices_per_axis
        if self.parent.num_slices_per_image > 1:

            if self.parent.initial_cropping_rectangle is None:
                image_size = image_size_hwc(self.image)
            else:
                r = self.parent.initial_cropping_rectangle
                image_size = (r[3] - r[1], r[2] - r[0])

            num_surplus_pixels_y, num_surplus_pixels_x = [
                num_pixels % self.parent.num_slices_per_axis
                for num_pixels in image_size
            ]

            if num_surplus_pixels_y or num_surplus_pixels_x:
                warnings.warn(
                    f"Cannot slice image evenly. Discarding pixels "
                    f"(x: {num_surplus_pixels_x}; y: {num_surplus_pixels_y}).")

            slice_size_y, slice_size_x = [
                num_pixels // self.parent.num_slices_per_axis
                for num_pixels in image_size
            ]

            x_min = slice_index_x * slice_size_x
            x_max = x_min + slice_size_x
            y_min = slice_index_y * slice_size_y
            y_max = y_min + slice_size_y

            slice_rectangle = [x_min, y_min, x_max, y_max]

            self.transforms.append(albumentations.Crop(*slice_rectangle))

            self.image_name += f"_slice{slice_index}"

        self.slice_index = slice_index
        self.slice_index_x = slice_index_x
        self.slice_index_y = slice_index_y
Example #19
    def bbox_aug(self, img, bbox, h, w):
        resized_h = int(h * 0.8)
        resized_w = int(w * 0.8)
        xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]
        first = [max(xmax - resized_w, 0), max(ymax - resized_h, 0)]
        second = [min(xmin + resized_w, w) - resized_w, min(ymin + resized_h, h) - resized_h]
        if first[0] > second[0] or first[1] > second[1]:
            tf = A.Compose(
                [
                    A.HorizontalFlip(p=0.5),
                    A.Resize(self.data_cfg.DATA.GLOBAL_SIZE[0], self.data_cfg.DATA.GLOBAL_SIZE[1]),
                    AP.transforms.ToTensor(normalize={
                        'mean': [0.485, 0.456, 0.406],
                        'std': [0.229, 0.224, 0.225]
                    })
                ],
                bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']),
            )(image=img, bboxes=[bbox], class_labels=[0])
            # print(tf['bboxes'])
            return tf['image'], tf['bboxes'][0]
            # return img, bbox
        x = random.randint(first[0], second[0])
        y = random.randint(first[1], second[1])

        # print(bbox)
        tf = A.Compose(
            [
                A.Crop(x_min=x, y_min=y, x_max=x+resized_w, y_max=y+resized_h, p=0.5),
                A.HorizontalFlip(p=0.5),
                A.Resize(self.data_cfg.DATA.GLOBAL_SIZE[0], self.data_cfg.DATA.GLOBAL_SIZE[1]),
                AP.transforms.ToTensor(normalize={
                    'mean': [0.485, 0.456, 0.406],
                    'std': [0.229, 0.224, 0.225]
                })
            ],
            bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']),
        )(image=img, bboxes=[bbox], class_labels=[0])
        # print(tf['bboxes'])
        return tf['image'], tf['bboxes'][0]
Example #20
 def __init__(self, names, image_dir, point_dir, transform, cache_size,
              patch_size, inter_dist):
     #store filenames
     self.image_names = [os.path.join(image_dir, f + '.jpg') for f in names]
     self.point_names = [os.path.join(point_dir, f + '.txt') for f in names]
     self.transform = transform
     self.random_crop = A.Compose(
         [A.RandomCrop(*patch_size)],
         keypoint_params=A.KeypointParams(format='xy'))
     self.det_crop = A.Compose(
         [A.Crop()], keypoint_params=A.KeypointParams(format='xy'))
     self.patch_size = patch_size
     self.center = (patch_size[1] // 2, patch_size[0] // 2)
     self.inter_dist = inter_dist
     self._cache_patch = []
     self._cache_points = []
     self._cache_size = cache_size
     self.start_index = 0
     # fill cache
     self.update_n_samples = 20
     for i in range(
         (cache_size + self.update_n_samples - 1) // self.update_n_samples):
         self.update_cache()
Example #21
    def expression(self, frame, objects, index):
        # apply TenCrop to image and convert each crop ToTensor
        transform_test = transforms.Compose([
            transforms.TenCrop(self.cut_size),
            transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        ])
        # albumentations helper function
        def augment(aug, image):
            return aug(image=image)['image']
        # face bounding box coordinates as type: integer
        box_ints = objects[index]['box'].astype(int)
        # crop bounding box from frame, resize to 48x48, convert to Tensor
        crop_aug = albu.Crop(box_ints[0], box_ints[1], box_ints[2], box_ints[3])
        img = augment(crop_aug, frame)
        size_aug = albu.Resize(48, 48)
        img = augment(size_aug, img)
        img = transforms.ToPILImage()(img)
        # apply TenCrop to Resized + Grayscaled faces and convert ToTensor inputs
        inputs = transform_test(img)

        class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

        net = VGG('VGG19')
        checkpoint = torch.load(model_path,     # path to FER model
            map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        net.load_state_dict(checkpoint['net'])
        net.eval()

        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        outputs = net(inputs)
        outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

        scores = F.softmax(outputs_avg, dim=0)
        max_score, predicted = torch.max(scores.data, 0)

        print("{0}: {1}% confidence".format(class_names[int(predicted)], (float(max_score)*100)))
Example #22
def get_transform_valid_another_crop_512():
    return alb.Compose([alb.Crop(12, 42, 500, 460, p=1.0)])
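A hypothetical shape check for the crop above (dummy 512x512 image; assumes the listing's alb import): Crop(12, 42, 500, 460) keeps columns 12..500 and rows 42..460, so the output is 418x488.

import numpy as np

img = np.random.randint(0, 256, (512, 512, 3), np.uint8)
out = get_transform_valid_another_crop_512()(image=img)['image']
print(out.shape)    # (418, 488, 3)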
Example #23
def get_transform_another_crop_512():
    return alb.Compose([
        alb.Crop(12, 42, 500, 460, p=1.0),
        alb.HorizontalFlip(p=0.5),
        alb.VerticalFlip(p=0.5),
    ])
Example #24
    def _get_val_item_offline(self, index):
        img_path = self.imgs[index]
        # print('val image path: '+ img_path)
        bboxes_with_cls_id = np.array(self.truth.get(img_path), dtype=np.float64)

        img_old = cv2.imread(os.path.join(self.cfg.data_dir, img_path))
        img = self.load_image_pil(os.path.join(self.cfg.data_dir, img_path))
        w, h, h_cut = 2432, 1368, 1216

        # 0. Dict for all augmentations
        bbox_params_ = {
            'format': 'pascal_voc',
            'min_area': 0,
            'min_visibility': 0,
        }

        # 1. Resize the image, and crop out upper part of it
        resize_and_crop = A.Compose([
            A.Resize(height=h, width=w, p=1.0),
            A.Crop(x_min=0, x_max=w, y_min=h - h_cut, y_max=1368, p=1.0)
        ],
                                    p=1.0,
                                    bbox_params=A.BboxParams(**bbox_params_))

        # 2. Divide into 8 squares
        assert (w % 4) == 0
        assert (h % 2) == 0
        sides_x = [int(w * (j / 4)) for j in range(5)]
        sides_y = [int(h_cut * (j / 2)) for j in range(3)]

        crop_list = []
        for i in range(4):
            for j in range(2):
                x_min, x_max = sides_x[i], sides_x[i + 1]
                y_min, y_max = sides_y[j], sides_y[j + 1]
                crop_list.append(
                    A.Compose(
                        [
                            A.Crop(x_min=x_min,
                                   x_max=x_max,
                                   y_min=y_min,
                                   y_max=y_max,
                                   p=1.0),
                            # ToTensorV2(p=1.0)
                        ],
                        p=1.0,
                        bbox_params=A.BboxParams(**bbox_params_)))

        # 3. Apply augs
        temp = resize_and_crop(**{
            'image': img,
            'bboxes': bboxes_with_cls_id,
        })
        images_list, boxes_list = [], []
        for crop in crop_list:
            sample = crop(**{
                'image': temp['image'],
                'bboxes': temp['bboxes'],
            })
            images_list.append(sample['image'])
            boxes_list.append(sample['bboxes'])
        # images_combined = torch.stack(images_list, dim=0)
        for i in range(len(boxes_list)):
            boxes_list[i] = tuple(
                map(lambda x: torch.tensor(x, dtype=torch.float),
                    boxes_list[i]))
        boxes_combined = pad_annots(boxes_list)

        targets_list = []
        for box in boxes_combined:
            num_objs = len(box)
            target = {}
            # boxes to coco format
            boxes = box[..., :4]
            boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]  # box width, box height
            target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
            target['labels'] = torch.as_tensor(
                bboxes_with_cls_id[..., -1].flatten(), dtype=torch.int64)
            # target['image_id'] = torch.tensor([get_image_id(img_path)])
            target['area'] = (target['boxes'][:, 3]) * (target['boxes'][:, 2])
            target['iscrowd'] = torch.zeros((num_objs, ), dtype=torch.int64)
            targets_list.append(target)

        return images_list, targets_list
Example #25
    def _get_val_item_offline_wgisd(self, index):
        img_path = self.imgs[index]
        # print('val image path: '+ img_path)
        bboxes_with_cls_id = np.array(self.truth.get(img_path), dtype=np.float64)

        img = Image.open(img_path)
        img = np.array(img)
        w, h, h_cut = 4256, 2432, 2432

        # 0. Dict for all augmentations
        bbox_params_ = {
            'format': 'pascal_voc',
            'min_area': 0,
            'min_visibility': 0,
        }

        # 1. Resize the image, and crop out upper part of it
        resize_and_crop = A.Compose(
            [
                A.Resize(height=h, width=w, p=1.0),
                # A.Crop(x_min=0, x_max=w, y_min=h - h_cut, y_max=h, p=1.0)
            ],
            p=1.0,
            bbox_params=A.BboxParams(**bbox_params_))

        # 2. Divide into 28 squares (7 x 4)
        assert (w % 7) == 0
        assert (h % 4) == 0
        sides_x = [int(w * (j / 7)) for j in range(8)]
        sides_y = [int(h_cut * (j / 4)) for j in range(5)]

        crop_list = []
        for i in range(7):
            for j in range(4):
                x_min, x_max = sides_x[i], sides_x[i + 1]
                y_min, y_max = sides_y[j], sides_y[j + 1]
                crop_list.append(
                    A.Compose(
                        [
                            A.Crop(x_min=x_min,
                                   x_max=x_max,
                                   y_min=y_min,
                                   y_max=y_max,
                                   p=1.0),
                            # ToTensorV2(p=1.0)
                        ],
                        p=1.0,
                        bbox_params=A.BboxParams(**bbox_params_)))

        # 3. Apply augs
        try:
            temp = resize_and_crop(**{
                'image': img,
                'bboxes': bboxes_with_cls_id,
            })
        except:
            return None, None
        images_list, boxes_list = [], []
        for crop in crop_list:
            sample = crop(**{
                'image': temp['image'],
                'bboxes': temp['bboxes'],
            })
            images_list.append(sample['image'])
            boxes_list.append(sample['bboxes'])
        # images_combined = torch.stack(images_list, dim=0)
        for i in range(len(boxes_list)):
            boxes_list[i] = tuple(
                map(lambda x: torch.tensor(x, dtype=torch.float),
                    boxes_list[i]))
        # boxes_combined = pad_annots(boxes_list)

        targets_list = []
        for box in boxes_list:
            num_objs = len(box)
            target = {}
            # boxes to coco format

            target['num_objs'] = num_objs
            targets_list.append(target)

        return images_list, targets_list
Example #26
    def __init__(
        self,
        base_dir="deeplite_torch_zoo/data/VOC/",
        split="train_aug",
        num_classes=21,
        affine_augmenter=None,
        image_augmenter=None,
        target_size=(512, 512),
        net_type="unet",
        ignore_index=255,
        debug=False,
    ):
        self.debug = debug
        #########To support subclasses######################
        self.classes = cfg.DATA["CLASSES"]
        self.all_classes = ["BACKGROUND"] + cfg.DATA["ALLCLASSES"]

        if num_classes == 2:
            self.classes = cfg.DATA["CLASSES_1"]
        elif num_classes == 3:
            self.classes = cfg.DATA["CLASSES_2"]

        self.classes = ["BACKGROUND"] + self.classes
        self.num_classes = len(self.classes)

        self.class_to_id = dict(zip(self.classes, range(self.num_classes)))
        self.id_to_class = {v: k for k, v in self.class_to_id.items()}

        self.class_to_id_all = dict(
            zip(self.all_classes, range(len(self.all_classes))))

        self.map_selected_ids_to_all = {
            k: self.class_to_id_all[v]
            for k, v in self.id_to_class.items()
        }
        self.map_all_ids_to_selected = {
            v: k
            for k, v in self.map_selected_ids_to_all.items()
        }

        #########To support subclasses######################
        self.base_dir = Path(base_dir) / Path("VOCdevkit/VOC2012")
        self.net_type = net_type
        self.ignore_index = ignore_index
        self.split = split

        valid_ids = self.base_dir / "ImageSets" / "Segmentation" / "val.txt"
        with open(valid_ids, "r") as f:
            valid_ids = f.readlines()
        if self.split == "valid":
            lbl_dir = "SegmentationClass"
            img_ids = valid_ids
        else:
            valid_set = set([valid_id.strip() for valid_id in valid_ids])
            lbl_dir = "SegmentationClassAug" if "aug" in split else "SegmentationClass"
            all_set = set([
                p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()
            ])
            img_ids = list(all_set - valid_set)

        img_ids = [_id for _id in img_ids if self._not_empty(_id, split)]

        self.img_paths = [
            (self.base_dir / "JPEGImages" / "{}.jpg".format(img_id.strip()))
            for img_id in img_ids
        ]
        self.lbl_paths = [
            (self.base_dir / lbl_dir / "{}.png".format(img_id.strip()))
            for img_id in img_ids
        ]

        # Resize
        if isinstance(target_size, str):
            target_size = eval(target_size)
        if "train" in self.split:
            if self.net_type == "deeplab":
                target_size = (target_size[0] + 1, target_size[1] + 1)
            self.resizer = albu.Compose([
                albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                PadIfNeededRightBottom(
                    min_height=target_size[0],
                    min_width=target_size[1],
                    value=0,
                    ignore_index=self.ignore_index,
                    p=1.0,
                ),
                albu.RandomCrop(height=target_size[0],
                                width=target_size[1],
                                p=1.0),
            ])
        else:
            # self.resizer = None
            self.resizer = albu.Compose([
                PadIfNeededRightBottom(
                    min_height=target_size[0],
                    min_width=target_size[1],
                    value=0,
                    ignore_index=self.ignore_index,
                    p=1.0,
                ),
                albu.Crop(x_min=0,
                          x_max=target_size[1],
                          y_min=0,
                          y_max=target_size[0]),
            ])

        # Augment
        if "train" in self.split:
            self.affine_augmenter = affine_augmenter
            self.image_augmenter = image_augmenter
        else:
            self.affine_augmenter = None
            self.image_augmenter = None
Example #27
 def _configure_initial_cropping(self):
     """Configure the transform for the initial image cropping."""
     if self.parent.initial_cropping_rectangle is not None:
         self.transforms.append(
             albumentations.Crop(*self.parent.initial_cropping_rectangle))
Example #28
    def __init__(self,
                 base_dir='../data/pascal_voc_2012/VOCdevkit/VOC2012',
                 split='train_aug',
                 affine_augmenter=None,
                 image_augmenter=None,
                 target_size=(512, 512),
                 net_type='unet',
                 ignore_index=255,
                 debug=False):
        self.debug = debug
        self.base_dir = Path(base_dir)
        assert net_type in ['unet', 'deeplab']
        self.net_type = net_type
        self.ignore_index = ignore_index
        self.split = split

        valid_ids = self.base_dir / 'ImageSets' / 'Segmentation' / 'val.txt'
        with open(valid_ids, 'r') as f:
            valid_ids = f.readlines()
        if self.split == 'valid':
            lbl_dir = 'SegmentationClass'
            img_ids = valid_ids
        else:
            valid_set = set([valid_id.strip() for valid_id in valid_ids])
            lbl_dir = 'SegmentationClassAug' if 'aug' in split else 'SegmentationClass'
            all_set = set([
                p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()
            ])
            img_ids = list(all_set - valid_set)
        self.img_paths = [
            (self.base_dir / 'JPEGImages' / f'{img_id.strip()}.jpg')
            for img_id in img_ids
        ]
        self.lbl_paths = [(self.base_dir / lbl_dir / f'{img_id.strip()}.png')
                          for img_id in img_ids]

        # Resize
        if isinstance(target_size, str):
            target_size = eval(target_size)
        if 'train' in self.split:
            if self.net_type == 'deeplab':
                target_size = (target_size[0] + 1, target_size[1] + 1)
            self.resizer = albu.Compose([
                albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                PadIfNeededRightBottom(min_height=target_size[0],
                                       min_width=target_size[1],
                                       value=0,
                                       ignore_index=self.ignore_index,
                                       p=1.0),
                albu.RandomCrop(height=target_size[0],
                                width=target_size[1],
                                p=1.0)
            ])
        else:
            # self.resizer = None
            self.resizer = albu.Compose([
                PadIfNeededRightBottom(min_height=target_size[0],
                                       min_width=target_size[1],
                                       value=0,
                                       ignore_index=self.ignore_index,
                                       p=1.0),
                albu.Crop(x_min=0,
                          x_max=target_size[1],
                          y_min=0,
                          y_max=target_size[0])
            ])

        # Augment
        if 'train' in self.split:
            self.affine_augmenter = affine_augmenter
            self.image_augmenter = image_augmenter
        else:
            self.affine_augmenter = None
            self.image_augmenter = None
Example #29
    def __init__(self, imageInfo, opt, split):
        print("=> THe input CHANNELs are BGR not others.")
        opt.numClasses = 21
        self.inputSize = (375, 500)
        self.input_dim = 3
        self.imageInfo = imageInfo[split]
        self.opt = opt
        self.split = split
        self.dir = imageInfo['basedir']
        self.ignore_index = 255
        self.class_names = np.array([
            'background',
            'aeroplane',
            'bicycle',
            'bird',
            'boat',
            'bottle',
            'bus',
            'car',
            'cat',
            'chair',
            'cow',
            'diningtable',
            'dog',
            'horse',
            'motorbike',
            'person',
            'potted plant',
            'sheep',
            'sofa',
            'train',
            'tv/monitor',
        ])
        self.mean_bgr = np.array([0.485, 0.456, 0.406])
        self.mean_rgb = np.array([0.406, 0.456, 0.485])
        self.var = np.array([0.229, 0.224, 0.225])
        self.target_size = (self.inputSize[0] + 1, self.inputSize[1] + 1)
        if split == 'train':
            self.resizor = albu.Compose([
                albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                albu.PadIfNeeded(min_height=self.target_size[0],
                                 min_width=self.target_size[1],
                                 value=0,
                                 p=1.0),
                albu.RandomCrop(height=self.target_size[0],
                                width=self.target_size[1],
                                p=1.0)
            ])
        else:
            self.resizor = albu.Compose([
                albu.PadIfNeeded(min_height=self.target_size[0],
                                 min_width=self.target_size[1],
                                 value=0,
                                 p=1.0),
                albu.Crop(x_min=0,
                          x_max=self.target_size[1],
                          y_min=0,
                          y_max=self.target_size[0])
            ])

        if 'train' in self.split:
            self.image_augmenter = albu.Compose([
                albu.RandomBrightnessContrast(),
                albu.HorizontalFlip(p=0.5),
                albu.OpticalDistortion(p=0.5, distort_limit=2,
                                       shift_limit=0.5),
            ])
            # self.affine_augmenter = albu.Compose([albu])
        else:
            self.image_augmenter = None
Example #30
    def __init__(self,
                 base_dir='../data/deepglobe_as_pascalvoc/VOCdevkit/VOC2012',
                 split='train',
                 affine_augmenter=None,
                 image_augmenter=None,
                 target_size=(512, 512),
                 net_type='unet',
                 ignore_index=255,
                 debug=False):
        self.debug = debug
        self.base_dir = Path(base_dir)
        assert net_type in ['unet', 'deeplab']
        self.net_type = net_type
        self.ignore_index = ignore_index
        self.split = split

        ######################################
        #          This will change :        #
        ######################################

        categories = ['AgricultureLand', 'BarrenLand', 'Forest',
                      'RangeLand', 'UrbanLand', 'Water']

        # Generate randomized valid split
        valid_ids = []
        for category in categories:
            valid_ids_dir = self.base_dir / 'ClassifiedTiles' / category / 'val.txt'
            with open(valid_ids_dir, 'r') as f:
                temp_ids = f.readlines()
                random.shuffle(temp_ids)
                valid_ids = valid_ids + temp_ids[:69]

        # Generate randomized train split
        train_ids = []
        for category in categories:
            train_ids_dir = self.base_dir / 'ClassifiedTiles' / category / 'train.txt'
            with open(train_ids_dir, 'r') as f:
                temp_ids = f.readlines()
                random.shuffle(temp_ids)
                train_ids = train_ids + temp_ids[:278]

        lbl_dir = 'SegmentationClass'
        if self.split == 'valid':
            img_ids = valid_ids
        elif self.split == 'train':
            img_ids = train_ids
        else:
            valid_set = set([valid_id.strip() for valid_id in valid_ids])
            lbl_dir = 'SegmentationClassAug' if 'aug' in split else 'SegmentationClass'
            all_set = set([
                p.name[:-4] for p in self.base_dir.joinpath(lbl_dir).iterdir()
            ])
            img_ids = list(all_set - valid_set)
        self.img_paths = [
            (self.base_dir / 'JPEGImages' / f'{img_id.strip()}.jpg')
            for img_id in img_ids
        ]
        self.lbl_paths = [(self.base_dir / lbl_dir / f'{img_id.strip()}.png')
                          for img_id in img_ids]

        # Resize
        if isinstance(target_size, str):
            target_size = eval(target_size)
        if 'train' in self.split:
            if self.net_type == 'deeplab':
                target_size = (target_size[0] + 1, target_size[1] + 1)
            self.resizer = albu.Compose([
                albu.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
                PadIfNeededRightBottom(min_height=target_size[0],
                                       min_width=target_size[1],
                                       value=0,
                                       ignore_index=self.ignore_index,
                                       p=1.0),
                albu.RandomCrop(height=target_size[0],
                                width=target_size[1],
                                p=1.0)
            ])
        else:
            # self.resizer = None
            self.resizer = albu.Compose([
                PadIfNeededRightBottom(min_height=target_size[0],
                                       min_width=target_size[1],
                                       value=0,
                                       ignore_index=self.ignore_index,
                                       p=1.0),
                albu.Crop(x_min=0,
                          x_max=target_size[1],
                          y_min=0,
                          y_max=target_size[0])
            ])

        # Augment
        if 'train' in self.split:
            self.affine_augmenter = affine_augmenter
            self.image_augmenter = image_augmenter
        else:
            self.affine_augmenter = None
            self.image_augmenter = None