示例#1
0
def scale(img, size):
    """Resize an image so that its shorter edge becomes ``size`` pixels.

    The aspect ratio is kept: the longer edge is multiplied by the same
    factor and truncated to an integer. If the shorter edge already equals
    ``size`` the input array itself is returned and no resizing happens.

    Args:
        img (numpy.ndarray): an image array whose shape unpacks as
            ``(_, H, W)``.
        size (int): the target length of the shorter edge.

    Returns:
        ~numpy.ndarray: The scaled image, or ``img`` itself when its
        shorter edge is already ``size``.

    """
    _, height, width = img.shape
    if min(height, width) == size:
        return img

    if width < height:
        out_shape = (int(size * height / width), size)
    else:
        out_shape = (size, int(size * width / height))
    return resize(img, out_shape)
示例#2
0
def stack_imgs(fns, crop, resize=False, grey=False):
    """Load several image files and stack them along the channel axis.

    Supported inputs, chosen by file extension:

    * ``.csv``: loaded with ``np.loadtxt`` and given a leading channel axis.
    * ``.npy``: loaded with ``np.load``, given a leading channel axis, cast
      to float32, clipped to ``[0, 100]``, square-rooted and divided by 10.
    * anything else: read with ``read_image`` and mapped with
      ``x / 127.5 - 1.0``.

    Args:
        fns (iterable of str): Paths of the files to load.
        crop (tuple): ``(height, width)`` that each image should at least
            cover when ``resize`` is enabled.
        resize (bool): If true, an image smaller than ``crop`` along either
            spatial axis is enlarged, keeping its aspect ratio, so that one
            edge equals the corresponding ``crop`` entry.
        grey (bool): Passed to ``read_image`` as ``color=not grey``.

    Returns:
        numpy.ndarray: All loaded images concatenated along axis 0.
    """
    imgs_in = []
    for fn in fns:
        _, ext = os.path.splitext(fn)
        # image can be given as csv or jpg/png... etc
        if ext == ".csv":
            img_in = np.loadtxt(fn, delimiter=",")[np.newaxis, ]
        elif ext == ".npy":
            img_in = (np.load(fn)[np.newaxis, ]).astype(np.float32)
            # nasty preprocess: compress the value range to [0, 1]
            img_in = np.sqrt(np.clip(img_in, 0, 100)) / 10.0
        else:
            img_in = read_image(fn, color=not grey) / 127.5 - 1.0

        # resize if the image is too small
        if resize:
            height, width = img_in.shape[1], img_in.shape[2]
            if height < crop[0] or width < crop[1]:
                # The ``resize`` flag shadows the module-level ``resize``
                # function inside this scope, so calling ``resize(...)``
                # here would call a bool. Look the function up explicitly.
                resize_image = globals()["resize"]
                if crop[0] / height < crop[1] / width:
                    new_size = (int(crop[1] / width * height), crop[1])
                else:
                    new_size = (crop[0], int(crop[0] / height * width))
                img_in = resize_image(img_in, new_size)
        imgs_in.append(img_in)

    # an input/output image can consist of multiple images;
    # they are stacked as channels
    return np.concatenate(imgs_in, axis=0)
示例#3
0
    def _get_proba(self, img, scale, flip):
        """Compute a softmax score map for one flipped/rescaled variant.

        The image is optionally mirrored along its last axis and rescaled by
        ``scale``, passed through ``self.prepare`` and the network, and the
        softmax output is brought back to the geometry of the input.

        Args:
            img: Image array whose shape unpacks as ``(_, H, W)``.
            scale (float): Rescaling factor; ``1.0`` skips resizing.
            flip (bool): Whether to mirror the image along its last axis.

        Returns:
            Score array on the CPU. When ``scale`` is not 1 it is resized
            to ``(H, W)``; when ``flip`` is set it is mirrored back.
        """
        if flip:
            img = img[:, :, ::-1]

        _, H, W = img.shape
        rescaled = scale != 1.0
        h, w = H, W
        if rescaled:
            h, w = int(H * scale), int(W * scale)
            img = resize(img, (h, w))

        img = self.prepare(img)

        batch = chainer.Variable(self.xp.asarray(img[np.newaxis]))
        proba = F.softmax(self.__call__(batch), axis=1)
        # Bring the prediction to the prepared image's spatial size, then
        # keep only the top-left (h, w) region.
        proba = F.resize_images(proba, img.shape[1:])
        score = chainer.backends.cuda.to_cpu(proba[0, :, :h, :w].array)

        if rescaled:
            score = resize(score, (H, W))
        if flip:
            score = score[:, :, ::-1]
        return score
示例#4
0
    def __call__(self, in_data):
        """Prepare one example with visible/occluded label maps.

        ``img`` and ``lbl_occ`` are first moved from H, W, C to C, H, W.
        Outside training the example is returned right after that step.
        During training the image goes through ``self.mask_rcnn.prepare``
        and boxes, masks and label maps are resized by the same scale with
        interpolation ``0``; masks and label maps are then padded through
        ``pad_multiple_of`` with ``-1``. Finally, instances whose mask has
        no pixel equal to 1 are dropped.

        Args:
            in_data (tuple): ``(img, bbox, label, mask, lbl_vis, lbl_occ)``.

        Returns:
            tuple: ``(img, bbox, label, mask, lbl_vis, lbl_occ)`` when not
            training, otherwise
            ``(img, bbox, label, mask, scale, lbl_vis, lbl_occ)``.
        """
        assert len(in_data) == 6
        img, bbox, label, mask, lbl_vis, lbl_occ = in_data

        # H, W, C -> C, H, W
        img = img.transpose(2, 0, 1)
        lbl_occ = lbl_occ.transpose(2, 0, 1)

        if not self.train:
            return img, bbox, label, mask, lbl_vis, lbl_occ

        imgs, sizes, scales = self.mask_rcnn.prepare([img])
        img, (H, W), scale = imgs[0], sizes[0], scales[0]

        # Output size is derived from the scale rather than from img.shape.
        out_size = (int(round(scale * H)), int(round(scale * W)))

        def pad_chw(arr):
            # pad_multiple_of is applied on the channel-last layout.
            arr = pad_multiple_of(arr.transpose(1, 2, 0),
                                  mode='constant',
                                  constant_values=-1)
            return arr.transpose(2, 0, 1)

        if len(bbox) > 0:
            bbox = transforms.resize_bbox(bbox, (H, W), out_size)
        if len(mask) > 0:
            mask = transforms.resize(mask, size=out_size, interpolation=0)
            mask = pad_chw(mask)
            assert mask.shape[1:] == img.shape[1:]

        lbl_vis = transforms.resize(lbl_vis[None],
                                    size=out_size,
                                    interpolation=0)[0]
        lbl_vis = pad_multiple_of(lbl_vis, mode='constant', constant_values=-1)
        lbl_occ = pad_chw(
            transforms.resize(lbl_occ, size=out_size, interpolation=0))
        assert lbl_vis.shape == img.shape[1:]
        assert lbl_occ.shape[1:] == img.shape[1:]

        # NOTE: no random flip is applied in this transform.

        # Keep only instances that still have at least one foreground pixel.
        keep = (mask == 1).sum(axis=(1, 2)) > 0
        return (img, bbox[keep], label[keep], mask[keep], scale, lbl_vis,
                lbl_occ)
示例#5
0
    def __call__(self, in_data):
        """Randomly augment an ``(img, label)`` segmentation pair.

        Steps, in order: random rescale by a factor drawn from
        ``self.scale_range``, random crop, random rotation within
        [-10, 10], resize to ``self.crop_size`` if the result is smaller
        than it, optional horizontal flip, and mean subtraction. The label
        always uses nearest-neighbour interpolation and is filled with
        ``-1`` where the rotation exposes new pixels.

        Args:
            in_data (tuple): ``(img, label)``; ``img.shape`` unpacks as
                ``(_, height, width)`` and ``label`` is 2-D.

        Returns:
            tuple: The augmented ``(img, label)``.
        """
        img, label = in_data
        _, height, width = img.shape
        crop_h, crop_w = self.crop_size[0], self.crop_size[1]

        # Scale
        scale = np.random.uniform(self.scale_range[0], self.scale_range[1])
        scaled_size = (int(scale * height), int(scale * width))
        img = transforms.resize(img, scaled_size, PIL.Image.BICUBIC)
        label = transforms.resize(label[None], scaled_size,
                                  PIL.Image.NEAREST)[0]

        # Crop: fall back to the largest square when the scaled image is
        # smaller than the requested crop.
        if scaled_size[0] < crop_h or scaled_size[1] < crop_w:
            side = min(img.shape[1:])
            crop_shape = (side, side)
        else:
            crop_shape = self.crop_size
        img, param = transforms.random_crop(img, crop_shape, True)
        label = label[param['y_slice'], param['x_slice']]

        # Rotate
        angle = np.random.uniform(-10, 10)
        img = transforms.rotate(img, angle, expand=False)
        label = transforms.rotate(label[None],
                                  angle,
                                  expand=False,
                                  interpolation=PIL.Image.NEAREST,
                                  fill=-1)[0]

        # Resize
        if img.shape[1] < crop_h or img.shape[2] < crop_w:
            img = transforms.resize(img, self.crop_size, PIL.Image.BICUBIC)
        if label.shape[0] < crop_h or label.shape[1] < crop_w:
            label = transforms.resize(label[None].astype(np.float32),
                                      self.crop_size, PIL.Image.NEAREST)
            label = label.astype(np.int32)[0]

        # Horizontal flip
        if self.horizontal_flip and np.random.rand() > 0.5:
            img = transforms.flip(img, x_flip=True)
            label = transforms.flip(label[None], x_flip=True)[0]

        # Mean subtraction
        return img - self.mean, label
    def stack_imgs(self, fns, resize=False, onehot=False, clip=(None, None)):
        """Load several image files and stack them along the channel axis.

        The loader is chosen by file extension: ``.csv`` and ``.txt`` use
        ``np.loadtxt``, ``.npy`` uses ``np.load``, ``.dcm`` is read through
        ``dicom`` (pixel array plus ``RescaleIntercept``), and anything else
        goes through ``read_image``. 2-D arrays get a leading channel axis.

        Args:
            fns (iterable of str): Paths of the files to load.
            resize (bool): If true, an image smaller than ``self.crop``
                along either spatial axis is enlarged, keeping its aspect
                ratio, so that one edge equals the corresponding
                ``self.crop`` entry.
            onehot: If ``> 0``, the first stacked channel is treated as a
                class-index map and returned one-hot encoded.
            clip (tuple): ``(low, high)``. When ``low`` is not ``None`` the
                stacked array is clipped to this range and mapped linearly
                to ``[-1, 1]``.

        Returns:
            numpy.ndarray: float32 array. With ``onehot`` its shape is
            ``(self.class_num, H, W)``; otherwise it is the concatenation
            of all loaded images along axis 0.
        """
        imgs_in = []
        for fn in fns:
            _, ext = os.path.splitext(fn)
            # image can be given as csv or jpg/png... etc
            if ext == ".csv":
                img_in = np.loadtxt(fn, delimiter=",")
            elif ext == ".txt":
                img_in = np.loadtxt(fn)
            elif ext == ".npy":
                img_in = np.load(fn)
            elif ext == ".dcm":
                ref_dicom_in = dicom.read_file(fn, force=True)
                ref_dicom_in.file_meta.TransferSyntaxUID = dicom.uid.ImplicitVRLittleEndian
                img_in = ref_dicom_in.pixel_array + ref_dicom_in.RescaleIntercept
            else:  # image file
                img_in = read_image(fn, color=not self.grey)

            # make the image shape to [C,H,W]
            if len(img_in.shape) == 2:
                img_in = img_in[np.newaxis, ]

            # resize if the image is too small
            if resize:
                height, width = img_in.shape[1], img_in.shape[2]
                if height < self.crop[0] or width < self.crop[1]:
                    # The ``resize`` flag shadows the module-level
                    # ``resize`` function inside this scope, so calling
                    # ``resize(...)`` here would call a bool. Look the
                    # function up explicitly.
                    resize_image = globals()["resize"]
                    if self.crop[0] / height < self.crop[1] / width:
                        new_size = (int(self.crop[1] / width * height),
                                    self.crop[1])
                    else:
                        new_size = (self.crop[0],
                                    int(self.crop[0] / height * width))
                    img_in = resize_image(img_in, new_size)
            imgs_in.append(img_in)

        imgs_in = np.concatenate(imgs_in, axis=0)
        if onehot > 0:
            # Index an identity matrix with the class map: (H, W, classes),
            # then move the class axis to the front.
            return (np.eye(self.class_num)[imgs_in[0].astype(
                np.uint64)].astype(np.float32).transpose((2, 0, 1)))

        # clip and normalise to [-1,1]
        if clip[0] is not None:
            imgs_in = np.clip(imgs_in, clip[0], clip[1])
            imgs_in = 2 * (imgs_in - clip[0]) / (clip[1] - clip[0]) - 1.0
        return imgs_in.astype(np.float32)
def transform_img(inputs,
                  mean,
                  std,
                  pca_sigma=0,
                  random_angle=0,
                  x_random_flip=False,
                  y_random_flip=False,
                  expand_ratio=1.,
                  random_crop_size=(224, 224),
                  random_erase=False,
                  output_size=(224, 224),
                  train=False):
    """Normalise an image and, in training, apply random augmentations.

    The image is copied, optionally PCA-lighted (training only),
    standardised with ``mean`` and ``std`` per channel, and its channel
    order is reversed. In training mode it may then be rotated, flipped,
    expanded and randomly cropped. Finally it is resized to
    ``random_crop_size`` when cropping is enabled, else to ``output_size``.

    Args:
        inputs (tuple): ``(x, lab)``; ``x`` is indexed channel-first.
        mean, std: Per-channel arrays, broadcast as ``[:, None, None]``.
        pca_sigma: Sigma for ``transforms.pca_lighting``; 0 disables it.
        random_angle: Maximum absolute rotation angle; 0 disables it.
        x_random_flip, y_random_flip (bool): Enable random flips.
        expand_ratio (float): Maximum ratio for random expansion; values
            ``<= 1`` disable it.
        random_crop_size (tuple): Crop size. Cropping is enabled only when
            every entry is strictly positive.
        random_erase (bool): Apply ``random_erasing``. As in the original
            code this only takes effect when cropping is disabled.
        output_size (tuple): Final size when cropping is disabled.
        train (bool): Whether to apply the random augmentations.

    Returns:
        tuple: ``(x, lab)`` with the transformed image.
    """
    x, lab = inputs
    x = x.copy()

    # The original ``all(random_crop_size) > 0`` compared a bool with 0,
    # so it only tested truthiness. Check positivity of each entry instead.
    use_random_crop = all(side > 0 for side in random_crop_size)

    # Color augmentation
    if train and pca_sigma != 0:
        x = transforms.pca_lighting(x, pca_sigma)
    x -= mean[:, None, None]
    x /= std[:, None, None]
    x = x[::-1]
    if train:
        # Random rotate
        if random_angle != 0:
            angle = np.random.uniform(-random_angle, random_angle)
            x = cv_rotate(x, angle)

        # Random flip
        if x_random_flip or y_random_flip:
            x = transforms.random_flip(x,
                                       x_random=x_random_flip,
                                       y_random=y_random_flip)

        # Random expand
        if expand_ratio > 1:
            x = transforms.random_expand(x, max_ratio=expand_ratio)

        if use_random_crop:
            x = transforms.random_crop(x, random_crop_size)
        elif random_erase:
            # NOTE(review): erasing is skipped whenever cropping is on;
            # confirm this coupling is intended.
            x = random_erasing(x)

    final_size = random_crop_size if use_random_crop else output_size
    x = transforms.resize(x, final_size)

    return x, lab
 def get_example(self, i):
     """Load the ``i``-th image, normalise it and crop it.

     For ``self.imgtype == "npy"`` the array is clipped to
     ``[self.base, self.base + self.range]`` and mapped linearly to
     ``[-1, 1]``. Otherwise the file is read through ``dicom``, the
     rescale intercept is added and ``self.img2var`` is applied. The
     image gets a leading channel axis, is optionally resized to a
     ``self.scale_to`` square, edge-padded when smaller than the crop
     plus twice ``self.random``, and finally center- then random-cropped
     when it is larger than the crop plus ``self.random``.

     Args:
         i: Index passed to ``self.get_img_path``.

     Returns:
         The processed image array.
     """
     path = self.get_img_path(i)
     if self.imgtype == "npy":
         raw = np.load(path)
         clipped = np.clip(raw, self.base, self.base + self.range)
         img = 2 * (clipped - self.base) / self.range - 1.0
         if len(img.shape) == 2:
             img = img[np.newaxis, ]
     else:
         ref_dicom = dicom.read_file(path, force=True)
         img = self.img2var(ref_dicom.pixel_array + ref_dicom.RescaleIntercept)
         img = img[np.newaxis, :, :]

     if self.scale_to > 0:
         img = resize(img, (self.scale_to, self.scale_to))

     H, W = self.crop
     margin = 2 * self.random
     missing_h = H + margin - img.shape[1]
     missing_w = W + margin - img.shape[2]
     if missing_h > 0 or missing_w > 0:
         # Pad both spatial axes by the larger shortfall.
         p = max(missing_h, missing_w)
         img = np.pad(img, ((0, 0), (p, p), (p, p)), 'edge')

     if H + self.random < img.shape[1] and W + self.random < img.shape[2]:
         img = center_crop(img, (H + self.random, W + self.random))
         img = random_crop(img, self.crop)
     return img
示例#9
0
    def predict(self, imgs):
        """Predict a semantic segmentation label map for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their values are :math:`[0, 255]`.

        Returns:
            list of numpy.ndarray:

            One int32 label map per input image, obtained as the argmax of
            the network score over its first axis.

        """

        def _label_for(img):
            n_channel, height, width = img.shape
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                batch = chainer.Variable(self.xp.asarray(img[np.newaxis]))
                score = self.__call__(batch)[0].data
            score = chainer.cuda.to_cpu(score)
            # Resize the score map whenever its shape differs from the
            # input's, keeping the score dtype.
            if score.shape != (n_channel, height, width):
                score = resize(score, (height, width)).astype(score.dtype)
            return np.argmax(score, axis=0).astype(np.int32)

        return [_label_for(img) for img in imgs]
示例#10
0
def _transform2(data, mean, train=True, mean_flag=False):
    """Build a standard view and a randomly scaled view of one image.

    The standard view is scaled to a shorter edge of 316, center-cropped to
    316x316, optionally flipped and rotated (training only) and resized to
    224x224. The second view is scaled to a random shorter edge drawn from
    [316, 1500] and center-cropped to 224x224. Both views optionally have
    ``mean`` subtracted and are multiplied by 1/255.

    Args:
        data (tuple): ``(img, label)``.
        mean: Value subtracted from both views when ``mean_flag`` is set.
        train (bool): Apply the random flip and rotation.
        mean_flag (bool): Whether to subtract ``mean``.

    Returns:
        tuple: ``(img_o, label, img_st)``.
    """
    img, label = data
    img = img.copy()

    def _normalise(arr):
        # Subtract the mean from the image (optional), then rescale.
        # Both operations are done in place on ``arr``.
        if mean_flag:
            arr -= mean
        arr *= (1.0 / 255.0)
        return arr

    img_o = transforms.center_crop(transforms.scale(img, 316), (316, 316))

    # Run only during training.
    if train:
        img_o = transforms.random_flip(img_o, y_random=True)
        img_o = transforms.random_rotate(img_o)
        # img = random_erase(img)

    img_o = _normalise(transforms.resize(img_o, (224, 224)))

    r = random.randint(316, 1500)
    img_st = transforms.center_crop(transforms.scale(img, r), (224, 224))
    img_st = _normalise(img_st)

    return img_o, label, img_st
示例#11
0
 def __call__(self, in_data):
     """Random-sized crop, resize to 224x224, random x-flip, subtract mean.

     Args:
         in_data (tuple): ``(img, label)``.

     Returns:
         tuple: ``(img, label)`` with the transformed image.
     """
     img, label = in_data
     cropped = random_sized_crop(img)
     img = random_flip(resize(cropped, (224, 224)), x_random=True)
     img -= self.mean
     return img, label
示例#12
0
    def __call__(self, in_data):
        """Randomly flip, rescale and mean-normalise one detection example.

        The image is flipped horizontally at random, scaled with
        ``scale_img`` using ``self.min_size`` / ``self.max_size``, and has
        ``self.mean`` subtracted. Bounding boxes follow the same flip and
        scale. If a mask is supplied it is flipped the same way and resized
        to the final image size with nearest-neighbour interpolation.

        Args:
            in_data (tuple): Either ``(img, mask, label, bbox)`` or
                ``(img, bbox, label)``.

        Returns:
            tuple: ``(img, bbox, label, mask)`` when a mask was given,
            otherwise ``(img, bbox, label)``.
        """
        has_mask = len(in_data) == 4
        if has_mask:
            img, mask, label, bbox = in_data
        else:
            img, bbox, label = in_data

        # Flipping
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        x_flip = params['x_flip']
        bbox = transforms.flip_bbox(bbox, img.shape[1:], x_flip=x_flip)

        # Scaling and mean subtraction
        img, scale = scale_img(img, self.min_size, self.max_size)
        img -= self.mean
        bbox = bbox * scale

        if not has_mask:
            return img, bbox, label

        mask = transforms.flip(mask, x_flip=x_flip)
        # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the
        # same dtype and works on every NumPy version.
        mask = transforms.resize(mask.astype(np.float32),
                                 img.shape[1:],
                                 interpolation=PIL.Image.NEAREST).astype(bool)
        return img, bbox, label, mask
def transform(data, mean, train=True):
    """Mean-subtract, optionally rotate/crop/flip, and resize to 224x224.

    In training mode the image is rotated by a random multiple of 10
    degrees, center-cropped to an axis-aligned square sized from the
    rotation angle, and randomly flipped horizontally. The image is then
    resized to 224x224 and multiplied by 1/255.

    Args:
        data (tuple): ``(img, label)``; ``img.shape[1:]`` is ``(h, w)``.
        mean: Value subtracted from the copied image.
        train (bool): Whether to apply the random augmentations.

    Returns:
        tuple: ``(img, label)`` with the transformed image.
    """
    img, label = data
    img = img.copy()
    img -= mean

    size = (224, 224)

    if train:
        h, w = img.shape[1:]
        angle = np.random.choice(list(range(0, 360, 10)))
        img = rotate(img, angle)

        rad = angle * np.pi / 180
        # Side of the axis-aligned square inscribed in a rotated square.
        # Use the shorter edge so the crop also fits non-square images;
        # for square inputs this equals the previous ``h``-only formula.
        shorter = min(h, w)
        new_length = int(shorter / (np.abs(np.cos(rad)) + np.abs(np.sin(rad))))
        img = transforms.center_crop(img, (new_length, new_length))

        # img = transforms.random_rotate(img, return_param=False)
        img = transforms.random_flip(img, x_random=True)

    img = transforms.resize(img, size, interpolation=2)
    img *= (1.0 / 255.0)

    return img, label
示例#14
0
    def _image_process(self, img):
        """Scale values by 1/255, resize to 224x224 and flip at random.

        The image is mirrored along its last axis with probability 0.5.
        """
        resized = resize(img / 255, (224, 224))
        mirror = np.random.rand() >= 0.5
        return resized[:, :, ::-1] if mirror else resized
    def __call__(self, in_data):
        """Augment a detection example and encode its boxes as dicts.

        Three augmentation steps are applied: random cropping (numbered 3
        below), resizing to a ``self.size`` square (4) and random
        horizontal flipping (5). The image is then standardised with
        ``self.mean`` / ``self.std`` and every box is converted to a
        normalised centre/size record.

        Args:
            in_data (tuple): ``(img, bbox, label)``; ``bbox`` columns are
                read as ``ymin, xmin, ymax, xmax``.

        Returns:
            tuple: ``(img, t)`` where ``t`` is a list with one dict per box
            holding ``label``, ``x``, ``w``, ``y``, ``h`` and
            ``one_hot_label``.
        """
        img, bbox, label = in_data

        # 3. Random cropping
        if self.random_crop and np.random.rand() > 0.5:
            cropped_img, crop_param = random_crop_with_bbox_constraints(
                img,
                bbox,
                min_scale=min(self.crop_rate),
                max_scale=max(self.crop_rate),
                return_param=True)
            cropped_bbox, bbox_param = transforms.crop_bbox(
                bbox,
                y_slice=crop_param['y_slice'],
                x_slice=crop_param['x_slice'],
                allow_outside_center=False,
                return_param=True)
            surviving = label[bbox_param['index']]
            # Accept the crop only if at least one box survives it.
            if len(surviving) != 0:
                label = surviving
                img, bbox = cropped_img, cropped_bbox

        # 4. Resizing to a fixed square
        _, H, W = img.shape
        target = (self.size, self.size)
        img = transforms.resize(img, target)
        bbox = transforms.resize_bbox(bbox, (H, W), target)

        # 5. Random horizontal flipping
        if self.flip:
            img, flip_param = transforms.random_flip(img,
                                                     x_random=True,
                                                     return_param=True)
            bbox = transforms.flip_bbox(bbox, target,
                                        x_flip=flip_param['x_flip'])

        img -= self.mean
        img /= self.std

        _, height, width = img.shape
        ymin, xmin, ymax, xmax = bbox[:, 0], bbox[:, 1], bbox[:, 2], bbox[:, 3]
        box_w = xmax - xmin
        box_h = ymax - ymin
        # Centres use floor division of the extent, as in the original.
        xs = (xmin + box_w // 2) / width
        ws = box_w / width
        ys = (ymin + box_h // 2) / height
        hs = box_h / height
        one_hot_label = np.eye(self.n_class)[label]

        t = []
        for cls, cx, bw, cy, bh, hot in zip(label, xs, ws, ys, hs,
                                            one_hot_label):
            t.append({
                'label': cls,
                'x': cx,
                'w': bw,
                'y': cy,
                'h': bh,
                'one_hot_label': hot
            })
        return img, t
示例#16
0
    def _prepare(self, img):
        """Prepare an image for feeding it to a model.

        This is a standard preprocessing scheme used by feature extraction
        models.
        First, the image is scaled or resized according to
        :obj:`self.scale_size` (scaled when it is an int, resized
        otherwise). This step is skipped when it is :obj:`None`.
        Next, the image is cropped to :obj:`self.crop_size`, either as ten
        crops (``self.crop == '10'``) or a single center crop
        (``self.crop == 'center'``).
        Last, the array :obj:`self.mean` is subtracted.

        Args:
            img (~numpy.ndarray): An image. This is in CHW format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            A preprocessed image. This is 4D array whose batch size is
            the number of crops.

        Raises:
            ValueError: If ``self.crop`` is neither ``'10'`` nor
                ``'center'``.

        """
        if self.scale_size is not None:
            if isinstance(self.scale_size, int):
                img = scale(img, size=self.scale_size)
            else:
                img = resize(img, size=self.scale_size)

        if self.crop == '10':
            imgs = ten_crop(img, self.crop_size)
        elif self.crop == 'center':
            imgs = center_crop(img, self.crop_size)[np.newaxis]
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "crop must be '10' or 'center', got {!r}".format(self.crop))

        # Subtract out of place: the crop may be a view into the caller's
        # image, which an in-place ``-=`` would silently modify.
        return imgs - self.mean[np.newaxis]
示例#17
0
    def predict(self, imgs):
        """Run semantic segmentation on each image and return label maps.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their values are :math:`[0, 255]`.

        Returns:
            list of numpy.ndarray:

            List of int32 label maps, one per input image, taken as the
            argmax of the score over its first axis.

        """
        results = []
        for image in imgs:
            in_shape = image.shape
            channels, rows, cols = in_shape
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                net_input = chainer.Variable(
                    self.xp.asarray(image[np.newaxis]))
                raw_score = self.__call__(net_input)[0].data
            raw_score = chainer.cuda.to_cpu(raw_score)

            # Bring the score back to the input resolution when needed,
            # preserving its dtype.
            if raw_score.shape != (channels, rows, cols):
                score_dtype = raw_score.dtype
                raw_score = resize(raw_score, (rows, cols))
                raw_score = raw_score.astype(score_dtype)

            results.append(np.argmax(raw_score, axis=0).astype(np.int32))
        return results
示例#18
0
 def __call__(self, in_data):
     """Random-sized crop, resize to 224x224, random x-flip, subtract mean.

     Args:
         in_data (tuple): ``(img, label)``.

     Returns:
         tuple: The image cast to ``chainer.get_dtype()`` and the label.
     """
     img, label = in_data
     cropped = transforms.random_sized_crop(img)
     resized = transforms.resize(cropped, (224, 224))
     out = transforms.random_flip(resized, x_random=True)
     out -= self.mean
     return out.astype(chainer.get_dtype()), label
示例#19
0
    def _prepare(self, img):
        """Prepare an image for feeding it to a model.

        This is a standard preprocessing scheme used by feature extraction
        models.
        First, the image is scaled or resized according to
        :obj:`self.scale_size` (scaled when it is an int, resized
        otherwise). This step is skipped when it is :obj:`None`.
        Next, the image is cropped to :obj:`self.crop_size`, either as ten
        crops (``self.crop == '10'``) or a single center crop
        (``self.crop == 'center'``).
        Last, the array :obj:`self.mean` is subtracted.

        Args:
            img (~numpy.ndarray): An image. This is in CHW format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            A preprocessed image. This is 4D array whose batch size is
            the number of crops.

        Raises:
            ValueError: If ``self.crop`` is neither ``'10'`` nor
                ``'center'``.

        """
        if self.scale_size is not None:
            if isinstance(self.scale_size, int):
                img = scale(img, size=self.scale_size)
            else:
                img = resize(img, size=self.scale_size)

        if self.crop == '10':
            imgs = ten_crop(img, self.crop_size)
        elif self.crop == 'center':
            imgs = center_crop(img, self.crop_size)[np.newaxis]
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "crop must be '10' or 'center', got {!r}".format(self.crop))

        # Subtract out of place: the crop may be a view into the caller's
        # image, which an in-place ``-=`` would silently modify.
        return imgs - self.mean[np.newaxis]
def _transform2(data, mean, train=True, mean_flag=False):
    """Scale, crop, optionally augment and normalise one image.

    The image is scaled to a shorter edge of 316, center-cropped to
    316x316, optionally flipped and rotated (training only) and resized to
    224x224. Mean subtraction (optional) and multiplication by 1/255 are
    done with the channel axis moved last; the result is returned
    channel-first again.

    Args:
        data (tuple): ``(img, label)``.
        mean: Value subtracted from the channel-last image when
            ``mean_flag`` is set.
        train (bool): Apply the random flip and rotation.
        mean_flag (bool): Whether to subtract ``mean``.

    Returns:
        tuple: ``(img, label)``.
    """
    img, label = data
    img = img.copy()

    img = transforms.center_crop(transforms.scale(img, 316), (316, 316))

    # Run only during training.
    if train:
        img = transforms.random_flip(img, y_random=True)
        img = transforms.random_rotate(img)
        # img = random_erase(img)

    img = transforms.resize(img, (224, 224))

    # Normalise in channel-last layout.
    img = img.transpose(1, 2, 0)
    if mean_flag:
        # Subtract the mean from the image.
        img -= mean
    img *= (1.0 / 255.0)

    return img.transpose(2, 0, 1), label
示例#21
0
 def test_zero_length_img(self):
     """Resizing an image with zero channels keeps the zero-length axis."""
     cv2_unavailable = self.backend == 'cv2' and not _cv2_available
     if cv2_unavailable:
         # Nothing to check when the requested backend is missing.
         return
     empty_img = np.random.uniform(size=(0, 24, 32))
     with chainer.using_config('cv_resize_backend', self.backend):
         resized = resize(empty_img,
                          size=(32, 64),
                          interpolation=self.interpolation)
     self.assertEqual(resized.shape, (0, 32, 64))
示例#22
0
def random_resize(img):
    """Shrink an image by a random factor with probability 0.5.

    A uniform value ``rv`` is drawn from ``[0, 1)``. If it is at least 0.5
    the input is returned unchanged. Otherwise the image is resized by the
    ratio ``round(rv * 2, 1)``, clamped to at least 0.1, with each output
    edge clamped to at least one pixel.

    Args:
        img: Image array whose shape unpacks as ``(_, H, W)``.

    Returns:
        The input itself, or the resized image.
    """
    rv = random.random()
    if rv >= 0.5:
        return img

    # round() yields 0.0 for draws below 0.025, which used to request a
    # zero-sized output; clamp the ratio and the resulting edges.
    ratio = max(round(rv * 2, 1), 0.1)
    _, H, W = img.shape
    out_size = (max(int(ratio * H), 1), max(int(ratio * W), 1))
    return transforms.resize(img, out_size)
示例#23
0
def gen_morphed_images(z, base_class, palette, masks, interpolation=8):
    """Generate frames that morph masked regions from one class to others.

    For each generator resolution a class-weight map is built per frame:
    it starts fully on ``base_class`` and, inside each resized mask, moves
    weight from ``base_class`` to the matching ``palette`` class, linearly
    in the frame index. The maps are fed to ``gen.spatial_interpolation``.

    Args:
        z: Latent array, broadcast to ``(interpolation, 128)``.
        base_class: Index of the default class.
        palette: Target class index for each mask.
        masks: Masks indexed by position in ``palette``.
        interpolation (int): Number of frames to generate.

    Returns:
        numpy.ndarray: uint8 array with the channel axis moved last.
    """
    z = xp.broadcast_to(z, (interpolation, 128))

    resolutions = [4, 8, 8, 16, 16, 32, 32, 64, 64, 128, 128, 256]
    ws = []
    for size in resolutions:
        w = xp.zeros((interpolation, size, size, gen.n_classes),
                     dtype=xp.float32)
        w[:, :, :, base_class] = 1.0  # default class
        for i_mask, target_class in enumerate(palette):
            resized_mask = xp.array(
                resize(masks[i_mask], (size, size)).reshape((size, size)),
                dtype=xp.float32)
            for frame in range(interpolation):
                # Weight ramps linearly from 0 (first frame) to 1 (last).
                share = resized_mask * (frame / (interpolation - 1.0))
                w[frame, :, :, base_class] -= share
                w[frame, :, :, target_class] = share
        ws.append(chainer.Variable(w))

    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        x = gen.spatial_interpolation(z, ws)

    x = x.data
    if args.gpu >= 0:
        x = x.get()

    # Map values through x * 127.5 + 127.5, clip to [0, 255], cast to uint8.
    pixels = np.clip(x * 127.5 + 127.5, 0.0, 255.0)
    return np.asarray(pixels, dtype=np.uint8).transpose((0, 2, 3, 1))
示例#24
0
def segm_to_mask(segm, bbox, size):
    """Recover mask from cropped and resized mask.

    This function requires cv2.

    Each segment is placed on a zero canvas with a one-pixel border,
    resized to its (expanded, integerized) bounding box, thresholded at
    0.5 and pasted into the output at the box location, clipped to the
    image bounds. Boxes with zero height or width are left empty.

    Args:
        segm (~numpy.ndarray): See below.
        bbox (~numpy.ndarray): See below.
        size (tuple): This is a tuple of length 2. Its elements are
            ordered as (height, width).

    Returns:
        ~numpy.ndarray: See below.

    .. csv-table::
        :header: name, shape, dtype, format

        :obj:`segm`, ":math:`(R, S, S)`", :obj:`float32`, --
        :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \
        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
        :obj:`mask` (output), ":math:`(R, H, W)`", :obj:`bool`, --

    """
    pad = 1
    H, W = size
    _, segm_size, _ = segm.shape

    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same
    # dtype and works on every NumPy version.
    mask = np.zeros((len(bbox), H, W), dtype=bool)

    # As commented in mask_to_segm, cv2.resize needs adjust.
    padded_segm_size = segm_size + pad * 2
    expand_scale = padded_segm_size / segm_size
    bbox = _expand_bbox(bbox, expand_scale)
    # The canvas is reused for every segment; only its interior is
    # overwritten, so the border stays zero.
    canvas_mask = np.zeros((padded_segm_size, padded_segm_size),
                           dtype=np.float32)
    bbox = _integerize_bbox(bbox)

    for i, (bb, sgm) in enumerate(zip(bbox, segm)):
        bb_height = bb[2] - bb[0]
        bb_width = bb[3] - bb[1]
        if bb_height == 0 or bb_width == 0:
            continue

        canvas_mask[pad:-pad, pad:-pad] = sgm

        with chainer.using_config('cv_resize_backend', 'cv2'):
            crop_mask = transforms.resize(canvas_mask[None],
                                          (bb_height, bb_width))[0]
        crop_mask = crop_mask > 0.5

        # Clip the box to the image and copy the matching part of the crop.
        y_min = max(bb[0], 0)
        x_min = max(bb[1], 0)
        y_max = max(min(bb[2], H), 0)
        x_max = max(min(bb[3], W), 0)
        y_offset = y_min - bb[0]
        x_offset = x_min - bb[1]
        mask[i, y_min:y_max,
             x_min:x_max] = crop_mask[y_offset:y_offset + y_max - y_min,
                                      x_offset:x_offset + x_max - x_min]
    return mask
示例#25
0
def scale_mask(mask, bbox, size):
    """Scale instance segmentation mask while keeping the aspect ratio.

    This function exploits the sparsity of :obj:`mask` to speed up
    resize operation: only the region inside each bounding box is resized.

    The input image will be resized so that
    the shorter edge will be scaled to length :obj:`size` after
    resizing.

    Args:
        mask (array): An array whose shape is :math:`(R, H, W)`.
            :math:`R` is the number of masks.
            The dtype should be :obj:`bool`.
        bbox (array): The bounding boxes around the masked region
            of :obj:`mask`. This is expected to be the value
            obtained by :obj:`bbox = chainercv.utils.mask_to_bbox(mask)`.
            It is not modified.
        size (int): The length of the smaller edge.

    Returns:
        array:
        An array whose shape is :math:`(R, H, W)`.
        :math:`R` is the number of masks.
        The dtype should be :obj:`bool`.

    """
    xp = chainer.backends.cuda.get_array_module(mask)
    mask = chainer.cuda.to_cpu(mask)
    bbox = chainer.cuda.to_cpu(bbox)

    R, H, W = mask.shape
    if H < W:
        out_size = (size, int(size * W / H))
        scale = size / H
    else:
        out_size = (int(size * H / W), size)
        scale = size / W

    # Snap boxes outwards to integer pixels. This is built out of place
    # because ``bbox`` may still be the caller's own array here, and the
    # previous in-place floor/ceil would have modified it.
    bbox = np.concatenate(
        (np.floor(bbox[:, :2]), np.ceil(bbox[:, 2:])),
        axis=1).astype(np.int32)
    scaled_bbox = bbox * scale
    scaled_bbox = np.concatenate(
        (np.floor(scaled_bbox[:, :2]), np.ceil(scaled_bbox[:, 2:])),
        axis=1).astype(np.int32)

    # ``np.bool`` was removed from NumPy; use the builtin ``bool`` dtype.
    out_mask = xp.zeros((R, ) + out_size, dtype=bool)
    for i, (m, bb, scaled_bb) in enumerate(zip(mask, bbox, scaled_bbox)):
        cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]]
        h = scaled_bb[2] - scaled_bb[0]
        w = scaled_bb[3] - scaled_bb[1]
        cropped_m = transforms.resize(cropped_m[None].astype(np.float32),
                                      (h, w),
                                      interpolation=PIL.Image.NEAREST)[0]
        if xp != np:
            cropped_m = xp.array(cropped_m)
        out_mask[i, scaled_bb[0]:scaled_bb[2],
                 scaled_bb[1]:scaled_bb[3]] = cropped_m
    return out_mask
示例#26
0
def transform(inputs, mean, std, output_size=(224, 224)):
    """Standardise an image, reverse its first axis and resize it.

    Args:
        inputs (tuple): ``(x, lab)``.
        mean, std: Per-channel arrays, broadcast as ``[:, None, None]``.
        output_size (tuple): Size passed to ``transforms.resize``.

    Returns:
        tuple: ``(x, lab)`` with the transformed image.
    """
    image, lab = inputs
    image = image.copy()
    image -= mean[:, None, None]
    image /= std[:, None, None]
    # Reverse the order along the first axis before resizing.
    reversed_image = image[::-1]
    return transforms.resize(reversed_image, output_size), lab
def _transform(inputs, mean=None, img_size=(512, 1024), scale_label=1):
    """Resize an image/label pair, subtract the mean and shrink the label.

    Args:
        inputs (tuple): ``(img, label)``; ``label`` is 2-D.
        mean: Optional per-channel array subtracted in place from the
            image, broadcast as ``[:, None, None]``.
        img_size: Target size for both arrays; a falsy value skips the
            resize.
        scale_label: Divisor applied to the label's edges; ``1`` skips it.

    Returns:
        tuple: ``(img, label)``.
    """
    img, label = inputs

    # Scaling: bicubic for the image, nearest-neighbour for the label.
    if img_size:
        target = (img_size[0], img_size[1])
        img = transforms.resize(img, target, Image.BICUBIC)
        label = transforms.resize(label[None, ...], target, Image.NEAREST)[0]

    # Mean subtraction
    if mean is not None:
        img -= mean[:, None, None]

    if scale_label != 1:
        # The target size is passed as (shape[1], shape[0]).
        shrunk_size = (int(label.shape[1] / scale_label),
                       int(label.shape[0] / scale_label))
        label = cv.resize(label, shrunk_size, interpolation=cv.INTER_NEAREST)
    return img, label
示例#28
0
    def __call__(self, in_data):
        """Prepare one instance-segmentation example, with random flips.

        The image is moved from H, W, C to C, H, W. Outside training the
        example is returned right after that step. During training the
        image goes through ``self.mask_rcnn.prepare``; boxes and masks are
        resized to the prepared image size and then flipped together with
        the image, at random, both vertically and horizontally.

        Args:
            in_data (tuple): ``(img, bbox, label, mask)`` or
                ``(img, bbox, label, mask, crowd, area)``.

        Returns:
            tuple: The input fields (with the transposed image) when not
            training, otherwise
            ``(img, bbox, label, mask, scale, sizes)``.

        Raises:
            ValueError: If ``in_data`` has neither 4 nor 6 elements.
        """
        n_fields = len(in_data)
        if n_fields == 6:
            img, bbox, label, mask, crowd, area = in_data
        elif n_fields == 4:
            img, bbox, label, mask = in_data
        else:
            raise ValueError

        img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

        if not self.train:
            if n_fields == 6:
                return img, bbox, label, mask, crowd, area
            return img, bbox, label, mask

        imgs, sizes, scales = self.mask_rcnn.prepare([img])
        img, (H, W), scale = imgs[0], sizes[0], scales[0]
        _, o_H, o_W = img.shape
        out_size = (o_H, o_W)

        if len(bbox) > 0:
            bbox = transforms.resize_bbox(bbox, (H, W), out_size)
        if len(mask) > 0:
            mask = transforms.resize(mask, size=out_size, interpolation=0)

        # Flip horizontally and vertically, using the same draw for the
        # image, the boxes and the masks.
        img, params = transforms.random_flip(
            img, y_random=True, x_random=True, return_param=True)
        flips = dict(y_flip=params['y_flip'], x_flip=params['x_flip'])
        bbox = transforms.flip_bbox(bbox, out_size, **flips)
        if mask.ndim == 2:
            mask = transforms.flip(mask[None, :, :], **flips)[0]
        else:
            mask = transforms.flip(mask, **flips)

        return img, bbox, label, mask, scale, sizes
示例#29
0
def scale_img(img, min_size, max_size):
    """Rescale an image to satisfy min/max edge-length constraints.

    The scale factor is ``min_size / min(H, W)``. If applying it would make
    the longer edge exceed ``max_size``, the factor
    ``max_size / max(H, W)`` is used instead.

    Args:
        img: Array whose shape unpacks as ``(_, H, W)``.
        min_size: Desired length of the shorter edge.
        max_size: Upper bound on the length of the longer edge.

    Returns:
        tuple: ``(resized_img, scale)`` where ``scale`` is the factor that
        was applied to both edges (before truncation to integers).
    """
    _, height, width = img.shape
    short_edge, long_edge = min(height, width), max(height, width)

    scale = min_size / short_edge
    if scale * long_edge > max_size:
        # The longer edge would overshoot; let it drive the scale instead.
        scale = max_size / long_edge

    new_size = (int(height * scale), int(width * scale))
    return transforms.resize(img, new_size), scale
示例#30
0
 def _scale_img(self, img):
     """Rescale an image using this object's min/max edge-length limits.

     The factor is ``self._min_size / min(H, W)`` unless that would push
     the longer edge past ``self._max_size``, in which case
     ``self._max_size / max(H, W)`` is used.

     Args:
         img: Array whose shape unpacks as ``(_, H, W)``.

     Returns:
         tuple: ``(resized_img, scale)``.
     """
     _, height, width = img.shape
     short_edge = min(height, width)
     long_edge = max(height, width)

     factor = self._min_size / short_edge
     if factor * long_edge > self._max_size:
         # Cap the longer edge at the configured maximum.
         factor = self._max_size / long_edge

     target_size = (int(height * factor), int(width * factor))
     return transforms.resize(img, target_size), factor
 def test_transform(sample):
     """Preprocess one sample: resize, center-crop and subtract the mean.

     Uses ``resize_size``, ``patchsize``, ``mean`` and ``dtype``, which are
     defined outside this function.

     Args:
         sample: Image array; a 2-D input is treated as grayscale.

     Returns:
         The processed array cast to ``dtype``.
     """
     img = sample
     if len(img.shape) == 2:
         # Grayscale: replicate the single plane along a new last axis.
         img = np.stack([img] * 3, 2)
     # Move the last axis to the front (presumably H, W, C -> C, H, W).
     img = np.transpose(img, (2, 0, 1))
     img = transforms.center_crop(
         transforms.resize(img, resize_size), patchsize)
     return (img - mean).astype(dtype)
示例#32
0
    def _prepare(self, img):
        """Resize an image to a square input and subtract the mean.

        The image is cast to float32, resized to
        ``(self.insize, self.insize)``, has ``self.mean`` subtracted in
        place, and is finally cast to the dtype reported by
        ``chainer.get_dtype()``.

        Args:
            img: Image array to be prepared.

        Returns:
            The prepared array.
        """
        resized = transforms.resize(
            img.astype(np.float32), (self.insize, self.insize))
        resized -= self.mean

        # NOTE: chainer.get_dtype will return float16 if the
        # global_config.dtype is chainer.mixed16
        return resized.astype(chainer.get_dtype())
示例#33
0
    def __call__(self, imgs):
        """Rescale, mean-subtract and zero-pad images into one batch.

        Each image is resized so that its shorter edge becomes
        ``self._min_size``, unless the longer edge would then exceed
        ``self._max_size``; in that case the longer edge is set to
        ``self._max_size``. ``self._mean`` is subtracted in place from the
        resized image. The results are copied into the top-left corner of
        a zero-filled float32 array whose spatial size is the per-axis
        maximum over all resized images, rounded up to a multiple of
        ``self._stride``.

        Args:
            imgs: Sized collection of arrays whose shapes unpack as
                ``(_, H, W)``.

        Returns:
            numpy.ndarray: float32 array of shape
            ``(len(imgs), 3, padded_H, padded_W)``.
        """
        processed = []
        for img in imgs:
            _, height, width = img.shape
            short_edge, long_edge = min(height, width), max(height, width)
            scale = self._min_size / short_edge
            if scale * long_edge > self._max_size:
                scale = self._max_size / long_edge
            img = transforms.resize(
                img, (int(height * scale), int(width * scale)))
            img -= self._mean
            processed.append(img)

        # Largest height/width in the batch, rounded up to the stride.
        spatial_shapes = np.array([img.shape[1:] for img in processed])
        padded_hw = np.ceil(
            spatial_shapes.max(axis=0) / self._stride) * self._stride
        padded_hw = padded_hw.astype(int)

        batch = np.zeros(
            (len(imgs), 3, padded_hw[0], padded_hw[1]), dtype=np.float32)
        for slot, img in zip(batch, processed):
            _, height, width = img.shape
            slot[:, :height, :width] = img

        return batch
示例#34
0
 def test_resize_grayscale(self):
     """Resizing a one-channel (1, 24, 32) image yields shape (1, 32, 64)."""
     src = np.random.uniform(size=(1, 24, 32))
     resized = resize(
         src, size=(32, 64), interpolation=self.interpolation)
     expected_shape = (1, 32, 64)
     self.assertEqual(resized.shape, expected_shape)
示例#35
0
 def transform(in_data):
     """Resize the image of an ``(img, label)`` pair to 32 x 32.

     The label is returned untouched.
     """
     image, label = in_data
     return resize(image, (32, 32)), label
示例#36
0
def resize_contain(img, size, fill=0, interpolation=PIL.Image.BILINEAR,
                   return_param=False):
    """Fit an image inside a canvas of the given size, keeping aspect ratio.

    The image is never enlarged. If it already fits within :obj:`size`,
    it is placed on the canvas as is (on the center, with padding around
    it). Otherwise it is first scaled down, preserving aspect ratio, so
    that it fits in a canvas whose size is :obj:`size`. The rest of the
    canvas is filled with :obj:`fill`.

    Args:
        img (~numpy.ndarray): An array to be transformed. This is in
            CHW format.
        size (tuple of two ints): A tuple of two elements:
            :obj:`height, width`. The size of the output canvas.
        fill (float, tuple or ~numpy.ndarray): The value of padded pixels.
            It is reshaped to :math:`(-1, 1, 1)` before being assigned to
            the canvas, so a per-channel value should hold :math:`C`
            elements, where :math:`C` is the number of channels of
            :obj:`img`.
        interpolation: Forwarded to :func:`resize` when the image has to
            be scaled down.
        return_param (bool): If :obj:`True`, information about the
            resizing and the offsets is returned as well.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`out_img` that is the result of resizing.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`out_img, param`.
        :obj:`param` is a dictionary of intermediate parameters with the
        following keys.

        * **y_offset** (*int*): The y coordinate of the top left corner
          of the image after placing it on the canvas.
        * **x_offset** (*int*): The x coordinate of the top left corner
          of the image after placing it on the canvas.
        * **scaled_size** (*tuple*): The size, as :obj:`height, width`,
          to which the image is scaled before it is placed on the canvas.

    """
    n_channel, in_h, in_w = img.shape
    canvas_h, canvas_w = size

    # Capping the ratio at 1 means the image is only ever shrunk.
    ratio = min(canvas_h / in_h, canvas_w / in_w, 1)
    scaled_size = (int(in_h * ratio), int(in_w * ratio))
    if ratio < 1:
        img = resize(img, scaled_size, interpolation=interpolation)

    # Slices of the canvas that the (possibly resized) image occupies.
    y_slice, x_slice = _get_pad_slice(img, size=size)

    canvas = np.empty((n_channel, canvas_h, canvas_w), dtype=img.dtype)
    canvas[:] = np.array(fill).reshape((-1, 1, 1))
    canvas[:, y_slice, x_slice] = img

    if not return_param:
        return canvas

    param = {
        'y_offset': y_slice.start,
        'x_offset': x_slice.start,
        'scaled_size': scaled_size,
    }
    return canvas, param