def scale(img, size):
    """Rescale the input image so that its smaller edge equals ``size``.

    If the height of the image is larger than its width, the image is
    resized to (size * height / width, size); otherwise it is resized to
    (size, size * width / height).

    Args:
        img (numpy.ndarray): An image array in CHW format.
        size (int): The length of the smaller edge.

    Returns:
        ~numpy.ndarray: A scaled image.
    """
    _, H, W = img.shape
    if (W <= H and W == size) or (H <= W and H == size):
        return img
    if W < H:
        oH = int(size * H / W)
        return resize(img, (oH, size))
    else:
        oW = int(size * W / H)
        return resize(img, (size, oW))

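# Usage sketch (illustrative, not part of the original source). It assumes
# the ``resize`` referenced above is chainercv.transforms.resize and uses a
# dummy CHW float32 image.
# >>> import numpy as np
# >>> img = np.zeros((3, 480, 640), dtype=np.float32)
# >>> scale(img, 256).shape
# (3, 256, 341)   # the shorter edge becomes 256, aspect ratio is kept
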
def stack_imgs(fns, crop, resize_small=False, grey=False):
    imgs_in = []
    for fn in fns:
        fn1, ext = os.path.splitext(fn)
        # an image can be given as csv, npy or jpg/png... etc.
        if ext == ".csv":
            img_in = np.loadtxt(fn, delimiter=",")[np.newaxis, ]
        elif ext == ".npy":
            img_in = (np.load(fn)[np.newaxis, ]).astype(np.float32)
            img_in = (np.sqrt(np.clip(img_in, 0, 100))) / 10.0  ## nasty preprocess
            # img_in = (img_in - np.mean(img_in))/2*np.std(img_in)  # standardize
        else:
            img_in = read_image(fn, color=not grey) / 127.5 - 1.0
        # resize if the image is too small
        if resize_small:
            if img_in.shape[1] < crop[0] or img_in.shape[2] < crop[1]:
                if crop[0] / img_in.shape[1] < crop[1] / img_in.shape[2]:
                    img_in = resize(
                        img_in,
                        (int(crop[1] / img_in.shape[2] * img_in.shape[1]), crop[1]))
                else:
                    img_in = resize(
                        img_in,
                        (crop[0], int(crop[0] / img_in.shape[1] * img_in.shape[2])))
        imgs_in.append(img_in)
    # an input/output image can consist of multiple images; they are stacked as channels
    # print(imgs_in.shape)
    return np.concatenate(imgs_in, axis=0)

def _get_proba(self, img, scale, flip):
    if flip:
        img = img[:, :, ::-1]
    _, H, W = img.shape
    if scale == 1.0:
        h, w = H, W
    else:
        h, w = int(H * scale), int(W * scale)
        img = resize(img, (h, w))
    img = self.prepare(img)
    x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
    x = self.__call__(x)
    x = F.softmax(x, axis=1)
    score = F.resize_images(x, img.shape[1:])[0, :, :h, :w].array
    score = chainer.backends.cuda.to_cpu(score)
    if scale != 1.0:
        score = resize(score, (H, W))
    if flip:
        score = score[:, :, ::-1]
    return score

def __call__(self, in_data):
    assert len(in_data) == 6
    img, bbox, label, mask, lbl_vis, lbl_occ = in_data

    # H, W, C -> C, H, W
    img = img.transpose(2, 0, 1)
    lbl_occ = lbl_occ.transpose(2, 0, 1)

    if not self.train:
        return img, bbox, label, mask, lbl_vis, lbl_occ

    imgs, sizes, scales = self.mask_rcnn.prepare([img])
    img = imgs[0]
    H, W = sizes[0]
    scale = scales[0]
    # _, o_H, o_W = img.shape
    o_H, o_W = int(round(scale * H)), int(round(scale * W))

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
    if len(mask) > 0:
        mask = transforms.resize(mask, size=(o_H, o_W), interpolation=0)
        mask = mask.transpose(1, 2, 0)
        mask = pad_multiple_of(mask, mode='constant', constant_values=-1)
        mask = mask.transpose(2, 0, 1)
        assert mask.shape[1:] == img.shape[1:]

    lbl_vis = transforms.resize(lbl_vis[None], size=(o_H, o_W), interpolation=0)[0]
    lbl_occ = transforms.resize(lbl_occ, size=(o_H, o_W), interpolation=0)
    lbl_vis = pad_multiple_of(lbl_vis, mode='constant', constant_values=-1)
    lbl_occ = lbl_occ.transpose(1, 2, 0)
    lbl_occ = pad_multiple_of(lbl_occ, mode='constant', constant_values=-1)
    lbl_occ = lbl_occ.transpose(2, 0, 1)
    assert lbl_vis.shape == img.shape[1:]
    assert lbl_occ.shape[1:] == img.shape[1:]

    # # horizontally flip
    # img, params = transforms.random_flip(
    #     img, x_random=True, return_param=True)
    # bbox = transforms.flip_bbox(
    #     bbox, (o_H, o_W), x_flip=params['x_flip'])
    # if mask.ndim == 2:
    #     mask = transforms.flip(
    #         mask[None, :, :], x_flip=params['x_flip'])[0]
    # else:
    #     mask = transforms.flip(mask, x_flip=params['x_flip'])
    # lbl_vis = transforms.flip(lbl_vis[None], x_flip=params['x_flip'])[0]
    # lbl_occ = transforms.flip(lbl_occ, x_flip=params['x_flip'])

    keep = (mask == 1).sum(axis=(1, 2)) > 0
    bbox = bbox[keep]
    label = label[keep]
    mask = mask[keep]

    return img, bbox, label, mask, scale, lbl_vis, lbl_occ

def __call__(self, in_data):
    img, label = in_data
    _, height, width = img.shape
    scale = np.random.uniform(self.scale_range[0], self.scale_range[1])

    # Scale
    scaled_height = int(scale * height)
    scaled_width = int(scale * width)
    img = transforms.resize(img, (scaled_height, scaled_width), PIL.Image.BICUBIC)
    label = transforms.resize(
        label[None], (scaled_height, scaled_width), PIL.Image.NEAREST)[0]

    # Crop
    if (scaled_height < self.crop_size[0]) or (scaled_width < self.crop_size[1]):
        shorter_side = min(img.shape[1:])
        img, param = transforms.random_crop(
            img, (shorter_side, shorter_side), True)
    else:
        img, param = transforms.random_crop(img, self.crop_size, True)
    label = label[param['y_slice'], param['x_slice']]

    # Rotate
    angle = np.random.uniform(-10, 10)
    img = transforms.rotate(img, angle, expand=False)
    label = transforms.rotate(
        label[None], angle, expand=False,
        interpolation=PIL.Image.NEAREST, fill=-1)[0]

    # Resize
    if (img.shape[1] < self.crop_size[0]) or (img.shape[2] < self.crop_size[1]):
        img = transforms.resize(img, self.crop_size, PIL.Image.BICUBIC)
    if (label.shape[0] < self.crop_size[0]) or (label.shape[1] < self.crop_size[1]):
        label = transforms.resize(
            label[None].astype(np.float32), self.crop_size, PIL.Image.NEAREST)
        label = label.astype(np.int32)[0]

    # Horizontal flip
    if self.horizontal_flip and np.random.rand() > 0.5:
        img = transforms.flip(img, x_flip=True)
        label = transforms.flip(label[None], x_flip=True)[0]

    # Mean subtraction
    img = img - self.mean

    return img, label

def stack_imgs(self, fns, resize_small=False, onehot=False, clip=(None, None)):
    imgs_in = []
    for fn in fns:
        fn1, ext = os.path.splitext(fn)
        # an image can be given as csv, txt, npy, dicom or jpg/png... etc.
        if ext == ".csv":
            img_in = np.loadtxt(fn, delimiter=",")
        elif ext == ".txt":
            img_in = np.loadtxt(fn)
        elif ext == ".npy":
            img_in = np.load(fn)
            # img_in = (np.sqrt(np.clip(img_in,0,100)))/10.0  ## nasty preprocess
            # img_in = (img_in - np.mean(img_in))/2*np.std(img_in)  # standardize
        elif ext == ".dcm":
            ref_dicom_in = dicom.read_file(fn, force=True)
            ref_dicom_in.file_meta.TransferSyntaxUID = dicom.uid.ImplicitVRLittleEndian
            img_in = ref_dicom_in.pixel_array + ref_dicom_in.RescaleIntercept
        else:  ## image file
            img_in = read_image(fn, color=not self.grey)
        # make the image shape [C, H, W]
        if len(img_in.shape) == 2:
            img_in = img_in[np.newaxis, ]
        # resize if the image is too small
        if resize_small:
            if img_in.shape[1] < self.crop[0] or img_in.shape[2] < self.crop[1]:
                if self.crop[0] / img_in.shape[1] < self.crop[1] / img_in.shape[2]:
                    img_in = resize(
                        img_in,
                        (int(self.crop[1] / img_in.shape[2] * img_in.shape[1]),
                         self.crop[1]))
                else:
                    img_in = resize(
                        img_in,
                        (self.crop[0],
                         int(self.crop[0] / img_in.shape[1] * img_in.shape[2])))
        imgs_in.append(img_in)

    imgs_in = np.concatenate(imgs_in, axis=0)
    # print(imgs_in.shape)
    if onehot > 0:
        return (np.eye(self.class_num)[imgs_in[0].astype(np.uint64)]
                .astype(np.float32).transpose((2, 0, 1)))
    else:
        ## clip and normalise to [-1, 1]
        if clip[0] is not None:
            imgs_in = np.clip(imgs_in, clip[0], clip[1])
            imgs_in = 2 * (imgs_in - clip[0]) / (clip[1] - clip[0]) - 1.0
        return imgs_in.astype(np.float32)

def transform_img(inputs, mean, std,
                  pca_sigma=0, random_angle=0,
                  x_random_flip=False, y_random_flip=False,
                  expand_ratio=1., random_crop_size=(224, 224),
                  random_erase=False, output_size=(224, 224), train=False):
    x, lab = inputs
    x = x.copy()

    # Color augmentation
    if train and pca_sigma != 0:
        x = transforms.pca_lighting(x, pca_sigma)

    # Standardization and channel-order reversal (RGB <-> BGR)
    x -= mean[:, None, None]
    x /= std[:, None, None]
    x = x[::-1]

    if train:
        # Random rotate
        if random_angle != 0:
            angle = np.random.uniform(-random_angle, random_angle)
            x = cv_rotate(x, angle)

        # Random flip
        if x_random_flip or y_random_flip:
            x = transforms.random_flip(
                x, x_random=x_random_flip, y_random=y_random_flip)

        # Random expand
        if expand_ratio > 1:
            x = transforms.random_expand(x, max_ratio=expand_ratio)

        # Random crop
        if all(s > 0 for s in random_crop_size):
            x = transforms.random_crop(x, random_crop_size)
    else:
        if random_erase:
            x = random_erasing(x)

        if all(s > 0 for s in random_crop_size):
            x = transforms.resize(x, random_crop_size)
        else:
            x = transforms.resize(x, output_size)

    return x, lab

def get_example(self, i):
    if self.imgtype == "npy":
        img = np.load(self.get_img_path(i))
        img = 2 * (np.clip(img, self.base, self.base + self.range)
                   - self.base) / self.range - 1.0
        if len(img.shape) == 2:
            img = img[np.newaxis, ]
    else:
        ref_dicom = dicom.read_file(self.get_img_path(i), force=True)
        # print(ref_dicom)
        # ref_dicom.file_meta.TransferSyntaxUID = dicom.uid.ImplicitVRLittleEndian
        img = ref_dicom.pixel_array + ref_dicom.RescaleIntercept
        img = self.img2var(img)
        img = img[np.newaxis, :, :]

    if self.scale_to > 0:
        img = resize(img, (self.scale_to, self.scale_to))

    H, W = self.crop
    # print(img.shape)
    if img.shape[1] < H + 2 * self.random or img.shape[2] < W + 2 * self.random:
        p = max(H + 2 * self.random - img.shape[1],
                W + 2 * self.random - img.shape[2])
        img = np.pad(img, ((0, 0), (p, p), (p, p)), 'edge')
    if H + self.random < img.shape[1] and W + self.random < img.shape[2]:
        img = center_crop(img, (H + self.random, W + self.random))
    img = random_crop(img, self.crop)
    return img

def predict(self, imgs):
    """Conduct semantic segmentation from images.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format and the range of
            their values is :math:`[0, 255]`.

    Returns:
        list of numpy.ndarray:
        List of integer labels predicted from each image in the input list.
    """
    labels = []
    for img in imgs:
        C, H, W = img.shape
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
            score = self.__call__(x)[0].data
        score = chainer.cuda.to_cpu(score)
        if score.shape != (C, H, W):
            dtype = score.dtype
            score = resize(score, (H, W)).astype(dtype)

        label = np.argmax(score, axis=0).astype(np.int32)
        labels.append(label)
    return labels

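# Usage sketch (illustrative, not part of the original source): it assumes a
# trained segmentation model exposing the ``predict`` method above and
# chainercv.utils.read_image for loading; ``model`` and 'example.jpg' are
# hypothetical.
# >>> from chainercv.utils import read_image
# >>> img = read_image('example.jpg')     # CHW, RGB, values in [0, 255]
# >>> label = model.predict([img])[0]     # (H, W) int32 label map
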
def _transform2(data, mean, train=True, mean_flag=False):
    img, label = data
    img = img.copy()

    size316 = (316, 316)
    size = (224, 224)

    img_o = transforms.scale(img, 316)
    img_o = transforms.center_crop(img_o, size316)

    # applied only during training
    if train:
        img_o = transforms.random_flip(img_o, y_random=True)
        img_o = transforms.random_rotate(img_o)
        # img = random_erase(img)

    img_o = transforms.resize(img_o, size)

    # subtract the mean from the image
    if mean_flag:
        img_o -= mean
    img_o *= (1.0 / 255.0)

    r = random.randint(316, 1500)
    img_st = transforms.scale(img, r)
    img_st = transforms.center_crop(img_st, (224, 224))

    # subtract the mean from the image
    if mean_flag:
        img_st -= mean
    img_st *= (1.0 / 255.0)

    return img_o, label, img_st

def __call__(self, in_data):
    img, label = in_data
    img = random_sized_crop(img)
    img = resize(img, (224, 224))
    img = random_flip(img, x_random=True)
    img -= self.mean
    return img, label

def __call__(self, in_data):
    if len(in_data) == 4:
        img, mask, label, bbox = in_data
    else:
        img, bbox, label = in_data

    # Flipping
    img, params = transforms.random_flip(img, x_random=True, return_param=True)
    x_flip = params['x_flip']
    bbox = transforms.flip_bbox(bbox, img.shape[1:], x_flip=x_flip)

    # Scaling and mean subtraction
    img, scale = scale_img(img, self.min_size, self.max_size)
    img -= self.mean
    bbox = bbox * scale

    if len(in_data) == 4:
        mask = transforms.flip(mask, x_flip=x_flip)
        mask = transforms.resize(
            mask.astype(np.float32), img.shape[1:],
            interpolation=PIL.Image.NEAREST).astype(np.bool)
        return img, bbox, label, mask
    else:
        return img, bbox, label

def transform(data, mean, train=True):
    img, label = data
    img = img.copy()
    img -= mean
    size = (224, 224)

    if train:
        h, w = img.shape[1:]
        # rotate by a random multiple of 10 degrees, then center-crop to
        # remove the padded borders introduced by the rotation
        angles = [i for i in range(0, 360, 10)]
        angle = np.random.choice(angles)
        img = rotate(img, angle)
        rad = angle * np.pi / 180
        new_length = int(h / (np.abs(np.cos(rad)) + np.abs(np.sin(rad))))
        img = transforms.center_crop(img, (new_length, new_length))
        # img = transforms.random_rotate(img, return_param=False)
        img = transforms.random_flip(img, x_random=True)

    img = transforms.resize(img, size, interpolation=2)
    img *= (1.0 / 255.0)
    return img, label

def _image_process(self, img):
    img = img / 255
    img = resize(img, (224, 224))
    if np.random.rand() >= 0.5:
        img = img[:, :, ::-1]
    return img

def __call__(self, in_data):
    # Three of the usual five data augmentation steps are applied here:
    # 3. Random cropping
    # 4. Resizing with random interpolation
    # 5. Random horizontal flipping
    img, bbox, label = in_data

    # 3. Random cropping
    if self.random_crop and np.random.rand() > 0.5:
        next_img, param = random_crop_with_bbox_constraints(
            img, bbox,
            min_scale=min(self.crop_rate),
            max_scale=max(self.crop_rate),
            return_param=True)
        next_bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        if len(label[param['index']]) != 0:
            label = label[param['index']]
            img, bbox = next_img, next_bbox

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    img = transforms.resize(img, (self.size, self.size))
    bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

    # 5. Random horizontal flipping
    if self.flip:
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])

    # Normalization
    img -= self.mean
    img /= self.std

    # Convert each bounding box to (center, size) form normalized by the
    # image size, together with a one-hot class label
    _, height, width = img.shape
    ymin = bbox[:, 0]
    xmin = bbox[:, 1]
    ymax = bbox[:, 2]
    xmax = bbox[:, 3]

    one_hot_label = np.eye(self.n_class)[label]

    xs = (xmin + (xmax - xmin) // 2) / width
    ws = (xmax - xmin) / width
    ys = (ymin + (ymax - ymin) // 2) / height
    hs = (ymax - ymin) / height

    t = [{
        'label': l,
        'x': x,
        'w': w,
        'y': y,
        'h': h,
        'one_hot_label': hot
    } for l, x, w, y, h, hot in zip(label, xs, ws, ys, hs, one_hot_label)]

    return img, t

def _prepare(self, img):
    """Prepare an image for feeding it to a model.

    This is a standard preprocessing scheme used by feature extraction
    models.
    First, the image is scaled or resized according to :math:`scale_size`.
    Note that this step is optional.
    Next, the image is cropped to :math:`crop_size`.
    Last, the image is mean subtracted by an array :obj:`mean`.

    Args:
        img (~numpy.ndarray): An image. This is in CHW format.
            The range of its value is :math:`[0, 255]`.

    Returns:
        ~numpy.ndarray: A preprocessed image. This is a 4D array whose
        batch size is the number of crops.
    """
    if self.scale_size is not None:
        if isinstance(self.scale_size, int):
            img = scale(img, size=self.scale_size)
        else:
            img = resize(img, size=self.scale_size)

    if self.crop == '10':
        imgs = ten_crop(img, self.crop_size)
    elif self.crop == 'center':
        imgs = center_crop(img, self.crop_size)[np.newaxis]

    imgs -= self.mean[np.newaxis]

    return imgs

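# Usage sketch (illustrative, not part of the original source): the batch
# axis of the result depends on the crop mode; ``extractor`` is a
# hypothetical instance of the class that owns ``_prepare``.
# >>> extractor.crop = 'center'
# >>> extractor._prepare(img).shape   # (1, C, crop_H, crop_W)
# >>> extractor.crop = '10'
# >>> extractor._prepare(img).shape   # (10, C, crop_H, crop_W), via ten_crop
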
def __call__(self, in_data):
    img, label = in_data
    img = transforms.random_sized_crop(img)
    img = transforms.resize(img, (224, 224))
    img = transforms.random_flip(img, x_random=True)
    img -= self.mean
    return img.astype(chainer.get_dtype()), label

def _transform2(data, mean, train=True, mean_flag=False):
    img, label = data
    img = img.copy()

    size316 = (316, 316)
    size = (224, 224)

    img = transforms.scale(img, 316)
    img = transforms.center_crop(img, size316)

    # applied only during training
    if train:
        img = transforms.random_flip(img, y_random=True)
        img = transforms.random_rotate(img)
        # img = random_erase(img)

    img = transforms.resize(img, size)
    img = img.transpose(1, 2, 0)

    # subtract the mean from the image
    if mean_flag:
        img -= mean
    img *= (1.0 / 255.0)
    img = img.transpose(2, 0, 1)

    return img, label

def test_zero_length_img(self):
    if self.backend == 'cv2' and not _cv2_available:
        return
    img = np.random.uniform(size=(0, 24, 32))
    with chainer.using_config('cv_resize_backend', self.backend):
        out = resize(img, size=(32, 64), interpolation=self.interpolation)
    self.assertEqual(out.shape, (0, 32, 64))

def random_resize(img):
    rv = random.random()
    if rv < 0.5:
        ratio = round(rv * 2, 1)
        _, H, W = img.shape
        img = transforms.resize(img, (int(ratio * H), int(ratio * W)))
    return img

def gen_morphed_images(z, base_class, palette, masks, interpolation=8):
    z = xp.broadcast_to(z, (interpolation, 128))
    sizes = [4, 8, 8, 16, 16, 32, 32, 64, 64, 128, 128, 256]
    ws = []
    for i_size, size in enumerate(sizes):
        w = xp.zeros((interpolation, size, size, gen.n_classes), dtype=xp.float32)
        w[:, :, :, base_class] = 1.0  # default class
        for i_mask in range(len(palette)):
            resized_mask = xp.array(
                resize(masks[i_mask], (size, size)).reshape((size, size)),
                dtype=xp.float32)
            # resized_mask = xp.array(img_masks[i_mask].resize((size, size))).astype(xp.float32) / 255
            for i in range(interpolation):
                weight = i / (interpolation - 1.0)
                # if i_size <= 0:
                #     weight = 0
                w[i, :, :, base_class] -= resized_mask * weight
                w[i, :, :, palette[i_mask]] = resized_mask * weight
        ws.append(chainer.Variable(w))

    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        x = gen.spatial_interpolation(z, ws)
        x = x.data
    if args.gpu >= 0:
        x = x.get()
    x = np.asarray(np.clip(x * 127.5 + 127.5, 0.0, 255.0),
                   dtype=np.uint8).transpose((0, 2, 3, 1))
    return x

def segm_to_mask(segm, bbox, size):
    """Recover mask from cropped and resized mask.

    This function requires cv2.

    Args:
        segm (~numpy.ndarray): See below.
        bbox (~numpy.ndarray): See below.
        size (tuple): This is a tuple of length 2. Its elements are
            ordered as (height, width).

    Returns:
        ~numpy.ndarray: See below.

    .. csv-table::
        :header: name, shape, dtype, format

        :obj:`segm`, ":math:`(R, S, S)`", :obj:`float32`, --
        :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \
        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
        :obj:`mask` (output), ":math:`(R, H, W)`", :obj:`bool`, --

    """
    pad = 1
    H, W = size
    _, segm_size, _ = segm.shape

    mask = np.zeros((len(bbox), H, W), dtype=np.bool)

    # As commented in mask_to_segm, cv2.resize needs adjustment.
    padded_segm_size = segm_size + pad * 2
    expand_scale = padded_segm_size / segm_size
    bbox = _expand_bbox(bbox, expand_scale)
    canvas_mask = np.zeros(
        (padded_segm_size, padded_segm_size), dtype=np.float32)
    bbox = _integerize_bbox(bbox)

    for i, (bb, sgm) in enumerate(zip(bbox, segm)):
        bb_height = bb[2] - bb[0]
        bb_width = bb[3] - bb[1]
        if bb_height == 0 or bb_width == 0:
            continue

        canvas_mask[pad:-pad, pad:-pad] = sgm
        with chainer.using_config('cv_resize_backend', 'cv2'):
            crop_mask = transforms.resize(
                canvas_mask[None], (bb_height, bb_width))[0]
        crop_mask = crop_mask > 0.5

        y_min = max(bb[0], 0)
        x_min = max(bb[1], 0)
        y_max = max(min(bb[2], H), 0)
        x_max = max(min(bb[3], W), 0)
        y_offset = y_min - bb[0]
        x_offset = x_min - bb[1]
        mask[i, y_min:y_max, x_min:x_max] = crop_mask[
            y_offset:y_offset + y_max - y_min,
            x_offset:x_offset + x_max - x_min]
    return mask

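# Usage sketch (illustrative, not part of the original source): recover
# full-image boolean masks from R fixed-size mask crops and their boxes.
# >>> import numpy as np
# >>> segm = np.random.uniform(size=(2, 14, 14)).astype(np.float32)
# >>> bbox = np.array([[10, 10, 60, 40], [5, 5, 30, 30]], dtype=np.float32)
# >>> mask = segm_to_mask(segm, bbox, (100, 100))
# >>> mask.shape    # (2, 100, 100), dtype bool
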
def scale_mask(mask, bbox, size):
    """Scale instance segmentation mask while keeping the aspect ratio.

    This function exploits the sparsity of :obj:`mask` to speed up
    resize operation.

    The input image will be resized so that
    the shorter edge will be scaled to length :obj:`size` after
    resizing.

    Args:
        mask (array): An array whose shape is :math:`(R, H, W)`.
            :math:`R` is the number of masks.
            The dtype should be :obj:`numpy.bool`.
        bbox (array): The bounding boxes around the masked region
            of :obj:`mask`. This is expected to be the value
            obtained by :obj:`bbox = chainercv.utils.mask_to_bbox(mask)`.
        size (int): The length of the smaller edge.

    Returns:
        array:
        An array whose shape is :math:`(R, H, W)`.
        :math:`R` is the number of masks.
        The dtype should be :obj:`numpy.bool`.

    """
    xp = chainer.backends.cuda.get_array_module(mask)
    mask = chainer.cuda.to_cpu(mask)
    bbox = chainer.cuda.to_cpu(bbox)

    R, H, W = mask.shape
    if H < W:
        out_size = (size, int(size * W / H))
        scale = size / H
    else:
        out_size = (int(size * H / W), size)
        scale = size / W

    bbox[:, :2] = np.floor(bbox[:, :2])
    bbox[:, 2:] = np.ceil(bbox[:, 2:])
    bbox = bbox.astype(np.int32)
    scaled_bbox = bbox * scale
    scaled_bbox[:, :2] = np.floor(scaled_bbox[:, :2])
    scaled_bbox[:, 2:] = np.ceil(scaled_bbox[:, 2:])
    scaled_bbox = scaled_bbox.astype(np.int32)

    out_mask = xp.zeros((R,) + out_size, dtype=np.bool)
    for i, (m, bb, scaled_bb) in enumerate(zip(mask, bbox, scaled_bbox)):
        cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]]
        h = scaled_bb[2] - scaled_bb[0]
        w = scaled_bb[3] - scaled_bb[1]
        cropped_m = transforms.resize(
            cropped_m[None].astype(np.float32),
            (h, w), interpolation=PIL.Image.NEAREST)[0]
        if xp != np:
            cropped_m = xp.array(cropped_m)
        out_mask[i, scaled_bb[0]:scaled_bb[2],
                 scaled_bb[1]:scaled_bb[3]] = cropped_m
    return out_mask

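# Usage sketch (illustrative, not part of the original source): rescale a
# stack of instance masks so that the shorter image edge becomes 300 pixels;
# only the bounding-box regions that contain foreground are resized.
# >>> import numpy as np
# >>> from chainercv.utils import mask_to_bbox
# >>> mask = np.zeros((1, 400, 600), dtype=bool)
# >>> mask[0, 100:200, 150:350] = True
# >>> bbox = mask_to_bbox(mask)
# >>> scale_mask(mask, bbox, 300).shape   # (1, 300, 450)
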
def transform(inputs, mean, std, output_size=(224, 224)):
    x, lab = inputs
    x = x.copy()
    x -= mean[:, None, None]
    x /= std[:, None, None]
    x = x[::-1]
    x = transforms.resize(x, output_size)
    return x, lab

def _transform(inputs, mean=None, img_size=(512, 1024), scale_label=1):
    img, label = inputs

    # Scaling
    if img_size:
        img_size = (img_size[0], img_size[1])
        img = transforms.resize(img, img_size, Image.BICUBIC)
        label = transforms.resize(label[None, ...], img_size, Image.NEAREST)[0]

    # Mean subtraction
    if mean is not None:
        img -= mean[:, None, None]

    if scale_label != 1:
        scale_label = (int(label.shape[1] / scale_label),
                       int(label.shape[0] / scale_label))
        label = cv.resize(label, scale_label, interpolation=cv.INTER_NEAREST)

    return img, label

def __call__(self, in_data):
    if len(in_data) == 6:
        img, bbox, label, mask, crowd, area = in_data
    elif len(in_data) == 4:
        img, bbox, label, mask = in_data
    else:
        raise ValueError

    img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W
    if not self.train:
        if len(in_data) == 6:
            return img, bbox, label, mask, crowd, area
        elif len(in_data) == 4:
            return img, bbox, label, mask
        else:
            raise ValueError

    imgs, sizes, scales = self.mask_rcnn.prepare([img])
    # print(type(imgs))
    # print(type(sizes))
    # print(type(scales))
    img = imgs[0]
    H, W = sizes[0]
    scale = scales[0]
    _, o_H, o_W = img.shape

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
    if len(mask) > 0:
        mask = transforms.resize(mask, size=(o_H, o_W), interpolation=0)

    # # horizontally flip
    # img, params = transforms.random_flip(
    #     img, x_random=True, return_param=True)
    # bbox = transforms.flip_bbox(
    #     bbox, (o_H, o_W), x_flip=params['x_flip'])
    # if mask.ndim == 2:
    #     mask = transforms.flip(
    #         mask[None, :, :], x_flip=params['x_flip'])[0]
    # else:
    #     mask = transforms.flip(mask, x_flip=params['x_flip'])

    # horizontally and vertically flip
    img, params = transforms.random_flip(
        img, y_random=True, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W), y_flip=params['y_flip'], x_flip=params['x_flip'])
    if mask.ndim == 2:
        mask = transforms.flip(
            mask[None, :, :], y_flip=params['y_flip'],
            x_flip=params['x_flip'])[0]
    else:
        mask = transforms.flip(
            mask, y_flip=params['y_flip'], x_flip=params['x_flip'])

    return img, bbox, label, mask, scale, sizes

def scale_img(img, min_size, max_size):
    """Resize the image so that its shorter edge matches ``min_size``,
    while keeping the longer edge no larger than ``max_size``."""
    _, H, W = img.shape
    scale = min_size / min(H, W)
    if scale * max(H, W) > max_size:
        scale = max_size / max(H, W)
    H, W = int(H * scale), int(W * scale)
    img = transforms.resize(img, (H, W))
    return img, scale

def _scale_img(self, img):
    """Resize the image so that its shorter edge matches ``self._min_size``,
    while keeping the longer edge no larger than ``self._max_size``."""
    _, H, W = img.shape
    scale = self._min_size / min(H, W)
    if scale * max(H, W) > self._max_size:
        scale = self._max_size / max(H, W)
    H, W = int(H * scale), int(W * scale)
    img = transforms.resize(img, (H, W))
    return img, scale

def test_transform(sample):
    img = sample
    if len(img.shape) == 2:
        # Grayscale
        img = np.stack([img, img, img], 2)
    img = np.transpose(img, (2, 0, 1))
    img = transforms.resize(img, resize_size)
    img = transforms.center_crop(img, patchsize)
    img = img - mean
    img = img.astype(dtype)
    return img

def _prepare(self, img):
    img = img.astype(np.float32)
    img = transforms.resize(img, (self.insize, self.insize))
    img -= self.mean
    # NOTE: chainer.get_dtype will return float16 if the
    # global_config.dtype is chainer.mixed16
    img = img.astype(chainer.get_dtype())
    return img

def __call__(self, imgs):
    resized_imgs = []
    for img in imgs:
        _, H, W = img.shape
        scale = self._min_size / min(H, W)
        if scale * max(H, W) > self._max_size:
            scale = self._max_size / max(H, W)
        H, W = int(H * scale), int(W * scale)
        img = transforms.resize(img, (H, W))
        img -= self._mean
        resized_imgs.append(img)

    size = np.array([img.shape[1:] for img in resized_imgs]).max(axis=0)
    size = (np.ceil(size / self._stride) * self._stride).astype(int)
    x = np.zeros((len(imgs), 3, size[0], size[1]), dtype=np.float32)
    for i, img in enumerate(resized_imgs):
        _, H, W = img.shape
        x[i, :, :H, :W] = img
    return x

def test_resize_grayscale(self):
    img = np.random.uniform(size=(1, 24, 32))
    out = resize(img, size=(32, 64), interpolation=self.interpolation)
    self.assertEqual(out.shape, (1, 32, 64))

def transform(in_data):
    img, label = in_data
    img = resize(img, (32, 32))
    return img, label

def resize_contain(img, size, fill=0, interpolation=PIL.Image.BILINEAR,
                   return_param=False):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in :obj:`size` are larger than
    the height and the width of the :obj:`img`, the :obj:`img` is placed on
    the center with an appropriate padding to match :obj:`size`.

    Otherwise, the input image is scaled to fit in a canvas whose size
    is :obj:`size` while preserving aspect ratio.

    Args:
        img (~numpy.ndarray): An array to be transformed.
            This is in CHW format.
        size (tuple of two ints): A tuple of two elements:
            :obj:`height, width`. The size of the image after resizing.
        fill (float, tuple or ~numpy.ndarray): The value of padded pixels.
            If it is :class:`numpy.ndarray`, its shape should be
            :math:`(C, 1, 1)`, where :math:`C` is the number of channels
            of :obj:`img`.
        return_param (bool): Returns information of resizing and offsetting.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`out_img` that is the result of resizing.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`out_img, param`.
        :obj:`param` is a dictionary of intermediate parameters whose
        contents are listed below with key, value-type and the description
        of the value.

        * **y_offset** (*int*): The y coordinate of the top left corner of \
            the image after placing on the canvas.
        * **x_offset** (*int*): The x coordinate of the top left corner \
            of the image after placing on the canvas.
        * **scaled_size** (*tuple*): The size to which the image is scaled \
            to before placing it on a canvas. This is a tuple of two \
            elements: :obj:`height, width`.

    """
    C, H, W = img.shape
    out_H, out_W = size
    scale_h = out_H / H
    scale_w = out_W / W
    scale = min(min(scale_h, scale_w), 1)
    scaled_size = (int(H * scale), int(W * scale))
    if scale < 1:
        img = resize(img, scaled_size, interpolation=interpolation)
    y_slice, x_slice = _get_pad_slice(img, size=size)
    out_img = np.empty((C, out_H, out_W), dtype=img.dtype)
    out_img[:] = np.array(fill).reshape((-1, 1, 1))
    out_img[:, y_slice, x_slice] = img

    if return_param:
        param = {'y_offset': y_slice.start, 'x_offset': x_slice.start,
                 'scaled_size': scaled_size}
        return out_img, param
    else:
        return out_img

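# Usage sketch (illustrative, not part of the original source): letterbox a
# 300x500 image onto a 256x256 canvas filled with gray.
# >>> import numpy as np
# >>> img = np.random.uniform(0, 255, size=(3, 300, 500)).astype(np.float32)
# >>> out, param = resize_contain(img, (256, 256), fill=128, return_param=True)
# >>> out.shape                # (3, 256, 256)
# >>> param['scaled_size']     # (153, 256): the longer edge is scaled to 256
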