def test_resize_contain(self):
    """Check ``resize_contain`` against the expected fixture values.

    The test reads ``self.size``, ``self.fill``, ``self.scaled_size``,
    ``self.y_offset`` and ``self.x_offset`` from the test instance.
    """
    H, W = 32, 64
    # NOTE(review): 255 is passed positionally, i.e. as ``low`` of
    # ``np.random.uniform`` -- confirm whether ``(0, 255)`` was intended.
    img = np.random.uniform(255, size=(3, H, W))
    out, param = resize_contain(
        img, self.size, fill=self.fill, return_param=True)

    # The reported parameters must equal the same-named fixture attributes.
    for key in ('scaled_size', 'y_offset', 'x_offset'):
        self.assertEqual(param[key], getattr(self, key))

    # When no rescaling happened, the input must appear verbatim at the
    # reported offset inside the output canvas.
    if self.scaled_size == (H, W):
        rows = slice(self.y_offset, self.y_offset + H)
        cols = slice(self.x_offset, self.x_offset + W)
        np.testing.assert_array_equal(out[:, rows, cols], img)

    # With a non-zero offset the top-left pixel lies in the padded area
    # and must carry the fill value.
    if self.y_offset > 0 or self.x_offset > 0:
        expected_fill = (
            (self.fill,) * 3 if isinstance(self.fill, int) else self.fill)
        np.testing.assert_array_equal(
            out[:, 0, 0], np.array(expected_fill).flatten())
def test_resize_contain(self):
    """Check ``resize_contain`` for the parametrized interpolation mode.

    The test reads ``self.size``, ``self.fill``, ``self.interpolation``,
    ``self.scaled_size``, ``self.y_offset`` and ``self.x_offset`` from the
    test instance.
    """
    H, W = 32, 64
    # NOTE(review): 255 is passed positionally, i.e. as ``low`` of
    # ``np.random.uniform`` -- confirm whether ``(0, 255)`` was intended.
    # The stray trailing backslash that used to follow this statement
    # (and glued it to the next one) has been removed.
    img = np.random.uniform(255, size=(3, H, W))
    out, param = resize_contain(
        img, self.size, fill=self.fill,
        interpolation=self.interpolation, return_param=True)

    self.assertEqual(param['scaled_size'], self.scaled_size)
    self.assertEqual(param['y_offset'], self.y_offset)
    self.assertEqual(param['x_offset'], self.x_offset)

    # When no rescaling happened, the input must appear verbatim at the
    # reported offset inside the output canvas.
    if self.scaled_size == (H, W):
        np.testing.assert_array_equal(
            out[:,
                self.y_offset:self.y_offset + H,
                self.x_offset:self.x_offset + W],
            img)

    # With a non-zero offset the top-left pixel lies in the padded area
    # and must carry the fill value.
    if self.y_offset > 0 or self.x_offset > 0:
        if isinstance(self.fill, int):
            fill = (self.fill,) * 3
        else:
            fill = self.fill
        np.testing.assert_array_equal(
            out[:, 0, 0], np.array(fill).flatten())
def resize_contain(image, joint_zyx, camera, size, fill=0, return_param=False):
    """Resize ``image`` into ``size`` and re-project joints accordingly.

    The image is resized with ``transforms.resize_contain``; the camera is
    then scaled and translated by the same amounts so the returned 2D
    joints line up with the resized image.

    Args:
        image: Array whose shape unpacks as ``(_, H, W)``.
        joint_zyx: Joint coordinates passed to ``camera.zyx2vu``.
        camera: Object providing ``scale_camera``; the scaled camera must
            provide ``translate_camera`` and the result ``zyx2vu``.
        size: Target size forwarded to ``transforms.resize_contain``.
        fill: Fill value forwarded to ``transforms.resize_contain``.
        return_param: Accepted for interface compatibility; it is not
            used and does not change the return value.

    Returns:
        tuple: ``(resized, vu, camera_resized)``.
    """
    _, inH, inW = image.shape
    resized, resize_param = transforms.resize_contain(
        image,
        size=size,
        return_param=True,
        fill=fill,
    )
    # Per-axis scale factors between the input and the scaled image.
    y_scale, x_scale = resize_param["scaled_size"] / np.array([inH, inW])

    # Apply the same scale/offset to the camera; projecting through the
    # adjusted camera yields the joints in resized-image coordinates.
    camera_scaled = camera.scale_camera(y_scale=y_scale, x_scale=x_scale)
    camera_resized = camera_scaled.translate_camera(
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"],
    )
    vu = camera_resized.zyx2vu(joint_zyx)
    return resized, vu, camera_resized
def test_resize_contain_canvas_small_y(self):
    """Check the parameters reported for a (3, 32, 64) image on a (24, 16) canvas."""
    img = np.random.uniform(size=(3, 32, 64))
    _, param = resize_contain(
        img, (24, 16), fill=self.fill, return_param=True)

    # Expected values, checked in the same order as before.
    expected = (
        ('scaled_size', (8, 16)),
        ('y_offset', 8),
        ('x_offset', 0),
    )
    for key, value in expected:
        self.assertEqual(param[key], value)
def argument_image(self, img, c_source, is_crop_random=True,
                   is_flip_random=True):
    """Resize ``img`` to the fine size, optionally random-cropping/flipping.

    The image's top-left pixel (``img[:, 0, 0]``) is passed to
    ``resize_contain`` as the fill value.

    Args:
        img: Image array indexed as ``img[:, y, x]``.
        c_source: Forwarded to ``random_crop_by_2``.
        is_crop_random: When true, resize to the fine size plus padding and
            crop back with ``random_crop_by_2``; otherwise resize directly
            to the fine size.
        is_flip_random: When true, apply ``random_flip`` with
            ``x_random=True``.

    Returns:
        The transformed image.
    """
    char_w, char_h = self.char_size
    fine_w, fine_h = self.fine_size
    pad_w = fine_w - char_w
    pad_h = fine_h - char_h

    if not is_crop_random:
        img = resize_contain(img, (fine_h, fine_w), img[:, 0, 0])
    else:
        # The padding must be non-negative and even in both directions.
        assert all(pad >= 0 and pad % 2 == 0 for pad in (pad_w, pad_h))
        padded_size = (fine_h + pad_h, fine_w + pad_w)
        img = resize_contain(img, padded_size, img[:, 0, 0])
        img = random_crop_by_2(img, c_source, pad_h, pad_w, fine_h, fine_w)

    if is_flip_random:
        img = random_flip(img, x_random=True)
    return img
def preprocesss(self, image):
    """Move the last axis first and fit the image into the configured shape.

    Args:
        image: Array that is transposed with ``(2, 0, 1)``.
            # presumably HWC in, CHW out -- confirm against callers

    Returns:
        tuple: ``(resized, param)`` where ``param`` is the dict returned by
        ``transforms.resize_contain`` extended with ``'ori_size'``, the
        pre-resize ``(shape[1], shape[2])`` of the transposed image.
    """
    chw = image.transpose((2, 0, 1))
    height, width = chw.shape[1], chw.shape[2]
    resized, param = transforms.resize_contain(
        chw,
        size=self.config.input_shape,
        fill=0,
        return_param=True,
    )
    # Remember the size before resizing so callers can map results back.
    param['ori_size'] = (height, width)
    return resized, param
def _tile_predict(self, img):
    """Predict a score map for one image, tiling it when it is too large.

    If ``self.mean`` is set it is subtracted first. When the longer image
    side exceeds ``max(self.input_size)``, the image is split into crops
    with ``convolution_crop`` and the per-crop predictions (plain and
    horizontally flipped) are accumulated and averaged. Otherwise the
    image is fitted into ``self.input_size`` with
    ``transforms.resize_contain`` and the plain/flipped predictions are
    averaged and cut back to the scaled region.

    Args:
        img: Image array; ``img.shape[1:]`` is unpacked as
            ``(ori_H, ori_W)``.

    Returns:
        The ``.array`` of the first batch element after resizing the
        scores to ``(ori_H, ori_W)`` with ``F.resize_images``.
    """
    if self.mean is not None:
        img = img - self.mean
    ori_H, ori_W = img.shape[1:]
    long_size = max(ori_H, ori_W)

    if long_size > max(self.input_size):
        # Window stride is two thirds of the input size, rounded up.
        stride_rate = 2 / 3
        stride = (int(ceil(self.input_size[0] * stride_rate)),
                  int(ceil(self.input_size[1] * stride_rate)))

        imgs, param = convolution_crop(
            img, self.input_size, stride, return_param=True)

        # ``counts`` records how many predictions were added per pixel so
        # the accumulated scores can be averaged afterwards.
        counts = self.xp.zeros((1, ori_H, ori_W), dtype=np.float32)
        preds = self.xp.zeros(
            (1, self.n_class, ori_H, ori_W), dtype=np.float32)
        N = len(param['y_slices'])
        for i in range(N):
            # Slice (not index) so the crop keeps its leading batch axis.
            img_i = imgs[i:i + 1]
            # ``y_slice``/``x_slice`` index the full-size accumulators;
            # ``crop_*_slice`` index the crop's own score map.
            y_slice = param['y_slices'][i]
            x_slice = param['x_slices'][i]
            crop_y_slice = param['crop_y_slices'][i]
            crop_x_slice = param['crop_x_slices'][i]

            scores_i = self._predict(img_i)
            # Predict on the horizontally flipped crop, then flip the
            # resulting score map back.
            flipped_scores_i = self._predict(
                img_i[:, :, :, ::-1])[:, :, :, ::-1]

            preds[0, :, y_slice, x_slice] +=\
                scores_i[0, :, crop_y_slice, crop_x_slice]
            preds[0, :, y_slice, x_slice] +=\
                flipped_scores_i[0, :, crop_y_slice, crop_x_slice]
            # Two predictions (plain + flipped) were added for this window.
            counts[0, y_slice, x_slice] += 2

        scores = preds / counts[:, None]
    else:
        img, param = transforms.resize_contain(
            img, self.input_size, return_param=True)
        # Average the plain prediction with the un-flipped prediction of
        # the horizontally flipped image.
        preds1 = self._predict(img[np.newaxis])
        preds2 = self._predict(img[np.newaxis, :, :, ::-1])
        preds = (preds1 + preds2[:, :, :, ::-1]) / 2

        # Keep only the region covered by the scaled image, dropping the
        # area added by ``resize_contain``.
        y_start = param['y_offset']
        y_end = y_start + param['scaled_size'][0]
        x_start = param['x_offset']
        x_end = x_start + param['scaled_size'][1]
        scores = preds[:, :, y_start:y_end, x_start:x_end]
    scores = F.resize_images(scores, (ori_H, ori_W))[0].array
    return scores
def test_resize_contain(self):
    """Check that a (3, 32, 64) image is centered on a (48, 96) canvas."""
    img = np.random.uniform(size=(3, 32, 64))
    out, param = resize_contain(
        img, (48, 96), fill=self.fill, return_param=True)

    # The image must be copied unchanged into the canvas region.
    np.testing.assert_array_equal(img, out[:, 8:40, 16:80])
    # The top-left pixel lies outside that region and holds the fill value.
    np.testing.assert_array_equal(self.fill, out[:, 0, 0])

    expected = (
        ('scaled_size', (32, 64)),
        ('y_offset', 8),
        ('x_offset', 16),
    )
    for key, value in expected:
        self.assertEqual(param[key], value)
def transform(in_data):
    """Normalize and augment one ``(img, label)`` example.

    The image is copied, normalized in place with the enclosing-scope
    ``mean`` and ``std``, fitted into 40x40 with ``T.resize_contain``,
    randomly cropped to 32x32 and randomly flipped along x.

    Args:
        in_data: A pair ``(img, label)``.

    Returns:
        tuple: ``(augmented_img, label)``; ``label`` is passed through.
    """
    image, label = in_data

    # Normalize a copy in place so the caller's array is left untouched.
    normalized = image.copy()
    normalized -= mean[:, None, None]
    normalized /= std[:, None, None]

    padded = T.resize_contain(normalized, (40, 40))
    cropped = T.random_crop(padded, (32, 32))
    flipped = T.random_flip(cropped, x_random=True)
    return flipped, label
def forward(self, imgs):
    """Forward a batch of images and decode the raw network outputs.

    Each image is divided by 255, fitted into ``(self.insize,
    self.insize)`` with fill value 0.5, stacked into a batch and passed to
    ``self.__call__``.

    Args:
        imgs: Iterable of image arrays.

    Returns:
        tuple of lists: ``(bboxes, confs, objs)`` with one entry per image:
        the decoded boxes, the normalized class confidences and the
        sigmoid objectness values.
    """
    x = []
    for img in imgs:
        # The resize parameters are not needed here; only the image is kept.
        img, _ = transforms.resize_contain(
            img / 255, (self.insize, self.insize), fill=0.5,
            return_param=True)
        x.append(self.xp.array(img))
    x = self.xp.stack(x)

    locs, objs, confs = self.__call__(x)
    locs = locs.array
    objs = objs.array
    confs = confs.array

    _bboxes = []
    _confs = []
    _objs = []
    for loc, obj, conf in zip(locs, objs, confs):
        raw_bbox = self._default_bbox.copy()
        # Offset the first two columns by sigmoid(loc) and scale the last
        # two by exp(loc).
        # presumably columns are (center_y, center_x, height, width) --
        # TODO confirm against ``_default_bbox``
        raw_bbox[:, :2] += 1 / (1 + self.xp.exp(-loc[:, :2]))
        raw_bbox[:, 2:] *= self.xp.exp(loc[:, 2:])
        # Turn (first two, last two) into (first - last / 2, first + last / 2).
        raw_bbox[:, :2] -= raw_bbox[:, 2:] / 2
        raw_bbox[:, 2:] += raw_bbox[:, :2]
        # Rescale by the ratio of input size to extractor grid size.
        raw_bbox *= self.insize / self.extractor.grid

        # Sigmoid for objectness; exp-normalization over axis 1 for classes.
        obj = 1 / (1 + self.xp.exp(-obj))
        conf = self.xp.exp(conf)
        conf /= conf.sum(axis=1, keepdims=True)

        _bboxes.append(raw_bbox)
        _confs.append(conf)
        _objs.append(obj)
    return _bboxes, _confs, _objs
def test_resize_contain(self):
    """Check centering and fill for a (3, 32, 64) image on a (48, 96) canvas.

    ``self.fill`` may be an int (expanded to three channels for the
    comparison) or an array-like used as is.
    """
    img = np.random.uniform(size=(3, 32, 64))
    out, param = resize_contain(
        img, (48, 96), fill=self.fill, return_param=True)

    # The image must be copied unchanged into the canvas region.
    np.testing.assert_array_equal(img, out[:, 8:40, 16:80])

    # The top-left pixel lies outside that region and holds the fill value.
    expected_fill = (
        (self.fill,) * 3 if isinstance(self.fill, int) else self.fill)
    np.testing.assert_array_equal(
        out[:, 0, 0], np.array(expected_fill).flatten())

    expected = (
        ('scaled_size', (32, 64)),
        ('y_offset', 8),
        ('x_offset', 16),
    )
    for key, value in expected:
        self.assertEqual(param[key], value)
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in an image.
          Each bounding box is organized by
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.

    """
    batch = []
    params = []
    for img in imgs:
        _, height, width = img.shape
        resized, param = transforms.resize_contain(
            img / 255, (self.insize, self.insize),
            fill=0.5, return_param=True)
        batch.append(self.xp.array(resized))
        # Keep the original size so boxes can be mapped back later.
        param['size'] = (height, width)
        params.append(param)

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        locs, objs, confs = self(self.xp.stack(batch))
    locs, objs, confs = locs.array, objs.array, confs.array

    bboxes, labels, scores = [], [], []
    for loc, obj, conf, param in zip(locs, objs, confs, params):
        bbox, label, score = self._decode(loc, obj, conf)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        score = cuda.to_cpu(score)

        # Shift by half the network input size, rescale from the scaled
        # size to the original size, then shift by half the original size.
        centering_shift = -self.insize / 2
        original_size = param['size']
        bbox = transforms.translate_bbox(
            bbox, centering_shift, centering_shift)
        bbox = transforms.resize_bbox(
            bbox, param['scaled_size'], original_size)
        bbox = transforms.translate_bbox(
            bbox, original_size[0] / 2, original_size[1] / 2)

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    """Crop a depth map around the joints' center and visualize the result.

    The example is fetched with ``get_example``. A box of
    ``crop3dW`` x ``crop3dH`` around the center returned by ``calc_com`` is
    projected with ``zyx2depth_vu`` and cropped from the depth map, which
    is then scaled up if smaller than the target and fitted into
    ``(crop2dH, crop2dW)``. The 2D joints follow every image transform.
    Results are drawn on a matplotlib figure; nothing is returned.

    Args:
        subject_id, action, seq_idx, frame_id: Forwarded unchanged to
            ``get_example``.
    """
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223, projection="3d")
    label_3d(ax3)
    ax3.view_init(-90, -90)

    example = get_example(subject_id, action, seq_idx, frame_id)
    # Reverse the last axis of the stored joints.
    # presumably xyz -> zyx -- confirm against the dataset layout
    joints_zyx = example["world_joints"][:, ::-1]
    vu, z_ = zyx2depth_vu(joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = depth_vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com

    # Box of crop3dW x crop3dH around the center, at the center's depth.
    xmin, ymin = x_com - crop3dW / 2, y_com - crop3dH / 2
    xmax, ymax = x_com + crop3dW / 2, y_com + crop3dH / 2
    (vmin, umin), (vmax, umax) = zyx2depth_vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]

    depth = example["depth"]
    cropped, crop_param = crop_domain(depth, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"],
    )

    _, inH, inW = cropped.shape
    if inH < crop2dH or inW < crop2dW:
        # Crop is smaller than the target: scale it up and move the
        # points along with it.
        cropped = chainercv.transforms.scale(
            cropped, size=max(crop2dH, crop2dW), fit_short=True)
        vu = transforms.resize_point(
            vu,
            in_size=(inH, inW),
            out_size=cropped.shape[1:],
        )
    # Re-read the size: ``cropped`` may have been rescaled above.
    _, inH, inW = cropped.shape

    resized, resize_param = transforms.resize_contain(
        cropped,
        size=(crop2dH, crop2dW),
        return_param=True,
        fill=define_background(cropped),
    )
    vu = transforms.resize_point(
        vu,
        in_size=(inH, inW),
        out_size=resize_param["scaled_size"],
    )
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"],
    )

    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    vis_image(resized, ax=ax1)
    print(z_com, z_com-crop3dD/2, z_com+crop3dD/2)
    normalized = normalize_depth(resized, z_com, z_size=crop3dD)
    vis_image(normalized, ax=ax2)
    vis_point(point=vu, ax=ax1, color=color)
    vis_point(point=vu, ax=ax2, color=color)
    # 3D joints relative to the center.
    cropped_zyx = joints_zyx - zyx_com
    vis_point(point=[cropped_zyx], ax=ax3, color=color)

    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax2)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax3)
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    """Crop an image around the joints' center and visualize the result.

    The example is fetched with ``get_example``. A box of
    ``crop3dW`` x ``crop3dH`` around the center returned by ``calc_com`` is
    projected with ``zyx2vu`` and cropped from the image, which is then
    fitted into ``(crop2dH, crop2dW)``. The 2D joints follow every image
    transform. Results are drawn on a matplotlib figure; nothing is
    returned.

    Args:
        subject_id, action, seq_idx, frame_id: Forwarded unchanged to
            ``get_example``.
    """
    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122, projection="3d")
    label_3d(ax2)
    ax2.view_init(-90, -90)

    example = get_example(subject_id, action, seq_idx, frame_id)
    # Reverse the last axis of the stored joints.
    # presumably xyz -> zyx -- confirm against the dataset layout
    cam_joints_zyx = example["cam_joints"][:, ::-1]
    vu, z_ = zyx2vu(cam_joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com

    # Box of crop3dW x crop3dH around the center, at the center's depth.
    xmin, ymin = x_com - crop3dW / 2, y_com - crop3dH / 2
    xmax, ymax = x_com + crop3dW / 2, y_com + crop3dH / 2
    (vmin, umin), (vmax, umax) = zyx2vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]

    img = example["image"]
    cropped, crop_param = crop_domain(img, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"],
    )

    _, inH, inW = cropped.shape
    resized, resize_param = transforms.resize_contain(
        cropped,
        size=(crop2dH, crop2dW),
        return_param=True,
    )
    vu = transforms.resize_point(
        vu,
        in_size=(inH, inW),
        out_size=resize_param["scaled_size"],
    )
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"],
    )

    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    chainercv.visualizations.vis_image(resized, ax=ax1)
    vis_point(point=vu, ax=ax1, color=color)
    # 3D joints relative to the center.
    cropped_zyx = cam_joints_zyx - zyx_com
    vis_point(point=[cropped_zyx], ax=ax2, color=color)

    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax2)