def resize(self, image, keypoints, bbox, size):
    _, h, w = image.shape
    new_h, new_w = size
    image = transforms.resize(image, (new_h, new_w))
    keypoints = [
        transforms.resize_point(points, (h, w), (new_h, new_w))
        for points in keypoints
    ]
    new_bbox = []
    for x, y, bw, bh in bbox:
        # resize the top-left corner as a (y, x) point, then scale the extent
        [[y, x]] = transforms.resize_point(
            np.array([[y, x]]), (h, w), (new_h, new_w))
        bw *= new_w / w
        bh *= new_h / h
        new_bbox.append([x, y, bw, bh])
    return image, keypoints, new_bbox
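# Usage sketch (my addition, not from the original repo): exercising the
# resize() method above on dummy data. Since `self` is unused in the body it
# is passed as None here, and `transforms` is assumed to be
# chainercv.transforms; bboxes follow the (x, y, width, height) convention
# used in the loop above.
def _demo_resize():
    import numpy as np
    dummy_image = np.zeros((3, 100, 200), dtype=np.float32)   # CHW
    dummy_keypoints = [np.array([[50., 100.], [25., 50.]])]   # (y, x) rows
    dummy_bbox = [[20., 10., 40., 30.]]                       # (x, y, w, h)
    out_img, out_kp, out_bbox = resize(
        None, dummy_image, dummy_keypoints, dummy_bbox, size=(50, 400))
    # height is halved and width doubled, so y and h scale by 0.5, x and w by 2
    assert out_img.shape == (3, 50, 400)
    assert out_bbox == [[40., 5., 80., 15.]]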
def resize_contain(image, joint_zyx, camera, size, fill=0, return_param=False):
    # NOTE: return_param is accepted for API symmetry but is unused here.
    _, inH, inW = image.shape
    resized, resize_param = transforms.resize_contain(
        image,
        size=size,
        return_param=True,
        fill=fill,
    )
    y_scale, x_scale = resize_param["scaled_size"] / np.array([inH, inW])
    # project the 3D joints to the image plane and apply the same
    # resize/translate that resize_contain applied to the image
    vu = camera.zyx2vu(joint_zyx.copy())
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.resize_point(
        vu, in_size=(inH, inW), out_size=resize_param["scaled_size"])
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"])
    # equivalently, update the camera intrinsics and re-project;
    # this re-projection supersedes the point-transform result above
    camera_scaled = camera.scale_camera(y_scale=y_scale, x_scale=x_scale)
    camera_resized = camera_scaled.translate_camera(
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"])
    vu = camera_resized.zyx2vu(joint_zyx)
    return resized, vu, camera_resized
def test_resize_point(self):
    point = np.random.uniform(low=0., high=32., size=(12, 2))
    out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
    point[:, 0] *= 0.5
    point[:, 1] *= 2
    np.testing.assert_equal(out, point)
def point2heatmap(points, indices, input_shape):
    """Convert the keypoints of a person to heatmaps.

    Args:
        points: np.ndarray of shape (16, 2), each row being (y, x).
        indices: np.ndarray of shape (16,), whether each point is available.
        input_shape: tuple, shape of the input image of the person.

    Returns:
        heatmap: np.ndarray of shape (16, 64, 64).
    """
    points = transforms.resize_point(points, input_shape, (64, 64))
    # (y, x) -> (x, y)
    points = points[:, ::-1]
    # pose-hg-train/src/utils/img.lua drawGaussian
    # pose-hg-train/src/utils/pose.lua generateSample
    heatmap = np.zeros((16, 64, 64), dtype=np.float32)
    sigma = 1.0
    for i, (available, point) in enumerate(zip(indices, points)):
        if available:
            # place an unnormalized Gaussian centered at the keypoint
            coordinates = np.array(np.meshgrid(range(64), range(64)))
            diff = coordinates - point[:, None, None]
            dist = np.sum(diff**2, axis=0)
            heatmap[i] = np.exp(-dist / (2 * sigma**2))
    return heatmap
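# Usage sketch (my addition, not from the original repo): building heatmaps
# for a single dummy keypoint and checking that the argmax of its heatmap
# lands on the keypoint location after the (256, 256) -> (64, 64) resize.
def _demo_point2heatmap():
    import numpy as np
    points = np.zeros((16, 2), dtype=np.float32)  # (y, x) in input coords
    points[0] = (128., 64.)
    indices = np.zeros(16, dtype=bool)
    indices[0] = True
    heatmap = point2heatmap(points, indices, input_shape=(256, 256))
    # coordinates scale by 1/4: (128, 64) -> (32, 16)
    y, x = np.unravel_index(heatmap[0].argmax(), (64, 64))
    assert (y, x) == (32, 16)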
def test_resize_point_list(self):
    point = [
        np.random.uniform(low=0., high=32., size=(12, 2)),
        np.random.uniform(low=0., high=32., size=(10, 2)),
    ]
    out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
    for i, pnt in enumerate(point):
        pnt[:, 0] *= 0.5
        pnt[:, 1] *= 2
        np.testing.assert_equal(out[i], pnt)
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223, projection="3d")
    label_3d(ax3)
    ax3.view_init(-90, -90)
    example = get_example(subject_id, action, seq_idx, frame_id)
    joints_zyx = example["world_joints"][:, ::-1]
    # project the joints into the depth image and find the center of mass
    vu, z_ = zyx2depth_vu(joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = depth_vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com
    # 3D crop volume around the center of mass
    xmin = x_com - crop3dW / 2
    ymin = y_com - crop3dH / 2
    xmax = x_com + crop3dW / 2
    ymax = y_com + crop3dH / 2
    # project the crop corners at the center depth to get the 2D crop domain
    [
        [vmin, umin],
        [vmax, umax],
    ] = zyx2depth_vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]
    depth = example["depth"]
    cropped, crop_param = crop_domain(depth, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"],
    )
    _, inH, inW = cropped.shape
    if inH < crop2dH or inW < crop2dW:
        # upscale so that the short side reaches the target crop size
        cropped = chainercv.transforms.scale(
            cropped, size=max(crop2dH, crop2dW), fit_short=True)
        vu = transforms.resize_point(
            vu,
            in_size=(inH, inW),
            out_size=cropped.shape[1:],
        )
    _, inH, inW = cropped.shape
    resized, resize_param = transforms.resize_contain(
        cropped,
        size=(crop2dH, crop2dW),
        return_param=True,
        fill=define_background(cropped),
    )
    vu = transforms.resize_point(
        vu,
        in_size=(inH, inW),
        out_size=resize_param["scaled_size"],
    )
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"],
    )
    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    vis_image(resized, ax=ax1)
    normalized = normalize_depth(resized, z_com, z_size=crop3dD)
    vis_image(normalized, ax=ax2)
    vis_point(point=vu, ax=ax1, color=color)
    vis_point(point=vu, ax=ax2, color=color)
    cropped_zyx = joints_zyx - zyx_com
    vis_point(point=[cropped_zyx], ax=ax3, color=color)
    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax2)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax3)
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122, projection="3d")
    label_3d(ax2)
    ax2.view_init(-90, -90)
    example = get_example(subject_id, action, seq_idx, frame_id)
    cam_joints_zyx = example["cam_joints"][:, ::-1]
    # project the joints into the image and find the center of mass
    vu, z_ = zyx2vu(cam_joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com
    # 3D crop volume around the center of mass
    xmin = x_com - crop3dW / 2
    ymin = y_com - crop3dH / 2
    xmax = x_com + crop3dW / 2
    ymax = y_com + crop3dH / 2
    # project the crop corners at the center depth to get the 2D crop domain
    [
        [vmin, umin],
        [vmax, umax],
    ] = zyx2vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]
    img = example["image"]
    cropped, crop_param = crop_domain(img, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"],
    )
    _, inH, inW = cropped.shape
    resized, resize_param = transforms.resize_contain(
        cropped, size=(crop2dH, crop2dW), return_param=True)
    vu = transforms.resize_point(
        vu, in_size=(inH, inW), out_size=resize_param["scaled_size"])
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"],
    )
    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    chainercv.visualizations.vis_image(resized, ax=ax1)
    vis_point(point=vu, ax=ax1, color=color)
    cropped_zyx = cam_joints_zyx - zyx_com
    vis_point(point=[cropped_zyx], ax=ax2, color=color)
    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax2)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument(
        '--model', default='',
        help='if not specified, a pre-trained model is downloaded and used.')
    parser.add_argument('--snapshot', default='')
    parser.add_argument('--image', type=str)
    args = parser.parse_args()

    if not args.image:
        raise ValueError('args.image should be specified.')
    args.image = os.path.expanduser(args.image)

    model = StackedHG(16)
    if args.model:
        chainer.serializers.load_npz(args.model, model)
    elif args.snapshot:
        chainer.serializers.load_npz(snap2model_trainer(args.snapshot), model)
    else:
        model_path = './models/model_2018_05_22.npz'
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            url = "https://github.com/fujibo/poseHG/releases/download/1.0.1/model_2018_05_22.npz"
            wget.download(url, model_path)
        chainer.serializers.load_npz(model_path, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    chainer.config.train = False

    img = utils.read_image(args.image)
    img = img / 255.
    img = img.astype(np.float32)
    # expected properties
    # - A person is in the center of the image
    # - the height of this image == 1.25 * a person's scale (= height)
    img_resized = transforms.resize(img, (256, 256))
    img_resized = img_resized[np.newaxis]
    if args.gpu >= 0:
        img_resized = cuda.to_gpu(img_resized)

    with chainer.no_backprop_mode():
        # (1, 3, 256, 256) -> (1, 16, 64, 64) -> (16, 64, 64)
        _output, output = model(img_resized)
    output = cuda.to_cpu(output.array)[0]

    C, H, W = output.shape
    # take the argmax of each heatmap: (16, 64, 64) -> (16,)
    output = output.reshape(C, -1).argmax(axis=1)
    keypoint = np.unravel_index(output, (H, W))
    keypoint = np.array(keypoint).T
    # map heatmap coordinates back to the original image size
    keypoint = transforms.resize_point(keypoint, (H, W), img.shape[1:])

    img = cv2.imread(args.image)
    visualizer = MPIIVisualizer()
    img_pose = visualizer.run(img, keypoint)
    cv2.imwrite('input.jpg', img)
    cv2.imwrite('output.jpg', img_pose)
def evaluate(model, dataset, device=-1, flip=False):
    batch_size = 50
    data_iter = chainer.iterators.MultithreadIterator(
        dataset, batch_size, repeat=False, shuffle=False)

    corrects = list()
    counts = list()
    for it, batch in enumerate(data_iter):
        # print progress
        print(f'{batch_size*it:04d} / {len(dataset):04d}', end='\r')
        img, label, idx, scale, shape = concat_examples(batch)
        N, C, H, W = img.shape
        if flip:
            # stack the original and the horizontally flipped images
            img = np.array((img, img[:, :, :, ::-1]))
            img = img.reshape(N * 2, C, H, W)
        if device >= 0:
            img = cuda.to_gpu(img)
        with chainer.no_backprop_mode():
            # (N, 3, 256, 256) -> (N, 16, 64, 64)
            _output, output = model(img)
        output = output.array
        if flip:
            # average the heatmaps of the original and the flipped images
            output = output.reshape((2, N) + output.shape[1:])
            output_flipped = flip_heatmap(output[1], copy=True)
            output = (output[0] + output_flipped) / 2
        N, C, H, W = output.shape

        keypoints = list()
        # (N, 16, 64, 64) -> (N, 16, 2)
        for i in range(N):
            # (16, 64, 64) -> (16,)
            out_reshaped = output[i].reshape(C, -1).argmax(axis=1)
            out_reshaped = cuda.to_cpu(out_reshaped)
            keypoint = np.unravel_index(out_reshaped, (H, W))
            # (2, 16) -> (16, 2)
            keypoint = np.array(keypoint).T
            keypoint = transforms.resize_point(keypoint, (H, W), shape[i])
            keypoints.append(keypoint)
        keypoints = np.array(keypoints)
        correct, count = pckh_score(label, keypoints, idx, scale)
        corrects.append(correct)
        counts.append(count)
    print()

    corrects = np.sum(corrects, axis=0)
    counts = np.sum(counts, axis=0)
    # Head, Shoulder, Elbow, Wrist, Hip, Knee, Ankle
    joints = {
        'head': [8, 9],
        'shoulder': [12, 13],
        'elbow': [11, 14],
        'wrist': [10, 15],
        'hip': [2, 3],
        'knee': [1, 4],
        'ankle': [0, 5],
    }
    scores = dict()
    for key, value in joints.items():
        score = corrects[value].sum() / counts[value].sum()
        scores.update({key: score})
    return scores
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument(
        '--model', default='',
        help='if not specified, a pre-trained model is downloaded and used.')
    parser.add_argument('--snapshot', default='')
    parser.add_argument('--image', type=str)
    args = parser.parse_args()

    if not args.image:
        raise ValueError('args.image should be specified.')
    args.image = os.path.expanduser(args.image)

    detector = SSD512(pretrained_model='voc0712')
    model = StackedHG(16)
    if args.model:
        chainer.serializers.load_npz(args.model, model)
    elif args.snapshot:
        chainer.serializers.load_npz(snap2model_trainer(args.snapshot), model)
    else:
        # pre-trained model
        model_path = './models/model_2018_05_22.npz'
        if not os.path.exists(model_path):
            gdd.download_file_from_google_drive(
                file_id='1rZZJRpqQKkncn30Igtk8KirgR96QlCFO',
                dest_path=model_path)
        chainer.serializers.load_npz(model_path, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        detector.to_gpu()
        model.to_gpu()
    chainer.config.train = False

    img = utils.read_image(args.image)
    # detect persons
    bboxes, labels, scores = detector.predict([img])
    bbox, label, score = bboxes[0], labels[0], scores[0]

    # expand bboxes and crop the image
    img = img / 255.
    img = img.astype(np.float32)
    img_persons = list()
    bbox_persons = list()
    for ymin, xmin, ymax, xmax in bbox:
        scale = ymax - ymin
        # this offset is for ankles (also used in training with the MPII dataset)
        offset = 15 / 200 * scale
        center = (xmin + xmax) / 2, (ymin + ymax) / 2 + offset
        # expand by 1.25 (also used in training with the MPII dataset)
        scale *= 1.25
        xmin, xmax = center[0] - scale / 2, center[0] + scale / 2
        ymin, ymax = center[1] - scale / 2, center[1] + scale / 2
        # truncate to the image boundary
        xmin = int(max(0, xmin))
        ymin = int(max(0, ymin))
        xmax = int(min(img.shape[2], xmax))
        ymax = int(min(img.shape[1], ymax))
        # cropping
        img_person = img[:, ymin:ymax, xmin:xmax]
        img_person = transforms.resize(img_person, (256, 256))
        img_persons.append(img_person)
        bbox_persons.append((ymin, xmin, ymax, xmax))
    img_persons = np.array(img_persons)
    bbox_persons = np.array(bbox_persons)
    utils.write_image(
        utils.tile_images((255 * img_persons).astype(np.float32), n_col=2),
        'tiled.jpg')

    # estimate poses
    if args.gpu >= 0:
        img_persons = cuda.to_gpu(img_persons)
    with chainer.no_backprop_mode():
        # (R, 3, 256, 256) -> (R, 16, 64, 64)
        _outputs, outputs = model(img_persons)
    outputs = cuda.to_cpu(outputs.array)
    R, C, H, W = outputs.shape

    # heatmap to keypoint: (R, C, H, W) -> (R, C, 2)
    keypoints = list()
    for output in outputs:
        # (16, 64, 64) -> (16,)
        output = output.reshape(C, -1).argmax(axis=1)
        keypoint = np.unravel_index(output, (H, W))
        keypoint = np.array(keypoint).T
        keypoints.append(keypoint)

    # keypoint (local) to keypoint (global)
    keypoint_persons = list()
    for keypoint, bbox_person in zip(keypoints, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person
        keypoint = transforms.resize_point(
            keypoint, (H, W), (ymax - ymin, xmax - xmin))
        keypoint_person = keypoint + np.array((ymin, xmin))
        keypoint_persons.append(keypoint_person)

    # visualize
    img = cv2.imread(args.image)
    visualizer = MPIIVisualizer()
    img_pose = img.copy()
    for keypoint_person, bbox_person in zip(keypoint_persons, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person
        img_pose = visualizer.run(img_pose, keypoint_person)
        img_pose = cv2.rectangle(
            img_pose, (xmin, ymin), (xmax, ymax), (0, 255, 255), 10)
    cv2.imwrite('input.jpg', img)
    cv2.imwrite('output.jpg', img_pose)
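# Example invocation (the script name is hypothetical; it depends on the repo
# layout):
#
#   python demo_multi_person.py --image ~/photos/group.jpg --gpu 0
#
# The script writes tiled.jpg (the cropped person patches), plus input.jpg and
# output.jpg (the input overlaid with estimated poses and detection boxes).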