def keypoints_from_regression(regression_preds, center, scale, img_size):
    """Get final keypoint predictions from regression vectors and transform
    them back to the image.

    Note:
        batch_size: N
        num_keypoints: K

    Args:
        regression_preds (np.ndarray[N, K, 2]): model prediction.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width.
        img_size (list(img_width, img_height)): model input image size.

    Returns:
        preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    batch, num_joints, _ = regression_preds.shape

    # Regression outputs carry no confidence scores, so report 1.0 everywhere.
    maxvals = np.ones((batch, num_joints, 1), dtype=np.float32)

    # Scale normalized coordinates up to the model input resolution
    # (creates a new array; the caller's predictions are left untouched).
    preds = regression_preds * img_size

    # Map each sample's keypoints from model-input space back to image space.
    for idx in range(batch):
        preds[idx] = transform_preds(preds[idx], center[idx], scale[idx],
                                     img_size)

    return preds, maxvals
def keypoints_from_heatmaps3d(heatmaps, center, scale):
    """Get final keypoint predictions from 3d heatmaps and transform them
    back to the image.

    Note:
        - batch size: N
        - num keypoints: K
        - heatmap depth size: D
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width.

    Returns:
        tuple: A tuple containing keypoint predictions and scores.

        - preds (np.ndarray[N, K, 3]): Predicted 3d keypoint location \
            in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    batch = heatmaps.shape[0]
    height, width = heatmaps.shape[-2], heatmaps.shape[-1]

    preds, maxvals = _get_max_preds_3d(heatmaps)

    # Only the x/y components live in heatmap space; map them back to the
    # image. The depth component (index 2) is left in heatmap coordinates.
    for idx in range(batch):
        preds[idx, :, :2] = transform_preds(preds[idx, :, :2], center[idx],
                                            scale[idx], [width, height])

    return preds, maxvals
def decode(self, img_metas, output, **kwargs):
    """Decode keypoints from heatmaps.

    Args:
        img_metas (list(dict)): Information about data augmentation
            By default this includes:

            - "image_file: path to the image file
            - "center": center of the bbox
            - "scale": scale of the bbox
            - "rotation": rotation of the bbox
            - "bbox_score": score of bbox
        output (np.ndarray[N, K, D, H, W]): model predicted 3D heatmaps.
    """
    batch_size = len(img_metas)
    N, K, D, H, W = output.shape

    # Only collect bbox ids when the metas actually provide them.
    bbox_ids = [] if 'bbox_id' in img_metas[0] else None

    center = np.zeros((batch_size, 2), dtype=np.float32)
    scale = np.zeros((batch_size, 2), dtype=np.float32)
    score = np.ones(batch_size, dtype=np.float32)
    image_paths = []

    for idx, meta in enumerate(img_metas):
        center[idx, :] = meta['center']
        scale[idx, :] = meta['scale']
        image_paths.append(meta['image_file'])
        if 'bbox_score' in meta:
            score[idx] = np.array(meta['bbox_score']).reshape(-1)
        if bbox_ids is not None:
            bbox_ids.append(meta['bbox_id'])

    preds, maxvals = _get_max_preds_3d(output)

    # Map the x/y components from heatmap space back to the image; the
    # depth component stays in heatmap coordinates.
    for idx in range(N):
        preds[idx, :, :2] = transform_preds(preds[idx, :, :2], center[idx],
                                            scale[idx], [W, H])

    all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
    all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
    all_preds[:, :, 0:2] = preds[:, :, 0:2]
    all_preds[:, :, 2:3] = maxvals
    all_boxes[:, 0:2] = center[:, 0:2]
    all_boxes[:, 2:4] = scale[:, 0:2]
    # scale is defined as: bbox_size / 200.0,
    # so we need multiply 200.0 to get bbox size
    all_boxes[:, 4] = np.prod(scale * 200.0, axis=1)
    all_boxes[:, 5] = score

    return {
        'preds': all_preds,
        'boxes': all_boxes,
        'image_paths': image_paths,
        'bbox_ids': bbox_ids,
    }
def get_group_preds(grouped_joints, center, scale, heatmap_size):
    """Transform the grouped joints back to the image.

    Args:
        grouped_joints (list): Grouped person joints.
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box wrt
            [width, height].
        heatmap_size (np.ndarray[2, ]): Size of the destination heatmaps.

    Returns:
        results (List): List of the pose result for each person.
    """
    # Only the first group is decoded; transform every person in it.
    return [
        transform_preds(person, center, scale, heatmap_size)
        for person in grouped_joints[0]
    ]
def get_group_preds(grouped_joints, center, scale, heatmap_size,
                    use_udp=False):
    """Transform the grouped joints back to the image.

    Args:
        grouped_joints (list): Grouped person joints.
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box wrt
            [width, height].
        heatmap_size (np.ndarray[2, ]): Size of the destination heatmaps.
        use_udp (bool): Unbiased data processing.
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR'2020).

    Returns:
        list: List of the pose result for each person.
    """
    if len(grouped_joints) == 0:
        return []

    if use_udp:
        if grouped_joints[0].shape[0] > 0:
            dst_size = np.array(heatmap_size, dtype=np.float32) - 1.0
            warp_mat = get_warp_matrix(
                theta=0,
                size_input=dst_size,
                size_dst=scale,
                size_target=dst_size)
            # NOTE(review): this writes back into grouped_joints[0] in
            # place, so the caller's array is modified as a side effect.
            grouped_joints[0][..., :2] = \
                warp_affine_joints(grouped_joints[0][..., :2], warp_mat)
        results = list(grouped_joints[0])
    else:
        results = [
            transform_preds(person, center, scale, heatmap_size)
            for person in grouped_joints[0]
        ]

    return results
def keypoints_from_heatmaps(heatmaps,
                            center,
                            scale,
                            post_process=True,
                            unbiased=False,
                            kernel=11):
    """Get final keypoint predictions from heatmaps and transform them back
    to the image.

    Note:
        batch_size: N
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width.
        post_process (bool): Option to use post processing or not.
        unbiased (bool): Option to use unbiased decoding. Paper ref:
            Zhang et al. Distribution-Aware Coordinate Representation for
            Human Pose Estimation (CVPR 2020).
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    preds, maxvals = _get_max_preds(heatmaps)
    N, K, H, W = heatmaps.shape

    if post_process:
        if unbiased:  # alleviate biased coordinate
            assert kernel > 0
            # DARK decoding: modulate the heatmaps with a Gaussian and move
            # into log space before the Taylor refinement.
            log_maps = np.log(
                np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
            for n in range(N):
                for k in range(K):
                    preds[n][k] = _taylor(log_maps[n][k], preds[n][k])
        else:
            # add +/-0.25 shift to the predicted locations for higher acc.
            for n in range(N):
                for k in range(K):
                    heatmap = heatmaps[n][k]
                    px, py = int(preds[n][k][0]), int(preds[n][k][1])
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            heatmap[py][px + 1] - heatmap[py][px - 1],
                            heatmap[py + 1][px] - heatmap[py - 1][px],
                        ])
                        preds[n][k] += np.sign(diff) * .25

    # Transform back to the image
    for i in range(N):
        preds[i] = transform_preds(preds[i], center[i], scale[i], [W, H])

    return preds, maxvals
def keypoints_from_heatmaps(heatmaps, center, scale, unbiased=False, post_process='default', kernel=11): """Get final keypoint predictions from heatmaps and transform them back to the image. Note: batch_size: N num_keypoints: K heatmap height: H heatmap width: W Args: heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps. center (np.ndarray[N, 2]): Center of the bounding box (x, y). scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width. post_process (str/None): Choice of methods to post-process heatmaps. Currently supported: None, 'default', 'unbiased', 'megvii'. unbiased (bool): Option to use unbiased decoding. Mutually exclusive with megvii. Note: this arg is deprecated and unbiased=True can be replaced by post_process='unbiased' Paper ref: Zhang et al. Distribution-Aware Coordinate Representation for Human Pose Estimation (CVPR 2020). kernel (int): Gaussian kernel size (K) for modulation, which should match the heatmap gaussian sigma when training. K=17 for sigma=3 and k=11 for sigma=2. Returns: tuple: A tuple containing keypoint predictions and scores. - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images. - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints. 
""" # detect conflicts if unbiased: assert post_process not in [False, None, 'megvii'] if post_process in ['megvii', 'unbiased']: assert kernel > 0 # normalize configs if post_process is False: warnings.warn( 'post_process=False is deprecated, ' 'please use post_process=None instead', DeprecationWarning) post_process = None elif post_process is True: if unbiased is True: warnings.warn( 'post_process=True, unbiased=True is deprecated,' " please use post_process='unbiased' instead", DeprecationWarning) post_process = 'unbiased' else: warnings.warn( 'post_process=True, unbiased=False is deprecated, ' "please use post_process='default' instead", DeprecationWarning) post_process = 'default' elif post_process == 'default': if unbiased is True: warnings.warn( 'unbiased=True is deprecated, please use ' "post_process='unbiased' instead", DeprecationWarning) post_process = 'unbiased' # start processing if post_process == 'megvii': heatmaps = _gaussian_blur(heatmaps, kernel=kernel) preds, maxvals = _get_max_preds(heatmaps) N, K, H, W = heatmaps.shape if post_process == 'unbiased': # alleviate biased coordinate # apply Gaussian distribution modulation. heatmaps = np.log(np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10)) for n in range(N): for k in range(K): preds[n][k] = _taylor(heatmaps[n][k], preds[n][k]) elif post_process is not None: # add +/-0.25 shift to the predicted locations for higher acc. for n in range(N): for k in range(K): heatmap = heatmaps[n][k] px = int(preds[n][k][0]) py = int(preds[n][k][1]) if 1 < px < W - 1 and 1 < py < H - 1: diff = np.array([ heatmap[py][px + 1] - heatmap[py][px - 1], heatmap[py + 1][px] - heatmap[py - 1][px] ]) preds[n][k] += np.sign(diff) * .25 if post_process == 'megvii': preds[n][k] += 0.5 # Transform back to the image for i in range(N): preds[i] = transform_preds(preds[i], center[i], scale[i], [W, H]) if post_process == 'megvii': maxvals = maxvals / 255.0 + 0.5 return preds, maxvals
def keypoints_from_heatmaps(heatmaps,
                            center,
                            scale,
                            unbiased=False,
                            post_process='default',
                            kernel=11,
                            valid_radius_factor=0.0546875,
                            use_udp=False,
                            target_type='GaussianHeatMap'):
    """Get final keypoint predictions from heatmaps and transform them back
    to the image.

    Note:
        batch size: N
        num keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width.
        post_process (str/None): Choice of methods to post-process heatmaps.
            Currently supported: None, 'default', 'unbiased', 'megvii'.
        unbiased (bool): Option to use unbiased decoding. Mutually exclusive
            with megvii.
            Note: this arg is deprecated and unbiased=True can be replaced
            by post_process='unbiased'
            Paper ref: Zhang et al. Distribution-Aware Coordinate
            Representation for Human Pose Estimation (CVPR 2020).
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.
        valid_radius_factor (float): The radius factor of the positive area
            in classification heatmap for UDP.
        use_udp (bool): Use unbiased data processing.
        target_type (str): 'GaussianHeatMap' or 'CombinedTarget'.
            GaussianHeatMap: Classification target with gaussian distribution.
            CombinedTarget: The combination of classification target
            (response map) and regression target (offset map).
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).

    Returns:
        tuple: A tuple containing keypoint predictions and scores.

        - preds (np.ndarray[N, K, 2]): Predicted keypoint location \
            in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the
          keypoints.
    """
    # detect conflicts between the deprecated `unbiased` flag and the
    # string-valued post-processing modes.
    if unbiased:
        assert post_process not in [False, None, 'megvii']
    if post_process in ['megvii', 'unbiased']:
        assert kernel > 0
    if use_udp:
        assert post_process != 'megvii'

    # normalize configs: map deprecated bool values of `post_process`
    # (and unbiased=True) onto the string-valued options.
    if post_process is False:
        warnings.warn(
            'post_process=False is deprecated, '
            'please use post_process=None instead', DeprecationWarning)
        post_process = None
    elif post_process is True:
        if unbiased is True:
            warnings.warn(
                'post_process=True, unbiased=True is deprecated,'
                " please use post_process='unbiased' instead",
                DeprecationWarning)
            post_process = 'unbiased'
        else:
            warnings.warn(
                'post_process=True, unbiased=False is deprecated, '
                "please use post_process='default' instead",
                DeprecationWarning)
            post_process = 'default'
    elif post_process == 'default':
        if unbiased is True:
            warnings.warn(
                'unbiased=True is deprecated, please use '
                "post_process='unbiased' instead", DeprecationWarning)
            post_process = 'unbiased'

    # start processing
    # megvii smooths the heatmaps *before* taking the argmax.
    if post_process == 'megvii':
        heatmaps = _gaussian_blur(heatmaps, kernel=kernel)

    N, K, H, W = heatmaps.shape
    if use_udp:
        assert target_type in ['GaussianHeatMap', 'CombinedTarget']
        if target_type == 'GaussianHeatMap':
            preds, maxvals = _get_max_preds(heatmaps)
            preds = post_dark_udp(preds, heatmaps, kernel=kernel)
        elif target_type == 'CombinedTarget':
            # Channels come in triples (response, x-offset, y-offset);
            # smooth the response maps with a wider kernel than the offsets.
            for person_heatmaps in heatmaps:
                for i, heatmap in enumerate(person_heatmaps):
                    kt = 2 * kernel + 1 if i % 3 == 0 else kernel
                    cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
            # valid radius is in direct proportion to the height of heatmap.
            valid_radius = valid_radius_factor * H
            offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
            offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
            heatmaps = heatmaps[:, ::3, :]
            preds, maxvals = _get_max_preds(heatmaps)
            # Flat index of each argmax within the flattened offset maps:
            # (n * K//3 + k) * H * W + py * W + px.
            index = preds[..., 0] + preds[..., 1] * W
            # BUGFIX: reshape the per-channel base offsets to (N, K//3) so
            # the addition broadcasts correctly for batch sizes N > 1 (the
            # original flat arange only lined up when N == 1). Values are
            # unchanged for N == 1. Integer floor division also avoids a
            # float-valued arange (K is a multiple of 3 here).
            index += W * H * np.arange(0, N * K // 3).reshape(N, K // 3)
            # BUGFIX: `np.int` was deprecated in NumPy 1.20 and removed in
            # 1.24; the builtin `int` is the documented replacement.
            index = index.astype(int).reshape(N, K // 3, 1)
            preds += np.concatenate((offset_x[index], offset_y[index]),
                                    axis=2)
    else:
        preds, maxvals = _get_max_preds(heatmaps)
        if post_process == 'unbiased':  # alleviate biased coordinate
            # apply Gaussian distribution modulation.
            heatmaps = np.log(
                np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
            for n in range(N):
                for k in range(K):
                    preds[n][k] = _taylor(heatmaps[n][k], preds[n][k])
        elif post_process is not None:
            # add +/-0.25 shift to the predicted locations for higher acc.
            for n in range(N):
                for k in range(K):
                    heatmap = heatmaps[n][k]
                    px = int(preds[n][k][0])
                    py = int(preds[n][k][1])
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            heatmap[py][px + 1] - heatmap[py][px - 1],
                            heatmap[py + 1][px] - heatmap[py - 1][px]
                        ])
                        preds[n][k] += np.sign(diff) * .25
                    if post_process == 'megvii':
                        preds[n][k] += 0.5

    # Transform back to the image
    for i in range(N):
        preds[i] = transform_preds(
            preds[i], center[i], scale[i], [W, H], use_udp=use_udp)

    if post_process == 'megvii':
        # megvii rescales raw scores into roughly [0.5, 1.5].
        maxvals = maxvals / 255.0 + 0.5

    return preds, maxvals