def _load_jsons(self):
    """Read the JSON annotation file and build the image-path and label lists.

    Returns:
        tuple: ``(items, labels)``; ``items`` holds one absolute image path
        per sample and ``labels`` holds the matching label dict.
    """
    with open(self._ann_file, 'r') as fid:
        database = json.load(fid)

    items, labels = [], []
    # image records and annotation records are paired by position
    for image_rec, ann_rec in zip(database['images'], database['annotations']):
        # one flat record; annotation fields win when a key appears in both
        ann = {**image_rec, **ann_rec}

        image_id = ann['image_id']
        width, height = ann['width'], ann['height']
        # (x, y, w, h) -> (xmin, ymin, xmax, ymax), clipped with the image size
        clipped_box = bbox_clip_xyxy(
            bbox_xywh_to_xyxy(ann['bbox']), width, height)
        xmin, ymin, xmax, ymax = clipped_box

        intrinsic_param = np.array(
            ann['cam_param']['intrinsic_param'], dtype=np.float32)
        # f takes the first two diagonal entries and c the third-column
        # entries (presumably focal lengths and principal point)
        f = np.array(
            [intrinsic_param[0, 0], intrinsic_param[1, 1]], dtype=np.float32)
        c = np.array(
            [intrinsic_param[0, 2], intrinsic_param[1, 2]], dtype=np.float32)

        joint_cam = np.array(ann['keypoints_cam'])
        joint_img = cam2pixel_matrix(joint_cam, intrinsic_param)
        # third column becomes relative to the root joint's camera-space value
        root_depth = joint_cam[self.root_idx, 2]
        joint_img[:, 2] = joint_img[:, 2] - root_depth
        joint_vis = np.ones((self.num_joints, 3))
        root_cam = joint_cam[self.root_idx]

        split_dir = 'mpi_inf_3dhp_{}_set'.format(
            'train' if self._train else 'test')
        abs_path = os.path.join(self._root, split_dir, ann['file_name'])

        label = {
            'bbox': (xmin, ymin, xmax, ymax),
            'img_id': image_id,
            'img_path': abs_path,
            'img_name': ann['file_name'],
            'width': width,
            'height': height,
            'joint_img': joint_img,
            'joint_vis': joint_vis,
            'joint_cam': joint_cam,
            'root_cam': root_cam,
            'intrinsic_param': intrinsic_param,
            'f': f,
            'c': c
        }
        items.append(abs_path)
        labels.append(label)
        # the activity id is only attached when not in training mode
        if not self._train:
            label['activity_id'] = ann['activity_id']

    return items, labels
def _lazy_load_json(self):
    """Load all image paths and labels from the annotation file into buffers.

    The annotation file is parsed through a ``COCO`` object. Every annotation
    becomes one sample unless its clipped bounding box is less than 5 pixels
    wide or high.

    Returns:
        tuple: ``(items, labels)``; ``items`` is a list of absolute image
        paths and ``labels`` is the parallel list of label dicts. ``img_id``
        in each label is the running index of the kept samples.

    Raises:
        ValueError: if a joint array has neither 24 * 3 nor 29 * 3 values
            (raised by the reshape).
    """

    def _as_29_joints(values, dtype=None):
        """Return ``values`` as a (29, 3) array, zero-padding 24-joint input."""
        joints = np.array(values, dtype=dtype)
        if joints.size == 24 * 3:
            padded = np.zeros((29, 3))
            padded[:24, :] = joints.reshape(24, 3)
            return padded
        return joints.reshape(29, 3)

    items = []
    labels = []

    db = COCO(self._ann_file)
    cnt = 0

    for ann in db.anns.values():
        img_id = ann['image_id']
        img = db.loadImgs(img_id)[0]
        width, height = img['width'], img['height']

        sequence_name = img['sequence']
        img_name = img['file_name']
        abs_path = os.path.join(
            self._root, 'imageFiles', sequence_name, img_name)

        beta = np.array(ann['smpl_param']['shape']).reshape(10)
        theta = np.array(ann['smpl_param']['pose']).reshape(24, 3)

        # (x, y, w, h) -> (xmin, ymin, xmax, ymax), clipped with the image size
        xmin, ymin, xmax, ymax = bbox_clip_xyxy(
            bbox_xywh_to_xyxy(ann['bbox']), width, height)
        # drop boxes that are less than 5 pixels wide or high after clipping
        if xmin > xmax - 5 or ymin > ymax - 5:
            continue

        f = np.array(img['cam_param']['focal'], dtype=np.float32)
        c = np.array(img['cam_param']['princpt'], dtype=np.float32)

        joint_cam_17 = np.array(
            ann['h36m_joints'], dtype=np.float32).reshape(17, 3)
        joint_vis_17 = np.ones((17, 3))
        # no image-space values are computed for the 17-joint set here
        joint_img_17 = np.zeros((17, 3))
        joint_relative_17 = joint_cam_17 - joint_cam_17[self.root_idx_17, :]

        joint_cam_29 = _as_29_joints(ann['smpl_joint_cam'])

        # Fix: the image-space joints used to be reshaped to (24, 3) before
        # the size check, which made the 29-joint branch unreachable and
        # raised on 29-joint data. They now follow the same 24/29 handling
        # as the camera-space joints.
        joint_img_29 = _as_29_joints(ann['smpl_joint_img'], dtype=np.float32)
        # third column becomes relative to the root joint's own value
        joint_img_29[:, 2] = joint_img_29[:, 2] - joint_img_29[
            self.root_idx_smpl, 2]

        # only the first 24 joints are flagged in the 29-joint mask
        joint_vis_24 = np.ones((24, 3))
        joint_vis_29 = np.zeros((29, 3))
        joint_vis_29[:24, :] = joint_vis_24

        root_cam = joint_cam_29[self.root_idx_smpl]

        items.append(abs_path)
        labels.append({
            'bbox': (xmin, ymin, xmax, ymax),
            'img_id': cnt,
            'img_path': abs_path,
            'img_name': img_name,
            'width': width,
            'height': height,
            'joint_img_17': joint_img_17,
            'joint_vis_17': joint_vis_17,
            'joint_cam_17': joint_cam_17,
            'joint_relative_17': joint_relative_17,
            'joint_img_29': joint_img_29,
            'joint_vis_29': joint_vis_29,
            'joint_cam_29': joint_cam_29,
            'beta': beta,
            'theta': theta,
            'root_cam': root_cam,
            'f': f,
            'c': c
        })
        cnt += 1

    return items, labels
def _check_load_keypoints(self, coco, entry):
    """Check and load the ground-truth keypoints of one image.

    Args:
        coco: annotation index; ``getAnnIds`` and ``loadAnns`` are called.
        entry (dict): image record; ``id``, ``width`` and ``height`` are read.

    Returns:
        list[dict]: one label dict per accepted object, with ``bbox``,
        ``width``, ``height``, ``joints_3d``, ``segmentation`` and
        ``keypoints``. When nothing is accepted and ``self._skip_empty`` is
        false, a single dummy label is returned (``bbox`` of
        ``[-1, -1, 0, 0]``, all-zero ``joints_3d``); otherwise the list is
        empty.
    """
    ann_ids = coco.getAnnIds(imgIds=entry['id'], iscrowd=False)
    objs = coco.loadAnns(ann_ids)

    valid_objs = []
    width = entry['width']
    height = entry['height']

    for obj in objs:
        contiguous_cid = self.json_id_to_contiguous[obj['category_id']]
        if contiguous_cid >= self.num_class:
            # not class of interest
            continue
        # an empty keypoint list carries no ground truth; skip it instead of
        # letting max() raise on an empty sequence
        if len(obj['keypoints']) == 0 or max(obj['keypoints']) == 0:
            continue
        # convert from (x, y, w, h) to (xmin, ymin, xmax, ymax) and clip bound
        xmin, ymin, xmax, ymax = bbox_clip_xyxy(
            bbox_xywh_to_xyxy(obj['bbox']), width, height)
        # require non-zero box area
        if obj['area'] <= 0 or xmax <= xmin or ymax <= ymin:
            continue
        if obj['num_keypoints'] == 0:
            continue

        # joints 3d: (num_joints, 3, 2); 3 is for x, y, z;
        # 2 is for position, visibility
        joints_3d = np.zeros((self.num_joints, 3, 2), dtype=np.float32)
        for i in range(self.num_joints):
            joints_3d[i, 0, 0] = obj['keypoints'][i * 3 + 0]
            joints_3d[i, 1, 0] = obj['keypoints'][i * 3 + 1]
            # the z position and z visibility stay 0
            visible = min(1, obj['keypoints'][i * 3 + 2])
            joints_3d[i, :2, 1] = visible

        if np.sum(joints_3d[:, 0, 1]) < 1:
            # no visible keypoint
            continue

        if self._check_centers and self._train:
            bbox_center, bbox_area = self._get_box_center_area(
                (xmin, ymin, xmax, ymax))
            kp_center, num_vis = self._get_keypoints_center_count(joints_3d)
            # score decays with the squared distance between box centre and
            # keypoint centre, scaled by the box area
            ks = np.exp(
                -2 * np.sum(np.square(bbox_center - kp_center)) / bbox_area)
            if (num_vis / 80.0 + 47 / 80.0) > ks:
                continue

        valid_objs.append({
            'bbox': (xmin, ymin, xmax, ymax),
            'width': width,
            'height': height,
            'joints_3d': joints_3d,
            'segmentation': obj['segmentation'],
            'keypoints': obj['keypoints'],
        })

    if not valid_objs:
        if not self._skip_empty:
            # dummy invalid labels if no valid objects are found.
            # Fix: the dummy joints now use the same (num_joints, 3, 2)
            # layout as real labels (it used to be (num_joints, 2, 2)).
            valid_objs.append({
                'bbox': np.array([-1, -1, 0, 0]),
                'width': width,
                'height': height,
                'joints_3d': np.zeros(
                    (self.num_joints, 3, 2), dtype=np.float32)
            })
    return valid_objs
def _load_jsons(self):
    """Load all image paths and labels from JSON annotation files into buffers.

    Reads ``self._ann_file`` and, when ``self._det_bbox_file`` is set, a
    detection-box file under ``<root>/annotations``. Records whose file name
    contains any entry of ``self.block_list`` are skipped.

    Returns:
        tuple: ``(items, labels)``; ``items`` is a list of absolute image
        paths and ``labels`` is the parallel list of label dicts.
    """
    items = []
    labels = []

    with open(self._ann_file, 'r') as fid:
        database = json.load(fid)

    det_bbox_set = {}
    if self._det_bbox_file is not None:
        det_path = os.path.join(
            self._root, 'annotations',
            self._det_bbox_file + f'_protocol_{self.protocol}.json')
        # Fix: the file used to be opened inline and never closed.
        with open(det_path, 'r') as det_fid:
            bbox_list = json.load(det_fid)
        for item in bbox_list:
            det_bbox_set[item['image_id']] = item['bbox']

    # image records and annotation records are paired by position
    for ann_image, ann_annotations in zip(database['images'],
                                          database['annotations']):
        # one flat record; annotation fields win when a key appears in both
        ann = {**ann_image, **ann_annotations}

        if any(name in ann['file_name'] for name in self.block_list):
            continue

        image_id = ann['image_id']
        width, height = ann['width'], ann['height']

        if self._det_bbox_file is not None:
            # NOTE(review): boxes are stored under item['image_id'] but looked
            # up by ann['file_name'] -- presumably the detection file puts
            # file names in 'image_id'; confirm against the data.
            xmin, ymin, xmax, ymax = bbox_clip_xyxy(
                bbox_xywh_to_xyxy(det_bbox_set[ann['file_name']]),
                width, height)
        else:
            xmin, ymin, xmax, ymax = bbox_clip_xyxy(
                bbox_xywh_to_xyxy(ann['bbox']), width, height)

        f = np.array(ann['cam_param']['f'], dtype=np.float32)
        c = np.array(ann['cam_param']['c'], dtype=np.float32)

        joint_cam_17 = np.array(ann['h36m_joints']).reshape(17, 3)
        joint_cam = np.array(ann['smpl_joints'])
        if joint_cam.size == 24 * 3:
            # zero-pad a 24-joint annotation to the 29-joint layout
            joint_cam_29 = np.zeros((29, 3))
            joint_cam_29[:24, :] = joint_cam.reshape(24, 3)
        else:
            joint_cam_29 = joint_cam.reshape(29, 3)

        beta = np.array(ann['betas'])
        theta = np.array(ann['thetas']).reshape(self.num_thetas, 3)

        joint_img_17 = cam2pixel(joint_cam_17, f, c)
        # third column becomes relative to the root joint's camera-space value
        joint_img_17[:, 2] = joint_img_17[:, 2] - joint_cam_17[
            self.root_idx_17, 2]
        joint_relative_17 = joint_cam_17 - joint_cam_17[self.root_idx_17, :]

        joint_img_29 = cam2pixel(joint_cam_29, f, c)
        joint_img_29[:, 2] = joint_img_29[:, 2] - joint_cam_29[
            self.root_idx_smpl, 2]

        joint_vis_17 = np.ones((17, 3))
        joint_vis_29 = np.ones((29, 3))

        root_cam = np.array(ann['root_coord'])

        abs_path = os.path.join(self._root, 'images', ann['file_name'])

        if 'angle_twist' in ann:
            twist = ann['angle_twist']
            angle = np.array(twist['angle'])
            cos = np.array(twist['cos'])
            sin = np.array(twist['sin'])
            # NOTE(review): these checks are one-sided (no absolute value), so
            # large negative differences pass -- left unchanged; confirm
            # whether that is intended.
            assert (np.cos(angle) - cos < 1e-6).all(), np.cos(angle) - cos
            assert (np.sin(angle) - sin < 1e-6).all(), np.sin(angle) - sin
            phi = np.stack((cos, sin), axis=1)
            phi_weight = np.ones_like(phi)
        else:
            # no twist annotation: zero targets with zero weights
            phi = np.zeros((23, 2))
            phi_weight = np.zeros_like(phi)

        items.append(abs_path)
        labels.append({
            'bbox': (xmin, ymin, xmax, ymax),
            'img_id': image_id,
            'img_path': abs_path,
            'width': width,
            'height': height,
            'joint_img_17': joint_img_17,
            'joint_vis_17': joint_vis_17,
            'joint_cam_17': joint_cam_17,
            'joint_relative_17': joint_relative_17,
            'joint_img_29': joint_img_29,
            'joint_vis_29': joint_vis_29,
            'joint_cam_29': joint_cam_29,
            'twist_phi': phi,
            'twist_weight': phi_weight,
            'beta': beta,
            'theta': theta,
            'root_cam': root_cam,
            'f': f,
            'c': c
        })

    return items, labels