def load_data(self):
    """Collect one sample per usable person annotation of the COCO keypoint set.

    The annotation file is ``person_keypoints_train2017.json`` when
    ``self.data_split == 'train'`` and ``person_keypoints_val2017.json`` for
    any other split. An annotation is skipped when its image is missing from
    the index, when it is flagged ``iscrowd``, when ``num_keypoints`` is 0,
    or when ``process_bbox`` returns ``None`` for its box.

    Returns:
        list[dict]: one dict per kept annotation with the keys ``img_path``,
        ``image_id``, ``bbox``, ``area``, ``root_img``, ``root_vis`` and
        ``f``.
    """
    name = 'train2017' if self.data_split == 'train' else 'val2017'
    db = COCO(
        osp.join(self.annot_path, 'person_keypoints_' + name + '.json'))

    data = []
    for ann in db.anns.values():
        image_id = ann['image_id']

        # Filter BEFORE touching the image record: looking up an image id
        # that is absent from the index fails, so the membership test has to
        # come first for it to have any effect.
        if (image_id not in db.imgs) or ann['iscrowd'] or (
                ann['num_keypoints'] == 0):
            continue

        img = db.loadImgs(image_id)[0]
        width, height = img['width'], img['height']

        bbox = process_bbox(ann['bbox'], width, height)
        if bbox is None:
            continue
        area = bbox[2] * bbox[3]

        # Rows of (x, y, visibility flag).
        joint_img = np.array(ann['keypoints']).reshape(-1, 3)

        # Thorax: midpoint of the shoulders; its flag is the product of the
        # two shoulder flags, so it is positive only when both are.
        thorax = (joint_img[self.lshoulder_idx, :] +
                  joint_img[self.rshoulder_idx, :]) * 0.5
        thorax[2] = (joint_img[self.lshoulder_idx, 2] *
                     joint_img[self.rshoulder_idx, 2])
        thorax = thorax.reshape((1, 3))

        # Pelvis: midpoint of the hips, flag built the same way.
        pelvis = (joint_img[self.lhip_idx, :] +
                  joint_img[self.rhip_idx, :]) * 0.5
        pelvis[2] = joint_img[self.lhip_idx, 2] * joint_img[self.rhip_idx, 2]
        pelvis = pelvis.reshape((1, 3))

        joint_img = np.concatenate((joint_img, thorax, pelvis), axis=0)

        # Turn the flag column into a boolean mask, then zero it so the
        # third coordinate of every joint is 0.
        joint_vis = (joint_img[:, 2].copy().reshape(-1, 1) > 0)
        joint_img[:, 2] = 0

        root_img = joint_img[self.root_idx]
        root_vis = joint_vis[self.root_idx]

        imgname = osp.join(name, img['file_name'])
        img_path = osp.join(self.img_dir, imgname)
        data.append({
            'img_path': img_path,
            'image_id': image_id,
            'bbox': bbox,
            'area': area,
            'root_img': root_img,  # [org_img_x, org_img_y, 0]
            'root_vis': root_vis,
            'f': np.array([1500, 1500])  # dummy value
        })

    return data
def load_data(self):
    """Build the training sample list from ``self.train_annot_path``.

    Only the ``'train'`` split is handled; any other value prints a message
    and trips an assertion. Annotations with ``num_keypoints == 0`` or whose
    box is rejected by ``process_bbox`` (returns ``None``) are skipped.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``bbox``,
        ``joint_img`` and ``joint_vis``.
    """
    if self.data_split == 'train':
        db = COCO(self.train_annot_path)
    else:
        print('Unknown data subset')
        assert 0

    samples = []
    for annotation in db.anns.values():
        image = db.loadImgs(annotation['image_id'])[0]
        img_w = image['width']
        img_h = image['height']

        if annotation['num_keypoints'] == 0:
            continue

        box = process_bbox(annotation['bbox'], img_w, img_h)
        if box is None:
            continue

        # Rows of (x, y, flag); the flag column is split off into its own
        # array and then zeroed in the coordinate array.
        keypoints = np.array(annotation['keypoints']).reshape(
            self.joint_num, 3)
        visibility = keypoints[:, 2].copy().reshape(-1, 1)
        keypoints[:, 2] = 0

        sample = {
            'img_path': osp.join(self.img_dir, image['file_name']),
            'bbox': box,
            'joint_img': keypoints,  # [org_img_x, org_img_y, 0]
            'joint_vis': visibility,
        }
        samples.append(sample)

    return samples
def get_item(path, bbox):
    """Load an image, crop it around ``bbox`` and prepare the network input.

    Two matplotlib windows are shown along the way: the source image with
    the incoming box drawn in red, and the cropped patch.

    Args:
        path: image file path, read with both OpenCV and PIL.
        bbox: box indexed as ``bbox[0..3]``; drawn as (x, y, width, height)
            and then passed through ``process_bbox``.

    Returns:
        tuple: ``(img_patch, k_value, bbox, c)`` - the normalised patch
        produced by the torchvision transform, a one-element float32 array,
        the processed box, and the image centre ``[width/2, height/2]``.
    """
    cvimg = cv2.imread(path,
                       cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Preview: original image with the raw box outlined.
    plt.imshow(Image.open(path))
    outline = Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                        linewidth=1, edgecolor='r', facecolor='none')
    plt.gca().add_patch(outline)
    plt.show()

    height, width, num_channels = cvimg.shape
    bbox = process_bbox(bbox, width, height)
    area = bbox[2] * bbox[3]

    img_patch, _ = generate_patch_image(cvimg, bbox, False, 0)

    # Preview: the cropped patch.
    plt.imshow(img_patch.astype(np.uint8))
    plt.show()

    # Per-channel scaling (all factors are 1.0) followed by clamping to
    # the 0..255 range.
    color_scale = [1.0, 1.0, 1.0]
    for channel in range(num_channels):
        scaled = img_patch[:, :, channel] * color_scale[channel]
        img_patch[:, :, channel] = np.clip(scaled, 0, 255)

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225))
    transform = transforms.Compose([to_tensor, normalize])
    img_patch = transform(img_patch)

    f = np.array([1500, 1500])
    numerator = cfg.bbox_real[0] * cfg.bbox_real[1] * f[0] * f[1]
    k_value = np.array([math.sqrt(numerator / (area))]).astype(np.float32)
    print(k_value)

    c = np.array([width * 0.5, height * 0.5])
    return img_patch, k_value, bbox, c
def load_data(self):
    """Build the sample list from ``3DPW_<data_split>.json`` in ``self.data_path``.

    For every annotation the camera-space joints are projected with
    ``cam2pixel`` and the root joint's image position, camera position and
    in-frame flag are recorded. Annotations whose box is rejected by
    ``process_bbox`` (returns ``None``) are skipped.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``img_id``,
        ``ann_id``, ``bbox``, ``area``, ``root_img``, ``root_cam``,
        ``root_vis``, ``f`` and ``c``.
    """
    annot_file = osp.join(self.data_path,
                          '3DPW_' + self.data_split + '.json')
    db = COCO(annot_file)

    datalist = []
    for aid, ann in db.anns.items():
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]
        img_width = img['width']
        img_height = img['height']

        img_path = osp.join(self.data_path, 'imageFiles', img['sequence'],
                            img['file_name'])

        # Every camera parameter becomes a float32 array.
        cam_param = {}
        for key, value in img['cam_param'].items():
            cam_param[key] = np.array(value, dtype=np.float32)
        focal = cam_param['focal']
        princpt = cam_param['princpt']

        joint_cam = np.array(ann['joint_cam'],
                             dtype=np.float32).reshape(-1, 3)
        joint_img = cam2pixel(joint_cam, focal, princpt)

        # 1.0 where the projected joint lies inside the image, else 0.0.
        xs = joint_img[:, 0]
        ys = joint_img[:, 1]
        inside = (xs >= 0) & (xs < img_width) & (ys >= 0) & (ys < img_height)
        joint_valid = inside.astype(np.float32)

        root_cam = joint_cam[self.root_idx]
        root_img = joint_img[self.root_idx]
        root_vis = joint_valid[self.root_idx]

        bbox = process_bbox(ann['bbox'], img_width, img_height)
        if bbox is None:
            continue

        datalist.append({
            'img_path': img_path,
            'img_id': image_id,
            'ann_id': aid,
            'bbox': bbox,
            'area': bbox[2] * bbox[3],
            'root_img': root_img,
            'root_cam': root_cam,
            'root_vis': root_vis,
            'f': focal,
            'c': princpt
        })

    return datalist
def load_data(self):
    """Build the training sample list from ``self.train_annot_path``.

    Per image, the person whose root joint has the smallest depth is always
    kept. Any other person is kept only when their root joint is at least
    500 away from every other person's root, both in the first two
    camera coordinates and in all three. Kept people whose box is rejected
    by ``process_bbox`` (returns ``None``) are dropped.

    Only the ``'train'`` split is handled; any other value prints a message
    and trips an assertion.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``bbox``,
        ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
        and ``c``.
    """
    if self.data_split == 'train':
        db = COCO(self.train_annot_path)
    else:
        print('Unknown data subset')
        assert 0

    data = []
    for img in db.imgs.values():
        img_id = img["id"]
        img_width, img_height = img['width'], img['height']
        img_path = osp.join(self.img_dir, img['file_name'])
        f = img["f"]
        c = img["c"]

        anns = db.loadAnns(db.getAnnIds(img_id))
        num_person = len(anns)

        # The person closest to the camera is always selected.
        root_depths = [
            ann['keypoints_cam'][self.root_idx][2] for ann in anns
        ]
        closest_pid = root_depths.index(min(root_depths))
        pid_list = [closest_pid]

        # Everyone else is selected only if nobody stands near them.
        for cand in range(num_person):
            if cand == closest_pid:
                continue
            cand_root = np.array(anns[cand]['keypoints_cam'][self.root_idx])

            isolated = True
            for other in range(num_person):
                if other == cand:
                    continue
                other_root = np.array(
                    anns[other]['keypoints_cam'][self.root_idx])
                sq_diff = (cand_root - other_root) ** 2
                dist_2d = math.sqrt(np.sum(sq_diff[:2]))
                dist_3d = math.sqrt(np.sum(sq_diff))
                if dist_2d < 500 or dist_3d < 500:
                    isolated = False

            if isolated:
                pid_list.append(cand)

        for pid in pid_list:
            person = anns[pid]
            joint_cam = np.array(person['keypoints_cam'])
            root_cam = joint_cam[self.root_idx]

            # Append the camera-space depth as a third column and make it
            # relative to the root joint's depth.
            joint_img = np.concatenate(
                [np.array(person['keypoints_img']), joint_cam[:, 2:]],
                axis=1)
            joint_img[:, 2] = joint_img[:, 2] - root_cam[2]

            joint_vis = np.ones((self.joint_num, 1))

            bbox = process_bbox(person['bbox'], img_width, img_height)
            if bbox is None:
                continue

            data.append({
                'img_path': img_path,
                'bbox': bbox,
                'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
                'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                'joint_vis': joint_vis,
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'f': f,
                'c': c
            })

    return data
def load_data(self):
    """Build the Human3.6M sample list with full joint annotations.

    Per-subject ``_data``, ``_camera`` and ``_joint_3d`` JSON files under
    ``self.annot_path`` are merged into one COCO index. Frames are kept
    when their subject is in ``self.get_subject()`` and their frame index
    is a multiple of ``self.get_subsampling_ratio()``.

    On the ``'test'`` split with ``cfg.use_gt_info`` unset, boxes and root
    positions are read from the JSON file ``self.human_bbox_root_dir``;
    otherwise the annotated box goes through ``process_bbox`` (samples for
    which it returns ``None`` are skipped) and the root comes from the
    projected joints.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``img_id``, ``bbox``,
        ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
        and ``c``.
    """
    print('Load data of H36M Protocol ' + str(self.protocol))

    subject_list = self.get_subject()
    sampling_ratio = self.get_subsampling_ratio()

    # Merge the per-subject annotation files into a single index.
    db = COCO()
    cameras = {}
    joints = {}
    for subject in subject_list:
        subject_key = str(subject)
        file_prefix = osp.join(self.annot_path,
                               'Human36M_subject' + subject_key)

        with open(file_prefix + '_data.json', 'r') as fp:
            annot = json.load(fp)
        if db.dataset:
            # Later subjects extend the lists created by the first one.
            for key, value in annot.items():
                db.dataset[key] += value
        else:
            for key, value in annot.items():
                db.dataset[key] = value

        with open(file_prefix + '_camera.json', 'r') as fp:
            cameras[subject_key] = json.load(fp)

        with open(file_prefix + '_joint_3d.json', 'r') as fp:
            joints[subject_key] = json.load(fp)
    db.createIndex()

    use_external = self.data_split == 'test' and not cfg.use_gt_info
    if use_external:
        print("Get bounding box and root from " + self.human_bbox_root_dir)
        with open(self.human_bbox_root_dir) as fp:
            annot = json.load(fp)
        bbox_root_result = {}
        for entry in annot:
            bbox_root_result[str(entry['image_id'])] = {
                'bbox': np.array(entry['bbox']),
                'root': np.array(entry['root_cam'])
            }
    else:
        print("Get bounding box and root from groundtruth")

    data = []
    for ann in db.anns.values():
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]
        img_path = osp.join(self.img_dir, img['file_name'])
        img_width, img_height = img['width'], img['height']

        # Keep only the requested subjects and every sampling_ratio-th frame.
        subject = img['subject']
        frame_idx = img['frame_idx']
        if subject not in subject_list:
            continue
        if frame_idx % sampling_ratio != 0:
            continue

        # Camera parameters as float32 arrays.
        cam_param = cameras[str(subject)][str(img['cam_idx'])]
        R = np.array(cam_param['R'], dtype=np.float32)
        t = np.array(cam_param['t'], dtype=np.float32)
        f = np.array(cam_param['f'], dtype=np.float32)
        c = np.array(cam_param['c'], dtype=np.float32)

        # World coordinates -> camera coordinates -> image coordinates.
        action_key = str(img['action_idx'])
        subaction_key = str(img['subaction_idx'])
        frame_joints = joints[str(subject)][action_key][subaction_key][str(
            frame_idx)]
        joint_world = np.array(frame_joints, dtype=np.float32)
        joint_world = self.add_thorax(joint_world)
        joint_cam = world2cam(joint_world, R, t)
        joint_img = cam2pixel(joint_cam, f, c)
        # Third column becomes relative to the root joint's camera depth.
        joint_img[:, 2] = joint_img[:, 2] - joint_cam[self.root_idx, 2]
        joint_vis = np.ones((self.joint_num, 1))

        if use_external:
            external = bbox_root_result[str(image_id)]
            # bbox should be aspect ratio preserved-extended. It is done in RootNet.
            bbox = external['bbox']
            root_cam = external['root']
        else:
            bbox = process_bbox(np.array(ann['bbox']), img_width,
                                img_height)
            if bbox is None:
                continue
            root_cam = joint_cam[self.root_idx]

        data.append({
            'img_path': img_path,
            'img_id': image_id,
            'bbox': bbox,
            'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
            'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
            'joint_vis': joint_vis,
            'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
            'f': f,
            'c': c
        })

    return data
12572.5966796875 ] # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo) assert len(bbox_list) == len(root_depth_list) person_num = len(bbox_list) # normalized camera intrinsics focal = [1500, 1500] # x-axis, y-axis princpt = [original_img_width / 2, original_img_height / 2] # x-axis, y-axis print('focal length: (' + str(focal[0]) + ', ' + str(focal[1]) + ')') print('principal points: (' + str(princpt[0]) + ', ' + str(princpt[1]) + ')') # for each cropped and resized human image, forward it to PoseNet output_pose_2d_list = [] output_pose_3d_list = [] for n in range(person_num): bbox = process_bbox(np.array(bbox_list[n]), original_img_width, original_img_height) img, img2bb_trans = generate_patch_image(original_img, bbox, False, 1.0, 0.0, False) print(img.shape, person_num) img = transform(img).cuda()[None, :, :, :] # forward with torch.no_grad(): pose_3d = model(img) # x,y: pixel, z: root-relative depth (mm) # inverse affine transform (restore the crop and resize) pose_3d = pose_3d[0].cpu().numpy() pose_3d[:, 0] = pose_3d[:, 0] / cfg.output_shape[1] * cfg.input_shape[1] pose_3d[:, 1] = pose_3d[:, 1] / cfg.output_shape[0] * cfg.input_shape[0] pose_3d_xy1 = np.concatenate( (pose_3d[:, :2], np.ones_like(pose_3d[:, :1])), 1)
def load_data(self):
    """Build the test sample list from the annotation file ``self.test_annot_path``.

    Only ``self.data_split == 'test'`` is supported; any other value prints
    a message and trips an assertion.

    With ``cfg.use_gt_info`` set, every annotation with a non-zero
    ``is_valid`` flag yields a sample built from its own keypoints and box
    (samples whose box ``process_bbox`` rejects with ``None`` are skipped).
    Otherwise boxes and root positions are read from the JSON file
    ``self.human_bbox_root_dir`` and the joint fields are zero-filled
    placeholders.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``bbox``,
        ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
        and ``c``.
    """
    if self.data_split != 'test':
        print('Unknown data subset')
        assert 0

    data = []
    db = COCO(self.test_annot_path)

    if cfg.use_gt_info:
        # use gt bbox and root
        print("Get bounding box and root from groundtruth")
        for ann in db.anns.values():
            if ann['is_valid'] == 0:
                continue

            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            fx, fy, cx, cy = img['intrinsic']
            f = np.array([fx, fy])
            c = np.array([cx, cy])

            joint_cam = np.array(ann['keypoints_cam'])
            root_cam = joint_cam[self.root_idx]

            # Append the camera-space depth as a third column and make it
            # relative to the root joint's depth.
            joint_img = np.array(ann['keypoints_img'])
            joint_img = np.concatenate([joint_img, joint_cam[:, 2:]], 1)
            joint_img[:, 2] = joint_img[:, 2] - root_cam[2]
            joint_vis = np.ones((self.original_joint_num, 1))

            # The box has to be read from the annotation before it is
            # processed; previously `bbox` was used here without ever being
            # assigned in this branch, which failed on the first valid
            # annotation.
            bbox = np.array(ann['bbox'])
            img_width, img_height = img['width'], img['height']
            bbox = process_bbox(bbox, img_width, img_height)
            if bbox is None:
                continue

            data.append({
                'img_path': img_path,
                'bbox': bbox,
                'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
                'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                'joint_vis': joint_vis,
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'f': f,
                'c': c,
            })
    else:
        print("Get bounding box and root from " + self.human_bbox_root_dir)
        with open(self.human_bbox_root_dir) as fp:
            annot = json.load(fp)

        for entry in annot:
            image_id = entry['image_id']
            img = db.loadImgs(image_id)[0]
            img_width, img_height = img['width'], img['height']
            img_path = osp.join(self.img_dir, img['file_name'])
            fx, fy, cx, cy = img['intrinsic']
            f = np.array([fx, fy])
            c = np.array([cx, cy])
            root_cam = np.array(entry['root_cam']).reshape(3)
            bbox = np.array(entry['bbox']).reshape(4)

            data.append({
                'img_path': img_path,
                'bbox': bbox,
                'joint_img': np.zeros((self.original_joint_num, 3)),  # dummy
                'joint_cam': np.zeros((self.original_joint_num, 3)),  # dummy
                'joint_vis': np.zeros((self.original_joint_num, 1)),  # dummy
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'f': f,
                'c': c,
            })

    return data
def load_data(self):
    """Build the Human3.6M sample list with root-joint annotations only.

    Per-subject ``_data``, ``_camera`` and ``_joint_3d`` JSON files under
    ``self.annot_path`` are merged into one COCO index. Frames are kept
    when their subject is in ``self.get_subject()`` and their frame index
    is a multiple of ``self.get_subsampling_ratio()``.

    On the ``'test'`` split with ``cfg.use_gt_bbox`` unset, boxes are read
    from the JSON file ``self.human_bbox_dir``; otherwise the annotated box
    is used. Either way the box goes through ``process_bbox`` and the
    sample is skipped when that returns ``None``.

    Returns:
        list[dict]: dicts with the keys ``img_path``, ``img_id``, ``bbox``,
        ``area``, ``root_img``, ``root_cam``, ``root_vis``, ``f`` and ``c``.
    """
    print('Load data of H36M Protocol ' + str(self.protocol))

    subject_list = self.get_subject()
    sampling_ratio = self.get_subsampling_ratio()

    # Merge the per-subject annotation files into a single index.
    db = COCO()
    cameras = {}
    joints = {}
    for subject in subject_list:
        subject_key = str(subject)
        file_prefix = osp.join(self.annot_path,
                               'Human36M_subject' + subject_key)

        with open(file_prefix + '_data.json', 'r') as fp:
            annot = json.load(fp)
        if db.dataset:
            # Later subjects extend the lists created by the first one.
            for key, value in annot.items():
                db.dataset[key] += value
        else:
            for key, value in annot.items():
                db.dataset[key] = value

        with open(file_prefix + '_camera.json', 'r') as fp:
            cameras[subject_key] = json.load(fp)

        with open(file_prefix + '_joint_3d.json', 'r') as fp:
            joints[subject_key] = json.load(fp)
    db.createIndex()

    use_external = self.data_split == 'test' and not cfg.use_gt_bbox
    if use_external:
        print("Get bounding box from " + self.human_bbox_dir)
        with open(self.human_bbox_dir) as fp:
            annot = json.load(fp)
        bbox_result = {}
        for entry in annot:
            bbox_result[str(entry['image_id'])] = np.array(entry['bbox'])
    else:
        print("Get bounding box from groundtruth")

    data = []
    for ann in db.anns.values():
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]
        img_path = osp.join(self.img_dir, img['file_name'])
        img_width, img_height = img['width'], img['height']

        # Keep only the requested subjects and every sampling_ratio-th frame.
        subject = img['subject']
        frame_idx = img['frame_idx']
        if subject not in subject_list:
            continue
        if frame_idx % sampling_ratio != 0:
            continue

        # Camera parameters as float32 arrays.
        cam_param = cameras[str(subject)][str(img['cam_idx'])]
        R = np.array(cam_param['R'], dtype=np.float32)
        t = np.array(cam_param['t'], dtype=np.float32)
        f = np.array(cam_param['f'], dtype=np.float32)
        c = np.array(cam_param['c'], dtype=np.float32)

        # Root joint: world coordinates -> camera -> image.
        action_key = str(img['action_idx'])
        subaction_key = str(img['subaction_idx'])
        frame_joints = joints[str(subject)][action_key][subaction_key][str(
            frame_idx)]
        root_world = np.array(frame_joints,
                              dtype=np.float32)[self.root_idx]
        root_cam = world2cam(root_world[None, :], R, t)[0]
        root_img = cam2pixel(root_cam[None, :], f, c)[0]

        # NOTE: joint_vis is computed here but is not part of the record
        # appended below.
        joint_vis = np.ones((self.joint_num, 1))
        root_vis = np.array(ann['keypoints_vis'])[self.root_idx, None]

        # bbox load
        if use_external:
            raw_bbox = bbox_result[str(image_id)]
        else:
            raw_bbox = np.array(ann['bbox'])
        bbox = process_bbox(raw_bbox, img_width, img_height)
        if bbox is None:
            continue

        data.append({
            'img_path': img_path,
            'img_id': image_id,
            'bbox': bbox,
            'area': bbox[2] * bbox[3],
            'root_img': root_img,  # [org_img_x, org_img_y, depth]
            'root_cam': root_cam,
            'root_vis': root_vis,
            'f': f,
            'c': c
        })

    return data
def load_data(self):
    """Build the test sample list from the annotation file ``self.annot_path``.

    Only ``self.data_split == 'test'`` is supported; any other value prints
    a message and trips an assertion.

    With ``cfg.use_gt_bbox`` set, every annotation with a non-zero
    ``is_valid`` flag yields a sample with its own root joint and a score
    of 1.0. Otherwise boxes and scores come from the JSON file
    ``self.human_bbox_dir`` and the root fields are filled with ones as
    placeholders. In both modes the box goes through ``process_bbox`` and
    the sample is skipped when that returns ``None``.

    Returns:
        list[dict]: dicts with the keys ``image_id``, ``img_path``,
        ``bbox``, ``area``, ``root_img``, ``root_cam``, ``root_vis``, ``f``,
        ``c`` and ``score``.
    """
    if self.data_split != 'test':
        print('Unknown data subset')
        assert 0

    data = []
    db = COCO(self.annot_path)

    if cfg.use_gt_bbox:
        print("Get bounding box from groundtruth")
        for ann in db.anns.values():
            if ann['is_valid'] == 0:
                continue

            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            fx, fy, cx, cy = img['intrinsic']
            focal = np.array([fx, fy])
            princpt = np.array([cx, cy])

            # Image keypoints get the camera-space depth as a third column.
            joint_cam = np.array(ann['keypoints_cam'])
            joint_img = np.concatenate(
                [np.array(ann['keypoints_img']), joint_cam[:, 2:]], 1)
            joint_vis = np.array(ann['keypoints_vis'])

            root_cam = joint_cam[self.root_idx]
            root_img = joint_img[self.root_idx]
            root_vis = joint_vis[self.root_idx, None]

            raw_bbox = np.array(ann['bbox'])
            bbox = process_bbox(raw_bbox, img['width'], img['height'])
            if bbox is None:
                continue

            data.append({
                'image_id': ann['image_id'],
                'img_path': img_path,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'root_img': root_img,  # [org_img_x, org_img_y, depth - root_depth]
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'root_vis': root_vis,
                'f': focal,
                'c': princpt,
                'score': 1.0
            })
    else:
        with open(self.human_bbox_dir) as fp:
            annot = json.load(fp)
        print("Get bounding box from " + self.human_bbox_dir)

        for entry in annot:
            image_id = entry['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            fx, fy, cx, cy = img['intrinsic']
            focal = np.array([fx, fy])
            princpt = np.array([cx, cy])

            raw_bbox = np.array(entry['bbox']).reshape(4)
            bbox = process_bbox(raw_bbox, img['width'], img['height'])
            if bbox is None:
                continue

            data.append({
                'image_id': image_id,
                'img_path': img_path,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'root_img': np.ones(3),  # dummy
                'root_cam': np.ones(3),  # dummy
                'root_vis': np.ones(1),  # dummy
                'f': focal,
                'c': princpt,
                'score': entry['score']
            })

    return data
def load_data(self):
    """Build the sample list for the ``'train'`` or ``'test'`` split.

    ``'train'``: reads ``self.train_annot_path`` and emits one sample per
    person annotation, with Thorax and Pelvis joints appended. An
    annotation is skipped when its image is missing from the index, when it
    is flagged ``iscrowd``, when ``num_keypoints`` is 0, or when
    ``process_bbox`` returns ``None`` for its box.

    ``'test'``: reads ``self.test_annot_path`` for image records and the
    JSON file ``self.human_3d_bbox_root_dir`` for boxes and root positions;
    the joint fields are zero-filled placeholders.

    Any other split prints a message and trips an assertion.

    Returns:
        list[dict]: train samples carry ``img_path``, ``bbox``,
        ``joint_img``, ``joint_vis``, ``f`` and ``c``; test samples carry
        ``img_path``, ``bbox``, ``joint_img``, ``joint_cam``,
        ``joint_vis``, ``root_cam``, ``f`` and ``c``.
    """
    if self.data_split == 'train':
        db = COCO(self.train_annot_path)
        data = []
        for ann in db.anns.values():
            image_id = ann['image_id']

            # Filter BEFORE touching the image record: looking up an image
            # id that is absent from the index fails, so the membership
            # test has to come first for it to have any effect.
            if (image_id not in db.imgs) or ann['iscrowd'] or (
                    ann['num_keypoints'] == 0):
                continue

            img = db.loadImgs(image_id)[0]
            width, height = img['width'], img['height']

            bbox = process_bbox(ann['bbox'], width, height)
            if bbox is None:
                continue

            # Rows of (x, y, visibility flag).
            joint_img = np.array(ann['keypoints']).reshape(-1, 3)

            # Thorax: midpoint of the shoulders; its flag is the product of
            # the two shoulder flags, so it is positive only when both are.
            thorax = (joint_img[self.lshoulder_idx, :] +
                      joint_img[self.rshoulder_idx, :]) * 0.5
            thorax[2] = (joint_img[self.lshoulder_idx, 2] *
                         joint_img[self.rshoulder_idx, 2])
            thorax = thorax.reshape((1, 3))

            # Pelvis: midpoint of the hips, flag built the same way.
            pelvis = (joint_img[self.lhip_idx, :] +
                      joint_img[self.rhip_idx, :]) * 0.5
            pelvis[2] = (joint_img[self.lhip_idx, 2] *
                         joint_img[self.rhip_idx, 2])
            pelvis = pelvis.reshape((1, 3))

            joint_img = np.concatenate((joint_img, thorax, pelvis), axis=0)

            # Turn the flag column into a boolean mask, then zero it so the
            # third coordinate of every joint is 0.
            joint_vis = (joint_img[:, 2].copy().reshape(-1, 1) > 0)
            joint_img[:, 2] = 0

            imgname = osp.join('train2017', img['file_name'])
            img_path = osp.join(self.img_dir, imgname)
            data.append({
                'img_path': img_path,
                'bbox': bbox,
                'joint_img': joint_img,  # [org_img_x, org_img_y, 0]
                'joint_vis': joint_vis,
                'f': np.array([1500, 1500]),
                'c': np.array([width / 2, height / 2])
            })

    elif self.data_split == 'test':
        db = COCO(self.test_annot_path)
        with open(self.human_3d_bbox_root_dir) as fp:
            annot = json.load(fp)

        data = []
        for entry in annot:
            image_id = entry['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, 'val2017', img['file_name'])

            # Fixed focal length of 1500 and the image centre as the
            # principal point.
            fx, fy, cx, cy = 1500, 1500, img['width'] / 2, img['height'] / 2
            f = np.array([fx, fy])
            c = np.array([cx, cy])
            root_cam = np.array(entry['root_cam']).reshape(3)
            bbox = np.array(entry['bbox']).reshape(4)

            data.append({
                'img_path': img_path,
                'bbox': bbox,
                'joint_img': np.zeros((self.original_joint_num, 3)),  # dummy
                'joint_cam': np.zeros((self.original_joint_num, 3)),  # dummy
                'joint_vis': np.zeros((self.original_joint_num, 1)),  # dummy
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'f': f,
                'c': c,
            })

    else:
        print('Unknown data subset')
        assert 0

    return data