Exemplos de process_bbox em Python, exemplos de utils.pose_utils.process_bbox em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: MSCOCO.py Projeto: zoezhu/3DMPPE_ROOTNET_RELEASE

    def load_data(self):
        """Collect one sample per usable person annotation of the COCO keypoint set.

        The annotation file is ``person_keypoints_train2017.json`` when
        ``self.data_split == 'train'`` and ``person_keypoints_val2017.json``
        otherwise. An annotation is skipped when its image is not indexed, when
        it is flagged ``iscrowd``, when ``num_keypoints`` is 0, or when
        ``process_bbox`` returns ``None``.

        Returns:
            list[dict]: one dict per kept annotation with the keys
            ``img_path``, ``image_id``, ``bbox``, ``area``, ``root_img``,
            ``root_vis`` and ``f``.
        """
        name = 'train2017' if self.data_split == 'train' else 'val2017'

        db = COCO(
            osp.join(self.annot_path, 'person_keypoints_' + name + '.json'))
        data = []
        for ann in db.anns.values():
            # Filter before touching the image record: looking up an image id
            # that is not indexed raises, so the membership test has to come
            # first to be able to skip such annotations.
            if (ann['image_id'] not in db.imgs or ann['iscrowd']
                    or ann['num_keypoints'] == 0):
                continue

            img = db.loadImgs(ann['image_id'])[0]
            width, height = img['width'], img['height']

            bbox = process_bbox(ann['bbox'], width, height)
            if bbox is None:
                continue
            area = bbox[2] * bbox[3]

            # joints and vis: rows of (x, y, visibility flag)
            joint_img = np.array(ann['keypoints']).reshape(-1, 3)
            # add Thorax: midpoint of the shoulders; its flag is the product
            # of both shoulder flags, so it is 0 unless both are non-zero
            thorax = (joint_img[self.lshoulder_idx, :] +
                      joint_img[self.rshoulder_idx, :]) * 0.5
            thorax[2] = (joint_img[self.lshoulder_idx, 2] *
                         joint_img[self.rshoulder_idx, 2])
            thorax = thorax.reshape((1, 3))
            # add Pelvis: same construction from the two hips
            pelvis = (joint_img[self.lhip_idx, :] +
                      joint_img[self.rhip_idx, :]) * 0.5
            pelvis[2] = (joint_img[self.lhip_idx, 2] *
                         joint_img[self.rhip_idx, 2])
            pelvis = pelvis.reshape((1, 3))

            joint_img = np.concatenate((joint_img, thorax, pelvis), axis=0)

            # Turn the flag column into a boolean mask, then zero the column.
            joint_vis = (joint_img[:, 2].copy().reshape(-1, 1) > 0)
            joint_img[:, 2] = 0

            root_img = joint_img[self.root_idx]
            root_vis = joint_vis[self.root_idx]

            imgname = osp.join(name, img['file_name'])
            img_path = osp.join(self.img_dir, imgname)
            data.append({
                'img_path': img_path,
                'image_id': ann['image_id'],
                'bbox': bbox,
                'area': area,
                'root_img': root_img,  # [org_img_x, org_img_y, 0]
                'root_vis': root_vis,
                'f': np.array([1500, 1500])  # dummy value
            })

        return data

Exemplo n.º 2

0

Exibir arquivo

    def load_data(self):
        """Turn the training annotations into a list of per-person samples.

        Only ``self.data_split == 'train'`` is supported; any other value
        prints a message and trips an assertion. Annotations with
        ``num_keypoints == 0`` or for which ``process_bbox`` returns ``None``
        are left out.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``bbox``,
            ``joint_img`` and ``joint_vis``.
        """
        if self.data_split == 'train':
            db = COCO(self.train_annot_path)
        else:
            print('Unknown data subset')
            assert 0

        samples = []
        for annotation in db.anns.values():
            image_info = db.loadImgs(annotation['image_id'])[0]
            width = image_info['width']
            height = image_info['height']

            if annotation['num_keypoints'] == 0:
                continue

            bbox = process_bbox(annotation['bbox'], width, height)
            if bbox is None:
                continue

            # Rows of (x, y, flag); the flag column is copied out as a
            # (joint_num, 1) array and then zeroed in the coordinates.
            keypoints = np.array(annotation['keypoints'])
            keypoints = keypoints.reshape(self.joint_num, 3)
            visibility = keypoints[:, 2:3].copy()
            keypoints[:, 2] = 0

            samples.append({
                'img_path': osp.join(self.img_dir, image_info['file_name']),
                'bbox': bbox,
                'joint_img': keypoints,  # [org_img_x, org_img_y, 0]
                'joint_vis': visibility,
            })

        return samples

Exemplo n.º 3

0

Exibir arquivo

Arquivo: inference.py Projeto: yudhik11/Rootnet_3DMPPE

def get_item(path, bbox):
    """Load an image, crop one person box and build the network inputs.

    Both the source image (with the raw box drawn on it) and the cropped
    patch are displayed with matplotlib along the way, so this function
    calls ``plt.show()`` twice.

    Args:
        path: path of the image file.
        bbox: box indexed ``[0]``..``[3]``; it is drawn as
            (x, y, width, height) and then handed to ``process_bbox``.

    Returns:
        tuple: ``(img_patch, k_value, bbox, c)`` where ``img_patch`` is the
        normalized patch produced by the torchvision transform, ``k_value``
        is a one-element float32 array, ``bbox`` is the box returned by
        ``process_bbox`` and ``c`` is the image centre ``[width/2, height/2]``.

    Raises:
        IOError: if OpenCV cannot decode ``path``.
        ValueError: if ``process_bbox`` returns ``None`` for the box.
    """
    cvimg = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Show the input image with the requested box.
    plt.imshow(Image.open(path))
    plt.gca().add_patch(
        Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                  linewidth=1, edgecolor='r', facecolor='none'))
    plt.show()

    # cv2.imread signals failure by returning None instead of raising.
    if cvimg is None:
        raise IOError('Fail to read %s' % path)
    height, width, num_channels = cvimg.shape

    bbox = process_bbox(bbox, width, height)
    if bbox is None:
        raise ValueError('process_bbox rejected the bounding box for %s' % path)
    area = bbox[2] * bbox[3]

    img_patch, trans = generate_patch_image(cvimg, bbox, False, 0)

    # Show the cropped patch.
    tmp_img = img_patch.astype(np.uint8)
    plt.imshow(tmp_img)
    plt.show()

    # With a unit colour scale this only clamps each channel to [0, 255].
    color_scale = [1.0, 1.0, 1.0]
    for i in range(num_channels):
        img_patch[:, :, i] = np.clip(img_patch[:, :, i] * color_scale[i], 0, 255)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225)),
    ])
    img_patch = transform(img_patch)

    # k = sqrt(bbox_real[0] * bbox_real[1] * fx * fy / box area), with a fixed
    # focal length of 1500 on both axes.
    f = np.array([1500, 1500])
    k_value = np.array([
        math.sqrt(cfg.bbox_real[0] * cfg.bbox_real[1] * f[0] * f[1] / area)
    ]).astype(np.float32)
    print(k_value)

    c = np.array([width * 0.5, height * 0.5])
    return img_patch, k_value, bbox, c

Exemplo n.º 4

0

Exibir arquivo

    def load_data(self):
        """Read the 3DPW annotation file of the current split into sample dicts.

        The file ``3DPW_<data_split>.json`` under ``self.data_path`` is loaded
        through ``COCO``. Camera-space joints are projected with ``cam2pixel``
        and a joint counts as valid when its projection falls inside the
        image. Annotations for which ``process_bbox`` returns ``None`` are
        dropped.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``img_id``,
            ``ann_id``, ``bbox``, ``area``, ``root_img``, ``root_cam``,
            ``root_vis``, ``f`` and ``c``.
        """
        annot_file = osp.join(self.data_path,
                              '3DPW_' + self.data_split + '.json')
        db = COCO(annot_file)

        datalist = []
        for aid, ann in db.anns.items():
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_width = img['width']
            img_height = img['height']
            img_path = osp.join(self.data_path, 'imageFiles', img['sequence'],
                                img['file_name'])

            # Every camera parameter becomes a float32 array.
            cam_param = {}
            for key, value in img['cam_param'].items():
                cam_param[key] = np.array(value, dtype=np.float32)
            focal = cam_param['focal']
            princpt = cam_param['princpt']

            joint_cam = np.array(ann['joint_cam'], dtype=np.float32)
            joint_cam = joint_cam.reshape(-1, 3)
            joint_img = cam2pixel(joint_cam, focal, princpt)

            # 1.0 where the projected joint lies inside the image, else 0.0.
            xs = joint_img[:, 0]
            ys = joint_img[:, 1]
            inside = ((xs >= 0) * (xs < img_width) *
                      (ys >= 0) * (ys < img_height))
            joint_valid = inside.astype(np.float32)

            root_cam = joint_cam[self.root_idx]
            root_img = joint_img[self.root_idx]
            root_vis = joint_valid[self.root_idx]

            bbox = process_bbox(ann['bbox'], img_width, img_height)
            if bbox is None:
                continue

            datalist.append({
                'img_path': img_path,
                'img_id': image_id,
                'ann_id': aid,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'root_img': root_img,
                'root_cam': root_cam,
                'root_vis': root_vis,
                'f': focal,
                'c': princpt
            })

        return datalist

Exemplo n.º 5

0

Exibir arquivo

Arquivo: MuCo.py Projeto: SangbumChoi/MobileHumanPose

    def load_data(self):
        """Build the training samples, selecting a subset of people per image.

        Only ``self.data_split == 'train'`` is supported; any other value
        prints a message and trips an assertion.

        For every image the person whose root joint has the smallest camera
        depth is always selected. Any other person is selected only if their
        root joint is at least 500 away from every other person's root joint,
        both in the x/y plane and in 3D (in the units of ``keypoints_cam``).
        Images without annotations are skipped, and so is any selected person
        for whom ``process_bbox`` returns ``None``.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``bbox``,
            ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
            and ``c``.
        """
        if self.data_split == 'train':
            db = COCO(self.train_annot_path)
        else:
            print('Unknown data subset')
            assert 0

        data = []
        for img in db.imgs.values():
            img_id = img["id"]
            img_width, img_height = img['width'], img['height']
            img_path = osp.join(self.img_dir, img['file_name'])
            f = img["f"]
            c = img["c"]

            anns = db.loadAnns(db.getAnnIds(img_id))
            if not anns:
                # min() below would raise ValueError on an empty list.
                continue

            # Root joint of every person, converted once instead of once per
            # pairwise comparison.
            roots = [
                np.array(ann['keypoints_cam'][self.root_idx]) for ann in anns
            ]

            # crop the closest person to the camera
            root_depths = [
                ann['keypoints_cam'][self.root_idx][2] for ann in anns
            ]
            closest_pid = root_depths.index(min(root_depths))
            pid_list = [closest_pid]
            for i in range(len(anns)):
                if i == closest_pid:
                    continue
                too_close = False
                for j in range(len(anns)):
                    if i == j:
                        continue
                    dist = (roots[i] - roots[j]) ** 2
                    dist_2d = math.sqrt(np.sum(dist[:2]))
                    dist_3d = math.sqrt(np.sum(dist))
                    if dist_2d < 500 or dist_3d < 500:
                        # One conflict is enough to reject person i.
                        too_close = True
                        break
                if not too_close:
                    pid_list.append(i)

            for pid in pid_list:
                joint_cam = np.array(anns[pid]['keypoints_cam'])
                root_cam = joint_cam[self.root_idx]

                # Append the camera depth as a third column, then make it
                # relative to the root joint.
                joint_img = np.array(anns[pid]['keypoints_img'])
                joint_img = np.concatenate([joint_img, joint_cam[:, 2:]], 1)
                joint_img[:, 2] = joint_img[:, 2] - root_cam[2]
                joint_vis = np.ones((self.joint_num, 1))

                bbox = process_bbox(anns[pid]['bbox'], img_width, img_height)
                if bbox is None:
                    continue

                data.append({
                    'img_path': img_path,
                    'bbox': bbox,
                    'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
                    'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                    'joint_vis': joint_vis,
                    'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                    'f': f,
                    'c': c
                })

        return data

Exemplo n.º 6

0

Exibir arquivo

Arquivo: Human36M.py Projeto: ChenyanWu/3DMPPE_POSENET_RELEASE

    def load_data(self):
        """Assemble the Human3.6M samples for the configured protocol and split.

        Per-subject annotation, camera and 3D-joint JSON files are read from
        ``self.annot_path``; the annotation files are merged into one ``COCO``
        index. When the split is ``'test'`` and ``cfg.use_gt_info`` is false,
        boxes and root positions are taken from the JSON file at
        ``self.human_bbox_root_dir``; otherwise the ground-truth box goes
        through ``process_bbox`` (annotations for which it returns ``None``
        are dropped) and the root comes from the projected joints.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``img_id``, ``bbox``,
            ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
            and ``c``.
        """
        print('Load data of H36M Protocol ' + str(self.protocol))

        subject_list = self.get_subject()
        sampling_ratio = self.get_subsampling_ratio()

        # aggregate annotations from each subject
        db = COCO()
        cameras = {}
        joints = {}
        for subject in subject_list:
            prefix = osp.join(self.annot_path,
                              'Human36M_subject' + str(subject))

            # annotations: the first subject seeds the dataset, later
            # subjects are appended key by key
            with open(prefix + '_data.json', 'r') as fp:
                annot = json.load(fp)
            seeding = len(db.dataset) == 0
            for key, value in annot.items():
                if seeding:
                    db.dataset[key] = value
                else:
                    db.dataset[key] += value

            # camera parameters
            with open(prefix + '_camera.json', 'r') as fp:
                cameras[str(subject)] = json.load(fp)

            # 3D joint coordinates
            with open(prefix + '_joint_3d.json', 'r') as fp:
                joints[str(subject)] = json.load(fp)
        db.createIndex()

        use_detection = self.data_split == 'test' and not cfg.use_gt_info
        if use_detection:
            print("Get bounding box and root from " + self.human_bbox_root_dir)
            bbox_root_result = {}
            with open(self.human_bbox_root_dir) as fp:
                annot = json.load(fp)
            for entry in annot:
                bbox_root_result[str(entry['image_id'])] = {
                    'bbox': np.array(entry['bbox']),
                    'root': np.array(entry['root_cam'])
                }
        else:
            print("Get bounding box and root from groundtruth")

        data = []
        for ann in db.anns.values():
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            img_width = img['width']
            img_height = img['height']

            # keep only the requested subjects and every sampling_ratio-th frame
            subject = img['subject']
            frame_idx = img['frame_idx']
            if subject not in subject_list:
                continue
            if frame_idx % sampling_ratio != 0:
                continue

            # camera parameter
            cam_param = cameras[str(subject)][str(img['cam_idx'])]
            rotation = np.array(cam_param['R'], dtype=np.float32)
            translation = np.array(cam_param['t'], dtype=np.float32)
            focal = np.array(cam_param['f'], dtype=np.float32)
            princpt = np.array(cam_param['c'], dtype=np.float32)

            # project world coordinate to cam, image coordinate space
            action_key = str(img['action_idx'])
            subaction_key = str(img['subaction_idx'])
            frame_key = str(img['frame_idx'])
            frame_joints = joints[str(subject)][action_key][subaction_key]
            joint_world = np.array(frame_joints[frame_key], dtype=np.float32)
            joint_world = self.add_thorax(joint_world)
            joint_cam = world2cam(joint_world, rotation, translation)
            joint_img = cam2pixel(joint_cam, focal, princpt)
            # third column becomes depth relative to the root joint
            joint_img[:, 2] = joint_img[:, 2] - joint_cam[self.root_idx, 2]
            joint_vis = np.ones((self.joint_num, 1))

            if use_detection:
                # bbox should be aspect ratio preserved-extended. It is done in RootNet.
                detection = bbox_root_result[str(image_id)]
                bbox = detection['bbox']
                root_cam = detection['root']
            else:
                bbox = process_bbox(np.array(ann['bbox']), img_width,
                                    img_height)
                if bbox is None:
                    continue
                root_cam = joint_cam[self.root_idx]

            data.append({
                'img_path': img_path,
                'img_id': image_id,
                'bbox': bbox,
                'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
                'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                'joint_vis': joint_vis,
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'f': focal,
                'c': princpt
            })

        return data

Exemplo n.º 7

0

Exibir arquivo

    12572.5966796875
]  # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
# Every person needs both a bounding box and a root depth.
assert len(bbox_list) == len(root_depth_list)
person_num = len(bbox_list)

# normalized camera intrinsics
# (fixed focal length, principal point at the image centre)
focal = [1500, 1500]  # x-axis, y-axis
princpt = [original_img_width / 2, original_img_height / 2]  # x-axis, y-axis
print('focal length: (' + str(focal[0]) + ', ' + str(focal[1]) + ')')
print('principal points: (' + str(princpt[0]) + ', ' + str(princpt[1]) + ')')

# for each cropped and resized human image, forward it to PoseNet
output_pose_2d_list = []
output_pose_3d_list = []
for n in range(person_num):
    # NOTE(review): the result of process_bbox is used unchecked here;
    # confirm it cannot return None for these boxes.
    bbox = process_bbox(np.array(bbox_list[n]), original_img_width,
                        original_img_height)
    img, img2bb_trans = generate_patch_image(original_img, bbox, False, 1.0,
                                             0.0, False)
    print(img.shape, person_num)
    # Add a leading batch axis and move the patch to the GPU.
    img = transform(img).cuda()[None, :, :, :]

    # forward
    with torch.no_grad():
        pose_3d = model(img)  # x,y: pixel, z: root-relative depth (mm)

    # inverse affine transform (restore the crop and resize)
    pose_3d = pose_3d[0].cpu().numpy()
    # Rescale x and y from output_shape to input_shape resolution.
    pose_3d[:, 0] = pose_3d[:, 0] / cfg.output_shape[1] * cfg.input_shape[1]
    pose_3d[:, 1] = pose_3d[:, 1] / cfg.output_shape[0] * cfg.input_shape[0]
    # Append a column of ones to the x/y coordinates (homogeneous form).
    pose_3d_xy1 = np.concatenate(
        (pose_3d[:, :2], np.ones_like(pose_3d[:, :1])), 1)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: MuPoTS.py Projeto: zhangyahu1/3DMPPE_POSENET_RELEASE

    def load_data(self):
        """Build the test samples, from ground truth or from stored detections.

        Only ``self.data_split == 'test'`` is supported; any other value
        prints a message and trips an assertion.

        With ``cfg.use_gt_info`` set, every annotation whose ``is_valid`` is
        non-zero yields a sample from its ground-truth joints and bounding
        box (dropped when ``process_bbox`` returns ``None``). Otherwise boxes
        and root positions are read from the JSON file at
        ``self.human_bbox_root_dir`` and the joint arrays are zero-filled
        placeholders.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``bbox``,
            ``joint_img``, ``joint_cam``, ``joint_vis``, ``root_cam``, ``f``
            and ``c``.
        """
        if self.data_split != 'test':
            print('Unknown data subset')
            assert 0

        data = []
        db = COCO(self.test_annot_path)

        # use gt bbox and root
        if cfg.use_gt_info:
            print("Get bounding box and root from groundtruth")
            for ann in db.anns.values():
                if ann['is_valid'] == 0:
                    continue

                image_id = ann['image_id']
                img = db.loadImgs(image_id)[0]
                img_path = osp.join(self.img_dir, img['file_name'])
                fx, fy, cx, cy = img['intrinsic']
                f = np.array([fx, fy])
                c = np.array([cx, cy])

                joint_cam = np.array(ann['keypoints_cam'])
                root_cam = joint_cam[self.root_idx]

                # Append the camera depth as a third column, then make it
                # relative to the root joint.
                joint_img = np.array(ann['keypoints_img'])
                joint_img = np.concatenate([joint_img, joint_cam[:, 2:]], 1)
                joint_img[:, 2] = joint_img[:, 2] - root_cam[2]
                joint_vis = np.ones((self.original_joint_num, 1))

                # The ground-truth box has to be read from the annotation
                # before it can be processed; it was previously used without
                # ever being assigned.
                bbox = np.array(ann['bbox'])
                img_width, img_height = img['width'], img['height']
                bbox = process_bbox(bbox, img_width, img_height)
                if bbox is None:
                    continue

                data.append({
                    'img_path': img_path,
                    'bbox': bbox,
                    'joint_img': joint_img,  # [org_img_x, org_img_y, depth - root_depth]
                    'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                    'joint_vis': joint_vis,
                    'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                    'f': f,
                    'c': c,
                })

        else:
            print("Get bounding box and root from " + self.human_bbox_root_dir)
            # The handle gets its own name so it cannot be confused with the
            # focal length `f` assigned inside the loop.
            with open(self.human_bbox_root_dir) as fp:
                annot = json.load(fp)

            for entry in annot:
                image_id = entry['image_id']
                img = db.loadImgs(image_id)[0]
                img_path = osp.join(self.img_dir, img['file_name'])
                fx, fy, cx, cy = img['intrinsic']
                f = np.array([fx, fy])
                c = np.array([cx, cy])
                root_cam = np.array(entry['root_cam']).reshape(3)
                # Stored boxes are used as they are, without process_bbox.
                bbox = np.array(entry['bbox']).reshape(4)

                data.append({
                    'img_path': img_path,
                    'bbox': bbox,
                    'joint_img': np.zeros((self.original_joint_num, 3)),  # dummy
                    'joint_cam': np.zeros((self.original_joint_num, 3)),  # dummy
                    'joint_vis': np.zeros((self.original_joint_num, 1)),  # dummy
                    'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                    'f': f,
                    'c': c,
                })

        return data

Exemplo n.º 9

0

Exibir arquivo

Arquivo: Human36M.py Projeto: zoq/3DMPPE_ROOTNET_RELEASE

    def load_data(self):
        """Assemble the Human3.6M root-joint samples for the configured split.

        Per-subject annotation, camera and 3D-joint JSON files are read from
        ``self.annot_path``; the annotation files are merged into one ``COCO``
        index. When the split is ``'test'`` and ``cfg.use_gt_bbox`` is false
        the boxes come from the JSON file at ``self.human_bbox_dir``,
        otherwise from the annotations. Either way the box goes through
        ``process_bbox`` and the annotation is dropped when that returns
        ``None``.

        Returns:
            list[dict]: dicts with the keys ``img_path``, ``img_id``, ``bbox``,
            ``area``, ``root_img``, ``root_cam``, ``root_vis``, ``f`` and ``c``.
        """
        print('Load data of H36M Protocol ' + str(self.protocol))
        subject_list = self.get_subject()
        sampling_ratio = self.get_subsampling_ratio()

        # aggregate annotations from each subject
        db = COCO()
        cameras = {}
        joints = {}
        for subject in subject_list:
            prefix = osp.join(self.annot_path,
                              'Human36M_subject' + str(subject))

            # data load: the first subject seeds the dataset, later subjects
            # are appended key by key
            with open(prefix + '_data.json', 'r') as fp:
                annot = json.load(fp)
            seeding = len(db.dataset) == 0
            for k, v in annot.items():
                if seeding:
                    db.dataset[k] = v
                else:
                    db.dataset[k] += v

            # camera load
            with open(prefix + '_camera.json', 'r') as fp:
                cameras[str(subject)] = json.load(fp)

            # joint coordinate load
            with open(prefix + '_joint_3d.json', 'r') as fp:
                joints[str(subject)] = json.load(fp)
        db.createIndex()

        use_detection = self.data_split == 'test' and not cfg.use_gt_bbox
        if use_detection:
            print("Get bounding box from " + self.human_bbox_dir)
            with open(self.human_bbox_dir) as fp:
                annot = json.load(fp)
            bbox_result = {
                str(entry['image_id']): np.array(entry['bbox'])
                for entry in annot
            }
        else:
            print("Get bounding box from groundtruth")

        data = []
        for ann in db.anns.values():
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            img_width, img_height = img['width'], img['height']

            # check subject and frame_idx
            subject = img['subject']
            frame_idx = img['frame_idx']
            if subject not in subject_list:
                continue
            if frame_idx % sampling_ratio != 0:
                continue

            # camera parameter
            cam_param = cameras[str(subject)][str(img['cam_idx'])]
            R = np.array(cam_param['R'], dtype=np.float32)
            t = np.array(cam_param['t'], dtype=np.float32)
            f = np.array(cam_param['f'], dtype=np.float32)
            c = np.array(cam_param['c'], dtype=np.float32)

            # project world coordinate to cam, image coordinate space
            # (only the root joint is needed here)
            action_idx = img['action_idx']
            subaction_idx = img['subaction_idx']
            root_world = np.array(joints[str(subject)][str(action_idx)][str(
                subaction_idx)][str(frame_idx)],
                                  dtype=np.float32)[self.root_idx]
            root_cam = world2cam(root_world[None, :], R, t)[0]
            root_img = cam2pixel(root_cam[None, :], f, c)[0]
            root_vis = np.array(ann['keypoints_vis'])[self.root_idx, None]

            # bbox load
            if use_detection:
                bbox = bbox_result[str(image_id)]
            else:
                bbox = np.array(ann['bbox'])
            bbox = process_bbox(bbox, img_width, img_height)
            if bbox is None:
                continue
            area = bbox[2] * bbox[3]

            data.append({
                'img_path': img_path,
                'img_id': image_id,
                'bbox': bbox,
                'area': area,
                'root_img': root_img,  # [org_img_x, org_img_y, depth]
                'root_cam': root_cam,
                'root_vis': root_vis,
                'f': f,
                'c': c
            })

        return data

Exemplo n.º 10

0

Exibir arquivo

    def load_data(self):
        """Build the test samples from ground-truth or stored bounding boxes.

        Only ``self.data_split == 'test'`` is supported; any other value
        prints a message and trips an assertion.

        With ``cfg.use_gt_bbox`` set, each annotation whose ``is_valid`` is
        non-zero contributes its own box and root joint with a score of 1.0.
        Otherwise boxes and scores come from the JSON file at
        ``self.human_bbox_dir`` and the root fields are filled with ones. In
        both modes the box goes through ``process_bbox`` and the entry is
        dropped when that returns ``None``.

        Returns:
            list[dict]: dicts with the keys ``image_id``, ``img_path``,
            ``bbox``, ``area``, ``root_img``, ``root_cam``, ``root_vis``,
            ``f``, ``c`` and ``score``.
        """
        if self.data_split != 'test':
            print('Unknown data subset')
            assert 0

        data = []
        db = COCO(self.annot_path)

        if not cfg.use_gt_bbox:
            with open(self.human_bbox_dir) as fp:
                detections = json.load(fp)
            print("Get bounding box from " + self.human_bbox_dir)

            for det in detections:
                image_id = det['image_id']
                img = db.loadImgs(image_id)[0]
                img_path = osp.join(self.img_dir, img['file_name'])
                fx, fy, cx, cy = img['intrinsic']
                focal = np.array([fx, fy])
                princpt = np.array([cx, cy])

                raw_bbox = np.array(det['bbox']).reshape(4)
                bbox = process_bbox(raw_bbox, img['width'], img['height'])
                if bbox is None:
                    continue

                data.append({
                    'image_id': image_id,
                    'img_path': img_path,
                    'bbox': bbox,
                    'area': bbox[2] * bbox[3],
                    'root_img': np.ones(3),  # dummy
                    'root_cam': np.ones(3),  # dummy
                    'root_vis': np.ones(1),  # dummy
                    'f': focal,
                    'c': princpt,
                    'score': det['score']
                })
            return data

        print("Get bounding box from groundtruth")
        for ann in db.anns.values():
            if ann['is_valid'] == 0:
                continue

            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            fx, fy, cx, cy = img['intrinsic']
            focal = np.array([fx, fy])
            princpt = np.array([cx, cy])

            # Image-space joints get the camera depth appended as a third
            # column; the depth is not made relative to the root here.
            joint_cam = np.array(ann['keypoints_cam'])
            joint_img = np.concatenate(
                [np.array(ann['keypoints_img']), joint_cam[:, 2:]], 1)
            joint_vis = np.array(ann['keypoints_vis'])

            root_cam = joint_cam[self.root_idx]
            root_img = joint_img[self.root_idx]
            root_vis = joint_vis[self.root_idx, None]

            raw_bbox = np.array(ann['bbox'])
            bbox = process_bbox(raw_bbox, img['width'], img['height'])
            if bbox is None:
                continue

            data.append({
                'image_id': ann['image_id'],
                'img_path': img_path,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'root_img': root_img,  # [org_img_x, org_img_y, depth]
                'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                'root_vis': root_vis,
                'f': focal,
                'c': princpt,
                'score': 1.0
            })
        return data

Exemplo n.º 11

0

Exibir arquivo

    def load_data(self):
        """Build the COCO samples for the ``'train'`` or ``'test'`` split.

        * ``'train'``: one sample per person annotation from
          ``self.train_annot_path``. An annotation is skipped when its image
          is not indexed, when it is flagged ``iscrowd``, when
          ``num_keypoints`` is 0, or when ``process_bbox`` returns ``None``.
          Thorax and pelvis joints are appended to the labelled keypoints.
        * ``'test'``: boxes and root positions are read from the JSON file at
          ``self.human_3d_bbox_root_dir``; the joint arrays are zero-filled
          placeholders.

        Any other split prints a message and trips an assertion.

        Returns:
            list[dict]: sample dicts. Train samples carry ``img_path``,
            ``bbox``, ``joint_img``, ``joint_vis``, ``f`` and ``c``; test
            samples additionally carry ``joint_cam`` and ``root_cam``.
        """
        if self.data_split == 'train':
            db = COCO(self.train_annot_path)
            data = []
            for ann in db.anns.values():
                # Filter before touching the image record: looking up an image
                # id that is not indexed raises, so the membership test has to
                # come first to be able to skip such annotations.
                if (ann['image_id'] not in db.imgs or ann['iscrowd']
                        or ann['num_keypoints'] == 0):
                    continue

                img = db.loadImgs(ann['image_id'])[0]
                width, height = img['width'], img['height']

                bbox = process_bbox(ann['bbox'], width, height)
                if bbox is None:
                    continue

                # joints and vis: rows of (x, y, visibility flag)
                joint_img = np.array(ann['keypoints']).reshape(-1, 3)
                # add Thorax: midpoint of the shoulders; its flag is the
                # product of both shoulder flags
                thorax = (joint_img[self.lshoulder_idx, :] +
                          joint_img[self.rshoulder_idx, :]) * 0.5
                thorax[2] = (joint_img[self.lshoulder_idx, 2] *
                             joint_img[self.rshoulder_idx, 2])
                thorax = thorax.reshape((1, 3))
                # add Pelvis: same construction from the two hips
                pelvis = (joint_img[self.lhip_idx, :] +
                          joint_img[self.rhip_idx, :]) * 0.5
                pelvis[2] = (joint_img[self.lhip_idx, 2] *
                             joint_img[self.rhip_idx, 2])
                pelvis = pelvis.reshape((1, 3))

                joint_img = np.concatenate((joint_img, thorax, pelvis), axis=0)

                # Turn the flag column into a boolean mask, then zero it.
                joint_vis = (joint_img[:, 2].copy().reshape(-1, 1) > 0)
                joint_img[:, 2] = 0

                imgname = osp.join('train2017', img['file_name'])
                img_path = osp.join(self.img_dir, imgname)
                data.append({
                    'img_path': img_path,
                    'bbox': bbox,
                    'joint_img': joint_img,  # [org_img_x, org_img_y, 0]
                    'joint_vis': joint_vis,
                    # fixed focal length, principal point at the image centre
                    'f': np.array([1500, 1500]),
                    'c': np.array([width / 2, height / 2])
                })

        elif self.data_split == 'test':
            db = COCO(self.test_annot_path)
            # The handle gets its own name so it cannot be confused with the
            # focal length `f` assigned inside the loop.
            with open(self.human_3d_bbox_root_dir) as fp:
                annot = json.load(fp)
            data = []
            for entry in annot:
                image_id = entry['image_id']
                img = db.loadImgs(image_id)[0]
                img_path = osp.join(self.img_dir, 'val2017', img['file_name'])
                fx, fy, cx, cy = 1500, 1500, img['width'] / 2, img['height'] / 2
                f = np.array([fx, fy])
                c = np.array([cx, cy])
                root_cam = np.array(entry['root_cam']).reshape(3)
                # Stored boxes are used as they are, without process_bbox.
                bbox = np.array(entry['bbox']).reshape(4)

                data.append({
                    'img_path': img_path,
                    'bbox': bbox,
                    'joint_img': np.zeros((self.original_joint_num, 3)),  # dummy
                    'joint_cam': np.zeros((self.original_joint_num, 3)),  # dummy
                    'joint_vis': np.zeros((self.original_joint_num, 1)),  # dummy
                    'root_cam': root_cam,  # [X, Y, Z] in camera coordinate
                    'f': f,
                    'c': c,
                })

        else:
            print('Unknown data subset')
            assert 0

        return data