Example #1
def optimize_cam_param(project_net, joint_input, crop_size):
    bbox = get_bbox(joint_input)
    bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25)
    bbox2 = process_bbox(bbox.copy())
    proj_target_joint_img, trans = j2d_processing(joint_input.copy(), (crop_size, crop_size), bbox1, 0, 0, None)
    joint_img, _ = j2d_processing(joint_input.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, 0, 0, None)

    joint_img = joint_img[:, :2]
    joint_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])
    mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
    joint_img = (joint_img.copy() - mean) / std
    joint_img = torch.Tensor(joint_img[None, :, :]).cuda()
    target_joint = torch.Tensor(proj_target_joint_img[None, :, :2]).cuda()

    # get optimization settings for projection
    criterion = nn.L1Loss()
    optimizer = optim.Adam(project_net.parameters(), lr=0.1)

    # estimate mesh, pose
    model.eval()
    pred_mesh, _ = model(joint_img)
    pred_mesh = pred_mesh[:, graph_perm_reverse[:mesh_model.face.max() + 1], :]
    pred_3d_joint = torch.matmul(joint_regressor, pred_mesh)

    out = {}
    # assume batch=1
    project_net.train()
    for j in range(0, 1500):
        # projection
        pred_2d_joint = project_net(pred_3d_joint.detach())

        loss = criterion(pred_2d_joint, target_joint[:, :17, :])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # step the learning rate down as the projection optimization converges
        if j == 500:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 0.05
        if j == 1000:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 0.001

    out['mesh'] = pred_mesh[0].detach().cpu().numpy()
    out['cam_param'] = project_net.cam_param[0].detach().cpu().numpy()
    out['bbox'] = bbox1

    out['target'] = proj_target_joint_img

    return out
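A minimal driver for optimize_cam_param, assuming the globals built in Example #18 (model, mesh_model, graph_perm_reverse, joint_regressor, project_net) are in scope; input_path and the 224 crop size are illustrative:

joint_input = np.load(input_path)  # detected 2D pose, shape (J, 2) or (J, 3)
out = optimize_cam_param(project_net, joint_input, crop_size=224)
pred_mesh, cam_param, bbox = out['mesh'], out['cam_param'], out['bbox']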
Example #2
    def get_fitting_error(self, bbox, coco_from_dataset, coco_from_smpl,
                          coco_joint_valid):
        bbox = process_bbox(bbox.copy(), aspect_ratio=1.0)
        coco_from_smpl_xy1 = np.concatenate(
            (coco_from_smpl[:, :2], np.ones_like(coco_from_smpl[:, 0:1])), 1)
        coco_from_smpl, _ = j2d_processing(coco_from_smpl_xy1, (64, 64), bbox,
                                           0, 0, None)
        coco_from_dataset_xy1 = np.concatenate(
            (coco_from_dataset[:, :2], np.ones_like(coco_from_dataset[:, 0:1])),
            1)
        coco_from_dataset, trans = j2d_processing(coco_from_dataset_xy1,
                                                  (64, 64), bbox, 0, 0, None)

        # vis
        # img = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # img = cv2.warpAffine(img, trans, (64, 64), flags=cv2.INTER_LINEAR)
        # vis_2d_pose(coco_from_smpl, img, self.coco_origin_skeleton, prefix='from smpl')
        # vis_2d_pose(coco_from_dataset, img, self.coco_origin_skeleton, prefix='from dataset')

        # mask joint coordinates
        coco_joint = coco_from_dataset[:, :2][np.tile(coco_joint_valid, (
            1, 2)) == 1].reshape(-1, 2)
        coco_from_smpl = coco_from_smpl[:, :2][np.tile(coco_joint_valid, (
            1, 2)) == 1].reshape(-1, 2)

        error = np.sqrt(np.sum((coco_joint - coco_from_smpl)**2, 1)).mean()
        return error
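A standalone sketch of the validity-masking idiom used above, on toy data (all names illustrative):

import numpy as np

valid = np.array([[1], [0], [1]], dtype=np.float32)    # per-joint validity flags, (J, 1)
joints = np.arange(6, dtype=np.float32).reshape(3, 2)  # joint coordinates, (J, 2)
# tile the flag across both coordinates, boolean-index, then restore the (N, 2) shape
kept = joints[np.tile(valid, (1, 2)) == 1].reshape(-1, 2)  # rows 0 and 2 survive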
Example #3
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        annot_id, img_id, img_path, img_shape = data['annot_id'], data['image_id'], data['img_path'], data['img_shape']
        cam_param, bbox, smpl_param = data['cam_param'].copy(), data['bbox'].copy(), data['smpl_param'].copy()
        rot, flip = 0, 0

        # Detection
        # detection_data = copy.deepcopy(self.datadict_pose2d_det[idx])
        # det_annot_id, det_joint_img_coco = detection_data['annotation_id'], np.array(detection_data['keypoints'], dtype=np.float32)
        detection_data = self.datadict_pose2d_det[annot_id]
        det_joint_img_coco = detection_data['img_joint']
        joint_img_coco = self.add_pelvis_and_neck(det_joint_img_coco)

        # smpl coordinates
        mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param)

        # regress h36m, coco cam joints
        joint_cam_coco, gt_joint_img_coco = self.get_coco_from_mesh(mesh_cam, cam_param)
        joint_cam_h36m = self.get_h36mJ_from_mesh(mesh_cam)

        # root relative camera coordinate
        mesh_cam = mesh_cam - joint_cam_h36m[:1]
        joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1]
        joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1]

        # make new bbox
        bbox = get_bbox(gt_joint_img_coco)
        bbox = process_bbox(bbox.copy())

        if cfg.DATASET.use_gt_input:
            joint_img_coco = gt_joint_img_coco

        # aug
        joint_img_coco, trans = j2d_processing(joint_img_coco.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]),
                                               bbox, rot, flip, None)

        #  -> 0~1
        joint_img_coco = joint_img_coco[:, :2]
        joint_img_coco /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img_coco, axis=0), np.std(joint_img_coco, axis=0)
        joint_img_coco = (joint_img_coco.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            inputs = {'pose2d': joint_img_coco}
            targets = {'mesh': mesh_cam / 1000, 'reg_pose3d': joint_cam_h36m}
            meta = {'dummy': np.ones(1, dtype=np.float32)}

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            joint_valid = np.ones((len(joint_cam_coco), 1), dtype=np.float32)  # dummy
            return joint_img_coco, joint_cam_coco, joint_valid
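The crop-to-0~1 scaling followed by per-sample standardization above recurs in most of the __getitem__ methods below; a minimal standalone sketch, with input_w/input_h standing in for cfg.MODEL.input_shape[1]/cfg.MODEL.input_shape[0]:

import numpy as np

def normalize_pose2d(joint_img, input_w, input_h):
    xy = joint_img[:, :2] / np.array([[input_w, input_h]])  # -> 0~1
    mean, std = np.mean(xy, axis=0), np.std(xy, axis=0)
    return (xy - mean) / std  # zero mean, unit std per sample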
Example #4
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_id, img_path, img_shape, joint_cam, cam_param, mano_param = \
            data['img_id'], data['img_path'], data['img_shape'], data['joint_cam'], data['cam_param'], data['mano_param']
        rot, flip = 0, 0

        # mano coordinates
        mano_mesh_cam, mano_joint_cam = self.get_mano_coord(mano_param, cam_param)
        mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
        # cam -> image projection
        # focal, princpt = cam_param['focal'], cam_param['princpt']
        # joint_coord_img = cam2pixel(mano_joint_cam, focal, princpt)[:, :2]

        # root align cam mesh/joint
        mano_coord_cam = mano_coord_cam - mano_joint_cam[:1]
        mesh_coord_cam = mano_coord_cam[:self.vertex_num]
        joint_coord_cam = mano_coord_cam[self.vertex_num:]

        # use det
        det_data = self.datalist_pose2d_det[idx]
        assert img_id == det_data['img_id']
        joint_coord_img = det_data['img_joint']

        # make bbox
        bbox = get_bbox(joint_coord_img)
        bbox = process_bbox(bbox.copy())

        # aug
        joint_coord_img, trans = j2d_processing(joint_coord_img.copy(),
                                                (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]),
                                                bbox, rot, flip, None)
        #  -> 0~1
        joint_coord_img = joint_coord_img[:, :2]
        joint_coord_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_coord_img, axis=0), np.std(joint_coord_img, axis=0)
        joint_coord_img = (joint_coord_img.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            # default valid
            mesh_valid = np.ones((len(mesh_coord_cam), 1), dtype=np.float32)
            reg_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)
            lift_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)

            inputs = {'pose2d': joint_coord_img}
            targets = {'mesh': mesh_coord_cam / 1000, 'lift_pose3d': joint_coord_cam, 'reg_pose3d': joint_coord_cam}
            meta = {'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid}
            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            # default valid
            joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)
            return joint_coord_img, joint_coord_cam, joint_valid
Example #5
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        flip, rot = 0, 0

        aid, img_path, img_shape = data['aid'], data['img_path'], data[
            'img_shape']
        gt_joint_img_coco = data['joint_img']

        det_data = self.datalist_cocoj_det[idx]
        det_aid = det_data['aid']
        assert det_aid == aid, f"detection aid: {det_aid}, dataset aid: {aid} / det_aid type: {type(det_aid)}, aid type: {type(aid)}"
        det_img_coco = np.array(det_data['keypoints']).reshape(-1, 3)
        joint_img = self.add_pelvis_and_neck(det_img_coco)
        # vis_2d_joints(gt_joint_img, img_path, self.coco_skeleton, prefix=f"{img_path.split('/')[-1]}")

        bbox = get_bbox(joint_img)
        bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25)
        bbox2 = process_bbox(bbox.copy())
        proj_target_joint_img, trans = j2d_processing(
            joint_img.copy(), (self.img_res, self.img_res), bbox1, rot, flip,
            None)

        joint_img, _ = j2d_processing(
            joint_img.copy(),
            (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, rot,
            flip, None)

        #  -> 0~1
        joint_img = joint_img[:, :2]
        joint_img /= np.array(
            [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
        joint_img = (joint_img.copy() - mean) / std

        return joint_img, proj_target_joint_img[:, :2], img_path, trans
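A hypothetical way to consume this dataset, matching the batch=1 assumption noted in Example #1 (dataset stands in for an instance of this class):

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=1, shuffle=False)
for joint_img, proj_target, img_path, trans in loader:
    # joint_img: (1, J, 2) normalized input; proj_target: (1, J, 2) crop-space target
    pass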
Example #6
    def load_data(self):
        print('Load annotations of COCO')
        db = COCO(
            osp.join(self.annot_path,
                     'person_keypoints_' + self.data_split + '2017.json'))
        with open(osp.join(self.annot_path, 'coco_smplify_train.json')) as f:
            smplify_results = json.load(f)

        datalist = []
        if self.data_split == 'train':
            for aid in db.anns.keys():
                ann = db.anns[aid]
                img = db.loadImgs(ann['image_id'])[0]
                imgname = osp.join('train2017', img['file_name'])
                img_path = osp.join(self.img_path, imgname)
                width, height = img['width'], img['height']

                if ann['iscrowd'] or (ann['num_keypoints'] == 0):
                    continue

                # bbox
                bbox = process_bbox(ann['bbox'])
                if bbox is None: continue

                # joint coordinates
                joint_img = np.array(ann['keypoints'],
                                     dtype=np.float32).reshape(-1, 3)
                joint_valid = (joint_img[:, 2].copy().reshape(-1, 1) >
                               0).astype(np.float32)
                joint_img[:, 2] = 0

                if str(aid) in smplify_results:
                    dp_data = None
                    smplify_result = smplify_results[str(aid)]
                else:
                    continue

                datalist.append({
                    'img_path': img_path,
                    'img_shape': (height, width),
                    'bbox': bbox,
                    'joint_img': joint_img,  # [org_img_x, org_img_y, 0]
                    'joint_valid': joint_valid,
                    'dp_data': dp_data,
                    'smplify_result': smplify_result
                })

            return datalist
Example #7
    def load_data(self):
        db = COCO(
            osp.join(self.annot_path,
                     'person_keypoints_' + self.data_split + '2017.json'))
        datalist = []

        for aid in db.anns.keys():
            ann = db.anns[aid]
            img = db.loadImgs(ann['image_id'])[0]
            imgname = osp.join(f'{self.data_split}2017', img['file_name'])
            img_path = osp.join(self.img_path, imgname)
            width, height = img['width'], img['height']

            if ann['iscrowd'] or (ann['num_keypoints'] == 0):
                continue

            # bbox
            bbox = process_bbox(ann['bbox'])
            if bbox is None: continue

            # joint coordinates
            joint_img = np.array(ann['keypoints'],
                                 dtype=np.float32).reshape(-1, 3)
            joint_img = self.add_pelvis_and_neck(joint_img)
            joint_valid = (joint_img[:, 2].copy().reshape(-1, 1) > 0).astype(
                np.float32)
            joint_img[:, 2] = 0

            datalist.append({
                'aid': aid,
                'img_path': img_path,
                'img_shape': (height, width),
                'bbox': bbox,
                'joint_img': joint_img,  # [org_img_x, org_img_y, 0]
                'joint_valid': joint_valid,
                'root_joint_depth': np.array([10], dtype=np.float32)[0]  # dummy
            })

        datalist = sorted(datalist, key=lambda x: x['aid'])

        return datalist
Example #8
    def load_data(self):
        print('Load annotations of 3DPW ')
        db = COCO(osp.join(self.data_path, 'new_3DPW_' + self.data_split + '.json'))

        datalist = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']

            img = db.loadImgs(image_id)[0]
            img_width, img_height = img['width'], img['height']
            sequence_name = img['sequence']
            img_name = img['file_name']

            # if 'fencing' not in sequence_name:
            #     continue

            img_path = osp.join(self.img_path, sequence_name, img_name)
            cam_param = {k: np.array(v, dtype=np.float32) for k,v in img['cam_param'].items()}

            smpl_param = ann['smpl_param']
            pid = ann['person_id']
            vid_name = sequence_name + str(pid)
            bbox = process_bbox(np.array(ann['bbox']))
            if bbox is None: continue

            datalist.append({
                'annot_id': aid,
                'person_id': pid,
                'image_id': image_id,
                'img_path': img_path,
                'vid_name': vid_name,
                'img_shape': (img_height, img_width),
                'cam_param': cam_param,
                'bbox': bbox,
                'smpl_param': smpl_param})

        datalist = sorted(datalist, key=lambda x: (x['person_id'], x['img_path']))
        valid_names = np.array([data['vid_name'] for data in datalist])
        unique_names = np.unique(valid_names)
        video_indices = []
        for u_n in unique_names:
            indexes = valid_names == u_n
            video_indices.append(indexes)

        return datalist, video_indices
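A short sketch of how the returned boolean masks could be used to gather the frames of one video (dataset is illustrative):

datalist, video_indices = dataset.load_data()
for mask in video_indices:
    frames = [d for d, m in zip(datalist, mask) if m]  # frames of one vid_name, in path order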
Example #9
    def load_data(self):
        print('Load annotations of FreiHAND ')
        if self.data_split == 'train':
            db = COCO(osp.join(self.data_path, 'freihand_train_coco.json'))
            with open(osp.join(self.data_path, 'freihand_train_data.json')) as f:
                data = json.load(f)

        else:
            db = COCO(osp.join(self.data_path, 'freihand_eval_coco.json'))
            with open(osp.join(self.data_path, 'freihand_eval_data.json')) as f:
                data = json.load(f)

        datalist = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.data_path, img['file_name'])
            img_shape = (img['height'], img['width'])
            db_idx = str(img['db_idx'])

            if self.data_split == 'train':
                cam_param, mano_param, joint_cam = data[db_idx]['cam_param'], data[db_idx]['mano_param'], data[db_idx][
                    'joint_3d']
                joint_cam = np.array(joint_cam).reshape(-1, 3)
                bbox = process_bbox(np.array(ann['bbox']))
                if bbox is None: continue

            else:
                cam_param, scale = data[db_idx]['cam_param'], data[db_idx]['scale']
                cam_param['R'] = np.eye(3, dtype=np.float32).tolist()  # dummy
                cam_param['t'] = np.zeros(3, dtype=np.float32)  # dummy
                joint_cam = np.ones((self.joint_num, 3), dtype=np.float32)  # dummy
                mano_param = {'pose': np.ones((48), dtype=np.float32), 'shape': np.ones((10), dtype=np.float32)}

            datalist.append({
                'img_id': image_id,
                'img_path': img_path,
                'img_shape': img_shape,
                'joint_cam': joint_cam,
                'cam_param': cam_param,
                'mano_param': mano_param})

        return sorted(datalist, key=lambda d: d['img_id'])
Example #10
    def load_data(self):
        print('Load annotations of SURREAL')

        db = COCO(osp.join(self.data_path, self.data_split + '.json'))

        datalist = []
        for iid in db.imgs.keys():
            img = db.imgs[iid]
            img_id = img['id']
            img_width, img_height = img['width'], img['height']
            img_name = img['file_name']
            cam_param = {k: np.array(v) for k, v in img['cam_param'].items()}

            ann_id = db.getAnnIds(img_id)
            ann = db.loadAnns(ann_id)[0]
            smpl_param = ann['smpl_param']
            joint_cam = np.array(ann['joint_cam'],
                                 dtype=np.float32).reshape(-1, 3)

            bbox = process_bbox(ann['bbox'])
            if bbox is None: continue

            datalist.append({
                'img_id': img_id,
                'img_name': img_name,
                'img_shape': (img_height, img_width),
                'cam_param': cam_param,
                'bbox': bbox,
                'smpl_param': smpl_param
            })

            if self.debug and len(datalist) > 1000:
                break

        datalist = sorted(datalist, key=lambda d: d['img_id'])
        if self.data_split == 'train':
            # fix gpu bug: pad the list with a duplicate of the first sample
            datalist.append(datalist[0])
        return datalist
Example #11
    def load_data(self):
        print('Load annotations of 3DPW ')
        db = COCO(osp.join(self.data_path,
                           '3DPW_' + self.data_split + '.json'))

        datalist = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']

            img = db.loadImgs(image_id)[0]
            img_width, img_height = img['width'], img['height']
            sequence_name = img['sequence']
            img_name = img['file_name']
            img_path = osp.join(self.img_path, sequence_name, img_name)
            cam_param = {
                k: np.array(v, dtype=np.float32)
                for k, v in img['cam_param'].items()
            }

            smpl_param = ann['smpl_param']
            bbox = process_bbox(np.array(ann['bbox']))
            if bbox is None: continue

            datalist.append({
                'annot_id': aid,
                'image_id': image_id,
                'img_path': img_path,
                'img_shape': (img_height, img_width),
                'cam_param': cam_param,
                'bbox': bbox,
                'smpl_param': smpl_param
            })

        datalist = sorted(datalist, key=lambda x: x['annot_id'])

        return datalist
Example #12
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_shape, bbox, dp_data, smplify_result = data['img_shape'], data[
            'bbox'], data['dp_data'], data['smplify_result']
        flip, rot = augm_params(is_train=(self.data_split == 'train'))
        # img_name = img_path.split('/')[-1][:-4]

        smpl_param, cam_param = smplify_result['smpl_param'], smplify_result[
            'cam_param']
        # regress h36m, coco joints
        mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param)
        joint_cam_h36m, joint_img_h36m = self.get_joints_from_mesh(
            mesh_cam, 'human36', cam_param)
        joint_cam_coco, joint_img_coco = self.get_joints_from_mesh(
            mesh_cam, 'coco', cam_param)
        # vis_2d_pose(joint_img_h36m, img_path, self.human36_skeleton, prefix='h36m joint')
        # vis_2d_pose(joint_img_coco, img_path, self.coco_skeleton, prefix='coco joint')
        # vis_3d_pose(joint_cam_h36m, self.human36_skeleton, 'human36', gt=True)

        # root relative camera coordinate
        mesh_cam = mesh_cam - joint_cam_h36m[:1]
        joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1]
        joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1]

        if self.input_joint_name == 'coco':
            joint_img, joint_cam = joint_img_coco, joint_cam_coco
        elif self.input_joint_name == 'human36':
            joint_img, joint_cam = joint_img_h36m, joint_cam_h36m

        # make new bbox
        tight_bbox = get_bbox(joint_img)
        bbox = process_bbox(tight_bbox.copy())

        # aug
        joint_img, trans = j2d_processing(
            joint_img.copy(),
            (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, 0,
            None)
        if not cfg.DATASET.use_gt_input:
            joint_img = self.replace_joint_img(joint_img, tight_bbox, trans)
        if flip:
            joint_img = flip_2d_joint(joint_img, cfg.MODEL.input_shape[1],
                                      self.flip_pairs)
        joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs)

        #  -> 0~1
        joint_img = joint_img[:, :2]
        joint_img /= np.array(
            [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
        joint_img = (joint_img.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            # default valid
            mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32)
            reg_joint_valid = np.ones((len(joint_cam_h36m), 1),
                                      dtype=np.float32)
            lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)
            error = self.get_fitting_error(tight_bbox, data['joint_img'],
                                           joint_img_coco[:17],
                                           data['joint_valid'])
            if error > self.fitting_thr:
                mesh_valid[:], reg_joint_valid[:], lift_joint_valid[:] = 0, 0, 0

            inputs = {'pose2d': joint_img}
            targets = {
                'mesh': mesh_cam / 1000,
                'lift_pose3d': joint_cam,
                'reg_pose3d': joint_cam_h36m
            }
            meta = {
                'mesh_valid': mesh_valid,
                'lift_pose3d_valid': lift_joint_valid,
                'reg_pose3d_valid': reg_joint_valid
            }

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            # default valid
            joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)
            # compute fitting error
            error = self.get_fitting_error(tight_bbox, data['joint_img'],
                                           joint_img_coco[:17],
                                           data['joint_valid'])
            if error > self.fitting_thr:
                joint_valid[:, :] = 0

            return joint_img, joint_cam, joint_valid
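The fitting-threshold gating used in both branches above, in isolation; the threshold value and the 6890 SMPL vertex count are assumptions here:

import numpy as np

fitting_thr = 3.0  # assumed: mean per-joint error threshold in 64x64 crop pixels
error = 4.2        # toy value standing in for get_fitting_error's output
mesh_valid = np.ones((6890, 1), dtype=np.float32)
if error > fitting_thr:
    mesh_valid[:] = 0  # the sample then contributes nothing to the mesh loss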
Example #13
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        flip, rot = augm_params(is_train=(self.data_split == 'train'))

        # get smpl mesh, joints
        smpl_param, cam_param, img_shape = data['smpl_param'], data[
            'cam_param'], data['img_shape']
        mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param, cam_param)

        # regress h36m, coco joints
        joint_cam_h36m, joint_img_h36m = self.get_joints_from_mesh(
            mesh_cam, 'human36', cam_param)
        joint_cam_coco, joint_img_coco = self.get_joints_from_mesh(
            mesh_cam, 'coco', cam_param)
        # debug vis
        # vis_3d_pose(joint_cam_coco, self.coco_skeleton, joint_set_name='coco', prefix=f'coco_joint_cam_{idx}')
        # img = np.zeros((int(img_shape[0]), int(img_shape[1]), 3))
        # vis_2d_pose(joint_img_coco, img, self.coco_skeleton, prefix='coco joint img')

        # root relative camera coordinate
        mesh_cam = mesh_cam - joint_cam_h36m[:1]
        joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1]
        joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1]

        # joint_cam is PoseNet target
        if self.input_joint_name == 'coco':
            joint_img, joint_cam = joint_img_coco, joint_cam_coco
        elif self.input_joint_name == 'human36':
            joint_img, joint_cam = joint_img_h36m, joint_cam_h36m

        # make new bbox
        tight_bbox = get_bbox(joint_img)
        bbox = process_bbox(tight_bbox.copy())

        # aug
        joint_img, trans = j2d_processing(
            joint_img.copy(),
            (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, 0,
            None)
        if not cfg.DATASET.use_gt_input:
            joint_img = self.replace_joint_img(joint_img, tight_bbox, trans)
        if flip:
            joint_img = flip_2d_joint(joint_img, cfg.MODEL.input_shape[1],
                                      self.flip_pairs)
        joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs)

        #  -> 0~1
        joint_img = joint_img[:, :2]
        joint_img /= np.array(
            [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
        joint_img = (joint_img.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            # default valid
            mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32)
            reg_joint_valid = np.ones((len(joint_cam_h36m), 1),
                                      dtype=np.float32)
            lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)

            inputs = {'pose2d': joint_img}
            targets = {
                'mesh': mesh_cam / 1000,
                'lift_pose3d': joint_cam,
                'reg_pose3d': joint_cam_h36m
            }
            meta = {
                'mesh_valid': mesh_valid,
                'lift_pose3d_valid': lift_joint_valid,
                'reg_pose3d_valid': reg_joint_valid
            }

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            # default valid
            joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)
            return joint_img, joint_cam, joint_valid
Example #14
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        annot_id, img_id, img_path, img_shape = data['annot_id'], data[
            'image_id'], data['img_path'], data['img_shape']
        cam_param, bbox, smpl_param = data['cam_param'].copy(), data[
            'bbox'].copy(), data['smpl_param'].copy()
        rot, flip = 0, 0

        # get coco img joints from detection
        joint_img_coco = data['pred_pose2d']
        # vis
        # img = cv2.imread(img_path)
        # vis_2d_pose(joint_img_coco, img, self.coco_skeleton, prefix='vis2dpose', bbox=None)
        # import pdb; pdb.set_trace()

        # smpl coordinates
        mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param)

        # regress h36m, coco cam joints
        joint_cam_coco, gt_joint_img_coco = self.get_coco_from_mesh(
            mesh_cam, cam_param)
        joint_cam_h36m = self.get_h36mJ_from_mesh(mesh_cam)

        # root relative camera coordinate
        mesh_cam = mesh_cam - joint_cam_h36m[:1]
        joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1]
        joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1]

        if cfg.DATASET.use_gt_input:
            joint_img_coco = gt_joint_img_coco

        # make new bbox
        bbox = get_bbox(joint_img_coco)
        bbox = process_bbox(bbox.copy())

        # aug
        joint_img_coco, trans = j2d_processing(
            joint_img_coco.copy(),
            (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot,
            flip, None)

        #  -> 0~1
        joint_img_coco = joint_img_coco[:, :2]
        joint_img_coco /= np.array(
            [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img_coco, axis=0), np.std(joint_img_coco,
                                                            axis=0)
        joint_img_coco = (joint_img_coco.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            inputs = {'pose2d': joint_img_coco}
            targets = {'mesh': mesh_cam / 1000, 'reg_pose3d': joint_cam_h36m}
            meta = {'dummy': np.ones(1, dtype=np.float32)}

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            joint_valid = np.ones((len(joint_cam_coco), 1),
                                  dtype=np.float32)  # dummy
            return joint_img_coco, joint_cam_coco, joint_valid
Example #15
    def load_data(self):
        print('Load annotations of 3DPW ')
        db = COCO(
            osp.join(self.data_path,
                     '3DPW_latest_' + self.data_split + '.json'))

        # get detected 2d pose
        with open(
                osp.join(self.data_path,
                         f'darkpose_3dpw_{self.data_split}set_output.json')
        ) as f:
            pose2d_outputs = {}
            data = json.load(f)
            for item in data:
                annot_id = str(item['annotation_id'])
                pose2d_outputs[annot_id] = {
                    'coco_joints':
                    np.array(item['keypoints'], dtype=np.float32)[:, :3]
                }

        datalist = []
        custompose_count = 0
        for aid in db.anns.keys():
            aid = int(aid)
            ann = db.anns[aid]
            image_id = ann['image_id']

            img = db.loadImgs(image_id)[0]
            img_width, img_height = img['width'], img['height']
            sequence_name = img['sequence']
            img_name = img['file_name']

            img_path = osp.join(self.img_path, sequence_name, img_name)
            cam_param = {
                k: np.array(v, dtype=np.float32)
                for k, v in img['cam_param'].items()
            }

            smpl_param = ann['smpl_param']
            pid = ann['person_id']
            vid_name = sequence_name + str(pid)
            bbox = process_bbox(np.array(ann['bbox']))
            if bbox is None: continue

            openpose = np.array(ann['openpose_result'],
                                dtype=np.float32).reshape(-1, 3)
            openpose = self.add_pelvis_and_neck(openpose,
                                                self.openpose_joints_name,
                                                only_pelvis=True)

            custompose = np.array(pose2d_outputs[str(aid)]['coco_joints'])
            custompose = self.add_pelvis_and_neck(custompose,
                                                  self.coco_joints_name)
            custompose_count += 1

            datalist.append({
                'annot_id': aid,
                'person_id': pid,
                'image_id': image_id,
                'img_path': img_path,
                'vid_name': vid_name,
                'img_shape': (img_height, img_width),
                'cam_param': cam_param,
                'bbox': bbox,
                'smpl_param': smpl_param,
                'pred_pose2d': custompose
            })

        datalist = sorted(datalist,
                          key=lambda x: (x['person_id'], x['img_path']))
        valid_names = np.array([data['vid_name'] for data in datalist])
        unique_names = np.unique(valid_names)
        video_indices = []
        for u_n in unique_names:
            indexes = valid_names == u_n
            video_indices.append(indexes)

        print("num custom pose: ", custompose_count)
        return datalist, video_indices
Example #16
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_id, bbox, smpl_param, cam_param, img_shape = data['img_id'], data['bbox'].copy(), data['smpl_param'].copy(), data['cam_param'].copy(), data['img_hw']
        flip, rot = augm_params(is_train=(self.data_split == 'train'))

        # smpl coordinates
        mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param, cam_param)

        # regress coco joints
        joint_cam_coco, joint_img_coco = self.get_coco_from_mesh(mesh_cam, cam_param)
        # h36m joints from datasets
        joint_cam_h36m, joint_img_h36m = data['joint_cam'], data['joint_img'][:, :2]

        # root relative camera coordinate
        mesh_cam = mesh_cam - joint_cam_h36m[:1]
        # joint_cam_smpl = joint_cam_smpl - joint_cam_h36m[:1]
        joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1]
        joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1]

        # joint_cam is PoseNet target
        if self.input_joint_name == 'coco':
            joint_img, joint_cam = joint_img_coco, joint_cam_coco
        elif self.input_joint_name == 'human36':
            joint_img, joint_cam = joint_img_h36m, joint_cam_h36m

        # make new bbox
        bbox = get_bbox(joint_img)
        bbox = process_bbox(bbox.copy())

        # aug
        joint_img, trans = j2d_processing(joint_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]),
                                          bbox, rot, flip, self.flip_pairs)
        joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs)

        if not cfg.DATASET.use_gt_input:
            joint_img = self.replace_joint_img(idx, img_id, joint_img, bbox, trans)

        # vis
        # img = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # new_img = cv2.warpAffine(img, trans, (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), flags=cv2.INTER_LINEAR)
        # vis_2d_pose(joint_img, new_img, self.human36_skeleton, prefix='detection')
        # vis_3d_pose(joint_cam, self.human36_skeleton, joint_set_name='human36', gt=True)

        #  -> 0~1
        joint_img = joint_img[:, :2]
        joint_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
        joint_img = (joint_img.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            # default valid
            mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32)
            reg_joint_valid = np.ones((len(joint_cam_h36m), 1), dtype=np.float32)
            lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)
            # if fitted mesh is too far from h36m gt, discard it
            error = self.get_fitting_error(joint_cam_h36m, mesh_cam)
            if error > self.fitting_thr:
                mesh_valid[:] = 0
                if self.input_joint_name == 'coco':
                    lift_joint_valid[:] = 0

            inputs = {'pose2d': joint_img}
            targets = {'mesh': mesh_cam / 1000, 'lift_pose3d': joint_cam, 'reg_pose3d': joint_cam_h36m}
            meta = {'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid}

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            # default valid
            joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32)
            # if fitted mesh is too far from h36m gt, discard it
            error = self.get_fitting_error(joint_cam_h36m, mesh_cam)
            if (error > self.fitting_thr) and (self.input_joint_name == 'coco'):
                joint_valid[:] = 0

            return joint_img, joint_cam, joint_valid
Example #17
    def load_data(self):
        if self.data_split == 'train':
            db = COCO(self.annot_path)
            with open(self.smpl_param_path) as f:
                smpl_params = json.load(f)
        else:
            raise ValueError('Unknown data subset: ' + str(self.data_split))

        datalist = []
        for iid in db.imgs.keys():
            img = db.imgs[iid]
            img_id = img["id"]
            img_width, img_height = img['width'], img['height']
            imgname = img['file_name']
            img_path = osp.join(self.img_dir, imgname)
            focal = img["f"]
            princpt = img["c"]
            cam_param = {'focal': focal, 'princpt': princpt}

            # crop the closest person to the camera
            ann_ids = db.getAnnIds(img_id)
            anns = db.loadAnns(ann_ids)

            # sample closest subject
            root_depths = [
                ann['keypoints_cam'][self.muco_root_joint_idx][2]
                for ann in anns
            ]
            closest_pid = root_depths.index(min(root_depths))
            pid_list = [closest_pid]

            # sample close subjects
            # for i in range(len(anns)):
            #     if i == closest_pid:
            #         continue
            #     picked = True
            #     for j in range(len(anns)):
            #         if i == j:
            #             continue
            #         dist = (np.array(anns[i]['keypoints_cam'][self.muco_root_joint_idx]) - np.array(
            #             anns[j]['keypoints_cam'][self.muco_root_joint_idx])) ** 2
            #         dist_2d = math.sqrt(np.sum(dist[:2]))
            #         dist_3d = math.sqrt(np.sum(dist))
            #         if dist_2d < 500 or dist_3d < 500:
            #             picked = False
            #     if picked:
            #         pid_list.append(i)

            for pid in pid_list:
                joint_cam = np.array(anns[pid]['keypoints_cam'])
                joint_img = np.array(anns[pid]['keypoints_img'])
                joint_img = np.concatenate([joint_img, joint_cam[:, 2:]], 1)
                joint_valid = np.ones((self.muco_joint_num, 3))

                bbox = process_bbox(anns[pid]['bbox'])
                if bbox is None: continue

                # check smpl parameter exist
                try:
                    smpl_param = smpl_params[str(ann_ids[pid])]
                    pose = np.array(smpl_param['pose'])
                    shape = np.array(smpl_param['shape'])
                    trans = np.array(smpl_param['trans'])
                    # avoid shadowing the builtin sum; reject NaN parameters
                    param_sum = pose.sum() + shape.sum() + trans.sum()
                    if np.isnan(param_sum):
                        continue
                except KeyError:
                    continue

                datalist.append({
                    'annot_id': ann_ids[pid],
                    'img_path': img_path,
                    'img_shape': (img_height, img_width),
                    'bbox': bbox,
                    'joint_img': joint_img,
                    'joint_cam': joint_cam,
                    'joint_valid': joint_valid,
                    'cam_param': cam_param,
                    'smpl_param': smpl_param
                })

            if self.debug and len(datalist) > 10000:
                break

        return datalist
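The closest-subject selection above, in isolation (toy depths in millimeters):

root_depths = [4500.0, 3200.0, 5100.0]             # z of each person's root joint
closest_pid = root_depths.index(min(root_depths))  # -> 1, the nearest person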
Example #18
    # get model and input
    if joint_set == 'mano':
        mesh_model = MANO()
    else:
        mesh_model = SMPL()
    model, joint_regressor, joint_num, skeleton, graph_L, graph_perm_reverse = get_joint_setting(
        mesh_model, joint_category=joint_set)
    model = model.cuda()
    project_net = models.project_net.get_model().cuda()
    joint_regressor = torch.Tensor(joint_regressor).cuda()
    joint_input = np.load(input_path)

    # pre-process input
    bbox = get_bbox(joint_input)
    bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25)
    bbox2 = process_bbox(bbox.copy())
    proj_target_joint_img, trans = j2d_processing(joint_input.copy(),
                                                  (IMG_RES, IMG_RES), bbox1, 0,
                                                  0, None)
    joint_img, _ = j2d_processing(
        joint_input.copy(),
        (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, 0, 0,
        None)

    joint_img = joint_img[:, :2]
    joint_img /= np.array(
        [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])
    mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0)
    joint_img = (joint_img.copy() - mean) / std
    joint_img = torch.Tensor(joint_img[None, :, :]).cuda()
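Continuing the fragment: the normalized input can be fed to the mesh model exactly as Example #1 does; a sketch under that assumption:

    model.eval()
    with torch.no_grad():
        pred_mesh, _ = model(joint_img)
        pred_mesh = pred_mesh[:, graph_perm_reverse[:mesh_model.face.max() + 1], :]
        pred_3d_joint = torch.matmul(joint_regressor, pred_mesh)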
Example #19
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_id, img_name, cam_param, bbox, smpl_param, img_shape = data[
            'img_id'], data['img_name'], data['cam_param'], data['bbox'], data[
                'smpl_param'], data['img_shape']
        flip, rot = augm_params(is_train=(self.data_split == 'train'))

        # smpl coordinates
        smpl_mesh_coord_cam, smpl_joint_coord_cam = self.get_smpl_coord(
            smpl_param)
        smpl_coord_cam = np.concatenate(
            (smpl_mesh_coord_cam, smpl_joint_coord_cam))
        smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'],
                                   cam_param['princpt'])
        joint_coord_img = smpl_coord_img[self.smpl_vertex_num:][:, :2]

        # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='gt')

        # root relative cam coord
        smpl_coord_cam = smpl_coord_cam - smpl_coord_cam[
            self.smpl_vertex_num + self.smpl_root_joint_idx]
        mesh_coord_cam = smpl_coord_cam[:self.smpl_vertex_num]
        joint_coord_cam = smpl_coord_cam[self.smpl_vertex_num:]

        if not cfg.DATASET.use_gt_input:
            # train / test with 2d dection input
            det_data = self.datalist_pose2d_det[idx]
            assert img_id == det_data['img_id']
            joint_coord_img = det_data['img_joint']

        # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='det')
        # vis_3d_pose(joint_coord_cam, self.smpl_skeleton, joint_set_name='smpl')

        # make new bbox
        bbox = get_bbox(joint_coord_img)
        bbox = process_bbox(bbox.copy())

        # aug
        joint_coord_img, trans = j2d_processing(
            joint_coord_img.copy(),
            (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot,
            flip, self.flip_pairs)
        joint_coord_cam = j3d_processing(joint_coord_cam, rot, flip,
                                         self.flip_pairs)

        #  -> 0~1
        joint_coord_img = joint_coord_img[:, :2]
        joint_coord_img /= np.array(
            [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

        # normalize loc&scale
        mean, std = np.mean(joint_coord_img, axis=0), np.std(joint_coord_img,
                                                             axis=0)
        joint_coord_img = (joint_coord_img.copy() - mean) / std

        if cfg.MODEL.name == 'pose2mesh_net':
            # default valid
            mesh_valid = np.ones((len(mesh_coord_cam), 1), dtype=np.float32)
            reg_joint_valid = np.ones((len(joint_coord_cam), 1),
                                      dtype=np.float32)
            lift_joint_valid = np.ones((len(joint_coord_cam), 1),
                                       dtype=np.float32)

            inputs = {'pose2d': joint_coord_img}
            targets = {
                'mesh': mesh_coord_cam / 1000,
                'lift_pose3d': joint_coord_cam,
                'reg_pose3d': joint_coord_cam
            }
            meta = {
                'mesh_valid': mesh_valid,
                'lift_pose3d_valid': lift_joint_valid,
                'reg_pose3d_valid': reg_joint_valid
            }

            return inputs, targets, meta

        elif cfg.MODEL.name == 'posenet':
            # default valid
            joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)
            return joint_coord_img, joint_coord_cam, joint_valid
Example #20
    def load_data(self):
        print('Load annotations of Human36M Protocol ' + str(self.protocol))
        subject_list = self.get_subject()
        sampling_ratio = self.get_subsampling_ratio()

        # aggregate annotations from each subject
        db = COCO()
        cameras = {}
        joints = {}
        smpl_params = {}
        for subject in subject_list:
            # data load
            with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_data.json'), 'r') as f:
                annot = json.load(f)
            if len(db.dataset) == 0:
                for k, v in annot.items():
                    db.dataset[k] = v
            else:
                for k, v in annot.items():
                    db.dataset[k] += v
            # camera load
            with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f:
                cameras[str(subject)] = json.load(f)
            # joint coordinate load
            with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f:
                joints[str(subject)] = json.load(f)
            # smpl parameter load
            with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_smpl_param.json'), 'r') as f:
                smpl_params[str(subject)] = json.load(f)
        db.createIndex()

        skip_idx = []
        datalist = []
        skip_img_idx = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            img_name = img_path.split('/')[-1]

            # subsample frames
            frame_idx = img['frame_idx']
            if frame_idx % sampling_ratio != 0:
                continue

            # check smpl parameter exist
            subject = img['subject']
            action_idx = img['action_idx']
            subaction_idx = img['subaction_idx']
            try:
                smpl_param = smpl_params[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)]
            except KeyError:
                skip_idx.append(image_id)
                skip_img_idx.append(img_path.split('/')[-1])
                continue

            smpl_param['gender'] = 'neutral'  # self.subject_genders[subject] # set corresponding gender

            # camera parameter
            cam_idx = img['cam_idx']
            cam_param = cameras[str(subject)][str(cam_idx)]
            R = np.array(cam_param['R'], dtype=np.float32)
            t = np.array(cam_param['t'], dtype=np.float32)
            f = np.array(cam_param['f'], dtype=np.float32)
            c = np.array(cam_param['c'], dtype=np.float32)
            cam_param = {'R': R, 't': t, 'focal': f, 'princpt': c}

            # project world coordinate to cam, image coordinate space
            joint_world = np.array(joints[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)],
                                   dtype=np.float32)
            joint_cam = world2cam(joint_world, R, t)
            joint_img = cam2pixel(joint_cam, f, c)
            joint_vis = np.ones((self.human36_joint_num, 1))

            bbox = process_bbox(np.array(ann['bbox']))
            if bbox is None: continue

            datalist.append({
                'img_path': img_path,
                'img_name': img_name,
                'img_id': image_id,
                'bbox': bbox,
                'img_hw': (img['height'], img['width']),
                'joint_img': joint_img,  # [x_img, y_img, z_cam]
                'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
                'joint_vis': joint_vis,
                'smpl_param': smpl_param,
                'cam_param': cam_param})

        datalist = sorted(datalist, key=lambda x: x['img_name'])

        return datalist, skip_idx, skip_img_idx
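A sketch of the pinhole projection that cam2pixel presumably implements, given the [x_img, y_img, z_cam] layout noted above (the repo helper's exact conventions may differ):

import numpy as np

def cam2pixel_sketch(joint_cam, f, c):
    # perspective divide, then apply focal lengths f=(fx, fy) and principal point c=(cx, cy)
    x = joint_cam[:, 0] / joint_cam[:, 2] * f[0] + c[0]
    y = joint_cam[:, 1] / joint_cam[:, 2] * f[1] + c[1]
    return np.stack([x, y, joint_cam[:, 2]], 1)  # [x_img, y_img, z_cam]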