Example #1
    def train_dataloader(self):
        if self.hparams.no_data_aug:
            tfms = None
        else:
            tfms = augmentation(0.9)

        return self.__dataloader(tfms, self.train_fnames)
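This dataloader toggles augmentation off via an hparams flag and otherwise builds transforms with strength 0.9. A minimal sketch of what such an `augmentation(p)` factory could look like; the torchvision pipeline and the meaning of the argument are assumptions, not this repository's actual code:

import torchvision.transforms as T

def augmentation(p):
    # hypothetical factory: apply flip / color jitter with probability p
    return T.Compose([
        T.RandomHorizontalFlip(p=p),
        T.RandomApply([T.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=p),
    ])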
Example #2
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path = data['img_path']
        skeleton_path = data['skeleton_path']
        original_shape = data['original_shape']
        action_label = data['action_label']
        frame_num = data['frame_num']
        start_frame_idx = data['start_frame_idx']

        # video load
        video, skeleton_frame_idxs = load_video(img_path, frame_num,
                                                start_frame_idx)
        resized_shape = video.shape[1:3]

        # augmentation
        video, img2aug_trans, aug2img_trans, do_flip = augmentation(
            video, self.data_split)
        video = video.transpose(0, 3, 1, 2).astype(
            np.float32) / 255.  # frame_num, channel_dim, height, width

        # skeleton information load
        pose_coords, pose_scores = self.load_skeleton(skeleton_path,
                                                      skeleton_frame_idxs,
                                                      original_shape,
                                                      resized_shape)

        # process skeleton information
        pose_coords, pose_scores = process_skeleton(pose_coords, pose_scores,
                                                    img2aug_trans, do_flip,
                                                    self.flip_pairs,
                                                    self.joint_num,
                                                    resized_shape)
        """
        # for debug
        # keypoint visualization
        for i in range(cfg.frame_per_seg):
            img = video[i,::-1,:,:].transpose(1,2,0) * 255
            person_num = len(pose_coords[i])
            for p in range(person_num):
                #for j in range(self.joint_num):
                    #coord = (int(pose_coords[i][p][j][0]), int(pose_coords[i][p][j][1]))
                    #cv2.circle(img, coord, radius=3, color=(255,0,0), thickness=-1, lineType=cv2.LINE_AA)
                    #cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '_' + str(j) + '.jpg', img)
                coord = pose_coords[i][p].copy()
                coord[:,0] = coord[:,0] / cfg.input_hm_shape[1] * cfg.input_img_shape[1]
                coord[:,1] = coord[:,1] / cfg.input_hm_shape[0] * cfg.input_img_shape[0]
                img = vis_keypoints(img, pose_coords[i][p] * 4, self.skeleton)
            cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '.jpg', img)
        """

        inputs = {
            'video': video,
            'pose_coords': pose_coords,
            'pose_scores': pose_scores
        }
        targets = {'action_label': action_label}
        meta_info = {'img_id': data['img_id']}
        return inputs, targets, meta_info
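A minimal usage sketch for the (inputs, targets, meta_info) triple returned above, assuming each sample's arrays have fixed shapes so PyTorch's default collate can stack them (variable person counts in pose_coords would need a custom collate_fn); `dataset` stands for an instance of this class:

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
for inputs, targets, meta_info in loader:
    video = inputs['video']            # (batch, frame_num, channel, height, width)
    labels = targets['action_label']   # (batch,)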
Example #3
    def __getitem__(self, idx):
        data = self.datalist[idx]
        img_path = data['img_path']
        bbox = data['bbox']
        joint = data['joint']
        hand_type = data['hand_type']
        hand_type_valid = data['hand_type_valid']
        joint_cam = joint['cam_coord'].copy()
        joint_img = joint['img_coord'].copy()
        joint_valid = joint['valid'].copy()
        hand_type = self.handtype_str2array(hand_type)
        joint_coord = np.concatenate((joint_img, joint_cam[:, 2, None]), 1)

        # image load
        img = load_img(img_path)
        # augmentation
        img, joint_coord, joint_valid, hand_type, inv_trans = augmentation(
            img, bbox, joint_coord, joint_valid, hand_type, self.mode,
            self.joint_type)
        rel_root_depth = np.array(
            [joint_coord[self.root_joint_idx['left'], 2] -
             joint_coord[self.root_joint_idx['right'], 2]],
            dtype=np.float32).reshape(1)
        # the right-to-left root depth offset is only supervised when both
        # hands are present and both root joints are annotated
        if hand_type[0] * hand_type[1] == 1:
            root_valid = np.array(
                [joint_valid[self.root_joint_idx['right']] *
                 joint_valid[self.root_joint_idx['left']]],
                dtype=np.float32).reshape(1)
        else:
            root_valid = np.zeros((1), dtype=np.float32)
        # transform to output heatmap space
        joint_coord, joint_valid, rel_root_depth, root_valid = transform_input_to_output_space(
            joint_coord, joint_valid, rel_root_depth, root_valid,
            self.root_joint_idx, self.joint_type)
        img = self.transform(img.astype(np.float32)) / 255.

        inputs = {'img': img}
        targets = {
            'joint_coord': joint_coord,
            'rel_root_depth': rel_root_depth,
            'hand_type': hand_type
        }
        meta_info = {
            'joint_valid': joint_valid,
            'root_valid': root_valid,
            'hand_type_valid': hand_type_valid,
            'inv_trans': inv_trans,
            'capture': int(data['capture']),
            'cam': int(data['cam']),
            'frame': int(data['frame'])
        }
        return inputs, targets, meta_info
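The `handtype_str2array` call above evidently maps the annotation string to a right/left indicator pair, which is how `hand_type[0] * hand_type[1]` can test for two-hand samples. A sketch of that mapping, inferred from usage rather than copied from the repository:

import numpy as np

def handtype_str2array(hand_type):
    if hand_type == 'right':
        return np.array([1, 0], dtype=np.float32)
    elif hand_type == 'left':
        return np.array([0, 1], dtype=np.float32)
    elif hand_type == 'interacting':
        return np.array([1, 1], dtype=np.float32)
    else:
        raise ValueError('unknown hand type: ' + hand_type)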
Example #4
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, bbox, smpl_param = data['img_path'], data['bbox'], data['smpl_param']
        
        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, _, _ = augmentation(img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32))/255.

        # smpl coordinates
        smpl_mesh_cam, smpl_joint_cam = self.get_smpl_coord(smpl_param)
        
        inputs = {'img': img}
        targets = {'fit_mesh_coord_cam': smpl_mesh_cam}
        meta_info = {'bb2img_trans': bb2img_trans}
        return inputs, targets, meta_info
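The explicit `/255.` after `self.transform` is needed because torchvision's `ToTensor` only rescales `uint8` inputs; on a `float32` array it just changes the layout. A quick standalone check, assuming `self.transform = transforms.ToTensor()`:

import numpy as np
from torchvision import transforms

to_tensor = transforms.ToTensor()
img = (np.random.rand(224, 224, 3) * 255).astype(np.float32)
tensor = to_tensor(img) / 255.  # HWC float image -> CHW tensor in [0, 1]
print(tensor.shape, float(tensor.max()) <= 1.0)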
Example #5
    def get_batch_from_txt_files_(self):
        self.index += 1
        img_path = osp.join('/home/ubuntu/3d-testing/InterHand2.6M/main/',
                            self.fs[self.index]).strip()
        img = load_image(img_path)

        # parse one comma-separated x,y,w,h box
        bbox = [float(b.strip()) for b in self.bs[self.index].split(",")]

        img, inv_trans = augmentation(img, np.array(bbox), None, None, None,
                                      'test', None)
        img = self.transform(img.astype(np.float32)) / 255.

        # debug: draw the bbox on the source image and dump it to disk
        # image = cv2.imread(img_path)
        # cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
        #               (int(bbox[0]) + int(bbox[2]), int(bbox[1]) + int(bbox[3])),
        #               (255, 0, 0), 3)
        # print(cv2.imwrite("bbox_testing/" + str(self.index) + ".jpg", image))

        return img, img_path, inv_trans
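For context, `self.fs` and `self.bs` are evidently parallel lists of image paths and comma-separated x,y,w,h boxes. A hypothetical construction (the file names are assumptions):

with open('frames.txt') as f:
    fs = f.readlines()   # one image path per line
with open('bboxes.txt') as f:
    bs = f.readlines()   # one "x,y,w,h" box per line, aligned with fs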
Example #6
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path = data['img_path']
        img_shape = data['img_shape']
        bbox = data['bbox']
        joint_cam = data['joint_cam']
        cam_param = data['cam_param']
        mano_param = data['mano_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, _ = augmentation(
            img, bbox, self.data_split, exclude_flip=True
        )  # FreiHAND dataset only contains right hands. do not perform flip aug.
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # mano coordinates
            mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = self.get_mano_coord(
                mano_param, cam_param)
            mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mano_coord_img = cam2pixel(mano_coord_cam, focal, princpt)

            # affine transform x,y coordinates. root-relative depth
            mano_coord_img_xy1 = np.concatenate(
                (mano_coord_img[:, :2], np.ones_like(mano_coord_img[:, :1])), 1)
            mano_coord_img[:, :2] = np.dot(
                img2bb_trans, mano_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            root_joint_depth = mano_coord_cam[self.vertex_num + self.root_joint_idx][2]
            mano_coord_img[:, 2] = mano_coord_img[:, 2] - root_joint_depth
            mano_coord_img[:, 0] = mano_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            mano_coord_img[:, 1] = mano_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
            mano_coord_img[:, 2] = (mano_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

            # check truncation
            mano_trunc = ((mano_coord_img[:,0] >= 0) * (mano_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (mano_coord_img[:,1] >= 0) * (mano_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (mano_coord_img[:,2] >= 0) * (mano_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            mano_mesh_img = mano_coord_img[:self.vertex_num]
            mano_joint_img = mano_coord_img[self.vertex_num:]
            mano_mesh_trunc = mano_trunc[:self.vertex_num]
            mano_joint_trunc = mano_trunc[self.vertex_num:]

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                 [0, 0, 1]], dtype=np.float32)
            # parameter
            mano_pose = mano_pose.reshape(-1, 3)
            root_pose = mano_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            mano_pose[self.root_joint_idx] = root_pose.reshape(3)
            mano_pose = mano_pose.reshape(-1)
            # mano coordinate
            mano_joint_cam = mano_joint_cam - mano_joint_cam[self.root_joint_idx, None]  # root-relative
            mano_joint_cam = np.dot(rot_aug_mat, mano_joint_cam.transpose(1, 0)).transpose(1, 0)

            orig_joint_img = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            orig_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            orig_joint_valid = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy
            orig_joint_trunc = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy

            inputs = {'img': img}
            targets = {
                'orig_joint_img': orig_joint_img,
                'fit_joint_img': mano_joint_img,
                'fit_mesh_img': mano_mesh_img,
                'orig_joint_cam': orig_joint_cam,
                'fit_joint_cam': mano_joint_cam,
                'pose_param': mano_pose,
                'shape_param': mano_shape
            }
            meta_info = {
                'orig_joint_valid': orig_joint_valid,
                'orig_joint_trunc': orig_joint_trunc,
                'fit_joint_trunc': mano_joint_trunc,
                'fit_mesh_trunc': mano_mesh_trunc,
                'is_valid_fit': float(True),
                'is_3D': float(True)
            }
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}

        return inputs, targets, meta_info
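The `rot_aug_mat` built above is a rotation about the camera z-axis by `-rot` degrees, which keeps camera-space joints consistent with the in-plane image rotation applied by `augmentation`. A quick standalone check:

import numpy as np

rot = 30.0  # degrees, as returned by the augmentation above
c, s = np.cos(np.deg2rad(-rot)), np.sin(np.deg2rad(-rot))
rot_aug_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=np.float32)
point = np.array([1.0, 0.0, 5.0], dtype=np.float32)
rotated = rot_aug_mat @ point
print(rotated)                  # x,y rotated; depth unchanged
assert np.isclose(rotated[2], point[2])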
Example #7
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path = data['img_path']
        img_shape = data['img_shape']
        bbox = data['bbox']
        smpl_param = data['smpl_param']
        cam_param = data['cam_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        # muco gt
        muco_joint_img = data['joint_img']
        muco_joint_cam = data['joint_cam']
        muco_joint_cam = muco_joint_cam - muco_joint_cam[self.muco_root_joint_idx, None, :]  # root-relative
        muco_joint_valid = data['joint_valid']
        if do_flip:
            muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0]
            muco_joint_cam[:, 0] = -muco_joint_cam[:, 0]
            for pair in self.muco_flip_pairs:
                muco_joint_img[pair[0], :], muco_joint_img[pair[1], :] = \
                    muco_joint_img[pair[1], :].copy(), muco_joint_img[pair[0], :].copy()
                muco_joint_cam[pair[0], :], muco_joint_cam[pair[1], :] = \
                    muco_joint_cam[pair[1], :].copy(), muco_joint_cam[pair[0], :].copy()
                muco_joint_valid[pair[0], :], muco_joint_valid[pair[1], :] = \
                    muco_joint_valid[pair[1], :].copy(), muco_joint_valid[pair[0], :].copy()

        muco_joint_img_xy1 = np.concatenate(
            (muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1)
        muco_joint_img[:, :2] = np.dot(
            img2bb_trans, muco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
        muco_joint_img[:, 0] = muco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
        muco_joint_img[:, 1] = muco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
        muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[self.muco_root_joint_idx][2]  # root-relative
        muco_joint_img[:, 2] = (muco_joint_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # the *1000 converts cfg.bbox_3d_size from meters to millimeters

        # check truncation
        muco_joint_trunc = muco_joint_valid * ((muco_joint_img[:,0] >= 0) * (muco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                    (muco_joint_img[:,1] >= 0) * (muco_joint_img[:,1] < cfg.output_hm_shape[1]) * \
                    (muco_joint_img[:,2] >= 0) * (muco_joint_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

        # transform muco joints to target db joints
        muco_joint_img = transform_joint_to_other_db(muco_joint_img,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_cam = transform_joint_to_other_db(muco_joint_cam,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_valid = transform_joint_to_other_db(muco_joint_valid,
                                                       self.muco_joints_name,
                                                       self.joints_name)
        muco_joint_trunc = transform_joint_to_other_db(muco_joint_trunc,
                                                       self.muco_joints_name,
                                                       self.joints_name)

        if smpl_param is not None:
            # smpl coordinates
            smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                smpl_param, cam_param, do_flip, img_shape)
            smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            smpl_coord_img = cam2pixel(smpl_coord_cam, focal, princpt)

            # affine transform x,y coordinates. root-relative depth
            smpl_coord_img_xy1 = np.concatenate(
                (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])), 1)
            smpl_coord_img[:, :2] = np.dot(
                img2bb_trans, smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
            smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
            smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # the *1000 converts cfg.bbox_3d_size from meters to millimeters

            # check truncation
            smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            smpl_mesh_img = smpl_coord_img[:self.vertex_num]
            smpl_joint_img = smpl_coord_img[self.vertex_num:]
            smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
            smpl_joint_trunc = smpl_trunc[self.vertex_num:]

            # if fitted mesh is too far from muco gt, discard it
            is_valid_fit = True
            error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam,
                                           do_flip)
            if error > self.fitting_thr:
                is_valid_fit = False

        else:
            smpl_joint_img = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            smpl_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            smpl_mesh_img = np.zeros((self.vertex_num, 3),
                                     dtype=np.float32)  # dummy
            smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
            smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
            smpl_joint_trunc = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy
            smpl_mesh_trunc = np.zeros((self.vertex_num, 1),
                                       dtype=np.float32)  # dummy
            is_valid_fit = False

        # 3D data rotation augmentation
        rot_aug_mat = np.array(
            [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
             [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
             [0, 0, 1]], dtype=np.float32)
        # muco coordinate
        muco_joint_cam = np.dot(rot_aug_mat, muco_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeter to meter
        # parameter
        smpl_pose = smpl_pose.reshape(-1, 3)
        root_pose = smpl_pose[self.root_joint_idx, :]
        root_pose, _ = cv2.Rodrigues(root_pose)
        root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
        smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
        smpl_pose = smpl_pose.reshape(-1)
        # smpl coordinate
        smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
        smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeter to meter

        inputs = {'img': img}
        targets = {
            'orig_joint_img': muco_joint_img,
            'fit_joint_img': smpl_joint_img,
            'fit_mesh_img': smpl_mesh_img,
            'orig_joint_cam': muco_joint_cam,
            'fit_joint_cam': smpl_joint_cam,
            'pose_param': smpl_pose,
            'shape_param': smpl_shape
        }
        meta_info = {
            'orig_joint_valid': muco_joint_valid,
            'orig_joint_trunc': muco_joint_trunc,
            'fit_joint_trunc': smpl_joint_trunc,
            'fit_mesh_trunc': smpl_mesh_trunc,
            'is_valid_fit': float(is_valid_fit),
            'is_3D': float(True)
        }
        return inputs, targets, meta_info
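The depth normalization used twice above maps root-relative depth in millimeters linearly into `[0, cfg.output_hm_shape[0]]`. A worked sketch with assumed config values:

bbox_3d_size = 2.0    # meters; assumed value of cfg.bbox_3d_size
hm_depth = 64         # assumed value of cfg.output_hm_shape[0]

def depth_to_hm(z_mm):
    return (z_mm / (bbox_3d_size * 1000 / 2) + 1) / 2. * hm_depth

print(depth_to_hm(-1000.0))  # 0.0  -> front face of the 2 m box
print(depth_to_hm(0.0))      # 32.0 -> root depth lands at the center
print(depth_to_hm(1000.0))   # 64.0 -> back face of the box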
Example #8
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox = data['img_path'], data['img_shape'], data['bbox']

        # image load and affine transform
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # coco gt
            coco_joint_img = data['joint_img']
            coco_joint_valid = data['joint_valid']
            if do_flip:
                coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0]
                for pair in self.coco_flip_pairs:
                    coco_joint_img[pair[0], :], coco_joint_img[pair[1], :] = \
                        coco_joint_img[pair[1], :].copy(), coco_joint_img[pair[0], :].copy()
                    coco_joint_valid[pair[0], :], coco_joint_valid[pair[1], :] = \
                        coco_joint_valid[pair[1], :].copy(), coco_joint_valid[pair[0], :].copy()

            coco_joint_img_xy1 = np.concatenate(
                (coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])), 1)
            coco_joint_img[:, :2] = np.dot(
                img2bb_trans, coco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
            coco_joint_img[:, 0] = coco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            coco_joint_img[:, 1] = coco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]

            # backup for calculating fitting error
            _coco_joint_img = coco_joint_img.copy()
            _coco_joint_valid = coco_joint_valid.copy()

            # check truncation
            coco_joint_trunc = coco_joint_valid * ((coco_joint_img[:,0] >= 0) * (coco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                        (coco_joint_img[:,1] >= 0) * (coco_joint_img[:,1] < cfg.output_hm_shape[1])).reshape(-1,1).astype(np.float32)

            # transform coco joints to target db joints
            coco_joint_img = transform_joint_to_other_db(
                coco_joint_img, self.coco_joints_name, self.joints_name)
            coco_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            coco_joint_valid = transform_joint_to_other_db(
                coco_joint_valid, self.coco_joints_name, self.joints_name)
            coco_joint_trunc = transform_joint_to_other_db(
                coco_joint_trunc, self.coco_joints_name, self.joints_name)

            smplify_result = data['smplify_result']
            if smplify_result is not None:
                # use fitted mesh
                smpl_param, cam_param = smplify_result['smpl_param'], smplify_result['cam_param']
                smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                    smpl_param, cam_param, do_flip, img_shape)
                smpl_coord_cam = np.concatenate(
                    (smpl_mesh_cam, smpl_joint_cam))
                smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'],
                                           cam_param['princpt'])

                # x,y affine transform, root-relative depth
                smpl_coord_img_xy1 = np.concatenate(
                    (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, 0:1])), 1)
                smpl_coord_img[:, :2] = np.dot(
                    img2bb_trans, smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
                smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
                smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
                smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
                smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

                # check truncation
                smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                            (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                            (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

                # split mesh and joint coordinates
                smpl_mesh_img = smpl_coord_img[:self.vertex_num]
                smpl_joint_img = smpl_coord_img[self.vertex_num:]
                smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
                smpl_joint_trunc = smpl_trunc[self.vertex_num:]

                # if fitted mesh is too far from coco gt, discard it
                is_valid_fit = True
                error = self.get_fitting_error(_coco_joint_img, smpl_mesh_cam,
                                               cam_param, img2bb_trans,
                                               _coco_joint_valid)
                if error > self.fitting_thr:
                    is_valid_fit = False

            else:
                smpl_joint_img = np.zeros((self.joint_num, 3),
                                          dtype=np.float32)  # dummy
                smpl_joint_cam = np.zeros((self.joint_num, 3),
                                          dtype=np.float32)  # dummy
                smpl_mesh_img = np.zeros((self.vertex_num, 3),
                                         dtype=np.float32)  # dummy
                smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
                smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
                smpl_joint_trunc = np.zeros((self.joint_num, 1),
                                            dtype=np.float32)
                smpl_mesh_trunc = np.zeros((self.vertex_num, 1),
                                           dtype=np.float32)
                is_valid_fit = False

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                 [0, 0, 1]], dtype=np.float32)
            # parameter
            smpl_pose = smpl_pose.reshape(-1, 3)
            root_pose = smpl_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
            smpl_pose = smpl_pose.reshape(-1)
            # smpl coordinate
            smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
            smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0)

            inputs = {'img': img}
            targets = {
                'orig_joint_img': coco_joint_img,
                'fit_joint_img': smpl_joint_img,
                'fit_mesh_img': smpl_mesh_img,
                'orig_joint_cam': coco_joint_cam,
                'fit_joint_cam': smpl_joint_cam,
                'pose_param': smpl_pose,
                'shape_param': smpl_shape
            }
            meta_info = {
                'orig_joint_valid': coco_joint_valid,
                'orig_joint_trunc': coco_joint_trunc,
                'fit_joint_trunc': smpl_joint_trunc,
                'fit_mesh_trunc': smpl_mesh_trunc,
                'is_valid_fit': float(is_valid_fit),
                'is_3D': float(False)
            }
            return inputs, targets, meta_info
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}
            return inputs, targets, meta_info
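`transform_joint_to_other_db`, used above to move COCO joints into the target skeleton, can be understood as a name-matching copy. A sketch inferred from usage, not the repository's exact code:

import numpy as np

def transform_joint_to_other_db(src_joint, src_name, dst_name):
    # copy source joints into the destination layout wherever names match;
    # joints missing from the source stay zero
    dst_joint = np.zeros((len(dst_name), src_joint.shape[1]), dtype=np.float32)
    for dst_idx, name in enumerate(dst_name):
        if name in src_name:
            dst_joint[dst_idx] = src_joint[src_name.index(name)]
    return dst_joint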
Example #9
    def run_webcam():
        args = parse_args()
        cfg.set_args(args.gpu_ids)
        cudnn.benchmark = True

        if cfg.dataset == 'InterHand2.6M':
            assert args.test_set, 'Test set is required. Select one of test/val'
            assert args.annot_subset, "Please set proper annotation subset. Select one of all, human_annot, machine_annot"
        else:
            args.test_set = 'test'
            args.annot_subset = 'all'

        tester = Tester(args.test_epoch)
        tester._make_batch_generator(args.test_set, args.annot_subset)
        tester._make_model()

        detector = YOLO_AKASH("best.pt", save_img=True)

        test_loader = CustomLoader(transforms.ToTensor(), 'my_hand', 1)

        cap = cv2.VideoCapture(0)

        with torch.no_grad():
            while True:
                ret, frame = cap.read()

                inputs = {}
                preds = {
                    'joint_coord': [],
                    'rel_root_depth': [],
                    'hand_type': []
                }
                img = test_loader.process_frame(frame)
                inputs['img'] = torch.reshape(img, (1, 3, 256, 256))

                # take the first detected hand box as the crop bbox
                # (assumption: detect_image accepts the raw frame and returns
                # x,y,w,h boxes)
                boxes, confs, labels = detector.detect_image(frame)
                bbox = boxes[0]

                img, inv_trans = augmentation(img, np.array(bbox), None, None,
                                              None, 'test', None)

                # forward
                out = tester.model(inputs)

                joint_coord_out = out['joint_coord'].cpu().numpy()
                rel_root_depth_out = out['rel_root_depth'].cpu().numpy()
                hand_type_out = out['hand_type'].cpu().numpy()

                preds['joint_coord'].append(joint_coord_out)
                preds['rel_root_depth'].append(rel_root_depth_out)
                preds['hand_type'].append(hand_type_out)

                # evaluate
                preds = {k: np.concatenate(v) for k, v in preds.items()}

                # a webcam frame has no source file, so pass placeholders for
                # the filename/path arguments (assumed CustomLoader.visualize
                # signature)
                filename, img_path = 'webcam_frame', None
                keypoint_frame = test_loader.visualize(preds, filename,
                                                       img_path, inv_trans)

                cv2.imshow('webcam', keypoint_frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            cap.release()
            cv2.destroyAllWindows()
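If the detector returns corner-format boxes, they must be converted before the `augmentation` call above, which (judging from the bbox parsing in Example #5) expects x,y,w,h. A hypothetical helper:

def xyxy_to_xywh(box):
    # convert x1,y1,x2,y2 corners to x,y,width,height
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]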