Example #1
    def default_drawer(gts, preds, valid, ax):
        show3Dpose(gts, MuPoTSJoints(), ax=ax, invert_vertical=True, show_numbers=False, lcolor="#911f1f",
                   rcolor="#874924", ccolor="#1b4882")
        for p in preds[valid]:
            add3Dpose(p, ax, MuPoTSJoints())

        ax.set_xlim3d([-RADIUS - 400 + xroot, RADIUS + xroot + 600])
        ax.set_ylim3d([-RADIUS + zroot - 200, RADIUS + zroot + 100])
        ax.set_zlim3d([bottom + 10, bottom - 2500])
Example #2
def train_ground_truth(sub, seq, fix_incorrect=True):
    """
    Returns the ground truth annotations. Returns a dict with fields 'annot2', 'annot3', 'univ_annot3'
    :param fix_incorrect: S4/Seq2 has annotations flipped on some frames, if True they are flipped back
    :return:
    """
    annot = load(
        os.path.join(MPII_3DHP_PATH, "S%d" % sub, "Seq%d" % seq, "annot.mat"))
    annot2 = [
        x[0].reshape((-1, 28, 2))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["annot2"]
    ]
    annot3 = [
        x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["annot3"]
    ]
    univ_annot3 = [
        x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["univ_annot3"]
    ]
    assert np.all(annot["cameras"][0] == np.arange(14))
    assert np.all(annot["frames"][:, 0] == np.arange(len(annot2[0])))

    # S3/Seq1 has one extra annotation but one fewer frame,
    # so remove the very last annotation from every camera
    if sub == 3 and seq == 1:
        for cam in range(14):
            annot2[cam] = annot2[cam][:-1]
            annot3[cam] = annot3[cam][:-1]
            univ_annot3[cam] = univ_annot3[cam][:-1]

    if sub == 4 and seq == 2 and fix_incorrect:
        # annotations between frames 3759 (inclusive) and 5853 (exclusive) are flipped
        for cam in range(14):
            annot2[cam][3759:5853] = MuPoTSJoints().flip(
                annot2[cam][3759:5853])
            annot3[cam][3759:5853] = MuPoTSJoints().flip(
                annot3[cam][3759:5853])
            univ_annot3[cam][3759:5853] = MuPoTSJoints().flip(
                univ_annot3[cam][3759:5853])

    N = len(annot2[0])
    for cam in range(14):
        assert len(annot2[cam]) == N
        assert len(annot3[cam]) == N
        assert len(univ_annot3[cam]) == N

    result = {"annot2": annot2, "annot3": annot3, "univ_annot3": univ_annot3}

    return result
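A hypothetical usage sketch for the loader above; the subject/sequence numbers are placeholders, and the 17-joint shape assumes MUPOTS_RELEVANT_JOINTS selects the MuPoTS joints:

gt = train_ground_truth(sub=1, seq=1)
annot3 = gt['annot3']        # list with one array per camera (14 cameras)
cam0 = annot3[0]             # (nFrames, 17, 3) float32, if 17 joints are selected
print(len(annot3), cam0.shape, cam0.dtype)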
Example #3
def main(img_folder, metadata, poses_path, depth_folder, out_path, visualize):
    config, model = load_model('unnormalized')

    test_set = ImageFolderDataset(img_folder, metadata, poses_path, depth_folder)

    transforms = load_transforms('unnormalized', config, test_set) + [lambda x: x['pose2d']]
    test_set.transform = Compose(transforms)

    test_loader = DataLoader(test_set)
    pred = torch_predict(model, test_loader)

    mean3d = transforms[1].normalizer.mean
    std3d = transforms[1].normalizer.std
    pred = combine_pose_and_trans(pred, std3d, mean3d, MuPoTSJoints(), 'hip')

    result = {}
    for image in test_set.images:
        inds = test_set.img_names == image
        result[image] = pred[inds]

    save(out_path, result)

    if visualize:
        image = test_set.images[0]
        image_path = os.path.join(img_folder, image)
        show_result(image_path, result[image])
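A hypothetical invocation of main(); every path below is a placeholder rather than a file shipped with the code:

main(img_folder='data/demo/images',
     metadata='data/demo/camera_params.csv',
     poses_path='data/demo/keypoints.pkl',
     depth_folder='data/demo/depth',
     out_path='results/pred_poses.pkl',
     visualize=True)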
Example #4
    def augment(self, scale_by_dist, scales=None):
        """
        Augments the data in a pose dataset by simulating moving the poses
        closer to and further away from the camera. The method takes the dataset D, applies a transformation T,
        and concatenates the transformed data to the original data.

        :param scale_by_dist: if True, augmentation scales values with the L2 distance from the camera,
                              otherwise with the z coordinate (depth).
        :param scales: if defined, the values in this array are used for scaling instead of random values
        """
        assert isinstance(self.pose3d_jointset,
                          MuPoTSJoints), "only implemented for MuPoTS joints"
        orig_size = len(self.poses2d)
        root_ind = MuPoTSJoints().index_of('hip')

        # Calculating minimum scale to avoid joints behind camera
        if scales is None:
            limb_vec = self.poses3d[:, :, 2] - self.poses3d[:, [root_ind], 2]
            min_scale = np.nanmax(-limb_vec / self.poses3d[:, [root_ind], 2],
                                  axis=1)

            scales = np.random.normal(1, 0.25, orig_size)
            scales[scales < 0.6] = 1
            scales = np.maximum(scales, min_scale + 1e-5)
            scales[scales > 1.5] = 1
            scales = scales.reshape((-1, 1))
        else:
            assert scales.ndim == 2, "scales is expected to be a column vector"
        self.scales = scales.copy()

        # Duplicate all the training data, the first half is the original unchanged,
        # the second half is augmented
        for field in [
                'poses2d', 'poses3d', 'fx', 'fy', 'cx', 'cy', 'width',
                'valid_2d_pred'
        ]:
            if hasattr(self, field):
                data = self.__getattribute__(field)
                self.__setattr__(field, np.concatenate([data, data.copy()]))
        if hasattr(self, 'index'):
            self.index = np.concatenate([self.index, self.index.copy()])

        # Calculate the new 3D coordinates of the poses
        orig_roots = np.expand_dims(self.poses3d[orig_size:,
                                                 root_ind, :].copy(),
                                    1)  # (nPoses, 1, 3)
        new_roots = orig_roots * np.expand_dims(scales, 1)
        self.poses3d[orig_size:, :, :] = self.poses3d[
            orig_size:, :, :] - orig_roots + new_roots

        pose2d_root_ind = self.pose2d_jointset.index_of('hip')
        self.poses2d[orig_size:, :, :2] = (self.poses2d[orig_size:, :, :2]
                                           - self.poses2d[orig_size:, [pose2d_root_ind], :2]) / scales[:, :, None] \
                                          + self.poses2d[orig_size:, [pose2d_root_ind], :2]

        assert np.all((self.poses3d[:, :, 2] >= 0)
                      | np.isnan(self.poses3d[:, :, 2])), "Joint behind camera"
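A minimal standalone sketch of the root-centred depth scaling that augment() performs: the pose keeps its shape, only its distance from the camera changes. The hip index 14 is an assumption matching MuPoTSJoints().index_of('hip'):

import numpy as np

def scale_pose_depth(pose3d, root_ind, scale):
    # pose3d: (nJoints, 3) in camera coordinates; scale > 0
    root = pose3d[root_ind:root_ind + 1]       # (1, 3)
    return pose3d - root + root * scale        # same limbs, new camera distance

pose = np.random.uniform(500.0, 3000.0, (17, 3)).astype('float32')
farther = scale_pose_depth(pose, root_ind=14, scale=1.3)
# the root-relative pose is unchanged by the augmentation
assert np.allclose(pose - pose[14], farther - farther[14], atol=1e-2)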
Example #5
def generate_vid_frames(cam, vid_id):
    print(cam, vid_id)
    metas = sequence_metas[cam][vid_id]
    steps = [
        2 if mpii_3dhp.get_train_fps(meta[0], meta[1]) == 50 else 1
        for meta in metas
    ]
    out_folder = os.path.join(muco_temp.MUCO_TEMP_PATH,
                              'frames/cam_%d/vid_%d' % (cam, vid_id))
    ensuredir(out_folder)

    gt_poses = load(
        os.path.join(muco_temp.MUCO_TEMP_PATH,
                     'frames/cam_%d/gt.pkl' % cam))[vid_id]['annot3']
    hip_ind = MuPoTSJoints().index_of('hip')

    for i in range(NUM_FRAMES):
        # generate frame
        depths = gt_poses[i, :, hip_ind, 2]
        ordered_poses = np.argsort(
            depths)[::-1]  # poses ordered by depth in decreasing order

        bg_ind = ordered_poses[0]
        img = mpii_3dhp.get_image(metas[bg_ind][0],
                                  metas[bg_ind][1],
                                  cam,
                                  metas[bg_ind][2] + i * steps[bg_ind],
                                  rgb=False)
        img = img.astype('float32')
        # add new pose onto image
        for pose_ind in ordered_poses[1:]:
            sub, seq, start = metas[pose_ind]
            pose_img = mpii_3dhp.get_image(sub,
                                           seq,
                                           cam,
                                           start + i * steps[pose_ind],
                                           rgb=False)

            # mask is 0 at greenscreen bg, 1 at foreground (body, chair)
            mask = mpii_3dhp.get_mask(sub, seq, cam,
                                      start + i * steps[pose_ind],
                                      'FGmasks')[:, :, 2] / 255.
            mask = cv2.GaussianBlur(mask, (0, 0), 2)[:, :, np.newaxis]
            # chair_mask is 0 at chair, 1 everywhere else
            chair_mask = mpii_3dhp.get_mask(sub, seq, cam,
                                            start + i * steps[pose_ind],
                                            'ChairMasks')[:, :, [2]] / 255

            img = chair_mask * img + (1 - chair_mask) * pose_img
            img = mask * pose_img + (1 - mask) * img

        img = img.astype('uint8')
        cv2.imwrite(os.path.join(out_folder, 'img_%04d.jpg' % i), img,
                    [cv2.IMWRITE_JPEG_QUALITY, 80])
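The frame composition above is plain alpha compositing with a soft mask; a tiny standalone sketch of that blending step (all values made up):

import numpy as np

def composite(canvas, pose_img, mask):
    # canvas, pose_img: float images of the same shape; mask in [0, 1], 1 = foreground
    return mask * pose_img + (1.0 - mask) * canvas

canvas = np.zeros((4, 4, 3), dtype='float32')
pose_img = np.full((4, 4, 3), 255.0, dtype='float32')
mask = np.zeros((4, 4, 1), dtype='float32')
mask[1:3, 1:3] = 1.0                          # the person occupies the centre
out = composite(canvas, pose_img, mask)
assert out[2, 2, 0] == 255.0 and out[0, 0, 0] == 0.0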
Example #6
    def __init__(self, img_folder, metadata, poses_path, depth_folder):
        self.transform = None
        self.images = sorted(os.listdir(img_folder))

        # Load camera parameters
        with open(metadata, 'r') as f:
            data = f.readlines()
            data = [x.split(',') for x in data]
            data = [[y.strip() for y in x] for x in data]
            camera_params = {x[0]: [float(y) for y in x[1:]] for x in data[1:]}

        # Prepare data
        poses2d = []
        fx = []
        fy = []
        cx = []
        cy = []
        img_names = []
        jointwise_depth = []

        pred2d = load(poses_path)
        for image in self.images:
            poses = [np.array(x['keypoints']).reshape((17, 3)) for x in pred2d[image]]
            poses = np.stack(poses, axis=0)  # (nPoses, 17, 3)
            poses = extend_hrnet_raw(poses)  # (nPoses, 19, 3)

            img = cv2.imread(os.path.join(img_folder, image))
            width, height = recommended_size(img.shape)

            depth = load(os.path.join(depth_folder, image + '.npy'))
            depth = depth_from_coords(depth, poses.reshape((1, -1, 3))[:, :, :2], width, height)  # (nFrames(=1), nPoses*19)
            depth = depth.reshape((-1, 19))  # (nPoses, 19)
            jointwise_depth.append(depth)

            poses2d.append(poses)
            for i, field in enumerate([fx, fy, cx, cy]):
                field.extend([camera_params[image][i]] * len(poses))
            img_names.extend([image] * len(poses))

        self.poses2d = np.concatenate(poses2d).astype('float32')
        self.poses3d = np.ones_like(self.poses2d)[:, :17]
        self.fx = np.array(fx, dtype='float32')
        self.fy = np.array(fy, dtype='float32')
        self.cx = np.array(cx, dtype='float32')
        self.cy = np.array(cy, dtype='float32')
        self.img_names = np.array(img_names)
        self.pred_cdepths = np.concatenate(jointwise_depth).astype('float32')

        self.pose2d_jointset = CocoExJoints()
        self.pose3d_jointset = MuPoTSJoints()
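The metadata file parsed above is a simple CSV: a header line (skipped) followed by one row per image. The column order fx, fy, cx, cy is an assumption inferred from how camera_params is indexed in the loop over [fx, fy, cx, cy]; the file name and values below are made up:

    image,fx,fy,cx,cy
    img_0001.jpg,1469.2,1469.2,960.0,540.0
    img_0002.jpg,1469.2,1469.2,960.0,540.0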
Example #7
def eval_poses(is_relative, pose3d_type, preds_3d_kpt):
    """
    Calculates the PCK and AUC. This function is equivalent to ``mpii_test_predictions.m``.

    :param is_relative: True if relative error is calculated
    :param pose3d_type: 'annot3' or 'univ_annot3'
    :param preds_3d_kpt: seq->ndarray(nFrames,17,3), in MuPo-TS joint order. 3D pose predictions.
    :return: two dicts mapping sequence number to PCK and AUC
    """

    # Joints used in original evaluation script
    joint_groups = [
        ["Head", [0]],
        ["Neck", [1]],
        ["Shou", [2, 5]],
        ["Elbow", [3, 6]],
        ["Wrist", [4, 7]],
        ["Hip", [8, 11]],
        ["Knee", [9, 12]],
        ["Ankle", [10, 13]],
    ]
    scored_joints = np.concatenate(
        [x[1] for x in joint_groups])  # Those joints that take part in scoring

    pck_by_sequence = {}
    auc_by_sequence = {}
    for seq in range(1, 7):
        gt = test_ground_truth(seq)

        gt3d = gt[pose3d_type][gt["valid_frame"]]
        pred3d = preds_3d_kpt[seq][gt["valid_frame"]]  # (nFrames, nJoints, 3)

        if is_relative:
            hip_ind = MuPoTSJoints().index_of("hip")
            gt3d -= gt3d[:, [hip_ind]]
            pred3d -= pred3d[:, [hip_ind]]

        jointwise_err = np.linalg.norm(gt3d - pred3d,
                                       axis=-1)  # (nFrames, nJoints)

        pck_by_sequence[seq] = (
            np.mean(jointwise_err[:, scored_joints] < PCK_THRESHOLD) * 100)
        auc_by_sequence[seq] = (np.mean([
            np.mean(jointwise_err[:, scored_joints] < t)
            for t in AUC_THRESHOLDS
        ]) * 100)

    return pck_by_sequence, auc_by_sequence
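A minimal sketch of the PCK/AUC formulas used above, run on dummy per-joint errors; the 150 mm threshold and the 0-150 mm sweep mirror what PCK_THRESHOLD and AUC_THRESHOLDS are assumed to contain:

import numpy as np

errors = np.abs(np.random.normal(80.0, 40.0, (100, 14)))   # (nFrames, nScoredJoints), in mm

pck = np.mean(errors < 150) * 100                           # % of joints within 150 mm
auc_thresholds = np.linspace(0, 150, 31)                    # assumed sweep
auc = np.mean([np.mean(errors < t) for t in auc_thresholds]) * 100
print('PCK %.1f  AUC %.1f' % (pck, auc))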
Example #8
def show_result(image_path, poses):
    assert_shape(poses, (None, MuPoTSJoints.NUM_JOINTS, 3))

    # import here so it's not needed for prediction
    import matplotlib.pyplot as plt
    from util import viz

    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(9, 4.5))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    ax = viz.subplot(1, 2, 2)
    viz.show3Dpose(poses, MuPoTSJoints(), ax, invert_vertical=True)
    plt.show()
Example #9
    def __init__(self,
                 frame_folder,
                 hrnet_keypoint_file,
                 fx,
                 fy,
                 cx=None,
                 cy=None):
        self.transform = None

        self.pose2d_jointset = CocoExJoints()
        self.pose3d_jointset = MuPoTSJoints()

        frame_list = sorted(os.listdir(frame_folder))
        N = len(frame_list)

        hrnet_detections = load(hrnet_keypoint_file)
        self.poses2d, self.valid_2d_pred = stack_hrnet_raw(
            frame_list, hrnet_detections)
        assert len(self.poses2d) == N, "unexpected number of frames"

        index = [('vid', i) for i in range(N)]
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'U4'), ('frame', 'int32')])

        self.poses3d = np.ones(
            (N, self.pose3d_jointset.NUM_JOINTS, 3))  # dummy values

        # load first frame to get width/height
        frame = cv2.imread(os.path.join(frame_folder, frame_list[0]))
        self.width = frame.shape[1]

        self.fx = np.full(N, fx, dtype='float32')
        self.fy = np.full(N, fy, dtype='float32')
        self.cx = np.full(N,
                          cx if cx is not None else frame.shape[1] / 2,
                          dtype='float32')
        self.cy = np.full(N,
                          cy if cy is not None else frame.shape[0] / 2,
                          dtype='float32')

        assert self.poses2d.shape[1] == self.pose2d_jointset.NUM_JOINTS
Example #10
def eval_poses(matched_only,
               is_relative,
               pose3d_type,
               preds_2d_kpt,
               preds_3d_kpt,
               keep_matching=False):
    """
    Calculates the PCK and AUC. This function is equivalent to ``mpii_mupots_multiperson_eval.m``.
    It performs the same gt scaling transformation, uses the same joints for matching and evaluation.

    :param matched_only: True if only detected poses count towards the PCK and AUC
    :param is_relative: True if relative error is calculated
    :param pose3d_type: 'annot3' or 'univ_annot3'
    :param preds_2d_kpt: seq->list(ndarray(nPoses,17,2)), in MuPo-TS joint order. 2D pose predictions.
    :param preds_3d_kpt: seq->list(ndarray(nPoses,17,3)), in MuPo-TS joint order. 3D pose predictions.
    :param keep_matching: if True, the preds_2d_kpt arrays are assumed to be already matched with gt.
                          Otherwise, the matching algorithm in mpii_map_to_gt_bone_lengths is used.
    :return: two dicts mapping sequence number to PCK and AUC
    """

    # Joints used in original evaluation script
    joints_for_matching = np.arange(1,
                                    14)  # Joints used to match up the 2D poses
    joint_groups = [['Head', [0]], ['Neck', [1]], ['Shou', [2, 5]],
                    ['Elbow', [3, 6]], ['Wrist', [4, 7]], ['Hip', [8, 11]],
                    ['Knee', [9, 12]], ['Ankle', [10, 13]]]
    scored_joints = np.concatenate(
        [x[1] for x in joint_groups])  # Those joints that take part in scoring

    my_matching_inds = []
    all_perjoint_errors = {}
    pck_by_sequence = {}
    auc_by_sequence = {}
    for seq in range(1, 21):
        gt = load_gt_annotations(seq)
        num_frames = gt['annot2'].shape[0]

        gt_poses = []
        pred_poses = []
        valid_pred = []
        for i in range(num_frames):
            gt_pose_2d = gt['annot2'][i][gt['isValidFrame'][i]]
            gt_pose_3d = gt[pose3d_type][i][gt['isValidFrame'][i]]
            # gt_visibility = ~gt['occlusions'][i][gt['isValidFrame'][i]]
            gt_visibility = np.ones(gt_pose_2d.shape[:2], dtype='bool')

            pred_pose_2d = preds_2d_kpt[seq][i]
            pred_pose_3d = preds_3d_kpt[seq][i]

            pred_visibility = np.ones(pred_pose_2d.shape[:2], dtype='bool')

            # matching between 2D points
            if keep_matching:
                pair_inds = np.arange(
                    gt['annot2'].shape[1])[gt['isValidFrame'][i]]
            else:
                pair_inds = _match_poses(
                    gt_pose_2d[:, joints_for_matching],
                    gt_visibility[:, joints_for_matching],
                    pred_pose_2d[:, joints_for_matching],
                    pred_visibility[:, joints_for_matching], 40)

            my_matching_inds.append(pair_inds)
            has_pair = pair_inds >= 0

            # Reorder predicted poses to match Gt poses. If a GT pose does not have a pair, it is filled with 1e5
            reordered_pose_3d = 100000 * np.ones_like(
                gt_pose_3d)  # (nGtPoses, nJoints, 3)
            reordered_pose_3d[has_pair] = pred_pose_3d[
                pair_inds[has_pair]]  # (nGtPoses, nJoints, 3)

            gt_poses.append(gt_pose_3d)
            pred_poses.append(reordered_pose_3d)
            valid_pred.append(has_pair)

        gt_poses = np.concatenate(gt_poses)
        pred_poses = np.concatenate(pred_poses)
        valid_pred = np.concatenate(valid_pred)

        if is_relative:
            hip_ind = MuPoTSJoints().index_of('hip')
            gt_poses -= gt_poses[:, [hip_ind]]
            pred_poses -= pred_poses[:, [hip_ind]]

        # calculating per joint errors
        pred_poses = _scale_to_gt(pred_poses, gt_poses)
        pred_poses[~valid_pred] = 100000
        errors = np.linalg.norm(gt_poses - pred_poses,
                                axis=2)  # (nGtPoses, nJoints)
        if matched_only:
            errors = errors[valid_pred]

        pck_by_sequence[seq] = np.mean(errors[:, scored_joints] < 150) * 100
        auc_by_sequence[seq] = np.mean(
            [np.mean(errors[:, scored_joints] < t)
             for t in AUC_THRESHOLDS]) * 100
        all_perjoint_errors[seq] = errors

    return pck_by_sequence, auc_by_sequence
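A small sketch of the matching bookkeeping above: pair_inds[g] holds the prediction index matched to GT pose g, or -1 when no prediction was matched, and unmatched GT poses are filled with a huge value so they count as misses (values made up):

import numpy as np

gt_pose_3d = np.zeros((3, 17, 3))              # 3 ground-truth poses
pred_pose_3d = np.ones((2, 17, 3))             # only 2 detections
pair_inds = np.array([1, -1, 0])               # GT0->pred1, GT1 unmatched, GT2->pred0

has_pair = pair_inds >= 0
reordered = 100000 * np.ones_like(gt_pose_3d)
reordered[has_pair] = pred_pose_3d[pair_inds[has_pair]]
assert np.all(reordered[1] == 100000) and np.all(reordered[0] == 1)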
Example #11
def post_process_func(x):
    return combine_pose_and_trans(x, std3d, mean3d, MuPoTSJoints(), 'hip')
Example #12
def optimize_poses(pred3d, data, _config, **kwargs):
    """
    Runs the optimisation process on the dataset defined by resulsts.
    Parameters:
        pred3d: poses predicted by VideoPose, aligned with dataset
        dataset: dataset describing
        _config: dictionary of additional parameters
    """
    _config = dict(_config)
    _config.update(kwargs)

    joint_set = MuPoTSJoints()

    seqs = np.unique(data.index.seq)
    if isinstance(pred3d, torch.Tensor):
        smoothed_pred = np.zeros(pred3d.shape)
    else:
        smoothed_pred = np.zeros_like(pred3d) # (20899, 17, 3)

    losses = []

    for seq in seqs:
        inds = data.index.seq == seq # (20899,)

        poses_init = abs_to_hiprel(pred3d[inds].copy(), joint_set).astype('float32') / 1000 # (201, 17, 3)

        # interpolate invisible poses, if required
        visible_poses = data.good_poses[inds] # (201,)
        poses_pred = poses_init.copy() # (201, 17, 3)

        kp_score = np.mean(data.poses2d[inds, :, 2], axis=-1) # (201,)
        if _config['smooth_visibility']:
            kp_score = ndimage.median_filter(kp_score, 9)
        kp_score = torch.from_numpy(kp_score).cuda() # [201]
        poses_init = torch.from_numpy(poses_init).cuda() # [201, 17, 3]
        poses_pred = torch.from_numpy(poses_pred).cuda() # [201, 17, 3]
        scale = torch.ones((len(kp_score), 1, 1)) # torch.Size([201, 1, 1])

        poses_init.requires_grad = False
        poses_pred.requires_grad = True # TODO set to False
        kp_score.requires_grad = False
        scale.requires_grad = False

        optimizer = get_optimizer([poses_pred], _config)

        for i in range(_config['num_iter']):
            # smoothing formulation
            if _config['pose_loss'] == 'gm':
                pose_loss = torch.sum(kp_score.view(-1, 1, 1) * gmloss(poses_pred - poses_init, _config['gm_alpha']))
            elif _config['pose_loss'] == 'capped_l2':
                pose_loss = torch.sum(kp_score.view(-1, 1, 1) * capped_l2(poses_pred - poses_init,
                                                                          torch.tensor(_config['l2_cap']).float().cuda()))
            elif _config['pose_loss'] == 'capped_l2_euc_err':
                pose_loss = torch.sum(kp_score.view(-1, 1) * capped_l2_euc_err(poses_pred, poses_init,
                                                                               torch.tensor(_config['l2_cap']).float().cuda()))
            else:
                raise NotImplementedError('Unknown pose_loss: ' + _config['pose_loss'])

            velocity_loss_hip = torch.sum(globals()[_config['smoothness_loss_hip']](poses_pred[:, [0], :], 1))

            step = _config['smoothness_loss_hip_largestep']
            vel_loss = globals()[_config['smoothness_loss_hip']](poses_pred[:, [0], :], step)
            velocity_loss_hip_large = torch.sum((1 - kp_score[-len(vel_loss):]) * vel_loss)

            velocity_loss_rel = torch.sum(globals()[_config['smoothness_loss_rel']](poses_pred[:, 1:, :], 1))
            vel_loss = globals()[_config['smoothness_loss_rel']](poses_pred[:, 1:, :], step)
            velocity_loss_rel_large = torch.sum((1 - kp_score[-len(vel_loss):]) * vel_loss)

            total_loss = pose_loss + _config['smoothness_weight_hip'] * velocity_loss_hip \
                         + _config['smoothness_weight_hip_large'] * velocity_loss_hip_large \
                         + _config['smoothness_weight_rel'] * velocity_loss_rel \
                         + _config['smoothness_weight_rel_large'] * velocity_loss_rel_large

            # np.savez("pose_ref.npz",
            #     total_loss=total_loss.detach().cpu(),
            #     pose_loss=pose_loss.detach().cpu(),
            #     velocity_loss_hip=velocity_loss_hip.detach().cpu(),
            #     velocity_loss_hip_large=velocity_loss_hip_large.detach().cpu(),
            #     velocity_loss_rel=velocity_loss_rel.detach().cpu(),
            #     velocity_loss_rel_large=velocity_loss_rel_large.detach().cpu(),
            # )
            # exit()

            optimizer.zero_grad()
            total_loss.backward()

            optimizer.step()

        poses_init = poses_init.detach().cpu().numpy() * 1000
        poses_pred = poses_pred.detach().cpu().numpy() * 1000

        poses_init = add_back_hip(poses_init, joint_set)
        poses_pred = add_back_hip(poses_pred, joint_set)
        smoothed_pred[inds] = poses_pred

        losses.append(total_loss.item())

    if _config.get('print_loss', False):
        print('Avg loss:', np.mean(losses))
    return smoothed_pred
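gmloss and capped_l2 are not shown in this snippet; a plausible capped-L2 penalty (an assumption, not necessarily the exact definition used here) simply saturates the squared residual so that outlier frames stop dominating the refinement:

import torch

def capped_l2_sketch(diff, cap):
    # squared error, clipped at cap**2 (hypothetical stand-in for capped_l2)
    return torch.minimum(diff ** 2, cap ** 2)

diff = torch.tensor([0.01, 0.05, 2.0])
print(capped_l2_sketch(diff, torch.tensor(0.1)))   # 1e-4, 2.5e-3, then capped at 1e-2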
Example #13
    def __init__(self, pose2d_type, pose3d_scaling):
        """
        Loads the MuPoTS dataset, but only those images where at least one person was detected. Each person
        in a frame is loaded separately.
        """
        assert pose3d_scaling in ['univ', 'normal']

        self.pose2d_jointset = FilteredSinglePersonMuPoTsDataset.get_jointset(
            pose2d_type)
        self.pose3d_jointset = MuPoTSJoints()

        poses2d = []
        poses3d = []
        pred_cdepths = []
        index = []
        for seq in range(1, 21):
            depth_width = 512
            depth_height = 512 if seq <= 5 else 288

            gt = mupots_3d.load_gt_annotations(seq)
            op = mupots_3d.load_2d_predictions(seq, pose2d_type)

            pose2d = op['pose']
            pose3d = gt['annot3' if pose3d_scaling ==
                        'normal' else 'univ_annot3']

            depth = mupots_3d.load_jointwise_depth(seq)

            good_poses = gt['isValidFrame'].squeeze()
            good_poses = np.logical_and(good_poses, op['valid_pose'])

            orig_frame = np.tile(
                np.arange(len(good_poses)).reshape((-1, 1)),
                (1, good_poses.shape[1]))
            orig_pose = np.tile(
                np.arange(good_poses.shape[1]).reshape((1, -1)),
                (good_poses.shape[0], 1))

            assert pose2d.shape[:2] == good_poses.shape  # (nFrames, nPeople)
            assert pose3d.shape[:2] == good_poses.shape
            assert depth.shape[:2] == good_poses.shape
            assert orig_frame.shape == good_poses.shape
            assert orig_pose.shape == good_poses.shape
            assert pose2d.shape[2:] == (self.pose2d_jointset.NUM_JOINTS, 3)
            assert pose3d.shape[2:] == (17, 3)
            assert good_poses.ndim == 2

            # Keep only those poses where good_poses is True
            pose2d = pose2d[good_poses]
            pose3d = pose3d[good_poses]
            orig_frame = orig_frame[good_poses]
            orig_pose = orig_pose[good_poses]
            depth = depth[good_poses]

            index.extend([(seq, orig_frame[i], orig_pose[i], depth_width,
                           depth_height) for i in range(len(orig_frame))])

            assert len(pose2d) == len(pose3d)

            poses2d.append(pose2d)
            poses3d.append(pose3d)
            pred_cdepths.append(depth)

        self.poses2d = np.concatenate(poses2d).astype('float32')
        self.poses3d = np.concatenate(poses3d).astype('float32')
        self.pred_cdepths = np.concatenate(pred_cdepths).astype('float32')
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'int32'), ('frame', 'int32'),
                                         ('pose', 'int32'),
                                         ('depth_width', 'int32'),
                                         ('depth_height', 'int32')])

        # Load calibration matrices
        N = len(self.poses2d)
        self.fx = np.zeros(N, dtype='float32')
        self.fy = np.zeros(N, dtype='float32')
        self.cx = np.zeros(N, dtype='float32')
        self.cy = np.zeros(N, dtype='float32')

        mupots_calibs = mupots_3d.get_calibration_matrices()
        for seq in range(1, 21):
            inds = (self.index.seq == seq)
            self.fx[inds] = mupots_calibs[seq][0, 0]
            self.fy[inds] = mupots_calibs[seq][1, 1]
            self.cx[inds] = mupots_calibs[seq][0, 2]
            self.cy[inds] = mupots_calibs[seq][1, 2]

        assert np.all(self.fx > 0), "Some fields were not filled"
        assert np.all(self.fy > 0), "Some fields were not filled"
        assert np.all(np.abs(self.cx) > 0), "Some fields were not filled"
        assert np.all(np.abs(self.cy) > 0), "Some fields were not filled"
        self.transform = None
Example #14
def run(**kwargs):
    refine_config = load("scripts/nn_refine_config.json")
    for k, v in kwargs.items():
        refine_config[k] = v
    exp = Experiment(
        workspace="pose-refinement",
        project_name="08-nn-ref-bone-length",
        display_summary_level=0,
    )
    exp.log_parameters(refine_config)

    model_name = refine_config["model_name"]
    config, model = load_model(model_name)
    test_set = get_dataset(config)
    post_process_func = extract_post(model_name, test_set, config)

    joint_set = MuPoTSJoints()
    connected_joints = joint_set.LIMBGRAPH

    pad = (model.receptive_field() - 1) // 2
    generator = UnchunkedGeneratorWithGT(test_set, pad, True)
    seqs = sorted(np.unique(test_set.index.seq))

    optimized_preds_list = defaultdict(list)
    max_batch = len(generator)
    exp.log_parameter("max_batch", max_batch)
    for curr_batch, (pose2d, valid, pose3d) in enumerate(generator):
        exp.log_parameter("curr_batch", curr_batch)
        exp.log_parameter("curr_batch%", curr_batch / max_batch)
        if refine_config["full_batch"]:
            max_item = 1
        else:
            max_item = valid.shape[-1]
        for curr_item in range(max_item):
            if not refine_config["full_batch"]:
                exp.log_parameter("curr_item", curr_item)
                exp.log_parameter("curr_item%", curr_item / max_item)
                if (curr_item + 1) > (
                    max_item - refine_config["smoothness_loss_hip_largestep"]
                ):
                    reverse = True
                    f = curr_item - refine_config["smoothness_loss_hip_largestep"]
                    t = curr_item + 1
                else:
                    reverse = False
                    f = curr_item
                    t = f + refine_config["smoothness_loss_hip_largestep"] + 1
            model_ = copy.deepcopy(model)
            optimizer = get_optimizer(model_.parameters(), refine_config)
            max_iter = refine_config["num_iter"]
            for curr_iter in range(max_iter):
                exp.log_parameter("curr_iter", curr_iter)
                exp.log_parameter("curr_iter%", curr_iter / max_iter)
                optimizer.zero_grad()

                seq = seqs[curr_batch]
                if refine_config["full_batch"]:
                    nn_input = pose2d
                    valid_ = valid[0]
                else:
                    nn_input = pose2d[:, f : t + 2 * pad, :]
                    valid_ = valid[0][f:t]
                pred3d = model_(
                    torch.from_numpy(nn_input).cuda()
                )  # [2, 401, 42] -> [2, 21+2*13, 42], pred3d: [21, 16, 3]

                pred_real_pose = post_process_func(
                    pred3d[0], seq
                )  # unnormalized output

                pred_real_pose_aug = post_process_func(pred3d[1], seq)
                pred_real_pose_aug[:, :, 0] *= -1
                pred_real_pose_aug = test_set.pose3d_jointset.flip(pred_real_pose_aug)
                pred_real_pose = (pred_real_pose + pred_real_pose_aug) / 2

                pred = pred_real_pose[valid_]
                gt_pose = post_process_func(pose3d[0], seq)

                inds = test_set.index.seq == seq

                poses_pred = abs_to_hiprel(pred, joint_set) / 1000  # (201, 17, 3)
                if refine_config["reinit"] or (curr_iter == 0):
                    poses_init = poses_pred.detach().clone()
                    poses_init.requires_grad = False
                    if not refine_config["full_batch"]:
                        kp_score = np.mean(test_set.poses2d[inds, :, 2], axis=-1)[
                            f:t
                        ]  # (201,)
                    else:
                        kp_score = np.mean(
                            test_set.poses2d[inds, :, 2], axis=-1
                        )  # (201,)
                    #     if refine_config['smooth_visibility']:
                    #         kp_score = ndimage.median_filter(kp_score, 9)
                    kp_score = torch.from_numpy(kp_score).cuda()  # [201]
                    scale = torch.ones((len(kp_score), 1, 1))  # torch.Size([201, 1, 1])

                    kp_score.requires_grad = False
                    scale.requires_grad = False

                # smoothing formulation

                if refine_config["pose_loss"] == "gm":
                    pose_loss = kp_score.view(-1, 1, 1) * gmloss(
                        poses_pred - poses_init, refine_config["gm_alpha"]
                    )
                elif refine_config["pose_loss"] == "capped_l2":
                    pose_loss = kp_score.view(-1, 1, 1) * capped_l2(
                        poses_pred - poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                elif refine_config["pose_loss"] == "capped_l2_euc_err":
                    pose_loss = kp_score.view(-1, 1) * capped_l2_euc_err(
                        poses_pred,
                        poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                else:
                    raise NotImplementedError(
                        "Unknown pose_loss: " + refine_config["pose_loss"]
                    )

                velocity_loss_hip = globals()[refine_config["smoothness_loss_hip"]](
                    poses_pred[:, [0], :], 1
                )

                step = refine_config["smoothness_loss_hip_largestep"]
                vel_loss = globals()[refine_config["smoothness_loss_hip"]](
                    poses_pred[:, [0], :], step
                )
                velocity_loss_hip_large = (1 - kp_score[-len(vel_loss) :]) * vel_loss

                velocity_loss_rel = globals()[refine_config["smoothness_loss_rel"]](
                    poses_pred[:, 1:, :], 1
                )
                vel_loss = globals()[refine_config["smoothness_loss_rel"]](
                    poses_pred[:, 1:, :], step
                )
                velocity_loss_rel_large = (1 - kp_score[-len(vel_loss) :]) * vel_loss

                prefix = f"{curr_batch}_{curr_item}"
                if refine_config["full_batch"]:
                    total_loss = (
                        torch.sum(pose_loss)
                        + refine_config["smoothness_weight_hip"]
                        * torch.sum(velocity_loss_hip)
                        + refine_config["smoothness_weight_hip_large"]
                        * torch.sum(velocity_loss_hip_large)
                        + refine_config["smoothness_weight_rel"]
                        * torch.sum(velocity_loss_rel)
                        + refine_config["smoothness_weight_rel_large"]
                        * torch.sum(velocity_loss_rel_large)
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss,
                        f"{prefix}_pose_loss": torch.sum(pose_loss),
                        f"{prefix}_velocity_loss_hip": torch.sum(velocity_loss_hip),
                        f"{prefix}_velocity_loss_hip_large": torch.sum(
                            velocity_loss_hip_large
                        ),
                        f"{prefix}_velocity_loss_rel": torch.sum(velocity_loss_rel),
                        f"{prefix}_velocity_loss_rel_large": torch.sum(
                            velocity_loss_rel_large
                        ),
                    }
                else:
                    neighbour_dist_idx = 0 if not reverse else -1
                    total_loss = (
                        torch.sum(pose_loss[neighbour_dist_idx,])
                        + refine_config["smoothness_weight_hip"]
                        * velocity_loss_hip[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_hip_large"]
                        * velocity_loss_hip_large
                        + refine_config["smoothness_weight_rel"]
                        * velocity_loss_rel[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_rel_large"]
                        * velocity_loss_rel_large
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss[0],
                        f"{prefix}_pose_loss": torch.sum(
                            pose_loss[neighbour_dist_idx,]
                        ),
                        f"{prefix}_velocity_loss_hip": velocity_loss_hip[
                            neighbour_dist_idx
                        ],
                        f"{prefix}_velocity_loss_hip_large": velocity_loss_hip_large[0],
                        f"{prefix}_velocity_loss_rel": velocity_loss_rel[
                            neighbour_dist_idx
                        ],
                        f"{prefix}_velocity_loss_rel_large": velocity_loss_rel_large[0],
                    }

                if refine_config["bone_weight"] != 0:
                    assert refine_config["full_batch"]

                    err = get_bone_lengths(pred, connected_joints)
                    bone_err = (
                        torch.mean(torch.std(err, dim=0)) * refine_config["bone_weight"]
                    )  # [cs]
                    total_loss += bone_err
                    m["bone_err"] = bone_err

                    gt_bones = get_bone_lengths(
                        torch.from_numpy(gt_pose), connected_joints
                    )
                    gt_bones = torch.mean(gt_bones, dim=0)
                    length_err = torch.nn.functional.mse_loss(err, gt_bones.cuda()) * refine_config["bone_length_weight"]
                    total_loss += length_err
                    m["bone_length_err"] = length_err

                total_loss.backward()
                optimizer.step()
                # print(m)

                # m = {k: v.detach().cpu().numpy() for k, v in m.items()}
                # exp.log_metrics(m, step=curr_iter)

            os.makedirs("nn_refs", exist_ok=True)
            np.save(f"nn_refs/{seq.replace('/', '_')}.npy", pred.cpu().detach().numpy())
            if refine_config["full_batch"]:
                optimized_preds_list[seq].append(
                    add_back_hip(poses_pred.detach().cpu().numpy() * 1000, joint_set)
                )
            else:
                optimized_preds_list[seq].append(
                    add_back_hip(
                        poses_pred[[neighbour_dist_idx]].detach().cpu().numpy() * 1000,
                        joint_set,
                    )
                )

    pred = {k: np.concatenate(v) for k, v in optimized_preds_list.items()}
    pred = TemporalMupotsEvaluator._group_by_seq(pred)
    pred = np.concatenate([pred[i] for i in range(1, 21)])

    l = StackedArrayAllMupotsEvaluator(pred, test_set, True, prefix="R")
    l.eval(calculate_scale_free=True, verbose=True)
    exp.log_metrics(l.losses_to_log)

    pred_by_seq = {}
    for seq in range(1, 21):
        inds = test_set.index.seq_num == seq
        pred_by_seq[seq] = pred[inds]
    pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)

    print("\nR-PCK  R-AUC  A-PCK  A-AUC")
    keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    values = []
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False,
            relative,
            "annot3" if config["pose3d_scaling"] == "normal" else "univ_annot3",
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)

        print(" %4.1f   %4.1f  " % (pck, auc), end="")
    print()
    exp.log_metrics(dict(zip(keys, values)))
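get_bone_lengths is not shown above; a hedged sketch of what the bone-length terms compute, assuming it returns per-frame limb lengths for a list of (joint, joint) pairs:

import torch

def bone_lengths(poses, limbs):
    # poses: (T, J, 3); limbs: iterable of (a, b) joint index pairs -> (T, len(limbs))
    return torch.stack([torch.norm(poses[:, a] - poses[:, b], dim=-1) for a, b in limbs], dim=1)

poses = torch.randn(10, 17, 3)
limbs = [(14, 8), (8, 9), (9, 10)]                  # hypothetical limb pairs
lens = bone_lengths(poses, limbs)                   # (10, 3)
consistency = torch.mean(torch.std(lens, dim=0))    # the term weighted by bone_weight
print(lens.shape, consistency.item())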
Example #15
    def __init__(self, pose2d_type, pose3d_scaling, v='v1'):
        assert pose2d_type == 'hrnet', "only hrnet is implemented"
        assert pose3d_scaling in ['univ', 'normal']

        self.transform = None

        self.pose2d_jointset = PersonStackedMuPoTsDataset.get_jointset(
            pose2d_type)
        self.pose3d_jointset = MuPoTSJoints()

        pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

        poses2d = []
        poses3d = []
        valid_2d_pred = []  # True if HR-net found a pose
        fx = []
        fy = []
        cx = []
        cy = []
        index = []

        calibs = mpii_3dhp.get_calibration_matrices()
        meta_data = muco_temp.get_metadata(v=v)

        for cam in range(11):
            gt = muco_temp.load_gt(cam, v=v)

            for vid in range(7):
                orig_shape = gt[vid][
                    pose3d_key].shape  # (nFrames, nPoses, nJoints, 3)
                poses3d.append(_column_stack(gt[vid][pose3d_key]))

                kp = muco_temp.load_hrnet(cam, vid, v=v)
                poses2d.append(_column_stack(kp['poses']))
                valid_2d_pred.append(_column_stack(kp['is_valid']))

                assert len(poses3d[-1]) == len(poses2d[-1]), \
                    "Gt and predicted frames are not aligned, cam:" + str(cam)

                orig_frame = np.tile(
                    np.arange(orig_shape[0]).reshape(-1, 1),
                    (1, orig_shape[1]))
                orig_pose = np.tile(
                    np.arange(orig_shape[1]).reshape(1, -1),
                    (orig_shape[0], 1))
                orig_frame = _column_stack(orig_frame)  # (nFrames*nPoses,)
                orig_pose = _column_stack(orig_pose)

                index.extend([('%d/%d/%d' % (cam, vid, orig_pose[i]), cam, vid,
                               orig_frame[i], orig_pose[i])
                              for i in range(len(orig_frame))])

                for pose_ind in range(orig_shape[1]):
                    sub, seq, _ = meta_data[cam][vid][pose_ind]
                    calibration_mx = calibs[(sub, seq, cam)]
                    fx.extend([calibration_mx[0, 0]] * orig_shape[0])
                    fy.extend([calibration_mx[1, 1]] * orig_shape[0])
                    cx.extend([calibration_mx[0, 2]] * orig_shape[0])
                    cy.extend([calibration_mx[1, 2]] * orig_shape[0])

        self.poses2d = np.concatenate(poses2d)
        self.poses3d = np.concatenate(poses3d)
        self.valid_2d_pred = np.concatenate(valid_2d_pred)
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'U12'), ('cam', 'int32'),
                                         ('vid', 'int32'), ('frame', 'int32'),
                                         ('pose', 'int32')])

        self.fx = np.array(fx, dtype='float32')
        self.fy = np.array(fy, dtype='float32')
        self.cx = np.array(cx, dtype='float32')
        self.cy = np.array(cy, dtype='float32')

        assert len(self.poses2d) == len(self.index), len(self.index)

        assert len(self.poses2d) == len(self.poses3d)
        assert len(self.poses2d) == len(self.index), len(self.index)
        assert len(self.poses2d) == len(self.valid_2d_pred), len(
            self.valid_2d_pred)
        assert len(self.poses2d) == len(self.fx), len(self.fx)
        assert len(self.poses2d) == len(self.fy), len(self.fy)
        assert len(self.poses2d) == len(self.cx), len(self.cx)
        assert len(self.poses2d) == len(self.cy), len(self.cy)
Example #16
    def __init__(self, pose2d_type, pose3d_scaling, cap_at_25fps, stride=1):
        assert pose2d_type == 'hrnet', "Only hrnet 2d is implemented"
        assert pose3d_scaling in ['normal', 'univ'], \
            "Unexpected pose3d scaling type: " + str(pose3d_scaling)
        self.transform = None

        pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

        poses2d = []
        poses3d = []
        valid_2d_pred = []  # True if HR-net found a pose
        fx = []
        fy = []
        cx = []
        cy = []
        index = []
        sequences = []

        calibs = mpii_3dhp.get_calibration_matrices()
        for sub in range(1, 9):  # S1, ..., S8
            for seq in range(1, 3):  # 2 sequence per S
                gt = mpii_3dhp.train_ground_truth(sub, seq)
                for cam in range(11):
                    # In S3/Seq2 cam2 there are some frames between 9400-9900 where the pose is
                    # behind the camera / nearly in the camera plane. This breaks training.
                    # For simplicity, ignore the whole sequence; ignoring only frames 9400-9900
                    # would also work.
                    if seq == 2 and sub == 3 and cam == 2:
                        continue

                    # Find indices that are selected for the dataset
                    inds = np.arange(len(gt[pose3d_key][cam]))
                    if cap_at_25fps and mpii_3dhp.get_train_fps(sub,
                                                                seq) == 50:
                        inds = inds[::2]
                    inds = inds[::stride]
                    num_frames = len(inds)

                    poses3d.append(gt[pose3d_key][cam][inds])

                    tmp = mpii_3dhp.train_poses_hrnet(sub, seq, cam)
                    poses2d.append(tmp['poses'][inds])
                    valid_2d_pred.append(tmp['is_valid'][inds])

                    assert len(poses3d[-1]) == len(
                        poses2d[-1]
                    ), "Gt and predicted frames are not aligned, seq:" + str(
                        seq)

                    seq_name = 'S%d/Seq%d/%d' % (sub, seq, cam)
                    sequences.append(seq_name)
                    index.extend([(seq_name, sub, seq, cam, i) for i in inds])

                    calibration_mx = calibs[(sub, seq, cam)]
                    fx.extend([calibration_mx[0, 0]] * num_frames)
                    fy.extend([calibration_mx[1, 1]] * num_frames)
                    cx.extend([calibration_mx[0, 2]] * num_frames)
                    cy.extend([calibration_mx[1, 2]] * num_frames)

        self.pose2d_jointset = CocoExJoints()
        self.pose3d_jointset = MuPoTSJoints()

        self.poses2d = np.concatenate(poses2d)
        self.poses3d = np.concatenate(poses3d)
        self.valid_2d_pred = np.concatenate(valid_2d_pred)
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'U12'), ('sub', 'int32'),
                                         ('subseq', 'int32'), ('cam', 'int32'),
                                         ('frame', 'int32')])

        self.fx = np.array(fx, dtype='float32')
        self.fy = np.array(fy, dtype='float32')
        self.cx = np.array(cx, dtype='float32')
        self.cy = np.array(cy, dtype='float32')

        self.sequences = sorted(sequences)

        assert len(self.poses2d) == len(self.index), len(self.index)

        assert len(self.poses2d) == len(self.poses3d)
        assert len(self.poses2d) == len(self.index), len(self.index)
        assert len(self.poses2d) == len(self.valid_2d_pred), len(
            self.valid_2d_pred)
        assert len(self.poses2d) == len(self.fx), len(self.fx)
        assert len(self.poses2d) == len(self.fy), len(self.fy)
        assert len(self.poses2d) == len(self.cx), len(self.cx)
        assert len(self.poses2d) == len(self.cy), len(self.cy)
Example #17
    def __init__(self, pose2d_type, pose3d_scaling, eval_frames_only=False):
        assert pose2d_type == 'hrnet', "Only hrnet 2d is implemented"
        assert pose3d_scaling in ['normal', 'univ'], \
            "Unexpected pose3d scaling type: " + str(pose3d_scaling)
        self.transform = None
        self.eval_frames_only = eval_frames_only

        pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

        poses2d = []
        poses3d = []
        valid_2d_pred = []  # True if HR-net found a pose
        valid_frame = []  # True if MPI-INF-3DHP marked the frame as valid
        fx = []
        fy = []
        cx = []
        cy = []
        width = []
        index = []

        for seq in range(1, 7):
            gt = h5py.File(
                os.path.join(mpii_3dhp.MPII_3DHP_PATH, 'mpi_inf_3dhp_test_set',
                             'TS%d' % seq, 'annot_data.mat'), 'r')
            poses3d.append(gt[pose3d_key][:, 0])
            valid_frame.append(gt['valid_frame'][()] == 1)
            num_frames = len(
                poses3d[-1]
            )  # The annotations are shorter than the number of images

            tmp = mpii_3dhp.test_poses_hrnet(seq)
            poses2d.append(tmp['poses'])
            valid_2d_pred.append(tmp['is_valid'])

            assert len(poses3d[-1]) == len(
                poses2d[-1]
            ), "Gt and predicted frames are not aligned, seq:" + str(seq)

            index.extend([(seq, i) for i in range(num_frames)])

            calibration_mx = mpii_3dhp.get_test_calib(seq)
            fx.extend([calibration_mx[0, 0]] * num_frames)
            fy.extend([calibration_mx[1, 1]] * num_frames)
            cx.extend([calibration_mx[0, 2]] * num_frames)
            cy.extend([calibration_mx[1, 2]] * num_frames)
            width.extend([2048 if seq < 5 else 1920] * num_frames)

        self.pose2d_jointset = CocoExJoints()
        self.pose3d_jointset = MuPoTSJoints()

        self.poses2d = np.concatenate(poses2d)
        self.poses3d = np.concatenate(poses3d)
        self.valid_2d_pred = np.concatenate(valid_2d_pred)
        valid_frame = np.concatenate(valid_frame)
        assert valid_frame.shape[1] == 1, valid_frame.shape
        valid_frame = valid_frame[:, 0]
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'int32'), ('frame', 'int32')])

        self.fx = np.array(fx, dtype='float32')
        self.fy = np.array(fy, dtype='float32')
        self.cx = np.array(cx, dtype='float32')
        self.cy = np.array(cy, dtype='float32')
        self.width = np.array(width, dtype='int32')

        assert len(self.poses2d) == len(self.index), len(self.index)

        # keep only those frame where a pose was detected
        good_poses = self.valid_2d_pred.copy()
        if eval_frames_only:
            good_poses = good_poses & valid_frame

        self.good_poses = good_poses

        assert len(self.poses2d) == len(self.poses3d)
        assert len(self.poses2d) == len(self.index), len(self.index)
        assert len(self.poses2d) == len(self.valid_2d_pred), len(
            self.valid_2d_pred)
        assert len(self.poses2d) == len(self.fx), len(self.fx)
        assert len(self.poses2d) == len(self.fy), len(self.fy)
        assert len(self.poses2d) == len(self.cx), len(self.cx)
        assert len(self.poses2d) == len(self.cy), len(self.cy)
        assert len(self.poses2d) == len(self.width), len(self.width)
        assert len(self.poses2d) == len(self.good_poses), len(self.good_poses)
Example #18
    def __init__(self,
                 pose2d_type,
                 pose3d_scaling,
                 pose_validity='detected_only',
                 hip_threshold=-1):
        """
        Loads the MuPoTS dataset, but only those images where at least one person was detected. Each person
        in a frame is loaded separately.

        :param pose_validity: one of 'all', 'detected_only', 'valid_only'; specifies which poses are marked valid.
                              all - all of them; valid_only - those that are valid according to the GT annotations;
                              detected_only - those that were successfully detected by the 2D algorithm and are also valid
        :param hip_threshold: only those poses are loaded where the score of the hip is larger than this value
        :param filter_incorrect_match: MuPoTS's pose matching script has some erroneous matches. If True,
                                these are not loaded.
        """
        assert pose_validity in ['all', 'detected_only', 'valid_only']
        assert pose3d_scaling in ['univ', 'normal']

        self.pose2d_jointset = PersonStackedMuPoTsDataset.get_jointset(
            pose2d_type)
        self.pose3d_jointset = MuPoTSJoints()
        self.pose3d_scaling = pose3d_scaling
        pred2d_root_ind = self.pose2d_jointset.index_of('hip')

        poses2d = []
        poses3d = []
        joint3d_visible = []
        all_good_poses = []
        valid_annotations = []
        width = []
        index = []
        for seq in range(1, 21):
            img_width, img_height = mupots_3d.image_size(seq)

            gt = mupots_3d.load_gt_annotations(seq)
            pred2d = mupots_3d.load_2d_predictions(seq, pose2d_type)

            pose2d = pred2d['pose']
            pose3d = gt['annot3' if pose3d_scaling ==
                        'normal' else 'univ_annot3']
            visibility = ~gt['occlusions']

            if pose_validity == 'all':
                good_poses = np.full(pose3d.shape[:2], True, dtype='bool')
            elif pose_validity == 'valid_only':
                good_poses = gt['isValidFrame'].squeeze()
            elif pose_validity == 'detected_only':
                good_poses = gt['isValidFrame'].squeeze()
                good_poses = np.logical_and(good_poses, pred2d['valid_pose'])
                good_poses = np.logical_and(
                    good_poses,
                    pose2d[:, :, pred2d_root_ind, 2] > hip_threshold)
            else:
                raise NotImplementedError("Unknown pose_validity value:" +
                                          pose_validity)

            orig_frame = np.tile(
                np.arange(len(good_poses)).reshape(-1, 1),
                (1, good_poses.shape[1]))
            orig_pose = np.tile(
                np.arange(good_poses.shape[1]).reshape(1, -1),
                (good_poses.shape[0], 1))

            assert pose2d.shape[:2] == good_poses.shape  # (nFrames, nPeople)
            assert pose3d.shape[:2] == good_poses.shape
            assert orig_frame.shape == good_poses.shape
            assert orig_pose.shape == good_poses.shape
            assert pose2d.shape[2:] == (self.pose2d_jointset.NUM_JOINTS, 3)
            assert pose3d.shape[2:] == (17, 3)
            assert visibility.shape[2] == 17
            assert good_poses.ndim == 2

            orig_frame = _column_stack(orig_frame)
            orig_pose = _column_stack(orig_pose)

            index.extend([('%d/%d' % (seq, orig_pose[i]), seq, orig_frame[i],
                           orig_pose[i]) for i in range(len(orig_frame))])

            poses2d.append(_column_stack(pose2d))
            poses3d.append(_column_stack(pose3d))
            joint3d_visible.append(_column_stack(visibility))
            all_good_poses.append(_column_stack(good_poses))
            valid_annotations.append(_column_stack(gt['isValidFrame']))
            width.extend([img_width] * len(orig_frame))

        self.poses2d = np.concatenate(poses2d).astype('float32')
        self.poses3d = np.concatenate(poses3d).astype('float32')
        self.joint3d_visible = np.concatenate(joint3d_visible)
        self.good_poses = np.concatenate(all_good_poses)
        self.valid_annotations = np.concatenate(valid_annotations)
        self.width = np.array(width)
        self.index = np.rec.array(index,
                                  dtype=[('seq', 'U5'), ('seq_num', 'int32'),
                                         ('frame', 'int32'),
                                         ('pose', 'int32')])

        assert self.valid_annotations.shape == self.good_poses.shape
        assert len(self.valid_annotations) == len(self.poses2d)

        # Load calibration matrices
        N = len(self.poses2d)
        self.fx = np.zeros(N, dtype='float32')
        self.fy = np.zeros(N, dtype='float32')
        self.cx = np.zeros(N, dtype='float32')
        self.cy = np.zeros(N, dtype='float32')

        mupots_calibs = mupots_3d.get_calibration_matrices()
        for seq in range(1, 21):
            inds = (self.index.seq_num == seq)
            self.fx[inds] = mupots_calibs[seq][0, 0]
            self.fy[inds] = mupots_calibs[seq][1, 1]
            self.cx[inds] = mupots_calibs[seq][0, 2]
            self.cy[inds] = mupots_calibs[seq][1, 2]

        assert np.all(self.fx > 0), "Some fields were not filled"
        assert np.all(self.fy > 0), "Some fields were not filled"
        assert np.all(np.abs(self.cx) > 0), "Some fields were not filled"
        assert np.all(np.abs(self.cy) > 0), "Some fields were not filled"
        self.transform = None
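A small sketch of how the 'detected_only' option in the constructor above combines its three conditions (all values made up): the GT valid-frame flag, the detector's valid-pose flag, and a minimum hip confidence score:

import numpy as np

is_valid_frame = np.array([[True, True], [True, False]])    # (nFrames, nPeople)
valid_pose = np.array([[True, False], [True, True]])
hip_score = np.array([[0.9, 0.8], [0.2, 0.7]])
hip_threshold = 0.5

good_poses = is_valid_frame & valid_pose & (hip_score > hip_threshold)
# -> [[ True, False], [False, False]]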