Example #1
def read_data(folder):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = sorted(glob.glob(folder + '/labels/' + '*.mat'))

    for fname in tqdm(file_names):
        vid_dict = load_mat(fname)
        imgs = sorted(
            glob.glob(folder + '/frames/' +
                      fname.strip().split('/')[-1].split('.')[0] + '/*.jpg'))
        kp_2d = np.zeros((vid_dict['nframes'], 13, 3))
        perm_idxs = get_perm_idxs('pennaction', 'common')

        kp_2d[:, :, 0] = vid_dict['x']
        kp_2d[:, :, 1] = vid_dict['y']
        kp_2d[:, :, 2] = vid_dict['visibility']
        kp_2d = kp_2d[:, perm_idxs, :]

        # fix inconsistency: pad the 13 joints to the 14-joint common format
        # (slots 0-11 copied, last joint moved to slot 13, slot 12 left as zeros)
        n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
        n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
        n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
        kp_2d = n_kp_2d

        bbox = np.zeros((vid_dict['nframes'], 4))

        for fr_id, fr in enumerate(kp_2d):
            u, d, l, r = calc_kpt_bound(fr)
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            c_x, c_y = center[0], center[1]
            w, h = r - l, d - u
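            # keep the larger of width and height so the crop is square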
            w = h = np.where(w / h > 1, w, h)

            bbox[fr_id, :] = np.array([c_x, c_y, w, h])

        dataset['vid_name'].append(np.array([f'{fname}'] *
                                            vid_dict['nframes']))
        dataset['img_name'].append(np.array(imgs))
        dataset['joints2D'].append(kp_2d)
        dataset['bbox'].append(bbox)

        features = extract_features(model,
                                    np.array(imgs),
                                    bbox,
                                    dataset='pennaction',
                                    debug=False)
        dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    return dataset
Example #2
def read_data(folder, set, debug=False):

    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    model = spin.get_pretrained_hmr()

    if set == 'val': set = 'test'
    sequences = [
        x.split('.')[0]
        for x in os.listdir(osp.join(folder, 'sequenceFiles', set))
    ]

    J_regressor = None

    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    if set == 'test':
        J_regressor = torch.from_numpy(
            np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

    for i, seq in tqdm(enumerate(sequences)):

        data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')

        data = pkl.load(open(data_file, 'rb'), encoding='latin1')

        img_dir = osp.join(folder, 'imageFiles', seq)

        num_people = len(data['poses'])
        num_frames = len(data['img_frame_ids'])
        assert (data['poses2d'][0].shape[0] == num_frames)

        for p_id in range(num_people):
            pose = torch.from_numpy(data['poses'][p_id]).float()
            shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(
                pose.size(0), 1)
            trans = torch.from_numpy(data['trans'][p_id]).float()
            j2d = data['poses2d'][p_id].transpose(0, 2, 1)
            cam_pose = data['cam_poses']
            campose_valid = data['campose_valid'][p_id]

            # ======== Align the mesh params ======== #
            rot = pose[:, :3]
            rot_mat = batch_rodrigues(rot)

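            # express the SMPL global orientation in each frame's camera coordinate frame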
            Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
            Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
            rot = rotation_matrix_to_angle_axis(Rs)
            pose[:, :3] = rot
            # ======== Align the mesh params ======== #

            output = smpl(betas=shape,
                          body_pose=pose[:, 3:],
                          global_orient=pose[:, :3],
                          transl=trans)
            # verts = output.vertices
            j3d = output.joints

            if J_regressor is not None:
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(
                    vertices.shape[0], -1, -1).to(vertices.device)
                j3d = torch.matmul(J_regressor_batch, vertices)
                j3d = j3d[:, H36M_TO_J14, :]

            img_paths = []
            for i_frame in range(num_frames):
                img_path = os.path.join(img_dir,
                                        'image_{:05d}.jpg'.format(i_frame))
                img_paths.append(img_path)

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
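            # turn the smoothed bbox scale into a square box side (150 px reference), enlarged by 10%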
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # process keypoints
            j2d[:, :, 2] = j2d[:, :, 2] > 0.3  # set the visibility flags
            # Convert to common 2d keypoint format
            perm_idxs = get_perm_idxs('3dpw', 'common')
            perm_idxs += [0, 0]  # no neck, top head
            j2d = j2d[:, perm_idxs]
            j2d[:, 12:, 2] = 0.0

            # print('j2d', j2d[time_pt1:time_pt2].shape)
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
            dataset['vid_name'].append(
                np.array([f'{seq}_{p_id}'] * num_frames)[time_pt1:time_pt2])
            dataset['frame_id'].append(
                np.arange(0, num_frames)[time_pt1:time_pt2])
            dataset['img_name'].append(img_paths_array)
            dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
            dataset['joints2D'].append(j2d[time_pt1:time_pt2])
            dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
            dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
            dataset['bbox'].append(bbox)
            dataset['valid'].append(campose_valid[time_pt1:time_pt2])

            features = extract_features(model,
                                        img_paths_array,
                                        bbox,
                                        kp_2d=j2d[time_pt1:time_pt2],
                                        debug=debug,
                                        dataset='3dpw',
                                        scale=1.2)
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out keypoints
    indices_to_use = np.where(
        (dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
Example #3
def read_data_train(dataset_path, debug=False):
    h, w = 2048, 2048
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    # training data
    user_list = range(1, 9)
    seq_list = range(1, 3)
    vid_list = list(range(3)) + list(range(4, 9))

    # product = product(user_list, seq_list, vid_list)
    # user_i, seq_i, vid_i = product[process_id]

    for user_i in user_list:
        for seq_i in seq_list:
            seq_path = os.path.join(dataset_path, 'S' + str(user_i),
                                    'Seq' + str(seq_i))
            # mat file with annotations
            annot_file = os.path.join(seq_path, 'annot.mat')
            annot2 = sio.loadmat(annot_file)['annot2']
            annot3 = sio.loadmat(annot_file)['annot3']
            # calibration file and camera parameters
            for j, vid_i in enumerate(vid_list):
                # image folder
                imgs_path = os.path.join(seq_path, 'video_' + str(vid_i))
                # per frame
                pattern = os.path.join(imgs_path, '*.jpg')
                img_list = sorted(glob.glob(pattern))
                vid_used_frames = []
                vid_used_joints = []
                vid_used_bbox = []
                vid_segments = []
                vid_uniq_id = "subj" + str(user_i) + '_seq' + str(
                    seq_i) + "_vid" + str(vid_i) + "_seg0"
                for i, img_i in tqdm_enumerate(img_list):

                    # for each image we store the relevant annotations
                    img_name = img_i.split('/')[-1]
                    joints_2d_raw = np.reshape(annot2[vid_i][0][i], (1, 28, 2))
                    joints_2d_raw = np.append(joints_2d_raw,
                                              np.ones((1, 28, 1)),
                                              axis=2)
                    joints_2d = convert_kps(joints_2d_raw, "mpii3d",
                                            "spin").reshape((-1, 3))

                    # visualize = True
                    # if visualize == True and i == 500:
                    #     import matplotlib.pyplot as plt
                    #
                    #     frame = cv2.cvtColor(cv2.imread(img_i), cv2.COLOR_BGR2RGB)
                    #
                    #     for k in range(49):
                    #         kp = joints_2d[k]
                    #
                    #         frame = cv2.circle(
                    #             frame.copy(),
                    #             (int(kp[0]), int(kp[1])),
                    #             thickness=3,
                    #             color=(255, 0, 0),
                    #             radius=5,
                    #         )
                    #
                    #         cv2.putText(frame, f'{k}', (int(kp[0]), int(kp[1]) + 1), cv2.FONT_HERSHEY_SIMPLEX, 1.5,
                    #                     (0, 255, 0),
                    #                     thickness=3)
                    #
                    #     plt.imshow(frame)
                    #     plt.show()

                    joints_3d_raw = np.reshape(annot3[vid_i][0][i],
                                               (1, 28, 3)) / 1000
                    joints_3d = convert_kps(joints_3d_raw, "mpii3d",
                                            "spin").reshape((-1, 3))

                    bbox = get_bbox_from_kp2d(
                        joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

                    joints_3d = joints_3d - joints_3d[39]  # joint 39 is the root (pelvis) in the spin format

                    # check that all joints are visible
                    x_in = np.logical_and(joints_2d[:, 0] < w,
                                          joints_2d[:, 0] >= 0)
                    y_in = np.logical_and(joints_2d[:, 1] < h,
                                          joints_2d[:, 1] >= 0)
                    ok_pts = np.logical_and(x_in, y_in)
                    if np.sum(ok_pts) < joints_2d.shape[0]:
                        vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1])+ "_seg" +\
                                          str(int(dataset['vid_name'][-1].split("_")[-1][3:])+1)
                        continue

                    dataset['vid_name'].append(vid_uniq_id)
                    dataset['frame_id'].append(img_name.split(".")[0])
                    dataset['img_name'].append(img_i)
                    dataset['joints2D'].append(joints_2d)
                    dataset['joints3D'].append(joints_3d)
                    dataset['bbox'].append(bbox)
                    vid_segments.append(vid_uniq_id)
                    vid_used_frames.append(img_i)
                    vid_used_joints.append(joints_2d)
                    vid_used_bbox.append(bbox)

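                # ids holds the start index of each contiguous video segment plus an end sentinel,
                # used by the (commented-out) per-segment feature extraction below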
                vid_segments = np.array(vid_segments)
                ids = np.zeros((len(set(vid_segments)) + 1))
                ids[-1] = len(vid_used_frames) + 1
                if (np.where(
                        vid_segments[:-1] != vid_segments[1:])[0]).size != 0:
                    ids[1:-1] = (np.where(
                        vid_segments[:-1] != vid_segments[1:])[0]) + 1

                # for i in tqdm(range(len(set(vid_segments)))):
                #     features = extract_features(model, np.array(vid_used_frames)[int(ids[i]):int(ids[i+1])],
                #                                 vid_used_bbox[int(ids[i]):int((ids[i+1]))],
                #                                 kp_2d=np.array(vid_used_joints)[int(ids[i]):int(ids[i+1])],
                #                                 dataset='spin', debug=False)
                #     dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    # dataset['features'] = np.concatenate(dataset['features'])

    return dataset
Example #4
def read_single_record(fname):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints2D': [],  # should contain openpose keypoints only
        # 'features': [],
        # 'bbox':[],
    }

    model = spin.get_pretrained_hmr()

    sess = tf.Session()

    for vid_idx, serialized_ex in tqdm(
            enumerate(tf.python_io.tf_record_iterator(fname))):
        example = tf.train.Example()
        example.ParseFromString(serialized_ex)

        N = int(example.features.feature['meta/N'].int64_list.value[0])

        # print(fname, vid_idx, N)
        # This is a list of length N
        images_data = example.features.feature[
            'image/encoded'].bytes_list.value

        xys = example.features.feature['image/xys'].float_list.value
        xys = np.array(xys).reshape(-1, 2, 14)

        face_pts = example.features.feature['image/face_pts'].float_list.value
        face_pts = np.array(face_pts).reshape(-1, 3, 5)

        toe_pts = example.features.feature['image/toe_pts'].float_list.value

        if len(toe_pts) == 0:
            toe_pts = np.zeros((xys.shape[0], 3, 6))  # shape must be passed as a tuple

        toe_pts = np.array(toe_pts).reshape(-1, 3, 6)

        visibles = example.features.feature[
            'image/visibilities'].int64_list.value
        visibles = np.array(visibles).reshape(-1, 1, 14)

        video = []
        kp_2d = []
        for i in range(N):
            image = np.expand_dims(sess.run(
                tf.image.decode_jpeg(images_data[i], channels=3)),
                                   axis=0)
            video.append(image)

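            # stack x/y with visibility into a 3x14 array, then append face and toe keypoints column-wise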
            kp = np.vstack((xys[i], visibles[i]))
            faces = face_pts[i]
            toes = toe_pts[i]

            kp = np.hstack((kp, faces, toes))

            if 'image/phis' in example.features.feature.keys():
                # Preprocessed, so kps are in [-1, 1]
                img_shape = 224  # image.shape[0]
                vis = kp[2, :]
                kp = ((kp[:2, :] + 1) * 0.5) * img_shape
                kp = np.vstack((kp, vis))

            kp_2d.append(np.expand_dims(kp.T, axis=0))

        video = np.concatenate(video, axis=0)

        kp_2d = np.concatenate(kp_2d, axis=0)

        vid_name = f'{fname}-{vid_idx}'
        frame_id = np.arange(N)
        joints2D = kp_2d

        dataset['vid_name'].append(np.array([vid_name] * N))
        dataset['frame_id'].append(frame_id)
        dataset['joints2D'].append(joints2D)
        # dataset['video'].append(video)
        # features = extract_features(model, video, bbox=None, kp_2d=kp_2d, dataset='insta', debug=False)
        # dataset['features'].append(features)
        #
        # print(features.shape)
        # assert features.shape[0] == N

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    for k, v in dataset.items():
        print(k, len(v))

    return dataset
Example #5
def read_test_data(dataset_path):

    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        "valid_i": []
    }

    model = spin.get_pretrained_hmr()

    user_list = range(1, 7)

    for user_i in user_list:
        print('Subject', user_i)
        seq_path = os.path.join(dataset_path, 'mpi_inf_3dhp_test_set',
                                'TS' + str(user_i))
        # mat file with annotations
        annot_file = os.path.join(seq_path, 'annot_data.mat')
        mat_as_h5 = h5py.File(annot_file, 'r')
        annot2 = np.array(mat_as_h5['annot2'])
        annot3 = np.array(mat_as_h5['univ_annot3'])
        valid = np.array(mat_as_h5['valid_frame'])

        vid_used_frames = []
        vid_used_joints = []
        vid_used_bbox = []
        vid_segments = []
        vid_uniq_id = "subj" + str(user_i) + "_seg0"

        for frame_i, valid_i in tqdm(enumerate(valid)):

            img_i = os.path.join('mpi_inf_3dhp_test_set', 'TS' + str(user_i),
                                 'imageSequence',
                                 'img_' + str(frame_i + 1).zfill(6) + '.jpg')

            joints_2d_raw = np.expand_dims(annot2[frame_i, 0, :, :], axis=0)
            joints_2d_raw = np.append(joints_2d_raw,
                                      np.ones((1, 17, 1)),
                                      axis=2)

            joints_2d = convert_kps(joints_2d_raw,
                                    src="mpii3d_test",
                                    dst="spin").reshape((-1, 3))

            # visualize = True
            # if visualize == True:
            #     import matplotlib.pyplot as plt
            #
            #     frame = cv2.cvtColor(cv2.imread(os.path.join(dataset_path, img_i)), cv2.COLOR_BGR2RGB)
            #
            #     for k in range(49):
            #         kp = joints_2d[k]
            #
            #         frame = cv2.circle(
            #             frame.copy(),
            #             (int(kp[0]), int(kp[1])),
            #             thickness=3,
            #             color=(255, 0, 0),
            #             radius=5,
            #         )
            #
            #         cv2.putText(frame, f'{k}', (int(kp[0]), int(kp[1]) + 1), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0),
            #                     thickness=3)
            #
            #     plt.imshow(frame)
            #     plt.show()

            joints_3d_raw = np.reshape(annot3[frame_i, 0, :, :],
                                       (1, 17, 3)) / 1000
            joints_3d = convert_kps(joints_3d_raw, "mpii3d_test",
                                    "spin").reshape((-1, 3))
            joints_3d = joints_3d - joints_3d[39]  # subtract the pelvis; joint 39 is the root in the spin format

            bbox = get_bbox_from_kp2d(
                joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

            # check that all joints are visible
            img_file = os.path.join(dataset_path, img_i)
            I = cv2.imread(img_file)
            h, w, _ = I.shape
            x_in = np.logical_and(joints_2d[:, 0] < w, joints_2d[:, 0] >= 0)
            y_in = np.logical_and(joints_2d[:, 1] < h, joints_2d[:, 1] >= 0)
            ok_pts = np.logical_and(x_in, y_in)

            if np.sum(ok_pts) < joints_2d.shape[0]:
                vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                              str(int(dataset['vid_name'][-1].split("_")[-1][3:]) + 1)
                continue

            dataset['vid_name'].append(vid_uniq_id)
            dataset['frame_id'].append(img_file.split("/")[-1].split(".")[0])
            dataset['img_name'].append(img_file)
            dataset['joints2D'].append(joints_2d)
            dataset['joints3D'].append(joints_3d)
            dataset['bbox'].append(bbox)
            dataset['valid_i'].append(valid_i)

            vid_segments.append(vid_uniq_id)
            vid_used_frames.append(img_file)
            vid_used_joints.append(joints_2d)
            vid_used_bbox.append(bbox)

        vid_segments = np.array(vid_segments)
        ids = np.zeros((len(set(vid_segments)) + 1))
        ids[-1] = len(vid_used_frames) + 1
        if (np.where(vid_segments[:-1] != vid_segments[1:])[0]).size != 0:
            ids[1:-1] = (np.where(
                vid_segments[:-1] != vid_segments[1:])[0]) + 1

        # for i in tqdm(range(len(set(vid_segments)))):
        #     features = extract_features(model, np.array(vid_used_frames)[int(ids[i]):int(ids[i + 1])],
        #                                 vid_used_bbox[int(ids[i]):int(ids[i + 1])],
        #                                 kp_2d=np.array(vid_used_joints)[int(ids[i]):int(ids[i + 1])],
        #                                 dataset='spin', debug=False)
        #     dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    # dataset['features'] = np.concatenate(dataset['features'])

    return dataset
Example #6
def read_data(folder, set):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = glob.glob(
        osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json'))
    file_names = sorted(file_names)
    nn_corrupted = 0
    tot_frames = 0
    min_frame_number = 8

    for fid, fname in tqdm_enumerate(file_names):
        if fname == osp.join(folder,
                             'annotations/train/021133_mpii_train.json'):
            continue

        with open(fname, 'r') as entry:
            anns = json.load(entry)
        # num_frames = anns['images'][0]['nframes']
        anns['images'] = [
            item for item in anns['images'] if item['is_labeled']
        ]
        num_frames = len(anns['images'])
        frame2imgname = dict()
        for el in anns['images']:
            frame2imgname[el['frame_id']] = el['file_name']

        num_people = -1
        for x in anns['annotations']:
            if num_people < x['track_id']:
                num_people = x['track_id']
        num_people += 1
        posetrack_joints = get_posetrack_original_kp_names()
        idxs = [
            anns['categories'][0]['keypoints'].index(h)
            for h in posetrack_joints
            if h in anns['categories'][0]['keypoints']
        ]
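        # reorder every annotation's keypoints into the posetrack joint order used by convert_kps below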
        for x in anns['annotations']:
            kps = np.array(x['keypoints']).reshape((17, 3))
            kps = kps[idxs, :]
            x['keypoints'] = list(kps.flatten())

        tot_frames += num_people * num_frames
        for p_id in range(num_people):

            annot_pid = [(item['keypoints'], item['bbox'], item['image_id'])
                         for item in anns['annotations']
                         if item['track_id'] == p_id
                         and not (np.count_nonzero(item['keypoints']) == 0)]

            if len(annot_pid) < min_frame_number:
                nn_corrupted += len(annot_pid)
                continue

            bbox = np.zeros((len(annot_pid), 4))
            # perm_idxs = get_perm_idxs('posetrack', 'common')
            kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0]) // 3, 3))
            img_paths = np.zeros((len(annot_pid)))

            for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid):

                if (bbox_p[2] == 0 or bbox_p[3] == 0):
                    nn_corrupted += 1
                    continue

                img_paths[i] = image_id
                key2djnts[2::3] = len(key2djnts[2::3]) * [1]

                kp_2d[i, :] = np.array(key2djnts).reshape(
                    int(len(key2djnts) / 3), 3)  # [perm_idxs, :]
                for kp_loc in kp_2d[i, :]:
                    if kp_loc[0] == 0 and kp_loc[1] == 0:
                        kp_loc[2] = 0

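                # convert the top-left [x, y, w, h] box to a center-based square box, then shrink it by 20%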
                x_tl = bbox_p[0]
                y_tl = bbox_p[1]
                w = bbox_p[2]
                h = bbox_p[3]
                bbox_p[0] = x_tl + w / 2
                bbox_p[1] = y_tl + h / 2
                #

                w = h = np.where(w / h > 1, w, h)
                w = h = h * 0.8
                bbox_p[2] = w
                bbox_p[3] = h
                bbox[i, :] = bbox_p

            img_paths = list(img_paths)
            img_paths = [
                osp.join(folder, frame2imgname[item]) if item != 0 else 0
                for item in img_paths
            ]

            bbx_idxs = []
            for bbx_id, bbx in enumerate(bbox):
                if np.count_nonzero(bbx) == 0:
                    bbx_idxs += [bbx_id]

            kp_2d = np.delete(kp_2d, bbx_idxs, 0)
            img_paths = np.delete(np.array(img_paths), bbx_idxs, 0)
            bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0)

            # Convert to common 2d keypoint format
            if bbox.size == 0 or bbox.shape[0] < min_frame_number:
                nn_corrupted += 1
                continue

            kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin')

            dataset['vid_name'].append(
                np.array([f'{fname}_{p_id}'] * img_paths.shape[0]))
            dataset['img_name'].append(np.array(img_paths))
            dataset['joints2D'].append(kp_2d)
            dataset['bbox'].append(np.array(bbox))

            # compute_features
            features = extract_features(
                model,
                np.array(img_paths),
                bbox,
                kp_2d=kp_2d,
                dataset='spin',
                debug=False,
            )

            assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0]

            dataset['features'].append(features)

    print(nn_corrupted, tot_frames)
    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    for k, v in dataset.items():
        print(k, v.shape)

    return dataset
Example #7
def read_data(amass_data, set, debug=False, max_samples = -1):

    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }
    device = (
        torch.device("cuda", index=0)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    model = spin.get_pretrained_hmr()
    smpl_renderer = SMPL_Renderer(device=device,
                                  image_size=400,
                                  camera_mode="look_at")
    for i, (k, v) in tqdm(enumerate(amass_data)):
        vid_name, frame_id, j3d, j2d, shape, pose, bbox, img_name, features, valid = \
            amass_to_dataset(k, v, set=set, smpl_renderer=smpl_renderer)

        if vid_name is not None:
            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = img_name
            dataset['vid_name'].append(vid_name)
            dataset['frame_id'].append(frame_id)
            dataset['img_name'].append(img_name)
            dataset['joints3D'].append(j3d)
            dataset['joints2D'].append(j2d)
            dataset['shape'].append(shape)
            dataset['pose'].append(pose)
            dataset['bbox'].append(bbox)
            dataset['valid'].append(valid)

            features = extract_features(model,
                                        img_paths_array,
                                        bbox,
                                        kp_2d=j2d[time_pt1:time_pt2],
                                        debug=debug,
                                        dataset='3dpw',
                                        scale=1.2)
            dataset['features'].append(features)

        if max_samples != -1 and i > max_samples:
            break
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out keypoints
    indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
Example #8
def read_data_train(dataset_path, set='train', debug=False):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    # occluders = load_occluders('./data/VOC2012')

    model = spin.get_pretrained_hmr()

    if set == 'train':
        subjects = [1, 5, 6, 7, 8]
    else:
        subjects = [9, 11]
    for subject in subjects:
        annot_path = osp.join(dataset_path, 'annotations')
        # camera load
        with open(
                osp.join(annot_path,
                         'Human36M_subject' + str(subject) + '_camera.json'),
                'r') as f:
            cameras = json.load(f)
        # joint coordinate load
        with open(
                osp.join(annot_path,
                         'Human36M_subject' + str(subject) + '_joint_3d.json'),
                'r') as f:
            joints = json.load(f)
        # SMPL parameters obtained by NeuralAnnot will be released (https://arxiv.org/abs/2011.11232) after publication
        # # smpl parameter load
        # with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_SMPL_NeuralAnnot.json'), 'r') as f:
        #     smpl_params = json.load(f)

        seq_list = sorted(glob.glob(dataset_path +
                                    f'/images/s_{subject:02d}*'))
        for seq in tqdm(seq_list):
            seq_name = seq.split('/')[-1]
            act = str(int(seq_name.split('_act_')[-1][0:2]))
            subact = str(int(seq_name.split('_subact_')[-1][0:2]))
            cam = str(int(seq_name.split('_ca_')[-1][0:2]))
            # if cam != '4':  # front camera (Table 6)
            #     continue
            print("seq name: ", seq)

            img_paths = sorted(glob.glob(seq + '/*.jpg'))
            num_frames = len(img_paths)
            if num_frames < 1:
                continue
            # camera parameter
            cam_param = cameras[cam]
            R, t, f, c = np.array(cam_param['R'], dtype=np.float32), np.array(
                cam_param['t'], dtype=np.float32), np.array(
                    cam_param['f'],
                    dtype=np.float32), np.array(cam_param['c'],
                                                dtype=np.float32)

            # img starts from index 1, and annot starts from index 0
            poses = np.zeros((num_frames, 72), dtype=np.float32)
            shapes = np.zeros((num_frames, 10), dtype=np.float32)
            j3ds = np.zeros((num_frames, 49, 3), dtype=np.float32)
            j2ds = np.zeros((num_frames, 49, 3), dtype=np.float32)

            for img_i in tqdm(range(num_frames)):
                # smpl_param = smpl_params[act][subact][str(img_i)][cam]
                # pose = np.array(smpl_param['pose'], dtype=np.float32)
                # shape = np.array(smpl_param['shape'], dtype=np.float32)

                joint_world = np.array(joints[act][subact][str(img_i)],
                                       dtype=np.float32)
                # swap right/left leg joints (hip, knee, ankle) to match the expected convention
                match = [[1, 4], [2, 5], [3, 6]]
                for m in match:
                    l, r = m
                    joint_world[l], joint_world[r] = joint_world[r].copy(
                    ), joint_world[l].copy()
                joint_cam = world2cam(joint_world, R, t)
                joint_img = cam2pixel(joint_cam, f, c)

                j3d = convert_kps(joint_cam[None, :, :] / 1000, "h36m",
                                  "spin").reshape((-1, 3))
                j3d = j3d - j3d[39]  # joint 39 is the root (pelvis) in the spin format

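                # mark all projected 2D joints as visible before converting to the spin format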
                joint_img[:, 2] = 1
                j2d = convert_kps(joint_img[None, :, :], "h36m",
                                  "spin").reshape((-1, 3))

                # poses[img_i] = pose
                # shapes[img_i] = shape
                j3ds[img_i] = j3d
                j2ds[img_i] = j2d
                """
                import torch
                smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    
                p = torch.from_numpy(pose).float().reshape(1,-1,3)
                s = torch.from_numpy(shape).float().reshape(1,-1)
                J_regressor = torch.from_numpy(np.load(osp.join(TCMR_DATA_DIR, 'J_regressor_h36m.npy'))).float()
                output = smpl(betas=s, body_pose=p[:, 3:], global_orient=p[:, :3])
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
                temp_j3d = torch.matmul(J_regressor_batch, vertices) * 1000
                # temp_j3d = temp_j3d - temp_j3d[:, 0, :]
                temp_j3d = temp_j3d[0, H36M_TO_J14, :]
    
                gt_j3d = joint_cam - joint_cam[0, :]
                gt_j3d = gt_j3d[H36M_TO_J14, :]
    
                print("CHECK: ", (temp_j3d-gt_j3d))
                """

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2ds, vis_thresh=VIS_THRESH, sigma=8)
            # bbox_params, time_pt1, time_pt2 = get_all_bbox_params(j2ds, vis_thresh=VIS_THRESH)
            """
            img = cv2.imread(img_paths[0])
            temp = draw_skeleton(img, j2ds[0], dataset='spin', unnormalize=False, thickness=2)
            cv2.imshow('img', temp)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            cv2.waitKey(1)
            """

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]

            w = h = 150. / scale
            w = h = h * 0.9  # 1.1 for h36m_train_25fps_occ_db.pt
            bbox = np.vstack([c_x, c_y, w, h]).T

            # subsample every other frame (to 25 fps)
            img_paths_array = np.array(img_paths)[time_pt1:time_pt2][::2]
            bbox = bbox[::2]

            dataset['vid_name'].append(
                np.array([f'{seq}_{subject}'] *
                         num_frames)[time_pt1:time_pt2][::2])
            dataset['frame_id'].append(
                np.arange(0, num_frames)[time_pt1:time_pt2][::2])
            dataset['joints3D'].append(j3ds[time_pt1:time_pt2][::2])
            dataset['joints2D'].append(j2ds[time_pt1:time_pt2][::2])
            dataset['shape'].append(shapes[time_pt1:time_pt2][::2])
            dataset['pose'].append(poses[time_pt1:time_pt2][::2])

            dataset['img_name'].append(img_paths_array)
            dataset['bbox'].append(bbox)

            features = extract_features(
                model,
                None,
                img_paths_array,
                bbox,
                kp_2d=j2ds[time_pt1:time_pt2][::2],
                debug=debug,
                dataset='h36m',
                scale=1.0)  # 1.2 for h36m_train_25fps_occ_db.pt

            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    return dataset
Example #9
def read_data(folder):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'bbox_orig': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = sorted(glob.glob(folder + '/labels/' + '*.mat'))

    for fname in tqdm(file_names):
        vid_dict = load_mat(fname)
        imgs = sorted(
            glob.glob(folder + '/frames/' +
                      fname.strip().split('/')[-1].split('.')[0] + '/*.jpg'))
        kp_2d = np.zeros((vid_dict['nframes'], 13, 3))
        perm_idxs = get_perm_idxs('pennaction', 'common')

        kp_2d[:, :, 0] = vid_dict['x']
        kp_2d[:, :, 1] = vid_dict['y']
        kp_2d[:, :, 2] = vid_dict['visibility']
        kp_2d = kp_2d[:, perm_idxs, :]

        # fix inconsistency: pad the 13 joints to the 14-joint common format
        # (slots 0-11 copied, last joint moved to slot 13, slot 12 left as zeros)
        n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
        n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
        n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
        kp_2d = n_kp_2d

        bbox = np.zeros((vid_dict['nframes'], 4))
        bbox_orig = np.zeros((vid_dict['nframes'], 4))

        for fr_id, fr in enumerate(kp_2d):
            u, d, l, r = calc_kpt_bound(fr)
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            c_x, c_y = center[0], center[1]
            w, h = r - l, d - u
            h *= 1.1
            bbox_orig[fr_id, :] = np.array([c_x, c_y, h * 0.5, h])

            w = h = np.where(w / h > 1, w, h)

            bbox[fr_id, :] = np.array([c_x, c_y, w, h])
            # if True:
            #     tmpimgname = imgs[fr_id]
            #     import matplotlib.pyplot as plt
            #     import matplotlib.patches as patches
            #     fig, ax = plt.subplots()
            #     tmpimg = plt.imread(tmpimgname)
            #     ax.imshow(tmpimg)
            #     rect = patches.Rectangle((bbox_orig[fr_id, 0] - bbox_orig[fr_id, 2] / 2, bbox_orig[fr_id, 1] - bbox_orig[fr_id, 3] / 2),
            #                              bbox_orig[fr_id, 2], bbox_orig[fr_id, 3], linewidth=2, edgecolor='r', facecolor='none')
            #     ax.add_patch(rect)
            #     rect = patches.Rectangle((bbox[fr_id, 0] - bbox[fr_id, 2] / 2, bbox[fr_id, 1] - bbox[fr_id, 3] / 2),
            #                              bbox[fr_id, 2], bbox[fr_id, 3], linewidth=2, edgecolor='g', facecolor='none')
            #     ax.add_patch(rect)
            #     plt.show()
            #     print('vis')

        dataset['vid_name'].append(np.array([f'{fname}'] *
                                            vid_dict['nframes']))
        dataset['img_name'].append(np.array(imgs))
        dataset['joints2D'].append(kp_2d)
        dataset['bbox'].append(bbox)
        dataset['bbox_orig'].append(bbox_orig)
        features = extract_features(model,
                                    np.array(imgs),
                                    bbox,
                                    dataset='pennaction',
                                    debug=False)
        dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    return dataset