Example #1
    def __init__(self, config, is_train=True):

        poses_3d_root, rotations, bones, alphas, contacts, projections = [], [], [], [], [], []
        self.frames = []
        self.config = config
        self.rotation_number = ROTATION_NUMBERS.get(config.arch.rotation_type)

        datasets = ['bvh']  # add 'h36m' to also load Human3.6M data
        if 'h36m' in datasets:
            dim_to_use_3d = h36m_utils.dimension_reducer(
                3, config.arch.predict_joints)
            subjects = h36m_utils.TRAIN_SUBJECTS if is_train else h36m_utils.TEST_SUBJECTS
            actions = h36m_utils.define_actions('All')
            self.cameras = h36m_utils.load_cameras(config.trainer.data_path)
            for subject in subjects:
                for action in actions:
                    for subaction in range(1, 3):
                        data_file = h5py.File(
                            '%s/S%s/%s-%s/annot.h5' %
                            (config.trainer.data_path, subject, action,
                             subaction), 'r')
                        data_size = data_file['frame'].size // 4  # frames per camera view
                        data_set = np.array(data_file['pose/3d']).reshape(
                            (-1, 96))[:, dim_to_use_3d]
                        for i in range(4):  # one segment per camera view
                            camera_name = data_file['camera'][data_size * i]
                            R, T, f, c, k, p, res_w, res_h = self.cameras[(
                                subject, str(camera_name))]
                            set_3d = data_set[data_size * i:data_size *
                                              (i + 1)].copy()
                            set_3d_world = h36m_utils.camera_to_world_frame(
                                set_3d.reshape((-1, 3)), R, T)
                            # set_3d_world[:, [1, 2]] = set_3d_world[:, [2, 1]]
                            # set_3d_world[:, [2]] *= -1
                            # set_3d_world = set_3d_world.reshape((-1, config.arch.predict_joints * 3))
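                            # Make poses root-relative: subtract the root joint from every joint.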
                            set_3d_root = set_3d_world - np.tile(
                                set_3d_world[:, :3],
                                [1, int(set_3d_world.shape[-1] / 3)])

                            set_bones = self.get_bones(
                                set_3d_root, config.arch.predict_joints)
                            set_alphas = np.mean(set_bones, axis=1)

                            self.frames.append(set_3d_root.shape[0])
                            poses_3d_root.append(
                                set_3d_root /
                                np.expand_dims(set_alphas, axis=-1))
                            rotations.append(
                                np.zeros((set_3d_root.shape[0],
                                          int(set_3d_root.shape[1] / 3 *
                                              self.rotation_number))))
                            bones.append(set_bones /
                                         np.expand_dims(set_alphas, axis=-1))
                            alphas.append(set_alphas)
                            contacts.append(
                                self.get_contact(set_3d_world,
                                                 config.arch.predict_joints))
                            projections.append(
                                (set_3d_world.copy() /
                                 np.expand_dims(set_alphas, axis=-1)).reshape(
                                     (set_3d_world.shape[0], -1, 3))[:, 0, 2])

        if 'bvh' in datasets:
            to_keep = [
                0, 7, 8, 9, 2, 3, 4, 12, 15, 18, 19, 20, 25, 26, 27
            ] if config.arch.predict_joints == 15 else [
                0, 7, 8, 9, 2, 3, 4, 12, 13, 15, 16, 18, 19, 20, 25, 26, 27
            ]
            parents = [
                -1, 0, 1, 2, 0, 4, 5, 0, 7, 7, 9, 10, 7, 12, 13
            ] if config.arch.predict_joints == 15 else [
                -1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15
            ]

            bvh_files = util.make_dataset(['/mnt/dataset/test_bvh'],
                                          phase='bvh',
                                          data_split=1)
            split = int(len(bvh_files) * 0.8)  # 80/20 train/test split
            bvh_files = bvh_files[:split] if is_train else bvh_files[split:]
            for bvh_file in bvh_files:
                original_anim, joint_names, frame_rate = BVH.load(bvh_file)
                set_skel_in = original_anim.positions[:, to_keep, :]
                set_rotations = original_anim.rotations.qs[:, to_keep, :]
                anim = Animation.Animation(
                    Quaternions(set_rotations), set_skel_in,
                    original_anim.orients.qs[to_keep, :], set_skel_in,
                    np.array(parents))
                set_3d_world = Animation.positions_global(anim).reshape(
                    set_rotations.shape[0], -1)
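                # Recenter the root as the midpoint of joints 1 and 4 (the two hips, both children of the root).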
                set_3d_world[:, 0:3] = (set_3d_world[:, 3:6] +
                                        set_3d_world[:, 12:15]) / 2
                set_3d_root = set_3d_world - np.tile(
                    set_3d_world[:, :3],
                    [1, int(set_3d_world.shape[-1] / 3)])

                set_bones = self.get_bones(set_3d_root,
                                           config.arch.predict_joints)
                set_alphas = np.mean(set_bones, axis=1)

                self.frames.append(set_3d_root.shape[0])
                poses_3d_root.append(set_3d_root /
                                     np.expand_dims(set_alphas, axis=-1))
                rotations.append(
                    np.zeros((set_3d_root.shape[0],
                              int(set_3d_root.shape[1] / 3 *
                                  self.rotation_number))))
                bones.append(set_bones / np.expand_dims(set_alphas, axis=-1))
                alphas.append(set_alphas)
                contacts.append(
                    self.get_contact(set_3d_world, config.arch.predict_joints))
                projections.append(
                    (set_3d_world.copy() /
                     np.expand_dims(set_alphas, axis=-1)).reshape(
                         (set_3d_world.shape[0], -1, 3))[:, 0, 2])

        self.poses_3d = np.concatenate(poses_3d_root, axis=0)
        self.rotations = np.concatenate(rotations, axis=0)
        self.bones = np.concatenate(bones, axis=0)
        self.alphas = np.concatenate(alphas, axis=0)
        self.contacts = np.concatenate(contacts, axis=0)
        self.projections = np.concatenate(projections, axis=0)

        if config.trainer.data_aug_flip and is_train:
            poses_3d_flip = self.get_flipping(self.poses_3d, 3,
                                              config.arch.predict_joints)
            self.poses_3d = np.concatenate([self.poses_3d, poses_3d_flip],
                                           axis=0)

        self.poses_2d = self.get_projection(self.poses_3d)
        self.poses_2d_root = (self.poses_2d -
                              self.poses_2d[:, 0, None]).reshape(
                                  (self.poses_3d.shape[0], -1))

        import matplotlib.pyplot as plt
        import matplotlib.gridspec as gridspec
        from utils import visualization
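        # Debug visualization: save the first pose as a 3D plot alongside its 2D projection.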
        fig = plt.figure()
        gs = gridspec.GridSpec(1, 2)
        for i in range(1):  # visualize only the first frame as a sanity check
            ax1 = plt.subplot(gs[0], projection='3d')
            visualization.show3Dpose(self.poses_3d[i], ax1, radius=5)

            ax2 = plt.subplot(gs[1])
            visualization.show2Dpose(self.poses_2d_root[i] * 1000 + 500,
                                     ax2,
                                     radius=1000)

            fig.savefig('./images/2d_3d/_%d.png' % i)
            fig.clear()

        self.update_sequence_index()
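
Both branches above rely on the same normalization idiom: each clip is made root-relative by subtracting the root joint from every joint, then divided by a per-frame scale alpha (the mean bone length) so that skeletons of different sizes become comparable. Below is a minimal, self-contained sketch of that idiom, assuming random data and the same hypothetical 15-joint skeleton; mean_bone_length is an illustrative stand-in for the class's get_bones/alpha step, not the actual implementation.

import numpy as np

# 15-joint skeleton from the example above: parents[j] is the parent of joint j (-1 = root).
PARENTS = [-1, 0, 1, 2, 0, 4, 5, 0, 7, 7, 9, 10, 7, 12, 13]

def root_center(poses):
    # poses: (frames, joints * 3); subtract the root from every joint (the np.tile idiom above).
    n_joints = poses.shape[-1] // 3
    return poses - np.tile(poses[:, :3], [1, n_joints])

def mean_bone_length(poses, parents):
    # Stand-in for get_bones + np.mean: per-frame mean distance between each joint and its parent.
    p3 = poses.reshape(poses.shape[0], -1, 3)
    bones = [np.linalg.norm(p3[:, j] - p3[:, p], axis=-1)
             for j, p in enumerate(parents) if p >= 0]
    return np.stack(bones, axis=1).mean(axis=1)  # shape: (frames,)

clip = np.random.randn(100, len(PARENTS) * 3)   # fake world-space clip
clip_root = root_center(clip)                   # root-relative coordinates
alphas = mean_bone_length(clip_root, PARENTS)   # per-frame scale factor
clip_norm = clip_root / alphas[:, None]         # scale-normalized, like poses_3d_root above
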
Example #2
    def __init__(self, config, is_train=True):
        poses_3d, poses_2d, poses_2d_pixel, bones, alphas, contacts, proj_facters = [], [], [], [], [], [], []
        self.cameras = h36m_utils.load_cameras('./data/cameras.h5')

        self.frame_numbers = []
        self.video_name = []
        self.config = config
        self.is_train = is_train
        subjects = h36m_utils.TRAIN_SUBJECTS if is_train else h36m_utils.TEST_SUBJECTS

        positions_set = np.load('./data/data_h36m.npz',
                                allow_pickle=True)['positions_3d'].item()
        if config.trainer.data == 'cpn':
            positions_set_2d = np.load(
                './data/data_2d_h36m_cpn_ft_h36m_dbb.npz',
                allow_pickle=True)['positions_2d'].item()
        elif config.trainer.data == 'detectron':
            positions_set_2d = np.load(
                './data/data_2d_h36m_detectron_ft_h36m.npz',
                allow_pickle=True)['positions_2d'].item()

        # Load Human3.6M position data
        for subject in subjects:
            for action in positions_set['S%s' % subject].keys():
                action_sequences = positions_set['S%s' % subject][action]
                sequence_length = action_sequences[0].shape[0]
                for c_idx, set_3d in enumerate(action_sequences):
                    set_3d = set_3d.copy().reshape((set_3d.shape[0], -1))
                    R, T, f, c, k, p, res_w, res_h = self.cameras[(subject,
                                                                   c_idx)]
                    set_3d_world = h36m_utils.camera_to_world_frame(
                        set_3d.reshape((-1, 3)), R, T).reshape(set_3d.shape)
                    augment_depth = random.randint(
                        -5, 20) if config.trainer.data_aug_depth else 0
                    if config.trainer.data == 'gt':
                        set_2d = h36m_utils.project_2d(
                            set_3d.reshape((-1, 3)),
                            R,
                            T,
                            f,
                            c,
                            k,
                            p,
                            augment_depth=augment_depth,
                            from_world=False)[0].reshape(
                                (set_3d.shape[0],
                                 int(set_3d.shape[-1] / 3 * 2)))
                    else:
                        set_2d = positions_set_2d['S%s' % subject][action][c_idx]
                        length = min(set_3d.shape[0], set_2d.shape[0])
                        set_2d = set_2d.reshape((set_2d.shape[0], -1))[:length]
                        set_3d = set_3d[:length]
                    set_2d_pixel = set_2d
                    set_3d_root = set_3d - np.tile(
                        set_3d[:, :3], [1, int(set_3d.shape[-1] / 3)])
                    set_2d_root = set_2d - np.tile(
                        set_2d[:, :2], [1, int(set_2d.shape[-1] / 2)])

                    # Normalize pixel coordinates by the image resolution.
                    set_2d_root[:, 0::2] /= res_w
                    set_2d_root[:, 1::2] /= res_h

                    set_bones = self.get_bones(set_3d_root)
                    set_alphas = np.mean(set_bones, axis=1)

                    self.frame_numbers.append(set_3d_root.shape[0])
                    self.video_name.append('S%s_%s_%s' %
                                           (subject, action, c_idx))
                    poses_3d.append(set_3d_root /
                                    np.expand_dims(set_alphas, axis=-1))
                    poses_2d.append(set_2d_root)
                    poses_2d_pixel.append(set_2d_pixel)
                    bones.append(set_bones /
                                 np.expand_dims(set_alphas, axis=-1))
                    alphas.append(set_alphas)
                    contacts.append(self.get_contacts(set_3d_world))
                    proj_facters.append(
                        (set_3d / np.expand_dims(set_alphas, axis=-1)).reshape(
                            (set_3d.shape[0], -1, 3))[:, 0, 2])

        self.poses_3d = np.concatenate(poses_3d, axis=0)
        self.poses_2d = np.concatenate(poses_2d, axis=0)
        self.poses_2d_pixel = np.concatenate(poses_2d_pixel, axis=0)
        self.proj_facters = np.concatenate(proj_facters, axis=0)
        self.contacts = np.concatenate(contacts, axis=0)
        self.alphas = np.concatenate(alphas, axis=0)
        self.bones = np.concatenate(bones, axis=0)

        if is_train:
            if config.trainer.data_aug_flip:
                poses_3d_flip = self.get_flipping(self.poses_3d, dim=3)
                poses_2d_flip = self.get_flipping(self.poses_2d, dim=2)
                poses_2d_pixel_flip = self.get_flipping(self.poses_2d_pixel,
                                                        dim=2)
                self.poses_3d = np.concatenate([self.poses_3d, poses_3d_flip],
                                               axis=0)
                self.poses_2d = np.concatenate([self.poses_2d, poses_2d_flip],
                                               axis=0)
                self.poses_2d_pixel = np.concatenate(
                    [self.poses_2d_pixel, poses_2d_pixel_flip], axis=0)
            if config.trainer.use_loss_D:
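                # CMU mocap rotations serve as the real samples for the adversarial (discriminator) loss.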
                rotations_set = np.load('./data/data_cmu.npz',
                                        allow_pickle=True)['rotations']
                self.r_frame_numbers = [
                    r_array.shape[0] for r_array in rotations_set
                ]
                self.rotations = np.concatenate(rotations_set, axis=0)
                self.rotations = self.rotations.reshape(
                    (self.rotations.shape[0], -1))
        if config.arch.confidence:
            self.poses_2d_noised, confidence_maps = self.add_noise(
                self.poses_2d, training=is_train)
            self.poses_2d_noised_with_confidence = np.zeros(
                (self.poses_2d_noised.shape[0],
                 int(self.poses_2d_noised.shape[-1] / 2 * 3)))
            # Interleave each (x, y) pair with one confidence value per joint.
            for joint_index in range(self.poses_2d_noised.shape[-1] // 2):
                self.poses_2d_noised_with_confidence[:, 3 * joint_index] = \
                    self.poses_2d_noised[:, 2 * joint_index]
                self.poses_2d_noised_with_confidence[:, 3 * joint_index + 1] = \
                    self.poses_2d_noised[:, 2 * joint_index + 1]
                # Average the x and y confidences into a single per-joint score.
                self.poses_2d_noised_with_confidence[:, 3 * joint_index + 2] = (
                    confidence_maps[:, 2 * joint_index] +
                    confidence_maps[:, 2 * joint_index + 1]) / 2

        self.set_sequences()

        self.poses_2d, self.poses_2d_mean, self.poses_2d_std = util.normalize_data(
            self.poses_2d_noised_with_confidence
            if config.arch.confidence else self.poses_2d)
        self.bones, self.bones_mean, self.bones_std = util.normalize_data(
            self.bones)
        self.proj_facters, self.proj_mean, self.proj_std = util.normalize_data(
            self.proj_facters)
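
For reference, the per-joint loop above that interleaves (x, y) coordinates with confidences can also be written as a single vectorized reshape. Here is a sketch under the assumption that poses and confidences arrive as (frames, joints * 2) arrays; the function name and the random inputs are hypothetical.

import numpy as np

def with_confidence(poses_2d, confidence_maps):
    # (frames, joints * 2) -> (frames, joints * 3): x, y, confidence per joint.
    frames, twice_joints = poses_2d.shape
    joints = twice_joints // 2
    xy = poses_2d.reshape(frames, joints, 2)
    # Average the x and y confidences into one score per joint, as the loop above does.
    conf = confidence_maps.reshape(frames, joints, 2).mean(axis=-1, keepdims=True)
    return np.concatenate([xy, conf], axis=-1).reshape(frames, joints * 3)

poses_2d = np.random.rand(4, 17 * 2)            # hypothetical 17-joint 2D poses
conf_maps = np.random.rand(4, 17 * 2)           # hypothetical per-coordinate confidences
stacked = with_confidence(poses_2d, conf_maps)  # shape: (4, 51)
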