def __init__(self, config, is_train=True):
    poses_3d_root, rotations, bones, alphas, contacts, projections = [], [], [], [], [], []
    self.frames = []
    self.config = config
    self.rotation_number = ROTATION_NUMBERS.get(config.arch.rotation_type)
    datasets = ['bvh']  # switch to ['h36m', 'bvh'] to also load Human3.6M

    if 'h36m' in datasets:
        dim_to_use_3d = h36m_utils.dimension_reducer(3, config.arch.predict_joints)
        subjects = h36m_utils.TRAIN_SUBJECTS if is_train else h36m_utils.TEST_SUBJECTS
        actions = h36m_utils.define_actions('All')
        self.cameras = h36m_utils.load_cameras(config.trainer.data_path)
        for subject in subjects:
            for action in actions:
                for subaction in range(1, 3):
                    data_file = h5py.File('%s/S%s/%s-%s/annot.h5' % (config.trainer.data_path, subject, action, subaction), 'r')
                    data_size = data_file['frame'].size / 4
                    data_set = np.array(data_file['pose/3d']).reshape((-1, 96))[:, dim_to_use_3d]
                    # Each annotation file interleaves the four camera views; split them apart.
                    for i in range(4):
                        camera_name = data_file['camera'][int(data_size * i)]
                        R, T, f, c, k, p, res_w, res_h = self.cameras[(subject, str(camera_name))]
                        set_3d = data_set[int(data_size * i):int(data_size * (i + 1))].copy()
                        set_3d_world = h36m_utils.camera_to_world_frame(set_3d.reshape((-1, 3)), R, T)
                        # set_3d_world[:, [1, 2]] = set_3d_world[:, [2, 1]]
                        # set_3d_world[:, [2]] *= -1
                        # The flattened (frames, joints * 3) layout is required below.
                        set_3d_world = set_3d_world.reshape((-1, config.arch.predict_joints * 3))
                        set_3d_root = set_3d_world - np.tile(set_3d_world[:, :3], [1, int(set_3d_world.shape[-1] / 3)])
                        set_bones = self.get_bones(set_3d_root, config.arch.predict_joints)
                        set_alphas = np.mean(set_bones, axis=1)
                        self.frames.append(set_3d_root.shape[0])
                        poses_3d_root.append(set_3d_root / np.expand_dims(set_alphas, axis=-1))
                        rotations.append(np.zeros((set_3d_root.shape[0], int(set_3d_root.shape[1] / 3 * self.rotation_number))))
                        bones.append(set_bones / np.expand_dims(set_alphas, axis=-1))
                        alphas.append(set_alphas)
                        contacts.append(self.get_contact(set_3d_world, config.arch.predict_joints))
                        projections.append((set_3d_world.copy() / np.expand_dims(set_alphas, axis=-1)).reshape((set_3d_world.shape[0], -1, 3))[:, 0, 2])
                    data_file.close()

    if 'bvh' in datasets:
        to_keep = [0, 7, 8, 9, 2, 3, 4, 12, 15, 18, 19, 20, 25, 26, 27] if config.arch.predict_joints == 15 else [0, 7, 8, 9, 2, 3, 4, 12, 13, 15, 16, 18, 19, 20, 25, 26, 27]
        parents = [-1, 0, 1, 2, 0, 4, 5, 0, 7, 7, 9, 10, 7, 12, 13] if config.arch.predict_joints == 15 else [-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15]
        bvh_files = util.make_dataset(['/mnt/dataset/test_bvh'], phase='bvh', data_split=1)
        # 80/20 train/test split over the file list.
        bvh_files = bvh_files[:int(len(bvh_files) * 0.8)] if is_train else bvh_files[int(len(bvh_files) * 0.8):]
        for bvh_file in bvh_files:
            original_anim, joint_names, frame_rate = BVH.load(bvh_file)
            set_skel_in = original_anim.positions[:, to_keep, :]
            set_rotations = original_anim.rotations.qs[:, to_keep, :]
            anim = Animation.Animation(Quaternions(set_rotations), set_skel_in, original_anim.orients.qs[to_keep, :], set_skel_in, np.array(parents))
            set_3d_world = Animation.positions_global(anim).reshape(set_rotations.shape[0], -1)
            # Re-root at the pelvis: the midpoint of the two hip joints.
            set_3d_world[:, 0:3] = (set_3d_world[:, 3:6] + set_3d_world[:, 12:15]) / 2
            set_3d_root = set_3d_world - np.tile(set_3d_world[:, :3], [1, int(set_3d_world.shape[-1] / 3)])
            set_bones = self.get_bones(set_3d_root, config.arch.predict_joints)
            set_alphas = np.mean(set_bones, axis=1)
            self.frames.append(set_3d_root.shape[0])
            poses_3d_root.append(set_3d_root / np.expand_dims(set_alphas, axis=-1))
            rotations.append(np.zeros((set_3d_root.shape[0], int(set_3d_root.shape[1] / 3 * self.rotation_number))))
            bones.append(set_bones / np.expand_dims(set_alphas, axis=-1))
            alphas.append(set_alphas)
            contacts.append(self.get_contact(set_3d_world, config.arch.predict_joints))
            projections.append((set_3d_world.copy() / np.expand_dims(set_alphas, axis=-1)).reshape((set_3d_world.shape[0], -1, 3))[:, 0, 2])

    self.poses_3d = np.concatenate(poses_3d_root, axis=0)
    self.rotations = np.concatenate(rotations, axis=0)
    self.bones = np.concatenate(bones, axis=0)
    self.alphas = np.concatenate(alphas, axis=0)
    self.contacts = np.concatenate(contacts, axis=0)
    self.projections = np.concatenate(projections, axis=0)

    if config.trainer.data_aug_flip and is_train:
        posed_3d_flip = self.get_flipping(self.poses_3d, 3, config.arch.predict_joints)
        self.poses_3d = np.concatenate([self.poses_3d, posed_3d_flip], axis=0)
    self.poses_2d = self.get_projection(self.poses_3d)
    self.poses_2d_root = (self.poses_2d - self.poses_2d[:, 0, None]).reshape((self.poses_3d.shape[0], -1))

    # Debug: save a sample of paired 3D/2D poses for visual inspection.
    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec
    from utils import visualization
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 2)
    for i in range(1):
        ax1 = plt.subplot(gs[0], projection='3d')
        visualization.show3Dpose(self.poses_3d[i], ax1, radius=5)
        ax2 = plt.subplot(gs[1])
        visualization.show2Dpose(self.poses_2d_root[i] * 1000 + 500, ax2, radius=1000)
        fig.savefig('./images/2d_3d/_%d.png' % i)
        fig.clear()

    self.update_sequence_index()
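
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original pipeline): both branches above
# root-center each pose and divide by a per-frame skeleton scale "alpha",
# which the code derives as the mean bone length (set_alphas =
# np.mean(set_bones, axis=1)). The hypothetical helper below reproduces that
# normalization on a generic (frames, joints * 3) array, given one parent
# index per joint; it assumes get_bones measures parent-child distances.
# ---------------------------------------------------------------------------
def _normalize_poses_sketch(poses, parents):
    """Root-center `poses` and scale each frame by its mean bone length.

    poses:   (frames, joints * 3) world-space joint positions.
    parents: parent joint index per joint, -1 for the root.
    Returns the normalized poses and the per-frame alphas.
    """
    frames, dim = poses.shape
    joints = dim // 3
    # Subtract the root joint (first three values) from every joint.
    root_centered = poses - np.tile(poses[:, :3], [1, joints])
    xyz = root_centered.reshape(frames, joints, 3)
    # One bone per non-root joint: its distance to the parent joint.
    bone_lengths = np.stack(
        [np.linalg.norm(xyz[:, j] - xyz[:, p], axis=-1)
         for j, p in enumerate(parents) if p >= 0], axis=1)
    alphas = bone_lengths.mean(axis=1)  # per-frame skeleton scale
    return root_centered / alphas[:, None], alphas
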
def __init__(self, config, is_train=True):
    poses_3d, poses_2d, poses_2d_pixel, bones, alphas, contacts, proj_facters = [], [], [], [], [], [], []
    self.cameras = h36m_utils.load_cameras('./data/cameras.h5')
    self.frame_numbers = []
    self.video_name = []
    self.config = config
    self.is_train = is_train
    subjects = h36m_utils.TRAIN_SUBJECTS if is_train else h36m_utils.TEST_SUBJECTS
    positions_set = np.load('./data/data_h36m.npz', allow_pickle=True)['positions_3d'].item()
    if config.trainer.data == 'cpn':
        positions_set_2d = np.load('./data/data_2d_h36m_cpn_ft_h36m_dbb.npz', allow_pickle=True)['positions_2d'].item()
    elif config.trainer.data == 'detectron':
        positions_set_2d = np.load('./data/data_2d_h36m_detectron_ft_h36m.npz', allow_pickle=True)['positions_2d'].item()

    # Load Human3.6M position data
    for subject in subjects:
        for action in positions_set['S%s' % subject].keys():
            action_sequences = positions_set['S%s' % subject][action]
            sequence_length = action_sequences[0].shape[0]
            for c_idx, set_3d in enumerate(action_sequences):
                set_3d = set_3d.copy().reshape((set_3d.shape[0], -1))
                R, T, f, c, k, p, res_w, res_h = self.cameras[(subject, c_idx)]
                set_3d_world = h36m_utils.camera_to_world_frame(set_3d.reshape((-1, 3)), R, T).reshape(set_3d.shape)
                augment_depth = random.randint(-5, 20) if config.trainer.data_aug_depth else 0
                if config.trainer.data == 'gt':
                    # Ground-truth 2D: project the 3D pose through the camera model.
                    set_2d = h36m_utils.project_2d(set_3d.reshape((-1, 3)), R, T, f, c, k, p, augment_depth=augment_depth, from_world=False)[0].reshape((set_3d.shape[0], int(set_3d.shape[-1] / 3 * 2)))
                else:
                    # Detected 2D: trim both modalities to their common length.
                    set_2d = positions_set_2d['S%s' % subject][action][c_idx]
                    set_2d = set_2d.reshape((set_2d.shape[0], -1))[:min(set_3d.shape[0], set_2d.shape[0])]
                    set_3d = set_3d[:min(set_3d.shape[0], set_2d.shape[0])]
                set_2d_pixel = set_2d
                set_3d_root = set_3d - np.tile(set_3d[:, :3], [1, int(set_3d.shape[-1] / 3)])
                set_2d_root = set_2d - np.tile(set_2d[:, :2], [1, int(set_2d.shape[-1] / 2)])
                # Normalize root-relative 2D coordinates by the image resolution.
                set_2d_root[:, list(range(0, set_2d.shape[-1], 2))] /= res_w
                set_2d_root[:, list(range(1, set_2d.shape[-1], 2))] /= res_h
                set_bones = self.get_bones(set_3d_root)
                set_alphas = np.mean(set_bones, axis=1)
                self.frame_numbers.append(set_3d_root.shape[0])
                self.video_name.append('S%s_%s_%s' % (subject, action, c_idx))
                poses_3d.append(set_3d_root / np.expand_dims(set_alphas, axis=-1))
                poses_2d.append(set_2d_root)
                poses_2d_pixel.append(set_2d_pixel)
                bones.append(set_bones / np.expand_dims(set_alphas, axis=-1))
                alphas.append(set_alphas)
                contacts.append(self.get_contacts(set_3d_world))
                # Projection factor: the alpha-normalized depth of the root joint.
                proj_facters.append((set_3d / np.expand_dims(set_alphas, axis=-1)).reshape((set_3d.shape[0], -1, 3))[:, 0, 2])

    self.poses_3d = np.concatenate(poses_3d, axis=0)
    self.poses_2d = np.concatenate(poses_2d, axis=0)
    self.poses_2d_pixel = np.concatenate(poses_2d_pixel, axis=0)
    self.proj_facters = np.concatenate(proj_facters, axis=0)
    self.contacts = np.concatenate(contacts, axis=0)
    self.alphas = np.concatenate(alphas, axis=0)
    self.bones = np.concatenate(bones, axis=0)

    if is_train:
        if config.trainer.data_aug_flip:
            posed_3d_flip = self.get_flipping(self.poses_3d, dim=3)
            posed_2d_flip = self.get_flipping(self.poses_2d, dim=2)
            poses_2d_pixel_flip = self.get_flipping(self.poses_2d_pixel, dim=2)
            self.poses_3d = np.concatenate([self.poses_3d, posed_3d_flip], axis=0)
            self.poses_2d = np.concatenate([self.poses_2d, posed_2d_flip], axis=0)
            self.poses_2d_pixel = np.concatenate([self.poses_2d_pixel, poses_2d_pixel_flip], axis=0)
        if config.trainer.use_loss_D:
            # CMU rotation data for the adversarial (discriminator) loss.
            rotations_set = np.load('./data/data_cmu.npz', allow_pickle=True)['rotations']
            self.r_frame_numbers = [r_array.shape[0] for r_array in rotations_set]
            self.rotations = np.concatenate(rotations_set, axis=0)
            self.rotations = self.rotations.reshape((self.rotations.shape[0], -1))

    if config.arch.confidence:
        # Add synthetic noise to the 2D inputs and interleave the noised
        # (x, y) pairs with a per-joint confidence score into (x, y, c) triples.
        self.poses_2d_noised, confidence_maps = self.add_noise(self.poses_2d, training=is_train)
        self.poses_2d_noised_with_confidence = np.zeros((self.poses_2d_noised.shape[0], int(self.poses_2d_noised.shape[-1] / 2 * 3)))
        for joint_index in range(int(self.poses_2d_noised.shape[-1] / 2)):
            self.poses_2d_noised_with_confidence[:, 3 * joint_index] = self.poses_2d_noised[:, 2 * joint_index]
            self.poses_2d_noised_with_confidence[:, 3 * joint_index + 1] = self.poses_2d_noised[:, 2 * joint_index + 1]
            # Average the x and y confidences into one score per joint.
            self.poses_2d_noised_with_confidence[:, 3 * joint_index + 2] = (confidence_maps[:, 2 * joint_index] + confidence_maps[:, 2 * joint_index + 1]) / 2

    self.set_sequences()

    # Standardize inputs with util.normalize_data, keeping the mean/std
    # statistics for later de-normalization.
    self.poses_2d, self.poses_2d_mean, self.poses_2d_std = util.normalize_data(self.poses_2d_noised_with_confidence if config.arch.confidence else self.poses_2d)
    self.bones, self.bones_mean, self.bones_std = util.normalize_data(self.bones)
    self.proj_facters, self.proj_mean, self.proj_std = util.normalize_data(self.proj_facters)
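
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original class): a vectorized
# equivalent of the confidence-interleaving loop above. It assumes `poses_2d`
# is (frames, joints * 2) and `confidence` holds one score per coordinate,
# averaged into a single per-joint confidence, as in the loop.
# ---------------------------------------------------------------------------
def _interleave_confidence_sketch(poses_2d, confidence):
    """Return (frames, joints * 3) data laid out as (x, y, c) per joint."""
    frames, dim = poses_2d.shape
    joints = dim // 2
    xy = poses_2d.reshape(frames, joints, 2)
    # Average the x/y confidence scores into one value per joint.
    c = confidence.reshape(frames, joints, 2).mean(axis=-1, keepdims=True)
    return np.concatenate([xy, c], axis=-1).reshape(frames, joints * 3)
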