def _test_step(self, image, kp3d, return_kps=False):
    tf.keras.backend.set_learning_phase(0)

    # add a batch dimension for single samples (`is not 4` was an identity
    # comparison bug; integer comparison needs `!=`)
    if len(tf.shape(image)) != 4:
        image = tf.expand_dims(image, 0)
        kp3d = tf.expand_dims(kp3d, 0)

    result = self.generator(image, training=False)
    # only use last computed theta (from accumulated iterative feedback loop)
    _, _, kp3d_pred, _, _, _ = result[-1]

    # convert back from m -> mm
    factor = tf.constant(1000, tf.float32)
    kp3d, kp3d_predict = kp3d * factor, kp3d_pred * factor
    kp3d_predict = kp3d_predict[:, :self.config.NUM_KP3D, :]

    real_kp3d = batch_align_by_pelvis(kp3d)
    predict_kp3d = batch_align_by_pelvis(kp3d_predict)

    kp3d_mpjpe = tf.norm(real_kp3d - predict_kp3d, axis=2)

    aligned_kp3d = batch_compute_similarity_transform(real_kp3d, predict_kp3d)
    kp3d_mpjpe_aligned = tf.norm(real_kp3d - aligned_kp3d, axis=2)

    if return_kps:
        return kp3d_mpjpe, kp3d_mpjpe_aligned, predict_kp3d, real_kp3d

    return kp3d_mpjpe, kp3d_mpjpe_aligned, None, None
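# For reference, a minimal sketch of what `batch_align_by_pelvis` is expected
# to do: center every skeleton at the pelvis so that MPJPE becomes translation
# invariant. The hip indices (LSP-14 joint order) are an assumption of this
# sketch; the helper actually called above is defined elsewhere in the repo.
def _batch_align_by_pelvis_sketch(kp3d):
    """Subtract the pelvis (midpoint of the hips) from every joint.

    Args:
        kp3d: float tensor of shape [batch, num_joints, 3]
    Returns:
        tensor of the same shape, centered at the pelvis
    """
    right_hip, left_hip = 2, 3  # assumed LSP-14 indices
    pelvis = (kp3d[:, left_hip, :] + kp3d[:, right_hip, :]) / 2.0
    return kp3d - tf.expand_dims(pelvis, axis=1)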
def _val_step(self, images, kp2d, kp3d, has3d):
    tf.keras.backend.set_learning_phase(0)

    result = self.generator(images, training=False)
    # only use last computed theta (from accumulated iterative feedback loop)
    _, kp2d_pred, kp3d_pred, _, _, _ = result[-1]

    # visibility-weighted mean 2D keypoint error
    vis = kp2d[:, :, 2]
    kp2d_norm = tf.norm(kp2d_pred[:, :self.config.NUM_KP2D, :] - kp2d[:, :, :2], axis=2) * vis
    kp2d_mpjpe = tf.reduce_sum(kp2d_norm) / tf.reduce_sum(vis)
    self.kp2d_mpjpe_log.update_state(kp2d_mpjpe)

    if self.config.USE_3D:
        # check if at least one 3d sample is available
        if tf.reduce_sum(has3d) > 0:
            # tf.boolean_mask requires a bool mask; has3d is a float indicator
            mask = tf.cast(has3d, tf.bool)
            kp3d_real = tf.boolean_mask(kp3d, mask)
            kp3d_predict = tf.boolean_mask(kp3d_pred, mask)
            kp3d_predict = kp3d_predict[:, :self.config.NUM_KP3D, :]

            kp3d_real = batch_align_by_pelvis(kp3d_real)
            kp3d_predict = batch_align_by_pelvis(kp3d_predict)

            kp3d_mpjpe = tf.norm(kp3d_predict - kp3d_real, axis=2)
            kp3d_mpjpe = tf.reduce_mean(kp3d_mpjpe)

            aligned_kp3d = batch_compute_similarity_transform(kp3d_real, kp3d_predict)
            kp3d_mpjpe_aligned = tf.norm(aligned_kp3d - kp3d_real, axis=2)
            kp3d_mpjpe_aligned = tf.reduce_mean(kp3d_mpjpe_aligned)

            self.kp3d_mpjpe_log.update_state(kp3d_mpjpe)
            self.kp3d_mpjpe_aligned_log.update_state(kp3d_mpjpe_aligned)
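# A sketch of the Procrustes alignment behind `batch_compute_similarity_transform`:
# per sample, find the scale, rotation and translation that best map the
# prediction onto the ground truth, so the aligned error (PA-MPJPE) ignores
# global orientation, position and scale. This is the standard SVD-based
# solution and only an assumption about the helper's internals; the signature
# `(real, pred) -> aligned pred` matches its usage above.
def _batch_compute_similarity_transform_sketch(real, pred):
    """Align pred to real with a per-sample similarity transform.

    Args:
        real: [batch, num_joints, 3] ground truth joints
        pred: [batch, num_joints, 3] predicted joints
    Returns:
        [batch, num_joints, 3] pred after optimal scale/rotation/translation
    """
    S1 = tf.transpose(real, [0, 2, 1])  # [batch, 3, num_joints]
    S2 = tf.transpose(pred, [0, 2, 1])

    mu1 = tf.reduce_mean(S1, axis=2, keepdims=True)
    mu2 = tf.reduce_mean(S2, axis=2, keepdims=True)
    X1, X2 = S1 - mu1, S2 - mu2

    var2 = tf.reduce_sum(X2 ** 2, axis=[1, 2])  # total variance of pred
    M = tf.matmul(X1, X2, transpose_b=True)     # [batch, 3, 3] correlation

    s, U, V = tf.linalg.svd(M)                  # M = U diag(s) V^T
    # flip the last singular direction if needed so R is a proper rotation
    sign = tf.sign(tf.linalg.det(tf.matmul(U, V, transpose_b=True)))
    Z = tf.stack([tf.ones_like(sign), tf.ones_like(sign), sign], axis=1)

    R = tf.matmul(U * Z[:, None, :], V, transpose_b=True)
    scale = (tf.reduce_sum(s * Z, axis=1) / var2)[:, None, None]
    t = mu1 - scale * tf.matmul(R, mu2)

    return tf.transpose(scale * tf.matmul(R, S2) + t, [0, 2, 1])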
def test_batch_align_by_pelvis(self):
    joints_3d = tf.ones((self.config.BATCH_SIZE, self.config.NUM_KP3D, 3))
    output = batch_align_by_pelvis(joints_3d)

    # identical joints collapse to the origin once the pelvis is subtracted
    expected = tf.zeros((self.config.BATCH_SIZE, self.config.NUM_KP3D, 3))
    self.assertAllCloseAccordingToType(expected, output)
    self.assertEqual((self.config.BATCH_SIZE, self.config.NUM_KP3D, 3), output.shape)
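# A companion sketch (not part of the original suite): after similarity
# alignment, a rigidly rotated copy of a skeleton should land back on the
# original. The harness mirrors the test above; it assumes
# `batch_compute_similarity_transform(real, pred)` returns pred aligned to
# real, as in its call sites in _test_step and _val_step.
def test_batch_compute_similarity_transform_sketch(self):
    joints_3d = tf.random.uniform(
        (self.config.BATCH_SIZE, self.config.NUM_KP3D, 3), seed=42)

    # rotate every skeleton 90 degrees around the z-axis
    rotation = tf.constant([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])
    rotated = tf.einsum('ij,bkj->bki', rotation, joints_3d)

    aligned = batch_compute_similarity_transform(joints_3d, rotated)
    self.assertAllClose(joints_3d, aligned, atol=1e-4)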
def _train_step(self, images, kp2d, kp3d, has3d, theta):
    tf.keras.backend.set_learning_phase(1)
    batch_size = images.shape[0]

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generator_outputs = self.generator(images, training=True)
        # only use last computed theta (from iterative feedback loop)
        _, kp2d_pred, kp3d_pred, pose_pred, shape_pred, _ = generator_outputs[-1]

        # visibility-weighted L1 loss on the projected 2D keypoints
        vis = tf.expand_dims(kp2d[:, :, 2], -1)
        kp2d_loss = v1_loss.absolute_difference(kp2d[:, :, :2], kp2d_pred, weights=vis)
        kp2d_loss = kp2d_loss * self.config.GENERATOR_2D_LOSS_WEIGHT

        if self.config.USE_3D:
            has3d = tf.expand_dims(has3d, -1)
            kp3d_real = batch_align_by_pelvis(kp3d)
            kp3d_pred = batch_align_by_pelvis(kp3d_pred[:, :self.config.NUM_KP3D, :])

            kp3d_real = tf.reshape(kp3d_real, [batch_size, -1])
            kp3d_pred = tf.reshape(kp3d_pred, [batch_size, -1])

            kp3d_loss = v1_loss.mean_squared_error(kp3d_real, kp3d_pred, weights=has3d) * 0.5
            kp3d_loss = kp3d_loss * self.config.GENERATOR_3D_LOSS_WEIGHT

            # Calculating a pose and shape loss is not meaningful here because
            # paired 3D and MoSh ground truth data is missing. The original
            # implementation used paired data for the Human3.6M dataset, which
            # was not published due to a license conflict. Nevertheless, paired
            # data can be generated with SMPLify
            # (see http://smplify.is.tue.mpg.de/ for more information).
            pose_pred = tf.reshape(pose_pred, [batch_size, -1])
            shape_pred = tf.reshape(shape_pred, [batch_size, -1])
            pose_shape_pred = tf.concat([pose_pred, shape_pred], 1)

            # fake ground truth; zero weights exclude this loss
            has_smpl = tf.zeros(batch_size, tf.float32)
            has_smpl = tf.expand_dims(has_smpl, -1)
            pose_shape_real = tf.zeros(pose_shape_pred.shape)

            ps_loss = v1_loss.mean_squared_error(pose_shape_real, pose_shape_pred, weights=has_smpl) * 0.5
            ps_loss = ps_loss * self.config.GENERATOR_3D_LOSS_WEIGHT

        # use all poses and shapes from the iterative feedback loop
        fake_disc_input = self.accumulate_fake_disc_input(generator_outputs)
        fake_disc_output = self.discriminator(fake_disc_input, training=True)

        real_disc_input = self.accumulate_real_disc_input(theta)
        real_disc_output = self.discriminator(real_disc_input, training=True)

        gen_disc_loss = tf.reduce_mean(tf.reduce_sum((fake_disc_output - 1) ** 2, axis=1))
        gen_disc_loss = gen_disc_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

        generator_loss = tf.reduce_sum([kp2d_loss, gen_disc_loss])
        if self.config.USE_3D:
            generator_loss = tf.reduce_sum([generator_loss, kp3d_loss, ps_loss])

        disc_real_loss = tf.reduce_mean(tf.reduce_sum((real_disc_output - 1) ** 2, axis=1))
        disc_fake_loss = tf.reduce_mean(tf.reduce_sum(fake_disc_output ** 2, axis=1))

        discriminator_loss = tf.reduce_sum([disc_real_loss, disc_fake_loss])
        discriminator_loss = discriminator_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

    # compute gradients outside the tape contexts and update both networks
    generator_grads = gen_tape.gradient(generator_loss, self.generator.trainable_variables)
    discriminator_grads = disc_tape.gradient(discriminator_loss, self.discriminator.trainable_variables)

    self.generator_opt.apply_gradients(zip(generator_grads, self.generator.trainable_variables))
    self.discriminator_opt.apply_gradients(zip(discriminator_grads, self.discriminator.trainable_variables))

    self.generator_loss_log.update_state(generator_loss)
    self.kp2d_loss_log.update_state(kp2d_loss)
    self.gen_disc_loss_log.update_state(gen_disc_loss)

    if self.config.USE_3D:
        self.kp3d_loss_log.update_state(kp3d_loss)
        self.pose_shape_loss_log.update_state(ps_loss)

    self.discriminator_loss_log.update_state(discriminator_loss)
    self.disc_real_loss_log.update_state(disc_real_loss)
    self.disc_fake_loss_log.update_state(disc_fake_loss)
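# The adversarial terms in _train_step follow the least-squares GAN objective:
# the generator pushes discriminator outputs on fake poses/shapes toward 1,
# while the discriminator pushes real outputs toward 1 and fake outputs toward
# 0. A minimal factored restatement of those three terms (illustrative only;
# the inline expressions above are what actually runs):
def _lsgan_losses_sketch(real_disc_output, fake_disc_output):
    """Return (generator_adv_loss, discriminator_adv_loss) as scalars."""
    gen_adv_loss = tf.reduce_mean(tf.reduce_sum((fake_disc_output - 1) ** 2, axis=1))
    disc_real_loss = tf.reduce_mean(tf.reduce_sum((real_disc_output - 1) ** 2, axis=1))
    disc_fake_loss = tf.reduce_mean(tf.reduce_sum(fake_disc_output ** 2, axis=1))
    return gen_adv_loss, disc_real_loss + disc_fake_loss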