Example #1
    def _train_step(self, images, kp2d, kp3d, has3d, theta):
        tf.keras.backend.set_learning_phase(1)
        batch_size = images.shape[0]

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generator_outputs = self.generator(images, training=True)
            # only use last computed theta (from iterative feedback loop)
            _, kp2d_pred, kp3d_pred, pose_pred, shape_pred, _ = generator_outputs[-1]

            vis = tf.expand_dims(kp2d[:, :, 2], -1)
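            # 2D keypoint loss: absolute difference between prediction and label,
            # weighted by per-joint visibility so unannotated joints contribute nothing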
            kp2d_loss = v1_loss.absolute_difference(kp2d[:, :, :2], kp2d_pred, weights=vis)
            kp2d_loss = kp2d_loss * self.config.GENERATOR_2D_LOSS_WEIGHT

            if self.config.USE_3D:
                has3d = tf.expand_dims(has3d, -1)

                # align real and predicted 3D keypoints to the pelvis to remove global translation
                kp3d_real = batch_align_by_pelvis(kp3d)
                kp3d_pred = batch_align_by_pelvis(kp3d_pred[:, :self.config.NUM_KP3D, :])

                kp3d_real = tf.reshape(kp3d_real, [batch_size, -1])
                kp3d_pred = tf.reshape(kp3d_pred, [batch_size, -1])

                kp3d_loss = v1_loss.mean_squared_error(kp3d_real, kp3d_pred, weights=has3d) * 0.5
                kp3d_loss = kp3d_loss * self.config.GENERATOR_3D_LOSS_WEIGHT

                """Calculating pose and shape loss basically makes no sense 
                    due to missing paired 3d and mosh ground truth data.
                    The original implementation has paired data for Human 3.6 M dataset
                    which was not published due to licence conflict.
                    Nevertheless with SMPLify paired data can be generated 
                    (see http://smplify.is.tue.mpg.de/ for more information)
                """
                pose_pred = tf.reshape(pose_pred, [batch_size, -1])
                shape_pred = tf.reshape(shape_pred, [batch_size, -1])
                pose_shape_pred = tf.concat([pose_pred, shape_pred], 1)

                # fake ground truth
                has_smpl = tf.zeros(batch_size, tf.float32)  # do not include loss
                has_smpl = tf.expand_dims(has_smpl, -1)
                pose_shape_real = tf.zeros(pose_shape_pred.shape)

                ps_loss = v1_loss.mean_squared_error(pose_shape_real, pose_shape_pred, weights=has_smpl) * 0.5
                ps_loss = ps_loss * self.config.GENERATOR_3D_LOSS_WEIGHT
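                # with has_smpl all zeros this term evaluates to zero, so pose/shape
                # supervision is effectively disabled until paired SMPL ground truth exists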

            # use all poses and shapes from iterative feedback loop
            fake_disc_input = self.accumulate_fake_disc_input(generator_outputs)
            fake_disc_output = self.discriminator(fake_disc_input, training=True)

            real_disc_input = self.accumulate_real_disc_input(theta)
            real_disc_output = self.discriminator(real_disc_input, training=True)

            # least-squares GAN generator loss: push discriminator outputs for generated parameters toward 1
            gen_disc_loss = tf.reduce_mean(tf.reduce_sum((fake_disc_output - 1) ** 2, axis=1))
            gen_disc_loss = gen_disc_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

            generator_loss = tf.reduce_sum([kp2d_loss, gen_disc_loss])
            if self.config.USE_3D:
                generator_loss = tf.reduce_sum([generator_loss, kp3d_loss, ps_loss])

            # least-squares GAN discriminator loss: real parameters toward 1, generated parameters toward 0
            disc_real_loss = tf.reduce_mean(tf.reduce_sum((real_disc_output - 1) ** 2, axis=1))
            disc_fake_loss = tf.reduce_mean(tf.reduce_sum(fake_disc_output ** 2, axis=1))

            discriminator_loss = tf.reduce_sum([disc_real_loss, disc_fake_loss])
            discriminator_loss = discriminator_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

        generator_grads = gen_tape.gradient(generator_loss, self.generator.trainable_variables)
        discriminator_grads = disc_tape.gradient(discriminator_loss, self.discriminator.trainable_variables)
        self.generator_opt.apply_gradients(zip(generator_grads, self.generator.trainable_variables))
        self.discriminator_opt.apply_gradients(zip(discriminator_grads, self.discriminator.trainable_variables))

        self.generator_loss_log.update_state(generator_loss)
        self.kp2d_loss_log.update_state(kp2d_loss)
        self.gen_disc_loss_log.update_state(gen_disc_loss)

        if self.config.USE_3D:
            self.kp3d_loss_log.update_state(kp3d_loss)
            self.pose_shape_loss_log.update_state(ps_loss)

        self.discriminator_loss_log.update_state(discriminator_loss)
        self.disc_real_loss_log.update_state(disc_real_loss)
        self.disc_fake_loss_log.update_state(disc_fake_loss)
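
A minimal calling sketch for the step above; `dataset` and `trainer` are assumed names (a tf.data pipeline yielding the expected tuple, and an instance of the surrounding trainer class), not part of the original example:

    for images, kp2d, kp3d, has3d, theta in dataset:
        trainer._train_step(images, kp2d, kp3d, has3d, theta)
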
Example #2
 def generator_loss(self, y_img_true, y_img_pred, y_dis_true, y_dis_pred):
     loss1 = Losses.mean_squared_error(y_img_true, y_img_pred)      # image reconstruction term
     loss2 = Losses.sigmoid_cross_entropy(y_dis_true, y_dis_pred)   # adversarial term
     gan_lambda = 1e-2
     loss = loss1 + gan_lambda * loss2
     return loss, {'loss': loss, 'img_loss': loss1, 'g_loss': loss2}
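
A sketch of how this combined loss might drive a generator update (assuming `import tensorflow as tf`); `generator`, `discriminator`, `model`, `x`, and `y_true` are assumed names, not part of the original example. For the generator step the adversarial target `y_dis_true` is a tensor of ones, so the generator is rewarded when the discriminator scores its output as real:

    with tf.GradientTape() as tape:
        y_pred = generator(x, training=True)
        d_pred = discriminator(y_pred, training=True)
        loss, logs = model.generator_loss(y_true, y_pred, tf.ones_like(d_pred), d_pred)
    grads = tape.gradient(loss, generator.trainable_variables)
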
Example #3
    def __init__(self, observation_size, net_arch, initializer, activation,
                 clip_range, value_coef, entropy_coef, learning_rate,
                 pre_training_learning_rate, action_bounds, policy):
        """
        :param observation_size:
        :param net_arch:
        :param initializer:
        :param activation:
        :param clip_range:
        :param value_coef:
        :param entropy_coef:
        :param learning_rate:
        :param pre_training_learning_rate:
        :param action_bounds:
        :param policy:
        """
        """Set class constants"""
        self.observation_size = observation_size
        self.net_arch = net_arch
        self.initializer = initializer
        self.activation = activation
        self.clip_range = clip_range
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        if action_bounds is None:
            action_bounds = [0.0, 1.5]
        self.action_bounds = action_bounds
        self.learning_rate = learning_rate
        self.pre_training_learning_rate = pre_training_learning_rate

        if policy is None:
            policy = GaussFull()
        self.policy = policy
        """Set up the tensorflow graph"""
        self.graph = Graph()

        with self.graph.as_default():
            self.sess = Session(graph=self.graph)
            """ core """
            # placeholders
            self.observation_string_ph = placeholder(
                shape=(None, 1), dtype=string, name="observation_string_ph")
            self.action_ph = placeholder(dtype=float32,
                                         shape=(None, 1),
                                         name="action_ph")
            self.old_neg_logits = placeholder(dtype=float32,
                                              shape=(None, 1),
                                              name="old_neg_logits")
            self.advantage_ph = placeholder(dtype=float32,
                                            shape=(None, 1),
                                            name="advantage_ph")
            self.value_target_ph = placeholder(dtype=float32,
                                               shape=(None, 1),
                                               name="value_target_ph")
            # learning rate tensors
            self.learning_rate_ph = placeholder_with_default(
                input=self.learning_rate, shape=())
            self.pre_training_learning_rate_ph = placeholder_with_default(
                input=self.pre_training_learning_rate, shape=())

            # observation tensor
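            # observations arrive as standard base64 strings; rewrite them to the web-safe
            # alphabet expected by decode_base64, then reinterpret the raw bytes as float32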
            replaced1 = regex_replace(self.observation_string_ph, "/", "_")
            replaced2 = regex_replace(replaced1, r"\+", "-")
            byte_tensor = decode_base64(replaced2)
            decoded = decode_raw(byte_tensor, out_type=float32)
            squeezed = squeeze(decoded, axis=1)
            self.observation_input = ensure_shape(
                squeezed,
                shape=(None, self.observation_size),
                name="observation_input")

            # policy net
            latent_policy = net_core(self.observation_input, self.net_arch,
                                     self.initializer, self.activation)
            self.policy.construct(latent_policy=latent_policy)

            self.clipped_action = clip_by_value(
                cast(self.policy.action, float32), self.action_bounds[0],
                self.action_bounds[1], "clipped_action")

            # value net
            latent_value = net_core(self.observation_input, self.net_arch,
                                    self.initializer, self.activation)
            self.value = identity(
                input=Dense(units=1,
                            activation=None,
                            kernel_initializer=self.initializer)(latent_value),
                name="value")
            """loss calculation"""
            # policy loss
            self.neg_logits = self.policy.neg_logits_from_actions(
                self.action_ph)
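            # probability ratio pi_new / pi_old, recovered from stored negative log-probabilities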
            ratio = exp(self.old_neg_logits - self.neg_logits)

            standardized_adv = (self.advantage_ph - reduce_mean(
                self.advantage_ph)) / (reduce_std(self.advantage_ph) + 1e-8)
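            # PPO clipped surrogate: the maximum of the two negated terms below equals
            # the negative of min(ratio * A, clipped_ratio * A)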
            raw_policy_loss = -standardized_adv * ratio
            clipped_policy_loss = -standardized_adv * clip_by_value(
                ratio, 1 - self.clip_range, 1 + self.clip_range)
            self.policy_loss = reduce_mean(
                maximum(raw_policy_loss, clipped_policy_loss))

            self.value_loss = mean_squared_error(self.value_target_ph,
                                                 self.value)

            # entropy loss
            self.entropy_loss = -reduce_mean(self.policy.entropy)

            # total loss
            self.total_loss = self.policy_loss + self.value_coef * self.value_loss + self.entropy_coef * self.entropy_loss

            # optimizer
            optimizer = AdamOptimizer(learning_rate=self.learning_rate_ph)

            # training ops
            self.training_op = optimizer.minimize(self.total_loss)

            # pre training
            self.dist_param_target_ph = placeholder(
                dtype=float32,
                shape=(None, self.policy.dist_params.shape[1]),
                name="dist_param_label_ph")
            self.pre_training_loss = mean_squared_error(
                self.dist_param_target_ph, self.policy.dist_params)
            pre_training_optimizer = GradientDescentOptimizer(
                learning_rate=self.pre_training_learning_rate_ph)
            self.pre_training_op = pre_training_optimizer.minimize(
                self.pre_training_loss)
            """utility nodes"""
            # inspect model weights
            self.trainable_variables = trainable_variables()

            # saver
            self.saver = Saver()

            # tensorboard summaries
            self.summary = merge([
                histogram("values", self.value),
                histogram("advantages", standardized_adv),
                histogram("actions", self.clipped_action),
                histogram("det_actions",
                          replace_nan(self.policy.det_action, 0.0)),
                histogram("value_targets", self.value_target_ph),
                scalar("policy_loss", self.policy_loss),
                scalar("value_loss", self.value_loss),
                scalar("entropy_loss", self.entropy_loss)
            ])

            self.pre_summary = merge([
                histogram("pretraining_actions", self.clipped_action),
                scalar("pretraining_loss", self.pre_training_loss)
            ])

            # initialization
            init = global_variables_initializer()
            self.sess.run(init)

    def _nll(self, labels, logits):
        """Negative log likelihood: the reconstruction term in the ELBO.
        For a Gaussian decoder with fixed variance this reduces to mean squared error
        (up to an additive constant and scale).
        """
        return mean_squared_error(labels=labels, predictions=logits)