Example #1
    def _training_summary(self, training_info, loss_info, grads_and_vars):
        if self._summarize_grads_and_vars:
            summary_utils.add_variables_summaries(grads_and_vars,
                                                  self._train_step_counter)
            summary_utils.add_gradients_summaries(grads_and_vars,
                                                  self._train_step_counter)
        if self._debug_summaries:
            common.add_action_summaries(training_info.action,
                                        self.env.action_spec())
            common.add_loss_summaries(loss_info)

        if self._summarize_action_distributions:
            summary_utils.summarize_action_dist(
                training_info.action_distribution, self.env.action_spec())
            if training_info.collect_action_distribution:
                summary_utils.summarize_action_dist(
                    action_distributions=training_info.collect_action_distribution,
                    action_specs=self.env.action_spec(),
                    name="collect_action_dist")

        for metric in self.get_metrics():
            metric.tf_summaries(
                train_step=self._train_step_counter,
                step_metrics=self.get_metrics()[:2])

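        # Log resident memory in MB; psutil runs Python-side via py_function.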
        mem = tf.py_function(
            lambda: self._proc.memory_info().rss // 1e6, [],
            tf.float32,
            name='memory_usage')
        if not tf.executing_eagerly():
            mem.set_shape(())
        tf.summary.scalar(name='memory_usage', data=mem)
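
The memory-logging block above is the core tf.py_function pattern in this example. A standalone, runnable sketch of it follows; the log_memory_usage helper and the module-level psutil.Process() are our assumptions, not part of the original class:

    import psutil
    import tensorflow as tf

    _proc = psutil.Process()

    def log_memory_usage(step):
        # The lambda runs as ordinary Python, even when traced into a graph.
        mem = tf.py_function(
            lambda: _proc.memory_info().rss // 1e6, [],
            tf.float32,
            name='memory_usage')
        if not tf.executing_eagerly():
            # Graph mode cannot infer py_function output shapes; pin a scalar.
            mem.set_shape(())
        tf.summary.scalar(name='memory_usage', data=mem, step=step)
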
Example #2
    def result(self):
        def _result():
            return np.sum(self._buffer) / self.buffer_size

        result_value = tf.py_function(_result, [],
                                      tf.float32,
                                      name='metric_result_py_func')
        if not tf.executing_eagerly():
            # Tensor.set_shape() mutates in place and returns None; set the
            # scalar shape first, then return the tensor itself.
            result_value.set_shape(())
        return result_value
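
For context, a minimal metric class that this result() could belong to might look like the sketch below. The class name, buffer handling, and update() method are assumptions; only result() mirrors the example:

    import numpy as np
    import tensorflow as tf

    class AverageBufferMetric(object):
        """Hypothetical streaming metric: mean over a fixed-size buffer."""

        def __init__(self, buffer_size):
            self.buffer_size = buffer_size
            self._buffer = np.zeros(buffer_size, dtype=np.float32)
            self._index = 0

        def update(self, value):
            # Overwrite the oldest entry once the buffer wraps around.
            self._buffer[self._index % self.buffer_size] = value
            self._index += 1

        def result(self):
            def _result():
                return np.sum(self._buffer) / self.buffer_size

            result_value = tf.py_function(_result, [],
                                          tf.float32,
                                          name='metric_result_py_func')
            if not tf.executing_eagerly():
                result_value.set_shape(())
            return result_value
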
Example #3
    def summarize_metrics(self):
        """Generate summaries for metrics `AverageEpisodeLength`, `AverageReturn`..."""
        if self._metrics:
            for metric in self._metrics:
                metric.tf_summaries(train_step=common.get_global_counter(),
                                    step_metrics=self._metrics[:2])

        mem = tf.py_function(lambda: self._proc.memory_info().rss // 1e6, [],
                             tf.float32,
                             name='memory_usage')
        if not tf.executing_eagerly():
            mem.set_shape(())
        tf.summary.scalar(name='memory_usage', data=mem)
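
The step_metrics=self._metrics[:2] slice appears to follow the TF-Agents convention that the first two metrics serve as alternative x-axes for the remaining summaries. A plausible metric list under that assumption:

    from tf_agents.metrics import tf_metrics

    # Step metrics come first so later metrics can be plotted against them.
    metrics = [
        tf_metrics.NumberOfEpisodes(),
        tf_metrics.EnvironmentSteps(),
        tf_metrics.AverageReturnMetric(),
        tf_metrics.AverageEpisodeLengthMetric(),
    ]
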
Example #4
        def step(batch_theta, batch_psi):
            with tf.GradientTape() as tape:
                z_mean, z_log_var = self.encode(batch_theta)
                z = self.sample(z_mean, z_log_var, training=True)

                p_z = self.discriminator(z)

                x_mean, x_log_var = self.decode(z)

                loss_theta = self.objective(batch_theta, x_mean, x_log_var,
                                            z_mean, z_log_var, p_z)
                tf.debugging.check_numerics(loss_theta, "loss is invalid")

            # Discriminator weights are assigned as not trainable in init
            grad_theta = tape.gradient(loss_theta, self.trainable_variables)
            optimizer.apply_gradients(zip(grad_theta,
                                          self.trainable_variables))

            # Updating Discriminator
            with tf.GradientTape() as tape:
                z_mean, z_log_var = self.encode(batch_psi)
                z = self.sample(z_mean, z_log_var, training=True)

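                # permute_dims is plain Python, so it must go through
                # tf.py_function; the lost static shape is restored below.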
                z_permuted = tf.py_function(self.permute_dims,
                                            inp=[z],
                                            Tout=tf.float32)
                z_permuted.set_shape(z.shape)

                p_permuted = self.discriminator(z_permuted)

                loss_psi = discriminator_loss(p_z, p_permuted)

            grad_psi = tape.gradient(loss_psi,
                                     self.discriminator_net.variables)
            optimizer_discriminator.apply_gradients(
                zip(grad_psi, self.discriminator_net.variables))

            logs = {m.name: m.result() for m in self.metrics}
            logs["loss"] = loss_theta

            return logs
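
step() relies on a self.permute_dims helper that is not shown. One possible implementation, matching the FactorVAE trick of shuffling each latent dimension independently across the batch (the function body below is our assumption):

    import numpy as np
    import tensorflow as tf

    def permute_dims(z):
        # Inside tf.py_function, z arrives as an eager tensor.
        z = z.numpy()
        permuted = np.empty_like(z)
        # Shuffle every latent dimension independently across the batch so
        # the result approximates the product of marginals q(z_j).
        for j in range(z.shape[1]):
            permuted[:, j] = np.random.permutation(z[:, j])
        return tf.convert_to_tensor(permuted)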