def _training_summary(self, training_info, loss_info, grads_and_vars):
    if self._summarize_grads_and_vars:
        summary_utils.add_variables_summaries(grads_and_vars,
                                              self._train_step_counter)
        summary_utils.add_gradients_summaries(grads_and_vars,
                                              self._train_step_counter)
    if self._debug_summaries:
        common.add_action_summaries(training_info.action,
                                    self.env.action_spec())
        common.add_loss_summaries(loss_info)
    if self._summarize_action_distributions:
        summary_utils.summarize_action_dist(training_info.action_distribution,
                                            self.env.action_spec())
        if training_info.collect_action_distribution:
            summary_utils.summarize_action_dist(
                action_distributions=training_info.collect_action_distribution,
                action_specs=self.env.action_spec(),
                name="collect_action_dist")
    for metric in self.get_metrics():
        metric.tf_summaries(
            train_step=self._train_step_counter,
            step_metrics=self.get_metrics()[:2])
    # Record current process memory (~MB). In graph mode a py_function output
    # has unknown static shape, so the scalar shape must be set explicitly.
    mem = tf.py_function(
        lambda: self._proc.memory_info().rss // 1e6, [],
        tf.float32,
        name='memory_usage')
    if not tf.executing_eagerly():
        mem.set_shape(())
    tf.summary.scalar(name='memory_usage', data=mem)
def result(self):
    def _result():
        # Average of the buffered values; assumes the buffer is full.
        return np.sum(self._buffer) / self.buffer_size

    result_value = tf.py_function(
        _result, [], tf.float32, name='metric_result_py_func')
    if not tf.executing_eagerly():
        # set_shape() mutates the tensor in place and returns None, so it
        # must not be the return value itself.
        result_value.set_shape(())
    return result_value
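# Minimal standalone sketch of the same pattern (hypothetical buffer values),
# showing why the shape must be set in graph mode: tf.py_function outputs
# have unknown static shape there, which shape-checked consumers such as
# tf.summary.scalar can reject.
import numpy as np
import tensorflow as tf

buffer = np.array([3.0, 5.0, 7.0, 9.0], dtype=np.float32)

@tf.function  # graph mode, so the py_function output shape is unknown
def averaged():
    value = tf.py_function(lambda: np.mean(buffer), [], tf.float32)
    value.set_shape(())  # mutates in place; returns None
    return value

print(averaged().numpy())  # 6.0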
def summarize_metrics(self):
    """Generate summaries for metrics `AverageEpisodeLength`, `AverageReturn`..."""
    if self._metrics:
        for metric in self._metrics:
            metric.tf_summaries(
                train_step=common.get_global_counter(),
                step_metrics=self._metrics[:2])
    mem = tf.py_function(
        lambda: self._proc.memory_info().rss // 1e6, [],
        tf.float32,
        name='memory_usage')
    if not tf.executing_eagerly():
        mem.set_shape(())
    tf.summary.scalar(name='memory_usage', data=mem)
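# Both memory-usage summaries above assume a `self._proc` process handle;
# a minimal sketch of how such a handle is typically created (psutil is an
# assumption here, not shown in these snippets):
import os

import psutil

class TrainerWithMemorySummary:
    def __init__(self):
        # Handle to the current process; memory_info().rss is the resident
        # set size in bytes, hence the // 1e6 conversion to ~MB above.
        self._proc = psutil.Process(os.getpid())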
def step(batch_theta, batch_psi):
    # VAE (theta) update on the first batch.
    with tf.GradientTape() as tape:
        z_mean, z_log_var = self.encode(batch_theta)
        z = self.sample(z_mean, z_log_var, training=True)
        p_z = self.discriminator(z)
        x_mean, x_log_var = self.decode(z)
        loss_theta = self.objective(batch_theta, x_mean, x_log_var, z_mean,
                                    z_log_var, p_z)
        tf.debugging.check_numerics(loss_theta, "loss is invalid")
    # Discriminator weights are assigned as not trainable in init.
    grad_theta = tape.gradient(loss_theta, self.trainable_variables)
    optimizer.apply_gradients(zip(grad_theta, self.trainable_variables))

    # Discriminator (psi) update on the second batch.
    with tf.GradientTape() as tape:
        z_mean, z_log_var = self.encode(batch_psi)
        z = self.sample(z_mean, z_log_var, training=True)
        # Recompute the discriminator output inside this tape: reusing p_z
        # from the theta tape would leave that term's gradient unrecorded.
        p_z = self.discriminator(z)
        z_permuted = tf.py_function(self.permute_dims, inp=[z],
                                    Tout=tf.float32)
        z_permuted.set_shape(z.shape)
        p_permuted = self.discriminator(z_permuted)
        loss_psi = discriminator_loss(p_z, p_permuted)
    grad_psi = tape.gradient(loss_psi, self.discriminator_net.variables)
    optimizer_discriminator.apply_gradients(
        zip(grad_psi, self.discriminator_net.variables))

    logs = {m.name: m.result() for m in self.metrics}
    logs["loss"] = loss_theta
    return logs
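# `self.permute_dims` and `discriminator_loss` are used above but not shown.
# A sketch of standard FactorVAE-style versions, assuming `z` is a
# [batch, latent_dim] tensor and the discriminator outputs two logits per
# sample (class 0 = from q(z), class 1 = permuted); the repo's actual
# implementations may differ.
import tensorflow as tf

def permute_dims(z):
    """Shuffles each latent dimension independently across the batch.

    The result approximates samples from the product of marginals
    q(z_1)...q(z_d), which the discriminator learns to tell apart from
    the joint q(z).
    """
    z = tf.convert_to_tensor(z)
    return tf.stack(
        [tf.random.shuffle(z[:, i]) for i in range(z.shape[-1])], axis=1)

def discriminator_loss(p_z, p_permuted):
    # Cross-entropy on [batch, 2] logits for both discriminator outputs.
    real = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.zeros(tf.shape(p_z)[0], dtype=tf.int32), logits=p_z)
    permuted = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.ones(tf.shape(p_permuted)[0], dtype=tf.int32),
        logits=p_permuted)
    return tf.reduce_mean(real) + tf.reduce_mean(permuted)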