Example #1
    def _setup_stats(self, base):
        """Create the running means and std of the model inputs and outputs.

        This method also adds the same running means and stds as scalars to
        tensorboard for additional storage.
        """
        ops = []
        names = []

        ops += [tf.reduce_mean(self.critic_tf[0])]
        names += ['{}/reference_Q1_mean'.format(base)]
        ops += [reduce_std(self.critic_tf[0])]
        names += ['{}/reference_Q1_std'.format(base)]

        ops += [tf.reduce_mean(self.critic_tf[1])]
        names += ['{}/reference_Q2_mean'.format(base)]
        ops += [reduce_std(self.critic_tf[1])]
        names += ['{}/reference_Q2_std'.format(base)]

        ops += [tf.reduce_mean(self.critic_with_actor_tf[0])]
        names += ['{}/reference_actor_Q1_mean'.format(base)]
        ops += [reduce_std(self.critic_with_actor_tf[0])]
        names += ['{}/reference_actor_Q1_std'.format(base)]

        ops += [tf.reduce_mean(self.critic_with_actor_tf[1])]
        names += ['{}/reference_actor_Q2_mean'.format(base)]
        ops += [reduce_std(self.critic_with_actor_tf[1])]
        names += ['{}/reference_actor_Q2_std'.format(base)]

        ops += [tf.reduce_mean(self.actor_tf)]
        names += ['{}/reference_action_mean'.format(base)]
        ops += [reduce_std(self.actor_tf)]
        names += ['{}/reference_action_std'.format(base)]

        ops += [tf.reduce_mean(self.rew_ph)]
        names += ['{}/rewards'.format(base)]

        ops += [self.actor_loss]
        names += ['{}/actor_loss'.format(base)]

        ops += [self.critic_loss[0]]
        names += ['{}/Q1_loss'.format(base)]

        ops += [self.critic_loss[1]]
        names += ['{}/Q2_loss'.format(base)]

        # Add all names and ops to the tensorboard summary.
        for op, name in zip(ops, names):
            tf.compat.v1.summary.scalar(name, op)

        return ops, names
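
These snippets rely on a reduce_std helper that is not part of the tf.compat.v1 API. A minimal sketch of such a helper, assuming the usual variance-based definition, could look like this:

import tensorflow as tf

def reduce_var(x, axis=None, keepdims=False):
    """Compute the variance of a tensor along the given axes."""
    mean = tf.reduce_mean(x, axis=axis, keepdims=True)
    return tf.reduce_mean(tf.square(x - mean), axis=axis, keepdims=keepdims)

def reduce_std(x, axis=None, keepdims=False):
    """Compute the standard deviation as the square root of the variance."""
    return tf.sqrt(reduce_var(x, axis=axis, keepdims=keepdims))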
Example #2
    def _setup_stats(rew_ph, actor_loss, critic_loss, actor_tf, critic_tf):
        """Prepare tensorboard logging for attributes of the agent.

        Parameters
        ----------
        rew_ph : tf.compat.v1.placeholder
            a placeholder for the rewards of an agent
        actor_loss : tf.Operation
            the operation that returns the loss of the actor
        critic_loss : list of tf.Operation
            the operations that return the losses of the two critics
        actor_tf : tf.Variable
            the output from the actor of the agent
        critic_tf : list of tf.Variable
            the outputs from the two critics of the agent
        """
        # rewards
        tf.compat.v1.summary.scalar('rewards', tf.reduce_mean(rew_ph))

        # actor and critic losses
        tf.compat.v1.summary.scalar('actor_loss', actor_loss)
        tf.compat.v1.summary.scalar('Q1_loss', critic_loss[0])
        tf.compat.v1.summary.scalar('Q2_loss', critic_loss[1])

        # critic dynamics
        tf.compat.v1.summary.scalar('reference_Q1_mean',
                                    tf.reduce_mean(critic_tf[0]))
        tf.compat.v1.summary.scalar('reference_Q1_std',
                                    reduce_std(critic_tf[0]))

        tf.compat.v1.summary.scalar('reference_Q2_mean',
                                    tf.reduce_mean(critic_tf[1]))
        tf.compat.v1.summary.scalar('reference_Q2_std',
                                    reduce_std(critic_tf[1]))

        # actor dynamics
        tf.compat.v1.summary.scalar('reference_action_mean',
                                    tf.reduce_mean(actor_tf))
        tf.compat.v1.summary.scalar('reference_action_std',
                                    reduce_std(actor_tf))
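
Example #2 only registers scalars with tf.compat.v1.summary; to see them in TensorBoard the summaries still have to be merged and written by the training loop. A small self-contained sketch of that consuming side, using a stand-in rew_ph placeholder instead of the agent's real tensors, might be:

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# Stand-in tensor; in the real agent this is the rewards placeholder.
rew_ph = tf.compat.v1.placeholder(tf.float32, shape=(None,), name='rewards')
tf.compat.v1.summary.scalar('rewards', tf.reduce_mean(rew_ph))

# Merge every registered scalar and write one summary per training step.
summary_op = tf.compat.v1.summary.merge_all()
writer = tf.compat.v1.summary.FileWriter('/tmp/tb_demo')

with tf.compat.v1.Session() as sess:
    summary = sess.run(
        summary_op,
        feed_dict={rew_ph: np.random.rand(32).astype(np.float32)})
    writer.add_summary(summary, global_step=0)
    writer.flush()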
Example #3
    def _setup_stats(self, base):
        """Create the running means and std of the model inputs and outputs.

        This method also adds the same running means and stds as scalars to
        tensorboard for additional storage.
        """
        ops = []
        names = []

        ops += [tf.reduce_mean(self.policy)]
        names += ['{}/reference_action_mean'.format(base)]
        ops += [reduce_std(self.policy)]
        names += ['{}/reference_action_std'.format(base)]

        ops += [tf.reduce_mean(self.loss)]
        names += ['{}/reference_loss_mean'.format(base)]
        ops += [reduce_std(self.loss)]
        names += ['{}/reference_loss_std'.format(base)]

        # Add all names and ops to the tensorboard summary.
        for op, name in zip(ops, names):
            tf.compat.v1.summary.scalar(name, op)

        return ops, names
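
The ops/names pairs returned here are also convenient for console or CSV logging: run the ops in a session and zip the results back with their names. A self-contained sketch, with hypothetical stand-ins for self.policy and self.loss, could be:

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# Hypothetical stand-ins for self.policy and self.loss.
policy = tf.compat.v1.placeholder(tf.float32, shape=(None, 2), name='policy')
loss = tf.reduce_mean(tf.square(policy))

# Mean and std of the policy output, as in the example above.
mean = tf.reduce_mean(policy)
std = tf.sqrt(tf.reduce_mean(tf.square(policy - mean)))

ops = [mean, std, loss]
names = ['Policy/reference_action_mean',
         'Policy/reference_action_std',
         'Policy/reference_loss_mean']

with tf.compat.v1.Session() as sess:
    feed = {policy: np.random.rand(32, 2).astype(np.float32)}
    values = sess.run(ops, feed_dict=feed)
    for name, value in zip(names, values):
        print('{}: {:.4f}'.format(name, value))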
Example #4
    def _setup_stats(self, base):
        """Create the running means and std of the model inputs and outputs.

        This method also adds the same running means and stds as scalars to
        tensorboard for additional storage.
        """
        ops = []
        names = []

        ops += [tf.reduce_mean(self.qf1)]
        names += ['{}/reference_Q1_mean'.format(base)]
        ops += [reduce_std(self.qf1)]
        names += ['{}/reference_Q1_std'.format(base)]

        ops += [tf.reduce_mean(self.qf2)]
        names += ['{}/reference_Q2_mean'.format(base)]
        ops += [reduce_std(self.qf2)]
        names += ['{}/reference_Q2_std'.format(base)]

        ops += [tf.reduce_mean(self.qf1_pi)]
        names += ['{}/reference_actor_Q1_mean'.format(base)]
        ops += [reduce_std(self.qf1_pi)]
        names += ['{}/reference_actor_Q1_std'.format(base)]

        ops += [tf.reduce_mean(self.qf2_pi)]
        names += ['{}/reference_actor_Q2_mean'.format(base)]
        ops += [reduce_std(self.qf2_pi)]
        names += ['{}/reference_actor_Q2_std'.format(base)]

        ops += [
            tf.reduce_mean(self._ac_magnitudes * self.policy_out +
                           self._ac_means)
        ]
        names += ['{}/reference_action_mean'.format(base)]
        ops += [
            reduce_std(self._ac_magnitudes * self.policy_out + self._ac_means)
        ]
        names += ['{}/reference_action_std'.format(base)]

        ops += [tf.reduce_mean(self.logp_pi)]
        names += ['{}/reference_log_probability_mean'.format(base)]
        ops += [reduce_std(self.logp_pi)]
        names += ['{}/reference_log_probability_std'.format(base)]

        ops += [tf.reduce_mean(self.rew_ph)]
        names += ['{}/rewards'.format(base)]

        ops += [self.alpha_loss]
        names += ['{}/alpha_loss'.format(base)]

        ops += [self.actor_loss]
        names += ['{}/actor_loss'.format(base)]

        ops += [self.critic_loss[0]]
        names += ['{}/Q1_loss'.format(base)]

        ops += [self.critic_loss[1]]
        names += ['{}/Q2_loss'.format(base)]

        ops += [self.critic_loss[2]]
        names += ['{}/value_loss'.format(base)]

        # Add all names and ops to the tensorboard summary.
        for op, name in zip(ops, names):
            tf.compat.v1.summary.scalar(name, op)

        return ops, names
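
The reference_action statistics in Example #4 rescale the squashed policy output via self._ac_magnitudes * self.policy_out + self._ac_means. A hedged sketch of where such terms usually come from, assuming a tanh-squashed policy and box action bounds (the names below are illustrative, not the library's API):

import numpy as np

# Assumed action-space bounds (e.g. env.action_space.low / .high).
ac_low = np.array([-2.0, -1.0], dtype=np.float32)
ac_high = np.array([2.0, 1.0], dtype=np.float32)

ac_means = (ac_high + ac_low) / 2.0        # center of each action dimension
ac_magnitudes = (ac_high - ac_low) / 2.0   # half-width of each action dimension

# Stand-in squashed policy output in [-1, 1].
policy_out = np.tanh(np.random.randn(2).astype(np.float32))

# Map back to the environment's action range, as in the statistics above.
action = ac_magnitudes * policy_out + ac_means
print(action)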