Exemplo n.º 1
0
    def postprocess_trajectory(self,
                               sample_batch,
                               other_agent_batches=None,
                               episode=None):
        if self.config["parameter_noise"]:
            # adjust the sigma of parameter space noise
            states, noisy_actions = [
                list(x) for x in sample_batch.columns(["obs", "actions"])
            ]
            self.sess.run(self.remove_noise_op)
            clean_actions = self.sess.run(
                self.output_actions,
                feed_dict={
                    self.cur_observations: states,
                    self.stochastic: False,
                    self.eps: .0
                })
            distance_in_action_space = np.sqrt(
                np.mean(np.square(clean_actions - noisy_actions)))
            self.pi_distance = distance_in_action_space
            if distance_in_action_space < self.config["exploration_sigma"]:
                self.parameter_noise_sigma_val *= 1.01
            else:
                self.parameter_noise_sigma_val /= 1.01
            self.parameter_noise_sigma.load(
                self.parameter_noise_sigma_val, session=self.sess)

        return _postprocess_dqn(self, sample_batch)
Exemplo n.º 2
0
    def postprocess_trajectory(self,
                               sample_batch,
                               other_agent_batches=None,
                               episode=None):
        if self.config["parameter_noise"]:
            # adjust the sigma of parameter space noise
            states, noisy_actions = [
                list(x) for x in sample_batch.columns(["obs", "actions"])
            ]
            self.sess.run(self.remove_noise_op)
            clean_actions = self.sess.run(self.output_actions,
                                          feed_dict={
                                              self.cur_observations: states,
                                              self.stochastic: False,
                                              self.eps: .0
                                          })
            distance_in_action_space = np.sqrt(
                np.mean(np.square(clean_actions - noisy_actions)))
            self.pi_distance = distance_in_action_space
            if distance_in_action_space < self.config["exploration_sigma"]:
                self.parameter_noise_sigma_val *= 1.01
            else:
                self.parameter_noise_sigma_val /= 1.01
            self.parameter_noise_sigma.load(self.parameter_noise_sigma_val,
                                            session=self.sess)

        return _postprocess_dqn(self, sample_batch)
Exemplo n.º 3
0
 def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
     return _postprocess_dqn(self, sample_batch)
Exemplo n.º 4
0
 def postprocess_trajectory(self,
                            sample_batch,
                            other_agent_batches=None,
                            episode=None):
     return _postprocess_dqn(self, sample_batch)