Exemplos de rollout em Python, exemplos de ray.rllib.agents.es.es_tf_policy.rollout em Python

Exemplo n.º 1

0

Exibir arquivo

 def rollout(self, timestep_limit, add_noise=True):
     """Run the external ``rollout`` helper on this object's policy and env.

     Inside a method body the bare name ``rollout`` resolves to the
     module-scope helper, not to this method.
     NOTE(review): the helper is presumably the one from
     ``ray.rllib.agents.es.es_tf_policy`` -- confirm against the imports.

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.
         add_noise: Forwarded unchanged as ``add_noise`` (default True).

     Returns:
         The two values produced by the helper, as a
         ``(rewards, fragment_length)`` tuple.
     """
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit, add_noise=add_noise)
     # Unpack explicitly so a result that is not a pair still fails here.
     rewards, fragment_length = outcome
     return rewards, fragment_length

Exemplo n.º 2

0

Exibir arquivo

 def rollout(self, timestep_limit):
     """Run the external ``rollout`` helper with noise disabled.

     Inside a method body the bare name ``rollout`` resolves to the
     module-scope helper, not to this method.

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.

     Returns:
         The two values produced by the helper, as a
         ``(rewards, length)`` tuple.
     """
     # This wrapper always passes add_noise=False.
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit, add_noise=False)
     rewards, length = outcome
     return rewards, length

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ars.py Projeto: RuofanKong/ray

 def rollout(self, timestep_limit, add_noise=False):
     """Run the external ``rollout`` helper, adding the configured offset.

     Inside a method body the bare name ``rollout`` resolves to the
     module-scope helper, not to this method.

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.
         add_noise: Forwarded unchanged as ``add_noise`` (default False).

     Returns:
         The two values produced by the helper, as a
         ``(rewards, fragment_length)`` tuple.
     """
     # ``offset`` is read from this object's config under the "offset" key.
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit,
         add_noise=add_noise,
         offset=self.config["offset"])
     rewards, fragment_length = outcome
     return rewards, fragment_length

Exemplo n.º 4

0

Exibir arquivo

Arquivo: es_co_trainer.py Projeto: NREL/K_Road

    def evaluate(self, candidate):
        """Evaluate one candidate with a single noise-free rollout.

        The candidate is unpacked into a noise index and a multiplier. These
        are turned into perturbed weights by ``self.common.model_keeper``,
        loaded into ``self.common.policy``, and the policy is then rolled out
        in ``self.common.env``.

        Args:
            candidate: A two-item iterable ``(noise_index, multiplier)``.

        Returns:
            A tuple ``(rewards.sum(), length)`` built from the two values the
            external ``rollout`` helper returns.
        """
        noise_index, multiplier = candidate
        perturbed = self.common.model_keeper.get_perturbed_weights(
            noise_index, multiplier)
        # The policy is mutated in place before the rollout uses it.
        self.common.policy.set_flat_weights(perturbed)

        episode_rewards, episode_length = rollout(
            self.common.policy,
            self.common.env,
            timestep_limit=self.timestep_limit,
            add_noise=False,
        )
        total_reward = episode_rewards.sum()
        return total_reward, episode_length

Exemplo n.º 5

0

Exibir arquivo

Arquivo: coordinated_dps_trainer.py Projeto: NREL/K_Road

    def evaluate(self, candidate):
        """Evaluate one candidate with a single noise-free rollout.

        The candidate is expanded into flat weights by
        ``self.common.optimizer``, loaded into ``self.common.policy``, and the
        policy is then rolled out in ``self.common.env``. The outcome is
        logged at INFO level.

        Args:
            candidate: Value passed to ``self.common.optimizer.expand``.

        Returns:
            A tuple ``(rewards.sum(), length)`` built from the two values the
            external ``rollout`` helper returns.
        """
        weights = self.common.optimizer.expand(candidate)
        # The policy is mutated in place before the rollout uses it.
        self.common.policy.set_flat_weights(weights)

        rewards, length = rollout(
            self.common.policy,
            self.common.env,
            timestep_limit=self.timestep_limit,
            add_noise=False)

        # Sum once so the logged value and the returned value are the same
        # object and the reduction is not repeated.
        total_reward = rewards.sum()

        # Log the first requested weight next to the first weight read back
        # from the policy, so a mismatch between the two is visible.
        logger.info('candidate {} {} {} {} {}'.format(
            candidate, weights[0],
            self.common.policy.get_flat_weights()[0], total_reward, length))

        return total_reward, length