Exemplo n.º 1
0
 def rollout(self, timestep_limit, add_noise=True):
     """Run a single rollout with this object's policy and environment.

     Delegates to the free function ``rollout``. Inside a method body the
     bare name does not resolve to this method, so this is not recursion;
     the helper is presumably defined or imported at module level.

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.
         add_noise: Forwarded unchanged as ``add_noise``. Defaults to True.

     Returns:
         A 2-tuple holding the two values produced by the helper, in the
         same order (rewards first, then the fragment length).
     """
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit, add_noise=add_noise)
     # Unpack explicitly so a helper result that is not a pair still fails.
     rewards, fragment_length = outcome
     return rewards, fragment_length
Exemplo n.º 2
0
 def rollout(self, timestep_limit):
     """Run a single noise-free rollout with this object's policy and env.

     Delegates to the free function ``rollout`` (the bare name inside a
     method body does not refer to this method), always passing
     ``add_noise=False``.

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.

     Returns:
         A 2-tuple holding the two values produced by the helper, in the
         same order (rewards first, then the rollout length).
     """
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit, add_noise=False)
     # Unpack explicitly so a helper result that is not a pair still fails.
     rewards, length = outcome
     return rewards, length
Exemplo n.º 3
0
 def rollout(self, timestep_limit, add_noise=False):
     """Run a single rollout, passing the configured offset to the helper.

     Delegates to the free function ``rollout`` (the bare name inside a
     method body does not refer to this method).

     Args:
         timestep_limit: Forwarded unchanged as ``timestep_limit``.
         add_noise: Forwarded unchanged as ``add_noise``. Defaults to False.

     Returns:
         A 2-tuple holding the two values produced by the helper, in the
         same order (rewards first, then the fragment length).
     """
     outcome = rollout(
         self.policy, self.env,
         timestep_limit=timestep_limit,
         add_noise=add_noise,
         # Read from this object's config on every call.
         offset=self.config["offset"])
     # Unpack explicitly so a helper result that is not a pair still fails.
     rewards, fragment_length = outcome
     return rewards, fragment_length
Exemplo n.º 4
0
    def evaluate(self, candidate):
        """Score a candidate by rolling out the policy with its weights.

        Args:
            candidate: A pair ``(noise_index, multiplier)``. Both values are
                passed to ``model_keeper.get_perturbed_weights`` to obtain
                the flat weights loaded into the shared policy.

        Returns:
            A 2-tuple ``(rewards.sum(), length)`` taken from a rollout run
            with ``add_noise=False`` and this object's ``timestep_limit``.
        """
        noise_index, multiplier = candidate
        shared = self.common

        # Load the candidate's weights into the shared policy first.
        perturbed = shared.model_keeper.get_perturbed_weights(
            noise_index, multiplier)
        shared.policy.set_flat_weights(perturbed)

        rewards, length = rollout(
            shared.policy,
            shared.env,
            timestep_limit=self.timestep_limit,
            add_noise=False,
        )
        return rewards.sum(), length
Exemplo n.º 5
0
    def evaluate(self, candidate):
        """Score a candidate by rolling out the policy with its weights.

        The candidate is expanded into flat weights by the shared optimizer
        and loaded into the shared policy. A rollout is then run with
        ``add_noise=False`` and the outcome is logged at INFO level.

        Args:
            candidate: Value handed to ``optimizer.expand`` to obtain the
                flat weights; it is also included in the log line.

        Returns:
            A 2-tuple ``(rewards.sum(), length)`` taken from the rollout.
        """
        shared = self.common

        weights = shared.optimizer.expand(candidate)
        shared.policy.set_flat_weights(weights)

        rewards, length = rollout(
            shared.policy,
            shared.env,
            timestep_limit=self.timestep_limit,
            add_noise=False,
        )

        # Log the first requested weight beside the first weight the policy
        # reports back, so the two can be compared in the log.
        message = 'candidate {} {} {} {} {}'.format(
            candidate,
            weights[0],
            shared.policy.get_flat_weights()[0],
            rewards.sum(),
            length,
        )
        logger.info(message)

        return rewards.sum(), length