Example #1
    def obtain_samples(self,
                       itr,
                       reset_args=None,
                       return_dict=False,
                       log_prefix=''):
        # remember the pre-update parameters so they can be restored after sampling
        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        # the environment may not expose parameters; fall back to None if it doesn't
        if hasattr(self.algo.env, "get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args] * self.n_envs
        if self.algo.policy.all_param_vals:
            # flatten each task's post-update parameter dict into a single vector
            cur_policy_params = [
                flatten_tensors(list(x.values()))
                for x in self.algo.policy.all_param_vals
            ]
        else:
            cur_policy_params = [cur_policy_params] * self.n_envs
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for i in range(self.n_envs):
            paths[i] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params[i],
                env_params=cur_env_params,
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
        total_time = time.time() - start
        logger.record_tabular(log_prefix + "TotalExecTime", total_time)

        if not return_dict:
            # merge the per-task path lists into one flat list of paths
            paths = [path for path_list in paths.values() for path in path_list]

        self.algo.policy.set_param_values(init_policy_params)

        # only whole paths are supported; add truncation logic here if partial paths are needed
        assert self.algo.whole_paths

        return paths
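In Example #1, flatten_tensors is what turns each task-specific parameter dict in policy.all_param_vals into the flat vector handed to parallel_sampler. Below is a minimal round-trip sketch, assuming an rllab-style tensor_utils module that also provides unflatten_tensors(flat, shapes); the import path and the parameter names are illustrative, not taken from the example:

import numpy as np
from rllab.misc.tensor_utils import flatten_tensors, unflatten_tensors

# Hypothetical per-task parameter dict, standing in for one entry of all_param_vals.
task_params = {"W_hidden": np.random.randn(4, 8), "b_hidden": np.zeros(8)}

flat = flatten_tensors(list(task_params.values()))  # 1-D vector of length 40
shapes = [v.shape for v in task_params.values()]
restored = unflatten_tensors(flat, shapes)          # arrays with the original shapes restored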
Example #2
 def eval_loss_grad(params):
     # Set the candidate parameter vector on the policy, evaluate the loss
     # gradient, and return it as a flat float64 vector (e.g. for a scipy-based optimizer).
     self.policy.set_param_values(params, trainable=True)
     grad = f_loss_grad(*input)
     flattened_grad = tensor_utils.flatten_tensors(
         list(map(np.asarray, grad)))
     return flattened_grad.astype(np.float64)
Example #3
 def get_param_values(self, all_params=False, **tags):
     # Fetch the current values of the selected TensorFlow variables and
     # flatten them into a single 1-D parameter vector.
     params = self.get_params(all_params, **tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
Example #4
 def get_param_values(self, **tags):
     # Theano variant: read each shared variable's value and flatten into one vector.
     return flatten_tensors([
         param.get_value(borrow=True) for param in self.get_params(**tags)
     ])
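All four examples rely on the same behavior: flatten_tensors concatenates a list of arrays into a single 1-D vector. A minimal sketch of that behavior (not the exact library source) with a usage check:

import numpy as np

def flatten_tensors(tensors):
    # Flatten each array and concatenate into a single 1-D parameter vector;
    # an empty list yields an empty vector.
    if len(tensors) > 0:
        return np.concatenate([np.reshape(t, [-1]) for t in tensors])
    return np.asarray([])

flatten_tensors([np.zeros((2, 3)), np.ones(4)]).shape  # -> (10,)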