示例#1
0
def sample_paths(policy_params,
                 max_samples,
                 max_path_length=np.inf,
                 env_params=None,
                 scope=None):
    """
    Push fresh parameters to every worker, then gather rollouts from the pool.

    :param policy_params: parameters for the policy; sent to each worker
     process before sampling starts
    :param max_samples: desired maximum number of samples to collect. The
     actual count may be greater, because every trajectory is rolled out
     until it terminates or reaches max_path_length
    :param max_path_length: horizon / maximum length of a single trajectory
    :param env_params: optional environment parameters; sent to each worker
     only when not None
    :param scope: passed unchanged to every worker call alongside the
     parameters
    :return: a list of collected paths
    """
    n_workers = singleton_pool.n_parallel

    # One identical (params, scope) argument tuple per worker process.
    policy_jobs = [(policy_params, scope)] * n_workers
    singleton_pool.run_each(_worker_set_policy_params, policy_jobs)

    # Environment parameters are optional; skip the broadcast when absent.
    if env_params is not None:
        env_jobs = [(env_params, scope)] * n_workers
        singleton_pool.run_each(_worker_set_env_params, env_jobs)

    collect_options = {
        "threshold": max_samples,
        "args": (max_path_length, scope),
        "show_prog_bar": True,
    }
    return singleton_pool.run_collect(_worker_collect_one_path,
                                      **collect_options)
示例#2
0
    def sample_paths(self,
                     policy_params,
                     max_samples,
                     max_path_length,
                     env_params=None,
                     inference_params=None,
                     scope=None):
        """Broadcast policy and inference parameters, then collect paths.

        Args:
            policy_params: Policy parameters sent to every worker.
            max_samples: Passed to the pool as the collection ``threshold``.
            max_path_length: Forwarded to each path-collection call.
            env_params: Accepted for interface compatibility but currently
                NOT forwarded to the workers.
            inference_params: Inference parameters sent to every worker;
                broadcast unconditionally, even when left as ``None``.
            scope: Passed unchanged to every worker call.

        Returns:
            The result of ``singleton_pool.run_collect`` for the
            path-collection worker function.
        """
        # One identical (params, scope) argument tuple per worker process.
        policy_jobs = [(policy_params, scope)] * singleton_pool.n_parallel
        singleton_pool.run_each(
            parallel_sampler._worker_set_policy_params,
            policy_jobs,
        )

        inference_jobs = [(inference_params, scope)] * singleton_pool.n_parallel
        singleton_pool.run_each(_worker_set_inference_params, inference_jobs)

        # NOTE: env_params is intentionally not broadcast here; forwarding it
        # through parallel_sampler._worker_set_env_params was disabled.

        collect_options = {
            "threshold": max_samples,
            "args": (max_path_length, scope),
            "show_prog_bar": True,
        }
        return singleton_pool.run_collect(
            _worker_collect_one_path,
            **collect_options,
        )
示例#3
0
def sample_paths(policy_params,
                 max_samples,
                 max_path_length=np.inf,
                 env_params=None,
                 scope=None):
    """Update every worker's parameters and collect paths from the pool.

    Parameters
    ----------
    policy_params :
        Parameters for the policy. These are sent to each worker process
        before any path is collected.
    max_samples : int
        Desired maximum number of samples to be collected. The actual
        number may be greater, since every trajectory is rolled out until
        termination or until max_path_length is reached.
    max_path_length : int, optional
        Horizon / maximum length of a single trajectory.
    env_params : optional
        Environment parameters. Sent to each worker only when not None.
    scope : str
        Scope for identifying the algorithm. Must be specified if running
        multiple algorithms simultaneously, each using different
        environments and policies.

    Returns
    -------
    The result of ``singleton_pool.run_collect`` for the path-collection
    worker function.
    """
    def _broadcast(setter, params):
        # Hand the same (params, scope) tuple to each worker in the pool.
        per_worker_args = [(params, scope)] * singleton_pool.n_parallel
        singleton_pool.run_each(setter, per_worker_args)

    _broadcast(_worker_set_policy_params, policy_params)

    if env_params is not None:
        _broadcast(_worker_set_env_params, env_params)

    rollout_args = (max_path_length, scope)
    return singleton_pool.run_collect(
        _worker_collect_one_path,
        threshold=max_samples,
        args=rollout_args,
        show_prog_bar=True,
    )