def populate_task(env, policy, scope=None):
    """Install the environment and policy on every worker.

    With more than one parallel worker, pickled copies of ``env`` and
    ``policy`` are sent to each worker through ``singleton_pool.run_each``.
    Otherwise both objects are attached directly to the scoped globals
    object, which avoids serializing them.

    Parameters
    ----------
    env : :py:class:`ast_toolbox.envs.ASTEnv`
        The environment.
    policy : :py:class:`garage.tf.policies.Policy`
        The policy.
    scope : str
        Scope for identifying the algorithm. Must be specified if running
        multiple algorithms simultaneously, each using different
        environments and policies.

    """
    logger.log('Populating workers...')
    n_workers = singleton_pool.n_parallel
    if n_workers <= 1:
        # Single worker: avoid unnecessary copying, store the objects as-is.
        scoped_g = _get_scoped_g(singleton_pool.G, scope)
        scoped_g.env = env
        scoped_g.policy = policy
    else:
        # One identical (env, policy, scope) payload per worker.
        payload = (pickle.dumps(env), pickle.dumps(policy), scope)
        singleton_pool.run_each(_worker_populate_task, [payload] * n_workers)
    logger.log('Populated')
def __init__(self, env, n, max_path_length, scope=None):
    """Split ``n`` environment ids across the worker pool and set up state.

    The ids ``0 .. n-1`` are handed out in contiguous chunks of at most
    ``ceil(n / n_parallel)`` ids per worker; workers at the tail may get an
    empty chunk. Each worker is then invoked with ``worker_init_envs`` and
    its ``(ids, scope, env)`` tuple. A ``DeprecationWarning`` is issued once
    construction has finished.

    Parameters
    ----------
    env : object
        Environment exposing ``action_space`` and ``observation_space``.
    n : int
        Total number of environments.
    max_path_length : int
        Stored on the instance as ``max_path_length``.
    scope : str, optional
        Identifier passed to the workers. A random UUID string is generated
        when omitted.

    """
    if scope is None:
        # No scope supplied: fall back to a random one.
        scope = str(uuid.uuid4())

    n_workers = singleton_pool.n_parallel
    envs_per_worker = int(np.ceil(n * 1.0 / n_workers))

    alloc_env_ids = []
    remaining = n
    next_id = 0
    for _ in range(n_workers):
        chunk_size = min(envs_per_worker, remaining)
        alloc_env_ids.append(list(range(next_id, next_id + chunk_size)))
        next_id += chunk_size
        remaining = max(0, remaining - envs_per_worker)

    worker_args = [(env_ids, scope, env) for env_ids in alloc_env_ids]
    singleton_pool.run_each(worker_init_envs, worker_args)

    self._alloc_env_ids = alloc_env_ids
    self._action_space = env.action_space
    self._observation_space = env.observation_space
    self._num_envs = n
    self.scope = scope
    # One integer counter per environment, all starting at zero.
    self.ts = np.zeros(n, dtype='int')
    self.max_path_length = max_path_length

    deprecation = DeprecationWarning(
        'ParallelVecEnvExecutor is deprecated, and will be removed in '
        'the next release. Please use VecWorker and one of the new '
        'samplers which implement garage.sampler.Sampler, such as '
        'RaySampler')
    warnings.warn(deprecation)
def sample_paths(policy_params,
                 max_samples,
                 max_path_length=np.inf,
                 env_params=None,
                 scope=None):
    """Push parameters to every worker, then collect paths from the pool.

    Parameters
    ----------
    policy_params :
        Parameters for the policy. This will be updated on each worker
        process.
    max_samples : int
        Desired maximum number of samples to be collected. The actual
        number of collected samples might be greater since all trajectories
        will be rolled out either until termination or until
        ``max_path_length`` is reached.
    max_path_length : int, optional
        Horizon / maximum length of a single trajectory.
    env_params : optional
        Environment parameters. Sent to every worker only when not ``None``.
    scope : str, optional
        Scope identifier forwarded to the worker functions.

    Returns
    -------
    The value returned by ``singleton_pool.run_collect`` (a list of
    collected paths).

    """
    policy_args = (policy_params, scope)
    singleton_pool.run_each(_worker_set_policy_params,
                            [policy_args] * singleton_pool.n_parallel)

    if env_params is not None:
        env_args = (env_params, scope)
        singleton_pool.run_each(_worker_set_env_params,
                                [env_args] * singleton_pool.n_parallel)

    collected = singleton_pool.run_collect(_worker_collect_one_path,
                                           threshold=max_samples,
                                           args=(max_path_length, scope),
                                           show_prog_bar=True)
    return collected
def initialize(n_parallel):
    """Initialize the worker pool.

    SIGINT is blocked while the pool processes are created, to avoid
    sleeping and zombie processes. If the user interrupts run_experiment,
    some processes may not die because of a deadlock: one of the children
    in the parallel sampler exits without releasing a lock after it catches
    SIGINT. Later the parent tries to acquire the same lock to proceed with
    its cleanup, but it sleeps forever waiting for the lock to be released.
    In the meantime, all the processes in the parallel sampler remain
    zombies because the parent cannot proceed with their cleanup.

    The signal mask that was active on entry is restored on exit, even if
    pool initialization raises. A caller that had SIGINT blocked already
    therefore keeps it blocked, instead of having it force-unblocked.

    Parameters
    ----------
    n_parallel : int
        Number of workers to run in parallel.

    """
    # pthread_sigmask returns the mask in effect *before* the change, which
    # is exactly what has to be reinstated afterwards.
    previous_mask = signal.pthread_sigmask(signal.SIG_BLOCK,
                                           [signal.SIGINT])
    try:
        singleton_pool.initialize(n_parallel)
        singleton_pool.run_each(
            _worker_init,
            [(worker_id, )
             for worker_id in range(singleton_pool.n_parallel)])
    finally:
        signal.pthread_sigmask(signal.SIG_SETMASK, previous_mask)
def sample_paths(self,
                 policy_params,
                 max_samples,
                 max_path_length,
                 env_params=None,
                 inference_params=None,
                 scope=None):
    """Push policy and inference parameters to workers, then collect paths.

    Parameters
    ----------
    policy_params :
        Sent to every worker via
        ``parallel_sampler._worker_set_policy_params``.
    max_samples : int
        Passed to ``singleton_pool.run_collect`` as ``threshold``.
    max_path_length : int
        Forwarded to ``_worker_collect_one_path``.
    env_params : optional
        Accepted for interface compatibility; currently not forwarded to
        the workers.
    inference_params : optional
        Sent to every worker via ``_worker_set_inference_params``.
    scope : str, optional
        Scope identifier forwarded to the worker functions.

    Returns
    -------
    The value returned by ``singleton_pool.run_collect``.

    """
    n_workers = singleton_pool.n_parallel

    policy_args = [(policy_params, scope)] * n_workers
    singleton_pool.run_each(parallel_sampler._worker_set_policy_params,
                            policy_args)

    inference_args = [(inference_params, scope)] * n_workers
    singleton_pool.run_each(_worker_set_inference_params, inference_args)

    # NOTE: env_params is intentionally not pushed to the workers; the
    # parallel_sampler._worker_set_env_params dispatch is disabled here.

    return singleton_pool.run_collect(
        _worker_collect_one_path,
        threshold=max_samples,
        args=(max_path_length, scope),
        show_prog_bar=True,
    )
def __init__(self, env, n, max_path_length, scope=None):
    """Split ``n`` environment ids across the worker pool and set up state.

    The ids ``0 .. n-1`` are handed out in contiguous chunks of at most
    ``ceil(n / n_parallel)`` ids per worker; workers at the tail may get an
    empty chunk. Each worker is then invoked with ``worker_init_envs`` and
    its ``(ids, scope, env)`` tuple.

    Parameters
    ----------
    env : object
        Environment exposing ``action_space`` and ``observation_space``.
    n : int
        Total number of environments.
    max_path_length : int
        Stored on the instance as ``max_path_length``.
    scope : str, optional
        Identifier passed to the workers. A random UUID string is generated
        when omitted.

    """
    if scope is None:
        # No scope supplied: fall back to a random one.
        scope = str(uuid.uuid4())

    pool_size = singleton_pool.n_parallel
    envs_per_worker = int(np.ceil(n * 1.0 / pool_size))

    alloc_env_ids = []
    unassigned = n
    first_id = 0
    for _ in range(pool_size):
        take = min(envs_per_worker, unassigned)
        last_id = first_id + take
        alloc_env_ids.append(list(range(first_id, last_id)))
        first_id = last_id
        unassigned = max(0, unassigned - envs_per_worker)

    singleton_pool.run_each(
        worker_init_envs,
        [(env_ids, scope, env) for env_ids in alloc_env_ids])

    self._alloc_env_ids = alloc_env_ids
    self._action_space = env.action_space
    self._observation_space = env.observation_space
    self._num_envs = n
    self.scope = scope
    # One integer counter per environment, all starting at zero.
    self.ts = np.zeros(n, dtype='int')
    self.max_path_length = max_path_length
def start_worker(self):
    """Populate the pool workers with this sampler's env and policy.

    Requires ``singleton_pool`` to be initialized already. When more than
    one parallel worker is configured, ``worker_init_tf`` is run on every
    worker before the task is populated and ``worker_init_tf_vars``
    afterwards.
    """
    assert singleton_pool.initialized, (
        'Use singleton_pool.initialize(n_parallel) to setup workers.')

    multi_worker = singleton_pool.n_parallel > 1

    if multi_worker:
        singleton_pool.run_each(worker_init_tf)

    parallel_sampler.populate_task(self.env, self.algo.policy)

    if multi_worker:
        singleton_pool.run_each(worker_init_tf_vars)
def set_seed(seed):
    """Set the seed in each worker.

    Worker ``i`` receives ``seed + i``, so every worker gets a distinct
    seed.

    Parameters
    ----------
    seed : int
        The base random seed; offset by the worker index.

    """
    worker_seeds = [(seed + offset, )
                    for offset in range(singleton_pool.n_parallel)]
    singleton_pool.run_each(_worker_set_seed, worker_seeds)
def populate_task(env, policy, scope=None):
    """Install the environment and policy on every worker.

    With more than one parallel worker, pickled copies of ``env`` and
    ``policy`` are sent to each worker through ``singleton_pool.run_each``.
    Otherwise both objects are attached directly to the scoped globals
    object, which avoids serializing them.

    Parameters
    ----------
    env :
        The environment to install.
    policy :
        The policy to install.
    scope : str, optional
        Scope identifier forwarded to the workers / used to look up the
        scoped globals object.

    """
    logger.log("Populating workers...")
    worker_count = singleton_pool.n_parallel
    if worker_count <= 1:
        # Single worker: avoid unnecessary copying, store the objects as-is.
        scoped = _get_scoped_g(singleton_pool.G, scope)
        scoped.env = env
        scoped.policy = policy
    else:
        task = (pickle.dumps(env), pickle.dumps(policy), scope)
        singleton_pool.run_each(_worker_populate_task, [task] * worker_count)
    logger.log("Populated")
def terminate_task(scope=None):
    """Close each worker's env and terminate each policy.

    Runs ``_worker_terminate_task`` once per worker with ``scope`` as its
    only argument.

    Parameters
    ----------
    scope : str
        Scope for identifying the algorithm. Must be specified if running
        multiple algorithms simultaneously, each using different
        environments and policies.

    """
    per_worker_args = [(scope, )] * singleton_pool.n_parallel
    singleton_pool.run_each(_worker_terminate_task, per_worker_args)
def populate_task(env, policy, scope=None):
    """Set each worker's env and policy.

    With more than one parallel worker, ``cloudpickle``-serialized copies
    of ``env`` and ``policy`` are sent to each worker through
    ``singleton_pool.run_each``. Otherwise both objects are attached
    directly to the scoped globals object, which avoids serializing them.

    Parameters
    ----------
    env :
        The environment to install.
    policy :
        The policy to install.
    scope : str, optional
        Scope identifier forwarded to the workers / used to look up the
        scoped globals object.

    """
    logger.log('Populating workers...')
    pool_size = singleton_pool.n_parallel
    if pool_size <= 1:
        # Single worker: avoid unnecessary copying, store the objects as-is.
        target = _get_scoped_g(singleton_pool.G, scope)
        target.env = env
        target.policy = policy
    else:
        serialized = (cloudpickle.dumps(env), cloudpickle.dumps(policy),
                      scope)
        singleton_pool.run_each(_worker_populate_task,
                                [serialized] * pool_size)
    logger.log('Populated')
def step(self, action_n):
    """Step all environments through the worker pool and merge the results.

    Every worker is called with the full ``action_n`` and this executor's
    scope; workers returning ``None`` are ignored. The per-worker results
    are concatenated and sorted by the id each worker reported. Step
    counters are then advanced, environments that hit ``max_path_length``
    are marked done, and every done environment is reset: its observation
    is replaced by the reset observation and its counter is zeroed.

    Parameters
    ----------
    action_n :
        Actions for all environments, passed unchanged to every worker.

    Returns
    -------
    tuple
        ``(obs, rewards, dones, env_infos)`` where ``obs`` is a list,
        ``rewards`` and ``dones`` are numpy arrays, and ``env_infos`` is
        the output of ``tensor_utils.stack_tensor_dict_list``.

    """
    worker_args = [(action_n, self.scope) for _ in self._alloc_env_ids]
    worker_results = singleton_pool.run_each(worker_run_step, worker_args)
    # Some workers may have nothing to report.
    worker_results = [res for res in worker_results if res is not None]

    ids, obs, rewards, dones, env_infos = zip(*worker_results)
    ids = np.concatenate(ids)
    obs = self.observation_space.unflatten_n(np.concatenate(obs))
    rewards = np.concatenate(rewards)
    dones = np.concatenate(dones)
    merged_infos = tensor_utils.concat_tensor_dict_list(env_infos)
    env_infos = tensor_utils.split_tensor_dict_list(merged_infos)
    if env_infos is None:
        env_infos = [dict() for _ in range(self.num_envs)]

    # Bring everything back into id order.
    ordered = sorted(zip(ids, obs, rewards, dones, env_infos),
                     key=lambda item: item[0])
    ids, obs, rewards, dones, env_infos = zip(*ordered)
    obs = list(obs)
    rewards = np.asarray(rewards)
    dones = np.asarray(dones)

    self.ts += 1
    # Force termination once the horizon has been reached.
    dones[self.ts >= self.max_path_length] = True

    reset_obs = self._run_reset(dones)
    for env_idx, is_done in enumerate(dones):
        if not is_done:
            continue
        obs[env_idx] = reset_obs[env_idx]
        self.ts[env_idx] = 0

    stacked_infos = tensor_utils.stack_tensor_dict_list(list(env_infos))
    return obs, rewards, dones, stacked_infos
def sample_paths(policy_params,
                 max_samples,
                 max_path_length=np.inf,
                 env_params=None,
                 scope=None):
    """Sample paths from each worker.

    Policy parameters (and, when given, environment parameters) are first
    pushed to every worker; paths are then collected from the pool.

    Parameters
    ----------
    policy_params :
        Parameters for the policy. This will be updated on each worker
        process.
    max_samples : int
        Desired maximum number of samples to be collected. The actual
        number of collected samples might be greater since all trajectories
        will be rolled out either until termination or until
        ``max_path_length`` is reached.
    max_path_length : int, optional
        Horizon / maximum length of a single trajectory.
    env_params : optional
        Environment parameters. Sent to every worker only when not ``None``.
    scope : str
        Scope for identifying the algorithm. Must be specified if running
        multiple algorithms simultaneously, each using different
        environments and policies.

    Returns
    -------
    The value returned by ``singleton_pool.run_collect``.

    """
    broadcasts = [(_worker_set_policy_params, policy_params)]
    if env_params is not None:
        broadcasts.append((_worker_set_env_params, env_params))

    # Same call shape for each parameter set: one (params, scope) per worker.
    for worker_fn, params in broadcasts:
        singleton_pool.run_each(worker_fn,
                                [(params, scope)] * singleton_pool.n_parallel)

    return singleton_pool.run_collect(_worker_collect_one_path,
                                      threshold=max_samples,
                                      args=(max_path_length, scope),
                                      show_prog_bar=True)
def _run_reset(self, dones):
    """Reset environments through the worker pool and gather observations.

    Every worker is called with the full ``dones`` array and this
    executor's scope. The returned ids and flat observations are
    concatenated across workers and ordered by id; the rows selected by the
    done indices are then unflattened and placed at those indices.

    Parameters
    ----------
    dones : array-like
        Flags marking which environments are done; converted with
        ``np.asarray``.

    Returns
    -------
    list
        A list of length ``self.num_envs`` holding an observation at every
        done index and ``None`` elsewhere.

    """
    dones = np.asarray(dones)

    worker_args = [(dones, self.scope) for _ in self._alloc_env_ids]
    results = singleton_pool.run_each(worker_run_reset, worker_args)

    # Each result is an (ids, flat_obs) pair; merge them across workers.
    ids, flat_obs = [np.concatenate(group) for group in zip(*results)]
    by_id = sorted(zip(ids, flat_obs), key=lambda pair: pair[0])
    sorted_obs = np.asarray([pair[1] for pair in by_id])

    done_ids, = np.where(dones)
    done_unflat_obs = self.observation_space.unflatten_n(sorted_obs[done_ids])

    all_obs = [None] * self.num_envs
    cursor = 0
    for env_idx, is_done in enumerate(dones):
        if not is_done:
            continue
        all_obs[env_idx] = done_unflat_obs[cursor]
        cursor += 1
    return all_obs
def initialize(n_parallel):
    """Initialize the worker pool and run ``_worker_init`` on each worker.

    Each worker receives its own index (``0 .. n_parallel - 1``) as the
    single argument.

    Parameters
    ----------
    n_parallel : int
        Number of workers to run in parallel.

    """
    singleton_pool.initialize(n_parallel)
    worker_ids = [(worker_id, )
                  for worker_id in range(singleton_pool.n_parallel)]
    singleton_pool.run_each(_worker_init, worker_ids)
def set_seed(seed):
    """Set the seed in each worker.

    Parameters
    ----------
    seed : int
        Base random seed; worker ``i`` is given ``seed + i``.

    """
    seed_args = []
    for worker_idx in range(singleton_pool.n_parallel):
        seed_args.append((seed + worker_idx, ))
    singleton_pool.run_each(_worker_set_seed, seed_args)
def terminate_task(scope=None):
    """Run ``_worker_terminate_task`` on every worker for ``scope``.

    Parameters
    ----------
    scope : str, optional
        Scope identifier passed to each worker as its only argument.

    """
    n_workers = singleton_pool.n_parallel
    singleton_pool.run_each(_worker_terminate_task, [(scope, )] * n_workers)
def start_worker(self):
    """Populate the pool workers with the algorithm's env and policy.

    When more than one parallel worker is configured, ``worker_init_tf`` is
    run on every worker before the task is populated and
    ``worker_init_tf_vars`` afterwards.
    """
    use_pool = singleton_pool.n_parallel > 1

    if use_pool:
        singleton_pool.run_each(worker_init_tf)

    algo = self.algo
    self.populate_task(algo.env, algo.policy)

    if use_pool:
        singleton_pool.run_each(worker_init_tf_vars)
def set_seed(seed):
    """Give every worker its own seed derived from ``seed``.

    Parameters
    ----------
    seed : int
        Base random seed; the worker index is added to it for each worker.

    """
    n_workers = singleton_pool.n_parallel
    per_worker = [(seed + index, ) for index in range(n_workers)]
    singleton_pool.run_each(_worker_set_seed, per_worker)
def terminate_task(scope=None):
    """Close each worker's env and terminate each policy.

    Parameters
    ----------
    scope : str, optional
        Scope identifier passed to ``_worker_terminate_task`` on every
        worker.

    """
    scope_args = (scope, )
    singleton_pool.run_each(_worker_terminate_task,
                            [scope_args] * singleton_pool.n_parallel)