def __init__(self, env, n, max_path_length, scope=None):
    """Distribute n copies of env across the singleton worker pool."""
    if scope is None:
        # initialize random scope
        scope = str(uuid.uuid4())

    envs_per_worker = int(np.ceil(n * 1.0 / singleton_pool.n_parallel))
    alloc_env_ids = []
    rest_alloc = n
    start_id = 0
    for _ in range(singleton_pool.n_parallel):
        n_allocs = min(envs_per_worker, rest_alloc)
        alloc_env_ids.append(list(range(start_id, start_id + n_allocs)))
        start_id += n_allocs
        rest_alloc = max(0, rest_alloc - envs_per_worker)

    singleton_pool.run_each(worker_init_envs, [(alloc, scope, env)
                                               for alloc in alloc_env_ids])

    self._alloc_env_ids = alloc_env_ids
    self._action_space = env.action_space
    self._observation_space = env.observation_space
    self._num_envs = n
    self.scope = scope
    self.ts = np.zeros(n, dtype='int')
    self.max_path_length = max_path_length
    warnings.warn(
        DeprecationWarning(
            'ParallelVecEnvExecutor is deprecated, and will be removed in '
            'the next release. Please use VecWorker and one of the new '
            'samplers which implement metarl.sampler.Sampler, such as '
            'RaySampler'))
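# --- Usage sketch (not part of the module) -------------------------------
# A minimal sketch of constructing the executor. The import paths and
# `my_env` are assumptions for illustration, not verified against the
# metarl source tree.
from metarl.sampler.stateful_pool import singleton_pool
from metarl.sampler.parallel_vec_env_executor import ParallelVecEnvExecutor

singleton_pool.initialize(n_parallel=3)  # spawn 3 worker processes first
vec_env = ParallelVecEnvExecutor(
    env=my_env,  # hypothetical: a picklable env with action/observation_space
    n=8,
    max_path_length=500)
# envs_per_worker = ceil(8 / 3) = 3, so the allocation comes out as
# worker 0 -> envs [0, 1, 2], worker 1 -> [3, 4, 5], worker 2 -> [6, 7].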
def start_worker(self):
    """Initialize the sampler."""
    assert singleton_pool.initialized, (
        'Use singleton_pool.initialize(n_parallel) to setup workers.')
    if singleton_pool.n_parallel > 1:
        singleton_pool.run_each(worker_init_tf)
    parallel_sampler.populate_task(self.env, self.algo.policy)
    if singleton_pool.n_parallel > 1:
        singleton_pool.run_each(worker_init_tf_vars)
def populate_task(env, policy, scope=None):
    """Set each worker's env and policy."""
    logger.log('Populating workers...')
    if singleton_pool.n_parallel > 1:
        singleton_pool.run_each(
            _worker_populate_task,
            [(pickle.dumps(env), pickle.dumps(policy), scope)] *
            singleton_pool.n_parallel)
    else:
        # avoid unnecessary copying
        g = _get_scoped_g(singleton_pool.G, scope)
        g.env = env
        g.policy = policy
    logger.log('Populated')
def initialize(n_parallel):
    """Initialize the worker pool."""
    # SIGINT is blocked for all processes created in parallel_sampler to
    # avoid the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock, where one of the children in the parallel
    # sampler catches SIGINT and exits without releasing a lock.
    #
    # Later, the parent tries to acquire the same lock to proceed with its
    # cleanup, but it sleeps forever waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain
    # zombies, since the parent cannot proceed with their cleanup.
    try:
        signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
        singleton_pool.initialize(n_parallel)
        singleton_pool.run_each(
            _worker_init, [(id, ) for id in range(singleton_pool.n_parallel)])
    finally:
        signal.pthread_sigmask(signal.SIG_UNBLOCK, [signal.SIGINT])
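# --- Standalone sketch of the SIGINT-masking pattern above ----------------
# Children forked while SIGINT is blocked inherit the blocked mask, so
# Ctrl-C reaches only the parent. The names below are illustrative, not
# from this module; the pattern itself is plain signal.pthread_sigmask.
import multiprocessing
import signal
import time


def _sleepy_worker():
    # Stand-in for a pool worker; SIGINT stays blocked in here.
    time.sleep(60)


def start_workers(n):
    try:
        # Block SIGINT before forking so every child inherits the mask.
        signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
        workers = [multiprocessing.Process(target=_sleepy_worker)
                   for _ in range(n)]
        for w in workers:
            w.start()
    finally:
        # Restore SIGINT delivery in the parent once the children exist.
        signal.pthread_sigmask(signal.SIG_UNBLOCK, [signal.SIGINT])
    return workers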
def sample_paths(policy_params,
                 max_samples,
                 max_path_length=np.inf,
                 scope=None):
    """Sample paths from each worker.

    :param policy_params: parameters for the policy. This will be updated on
        each worker process
    :param max_samples: desired maximum number of samples to collect. The
        actual number of collected samples might be greater, since all
        trajectories are rolled out either until termination or until
        max_path_length is reached
    :param max_path_length: horizon / maximum length of a single trajectory
    :return: a list of collected paths
    """
    singleton_pool.run_each(_worker_set_policy_params,
                            [(policy_params, scope)] *
                            singleton_pool.n_parallel)
    return singleton_pool.run_collect(_worker_collect_one_path,
                                      threshold=max_samples,
                                      args=(max_path_length, scope),
                                      show_prog_bar=True)
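# --- End-to-end sketch of this module's API -------------------------------
# A rough sketch of the intended call order. The import path is an
# assumption based on the function names above; `env` and `policy` are
# hypothetical stand-ins, and get_param_values() is an assumed
# rllab-style accessor, not confirmed by this module.
from metarl.sampler import parallel_sampler

parallel_sampler.initialize(n_parallel=4)
parallel_sampler.set_seed(42)                # workers receive 42, 43, 44, 45
parallel_sampler.populate_task(env, policy)  # ship env/policy to each worker
paths = parallel_sampler.sample_paths(
    policy_params=policy.get_param_values(),
    max_samples=10000,
    max_path_length=500)
parallel_sampler.terminate_task()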
def _run_reset(self, dones):
    """Reset the environments flagged in dones and return their initial obs."""
    dones = np.asarray(dones)
    results = singleton_pool.run_each(
        worker_run_reset,
        [(dones, self.scope) for _ in self._alloc_env_ids],
    )
    ids, flat_obs = list(map(np.concatenate, list(zip(*results))))
    zipped = list(zip(ids, flat_obs))
    sorted_obs = np.asarray(
        [x[1] for x in sorted(zipped, key=lambda x: x[0])])

    done_ids, = np.where(dones)
    done_flat_obs = sorted_obs[done_ids]
    done_unflat_obs = self.observation_space.unflatten_n(done_flat_obs)
    all_obs = [None] * self.num_envs
    done_cursor = 0
    for idx, done in enumerate(dones):
        if done:
            all_obs[idx] = done_unflat_obs[done_cursor]
            done_cursor += 1
    return all_obs
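# --- Toy illustration of the id-sorted merge used above -------------------
# Each worker returns (ids, values) for its slice of the environments, in
# no particular global order; concatenating and sorting by id restores the
# global env ordering. The data below is made up for illustration.
import numpy as np

results = [(np.array([2, 3]), np.array([20., 30.])),
           (np.array([0, 1]), np.array([0., 10.]))]
ids, values = map(np.concatenate, zip(*results))
order = np.argsort(ids)
values_by_env_id = values[order]   # -> [0., 10., 20., 30.]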
def step(self, action_n):
    """Step all environments using the provided actions."""
    results = singleton_pool.run_each(
        worker_run_step,
        [(action_n, self.scope) for _ in self._alloc_env_ids],
    )
    results = [x for x in results if x is not None]
    ids, obs, rewards, dones, env_infos = list(zip(*results))
    ids = np.concatenate(ids)
    obs = self.observation_space.unflatten_n(np.concatenate(obs))
    rewards = np.concatenate(rewards)
    dones = np.concatenate(dones)
    env_infos = tensor_utils.split_tensor_dict_list(
        tensor_utils.concat_tensor_dict_list(env_infos))
    if env_infos is None:
        env_infos = [dict() for _ in range(self.num_envs)]

    items = list(zip(ids, obs, rewards, dones, env_infos))
    items = sorted(items, key=lambda x: x[0])
    ids, obs, rewards, dones, env_infos = list(zip(*items))

    obs = list(obs)
    rewards = np.asarray(rewards)
    dones = np.asarray(dones)

    self.ts += 1
    dones[self.ts >= self.max_path_length] = True

    reset_obs = self._run_reset(dones)
    for (i, done) in enumerate(dones):
        if done:
            obs[i] = reset_obs[i]
            self.ts[i] = 0
    return obs, rewards, dones, tensor_utils.stack_tensor_dict_list(
        list(env_infos))
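# --- Sketch of a rollout loop over the vectorized step --------------------
# `vec_env` and `policy` are the hypothetical objects from the earlier
# sketches; reset() and get_actions() are assumed APIs, not confirmed by
# the code above.
obs = vec_env.reset()
for _ in range(200):
    actions, agent_infos = policy.get_actions(obs)
    obs, rewards, dones, env_infos = vec_env.step(actions)
    # No manual reset needed: wherever dones[i] is True, obs[i] is already
    # the first observation of a fresh episode and ts[i] was zeroed.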
def terminate_task(scope=None): """Close each worker's env and terminate each policy.""" singleton_pool.run_each(_worker_terminate_task, [(scope, )] * singleton_pool.n_parallel)
def set_seed(seed):
    """Set the seed in each worker."""
    singleton_pool.run_each(
        _worker_set_seed,
        [(seed + i, ) for i in range(singleton_pool.n_parallel)])
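# For example, with singleton_pool.n_parallel == 4, set_seed(1000) seeds
# the workers with 1000, 1001, 1002 and 1003, so parallel rollouts do not
# share a random stream.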