def __init__(self, vec_env_constrs, observation_space, action_space, tot_num_envs, metadata):
    """Launch one worker process per constructor and hand each its first env index.

    Args:
        vec_env_constrs: Iterable of callables. Each one is wrapped in
            ``CloudpickleWrapper`` and passed to ``async_loop`` in its own
            process.
        observation_space: Stored as ``self.observation_space`` and passed to
            ``create_shared_memory`` for the observation buffer.
        action_space: Stored as ``self.action_space`` and passed to
            ``create_shared_memory`` for the action buffer.
        tot_num_envs: Total environment count; stored as ``self.num_envs`` and
            used as the leading size of every shared buffer.
        metadata: Stored unchanged as ``self.metadata``.

    Raises:
        AssertionError: If the values returned by ``self._receive_info()`` do
            not add up to ``tot_num_envs``.
    """
    self.observation_space = observation_space
    self.action_space = action_space
    self.num_envs = num_envs = tot_num_envs
    self.metadata = metadata

    # Buffers shared with the workers, all sized for ``num_envs`` entries.
    self.shared_obs = create_shared_memory(self.observation_space, n=self.num_envs)
    self.shared_act = create_shared_memory(self.action_space, n=self.num_envs)
    self.shared_rews = SharedArray((num_envs, ), dtype=np.float32)
    self.shared_dones = SharedArray((num_envs, ), dtype=np.uint8)
    self.observations_buffers = read_from_shared_memory(
        self.observation_space, self.shared_obs, n=self.num_envs
    )

    # Collect into locals first; the attributes are only published once every
    # worker has been started.
    worker_pipes = []
    worker_procs = []
    for env_constr in vec_env_constrs:
        parent_end, child_end = mp.Pipe()
        wrapped_constr = gym.vector.async_vector_env.CloudpickleWrapper(env_constr)
        worker_args = (
            wrapped_constr,
            parent_end,
            child_end,
            self.shared_obs,
            self.shared_rews,
            self.shared_dones,
        )
        worker = mp.Process(target=async_loop, args=worker_args)
        worker.start()
        # The parent keeps only ``parent_end``; its copy of the other end is closed.
        child_end.close()
        worker_pipes.append(parent_end)
        worker_procs.append(worker)
    self.pipes = worker_pipes
    self.procs = worker_procs

    # NOTE(review): ``_receive_info`` presumably yields one env count per
    # worker; the counts are turned into cumulative start offsets below.
    per_worker_counts = self._receive_info()
    offsets = []
    running_total = 0
    for conn, count in zip(self.pipes, per_worker_counts):
        first_idx = running_total
        running_total += count
        conn.send(first_idx)
        offsets.append(first_idx)
    # Trailing sentinel: the grand total closes the last worker's range.
    offsets.append(running_total)
    assert running_total == tot_num_envs
    self.idx_starts = offsets
def __init__(self, env_fns, observation_space=None, action_space=None,
             shared_memory=True, copy=True, context=None):
    """Start one worker subprocess for every environment factory.

    Args:
        env_fns: Sequence of callables that build the environments; one
            process is started per entry.
        observation_space: Single-environment observation space. When it or
            ``action_space`` is ``None``, ``env_fns[0]`` is instantiated once
            to fill in whichever space is missing.
        action_space: Single-environment action space (same fallback as above).
        shared_memory: If true, observations live in a buffer built by
            ``create_shared_memory`` and workers run ``_worker_shared_memory``;
            otherwise a zero-filled array is allocated and workers run
            ``_worker``.
        copy: Stored as ``self.copy``; not otherwise used by the constructor.
        context: Argument forwarded to ``mp.get_context``.
    """
    try:
        ctx = mp.get_context(context)
    except AttributeError:
        # No ``get_context`` available: fall back to the module itself.
        logger.warn('Context switching for `multiprocessing` is not '
                    'available in Python 2. Using the default context.')
        ctx = mp

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    spaces_incomplete = (observation_space is None) or (action_space is None)
    if spaces_incomplete:
        # Build a throwaway environment only to read its spaces.
        probe_env = env_fns[0]()
        if not observation_space:
            observation_space = probe_env.observation_space
        if not action_space:
            action_space = probe_env.action_space
        probe_env.close()
        del probe_env

    super(AsyncVectorEnv, self).__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        _obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros)
    else:
        _obs_buffer = create_shared_memory(
            self.single_observation_space, n=self.num_envs)
        self.observations = read_from_shared_memory(
            _obs_buffer, self.single_observation_space, n=self.num_envs)

    self.parent_pipes, self.processes = [], []
    self.error_queue = ctx.Queue()

    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_name = 'Worker<{0}>-{1}'.format(type(self).__name__, idx)
            worker_args = (
                idx,
                CloudpickleWrapper(env_fn),
                child_pipe,
                parent_pipe,
                _obs_buffer,
                self.error_queue,
            )
            process = ctx.Process(target=target, name=worker_name, args=worker_args)

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.daemon = True
            process.start()
            # The parent's copy of the child end is closed once the worker runs.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_observation_spaces()
def __init__(self, env_fns, observation_space=None, action_space=None,
             shared_memory=True, copy=True, context=None, daemon=True,
             worker=None):
    """Start one worker subprocess for every environment factory.

    Args:
        env_fns: Sequence of callables that build the environments; one
            process is started per entry.
        observation_space: Single-environment observation space. When it or
            ``action_space`` is ``None``, ``env_fns[0]`` is instantiated once
            to fill in whichever space is missing.
        action_space: Single-environment action space (same fallback as above).
        shared_memory: If true, observations live in a buffer built by
            ``create_shared_memory``; otherwise a zero-filled array is used.
        copy: Stored as ``self.copy``; not otherwise used by the constructor.
        context: Argument forwarded to ``mp.get_context``.
        daemon: Value assigned to each worker process's ``daemon`` flag.
        worker: Optional process target. When truthy it replaces the default
            ``_worker`` / ``_worker_shared_memory`` target.

    Raises:
        ValueError: If ``shared_memory`` is true and setting up the shared
            buffer raises ``CustomSpaceError``.
    """
    ctx = mp.get_context(context)

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    spaces_incomplete = (observation_space is None) or (action_space is None)
    if spaces_incomplete:
        # Build a throwaway environment only to read its spaces.
        probe_env = env_fns[0]()
        if not observation_space:
            observation_space = probe_env.observation_space
        if not action_space:
            action_space = probe_env.action_space
        probe_env.close()
        del probe_env

    super(AsyncVectorEnv, self).__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        _obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros)
    else:
        try:
            _obs_buffer = create_shared_memory(
                self.single_observation_space, n=self.num_envs, ctx=ctx)
            self.observations = read_from_shared_memory(
                _obs_buffer, self.single_observation_space, n=self.num_envs)
        except CustomSpaceError:
            message = ('Using `shared_memory=True` in `AsyncVectorEnv` '
                       'is incompatible with non-standard Gym observation spaces '
                       '(i.e. custom spaces inheriting from `gym.Space`), and is '
                       'only compatible with default Gym spaces (e.g. `Box`, '
                       '`Tuple`, `Dict`) for batching. Set `shared_memory=False` '
                       'if you use custom observation spaces.')
            raise ValueError(message)

    self.parent_pipes, self.processes = [], []
    self.error_queue = ctx.Queue()

    # Default target depends on the observation transport; a caller-supplied
    # worker takes precedence over either default.
    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker
    if worker:
        target = worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_name = 'Worker<{0}>-{1}'.format(type(self).__name__, idx)
            worker_args = (
                idx,
                CloudpickleWrapper(env_fn),
                child_pipe,
                parent_pipe,
                _obs_buffer,
                self.error_queue,
            )
            process = ctx.Process(target=target, name=worker_name, args=worker_args)

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.daemon = daemon
            process.start()
            # The parent's copy of the child end is closed once the worker runs.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_observation_spaces()
def __init__(
    self,
    env_fns: Sequence[callable],
    observation_space: Optional[gym.Space] = None,
    action_space: Optional[gym.Space] = None,
    shared_memory: bool = True,
    copy: bool = True,
    context: Optional[str] = None,
    daemon: bool = True,
    worker: Optional[callable] = None,
):
    """Create a vectorized environment whose sub-environments run in parallel processes.

    Args:
        env_fns: Callables that each construct one environment.
        observation_space: Observation space of a single environment. When
            ``None``, the space of the first environment is used.
        action_space: Action space of a single environment. When ``None``,
            the space of the first environment is used.
        shared_memory: When ``True``, workers hand observations back through
            shared variables, which can be more efficient for large
            observations such as images.
        copy: When ``True``, :meth:`~AsyncVectorEnv.reset` and
            :meth:`~AsyncVectorEnv.step` return copies of the observations.
        context: Name of the ``multiprocessing`` context to use; ``None``
            selects the default context.
        daemon: Value of the ``daemon`` flag on each subprocess. Daemonic
            subprocesses exit together with the parent process but are not
            allowed to spawn children of their own, so some environments
            need ``False`` here.
        worker: Optional replacement for the default subprocess target. This
            lets callers override inner vector-env logic, for example how
            resets on done are handled.

    Warnings:
        ``worker`` is an advanced option that is easy to misuse. When writing
        a custom worker, start from the code of ``_worker`` (or
        ``_worker_shared_memory``) and modify it.

    Raises:
        RuntimeError: If a sub-environment's observation space does not match
            ``observation_space`` (by default, the observation space of the
            first sub-environment).
        ValueError: If ``observation_space`` is a custom space (not one of
            Gym's built-in spaces such as ``gym.spaces.Box``,
            ``gym.spaces.Discrete`` or ``gym.spaces.Dict``) while
            ``shared_memory`` is ``True``.
    """
    ctx = mp.get_context(context)

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    # A throwaway environment is always built: it supplies the metadata and,
    # when needed, whichever space the caller did not provide.
    probe_env = env_fns[0]()
    self.metadata = probe_env.metadata
    spaces_incomplete = (observation_space is None) or (action_space is None)
    if spaces_incomplete:
        if not observation_space:
            observation_space = probe_env.observation_space
        if not action_space:
            action_space = probe_env.action_space
    probe_env.close()
    del probe_env

    super().__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        _obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros
        )
    else:
        try:
            _obs_buffer = create_shared_memory(
                self.single_observation_space, n=self.num_envs, ctx=ctx
            )
            self.observations = read_from_shared_memory(
                self.single_observation_space, _obs_buffer, n=self.num_envs
            )
        except CustomSpaceError:
            message = (
                "Using `shared_memory=True` in `AsyncVectorEnv` "
                "is incompatible with non-standard Gym observation spaces "
                "(i.e. custom spaces inheriting from `gym.Space`), and is "
                "only compatible with default Gym spaces (e.g. `Box`, "
                "`Tuple`, `Dict`) for batching. Set `shared_memory=False` "
                "if you use custom observation spaces."
            )
            raise ValueError(message)

    self.parent_pipes, self.processes = [], []
    self.error_queue = ctx.Queue()

    # Default target depends on the observation transport; a caller-supplied
    # worker takes precedence over either default.
    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker
    if worker:
        target = worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_args = (
                idx,
                CloudpickleWrapper(env_fn),
                child_pipe,
                parent_pipe,
                _obs_buffer,
                self.error_queue,
            )
            process = ctx.Process(
                target=target,
                name=f"Worker<{type(self).__name__}>-{idx}",
                args=worker_args,
            )

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.daemon = daemon
            process.start()
            # The parent's copy of the child end is closed once the worker runs.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_spaces()