def test_dqn_apex_cpu_spawn_full_train(self, tmpdir):
    """
    End-to-end DQN-Apex training test on CPU using ddp-spawn.

    Launches training in a background thread with 3 spawned worker
    processes, collects each worker's max total reward through a
    cross-process queue, and asserts the average reward reaches 150.

    Args:
        tmpdir: pytest ``tmpdir`` fixture; training artifacts are written
            to a numbered sub-directory of it.
    """
    # by default, pytorch lightning will use ddp-spawn mode to replace ddp
    # if there are only cpus
    os.environ["WORLD_SIZE"] = "3"
    config = generate_env_config("CartPole-v0", {})
    config = generate_training_config(root_dir=tmpdir.make_numbered_dir(),
                                      config=config)
    config = generate_algorithm_config("DQNApex", config)
    # use ddp_cpu
    config["gpus"] = None
    config["num_processes"] = 3
    # this testing process corresponds to this node
    config["num_nodes"] = 1
    config["early_stopping_patience"] = 100
    # Use class instead of string name since algorithms is distributed.
    config["frame_config"]["models"] = [QNet, QNet]
    config["frame_config"]["model_kwargs"] = [
        {
            "state_dim": 4,
            "action_num": 2
        },
        {
            "state_dim": 4,
            "action_num": 2
        },
    ]

    # for spawn we use a special callback, because we cannot access
    # max_total_reward from sub-processes
    queue = SimpleQueue(ctx=mp.get_context("spawn"))
    # cb = [SpawnInspectCallback(queue), LoggerDebugCallback()]
    cb = [SpawnInspectCallback(queue)]
    # launch() blocks until training finishes, so run it in a thread and
    # drain the result queue from here concurrently.
    t = Thread(target=launch, args=(config, ), kwargs={"pl_callbacks": cb})
    t.start()

    default_logger.info("Start tracking")
    subproc_max_total_reward = [0, 0, 0]
    while True:
        try:
            # 60 s without any report from any worker is taken as
            # "training finished, queue drained".
            result = queue.quick_get(timeout=60)
            default_logger.info(
                f"Result from process [{result[0]}]: {result[1]}")
            subproc_max_total_reward[result[0]] = result[1]
        except TimeoutError:
            # no more results
            default_logger.info("No more results.")
            break

    t.join()
    # Average of the three workers' best episode rewards must hit 150.
    assert (
        sum(subproc_max_total_reward) / 3 >= 150
    ), f"Max total reward {sum(subproc_max_total_reward) / 3} below threshold 150."
def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
    """
    Spawn one worker process per environment and wire up command /
    result queues.

    Args:
        env_creators: List of gym environment creators, used to create
            environments on sub process workers, accepts an index as your
            environment id.
    """
    super().__init__()
    self.workers = []
    # Some environments will hang or collapse when using fork context.
    # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.
    ctx = get_context("spawn")
    # In case users wants to pass tensors to environments,
    # always copy all tensors to avoid errors
    self.cmd_queues = [
        SimpleQueue(ctx=ctx, copy_tensor=True) for _ in range(len(env_creators))
    ]
    self.result_queue = SimpleQueue(ctx=ctx, copy_tensor=True)
    for env_idx, (cmd_queue, ec) in enumerate(zip(self.cmd_queues, env_creators)):
        # enable recursive serialization to support
        # lambda & local function creators.
        self.workers.append(
            ctx.Process(
                target=self._worker,
                args=(
                    cmd_queue,
                    self.result_queue,
                    dumps(ec, recurse=True, copy_tensor=True),
                    env_idx,
                ),
            )
        )
    for worker in self.workers:
        # daemonize so stray workers die with the parent process
        worker.daemon = True
        worker.start()

    self.env_size = env_size = len(env_creators)
    self._cmd_lock = Lock()
    self._closed = False

    # Probe spaces from a throwaway local instance instead of a worker.
    tmp_env = env_creators[0](0)
    self._action_space = tmp_env.action_space
    self._obsrv_space = tmp_env.observation_space
    tmp_env.close()

    # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``bool`` yields the identical np.bool_ dtype.
    self._terminal = np.zeros([env_size], dtype=bool)
def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
    """
    Spawn one worker process per environment and wire up command /
    result queues.

    Args:
        env_creators: List of gym environment creators, used to create
            environments on sub process workers, accepts an index as your
            environment id.
    """
    super().__init__()
    self.workers = []
    # Some environments will hang or collapse when using fork context.
    # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.
    ctx = get_context("spawn")
    self.cmd_queues = [
        SimpleQueue(ctx=ctx) for _ in range(len(env_creators))
    ]
    self.result_queue = SimpleQueue(ctx=ctx)
    for env_idx, (cmd_queue, ec) in enumerate(zip(self.cmd_queues, env_creators)):
        # lambda & local function creators must be serialized by dill,
        # the default pickler in spawn context doesn't work.
        self.workers.append(
            ctx.Process(
                target=self._worker,
                args=(cmd_queue, self.result_queue, dill.dumps(ec), env_idx),
            )
        )
    for worker in self.workers:
        # daemonize so stray workers die with the parent process
        worker.daemon = True
        worker.start()

    self.env_size = env_size = len(env_creators)
    self._cmd_lock = Lock()
    self._closed = False

    # Probe spaces from a throwaway local instance instead of a worker.
    tmp_env = env_creators[0](0)
    self._action_space = tmp_env.action_space
    self._obsrv_space = tmp_env.observation_space
    tmp_env.close()

    # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``bool`` yields the identical np.bool_ dtype.
    self._terminal = np.zeros([env_size], dtype=bool)
def _worker(cmd_queue: SimpleQueue, result_queue: SimpleQueue,
            env_creator, env_idx):
    """
    Sub-process entry point: build the environment, then serve
    ``(method, args, kwargs)`` commands from ``cmd_queue`` until the
    ``None`` shutdown sentinel arrives.

    Args:
        cmd_queue: Queue this worker polls for commands.
        result_queue: Queue for ``(env_idx, success, payload)`` replies.
        env_creator: dill-serialized callable ``index -> gym.Env``.
        env_idx: Index identifying this worker's environment.
    """
    import sys

    try:
        env = dill.loads(env_creator)(env_idx)
    except Exception:
        # Something has gone wrong during environment creation,
        # exit with error.
        # FIX: use sys.exit instead of the site-injected builtin ``exit``,
        # which is not guaranteed to exist (e.g. under ``python -S``).
        sys.exit(2)
    try:
        while True:
            try:
                # short poll so KeyboardInterrupt is noticed promptly
                command = cmd_queue.quick_get(timeout=1e-3)
            except TimeoutError:
                continue
            try:
                if command is not None:
                    method, args, kwargs = command
                else:
                    # End of all tasks signal received
                    cmd_queue.close()
                    result_queue.close()
                    break
                result = getattr(env, method)(*args, **kwargs)
                result_queue.put((env_idx, True, result))
            except Exception as e:
                # Something has gone wrong during execution, serialize
                # the exception and send it back to master.
                result_queue.put(
                    (env_idx, False, ExceptionWithTraceback(e)))
    except KeyboardInterrupt:
        cmd_queue.close()
        result_queue.close()
class ParallelWrapperSubProc(ParallelWrapperBase):
    """
    Parallel wrapper based on sub processes.

    One worker process is spawned per environment; commands are sent
    through per-worker command queues and replies are collected from a
    single shared result queue.
    """

    def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
        """
        Args:
            env_creators: List of gym environment creators, used to create
                environments on sub process workers, accepts an index as your
                environment id.
        """
        super().__init__()
        self.workers = []
        # Some environments will hang or collapse when using fork context.
        # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.
        ctx = get_context("spawn")
        self.cmd_queues = [
            SimpleQueue(ctx=ctx) for _ in range(len(env_creators))
        ]
        self.result_queue = SimpleQueue(ctx=ctx)
        for env_idx, (cmd_queue, ec) in enumerate(
                zip(self.cmd_queues, env_creators)):
            # lambda & local function creators must be serialized by dill,
            # the default pickler in spawn context doesn't work.
            self.workers.append(
                ctx.Process(
                    target=self._worker,
                    args=(cmd_queue, self.result_queue, dill.dumps(ec),
                          env_idx),
                )
            )
        for worker in self.workers:
            # daemonize so stray workers die with the parent process
            worker.daemon = True
            worker.start()

        self.env_size = env_size = len(env_creators)
        self._cmd_lock = Lock()
        self._closed = False

        # Probe spaces from a throwaway local instance instead of a worker.
        tmp_env = env_creators[0](0)
        self._action_space = tmp_env.action_space
        self._obsrv_space = tmp_env.observation_space
        tmp_env.close()

        # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin ``bool`` yields the identical np.bool_ dtype.
        self._terminal = np.zeros([env_size], dtype=bool)

    def reset(self, idx: Union[int, List[int]] = None) -> List[object]:
        """
        Reset the specified environment(s) and clear their terminal flags.

        Args:
            idx: Indexes of selected environments, default is all.

        Returns:
            A list of gym states.
        """
        env_idxs = self._select_envs(idx)
        self._terminal[env_idxs] = False
        with self._cmd_lock:
            return self._call_gym_env_method(env_idxs, "reset")

    def step(self, action: Union[np.ndarray, List[Any]],
             idx: Union[int, List[int]] = None) \
            -> Tuple[List[object], np.ndarray, np.ndarray, List[dict]]:
        """
        Let specified environment(s) run one time step. Specified environments
        must be active and have not reached terminal states before.

        Args:
            action: Actions sent to each specified environment, the size of the
                first dimension must match the number of selected environments.
            idx: Indexes of selected environments, default is all.

        Returns:
            Observation, reward, terminal, and diagnostic info.

        Raises:
            ValueError: If the number of actions does not match the number
                of selected environments.
        """
        env_idxs = self._select_envs(idx)
        if len(action) != len(env_idxs):
            raise ValueError("Action number must match environment number!")
        with self._cmd_lock:
            result = self._call_gym_env_method(env_idxs, "step",
                                               [(act, ) for act in action])

        obsrv = [r[0] for r in result]
        reward = np.stack([r[1] for r in result])
        terminal = np.stack([r[2] for r in result])
        info = [r[3] for r in result]
        # remember which environments have finished their episode
        self._terminal[env_idxs] |= terminal
        return obsrv, reward, terminal, info

    def seed(self, seed: Union[int, List[int]] = None) -> List[int]:
        """
        Set seeds for all environments.

        Args:
            seed: If seed is ``int``, the same seed will be used for all
                environments.
                If seed is ``List[int]``, it must have the same size as
                the number of all environments.
                If seed is ``None``, all environments will use the default
                seed.

        Returns:
            Actual used seed returned by all environments.
        """
        if np.isscalar(seed) or seed is None:
            # broadcast a single (or default) seed to every environment
            seed = [seed] * self.size()
        env_idxs = self._select_envs()
        with self._cmd_lock:
            return self._call_gym_env_method(env_idxs, "seed",
                                             [(sd, ) for sd in seed])

    def render(self, idx: Union[int, List[int]] = None,
               *args, **kwargs) -> List[np.ndarray]:
        """
        Render all/specified environments.

        Args:
            idx: Indexes of selected environments, default is all.

        Returns:
            A list or rendered frames, of type ``np.ndarray`` and size
            (H, W, 3).
        """
        env_idxs = self._select_envs(idx)
        with self._cmd_lock:
            # FIX: build an independent kwargs dict per environment instead
            # of itertools.repeat(), which hands every call the SAME dict
            # object (mutation by any callee would leak across envs).
            return self._call_gym_env_method(
                env_idxs, "render",
                kwargs=[{"mode": "rgb_array"} for _ in env_idxs])

    def close(self) -> None:
        """
        Close all environments, including the wrapper.
        """
        with self._cmd_lock:
            # idempotent: repeated close() calls are no-ops
            if self._closed:
                return
            self._closed = True
            env_idxs = self._select_envs()
            self._call_gym_env_method(env_idxs, "close")
            # ``None`` is the shutdown sentinel understood by _worker
            for cmd_queue in self.cmd_queues:
                cmd_queue.quick_put(None)
            for worker in self.workers:
                worker.join()

    def active(self) -> List[int]:
        """
        Returns:
            Indexes of current active environments.
        """
        return np.arange(self.size())[~self._terminal]

    def size(self) -> int:
        """
        Returns:
            Number of environments.
        """
        return self.env_size

    @property
    def action_space(self) -> Any:
        # DOC INHERITED
        return self._action_space

    @property
    def observation_space(self) -> Any:
        # DOC INHERITED
        return self._obsrv_space

    def _select_envs(self, idx=None):
        # Normalize ``idx`` to a list of environment indexes
        # (None -> all, scalar -> single-element list).
        if idx is None:
            idx = list(range(self.env_size))
        else:
            if np.isscalar(idx):
                idx = [idx]
        return idx

    def _call_gym_env_method(self, env_idxs, method, args=None, kwargs=None):
        # Dispatch ``method`` to each selected worker and gather replies
        # from the shared result queue, preserving ``env_idxs`` order.
        if args is None:
            args = [() for _ in range(len(env_idxs))]
        if kwargs is None:
            kwargs = [{} for _ in range(len(env_idxs))]

        result = {}

        # Check whether any process has exited with error code:
        for worker_id, worker in enumerate(self.workers):
            if worker.exitcode is None:
                continue
            if worker.exitcode == 2:
                raise RuntimeError(
                    "Worker {} failed to create environment.".format(
                        worker_id))
            elif worker.exitcode != 0:
                raise RuntimeError("Worker {} exited with code {}.".format(
                    worker_id, worker.exitcode))

        for i, env_idx in enumerate(env_idxs):
            self.cmd_queues[env_idx].quick_put((method, args[i], kwargs[i]))

        while len(result) < len(env_idxs):
            e_idx, success, res = self.result_queue.get()
            if success:
                result[e_idx] = res
            else:
                # worker sent back a serialized exception; re-raise here
                raise res
        return [result[e_idx] for e_idx in env_idxs]

    @staticmethod
    def _worker(cmd_queue: SimpleQueue, result_queue: SimpleQueue,
                env_creator, env_idx):
        # Sub-process entry point: build the environment, then serve
        # commands until the ``None`` shutdown sentinel arrives.
        import sys

        try:
            env = dill.loads(env_creator)(env_idx)
        except Exception:
            # Something has gone wrong during environment creation,
            # exit with error.
            # FIX: use sys.exit instead of the site-injected builtin
            # ``exit``, which may not exist (e.g. under ``python -S``).
            sys.exit(2)
        try:
            while True:
                try:
                    # short poll so KeyboardInterrupt is noticed promptly
                    command = cmd_queue.quick_get(timeout=1e-3)
                except TimeoutError:
                    continue
                try:
                    if command is not None:
                        method, args, kwargs = command
                    else:
                        # End of all tasks signal received
                        cmd_queue.close()
                        result_queue.close()
                        break
                    result = getattr(env, method)(*args, **kwargs)
                    result_queue.put((env_idx, True, result))
                except Exception as e:
                    # Something has gone wrong during execution, serialize
                    # the exception and send it back to master.
                    result_queue.put(
                        (env_idx, False, ExceptionWithTraceback(e)))
        except KeyboardInterrupt:
            cmd_queue.close()
            result_queue.close()