def test_subproc_retro():
    """
    Exercise retro environments through SubprocEnv.

    The test first checks that a second ``retro.make`` call for the same game,
    made while the first environment is still referenced, raises
    ``RuntimeError``.  It then builds two ``SubprocEnv`` instances via
    ``make_retro_env``, concatenates them, and asserts that both report the
    same initial observation.
    """
    game = "Airstriker-Genesis"

    # Hold on to the first environment while attempting to create another one;
    # the second creation is expected to fail.
    env = retro.make(game)
    with pytest.raises(RuntimeError):
        retro.make(game)

    workers = []
    for _ in range(2):
        workers.append(SubprocEnv(env_fn=make_retro_env, env_kwargs={"game": game}))

    # Rebinding `env` here also drops the reference to the in-process retro env.
    env = ConcatEnv(workers)
    _rew, ob, _first = env.observe()
    first_ob, second_ob = ob[0], ob[1]
    assert np.array_equal(first_ob, second_ob)
def vectorize_gym(
    num,
    env_fn=None,
    env_kwargs=None,
    use_subproc=True,
    render_mode=None,
    seed=None,
):
    """
    Create `num` gym environments with `env_fn` and combine them into a single
    gym3 Env, running each one in its own subprocess by default.

    This is meant as a replacement for baselines' SubprocVecEnv and DummyVecEnv

    For a registered gym env, `env_fn` can be omitted, in which case `gym.make`
    is used as the function to call:

        env = vectorize_gym(num=2, env_kwargs={"id": "Pendulum-v0"})

    :param num: number of gym environments to create
    :param env_fn: function to call to create the gym environment, defaults to `gym.make`
    :param env_kwargs: keyword arguments to pass to env_fn, defaults to an empty dict
    :param use_subproc: if set to False, create the environment in the current process
    :param render_mode: if set, this will be passed to the `FromGymEnv` adapter, see the documentation
        for `FromGymEnv` for more information
    :param seed: forwarded unchanged to `_make_gym_env` for each environment; every one of the
        `num` environments receives the same value
    :returns: a `ConcatEnv` wrapping the `num` created environments
    """
    if env_fn is None:
        # gym is only imported when the caller relies on the gym.make default
        import gym

        env_fn = gym.make
    if env_kwargs is None:
        env_kwargs = {}

    def maker_kwargs():
        # A fresh dict is built for every environment.
        return dict(
            env_fn=env_fn,
            env_kwargs=env_kwargs,
            render_mode=render_mode,
            seed=seed,
        )

    if use_subproc:

        def build_one():
            return SubprocEnv(env_fn=_make_gym_env, env_kwargs=maker_kwargs())

    else:

        def build_one():
            return _make_gym_env(**maker_kwargs())

    return ConcatEnv([build_one() for _ in range(num)])
def test_concat(space_type):
    """
    Check ConcatEnv over two IdentityEnv instances with different episode lengths.

    :param space_type: one of "binary", "dict" or "real_dict"; selects the
        space used by both environments. Any other value raises ``Exception``.

    The test covers the initial observation, the result of one step in which
    both environments are sent the first environment's observation as the
    action, the per-environment info dicts, and ``callmethod`` dispatch.
    """
    if space_type == "binary":
        space = types.discrete_scalar(2)
    elif space_type in ("dict", "real_dict"):
        if space_type == "dict":
            degrees = types.discrete_scalar(360)
        else:
            degrees = types.TensorType(shape=(), eltype=types.Real())
        space = types.DictType(degrees=degrees)
    else:
        raise Exception(f"invalid space_type {space_type}")

    def square_first(x):
        return [x[0] ** 2]

    def double_first(x):
        return [2 * x[0]]

    # Each base env gets its own `f` so callmethod results can be told apart.
    inner_a = IdentityEnv(space=space, episode_len=1, seed=0)
    inner_a.f = square_first
    inner_b = IdentityEnv(space=space, episode_len=2, seed=1)
    inner_b.f = double_first

    wrapped_a = AddInfo(env=AssertSpacesWrapper(inner_a), id=1)
    wrapped_b = AddInfo(env=AssertSpacesWrapper(inner_b), id=2)
    env = AssertSpacesWrapper(ConcatEnv([wrapped_a, wrapped_b]))

    uses_dict = isinstance(space, types.DictType)

    # Initial state: zero reward, two distinct observations, both marked first.
    rew, ob, first = env.observe()
    assert np.array_equal(rew, np.array([0, 0]))
    if uses_dict:
        ob = ob["degrees"]
    assert ob.shape == (2,)
    assert ob[0] != ob[1]
    assert np.array_equal(first, np.array([1, 1]))

    # Send the first env's observation to both envs as the action.
    act = np.array([ob[0], ob[0]])
    if uses_dict:
        act = {"degrees": act}
    env.act(act)

    rew, _ob, first = env.observe()
    if space_type != "real_dict":
        assert np.array_equal(rew, np.array([1, 0]))
    else:
        assert rew[0] == 0
        assert rew[1] < 0
    assert np.array_equal(first, np.array([1, 0]))

    assert env.get_info() == [{"id": 1}, {"id": 2}]

    # Three arguments for two environments is expected to be rejected.
    with pytest.raises(AssertionError):
        env.callmethod("f", [2, 3, 4])

    # Expected result is square_first applied to 2 and double_first applied to 3.
    assert env.callmethod("f", [2, 3]) == [4, 6]
def test_recorder():
    """
    Check that TrajectoryRecorderWrapper writes trajectory pickles.

    Two IdentityEnv instances with episode lengths 3 and 4 are concatenated and
    wrapped by the recorder, which writes into a temporary directory.  After 10
    actions the test expects ``10 // 3 + 10 // 4`` pickle files, and checks that
    the first two files (in sorted order) hold trajectories whose length, first
    observation and first action match environment 0 and 1 respectively.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        episode_lengths = (3, 4)

        first_env = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256), shape=(3, 3, 3)),
            episode_len=episode_lengths[0],
        )
        second_env = IdentityEnv(
            space=types.TensorType(eltype=types.Discrete(256), shape=(3, 3, 3)),
            episode_len=episode_lengths[1],
            seed=1,
        )
        env = TrajectoryRecorderWrapper(
            env=ConcatEnv([first_env, second_env]), directory=tmpdir
        )

        _rew, obs, _first = env.observe()

        # Zero action for env 0, action 1 for env 1, repeated on every step.
        action = types_np.zeros(env.ac_space, bshape=(env.num,))
        action[1] = 1

        num_acs = 10
        step = 0
        while step < num_acs:
            env.act(action)
            step += 1

        pattern = os.path.join(tmpdir, "*.pickle")
        files = sorted(glob(pattern))
        print(files)
        expected_count = sum(num_acs // length for length in episode_lengths)
        assert len(files) == expected_count

        # files[i] is expected to contain the first episode of environment i.
        for idx, expected_len in enumerate(episode_lengths):
            with open(files[idx], "rb") as f:
                loaded_traj = pickle.load(f)
            assert len(loaded_traj["ob"]) == expected_len
            assert np.allclose(loaded_traj["ob"][0], obs[idx])
            assert np.allclose(loaded_traj["act"][0], action[idx])