def __init__(self, spec):
    super().__init__(spec)
    try_register_env(spec)  # register if it's a custom gym env
    seed = ps.get(spec, 'meta.random_seed')
    episode_life = not util.in_eval_lab_modes()
    if self.is_venv:  # make vector environment
        self.u_env = make_gym_venv(name=self.name, num_envs=self.num_envs, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life)
    else:
        self.u_env = make_gym_env(name=self.name, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life)
    if self.name.startswith('Unity'):
        # Unity is always initialized as a singleton gym env, but the Unity runtime can be vec_env
        self.num_envs = self.u_env.num_envs
        # update variables dependent on num_envs
        self._infer_venv_attr()
        self._set_clock()
    self._set_attr_from_u_env(self.u_env)
    self.max_t = self.max_t or self.u_env.spec.max_episode_steps
    assert self.max_t is not None
    logger.info(util.self_desc(self))
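
# Hedged sketch of the spec this constructor consumes. Only 'meta.random_seed'
# is read here directly (via pydash's ps.get); self.name, self.num_envs,
# self.frame_op, etc. are assumed to be populated by the base-class __init__
# from the spec's env config. Every key and value below is hypothetical.
def _example_env_spec():
    return {
        'meta': {'random_seed': 42},
        'env': [{'name': 'PongNoFrameskip-v4', 'num_envs': 4, 'max_t': None}],
    }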
def test_make_gym_stack(name, num_envs, state_shape, reward_scale):
    seed = 0
    frame_op = 'stack'  # used for rnn
    frame_op_len = 4
    venv = make_gym_venv(name, num_envs, seed, frame_op=frame_op, frame_op_len=frame_op_len, reward_scale=reward_scale)
    venv.reset()
    for i in range(5):
        # step every sub-env with a random action
        state, reward, done, info = venv.step([venv.action_space.sample()] * num_envs)
    # outputs are batched over num_envs; stacking prepends a frame axis to the state
    assert isinstance(state, np.ndarray)
    stack_shape = (num_envs, frame_op_len) + state_shape
    assert state.shape == stack_shape
    assert isinstance(reward, np.ndarray)
    assert reward.shape == (num_envs,)
    assert isinstance(done, np.ndarray)
    assert done.shape == (num_envs,)
    assert len(info) == num_envs
    venv.close()
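
# Hedged companion sketch to the test above: with frame_op='stack' and
# frame_op_len=4, a CartPole-v0 state of shape (4,) comes back batched as
# (num_envs, frame_op_len) + state_shape. The env name and shapes are
# illustrative, not the suite's actual parametrize cases; make_gym_venv is
# assumed to be in scope, as in the test above.
def _example_stacked_venv_shapes():
    venv = make_gym_venv('CartPole-v0', 4, 0, frame_op='stack', frame_op_len=4)
    state = venv.reset()
    assert state.shape == (4, 4, 4)  # (num_envs, frame_op_len) + (4,)
    venv.close()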
def __init__(self, spec):
    super().__init__(spec)
    try_register_env(spec)  # register if it's a custom gym env
    seed = ps.get(spec, 'meta.random_seed')
    if self.is_venv:  # make vector environment
        self.u_env = make_gym_venv(self.name, self.num_envs, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    else:
        self.u_env = make_gym_env(self.name, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    self._set_attr_from_u_env(self.u_env)
    self.max_t = self.max_t or self.u_env.spec.max_episode_steps
    assert self.max_t is not None
    logger.info(util.self_desc(self))
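
# Hedged usage sketch for the non-vector branch above, mirroring its positional
# call order (name, seed, frame_op, frame_op_len, reward_scale, normalize_state).
# make_gym_env is assumed to be in scope as in the constructor; the env name and
# argument values are illustrative only.
def _example_make_single_env():
    u_env = make_gym_env('CartPole-v0', 0, None, None, None, False)
    u_env.reset()
    u_env.close()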
def test_make_gym_venv_downsize(name, num_envs, state_shape, image_downsize):
    seed = 0
    frame_op = None
    frame_op_len = None
    venv = make_gym_venv(name, num_envs, seed, frame_op=frame_op, frame_op_len=frame_op_len, image_downsize=image_downsize)
    venv.reset()
    for i in range(5):
        # step every sub-env with a random action
        state, reward, done, info = venv.step([venv.action_space.sample()] * num_envs)
    # outputs are batched over num_envs; state_shape reflects the downsized image
    assert isinstance(state, np.ndarray)
    assert state.shape == (num_envs,) + state_shape
    assert isinstance(reward, np.ndarray)
    assert reward.shape == (num_envs,)
    assert isinstance(done, np.ndarray)
    assert done.shape == (num_envs,)
    assert len(info) == num_envs
    venv.close()
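
# Hedged companion sketch: image_downsize shrinks the per-frame spatial dims,
# so an Atari grayscale state of shape (1, h, w) batches to (num_envs, 1, h, w).
# The env name and sizes are illustrative, assume the Atari extras are
# installed, and make_gym_venv is assumed to be in scope as in the test above.
def _example_downsized_venv_shapes():
    venv = make_gym_venv('PongNoFrameskip-v4', 4, 0, image_downsize=(64, 64))
    state = venv.reset()
    assert state.shape == (4, 1, 64, 64)  # (num_envs,) + downsized state_shape
    venv.close()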