def __init__(self, spec):
    '''
    Build the underlying gym environment from a lab spec.

    Registers any custom env declared in the spec, constructs either a vector
    env (when self.is_venv) or a single env, then finalizes attributes that
    depend on the constructed env (num_envs, clock, max_t).

    @param spec: lab spec dict; 'meta.random_seed' is read for seeding.
    '''
    super().__init__(spec)
    try_register_env(spec)  # register if it's a custom gym env
    seed = ps.get(spec, 'meta.random_seed')
    # episodic-life wrapping is a training-only trick; disable it in eval modes
    episode_life = not util.in_eval_lab_modes()
    if self.is_venv:  # make vector environment
        self.u_env = make_gym_venv(name=self.name, num_envs=self.num_envs, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life)
    else:
        self.u_env = make_gym_env(name=self.name, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life)
    if self.name.startswith('Unity'):
        # Unity is always initialized as singleton gym env, but the Unity runtime can be vec_env
        self.num_envs = self.u_env.num_envs
        # update variables dependent on num_envs
        self._infer_venv_attr()
        self._set_clock()
    self._set_attr_from_u_env(self.u_env)
    # fall back to the env spec's episode limit when max_t is not set in the lab spec
    self.max_t = self.max_t or self.u_env.spec.max_episode_steps
    assert self.max_t is not None
    logger.info(util.self_desc(self))
def test_make_gym_env_nostack(name, state_shape, reward_scale):
    '''Smoke-test a plain gym env (no frame op): step a few times and check types and shapes.'''
    env = make_gym_env(name, 0, None, None, reward_scale)  # seed=0, frame_op=None, frame_op_len=None
    env.reset()
    for _ in range(5):
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        assert isinstance(state, np.ndarray)
        assert state.shape == state_shape
        assert state.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    env.close()
def __init__(self, spec):
    '''
    Build the underlying gym environment from a lab spec.

    Registers any custom env declared in the spec, constructs either a vector
    env (when self.is_venv) or a single env, then copies env attributes and
    resolves max_t.

    @param spec: lab spec dict; 'meta.random_seed' is read for seeding.
    '''
    super().__init__(spec)
    try_register_env(spec)  # register if it's a custom gym env
    seed = ps.get(spec, 'meta.random_seed')
    if self.is_venv:  # make vector environment
        self.u_env = make_gym_venv(self.name, self.num_envs, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    else:
        self.u_env = make_gym_env(self.name, seed, self.frame_op, self.frame_op_len, self.reward_scale, self.normalize_state)
    self._set_attr_from_u_env(self.u_env)
    # fall back to the env spec's episode limit when max_t is not set in the lab spec
    self.max_t = self.max_t or self.u_env.spec.max_episode_steps
    assert self.max_t is not None
    logger.info(util.self_desc(self))
def test_make_gym_env_stack(name, state_shape, reward_scale):
    '''Test frame_op='stack' (used for RNN input): states are LazyFrames with a new leading frame dim.'''
    frame_op_len = 4
    env = make_gym_env(name, 0, 'stack', frame_op_len, reward_scale)  # seed=0
    env.reset()
    expected_shape = (frame_op_len,) + state_shape  # stack creates a new leading dim
    for _ in range(5):
        state, reward, done, info = env.step(env.action_space.sample())
        assert isinstance(state, LazyFrames)
        realized = state.__array__()  # realize lazy data
        assert isinstance(realized, np.ndarray)
        assert realized.shape == expected_shape
        assert realized.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    env.close()
def test_make_gym_env_concat(name, state_shape, reward_scale):
    '''Test frame_op='concat' (used for images or concatenated vectors): first dim is multiplied by frame_op_len.'''
    frame_op_len = 4
    env = make_gym_env(name, 0, 'concat', frame_op_len, reward_scale)  # seed=0
    env.reset()
    expected_shape = (frame_op_len * state_shape[0],) + state_shape[1:]  # concat multiplies first dim
    for _ in range(5):
        state, reward, done, info = env.step(env.action_space.sample())
        assert isinstance(state, LazyFrames)
        realized = state.__array__()  # realize lazy data
        assert isinstance(realized, np.ndarray)
        assert realized.shape == expected_shape
        assert realized.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    env.close()