Пример #1
0
 def __init__(self, spec):
     """Create the wrapped gym environment (single or vectorized) from ``spec``.

     The random seed is read from ``spec`` under ``meta.random_seed``.
     ``self.u_env`` is built with ``make_gym_venv`` when ``self.is_venv`` is
     truthy and with ``make_gym_env`` otherwise.  ``episode_life`` is passed
     as False whenever ``util.in_eval_lab_modes()`` returns a truthy value.

     Attributes such as ``self.name``, ``self.frame_op`` and ``self.max_t``
     are read here but never created here -- presumably the parent
     initializer sets them from ``spec``; confirm against the base class.
     """
     super().__init__(spec)
     try_register_env(spec)  # register if it's a custom gym env
     random_seed = ps.get(spec, 'meta.random_seed')
     use_episode_life = not util.in_eval_lab_modes()
     # keyword arguments accepted by both the single-env and vector-env factory
     factory_kwargs = dict(
         name=self.name,
         seed=random_seed,
         frame_op=self.frame_op,
         frame_op_len=self.frame_op_len,
         image_downsize=self.image_downsize,
         reward_scale=self.reward_scale,
         normalize_state=self.normalize_state,
         episode_life=use_episode_life,
     )
     if self.is_venv:  # make vector environment
         self.u_env = make_gym_venv(num_envs=self.num_envs, **factory_kwargs)
     else:
         self.u_env = make_gym_env(**factory_kwargs)
     is_unity = self.name.startswith('Unity')
     if is_unity:
         # Unity is always initialized as singleton gym env, but the Unity runtime can be vec_env
         self.num_envs = self.u_env.num_envs
         # refresh everything that was derived from the old num_envs
         self._infer_venv_attr()
         self._set_clock()
     self._set_attr_from_u_env(self.u_env)
     # fall back to the episode limit of the env's own spec when max_t is unset
     episode_limit = self.max_t or self.u_env.spec.max_episode_steps
     self.max_t = episode_limit
     assert self.max_t is not None
     logger.info(util.self_desc(self))
Пример #2
0
def test_make_gym_env_nostack(name, state_shape, reward_scale):
    """Check ``make_gym_env`` with no frame operation.

    Builds the env, takes five random actions and verifies the types of the
    last ``step`` result as well as the state shape.

    Args:
        name: environment name handed to ``make_gym_env``.
        state_shape: expected shape of a single state.
        reward_scale: reward scaling setting handed to ``make_gym_env``.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    seed = 0
    frame_op = None
    frame_op_len = None
    # Bind the optional settings by name: the factory has more optional
    # parameters than are used here, so positional binding could silently
    # route reward_scale into a different slot.
    env = make_gym_env(
        name,
        seed,
        frame_op=frame_op,
        frame_op_len=frame_op_len,
        reward_scale=reward_scale,
    )
    try:
        env.reset()
        for _ in range(5):
            state, reward, done, info = env.step(env.action_space.sample())

        assert isinstance(state, np.ndarray)
        assert state.shape == state_shape
        assert state.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    finally:
        # release the env even when one of the assertions above fails
        env.close()
Пример #3
0
 def __init__(self, spec):
     """Create the wrapped gym environment (single or vectorized) from ``spec``.

     The random seed is read from ``spec`` under ``meta.random_seed``.
     ``self.u_env`` is built with ``make_gym_venv`` when ``self.is_venv`` is
     truthy and with ``make_gym_env`` otherwise.

     Attributes such as ``self.name``, ``self.frame_op`` and ``self.max_t``
     are read here but never created here -- presumably the parent
     initializer sets them from ``spec``; confirm against the base class.
     """
     super().__init__(spec)
     try_register_env(spec)  # register if it's a custom gym env
     random_seed = ps.get(spec, 'meta.random_seed')
     # trailing positional arguments that both factory functions receive
     shared_args = (
         self.frame_op,
         self.frame_op_len,
         self.reward_scale,
         self.normalize_state,
     )
     if self.is_venv:  # make vector environment
         self.u_env = make_gym_venv(self.name, self.num_envs, random_seed, *shared_args)
     else:
         self.u_env = make_gym_env(self.name, random_seed, *shared_args)
     self._set_attr_from_u_env(self.u_env)
     # fall back to the episode limit of the env's own spec when max_t is unset
     episode_limit = self.max_t or self.u_env.spec.max_episode_steps
     self.max_t = episode_limit
     assert self.max_t is not None
     logger.info(util.self_desc(self))
Пример #4
0
def test_make_gym_env_stack(name, state_shape, reward_scale):
    """Check ``make_gym_env`` with ``frame_op='stack'``.

    Builds the env, takes five random actions and verifies that the last
    state is a ``LazyFrames`` whose realized array has a new leading
    dimension of size ``frame_op_len``.

    Args:
        name: environment name handed to ``make_gym_env``.
        state_shape: expected shape of a single, unstacked state.
        reward_scale: reward scaling setting handed to ``make_gym_env``.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    seed = 0
    frame_op = 'stack'  # used for rnn
    frame_op_len = 4
    # Bind the optional settings by name: the factory has more optional
    # parameters than are used here, so positional binding could silently
    # route reward_scale into a different slot.
    env = make_gym_env(
        name,
        seed,
        frame_op=frame_op,
        frame_op_len=frame_op_len,
        reward_scale=reward_scale,
    )
    try:
        env.reset()
        for _ in range(5):
            state, reward, done, info = env.step(env.action_space.sample())

        assert isinstance(state, LazyFrames)
        state = state.__array__()  # realize data
        assert isinstance(state, np.ndarray)
        # stack creates new dim
        stack_shape = (frame_op_len, ) + state_shape
        assert state.shape == stack_shape
        assert state.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    finally:
        # release the env even when one of the assertions above fails
        env.close()
Пример #5
0
def test_make_gym_env_concat(name, state_shape, reward_scale):
    """Check ``make_gym_env`` with ``frame_op='concat'``.

    Builds the env, takes five random actions and verifies that the last
    state is a ``LazyFrames`` whose realized array has its first dimension
    multiplied by ``frame_op_len``.

    Args:
        name: environment name handed to ``make_gym_env``.
        state_shape: expected shape of a single, unconcatenated state.
        reward_scale: reward scaling setting handed to ``make_gym_env``.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    seed = 0
    frame_op = 'concat'  # used for image, or for concat vector
    frame_op_len = 4
    # Bind the optional settings by name: the factory has more optional
    # parameters than are used here, so positional binding could silently
    # route reward_scale into a different slot.
    env = make_gym_env(
        name,
        seed,
        frame_op=frame_op,
        frame_op_len=frame_op_len,
        reward_scale=reward_scale,
    )
    try:
        env.reset()
        for _ in range(5):
            state, reward, done, info = env.step(env.action_space.sample())

        assert isinstance(state, LazyFrames)
        state = state.__array__()  # realize data
        assert isinstance(state, np.ndarray)
        # concat multiplies first dim
        stack_shape = (frame_op_len * state_shape[0], ) + state_shape[1:]
        assert state.shape == stack_shape
        assert state.shape == env.observation_space.shape
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
    finally:
        # release the env even when one of the assertions above fails
        env.close()