Пример #1
0
 def __init__(self, spec):
     """Create the wrapped gym environment and derive dependent attributes.

     Reads the random seed from ``spec`` at ``meta.random_seed`` and builds
     either a vector environment (when ``self.is_venv`` is truthy) or a
     single environment, storing the result on ``self.u_env``. ``self.max_t``
     falls back to ``self.u_env.spec.max_episode_steps`` when it is not
     already set to a truthy value.

     Args:
         spec: Environment spec; forwarded to the parent initializer and to
             ``try_register_env``.

     Raises:
         AssertionError: If ``self.max_t`` is still ``None`` after the
             fallback.
     """
     super().__init__(spec)
     try_register_env(spec)  # register if it's a custom gym env
     random_seed = ps.get(spec, 'meta.random_seed')
     use_episode_life = not util.in_eval_lab_modes()

     # Options common to both the single-env and the vector-env factory.
     shared_kwargs = {
         'name': self.name,
         'seed': random_seed,
         'frame_op': self.frame_op,
         'frame_op_len': self.frame_op_len,
         'image_downsize': self.image_downsize,
         'reward_scale': self.reward_scale,
         'normalize_state': self.normalize_state,
         'episode_life': use_episode_life,
     }
     if not self.is_venv:
         self.u_env = make_gym_env(**shared_kwargs)
     else:
         # The vector environment additionally needs the number of envs.
         self.u_env = make_gym_venv(num_envs=self.num_envs, **shared_kwargs)

     is_unity = self.name.startswith('Unity')
     if is_unity:
         # Unity is always initialized as singleton gym env, but the Unity
         # runtime can be vec_env, so take num_envs from the created env.
         self.num_envs = self.u_env.num_envs
         # Refresh the variables that depend on num_envs.
         self._infer_venv_attr()
         self._set_clock()

     self._set_attr_from_u_env(self.u_env)
     configured_max_t = self.max_t
     if configured_max_t:
         self.max_t = configured_max_t
     else:
         self.max_t = self.u_env.spec.max_episode_steps
     assert self.max_t is not None
     logger.info(util.self_desc(self))
Пример #2
0
def test_make_gym_stack(name, num_envs, state_shape, reward_scale):
    """Check the outputs of a vector env built with ``frame_op='stack'``.

    Steps the env five times with a sampled action repeated for every
    sub-env, then verifies that the state has shape
    ``(num_envs, frame_op_len) + state_shape`` and that reward, done and
    info each carry one entry per sub-env.

    Args:
        name: Environment name passed to ``make_gym_venv``.
        num_envs: Number of sub-environments in the vector env.
        state_shape: Expected per-frame state shape, as a tuple.
        reward_scale: Reward scaling option forwarded to ``make_gym_venv``.

    Arguments are presumably supplied by pytest parametrization defined
    outside this snippet.
    """
    seed = 0
    frame_op = 'stack'  # used for rnn
    frame_op_len = 4
    venv = make_gym_venv(name,
                         num_envs,
                         seed,
                         frame_op=frame_op,
                         frame_op_len=frame_op_len,
                         reward_scale=reward_scale)
    # Close the env even when a step or an assertion fails, so a failing
    # test does not leave the vector env open.
    try:
        venv.reset()
        for _ in range(5):
            state, reward, done, info = venv.step(
                [venv.action_space.sample()] * num_envs)

        assert isinstance(state, np.ndarray)
        stack_shape = (
            num_envs,
            frame_op_len,
        ) + state_shape
        assert state.shape == stack_shape
        assert isinstance(reward, np.ndarray)
        assert reward.shape == (num_envs, )
        assert isinstance(done, np.ndarray)
        assert done.shape == (num_envs, )
        assert len(info) == num_envs
    finally:
        venv.close()
Пример #3
0
 def __init__(self, spec):
     """Create the wrapped gym environment and derive dependent attributes.

     Reads the random seed from ``spec`` at ``meta.random_seed`` and builds
     either a vector environment (when ``self.is_venv`` is truthy) or a
     single environment, storing the result on ``self.u_env``. ``self.max_t``
     falls back to ``self.u_env.spec.max_episode_steps`` when it is not
     already set to a truthy value.

     Args:
         spec: Environment spec; forwarded to the parent initializer and to
             ``try_register_env``.

     Raises:
         AssertionError: If ``self.max_t`` is still ``None`` after the
             fallback.
     """
     super().__init__(spec)
     try_register_env(spec)  # register if it's a custom gym env
     seed = ps.get(spec, 'meta.random_seed')
     # Pass every option by keyword so each value reaches the intended
     # parameter regardless of the factory's positional order (other call
     # sites pass extra options such as image_downsize to these factories).
     if self.is_venv:  # make vector environment
         self.u_env = make_gym_venv(name=self.name,
                                    num_envs=self.num_envs,
                                    seed=seed,
                                    frame_op=self.frame_op,
                                    frame_op_len=self.frame_op_len,
                                    reward_scale=self.reward_scale,
                                    normalize_state=self.normalize_state)
     else:
         self.u_env = make_gym_env(name=self.name,
                                   seed=seed,
                                   frame_op=self.frame_op,
                                   frame_op_len=self.frame_op_len,
                                   reward_scale=self.reward_scale,
                                   normalize_state=self.normalize_state)
     self._set_attr_from_u_env(self.u_env)
     self.max_t = self.max_t or self.u_env.spec.max_episode_steps
     assert self.max_t is not None
     logger.info(util.self_desc(self))
Пример #4
0
def test_make_gym_venv_downsize(name, num_envs, state_shape, image_downsize):
    """Check the outputs of a vector env built with ``image_downsize`` set.

    Steps the env five times with a sampled action repeated for every
    sub-env, then verifies that the state has shape
    ``(num_envs,) + state_shape`` and that reward, done and info each carry
    one entry per sub-env.

    Args:
        name: Environment name passed to ``make_gym_venv``.
        num_envs: Number of sub-environments in the vector env.
        state_shape: Expected per-env state shape, as a tuple.
        image_downsize: Downsize option forwarded to ``make_gym_venv``.

    Arguments are presumably supplied by pytest parametrization defined
    outside this snippet.
    """
    seed = 0
    frame_op = None
    frame_op_len = None
    venv = make_gym_venv(name,
                         num_envs,
                         seed,
                         frame_op=frame_op,
                         frame_op_len=frame_op_len,
                         image_downsize=image_downsize)
    # Close the env even when a step or an assertion fails, so a failing
    # test does not leave the vector env open.
    try:
        venv.reset()
        for _ in range(5):
            state, reward, done, info = venv.step(
                [venv.action_space.sample()] * num_envs)

        assert isinstance(state, np.ndarray)
        assert state.shape == (num_envs, ) + state_shape
        assert isinstance(reward, np.ndarray)
        assert reward.shape == (num_envs, )
        assert isinstance(done, np.ndarray)
        assert done.shape == (num_envs, )
        assert len(info) == num_envs
    finally:
        venv.close()