def __init__(self, n_actions: int = 10, episode_len: int = 100, num: int = 1) -> None:
    """
    Set up the environment with a fixed, pre-generated integer sequence.

    :param n_actions: size of the discrete action space; also the exclusive
        upper bound of the values drawn into ``self.sequence``
    :param episode_len: number of entries generated for ``self.sequence``
    :param num: forwarded to the base class and used as the length of
        ``self.actions``
    """
    action_space = types.discrete_scalar(n_actions)
    observation_space = types.discrete_scalar(1)
    super().__init__(ac_space=action_space, ob_space=observation_space, num=num)

    self.episode_len = episode_len
    self.time = 0
    self.actions = np.zeros(num, dtype="i")
    # The generator is always seeded with 0, so the same arguments always
    # produce the same sequence.
    self.sequence = np.random.RandomState(0).randint(
        0, n_actions, size=episode_len
    )
def test_concat(space_type):
    """
    Check that ConcatEnv combines two IdentityEnv instances correctly.

    Two single-environment IdentityEnvs with different seeds and episode
    lengths are wrapped and concatenated; the test then checks the batched
    rewards, observations, ``first`` flags, infos and ``callmethod`` fan-out.

    :param space_type: one of "binary", "dict" or "real_dict"; selects the
        observation/action space used for both sub-environments
    """
    # Builders are lazy so only the requested space is actually constructed.
    space_builders = {
        "binary": lambda: types.discrete_scalar(2),
        "dict": lambda: types.DictType(degrees=types.discrete_scalar(360)),
        "real_dict": lambda: types.DictType(
            degrees=types.TensorType(shape=(), eltype=types.Real())
        ),
    }
    if space_type not in space_builders:
        raise Exception(f"invalid space_type {space_type}")
    space = space_builders[space_type]()
    is_dict_space = isinstance(space, types.DictType)

    first_base = IdentityEnv(space=space, episode_len=1, seed=0)
    first_base.f = lambda x: [x[0]**2]
    second_base = IdentityEnv(space=space, episode_len=2, seed=1)
    second_base.f = lambda x: [2 * x[0]]

    wrapped_first = AddInfo(env=AssertSpacesWrapper(first_base), id=1)
    wrapped_second = AddInfo(env=AssertSpacesWrapper(second_base), id=2)
    env = AssertSpacesWrapper(ConcatEnv([wrapped_first, wrapped_second]))

    # Initial observation: no reward yet, both sub-envs start an episode.
    rew, ob, first = env.observe()
    assert np.array_equal(rew, np.array([0, 0]))
    if is_dict_space:
        ob = ob["degrees"]
    assert ob.shape == (2, )
    assert ob[0] != ob[1]
    assert np.array_equal(first, np.array([1, 1]))

    # Send the first env's observation to both envs: correct for env 0,
    # wrong for env 1 (the observations were asserted to differ above).
    act = np.array([ob[0], ob[0]])
    if is_dict_space:
        act = {"degrees": act}
    env.act(act)

    rew, _ob, first = env.observe()
    if space_type != "real_dict":
        assert np.array_equal(rew, np.array([1, 0]))
    else:
        assert rew[0] == 0
        assert rew[1] < 0
    # Only the env with episode_len=1 has started a new episode.
    assert np.array_equal(first, np.array([1, 0]))
    assert env.get_info() == [{"id": 1}, {"id": 2}]

    # Three arguments for two sub-environments must be rejected.
    with pytest.raises(AssertionError):
        env.callmethod("f", [2, 3, 4])
    assert env.callmethod("f", [2, 3]) == [2**2, 2 * 3]
def test_identity_env():
    """
    Check rewards and ``first`` flags of IdentityEnv over one 2-step episode.

    Each action echoes the latest observation back, so every step should be
    rewarded with 1, and the episode should restart after the second step.
    """
    env = IdentityEnv(space=types.discrete_scalar(1024), episode_len=2)

    def echo_step(action):
        # Apply the action, then report what the environment shows next.
        env.act(action)
        return env.observe()

    # Fresh environment: start of an episode, no reward accumulated yet.
    reward, observation, is_first = env.observe()
    assert observation == observation
    assert is_first
    assert reward == 0

    # First step: correct action, still inside the episode.
    reward, observation, is_first = echo_step(observation)
    assert not is_first
    assert reward == 1

    # Second step: correct action, episode_len reached so a new one begins.
    reward, observation, is_first = echo_step(observation)
    assert is_first
    assert reward == 1
def __init__(
    self,
    ob_space: types.ValType = types.TensorType(
        eltype=types.Discrete(256, dtype_name="uint8"), shape=(64, 64, 3)),
    ac_space: types.ValType = types.discrete_scalar(2),
    num: int = 1,
    episode_len: int = 1000,
    delay_seconds: float = 0.0,
) -> None:
    """
    Initialise the environment's pre-allocated buffers.

    :param ob_space: observation space; defaults to a 64x64x3 tensor of
        uint8 discrete values
    :param ac_space: action space; defaults to a discrete scalar with 2 values
    :param num: number of environments; sizes every per-environment buffer
    :param episode_len: stored as ``self._episode_len``
        (presumably steps per episode; confirm against the stepping code)
    :param delay_seconds: stored as ``self._delay_seconds``
        (presumably an artificial per-call delay; confirm against its users)
    """
    super().__init__(ob_space=ob_space, ac_space=ac_space, num=num)
    self._delay_seconds = delay_seconds
    self._episode_len = episode_len
    # Zero-filled observation with a leading batch dimension of `num`.
    self._ob = types_np.zeros(self.ob_space, bshape=(self.num, ))
    self._rews = np.zeros((self.num, ), dtype=np.float32)
    self._steps = 0
    # Constant "first" masks: all False and all True.
    # The builtin `bool` is used as the dtype because the `np.bool` alias was
    # deprecated in NumPy 1.20 and removed in 1.24.
    self._none_first = np.zeros((self.num, ), dtype=bool)
    self._all_first = np.ones((self.num, ), dtype=bool)
    # One independent info dict per environment.
    self._infos = [{} for _ in range(self.num)]
def test_identity_env_delay():
    """
    Check IdentityEnv when the correct action lags the observation.

    With ``delay_steps=delay`` the first ``delay`` actions cannot be correct
    because their targets were never observed, so arbitrary actions are sent
    while observations are collected. After that, replaying the collected
    observations in order must earn reward 1 on every step, and the episode
    must restart after ``delay * 2`` steps in total.
    """
    delay = 2
    for space in [types.discrete_scalar(1024)]:
        env = IdentityEnv(space=space, episode_len=delay * 2, delay_steps=delay)

        def advance(action):
            # Apply the action and return the (rew, ob, first) that follows.
            env.act(action)
            return env.observe()

        _rew, latest_ob, _first = env.observe()
        pending_obs = [latest_ob]

        # Warm-up: the action value is irrelevant, only the observations are.
        for _ in range(delay):
            _rew, latest_ob, _first = advance(0)
            pending_obs.append(latest_ob)

        first = False
        # Replay the oldest stored observation as the action on each step.
        for _ in range(delay):
            rew, latest_ob, first = advance(pending_obs.pop(0))
            pending_obs.append(latest_ob)
            assert rew == 1
        assert first
class IdentityEnv(Env):
    """
    An environment for testing where the observation at each step is the
    correct action to take on that step.

    When ``delay_steps`` is greater than zero, the action given to
    :meth:`act` is scored against the observation emitted ``delay_steps``
    steps earlier. Actions taken before any matching observation has been
    shown always receive a reward of 0.

    :param space: observation/action space for the environment
    :param episode_len: steps per episode
    :param delay_steps: delay the correct action by this many steps
    :param seed: random seed used to determine observations
    """

    DEFAULT_TYPE = types.discrete_scalar(2)

    def __init__(
        self,
        space: types.ValType = DEFAULT_TYPE,
        episode_len: int = 1,
        delay_steps: int = 0,
        seed: int = 0,
    ) -> None:
        super().__init__(ob_space=space, ac_space=space, num=1)
        self._seed = seed
        self._episode_len = episode_len
        self._step = None
        self._rews = np.zeros((1, ), dtype=np.float32)
        # The builtin `bool` is used as the dtype because the `np.bool` alias
        # was deprecated in NumPy 1.20 and removed in 1.24.
        self._firsts = np.zeros((1, ), dtype=bool)
        self._delay_steps = delay_steps
        # Holds the `delay_steps + 1` most recent samples: the newest is the
        # current observation, the oldest is the target for the next action.
        self._q = deque(maxlen=delay_steps + 1)
        self._rng = np.random.RandomState(seed)
        self._reset()

    def _reset(self) -> None:
        """Refill the sample queue and mark the start of a new episode."""
        self._q.clear()
        for _ in range(self._delay_steps + 1):
            self._q.append(
                types_np.sample(self.ac_space, bshape=(self.num, ),
                                rng=self._rng))
        self._step = 0
        self._firsts[0] = True

    def observe(self) -> Tuple[np.ndarray, Any, np.ndarray]:
        """
        Return ``(rewards, observation, firsts)`` for the current step.

        The reward and first arrays are the internal buffers, not copies, so
        later calls to :meth:`act` modify them in place.
        """
        return self._rews, self._q[-1], self._firsts

    def act(self, ac: Any) -> None:
        """
        Score ``ac`` against the oldest queued sample and advance one step.

        Discrete sub-spaces contribute 1 for an exact match and 0 otherwise.
        Real sub-spaces contribute ``-0.5 * ||ac - target||^2``. The reward is
        the mean of these contributions, and the episode resets once
        ``episode_len`` steps have been taken.

        :param ac: action, structured like the action space
        :raises Exception: if a sub-space has an eltype other than Discrete
            or Real
        """
        self._firsts[0] = False
        state = self._q.popleft()

        rews = []

        def add_reward(subspace, substate, subval):
            # Called through types.multimap with matching pieces of the
            # space, the target state and the action.
            if isinstance(subspace.eltype, types.Discrete):
                r = 1 if (substate == subval).all() else 0
            elif isinstance(subspace.eltype, types.Real):
                diff = subval - substate
                diff = diff[:]
                r = -0.5 * np.dot(diff, diff)
            else:
                raise Exception(
                    f"unrecognized action space eltype {subspace.eltype}")
            rews.append(r)

        types.multimap(add_reward, self.ac_space, state, ac)
        rew = sum(rews) / len(rews)

        if self._step < self._delay_steps:
            # don't give any reward for guessing un-observed states
            rew = 0
        self._rews[0] = rew
        self._q.append(
            types_np.sample(self.ac_space, bshape=(self.num, ),
                            rng=self._rng))
        self._step += 1
        if self._step >= self._episode_len:
            self._reset()