def __init__(self, desc='4x4', max_episode_length=None):
    """Initialize the grid-world environment.

    Args:
        desc (str): Grid configuration key, looked up in ``MAPS``; a
            pre-built map (list of row strings) may also be passed.
        max_episode_length (int): The maximum steps allowed for an
            episode.
    """
    if isinstance(desc, str):
        desc = MAPS[desc]
    grid = np.array([list(row) for row in desc])
    # Normalize the alternate tile symbols to the canonical letters:
    # '.' -> Frozen, 'o' -> Hole, 'x' -> Wall.
    for legacy, canonical in (('.', 'F'), ('o', 'H'), ('x', 'W')):
        grid[grid == legacy] = canonical
    self._desc = grid
    self._n_row, self._n_col = grid.shape
    # Exactly one 'S' tile is expected; unpacking enforces that.
    (start_x, ), (start_y, ) = np.nonzero(grid == 'S')
    self._start_state = start_x * self._n_col + start_y
    self._state = None
    self._domain_fig = None
    self._step_cnt = None
    self._max_episode_length = max_episode_length
    self._action_space = akro.Discrete(4)
    self._observation_space = akro.Discrete(self._n_row * self._n_col)
    self._spec = EnvSpec(action_space=self.action_space,
                         observation_space=self.observation_space,
                         max_episode_length=max_episode_length)
def action_space(self):
    """Return the action space.

    Returns:
        akro.Box: Action space.
    """
    # A continuous 1-D Box when configured for 'box', otherwise a
    # 5-way discrete space.
    if self.act_space_type != 'box':
        return akro.Discrete(5)
    return akro.Box(low=-5.0, high=5.0, shape=(1, ), dtype=np.float32)
def eps_data():
    """Build a keyword-argument dict describing a batch of episodes.

    Returns:
        dict: Constructor arguments for an episode batch, covering the
            env spec, per-step arrays, per-episode infos, and lengths.
    """
    # Spaces and spec.
    obs_space = akro.Box(low=1,
                         high=np.inf,
                         shape=(4, 3, 2),
                         dtype=np.float32)
    act_space = akro.Discrete(2)
    env_spec = EnvSpec(obs_space, act_space)

    # Per-step data: every observation is the space's lower bound,
    # every action is 1, rewards count up over the batch.
    lens = np.array([10, 20, 7, 25, 25, 40, 10, 5])
    n_t = lens.sum()
    obs = np.stack([obs_space.low] * n_t)
    last_obs = np.stack([obs_space.low] * len(lens))
    act = np.stack([1] * n_t)
    rew = np.arange(n_t)

    env_infos = {
        'goal': np.stack([[1, 1]] * n_t),
        'foo': np.arange(n_t),
    }
    agent_infos = {
        'prev_action': act,
        'hidden': np.arange(n_t),
    }

    # Each episode is FIRST, then MIDs, then a TERMINAL step.
    step_types = np.array([
        st for size in lens
        for st in [StepType.FIRST] + [StepType.MID] * (size - 2) +
        [StepType.TERMINAL]
    ],
                          dtype=StepType)

    episode_infos = {'task_one_hot': np.stack([[1, 1]] * len(lens))}

    return {
        'env_spec': env_spec,
        'episode_infos': episode_infos,
        'observations': obs,
        'last_observations': last_obs,
        'actions': act,
        'rewards': rew,
        'env_infos': env_infos,
        'agent_infos': agent_infos,
        'step_types': step_types,
        'lengths': lens
    }
def test_act_env_spec_mismatch_time_step(sample_data):
    """Constructing a TimeStep with a mismatched action raises ValueError."""
    # A truncated action no longer conforms to the original action space.
    with pytest.raises(ValueError,
                       match='action must conform to action_space'):
        sample_data['action'] = sample_data['action'][:-1]
        TimeStep(**sample_data)

    # Swap in a spec whose Discrete action space has no shape attribute,
    # then truncate again: the dimensionality check should fire instead.
    obs_space = akro.Box(low=1, high=10, shape=(4, 3, 2), dtype=np.float32)
    act_space = akro.Discrete(5)
    sample_data['env_spec'] = EnvSpec(obs_space, act_space)
    with pytest.raises(ValueError,
                       match='action should have the same dimensionality'):
        sample_data['action'] = sample_data['action'][:-1]
        TimeStep(**sample_data)
def __init__(self, env, name=None):
    """Create a DMControlEnv.

    Args:
        env (dm_control.suite.Task): The wrapped dm_control environment.
        name (str): Name of the environment; defaults to the wrapped
            task's class name.
    """
    self._env = env
    self._name = name or type(env.task).__name__
    self._viewer = None
    self._step_cnt = None
    self._max_episode_length = self._env._step_limit

    # Action space: a 1-D spec with an infinite bound is mapped to a
    # Discrete space; bounded specs become a Box.
    action_spec = self._env.action_spec()
    has_inf_bound = (-np.inf in action_spec.minimum
                     or np.inf in action_spec.maximum)
    if len(action_spec.shape) == 1 and has_inf_bound:
        self._action_space = akro.Discrete(np.prod(action_spec.shape))
    else:
        self._action_space = akro.Box(low=action_spec.minimum,
                                      high=action_spec.maximum,
                                      dtype=np.float32)

    # Observation space: the flattened observation spec as an
    # unbounded Box.
    flat_dim = _flat_shape(self._env.observation_spec())
    self._observation_space = akro.Box(low=-np.inf,
                                       high=np.inf,
                                       shape=[flat_dim],
                                       dtype=np.float32)

    self._spec = EnvSpec(action_space=self.action_space,
                         observation_space=self.observation_space,
                         max_episode_length=self._max_episode_length)
def action_space(self):
    """akro.Discrete: an action space."""
    # Built fresh on each access from the stored action dimensionality.
    space = akro.Discrete(self._action_dim)
    return space