Exemplo n.º 1
0
 def observation_spec(self) -> types.Observation:
     """Return an OLT spec for every agent in ``self._possible_agents``.

     All agents share ``self.observation_space``: its ``"observation"``
     entry becomes the observation spec and its ``"action_mask"`` entry
     becomes the legal-actions spec. The terminal spec is a float32
     array of shape ``(1,)``.

     Returns:
         A dict mapping each agent to its ``types.OLT`` spec.
     """
     olt_specs = {}
     for agent_id in self._possible_agents:
         obs_spec = _convert_to_spec(self.observation_space["observation"])
         mask_spec = _convert_to_spec(self.observation_space["action_mask"])
         olt_specs[agent_id] = types.OLT(
             observation=obs_spec,
             legal_actions=mask_spec,
             terminal=specs.Array((1, ), np.float32),
         )
     return olt_specs
Exemplo n.º 2
0
 def observation_spec(self) -> types.Observation:
     """Return an OLT spec for every agent in ``self.possible_agents``.

     For each agent the observation spec is converted from the wrapped
     environment's ``observation_spaces[agent]`` and the legal-actions
     spec from its ``action_spaces[agent]``. The terminal spec is a
     float32 array of shape ``(1,)``.

     Returns:
         A dict mapping each agent to its ``types.OLT`` spec.
     """
     return {
         agent_id: types.OLT(
             observation=_convert_to_spec(
                 self._environment.observation_spaces[agent_id]),
             legal_actions=_convert_to_spec(
                 self._environment.action_spaces[agent_id]),
             terminal=specs.Array((1, ), np.float32),
         )
         for agent_id in self.possible_agents
     }
Exemplo n.º 3
0
 def observation_spec(self) -> Dict[str, OLT]:
     """Return an OLT spec for every id in ``self._environment.agent_ids``.

     The observation spec comes from the environment's
     ``observation_spaces[agent]`` and the legal-actions spec from its
     ``action_spaces[agent]``. The terminal spec is a float32 array of
     shape ``(1,)``.

     Returns:
         A dict mapping each agent id to its ``OLT`` spec.
     """
     return {
         agent_id: OLT(
             observation=_convert_to_spec(
                 self._environment.observation_spaces[agent_id]
             ),
             legal_actions=_convert_to_spec(
                 self._environment.action_spaces[agent_id]
             ),
             terminal=specs.Array((1,), np.float32),
         )
         for agent_id in self._environment.agent_ids
     }
Exemplo n.º 4
0
 def observation_spec(self) -> Dict[str, OLT]:
     """Return an OLT spec for every agent in ``self.agents``.

     When ``self._include_agent_info`` is truthy the observation spec is
     a 2-tuple of the converted observation space and the result of
     ``agent_info_spec()``; otherwise it is the converted observation
     space alone. The legal-actions spec is converted from the agent's
     action space and the terminal spec is a float32 array of shape
     ``(1,)``.

     Returns:
         A dict mapping each agent to its ``OLT`` spec.
     """
     olt_specs = {}
     for agent_id in self.agents:
         if self._include_agent_info:
             # Pair the plain observation spec with the agent-info spec.
             obs_spec = (
                 _convert_to_spec(self.observation_spaces[agent_id]),
                 agent_info_spec(),
             )
         else:
             obs_spec = _convert_to_spec(self.observation_spaces[agent_id])
         olt_specs[agent_id] = OLT(
             observation=obs_spec,
             legal_actions=_convert_to_spec(self.action_spaces[agent_id]),
             terminal=specs.Array((1, ), np.float32),
         )
     return olt_specs
Exemplo n.º 5
0
 def action_spec(
         self) -> Dict[str, Union[specs.DiscreteArray, specs.BoundedArray]]:
     """Return the converted action spec of every agent.

     Returns:
         A dict mapping each agent in ``self.possible_agents`` to the
         result of ``_convert_to_spec`` on its entry in
         ``self.action_spaces``.
     """
     spaces_by_agent = self.action_spaces
     return {
         agent_id: _convert_to_spec(spaces_by_agent[agent_id])
         for agent_id in self.possible_agents
     }
Exemplo n.º 6
0
    def _convert_observation(  # type: ignore[override]
            self, agent: str, observe: Union[dict, np.ndarray],
            done: bool) -> types.OLT:
        """Wrap one agent's raw observation in a ``types.OLT``.

        Args:
            agent: Key used to look up the agent's action space.
            observe: Either a dict holding ``"action_mask"`` and
                ``"observation"`` entries, or the observation itself.
            done: Terminal flag, stored as a one-element float32 array.

        Returns:
            A ``types.OLT`` holding the observation, legal actions and
            terminal flag.
        """
        if isinstance(observe, dict) and "action_mask" in observe:
            legal_mask = observe["action_mask"]
            obs_data = observe["observation"]
        else:
            # No mask supplied: mark every action as legal.
            agent_space = self._environment.action_spaces[agent]
            legal_mask = np.ones(
                _convert_to_spec(agent_space).shape,
                dtype=agent_space.dtype,
            )
            obs_data = observe

        # observation is not expected to be int8
        if obs_data.dtype == np.int8:
            obs_data = np.float32(obs_data)
        if legal_mask.dtype == np.int8:
            legal_mask = np.int64(legal_mask)

        return types.OLT(
            observation=obs_data,
            legal_actions=legal_mask,
            terminal=np.asarray([done], dtype=np.float32),
        )
Exemplo n.º 7
0
    def _convert_observations(
        self, observes: Dict[str, np.ndarray], dones: Dict[str, bool]
    ) -> Dict[str, OLT]:
        """Wrap every agent's raw observation in an ``OLT``.

        Args:
            observes: Per-agent raw observations. A value may be a dict
                holding ``"action_mask"`` and ``"observation"`` entries.
            dones: Per-agent terminal flags, indexed with the same keys
                as ``observes``.

        Returns:
            A dict mapping each agent to an ``OLT`` whose observation is
            a float32 array and whose terminal flag is a one-element
            float32 array.
        """
        converted: Dict[str, OLT] = {}
        for agent_id, raw_obs in observes.items():
            has_mask = isinstance(raw_obs, dict) and "action_mask" in raw_obs
            if has_mask:
                legal_mask = raw_obs["action_mask"]
                obs_data = raw_obs["observation"]
            else:
                # TODO Handle legal actions better for continuous envs,
                #  maybe have min and max for each action and clip the agents
                #  actions accordingly
                agent_space = self._environment.action_spaces[agent_id]
                legal_mask = np.ones(
                    _convert_to_spec(agent_space).shape,
                    dtype=agent_space.dtype,
                )
                obs_data = raw_obs

            converted[agent_id] = OLT(
                observation=np.array(obs_data, dtype=np.float32),
                legal_actions=legal_mask,
                terminal=np.asarray([dones[agent_id]], dtype=np.float32),
            )

        return converted
Exemplo n.º 8
0
    def test_multi_discrete(self):
        """Check the spec converted from ``MultiDiscrete([2, 3])``.

        Values inside the per-dimension ranges must validate; values that
        reach a dimension's size must raise ``ValueError``.
        """
        converted = gym_wrapper._convert_to_spec(
            gym.spaces.MultiDiscrete([2, 3]))

        # Lowest and highest in-range values.
        for in_range in ([0, 0], [1, 2]):
            converted.validate(in_range)

        # One component out of range at a time.
        for out_of_range in ([2, 2], [1, 3]):
            with self.assertRaises(ValueError):
                converted.validate(out_of_range)
Exemplo n.º 9
0
 def observation_spec(self) -> types.Observation:
     """Return an OLT spec for every agent the environment can contain.

     If an agent's observation space is a ``gym.spaces.Dict``, its
     ``"observation"`` and ``"action_mask"`` entries supply the
     observation and legal-actions spaces. Otherwise the observation
     space and the agent's action space are used. The spaces are
     deep-copied before their dtypes are adjusted (int8 observations
     become float32, int8 legal actions become int64), so the
     environment's own spaces are not modified.

     Returns:
         A dict mapping each agent to its ``types.OLT`` spec.
     """
     olt_specs = {}
     for agent_id in self._environment.possible_agents:
         agent_space = self._environment.observation_spaces[agent_id]
         if isinstance(agent_space, gym.spaces.Dict):
             obs_copy = copy.deepcopy(agent_space["observation"])
             legal_copy = copy.deepcopy(agent_space["action_mask"])
         else:
             obs_copy = copy.deepcopy(agent_space)
             legal_copy = copy.deepcopy(
                 self._environment.action_spaces[agent_id])

         # Promote int8 dtypes on the copies only.
         if obs_copy.dtype == np.int8:
             obs_copy.dtype = np.dtype(np.float32)
         if legal_copy.dtype == np.int8:
             legal_copy.dtype = np.dtype(np.int64)

         olt_specs[agent_id] = types.OLT(
             observation=_convert_to_spec(obs_copy),
             legal_actions=_convert_to_spec(legal_copy),
             terminal=specs.Array((1, ), np.float32),
         )
     return olt_specs
Exemplo n.º 10
0
    def _convert_observations(self, observes: Dict[str, np.ndarray],
                              dones: Dict[str, bool]) -> types.Observation:
        """Wrap every agent's raw observation in a ``types.OLT``.

        Args:
            observes: Per-agent raw observations. A value may be a dict
                holding ``"action_mask"`` and ``"observation"`` entries.
            dones: Per-agent terminal flags, indexed with the same keys
                as ``observes``.

        Returns:
            A dict mapping each agent to a ``types.OLT``. When no action
            mask is supplied, the legal actions are an all-ones array
            shaped like the spec converted from ``self.action_space``.
        """
        converted: Dict[str, types.OLT] = {}
        for agent_id, raw_obs in observes.items():
            if isinstance(raw_obs, dict) and "action_mask" in raw_obs:
                legal_mask = raw_obs["action_mask"]
                obs_data = raw_obs["observation"]
            else:
                # No mask supplied: mark every action as legal.
                legal_mask = np.ones(
                    _convert_to_spec(self.action_space).shape,
                    dtype=self.action_space.dtype,
                )
                obs_data = raw_obs
            converted[agent_id] = types.OLT(
                observation=obs_data,
                legal_actions=legal_mask,
                terminal=np.asarray([dones[agent_id]], dtype=np.float32),
            )

        return converted
Exemplo n.º 11
0
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     """Return the converted shared action spec for every agent.

     Returns:
         A dict mapping each agent in ``self._possible_agents`` to the
         result of ``_convert_to_spec(self.action_space)``.
     """
     specs_by_agent = {}
     for agent_id in self._possible_agents:
         specs_by_agent[agent_id] = _convert_to_spec(self.action_space)
     return specs_by_agent
Exemplo n.º 12
0
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     """Return the converted action spec of every agent.

     Returns:
         A dict mapping each agent in ``self.possible_agents`` to the
         result of ``_convert_to_spec`` on its entry in the wrapped
         environment's ``action_spaces``.
     """
     return {
         agent_id: _convert_to_spec(
             self._environment.action_spaces[agent_id])
         for agent_id in self.possible_agents
     }