示例#1
0
def _check_obs(
    obs: Union[tuple, dict, np.ndarray, int],
    observation_space: Space,
    method_name: str,
):
    """Validate a single observation against the environment's declared space.

    The checks run in this order: a tuple is only accepted for a ``Tuple``
    space, the Python type must suit the space kind (``int`` for ``Discrete``,
    ``np.ndarray`` for array-like spaces), and finally the space itself must
    report that it contains the observation.

    Args:
        obs: The observation to validate
        observation_space: The space the observation is expected to belong to
        method_name: Name of the env method that produced ``obs``; only used
            to build the error messages

    Raises:
        AssertionError: If any of the checks fails
    """
    space_is_tuple = isinstance(observation_space, Tuple)
    # A tuple observation is only legitimate when the space itself is a Tuple.
    assert space_is_tuple or not isinstance(
        obs, tuple
    ), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple"

    # Type check that depends on the kind of space.
    space_is_discrete = isinstance(observation_space, Discrete)
    if space_is_discrete:
        assert isinstance(
            obs, int
        ), f"The observation returned by `{method_name}()` method must be an int"
    elif _is_numpy_array_space(observation_space):
        assert isinstance(
            obs, np.ndarray
        ), f"The observation returned by `{method_name}()` method must be a numpy array"

    # Last word goes to the space's own membership test.
    is_member = observation_space.contains(obs)
    assert (
        is_member
    ), f"The observation returned by the `{method_name}()` method does not match the given observation space"
示例#2
0
def _check_returned_values(env: gym.Env, observation_space: spaces.Space,
                           action_space: spaces.Space) -> None:
    """
    Check the values returned by the env's `.reset()` and `.step()` methods.

    The env is reset once and stepped once with a randomly sampled action.
    The observation from each call is validated against ``observation_space``
    (key by key when the space is a ``spaces.Dict``), then the reward, done
    flag and info dict returned by ``step()`` are type-checked. For a
    ``gym.GoalEnv`` the reward is also compared with ``env.compute_reward``.

    :param env: The environment to check
    :param observation_space: The observation space declared by the environment
    :param action_space: The action space a random action is sampled from
    :raises AssertionError: If any returned value fails a check
    """

    def _check_space_obs(obs, method_name: str) -> None:
        # Validate `obs`, descending one level into Dict spaces.
        if not isinstance(observation_space, spaces.Dict):
            _check_obs(obs, observation_space, method_name)
            return

        assert isinstance(
            obs, dict
        ), f"The observation returned by `{method_name}()` must be a dictionary"
        for key, subspace in observation_space.spaces.items():
            # Without this, a missing key would surface as a bare KeyError
            # instead of the AssertionError every other failure raises.
            assert key in obs, (
                f"Error while checking key={key}: the observation returned by "
                f"`{method_name}()` has no such key")
            try:
                _check_obs(obs[key], subspace, method_name)
            except AssertionError as e:
                # Prefix the failing key and keep the original error as cause.
                raise AssertionError(f"Error while checking key={key}: " +
                                     str(e)) from e

    # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists
    _check_space_obs(env.reset(), "reset")

    # Sample a random action
    action = action_space.sample()
    data = env.step(action)

    assert (
        len(data) == 4
    ), "The `step()` method must return four values: obs, reward, done, info"

    # Unpack
    obs, reward, done, info = data

    _check_space_obs(obs, "step")

    # We also allow int because the reward will be cast to float
    assert isinstance(
        reward,
        (float, int,
         np.float32)), "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    assert isinstance(
        info,
        dict), "The `info` returned by `step()` must be a python dictionary"

    if isinstance(env, gym.GoalEnv):
        # For a GoalEnv, the keys are checked at reset
        expected_reward = env.compute_reward(obs["achieved_goal"],
                                             obs["desired_goal"], info)
        assert (
            reward == expected_reward
        ), "The reward returned by `step()` must match `compute_reward()`"
示例#3
0
def _check_obs(obs: Union[tuple, dict, np.ndarray, int],
               observation_space: spaces.Space, method_name: str) -> None:
    """
    Verify that an observation produced by the environment is consistent
    with the observation space the environment declares.

    :param obs: The observation to verify
    :param observation_space: The space ``obs`` should belong to
    :param method_name: Name of the env method that returned ``obs``
        (only used in the error messages)
    :raises AssertionError: If the observation has the wrong Python type
        for the space, or the space does not contain it
    """
    # Tuples are only acceptable for Tuple spaces.
    tuple_space = isinstance(observation_space, spaces.Tuple)
    assert tuple_space or not isinstance(
        obs, tuple
    ), "The observation returned by the `{}()` method should be a single value, not a tuple".format(
        method_name)

    # GoalEnv-specific checks are left to the base class.
    discrete_space = isinstance(observation_space, spaces.Discrete)
    if discrete_space:
        assert isinstance(
            obs, int
        ), "The observation returned by `{}()` method must be an int".format(
            method_name)
    elif _is_numpy_array_space(observation_space):
        assert isinstance(
            obs, np.ndarray
        ), "The observation returned by `{}()` method must be a numpy array".format(
            method_name)

    # Finally defer to the space's own membership test.
    in_space = observation_space.contains(obs)
    assert in_space, "The observation returned by the `{}()` method does not match the given observation space".format(
        method_name)
示例#4
0
def _check_obs(
    obs: Union[tuple, dict, np.ndarray, int],
    observation_space: spaces.Space,
    method_name: str,
) -> None:
    """
    Verify that an observation produced by the environment is consistent
    with the observation space the environment declares.

    :param obs: The observation to verify
    :param observation_space: The space ``obs`` should belong to
    :param method_name: Name of the env method that returned ``obs``
        (only used in the error messages)
    :raises AssertionError: If the observation has the wrong kind of value
        for the space, or the space does not contain it
    """
    # Tuples are only acceptable for Tuple spaces.
    expects_tuple = isinstance(observation_space, spaces.Tuple)
    assert expects_tuple or not isinstance(
        obs, tuple
    ), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple"

    expects_scalar = isinstance(observation_space, spaces.Discrete)
    if expects_scalar:
        # Any scalar accepted by np.isscalar passes here.
        obs_is_scalar = np.isscalar(obs)
        assert (
            obs_is_scalar
        ), f"The observation returned by `{method_name}()` method must be a scalar"
    elif _is_numpy_array_space(observation_space):
        assert isinstance(
            obs, np.ndarray
        ), f"The observation returned by `{method_name}()` method must be a numpy array"

    # Finally defer to the space's own membership test.
    in_space = observation_space.contains(obs)
    assert (
        in_space
    ), f"The observation returned by the `{method_name}()` method does not match the given observation space"
示例#5
0
def _check_returned_values(env: gym.Env, observation_space: Space, action_space: Space):
    """Check the returned values by the env when calling :meth:`env.reset` or :meth:`env.step` methods.

    The env is reset once and stepped once with a randomly sampled action.
    The observation from each call is validated against ``observation_space``
    (key by key when the space is a ``Dict``), then the reward, done flag and
    info dict returned by ``step()`` are type-checked.

    Args:
        env: The environment
        observation_space: The environment's observation space
        action_space: The environment's action space

    Raises:
        AssertionError: If any returned value fails a check
    """

    def _check_space_obs(obs, method_name: str):
        # Validate `obs`, descending one level into Dict spaces.
        if not isinstance(observation_space, Dict):
            _check_obs(obs, observation_space, method_name)
            return

        assert isinstance(
            obs, dict
        ), f"The observation returned by `{method_name}()` must be a dictionary"
        for key, subspace in observation_space.spaces.items():
            # Without this, a missing key would surface as a bare KeyError
            # instead of the AssertionError every other failure raises.
            assert key in obs, (
                f"Error while checking key={key}: the observation returned by "
                f"`{method_name}()` has no such key"
            )
            try:
                _check_obs(obs[key], subspace, method_name)
            except AssertionError as e:
                # Prefix the failing key and keep the original error as cause.
                raise AssertionError(
                    f"Error while checking key={key}: " + str(e)
                ) from e

    # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists
    _check_space_obs(env.reset(), "reset")

    # Sample a random action
    action = action_space.sample()
    data = env.step(action)

    assert (
        len(data) == 4
    ), "The `step()` method must return four values: obs, reward, done, info"

    # Unpack
    obs, reward, done, info = data

    _check_space_obs(obs, "step")

    # We also allow int because the reward will be cast to float
    assert isinstance(
        reward, (float, int, np.float32)
    ), "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    assert isinstance(
        info, dict
    ), "The `info` returned by `step()` must be a python dictionary"
示例#6
0
def get_obs_batch(batch_size: int, obs_space: Space,
                  num_tasks: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Build a batch of observations, each followed by a task vector, together
    with the task index of every row. `obs_space` must produce
    one-dimensional samples.

    Each row is a fresh sample from `obs_space` concatenated with the result
    of `one_hot_tensor(num_tasks)` (presumably a one-hot vector of length
    `num_tasks` -- confirm against that helper). The task index of a row is
    the column of the nonzero entry in the task part of that row.

    :param batch_size: Number of rows in the batch
    :param obs_space: Space the observations are sampled from
    :param num_tasks: Passed through to `one_hot_tensor`
    :return: The stacked batch and the tensor of per-row task indices
    """
    sample_shape = obs_space.sample().shape
    assert len(sample_shape) == 1
    feature_len = sample_shape[0]

    rows = [
        torch.cat([torch.Tensor(obs_space.sample()),
                   one_hot_tensor(num_tasks)])
        for _ in range(batch_size)
    ]
    obs = torch.stack(rows)

    # Locate the nonzero entries of the task part of every row.
    hot_positions = obs[:, feature_len:].nonzero()
    # Exactly one nonzero entry per row, reported in row order.
    assert hot_positions[:, 0].tolist() == list(range(batch_size))

    return obs, hot_positions[:, 1]
示例#7
0
def preprocess_obs(obs: th.Tensor,
                   device,
                   observation_space: spaces.Space,
                   normalize_images: bool = True) -> th.Tensor:
    """
    Turn a raw observation into a tensor on ``device``.

    - ``JsonGraph``: the space's own ``converter`` is applied and the result
      is moved to ``device``.
    - ``Box``: cast to float; image spaces are additionally divided by 255
      when ``normalize_images`` is set.
    - ``Discrete``: one-hot encoded as float.
    - ``MultiDiscrete``: one one-hot encoding per sub-space, concatenated.
    - ``MultiBinary``: cast to float.

    :param obs: Observation
    :param device: Device the resulting tensor is moved to
    :param observation_space: Space the observation belongs to
    :param normalize_images: Whether to normalize images or not
        (True by default)
    :return: The preprocessed observation
    :raises NotImplementedError: If the space type is not handled
    """
    # Graph observations are converted by the space itself.
    if isinstance(observation_space, JsonGraph):
        return observation_space.converter(obs).to(device)

    obs = th.as_tensor(obs).to(device)

    if isinstance(observation_space, spaces.Box):
        as_float = obs.float()
        if is_image_space(observation_space) and normalize_images:
            return as_float / 255.0
        return as_float

    if isinstance(observation_space, spaces.Discrete):
        # One hot encoding and convert to float to avoid errors
        class_ids = obs.long()
        return F.one_hot(class_ids, num_classes=observation_space.n).float()

    if isinstance(observation_space, spaces.MultiDiscrete):
        # One column per sub-space, each encoded with its own class count.
        columns = th.split(obs.long(), 1, dim=1)
        encoded = [
            F.one_hot(column.long(),
                      num_classes=int(observation_space.nvec[idx])).float()
            for idx, column in enumerate(columns)
        ]
        total_width = sum(observation_space.nvec)
        return th.cat(encoded, dim=-1).view(obs.shape[0], total_width)

    if isinstance(observation_space, spaces.MultiBinary):
        return obs.float()

    raise NotImplementedError(
        f"Preprocessing not implemented for {observation_space}")
示例#8
0
def _check_obs(obs, observation_space: spaces.Space, method_name: str):
    """Validate an observation against its space, recursing into containers.

    The space must contain the observation, and the observation must have the
    Python type matching the space kind: ``int`` for ``Discrete``,
    ``np.ndarray`` for ``Box`` / ``MultiBinary`` / ``MultiDiscrete``,
    ``tuple`` for ``Tuple`` and ``dict`` for ``Dict``. Elements of ``Tuple``
    and ``Dict`` observations are validated against their sub-spaces.

    Args:
        obs: The observation to check
        observation_space: The observation space of the observation
        method_name: The method name that generated the observation

    Raises:
        AssertionError: If any check fails
    """
    pre = f"The observation returned by the `{method_name}()` method"

    is_member = observation_space.contains(obs)
    assert (
        is_member
    ), f"{pre} is not contained with the observation space ({observation_space})"

    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(
            obs, int
        ), f"The observation returned by `{method_name}()` method must be an int, actually {type(obs)}"
        return

    array_space_types = (spaces.Box, spaces.MultiBinary, spaces.MultiDiscrete)
    if isinstance(observation_space, array_space_types):
        assert isinstance(
            obs, np.ndarray
        ), f"The observation returned by `{method_name}()` method must be a numpy array, actually {type(obs)}"
        return

    if isinstance(observation_space, spaces.Tuple):
        assert isinstance(
            obs, tuple
        ), f"The observation returned by the `{method_name}()` method must be a tuple, actually {type(obs)}"
        # Pair each element with its sub-space and validate it in turn.
        for element, element_space in zip(obs, observation_space.spaces):
            _check_obs(element, element_space, method_name)
        return

    if isinstance(observation_space, spaces.Dict):
        assert isinstance(
            obs, dict
        ), f"The observation returned by the `{method_name}()` method must be a dict, actually {type(obs)}"
        # Validate the entry stored under every key the space declares.
        for name in observation_space.keys():
            _check_obs(obs[name], observation_space[name], method_name)
示例#9
0
def check_run(env: gym.Env, action_space: spaces.Space):
    """Check that the webot env's step counter tracks the steps actually taken.

    Runs 3 episodes of at most 100 random actions each. When an episode
    reports `done`, or when the step budget is used up without `done`,
    `env.steps_in_run` must equal the number of `step()` calls made since the
    last `reset()`.

    Args:
        env: The environment to run
        action_space: The space random actions are sampled from

    Raises:
        AssertionError: If `env.steps_in_run` disagrees with the step count
    """
    num_env = 3
    time_steps = 100
    for _ in range(num_env):
        env.reset()
        for step_count in range(1, time_steps + 1):
            action = action_space.sample()
            _, _, done, _ = env.step(action)
            # Truthiness rather than `is True`: a numpy boolean is not the
            # `True` singleton, so an identity test would never end the episode.
            if done:
                assert step_count == env.steps_in_run, \
                    "`env.steps_in_run` must equal the number of steps taken when the episode is done"
                break
        else:
            # The step budget ran out without the episode finishing.
            assert env.steps_in_run == time_steps, \
                "`env.steps_in_run` must equal the number of steps taken when the episode is not done"
示例#10
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Set up the parent DQN model and the convolutional feature extractor.

        The parent ``DQNTorchModel`` is initialised with a flat float space of
        ``obs_space.shape[0] * obs_space.shape[1] * 4`` elements instead of
        ``obs_space`` itself. ``self.convfilter`` applies two Conv2d /
        BatchNorm2d / ReLU stages (21 -> 8 -> 4 channels), flattens, and
        projects to 32 features.

        NOTE(review): with ``kernel_size=3`` and ``padding=2`` (default
        stride) each Conv2d grows both spatial dimensions by 2, so an H x W
        input flattens to ``4 * (H + 4) * (W + 4)`` values, while the Linear
        layer expects ``obs_space.shape[0] * obs_space.shape[1] * 4``.
        Confirm the spatial size that is actually fed to ``convfilter``.

        Args:
            obs_space: Observation space; its first two ``shape`` entries
                are used to size the model
            action_space: Forwarded to ``DQNTorchModel``
            num_outputs: Forwarded to ``DQNTorchModel``
            model_config: Forwarded to ``DQNTorchModel``
            name: Forwarded to ``DQNTorchModel``
        """
        # Shared by the flat space given to the parent and the Linear layer.
        flat_size = obs_space.shape[0] * obs_space.shape[1] * 4

        # `np.float` was only an alias of the builtin float and no longer
        # exists in NumPy >= 1.24; float64 is the dtype it resolved to.
        DQNTorchModel.__init__(
            self,
            Space(shape=(flat_size, ), dtype=np.float64), action_space,
            num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.convfilter = nn.Sequential(
            nn.Conv2d(21, 8, kernel_size=3, padding=2),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Conv2d(8, 4, kernel_size=3, padding=2),
            nn.BatchNorm2d(4),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flat_size, 32),
            nn.ReLU(),
        )
示例#11
0
def check_reset_step(env: gym.Env, observation_space: spaces.Space,
                     action_space: spaces.Space):
    """Check the observations produced by reset() and step().

    Two consecutive resets must yield observations that differ in the slices
    ``[1:8]`` and ``[10:]``. After three random steps, every observation must
    pass ``_check_obs`` and the last one must differ from the second reset
    observation in the slices ``[0:3]``, ``[6:9]`` and ``[10:]``.

    Args:
        env: The environment to exercise
        observation_space: Space the observations are validated against
        action_space: Space random actions are sampled from

    Raises:
        AssertionError: If an observation is invalid or a compared slice
            is unchanged
    """
    first_reset = env.reset()
    _check_obs(first_reset, observation_space, 'reset')
    second_reset = env.reset()

    # (slice to compare, message raised when the slice is unchanged)
    reset_checks = (
        (slice(1, 8),
         "The infos of the observation must differ after reset the webot env."),
        (slice(10, None),
         "The infos of the lidar data must differ after reset the webot env."),
    )
    for window, message in reset_checks:
        assert (first_reset[window] != second_reset[window]).any(), message

    for _ in range(3):
        latest_obs, _, _, _ = env.step(action_space.sample())
        _check_obs(latest_obs, observation_space, 'step')

    step_checks = (
        (slice(0, 3),
         "The information of observation must be updated after the first action"),
        (slice(6, 9),
         "The information of observation must be updated after the first action"),
        (slice(10, None),
         "The information of lidar data must be updated after the first action"),
    )
    for window, message in step_checks:
        assert (latest_obs[window] != second_reset[window]).any(), message
示例#12
0
def get_action_dim(action_space: spaces.Space) -> int:
    """
    Return the number of values that make up one action of the given space.

    :param action_space: The action space to measure
    :return: Product of the shape for ``Box``, 1 for ``Discrete``, the number
        of sub-spaces for ``MultiDiscrete``, ``n`` for ``MultiBinary`` and
        the result of ``get_dimension()`` for ``HybridBase``
    :raises NotImplementedError: If the space type is not handled
    """
    if isinstance(action_space, spaces.Box):
        flat_size = np.prod(action_space.shape)
        return int(flat_size)

    if isinstance(action_space, spaces.Discrete):
        # A discrete action is a single integer.
        return 1

    if isinstance(action_space, spaces.MultiDiscrete):
        # One entry per discrete sub-action.
        return len(action_space.nvec)

    if isinstance(action_space, spaces.MultiBinary):
        # One entry per binary sub-action.
        return int(action_space.n)

    if isinstance(action_space, HybridBase):
        return action_space.get_dimension()

    raise NotImplementedError(
        f"{action_space} action space is not supported")
示例#13
0
def _check_returned_values(env: gym.Env, observation_space: spaces.Space,
                           action_space: spaces.Space) -> None:
    """
    Validate what the env returns from `.reset()` and `.step()`.

    The env is reset once and stepped once with a randomly sampled action.
    Both observations are validated with `_check_obs`, and the reward, done
    flag and info dict returned by `step()` are type-checked. For a
    `gym.GoalEnv` the reward is also compared with `env.compute_reward`.

    :param env: The environment to check
    :param observation_space: The observation space declared by the environment
    :param action_space: The action space a random action is sampled from
    :raises AssertionError: If any returned value fails a check
    """
    # `env` derives from gym.Env, so `reset()` and `step()` are assumed to exist.
    initial_obs = env.reset()
    _check_obs(initial_obs, observation_space, "reset")

    # Take one step with a random action.
    step_result = env.step(action_space.sample())

    result_length = len(step_result)
    assert (
        result_length == 4
    ), "The `step()` method must return four values: obs, reward, done, info"

    next_obs, reward, done, info = step_result

    _check_obs(next_obs, observation_space, "step")

    # int is accepted too, since the reward gets cast to float later on.
    reward_ok = isinstance(reward, (float, int))
    assert reward_ok, "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    info_ok = isinstance(info, dict)
    assert info_ok, "The `info` returned by `step()` must be a python dictionary"

    if not isinstance(env, gym.GoalEnv):
        return

    # For a GoalEnv, the observation keys have already been checked at reset.
    recomputed = env.compute_reward(next_obs["achieved_goal"],
                                    next_obs["desired_goal"], info)
    assert reward == recomputed
示例#14
0
 def reward_space(self) -> Space:
     """Return a ``Space`` of shape ``(1,)`` and dtype ``np.float32`` for the reward."""
     return Space(dtype=np.float32, shape=(1, ))