示例#1
0
def test_check_iterate_and_step(dataset: str, expected_obs_shape: Tuple[int,
                                                                        ...],
                                batch_size: int):
    """Check the spaces and observations of a 5-task `IncrementalRLSetting`.

    Verifies the transforms and task-label flags of the setting, then the
    observation spaces of the train / val / test environments, and finally
    that the observations obtained through `reset`, `step` and iteration on
    the training environment all carry the label of the first task (0).

    Args:
        dataset: Passed as the `dataset` argument of the setting.
        expected_obs_shape: Expected shape of a single (un-batched) observation.
        batch_size: Batch size used for the train / val environments. `None`
            is handled as "no batching".
    """
    nb_tasks = 5
    # TODO: Fix the default transforms, shouldn't necessarily have `to_tensor` in there.
    setting = IncrementalRLSetting(
        dataset=dataset,
        nb_tasks=nb_tasks,
        train_transforms=[Transforms.to_tensor],
        val_transforms=[Transforms.to_tensor],
        test_transforms=[Transforms.to_tensor],
    )
    assert setting.train_transforms == [Transforms.to_tensor]
    assert setting.val_transforms == [Transforms.to_tensor]
    assert setting.test_transforms == [Transforms.to_tensor]
    # TODO: Interesting issue: can't pickle only the to_tensor transform, as it modifies
    # the given class in-place?

    assert len(setting.train_task_schedule) == nb_tasks
    assert not setting.smooth_task_boundaries

    # TODO: Should we have the task label space in this case?
    assert setting.task_labels_at_train_time
    assert not setting.task_labels_at_test_time

    if batch_size is None:
        expected_obs_batch_shape = expected_obs_shape
    else:
        expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    with setting.train_dataloader(batch_size=batch_size) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[0] == spaces.Box(0.0,
                                          1.0,
                                          expected_obs_batch_shape,
                                          dtype=np.float32)
        # Pick the expected space *before* asserting: writing
        # `assert a == b if cond else c` would only assert the truthiness of
        # `c` in the un-batched case, which always passes.
        if batch_size:
            expected_task_label_space = spaces.MultiDiscrete(
                [nb_tasks] * batch_size)
        else:
            expected_task_label_space = spaces.Discrete(nb_tasks)
        assert obs_space[1] == expected_task_label_space

    with setting.val_dataloader(batch_size=batch_size) as temp_env:
        obs_space = temp_env.observation_space

        assert obs_space[0] == spaces.Box(0.0,
                                          1.0,
                                          expected_obs_batch_shape,
                                          dtype=np.float32)
        if batch_size:
            # The batched task-label spaces are compared through their string
            # representation here.
            assert str(obs_space[1]) == str(
                spaces.MultiDiscrete([nb_tasks] * batch_size))
        else:
            # TODO: Should the task labels be given in the valid dataloader if they
            # aren't during testing?
            assert obs_space[1] == spaces.Discrete(nb_tasks)

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works atm.

    with setting.test_dataloader(batch_size=None) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[1] == Sparse(spaces.Discrete(nb_tasks), sparsity=1.0)

    def check_obs(obs, task_label: int = None):
        """Assert that `obs` carries `task_label` (for every item of a batch)."""
        if batch_size is None:
            assert obs[1] == task_label
        else:
            assert isinstance(obs,
                              IncrementalRLSetting.Observations), obs[0].shape
            # The loop variable must not reuse the name `task_label`, otherwise
            # every label would only be compared with itself.
            assert obs.task_labels is task_label or all(
                label == task_label for label in obs.task_labels)

    env = setting.train_dataloader(batch_size=batch_size)
    try:
        reset_obs = env.reset()
        check_obs(reset_obs, task_label=0)

        for _ in range(5):
            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs, task_label=0)

        for iter_obs in take(env, 3):
            check_obs(iter_obs, task_label=0)
            _ = env.send(env.action_space.sample())
            env.render("human")
    finally:
        # Close the environment even when one of the checks above fails.
        env.close()
示例#2
0
    def test_check_iterate_and_step(
        self, setting_kwargs: Dict[str, Any], batch_size: Optional[int],
    ):
        """ Test that the observations are of the right type and shape, regardless
        of whether we iterate on the env by calling 'step' or by using it as a
        DataLoader.

        The expected spaces are read from a reference env created with
        `gym.make(setting_kwargs["dataset"])`. The train and val environments
        are created with `batch_size`; the test environment is always created
        with `batch_size=None`.

        Args:
            setting_kwargs: Keyword arguments forwarded to `self.Setting`. Must
                contain a "dataset" entry that `gym.make` accepts.
            batch_size: Batch size of the train / val environments, or None for
                un-batched environments.
        """
        # Reference (un-batched) spaces, taken from a plain gym env.
        with gym.make(setting_kwargs["dataset"]) as temp_env:
            expected_x_space = temp_env.observation_space
            expected_action_space = temp_env.action_space

        setting = self.Setting(**setting_kwargs, num_workers=0)

        if batch_size is not None:
            expected_batched_x_space = batch_space(expected_x_space, batch_size)
            # NOTE: Built from the setting's action space, which is asserted to be
            # equal to `expected_action_space` just below.
            expected_batched_action_space = batch_space(
                setting.action_space, batch_size
            )
        else:
            expected_batched_x_space = expected_x_space
            expected_batched_action_space = expected_action_space

        assert setting.observation_space.x == expected_x_space
        assert setting.action_space == expected_action_space

        # TODO: This is changing:
        assert setting.train_transforms == []
        # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

        def check_env_spaces(env: gym.Env) -> None:
            """ Check the observation / action spaces of `env` against the expected
            spaces of the enclosing test (read from the enclosing scope at call time).
            """
            if env.batch_size is not None:
                # TODO: This might not be totally accurate, for example because the
                # TransformObservation wrapper applied to a VectorEnv doesn't change the
                # single_observation_space, AFAIR.
                assert env.single_observation_space.x == expected_x_space
                assert env.single_action_space == expected_action_space
                assert isinstance(env.observation_space, TypedDictSpace), (env, env.observation_space)
                assert env.observation_space.x == expected_batched_x_space
                assert env.action_space == expected_batched_action_space
            else:
                assert env.observation_space.x == expected_x_space
                assert env.action_space == expected_action_space

        # FIXME: Move this to an instance method on the test class so that subclasses
        # can change stuff in it.
        def check_obs(obs: ContinualRLSetting.Observations) -> None:
            """ Check the type of `obs`, that `obs.x` fits in the expected (batched)
            space and, for ContinualRLSetting only, that there are no task labels.
            """
            if isinstance(self.Setting, partial):
                # NOTE: This happens when we sneakily switch out the self.Setting
                # attribute in other tests (for the SettingProxy for example).
                assert isinstance(obs, self.Setting.args[0].Observations)
            else:
                assert isinstance(obs, self.Setting.Observations)
            # `expected_batched_x_space` is looked up in the enclosing scope when this
            # runs, so the re-binding done before the test dataloader applies here.
            assert obs.x in expected_batched_x_space
            # In this particular case here, the task labels should be None.
            # FIXME: For IncrementalRL, this isn't correct! TestIncrementalRL should
            # therefore have its own version of this function.
            if self.Setting is ContinualRLSetting:
                assert obs.task_labels is None or all(
                    task_label == None for task_label in obs.task_labels
                )

        # --- Training environment ---
        with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            obs = env.reset()
            # BUG: TODO: The observation space that we use should actually check with
            # isinstance and over the fields that fit in the space. Here there is a bug
            # because the env observations also have a `done` field, while the space
            # doesn't.
            # assert obs in env.observation_space
            assert obs.x in env.observation_space.x  # this works though.

            # BUG: This doesn't currently work: (would need a tuple value rather than an
            # array).
            # assert obs.task_labels in env.observation_space.task_labels

            if batch_size:
                # FIXME: This differs between ContinualRL and IncrementalRL:
                if not setting.known_task_boundaries_at_train_time:
                    assert obs.task_labels[0] in setting.task_label_space
                    assert tuple(obs.task_labels) in env.observation_space.task_labels
                else:
                    assert obs.task_labels[0] in setting.task_label_space
                    assert obs.task_labels in env.observation_space.task_labels
                    assert (
                        np.array(obs.task_labels) in env.observation_space.task_labels
                    )
            else:
                assert obs.task_labels in env.observation_space.task_labels

            # Same checks on the observations obtained through reset / step / iteration.
            reset_obs = env.reset()
            check_obs(reset_obs)

            # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0')
            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        # --- Validation environment ---
        with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        # --- Test environment ---
        # NOTE: Limiting the batch size at test time to None (i.e. a single env)
        # because of how the Monitor class works atm.
        # The expected spaces are re-bound to their un-batched versions, which is
        # what the two nested check functions above will see from now on.
        batch_size = None
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space
        with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size is None
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            # NOTE: Can't do this here, unless the episode is over, because the Monitor
            # doesn't want us to end an episode early!
            # for iter_obs in take(env, 3):
            #     check_obs(iter_obs)
            #     _ = env.send(env.action_space.sample())

        # NOTE(review): a second test env is created here and iterated on directly,
        # without a prior `reset` / `step`. The note above says that iterating at
        # test time is a problem when an episode is interrupted -- confirm that
        # iterating on a fresh env is the intended way around that. Also note that
        # `num_workers` is not passed in this call, unlike the calls above.
        with setting.test_dataloader(batch_size=batch_size) as env:
            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())
示例#3
0
def test_check_iterate_and_step(
    dataset: str, expected_obs_shape: Tuple[int, ...], batch_size: int
):
    """ Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.

    Args:
        dataset: Passed as the `dataset` argument of `ContinualRLSetting`.
        expected_obs_shape: Expected shape of a single (un-batched) observation.
        batch_size: Batch size of the train / val environments. `None` is
            handled as "no batching". The test environment is always created
            with `batch_size=None`.
    """
    setting = ContinualRLSetting(dataset=dataset)

    if batch_size is None:
        expected_obs_batch_shape = expected_obs_shape
    else:
        expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    # Test the shapes of the obs generated by the train/val/test dataloaders.
    dataloader_methods = [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]
    assert setting.nb_tasks == 1

    with setting.train_dataloader(batch_size=batch_size) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )
        obs = temp_env.reset()
        # BUG:
        # assert has_tensor_support(temp_env.observation_space)
        assert obs[0].shape == temp_env.observation_space[0].shape

    with setting.val_dataloader(batch_size=batch_size) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works atm.
    with setting.test_dataloader(batch_size=None) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_shape, dtype=np.float32
        )

    def check_obs(obs, expected_shape):
        """Check the type and shape of `obs`, and that it has no task labels."""
        assert isinstance(obs, ContinualRLSetting.Observations), obs[0].shape
        assert obs.x.shape == expected_shape
        assert obs.task_labels is None or all(
            task_label is None for task_label in obs.task_labels
        )

    for dataloader_method in dataloader_methods:
        print(f"Testing dataloader method {dataloader_method.__name__}")
        # FIXME: Remove this special case if we allow batched envs at test time.
        is_test = dataloader_method.__name__ == "test_dataloader"
        env_batch_size = None if is_test else batch_size
        # The expected shape is passed to `check_obs` explicitly, so that no
        # shared variable has to be swapped and restored between iterations.
        expected_shape = expected_obs_shape if is_test else expected_obs_batch_shape

        # The context manager releases each environment once its checks are
        # done, including when one of them fails.
        with dataloader_method(batch_size=env_batch_size) as env:
            if is_test:
                assert env.batch_size is None
            else:
                assert env.batch_size == batch_size

            reset_obs = env.reset()
            check_obs(reset_obs, expected_shape)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs, expected_shape)

            for iter_obs in take(env, 3):
                check_obs(iter_obs, expected_shape)
                # The value returned by `send` is not needed here.
                env.send(env.action_space.sample())
示例#4
0
 def shorten(dataloader: DataLoader):
     """Return `take(dataloader, n=self.max_num_batches)`.

     `self` is not a parameter: it is read from the enclosing scope.
     Presumably `take` yields at most `n` items -- confirm against its
     definition.
     """
     batch_limit = self.max_num_batches
     return take(dataloader, n=batch_limit)