Example #1
import copy

from numpy.testing import assert_array_equal

from gym.spaces import Space
from gym.vector.utils import batch_space, iterate

# `assert_rng_equal` is a helper from the surrounding test suite that
# compares the state of two np.random generators.


def test_deterministic(space: Space, n: int, base_seed: int):
    """Tests that batched spaces are deterministic, using a deep-copied version."""
    # Copy the space and check that the RNG states match but the
    # generators are not reference-equal
    space_a = space
    space_a.seed(base_seed)
    space_b = copy.deepcopy(space_a)
    assert_rng_equal(space_a.np_random, space_b.np_random)
    assert space_a.np_random is not space_b.np_random

    # Batch the spaces and check that their np_random generators are not reference-equal
    space_a_batched = batch_space(space_a, n)
    space_b_batched = batch_space(space_b, n)
    assert_rng_equal(space_a_batched.np_random, space_b_batched.np_random)
    assert space_a_batched.np_random is not space_b_batched.np_random
    # Check that the batched space's np_random is not reference-equal to the original space's
    assert space_a.np_random is not space_a_batched.np_random

    # Check that the random number generators of batched spaces a and b
    # are not affected by sampling from the original space
    space_a.sample()
    space_a_batched_sample = space_a_batched.sample()
    space_b_batched_sample = space_b_batched.sample()
    for a_sample, b_sample in zip(
            iterate(space_a_batched, space_a_batched_sample),
            iterate(space_b_batched, space_b_batched_sample),
    ):
        if isinstance(a_sample, tuple):
            assert len(a_sample) == len(b_sample)
            for a_subsample, b_subsample in zip(a_sample, b_sample):
                assert_array_equal(a_subsample, b_subsample)
        else:
            assert_array_equal(a_sample, b_sample)
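
A minimal sketch (not part of the original test suite) of the property the first assertions rely on: `copy.deepcopy` clones a space together with its RNG state, so both copies sample identically while holding distinct generator objects:

import copy

import numpy as np
from gym.spaces import Box

space_a = Box(low=0.0, high=1.0, shape=(2,))
space_a.seed(123)
space_b = copy.deepcopy(space_a)

# Same RNG state, so the next samples agree...
assert np.array_equal(space_a.sample(), space_b.sample())
# ...but the generators are distinct objects.
assert space_a.np_random is not space_b.np_random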
Example #2
    def __init__(self, num_envs, observation_space, action_space):
        self.num_envs = num_envs
        self.is_vector_env = True
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = batch_space(action_space, n=num_envs)

        self.closed = False
        self.viewer = None

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space
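
For reference, a minimal sketch (not from the original class) of how the batched and single observation spaces relate for a `Box` space:

from gym.spaces import Box
from gym.vector.utils import batch_space

single_observation_space = Box(low=-1.0, high=1.0, shape=(3,))
observation_space = batch_space(single_observation_space, n=8)

assert single_observation_space.shape == (3,)
# `batch_space` prepends the batch dimension for Box spaces.
assert observation_space.shape == (8, 3)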
Example #3
    def fit(self, train_env: Environment, valid_env: Environment):
        for i, batch in enumerate(train_env):
            if isinstance(batch, Observations):
                observations, rewards = batch, None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            y_preds = train_env.action_space.sample()
            if rewards is None:
                action_space = train_env.action_space
                if train_env.action_space.shape:
                    # This is a bit complicated, but it's needed because the
                    # last batch might have a smaller batch dimension than the
                    # env's action space (this only happens on the last batch
                    # in supervised learning).
                    # TODO: Should we perhaps drop the last batch?
                    batch_size = getattr(train_env, "num_envs",
                                         getattr(train_env, "batch_size", 0))
                    env_is_batched = batch_size is not None and batch_size >= 1
                    if env_is_batched:
                        # NOTE: Need to pass an action space that actually reflects the batch
                        # size, even for the last batch!
                        obs_batch_size = (observations.x.shape[0]
                                          if observations.x.shape else None)
                        action_space_batch_size = (
                            train_env.action_space.shape[0]
                            if train_env.action_space.shape else None)
                        if (obs_batch_size is not None
                                and obs_batch_size != action_space_batch_size):
                            action_space = batch_space(
                                train_env.single_action_space, obs_batch_size)

                y_preds = action_space.sample()
                rewards = train_env.send(Actions(y_pred=y_preds))
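
This rebatching pattern recurs in several of the examples below; a hedged refactoring sketch (the helper name is hypothetical, and the `observations.x` attribute access mirrors the snippet above):

from gym.vector.utils import batch_space


def rebatch_action_space(env, observations):
    """Hypothetical helper capturing the pattern above: when the observed
    batch is smaller than the env's batch size (e.g. the last batch of an
    epoch), rebuild the action space to match the observations."""
    action_space = env.action_space
    if not action_space.shape:
        return action_space
    # Envs may expose their batch size as `num_envs` or `batch_size`.
    batch_size = getattr(env, "num_envs", getattr(env, "batch_size", 0))
    if batch_size is None or batch_size < 1:
        return action_space
    obs_batch_size = observations.x.shape[0] if observations.x.shape else None
    action_space_batch_size = action_space.shape[0]
    if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
        return batch_space(env.single_action_space, obs_batch_size)
    return action_space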
Example #4
    def __init__(self, num_envs, observation_space, action_space):
        super(VectorEnv, self).__init__()
        self.num_envs = num_envs
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = Tuple((action_space,) * num_envs)

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space
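
Note that this variant batches the action space with `Tuple` instead of `batch_space`, which produces a structurally different space. A small sketch of the difference, assuming a `Discrete(2)` action space and `num_envs=3`:

from gym.spaces import Discrete, Tuple
from gym.vector.utils import batch_space

action_space = Discrete(2)

tuple_batched = Tuple((action_space,) * 3)       # Tuple of 3 Discrete(2) spaces
vector_batched = batch_space(action_space, n=3)  # MultiDiscrete([2, 2, 2])

# The two strategies yield different space types, so their samples have
# different container types as well (tuple of ints vs. ndarray).
assert tuple_batched != vector_batched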
Example #5
    def __init__(self, num_envs: int, observation_space: gym.Space,
                 action_space: gym.Space):
        """Base class for vectorized environments.

        Args:
            num_envs: Number of environments in the vectorized environment.
            observation_space: Observation space of a single environment.
            action_space: Action space of a single environment.
        """
        self.num_envs = num_envs
        self.is_vector_env = True
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = batch_space(action_space, n=num_envs)

        self.closed = False
        self.viewer = None

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space
Example #6
import numpy as np

from gym.spaces import Space
from gym.vector.utils import batch_space, iterate

# `assert_rng_equal` is a helper from the surrounding test suite that
# compares the state of two np.random generators.


def test_rng_different_at_each_index(space: Space, n: int, base_seed: int):
    """
    Tests that the RNG values produced at each index are different,
    to catch the case where the RNG was copied for each subspace.
    """
    space.seed(base_seed)

    batched_space = batch_space(space, n)
    assert space.np_random is not batched_space.np_random
    assert_rng_equal(space.np_random, batched_space.np_random)

    batched_sample = batched_space.sample()
    sample = list(iterate(batched_space, batched_sample))
    assert not all(np.all(element == sample[0]) for element in sample), sample
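
For context, a minimal sketch (not from the original test) of what `iterate` does: it splits a batched sample into one element per sub-environment index:

from gym.spaces import Box
from gym.vector.utils import batch_space, iterate

space = Box(low=0.0, high=1.0, shape=(2,))
batched_space = batch_space(space, n=4)
batched_sample = batched_space.sample()  # ndarray of shape (4, 2)

elements = list(iterate(batched_space, batched_sample))
assert len(elements) == 4
assert all(element.shape == (2,) for element in elements)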
Example #7
    def fit(self, train_env: Environment, valid_env: Environment):
        for i, batch in enumerate(train_env):
            if isinstance(batch, Observations):
                observations, rewards = batch, None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            y_preds = train_env.action_space.sample()
            if rewards is None:
                action_space = train_env.action_space
                if train_env.action_space.shape:
                    obs_batch_size = observations.x.shape[0]
                    # BUG: Fix the `batch_size` attribute on `Batch` so that it
                    # works even when task labels are None, by checking whether
                    # there are one or more shapes and, if there are, that their
                    # first dimensions match.
                    action_space_batch_size = action_space.shape[0]
                    if obs_batch_size != action_space_batch_size:
                        action_space = batch_space(
                            train_env.single_action_space, obs_batch_size)

                rewards = train_env.send(Actions(action_space.sample()))
Example #8
from gym.vector.utils import batch_space


def test_batch_space_custom_space(space, expected_batch_space_4):
    batch_space_4 = batch_space(space, n=4)
    assert batch_space_4 == expected_batch_space_4
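
The parametrization is not shown in the snippet; a hedged sketch of one plausible pair, relying on `batch_space` falling back to a `Tuple` of `n` copies for custom `Space` subclasses:

from gym.spaces import Space, Tuple
from gym.vector.utils import batch_space


class CustomSpace(Space):
    """Minimal custom space for illustration (hypothetical)."""

    def sample(self):
        return None

    def contains(self, x):
        return True

    def __eq__(self, other):
        return isinstance(other, CustomSpace)


space = CustomSpace()
expected_batch_space_4 = Tuple((CustomSpace(),) * 4)

# batch_space has no specialized rule for custom spaces, so it falls
# back to a Tuple of n (copies of the) space.
assert batch_space(space, n=4) == expected_batch_space_4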
Example #9
    def test_loop(self, method: Method) -> "IncrementalSetting.Results":
        """ (WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of
        whether you're on the RL or the CL side of the tree.

        NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the
        method has the `on_task_switch` callback defined, then a callback
        wrapper is added that will invoke the method's `on_task_switch` and pass
        it the task id (or `None` if `not self.task_labels_available_at_test_time`)
        when a task boundary is encountered.

        This `on_task_switch` 'callback' wrapper gets added the same way for
        both Supervised and Reinforcement Learning settings.
        """
        test_env = self.test_dataloader()

        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        if self.known_task_boundaries_at_test_time and self.nb_tasks > 1:

            def _on_task_switch(step: int, *args) -> None:
                # TODO: This attribute isn't on IncrementalSetting itself, it's defined
                # on ContinualRLSetting.
                if step not in test_env.boundary_steps:
                    return
                if not hasattr(method, "on_task_switch"):
                    logger.warning(
                        UserWarning(
                            f"On a task boundary, but since your method doesn't "
                            f"have an `on_task_switch` method, it won't know about "
                            f"it! "))
                    return

                if self.task_labels_at_test_time:
                    # TODO: Should this 'test boundary' step depend on the batch size?
                    task_steps = sorted(test_env.boundary_steps)
                    # TODO: If the ordering of tasks were different (shuffled
                    # tasks for example), then this wouldn't work, we'd need a
                    # list of the task ids or something like that.
                    task_id = task_steps.index(step)
                    logger.debug(
                        f"Calling `method.on_task_switch({task_id})` "
                        f"since task labels are available at test-time.")
                    method.on_task_switch(task_id)
                else:
                    logger.debug(f"Calling `method.on_task_switch(None)` "
                                 f"since task labels aren't available at "
                                 f"test-time, but task boundaries are known.")
                    method.on_task_switch(None)

            test_env = StepCallbackWrapper(test_env,
                                           callbacks=[_on_task_switch])

        try:
            # If the Method has `test` defined, use it.
            method.test(test_env)
            test_env.close()
            # Get the metrics from the test environment
            test_results: Results = test_env.get_results()

        except NotImplementedError:
            logger.debug(f"Will query the method for actions at each step, "
                         f"since it doesn't implement a `test` method.")
            obs = test_env.reset()

            # TODO: Do we always have a maximum number of steps? or of episodes?
            # Will it work the same for Supervised and Reinforcement learning?
            # NOTE: May be None if the env doesn't define a step limit.
            max_steps = getattr(test_env, "step_limit", None)

            # Reset on the last step is causing trouble, since the env is closed.
            pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
            episode = 0

            for step in pbar:
                if obs is None:
                    break
                # NOTE: The env might not be closed, while `obs` is actually still there.
                # if test_env.is_closed():
                #     logger.debug(f"Env is closed")
                #     break
                # logger.debug(f"At step {step}")

                # BUG: This doesn't work if the env isn't batched.
                action_space = test_env.action_space
                batch_size = getattr(test_env, "num_envs",
                                     getattr(test_env, "batch_size", 0))
                env_is_batched = batch_size is not None and batch_size >= 1
                if env_is_batched:
                    # NOTE: Need to pass an action space that actually reflects the batch
                    # size, even for the last batch!
                    obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                    action_space_batch_size = (test_env.action_space.shape[0]
                                               if test_env.action_space.shape
                                               else None)
                    if (obs_batch_size is not None
                            and obs_batch_size != action_space_batch_size):
                        action_space = batch_space(
                            test_env.single_action_space, obs_batch_size)

                action = method.get_actions(obs, action_space)

                # logger.debug(f"action: {action}")
                # TODO: Remove this:
                if isinstance(action, Actions):
                    action = action.y_pred
                if isinstance(action, Tensor):
                    action = action.cpu().numpy()

                if test_env.is_closed():
                    break

                obs, reward, done, info = test_env.step(action)

                if done and not test_env.is_closed():
                    # logger.debug(f"end of test episode {episode}")
                    obs = test_env.reset()
                    episode += 1

            test_env.close()
            test_results: TaskSequenceResults = test_env.get_results()

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
Example #10
    def test_loop(self, method: Method) -> "IncrementalSetting.Results":
        """ (WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of
        whether you're on the RL or the CL side of the tree.
        
        NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the
        method has the `on_task_switch` callback defined, then a callback
        wrapper is added that will invoke the method's `on_task_switch` and pass
        it the task id (or `None` if `not self.task_labels_available_at_test_time`) 
        when a task boundary is encountered.

        This `on_task_switch` 'callback' wrapper gets added the same way for
        both Supervised and Reinforcement Learning settings.
        """
        nb_tasks = self.get_attribute("nb_tasks")
        known_task_boundaries_at_test_time = self.get_attribute(
            "known_task_boundaries_at_test_time")
        task_labels_at_test_time = self.get_attribute(
            "task_labels_at_test_time")

        was_training = method.training
        method.set_testing()
        test_env = self.test_dataloader()

        if known_task_boundaries_at_test_time and nb_tasks > 1:
            # TODO: We need to have a way to inform the Method of task boundaries, if the
            # Setting allows it.
            # Not sure how to do this. It might be simpler to just do something like
            # `obs, rewards, done, info, task_switched = <endpoint>.step(actions)`?
            # # Add this wrapper that will call `on_task_switch` when the right step is
            # # reached.
            # test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])
            pass

        obs = test_env.reset()
        batch_size = test_env.batch_size
        max_steps: int = self.get_attribute("test_steps") // (batch_size or 1)

        # Reset on the last step is causing trouble, since the env is closed.
        pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
        episode = 0
        for step in pbar:
            if test_env.is_closed():
                logger.debug(f"Env is closed")
                break

            # BUG: This doesn't work if the env isn't batched.
            action_space = test_env.action_space
            env_is_batched = getattr(test_env, "num_envs", 0) >= 1
            if env_is_batched:
                # NOTE: Need to pass an action space that actually reflects the batch
                # size, even for the last batch!
                obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                action_space_batch_size = (test_env.action_space.shape[0]
                                           if test_env.action_space.shape else
                                           None)
                if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
                    action_space = batch_space(test_env.single_action_space,
                                               obs_batch_size)

            action = method.get_actions(obs, action_space)

            # logger.debug(f"action: {action}")
            obs, reward, done, info = test_env.step(action)

            # TODO: Add something to `info` that indicates when a task boundary is
            # reached, so that we can call the `on_task_switch` method on the Method
            # ourselves.

            if done and not test_env.is_closed():
                # logger.debug(f"end of test episode {episode}")
                obs = test_env.reset()
                episode += 1

        test_env.close()
        test_results = test_env.get_results()

        if was_training:
            method.set_training()

        return test_results