from functools import partial
from typing import Any, Dict, Optional, Tuple

import gym
import numpy as np
from gym import spaces
from gym.vector.utils import batch_space
from torch.utils.data import DataLoader

# NOTE: The import paths below are assumed from the Sequoia codebase layout and
# may need adjusting. `take` is a small repo helper; a minimal stand-in is
# sketched further below.
from sequoia.common.spaces import Sparse, TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.rl import ContinualRLSetting, IncrementalRLSetting


def test_check_iterate_and_step(
    dataset: str, expected_obs_shape: Tuple[int, ...], batch_size: Optional[int]
):
    # TODO: Fix the default transforms; they shouldn't necessarily include
    # `to_tensor`.
    setting = IncrementalRLSetting(
        dataset=dataset,
        nb_tasks=5,
        train_transforms=[Transforms.to_tensor],
        val_transforms=[Transforms.to_tensor],
        test_transforms=[Transforms.to_tensor],
    )
    assert setting.train_transforms == [Transforms.to_tensor]
    assert setting.val_transforms == [Transforms.to_tensor]
    assert setting.test_transforms == [Transforms.to_tensor]
    # TODO: Interesting issue: can't pickle only the `to_tensor` transform, as it
    # modifies the given class in-place?

    assert len(setting.train_task_schedule) == 5
    assert not setting.smooth_task_boundaries
    # TODO: Should we have the task label space in this case?
    assert setting.task_labels_at_train_time
    assert not setting.task_labels_at_test_time

    if batch_size is None:
        expected_obs_batch_shape = expected_obs_shape
    else:
        expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    with setting.train_dataloader(batch_size=batch_size) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )
        assert obs_space[1] == (
            spaces.MultiDiscrete([5] * batch_size)
            if batch_size
            else spaces.Discrete(5)
        )

    with setting.val_dataloader(batch_size=batch_size) as temp_env:
        # No task labels:
        obs_space = temp_env.observation_space
        assert obs_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )
        if batch_size:
            assert str(obs_space[1]) == str(spaces.MultiDiscrete([5] * batch_size))
        else:
            # TODO: Should the task labels be given in the val dataloader if they
            # aren't during testing?
            assert obs_space[1] == spaces.Discrete(5)

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works at the moment.
    with setting.test_dataloader(batch_size=None) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[1] == Sparse(spaces.Discrete(5), sparsity=1.0)

    def check_obs(obs, task_label: Optional[int] = None) -> None:
        if batch_size is None:
            assert obs[1] == task_label
        else:
            assert isinstance(obs, IncrementalRLSetting.Observations), obs[0].shape
            assert obs.task_labels is task_label or all(
                label == task_label for label in obs.task_labels
            )

    env = setting.train_dataloader(batch_size=batch_size)
    reset_obs = env.reset()
    check_obs(reset_obs, task_label=0)

    for i in range(5):
        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs, task_label=0)

    for iter_obs in take(env, 3):
        check_obs(iter_obs, task_label=0)
        _ = env.send(env.action_space.sample())
        env.render("human")

    env.close()
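
# Aside: the `MultiDiscrete([5] * batch_size)` assertions above follow from how
# gym batches a `Discrete` space across vectorized environments. A minimal,
# self-contained sketch (assuming a gym version that ships
# `gym.vector.utils.batch_space`):
def _sketch_batched_task_label_space() -> None:
    single_task_label_space = spaces.Discrete(5)  # one task label in {0, ..., 4}
    batched = batch_space(single_task_label_space, n=3)  # e.g. 3 batched envs
    assert batched == spaces.MultiDiscrete([5, 5, 5])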
def test_check_iterate_and_step(
    self, setting_kwargs: Dict[str, Any], batch_size: Optional[int],
):
    """ Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.
    """
    with gym.make(setting_kwargs["dataset"]) as temp_env:
        expected_x_space = temp_env.observation_space
        expected_action_space = temp_env.action_space

    setting = self.Setting(**setting_kwargs, num_workers=0)

    if batch_size is not None:
        expected_batched_x_space = batch_space(expected_x_space, batch_size)
        expected_batched_action_space = batch_space(setting.action_space, batch_size)
    else:
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space

    assert setting.observation_space.x == expected_x_space
    assert setting.action_space == expected_action_space

    # TODO: This is changing:
    assert setting.train_transforms == []
    # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

    def check_env_spaces(env: gym.Env) -> None:
        if env.batch_size is not None:
            # TODO: This might not be totally accurate, for example because the
            # TransformObservation wrapper applied to a VectorEnv doesn't change
            # the single_observation_space, AFAIR.
            assert env.single_observation_space.x == expected_x_space
            assert env.single_action_space == expected_action_space
            assert isinstance(env.observation_space, TypedDictSpace), (
                env,
                env.observation_space,
            )
            assert env.observation_space.x == expected_batched_x_space
            assert env.action_space == expected_batched_action_space
        else:
            assert env.observation_space.x == expected_x_space
            assert env.action_space == expected_action_space

    # FIXME: Move this to an instance method on the test class so that subclasses
    # can change stuff in it.
    def check_obs(obs: ContinualRLSetting.Observations) -> None:
        if isinstance(self.Setting, partial):
            # NOTE: This happens when we sneakily switch out the self.Setting
            # attribute in other tests (for the SettingProxy, for example).
            assert isinstance(obs, self.Setting.args[0].Observations)
        else:
            assert isinstance(obs, self.Setting.Observations)
        assert obs.x in expected_batched_x_space
        # In this particular case here, the task labels should be None.
        # FIXME: For IncrementalRL, this isn't correct! TestIncrementalRL should
        # therefore have its own version of this function.
        if self.Setting is ContinualRLSetting:
            assert obs.task_labels is None or all(
                task_label is None for task_label in obs.task_labels
            )

    with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        obs = env.reset()
        # BUG: TODO: The observation space that we use should actually check with
        # isinstance and over the fields that fit in the space. Here there is a
        # bug because the env observations also have a `done` field, while the
        # space doesn't.
        # assert obs in env.observation_space
        assert obs.x in env.observation_space.x  # this works though.

        # BUG: This doesn't currently work (it would need a tuple value rather
        # than an array):
        # assert obs.task_labels in env.observation_space.task_labels
        if batch_size:
            # FIXME: This differs between ContinualRL and IncrementalRL:
            if not setting.known_task_boundaries_at_train_time:
                assert obs.task_labels[0] in setting.task_label_space
                assert tuple(obs.task_labels) in env.observation_space.task_labels
            else:
                assert obs.task_labels[0] in setting.task_label_space
                assert obs.task_labels in env.observation_space.task_labels
                assert np.array(obs.task_labels) in env.observation_space.task_labels
        else:
            assert obs.task_labels in env.observation_space.task_labels

        reset_obs = env.reset()
        check_obs(reset_obs)

        # BUG: Environment is closed? (batch_size=3, dataset='CartPole-v0')
        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works at the moment.
    batch_size = None
    expected_batched_x_space = expected_x_space
    expected_batched_action_space = expected_action_space

    with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size is None
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        # NOTE: Can't do this here, unless the episode is over, because the
        # Monitor doesn't want us to end an episode early!
        # for iter_obs in take(env, 3):
        #     check_obs(iter_obs)
        #     _ = env.send(env.action_space.sample())

    with setting.test_dataloader(batch_size=batch_size) as env:
        # NOTE: Can't do this here, unless the episode is over, because the
        # Monitor doesn't want us to end an episode early!
        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())
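
# One possible way to address the FIXME above about `check_obs`: hoist it from
# a closure into an overridable instance method, so that an IncrementalRL test
# class can relax the "task labels are None" assertion. This is a sketch only;
# the class names and surrounding fixtures are assumed, not taken from the repo:
class TestContinualRLSetting:
    Setting = ContinualRLSetting

    def check_obs(self, obs, expected_x_space: gym.Space) -> None:
        assert isinstance(obs, self.Setting.Observations)
        assert obs.x in expected_x_space
        # ContinualRL: task labels are not given.
        assert obs.task_labels is None or all(
            label is None for label in obs.task_labels
        )


class TestIncrementalRLSetting(TestContinualRLSetting):
    Setting = IncrementalRLSetting

    def check_obs(self, obs, expected_x_space: gym.Space) -> None:
        assert isinstance(obs, self.Setting.Observations)
        assert obs.x in expected_x_space
        # IncrementalRL: task labels are available at train time, so they may be
        # actual integers rather than None; don't assert that they are None here.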
def test_check_iterate_and_step(
    dataset: str, expected_obs_shape: Tuple[int, ...], batch_size: Optional[int]
):
    """ Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.
    """
    setting = ContinualRLSetting(dataset=dataset)

    if batch_size is None:
        expected_obs_batch_shape = expected_obs_shape
    else:
        expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    # Test the shapes of the obs generated by the train/val/test dataloaders.
    dataloader_methods = [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]

    assert setting.nb_tasks == 1

    with setting.train_dataloader(batch_size=batch_size) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )
        obs = temp_env.reset()
        # BUG:
        # assert has_tensor_support(temp_env.observation_space)
        assert obs[0].shape == temp_env.observation_space[0].shape

    with setting.val_dataloader(batch_size=batch_size) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_batch_shape, dtype=np.float32
        )

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works at the moment.
    with setting.test_dataloader(batch_size=None) as temp_env:
        assert temp_env.observation_space[0] == spaces.Box(
            0.0, 1.0, expected_obs_shape, dtype=np.float32
        )
        # assert temp_env.observation_space[0] == spaces.Box(0.0, 1.0, expected_obs_batch_shape, dtype=np.float32)

    def check_obs(obs):
        assert isinstance(obs, ContinualRLSetting.Observations), obs[0].shape
        assert obs.x.shape == expected_obs_batch_shape
        assert obs.task_labels is None or all(
            task_label is None for task_label in obs.task_labels
        )

    # FIXME: Save a temporary copy, since the expected shape is changed for the
    # test dataloader below.
    expected_obs_batch_shape_ = expected_obs_batch_shape

    for dataloader_method in dataloader_methods:
        print(f"Testing dataloader method {dataloader_method.__name__}")

        ## FIXME: Remove this if we allow batched envs at test time.
        if dataloader_method.__name__ == "test_dataloader":
            # Temporarily change the expected shape.
            expected_obs_batch_shape = expected_obs_shape
            env = dataloader_method(batch_size=None)
            assert env.batch_size is None
        else:
            # Restore the original value.
            expected_obs_batch_shape = expected_obs_batch_shape_
            env = dataloader_method(batch_size=batch_size)
            assert env.batch_size == batch_size
        ##
        # env = dataloader_method(batch_size=batch_size)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            reward = env.send(env.action_space.sample())
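
# For reference, a minimal version of the `take` helper used throughout these
# tests (a sketch; the actual helper lives elsewhere in the repo and may differ):
from itertools import islice
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")


def take(iterable: Iterable[T], n: int) -> Iterator[T]:
    """Yield at most the first `n` items of `iterable`."""
    return islice(iterable, n)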
def shorten(dataloader: DataLoader):
    """Truncate `dataloader` to at most `self.max_num_batches` batches."""
    return take(dataloader, n=self.max_num_batches)
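
# Example use of `shorten` (hypothetical; since it reads `self.max_num_batches`,
# it is assumed to be defined as a closure inside a test-class method):
#
#     dataloader = setting.train_dataloader(batch_size=batch_size)
#     for obs_batch in shorten(dataloader):
#         ...  # at most `self.max_num_batches` batches are consumed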