def test_universal_encoder():
    """Smoke test: create an encoder for a Dict input space and run one batch.

    The encoder is built from the un-batched input/output spaces, then called
    on a sample drawn from the batched input space. The parameter-count check
    only runs when a (truthy) `budget` is set.
    """
    budget = None
    batch_size = 10

    # Un-batched spaces that describe a single input / a single representation.
    x_space = spaces.Box(low=0, high=1, shape=[3, 32, 32])
    t_space = spaces.Discrete(2)
    input_space = spaces.Dict({"x": x_space, "t": t_space})
    output_space = spaces.Box(-np.inf, np.inf, shape=[512], dtype=np.float32)

    encoder = create_encoder(input_space, output_space, budget=budget)

    # Batched versions of the spaces; the batched inputs also accept tensors.
    batched_inputs = batch_space(input_space, batch_size)
    batch_output_space = batch_space(output_space, batch_size)
    batched_inputs = add_tensor_support(batched_inputs)
    output_space = add_tensor_support(output_space)

    # Only checks that the forward pass runs; the output itself is not inspected.
    encoder_output = encoder(batched_inputs.sample())

    if not budget:
        return
    assert n_parameters(encoder) < budget
def observation_space(self) -> NamedTupleSpace:
    """Return the un-batched observation space of this setting.

    The space is derived from the base observation space of `self.dataset`,
    passed through the transforms in `self.transforms`.

    The returned space is a NamedTupleSpace with the following entries:
    - `x`: the (transformed) observation space, e.g. an `Image` space;
    - `task_labels`: a `Discrete(self.nb_tasks)` space. When
      `self.task_labels_at_train_time` is false, it is wrapped in `Sparse`
      (task labels are not available, and entries will be `None`).

    Both entries get tensor support added, and the space uses
    `self.Observations` as its dtype.
    """
    image_space = base_observation_spaces[self.dataset]
    if not self.transforms:
        # NOTE: Even when no transforms are passed, continuum scenarios still
        # use at least 'to_tensor'.
        image_space = Transforms.to_tensor(image_space)

    # Push the space through every transform, in order.
    for apply_transform in self.transforms:
        image_space = apply_transform(image_space)
    image_space = add_tensor_support(image_space)

    labels_space = spaces.Discrete(self.nb_tasks)
    labels_space = (
        labels_space
        if self.task_labels_at_train_time
        else Sparse(labels_space, 1.0)
    )
    labels_space = add_tensor_support(labels_space)

    return NamedTupleSpace(
        x=image_space,
        task_labels=labels_space,
        dtype=self.Observations,
    )
def __init__(self, env: gym.Env, f: Union[Callable, Compose]):
    """Wrap `env` so that `f` is applied, and update the observation space.

    Parameters
    ----------
    env : gym.Env
        The environment to wrap.
    f : Union[Callable, Compose]
        The transform to apply. A plain (non-callable) list is wrapped in a
        `Compose` before being handed to the parent constructor.
    """
    is_plain_list = isinstance(f, list) and not callable(f)
    super().__init__(env, f=Compose(f) if is_plain_list else f)
    self.f: Transform

    # The wrapper's observation space is whatever calling this wrapper on the
    # wrapped env's observation space gives.
    self.observation_space = self(self.env.observation_space)
    # Tensor support is carried over from the wrapped env's space.
    if has_tensor_support(self.env.observation_space):
        self.observation_space = add_tensor_support(self.observation_space)
def _resize_space(x: spaces.Box, size: Tuple[int, ...], **kwargs) -> spaces.Box:
    """Return a space of the same type as `x` whose bounds have been resized.

    The `low` and `high` arrays of `x` are each passed through `resize` with
    the given `size` and `kwargs`; the dtype of `x` is kept. If `x` supports
    tensors as samples, so does the returned space.
    """
    # Hmm, not sure if the bounds would actually also be respected though.
    resized_low = resize(x.low, size, **kwargs)
    resized_high = resize(x.high, size, **kwargs)
    resized = type(x)(low=resized_low, high=resized_high, dtype=x.dtype)

    # Tensor support of the 'old' space carries over to the new one.
    return add_tensor_support(resized) if has_tensor_support(x) else resized
def _(image: spaces.Box) -> spaces.Box:
    """Apply the effect of the tensor conversion to a space of images.

    A `np.uint8` space is replaced by a `np.float32` space of the same type
    with bounds [0., 1.] and a shape given by `channels_first_if_needed`.
    Spaces with any other dtype are kept as they are. In both cases the
    returned space has tensor support added.
    """
    if image.dtype == np.uint8:
        # Images get their bounds changed to [0., 1.] and their shape changed
        # to channels_first.
        space_type = type(image)
        new_shape = channels_first_if_needed(image.shape)
        image = space_type(low=0., high=1., shape=new_shape, dtype=np.float32)
    # TODO: It sometimes happens that the `image` space has already been
    # through 'to_tensor'; not sure what to do in that case (raising a
    # RuntimeError for non-uint8 spaces without tensor support was considered).

    # Since the transform converts images / ndarrays to tensors, the resulting
    # space must accept tensors as well.
    return add_tensor_support(image)
def __init__(self, setting: SettingType, hparams: HParams, config: Config):
    """Create the model's encoder and output head for the given setting.

    Copies the Observations/Actions/Rewards types and the observation, action
    and reward spaces from `setting` onto the model, then builds the encoder,
    the space of its representations, and the output head.

    Parameters
    ----------
    setting : SettingType
        The setting the model is created for.
    hparams : HParams
        Hyper-parameters; stored as `self.hp`.
    config : Config
        Configuration; `config.device` is where the encoder is moved.
    """
    super().__init__()
    self.setting: SettingType = setting
    self.hp: BaseModel.HParams = hparams

    # Types of the objects exchanged with the setting.
    self.Observations: Type[Observations] = setting.Observations
    self.Actions: Type[Actions] = setting.Actions
    self.Rewards: Type[Rewards] = setting.Rewards

    # Spaces of the setting, and the shapes derived from them.
    self.observation_space: gym.Space = setting.observation_space
    self.action_space: gym.Space = setting.action_space
    self.reward_space: gym.Space = setting.reward_space
    self.input_shape = self.observation_space[0].shape
    self.reward_shape = self.reward_space.shape

    self.split_batch_transform = SplitBatch(
        observation_type=self.Observations,
        reward_type=self.Rewards,
    )
    self.config: Config = config

    # TODO: Decided NOT to set a `datamodule` attribute, so the trainer doesn't
    # fall back to using it instead of the passed datamodules/dataloaders.
    # (Testing) Setting an `example_input_array` attribute is supposed to help
    # with ddp/etc training in pytorch-lightning. Not 100% sure, so it is left
    # unset.

    # Create the encoder and the output head.
    # Space of our encoder representations.
    self.representation_space: gym.Space
    observes_state = (
        isinstance(setting, ContinualRLSetting) and setting.observe_state_directly
    )
    if not observes_state:
        # TODO: Refactor this 'make_encoder' being on the hparams, it's a bit
        # weird.
        self.encoder, self.hidden_size = self.hp.make_encoder()
        # TODO: Check that the outputs of the encoders are actually flattened.
        # I'm not sure they all are, in which case the samples wouldn't match
        # with this space.
        self.representation_space = spaces.Box(
            -np.inf, np.inf, (self.hidden_size,), np.float32
        )
    else:
        # ISSUE # 62: Need to add a dense network instead of no encoder, and
        # change the PolicyHead to have only one layer.
        # Only pass the image, not the task labels to the encoder (for now).
        input_dims = flatdim(self.observation_space[0])
        output_dims = self.hp.new_hidden_size or 128
        self.encoder = FCNet(
            in_features=input_dims,
            out_features=output_dims,
            hidden_layers=3,
            hidden_neurons=[256, 128, output_dims],
            activation=nn.ReLU,
        )
        state_features = spaces.Box(low=-np.inf, high=np.inf, shape=[output_dims])
        self.representation_space = add_tensor_support(state_features)
        self.hidden_size = output_dims

    logger.info(f"Moving encoder to device {self.config.device}")
    self.encoder = self.encoder.to(self.config.device)

    self.representation_space = add_tensor_support(self.representation_space)
    self.output_head: OutputHead = self.create_output_head(setting, task_id=None)
# "rotatedmnist": (28, 28, 1), "core50": (3, 224, 224), "core50-v2-79": (3, 224, 224), "core50-v2-196": (3, 224, 224), "core50-v2-391": (3, 224, 224), "synbols": (3, 224, 224), } from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support # NOTE: This dict reflects the observation space of the different datasets # *BEFORE* any transforms are applied. The resulting property on the Setting is # based on this 'base' observation space, passed through the transforms. base_observation_spaces: Dict[str, Space] = { dataset_name: add_tensor_support(Image(0, 1, image_shape, np.float32)) for dataset_name, image_shape in { "mnist": (1, 28, 28), "fashionmnist": (1, 28, 28), "kmnist": (28, 28, 1), "emnist": (28, 28, 1), "qmnist": (28, 28, 1), "mnistfellowship": (28, 28, 1), "cifar10": (32, 32, 3), "cifar100": (32, 32, 3), "cifarfellowship": (32, 32, 3), "imagenet100": (224, 224, 3), "imagenet1000": (224, 224, 3), # "permutedmnist": (28, 28, 1), # "rotatedmnist": (28, 28, 1), "core50": (224, 224, 3),
def __init__(
    self,
    dataset: Union[IterableDataset, Dataset],
    split_batch_fn: Callable[
        [Tuple[Any, ...]], Tuple[ObservationType, ActionType]
    ] = None,
    observation_space: gym.Space = None,
    action_space: gym.Space = None,
    reward_space: gym.Space = None,
    n_classes: int = None,
    pretend_to_be_active: bool = False,
    strict: bool = False,
    **kwargs,
):
    """Creates the DataLoader/Environment for the given dataset.

    Parameters
    ----------
    dataset : Union[IterableDataset, Dataset]
        The dataset to iterate on. Should ideally be indexable (a Map-style
        dataset).

    split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
        A function to call on each item in the dataset in order to split it into
        Observations and Rewards, by default None, in which case we assume that
        the dataset items are tuples of length 2.

    observation_space : gym.Space, optional
        The single (non-batched) observation space. Defaults to `None`, in
        which case this will try to infer the shape of the space using the
        first item in the dataset.

    action_space : gym.Space, optional
        The non-batched action space. Defaults to None, in which case the
        `n_classes` argument must be passed, and the action space is assumed to
        be discrete (i.e. that the loader is for a classification dataset).

    reward_space : gym.Space, optional
        The non-batched reward (label) space. Defaults to `None`, in which case
        it will be the same as the action space (as is the case in
        classification).

    n_classes : int, optional
        Number of classes in the dataset. Used in case `action_space` isn't
        passed. Defaults to `None`. When a `Discrete` action space is passed,
        this is overwritten with its `n`.

    pretend_to_be_active : bool, optional
        Whether to withhold the rewards (labels) from the batches when being
        iterated on like the usual dataloader, and to only give them back
        after an action is received through the 'send' method. False by
        default, in which case this behaves exactly as a normal dataloader
        when being iterated on.

        When False, the batches yielded by this dataloader will be of the form
        `Tuple[Observations, Rewards]` (as usual in SL).
        However, when set to True, the batches will be
        `Tuple[Observations, None]`! Rewards will then be returned by the
        environment when an action is passed to the Send method.

    strict : bool, optional
        Stored as `self._strict`, by default False. Its effect is not visible
        in this constructor (NOTE(review): document the behaviour where it is
        used).

    # Examples:
    ```python
    train_env = PassiveEnvironment(MNIST("data"), batch_size=32, n_classes=10)

    # The usual Dataloader-style:
    for x, y in train_env:
        # train as usual
        (...)

    # OpenAI Gym style:
    for episode in range(5):
        # NOTE: "episode" in RL is an "epoch" in SL:
        obs = train_env.reset()
        done = False
        while not done:
            actions = train_env.action_space.sample()
            obs, rewards, done, info = train_env.step(actions)
    ```
    """
    super().__init__(dataset=dataset, **kwargs)
    self.split_batch_fn = split_batch_fn

    # TODO: When the spaces aren't passed explicitly, assumes a classification
    # dataset.
    # NOTE: The checks below compare against `None` rather than using
    # truthiness: a space that defines `__len__` (e.g. an empty Tuple space) is
    # falsy, and would otherwise be silently replaced even though it was
    # passed explicitly.
    if observation_space is None:
        # NOTE: Assuming min/max of 0 and 1 respectively, but could actually
        # use min_max of the dataset samples too.
        first_item = self.dataset[0]
        if isinstance(first_item, tuple):
            x, *_ = first_item
        else:
            assert isinstance(first_item, (np.ndarray, Tensor))
            x = first_item
        observation_space = Image(0.0, 1.0, x.shape)

    if action_space is None:
        assert n_classes, "must pass either `action_space`, or `n_classes` for now"
        action_space = spaces.Discrete(n_classes)
    elif isinstance(action_space, spaces.Discrete):
        n_classes = action_space.n

    if reward_space is None:
        # Assuming a classification dataset by default:
        # (action space = reward space = Discrete(n_classes))
        reward_space = action_space

    assert observation_space is not None
    assert action_space is not None
    assert reward_space is not None

    # Keep the un-batched spaces around before (optionally) batching them.
    self.single_observation_space: Space = observation_space
    self.single_action_space: Space = action_space
    self.single_reward_space: Space = reward_space

    if self.batch_size:
        observation_space = batch_space(observation_space, self.batch_size)
        action_space = batch_space(action_space, self.batch_size)
        reward_space = batch_space(reward_space, self.batch_size)

    self.observation_space: gym.Space = add_tensor_support(observation_space)
    self.action_space: gym.Space = add_tensor_support(action_space)
    self.reward_space: gym.Space = add_tensor_support(reward_space)

    self.pretend_to_be_active = pretend_to_be_active
    self._strict = strict
    self._reward_queue = deque(maxlen=10)

    self.n_classes: Optional[int] = n_classes
    self._iterator: Optional[_BaseDataLoaderIter] = None
    # NOTE: These here are never processed with self.observation or self.reward.
    self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._done: Optional[bool] = None
    self._closed: bool = False

    self._action: Optional[ActionType] = None

    # No viewer is created here (a gym `SimpleImageViewer` was considered).
    self.viewer = None