def __init__(self,
             py_client: types.ReverbClient,
             table_name: Text,
             max_sequence_length: int,
             priority: Union[float, int] = 1,
             bypass_partial_episodes: bool = False):
    """Creates an instance of the ReverbAddEpisodeObserver.

    **Note**: This observer is designed to work with py_drivers only, and does
    not support batches.

    TODO(b/158865335): Optionally truncate long episodes and add to buffer.

    Args:
      py_client: Python client for the reverb replay server.
      table_name: The table name where samples will be written to.
      max_sequence_length: An integer. `max_sequence_length` used to write to
        the replay buffer tables. This defines the size of the internal buffer
        controlling the `upper` limit of the number of timesteps which can be
        referenced in a single prioritized item. Note that this is the maximum
        number of trajectories across all the cached episodes that you are
        writing into the replay buffer (e.g. `number_of_episodes`).
        `max_sequence_length` is not a limit of how many timesteps or items
        that can be inserted into the replay buffer. Note that, since
        `max_sequence_length` controls the size of internal buffer, it is
        suggested not to set this value to a very large number. It is only an
        upper bound on the episode length; see `bypass_partial_episodes` for
        how episodes longer than `max_sequence_length` are handled.
      priority: Initial priority for the table.
      bypass_partial_episodes: If `False` (default) and an episode length is
        greater than `max_sequence_length`, a `ValueError` is raised. If set to
        `True`, the episodes with length more than `max_sequence_length` do not
        cause a `ValueError`. These episodes are bypassed (will NOT be written
        into the replay buffer) and an error message is shown to the user.
        Note that in this case (`bypass_partial_episodes=True`), the steps for
        episodes with length more than `max_sequence_length` are wasted and
        thrown away. This decision is made to guarantee that the replay buffer
        always has FULL episodes.

    Raises:
      ValueError: If `max_sequence_length` is not positive. `table_name` and
        `priority` are stored as given and are not validated here.
    """
    # Validate before touching `self` or opening a writer.
    if max_sequence_length <= 0:
        raise ValueError(
            "`max_sequence_length` must be an integer greater equal one.")

    self._table_name = table_name
    self._max_sequence_length = max_sequence_length
    self._priority = priority

    self._py_client = py_client
    # The writer's internal buffer is sized by `max_sequence_length`.
    self._writer = py_client.writer(
        max_sequence_length=self._max_sequence_length)
    self._cached_steps = 0

    self._bypass_partial_episodes = bypass_partial_episodes
    # NOTE(review): presumably set once the current episode exceeds
    # `max_sequence_length`; confirm against the code that updates it.
    self._overflow_episode = False
def __init__(self,
             py_client: types.ReverbClient,
             table_name: Union[Text, Sequence[Text]],
             sequence_length: int,
             stride_length: int = 1,
             priority: Union[float, int] = 1,
             pad_end_of_episodes: bool = False):
    """Creates an instance of the ReverbAddTrajectoryObserver.

    When several table names are given, the data is stored only once but is
    available for sampling from each of the respective reverb tables.

    **Note**: This observer is designed to work with py_drivers only, and does
    not support batches.

    Args:
      py_client: Python client for the reverb replay server.
      table_name: The table name(s) where samples will be written to. A single
        name is wrapped into a one-element list; a sequence is stored as given.
      sequence_length: The sequence_length used to write to the given table.
      stride_length: The integer stride for the sliding window for overlapping
        sequences. The default value of `1` creates an item for every window.
        Using `L = sequence_length` this means items are created for times
        `{0, 1, .., L-1}, {1, 2, .., L}, ...`. In contrast,
        `stride_length = L` will create an item only for disjoint windows
        `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...`.
      priority: Initial priority for new samples in the RB.
      pad_end_of_episodes: At the end of an episode, the cache is dropped by
        default. When `pad_end_of_episodes = True`, the cache gets padded with
        boundary steps (last->first) with `0` values everywhere and padded
        items of `sequence_length` are written to Reverb. The last padded item
        starts with a boundary step from the episode. This ensures that the
        last few steps are not less likely to get sampled compared to middle
        steps, which is most useful for environments that have useful rewards
        at the end of episodes.

        Note: because we do not pad at the beginning of an episode, for
        `sequence_length = N > 1` scenarios, the first `N-1` steps in an
        episode are sampled less frequently than all other steps. This
        generally does not impact training performance. However, if you have
        an environment where the only meaningful rewards are at the beginning
        of the episodes, you may consider filing a feature request to support
        padding in the front as well.
    """
    # Always keep the table names as a collection, even for a single name.
    self._table_names = (
        [table_name] if isinstance(table_name, Text) else table_name)

    # Windowing / sampling configuration.
    self._sequence_length = sequence_length
    self._stride_length = stride_length
    self._priority = priority
    self._pad_end_of_episodes = pad_end_of_episodes

    self._py_client = py_client
    # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
    # once Reverb Dataset with emit_timesteps=True returns properly shaped
    # sequences.
    self._writer = py_client.writer(max_sequence_length=sequence_length)

    # Nothing has been cached or observed yet.
    self._cached_steps = 0
    self._last_trajectory = None
def __init__(self,
             py_client: types.ReverbClient,
             table_name: Text,
             sequence_length: int,
             stride_length: int = 1,
             priority: Union[float, int] = 1):
    """Creates an instance of the ReverbAddTrajectoryObserver.

    This constructor takes a single `table_name` and a single
    `sequence_length`. The arguments are stored as given; no validation is
    performed here and no exception is raised by this constructor itself.

    **Note**: This observer is designed to work with py_drivers only, and does
    not support batches.

    Args:
      py_client: Python client for the reverb replay server.
      table_name: The table name where samples will be written to.
      sequence_length: The sequence_length used to write to the given table.
        It is also passed to `py_client.writer` as `max_sequence_length`.
      stride_length: The integer stride for the sliding window for overlapping
        sequences. The default value of `1` creates an item for every window.
        Using `L = sequence_length` this means items are created for times
        `{0, 1, .., L-1}, {1, 2, .., L}, ...`. In contrast,
        `stride_length = L` will create an item only for disjoint windows
        `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...`.
      priority: Initial priority for new samples in the RB.
    """
    self._table_name = table_name
    self._sequence_length = sequence_length
    self._stride_length = stride_length
    self._priority = priority

    self._py_client = py_client
    # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
    # once Reverb Dataset with emit_timesteps=True returns properly shaped
    # sequences.
    self._writer = py_client.writer(max_sequence_length=sequence_length)
    self._cached_steps = 0
def __init__(self,
             py_client: types.ReverbClient,
             table_name: Union[Text, Sequence[Text]],
             sequence_length: int,
             stride_length: int = 1,
             priority: Union[float, int] = 1,
             pad_end_of_episodes: bool = False,
             tile_end_of_episodes: bool = False):
    """Creates an instance of the ReverbAddTrajectoryObserver.

    When several table names are given, the data is stored only once but is
    available for sampling from each of the respective reverb tables.

    **Note**: This observer is designed to work with py_drivers only, and does
    not support batches.

    Args:
      py_client: Python client for the reverb replay server.
      table_name: The table name(s) where samples will be written to. A single
        name is wrapped into a one-element list; a sequence is stored as given.
      sequence_length: The sequence_length used to write to the given table.
      stride_length: The integer stride for the sliding window for overlapping
        sequences. The default value of `1` creates an item for every window.
        Using `L = sequence_length` this means items are created for times
        `{0, 1, .., L-1}, {1, 2, .., L}, ...`. In contrast,
        `stride_length = L` will create an item only for disjoint windows
        `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...`.
      priority: Initial priority for new samples in the RB.
      pad_end_of_episodes: At the end of an episode, the cache is dropped by
        default. When `pad_end_of_episodes = True`, the cache gets padded with
        boundary steps (last->first) with `0` values everywhere and padded
        items of `sequence_length` are written to Reverb.
      tile_end_of_episodes: If `pad_end_of_episodes` is True then, the last
        padded item starts with a boundary step from the episode. When this
        option is True the following items will be generated:

            F, M, L, P
            M, L, P, P
            L, P, P, P

        If False, only a single one will be generated:

            F, M, L, P

        For training recurrent models on environments where required
        information is only available at the start of the episode it is useful
        to set `tile_end_of_episodes=False` and the `sequence_length` to be
        the length of the longest episode.

    Raises:
      ValueError: If `tile_end_of_episodes` is set without
        `pad_end_of_episodes`.
    """
    # Always keep the table names as a collection, even for a single name.
    self._table_names = (
        [table_name] if isinstance(table_name, Text) else table_name)

    # Windowing / sampling configuration.
    self._sequence_length = sequence_length
    self._stride_length = stride_length
    self._priority = priority
    self._pad_end_of_episodes = pad_end_of_episodes
    self._tile_end_of_episodes = tile_end_of_episodes

    # Tiling only makes sense on top of padding; reject the combination
    # before a writer is opened.
    if tile_end_of_episodes and not pad_end_of_episodes:
        raise ValueError("Must set `pad_end_of_episodes=True` when using "
                         "`tile_end_of_episodes`")

    self._py_client = py_client
    # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
    # once Reverb Dataset with emit_timesteps=True returns properly shaped
    # sequences.
    self._writer = py_client.writer(max_sequence_length=sequence_length)

    # Nothing has been cached or observed yet.
    self._cached_steps = 0
    self._last_trajectory = None