def run_test_adder(self,
                   adder: base.ReverbAdder,
                   first: dm_env.TimeStep,
                   steps: Sequence[Tuple[Any, dm_env.TimeStep]],
                   expected_items: Sequence[Any]):
    """Runs a unit test case for the adder.

    Feeds one complete episode through `adder`, compares the items recorded
    by `self.client` against `expected_items`, checks every recorded timestep
    against the adder's signature, and finally verifies that starting a
    second episode creates a second writer.

    Args:
      adder: The instance of `base.ReverbAdder` that is being tested.
      first: The first `dm_env.TimeStep` that is used to call
        `base.ReverbAdder.add_first()`.
      steps: A sequence of (action, timestep) tuples that are passed to
        `base.ReverbAdder.add()`.
      expected_items: The sequence of items that are expected to be created
        by calling the adder's `add_first()` method on `first` and `add()` on
        all of the elements in `steps`.

    Raises:
      ValueError: If `steps` is empty.
    """
    if not steps:
        raise ValueError('At least one step must be given.')

    # Derive the environment spec from the first (action, timestep) pair.
    env_spec = tree.map_structure(
        _numeric_to_spec,
        specs.EnvironmentSpec(
            observations=steps[0][1].observation,
            actions=steps[0][0],
            rewards=steps[0][1].reward,
            discounts=steps[0][1].discount))
    signature = adder.signature(env_spec)

    # Add all the data up to the final step.
    adder.add_first(first)
    for action, ts in steps[:-1]:
        adder.add(action, next_timestep=ts)

    if len(steps) == 1:
        # adder.add() has not been called yet, so no writers have been
        # created.
        self.assertEmpty(self.client.writers)
    else:
        # Make sure the writer has been created but not closed.
        self.assertLen(self.client.writers, 1)
        self.assertFalse(self.client.writers[0].closed)

    # Add the final step.
    adder.add(*steps[-1])

    # Ending the episode should close the writer. No new writer should yet
    # have been created as it is constructed lazily.
    self.assertLen(self.client.writers, 1)
    self.assertTrue(self.client.writers[0].closed)

    # Make sure our expected and observed data match.
    observed_items = [p[1] for p in self.client.writers[0].priorities]

    # zip() stops at the shorter input, so without this check missing or
    # surplus items would go unnoticed.
    self.assertEqual(len(expected_items), len(observed_items))

    for expected_item, observed_item in zip(expected_items, observed_items):
        # check_types=False so that differing container types (e.g. list
        # vs. tuple) between the two structures do not fail the comparison.
        tree.map_structure(
            np.testing.assert_array_almost_equal,
            expected_item,
            observed_item,
            check_types=False)

    def _check_signature(spec: tf.TensorSpec, value):
        # Convert int/float to numpy arrays of dtype np.int64 and np.float64.
        value = np.asarray(value)
        self.assertTrue(
            spec.is_compatible_with(tf.convert_to_tensor(value)))

    for step in self.client.writers[0].timesteps:
        tree.map_structure(_check_signature, signature, step)

    # Add the start of a second trajectory.
    adder.add_first(first)
    adder.add(*steps[0])

    # Make sure this creates a new writer.
    self.assertLen(self.client.writers, 2)
    # The writer is closed if the recently added `dm_env.TimeStep`'s
    # step_type is `dm_env.StepType.LAST`.
    if steps[0][1].last():
        self.assertTrue(self.client.writers[1].closed)
    else:
        self.assertFalse(self.client.writers[1].closed)
def run_test_adder(self,
                   adder: base.ReverbAdder,
                   first: dm_env.TimeStep,
                   steps: Sequence[Step],
                   expected_items: Sequence[Any],
                   pack_expected_items: bool = False,
                   repeat_episode_times: int = 1,
                   break_end_of_episode: bool = True):
    """Drives `adder` through one or more episodes and verifies its output.

    Args:
      adder: The `base.ReverbAdder` instance under test.
      first: The `dm_env.TimeStep` handed to `add_first()` at the start of
        every episode.
      steps: The (action, timestep) or (action, timestep, extras) tuples
        handed to `add()`. Whether extras are present is decided from the
        length of the first step.
      expected_items: The items the adder is expected to have created once
        `first` and all of `steps` have been added.
      pack_expected_items: If true, each expected item is wrapped in a
        single-element list before it is compared to the observed item.
      repeat_episode_times: How many times the episode is replayed.
      break_end_of_episode: If False, the end of an episode does not break
        the sequence, so the writer-closed and second-trajectory checks are
        skipped.

    Raises:
      ValueError: If `steps` is empty.
    """
    if not steps:
        raise ValueError('At least one step must be given.')

    head = steps[0]
    # A three-element first step means the steps carry extras.
    has_extras = len(head) == 3

    raw_spec = specs.EnvironmentSpec(
        observations=head[1].observation,
        actions=head[0],
        rewards=head[1].reward,
        discounts=head[1].discount)
    env_spec = tree.map_structure(_numeric_to_spec, raw_spec)
    extras_spec = (
        tree.map_structure(_numeric_to_spec, head[2]) if has_extras else ())
    signature = adder.signature(env_spec, extras_spec=extras_spec)

    for episode_id in range(repeat_episode_times):
        # Feed everything except the terminal step.
        adder.add_first(first)
        for step in steps[:-1]:
            adder.add(step[0],
                      next_timestep=step[1],
                      extras=step[2] if has_extras else ())

        # Writer bookkeeping is only verified during the first episode.
        if episode_id == 0 and len(steps) == 1:
            # add() has not run yet, hence no writer exists.
            self.assertEmpty(self.client.writers)
        elif episode_id == 0:
            # Exactly one writer must exist and it must still be open.
            self.assertLen(self.client.writers, 1)
            self.assertFalse(self.client.writers[0].closed)

        # Feed the terminal step.
        adder.add(*steps[-1])

    # Writers are constructed lazily, so finishing the episode must not have
    # produced a second one; the existing one is closed when episodes break
    # the sequence.
    self.assertLen(self.client.writers, 1)
    first_writer = self.client.writers[0]
    if break_end_of_episode:
        self.assertTrue(first_writer.closed)

    # Compare what was written with what the caller expects.
    observed_items = [entry[1] for entry in first_writer.priorities]
    self.assertEqual(len(expected_items), len(observed_items))
    for expected_item, observed_item in zip(expected_items, observed_items):
        wanted = [expected_item] if pack_expected_items else expected_item
        # check_types=False tolerates differing container types between the
        # expected and the observed structure.
        tree.map_structure(
            np.testing.assert_array_almost_equal,
            wanted,
            observed_item,
            check_types=False)

    def _assert_matches_spec(spec: tf.TensorSpec, value):
        # np.asarray turns plain ints/floats into arrays before the tensor
        # conversion.
        as_tensor = tf.convert_to_tensor(np.asarray(value))
        self.assertTrue(spec.is_compatible_with(as_tensor))

    for written_step in first_writer.timesteps:
        tree.map_structure(_assert_matches_spec, signature, written_step)

    if break_end_of_episode:
        # Begin a second trajectory; this has to create a second writer.
        adder.add_first(first)
        adder.add(*steps[0])
        self.assertLen(self.client.writers, 2)

        # The new writer is closed only when the step just added is a LAST
        # step.
        second_writer = self.client.writers[1]
        if steps[0][1].last():
            self.assertTrue(second_writer.closed)
        else:
            self.assertFalse(second_writer.closed)
def run_test_adder(self,
                   adder: base.ReverbAdder,
                   first: dm_env.TimeStep,
                   steps: Sequence[Step],
                   expected_items: Sequence[Any],
                   pack_expected_items: bool = False,
                   stack_sequence_fields: bool = True,
                   repeat_episode_times: int = 1,
                   break_end_of_episode: bool = True):
    """Runs a unit test case for the adder.

    Args:
      adder: The instance of `base.ReverbAdder` that is being tested.
      first: The first `dm_env.TimeStep` that is used to call
        `base.ReverbAdder.add_first()`.
      steps: A sequence of (action, timestep) or (action, timestep, extras)
        tuples that are passed to `base.ReverbAdder.add()`. Whether extras
        are present is decided from the length of the first step.
      expected_items: The sequence of items that are expected to be created
        by calling the adder's `add_first()` method on `first` and `add()` on
        all of the elements in `steps`.
      pack_expected_items: Deprecated and not used. If true the expected
        items are given unpacked and need to be packed in a list before
        comparison.
      stack_sequence_fields: Whether to stack the sequence fields of the
        expected items before comparing to the observed items. Usually False
        for transition adders and True for both episode and sequence adders.
      repeat_episode_times: How many times to run an episode.
      break_end_of_episode: If False, an end of an episode does not break the
        sequence.

    Raises:
      ValueError: If `steps` is empty.
    """
    del pack_expected_items

    if not steps:
        raise ValueError('At least one step must be given.')

    has_extras = len(steps[0]) == 3
    env_spec = tree.map_structure(
        _numeric_to_spec,
        specs.EnvironmentSpec(
            observations=steps[0][1].observation,
            actions=steps[0][0],
            rewards=steps[0][1].reward,
            discounts=steps[0][1].discount))
    if has_extras:
        extras_spec = tree.map_structure(_numeric_to_spec, steps[0][2])
    else:
        extras_spec = ()
    signature = adder.signature(env_spec, extras_spec=extras_spec)

    for _ in range(repeat_episode_times):
        # Add all the data up to the final step.
        adder.add_first(first)
        for step in steps[:-1]:
            action, ts = step[0], step[1]

            if has_extras:
                extras = step[2]
            else:
                extras = ()

            adder.add(action, next_timestep=ts, extras=extras)

        # Add the final step.
        adder.add(*steps[-1])

    # When episodes break the sequence, the writer should have recorded one
    # episode per repetition.
    if break_end_of_episode:
        self.assertEqual(self.client.writer.num_episodes,
                         repeat_episode_times)

    # Make sure our expected and observed data match.
    observed_items = [p[2] for p in self.client.writer.priorities]

    # Check matching number of items.
    self.assertEqual(len(expected_items), len(observed_items))

    # Check items are matching according to numpy's almost_equal.
    for expected_item, observed_item in zip(expected_items, observed_items):
        if stack_sequence_fields:
            expected_item = tree_utils.stack_sequence_fields(expected_item)

        # Set check_types=False because we check them below.
        tree.map_structure(
            np.testing.assert_array_almost_equal,
            expected_item,
            tuple(observed_item),
            check_types=False)

    # Make sure the signature matches what is being written by Reverb.
    def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
        self.assertTrue(
            spec.is_compatible_with(tf.convert_to_tensor(value)))

    # Check the last transition's signature. When no items were written
    # (and none were expected, per the length check above) there is nothing
    # to validate; indexing [-1] would raise IndexError instead.
    if observed_items:
        tree.map_structure(_check_signature, signature, observed_items[-1])