示例#1
0
 def test_unstack_sequence_fields(self):
     """Checks that unstacking a stacked sequence recovers the original.

     In other words:
     `unstack_sequence_fields(stack_sequence_fields(x)) == x`.
     """
     round_tripped = tree_utils.unstack_sequence_fields(
         tree_utils.stack_sequence_fields(TEST_SEQUENCE),
         len(TEST_SEQUENCE))

     # Compare the round-tripped result against the input, leaf by leaf.
     tree.map_structure(np.testing.assert_array_equal, round_tripped,
                        TEST_SEQUENCE)
示例#2
0
    def dump(self):
        """Summarises the cached entries and writes the summary to `self._to`.

        The cache is first passed through `acme_tree.stack_sequence_fields`.
        For every key of the stacked result that matches
        `self._aggregate_regex`, the mean, standard deviation, median, maximum
        and minimum are reported under `<key>_mean`, `<key>_std`, etc. Keys
        that do not match are reported unchanged, using only the last element
        of their stacked values. The cache is cleared after writing.
        """
        summary = {}
        stacked = acme_tree.stack_sequence_fields(self._cache)

        for name, series in stacked.items():
            if re.search(self._aggregate_regex, name) is None:
                # Not selected for aggregation: keep only the last element.
                summary[name] = series[-1]
                continue

            summary[f'{name}_mean'] = np.mean(series)
            summary[f'{name}_std'] = np.std(series)
            summary[f'{name}_median'] = np.median(series)
            summary[f'{name}_max'] = np.max(series)
            summary[f'{name}_min'] = np.min(series)

        self._to.write(summary)
        self._cache.clear()
示例#3
0
    def test_stack_sequence_fields(self):
        """Checks `stack_sequence_fields` output on nested data.

        Verifies the structure of the result, the shape of each leaf and the
        stacked values themselves.
        """
        expected_observation = [
            [0., 1., 2.],
            [1., 2., 3.],
            [2., 3., 4.],
        ]
        expected_action = [[1.], [0.5], [0.3]]
        expected_reward = [1., 0., 0.5]

        result = tree_utils.stack_sequence_fields(TEST_SEQUENCE)

        # The stacked tree must have the same structure as a single element.
        tree.assert_same_structure(result, TEST_SEQUENCE[0])

        # Leaf shapes.
        self.assertEqual(result['action'].shape, (3, 1))
        self.assertEqual(result['observation'][0].shape, (3, 3))
        self.assertEqual(result['reward'].shape, (3, ))

        # Leaf values.
        self.assertEqual(result['observation'][0].tolist(),
                         expected_observation)
        self.assertEqual(result['action'].tolist(), expected_action)
        self.assertEqual(result['reward'].tolist(), expected_reward)
示例#4
0
 def sample(self):
     """Removes a batch of experiences from the buffer and stacks them.

     Takes `self._batch_size` entries from the left end of `self._buffer`
     via `popleft()` and returns them combined with
     `tree_utils.stack_sequence_fields`.
     """
     batch = []
     for _ in range(self._batch_size):
         batch.append(self._buffer.popleft())
     return tree_utils.stack_sequence_fields(batch)
示例#5
0
    def run_test_adder(
        self,
        adder: adders_base.Adder,
        first: dm_env.TimeStep,
        steps: Sequence[Step],
        expected_items: Sequence[Any],
        signature: types.NestedSpec,
        pack_expected_items: bool = False,
        stack_sequence_fields: bool = True,
        repeat_episode_times: int = 1,
        end_behavior: adders.EndBehavior = adders.EndBehavior.ZERO_PAD,
        item_transform: Optional[Callable[[Sequence[np.ndarray]],
                                          Any]] = None):
        """Feeds episodes through `adder` and validates the written items.

        Args:
          adder: The instance of `Adder` under test.
          first: The first `dm_env.TimeStep`, passed to `Adder.add_first()`.
          steps: A sequence of (action, timestep) tuples, optionally with a
            third `extras` element, that are passed to `Adder.add()`.
          expected_items: The items expected to result from calling
            `add_first()` on `first` and `add()` on each element of `steps`.
          signature: Signature that written items must be compatible with.
          pack_expected_items: Deprecated and not used.
          stack_sequence_fields: Whether to stack the sequence fields of each
            expected item before comparing it to the observed item. Usually
            False for transition adders and True for both episode and
            sequence adders.
          repeat_episode_times: How many times to run the episode.
          end_behavior: How the end of an episode should be handled.
          item_transform: Optional transformation applied to each observed
            item, simulating the work done by the dataset pipeline on the
            learner in a real setup.

        Raises:
          ValueError: If `steps` is empty.
        """
        del pack_expected_items  # Deprecated; intentionally ignored.

        if not steps:
            raise ValueError('At least one step must be given.')

        # A three-element first step means every step carries extras.
        with_extras = len(steps[0]) == 3
        leading_steps = steps[:-1]
        final_step = steps[-1]

        for _ in range(repeat_episode_times):
            adder.add_first(first)

            # Everything up to, but excluding, the final step.
            for step in leading_steps:
                step_extras = step[2] if with_extras else ()
                adder.add(step[0], next_timestep=step[1], extras=step_extras)

            # The final step is forwarded positionally, exactly as given.
            adder.add(*final_step)

        # Call the destructor by hand (when the adder defines one) so that all
        # pending items get flushed.
        flush = getattr(adder, '__del__', lambda: None)
        flush()

        # Ending the episode should close the writer. No new writer should yet
        # have been created as it is constructed lazily.
        if end_behavior is not adders.EndBehavior.CONTINUE:
            self.assertEqual(self.num_episodes(), repeat_episode_times)

        observed_items = self.items()

        # Expected and observed must contain the same number of items.
        self.assertEqual(len(expected_items), len(observed_items))

        # Compare item by item using numpy's almost-equal assertion.
        for expected, observed in zip(expected_items, observed_items):
            if stack_sequence_fields:
                expected = tree_utils.stack_sequence_fields(expected)

            # Mimic what the dataset would do to the item in a real setup.
            if item_transform:
                observed = item_transform(observed)

            tree.map_structure(np.testing.assert_array_almost_equal,
                               tree.flatten(expected),
                               tree.flatten(observed))

        # Make sure the signature matches what is being written by Reverb.
        def _assert_spec_compatible(spec: tf.TensorSpec, value: np.ndarray):
            compatible = spec.is_compatible_with(tf.convert_to_tensor(value))
            self.assertTrue(compatible)

        # Every observed item must be unpackable using the signature.
        for observed in observed_items:
            tree.map_structure(_assert_spec_compatible,
                               tree.flatten(signature),
                               tree.flatten(observed))
示例#6
0
    def run_test_adder(self,
                       adder: base.ReverbAdder,
                       first: dm_env.TimeStep,
                       steps: Sequence[Step],
                       expected_items: Sequence[Any],
                       pack_expected_items: bool = False,
                       stack_sequence_fields: bool = True,
                       repeat_episode_times: int = 1,
                       break_end_of_episode: bool = True):
        """Feeds episodes through `adder` and validates the written items.

        The adder's signature is derived from the first element of `steps`
        and checked against the last observed item.

        Args:
          adder: The instance of `base.ReverbAdder` under test.
          first: The first `dm_env.TimeStep`, passed to
            `base.ReverbAdder.add_first()`.
          steps: A sequence of (action, timestep) tuples, optionally with a
            third `extras` element, that are passed to
            `base.ReverbAdder.add()`.
          expected_items: The items expected to result from calling
            `add_first()` on `first` and `add()` on each element of `steps`.
          pack_expected_items: Deprecated and not used.
          stack_sequence_fields: Whether to stack the sequence fields of each
            expected item before comparing it to the observed item. Usually
            False for transition adders and True for both episode and
            sequence adders.
          repeat_episode_times: How many times to run the episode.
          break_end_of_episode: If False, the end of an episode does not
            break the sequence, and the episode count is not checked.

        Raises:
          ValueError: If `steps` is empty.
        """
        del pack_expected_items  # Deprecated; intentionally ignored.

        if not steps:
            raise ValueError('At least one step must be given.')

        # A three-element first step means every step carries extras.
        with_extras = len(steps[0]) == 3

        # Build the specs from the values found in the first step.
        first_action, first_timestep = steps[0][0], steps[0][1]
        env_spec = tree.map_structure(
            _numeric_to_spec,
            specs.EnvironmentSpec(observations=first_timestep.observation,
                                  actions=first_action,
                                  rewards=first_timestep.reward,
                                  discounts=first_timestep.discount))
        extras_spec = (tree.map_structure(_numeric_to_spec, steps[0][2])
                       if with_extras else ())
        signature = adder.signature(env_spec, extras_spec=extras_spec)

        leading_steps = steps[:-1]
        final_step = steps[-1]

        for _ in range(repeat_episode_times):
            adder.add_first(first)

            # Everything up to, but excluding, the final step.
            for step in leading_steps:
                step_extras = step[2] if with_extras else ()
                adder.add(step[0], next_timestep=step[1], extras=step_extras)

            # The final step is forwarded positionally, exactly as given.
            adder.add(*final_step)

        # Ending the episode should close the writer. No new writer should yet
        # have been created as it is constructed lazily.
        if break_end_of_episode:
            self.assertEqual(self.client.writer.num_episodes,
                             repeat_episode_times)

        # The observed item is the third element of each priority entry.
        observed_items = [entry[2] for entry in self.client.writer.priorities]

        # Expected and observed must contain the same number of items.
        self.assertEqual(len(expected_items), len(observed_items))

        # Compare item by item using numpy's almost-equal assertion.
        for expected, observed in zip(expected_items, observed_items):
            if stack_sequence_fields:
                expected = tree_utils.stack_sequence_fields(expected)

            # check_types=False because types are verified separately below.
            tree.map_structure(np.testing.assert_array_almost_equal,
                               expected,
                               tuple(observed),
                               check_types=False)

        # Make sure the signature matches what is being written by Reverb.
        def _assert_spec_compatible(spec: tf.TensorSpec, value: np.ndarray):
            compatible = spec.is_compatible_with(tf.convert_to_tensor(value))
            self.assertTrue(compatible)

        # Only the last transition is checked against the signature. This
        # indexes the final observed item, so at least one item must exist.
        last_item = observed_items[-1]
        tree.map_structure(_assert_spec_compatible, signature, last_item)