示例#1
0
    def testClipToBounds(self):
        """Check `common.clip_to_spec` against a per-element bounded spec.

        The input `[1, 2, 4, -3]` is clipped with a spec whose minimum is 0
        and maximum is 3 for every element; the test expects `[1, 2, 3, 0]`.
        """
        raw = tf.constant([1, 2, 4, -3])
        bounded_spec = tensor_spec.BoundedTensorSpec(
            (4, ), tf.float32, [0, 0, 0, 0], [3, 3, 3, 3])
        want = np.array([1, 2, 3, 0])

        # Build the clipping op first, then materialize it through the
        # test-case evaluator before comparing.
        got = self.evaluate(common.clip_to_spec(raw, bounded_spec))

        self.assertAllClose(want, got)
示例#2
0
  def testScaledDistribution(self):
    """Check that samples from a scaled distribution are unchanged by clipping.

    Builds a `NormalProjectionNetwork` with `scale_distribution=True` for a
    spec bounded to [-2, 4], draws one sample batch, and asserts that
    `common.clip_to_spec` returns (almost) the same values as the sample.
    """
    bounded_spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -2, 4)
    projection = normal_projection_network.NormalProjectionNetwork(
        bounded_spec,
        init_means_output_factor=10,
        state_dependent_std=True,
        scale_distribution=True)

    batch = _get_inputs(batch_size=100, num_input_dims=5)
    dist, _ = projection(batch, outer_rank=1)

    # Variables are initialized only after the network has been called.
    self.evaluate(tf.compat.v1.global_variables_initializer())

    drawn = self.evaluate(dist.sample())
    # If clipping changed any element, the sample would have been outside
    # the spec bounds and this comparison would fail.
    np.testing.assert_almost_equal(
        self.evaluate(common.clip_to_spec(drawn, bounded_spec)), drawn)
示例#3
0
 def clip_action(action, action_spec):
     """Clip `action` to `action_spec` when the spec is bounded.

     Args:
       action: The value to (possibly) clip.
       action_spec: The spec for `action`; clipping is applied only if it is
         a `tensor_spec.BoundedTensorSpec` instance.

     Returns:
       The result of `common.clip_to_spec(action, action_spec)` for a bounded
       spec, otherwise `action` itself.
     """
     # Any spec that is not a BoundedTensorSpec leaves the action untouched.
     if not isinstance(action_spec, tensor_spec.BoundedTensorSpec):
         return action
     return common.clip_to_spec(action, action_spec)
示例#4
0
 def _sample(dist, action_spec):
     """Draw one sample from `dist`, clipping it when `self._clip` is set.

     Args:
       dist: Object exposing `sample(seed=...)`.
       action_spec: Spec passed to `common_utils.clip_to_spec` when clipping.

     Returns:
       The sampled value, clipped to `action_spec` if `self._clip` is truthy.
     """
     # NOTE(review): `self` and `seed_stream` are not parameters here;
     # presumably they are captured from an enclosing scope -- confirm.
     drawn = dist.sample(seed=seed_stream())
     if not self._clip:
         return drawn
     return common_utils.clip_to_spec(drawn, action_spec)
示例#5
0
 def _add_ou_noise(action, ou_process, action_spec):
     """Add the output of `ou_process()` to `action`, optionally clipping.

     Args:
       action: The base value the noise is added to.
       ou_process: Zero-argument callable whose result is added to `action`.
       action_spec: Spec passed to `common.clip_to_spec` when clipping.

     Returns:
       `action + ou_process()`, clipped to `action_spec` if `self._clip` is
       truthy.
     """
     # NOTE(review): `self` is not a parameter here; presumably it is
     # captured from an enclosing method -- confirm.
     perturbed = action + ou_process()
     return (common.clip_to_spec(perturbed, action_spec)
             if self._clip else perturbed)
示例#6
0
 def _sample(dist, action_spec):
     """Draw one sample from `dist` using `seed`, optionally clipping it.

     Args:
       dist: Object exposing `sample(seed=...)`.
       action_spec: Spec passed to `common.clip_to_spec` when clipping.

     Returns:
       The sampled value, clipped to `action_spec` if `self._clip` is truthy.
     """
     # NOTE(review): `self` and `seed` are not parameters here; presumably
     # they are captured from an enclosing scope -- confirm.
     result = dist.sample(seed=seed)
     if self._clip:
         result = common.clip_to_spec(result, action_spec)
     else:
         pass  # Clipping disabled: hand back the raw sample.
     return result