def __init__(self, distribution, spec, name="SquashToSpecDistribution"):
   """Wraps `distribution` with a squash-and-rescale bijector built from `spec`.

   The means and magnitudes returned by `common.spec_means_and_magnitudes`
   are stored on the instance as `action_means` and `action_magnitudes`.

   Args:
     distribution: the distribution to transform.
     spec: spec passed to `common.spec_means_and_magnitudes` to obtain the
       shift and scale of the affine rescaling.
     name: name forwarded to the parent constructor.
   """
   self.action_means, self.action_magnitudes = common.spec_means_and_magnitudes(
       spec)
   # `Chain` applies its bijectors in reverse list order: tanh runs first,
   # then the affine map `magnitudes * x + means`.
   # `Shift(...)(Scale(...))` is the documented replacement for the deprecated
   # `AffineScalar(shift=..., scale=...)` and computes the same transform.
   bijectors = [
       tfp.bijectors.Shift(self.action_means)(tfp.bijectors.Scale(
           self.action_magnitudes)),
       tanh_bijector_stable.Tanh()
   ]
   bijector_chain = tfp.bijectors.Chain(bijectors)
   super(SquashToSpecDistribution, self).__init__(
       distribution, bijector_chain, name=name)
Пример #2
0
    def __init__(self,
                 distribution,
                 spec,
                 validate_args=False,
                 name="SquashToSpecNormal"):
        """Builds a normal distribution that is squashed to a bounded spec.

        Args:
          distribution: the normal distribution to squash; must be a
            `tfp.distributions.Normal` or
            `tfp.distributions.MultivariateNormalDiag`.
          spec: bounded action spec used to compute the action means and
            magnitudes.
          validate_args: Python `bool`, default `False`. If `True`,
            distribution parameters are validated, possibly at a runtime
            cost. If `False`, invalid inputs may silently produce incorrect
            outputs.
          name: Python `str` prefixed to the names of Ops created by this
            class.

        Raises:
          ValueError: if `distribution` is not one of the accepted normal
            distribution types.
        """
        accepted_types = (tfp.distributions.Normal,
                          tfp.distributions.MultivariateNormalDiag)
        if not isinstance(distribution, accepted_types):
            raise ValueError(
                "Input distribution must be a normal distribution, "
                "got {} instead".format(distribution))

        means_and_magnitudes = common.spec_means_and_magnitudes(spec)
        self.action_means, self.action_magnitudes = means_and_magnitudes

        # The reported parameters are those of the un-squashed input
        # distribution (the actor network's normalized output). That is what
        # policy info needs in order to compare an old policy with a new one.
        parameters = dict(loc=distribution.loc, scale=distribution.scale)
        # Keep a handle on the raw (pre-squash) action distribution.
        self.input_distribution = distribution

        # `Chain` applies bijectors in reverse list order: tanh first, then
        # the shift-and-scale onto the spec.
        shift = tfp.bijectors.Shift(self.action_means)
        scale = tfp.bijectors.Scale(self.action_magnitudes)
        squash_to_spec = tfp.bijectors.Chain(
            [shift(scale), tanh_bijector_stable.Tanh()])
        self._squashed_distribution = tfp.distributions.TransformedDistribution(
            distribution=distribution, bijector=squash_to_spec)

        base_kwargs = dict(
            dtype=distribution.dtype,
            reparameterization_type=distribution.reparameterization_type,
            validate_args=validate_args,
            allow_nan_stats=distribution.allow_nan_stats,
            parameters=parameters,
            # The wrapped distribution's _graph_parents are read directly
            # because this class acts more like a base class than a derived
            # one.
            graph_parents=(
                distribution._graph_parents +  # pylint: disable=protected-access
                squash_to_spec.graph_parents),
            name=name)
        super(SquashToSpecNormal, self).__init__(**base_kwargs)
Пример #3
0
 def testSpecMeansAndMagnitudes(self):
   """Checks means and magnitudes computed from a bounded tensor spec."""
   lower_bounds = [[-5, -5], [-4, -4], [-2, -6]]
   upper_bounds = [[5, 5], [4, 4], [2, 6]]
   spec = tensor_spec.BoundedTensorSpec(
       (3, 2), tf.float32, lower_bounds, upper_bounds)

   computed = common.spec_means_and_magnitudes(spec)
   means, magnitudes = self.evaluate(computed)

   # The bounds are symmetric about zero, so the test expects zero means and
   # magnitudes equal to the upper bounds.
   want_magnitudes = np.array(upper_bounds, dtype=np.float32)
   want_means = np.zeros_like(want_magnitudes)
   self.assertAllClose(want_means, means)
   self.assertAllClose(want_magnitudes, magnitudes)
Пример #4
0
 def testSpecMeansAndMagnitudes(self):
     """Checks means and magnitudes computed from a bounded array spec."""
     lower_bounds = np.array([[-5, -5], [-4, -4], [-2, -6]])
     upper_bounds = np.array([[5, 5], [4, 4], [2, 6]])
     spec = array_spec.BoundedArraySpec(
         (3, 2), np.float32, lower_bounds, upper_bounds)

     means, magnitudes = common.spec_means_and_magnitudes(spec)

     # The bounds are symmetric about zero, so the test expects zero means
     # and magnitudes equal to the upper bounds.
     want_magnitudes = upper_bounds.astype(np.float32)
     want_means = np.zeros_like(want_magnitudes)
     self.assertAllClose(want_means, means)
     self.assertAllClose(want_magnitudes, magnitudes)
Пример #5
0
def scale_distribution_to_spec(distribution, spec):
    """Scales the given distribution to the bounds of the given spec.

    Samples are first squashed with a tanh bijector to the range (-1.0, +1.0)
    and then shifted and scaled using the means and magnitudes returned by
    `common.spec_means_and_magnitudes(spec)`.

    Args:
      distribution: the distribution to transform.
      spec: spec whose means and magnitudes define the affine rescaling.

    Returns:
      A `tfp.distributions.TransformedDistribution` wrapping `distribution`.
    """
    action_means, action_magnitudes = common.spec_means_and_magnitudes(spec)

    # `Shift(...)(Scale(...))` is the documented replacement for the
    # deprecated `AffineScalar(shift=..., scale=...)`; it computes the same
    # `magnitudes * x + means` transform.
    rescale_to_spec = tfp.bijectors.Shift(action_means)(
        tfp.bijectors.Scale(action_magnitudes))

    # Chain applies bijectors in reverse order, so the squash happens before
    # the rescale to the action spec.
    bijector_chain = tfp.bijectors.Chain(
        [rescale_to_spec, tanh_bijector_stable.Tanh()])
    return tfp.distributions.TransformedDistribution(
        distribution=distribution, bijector=bijector_chain)