Example #1
 def _variance(self):
     var = (tf.square(self.scale) * (1. + (
         self._standardized_low * self._normal_pdf(self._standardized_low) -
         self._standardized_high * self._normal_pdf(self._standardized_high)
     ) / self._normalizer - tf.square(
         (self._normal_pdf(self._standardized_low) -
          self._normal_pdf(self._standardized_high)) / self._normalizer)))
     return var
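This is the standard closed form for a truncated normal's variance: `Var = scale**2 * (1 + (a*pdf(a) - b*pdf(b))/Z - ((pdf(a) - pdf(b))/Z)**2)`, where `a`, `b` are the standardized bounds and `Z` is the normalizer. A quick numerical sanity check (a sketch, assuming SciPy is available; `scipy.stats.truncnorm` takes the same standardized bounds as `_standardized_low`/`_standardized_high`):

```python
import numpy as np
from scipy import stats

loc, scale = 0.5, 2.0
low, high = -1.0, 3.0                             # truncation bounds
a, b = (low - loc) / scale, (high - loc) / scale  # standardized bounds

phi = stats.norm.pdf
normalizer = stats.norm.cdf(b) - stats.norm.cdf(a)

# Closed form mirrored from `_variance` above.
var = scale**2 * (1. + (a * phi(a) - b * phi(b)) / normalizer
                  - ((phi(a) - phi(b)) / normalizer)**2)

print(np.isclose(var, stats.truncnorm(a, b, loc=loc, scale=scale).var()))  # True
```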
Example #2
def normal_conjugates_known_scale_posterior(prior, scale, s, n):
    """Posterior Normal distribution with conjugate prior on the mean.

  This model assumes that `n` observations (with sum `s`) come from a
  Normal with unknown mean `loc` (described by the Normal `prior`)
  and known variance `scale**2`. The "known scale posterior" is
  the distribution of the unknown `loc`.

  Accepts a prior Normal distribution object, having parameters
  `loc0` and `scale0`, as well as known `scale` values of the predictive
  distribution(s) (also assumed Normal),
  and statistical estimates `s` (the sum(s) of the observations) and
  `n` (the number(s) of observations).

  Returns a posterior (also Normal) distribution object, with parameters
  `(loc', scale'**2)`, where:

  ```
  mu ~ N(mu', sigma'**2)
  sigma'**2 = 1/(1/sigma0**2 + n/sigma**2),
  mu' = (mu0/sigma0**2 + s/sigma**2) * sigma'**2.
  ```

  Distribution parameters from `prior`, as well as `scale`, `s`, and `n`.
  will broadcast in the case of multidimensional sets of parameters.

  Args:
    prior: `Normal` object of type `dtype`:
      the prior distribution having parameters `(loc0, scale0)`.
    scale: tensor of type `dtype`, taking values `scale > 0`.
      The known stddev parameter(s).
    s: Tensor of type `dtype`. The sum(s) of observations.
    n: Tensor of type `int`. The number(s) of observations.

  Returns:
    A new Normal posterior distribution object for the unknown observation
    mean `loc`.

  Raises:
    TypeError: if dtype of `s` does not match `dtype`, or `prior` is not a
      Normal object.
  """
    if not isinstance(prior, normal.Normal):
        raise TypeError("Expected prior to be an instance of type Normal")

    if s.dtype != prior.dtype:
        raise TypeError(
            "Observation sum s.dtype does not match prior dtype: %s vs. %s" %
            (s.dtype, prior.dtype))

    n = tf.cast(n, prior.dtype)
    scale0_2 = tf.square(prior.scale)
    scale_2 = tf.square(scale)
    scalep_2 = 1.0 / (1 / scale0_2 + n / scale_2)
    return normal.Normal(loc=(prior.loc / scale0_2 + s / scale_2) * scalep_2,
                         scale=tf.sqrt(scalep_2))
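A minimal usage sketch (assuming the function is exposed under `tfp.distributions`, as in TensorFlow Probability; import paths may vary by version):

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

prior = tfd.Normal(loc=0., scale=1.)   # N(loc0=0, scale0=1) prior on `loc`
scale = tf.constant(2.)                # known observation stddev
x = tf.constant([1.2, 0.7, 1.9, 1.1])  # observations
s, n = tf.reduce_sum(x), tf.size(x)

posterior = tfd.normal_conjugates_known_scale_posterior(prior, scale, s, n)
print(posterior.loc.numpy(), posterior.scale.numpy())
# loc' = (loc0/scale0**2 + s/scale**2) * scale'**2; pulled toward the data mean.
```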
Example #3
def _log_ndtr_lower(x, series_order):
    """Asymptotic expansion version of `Log[cdf(x)]`, appropriate for `x<<-1`."""
    x_2 = tf.square(x)
    # Log of the term multiplying (1 + sum)
    log_scale = -0.5 * x_2 - tf.math.log(-x) - 0.5 * np.log(2. * np.pi)
    return log_scale + tf.math.log(_log_ndtr_asymptotic_series(
        x, series_order))
Example #4
def log1psquare(x, name=None):
    """Numerically stable calculation of `log(1 + x**2)` for small or large `|x|`.

  For sufficiently large `x` we use the following observation:

  ```none
  log(1 + x**2) =   2 log(|x|) + log(1 + 1 / x**2)
                --> 2 log(|x|)  as x --> inf
  ```

  Numerically, `log(1 + 1 / x**2)` is `0` when `1 / x**2` is small relative to
  machine epsilon.

  Args:
    x: Float `Tensor` input.
    name: Python string indicating the name of the TensorFlow operation.
      Default value: `'log1psquare'`.

  Returns:
    log1psq: Float `Tensor` representing `log(1. + x**2.)`.
  """
    with tf.name_scope(name or 'log1psquare'):
        x = tf.convert_to_tensor(x, dtype_hint=tf.float32, name='x')
        dtype = dtype_util.as_numpy_dtype(x.dtype)

        eps = np.finfo(dtype).eps.astype(np.float64)
        is_large = tf.abs(x) > (eps**-0.5).astype(dtype)

        # Mask out small x's so the gradient correctly propagates.
        abs_large_x = tf.where(is_large, tf.abs(x), tf.ones([], x.dtype))
        return tf.where(is_large, 2. * tf.math.log(abs_large_x),
                        tf.math.log1p(tf.square(x)))
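The cutover at `|x| > eps**-0.5` matters in practice: in `float32`, `tf.square(x)` overflows to `inf` long before `2 * log(|x|)` does. A small demonstration (a sketch, assuming `log1psquare` above and its `dtype_util` dependency are in scope):

```python
import tensorflow as tf

x = tf.constant(1e20, dtype=tf.float32)

# Naive form overflows: tf.square(x) is inf in float32, so log1p gives inf.
print(tf.math.log1p(tf.square(x)).numpy())  # inf

# Stable form takes the 2*log(|x|) branch: 2 * log(1e20) ~= 92.1.
print(log1psquare(x).numpy())               # ~92.1
```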
Example #5
 def squared_frobenius_norm(x):
     """Helper to make KL calculation slightly more readable."""
     # http://mathworld.wolfram.com/FrobeniusNorm.html
     # The gradient of KL[p,q] is not defined when p==q. The culprit is
     # tf.norm, i.e., we cannot use the commented out code.
     # return tf.square(tf.norm(x, ord="fro", axis=[-2, -1]))
     return tf.reduce_sum(tf.square(x), axis=[-2, -1])
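The commented-out `tf.norm` form is avoided because autodiff evaluates the norm's gradient at an all-zero matrix as `0 / 0`, i.e. NaN, while the sum-of-squares form is differentiable everywhere. A quick demonstration of the difference (a sketch):

```python
import tensorflow as tf

x = tf.Variable(tf.zeros([2, 2]))

with tf.GradientTape() as tape:
    y = tf.square(tf.norm(x, ord='fro', axis=[-2, -1]))
print(tape.gradient(y, x).numpy())  # all NaN at x == 0

with tf.GradientTape() as tape:
    y = tf.reduce_sum(tf.square(x), axis=[-2, -1])
print(tape.gradient(y, x).numpy())  # all zeros, as expected
```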
Example #6
    def _variance(self):
        concentration = tf.convert_to_tensor(self.concentration)
        scale = tf.convert_to_tensor(self.scale)
        var = (tf.square(scale) / tf.square(concentration - 1.) /
               (concentration - 2.))
        if self.allow_nan_stats:
            assertions = []
        else:
            assertions = [
                assert_util.assert_less(
                    tf.constant(2., dtype=self.dtype),
                    concentration,
                    message='variance undefined when any concentration <= 2')
            ]

        with tf.control_dependencies(assertions):
            return tf.where(concentration > 2., var,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
Example #7
 def _variance(self):
     # Because df is a scalar, we need to expand dimensions to match
     # scale_operator. We use ellipses notation (...) to select all dimensions
     # and add two dimensions to the end.
     df = self.df[..., tf.newaxis, tf.newaxis]
     x = tf.sqrt(df) * self._square_scale_operator()
     d = tf.expand_dims(tf.linalg.diag_part(x), -1)
     v = tf.square(x) + tf.matmul(d, d, adjoint_b=True)
     return v
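The result is the known element-wise Wishart variance `Var(X[i, j]) = df * (Sigma[i, j]**2 + Sigma[i, i] * Sigma[j, j])`, with `Sigma = scale @ scale.T` (presumably what `_square_scale_operator()` returns). A Monte Carlo sanity check with SciPy (a sketch; agreement is statistical, not exact):

```python
import numpy as np
from scipy import stats

df = 5.0
Sigma = np.array([[2.0, 0.3],
                  [0.3, 1.0]])

# Closed form mirrored from `_variance` above.
d = np.diag(Sigma)[:, None]
var = df * (Sigma**2 + d @ d.T)

samples = stats.wishart(df=df, scale=Sigma).rvs(size=200_000, random_state=42)
print(np.allclose(samples.var(axis=0), var, rtol=0.05))  # True
```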
Example #8
 def _variance(self):
   if distribution_util.is_diagonal_scale(self.scale):
     return tf.square(self.scale.diag_part())
   elif (isinstance(self.scale, tf.linalg.LinearOperatorLowRankUpdate) and
         self.scale.is_self_adjoint):
     return tf.linalg.diag_part(self.scale.matmul(self.scale.to_dense()))
   else:
     return tf.linalg.diag_part(
         self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))
Example #9
 def _covariance(self):
   # Let
   #   W = (w1,...,wk), with wj ~ iid Exponential(0, 1).
   # Then this distribution is
   #   X = loc + LW,
   # and then since Cov(wi, wj) = 1 if i = j and 0 otherwise (i.e. Cov(W) = I),
   #   Cov(X) = L Cov(W) L^T = L L^T.
   if distribution_util.is_diagonal_scale(self.scale):
     return tf.linalg.diag(tf.square(self.scale.diag_part()))
   else:
     return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
Example #10
    def _covariance(self):
        if distribution_util.is_diagonal_scale(self.scale):
            mvn_cov = tf.linalg.diag(tf.square(self.scale.diag_part()))
        else:
            mvn_cov = self.scale.matmul(self.scale.to_dense(),
                                        adjoint_arg=True)

        cov_shape = tf.concat(
            [self._sample_shape(),
             self._event_shape_tensor()], -1)
        mvn_cov = tf.broadcast_to(mvn_cov, cov_shape)
        return self._std_var_helper(mvn_cov, 'covariance', 2, lambda x: x)
Example #11
    def _variance(self):
        if distribution_util.is_diagonal_scale(self.scale):
            mvn_var = tf.square(self.scale.diag_part())
        elif (isinstance(self.scale, tf.linalg.LinearOperatorLowRankUpdate)
              and self.scale.is_self_adjoint):
            mvn_var = tf.linalg.diag_part(
                self.scale.matmul(self.scale.to_dense()))
        else:
            mvn_var = tf.linalg.diag_part(
                self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))

        mvn_var = tf.broadcast_to(mvn_var, self._sample_shape())
        return self._std_var_helper(mvn_var, 'variance', 1, lambda x: x)
Example #12
 def _covariance(self):
     # Let
     #   W = (w1,...,wk), with wj ~ iid Laplace(0, 1).
     # Then this distribution is
     #   X = loc + LW,
     # and since E[X] = loc,
     #   Cov(X) = E[LW W^T L^T] = L E[W W^T] L^T.
     # Since E[wi wj] = 0 if i != j, and 2 if i == j, we have
     #   Cov(X) = 2 LL^T
     if distribution_util.is_diagonal_scale(self.scale):
         return 2. * tf.linalg.diag(tf.square(self.scale.diag_part()))
     else:
         return 2. * self.scale.matmul(self.scale.to_dense(),
                                       adjoint_arg=True)
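The `2 * L @ L.T` factor is easy to verify by simulation (a NumPy sketch; agreement is statistical):

```python
import numpy as np

rng = np.random.default_rng(0)
k, n = 3, 500_000
L = np.array([[1.0, 0.0, 0.0],
              [0.5, 2.0, 0.0],
              [0.2, -0.3, 1.5]])

w = rng.laplace(loc=0., scale=1., size=(n, k))  # iid Laplace(0, 1), Var = 2
x = w @ L.T                                     # X = L W (loc omitted)
print(np.allclose(np.cov(x, rowvar=False), 2. * L @ L.T, rtol=0.05, atol=0.02))
# True
```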
Example #13
def _log_ndtr_asymptotic_series(x, series_order):
    """Calculates the asymptotic series used in log_ndtr."""
    npdt = dtype_util.as_numpy_dtype(x.dtype)
    if series_order <= 0:
        return npdt(1)
    x_2 = tf.square(x)
    even_sum = tf.zeros_like(x)
    odd_sum = tf.zeros_like(x)
    x_2n = x_2  # Start with x^{2*1} = x^{2*n} with n = 1.
    for n in range(1, series_order + 1):
        y = npdt(_double_factorial(2 * n - 1)) / x_2n
        if n % 2:
            odd_sum += y
        else:
            even_sum += y
        x_2n *= x_2
    return 1. + even_sum - odd_sum
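Together with `_log_ndtr_lower` (Example #3), this implements the classic tail expansion `log(cdf(x)) ~ -x**2/2 - log(-x) - log(2*pi)/2 + log(1 - 1/x**2 + 3/x**4 - ...)` for `x << -1`. A self-contained NumPy check against `scipy.special.log_ndtr` (a sketch; these helpers are plain re-implementations, not the TF versions above):

```python
import numpy as np
from scipy import special

def double_factorial(n):
    """(2n - 1)!! for odd n, e.g. 5!! = 15."""
    return np.prod(np.arange(n, 0, -2, dtype=np.float64))

def log_ndtr_lower(x, series_order=3):
    # Series: 1 + sum_{n >= 1} (-1)**n * (2n - 1)!! / x**(2n)
    series = 1. + sum((-1.)**n * double_factorial(2 * n - 1) / x**(2 * n)
                      for n in range(1, series_order + 1))
    return -0.5 * x**2 - np.log(-x) - 0.5 * np.log(2. * np.pi) + np.log(series)

x = -12.0
print(log_ndtr_lower(x), special.log_ndtr(x))  # both ~ -75.41
```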
Example #14
    def _variance(self):
        concentration = tf.convert_to_tensor(self.concentration)
        mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
        mixing_rate = tf.convert_to_tensor(self.mixing_rate)

        variance = (tf.square(concentration * mixing_rate /
                              (mixing_concentration - 1.)) /
                    (mixing_concentration - 2.))
        if self.allow_nan_stats:
            return tf.where(mixing_concentration > 2., variance,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
        else:
            with tf.control_dependencies([
                    assert_util.assert_less(
                        tf.ones([], self.dtype) * 2.,
                        mixing_concentration,
                        message='variance undefined when '
                                '`mixing_concentration` <= 2')
            ]):
                return tf.identity(variance)
Example #15
    def _variance(self):
        df = tf.convert_to_tensor(self.df)
        scale = tf.convert_to_tensor(self.scale)
        # We need to put the tf.where inside the outer tf.where to ensure we never
        # hit a NaN in the gradient.
        denom = tf.where(df > 2., df - 2., tf.ones_like(df))
        # Abs(scale) superfluous.
        var = (tf.ones(self._batch_shape_tensor(df=df, scale=scale),
                       dtype=self.dtype) * tf.square(scale) * df / denom)
        # When 1 < df <= 2, variance is infinite.
        result_where_defined = tf.where(
            df > 2., var,
            dtype_util.as_numpy_dtype(self.dtype)(np.inf))

        if self.allow_nan_stats:
            return tf.where(df > 1., result_where_defined,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
        else:
            return distribution_util.with_dependencies([
                assert_util.assert_less(
                    tf.ones([], dtype=self.dtype),
                    df,
                    message='variance not defined for components of df <= 1'),
            ], result_where_defined)
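The nested `tf.where` above is the standard "safe where" trick: `tf.where` discards the unselected branch's value but not its gradient, so any expression that could yield NaN or inf must be masked before it is computed. A minimal illustration of the pattern, independent of this class (a sketch):

```python
import tensorflow as tf

x = tf.Variable([-1.0, 4.0])

with tf.GradientTape() as tape:
    # Unsafe: tf.sqrt(-1.) is computed (then discarded), and its NaN
    # gradient leaks through the zero multiplier in the chain rule.
    unsafe = tf.where(x > 0., tf.sqrt(x), tf.zeros_like(x))
print(tape.gradient(unsafe, x).numpy())  # [nan, 0.25]

with tf.GradientTape() as tape:
    safe_x = tf.where(x > 0., x, tf.ones_like(x))  # mask *before* the sqrt
    safe = tf.where(x > 0., tf.sqrt(safe_x), tf.zeros_like(x))
print(tape.gradient(safe, x).numpy())    # [0., 0.25]
```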
Example #16
    def _mean_of_covariance_given_quadrature_component(self, diag_only):
        p = self.mixture_distribution.probs_parameter()

        # To compute E[Cov(Z|V)], we'll add matrices within three categories:
        # scaled-identity, diagonal, and full. Then we'll combine these at the end.
        scale_identity_multiplier = None
        diag = None
        full = None

        for k, aff in enumerate(self.interpolated_affine):
            s = aff.scale  # Just in case aff.scale has side-effects, we'll call once.
            if (s is None or isinstance(s, tf.linalg.LinearOperatorIdentity)):
                scale_identity_multiplier = add(scale_identity_multiplier,
                                                p[..., k, tf.newaxis])
            elif isinstance(s, tf.linalg.LinearOperatorScaledIdentity):
                scale_identity_multiplier = add(
                    scale_identity_multiplier,
                    (p[..., k, tf.newaxis] * tf.square(s.multiplier)))
            elif isinstance(s, tf.linalg.LinearOperatorDiag):
                diag = add(diag,
                           (p[..., k, tf.newaxis] * tf.square(s.diag_part())))
            else:
                x = (p[..., k, tf.newaxis, tf.newaxis] *
                     s.matmul(s.to_dense(), adjoint_arg=True))
                if diag_only:
                    x = tf.linalg.diag_part(x)
                full = add(full, x)

        # We must now account for the fact that the base distribution might have a
        # non-unity variance. Recall that, since X ~ iid Law(X_0),
        #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
        # We can scale by `Var(X)` (vs `Cov(X)`) since X corresponds to `d` iid
        # samples from a scalar-event distribution.
        v = self.distribution.variance()
        if scale_identity_multiplier is not None:
            scale_identity_multiplier = scale_identity_multiplier * v
        if diag is not None:
            diag = diag * v[..., tf.newaxis]
        if full is not None:
            full = full * v[..., tf.newaxis]

        if diag_only:
            # Apparently we don't need the full matrix, just the diagonal.
            r = add(diag, full)
            if r is None and scale_identity_multiplier is not None:
                ones = tf.ones(self.event_shape_tensor(), dtype=self.dtype)
                return scale_identity_multiplier[..., tf.newaxis] * ones
            return add(r, scale_identity_multiplier)

        # `None` indicates we don't know if the result is positive-definite.
        is_positive_definite = (True if all(
            aff.scale.is_positive_definite
            for aff in self.endpoint_affine) else None)

        to_add = []
        if diag is not None:
            to_add.append(
                tf.linalg.LinearOperatorDiag(
                    diag=diag, is_positive_definite=is_positive_definite))
        if full is not None:
            to_add.append(
                tf.linalg.LinearOperatorFullMatrix(
                    matrix=full, is_positive_definite=is_positive_definite))
        if scale_identity_multiplier is not None:
            to_add.append(
                tf.linalg.LinearOperatorScaledIdentity(
                    num_rows=self.event_shape_tensor()[0],
                    multiplier=scale_identity_multiplier,
                    is_positive_definite=is_positive_definite))

        return (linop_add_lib.add_operators(to_add)[0].to_dense()
                if to_add else None)
Example #17
 def _stddev(self):
     samples = tf.convert_to_tensor(self._samples)
     axis = self._samples_axis
     r = samples - tf.expand_dims(self._mean(samples), axis=axis)
     var = tf.reduce_mean(tf.square(r), axis=axis)
     return tf.sqrt(var)
Example #18
 def _log_prob(self, x):
     scale = tf.convert_to_tensor(self.scale)
     log_unnormalized_prob = -tf.math.log1p(
         tf.square(self._z(x, scale=scale)))
     log_normalization = np.log(np.pi) + tf.math.log(scale)
     return log_unnormalized_prob - log_normalization
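This is `log p(x) = -log1p(z**2) - log(pi * scale)` with `z = (x - loc) / scale`, which `_z` presumably computes. A quick check against SciPy (a sketch):

```python
import numpy as np
from scipy import stats

loc, scale = 1.0, 2.0
x = np.array([-3.0, 0.5, 4.0])

z = (x - loc) / scale
log_prob = -np.log1p(z**2) - (np.log(np.pi) + np.log(scale))
print(np.allclose(log_prob, stats.cauchy(loc=loc, scale=scale).logpdf(x)))  # True
```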
Example #19
 def _forward_log_det_jacobian(self, x):
     return -0.5 * np.log(2 * np.pi) - tf.square(x) / 2.
Example #20
    def _log_prob(self, x):
        if self.input_output_cholesky:
            x_sqrt = x
        else:
            # Complexity: O(nbk**3)
            x_sqrt = tf.linalg.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        x_ndims = tf.rank(x_sqrt)
        num_singleton_axes_to_prepend = (
            tf.maximum(tf.size(batch_shape) + 2, x_ndims) - x_ndims)
        x_with_prepended_singletons_shape = tf.concat([
            tf.ones([num_singleton_axes_to_prepend], dtype=tf.int32),
            tf.shape(x_sqrt)
        ], 0)
        x_sqrt = tf.reshape(x_sqrt, x_with_prepended_singletons_shape)
        ndims = tf.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - tf.size(batch_shape) - 2
        sample_shape = tf.shape(x_sqrt)[:sample_ndims]

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimension * number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        perm = tf.concat(
            [tf.range(sample_ndims, ndims),
             tf.range(0, sample_ndims)], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)
        last_dim_size = (
            tf.cast(self.dimension, dtype=tf.int32) *
            tf.reduce_prod(x_with_prepended_singletons_shape[:sample_ndims]))
        shape = tf.concat([
            x_with_prepended_singletons_shape[sample_ndims:-2],
            [tf.cast(self.dimension, dtype=tf.int32), last_dim_size]
        ], axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each solve is O(k**2) so
        # this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo the reshaping that made the tensor batch-op ready.
        # Complexity: O(nbk**2)
        shape = tf.concat(
            [tf.shape(scale_sqrt_inv_x_sqrt)[:-2], event_shape, sample_shape],
            axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = tf.concat([
            tf.range(ndims - sample_ndims, ndims),
            tf.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = tf.reduce_sum(tf.square(scale_sqrt_inv_x_sqrt),
                                          axis=[-2, -1])

        # Complexity: O(nbk)
        half_log_det_x = tf.reduce_sum(
            tf.math.log(tf.linalg.diag_part(x_sqrt)), axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input x with what we know from the
        # parameters of this distribution.
        if (tensorshape_util.rank(x.shape) is not None and
                tensorshape_util.rank(self.batch_shape) is not None):
            tensorshape_util.set_shape(
                log_prob,
                tf.broadcast_static_shape(x.shape[:-2], self.batch_shape))

        return log_prob
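The trace identity in the comment block above is easy to verify directly (a NumPy sketch; `S` and `L` are arbitrary well-conditioned lower-triangular factors):

```python
import numpy as np

rng = np.random.default_rng(1)
k = 4
S = np.tril(rng.normal(size=(k, k))) + k * np.eye(k)  # V = S S'
L = np.tril(rng.normal(size=(k, k))) + k * np.eye(k)  # X = L L'
V, X = S @ S.T, L @ L.T

lhs = np.trace(np.linalg.inv(V) @ X)
rhs = np.sum(np.linalg.solve(S, L)**2)  # sum_{ik} (inv(S) L)_{ik}**2
print(np.isclose(lhs, rhs))  # True
```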
Example #21
 def _variance(self):
     return self.concentration / tf.square(self.rate)
Example #22
 def _covariance(self):
     if distribution_util.is_diagonal_scale(self.scale):
         return tf.linalg.diag(tf.square(self.scale.diag_part()))
     else:
         return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
Example #23
 def _normal_pdf(self, x):
     return 1. / np.sqrt(2 * np.pi) * tf.exp(-0.5 * tf.square(x))
Example #24
 def _variance(self):
     return tf.square(self.range()) / 12.
Example #25
 def _forward(self, x):
   with tf.control_dependencies(self._assertions(x)):
     return tf.square(x)