Example #1
 def _forward(self, x):
     with tf.control_dependencies(self._maybe_assert_valid_x(x)):
         if self.power == 0.:
             return tf.exp(x)
         # If large x accuracy is an issue, consider using:
         # (1. + x * self.power)**(1. / self.power) when x >> 1.
         return tf.exp(tf.math.log1p(x * self.power) / self.power)
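The comment above notes an algebraically equivalent form for large x. A minimal standalone NumPy sketch (illustrative only, not part of the bijector class) comparing the two expressions for the power transform y = (1 + x * power)**(1 / power):

import numpy as np

power = 0.5                                     # assumed value for illustration
x = np.array([0.1, 1.0, 10.0, 1000.0])

y_log1p = np.exp(np.log1p(x * power) / power)   # form used in _forward above
y_direct = (1. + x * power) ** (1. / power)     # form suggested for x >> 1

print(np.allclose(y_log1p, y_direct))           # True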
Example #2
        def loop_body(should_continue, k):
            """Resample the non-accepted points."""
            # The range of U is chosen so that the resulting sample K lies in
            # [0, tf.int64.max). The final sample, if accepted, is K + 1.
            u = tf.random.uniform(shape,
                                  minval=minval_u,
                                  maxval=maxval_u,
                                  dtype=power.dtype,
                                  seed=seed())

            # Sample the point X from the continuous density h(x) \propto x^(-power).
            x = self._hat_integral_inverse(u, power=power)

            # Rejection-inversion requires a `hat` function, h(x) such that
            # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
            # support. A natural hat function for us is h(x) = x^(-power).
            #
            # After sampling X from h(x), suppose it lies in the interval
            # (K - .5, K + .5) for integer K. The sample K is accepted if X lies
            # to the right of x_K, where x_K is defined by:
            #   \int_{x_K}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
            # where H(x) = \int_x^inf h(x) dx.

            # Solving for x_K gives x_K = H_inverse(H(K + .5) + pmf(K + 1)), so
            # the acceptance condition is X >= H_inverse(H(K + .5) + pmf(K + 1)).
            # Since X = H_inverse(U) and H_inverse is decreasing, this is
            # equivalent to U <= H(K + .5) + pmf(K + 1).

            # Update the non-accepted points.
            # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
            k = tf.where(should_continue, tf.floor(x + 0.5), k)
            accept = (u <= self._hat_integral(k + .5, power=power) +
                      tf.exp(self._log_prob(k + 1, power=power)))

            return [should_continue & (~accept), k]
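The comments above sketch rejection-inversion sampling. A self-contained NumPy illustration (not the TFP sampler; it targets pmf(k) proportional to k**(-a) on k = 1, 2, ... directly, without the K + 1 support shift used in the class) of the hat integral H, its inverse, and the acceptance test:

import numpy as np
from scipy.special import zeta

rng = np.random.default_rng(0)
a = 2.5                                           # power; must be > 1

H = lambda x: x ** (1. - a) / (a - 1.)            # H(x) = int_x^inf t**(-a) dt
H_inv = lambda u: ((a - 1.) * u) ** (-1. / (a - 1.))

u = H(0.5) * (1. - rng.random(200_000))           # U = H(X), so U in (0, H(0.5)]
x = H_inv(u)
k = np.floor(x + 0.5)
accept = u <= H(k + 0.5) + k ** (-a)              # analogue of `accept` above
samples = k[accept]

# Empirical P(K = 1) vs. the exact normalized pmf 1 / zeta(a).
print(np.mean(samples == 1), 1. / zeta(a))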
Example #3
    def _marginal_hidden_probs(self):
        """Compute marginal pdf for each individual observable."""

        initial_log_probs = tf.broadcast_to(
            self._log_init,
            tf.concat([self.batch_shape_tensor(), [self._num_states]], axis=0))

        # initial_log_probs :: batch_shape num_states

        def _scan_multiple_steps():
            """Perform `scan` operation when `num_steps` > 1."""

            transition_log_probs = self._log_trans

            def forward_step(log_probs, _):
                return _log_vector_matrix(log_probs, transition_log_probs)

            dummy_index = tf.zeros(self._num_steps - 1, dtype=tf.float32)

            forward_log_probs = tf.scan(forward_step,
                                        dummy_index,
                                        initializer=initial_log_probs,
                                        name="forward_log_probs")

            return tf.concat([[initial_log_probs], forward_log_probs], axis=0)

        forward_log_probs = prefer_static.cond(
            self._num_steps > 1, _scan_multiple_steps,
            lambda: initial_log_probs[tf.newaxis, ...])

        return tf.exp(forward_log_probs)
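A plain NumPy analogue (with made-up numbers) of what the log-space scan above computes: the marginal hidden-state distribution after t steps is the initial distribution pushed through the transition matrix t times.

import numpy as np

p0 = np.array([0.9, 0.1])                  # assumed initial distribution
P = np.array([[0.7, 0.3],
              [0.2, 0.8]])                 # assumed transition matrix
num_steps = 4

marginals = [p0]
for _ in range(num_steps - 1):
    marginals.append(marginals[-1] @ P)    # probability-space forward_step
marginals = np.stack(marginals)            # shape [num_steps, num_states]

print(np.allclose(marginals[-1], p0 @ np.linalg.matrix_power(P, num_steps - 1)))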
Example #4
 def _mean(self, distributions=None):
   if distributions is None:
     distributions = self.poisson_and_mixture_distributions()
   dist, mixture_dist = distributions
   return tf.exp(
       tf.reduce_logsumexp(
           mixture_dist.logits + dist.log_rate,
           axis=-1))
Example #5
 def _forward(self, x):
     y = tf.identity(x)
     if self.scale is not None:
         y = y * self.scale
     if self.log_scale is not None:
         y = y * tf.exp(self.log_scale)
     if self.shift is not None:
         y = y + self.shift
     return y
Example #6
 def _inverse(self, y):
     x = tf.identity(y)
     if self.shift is not None:
         x = x - self.shift
     if self.scale is not None:
         x = x / self.scale
     if self.log_scale is not None:
         x = x * tf.exp(-self.log_scale)
     return x
Example #7
 def _cdf(self, x):
     with tf.control_dependencies(self._maybe_assert_valid_sample(x)):
         concentration = tf.convert_to_tensor(self.concentration)
         loc = tf.convert_to_tensor(self.loc)
         return (special_math.ndtr(
             ((concentration / x)**0.5 *
              (x / loc - 1.))) + tf.exp(2. * concentration / loc) *
                 special_math.ndtr(-(concentration / x)**0.5 *
                                   (x / loc + 1)))
Example #8
            def grad(dy):
                """Computes the derivative wrt the min and max parameters.

                This function implements the derivative wrt the truncation
                bounds, which get blocked by the sampler. We use a custom
                expression for numerical stability instead of automatic
                differentiation on the CDF for implicit gradients.

                Args:
                  dy: output gradients

                Returns:
                  The gradients wrt the lower bound and the upper bound.
                """
                # std_samples has an extra dimension (the sample dimension), expand
                # lower and upper so they broadcast along this dimension.
                # See the note above regarding parameterized_truncated_normal:
                # the sample dimension is the final dimension.
                lower_broadcast = lower[..., tf.newaxis]
                upper_broadcast = upper[..., tf.newaxis]

                cdf_samples = ((special_math.ndtr(std_samples) -
                                special_math.ndtr(lower_broadcast)) /
                               (special_math.ndtr(upper_broadcast) -
                                special_math.ndtr(lower_broadcast)))

                # tiny, eps are tolerance parameters to ensure we stay away from giving
                # a zero arg to the log CDF expression.

                tiny = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny
                eps = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).eps
                cdf_samples = tf.clip_by_value(cdf_samples, tiny, 1 - eps)

                du = tf.exp(0.5 * (std_samples**2 - upper_broadcast**2) +
                            tf.math.log(cdf_samples))
                dl = tf.exp(0.5 * (std_samples**2 - lower_broadcast**2) +
                            tf.math.log1p(-cdf_samples))

                # Reduce the gradient across the samples
                grad_u = tf.reduce_sum(dy * du, axis=-1)
                grad_l = tf.reduce_sum(dy * dl, axis=-1)
                return [grad_l, grad_u]
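A hypothetical 1-D SciPy check (not part of the class) of the custom gradient expression: holding the underlying uniform noise fixed, the derivative of an inverse-CDF truncated-normal sample with respect to the upper bound matches exp(0.5 * (x**2 - upper**2)) * cdf_samples, and a finite difference agrees.

import numpy as np
from scipy.stats import norm

l, u_bound, u0 = -1.0, 2.0, 0.7      # assumed bounds and fixed uniform noise

def sample(upper):
    return norm.ppf(norm.cdf(l) + u0 * (norm.cdf(upper) - norm.cdf(l)))

x = sample(u_bound)
cdf_samples = (norm.cdf(x) - norm.cdf(l)) / (norm.cdf(u_bound) - norm.cdf(l))
du_formula = np.exp(0.5 * (x**2 - u_bound**2)) * cdf_samples

eps = 1e-6
du_numeric = (sample(u_bound + eps) - sample(u_bound - eps)) / (2. * eps)
print(du_formula, du_numeric)        # agree to several decimal places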
Example #9
 def _log_cdf(self, x):
     scale = tf.convert_to_tensor(self.scale)
     concentration = tf.convert_to_tensor(self.concentration)
     z = self._z(x, scale, concentration)
     eq_zero = tf.equal(concentration,
                        0)  # Concentration = 0 ==> Exponential.
     nonzero_conc = tf.where(eq_zero, tf.constant(1, self.dtype),
                             concentration)
     where_nonzero = tf.math.log1p(-(1 + nonzero_conc * z)**(-1 /
                                                             nonzero_conc))
     where_zero = tf.math.log1p(-tf.exp(-z))
     return tf.where(eq_zero, where_zero, where_nonzero)
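A short NumPy illustration (assumed values) of the "Concentration = 0 ==> Exponential" remark: as the concentration shrinks, (1 + c * z)**(-1 / c) approaches exp(-z), so where_nonzero approaches where_zero.

import numpy as np

z = np.array([0.5, 1.0, 3.0])
for c in [1e-2, 1e-4, 1e-6]:
    print(np.max(np.abs((1. + c * z) ** (-1. / c) - np.exp(-z))))
# The differences shrink toward 0 as c decreases.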
Example #10
def _bessel_ive(v, z, cache=None):
    """Computes I_v(z)*exp(-abs(z)) using a recurrence relation, where z > 0."""
    # TODO(b/67497980): Switch to a more numerically faithful implementation.
    z = tf.convert_to_tensor(z)

    wrap = lambda result: tf.debugging.check_numerics(
        result, 'besseli{}'.format(v))

    if float(v) >= 2:
        raise ValueError(
            'Evaluating bessel_i by recurrence becomes imprecise for large v')

    cache = cache or {}
    safe_z = tf.where(z > 0, z, tf.ones_like(z))
    if v in cache:
        return wrap(cache[v])
    if v == 0:
        cache[v] = tf.math.bessel_i0e(z)
    elif v == 1:
        cache[v] = tf.math.bessel_i1e(z)
    elif v == 0.5:
        # sinh(x)*exp(-abs(x)), sinh(x) = (e^x - e^{-x}) / 2
        sinhe = lambda x: (tf.exp(x - tf.abs(x)) - tf.exp(-x - tf.abs(x))) / 2
        cache[v] = (
            np.sqrt(2 / np.pi) * sinhe(z) *
            tf.where(z > 0, tf.math.rsqrt(safe_z), tf.ones_like(safe_z)))
    elif v == -0.5:
        # cosh(x)*exp(-abs(x)), cosh(x) = (e^x + e^{-x}) / 2
        coshe = lambda x: (tf.exp(x - tf.abs(x)) + tf.exp(-x - tf.abs(x))) / 2
        cache[v] = (
            np.sqrt(2 / np.pi) * coshe(z) *
            tf.where(z > 0, tf.math.rsqrt(safe_z), tf.ones_like(safe_z)))
    if v <= 1:
        return wrap(cache[v])
    # Recurrence relation:
    cache[v] = (_bessel_ive(v - 2, z, cache) -
                (2 * (v - 1)) * _bessel_ive(v - 1, z, cache) / z)
    return wrap(cache[v])
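An illustrative SciPy check (outside TensorFlow) of the recurrence used above, I_v(z) = I_{v-2}(z) - 2 * (v - 1) / z * I_{v-1}(z), which still holds after multiplying through by exp(-|z|):

import numpy as np
from scipy.special import ive   # exponentially scaled Bessel: I_v(z) * exp(-|z|)

v = 1.5                         # within the v < 2 range accepted above
z = np.linspace(0.5, 10., 5)
recurrence = ive(v - 2, z) - 2. * (v - 1.) / z * ive(v - 1, z)
print(np.allclose(recurrence, ive(v, z)))  # True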
Example #11
 def _variance(self):
     concentration1 = tf.convert_to_tensor(self.concentration1)
     concentration0 = tf.convert_to_tensor(self.concentration0)
     log_moment2 = self._log_moment(2,
                                    concentration1=concentration1,
                                    concentration0=concentration0)
     log_moment1 = self._log_moment(1,
                                    concentration1=concentration1,
                                    concentration0=concentration0)
     lswe, sign = tfp_math.reduce_weighted_logsumexp(tf.stack(
         [log_moment2, 2 * log_moment1], axis=-1), [1., -1],
                                                     axis=-1,
                                                     return_sign=True)
     return sign * tf.exp(lswe)
Example #12
 def _sample_n(self, n, seed=None):
     concentration = tf.convert_to_tensor(self.concentration)
     scale = tf.convert_to_tensor(self.scale)
     shape = tf.concat([[n],
                        self._batch_shape_tensor(
                            concentration=concentration, scale=scale)],
                       axis=0)
     sampled = tf.random.uniform(shape,
                                 maxval=1.,
                                 seed=seed,
                                 dtype=self.dtype)
     log_sample = tf.math.log(
         scale) - tf.math.log1p(-sampled) / concentration
     return tf.exp(log_sample)
Example #13
def _logsum_expbig_minus_expsmall(big, small):
    """Stable evaluation of `Log[exp{big} - exp{small}]`.

  To work correctly, we should have the pointwise relation:  `small <= big`.

  Args:
    big: Floating-point `Tensor`
    small: Floating-point `Tensor` with same `dtype` as `big` and broadcastable
      shape.

  Returns:
    `Tensor` of the same `dtype` as `big` and broadcast shape.
  """
    with tf.name_scope("logsum_expbig_minus_expsmall"):
        return tf.math.log1p(-tf.exp(small - big)) + big
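A small NumPy analogue (made-up values) showing why the rewrite is stable: evaluating log(exp(big) - exp(small)) directly overflows, while log1p(-exp(small - big)) + big does not.

import numpy as np

big, small = np.float64(1000.), np.float64(998.)
naive = np.log(np.exp(big) - np.exp(small))    # overflows: inf - inf -> nan
stable = np.log1p(-np.exp(small - big)) + big  # ~ 999.8546
print(naive, stable)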
Example #14
 def _sample_n(self, n, seed=None):
     # Here we use the fact that if:
     # lam ~ Gamma(concentration=total_count, rate=(1-probs)/probs)
     # then X ~ Poisson(lam) is Negative Binomially distributed.
     logits = self._logits_parameter_no_checks()
     stream = SeedStream(seed, salt='NegativeBinomial')
     rate = tf.random.gamma(shape=[n],
                            alpha=self.total_count,
                            beta=tf.exp(-logits),
                            dtype=self.dtype,
                            seed=stream())
     return tf.random.poisson(lam=rate,
                              shape=[],
                              dtype=self.dtype,
                              seed=stream())
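An illustrative NumPy version (assumed total_count and probs) of the gamma-Poisson construction described in the comment above; the resulting samples match the negative binomial mean r * p / (1 - p) and variance r * p / (1 - p)**2.

import numpy as np

rng = np.random.default_rng(0)
total_count, probs = 5., 0.3

# rate = (1 - probs) / probs, so NumPy's scale parameter is probs / (1 - probs).
lam = rng.gamma(shape=total_count, scale=probs / (1. - probs), size=200_000)
x = rng.poisson(lam)

print(x.mean(), total_count * probs / (1. - probs))
print(x.var(), total_count * probs / (1. - probs) ** 2)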
Example #15
 def _log_prob(self, x):
     scale = tf.convert_to_tensor(self.scale)
      # The exact HalfCauchy-Normal marginal log-density is analytically
      # intractable; we compute a (relatively accurate) numerical
      # approximation. This is a log-space version of ref. [2] from the class
      # docstring.
     xx = (x / scale)**2 / 2
     g = 0.5614594835668851  # tf.exp(-0.5772156649015328606)
     b = 1.0420764938351215  # tf.sqrt(2 * (1-g) / (g * (2-g)))
     h_inf = 1.0801359952503342  #  (1-g)*(g*g-6*g+12) / (3*g * (2-g)**2 * b)
     q = 20. / 47. * xx**1.0919284281983377
     h = 1. / (1 + xx**(1.5)) + h_inf * q / (1 + q)
     c = -.5 * np.log(2 * np.pi**3) - tf.math.log(g * scale)
     return -tf.math.log1p(
         (1 - g) / g * tf.exp(-xx / (1 - g))) + tf.math.log(
             tf.math.log1p(g / xx - (1 - g) / (h + b * xx)**2)) + c
Example #16
 def _finish_prob_for_one_fiber(self, y, x, ildj, event_ndims,
                                **distribution_kwargs):
     """Finish computation of prob on one element of the inverse image."""
     x = self._maybe_rotate_dims(x, rotate_right=True)
     prob = self.distribution.prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
         prob = tf.reduce_prod(prob, axis=self._reduce_event_indices)
     prob = prob * tf.exp(tf.cast(ildj, prob.dtype))
     if self._is_maybe_event_override and isinstance(event_ndims, int):
         tensorshape_util.set_shape(
             prob,
             tf.broadcast_static_shape(
                 tensorshape_util.with_rank_at_least(y.shape,
                                                     1)[:-event_ndims],
                 self.batch_shape))
     return prob
Example #17
def log_cdf_laplace(x, name="log_cdf_laplace"):
  """Log Laplace distribution function.

  This function calculates `Log[L(x)]`, where `L(x)` is the cumulative
  distribution function of the Laplace distribution, i.e.

  ```L(x) := 0.5 * int_{-infty}^x e^{-|t|} dt```

  For numerical accuracy, `L(x)` is computed in different ways depending on `x`:

  ```
  x <= 0:
    Log[L(x)] = Log[0.5] + x, which is exact

  0 < x:
    Log[L(x)] = Log[1 - 0.5 * e^{-x}], which is exact
  ```

  Args:
    x: `Tensor` of type `float32`, `float64`.
    name: Python string. A name for the operation (default="log_cdf_laplace").

  Returns:
    `Tensor` with `dtype=x.dtype`.

  Raises:
    TypeError: if `x.dtype` is not handled.
  """

  with tf.name_scope(name):
    x = tf.convert_to_tensor(x, name="x")

    # For x < 0, L(x) = 0.5 * exp{x} exactly, so Log[L(x)] = log(0.5) + x.
    lower_solution = -np.log(2.) + x

    # safe_exp_neg_x = exp{-x} for x > 0, but is
    # bounded above by 1, which avoids
    #   log[1 - 1] = -inf for x = log(1/2), AND
    #   exp{-x} --> inf, for x << -1
    safe_exp_neg_x = tf.exp(-tf.abs(x))

    # log1p(z) = log(1 + z) approx z for |z| << 1. This approximation is used
    # internally by log1p, rather than being done explicitly here.
    upper_solution = tf.math.log1p(-0.5 * safe_exp_neg_x)

    return tf.where(x < 0., lower_solution, upper_solution)
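A quick NumPy check (illustrative, at moderate x where both forms are accurate) that the two branches above agree with a direct evaluation of the Laplace CDF:

import numpy as np

x = np.array([-3., -0.5, 0.5, 3.])
direct = np.log(np.where(x < 0., 0.5 * np.exp(x), 1. - 0.5 * np.exp(-x)))
branched = np.where(x < 0., -np.log(2.) + x, np.log1p(-0.5 * np.exp(-np.abs(x))))
print(np.allclose(direct, branched))  # True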
Example #18
    def _hat_integral(self, x, power):
        """Integral of the `hat` function, used for sampling.

    We choose a `hat` function, h(x) = x^(-power), which is a continuous
    (unnormalized) density touching each positive integer at the (unnormalized)
    pmf. This function implements the `hat` integral H(x) = int_x^inf h(t) dt,
    which is needed for sampling.

    Args:
      x: A Tensor of points x at which to evaluate H(x).
      power: Power that parameterizes the hat function.

    Returns:
      A Tensor containing the evaluation of H(x) at x.
    """
        x = tf.cast(x, power.dtype)
        t = power - 1.
        return tf.exp((-t) * tf.math.log1p(x) - tf.math.log(t))
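An illustrative SciPy check of the closed form used above; matching the log1p in the return expression, the integrand here is h(t) = (1 + t)**(-power), so H(x) = (1 + x)**(1 - power) / (power - 1) = exp(-(power - 1) * log1p(x) - log(power - 1)):

import numpy as np
from scipy import integrate

power, x = 3.0, 2.5   # assumed values
closed_form = np.exp(-(power - 1.) * np.log1p(x) - np.log(power - 1.))
numeric, _ = integrate.quad(lambda t: (1. + t) ** (-power), x, np.inf)
print(closed_form, numeric)  # both ~ 0.0408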
Example #19
def _kl_bernoulli_bernoulli(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b ProbitBernoulli.

  Args:
    a: instance of a ProbitBernoulli distribution object.
    b: instance of a ProbitBernoulli distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_probit_bernoulli_probit_bernoulli'`).

  Returns:
    Batchwise KL(a || b)
  """
    with tf.name_scope(name or 'kl_probit_bernoulli_probit_bernoulli'):
        a_log_probs0, a_log_probs1 = a._outcome_log_probs()  # pylint: disable=protected-access
        b_log_probs0, b_log_probs1 = b._outcome_log_probs()  # pylint: disable=protected-access
        a_prob1 = tf.exp(a_log_probs1)

        return (1. - a_prob1) * (a_log_probs0 - b_log_probs0) + a_prob1 * (
            a_log_probs1 - b_log_probs1)
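A scalar NumPy check (hypothetical p and q; the probit parameterization only changes how the outcome probabilities are computed) that the expression above matches the brute-force Bernoulli KL divergence:

import numpy as np

p, q = 0.2, 0.6
kl_formula = (1. - p) * (np.log1p(-p) - np.log1p(-q)) + p * (np.log(p) - np.log(q))

# Brute force over the two outcomes.
kl_brute = sum(pa * np.log(pa / pb) for pa, pb in [(1. - p, 1. - q), (p, q)])
print(kl_formula, kl_brute)  # equal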
Example #20
    def _prob(self, y, **kwargs):
        if not hasattr(self.distribution, "_prob"):
            return tf.exp(self.log_prob(y, **kwargs))
        distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)

        x = self.bijector.inverse(y, **bijector_kwargs)
        event_ndims = self._maybe_get_static_event_ndims()
        ildj = self.bijector.inverse_log_det_jacobian(y,
                                                      event_ndims=event_ndims,
                                                      **bijector_kwargs)
        if self.bijector._is_injective:  # pylint: disable=protected-access
            return self._finish_prob_for_one_fiber(y, x, ildj, event_ndims,
                                                   **distribution_kwargs)

        prob_on_fibers = [
            self._finish_prob_for_one_fiber(y, x_i, ildj_i, event_ndims,
                                            **distribution_kwargs)
            for x_i, ildj_i in zip(x, ildj)
        ]
        return sum(prob_on_fibers)
Example #21
def _kl_laplace_laplace(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Laplace.

  Args:
    a: instance of a Laplace distribution object.
    b: instance of a Laplace distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_laplace_laplace'`).

  Returns:
    kl_div: Batchwise KL(a || b)
  """
    with tf.name_scope(name or 'kl_laplace_laplace'):
        # Consistent with
        # http://www.mast.queensu.ca/~communications/Papers/gil-msc11.pdf, page 38
        distance = tf.abs(a.loc - b.loc)
        a_scale = tf.convert_to_tensor(a.scale)
        b_scale = tf.convert_to_tensor(b.scale)
        delta_log_scale = tf.math.log(a_scale) - tf.math.log(b_scale)
        return (-delta_log_scale + distance / b_scale - 1. +
                tf.exp(-distance / a_scale + delta_log_scale))
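A Monte Carlo sanity check in plain NumPy (made-up parameters) of the closed form above, KL(a || b) = log(b_scale / a_scale) + |a_loc - b_loc| / b_scale + (a_scale / b_scale) * exp(-|a_loc - b_loc| / a_scale) - 1:

import numpy as np

rng = np.random.default_rng(0)
a_loc, a_scale, b_loc, b_scale = 0.0, 1.0, 2.0, 3.0

distance = abs(a_loc - b_loc)
delta_log_scale = np.log(a_scale) - np.log(b_scale)
kl_closed = (-delta_log_scale + distance / b_scale - 1.
             + np.exp(-distance / a_scale + delta_log_scale))

# Monte Carlo estimate of E_a[log p_a(X) - log p_b(X)].
x = rng.laplace(a_loc, a_scale, size=400_000)
log_pa = -np.log(2. * a_scale) - np.abs(x - a_loc) / a_scale
log_pb = -np.log(2. * b_scale) - np.abs(x - b_loc) / b_scale
print(kl_closed, np.mean(log_pa - log_pb))  # close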
Example #22
 def _prob(self, x):
     return tf.exp(self._log_prob(x))
Example #23
 def _inverse(self, y):
     x0 = y[..., :1]
     xk = tf.exp(y[..., 1:])
     x = tf.concat([x0, xk], axis=-1)
     return tf.cumsum(x, axis=-1)
Example #24
 def _forward_log_det_jacobian(self, x):
     scale = tf.convert_to_tensor(self.scale)
     z = (x - self.loc) / scale
     return -z - tf.exp(-z) - tf.math.log(scale)
Example #25
 def _forward(self, x):
     z = (x - self.loc) / self.scale
     return tf.exp(-tf.exp(-z))
Example #26
 def _normal_pdf(self, x):
     return 1. / np.sqrt(2 * np.pi) * tf.exp(-0.5 * tf.square(x))
Example #27
 def _mode(self):
     total_count = tf.convert_to_tensor(self.total_count)
     adjusted_count = tf.where(1. < total_count, total_count - 1.,
                               tf.zeros_like(total_count))
     return tf.floor(adjusted_count *
                     tf.exp(self._logits_parameter_no_checks()))
Example #28
 def _mean(self, logits=None):
     logits = self._logits_parameter_no_checks(
     ) if logits is None else logits
     return self.total_count * tf.exp(logits)
Example #29
 def _prob(self, x):
     scale = tf.convert_to_tensor(self.scale)
     coeff = np.sqrt(2) / scale / np.sqrt(np.pi)
     pdf = coeff * tf.exp(-0.5 * (x / scale)**2)
     return pdf * tf.cast(x >= 0, self.dtype)
Example #30
 def _prob(self, counts):
   return tf.exp(self._log_prob(counts))