Example No. 1
  def _log_normalization(self, concentration=None, name='log_normalization'):
    """Returns the log normalization of a CholeskyLKJ distribution.

    Args:
      concentration: `float` or `double` `Tensor`. The positive concentration
        parameter of the CholeskyLKJ distributions.
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      log_z: A Tensor of the same shape and dtype as `concentration`, containing
        the corresponding log normalizers.
    """
    # The formula is from D. Lewandowski et al [1], p. 1999, from the
    # proof that eqs 16 and 17 are equivalent.
    # Instead of using a for loop for k from 1 to (dimension - 1), we will
    # vectorize the computation by performing operations on the vector
    # `dimension_range = np.arange(1, dimension)`.
    with tf.name_scope(name or 'log_normalization_lkj'):
      if concentration is None:
        concentration = tf.convert_to_tensor(self.concentration)
      logpi = float(np.log(np.pi))
      dimension_range = np.arange(
          1.,
          self.dimension,
          dtype=dtype_util.as_numpy_dtype(concentration.dtype))
      effective_concentration = (
          concentration[..., tf.newaxis] +
          (self.dimension - 1 - dimension_range) / 2.)
      ans = tf.reduce_sum(
          tfp_math.log_gamma_difference(dimension_range / 2.,
                                        effective_concentration),
          axis=-1)
      # Then we add to `ans` the sum of `logpi / 2 * k` for `k` running from 1 to
      # `dimension - 1`.
      ans = ans + logpi * (self.dimension * (self.dimension - 1) / 4.)
      return ans
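
The vectorized reduction above is equivalent to the explicit loop mentioned in the comment. Below is a minimal NumPy/SciPy sketch of that loop form, assuming `tfp_math.log_gamma_difference(x, y)` computes `lgamma(y) - lgamma(x + y)`; the function name and scalar arguments are illustrative only.

```python
import numpy as np
from scipy.special import gammaln

def cholesky_lkj_log_normalization(concentration, dimension):
  """Loop form of the normalizer vectorized above (sketch, scalar inputs)."""
  log_z = 0.
  for k in range(1, dimension):
    # log_gamma_difference(k / 2, concentration + (dimension - 1 - k) / 2)
    log_z += (gammaln(concentration + (dimension - 1 - k) / 2.)
              - gammaln(concentration + (dimension - 1.) / 2.))
    # The log(pi) terms sum to logpi * dimension * (dimension - 1) / 4.
    log_z += 0.5 * k * np.log(np.pi)
  return log_z
```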
Example No. 2
  def _sample_n(self, n, seed=None):
    temperature = tf.convert_to_tensor(self.temperature)
    logits = self._logits_parameter_no_checks()

    # Uniform variates must be sampled from the open-interval `(0, 1)` rather
    # than `[0, 1)`. To do so, we use
    # `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny` because it is the
    # smallest, positive, 'normal' number. A 'normal' number is such that the
    # mantissa has an implicit leading 1. Normal, positive numbers x, y have the
    # reasonable property that, `x + y >= max(x, y)`. In this case, a subnormal
    # number (i.e., np.nextafter) can cause us to sample 0.
    uniform_shape = tf.concat(
        [[n],
         self._batch_shape_tensor(temperature=temperature, logits=logits),
         self._event_shape_tensor(logits=logits)], 0)
    uniform = tf.random.uniform(
        shape=uniform_shape,
        minval=np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny,
        maxval=1.,
        dtype=self.dtype,
        seed=seed)
    gumbel = -tf.math.log(-tf.math.log(uniform))
    noisy_logits = (gumbel + logits) / temperature[..., tf.newaxis]
    return tf.math.log_softmax(noisy_logits)
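
As a quick illustration of the comment about the open interval, here is a NumPy-only sketch (not TFP code) showing that a uniform draw of exactly 0 pushes the double-log Gumbel transform to `-inf`, while the smallest positive normal float stays finite.

```python
import numpy as np

tiny = np.finfo(np.float32).tiny              # smallest positive 'normal' float32
u = np.array([0., tiny, 0.5], dtype=np.float32)
with np.errstate(divide='ignore'):            # silence the log(0) warning
  gumbel = -np.log(-np.log(u))
print(gumbel)                                 # [-inf, ~-4.47, ~0.37]
```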
Example No. 3
def _owens_t_method1(h, a, m):
    """OwensT Method T1 using series expansions."""
    # Method T1, which is evaluation of a particular series expansion of OwensT.

    dtype = dtype_util.common_dtype([h, a], tf.float32)
    numpy_dtype = dtype_util.as_numpy_dtype(dtype)

    neg_half_h_squared = -0.5 * tf.math.square(h)
    a_squared = tf.math.square(a)

    def series_evaluation(should_stop, index, ai, di, gi, series_sum):

        new_ai = a_squared * ai
        new_di = gi - di
        new_gi = neg_half_h_squared / index * gi
        new_series_sum = tf.where(
            should_stop, series_sum,
            series_sum + new_di * new_ai / (2. * index - 1.))
        should_stop = index >= m
        return should_stop, index + 1., new_ai, new_di, new_gi, new_series_sum

    broadcast_shape = prefer_static.broadcast_shape(prefer_static.shape(h),
                                                    prefer_static.shape(a))
    initial_ai = a / numpy_dtype(2 * np.pi)
    initial_di = tf.math.expm1(neg_half_h_squared)
    initial_gi = neg_half_h_squared * tf.math.exp(neg_half_h_squared)
    initial_sum = (tf.math.atan(a) / numpy_dtype(2 * np.pi) +
                   initial_ai * initial_di)

    (_, _, _, _, _, series_sum) = tf.while_loop(
        cond=lambda stop, *_: tf.reduce_any(~stop),
        body=series_evaluation,
        loop_vars=(tf.zeros(broadcast_shape,
                            dtype=tf.bool), tf.cast(2., dtype=dtype),
                   initial_ai, initial_di, initial_gi, initial_sum))
    return series_sum
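
For context, this is a SciPy sketch (not TFP code) of what the series above approximates: Owen's T function, defined by the integral below, cross-checked against `scipy.special.owens_t`.

```python
import numpy as np
from scipy import integrate, special

def owens_t_quad(h, a):
  """T(h, a) = 1/(2*pi) * int_0^a exp(-h**2 (1 + x**2) / 2) / (1 + x**2) dx."""
  integrand = lambda x: np.exp(-0.5 * h**2 * (1. + x**2)) / (1. + x**2)
  value, _ = integrate.quad(integrand, 0., a)
  return value / (2. * np.pi)

print(owens_t_quad(0.3, 0.7), special.owens_t(0.3, 0.7))  # closely agree
```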
Example No. 4
    def _sample_n(self, n, seed=None):
        loc = tf.convert_to_tensor(self.loc)
        concentration = tf.convert_to_tensor(self.concentration)
        concentration = tf.broadcast_to(
            concentration,
            self._batch_shape_tensor(loc=loc, concentration=concentration))

        # random_von_mises does not work for zero concentration, so round it up to
        # something very small.
        tiny = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny
        concentration = tf.maximum(concentration, tiny)

        sample_batch_shape = tf.concat(
            [[n], prefer_static.shape(concentration)], axis=0)
        samples = random_von_mises(sample_batch_shape,
                                   concentration,
                                   dtype=self.dtype,
                                   seed=seed)

        # vonMises(0, concentration) -> vonMises(loc, concentration)
        samples = samples + loc
        # Map the samples to [-pi, pi].
        samples = samples - 2. * np.pi * tf.round(samples / (2. * np.pi))
        return samples
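
A tiny NumPy sketch (not TFP code, illustrative values only) of the final wrapping step: subtracting the nearest multiple of `2*pi` maps shifted samples back to `[-pi, pi]`.

```python
import numpy as np

samples = np.array([0.1, 3.5, -4.0, 7.0])
loc = 1.0
shifted = samples + loc
wrapped = shifted - 2. * np.pi * np.round(shifted / (2. * np.pi))
print(wrapped)  # every entry now lies in [-pi, pi]
```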
Example No. 5
 def _log_prob(self, x):
     x = tf.convert_to_tensor(value=x, name='x')
     right_indices = tf.minimum(
         tf.size(input=self.outcomes) - 1,
         tf.reshape(
             tf.searchsorted(self.outcomes,
                             values=tf.reshape(x, shape=[-1]),
                             side='right'),
             dist_util.prefer_static_shape(x)))
     use_right_indices = self._is_equal_or_close(
         x, tf.gather(self.outcomes, indices=right_indices))
     left_indices = tf.maximum(0, right_indices - 1)
     use_left_indices = self._is_equal_or_close(
         x, tf.gather(self.outcomes, indices=left_indices))
     log_probs = self._categorical.log_prob(
         tf1.where(use_left_indices, left_indices, right_indices))
     should_be_neg_inf = tf.broadcast_to(
         tf.logical_not(use_left_indices | use_right_indices),
         shape=dist_util.prefer_static_shape(log_probs))
     return tf1.where(
         should_be_neg_inf,
         tf.fill(dist_util.prefer_static_shape(should_be_neg_inf),
                 dtype_util.as_numpy_dtype(log_probs.dtype)(-np.inf)),
         log_probs)
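
The index juggling above can be seen in a plain NumPy sketch (illustrative outcomes and values, not TFP code): `searchsorted` with `side='right'` gives the insertion point, and a matching outcome, if any, sits either there or one slot to the left.

```python
import numpy as np

outcomes = np.array([1., 2., 4., 8.])
x = np.array([2., 3., 8.])
right = np.minimum(outcomes.size - 1, np.searchsorted(outcomes, x, side='right'))
left = np.maximum(0, right - 1)
use_left = np.isclose(x, outcomes[left])
use_right = np.isclose(x, outcomes[right])
print(use_left | use_right)  # [True, False, True]; x == 3. matches no outcome
```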
Example No. 6
    def testPoissonLogPmfContinuousRelaxation(self):
        batch_size = 12
        lam = tf.constant([3.0] * batch_size)
        x = np.array([-3., -0.5, 0., 2., 2.2, 3., 3.1, 4., 5., 5.5, 6.,
                      7.]).astype(np.float32)
        poisson = self._make_poisson(rate=lam, interpolate_nondiscrete=True)

        expected_continuous_log_pmf = (x * poisson.log_rate -
                                       tf.math.lgamma(1. + x) - poisson.rate)
        expected_continuous_log_pmf = tf.where(
            x >= 0., expected_continuous_log_pmf,
            dtype_util.as_numpy_dtype(
                expected_continuous_log_pmf.dtype)(-np.inf))
        expected_continuous_pmf = tf.exp(expected_continuous_log_pmf)

        log_pmf = poisson.log_prob(x)
        self.assertEqual((batch_size, ), log_pmf.shape)
        self.assertAllClose(self.evaluate(log_pmf),
                            self.evaluate(expected_continuous_log_pmf))

        pmf = poisson.prob(x)
        self.assertEqual((batch_size, ), pmf.shape)
        self.assertAllClose(self.evaluate(pmf),
                            self.evaluate(expected_continuous_pmf))
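
A short NumPy/SciPy sketch (not the test code) of the interpolated log-pmf the test expects: `x * log(rate) - lgamma(1 + x) - rate` for nonnegative `x`, and `-inf` otherwise.

```python
import numpy as np
from scipy.special import gammaln

rate = 3.0
x = np.array([-0.5, 2.2, 4.0])
log_pmf = np.where(x >= 0., x * np.log(rate) - gammaln(1. + x) - rate, -np.inf)
print(log_pmf)
```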
Example No. 7
 def verify_expectations(self, dimension, dtype):
     num_samples = int(1e6)
     # pylint: disable=protected-access
     x = tfd.lkj._tril_spherical_uniform(dimension=dimension,
                                         batch_shape=[num_samples],
                                         dtype=dtype,
                                         seed=test_util.test_seed())
     # pylint: enable=protected-access
     self.assertEqual(dtype, dtype_util.as_numpy_dtype(x.dtype))
     final_shape = [num_samples, dimension, dimension]
     self.assertAllEqual(final_shape, x.shape)
     sample_mean = tf.reduce_mean(x, axis=0)
     sample_var = tf.reduce_mean(tf.math.squared_difference(x, sample_mean),
                                 axis=0)
     samples, sample_mean, sample_var = self.evaluate(
         [x, sample_mean, sample_var])
     self.assertAllMeansClose(samples,
                              np.zeros_like(sample_mean),
                              axis=0,
                              atol=3e-3,
                              rtol=1e-3)
     expected_var = np.tril(np.ones([dimension, dimension], dtype=dtype))
     expected_var = expected_var / np.arange(1, dimension + 1)[..., None]
     self.assertAllClose(expected_var, sample_var, atol=2e-3, rtol=1e-2)
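
The variance target in this test follows from a standard fact, sketched below with plain NumPy (not the test code): a uniformly distributed unit vector in `R^k` has mean zero and per-coordinate variance `1/k`, which is why row `i` of the lower-triangular sample has variance `1/(i + 1)`.

```python
import numpy as np

rng = np.random.default_rng(0)
k = 3
v = rng.normal(size=(100000, k))
v /= np.linalg.norm(v, axis=-1, keepdims=True)  # uniform on the unit sphere
print(v.var(axis=0))                            # each entry is close to 1/3
```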
Example No. 8
def _bessel_kve_naive(v, z):
    """Compute bessel_kve(v, z)."""
    dtype = dtype_util.common_dtype([v, z], tf.float32)
    numpy_dtype = dtype_util.as_numpy_dtype(dtype)
    v = tf.convert_to_tensor(v, dtype=dtype)
    z = tf.convert_to_tensor(z, dtype=dtype)

    # K_{-v} == K_{v} for negative values.
    v = tf.math.abs(v)

    z_abs = tf.math.abs(z)
    # Handle the zero case specially.
    z_abs = tf.where(tf.math.equal(z_abs, 0.), numpy_dtype(1.), z_abs)

    small_v = tf.where(v < 50., v, numpy_dtype(0.1))
    large_v = tf.where(v >= 50., v, numpy_dtype(1000.))

    _, olver_kve = _olver_asymptotic_uniform(large_v, z_abs)
    temme_kve = _temme_expansion(small_v, z_abs)[1]
    kve = tf.where(v >= 50., olver_kve, temme_kve)

    # Handle when z is zero.
    kve = tf.where(tf.math.equal(z, 0.), numpy_dtype(np.inf), kve)
    return tf.where(z < 0., numpy_dtype(np.nan), kve)
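
As a reference point (an assumed equivalence for illustration, not part of the TFP code), SciPy exposes the same exponentially scaled Bessel function and the `K_{-v} == K_{v}` symmetry used above.

```python
import numpy as np
from scipy.special import kve

v, z = 2.5, 1.3
print(kve(v, z), kve(-v, z))  # equal, by the K_{-v} == K_{v} symmetry
print(kve(v, 0.))             # inf at z == 0, matching the special case above
```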
Example No. 9
def fit_one_step(
    model_matrix,
    response,
    model,
    model_coefficients_start=None,
    predicted_linear_response_start=None,
    l2_regularizer=None,
    dispersion=None,
    offset=None,
    learning_rate=None,
    fast_unsafe_numerics=True,
    l2_regularization_penalty_factor=None,
    name=None):
  """Runs one step of Fisher scoring.

  Args:
    model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each row
      represents a sample's features.
    response: (Batch of) vector-shaped `Tensor` where each element represents a
      sample's observed response (to the corresponding row of features). Must
      have same `dtype` as `model_matrix`.
    model: `tfp.glm.ExponentialFamily`-like instance used to construct the
      negative log-likelihood loss, gradient, and expected Hessian (i.e., the
      Fisher information matrix).
    model_coefficients_start: Optional (batch of) vector-shaped `Tensor`
      representing the initial model coefficients, one for each column in
      `model_matrix`. Must have same `dtype` as `model_matrix`.
      Default value: Zeros.
    predicted_linear_response_start: Optional `Tensor` with `shape`, `dtype`
      matching `response`; represents `offset` shifted initial linear
      predictions based on `model_coefficients_start`.
      Default value: `offset` if `model_coefficients is None`, and
      `tf.linalg.matvec(model_matrix, model_coefficients_start) + offset`
      otherwise.
    l2_regularizer: Optional scalar `Tensor` representing L2 regularization
      penalty, i.e.,
      `loss(w) = sum{-log p(y[i]|x[i],w) : i=1..n} + l2_regularizer ||w||_2^2`.
      Default value: `None` (i.e., no L2 regularization).
    dispersion: Optional (batch of) `Tensor` representing `response` dispersion,
      i.e., as in, `p(y|theta) := exp((y theta - A(theta)) / dispersion)`.
      Must broadcast with rows of `model_matrix`.
      Default value: `None` (i.e., "no dispersion").
    offset: Optional `Tensor` representing constant shift applied to
      `predicted_linear_response`.  Must broadcast to `response`.
      Default value: `None` (i.e., `tf.zeros_like(response)`).
    learning_rate: Optional (batch of) scalar `Tensor` used to dampen iterative
      progress. Typically only needed if optimization diverges, should be no
      larger than `1` and typically very close to `1`.
      Default value: `None` (i.e., `1`).
    fast_unsafe_numerics: Optional Python `bool` indicating if solve should be
      based on Cholesky or QR decomposition.
      Default value: `True` (i.e., "prefer speed via Cholesky decomposition").
    l2_regularization_penalty_factor: Optional (batch of) vector-shaped
      `Tensor`, representing a separate penalty factor to apply to each model
      coefficient, length equal to columns in `model_matrix`. Each penalty
      factor multiplies l2_regularizer to allow differential regularization. Can
      be 0 for some coefficients, which implies no regularization. Default is 1
      for all coefficients.
      `loss(w) = sum{-log p(y[i]|x[i],w) : i=1..n} + l2_regularizer ||w *
        l2_regularization_penalty_factor||_2^2`
    name: Python `str` used as name prefix to ops created by this function.
      Default value: `"fit_one_step"`.

  Returns:
    model_coefficients: (Batch of) vector-shaped `Tensor`; represents the
      next estimate of the model coefficients, one for each column in
      `model_matrix`.
    predicted_linear_response: `response`-shaped `Tensor` representing linear
      predictions based on new `model_coefficients`, i.e.,
      `tf.linalg.matvec(model_matrix, model_coefficients_next) + offset`.
  """
  with tf.name_scope(name or 'fit_one_step'):

    [
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset,
    ] = prepare_args(
        model_matrix,
        response,
        model_coefficients_start,
        predicted_linear_response_start,
        offset)

    # Compute: mean, grad(mean, predicted_linear_response_start), and variance.
    mean, variance, grad_mean = model(predicted_linear_response_start)

    # If either `grad_mean` or `variance` is non-finite or zero, then we'll
    # replace it with a value such that the row is zeroed out. Although this
    # procedure may seem circuitous, it is necessary to ensure this algorithm is
    # itself differentiable.
    is_valid = (
        tf.math.is_finite(grad_mean) & tf.not_equal(grad_mean, 0.)
        & tf.math.is_finite(variance) & (variance > 0.))

    def mask_if_invalid(x, mask):
      return tf.where(
          is_valid, x, np.array(mask, dtype_util.as_numpy_dtype(x.dtype)))

    # Run one step of iteratively reweighted least-squares.
    # Compute "`z`", the adjusted predicted linear response.
    # z = predicted_linear_response_start
    #     + learning_rate * (response - mean) / grad_mean
    z = (response - mean) / mask_if_invalid(grad_mean, 1.)
    # TODO(jvdillon): Rather than use learning rate, we should consider using
    # backtracking line search.
    if learning_rate is not None:
      z *= learning_rate[..., tf.newaxis]
    z += predicted_linear_response_start
    if offset is not None:
      z -= offset

    # Compute "`w`", the per-sample weight.
    if dispersion is not None:
      # For convenience, we'll now scale the variance by the dispersion factor.
      variance *= dispersion
    w = (
        mask_if_invalid(grad_mean, 0.) *
        tf.math.rsqrt(mask_if_invalid(variance, np.inf)))

    a = model_matrix * w[..., tf.newaxis]
    b = z * w
    # Solve `min{ || A @ model_coefficients - b ||_2**2 : model_coefficients }`
    # where `@` denotes `matmul`.

    if l2_regularizer is None:
      l2_regularizer = np.array(0, dtype_util.as_numpy_dtype(a.dtype))
    else:
      l2_regularizer_ = distribution_util.maybe_get_static_value(
          l2_regularizer, dtype_util.as_numpy_dtype(a.dtype))
      if l2_regularizer_ is not None:
        l2_regularizer = l2_regularizer_

    def _embed_l2_regularization():
      """Adds synthetic observations to implement L2 regularization."""
      # `tf.matrix_solve_ls` does not respect the `l2_regularization` argument
      # when `fast_unsafe_numerics` is `False`. This function  adds synthetic
      # observations to the data to implement the regularization instead.
      # Adding observations `sqrt(l2_regularizer) * I` is mathematically
      # equivalent to adding the term
      # `-l2_regularizer ||coefficients||_2**2` to the log-likelihood.
      num_model_coefficients = num_cols(model_matrix)
      batch_shape = tf.shape(model_matrix)[:-2]
      if l2_regularization_penalty_factor is None:
        eye = tf.eye(
            num_model_coefficients, batch_shape=batch_shape, dtype=a.dtype)
      else:
        eye = tf.linalg.tensor_diag(
            tf.cast(l2_regularization_penalty_factor, dtype=a.dtype))
        broadcasted_shape = prefer_static.concat(
            [batch_shape, [num_model_coefficients, num_model_coefficients]],
            axis=0)
        eye = tf.broadcast_to(eye, broadcasted_shape)
      a_ = tf.concat([a, tf.sqrt(l2_regularizer) * eye], axis=-2)
      b_ = distribution_util.pad(
          b, count=num_model_coefficients, axis=-1, back=True)
      # Return l2_regularizer=0 since it's now embedded.
      l2_regularizer_ = np.array(0, dtype_util.as_numpy_dtype(a.dtype))
      return a_, b_, l2_regularizer_

    a, b, l2_regularizer = prefer_static.cond(
        prefer_static.reduce_all([
            prefer_static.logical_or(
                not(fast_unsafe_numerics),
                l2_regularization_penalty_factor is not None),
            l2_regularizer > 0.
        ]),
        _embed_l2_regularization,
        lambda: (a, b, l2_regularizer))

    model_coefficients_next = tf.linalg.lstsq(
        a,
        b[..., tf.newaxis],
        fast=fast_unsafe_numerics,
        l2_regularizer=l2_regularizer,
        name='model_coefficients_next')
    model_coefficients_next = model_coefficients_next[..., 0]

    # TODO(b/79122261): The approach used in `matrix_solve_ls` could be made
    # faster by avoiding explicitly forming Q and instead keeping the
    # factorization in 'implicit' form with stacked (rescaled) Householder
    # vectors underneath the 'R' and then applying the (accumulated)
    # reflectors in the appropriate order to apply Q'. However, we don't
    # presently do this because we lack core TF functionality. For reference,
    # the vanilla QR approach is:
    #   q, r = tf.linalg.qr(a)
    #   c = tf.matmul(q, b, adjoint_a=True)
    #   model_coefficients_next = tf.matrix_triangular_solve(
    #       r, c, lower=False, name='model_coefficients_next')

    predicted_linear_response_next = compute_predicted_linear_response(
        model_matrix,
        model_coefficients_next,
        offset,
        name='predicted_linear_response_next')

    return model_coefficients_next, predicted_linear_response_next
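
To make the `z`/`w` construction concrete, here is a compact NumPy sketch (an illustrative Poisson-GLM example with a log link, not library code) of one Fisher-scoring / IRLS step in the same form: adjusted response, per-sample weights, then a weighted least-squares solve.

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))                  # model_matrix
true_beta = np.array([0.5, -0.25, 0.1])
y = rng.poisson(np.exp(X @ true_beta))         # response

beta = np.zeros(3)                             # model_coefficients_start
eta = X @ beta                                 # predicted_linear_response
mu = np.exp(eta)                               # mean
grad_mean = mu                                 # d mean / d eta for the log link
variance = mu                                  # Poisson variance

z = eta + (y - mu) / grad_mean                 # adjusted response ("z" above)
w = grad_mean / np.sqrt(variance)              # per-sample weight ("w" above)
beta_next, *_ = np.linalg.lstsq(X * w[:, None], z * w, rcond=None)
print(beta_next)                               # one step toward true_beta
```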
Example No. 10
 def test_assert_all_nan_input_numpy_rand(self):
     a = np.random.rand(10, 10,
                        10).astype(dtype_util.as_numpy_dtype(self.dtype))
     with self.assertRaisesRegexp(AssertionError, 'Arrays are not equal'):
         self.assertAllNan(a)
Example No. 11
def _ones_like(input, dtype=None, name=None):  # pylint: disable=redefined-builtin
  s = _shape(input)
  s_ = tf.get_static_value(s)
  if s_ is not None:
    return np.ones(s_, dtype_util.as_numpy_dtype(dtype or input.dtype))
  return tf.ones(s, dtype or s.dtype, name)
Example No. 12
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name='auto_correlation'):
    """Auto correlation along one axis.

  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
  `RXX` may be defined as  (with `E` expectation and `Conj` complex conjugate)

  ```
  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
  W[n]   := (X[n] - MU) / S,
  MU     := E{ X[0] },
  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
  ```

  This function takes the viewpoint that `x` is (along one axis) a finite
  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
  estimate of `RXX[m]` as follows:

  After extending `x` from length `L` to `inf` by zero padding, the auto
  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

  ```
  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
  w[n]   := (x[n] - mu) / s,
  mu     := L**-1 sum_n x[n],
  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
  ```

  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so users
  often set `max_lags` small enough so that the entire output is meaningful.

  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide by
  `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto correlation
  contains a slight bias, which goes to zero as `len(x) - m --> infinity`.

  Args:
    x:  `float32` or `complex64` `Tensor`.
    axis:  Python `int`. The axis number along which to compute correlation.
      Other dimensions index different batch members.
    max_lags:  Positive `int` tensor.  The maximum value of `m` to consider (in
      equation above).  If `max_lags >= x.shape[axis]`, we effectively re-set
      `max_lags` to `x.shape[axis] - 1`.
    center:  Python `bool`.  If `False`, do not subtract the mean estimate `mu`
      from `x[n]` when forming `w[n]`.
    normalize:  Python `bool`.  If `False`, do not divide by the variance
      estimate `s**2` when forming `w[n]`.
    name:  `String` name to prepend to created ops.

  Returns:
    `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

  Raises:
    TypeError:  If `x` is not a supported type.
  """
    # Implementation details:
    # Extend length N / 2 1-D array x to length N by zero padding onto the end.
    # Then, set
    #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
    # It is not hard to see that
    #   F[x]_k Conj(F[x]_k) = F[R]_k, where
    #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
    # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].

    # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
    # based version of estimating RXX.
    # Note that this is a special case of the Wiener-Khinchin Theorem.
    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, name='x')

        # Rotate dimensions of x in order to put axis at the rightmost dim.
        # FFT op requires this.
        rank = ps.rank(x)
        if axis < 0:
            axis = rank + axis
        shift = rank - 1 - axis
        # Suppose x.shape[axis] = T, so there are T 'time' steps.
        #   ==> x_rotated.shape = B + [T],
        # where B is x_rotated's batch shape.
        x_rotated = distribution_util.rotate_transpose(x, shift)

        if center:
            x_rotated = x_rotated - tf.reduce_mean(
                x_rotated, axis=-1, keepdims=True)

        # x_len = N / 2 from above explanation.  The length of x along axis.
        # Get a value for x_len that works in all cases.
        x_len = ps.shape(x_rotated)[-1]

        # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
        # the moment it is necessary so that all FFT implementations work.
        # Zero pad to the next power of 2 greater than 2 * x_len, which equals
        # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
        x_len_float64 = ps.cast(x_len, np.float64)
        target_length = ps.pow(np.float64(2.),
                               ps.ceil(ps.log(x_len_float64 * 2) / np.log(2.)))
        pad_length = ps.cast(target_length - x_len_float64, np.int32)

        # We should have:
        # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
        #                     = B + [T + pad_length]
        x_rotated_pad = distribution_util.pad(x_rotated,
                                              axis=-1,
                                              back=True,
                                              count=pad_length)

        dtype = x.dtype
        if not dtype_util.is_complex(dtype):
            if not dtype_util.is_floating(dtype):
                raise TypeError(
                    'Argument x must have either float or complex dtype'
                    ' found: {}'.format(dtype))
            x_rotated_pad = tf.complex(
                x_rotated_pad,
                dtype_util.as_numpy_dtype(dtype_util.real_dtype(dtype))(0.))

        # Autocorrelation is IFFT of power-spectral density (up to some scaling).
        fft_x_rotated_pad = tf.signal.fft(x_rotated_pad)
        spectral_density = fft_x_rotated_pad * tf.math.conj(fft_x_rotated_pad)
        # shifted_product is R[m] from above detailed explanation.
        # It is the inner product sum_n X[n] * Conj(X[n - m]).
        shifted_product = tf.signal.ifft(spectral_density)

        # Cast back to real-valued if x was real to begin with.
        shifted_product = tf.cast(shifted_product, dtype)

        # Figure out if we can deduce the final static shape, and set max_lags.
        # Use x_rotated as a reference, because it has the time dimension in the far
        # right, and was created before we performed all sorts of crazy shape
        # manipulations.
        know_static_shape = True
        if not tensorshape_util.is_fully_defined(x_rotated.shape):
            know_static_shape = False
        if max_lags is None:
            max_lags = x_len - 1
        else:
            max_lags = tf.convert_to_tensor(max_lags, name='max_lags')
            max_lags_ = tf.get_static_value(max_lags)
            if max_lags_ is None or not know_static_shape:
                know_static_shape = False
                max_lags = tf.minimum(x_len - 1, max_lags)
            else:
                max_lags = min(x_len - 1, max_lags_)

        # Chop off the padding.
        # We allow users to provide a huge max_lags, but cut it off here.
        # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
        shifted_product_chopped = shifted_product[..., :max_lags + 1]

        # If possible, set shape.
        if know_static_shape:
            chopped_shape = tensorshape_util.as_list(x_rotated.shape)
            chopped_shape[-1] = min(x_len, max_lags + 1)
            tensorshape_util.set_shape(shifted_product_chopped, chopped_shape)

        # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
        # other terms were zeros arising only due to zero padding.
        # `denominator = (N / 2 - m)` (defined below) is the proper term to
        # divide by to make this an unbiased estimate of the expectation
        # E[X[n] Conj(X[n - m])].
        x_len = ps.cast(x_len, dtype_util.real_dtype(dtype))
        max_lags = ps.cast(max_lags, dtype_util.real_dtype(dtype))
        denominator = x_len - ps.range(0., max_lags + 1.)
        denominator = ps.cast(denominator, dtype)
        shifted_product_rotated = shifted_product_chopped / denominator

        if normalize:
            shifted_product_rotated /= shifted_product_rotated[..., :1]

        # Transpose dimensions back to those of x.
        return distribution_util.rotate_transpose(shifted_product_rotated,
                                                  -shift)
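
A minimal NumPy sketch (not this function) of the FFT route described in the implementation notes: center, zero pad to a power of two, take `|FFT|**2`, inverse FFT, then divide lag `m` by `L - m` and optionally normalize.

```python
import numpy as np

x = np.random.default_rng(0).normal(size=128)
L = x.size
xc = x - x.mean()
n_fft = int(2 ** np.ceil(np.log2(2 * L)))     # next power of two >= 2 * L
f = np.fft.fft(xc, n=n_fft)
r = np.fft.ifft(f * np.conj(f)).real[:L]      # R[m] = sum_n x[n] x[n - m]
max_lags = 10
rxx = r[:max_lags + 1] / (L - np.arange(max_lags + 1))
rxx /= rxx[0]                                 # same effect as `normalize=True`
print(rxx)
```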
Example No. 13
  def _log_cdf(self, x):
    # The CDF is (p**x * (1 - p)**(1 - x) + p - 1) / (2 * p - 1).
    # We do this computation in logit space to be more numerically stable.
    # p**x * (1 - p)**(1 - x) becomes
    # 1 / (1 + exp(-logits))**x *
    # exp(-logits * (1 - x)) / (1 + exp(-logits)) ** (1 - x) =
    # exp(-logits * (1 - x)) / (1 + exp(-logits))
    # p - 1 becomes -exp(-logits) / (1 + exp(-logits))
    # Thus the whole numerator is
    # (exp(-logits * (1 - x)) - exp(-logits)) / (1 + exp(-logits))
    # The denominator is (1 - exp(-logits)) / (1 + exp(-logits))
    # Putting it all together, this gives:
    # (exp(-logits * (1 - x)) - exp(-logits)) / (1 - exp(-logits)) =
    # (exp(logits * x) - 1) / (exp(logits) - 1)
    logits = self._logits_parameter_no_checks()

    # For logits < 0, we can directly use the expression.
    safe_logits = tf.where(logits < 0., logits, -1.)
    result_negative_logits = (
        tfp_math.log1mexp(
            tf.math.multiply_no_nan(safe_logits, x)) -
        tfp_math.log1mexp(safe_logits))
    # For logits > 0, to avoid infs with large arguments we rewrite the
    # expression. Let z = log(exp(logits) - 1)
    # log_cdf = log((exp(logits * x) - 1) / (exp(logits) - 1))
    #         = log(exp(logits * x) - 1) - log(exp(logits) - 1)
    #         = log(exp(logits * x) - 1) - log(exp(z))
    #         = log(exp(logits * x - z) - exp(-z))
    # Because logits > 0, logits * x - z > -z, so we can pull it out to get
    #         = log(exp(logits * x - z) * (1 - exp(-logits * x)))
    #         = logits * x - z + tf.math.log(1 - exp(-logits * x))
    dtype = dtype_util.as_numpy_dtype(x.dtype)
    eps = np.finfo(dtype).eps
    # log(exp(logits) - 1)
    safe_logits = tf.where(logits > 0., logits, 1.)
    z = tf.where(
        safe_logits > -np.log(eps),
        safe_logits, tf.math.log(tf.math.expm1(safe_logits)))
    result_positive_logits = tf.math.multiply_no_nan(
        safe_logits, x) - z + tfp_math.log1mexp(
            -tf.math.multiply_no_nan(safe_logits, x))

    result = tf.where(
        logits < 0., result_negative_logits, result_positive_logits)

    # Finally, handle the case where `logits` and `p` are on the boundary,
    # as the above expressions can result in ratio of `infs` in that case as
    # well.
    result = tf.where(
        tf.math.equal(logits, np.inf), dtype(-np.inf), result)
    result = tf.where(
        (tf.math.equal(logits, -np.inf) & tf.math.not_equal(x, 0.)) | (
            tf.math.equal(logits, np.inf) & tf.math.equal(x, 1.)),
        tf.zeros_like(logits), result)

    result = tf.where(
        x < 0.,
        dtype(-np.inf),
        tf.where(x > 1., tf.zeros_like(x), result))

    return result
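
The algebra in the comments can be spot-checked numerically; here is a small NumPy sketch (illustrative values, not TFP code) confirming that the probability-space CDF and the logit-space rewrite agree.

```python
import numpy as np

logits, x = 1.7, 0.3
p = 1. / (1. + np.exp(-logits))
lhs = (p**x * (1. - p)**(1. - x) + p - 1.) / (2. * p - 1.)
rhs = np.expm1(logits * x) / np.expm1(logits)
print(lhs, rhs)  # equal up to floating-point error
```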
Example No. 14
 def _stddev(self):
   if self.allow_nan_stats:
     return tf.fill(self.batch_shape_tensor(),
                    dtype_util.as_numpy_dtype(self.dtype)(np.nan))
   else:
     raise ValueError('`stddev` is undefined for Cauchy distribution.')
Example No. 15
  def _sample_n(self, n, seed=None):
    power = tf.convert_to_tensor(self.power)
    shape = tf.concat([[n], tf.shape(power)], axis=0)

    has_seed = seed is not None
    seed = SeedStream(seed, salt='zipf')

    minval_u = self._hat_integral(0.5, power=power) + 1.
    maxval_u = self._hat_integral(tf.int64.max - 0.5, power=power)

    def loop_body(should_continue, k):
      """Resample the non-accepted points."""
      # The range of U is chosen so that the resulting sample K lies in
      # [0, tf.int64.max). The final sample, if accepted, is K + 1.
      u = tf.random.uniform(
          shape,
          minval=minval_u,
          maxval=maxval_u,
          dtype=power.dtype,
          seed=seed())

      # Sample the point X from the continuous density h(x) \propto x^(-power).
      x = self._hat_integral_inverse(u, power=power)

      # Rejection-inversion requires a `hat` function, h(x) such that
      # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
      # support. A natural hat function for us is h(x) = x^(-power).
      #
      # After sampling X from h(x), suppose it lies in the interval
      # (K - .5, K + .5) for integer K. Then the corresponding K is accepted if
      # it lies to the left of x_K, where x_K is defined by:
      #   \int_{x_k}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
      # where H(x) = \int_x^inf h(x) dx.

      # Solving for x_K, we find that x_K = H_inverse(H(K + .5) + pmf(K + 1)).
      # Or, the acceptance condition is X <= H_inverse(H(K + .5) + pmf(K + 1)).
      # Since X = H_inverse(U), this simplifies to U <= H(K + .5) + pmf(K + 1).

      # Update the non-accepted points.
      # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
      k = tf.where(should_continue, tf.floor(x + 0.5), k)
      accept = (u <= self._hat_integral(k + .5, power=power) + tf.exp(
          self._log_prob(k + 1, power=power)))

      return [should_continue & (~accept), k]

    should_continue, samples = tf.while_loop(
        cond=lambda should_continue, *ignore: tf.reduce_any(should_continue),
        body=loop_body,
        loop_vars=[
            tf.ones(shape, dtype=tf.bool),  # should_continue
            tf.zeros(shape, dtype=power.dtype),  # k
        ],
        parallel_iterations=1 if has_seed else 10,
        maximum_iterations=self.sample_maximum_iterations,
    )
    samples = samples + 1.

    if self.validate_args and dtype_util.is_integer(self.dtype):
      samples = distribution_util.embed_check_integer_casting_closed(
          samples, target_dtype=self.dtype, assert_positive=True)

    samples = tf.cast(samples, self.dtype)

    if self.validate_args:
      npdt = dtype_util.as_numpy_dtype(self.dtype)
      v = npdt(dtype_util.min(npdt) if dtype_util.is_integer(npdt) else np.nan)
      samples = tf.where(should_continue, v, samples)

    return samples
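
The hat machinery above relies on the tail integral `H` and its inverse having closed forms. The sketch below assumes the simplest hat `h(x) = x**(-power)` purely for illustration; the distribution's own `_hat_integral` may be parameterized slightly differently.

```python
import numpy as np

power = 2.5
H = lambda x: x**(1. - power) / (power - 1.)            # int_x^inf t**(-power) dt
H_inv = lambda u: ((power - 1.) * u)**(-1. / (power - 1.))
x = 3.7
print(H_inv(H(x)), x)                                   # round-trips back to x
```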
Example No. 16
def _log_loosum_exp_impl(logx, axis, keepdims, compute_mean):
    """Implementation for `*loosum*` functions."""
    with tf.name_scope('log_loosum_exp_impl'):
        logx = tf.convert_to_tensor(logx, name='logx')
        dtype = dtype_util.as_numpy_dtype(logx.dtype)

        if axis is not None:
            x = np.array(axis)
            axis = (tf.convert_to_tensor(
                axis, name='axis', dtype_hint=tf.int32)
                    if x.dtype is np.object else x.astype(np.int32))

        log_sum_x = tf.reduce_logsumexp(logx, axis=axis, keepdims=True)

        # Later we'll want to compute the mean from a sum so we calculate the number
        # of reduced elements, n.
        n = prefer_static.size(logx) // prefer_static.size(log_sum_x)
        n = prefer_static.cast(n, dtype)

        # log_loosum_x[i] =
        # = logsumexp(logx[j] : j != i)
        # = log( exp(logsumexp(logx)) - exp(logx[i]) )
        # = log( exp(logsumexp(logx - logx[i])) exp(logx[i])  - exp(logx[i]))
        # = logx[i] + log(exp(logsumexp(logx - logx[i])) - 1)
        # = logx[i] + log(exp(logsumexp(logx) - logx[i]) - 1)
        # = logx[i] + softplus_inverse(logsumexp(logx) - logx[i])
        d = log_sum_x - logx
        # We use `d != 0` rather than `d > 0.` because `d < 0.` should never happen;
        # if it does we want to complain loudly (which `softplus_inverse` will).
        d_ok = tf.not_equal(d, 0.)
        safe_d = tf.where(d_ok, d, 1.)
        d_ok_result = logx + softplus_inverse(safe_d)

        neg_inf = tf.constant(-np.inf, dtype=dtype)

        # When not(d_ok) and is_positive_and_largest then we manually compute the
        # log_loosum_x. (We can efficiently do this for any one point but not all,
        # hence we still need the above calculation.) This is good because when
        # this condition is met, we cannot use the above calculation; it's -inf.
        # We now compute the log-leave-out-max-sum, replicate it to every
        # point and make sure to select it only when we need to.
        max_logx = tf.reduce_max(logx, axis=axis, keepdims=True)
        is_positive_and_largest = (logx > 0.) & tf.equal(logx, max_logx)
        log_lomsum_x = tf.reduce_logsumexp(tf.where(is_positive_and_largest,
                                                    neg_inf, logx),
                                           axis=axis,
                                           keepdims=True)
        d_not_ok_result = tf.where(is_positive_and_largest, log_lomsum_x,
                                   neg_inf)

        log_loosum_x = tf.where(d_ok, d_ok_result, d_not_ok_result)

        # We now squeeze log_sum_x so as if we used `keepdims=False`.
        # TODO(b/136176077): These mental gymnastics could all be replaced with
        # `tf.squeeze(log_sum_x, axis)` if tf.squeeze supported Tensor valued `axis`
        # arguments.
        if not keepdims:
            if axis is None:
                keepdims = np.array([], dtype=np.int32)
            else:
                rank = prefer_static.rank(logx)
                keepdims = prefer_static.setdiff1d(
                    prefer_static.range(rank),
                    prefer_static.non_negative_axis(axis, rank))
            squeeze_shape = tf.gather(prefer_static.shape(logx),
                                      indices=keepdims)
            log_sum_x = tf.reshape(log_sum_x, shape=squeeze_shape)
            if prefer_static.is_numpy(keepdims):
                tensorshape_util.set_shape(log_sum_x,
                                           np.array(logx.shape)[keepdims])

        # Set static shapes just in case we lost them.
        tensorshape_util.set_shape(n, [])
        tensorshape_util.set_shape(log_loosum_x, logx.shape)

        if not compute_mean:
            return log_loosum_x, log_sum_x, n

        log_nm1 = prefer_static.log(max(1., n - 1.))
        log_n = prefer_static.log(n)
        return log_loosum_x - log_nm1, log_sum_x - log_n, n
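
The leave-one-out identity in the comments is easy to verify directly; below is a small NumPy/SciPy check (illustrative values, not TFP code), using `log(expm1(.))` for `softplus_inverse`.

```python
import numpy as np
from scipy.special import logsumexp

logx = np.array([0.1, -1.3, 0.7, 2.0])
lse = logsumexp(logx)
loo_identity = logx + np.log(np.expm1(lse - logx))
loo_direct = np.array([logsumexp(np.delete(logx, i)) for i in range(logx.size)])
print(np.allclose(loo_identity, loo_direct))  # True
```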
Example No. 17
 def _assertions(self, t):
   if not self.validate_args:
     return []
   return [assert_util.assert_none_equal(
       t, dtype_util.as_numpy_dtype(t.dtype)(0.),
       message="All elements must be non-zero.")]
Example No. 18
 def _variance(self):
     if self.allow_nan_stats:
         return tf.fill(self.batch_shape_tensor(),
                        dtype_util.as_numpy_dtype(self.dtype)(np.nan))
     raise ValueError(
         '`variance` is undefined for the half-Cauchy distribution.')
Example No. 19
def quadrature_scheme_softmaxnormal_gauss_hermite(normal_loc,
                                                  normal_scale,
                                                  quadrature_size,
                                                  validate_args=False,
                                                  name=None):
    """Use Gauss-Hermite quadrature to form quadrature on `K - 1` simplex.

  A `SoftmaxNormal` random variable `Y` may be generated via

  ```
  Y = SoftmaxCentered(X),
  X = Normal(normal_loc, normal_scale)
  ```

  Note: for a given `quadrature_size`, this method is generally less accurate
  than `quadrature_scheme_softmaxnormal_quantiles`.

  Args:
    normal_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
      The location parameter of the Normal used to construct the SoftmaxNormal.
    normal_scale: `float`-like `Tensor`. Broadcastable with `normal_loc`.
      The scale parameter of the Normal used to construct the SoftmaxNormal.
    quadrature_size: Python `int` scalar representing the number of quadrature
      points.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    grid: Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
      convex combination of affine parameters for `K` components.
      `grid[..., :, n]` is the `n`-th grid point, living in the `K - 1` simplex.
    probs:  Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
      probabilities associated with each grid point.
  """
    with tf.name_scope(name
                       or "quadrature_scheme_softmaxnormal_gauss_hermite"):
        normal_loc = tf.convert_to_tensor(value=normal_loc, name="normal_loc")
        npdt = dtype_util.as_numpy_dtype(normal_loc.dtype)
        normal_scale = tf.convert_to_tensor(value=normal_scale,
                                            dtype=npdt,
                                            name="normal_scale")

        normal_scale = maybe_check_quadrature_param(normal_scale,
                                                    "normal_scale",
                                                    validate_args)

        grid, probs = np.polynomial.hermite.hermgauss(deg=quadrature_size)
        grid = grid.astype(npdt)
        probs = probs.astype(npdt)
        probs /= np.linalg.norm(probs, ord=1, keepdims=True)
        probs = tf.convert_to_tensor(value=probs, name="probs", dtype=npdt)

        grid = softmax(-distribution_util.pad(
            (normal_loc[..., tf.newaxis] +
             np.sqrt(2.) * normal_scale[..., tf.newaxis] * grid),
            axis=-2,
            front=True),
                       axis=-2)  # shape: [B, components, deg]

        return grid, probs
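
For orientation, the underlying Gauss-Hermite rule used here approximates Normal expectations as below; this is a generic sketch (arbitrary test function and parameters), not the quadrature scheme itself.

```python
import numpy as np

grid, weights = np.polynomial.hermite.hermgauss(deg=20)
loc, scale = 0.5, 2.0
f = np.tanh                                   # any smooth test function
# E[f(X)] for X ~ Normal(loc, scale**2):
approx = np.sum(weights / np.sqrt(np.pi) * f(loc + np.sqrt(2.) * scale * grid))
print(approx)
```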
Example No. 20
def _get_cdf_pdf(c):
    dtype = dtype_util.as_numpy_dtype(c.dtype)
    d = normal_lib.Normal(dtype(0), 1)
    return d.cdf, d.prob
Example No. 21
def pinv(a, rcond=None, validate_args=False, name=None):
    """Compute the Moore-Penrose pseudo-inverse of a matrix.

  Calculate the [generalized inverse of a matrix](
  https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse) using its
  singular-value decomposition (SVD) and including all large singular values.

  The pseudo-inverse of a matrix `A` is defined as: 'the matrix that 'solves'
  [the least-squares problem] `A @ x = b`,' i.e., if `x_hat` is a solution, then
  `A_pinv` is the matrix such that `x_hat = A_pinv @ b`. It can be shown that if
  `U @ Sigma @ V.T = A` is the singular value decomposition of `A`, then
  `A_pinv = V @ inv(Sigma) @ U^T`. [(Strang, 1980)][1]

  This function is analogous to [`numpy.linalg.pinv`](
  https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.pinv.html).
  It differs only in default value of `rcond`. In `numpy.linalg.pinv`, the
  default `rcond` is `1e-15`. Here the default is
  `10. * max(num_rows, num_cols) * np.finfo(dtype).eps`.

  Args:
    a: (Batch of) `float`-like matrix-shaped `Tensor`(s) which are to be
      pseudo-inverted.
    rcond: `Tensor` of small singular value cutoffs.  Singular values smaller
      (in modulus) than `rcond` * largest_singular_value (again, in modulus) are
      set to zero. Must broadcast against `tf.shape(a)[:-2]`.
      Default value: `10. * max(num_rows, num_cols) * np.finfo(a.dtype).eps`.
    validate_args: When `True`, additional assertions might be embedded in the
      graph.
      Default value: `False` (i.e., no graph assertions are added).
    name: Python `str` prefixed to ops created by this function.
      Default value: 'pinv'.

  Returns:
    a_pinv: The pseudo-inverse of input `a`. Has same shape as `a` except
      rightmost two dimensions are transposed.

  Raises:
    TypeError: if input `a` does not have `float`-like `dtype`.
    ValueError: if input `a` has fewer than 2 dimensions.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp

  a = tf.constant([[1.,  0.4,  0.5],
                   [0.4, 0.2,  0.25],
                   [0.5, 0.25, 0.35]])
  tf.matmul(tfp.math.pinv(a), a)
  # ==> array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]], dtype=float32)

  a = tf.constant([[1.,  0.4,  0.5,  1.],
                   [0.4, 0.2,  0.25, 2.],
                   [0.5, 0.25, 0.35, 3.]])
  tf.matmul(tfp.math.pinv(a), a)
  # ==> array([[ 0.76,  0.37,  0.21, -0.02],
               [ 0.37,  0.43, -0.33,  0.02],
               [ 0.21, -0.33,  0.81,  0.01],
               [-0.02,  0.02,  0.01,  1.  ]], dtype=float32)
  ```

  #### References

  [1]: G. Strang. 'Linear Algebra and Its Applications, 2nd Ed.' Academic Press,
       Inc., 1980, pp. 139-142.
  """
    with tf.name_scope(name or 'pinv'):
        a = tf.convert_to_tensor(a, name='a')

        assertions = _maybe_validate_matrix(a, validate_args)
        if assertions:
            with tf.control_dependencies(assertions):
                a = tf.identity(a)

        dtype = dtype_util.as_numpy_dtype(a.dtype)

        if rcond is None:

            def get_dim_size(dim):
                if tf.compat.dimension_value(a.shape[dim]) is not None:
                    return tf.compat.dimension_value(a.shape[dim])
                return tf.shape(a)[dim]

            num_rows = get_dim_size(-2)
            num_cols = get_dim_size(-1)
            if isinstance(num_rows, int) and isinstance(num_cols, int):
                max_rows_cols = float(max(num_rows, num_cols))
            else:
                max_rows_cols = tf.cast(tf.maximum(num_rows, num_cols), dtype)
            rcond = 10. * max_rows_cols * np.finfo(dtype).eps

        rcond = tf.convert_to_tensor(rcond, dtype=dtype, name='rcond')

        # Calculate pseudo inverse via SVD.
        # Note: if a is symmetric then u == v. (We might observe additional
        # performance by explicitly setting `v = u` in such cases.)
        [
            singular_values,  # Sigma
            left_singular_vectors,  # U
            right_singular_vectors,  # V
        ] = tf.linalg.svd(a, full_matrices=False, compute_uv=True)

        # Saturate small singular values to inf. This has the effect of making
        # `1. / s = 0.` while not resulting in `NaN` gradients.
        cutoff = rcond * tf.reduce_max(singular_values, axis=-1)
        singular_values = tf.where(singular_values > cutoff[..., tf.newaxis],
                                   singular_values, np.array(np.inf, dtype))

        # Although `a == tf.matmul(u, s * v, transpose_b=True)` we swap
        # `u` and `v` here so that `tf.matmul(pinv(A), A) = tf.eye()`, i.e.,
        # a matrix inverse has 'transposed' semantics.
        a_pinv = tf.matmul(right_singular_vectors /
                           singular_values[..., tf.newaxis, :],
                           left_singular_vectors,
                           adjoint_b=True)

        if tensorshape_util.rank(a.shape) is not None:
            a_pinv.set_shape(a.shape[:-2].concatenate(
                [a.shape[-1], a.shape[-2]]))

        return a_pinv
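
The same construction in plain NumPy, as a sketch (reusing the matrix from the docstring example, not part of the function): zero out singular values below the `rcond` cutoff, then form `V @ diag(1/s) @ U^T`.

```python
import numpy as np

a = np.array([[1.,  0.4,  0.5],
              [0.4, 0.2,  0.25],
              [0.5, 0.25, 0.35]])
u, s, vt = np.linalg.svd(a, full_matrices=False)
rcond = 10. * max(a.shape[-2:]) * np.finfo(a.dtype).eps
s_inv = np.where(s > rcond * s.max(), 1. / s, 0.)  # saturate small values
a_pinv = (vt.T * s_inv) @ u.T                      # V @ diag(1/s) @ U^T
print(np.allclose(a_pinv @ a, np.eye(3), atol=1e-5))
```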
Example No. 22
    def _sample_n(self, n, seed=None):
        if self._use_static_graph:
            # This sampling approach is almost the same as the approach used by
            # `MixtureSameFamily`. The differences are due to having a list of
            # `Distribution` objects rather than a single object, and maintaining
            # random seed management that is consistent with the non-static code
            # path.
            samples = []
            cat_samples = self.cat.sample(n, seed=seed)
            stream = SeedStream(seed, salt='Mixture')

            for c in range(self.num_components):
                samples.append(self.components[c].sample(n, seed=stream()))
            stack_axis = -1 - tensorshape_util.rank(self._static_event_shape)
            x = tf.stack(samples, axis=stack_axis)  # [n, B, k, E]
            npdt = dtype_util.as_numpy_dtype(x.dtype)
            mask = tf.one_hot(
                indices=cat_samples,  # [n, B]
                depth=self._num_components,  # == k
                on_value=npdt(1),
                off_value=npdt(0))  # [n, B, k]
            mask = distribution_util.pad_mixture_dimensions(
                mask, self, self._cat,
                tensorshape_util.rank(
                    self._static_event_shape))  # [n, B, k, [1]*e]
            return tf.reduce_sum(x * mask, axis=stack_axis)  # [n, B, E]

        n = tf.convert_to_tensor(n, name='n')
        static_n = tf.get_static_value(n)
        n = int(static_n) if static_n is not None else n
        cat_samples = self.cat.sample(n, seed=seed)

        static_samples_shape = cat_samples.shape
        if tensorshape_util.is_fully_defined(static_samples_shape):
            samples_shape = tensorshape_util.as_list(static_samples_shape)
            samples_size = tensorshape_util.num_elements(static_samples_shape)
        else:
            samples_shape = tf.shape(cat_samples)
            samples_size = tf.size(cat_samples)
        static_batch_shape = self.batch_shape
        if tensorshape_util.is_fully_defined(static_batch_shape):
            batch_shape = tensorshape_util.as_list(static_batch_shape)
            batch_size = tensorshape_util.num_elements(static_batch_shape)
        else:
            batch_shape = tf.shape(cat_samples)[1:]
            batch_size = tf.reduce_prod(batch_shape)
        static_event_shape = self.event_shape
        if tensorshape_util.is_fully_defined(static_event_shape):
            event_shape = np.array(
                tensorshape_util.as_list(static_event_shape), dtype=np.int32)
        else:
            event_shape = None

        # Get indices into the raw cat sampling tensor. We will
        # need these to stitch sample values back out after sampling
        # within the component partitions.
        samples_raw_indices = tf.reshape(tf.range(0, samples_size),
                                         samples_shape)

        # Partition the raw indices so that we can use
        # dynamic_stitch later to reconstruct the samples from the
        # known partitions.
        partitioned_samples_indices = tf.dynamic_partition(
            data=samples_raw_indices,
            partitions=cat_samples,
            num_partitions=self.num_components)

        # Copy the batch indices n times, as we will need to know
        # these to pull out the appropriate rows within the
        # component partitions.
        batch_raw_indices = tf.reshape(tf.tile(tf.range(0, batch_size), [n]),
                                       samples_shape)

        # Explanation of the dynamic partitioning below:
        #   batch indices are i.e., [0, 1, 0, 1, 0, 1]
        # Suppose partitions are:
        #     [1 1 0 0 1 1]
        # After partitioning, batch indices are cut as:
        #     [batch_indices[x] for x in 2, 3]
        #     [batch_indices[x] for x in 0, 1, 4, 5]
        # i.e.
        #     [1 1] and [0 0 0 0]
        # Now we sample n=2 from part 0 and n=4 from part 1.
        # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
        # and for part 1 we want samples from batch entries 0, 0, 0, 0
        #   (samples 0, 1, 2, 3).
        partitioned_batch_indices = tf.dynamic_partition(
            data=batch_raw_indices,
            partitions=cat_samples,
            num_partitions=self.num_components)
        samples_class = [None for _ in range(self.num_components)]

        stream = SeedStream(seed, salt='Mixture')

        for c in range(self.num_components):
            n_class = tf.size(partitioned_samples_indices[c])
            samples_class_c = self.components[c].sample(n_class, seed=stream())

            if event_shape is None:
                batch_ndims = prefer_static.rank_from_shape(batch_shape)
                event_shape = tf.shape(samples_class_c)[1 + batch_ndims:]

            # Pull out the correct batch entries from each index.
            # To do this, we may have to flatten the batch shape.

            # For sample s, batch element b of component c, we get the
            # partitioned batch indices from
            # partitioned_batch_indices[c]; and shift each element by
            # the sample index. The final lookup can be thought of as
            # a matrix gather along locations (s, b) in
            # samples_class_c where the n_class rows correspond to
            # samples within this component and the batch_size columns
            # correspond to batch elements within the component.
            #
            # Thus the lookup index is
            #   lookup[c, i] = batch_size * s[i] + b[c, i]
            # for i = 0 ... n_class[c] - 1.
            lookup_partitioned_batch_indices = (
                batch_size * tf.range(n_class) + partitioned_batch_indices[c])
            samples_class_c = tf.reshape(
                samples_class_c,
                tf.concat([[n_class * batch_size], event_shape], 0))
            samples_class_c = tf.gather(samples_class_c,
                                        lookup_partitioned_batch_indices,
                                        name='samples_class_c_gather')
            samples_class[c] = samples_class_c

        # Stitch back together the samples across the components.
        lhs_flat_ret = tf.dynamic_stitch(indices=partitioned_samples_indices,
                                         data=samples_class)
        # Reshape back to proper sample, batch, and event shape.
        ret = tf.reshape(lhs_flat_ret,
                         tf.concat([samples_shape, event_shape], 0))
        tensorshape_util.set_shape(
            ret,
            tensorshape_util.concatenate(static_samples_shape,
                                         self.event_shape))
        return ret
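
A toy TensorFlow sketch (independent of the class above, illustrative values only) of the partition/stitch round trip this sampler relies on: `tf.dynamic_partition` splits values by component index, and `tf.dynamic_stitch` reassembles them in their original order.

```python
import tensorflow as tf

values = tf.constant([10., 11., 12., 13., 14., 15.])
cat_samples = tf.constant([1, 1, 0, 0, 1, 1])
indices = tf.range(tf.size(values))

parts = tf.dynamic_partition(values, cat_samples, num_partitions=2)
part_indices = tf.dynamic_partition(indices, cat_samples, num_partitions=2)
restitched = tf.dynamic_stitch(part_indices, parts)
print(restitched.numpy())  # [10. 11. 12. 13. 14. 15.]
```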
Example No. 23
    def __init__(self,
                 loc,
                 scale,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Gumbel"):
        """Construct Gumbel distributions with location and scale `loc` and `scale`.

    The parameters `loc` and `scale` must be shaped in a way that supports
    broadcasting (e.g. `loc + scale` is a valid operation).

    Args:
      loc: Floating point tensor, the means of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Gumbel'`.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale],
                                            preferred_dtype=tf.float32)
            loc = tf.convert_to_tensor(value=loc, name="loc", dtype=dtype)
            scale = tf.convert_to_tensor(value=scale,
                                         name="scale",
                                         dtype=dtype)
            with tf.control_dependencies(
                [assert_util.assert_positive(scale)] if validate_args else []):
                loc = tf.identity(loc, name="loc")
                scale = tf.identity(scale, name="scale")
                tf.debugging.assert_same_float_dtype([loc, scale])
                self._gumbel_bijector = gumbel_bijector.Gumbel(
                    loc=loc, scale=scale, validate_args=validate_args)

            # Because the uniform sampler generates samples in `[0, 1)` this would
            # cause samples to lie in `[-inf, inf)` instead of `(-inf, inf)`. To fix
            # this, we use `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny`
            # because it is the smallest, positive, "normal" number.
            super(Gumbel, self).__init__(
                distribution=uniform.Uniform(low=np.finfo(
                    dtype_util.as_numpy_dtype(dtype)).tiny,
                                             high=tf.ones([], dtype=loc.dtype),
                                             allow_nan_stats=allow_nan_stats),
                # The Gumbel bijector encodes the quantile
                # function as the forward, and hence needs to
                # be inverted.
                bijector=invert_bijector.Invert(self._gumbel_bijector),
                batch_shape=distribution_util.get_broadcast_shape(loc, scale),
                parameters=parameters,
                name=name)
Example No. 24
  def _sample_n(self, n, seed=None):
    dim0_seed, otherdims_seed = samplers.split_seed(seed,
                                                    salt='von_mises_fisher')
    # The sampling strategy relies on the fact that vMF variates are symmetric
    # about the mean direction. Accordingly, if we have a sampling strategy for
    # the away-from-mean angle, then we can uniformly sample the remaining
    # dimensions on the S^{dim-2} sphere, and rotate these samples from a
    # (1, 0, 0, ..., 0)-mode distribution into the target orientation.
    #
    # This is easy to imagine on the 1-sphere (S^1; in 2-D space): sample a
    # von-Mises distributed `x` value in [-1, 1], then uniformly select what
    # amounts to an "up" or "down" additional degree of freedom after unit
    # normalizing, followed by a final rotation to the desired mean direction
    # from a basis of (1, 0).
    #
    # On S^2 (in 3-D), selecting a vMF `x` identifies a circle in `yz` on the
    # unit sphere over which the distribution is uniform, in particular the
    # circle where x = \hat{x} intersects the unit sphere. We pick a point on
    # that circle, then rotate to the desired mean direction from a basis of
    # (1, 0, 0).
    mean_direction = tf.convert_to_tensor(self.mean_direction)
    concentration = tf.convert_to_tensor(self.concentration)
    event_dim = (
        tf.compat.dimension_value(self.event_shape[0]) or
        self._event_shape_tensor(mean_direction=mean_direction)[0])

    sample_batch_shape = ps.concat([[n], self._batch_shape_tensor(
        mean_direction=mean_direction, concentration=concentration)], axis=0)
    dim = tf.cast(event_dim - 1, self.dtype)
    if event_dim == 3:
      samples_dim0 = self._sample_3d(n,
                                     mean_direction=mean_direction,
                                     concentration=concentration,
                                     seed=dim0_seed)
    else:
      # Wood'94 provides a rejection algorithm to sample the x coordinate.
      # Wood'94 definition of b:
      # b = (-2 * kappa + tf.sqrt(4 * kappa**2 + dim**2)) / dim
      # https://stats.stackexchange.com/questions/156729 suggests:
      b = dim / (2 * concentration +
                 tf.sqrt(4 * concentration**2 + dim**2))
      # TODO(bjp): Integrate any useful numerical tricks from hyperspherical VAE
      #     https://github.com/nicola-decao/s-vae-tf/
      x = (1 - b) / (1 + b)
      c = concentration * x + dim * tf.math.log1p(-x**2)
      beta = beta_lib.Beta(dim / 2, dim / 2)

      def cond_fn(w, should_continue, seed):
        del w, seed
        return tf.reduce_any(should_continue)

      def body_fn(w, should_continue, seed):
        """While loop body for sampling the angle `w`."""
        beta_seed, unif_seed, next_seed = samplers.split_seed(seed, n=3)
        z = beta.sample(sample_shape=sample_batch_shape, seed=beta_seed)
        # set_shape needed here because of b/139013403
        tensorshape_util.set_shape(z, w.shape)
        w = tf.where(should_continue,
                     (1. - (1. + b) * z) / (1. - (1. - b) * z),
                     w)
        if not self.allow_nan_stats:
          w = tf.debugging.check_numerics(w, 'w')
        unif = samplers.uniform(
            sample_batch_shape, seed=unif_seed, dtype=self.dtype)
        # set_shape needed here because of b/139013403
        tensorshape_util.set_shape(unif, w.shape)
        should_continue = should_continue & (
            concentration * w + dim * tf.math.log1p(-x * w) - c <
            # Use log1p(-unif) to prevent log(0) and ensure that log(1) is
            # possible.
            tf.math.log1p(-unif))
        return w, should_continue, next_seed

      w = tf.zeros(sample_batch_shape, dtype=self.dtype)
      should_continue = tf.ones(sample_batch_shape, dtype=tf.bool)
      samples_dim0, _, _ = tf.while_loop(
          cond=cond_fn, body=body_fn,
          loop_vars=(w, should_continue, dim0_seed))
      samples_dim0 = samples_dim0[..., tf.newaxis]
    if not self.allow_nan_stats:
      # Verify samples are within [-1, 1], with useful error output tensors
      # (top value rather than all values).
      with tf.control_dependencies([
          assert_util.assert_less_equal(
              samples_dim0,
              dtype_util.as_numpy_dtype(self.dtype)(1.01)),
          assert_util.assert_greater_equal(
              samples_dim0,
              dtype_util.as_numpy_dtype(self.dtype)(-1.01)),
      ]):
        samples_dim0 = tf.identity(samples_dim0)
    samples_otherdims_shape = ps.concat([sample_batch_shape, [event_dim - 1]],
                                        axis=0)
    unit_otherdims = tf.math.l2_normalize(
        samplers.normal(
            samples_otherdims_shape, seed=otherdims_seed, dtype=self.dtype),
        axis=-1)
    samples = tf.concat([
        samples_dim0,  # we must avoid sqrt(1 - (>1)**2)
        tf.sqrt(tf.maximum(1 - samples_dim0**2, 0.)) * unit_otherdims
    ], axis=-1)
    samples = tf.math.l2_normalize(samples, axis=-1)
    if not self.allow_nan_stats:
      samples = tf.debugging.check_numerics(samples, 'samples')

    # Runtime assert that samples are unit length.
    if not self.allow_nan_stats:
      worst, _ = tf.math.top_k(
          tf.reshape(tf.abs(1 - tf.linalg.norm(samples, axis=-1)), [-1]))
      with tf.control_dependencies([
          assert_util.assert_near(
              dtype_util.as_numpy_dtype(self.dtype)(0),
              worst,
              atol=1e-4,
              summarize=100)
      ]):
        samples = tf.identity(samples)
    # The samples generated are symmetric around a mode at (1, 0, 0, ...., 0).
    # Now, we move the mode to `self.mean_direction` using a rotation matrix.
    if not self.allow_nan_stats:
      # Assert that the basis vector rotates to the mean direction, as expected.
      basis = tf.cast(tf.concat([[1.], tf.zeros([event_dim - 1])], axis=0),
                      self.dtype)
      with tf.control_dependencies([
          assert_util.assert_less(
              tf.linalg.norm(
                  self._rotate(basis, mean_direction=mean_direction) -
                  mean_direction, axis=-1),
              dtype_util.as_numpy_dtype(self.dtype)(1e-5))
      ]):
        return self._rotate(samples, mean_direction=mean_direction)
    return self._rotate(samples, mean_direction=mean_direction)
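Purely as an illustration of the Wood'94 rejection scheme sketched in the comments above (not of this class's API), here is a minimal NumPy version of the away-from-mean coordinate sampler; the function name and scalar-parameter signature are invented for this example.

import numpy as np

def sample_vmf_first_coord(kappa, dim, n, rng=None):
  # Wood'94 rejection sampler for the first (away-from-mean) coordinate of a
  # von Mises-Fisher variate; `dim` = event_dim - 1, matching `dim` above.
  rng = np.random.default_rng() if rng is None else rng
  b = dim / (2. * kappa + np.sqrt(4. * kappa**2 + dim**2))
  x = (1. - b) / (1. + b)
  c = kappa * x + dim * np.log1p(-x**2)
  out = np.empty(n)
  for i in range(n):
    while True:
      z = rng.beta(dim / 2., dim / 2.)
      w = (1. - (1. + b) * z) / (1. - (1. - b) * z)
      u = rng.uniform()
      # Accept exactly when the `should_continue` condition above is False.
      if kappa * w + dim * np.log1p(-x * w) - c >= np.log1p(-u):
        out[i] = w
        break
  return out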
Exemplo n.º 25
0
def _numpy_dtype(dtype):
  if dtype is None:
    return None
  return dtype_util.as_numpy_dtype(dtype)
Exemplo n.º 26
0
 def _variance(self):
     if self.allow_nan_stats:
         return tf.fill(self.batch_shape_tensor(),
                        dtype_util.as_numpy_dtype(self.dtype)(np.nan))
     raise ValueError("`variance` is undefined for Horseshoe distribution.")
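This is the usual `allow_nan_stats` contract: an undefined statistic either comes back as NaN or raises. A short sketch of how that plays out, assuming TensorFlow Probability's `tfd.Horseshoe` under eager execution:

import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

# Undefined variance is reported as NaN when allow_nan_stats=True (the default).
assert np.isnan(tfd.Horseshoe(scale=1., allow_nan_stats=True).variance().numpy())

# With allow_nan_stats=False, the same request raises instead.
try:
  tfd.Horseshoe(scale=1., allow_nan_stats=False).variance()
except ValueError as e:
  print(e)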
Exemplo n.º 27
0
 def test_assert_all_nan_input_placeholder_with_default(self):
     all_nan = np.full((10, 10, 10),
                       np.nan).astype(dtype_util.as_numpy_dtype(self.dtype))
     a = tf1.placeholder_with_default(all_nan, shape=all_nan.shape)
     self.assertAllNan(a)
Exemplo n.º 28
0
def _potential_scale_reduction_single_state(state, independent_chain_ndims,
                                            split_chains, validate_args):
    """potential_scale_reduction for one single state `Tensor`."""
    # casting integers to floats for floating-point division
    # check to see if the `state` is a numpy object for the numpy test suite
    if dtype_util.as_numpy_dtype(state.dtype) is np.int64:
        state = tf.cast(state, tf.float64)
    elif dtype_util.is_integer(state.dtype):
        state = tf.cast(state, tf.float32)
    with tf.name_scope('potential_scale_reduction_single_state'):
        # We assume exactly one leading dimension indexes e.g. correlated samples
        # from each Markov chain.
        state = tf.convert_to_tensor(state, name='state')

        n_samples_ = tf.compat.dimension_value(state.shape[0])
        if n_samples_ is not None:  # If available statically.
            if split_chains and n_samples_ < 4:
                raise ValueError(
                    'Must provide at least 4 samples when splitting chains. '
                    'Found {}'.format(n_samples_))
            if not split_chains and n_samples_ < 2:
                raise ValueError(
                    'Must provide at least 2 samples.  Found {}'.format(
                        n_samples_))
        elif validate_args:
            if split_chains:
                assertions = [
                    assert_util.assert_greater_equal(
                        ps.shape(state)[0],
                        4,
                        message=
                        'Must provide at least 4 samples when splitting chains.'
                    )
                ]
                with tf.control_dependencies(assertions):
                    state = tf.identity(state)
            else:
                assertions = [
                    assert_util.assert_greater_equal(
                        ps.shape(state)[0],
                        2,
                        message='Must provide at least 2 samples.')
                ]
                with tf.control_dependencies(assertions):
                    state = tf.identity(state)

        # Define so it's not a magic number.
        # Warning!  `if split_chains` logic assumes this is 1!
        sample_ndims = 1

        if split_chains:
            # Split the sample dimension in half, doubling the number of
            # independent chains.

            # For odd number of samples, keep all but the last sample.
            state_shape = ps.shape(state)
            n_samples = state_shape[0]
            state = state[:n_samples - n_samples % 2]

            # Suppose state = [0, 1, 2, 3, 4, 5]
            # Step 1: reshape into [[0, 1, 2], [3, 4, 5]]
            # E.g. reshape states of shape [a, b] into [2, a//2, b].
            state = tf.reshape(
                state, ps.concat([[2, n_samples // 2], state_shape[1:]],
                                 axis=0))
            # Step 2: Put the size `2` dimension in the right place to be treated as a
            # chain, changing [[0, 1, 2], [3, 4, 5]] into [[0, 3], [1, 4], [2, 5]],
            # i.e. transposing [2, a//2, b] into [a//2, 2, b].
            state = tf.transpose(
                a=state,
                perm=ps.concat([[1, 0], tf.range(2, tf.rank(state))], axis=0))

            # We're treating the new dim as indexing 2 chains, so increment.
            independent_chain_ndims += 1

        sample_axis = tf.range(0, sample_ndims)
        chain_axis = tf.range(sample_ndims,
                              sample_ndims + independent_chain_ndims)
        sample_and_chain_axis = tf.range(
            0, sample_ndims + independent_chain_ndims)

        n = _axis_size(state, sample_axis)
        m = _axis_size(state, chain_axis)

        # In the language of Brooks and Gelman (1998),
        # B / n is the between chain variance, the variance of the chain means.
        # W is the within sequence variance, the mean of the chain variances.
        b_div_n = _reduce_variance(tf.reduce_mean(state,
                                                  axis=sample_axis,
                                                  keepdims=True),
                                   sample_and_chain_axis,
                                   biased=False)
        w = tf.reduce_mean(_reduce_variance(state,
                                            sample_axis,
                                            keepdims=True,
                                            biased=False),
                           axis=sample_and_chain_axis)

        # sigma^2_+ is an estimate of the true variance, which would be unbiased
        # if each chain were drawn from the target; cf. the law of total variance.
        sigma_2_plus = ((n - 1) / n) * w + b_div_n
        return ((m + 1.) / m) * sigma_2_plus / w - (n - 1.) / (m * n)
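To make the estimator concrete, here is a hedged NumPy sketch of the same computation for the simplest case: one sample dimension, one chain dimension, and no split chains. The function name is invented for this example.

import numpy as np

def potential_scale_reduction_sketch(state):
    """R-hat for `state` of shape [n_samples, n_chains, ...]; no split chains."""
    state = np.asarray(state, dtype=np.float64)
    n, m = state.shape[0], state.shape[1]
    chain_means = state.mean(axis=0)             # [n_chains, ...]
    b_div_n = chain_means.var(axis=0, ddof=1)    # between-chain variance over n
    w = state.var(axis=0, ddof=1).mean(axis=0)   # mean of per-chain variances
    sigma_2_plus = ((n - 1) / n) * w + b_div_n
    return ((m + 1.) / m) * sigma_2_plus / w - (n - 1.) / (m * n)

# e.g. for two well-mixed chains the value should be close to 1:
# potential_scale_reduction_sketch(np.random.default_rng(0).normal(size=[1000, 2]))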
Exemplo n.º 29
0
 def mask_if_invalid(x, mask):
   return tf.where(
       is_valid, x, np.array(mask, dtype_util.as_numpy_dtype(x.dtype)))
Exemplo n.º 30
0
  def _forward_log_det_jacobian(self, x):
    # Let Y be a symmetric, positive definite matrix and write:
    #   Y = X X.T
    # where X is lower-triangular.
    #
    # Observe that,
    #   dY[i,j]/dX[a,b]
    #   = d/dX[a,b] { X[i,:] X[j,:] }
    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
    #
    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
    # symmetric and X is lower-triangular, we need vectors of dimension:
    #   d = p (p + 1) / 2
    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
    #   k = { i (i + 1) / 2 + j   i>=j
    #       { undef               i<j
    # and assume zero-based indexes. When k is undef, the element is dropped.
    # Example:
    #           j      k
    #        0 1 2 3  /
    #    0 [ 0 . . . ]
    # i  1 [ 1 2 . . ]
    #    2 [ 3 4 5 . ]
    #    3 [ 6 7 8 9 ]
    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
    # slight abuse: k(i,j)=undef means the element is dropped.)
    #
    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = dY[i,j]/dX[a,b]
    # = I[i=a] X[j,b] + I[j=a] X[i,b] = 0 since:
    # (1) i<a: then j<=i<a, so neither i nor j equals a and both terms vanish.
    # (2) i=a, j<b: then j<b<=a, so j!=a and the only surviving term is X[j,b],
    #     which is zero because X is lower-triangular and j<b.
    #
    # Since the Jacobian is lower-triangular, we need only compute the product
    # of diagonal elements:
    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
    #   = X[j,j] + I[i=j] X[i,j]
    #   = X[j,j] (1 + I[i=j]),
    # i.e. 2 X[j,j] on the p positions with i=j and X[j,j] on the remaining
    # p-j-1 positions of column j. Taking the product over all lower-triangular
    # (i,j) we conclude:
    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
    diag = tf.linalg.diag_part(x)

    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
    # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
    # output is unchanged.
    diag = self._make_columnar(diag)

    with tf.control_dependencies(self._assertions(x)):
      # Create a vector equal to: [p, p-1, ..., 2, 1].
      if tf.compat.dimension_value(x.shape[-1]) is None:
        p_int = tf.shape(x)[-1]
        p_float = tf.cast(p_int, dtype=x.dtype)
      else:
        p_int = tf.compat.dimension_value(x.shape[-1])
        p_float = dtype_util.as_numpy_dtype(x.dtype)(p_int)
      exponents = tf.linspace(p_float, 1., p_int)

      sum_weighted_log_diag = tf.squeeze(
          tf.matmul(tf.math.log(diag), exponents[..., tf.newaxis]), axis=-1)
      fldj = p_float * np.log(2.) + sum_weighted_log_diag

      # We finally need to undo adding an extra column in non-scalar cases
      # where there is a single matrix as input.
      if tensorshape_util.rank(x.shape) is not None:
        if tensorshape_util.rank(x.shape) == 2:
          fldj = tf.squeeze(fldj, axis=-1)
        return fldj

      shape = tf.shape(fldj)
      maybe_squeeze_shape = tf.concat([
          shape[:-1],
          distribution_util.pick_vector(
              tf.equal(tf.rank(x), 2),
              np.array([], dtype=np.int32), shape[-1:])], 0)
      return tf.reshape(fldj, maybe_squeeze_shape)
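As a sanity check on the closed form derived in the comments above, the following hedged NumPy sketch compares `p * log(2) + sum_j (p - j) * log(X[j, j])` against a brute-force finite-difference Jacobian of the half-vectorized map X -> X X^T; all names here are invented for the example.

import numpy as np

def fldj_closed_form(x):
  # p * log(2) + sum_j (p - j) * log(X[j, j]), matching the formula above.
  p = x.shape[-1]
  return p * np.log(2.) + sum((p - j) * np.log(x[j, j]) for j in range(p))

def fldj_brute_force(x, eps=1e-6):
  # Finite-difference Jacobian of the row-major half-vectorized map X -> X X^T.
  p = x.shape[-1]
  tril = [(i, j) for i in range(p) for j in range(i + 1)]  # the k(i, j) ordering
  vec = lambda m: np.array([m[i, j] for i, j in tril])
  jac = np.empty((len(tril), len(tril)))
  for col, (a, b) in enumerate(tril):
    xp, xm = x.copy(), x.copy()
    xp[a, b] += eps
    xm[a, b] -= eps
    jac[:, col] = (vec(xp @ xp.T) - vec(xm @ xm.T)) / (2. * eps)
  return np.log(np.abs(np.linalg.det(jac)))

x = np.tril(np.random.default_rng(0).uniform(0.5, 2., size=(4, 4)))
np.testing.assert_allclose(fldj_closed_form(x), fldj_brute_force(x), rtol=1e-4)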