Example #1
  def testLogits(self):
    logits = [-42., 42.]
    dist = bernoulli.Bernoulli(logits=logits)
    self.assertAllClose(logits, self.evaluate(dist.logits))

    if not special:
      return

    self.assertAllClose(special.expit(logits), self.evaluate(dist.probs))

    p = [0.01, 0.99, 0.42]
    dist = bernoulli.Bernoulli(probs=p)
    self.assertAllClose(special.logit(p), self.evaluate(dist.logits))
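
The `special` guard above suggests the surrounding test module imports SciPy optionally; a minimal sketch of such a guarded import (an assumption about code not shown in this snippet):

try:
  from scipy import special  # expit/logit used to cross-check probs vs. logits
except ImportError:
  special = None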
Example #2
 def testEntropyWithBatch(self):
   p = [[0.1, 0.7], [0.2, 0.6]]
   dist = bernoulli.Bernoulli(probs=p, validate_args=False)
   self.assertAllClose(
       self.evaluate(dist.entropy()),
       [[entropy(0.1), entropy(0.7)], [entropy(0.2),
                                       entropy(0.6)]])
Example #3
  def testBernoulliBernoulliKL(self):
    batch_size = 6
    a_p = np.array([0.5] * batch_size, dtype=np.float32)
    b_p = np.array([0.4] * batch_size, dtype=np.float32)

    a = bernoulli.Bernoulli(probs=a_p)
    b = bernoulli.Bernoulli(probs=b_p)

    kl = kullback_leibler.kl_divergence(a, b)
    kl_val = self.evaluate(kl)

    kl_expected = (a_p * np.log(a_p / b_p) + (1. - a_p) * np.log(
        (1. - a_p) / (1. - b_p)))

    self.assertEqual(kl.shape, (batch_size,))
    self.assertAllClose(kl_val, kl_expected)
Example #4
  def testPmfShapes(self):
    probs = lambda p: tf.placeholder_with_default(p, shape=None)
    dist = lambda p: bernoulli.Bernoulli(probs=probs(p))
    self.assertEqual(
        2, len(self.evaluate(dist([[0.5], [0.5]]).log_prob(1)).shape))

    dist = bernoulli.Bernoulli(probs=0.5)
    self.assertEqual(2, len(self.evaluate(dist.log_prob([[1], [1]])).shape))

    dist = bernoulli.Bernoulli(probs=0.5)
    self.assertEqual((), dist.log_prob(1).shape)
    self.assertEqual((1,), dist.log_prob([1]).shape)
    self.assertEqual((2, 1), dist.log_prob([[1], [1]]).shape)

    dist = bernoulli.Bernoulli(probs=[[0.5], [0.5]])
    self.assertEqual((2, 1), dist.log_prob(1).shape)
Example #5
 def testPmfInvalid(self):
   p = [0.1, 0.2, 0.7]
   dist = bernoulli.Bernoulli(probs=p, validate_args=True)
   with self.assertRaisesOpError("must be non-negative."):
     self.evaluate(dist.prob([1, 1, -1]))
   with self.assertRaisesOpError("Elements cannot exceed 1."):
     self.evaluate(dist.prob([2, 0, 1]))
Example #6
  def testPmfShapes(self):
    with self.cached_session():
      p = tf.placeholder(tf.float32, shape=[None, 1])
      dist = bernoulli.Bernoulli(probs=p)
      self.assertEqual(
          2, len(dist.log_prob(1).eval({p: [[0.5], [0.5]]}).shape))

      dist = bernoulli.Bernoulli(probs=0.5)
      self.assertEqual(2, len(self.evaluate(dist.log_prob([[1], [1]])).shape))

      dist = bernoulli.Bernoulli(probs=0.5)
      self.assertEqual((), dist.log_prob(1).get_shape())
      self.assertEqual((1,), dist.log_prob([1]).get_shape())
      self.assertEqual((2, 1), dist.log_prob([[1], [1]]).get_shape())

      dist = bernoulli.Bernoulli(probs=[[0.5], [0.5]])
      self.assertEqual((2, 1), dist.log_prob(1).get_shape())
Example #7
    def _sample_n(self, n, seed=None):
        indices_seed, edpp_seed = samplers.split_seed(seed)
        eigvals = tf.convert_to_tensor(self.eigenvalues)
        eigvecs = tf.convert_to_tensor(self.eigenvectors)

        batch_shape = self._batch_shape_tensor(eigenvalues=eigvals,
                                               eigenvectors=eigvecs)
        ground_set_size = ps.shape(eigvecs)[-2]
        vecs_size = ps.shape(eigvecs)[-1]

        # First, we select an elementary DPP to construct an elementary DPP kernel.
        # An elementary DPP (E-DPP) is a DPP whose kernel's eigenvalues are in
        # `{0, 1}`. Any DPP is a mixture of E-DPPs. The standard DPP sampling
        # algorithm first selects an E-DPP (this step) before sampling from
        # the E-DPP.
        batch_eigvals_shape = ps.concat([batch_shape, [vecs_size]], axis=0)
        logits = tf.broadcast_to(tf.math.log(eigvals), batch_eigvals_shape)
        # Shape: [n, batch_shape, vecs_size]
        edpp_indices = bernoulli.Bernoulli(logits=logits).sample(
            n, seed=indices_seed)

        # Shape: [n, batch_shape, ground_set_size, vecs_size]
        n_batch_eigvecs_shape = ps.concat(
            [[n], batch_shape, [ground_set_size, vecs_size]], axis=0)
        eigvecs = tf.broadcast_to(eigvecs, n_batch_eigvecs_shape)

        # Shape: [n, batch_shape, ground_set_size]
        return _sample_from_edpp(eigvecs, edpp_indices, seed=edpp_seed)
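
With `logits = log(eigenvalue)`, each eigenvector enters the elementary kernel with probability `sigmoid(log(lam)) = lam / (1 + lam)`, the usual mixture weight when the eigenvalues parameterize the L-ensemble kernel. A small NumPy check of that identity (illustrative values only, not part of the sampler):

import numpy as np

lam = np.array([0.1, 1.0, 4.0])
# Bernoulli inclusion probability implied by logits = log(lam).
inclusion_prob = 1. / (1. + np.exp(-np.log(lam)))
np.testing.assert_allclose(inclusion_prob, lam / (1. + lam))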
Example #8
 def testNotReparameterized(self):
   p = tf.constant([0.2, 0.6])
   with tf.GradientTape() as tape:
     tape.watch(p)
     dist = bernoulli.Bernoulli(probs=p)
     samples = dist.sample(100)
   grad_p = tape.gradient(samples, p)
   self.assertIsNone(grad_p)
Example #9
 def testBroadcasting(self):
   probs = lambda p: tf.placeholder_with_default(p, shape=None)
   dist = lambda p: bernoulli.Bernoulli(probs=probs(p))
   self.assertAllClose(np.log(0.5), self.evaluate(dist(0.5).log_prob(1)))
   self.assertAllClose(
       np.log([0.5, 0.5, 0.5]), self.evaluate(dist(0.5).log_prob([1, 1, 1])))
   self.assertAllClose(np.log([0.5, 0.5, 0.5]),
                       self.evaluate(dist([0.5, 0.5, 0.5]).log_prob(1)))
Example #10
 def testPmfWithFloatArgReturnsXEntropy(self):
   p = [[0.2], [0.4], [0.3], [0.6]]
   samps = [0, 0.1, 0.8]
   self.assertAllClose(
       np.float32(samps) * np.log(np.float32(p)) +
       (1 - np.float32(samps)) * np.log(1 - np.float32(p)),
       self.evaluate(
           bernoulli.Bernoulli(probs=p, validate_args=False).log_prob(samps)))
Example #11
  def testInvalidP(self):
    invalid_ps = [1.01, 2.]
    for p in invalid_ps:
      with self.assertRaisesOpError("probs has components greater than 1"):
        dist = bernoulli.Bernoulli(probs=p, validate_args=True)
        self.evaluate(dist.probs)

    invalid_ps = [-0.01, -3.]
    for p in invalid_ps:
      with self.assertRaisesOpError("Condition x >= 0"):
        dist = bernoulli.Bernoulli(probs=p, validate_args=True)
        self.evaluate(dist.probs)

    valid_ps = [0.0, 0.5, 1.0]
    for p in valid_ps:
      dist = bernoulli.Bernoulli(probs=p)
      self.assertEqual(p, self.evaluate(dist.probs))  # Should not fail
Example #12
 def testPmfCorrectBroadcastDynamicShape(self):
   p = tf.placeholder_with_default([0.2, 0.3, 0.4], shape=None)
   dist = bernoulli.Bernoulli(probs=p)
   event1 = [1, 0, 1]
   event2 = [[1, 0, 1]]
   self.assertAllClose(
       [0.2, 0.7, 0.4], self.evaluate(dist.prob(event1)))
   self.assertAllClose(
       [[0.2, 0.7, 0.4]], self.evaluate(dist.prob(event2)))
Example #13
 def testBroadcasting(self):
     with self.cached_session():
         p = tf.placeholder(tf.float32)
         dist = bernoulli.Bernoulli(probs=p)
         self.assertAllClose(np.log(0.5), dist.log_prob(1).eval({p: 0.5}))
         self.assertAllClose(np.log([0.5, 0.5, 0.5]),
                             dist.log_prob([1, 1, 1]).eval({p: 0.5}))
         self.assertAllClose(np.log([0.5, 0.5, 0.5]),
                             dist.log_prob(1).eval({p: [0.5, 0.5, 0.5]}))
Example #14
 def testSampleN(self):
   p = [0.2, 0.6]
   dist = bernoulli.Bernoulli(probs=p)
   n = 100000
   samples = dist.sample(n)
   samples.set_shape([n, 2])
   self.assertEqual(samples.dtype, tf.int32)
   sample_values = self.evaluate(samples)
   self.assertTrue(np.all(sample_values >= 0))
   self.assertTrue(np.all(sample_values <= 1))
   # Note that the standard error for the sample mean is ~ sqrt(p * (1 - p) /
   # n). This means that the tolerance is very sensitive to the value of p
   # as well as n.
   self.assertAllClose(p, np.mean(sample_values, axis=0), atol=1e-2)
   self.assertEqual(set([0, 1]), set(sample_values.flatten()))
   # In this test we're just interested in verifying there isn't a crash
   # owing to mismatched types. b/30940152
   dist = bernoulli.Bernoulli(np.log([.2, .4]))
   self.assertAllEqual((1, 2), dist.sample(1, seed=42).shape.as_list())
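
To make the tolerance comment concrete: for `p = 0.2` and `n = 100000`, the standard error of the sample mean is about 1.3e-3, so `atol=1e-2` is roughly eight standard errors (a quick back-of-the-envelope check, not part of the test):

import numpy as np

p, n = 0.2, 100000
print(np.sqrt(p * (1 - p) / n))  # ~0.00126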
Example #15
 def testPmfCorrectBroadcastDynamicShape(self):
     with self.cached_session():
         p = tf.placeholder(dtype=tf.float32)
         dist = bernoulli.Bernoulli(probs=p)
         event1 = [1, 0, 1]
         event2 = [[1, 0, 1]]
         self.assertAllClose(
             dist.prob(event1).eval({p: [0.2, 0.3, 0.4]}), [0.2, 0.7, 0.4])
         self.assertAllClose(
             dist.prob(event2).eval({p: [0.2, 0.3, 0.4]}),
             [[0.2, 0.7, 0.4]])
Example #16
 def testSampleActsLikeSampleN(self):
   with self.cached_session() as sess:
     p = [0.2, 0.6]
     dist = bernoulli.Bernoulli(probs=p)
     n = 1000
     seed = 42
     self.assertAllEqual(
         self.evaluate(dist.sample(n, seed)),
         self.evaluate(dist.sample(n, seed)))
     n = tf.placeholder(tf.int32)
     sample1, sample2 = sess.run([dist.sample(n, seed), dist.sample(n, seed)],
                                 feed_dict={n: 1000})
     self.assertAllEqual(sample1, sample2)
Example #17
            def resample_one_feature(step, seed, sampler_state):
                seed, next_seed = samplers.split_seed(seed, n=2)
                idx = tf.gather(feature_permutation, step)

                # Maybe flip this weight's sparsity indicator.
                proposed_sampler_state = self._flip_feature(sampler_state,
                                                            idx=idx)
                should_flip = bernoulli.Bernoulli(
                    logits=(proposed_sampler_state.unnormalized_log_prob -
                            sampler_state.unnormalized_log_prob),
                    dtype=tf.bool).sample(seed=seed)
                return step + 1, next_seed, mcmc_util.choose(
                    should_flip, proposed_sampler_state, sampler_state)
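
Drawing the flip from `Bernoulli(logits=delta)`, where `delta` is the difference of unnormalized log-probabilities, gives flip probability `sigmoid(delta) = p_proposed / (p_proposed + p_current)`, i.e. the normalized probability of the flipped configuration. A small NumPy check of that identity (illustrative values, not part of the sampler):

import numpy as np

log_p_current, log_p_proposed = np.log(0.3), np.log(0.6)
flip_prob = 1. / (1. + np.exp(-(log_p_proposed - log_p_current)))
np.testing.assert_allclose(flip_prob, 0.6 / (0.6 + 0.3))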
Example #18
 def testVarianceAndStd(self):
   var = lambda p: p * (1. - p)
   p = [[0.2, 0.7], [0.5, 0.4]]
   dist = bernoulli.Bernoulli(probs=p)
   self.assertAllClose(
       self.evaluate(dist.variance()),
       np.array([[var(0.2), var(0.7)], [var(0.5), var(0.4)]],
                dtype=np.float32))
   self.assertAllClose(
       self.evaluate(dist.stddev()),
       np.array([[np.sqrt(var(0.2)), np.sqrt(var(0.7))],
                 [np.sqrt(var(0.5)), np.sqrt(var(0.4))]],
                dtype=np.float32))
Example #19
 def testSampleActsLikeSampleN(self):
   p = [0.2, 0.6]
   dist = bernoulli.Bernoulli(probs=p)
   n = 1000
   seed = 42
   self.assertAllEqual(
       self.evaluate(dist.sample(n, seed)),
       self.evaluate(dist.sample(n, seed)))
   n = tf.placeholder_with_default(np.int32(1000), shape=None)
   if tf.executing_eagerly(): tf.set_random_seed(42)
   sample1 = dist.sample(n, None if tf.executing_eagerly() else 42)
   if tf.executing_eagerly(): tf.set_random_seed(42)
   sample2 = dist.sample(n, None if tf.executing_eagerly() else 42)
   sample1, sample2 = self.evaluate([sample1, sample2])
   self.assertAllEqual(sample1, sample2)
Example #20
 def testSampleDeterministicScalarVsVector(self):
   p = [0.2, 0.6]
   dist = bernoulli.Bernoulli(probs=p)
   n = 1000
   def _maybe_seed():
     if tf.executing_eagerly():
       tf.set_random_seed(42)
       return None
     return 42
   self.assertAllEqual(
       self.evaluate(dist.sample(n, _maybe_seed())),
       self.evaluate(dist.sample([n], _maybe_seed())))
   n = tf.placeholder_with_default(np.int32(1000), shape=None)
   sample1 = dist.sample(n, _maybe_seed())
   sample2 = dist.sample([n], _maybe_seed())
   sample1, sample2 = self.evaluate([sample1, sample2])
   self.assertAllEqual(sample1, sample2)
Example #21
  def _testPmf(self, **kwargs):
    dist = bernoulli.Bernoulli(**kwargs)
    # pylint: disable=bad-continuation
    xs = [
        0,
        [1],
        [1, 0],
        [[1, 0]],
        [[1, 0], [1, 1]],
    ]
    expected_pmfs = [
        [[0.8, 0.6], [0.7, 0.4]],
        [[0.2, 0.4], [0.3, 0.6]],
        [[0.2, 0.6], [0.3, 0.4]],
        [[0.2, 0.6], [0.3, 0.4]],
        [[0.2, 0.6], [0.3, 0.6]],
    ]
    # pylint: enable=bad-continuation

    for x, expected_pmf in zip(xs, expected_pmfs):
      self.assertAllClose(self.evaluate(dist.prob(x)), expected_pmf)
      self.assertAllClose(self.evaluate(dist.log_prob(x)), np.log(expected_pmf))
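
The distribution parameters arrive via `**kwargs`; the expected pmfs above are consistent with a 2x2 batch of probabilities `[[0.2, 0.4], [0.3, 0.6]]` (a reconstruction, not shown in the snippet). A NumPy check of the third case, where `x = [1, 0]` broadcasts against that batch:

import numpy as np

p = np.array([[0.2, 0.4], [0.3, 0.6]])
x = np.array([1, 0])
# Under broadcasting, prob = x * p + (1 - x) * (1 - p).
np.testing.assert_allclose(x * p + (1 - x) * (1 - p), [[0.2, 0.6], [0.3, 0.4]])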
Example #22
 def testEntropyNoBatch(self):
   p = 0.2
   dist = bernoulli.Bernoulli(probs=p)
   self.assertAllClose(self.evaluate(dist.entropy()), entropy(p))
Example #23
    def __init__(self,
                 design_matrix,
                 nonzero_prior_prob=0.5,
                 weights_prior_precision=None,
                 default_pseudo_observations=1.,
                 observation_noise_variance_prior_concentration=0.005,
                 observation_noise_variance_prior_scale=0.0025,
                 observation_noise_variance_upper_bound=None,
                 num_missing=0.):
        """Initializes priors for the spike and slab sampler.

    Args:
      design_matrix: (batch of) float `Tensor`(s) regression design matrix (`X`
        in [1]) having shape `[num_outputs, num_features]`.
      nonzero_prior_prob: scalar float `Tensor` prior probability of the 'slab',
        i.e., prior probability that any given feature has nonzero weight (`pi`
        in [1]). Default value: `0.5`.
      weights_prior_precision: (batch of) float `Tensor` complete prior
        precision matrix(s) over the weights, of shape `[num_features,
        num_features]`. If not specified, defaults to the Zellner g-prior
        specified in `[1]` as `Omega^{-1} = kappa * (X'X + diag(X'X)) / (2 *
        num_outputs)`, in which we've plugged in the suggested default of `w =
        0.5`. The parameter `kappa` is controlled by the
        `default_pseudo_observations` argument. Default value: `None`.
      default_pseudo_observations: scalar float `Tensor` controlling the number of
        pseudo-observations for the prior precision matrix over the weights.
        Corresponds to `kappa` in [1]. See also `weights_prior_precision`.
      observation_noise_variance_prior_concentration: scalar float `Tensor`
        concentration parameter of the inverse gamma prior on the noise
        variance. Corresponds to `nu / 2` in [1]. Default value: 0.005.
      observation_noise_variance_prior_scale: scalar float `Tensor` scale
        parameter of the inverse gamma prior on the noise variance. Corresponds
        to `ss / 2` in [1]. Default value: 0.0025.
      observation_noise_variance_upper_bound: optional scalar float `Tensor`
        maximum value of sampled observation noise variance. Specifying a bound
        can help avoid divergence when the sampler is initialized far from the
        posterior. Default value: `None`.
      num_missing: Optional scalar float `Tensor`. Corrects for how many missing
        values are coded as zero in the design matrix.
    """
        with tf.name_scope('spike_slab_sampler'):
            dtype = dtype_util.common_dtype([
                design_matrix, nonzero_prior_prob, weights_prior_precision,
                observation_noise_variance_prior_concentration,
                observation_noise_variance_prior_scale,
                observation_noise_variance_upper_bound, num_missing
            ],
                                            dtype_hint=tf.float32)
            design_matrix = tf.convert_to_tensor(design_matrix, dtype=dtype)
            nonzero_prior_prob = tf.convert_to_tensor(nonzero_prior_prob,
                                                      dtype=dtype)
            observation_noise_variance_prior_concentration = tf.convert_to_tensor(
                observation_noise_variance_prior_concentration, dtype=dtype)
            observation_noise_variance_prior_scale = tf.convert_to_tensor(
                observation_noise_variance_prior_scale, dtype=dtype)
            num_missing = tf.convert_to_tensor(num_missing, dtype=dtype)
            if observation_noise_variance_upper_bound is not None:
                observation_noise_variance_upper_bound = tf.convert_to_tensor(
                    observation_noise_variance_upper_bound, dtype=dtype)

            design_shape = ps.shape(design_matrix)
            num_outputs = tf.cast(design_shape[-2], dtype=dtype) - num_missing
            num_features = design_shape[-1]

            x_transpose_x = tf.matmul(design_matrix,
                                      design_matrix,
                                      adjoint_a=True)
            if weights_prior_precision is None:
                # Default prior: 'Zellner's g-prior' from section 3.2.1 of [1]:
                #   `omega^{-1} = kappa * (w X'X + (1 - w) diag(X'X)) / n`
                # with default `w = 0.5`.
                padded_inputs = broadcast_util.left_justified_expand_dims_like(
                    num_outputs, x_transpose_x)
                weights_prior_precision = default_pseudo_observations * tf.linalg.set_diag(
                    0.5 * x_transpose_x,
                    tf.linalg.diag_part(x_transpose_x)) / padded_inputs

            observation_noise_variance_posterior_concentration = (
                observation_noise_variance_prior_concentration +
                tf.convert_to_tensor(num_outputs / 2., dtype=dtype))

            self.num_outputs = num_outputs
            self.num_features = num_features
            self.design_matrix = design_matrix
            self.x_transpose_x = x_transpose_x
            self.dtype = dtype
            self.nonzeros_prior = sample_dist.Sample(
                bernoulli.Bernoulli(probs=nonzero_prior_prob),
                sample_shape=[num_features])
            self.weights_prior_precision = weights_prior_precision
            self.observation_noise_variance_prior_concentration = (
                observation_noise_variance_prior_concentration)
            self.observation_noise_variance_prior_scale = (
                observation_noise_variance_prior_scale)
            self.observation_noise_variance_upper_bound = (
                observation_noise_variance_upper_bound)
            self.observation_noise_variance_posterior_concentration = (
                observation_noise_variance_posterior_concentration)
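
The default weights prior constructed above is the Zellner g-prior from the docstring, `Omega^{-1} = kappa * (X'X + diag(X'X)) / (2 * num_outputs)` with `w = 0.5`. A standalone NumPy rendering of the same formula (illustrative design matrix; `kappa = 1` is an assumed default):

import numpy as np

X = np.array([[1., 0.], [1., 1.], [1., 2.]])
kappa = 1.  # plays the role of default_pseudo_observations
xtx = X.T @ X
num_outputs = X.shape[0]
prior_precision = kappa * (0.5 * xtx + 0.5 * np.diag(np.diag(xtx))) / num_outputs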
Example #24
def _left_doubling_increments(batch_shape,
                              max_doublings,
                              step_size,
                              seed=None,
                              name=None):
    """Computes the doubling increments for the left end point.

  The doubling procedure expands an initial interval to find a superset of the
  true slice. At each doubling iteration, the interval width is doubled to
  either the left or the right hand side with equal probability.
  If, initially, the left end point is at `L(0)` and the width of the
  interval is `w(0)`, then the left end point and the width at the
  k-th iteration (denoted L(k) and w(k) respectively) are given by the following
  recursions:

  ```none
  w(k) = 2 * w(k-1)
  L(k) = L(k-1) - w(k-1) * X_k, X_k ~ Bernoulli(0.5)
  or, L(0) - L(k) = w(0) Sum(2^i * X(i+1), 0 <= i < k)
  ```

  This function computes the sequence of `L(0)-L(k)` and `w(k)` for k between 0
  and `max_doublings` independently for each chain.

  Args:
    batch_shape: Positive int32 `tf.Tensor`. The batch shape.
    max_doublings: Scalar positive int32 `tf.Tensor`. The maximum number of
      doublings to consider.
    step_size: A real `tf.Tensor` with shape compatible with [num_chains].
      The size of the initial interval.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'find_slice_bounds').

  Returns:
    left_increments: A tensor of shape (max_doublings+1, batch_shape). The
      relative position of the left end point after the doublings.
    widths: A tensor of shape (max_doublings+1, ones_like(batch_shape)). The
      widths of the intervals at each stage of the doubling.
  """
    with tf.name_scope(name or 'left_doubling_increments'):
        step_size = tf.convert_to_tensor(value=step_size)
        dtype = dtype_util.base_dtype(step_size.dtype)
        # Output shape of the left increments tensor.
        output_shape = ps.concat(([max_doublings + 1], batch_shape), axis=0)
        # A sample realization of X_k.
        expand_left = bernoulli_lib.Bernoulli(probs=0.5, dtype=dtype).sample(
            sample_shape=output_shape, seed=seed)

        # The widths of the successive intervals. Starts with 1.0 and ends with
        # 2^max_doublings.
        width_multipliers = tf.cast(2**tf.range(0, max_doublings + 1),
                                    dtype=dtype)
        # Output shape of the `widths` tensor.
        widths_shape = ps.concat(
            ([max_doublings + 1], ps.ones_like(batch_shape)), axis=0)
        width_multipliers = tf.reshape(width_multipliers, shape=widths_shape)
        # Widths shape is [max_doublings + 1, 1, 1, 1...].
        widths = width_multipliers * step_size

        # Take the cumulative sum of the left side increments in slice width to give
        # the resulting distance from the initial lower bound.
        left_increments = tf.cumsum(widths * expand_left,
                                    exclusive=True,
                                    axis=0)
        return left_increments, widths
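
The docstring's recursion `L(0) - L(k) = w(0) * Sum(2^i * X(i+1), 0 <= i < k)` is what the exclusive cumulative sum above computes. A small NumPy sketch for a single chain with `step_size = 1` (illustrative only):

import numpy as np

max_doublings = 4
x = np.random.binomial(1, 0.5, size=max_doublings)  # X_1 .. X_K
widths = 2. ** np.arange(max_doublings)             # w(0) .. w(K-1)
# L(0) - L(k) for k = 0 .. K, an exclusive cumulative sum as in the code above.
left_increments = np.concatenate([[0.], np.cumsum(widths * x)])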
Example #25
 def testP(self):
   p = [0.2, 0.4]
   dist = bernoulli.Bernoulli(probs=p)
   self.assertAllClose(p, self.evaluate(dist.probs))
Example #26
def make_bernoulli(batch_shape, dtype=tf.int32):
  p = np.random.uniform(size=list(batch_shape))
  p = tf.constant(p, dtype=tf.float32)
  return bernoulli.Bernoulli(probs=p, dtype=dtype)
Example #27
 def testBoundaryConditions(self):
   dist = bernoulli.Bernoulli(probs=1.0)
   self.assertAllClose(np.log(0.), self.evaluate(dist.log_prob(0)))
   self.assertAllClose([np.nan], [self.evaluate(dist.log_prob(1))])
Example #28
 def testMean(self):
   p = np.array([[0.2, 0.7], [0.5, 0.4]], dtype=np.float32)
   dist = bernoulli.Bernoulli(probs=p)
   self.assertAllEqual(self.evaluate(dist.mean()), p)