Example #1
 def testStateParts(self, dtype):
   cast = lambda x: np.array(x, dtype)
   dist_x = tfd.Normal(loc=cast(0), scale=cast(1))
   dist_y = tfd.Independent(
       tfd.Gamma(concentration=cast([1, 2]),
                 rate=cast([0.5, 0.75])),
       reinterpreted_batch_ndims=1)
   def target_log_prob(x, y):
     return dist_x.log_prob(x) + dist_y.log_prob(y)
   stream = test_util.test_seed_stream()
   x0 = [dist_x.sample(8, seed=_set_seed(stream())),  # 8 parallel chains
         dist_y.sample(8, seed=_set_seed(stream()))]
   kernel = tfp.mcmc.HamiltonianMonteCarlo(
       target_log_prob_fn=target_log_prob,
       step_size=1.,
       num_leapfrog_steps=1,
       seed=_set_seed(stream()))
   # We are using bijectors to sample from a transformed density defined on
   # an unbounded domain. The samples returned are from the original bounded
   # domain.
   unconstraining_bijectors = [
       tfb.Identity(),      # Maps R to R.
       tfb.Exp(),           # Maps R to a positive real.
   ]
   transformed_kernel = tfp.mcmc.TransformedTransitionKernel(
       inner_kernel=kernel, bijector=unconstraining_bijectors)
   samples, _ = tfp.mcmc.sample_chain(
       num_results=1000,
       current_state=x0,
       kernel=transformed_kernel,
       num_burnin_steps=500,
       parallel_iterations=1)
   actual_means = [tf.reduce_mean(s, axis=(0, 1)) for s in samples]
   actual_vars = [tf.math.reduce_variance(s, axis=(0, 1)) for s in samples]
   expected_means = [dist_x.mean(), dist_y.mean()]
   expected_vars = [dist_x.variance(), dist_y.variance()]
   [
       actual_means_,
       actual_vars_,
       expected_means_,
       expected_vars_,
   ] = self.evaluate([
       actual_means,
       actual_vars,
       expected_means,
       expected_vars,
   ])
   self.assertAllClose(expected_means_, actual_means_, atol=0.2, rtol=0.)
   self.assertAllClose(expected_vars_, actual_vars_, atol=0., rtol=0.5)
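The test above relies on tfb.Exp() mapping the sampler's unconstrained state back onto the positive support of the Gamma component. A minimal standalone sketch of that mapping (toy values, not part of the test):

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

b = tfb.Exp()
x = tf.constant([-1.0, 0.0, 2.0])  # unconstrained values proposed by the kernel
y = b.forward(x)                   # exp(x): strictly positive values returned to the user
x_back = b.inverse(y)              # log(y): recovers the unconstrained state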
Example #2
 def testWorksWithChain(self):
     shape_out = (4, )
     shape_in = (2, 2)
     x = np.zeros(shape_in)
     y = np.zeros(shape_out)
     bijector = tfb.Chain([
         tfb.Identity(),
         tfb.Reshape(event_shape_out=shape_out, event_shape_in=shape_in)
     ])
     new_y = self.evaluate(bijector.forward(x))
     new_x = self.evaluate(bijector.inverse(y))
     fldj = self.evaluate(
         bijector.forward_log_det_jacobian(x, event_ndims=len(shape_in)))
     ildj = self.evaluate(
         bijector.inverse_log_det_jacobian(y, event_ndims=len(shape_out)))
     self.assertEqual(shape_out, new_y.shape)
     self.assertEqual(shape_in, new_x.shape)
     self.assertEqual((), fldj.shape)
     self.assertEqual((), ildj.shape)
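Note that tfb.Chain applies its bijectors right-to-left under forward, so the Reshape above runs before the Identity. A small sketch (toy values) of that ordering:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

chain = tfb.Chain([tfb.Exp(), tfb.Shift(1.)])
x = tf.constant(0.)
y = chain.forward(x)       # Shift first, then Exp: exp(0. + 1.) ~= 2.718
x_back = chain.inverse(y)  # Exp undone first, then Shift: log(y) - 1. == 0.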
Example #3
  def test_slice_transformed_distribution_with_chain(self):
    dist = tfd.TransformedDistribution(
        distribution=tfd.MultivariateNormalDiag(
            loc=tf.zeros([4]), scale_diag=tf.ones([1, 4])),
        bijector=tfb.Chain([tfb.JointMap([tfb.Identity(),
                                          tfb.Shift(tf.ones([4, 3, 2]))]),
                            tfb.Split(2),
                            tfb.ScaleMatvecDiag(tf.ones([5, 1, 3, 4])),
                            tfb.Exp()]))
    self.assertAllEqual(dist.batch_shape_tensor(), [5, 4, 3])
    self.assertAllEqualNested(
        tf.nest.map_structure(lambda x: x.shape,
                              dist.sample(seed=test_util.test_seed())),
        [[5, 4, 3, 2], [5, 4, 3, 2]])

    sliced = dist[tf.newaxis, ..., 0, :, :-1]
    self.assertAllEqual(sliced.batch_shape_tensor(), [1, 4, 2])
    self.assertAllEqualNested(
        tf.nest.map_structure(lambda x: x.shape,
                              sliced.sample(seed=test_util.test_seed())),
        [[1, 4, 2, 2], [1, 4, 2, 2]])
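The same batch-slicing syntax works on plain distributions; a minimal sketch (not from the test) of indexing into a batch shape:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

dist = tfd.Normal(loc=tf.zeros([5, 3]), scale=1.)  # batch_shape == [5, 3]
sliced = dist[tf.newaxis, ..., :-1]                # batch_shape == [1, 5, 2]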
Example #4
 def testBijector(self):
   with self.test_session():
     for fwd in [
         tfb.Identity(),
         tfb.Exp(),
         tfb.Affine(shift=[0., 1.], scale_diag=[2., 3.]),
         tfb.Softplus(),
         tfb.SoftmaxCentered(),
     ]:
       rev = tfb.Invert(fwd)
       self.assertEqual("_".join(["invert", fwd.name]), rev.name)
       x = [[[1., 2.],
             [2., 3.]]]
       self.assertAllClose(fwd.inverse(x).eval(), rev.forward(x).eval())
       self.assertAllClose(fwd.forward(x).eval(), rev.inverse(x).eval())
       self.assertAllClose(
           fwd.forward_log_det_jacobian(x, event_ndims=1).eval(),
           rev.inverse_log_det_jacobian(x, event_ndims=1).eval())
       self.assertAllClose(
           fwd.inverse_log_det_jacobian(x, event_ndims=1).eval(),
           rev.forward_log_det_jacobian(x, event_ndims=1).eval())
Example #5
 def testBijector(self):
     for fwd in [
             tfb.Identity(),
             tfb.Exp(),
             tfb.Affine(shift=[0., 1.], scale_diag=[2., 3.]),
             tfb.Softplus(),
             tfb.SoftmaxCentered(),
     ]:
         rev = tfb.Invert(fwd)
         self.assertStartsWith(rev.name, "_".join(["invert", fwd.name]))
         x = [[[1., 2.], [2., 3.]]]
         self.assertAllClose(self.evaluate(fwd.inverse(x)),
                             self.evaluate(rev.forward(x)))
         self.assertAllClose(self.evaluate(fwd.forward(x)),
                             self.evaluate(rev.inverse(x)))
         self.assertAllClose(
             self.evaluate(fwd.forward_log_det_jacobian(x, event_ndims=1)),
             self.evaluate(rev.inverse_log_det_jacobian(x, event_ndims=1)))
         self.assertAllClose(
             self.evaluate(fwd.inverse_log_det_jacobian(x, event_ndims=1)),
             self.evaluate(rev.forward_log_det_jacobian(x, event_ndims=1)))
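tfb.Invert swaps a bijector's forward and inverse directions (and the corresponding log-det-Jacobians), which is exactly what both tests above assert. A tiny sketch:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

log_bijector = tfb.Invert(tfb.Exp())  # behaves like a Log bijector
y = tf.constant([1.0, 2.0])
x = log_bijector.forward(y)           # == tf.math.log(y)
y_back = log_bijector.inverse(x)      # == tf.exp(x) == y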
Example #6
  def testCachedSamples(self):
    class ExpForwardOnly(tfb.Bijector):

      def __init__(self):
        parameters = dict(locals())
        super(ExpForwardOnly, self).__init__(
            forward_min_event_ndims=0,
            parameters=parameters)

      def _forward(self, x):
        return tf.exp(x)

      def _forward_log_det_jacobian(self, x):
        return tf.convert_to_tensor(x)

    exp_forward_only = ExpForwardOnly()

    mu = 3.0
    sigma = 0.02
    log_normal = tfd.TransformedDistribution(
        distribution=tfd.Normal(loc=mu, scale=sigma),
        bijector=exp_forward_only,
        validate_args=True)

    sample = log_normal.sample([2, 3], seed=test_util.test_seed())
    sample_val, log_pdf_val = self.evaluate(
        [sample, log_normal.log_prob(sample)])
    expected_log_pdf = stats.lognorm.logpdf(
        sample_val, s=sigma, scale=np.exp(mu))
    self.assertAllClose(expected_log_pdf, log_pdf_val, rtol=1e-4, atol=0.)

    # Check that nesting TransformedDistributions preserves caching.
    identity_log_normal = tfd.TransformedDistribution(
        distribution=log_normal,
        bijector=tfb.Identity(),
        validate_args=True)
    identity_log_normal.log_prob(
        identity_log_normal.sample([2, 3], seed=test_util.test_seed()))
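A short sketch, reusing the log_normal defined above, of why a forward-only bijector still supports log_prob: values returned by sample() carry a cached pre-image, so the missing _inverse is never called for them.

y = log_normal.sample(seed=test_util.test_seed())
log_normal.log_prob(y)  # OK: uses the cached pre-image of y
# log_normal.log_prob(tf.constant(2.))  # no cache hit, needs bijector.inverse -> NotImplementedError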
Example #7
    def _build_sts(self, observed_time_series=None):
        max_timesteps = 100
        num_features = 3

        prior = tfd.Laplace(0., 1.)

        # LinearRegression components don't currently take an `observed_time_series`
        # argument, so they can't infer a prior batch shape. This means we have to
        # manually set the batch shape expected by the tests.
        if observed_time_series is not None:
            observed_time_series_tensor, _ = (
                sts_util.canonicalize_observed_time_series_with_mask(
                    observed_time_series))
            batch_shape = tf.shape(observed_time_series_tensor)[:-2]
            prior = tfd.TransformedDistribution(prior,
                                                tfb.Identity(),
                                                event_shape=[num_features],
                                                batch_shape=batch_shape)

        regression = LinearRegression(
            design_matrix=np.random.randn(
                max_timesteps, num_features).astype(np.float32),
            weights_prior=prior)
        return Sum(components=[regression],
                   observed_time_series=observed_time_series)
Example #8
    def __init__(
        self,
        train_student_ids,
        train_question_ids,
        train_correct,
        test_student_ids=None,
        test_question_ids=None,
        test_correct=None,
        name='item_response_theory',
        pretty_name='Item-Response Theory',
    ):
        """Construct the item-response theory model.

    This models a set of students answering a set of questions, with each
    answer scored as correct or incorrect. Each student is associated
    with a scalar `student_ability`, and each question is associated with a
    scalar `question_difficulty`. Additionally, a scalar `mean_student_ability`
    is shared between all the students. This corresponds to the [1PL
    item-response theory](1) model.

    The data are encoded into three parallel arrays per set. I.e.
    `*_correct[i]  == 1` means that student `*_student_ids[i]` answered question
    `*_question_ids[i]` correctly; `*_correct[i] == 0` means they didn't.

    Args:
      train_student_ids: Integer `Tensor` with shape `[num_train_points]`.
        Training student ids, ranging from 0 to `num_students`.
      train_question_ids: Integer `Tensor` with shape `[num_train_points]`.
        Training question ids, ranging from 0 to `num_questions`.
      train_correct: Integer `Tensor` with shape `[num_train_points]`. Whether
        the student in the training set answered the question correctly, either
        0 or 1.
      test_student_ids: Integer `Tensor` with shape `[num_test_points]`. Testing
        student ids, ranging from 0 to `num_students`. Can be `None`, in which
        case test-related sample transformations are not computed.
      test_question_ids: Integer `Tensor` with shape `[num_test_points]`.
        Testing question ids, ranging from 0 to `num_questions`. Can be `None`,
        in which case test-related sample transformations are not computed.
      test_correct: Integer `Tensor` with shape `[num_test_points]`. Whether the
        student in the testing set answered the question correctly, either 0 or
        1. Can be `None`, in which case test-related sample transformations are
        not computed.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If `test_student_ids`, `test_question_ids` or `test_correct`
        are not either all `None` or are all specified.
      ValueError: If the parallel arrays are not all of the same size.

    #### References

    1. https://en.wikipedia.org/wiki/Item_response_theory
    """
        with tf.name_scope(name):
            test_data_present = [
                e is not None
                for e in [test_student_ids, test_question_ids, test_correct]]
            self._have_test = all(test_data_present)
            if not self._have_test and any(test_data_present):
                raise ValueError(
                    '`test_student_ids`, `test_question_ids` and '
                    '`test_correct` must either all be `None` or '
                    'all be specified. Got: test_student_ids={}, '
                    'test_question_ids={}, test_correct={}'.format(
                        test_student_ids, test_question_ids, test_correct))
            if not (train_student_ids.shape[0] == train_question_ids.shape[0]
                    == train_correct.shape[0]):
                raise ValueError(
                    '`train_student_ids`, `train_question_ids` and '
                    '`train_correct` must all have the same length. '
                    'Got: {} {} {}'.format(train_student_ids.shape[0],
                                           train_question_ids.shape[0],
                                           train_correct.shape[0]))

            max_student_id = train_student_ids.max()
            max_question_id = train_question_ids.max()
            if self._have_test:
                max_student_id = max(max_student_id, test_student_ids.max())
                max_question_id = max(max_question_id, test_question_ids.max())

            self._num_students = max_student_id + 1
            self._num_questions = max_question_id + 1

            # TODO(siege): Make it an option to use a sparse encoding. The dense
            # encoding is only efficient when the dataset is not very sparse to begin
            # with.
            train_dense_y, train_y_mask = self._sparse_to_dense(
                train_student_ids,
                train_question_ids,
                train_correct,
            )

            self._prior_dist = tfd.JointDistributionNamed(
                dict(
                    mean_student_ability=tfd.Normal(0.75, 1.),
                    student_ability=tfd.Sample(
                        tfd.Normal(0., 1.),
                        self._num_students,
                    ),
                    question_difficulty=tfd.Sample(
                        tfd.Normal(0., 1.),
                        self._num_questions,
                    ),
                ))

            def observation_noise_fn(mean_student_ability, student_ability,
                                     question_difficulty):
                """Creates the observation noise distribution."""
                logits = (mean_student_ability[..., tf.newaxis, tf.newaxis] +
                          student_ability[..., tf.newaxis] -
                          question_difficulty[..., tf.newaxis, :])
                return tfd.Bernoulli(logits)

            self._observation_noise_fn = observation_noise_fn

            def log_likelihood_fn(dense_y, y_mask, reduce_sum=True, **params):
                """The log_likelihood function."""
                log_likelihood = observation_noise_fn(
                    **params).log_prob(dense_y)
                log_likelihood = tf.where(y_mask, log_likelihood,
                                          tf.zeros_like(log_likelihood))
                if reduce_sum:
                    return tf.reduce_sum(log_likelihood, [-1, -2])
                else:
                    return log_likelihood

            self._train_log_likelihood_fn = functools.partial(
                log_likelihood_fn,
                dense_y=train_dense_y,
                y_mask=train_y_mask,
            )

            dtype = self._prior_dist.dtype

            sample_transformations = {
                'identity':
                model.Model.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                    dtype=dtype,
                )
            }
            if self._have_test:
                if not (test_student_ids.shape[0] == test_question_ids.shape[0]
                        == test_correct.shape[0]):
                    raise ValueError(
                        '`test_student_ids`, `test_question_ids` and '
                        '`test_correct` must all have the same length. '
                        'Got: {} {} {}'.format(test_student_ids.shape[0],
                                               test_question_ids.shape[0],
                                               test_correct.shape[0]))
                test_dense_y, test_y_mask = self._sparse_to_dense(
                    test_student_ids,
                    test_question_ids,
                    test_correct,
                )
                test_log_likelihood_fn = functools.partial(
                    log_likelihood_fn,
                    dense_y=test_dense_y,
                    y_mask=test_y_mask,
                )

                sample_transformations['test_nll'] = (
                    model.Model.SampleTransformation(
                        fn=lambda params: test_log_likelihood_fn(**params),
                        pretty_name='Test NLL',
                    ))

                def _per_example_test_nll(params):
                    """Computes per-example test NLL."""
                    dense_nll = test_log_likelihood_fn(reduce_sum=False,
                                                       **params)
                    return self._dense_to_sparse(test_student_ids,
                                                 test_question_ids, dense_nll)

                sample_transformations['per_example_test_nll'] = (
                    model.Model.SampleTransformation(
                        fn=_per_example_test_nll,
                        pretty_name='Per-example Test NLL',
                    ))

        self._train_student_ids = train_student_ids
        self._train_question_ids = train_question_ids
        self._test_student_ids = test_student_ids
        self._test_question_ids = test_question_ids

        super(ItemResponseTheory, self).__init__(
            default_event_space_bijector=tf.nest.map_structure(
                lambda _: tfb.Identity(), self._prior_dist.dtype),
            event_shape=self._prior_dist.event_shape,
            dtype=dtype,
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
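A minimal numeric sketch (toy values, not from any dataset) of the 1PL response probability encoded by observation_noise_fn above, P(correct) = sigmoid(mean_student_ability + student_ability - question_difficulty):

import tensorflow as tf

mean_student_ability = 0.75  # shared offset
student_ability = 1.0        # one student
question_difficulty = 0.5    # one question
p_correct = tf.sigmoid(mean_student_ability + student_ability - question_difficulty)
# tfd.Bernoulli(logits=...) above uses exactly this logit per (student, question) pair.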
Example #9
def _make_asvi_trainable_variables(prior,
                                   mean_field=False,
                                   initial_prior_weight=0.5):
    """Generates parameter dictionaries given a prior distribution and list."""
    with tf.name_scope('make_asvi_trainable_variables'):
        param_dicts = []
        prior_dists = prior._get_single_sample_distributions()  # pylint: disable=protected-access
        for dist in prior_dists:
            original_dist = dist.distribution if isinstance(dist,
                                                            Root) else dist

            substituted_dist = _as_trainable_family(original_dist)

            # Grab the base distribution if it exists
            try:
                actual_dist = substituted_dist.distribution
            except AttributeError:
                actual_dist = substituted_dist

            new_params_dict = {}

            #  Build trainable ASVI representation for each distribution's parameters.
            parameter_properties = actual_dist.parameter_properties(
                dtype=actual_dist.dtype)
            sample_shape = tf.concat(
                [dist.batch_shape_tensor(),
                 dist.event_shape_tensor()], axis=0)
            for param, value in actual_dist.parameters.items():
                if param in (_NON_STATISTICAL_PARAMS +
                             _NON_TRAINABLE_PARAMS) or value is None:
                    continue
                try:
                    bijector = parameter_properties[
                        param].default_constraining_bijector_fn()
                except NotImplementedError:
                    bijector = tfb.Identity()
                unconstrained_ones = tf.ones(
                    shape=bijector.inverse_event_shape_tensor(
                        parameter_properties[param].shape_fn(
                            sample_shape=sample_shape)),
                    dtype=actual_dist.dtype)

                if mean_field:
                    new_params_dict[param] = ASVIParameters(
                        prior_weight=None,
                        mean_field_parameter=tfp_util.TransformedVariable(
                            value,
                            bijector=bijector,
                            name='mean_field_parameter/{}/{}'.format(
                                dist.name, param)))
                else:
                    new_params_dict[param] = ASVIParameters(
                        prior_weight=tfp_util.TransformedVariable(
                            initial_prior_weight * unconstrained_ones,
                            bijector=tfb.Sigmoid(),
                            name='prior_weight/{}/{}'.format(dist.name,
                                                             param)),
                        mean_field_parameter=tfp_util.TransformedVariable(
                            value,
                            bijector=bijector,
                            name='mean_field_parameter/{}/{}'.format(
                                dist.name, param)))
            param_dicts.append(new_params_dict)
    return param_dicts
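Example #10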
def constraint_for(dist=None, param=None):
    """Get bijector constraint for a given distribution's parameter."""

    constraints = {
        'atol':
            tfb.Softplus(),
        'rtol':
            tfb.Softplus(),
        'concentration':
            tfb.Softplus(),
        'GeneralizedPareto.concentration':  # Permits +ve and -ve concentrations.
            lambda x: tf.math.tanh(x) * 0.24,
        'concentration0':
            tfb.Softplus(),
        'concentration1':
            tfb.Softplus(),
        'df':
            tfb.Softplus(),
        'InverseGaussian.loc':
            tfb.Softplus(),
        'JohnsonSU.tailweight':
            tfb.Softplus(),
        'PowerSpherical.mean_direction':
            lambda x: tf.math.l2_normalize(tf.math.sigmoid(x) + 1e-6, -1),
        'ContinuousBernoulli.probs':
            tfb.Sigmoid(),
        'Geometric.logits':  # TODO(b/128410109): re-enable down to -50
            # Capping at 15. so that probability is less than 1, and entropy is
            # defined. b/147394924
            lambda x: tf.minimum(tf.maximum(x, -16.), 15.),  # works around the bug
        'Geometric.probs':
            constrain_between_eps_and_one_minus_eps(),
        'Binomial.probs':
            tfb.Sigmoid(),
        'NegativeBinomial.probs':
            tfb.Sigmoid(),
        'Bernoulli.probs':
            tfb.Sigmoid(),
        'PlackettLuce.scores':
            tfb.Softplus(),
        'ProbitBernoulli.probs':
            tfb.Sigmoid(),
        'RelaxedBernoulli.probs':
            tfb.Sigmoid(),
        'cutpoints':  # Permit values that aren't too large
            lambda x: tfb.Ordered().inverse(10. * tf.math.tanh(x)),
        'log_rate':
            lambda x: tf.maximum(x, -16.),
        'mixing_concentration':
            tfb.Softplus(),
        'mixing_rate':
            tfb.Softplus(),
        'rate':
            tfb.Softplus(),
        'scale':
            tfb.Softplus(),
        'scale_diag':
            tfb.Softplus(),
        'scale_identity_multiplier':
            tfb.Softplus(),
        'tailweight':
            tfb.Softplus(),
        'temperature':
            tfb.Softplus(),
        'total_count':
            lambda x: tf.floor(tfb.Sigmoid()(x / 100.) * 100.) + 1.,
        'Bernoulli':
            lambda d: dict(d, dtype=tf.float32),
        'CholeskyLKJ':
            fix_lkj,
        'LKJ':
            fix_lkj,
        'Zipf':
            lambda d: dict(d, dtype=tf.float32),
        'GeneralizedNormal.power':
            tfb.Softplus(),
    }

    if param is not None:
        return constraints.get('{}.{}'.format(dist, param),
                               constraints.get(param, tfb.Identity()))
    return constraints.get(dist, tfb.Identity())
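A small usage sketch of constraint_for defined above (the distribution/parameter names are illustrative):

scale_bijector = constraint_for('Normal', param='scale')     # falls through to 'scale' -> tfb.Softplus()
probs_bijector = constraint_for('Bernoulli', param='probs')  # 'Bernoulli.probs' -> tfb.Sigmoid()
loc_bijector = constraint_for('Normal', param='loc')         # no entry -> tfb.Identity()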
Example #11
 def testScalarCongruency(self):
   bijector = tfb.Identity()
   bijector_test_util.assert_scalar_congruency(
       bijector, lower_x=-2., upper_x=2., eval_func=self.evaluate)
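Example #12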
    def __init__(
        self,
        train_locations,
        train_extents,
        train_counts,
        dtype=tf.float64,
        name='log_gaussian_cox_process',
        pretty_name='Log-Gaussian Cox Process',
    ):
        """Log-Gaussian Cox Process[1] regression in a D dimensional space.

    This models the observed event counts at a set of locations with associated
    extents. An extent could correspond to an area (in which case location could
    be the centroid of that area), or duration (in which case the location is
    infinitesimal, but the measurements are taken over an extended period of
     time). A count divided by the extent at its location is termed the
     intensity at that location.

    A Gaussian Process with a Matern 3/2 kernel is used to model log-intensity
    deviations from the mean log-intensity. The regressed intensity is then
     multiplied by the extents to parameterize the rate of a Poisson observation
    model. The posterior of the model is over the amplitude and length scale of
    the Matern kernel, as well as the log-intensity deviations themselves. In
    summary:

    ```none
    amplitude ~ LogNormal(-1, 0.5)
    length_scale ~ LogNormal(-1, 1)
    delta_log_intensities ~ GP(Matern32(amplitude, length_scale), locations)
    counts[i] ~ Poisson(extents[i] *
                        exp(delta_log_intensities[i] + mean_log_intensity))
    ```

    The data are encoded into three parallel arrays. I.e.
    `train_counts[i]` and `train_extents[i]` correspond to `train_locations[i]`.

    Args:
      train_locations: Float `Tensor` with shape `[num_train_points, D]`.
        Training set locations where counts were measured.
      train_extents: Float `Tensor` with shape `[num_train_points]`. Training
        set location extents, must be positive.
      train_counts: Integer `Tensor` with shape `[num_train_points]`. Training
        set counts, must be positive.
      dtype: Datatype to use for the model. Gaussian Process regression tends to
        require double precision.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If the parallel arrays are not all of the same size.

    #### References

    1. Diggle, P. J., Moraga, P., Rowlingson, B., & Taylor, B. M. (2013).
       Spatial and spatio-temporal log-Gaussian Cox processes: Extending the
      geostatistical paradigm. Statistical Science, 28(4), 542-563.

    """
        with tf.name_scope(name):
            if not (train_locations.shape[0] == train_counts.shape[0] ==
                    train_extents.shape[0]):
                raise ValueError(
                    '`train_locations`, `train_counts` and '
                    '`train_extents` must all have the same length. '
                    'Got: {} {} {}'.format(train_locations.shape[0],
                                           train_counts.shape[0],
                                           train_extents.shape[0]))

            train_counts = tf.cast(train_counts, dtype=dtype)
            train_locations = tf.convert_to_tensor(train_locations,
                                                   dtype=dtype)
            train_extents = tf.convert_to_tensor(train_extents, dtype=dtype)

            mean_log_intensity = tf.reduce_mean(
                tf.math.log(train_counts) - tf.math.log(train_extents),
                axis=-1,
            )

            self._prior_dist = tfd.JointDistributionNamed(
                dict(
                    amplitude=tfd.LogNormal(-1, tf.constant(.5, dtype=dtype)),
                    length_scale=tfd.LogNormal(-1, tf.constant(1.,
                                                               dtype=dtype)),
                    log_intensity=lambda amplitude, length_scale: tfd.
                    GaussianProcess(  # pylint: disable=g-long-lambda
                        mean_fn=lambda x: mean_log_intensity,
                        kernel=tfpk.MaternThreeHalves(
                            amplitude=amplitude + .001,
                            length_scale=length_scale + .001),
                        index_points=train_locations,
                        jitter=1e-6),
                ))

            def observation_noise_fn(log_intensity):
                """Creates the observation noise distribution."""
                return tfd.Poisson(log_rate=tf.math.log(train_extents) +
                                   log_intensity)

            self._observation_noise_fn = observation_noise_fn

            def log_likelihood_fn(log_intensity, **_):
                """The log_likelihood function."""
                return tf.reduce_sum(
                    observation_noise_fn(log_intensity).log_prob(train_counts),
                    -1)

            self._log_likelihood_fn = log_likelihood_fn

            sample_transformations = {
                'identity':
                model.Model.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                    dtype=self._prior_dist.dtype,
                )
            }

        self._train_locations = train_locations
        self._train_extents = train_extents

        super(LogGaussianCoxProcess, self).__init__(
            default_event_space_bijector=dict(
                amplitude=tfb.Exp(),
                length_scale=tfb.Exp(),
                log_intensity=tfb.Identity(),
            ),
            event_shape=self._prior_dist.event_shape,
            dtype=self._prior_dist.dtype,
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
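A minimal sketch (toy values) of the Poisson rate that observation_noise_fn above parameterizes in log space, rate = extent * exp(log_intensity):

import tensorflow as tf

extents = tf.constant([2.0, 0.5], dtype=tf.float64)
log_intensity = tf.constant([1.0, -0.3], dtype=tf.float64)
log_rate = tf.math.log(extents) + log_intensity  # what the Poisson above receives
rate = extents * tf.exp(log_intensity)           # the same quantity, computed directly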
Example #13
  def __init__(self,
               design_matrix,
               weights_prior_scale=0.1,
               weights_batch_shape=None,
               name=None):
    """Specify a sparse linear regression model.

    Args:
      design_matrix: float `Tensor` of shape `concat([batch_shape,
        [num_timesteps, num_features]])`. This may also optionally be
        an instance of `tf.linalg.LinearOperator`.
      weights_prior_scale: float `Tensor` defining the scale of the Horseshoe
        prior on regression weights. Small values encourage the weights to be
        sparse. The shape must broadcast with `weights_batch_shape`.
        Default value: `0.1`.
      weights_batch_shape: if `None`, defaults to
        `design_matrix.batch_shape_tensor()`. Must broadcast with the batch
        shape of `design_matrix`.
        Default value: `None`.
      name: the name of this model component.
        Default value: 'SparseLinearRegression'.
    """
    with tf.compat.v1.name_scope(
        name, 'SparseLinearRegression',
        values=[design_matrix, weights_prior_scale]) as name:

      if not isinstance(design_matrix, tfl.LinearOperator):
        design_matrix = tfl.LinearOperatorFullMatrix(
            tf.convert_to_tensor(value=design_matrix, name='design_matrix'),
            name='design_matrix_linop')

      if tf.compat.dimension_value(design_matrix.shape[-1]) is not None:
        num_features = design_matrix.shape[-1]
      else:
        num_features = design_matrix.shape_tensor()[-1]

      if weights_batch_shape is None:
        weights_batch_shape = design_matrix.batch_shape_tensor()
      else:
        weights_batch_shape = tf.convert_to_tensor(value=weights_batch_shape,
                                                   dtype=tf.int32)
      weights_shape = tf.concat([weights_batch_shape, [num_features]], axis=0)

      dtype = design_matrix.dtype

      self._design_matrix = design_matrix
      self._weights_prior_scale = weights_prior_scale

      ones_like_weights_batch = tf.ones(weights_batch_shape, dtype=dtype)
      ones_like_weights = tf.ones(weights_shape, dtype=dtype)
      super(SparseLinearRegression, self).__init__(
          parameters=[
              Parameter('global_scale_variance',
                        prior=tfd.InverseGamma(
                            0.5 * ones_like_weights_batch,
                            0.5 * ones_like_weights_batch),
                        bijector=tfb.Softplus()),
              Parameter('global_scale_noncentered',
                        prior=tfd.HalfNormal(
                            scale=ones_like_weights_batch),
                        bijector=tfb.Softplus()),
              Parameter('local_scale_variances',
                        prior=tfd.Independent(tfd.InverseGamma(
                            0.5 * ones_like_weights,
                            0.5 * ones_like_weights),
                                              reinterpreted_batch_ndims=1),
                        bijector=tfb.Softplus()),
              Parameter('local_scales_noncentered',
                        prior=tfd.Independent(tfd.HalfNormal(
                            scale=ones_like_weights),
                                              reinterpreted_batch_ndims=1),
                        bijector=tfb.Softplus()),
              Parameter('weights_noncentered',
                        prior=tfd.Independent(tfd.Normal(
                            loc=tf.zeros_like(ones_like_weights),
                            scale=ones_like_weights),
                                              reinterpreted_batch_ndims=1),
                        bijector=tfb.Identity())
          ],
          latent_size=0,
          name=name)
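The snippet above only declares the non-centered horseshoe parameters; assembling the actual regression weights happens elsewhere. A hedged sketch of the usual non-centered horseshoe recovery such parameters feed (an assumption, not code from this class):

import tensorflow as tf

def assemble_weights(global_scale_variance, global_scale_noncentered,
                     local_scale_variances, local_scales_noncentered,
                     weights_noncentered, weights_prior_scale=0.1):
  # Assumed construction: half-Cauchy scales built from half-normal and
  # inverse-gamma pieces, then a Gaussian scale mixture for the weights.
  global_scale = (global_scale_noncentered *
                  tf.sqrt(global_scale_variance) * weights_prior_scale)
  local_scales = local_scales_noncentered * tf.sqrt(local_scale_variances)
  return weights_noncentered * local_scales * global_scale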
Example #14
    def __init__(self,
                 train_features,
                 train_labels,
                 test_features=None,
                 test_labels=None,
                 name='logistic_regression',
                 pretty_name='Logistic Regression'):
        """Construct the logistic regression model.

    Args:
      train_features: Floating-point `Tensor` with shape `[num_train_points,
        num_features]`. Training features.
      train_labels: Integer `Tensor` with shape `[num_train_points]`. Training
        labels.
      test_features: Floating-point `Tensor` with shape `[num_test_points,
        num_features]`. Testing features. Can be `None`, in which case
        test-related sample transformations are not computed.
      test_labels: Integer `Tensor` with shape `[num_test_points]`. Testing
        labels. Can be `None`, in which case test-related sample transformations
        are not computed.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.
    """
        with tf.name_scope(name):
            train_features = _add_bias(train_features)
            train_labels = tf.convert_to_tensor(train_labels)
            num_features = int(train_features.shape[1])

            root = tfd.JointDistributionCoroutine.Root
            zero = tf.zeros(num_features)
            one = tf.ones(num_features)

            def model_fn(features):
                weights = yield root(tfd.Independent(tfd.Normal(zero, one), 1))
                logits = tf.einsum('nd,...d->...n', features, weights)
                yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

            train_joint_dist = tfd.JointDistributionCoroutine(
                functools.partial(model_fn, features=train_features))

            sample_transformations = {
                'identity':
                bayesian_model.BayesianModel.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                )
            }
            if test_features is not None and test_labels is not None:
                test_features = _add_bias(test_features)
                test_labels = tf.convert_to_tensor(test_labels)
                test_joint_dist = tfd.JointDistributionCoroutine(
                    functools.partial(model_fn, features=test_features))

                def _get_label_dist(weights):
                    # TODO(b/150897904): The seed does nothing since the model is fully
                    # conditioned.
                    distributions, _ = test_joint_dist.sample_distributions(
                        value=[weights, test_labels], seed=42)
                    return distributions[-1]

                sample_transformations['test_nll'] = (
                    bayesian_model.BayesianModel.SampleTransformation(
                        fn=lambda weights: -(  # pylint: disable=g-long-lambda
                            _get_label_dist(weights).log_prob(test_labels)),
                        pretty_name='Test NLL',
                    ))
                sample_transformations['per_example_test_nll'] = (
                    bayesian_model.BayesianModel.SampleTransformation(
                        fn=lambda weights: -(  # pylint: disable=g-long-lambda
                            _get_label_dist(weights).distribution.log_prob(
                                test_labels)),
                        pretty_name='Per-example Test NLL',
                    ))

        self._train_joint_dist = train_joint_dist
        self._train_labels = train_labels

        super(LogisticRegression, self).__init__(
            default_event_space_bijector=tfb.Identity(),
            event_shape=train_joint_dist.event_shape[0],
            dtype=train_joint_dist.dtype[0],
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
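A small sketch (toy shapes) of the tf.einsum('nd,...d->...n', ...) contraction used in model_fn above: it yields one logit per data point for arbitrarily batched weight samples.

import tensorflow as tf

features = tf.random.normal([7, 3])  # [num_points, num_features]
weights = tf.random.normal([4, 3])   # a batch of 4 weight vectors
logits = tf.einsum('nd,...d->...n', features, weights)  # shape [4, 7]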
Example #15
def _make_asvi_trainable_variables(prior,
                                   mean_field=False,
                                   initial_prior_weight=0.5):
    """Generates parameter dictionaries given a prior distribution and list."""
    with tf.name_scope('make_asvi_trainable_variables'):
        param_dicts = []
        prior_dists = prior._get_single_sample_distributions()  # pylint: disable=protected-access
        for dist in prior_dists:
            original_dist = dist.distribution if isinstance(dist,
                                                            Root) else dist

            substituted_dist = _as_trainable_family(original_dist)

            # Grab the base distribution if it exists
            try:
                actual_dist = substituted_dist.distribution
            except AttributeError:
                actual_dist = substituted_dist

            new_params_dict = {}

            #  Build trainable ASVI representation for each distribution's parameters.
            parameter_properties = actual_dist.parameter_properties(
                dtype=actual_dist.dtype)

            if isinstance(original_dist, sample.Sample):
                posterior_batch_shape = ps.concat([
                    actual_dist.batch_shape_tensor(),
                    original_dist.sample_shape
                ],
                                                  axis=0)
            else:
                posterior_batch_shape = actual_dist.batch_shape_tensor()

            for param, value in actual_dist.parameters.items():

                if param in (_NON_STATISTICAL_PARAMS +
                             _NON_TRAINABLE_PARAMS) or value is None:
                    continue

                actual_event_shape = parameter_properties[param].shape_fn(
                    actual_dist.event_shape_tensor())
                try:
                    bijector = parameter_properties[
                        param].default_constraining_bijector_fn()
                except NotImplementedError:
                    bijector = tfb.Identity()

                if mean_field:
                    prior_weight = None
                else:
                    unconstrained_ones = tf.ones(shape=ps.concat([
                        posterior_batch_shape,
                        bijector.inverse_event_shape_tensor(actual_event_shape)
                    ],
                                                                 axis=0),
                                                 dtype=actual_dist.dtype)

                    prior_weight = tfp_util.TransformedVariable(
                        initial_prior_weight * unconstrained_ones,
                        bijector=tfb.Sigmoid(),
                        name='prior_weight/{}/{}'.format(dist.name, param))

                # If the prior distribution was a tfd.Sample wrapping a base
                # distribution, we want to give every single sample in the prior its
                # own lambda and alpha value (rather than having a single lambda and
                # alpha).
                if isinstance(original_dist, sample.Sample):
                    value = tf.reshape(
                        value,
                        ps.concat([
                            actual_dist.batch_shape_tensor(),
                            ps.ones(
                                ps.rank_from_shape(
                                    original_dist.sample_shape)),
                            actual_event_shape
                        ],
                                  axis=0))
                    value = tf.broadcast_to(
                        value,
                        ps.concat([posterior_batch_shape, actual_event_shape],
                                  axis=0))
                new_params_dict[param] = ASVIParameters(
                    prior_weight=prior_weight,
                    mean_field_parameter=tfp_util.TransformedVariable(
                        value,
                        bijector=bijector,
                        name='mean_field_parameter/{}/{}'.format(
                            dist.name, param)))

            param_dicts.append(new_params_dict)
    return param_dicts
Example #16
    def EM_with_MCMC(self,
                     num_warmup_iters,
                     em_iters,
                     mcmc_samples,
                     num_leapfrog_steps,
                     initial_state=None,
                     learning_rate=0.01,
                     display_rate=200):

        Wmix = tf.Variable(self.Wmix, name='Wmix_cur')
        unc_noise_init = tf.math.log(tf.exp(self.noise) - 1)
        unc_noise = tf.Variable(unc_noise_init, name='unc_noise')

        # Setting up the step_size and targeted acceptance rate for the MCMC part
        step_size = tf.Variable(0.01, name='step_size')
        target_accept_rate = 0.651

        if initial_state is None:
            beta_init = 1.2 * tf.ones([self.n_latent, self.dim_input],
                                      dtype=tf.float32)
            varm_init = 0.8 * tf.ones([self.n_tasks, self.n_latent],
                                      dtype=tf.float32)
            loc_init = tf.zeros(self.n_tasks)
            varc_init = 1.0
        else:
            beta_init, varm_init, loc_init, varc_init = initial_state

        beta_cur = tf.Variable(beta_init, name='beta_cur', trainable=False)
        varm_cur = tf.Variable(varm_init, name='varm_cur', trainable=False)
        loc_cur = tf.Variable(loc_init, name='loc_cur', trainable=False)
        varc_cur = tf.Variable(varc_init, name='varc_cur', trainable=False)

        unconstraining_bijectors = [
            tfb.Softplus(),
            tfb.Softplus(),
            tfb.Identity(),
            tfb.Softplus()
        ]

        unnormalized_posterior_log_prob = lambda *args: self.joint_log_prob(
            tf.nn.softplus(unc_noise), Wmix, *args)

        current_state = [beta_cur, varm_cur, loc_cur, varc_cur]

        # Initializing a sampler for warmup:
        sampler = TransformedTransitionKernel(
            inner_kernel=HamiltonianMonteCarlo(
                target_log_prob_fn=unnormalized_posterior_log_prob,
                step_size=step_size,
                num_leapfrog_steps=num_leapfrog_steps),
            bijector=unconstraining_bijectors)

        # One step of the sampler
        [beta_next, varm_next, loc_next,
         varc_next], kernel_results = sampler.one_step(
             current_state=current_state,
             previous_kernel_results=sampler.bootstrap_results(current_state))

        # updating the step size
        step_size_update = step_size_simple_update(
            step_size,
            kernel_results,
            target_rate=target_accept_rate,
            decrement_multiplier=0.1,
            increment_multiplier=0.1)

        # Updating the state of the hyperparameters
        beta_update1 = beta_cur.assign(beta_next)
        varm_update1 = varm_cur.assign(varm_next)
        loc_update1 = loc_cur.assign(loc_next)
        varc_update1 = varc_cur.assign(varc_next)

        warmup_update = tf.group([
            beta_update1, varm_update1, loc_update1, varc_update1,
            step_size_update
        ])
        step_size_update2 = step_size.assign(0.95 * step_size)
        simple_update = tf.group(
            [beta_update1, varm_update1, loc_update1, varc_update1])

        # Set up E-step with MCMC
        [beta_probs, varm_probs, loc_probs,
         varc_probs], em_kernel_results = sample_chain(
             num_results=10,
             num_burnin_steps=0,
             current_state=current_state,
             kernel=TransformedTransitionKernel(
                 inner_kernel=HamiltonianMonteCarlo(
                     target_log_prob_fn=unnormalized_posterior_log_prob,
                     step_size=0.95 * step_size,
                     num_leapfrog_steps=num_leapfrog_steps),
                 bijector=unconstraining_bijectors))

        # Updating the state of the hyperparameters
        beta_update2 = beta_cur.assign(tf.reduce_mean(beta_probs, axis=0))
        varm_update2 = varm_cur.assign(tf.reduce_mean(varm_probs, axis=0))
        loc_update2 = loc_cur.assign(tf.reduce_mean(loc_probs, axis=0))
        varc_update2 = varc_cur.assign(tf.reduce_mean(varc_probs, axis=0))

        expectation_update = tf.group(
            [beta_update2, varm_update2, loc_update2, varc_update2])

        #-- Set up M-step (updating noise variance)
        with tf.control_dependencies([expectation_update]):
            loss = -(self.joint_log_prob(tf.nn.softplus(unc_noise), Wmix,
                                         beta_cur, varm_cur, loc_cur, varc_cur)
                     + self.rv_noise.log_prob(tf.nn.softplus(unc_noise))
                     + self.rv_Wmix.log_prob(Wmix))

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            minimization_update = optimizer.minimize(loss)

        init = tf.global_variables_initializer()

        sess = tf.Session()
        sess.run(init)

        # Initial warm-up stage
        print('First warm-up phase.')
        num_accepted = 0
        for t in range(num_warmup_iters):
            _, is_accepted_val = sess.run(
                [warmup_update, kernel_results.inner_results.is_accepted])
            num_accepted += is_accepted_val
            if (t % display_rate == 0) or (t == num_warmup_iters - 1):
                print(
                    "Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
                        t, num_accepted / (t + 1)))

        loss_history = np.zeros(em_iters)
        noise_history = np.zeros((em_iters, self.n_tasks))

        print('Estimating the noise variance: ')
        for t in range(em_iters):
            [_, _, unc_noise_, Wmix_, loss_] = sess.run([
                expectation_update, minimization_update, unc_noise, Wmix, loss
            ])
            loss_history[t] = loss_
            noise_history[t, :] = np.log(np.exp(unc_noise_) + 1)
            if (t % display_rate == 0) or (t == em_iters - 1):
                print("Iteration: {:>4} Loss: {:.3f}".format(t, loss_))

        # Second warm-up phase
        print('Second warm-up phase.')
        num_accepted = 0
        for t in range(num_warmup_iters):
            _, is_accepted_val = sess.run(
                [warmup_update, kernel_results.inner_results.is_accepted])
            num_accepted += is_accepted_val
            if (t % display_rate == 0) or (t == num_warmup_iters - 1):
                print(
                    "Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
                        t, num_accepted / (t + 1)))

        step_size_ = sess.run(step_size)
        if step_size_ < 1e-4:
            warnings.warn("Estimated step size is low. (less than 1e-4)")

        print('Collecting samples for the GP hyperparameters.')
        sess.run(step_size_update2)
        loc_samples = np.zeros((mcmc_samples, self.n_tasks))
        varm_samples = np.zeros((mcmc_samples, self.n_tasks, self.n_latent))
        beta_samples = np.zeros((mcmc_samples, self.n_latent, self.dim_input))
        varc_samples = np.zeros(mcmc_samples)
        num_accepted = 0
        total_runs = 4 * mcmc_samples
        for t in range(total_runs):
            [
                _, is_accepted_val, loc_next_, varm_next_, beta_next_,
                varc_next_
            ] = sess.run([
                simple_update, kernel_results.inner_results.is_accepted,
                loc_next, varm_next, beta_next, varc_next
            ])
            if (t % 4 == 0):
                idx = t // 4
                loc_samples[idx, :] = loc_next_
                varm_samples[idx, :, :] = varm_next_
                beta_samples[idx, :, :] = beta_next_
                varc_samples[idx] = varc_next_
            num_accepted += is_accepted_val
            if (t % display_rate == 0) or (t == total_runs - 1):
                acceptance_rate = num_accepted / (t + 1)
                print(
                    "Sampling Iteration: {:>3} Acceptance Rate: {:.3f}".format(
                        t, acceptance_rate))
        self.noise = np.log(np.exp(unc_noise_) + 1)
        self.noise = tf.convert_to_tensor(self.noise, tf.float32)
        self.Wmix = tf.convert_to_tensor(Wmix_, tf.float32)
        hyperpar_samples = [
            loc_samples, varm_samples, beta_samples, varc_samples
        ]
        if acceptance_rate < 0.1:
            warnings.warn("Acceptance rate was low  (less than 0.1)")

        sess.close()

        return hyperpar_samples, loss_history, noise_history
Example #17
    def mcmc(self,
             mcmc_samples,
             num_burnin_steps,
             step_size,
             initial_state=None,
             prev_kernel_results=None,
             num_leapfrog_steps=3):
        # Function to perform the sampling for the posterior distributions of the hyperparameters

        noise = self.noise
        Wmix = self.Wmix

        unnormalized_posterior_log_prob = lambda *args: self.joint_log_prob(
            noise, Wmix, *args)

        if initial_state is None:
            print('generating initial state')
            beta_init = 1.2 * tf.ones([self.n_latent, self.dim_input],
                                      dtype=tf.float32)
            varm_init = 0.8 * tf.ones([self.n_tasks, self.n_latent],
                                      dtype=tf.float32)
            loc_init = tf.zeros(self.n_tasks)
            varc_init = 1.0
            initial_state = [beta_init, varm_init, loc_init, varc_init]

        #------- Unconstrained representation---------
        unconstraining_bijectors = [
            tfb.Softplus(),
            tfb.Softplus(),
            tfb.Identity(),
            tfb.Softplus()
        ]

        #----Setting up the mcmc sampler
        [beta_probs, varm_probs, loc_probs,
         varc_probs], kernel_results = sample_chain(
             num_results=mcmc_samples,
             num_burnin_steps=num_burnin_steps,
             num_steps_between_results=4,
             current_state=initial_state,
             previous_kernel_results=prev_kernel_results,
             kernel=TransformedTransitionKernel(
                 inner_kernel=HamiltonianMonteCarlo(
                     target_log_prob_fn=unnormalized_posterior_log_prob,
                     step_size=step_size,
                     num_leapfrog_steps=num_leapfrog_steps),
                 bijector=unconstraining_bijectors))

        acceptance_rate = tf.reduce_mean(
            tf.to_float(kernel_results.inner_results.is_accepted))

        with tf.Session() as sess:
            [
                acceptance_rate_, loc_probs_, varm_probs_, beta_probs_,
                varc_probs_
            ] = sess.run([
                acceptance_rate, loc_probs, varm_probs, beta_probs, varc_probs
            ])

        print('acceptance_rate:', acceptance_rate_)
        hyperpar_samples = [loc_probs_, varm_probs_, beta_probs_, varc_probs_]
        return hyperpar_samples, acceptance_rate_
Example #18
    def warmup(self,
               initial_state=None,
               num_warmup_iters=1000,
               num_leapfrog_steps=3,
               display_rate=500):
        # function to generate an adaptive step size that will be needed for
        # HMC sampling
        noise = self.noise
        Wmix = self.Wmix

        if initial_state is None:
            beta_init = 1.2 * tf.ones([self.n_latent, self.dim_input],
                                      dtype=tf.float32)
            varm_init = 0.8 * tf.ones([self.n_tasks, self.n_latent],
                                      dtype=tf.float32)
            loc_init = tf.zeros(self.n_tasks)
            varc_init = 1.0
        else:
            beta_init, varm_init, loc_init, varc_init = initial_state

        unnormalized_posterior_log_prob = lambda *args: self.joint_log_prob(
            noise, Wmix, *args)

        #------- Unconstrained representation---------
        unconstraining_bijectors = [
            tfb.Softplus(),
            tfb.Softplus(),
            tfb.Identity(),
            tfb.Softplus()
        ]

        target_accept_rate = 0.651

        # Setting up the step_size
        step_size = tf.Variable(0.01, name='step_size')

        beta_cur = tf.Variable(beta_init, name='beta_cur')
        varm_cur = tf.Variable(varm_init, name='varm_cur')
        loc_cur = tf.Variable(loc_init, name='loc_cur')
        varc_cur = tf.Variable(varc_init, name='varc_cur')

        current_state = [beta_cur, varm_cur, loc_cur, varc_cur]

        # Initializing the sampler
        sampler = TransformedTransitionKernel(
            inner_kernel=HamiltonianMonteCarlo(
                target_log_prob_fn=unnormalized_posterior_log_prob,
                step_size=step_size,
                num_leapfrog_steps=num_leapfrog_steps),
            bijector=unconstraining_bijectors)

        # One step of the sampler
        [beta_next, varm_next, loc_next,
         varc_next], kernel_results = sampler.one_step(
             current_state=current_state,
             previous_kernel_results=sampler.bootstrap_results(current_state))

        # updating the step size
        step_size_update = step_size_simple_update(
            step_size,
            kernel_results,
            target_rate=target_accept_rate,
            decrement_multiplier=0.1,
            increment_multiplier=0.1)

        # Updating the state
        beta_update = beta_cur.assign(beta_next)
        varm_update = varm_cur.assign(varm_next)
        loc_update = loc_cur.assign(loc_next)
        varc_update = varc_cur.assign(varc_next)

        warmup_update = tf.group([
            beta_update, varm_update, loc_update, varc_update, step_size_update
        ])

        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            print('Warmup: ')
            num_accepted = 0
            for t in range(num_warmup_iters):
                _, is_accepted_val = sess.run(
                    [warmup_update, kernel_results.inner_results.is_accepted])
                num_accepted += is_accepted_val
                if (t % display_rate == 0) or (t == num_warmup_iters - 1):
                    print("Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".
                          format(t, num_accepted / (t + 1)))
            [step_size_, beta_next_, varm_next_, loc_next_,
             varc_next_] = sess.run(
                 [step_size, beta_next, varm_next, loc_next, varc_next])
        next_state = [beta_next_, varm_next_, loc_next_, varc_next_]

        return step_size_, next_state
Example #19
class BatchShapeInferenceTests(test_util.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '_trivial',
       'value_fn': lambda: tfd.Normal(loc=0., scale=1.),
       'expected_batch_shape': []},
      {'testcase_name': '_simple_tensor_broadcasting',
       'value_fn': lambda: tfd.MultivariateNormalDiag(  # pylint: disable=g-long-lambda
           loc=[0., 0.], scale_diag=tf.convert_to_tensor([[1., 1.], [1., 1.]])),
       'expected_batch_shape': [2]},
      {'testcase_name': '_rank_deficient_tensor_broadcasting',
       'value_fn': lambda: tfd.MultivariateNormalDiag(  # pylint: disable=g-long-lambda
           loc=0., scale_diag=tf.convert_to_tensor([[1., 1.], [1., 1.]])),
       'expected_batch_shape': [2]},
      {'testcase_name': '_mixture_same_family',
       'value_fn': lambda: tfd.MixtureSameFamily(  # pylint: disable=g-long-lambda
           mixture_distribution=tfd.Categorical(
               logits=[[[1., 2., 3.],
                        [4., 5., 6.]]]),
           components_distribution=tfd.Normal(loc=0.,
                                              scale=[[[1., 2., 3.],
                                                      [4., 5., 6.]]])),
       'expected_batch_shape': [1, 2]},
      {'testcase_name': '_deeply_nested',
       'value_fn': lambda: tfd.Independent(  # pylint: disable=g-long-lambda
           tfd.Independent(
               tfd.Independent(
                   tfd.Independent(
                       tfd.Normal(loc=0., scale=[[[[[[[[1.]]]]]]]]),
                       reinterpreted_batch_ndims=2),
                   reinterpreted_batch_ndims=0),
               reinterpreted_batch_ndims=1),
           reinterpreted_batch_ndims=1),
       'expected_batch_shape': [1, 1, 1, 1]})
  def test_batch_shape_inference_is_correct(
      self, value_fn, expected_batch_shape):
    value = value_fn()  # Defer construction until we're in the right graph.
    self.assertAllEqual(
        expected_batch_shape,
        value.batch_shape_tensor())

    batch_shape = value.batch_shape
    self.assertIsInstance(batch_shape, tf.TensorShape)
    self.assertTrue(
        batch_shape.is_compatible_with(expected_batch_shape))

  def assert_all_parameters_have_full_batch_shape(
      self, dist, expected_batch_shape):
    self.assertAllEqual(expected_batch_shape, dist.batch_shape_tensor())
    param_batch_shapes = batch_shape_lib.batch_shape_parts(dist)
    for param_batch_shape in param_batch_shapes.values():
      self.assertAllEqual(expected_batch_shape, param_batch_shape)

  @parameterized.named_parameters(
      {'testcase_name': '_trivial',
       'dist_fn': lambda: tfd.Normal(loc=0., scale=1.)},
      {'testcase_name': '_simple_tensor_broadcasting',
       'dist_fn': lambda: tfd.MultivariateNormalDiag(  # pylint: disable=g-long-lambda
           loc=[0., 0.],
           scale_diag=[[1., 1.], [1., 1.]])},
      {'testcase_name': '_rank_deficient_tensor_broadcasting',
       'dist_fn': lambda: tfd.MultivariateNormalDiag(  # pylint: disable=g-long-lambda
           loc=0.,
           scale_diag=[[1., 1.], [1., 1.]])},
      {'testcase_name': '_deeply_nested',
       'dist_fn': lambda: tfd.Independent(  # pylint: disable=g-long-lambda
           tfd.Independent(
               tfd.Independent(
                   tfd.Independent(
                       tfd.Normal(loc=0.,
                                  scale=[[[[[[[[1.]]]]]]]]),
                       reinterpreted_batch_ndims=2),
                   reinterpreted_batch_ndims=0),
               reinterpreted_batch_ndims=1),
           reinterpreted_batch_ndims=1)},
      {'testcase_name': '_transformed_dist_simple',
       'dist_fn': lambda: tfd.TransformedDistribution(  # pylint: disable=g-long-lambda
           tfd.Normal(loc=[[1., 2., 3.], [3., 4., 5.]], scale=[1.]),
           tfb.Scale(scale=[2., 3., 4.]))},
      {'testcase_name': '_transformed_dist_with_chain',
       'dist_fn': lambda: tfd.TransformedDistribution(  # pylint: disable=g-long-lambda
           tfd.Normal(loc=[[1., 2., 3.], [3., 4., 5.]], scale=[1.]),
           tfb.Shift(-4.)(tfb.Scale(scale=[2., 3., 4.])))},
      {'testcase_name': '_transformed_dist_multipart_nested',
       'dist_fn': lambda: tfd.TransformedDistribution(  # pylint: disable=g-long-lambda
           tfd.TransformedDistribution(
               tfd.TransformedDistribution(
                   tfd.MultivariateNormalDiag(tf.zeros([4, 6]), tf.ones([6])),
                   tfb.Split([3, 3])),
               tfb.JointMap([tfb.Identity(), tfb.Reshape([3, 1])])),
           tfb.JointMap([tfb.Scale(scale=[2., 3., 4.]), tfb.Shift(1.)]))}
      )
  def test_batch_broadcasting(self, dist_fn):
    dist = dist_fn()
    broadcast_dist = dist._broadcast_parameters_with_batch_shape(
        dist.batch_shape)
    self.assert_all_parameters_have_full_batch_shape(
        broadcast_dist,
        expected_batch_shape=broadcast_dist.batch_shape_tensor())

    expanded_batch_shape = ps.concat([[7, 4], dist.batch_shape], axis=0)
    broadcast_params = batch_shape_lib.broadcast_parameters_with_batch_shape(
        dist, expanded_batch_shape)
    broadcast_dist = dist.copy(**broadcast_params)
    self.assert_all_parameters_have_full_batch_shape(
        broadcast_dist,
        expected_batch_shape=expanded_batch_shape)
Example #20
    def __init__(
        self,
        ndims=10,
        name='neals_funnel',
        pretty_name='Neal\'s Funnel',
    ):
        """Construct the Neal's funnel model.

    Args:
      ndims: Python integer. Dimensionality of the distribution. Must be at
        least 2.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If ndims < 2.
    """
        if ndims < 2:
            raise ValueError('ndims must be at least 2, saw: {}'.format(ndims))

        with tf.name_scope(name):

            def bijector_fn(x):
                """Funnel transform."""
                batch_shape = ps.shape(x)[:-1]
                scale = tf.concat(
                    [
                        tf.ones(ps.concat([batch_shape, [1]], axis=0)),
                        tf.exp(x[..., :1] / 2) *
                        tf.ones(ps.concat([batch_shape, [ndims - 1]], axis=0)),
                    ],
                    axis=-1,
                )
                return tfb.Scale(scale)

            mg = tfd.MultivariateNormalDiag(loc=tf.zeros(ndims),
                                            scale_diag=[3.] + [1.] *
                                            (ndims - 1))
            funnel = tfd.TransformedDistribution(
                mg,
                bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))

            sample_transformations = {
                'identity':
                model.Model.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                    # The trailing dimensions come from a product distribution of
                    # independent standard normal and a log-normal with a scale of
                    # 3 / 2. See
                    # https://en.wikipedia.org/wiki/Product_distribution for the
                    # formulas. For the mean, the formulas yield zero.
                    ground_truth_mean=np.zeros(ndims),
                    # For the standard deviation, the means are zero and the
                    # standard deviation of the normal factor is 1, so the
                    # formula reduces to `sqrt(var_log_normal + mean_log_normal**2)`,
                    # which equals `exp(sigma_log_normal**2)`.
                    ground_truth_standard_deviation=np.array(
                        [3.] + [np.exp((3. / 2)**2)] * (ndims - 1)),
                )
            }

        self._funnel = funnel

        super(NealsFunnel, self).__init__(
            default_event_space_bijector=tfb.Identity(),
            event_shape=funnel.event_shape,
            dtype=funnel.dtype,
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
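
For intuition, the `MaskedAutoregressiveFlow` with the scale-only `bijector_fn` above appears to realize the standard Neal's funnel: the first coordinate is drawn from N(0, 3) and the remaining coordinates from N(0, exp(x0 / 2)) given x0. A rough sketch of that same density written directly as a joint distribution (illustrative only, not part of the model class):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

ndims = 10  # Matches the default above.

# x0 controls the log-scale of the remaining coordinates, producing the funnel.
funnel_joint = tfd.JointDistributionSequential([
    tfd.Normal(loc=0., scale=3.),
    lambda x0: tfd.Sample(
        tfd.Normal(loc=0., scale=tf.exp(x0 / 2.)), sample_shape=ndims - 1),
])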
Example #21
  def __init__(self,
               distribution,
               bijector=None,
               batch_shape=None,
               event_shape=None,
               validate_args=False,
               name=None):
    """Construct a Transformed Distribution.

    Args:
      distribution: The base distribution instance to transform. Typically an
        instance of `Distribution`.
      bijector: The object responsible for calculating the transformation.
        Typically an instance of `Bijector`. `None` means `Identity()`.
      batch_shape: `integer` vector `Tensor` which overrides `distribution`
        `batch_shape`; valid only if `distribution.is_scalar_batch()`.
      event_shape: `integer` vector `Tensor` which overrides `distribution`
        `event_shape`; valid only if `distribution.is_scalar_event()`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      name: Python `str` name prefixed to Ops created by this class. Default:
        `bijector.name + distribution.name`.
    """
    parameters = dict(locals())
    name = name or (("" if bijector is None else bijector.name) +
                    distribution.name)
    with tf.name_scope(name, values=[event_shape, batch_shape]) as name:
      # For convenience we define some handy constants.
      self._zero = tf.constant(0, dtype=tf.int32, name="zero")
      self._empty = tf.constant([], dtype=tf.int32, name="empty")

      if bijector is None:
        bijector = tfb.Identity(validate_args=validate_args)

      # We will keep track of a static and dynamic version of
      # self._is_{batch,event}_override. This way we can do more prior to graph
      # execution, including possibly raising Python exceptions.

      self._override_batch_shape = self._maybe_validate_shape_override(
          batch_shape, distribution.is_scalar_batch(), validate_args,
          "batch_shape")
      self._is_batch_override = _logical_not(_logical_equal(
          _ndims_from_shape(self._override_batch_shape), self._zero))
      self._is_maybe_batch_override = bool(
          tensor_util.constant_value(self._override_batch_shape) is None or
          tensor_util.constant_value(self._override_batch_shape).size != 0)

      self._override_event_shape = self._maybe_validate_shape_override(
          event_shape, distribution.is_scalar_event(), validate_args,
          "event_shape")
      self._is_event_override = _logical_not(_logical_equal(
          _ndims_from_shape(self._override_event_shape), self._zero))
      self._is_maybe_event_override = bool(
          tensor_util.constant_value(self._override_event_shape) is None or
          tensor_util.constant_value(self._override_event_shape).size != 0)

      # To convert a scalar distribution into a multivariate distribution we
      # will draw dims from the sample dims, which are otherwise iid. This is
      # easy to do except in the case that the base distribution has batch dims
      # and we're overriding event shape. When that case happens the event dims
      # will incorrectly be to the left of the batch dims. In this case we'll
      # cyclically permute left the new dims.
      self._needs_rotation = _logical_and(
          self._is_event_override,
          _logical_not(self._is_batch_override),
          _logical_not(distribution.is_scalar_batch()))
      override_event_ndims = _ndims_from_shape(self._override_event_shape)
      self._rotate_ndims = _pick_scalar_condition(
          self._needs_rotation, override_event_ndims, 0)
      # We'll be reducing the head dims (if at all), i.e., this will be []
      # if we don't need to reduce.
      self._reduce_event_indices = tf.range(
          self._rotate_ndims - override_event_ndims, self._rotate_ndims)

    self._distribution = distribution
    self._bijector = bijector
    super(TransformedDistribution, self).__init__(
        dtype=self._distribution.dtype,
        reparameterization_type=self._distribution.reparameterization_type,
        validate_args=validate_args,
        allow_nan_stats=self._distribution.allow_nan_stats,
        parameters=parameters,
        # We let TransformedDistribution access _graph_parents since this class
        # is more like a baseclass than derived.
        graph_parents=(distribution._graph_parents +  # pylint: disable=protected-access
                       bijector.graph_parents),
        name=name)
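
This is the legacy `TransformedDistribution` constructor, which still accepts `batch_shape` and `event_shape` overrides for scalar base distributions. A minimal sketch of how those overrides are used per the docstring above (newer TFP releases drop these arguments in favor of wrappers such as `tfd.Sample`):

import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

# Promote a scalar Normal to a [3]-batch of length-4 event vectors via the
# (legacy) shape-override arguments documented above.
vector_normal = tfd.TransformedDistribution(
    distribution=tfd.Normal(loc=0., scale=1.),
    bijector=tfb.Identity(),
    batch_shape=[3],
    event_shape=[4])
# vector_normal.batch_shape == [3], vector_normal.event_shape == [4]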
Example #22
 def _default_event_space_bijector(self):
     return tfb.Identity()
Example #23
    def __init__(
        self,
        train_features,
        train_labels,
        test_features=None,
        test_labels=None,
        name='logistic_regression',
        pretty_name='Logistic Regression',
    ):
        """Construct the logistic regression model.

    Args:
      train_features: Floating-point `Tensor` with shape `[num_train_points,
        num_features]`. Training features.
      train_labels: Integer `Tensor` with shape `[num_train_points]`. Training
        labels.
      test_features: Floating-point `Tensor` with shape `[num_test_points,
        num_features]`. Testing features. Can be `None`, in which case
        test-related sample transformations are not computed.
      test_labels: Integer `Tensor` with shape `[num_test_points]`. Testing
        labels. Can be `None`, in which case test-related sample transformations
        are not computed.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If `test_features` and `test_labels` are not either both
        `None` or both specified.
    """
        with tf.name_scope(name):
            train_features = tf.convert_to_tensor(train_features, tf.float32)
            train_features = _add_bias(train_features)
            train_labels = tf.convert_to_tensor(train_labels)
            num_features = int(train_features.shape[1])

            self._prior_dist = tfd.Sample(tfd.Normal(0., 1.), num_features)

            def log_likelihood_fn(weights, features, labels, reduce_sum=True):
                """The log_likelihood function."""
                logits = tf.einsum('nd,...d->...n', features, weights)
                log_likelihood = tfd.Bernoulli(logits=logits).log_prob(labels)
                if reduce_sum:
                    return tf.reduce_sum(log_likelihood, [-1])
                else:
                    return log_likelihood

            self._train_log_likelihood_fn = functools.partial(
                log_likelihood_fn,
                features=train_features,
                labels=train_labels)

            sample_transformations = {
                'identity':
                model.Model.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                )
            }
            if (test_features is not None) != (test_labels is not None):
                raise ValueError(
                    '`test_features` and `test_labels` must either both '
                    'be `None` or both specified. Got: test_features={}, '
                    'test_labels={}'.format(test_features, test_labels))

            if test_features is not None and test_labels is not None:
                test_features = tf.convert_to_tensor(test_features, tf.float32)
                test_features = _add_bias(test_features)
                test_labels = tf.convert_to_tensor(test_labels)
                test_log_likelihood_fn = functools.partial(
                    log_likelihood_fn,
                    features=test_features,
                    labels=test_labels)

                sample_transformations['test_nll'] = (
                    model.Model.SampleTransformation(
                        fn=test_log_likelihood_fn,
                        pretty_name='Test NLL',
                    ))
                sample_transformations['per_example_test_nll'] = (
                    model.Model.SampleTransformation(
                        fn=functools.partial(test_log_likelihood_fn,
                                             reduce_sum=False),
                        pretty_name='Per-example Test NLL',
                    ))

        super(LogisticRegression, self).__init__(
            default_event_space_bijector=tfb.Identity(),
            event_shape=self._prior_dist.event_shape,
            dtype=self._prior_dist.dtype,
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
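
The likelihood above contracts a `[num_points, num_features]` feature matrix against batched weight vectors with `tf.einsum('nd,...d->...n', ...)`. A small self-contained sketch of that contraction and the resulting Bernoulli log-likelihood; the data and shapes here are made up for illustration:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

features = tf.constant([[1., 0.5], [1., -2.0], [1., 3.0]])  # [3 points, 2 features]
labels = tf.constant([1, 0, 1])                             # [3]
weights = tf.zeros([7, 2])                                  # batch of 7 weight vectors

logits = tf.einsum('nd,...d->...n', features, weights)          # [7, 3]
log_likelihood = tfd.Bernoulli(logits=logits).log_prob(labels)  # [7, 3]
total_log_likelihood = tf.reduce_sum(log_likelihood, axis=-1)   # [7]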
Example #24
    def __init__(self,
                 train_features,
                 train_labels,
                 test_features=None,
                 test_labels=None,
                 name='sparse_logistic_regression',
                 pretty_name='Sparse Logistic Regression'):
        """Construct the sparse logistic regression model.

    Args:
      train_features: Floating-point `Tensor` with shape `[num_train_points,
        num_features]`. Training features.
      train_labels: Integer `Tensor` with shape `[num_train_points]`. Training
        labels.
      test_features: Floating-point `Tensor` with shape `[num_test_points,
        num_features]`. Testing features. Can be `None`, in which case
        test-related sample transformations are not computed.
      test_labels: Integer `Tensor` with shape `[num_test_points]`. Testing
        labels. Can be `None`, in which case test-related sample transformations
        are not computed.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If `test_features` and `test_labels` are not either both
        `None` or both specified.
    """
        with tf.name_scope(name):
            train_features = _add_bias(train_features)
            train_labels = tf.convert_to_tensor(train_labels)
            num_features = int(train_features.shape[1])

            root = tfd.JointDistributionCoroutine.Root
            zero = tf.zeros(num_features)
            one = tf.ones(num_features)
            half = tf.fill([num_features], 0.5)

            def model_fn(features):
                """Model definition."""
                unscaled_weights = yield root(
                    tfd.Independent(tfd.Normal(zero, one),
                                    1,
                                    name='unscaled_weights'))
                local_scales = yield root(
                    tfd.Independent(tfd.Gamma(half, half),
                                    1,
                                    name='local_scales'))
                global_scale = yield root(
                    tfd.Gamma(0.5, 0.5, name='global_scale'))

                weights = unscaled_weights * local_scales * global_scale[
                    ..., tf.newaxis]

                logits = tf.einsum('nd,...d->...n', features, weights)
                yield tfd.Independent(tfd.Bernoulli(logits=logits),
                                      1,
                                      name='labels')

            train_joint_dist = tfd.JointDistributionCoroutine(
                functools.partial(model_fn, features=train_features))

            sample_transformations = {
                'identity':
                bayesian_model.BayesianModel.SampleTransformation(
                    fn=lambda params: params,
                    pretty_name='Identity',
                )
            }
            if (test_features is not None) != (test_labels is not None):
                raise ValueError(
                    '`test_features` and `test_labels` must either both '
                    'be `None` or both specified. Got: test_features={}, '
                    'test_labels={}'.format(test_features, test_labels))

            if test_features is not None and test_labels is not None:
                test_features = _add_bias(test_features)
                test_labels = tf.convert_to_tensor(test_labels)
                test_joint_dist = tfd.JointDistributionCoroutine(
                    functools.partial(model_fn, features=test_features))

                def _get_label_dist(params):
                    # TODO(b/150897904): The seed does nothing since the model is fully
                    # conditioned.
                    distributions, _ = test_joint_dist.sample_distributions(
                        value=self._dict_to_tuple(params) + (test_labels, ),
                        seed=42)
                    return distributions[-1]

                sample_transformations['test_nll'] = (
                    bayesian_model.BayesianModel.SampleTransformation(
                        fn=lambda params: -(  # pylint: disable=g-long-lambda
                            _get_label_dist(params).log_prob(test_labels)),
                        pretty_name='Test NLL',
                    ))
                sample_transformations['per_example_test_nll'] = (
                    bayesian_model.BayesianModel.SampleTransformation(
                        fn=lambda params: -(  # pylint: disable=g-long-lambda
                            _get_label_dist(params).distribution.log_prob(
                                test_labels)),
                        pretty_name='Per-example Test NLL',
                    ))

        self._train_joint_dist = train_joint_dist
        self._train_labels = train_labels

        super(SparseLogisticRegression, self).__init__(
            default_event_space_bijector=self._tuple_to_dict(
                (tfb.Identity(), tfb.Exp(), tfb.Exp())),
            event_shape=self._tuple_to_dict(train_joint_dist.event_shape[:-1]),
            dtype=self._tuple_to_dict(train_joint_dist.dtype[:-1]),
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
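
The default event space bijector above pairs `Identity` for the weights with `Exp` for the positivity-constrained local and global scales. A small sketch of applying such a structured bijector with `tfb.JointMap`; the model's dict/tuple packing is omitted and the values are illustrative:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

unconstraining = tfb.JointMap([tfb.Identity(), tfb.Exp(), tfb.Exp()])
unconstrained = [tf.zeros([5]), tf.zeros([5]), tf.constant(0.)]
unscaled_weights, local_scales, global_scale = unconstraining.forward(unconstrained)
# unscaled_weights is unchanged; local_scales and global_scale land on the
# positive reals (all ones here, since exp(0) == 1).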
Example #25
 def spline_flow():
   stack = tfb.Identity()
   for i in range(nsplits):
     stack = tfb.RealNVP(5 * i, bijector_fn=splines[i])(stack)
   return stack
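
The loop above relies on the TFP convention that calling one bijector on another returns their composition (a `Chain`), which is why the accumulator starts from `tfb.Identity()`. A short illustration of that calling convention with simple bijectors:

import tensorflow_probability as tfp

tfb = tfp.bijectors

affine = tfb.Shift(1.)(tfb.Scale(2.))  # same as tfb.Chain([tfb.Shift(1.), tfb.Scale(2.)])
affine.forward(3.)                     # 2 * 3 + 1 = 7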
Example #26
 def testComposeFromTDSubclassWithAlternateCtorArgs(self):
   # This line used to raise an exception.
   tfb.Identity()(tfd.Chi(df=1., allow_nan_stats=True))
Example #27
  def __init__(self,
               design_matrix,
               weights_prior=None,
               name=None):
    """Specify a linear regression model.

    Note: the statistical behavior of the regression is determined by
    the broadcasting behavior of the `weights` `Tensor`:

    * `weights_prior.batch_shape == []`: shares a single set of weights across
      all design matrices and observed time series. This may make sense if
      the features in each design matrix have the same semantics (e.g.,
      grouping observations by country, with per-country design matrices
      capturing the same set of national economic indicators per country).
    * `weights_prior.batch_shape == design_matrix.batch_shape`: fits separate
      weights for each design matrix. If there are multiple observed time series
      for each design matrix, this shares statistical strength over those
      observations.
    * `weights_prior.batch_shape == observed_time_series.batch_shape`: fits a
      separate regression for each individual time series.

    When modeling batches of time series, you should think carefully about
    which behavior makes sense, and specify `weights_prior` accordingly:
    the defaults may not do what you want!

    Args:
      design_matrix: float `Tensor` of shape `concat([batch_shape,
        [num_timesteps, num_features]])`. This may also optionally be
        an instance of `tf.linalg.LinearOperator`.
      weights_prior: `tfd.Distribution` representing a prior over the regression
        weights. Must have event shape `[num_features]` and batch shape
        broadcastable to the design matrix's `batch_shape`. Alternately,
        `event_shape` may be scalar (`[]`), in which case the prior is
        internally broadcast as `TransformedDistribution(weights_prior,
        tfb.Identity(), event_shape=[num_features],
        batch_shape=design_matrix.batch_shape)`. If `None`,
        defaults to `StudentT(df=5, loc=0., scale=10.)`, a weakly-informative
        prior loosely inspired by the [Stan prior choice recommendations](
        https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations).
        Default value: `None`.
      name: the name of this model component.
        Default value: 'LinearRegression'.
    """
    with tf.compat.v1.name_scope(
        name, 'LinearRegression', values=[design_matrix]) as name:

      if not isinstance(design_matrix, tfl.LinearOperator):
        design_matrix = tfl.LinearOperatorFullMatrix(
            tf.convert_to_tensor(value=design_matrix, name='design_matrix'),
            name='design_matrix_linop')

      if tf.compat.dimension_value(design_matrix.shape[-1]) is not None:
        num_features = design_matrix.shape[-1]
      else:
        num_features = design_matrix.shape_tensor()[-1]

      # Default to a weakly-informative StudentT(df=5, 0., 10.) prior.
      if weights_prior is None:
        weights_prior = tfd.StudentT(
            df=5,
            loc=tf.zeros([], dtype=design_matrix.dtype),
            scale=10 * tf.ones([], dtype=design_matrix.dtype))
      # Sugar: if prior is static scalar, broadcast it to a default shape.
      if weights_prior.event_shape.ndims == 0:
        if design_matrix.batch_shape.is_fully_defined():
          design_matrix_batch_shape_ = design_matrix.batch_shape
        else:
          design_matrix_batch_shape_ = design_matrix.batch_shape_tensor()
        weights_prior = tfd.TransformedDistribution(
            weights_prior,
            bijector=tfb.Identity(),
            batch_shape=design_matrix_batch_shape_,
            event_shape=[num_features])

      tf.debugging.assert_same_float_dtype([design_matrix, weights_prior])

      self._design_matrix = design_matrix

      super(LinearRegression, self).__init__(
          parameters=[
              Parameter('weights', weights_prior, tfb.Identity()),
          ],
          latent_size=0,
          name=name)
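
A brief usage sketch for the component above, assuming it is exposed as `tfp.sts.LinearRegression`; the design matrix is synthetic and only meant to show the two prior configurations discussed in the docstring:

import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

num_timesteps, num_features = 100, 3
design_matrix = np.random.randn(num_timesteps, num_features).astype(np.float32)

# Default weakly-informative StudentT prior on the weights.
regression = tfp.sts.LinearRegression(design_matrix=design_matrix)

# A scalar prior is broadcast by the constructor to event_shape=[num_features],
# as described in the `weights_prior` docstring above.
regression_shared_prior = tfp.sts.LinearRegression(
    design_matrix=design_matrix,
    weights_prior=tfd.Normal(loc=0., scale=1.))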
Example #28
    def __init__(
        self,
        ndims=100,
        gamma_shape_parameter=0.5,
        max_eigvalue=None,
        seed=10,
        name='ill_conditioned_gaussian',
        pretty_name='Ill-Conditioned Gaussian',
    ):
        """Construct the ill-conditioned Gaussian.

    Args:
      ndims: Python `int`. Dimensionality of the Gaussian.
      gamma_shape_parameter: Python `float`. The shape parameter of the inverse
        Gamma distribution. Anything below 2 is likely to yield poorly
        conditioned covariance matrices.
      max_eigvalue: Python `float`. If set, will normalize the eigenvalues such
        that the maximum is this value.
      seed: Seed to use when generating the eigenvalues and the random
        orthogonal matrix.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.
    """
        rng = np.random.RandomState(seed=seed & (2**32 - 1))
        # Eigenvalues are reciprocals of sorted Gamma draws, i.e. inverse-Gamma
        # distributed; a small shape parameter yields a wide spread and hence an
        # ill-conditioned covariance matrix.
        eigenvalues = 1. / np.sort(
            rng.gamma(shape=gamma_shape_parameter, scale=1., size=ndims))
        if max_eigvalue is not None:
            eigenvalues *= max_eigvalue / eigenvalues.max()

        # Random orthogonal basis; fixing the signs using the diagonal of R makes
        # Q uniformly (Haar) distributed over orthogonal matrices.
        q, r = np.linalg.qr(rng.randn(ndims, ndims))
        q *= np.sign(np.diag(r))

        # Covariance with the prescribed spectrum: Q @ diag(eigenvalues) @ Q.T.
        covariance = (q * eigenvalues).dot(q.T)

        gaussian = tfd.MultivariateNormalTriL(loc=tf.zeros(ndims),
                                              scale_tril=tf.linalg.cholesky(
                                                  tf.convert_to_tensor(
                                                      covariance,
                                                      dtype=tf.float32)))
        self._eigenvalues = eigenvalues

        sample_transformations = {
            'identity':
            model.Model.SampleTransformation(
                fn=lambda params: params,
                pretty_name='Identity',
                ground_truth_mean=np.zeros(ndims),
                ground_truth_standard_deviation=np.sqrt(np.diag(covariance)),
            )
        }

        self._gaussian = gaussian

        super(IllConditionedGaussian, self).__init__(
            default_event_space_bijector=tfb.Identity(),
            event_shape=gaussian.event_shape,
            dtype=gaussian.dtype,
            name=name,
            pretty_name=pretty_name,
            sample_transformations=sample_transformations,
        )
Example #29
    def __init__(self,
                 level_scale_prior=None,
                 slope_mean_prior=None,
                 slope_scale_prior=None,
                 autoregressive_coef_prior=None,
                 initial_level_prior=None,
                 initial_slope_prior=None,
                 observed_time_series=None,
                 constrain_ar_coef_stationary=True,
                 constrain_ar_coef_positive=False,
                 name=None):
        """Specify a semi-local linear trend model.

    Args:
      level_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `level_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      slope_mean_prior: optional `tfd.Distribution` instance specifying a prior
        on the `slope_mean` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      slope_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `slope_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      autoregressive_coef_prior: optional `tfd.Distribution` instance specifying
        a prior on the `autoregressive_coef` parameter. If `None`, the default
        prior is a standard `Normal(0., 1.)`. Note that the prior may be
        implicitly truncated by `constrain_ar_coef_stationary` and/or
        `constrain_ar_coef_positive`.
        Default value: `None`.
      initial_level_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial level. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      initial_slope_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial slope. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series.
        Any priors not explicitly set will be given default values according to
        the scale of the observed time series (or batch of time series). May
        optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes
        a mask `Tensor` to specify timesteps with missing observations.
        Default value: `None`.
      constrain_ar_coef_stationary: if `True`, perform inference using a
        parameterization that restricts `autoregressive_coef` to the interval
        `(-1, 1)`, or `(0, 1)` if `constrain_ar_coef_positive` is also `True`,
        corresponding to stationary processes. This will implicitly truncate
        the support of `autoregressive_coef_prior`.
        Default value: `True`.
      constrain_ar_coef_positive: if `True`, perform inference using a
        parameterization that restricts `autoregressive_coef` to be positive,
        or in `(0, 1)` if `constrain_ar_coef_stationary` is also `True`. This
        will implicitly truncate the support of `autoregressive_coef_prior`.
        Default value: `False`.
      name: the name of this model component.
        Default value: 'SemiLocalLinearTrend'.
    """

        with tf.name_scope(name or 'SemiLocalLinearTrend') as name:
            if observed_time_series is not None:
                _, observed_stddev, observed_initial = sts_util.empirical_statistics(
                    observed_time_series)
            else:
                observed_stddev, observed_initial = 1., 0.

            # Heuristic default priors. Overriding these may dramatically
            # change inference performance and results.
            if level_scale_prior is None:
                level_scale_prior = tfd.LogNormal(loc=tf.math.log(
                    .01 * observed_stddev),
                                                  scale=2.)
            if slope_mean_prior is None:
                slope_mean_prior = tfd.Normal(loc=0., scale=observed_stddev)
            if slope_scale_prior is None:
                slope_scale_prior = tfd.LogNormal(loc=tf.math.log(
                    .01 * observed_stddev),
                                                  scale=2.)
            if autoregressive_coef_prior is None:
                autoregressive_coef_prior = tfd.Normal(
                    loc=0., scale=tf.ones_like(observed_initial))
            if initial_level_prior is None:
                initial_level_prior = tfd.Normal(
                    loc=observed_initial,
                    scale=tf.abs(observed_initial) + observed_stddev)
            if initial_slope_prior is None:
                initial_slope_prior = tfd.Normal(loc=0., scale=observed_stddev)

            self._initial_state_prior = tfd.MultivariateNormalDiag(
                loc=tf.stack(
                    [initial_level_prior.mean(),
                     initial_slope_prior.mean()],
                    axis=-1),
                scale_diag=tf.stack([
                    initial_level_prior.stddev(),
                    initial_slope_prior.stddev()
                ],
                                    axis=-1))

            # Constrain the support of the autoregressive coefficient.
            if constrain_ar_coef_stationary and constrain_ar_coef_positive:
                autoregressive_coef_bijector = tfb.Sigmoid(
                )  # support in (0, 1)
            elif constrain_ar_coef_positive:
                autoregressive_coef_bijector = tfb.Softplus(
                )  # support in (0, infty)
            elif constrain_ar_coef_stationary:
                autoregressive_coef_bijector = tfb.Tanh()  # support in (-1, 1)
            else:
                autoregressive_coef_bijector = tfb.Identity()  # unconstrained

            stddev_preconditioner = tfb.Scale(scale=observed_stddev)
            scaled_softplus = tfb.Chain(
                [stddev_preconditioner, tfb.Softplus()])
            super(SemiLocalLinearTrend, self).__init__(parameters=[
                Parameter('level_scale', level_scale_prior, scaled_softplus),
                Parameter('slope_mean', slope_mean_prior,
                          stddev_preconditioner),
                Parameter('slope_scale', slope_scale_prior, scaled_softplus),
                Parameter('autoregressive_coef', autoregressive_coef_prior,
                          autoregressive_coef_bijector),
            ],
                                                       latent_size=2,
                                                       name=name)
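
The `scaled_softplus` chain above maps an unconstrained real to a positive scale on the order of the observed standard deviation. A tiny sketch of what that chain computes (the numbers are illustrative):

import tensorflow_probability as tfp

tfb = tfp.bijectors

observed_stddev = 2.5
scaled_softplus = tfb.Chain([tfb.Scale(scale=observed_stddev), tfb.Softplus()])
# Chain applies right to left: softplus first, then the rescaling.
scaled_softplus.forward(0.)  # 2.5 * softplus(0.) = 2.5 * log(2) ≈ 1.73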
Example #30
  def __init__(
      self,
      ndims=2,
      curvature=0.03,
      name='banana',
      pretty_name='Banana',
  ):
    """Construct the banana model.

    Args:
      ndims: Python integer. Dimensionality of the distribution. Must be at
        least 2.
      curvature: Python float. Controls the strength of the curvature of
        the distribution.
      name: Python `str` name prefixed to Ops created by this class.
      pretty_name: A Python `str`. The pretty name of this model.

    Raises:
      ValueError: If ndims < 2.
    """
    if ndims < 2:
      raise ValueError('ndims must be at least 2, saw: {}'.format(ndims))

    with tf.name_scope(name):

      def bijector_fn(x):
        """Banana transform."""
        batch_shape = ps.shape(x)[:-1]
        shift = tf.concat(
            [
                tf.zeros(ps.concat([batch_shape, [1]], axis=0)),
                curvature * (tf.square(x[..., :1]) - 100),
                tf.zeros(ps.concat([batch_shape, [ndims - 2]], axis=0)),
            ],
            axis=-1,
        )
        return tfb.Shift(shift)

      mg = tfd.MultivariateNormalDiag(
          loc=tf.zeros(ndims), scale_diag=[10.] + [1.] * (ndims - 1))
      banana = tfd.TransformedDistribution(
          mg, bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))

      sample_transformations = {
          'identity':
              model.Model.SampleTransformation(
                  fn=lambda params: params,
                  pretty_name='Identity',
                  # The second dimension is a sum of scaled Chi2 and normal
                  # distribution.
                  # Mean of Chi2 with one degree of freedom is 1, but since the
                  # first element has variance of 100, it cancels with the shift
                  # (which is why the shift is there).
                  ground_truth_mean=np.zeros(ndims),
                  # Variance of Chi2 with one degree of freedom is 2.
                  ground_truth_standard_deviation=np.array(
                      [10.] + [np.sqrt(1. + 2 * curvature**2 * 10.**4)] +
                      [1.] * (ndims - 2)),
              )
      }

    self._banana = banana

    super(Banana, self).__init__(
        default_event_space_bijector=tfb.Identity(),
        event_shape=banana.event_shape,
        dtype=banana.dtype,
        name=name,
        pretty_name=pretty_name,
        sample_transformations=sample_transformations,
    )
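
The ground-truth standard deviation quoted in the comment above can be checked directly: with x1 ~ N(0, 10), the second coordinate is x2 + curvature * (x1**2 - 100), whose variance is 1 + curvature**2 * Var(x1**2) = 1 + 2 * curvature**2 * 10**4. A quick Monte Carlo sanity check (sample size arbitrary):

import numpy as np

curvature = 0.03
x1 = 10. * np.random.randn(1_000_000)
x2 = np.random.randn(1_000_000)
y2 = x2 + curvature * (x1**2 - 100.)

print(y2.std())                                   # empirical, ≈ 4.36
print(np.sqrt(1. + 2. * curvature**2 * 10.**4))   # analytic, ≈ 4.36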