def joint_log_prob(self, observed_time_series):
    """Build the joint density `log p(params) + log p(y|params)` as a callable.

    Args:
      observed_time_series: Observed `Tensor` trajectories of shape
        `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
        `1` dimension is optional if `num_timesteps > 1`), where
        `batch_shape` should match `self.batch_shape` (the broadcast batch
        shape of all priors on parameters for this structural time series
        model).

    Returns:
      log_joint_fn: A function taking a `Tensor` argument for each model
        parameter, in canonical order, and returning a `Tensor` log probability
        of shape `batch_shape`. Note that, *unlike* the `log_prob` methods of
        `tfp.distributions.Distribution`, `log_joint_fn` sums over the
        `sample_shape` of `y`, so that `sample_shape` does not appear in the
        output log prob. This corresponds to viewing multiple samples of `y` as
        iid observations from a single model, which is typically the desired
        behavior for parameter inference.
    """

    with tf.name_scope('joint_log_prob', values=[observed_time_series]):
      observed_time_series = tf.convert_to_tensor(observed_time_series)
      observed_time_series = sts_util.maybe_expand_trailing_dim(
          observed_time_series)
      num_timesteps = distribution_util.prefer_static_value(
          tf.shape(observed_time_series))[-2]

      def log_joint_fn(*param_vals):
        """Generated log-density function."""

        # Sum the log_prob values from parameter priors.
        param_lp = sum([
            param.prior.log_prob(param_val)
            for (param, param_val) in zip(self.parameters, param_vals)
        ])

        # Build a linear Gaussian state space model and evaluate the marginal
        # log_prob on observations.
        lgssm = self.make_state_space_model(
            param_vals=param_vals, num_timesteps=num_timesteps)
        observation_lp = lgssm.log_prob(observed_time_series)

        # Sum over likelihoods from iid observations. Without this sum,
        # adding `param_lp + observation_lp` would broadcast the param priors
        # over the sample shape, which incorrectly multi-counts the param
        # priors.
        sample_ndims = tf.maximum(0,
                                  tf.rank(observation_lp) - tf.rank(param_lp))
        observation_lp = tf.reduce_sum(
            observation_lp, axis=tf.range(sample_ndims))

        return param_lp + observation_lp

    return log_joint_fn
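
For orientation, here is a minimal usage sketch (our addition, not part of the source): build any `tfp.sts` model, ask it for the joint density, and evaluate it at one value per parameter. The series and model below are hypothetical placeholders.

```python
import tensorflow as tf
import tensorflow_probability as tfp

# Hypothetical data and model; any `tfp.sts` model would do here.
series = tf.random.normal([100])
model = tfp.sts.Sum(
    [tfp.sts.LocalLinearTrend(observed_time_series=series)],
    observed_time_series=series)

log_joint_fn = model.joint_log_prob(observed_time_series=series)
# One positional `Tensor` per parameter, in `model.parameters` order.
param_vals = [p.prior.sample() for p in model.parameters]
lp = log_joint_fn(*param_vals)  # shape == model.batch_shape (scalar here)
```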
Example #2
def testScalarTensor(self):
  x = tf.constant(1.)
  value = distribution_util.prefer_static_value(x)
  if not tf.executing_eagerly():
    self.assertIsInstance(value, np.ndarray)
  self.assertAllEqual(np.array(1.), value)
Example #3
def testDynamicValueEndsUpBeingEmpty(self):
  if tf.executing_eagerly(): return
  x = tf1.placeholder_with_default(
      np.array([], dtype=np.int32), shape=None)
  value = distribution_util.prefer_static_value(x)
  self.assertAllEqual(np.array([]), self.evaluate(value))
Example #4
def testNonEmptyConstantTensor(self):
  x = tf.zeros((2, 3, 4))
  value = distribution_util.prefer_static_value(x)
  self.assertIsInstance(value, np.ndarray)
  self.assertAllEqual(np.zeros((2, 3, 4)), value)
Example #5
def testEmptyConstantTensor(self):
  x = tf.constant([])
  value = distribution_util.prefer_static_value(x)
  self.assertIsInstance(value, np.ndarray)
  self.assertAllEqual(np.array([]), value)
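
Taken together, these tests pin down the contract of `prefer_static_value`: if the value is statically computable it comes back as a concrete `np.ndarray`; otherwise the `Tensor` itself is returned. A brief sketch of that behavior (our gloss on the tests above):

```python
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow_probability.python.internal import distribution_util

# Statically known value: a concrete np.ndarray comes back immediately.
static_val = distribution_util.prefer_static_value(tf.zeros((2, 3, 4)))
assert isinstance(static_val, np.ndarray)

# Dynamically shaped value (graph mode only): falls back to the Tensor
# itself, whose value is available only when the graph is run.
if not tf.executing_eagerly():
  x = tf1.placeholder_with_default(np.array([], dtype=np.int32), shape=None)
  dynamic_val = distribution_util.prefer_static_value(x)  # a tf.Tensor
```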
Example #6
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast):
  """Construct predictive distribution over future observations.

  Given samples from the posterior over parameters, return the predictive
  distribution over future observations for `num_steps_forecast` timesteps.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_steps_forecast, 1]` and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast`, we construct a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                                 num_steps_train + num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """

  with tf.compat.v1.name_scope(
      'forecast',
      values=[observed_time_series, parameter_samples, num_steps_forecast]):
    observed_time_series = tf.convert_to_tensor(
        value=observed_time_series, name='observed_time_series')
    observed_time_series = sts_util.maybe_expand_trailing_dim(
        observed_time_series)

    # Run filtering over the observed timesteps to extract the
    # latent state posterior at timestep T+1 (i.e., the final
    # filtering distribution, pushed through the transition model).
    # This is the prior for the forecast model ("today's prior
    # is yesterday's posterior").
    num_observed_steps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    observed_data_ssm = model.make_state_space_model(
        num_timesteps=num_observed_steps, param_vals=parameter_samples)
    (_, _, _, predictive_means, predictive_covs, _, _
    ) = observed_data_ssm.forward_filter(observed_time_series)

    # Build a batch of state-space models over the forecast period. Because
    # we'll use MixtureSameFamily to mix over the posterior draws, we need to
    # do some shenanigans to move the `[num_posterior_draws]` batch dimension
    # from the leftmost to the rightmost side of the model's batch shape.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate `move_dimension` calls here.
    parameter_samples = model._canonicalize_param_vals_as_map(parameter_samples)  # pylint: disable=protected-access
    parameter_samples_with_reordered_batch_dimension = {
        param.name: dist_util.move_dimension(
            parameter_samples[param.name],
            0, -(1 + _prefer_static_event_ndims(param.prior)))
        for param in model.parameters}
    forecast_prior = tfd.MultivariateNormalFullCovariance(
        loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2),
        covariance_matrix=dist_util.move_dimension(
            predictive_covs[..., -1, :, :], 0, -3))
    forecast_ssm = model.make_state_space_model(
        num_timesteps=num_steps_forecast,
        param_vals=parameter_samples_with_reordered_batch_dimension,
        initial_state_prior=forecast_prior,
        initial_step=num_observed_steps)

    num_posterior_draws = dist_util.prefer_static_value(
        forecast_ssm.batch_shape_tensor())[-1]
    return tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(
            logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)),
        components_distribution=forecast_ssm)
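
The `move_dimension` shuffling above is the crux: it relocates the leading `num_posterior_draws` axis so that it becomes the rightmost batch dimension, which is the one `MixtureSameFamily` mixes over. A standalone illustration of the semantics (shapes here are made up):

```python
import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util as dist_util

num_draws, batch, num_timesteps = 30, 7, 100
x = tf.zeros([num_draws, batch, num_timesteps])  # draws leftmost
y = dist_util.move_dimension(x, 0, -2)           # draws now second-from-right
print(y.shape)  # (7, 30, 100)
```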
Example #7
def one_step_predictive(model, observed_time_series, parameter_samples):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and
      batch shape `concat([sample_shape, model.batch_shape])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct the
  one-step-ahead predictive distribution using samples from the variational
  posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the one-step predictions by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

  with tf.compat.v1.name_scope(
      'one_step_predictive', values=[observed_time_series, parameter_samples]):
    observed_time_series = tf.convert_to_tensor(
        value=observed_time_series, name='observed_time_series')
    observed_time_series = sts_util.maybe_expand_trailing_dim(
        observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    (_, _, _, _, _, observation_means, observation_covs
    ) = lgssm.forward_filter(observed_time_series)

    # Construct the predictive distribution by mixing over posterior draws.
    # Unfortunately this requires some shenanigans with shapes. The predictive
    # parameters have shape
    #   `concat([
    #      [num_posterior_draws],
    #      observed_time_series.sample_shape,
    #      lgssm.batch_shape,
    #      lgssm.event_shape  # => [num_timesteps, 1]
    #    ]`
    # Because MixtureSameFamily mixes over the rightmost batch dimension,
    # we need to move the `num_posterior_draws` dimension to be rightmost
    # in the batch shape. This requires use of `Independent` (to preserve
    # `num_timesteps` as part of the event shape) and `move_dimension`.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate `move_dimension` calls here.
    predictions = tfd.Independent(
        distribution=tfd.Normal(
            loc=dist_util.move_dimension(observation_means[..., 0], 0, -2),
            scale=tf.sqrt(dist_util.move_dimension(
                observation_covs[..., 0, 0], 0, -2))),
        reinterpreted_batch_ndims=1)

    num_posterior_draws = dist_util.prefer_static_value(
        tf.shape(input=observation_means))[0]
    return tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(
            logits=tf.zeros([num_posterior_draws],
                            dtype=predictions.dtype)),
        components_distribution=predictions)
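
To make the shape bookkeeping concrete, here is a minimal standalone demo (our addition) of the `Independent` + `MixtureSameFamily` pattern used above: the rightmost batch dimension indexes posterior draws and is mixed away with equal weights.

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

num_draws, num_timesteps = 4, 10
locs = tf.random.normal([num_draws, num_timesteps])  # per-draw predictive means
components = tfd.Independent(
    tfd.Normal(loc=locs, scale=1.),
    reinterpreted_batch_ndims=1)  # batch: [4], event: [10]
mixed = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(logits=tf.zeros([num_draws])),
    components_distribution=components)
print(mixed.batch_shape, mixed.event_shape)  # (), (10,)
```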
def decompose_by_component(model, observed_time_series, parameter_samples):
    """Decompose an observed time series into contributions from each component.

  This method decomposes a time series according to the posterior representation
  of a structural time series model. In particular, it:
    - Computes the posterior marginal mean and covariances over the additive
      model's latent space.
    - Decomposes the latent posterior into the marginal blocks for each
      model component.
    - Maps the per-component latent posteriors back through each component's
      observation model, to generate the time series modeled by that component.

  Args:
    model: An instance of `tfp.sts.Sum` representing a structural time series
      model.
    observed_time_series: `float` `Tensor` of shape
      `batch_shape + [num_timesteps, 1]` (omitting the trailing unit dimension
      is also supported when `num_timesteps > 1`), specifying an observed time
      series. May optionally be an instance of `tfp.sts.MaskedTimeSeries`, which
      includes a mask `Tensor` to specify timesteps with missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.

  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of `posterior_means`/`posterior_covs`, and
      event shape of `[num_timesteps]`.

  #### Examples

  Suppose we've built a model and fit it to data:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    num_steps_forecast = 50
    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  To extract the contributions of individual components, pass the time series
  and sampled parameters into `decompose_by_component`:

  ```python
    component_dists = decompose_by_component(
      model,
      observed_time_series=observed_time_series,
      parameter_samples=samples)

    # Component mean and stddev have shape `[len(observed_time_series)]`.
    day_of_week_effect_mean = component_dists[day_of_week].mean()
    day_of_week_effect_stddev = component_dists[day_of_week].stddev()
  ```

  Using the component distributions, we can visualize the uncertainty for
  each component:

  ```python
  from matplotlib import pylab as plt
  num_components = len(component_dists)
  xs = np.arange(len(observed_time_series))
  fig = plt.figure(figsize=(12, 3 * num_components))
  for i, (component, component_dist) in enumerate(component_dists.items()):

    # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`.
    component_mean = component_dist.mean().numpy()
    component_stddev = component_dist.stddev().numpy()

    ax = fig.add_subplot(num_components, 1, 1 + i)
    ax.plot(xs, component_mean, lw=2)
    ax.fill_between(xs,
                    component_mean - 2 * component_stddev,
                    component_mean + 2 * component_stddev,
                    alpha=0.5)
    ax.set_title(component.name)
  ```

  """

  with tf1.name_scope('decompose_by_component',
                      values=[observed_time_series]):
    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)

    # Run smoothing over the training timesteps to extract the
    # posterior on latents.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    ssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                       param_vals=parameter_samples)
    posterior_means, posterior_covs = ssm.posterior_marginals(
        observed_time_series, mask=is_missing)

    return _decompose_from_posterior_marginals(model, posterior_means,
                                               posterior_covs,
                                               parameter_samples)
def _decompose_from_posterior_marginals(model, posterior_means, posterior_covs,
                                        parameter_samples):
    """Utility method to decompose a joint posterior into components.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    posterior_means: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps, latent_size])`
      representing the posterior mean over latents in an
      `AdditiveStateSpaceModel`.
    posterior_covs: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps,
      latent_size, latent_size])`
      representing the posterior marginal covariances over latents in an
      `AdditiveStateSpaceModel`.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.

  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of `posterior_means`/`posterior_covs`, and
      event shape of `[num_timesteps]`.
  """

  try:
    model.components
  except AttributeError:
    raise ValueError(
        'Model decomposed into components must be an instance of '
        '`tfp.sts.Sum` (passed model {})'.format(model))

  with tf1.name_scope('decompose_from_posterior_marginals'):

    # Extract the component means/covs from the joint latent posterior.
    latent_sizes = [component.latent_size for component in model.components]
    component_means = tf.split(posterior_means, latent_sizes, axis=-1)
    component_covs = _split_covariance_into_marginals(
        posterior_covs, latent_sizes)

    # Instantiate per-component state space models, and use them to push the
    # posterior means/covs through the observation model for each component.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=posterior_means))[-2]
    component_ssms = model.make_component_state_space_models(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    component_predictive_dists = collections.OrderedDict()
    for (component, component_ssm, component_mean, component_cov) in zip(
        model.components, component_ssms, component_means, component_covs):
      component_obs_mean, component_obs_cov = (
          component_ssm.latents_to_observations(
              latent_means=component_mean, latent_covs=component_cov))

      # Using the observation means and covs, build a mixture distribution
      # that integrates over the posterior draws.
      component_predictive_dists[component] = sts_util.mix_over_posterior_draws(
          means=component_obs_mean[..., 0],
          variances=component_obs_cov[..., 0, 0])
  return component_predictive_dists
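
`_split_covariance_into_marginals` is not shown in this excerpt; presumably it extracts the diagonal blocks of the joint latent covariance, one block per component. A hypothetical sketch of such a helper (names and behavior are our assumptions):

```python
import tensorflow as tf

def split_covariance_into_marginals(covs, block_sizes):
  """Hypothetical: slice out diagonal blocks `covs[..., i:i+k, i:i+k]`."""
  marginals = []
  start = 0
  for size in block_sizes:
    marginals.append(covs[..., start:start + size, start:start + size])
    start += size
  return marginals
```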
Example #10
def _get_perm(self):
  if self.perm is None:
    perm_start = (distribution_util.prefer_static_value(
        self.rightmost_transposed_ndims) - 1)
    return tf.range(start=perm_start, limit=-1, delta=-1, name='perm')
  return self.perm
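
Concretely, when `perm` is unspecified the default permutation just reverses the rightmost `rightmost_transposed_ndims` dimensions; e.g. (our worked example):

```python
import tensorflow as tf

# With rightmost_transposed_ndims == 3, the default perm is:
perm = tf.range(start=3 - 1, limit=-1, delta=-1)  # => [2, 1, 0]
```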
Example #11
def __init__(self,
             perm=None,
             rightmost_transposed_ndims=None,
             validate_args=False,
             name='transpose'):
    """Instantiates the `Transpose` bijector.

    Args:
      perm: Positive `int32` vector-shaped `Tensor` representing permutation of
        rightmost dims (for forward transformation).  Note that the `0`th index
        represents the first of the rightmost dims and the largest value must be
        `rightmost_transposed_ndims - 1` and corresponds to `tf.rank(x) - 1`.
        Only one of `perm` and `rightmost_transposed_ndims` can (and must) be
        specified.
        Default value:
        `tf.range(start=rightmost_transposed_ndims - 1, limit=-1, delta=-1)`.
      rightmost_transposed_ndims: Positive `int32` scalar-shaped `Tensor`
        representing the number of rightmost dimensions to permute.
        Only one of `perm` and `rightmost_transposed_ndims` can (and must) be
        specified.
        Default value: `tf.size(perm)`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.
      name: Python `str` name given to ops managed by this object.

    Raises:
      ValueError: if both or neither `perm` and `rightmost_transposed_ndims` are
        specified.
      NotImplementedError: if `rightmost_transposed_ndims` is not known prior to
        graph execution.
    """
    with tf.compat.v1.name_scope(name,
                                 values=[perm, rightmost_transposed_ndims]):
      if (rightmost_transposed_ndims is None) == (perm is None):
        raise ValueError('Must specify exactly one of '
                         '`rightmost_transposed_ndims` and `perm`.')
      if rightmost_transposed_ndims is not None:
        rightmost_transposed_ndims = tf.convert_to_tensor(
            value=rightmost_transposed_ndims,
            dtype=np.int32,
            name='rightmost_transposed_ndims')
        rightmost_transposed_ndims_ = tf.get_static_value(
            rightmost_transposed_ndims)
        assertions = _maybe_validate_rightmost_transposed_ndims(
            rightmost_transposed_ndims, validate_args)
        if assertions:
          with tf.control_dependencies(assertions):
            rightmost_transposed_ndims = tf.identity(
                rightmost_transposed_ndims)
        perm = tf.range(
            start=util.prefer_static_value(rightmost_transposed_ndims) - 1,
            limit=-1,
            delta=-1,
            name='perm')
      else:  # perm is not None:
        perm = tf.convert_to_tensor(value=perm, dtype=np.int32, name='perm')
        rightmost_transposed_ndims = tf.size(
            input=perm, name='rightmost_transposed_ndims')
        rightmost_transposed_ndims_ = tf.get_static_value(
            rightmost_transposed_ndims)
        assertions = _maybe_validate_perm(perm, validate_args)
        if assertions:
          with tf.control_dependencies(assertions):
            perm = tf.identity(perm)

      # TODO(b/110828604): If bijector base class ever supports dynamic
      # `min_event_ndims`, then this class already works dynamically and the
      # following five lines can be removed.
      if rightmost_transposed_ndims_ is None:
        raise NotImplementedError('`rightmost_transposed_ndims` must be '
                                  'known prior to graph execution.')
      else:
        rightmost_transposed_ndims_ = int(rightmost_transposed_ndims_)

      self._perm = perm
      self._rightmost_transposed_ndims = rightmost_transposed_ndims
      super(Transpose, self).__init__(
          forward_min_event_ndims=rightmost_transposed_ndims_,
          graph_parents=[perm, rightmost_transposed_ndims],
          is_constant_jacobian=True,
          validate_args=validate_args,
          name=name)
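
A brief usage sketch (our addition): `tfp.bijectors.Transpose` permutes the rightmost dimensions on `forward` and undoes the permutation on `inverse`.

```python
import tensorflow as tf
import tensorflow_probability as tfp

bijector = tfp.bijectors.Transpose(perm=[1, 0])
x = tf.reshape(tf.range(6.), [2, 3])
y = bijector.forward(x)        # shape [3, 2]: rightmost two dims swapped
x_again = bijector.inverse(y)  # shape [2, 3]
```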
Example #12
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
  """Runs posterior inference to impute the missing values in a time series.

  This method computes the posterior marginals `p(latent state | observations)`,
  given the time series at observed timesteps (a missingness mask should
  be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back
  through the observation model to impute a predictive distribution on the
  observed time series. At unobserved steps, this is an imputed value; at other
  steps it is interpreted as the model's estimate of the underlying noise-free
  series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range of
      values that could be *observed* at each timestep, including any i.i.d.
      observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.

  #### Example

  To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
    time_series=time_series_with_nans,
    is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  A masked time series can be passed to `tfp.sts` methods in place of an
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting a model, `impute_missing_values` returns a distribution over
  the time series with missing values imputed:
  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
    model, observed_time_series, parameter_samples=parameter_samples)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```

  """
  with tf.name_scope('impute_missing_values'):

    [
        observed_time_series,
        mask
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run smoothing over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    posterior_means, posterior_covs = lgssm.posterior_marginals(
        observed_time_series, mask=mask)

    observation_means, observation_covs = lgssm.latents_to_observations(
        latent_means=posterior_means,
        latent_covs=posterior_covs)

    if not include_observation_noise:
      # Extract just the variance of observation noise by pushing forward
      # zero-variance latents.
      _, observation_noise_covs = lgssm.latents_to_observations(
          latent_means=posterior_means,
          latent_covs=tf.zeros_like(posterior_covs))
      # Subtract out the observation noise that was added in the original
      # pushforward. Note that this could cause numerical issues if the
      # observation noise is very large. If this becomes an issue we could
      # avoid the subtraction by plumbing `include_observation_noise` through
      # `lgssm.latents_to_observations`.
      observation_covs -= observation_noise_covs

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    return sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
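
The subtraction above relies on a linear-Gaussian identity: if the observation model is `y = H z + eps` with `eps ~ N(0, R)`, then `latents_to_observations` yields covariance `H P Hᵀ + R`, while pushing zero-variance latents through yields `R` alone, so the difference is the noise-free `H P Hᵀ`. A numpy sanity check (our illustration, with made-up matrices):

```python
import numpy as np

H = np.array([[1., 0.]])                 # hypothetical observation matrix
P = np.array([[0.5, 0.1], [0.1, 0.3]])   # latent posterior covariance
R = np.array([[0.2]])                    # observation noise covariance

obs_cov = H @ P @ H.T + R                     # full observation covariance
noise_only = H @ np.zeros_like(P) @ H.T + R   # zero-variance latents -> R
assert np.allclose(obs_cov - noise_only, H @ P @ H.T)
```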
Example #13
def one_step_predictive(model, observed_time_series, parameter_samples):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and
      batch shape `concat([sample_shape, model.batch_shape])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct the
  one-step-ahead predictive distribution using samples from the variational
  posterior:

  ```python
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
    loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_log_prob(observed_time_series),
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
    samples = surrogate_posterior.sample(30)

    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the one-step predictions by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

  with tf.name_scope('one_step_predictive'):

    [
        observed_time_series,
        is_missing
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    (_, _, _, _, _, observation_means, observation_covs
    ) = lgssm.forward_filter(observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    return sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
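
Relative to Example #7, the explicit `Independent`/`MixtureSameFamily` construction has moved into the `sts_util.mix_over_posterior_draws` helper. A hedged sketch of what that helper presumably does (the axis conventions below are our assumptions):

```python
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability.python.internal import distribution_util as dist_util
tfd = tfp.distributions

def mix_over_posterior_draws(means, variances):
  """Hypothetical: equal-weight Gaussian mixture over a leading draws axis."""
  locs = dist_util.move_dimension(means, 0, -2)
  scales = tf.sqrt(dist_util.move_dimension(variances, 0, -2))
  num_draws = tf.shape(locs)[-2]
  components = tfd.Independent(tfd.Normal(loc=locs, scale=scales),
                               reinterpreted_batch_ndims=1)
  return tfd.MixtureSameFamily(
      mixture_distribution=tfd.Categorical(logits=tf.zeros([num_draws])),
      components_distribution=components)
```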
Example #14
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast,
             include_observation_noise=True):
  """Construct predictive distribution over future observations.

  Given samples from the posterior over parameters, return the predictive
  distribution over future observations for `num_steps_forecast` timesteps.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.
    include_observation_noise: Python `bool` indicating whether the forecast
      distribution should include uncertainty from observation noise. If `True`,
      the forecast is over future observations; if `False`, the forecast is over
      future values of the latent noise-free time series.
      Default value: `True`.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_steps_forecast, 1]` and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast`, we construct a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
    loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_log_prob(observed_time_series),
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
    samples = surrogate_posterior.sample(30)

    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                                 num_steps_train + num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """

  with tf.name_scope('forecast'):
    [
        observed_time_series,
        mask
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run filtering over the observed timesteps to extract the
    # latent state posterior at timestep T+1 (i.e., the final
    # filtering distribution, pushed through the transition model).
    # This is the prior for the forecast model ("today's prior
    # is yesterday's posterior").
    num_observed_steps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    observed_data_ssm = model.make_state_space_model(
        num_timesteps=num_observed_steps, param_vals=parameter_samples)
    (_, _, _, predictive_means, predictive_covs, _, _
    ) = observed_data_ssm.forward_filter(observed_time_series, mask=mask)

    # Build a batch of state-space models over the forecast period. Because
    # we'll use MixtureSameFamily to mix over the posterior draws, we need to
    # do some shenanigans to move the `[num_posterior_draws]` batch dimension
    # from the leftmost to the rightmost side of the model's batch shape.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate `move_dimension` calls here.
    parameter_samples = model._canonicalize_param_vals_as_map(parameter_samples)  # pylint: disable=protected-access
    parameter_samples_with_reordered_batch_dimension = {
        param.name: dist_util.move_dimension(
            parameter_samples[param.name],
            0, -(1 + _prefer_static_event_ndims(param.prior)))
        for param in model.parameters}
    forecast_prior = tfd.MultivariateNormalFullCovariance(
        loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2),
        covariance_matrix=dist_util.move_dimension(
            predictive_covs[..., -1, :, :], 0, -3))

    # Ugly hack: because we moved `num_posterior_draws` to the trailing (rather
    # than leading) dimension of parameters, the parameter batch shapes no
    # longer broadcast against the `constant_offset` attribute used in `sts.Sum`
    # models. We fix this by manually adding an extra broadcasting dim to
    # `constant_offset` if present.
    # The root cause of this hack is that we mucked with param dimensions above
    # and are now passing params that are 'invalid' in the sense that they don't
    # match the shapes of the model's param priors. The fix (as above) will be
    # to update MixtureSameFamily so we can avoid changing param dimensions
    # altogether.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate this hack.
    kwargs = {}
    if hasattr(model, 'constant_offset'):
      kwargs['constant_offset'] = tf.convert_to_tensor(
          value=model.constant_offset,
          dtype=forecast_prior.dtype)[..., tf.newaxis, :]

    if not include_observation_noise:
      parameter_samples_with_reordered_batch_dimension[
          'observation_noise_scale'] = tf.zeros_like(
              parameter_samples_with_reordered_batch_dimension[
                  'observation_noise_scale'])

    # We assume that any STS model that has a `constant_offset` attribute
    # will allow it to be overridden as a kwarg. This is currently just
    # `sts.Sum`.
    # TODO(b/120245392): when kwargs hack is removed, switch back to calling
    # the public version of `_make_state_space_model`.
    forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
        num_timesteps=num_steps_forecast,
        param_map=parameter_samples_with_reordered_batch_dimension,
        initial_state_prior=forecast_prior,
        initial_step=num_observed_steps,
        **kwargs)

    num_posterior_draws = dist_util.prefer_static_value(
        forecast_ssm.batch_shape_tensor())[-1]
    return tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(
            logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)),
        components_distribution=forecast_ssm)
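
Finally, a short usage note (our addition, continuing the docstring example above): with `include_observation_noise=False` the forecast covers the latent noise-free series, so its spread is typically narrower than the default observation forecast.

```python
smoothed_forecast_dist = tfp.sts.forecast(
    model, observed_time_series,
    parameter_samples=samples,
    num_steps_forecast=50,
    include_observation_noise=False)
# Same event shape [50, 1]; stddev excludes the i.i.d. observation noise.
```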