Example No. 1
  def test_maybe_expand_trailing_dim(self):

    # static inputs
    self.assertEqual(
        sts_util.maybe_expand_trailing_dim(tf.zeros([4, 3])).shape,
        tf.TensorShape([4, 3, 1]))
    self.assertEqual(
        sts_util.maybe_expand_trailing_dim(tf.zeros([4, 3, 1])).shape,
        tf.TensorShape([4, 3, 1]))

    # dynamic inputs
    for shape_in, static_shape, expected_shape_out in [
        # pyformat: disable
        ([4, 3], None, [4, 3, 1]),
        ([4, 3, 1], None, [4, 3, 1]),
        ([4], [None], [4, 1]),
        ([1], [None], [1]),
        ([4, 3], [None, None], [4, 3, 1]),
        ([4, 1], [None, None], [4, 1]),
        ([4, 1], [None, 1], [4, 1])
        # pyformat: enable
    ]:
      shape_out = self.evaluate(
          sts_util.maybe_expand_trailing_dim(
              tf.placeholder_with_default(
                  input=tf.zeros(shape_in), shape=static_shape))).shape
      self.assertAllEqual(shape_out, expected_shape_out)
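The test above pins down the contract of `sts_util.maybe_expand_trailing_dim`: inputs whose trailing dimension is not already `1` gain a new size-1 axis (`[4, 3] -> [4, 3, 1]`, `[4] -> [4, 1]`), while inputs that already end in `1` (including a bare `[1]`) pass through unchanged. A minimal standalone sketch of that behavior for statically known shapes follows; the helper name is ours, and the real utility also handles dynamic/unknown shapes:

```python
import tensorflow as tf

def expand_trailing_dim_if_needed(x):
  """Appends a size-1 axis unless the trailing dimension is already 1.

  Sketch of the behavior exercised by the test above; only statically
  known shapes are handled here.
  """
  x = tf.convert_to_tensor(x)
  if x.shape[-1] == 1:
    return x
  return x[..., tf.newaxis]

print(expand_trailing_dim_if_needed(tf.zeros([4, 3])).shape)     # (4, 3, 1)
print(expand_trailing_dim_if_needed(tf.zeros([4, 3, 1])).shape)  # (4, 3, 1)
print(expand_trailing_dim_if_needed(tf.zeros([1])).shape)        # (1,)
```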
Example No. 3
    def joint_log_prob(self, observed_time_series):
        """Build the joint density `log p(params) + log p(y|params)` as a callable.

    Args:
      observed_time_series: Observed `Tensor` trajectories of shape
        `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
        `1` dimension is optional if `num_timesteps > 1`), where
        `batch_shape` should match `self.batch_shape` (the broadcast batch
        shape of all priors on parameters for this structural time series
        model).

    Returns:
     log_joint_fn: A function taking a `Tensor` argument for each model
       parameter, in canonical order, and returning a `Tensor` log probability
       of shape `batch_shape`. Note that, *unlike* `tfp.Distributions`
       `log_prob` methods, the `log_joint` sums over the `sample_shape` from y,
       so that `sample_shape` does not appear in the output log_prob. This
       corresponds to viewing multiple samples in `y` as iid observations from a
       single model, which is typically the desired behavior for parameter
       inference.
    """

        with tf.compat.v1.name_scope('joint_log_prob',
                                     values=[observed_time_series]):
            observed_time_series = tf.convert_to_tensor(
                value=observed_time_series)
            observed_time_series = sts_util.maybe_expand_trailing_dim(
                observed_time_series)
            num_timesteps = distribution_util.prefer_static_value(
                tf.shape(input=observed_time_series))[-2]

            def log_joint_fn(*param_vals):
                """Generated log-density function."""

                # Sum the log_prob values from parameter priors.
                param_lp = sum([
                    param.prior.log_prob(param_val)
                    for (param, param_val) in zip(self.parameters, param_vals)
                ])

                # Build a linear Gaussian state space model and evaluate the marginal
                # log_prob on observations.
                lgssm = self.make_state_space_model(
                    param_vals=param_vals, num_timesteps=num_timesteps)
                observation_lp = lgssm.log_prob(observed_time_series)

                # Sum over likelihoods from iid observations. Without this sum,
                # adding `param_lp + observation_lp` would broadcast the param priors
                # over the sample shape, which incorrectly multi-counts the param
                # priors.
                sample_ndims = tf.maximum(
                    0,
                    tf.rank(observation_lp) - tf.rank(param_lp))
                observation_lp = tf.reduce_sum(input_tensor=observation_lp,
                                               axis=tf.range(sample_ndims))

                return param_lp + observation_lp

        return log_joint_fn
Example No. 4
  def joint_log_prob(self, observed_time_series):
    """Build the joint density `log p(params) + log p(y|params)` as a callable.

    Args:
      observed_time_series: Observed `Tensor` trajectories of shape
        `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
        `1` dimension is optional if `num_timesteps > 1`), where
        `batch_shape` should match `self.batch_shape` (the broadcast batch
        shape of all priors on parameters for this structural time series
        model).

    Returns:
     log_joint_fn: A function taking a `Tensor` argument for each model
       parameter, in canonical order, and returning a `Tensor` log probability
       of shape `batch_shape`. Note that, *unlike* `tfp.Distributions`
       `log_prob` methods, the `log_joint` sums over the `sample_shape` from y,
       so that `sample_shape` does not appear in the output log_prob. This
       corresponds to viewing multiple samples in `y` as iid observations from a
       single model, which is typically the desired behavior for parameter
       inference.
    """

    with tf.name_scope('joint_log_prob', values=[observed_time_series]):
      observed_time_series = tf.convert_to_tensor(observed_time_series)
      observed_time_series = sts_util.maybe_expand_trailing_dim(
          observed_time_series)
      num_timesteps = distribution_util.prefer_static_value(
          tf.shape(observed_time_series))[-2]

      def log_joint_fn(*param_vals):
        """Generated log-density function."""

        # Sum the log_prob values from parameter priors.
        param_lp = sum([
            param.prior.log_prob(param_val)
            for (param, param_val) in zip(self.parameters, param_vals)
        ])

        # Build a linear Gaussian state space model and evaluate the marginal
        # log_prob on observations.
        lgssm = self.make_state_space_model(
            param_vals=param_vals, num_timesteps=num_timesteps)
        observation_lp = lgssm.log_prob(observed_time_series)

        # Sum over likelihoods from iid observations. Without this sum,
        # adding `param_lp + observation_lp` would broadcast the param priors
        # over the sample shape, which incorrectly multi-counts the param
        # priors.
        sample_ndims = tf.maximum(0,
                                  tf.rank(observation_lp) - tf.rank(param_lp))
        observation_lp = tf.reduce_sum(
            observation_lp, axis=tf.range(sample_ndims))

        return param_lp + observation_lp

    return log_joint_fn
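The `log_joint_fn` returned by these two listings takes one positional `Tensor` per model parameter, in canonical order, and sums the prior and observation log densities, so it can serve directly as a target log density for MCMC. The snippet below is only a rough sketch of that wiring; in practice `tfp.sts.fit_with_hmc`, used in the later examples, handles this (including transforming constrained parameters through their bijectors):

```python
import tensorflow_probability as tfp

# `model` and `observed_time_series` are assumed to already exist.
target_log_prob_fn = model.joint_log_prob(observed_time_series)

# One initial value per parameter, in the same canonical order in which
# `log_joint_fn` expects its arguments.
initial_state = [param.prior.sample() for param in model.parameters]

samples, kernel_results = tfp.mcmc.sample_chain(
    num_results=200,
    num_burnin_steps=100,
    current_state=initial_state,
    kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        step_size=0.01,
        num_leapfrog_steps=3))
```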
Example No. 5
  def test_maybe_expand_trailing_dim(self):
    for shape_in, expected_shape_out in [
        # pyformat: disable
        ([4, 3], [4, 3, 1]),
        ([4, 3, 1], [4, 3, 1]),
        ([4], [4, 1]),
        ([1], [1]),
        ([4, 1], [4, 1])
        # pyformat: enable
    ]:
      shape_out = self._shape_as_list(
          sts_util.maybe_expand_trailing_dim(
              self._build_tensor(np.zeros(shape_in))))
      self.assertAllEqual(shape_out, expected_shape_out)
Example No. 6
def pad_batch_dimension_for_multiple_chains(observed_time_series, model,
                                            chain_batch_shape):
    """"Expand the observed time series with extra batch dimension(s)."""

    # Running with multiple chains introduces an extra batch dimension. In
    # general we also need to pad the observed time series with a matching batch
    # dimension.
    #
    # For example, suppose our model has batch shape [3, 4] and
    # the observed time series has shape `concat([[5], [3, 4], [100]])`,
    # corresponding to `sample_shape`, `batch_shape`, and `num_timesteps`
    # respectively. The model will produce distributions with batch shape
    # `concat([chain_batch_shape, [3, 4]])`, so we pad `observed_time_series` to
    # have matching shape `[5, 1, 3, 4, 100]`, where the added `1` dimension
    # between the sample and batch shapes will broadcast to `chain_batch_shape`.

    observed_time_series = sts_util.maybe_expand_trailing_dim(
        observed_time_series)  # Guarantee `event_ndims=2`
    event_ndims = 2  # event_shape = [num_timesteps, observation_size=1]

    model_batch_ndims = (model.batch_shape.ndims
                         if model.batch_shape.ndims is not None else tf.shape(
                             input=model.batch_shape_tensor())[0])

    # Compute ndims from chain_batch_shape.
    chain_batch_shape = tf.convert_to_tensor(value=chain_batch_shape,
                                             name='chain_batch_shape',
                                             dtype=tf.int32)
    if not chain_batch_shape.shape.is_fully_defined():
        raise ValueError(
            'Batch shape must have static rank. (given: {})'.format(
                chain_batch_shape))
    if chain_batch_shape.shape.ndims == 0:  # expand int `k` to `[k]`.
        chain_batch_shape = chain_batch_shape[tf.newaxis]
    chain_batch_ndims = tf.compat.dimension_value(chain_batch_shape.shape[0])

    for _ in range(chain_batch_ndims):
        observed_time_series = tf.expand_dims(
            observed_time_series, -(model_batch_ndims + event_ndims + 1))
    return observed_time_series
Example No. 7
def pad_batch_dimension_for_multiple_chains(observed_time_series,
                                            model,
                                            chain_batch_shape):
  """"Expand the observed time series with extra batch dimension(s)."""

  # Running with multiple chains introduces an extra batch dimension. In
  # general we also need to pad the observed time series with a matching batch
  # dimension.
  #
  # For example, suppose our model has batch shape [3, 4] and
  # the observed time series has shape `concat([[5], [3, 4], [100]])`,
  # corresponding to `sample_shape`, `batch_shape`, and `num_timesteps`
  # respectively. The model will produce distributions with batch shape
  # `concat([chain_batch_shape, [3, 4]])`, so we pad `observed_time_series` to
  # have matching shape `[5, 1, 3, 4, 100]`, where the added `1` dimension
  # between the sample and batch shapes will broadcast to `chain_batch_shape`.

  observed_time_series = sts_util.maybe_expand_trailing_dim(
      observed_time_series)  # Guarantee `event_ndims=2`
  event_ndims = 2  # event_shape = [num_timesteps, observation_size=1]

  model_batch_ndims = (model.batch_shape.ndims
                       if model.batch_shape.ndims is not None
                       else tf.shape(model.batch_shape_tensor())[0])

  # Compute ndims from chain_batch_shape.
  chain_batch_shape = tf.convert_to_tensor(
      chain_batch_shape, name='chain_batch_shape', dtype=tf.int32)
  if not chain_batch_shape.shape.is_fully_defined():
    raise ValueError('Batch shape must have static rank. (given: {})'.format(
        chain_batch_shape))
  if chain_batch_shape.shape.ndims == 0:  # expand int `k` to `[k]`.
    chain_batch_shape = chain_batch_shape[tf.newaxis]
  chain_batch_ndims = chain_batch_shape.shape[0].value

  for _ in range(chain_batch_ndims):
    observed_time_series = tf.expand_dims(
        observed_time_series, -(model_batch_ndims + event_ndims + 1))
  return observed_time_series
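To make the shape bookkeeping in the comment concrete: with sample shape `[5]`, model batch shape `[3, 4]`, `num_timesteps = 100`, and a single chain dimension, the loop below reproduces just the padding arithmetic (an illustration of the indexing, not a call into the function above):

```python
import tensorflow as tf

# sample_shape [5] + model batch_shape [3, 4] + event_shape [100, 1]
# (the trailing 1 is the observation dimension guaranteed by
# `maybe_expand_trailing_dim`).
observed = tf.zeros([5, 3, 4, 100, 1])

model_batch_ndims = 2   # model.batch_shape == [3, 4]
event_ndims = 2         # [num_timesteps, observation_size=1]
chain_batch_ndims = 1   # e.g. chain_batch_shape == [10]

# Each chain dimension inserts a size-1 axis just left of the model batch
# dimensions, where it will broadcast against `chain_batch_shape`.
for _ in range(chain_batch_ndims):
  observed = tf.expand_dims(observed,
                            -(model_batch_ndims + event_ndims + 1))

print(observed.shape)  # (5, 1, 3, 4, 100, 1)
```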
Example No. 8
  def _build_sts(self, observed_time_series=None):
    max_timesteps = 100
    num_features = 3

    prior = tfd.Laplace(0., 1.)

    # LinearRegression components don't currently take an `observed_time_series`
    # argument, so they can't infer a prior batch shape. This means we have to
    # manually set the batch shape expected by the tests.
    if observed_time_series is not None:
      observed_time_series = sts_util.maybe_expand_trailing_dim(
          observed_time_series)
      batch_shape = observed_time_series.shape[:-2]
      prior = tfd.TransformedDistribution(prior, tfb.Identity(),
                                          event_shape=[num_features],
                                          batch_shape=batch_shape)

    regression = LinearRegression(
        design_matrix=tf.random.normal([max_timesteps, num_features]),
        weights_prior=prior)
    return Sum(components=[regression],
               observed_time_series=observed_time_series)
Example No. 9
    def __init__(self,
                 components,
                 constant_offset=None,
                 observation_noise_scale_prior=None,
                 observed_time_series=None,
                 name=None):
        """Specify a structural time series model representing a sum of components.

    Args:
      components: Python `list` of one or more StructuralTimeSeries instances.
        These must have unique names.
      constant_offset: optional scalar `float` `Tensor`, or batch of scalars,
        specifying a constant value added to the sum of outputs from the
        component models. This allows the components to model the shifted series
        `observed_time_series - constant_offset`. If `None`, this is set to the
        mean of the provided `observed_time_series`.
        Default value: `None`.
      observation_noise_scale_prior: optional `tfd.Distribution` instance
        specifying a prior on `observation_noise_scale`. If `None`, a heuristic
        default prior is constructed based on the provided
        `observed_time_series`.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series. This is
        used to set the constant offset, if not provided, and to construct a
        default heuristic `observation_noise_scale_prior` if not provided.
        Default value: `None`.
      name: Python `str` name of this model component; used as `name_scope`
        for ops created by this class.
        Default value: 'Sum'.

    Raises:
      ValueError: if components do not have unique names.
    """

        with tf.compat.v1.name_scope(name,
                                     'Sum',
                                     values=[observed_time_series]) as name:
            if observed_time_series is not None:
                observed_time_series = tf.convert_to_tensor(
                    value=observed_time_series, name='observed_time_series')
                observed_time_series = sts_util.maybe_expand_trailing_dim(
                    observed_time_series)
                observed_mean, observed_stddev, _ = (
                    sts_util.empirical_statistics(observed_time_series))
            else:
                observed_mean, observed_stddev = 0., 1.

            if observation_noise_scale_prior is None:
                observation_noise_scale_prior = tfd.LogNormal(
                    loc=tf.math.log(.01 * observed_stddev), scale=2.)

            if constant_offset is None:
                constant_offset = observed_mean

            # Check that components have unique names, to ensure that inherited
            # parameters will be assigned unique names.
            component_names = [c.name for c in components]
            if len(component_names) != len(set(component_names)):
                raise ValueError(
                    'Components must have unique names: {}'.format(
                        component_names))
            components_by_name = collections.OrderedDict([(c.name, c)
                                                          for c in components])

            # Build parameters list for the combined model, by inheriting parameters
            # from the component models in canonical order.
            parameters = [
                Parameter('observation_noise_scale',
                          observation_noise_scale_prior, tfb.Softplus()),
            ] + [
                Parameter(name='{}_{}'.format(component.name, parameter.name),
                          prior=parameter.prior,
                          bijector=parameter.bijector)
                for component in components
                for parameter in component.parameters
            ]

            self._components = components
            self._components_by_name = components_by_name
            self._constant_offset = constant_offset

            super(Sum, self).__init__(parameters=parameters,
                                      latent_size=sum([
                                          component.latent_size
                                          for component in components
                                      ]),
                                      name=name)
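For reference, constructing a `Sum` model as this docstring describes looks roughly like the following; the component choices are arbitrary, and the same pattern appears in the docstring examples further below:

```python
import tensorflow_probability as tfp

# `observed_time_series` is assumed to be a float Tensor (or ndarray) of
# shape `batch_shape + [T, 1]`, or `batch_shape + [T]` when T > 1.
trend = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series, name='trend')
day_of_week = tfp.sts.Seasonal(
    num_seasons=7,
    observed_time_series=observed_time_series,
    name='day_of_week')

# Component names must be unique; `constant_offset` and the observation
# noise prior default to heuristics computed from `observed_time_series`.
model = tfp.sts.Sum(components=[trend, day_of_week],
                    observed_time_series=observed_time_series)
```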
Example No. 10
def one_step_predictive(model, observed_time_series, parameter_samples):
    """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and batch shape
      `concat([sample_shape, model.batch_shape])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

    with tf.compat.v1.name_scope(
            'one_step_predictive',
            values=[observed_time_series, parameter_samples]):
        observed_time_series = tf.convert_to_tensor(
            value=observed_time_series, name='observed_time_series')
        observed_time_series = sts_util.maybe_expand_trailing_dim(
            observed_time_series)

        # Run filtering over the training timesteps to extract the
        # predictive means and variances.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        lgssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                             param_vals=parameter_samples)
        (_, _, _, _, _, observation_means,
         observation_covs) = lgssm.forward_filter(observed_time_series)

        # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
        # to a scalar time series.
        return sts_util.mix_over_posterior_draws(
            means=observation_means[..., 0],
            variances=observation_covs[..., 0, 0])
Example No. 11
def forecast(model, observed_time_series, parameter_samples,
             num_steps_forecast):
    """Construct predictive distribution over future observations.

  Given samples from the posterior over parameters, return the predictive
  distribution over future observations for num_steps_forecast timesteps.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_steps_forecast, 1]` and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast`, we construct a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                                 num_steps_train + num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """

    with tf.compat.v1.name_scope('forecast',
                                 values=[
                                     observed_time_series, parameter_samples,
                                     num_steps_forecast
                                 ]):
        observed_time_series = tf.convert_to_tensor(
            value=observed_time_series, name='observed_time_series')
        observed_time_series = sts_util.maybe_expand_trailing_dim(
            observed_time_series)

        # Run filtering over the observed timesteps to extract the
        # latent state posterior at timestep T+1 (i.e., the final
        # filtering distribution, pushed through the transition model).
        # This is the prior for the forecast model ("today's prior
        # is yesterday's posterior").
        num_observed_steps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        observed_data_ssm = model.make_state_space_model(
            num_timesteps=num_observed_steps, param_vals=parameter_samples)
        (_, _, _, predictive_means, predictive_covs, _,
         _) = observed_data_ssm.forward_filter(observed_time_series)

        # Build a batch of state-space models over the forecast period. Because
        # we'll use MixtureSameFamily to mix over the posterior draws, we need to
        # do some shenanigans to move the `[num_posterior_draws]` batch dimension
        # from the leftmost to the rightmost side of the model's batch shape.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate `move_dimension` calls here.
        parameter_samples = model._canonicalize_param_vals_as_map(
            parameter_samples)  # pylint: disable=protected-access
        parameter_samples_with_reordered_batch_dimension = {
            param.name: dist_util.move_dimension(
                parameter_samples[param.name], 0,
                -(1 + _prefer_static_event_ndims(param.prior)))
            for param in model.parameters
        }
        forecast_prior = tfd.MultivariateNormalFullCovariance(
            loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2),
            covariance_matrix=dist_util.move_dimension(
                predictive_covs[..., -1, :, :], 0, -3))

        # Ugly hack: because we moved `num_posterior_draws` to the trailing (rather
        # than leading) dimension of parameters, the parameter batch shapes no
        # longer broadcast against the `constant_offset` attribute used in `sts.Sum`
        # models. We fix this by manually adding an extra broadcasting dim to
        # `constant_offset` if present.
        # The root cause of this hack is that we mucked with param dimensions above
        # and are now passing params that are 'invalid' in the sense that they don't
        # match the shapes of the model's param priors. The fix (as above) will be
        # to update MixtureSameFamily so we can avoid changing param dimensions
        # altogether.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate this hack.
        kwargs = {}
        if hasattr(model, 'constant_offset'):
            kwargs['constant_offset'] = tf.convert_to_tensor(
                value=model.constant_offset,
                dtype=forecast_prior.dtype)[..., tf.newaxis]

        # We assume that any STS model that has a `constant_offset` attribute
        # will allow it to be overridden as a kwarg. This is currently just
        # `sts.Sum`.
        # TODO(b/120245392): when kwargs hack is removed, switch back to calling
        # the public version of `_make_state_space_model`.
        forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
            num_timesteps=num_steps_forecast,
            param_map=parameter_samples_with_reordered_batch_dimension,
            initial_state_prior=forecast_prior,
            initial_step=num_observed_steps,
            **kwargs)

        num_posterior_draws = dist_util.prefer_static_value(
            forecast_ssm.batch_shape_tensor())[-1]
        return tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(
                logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)),
            components_distribution=forecast_ssm)
Example No. 12
def decompose_by_component(model, observed_time_series, parameter_samples):
    """Decompose an observed time series into contributions from each component.

  This method decomposes a time series according to the posterior representation
  of a structural time series model. In particular, it:
    - Computes the posterior marginal mean and covariances over the additive
      model's latent space.
    - Decomposes the latent posterior into the marginal blocks for each
      model component.
    - Maps the per-component latent posteriors back through each component's
      observation model, to generate the time series modeled by that component.

  Args:
    model: An instance of `tfp.sts.Sum` representing a structural time series
      model.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [num_timesteps, 1]` (omitting the trailing unit dimension
      is also supported when `num_timesteps > 1`), specifying an observed time
      series.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of `posterior_means`/`posterior_covs`, and
      event shape of `[num_timesteps]`.

  #### Examples

  Suppose we've built a model and fit it to data:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    num_steps_forecast = 50
    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  To extract the contributions of individual components, pass the time series
  and sampled parameters into `decompose_by_component`:

  ```python
    component_dists = decompose_by_component(
      model,
      observed_time_series=observed_time_series,
      parameter_samples=samples)

    # Component mean and stddev have shape `[len(observed_time_series)]`.
    day_of_week_effect_mean = component_dists[day_of_week].mean()
    day_of_week_effect_stddev = component_dists[day_of_week].stddev()
  ```

  Using the component distributions, we can visualize the uncertainty for
  each component:

  ```python
  from matplotlib import pylab as plt
  num_components = len(component_dists)
  xs = np.arange(len(observed_time_series))
  fig = plt.figure(figsize=(12, 3 * num_components))
  for i, (component, component_dist) in enumerate(component_dists.items()):

    # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`.
    component_mean = component_dist.mean().numpy()
    component_stddev = component_dist.stddev().numpy()

    ax = fig.add_subplot(num_components, 1, 1 + i)
    ax.plot(xs, component_mean, lw=2)
    ax.fill_between(xs,
                    component_mean - 2 * component_stddev,
                    component_mean + 2 * component_stddev,
                    alpha=0.5)
    ax.set_title(component.name)
  ```

  """

    with tf.compat.v1.name_scope('decompose_by_component',
                                 values=[observed_time_series]):
        observed_time_series = tf.convert_to_tensor(
            value=observed_time_series, name='observed_time_series')
        observed_time_series = sts_util.maybe_expand_trailing_dim(
            observed_time_series)

        # Run smoothing over the training timesteps to extract the
        # posterior on latents.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        ssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                           param_vals=parameter_samples)
        posterior_means, posterior_covs = ssm.posterior_marginals(
            observed_time_series)

        return _decompose_from_posterior_marginals(model, posterior_means,
                                                   posterior_covs,
                                                   parameter_samples)