def test_mix_over_posterior_draws(self):
  num_posterior_draws = 3
  batch_shape = [2, 1]
  means = np.random.randn(
      *np.concatenate([[num_posterior_draws], batch_shape]))
  variances = np.exp(np.random.randn(
      *np.concatenate([[num_posterior_draws], batch_shape])))

  posterior_mixture_dist = sts_util.mix_over_posterior_draws(
      means, variances)

  # Compute the true statistics of the mixture distribution.
  mixture_mean = np.mean(means, axis=0)
  mixture_variance = np.mean(variances + means**2, axis=0) - mixture_mean**2

  self.assertAllClose(mixture_mean,
                      self.evaluate(posterior_mixture_dist.mean()))
  self.assertAllClose(mixture_variance,
                      self.evaluate(posterior_mixture_dist.variance()))
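
# A minimal NumPy sketch of the moment-matching identity the test above
# verifies (illustrative only; `mixture_moments` is a hypothetical helper,
# not part of the library, and `np` is assumed imported as in this module):
# a uniform mixture of components with means `m_i` and variances `v_i` has
# mean `mean(m)` and variance `mean(v + m**2) - mean(m)**2`.
def mixture_moments(means, variances):
  """Returns (mean, variance) of a uniform mixture over axis 0."""
  mixture_mean = np.mean(means, axis=0)
  # E[X**2] = mean_i(v_i + m_i**2); subtract E[X]**2 to get the variance.
  mixture_variance = np.mean(variances + means**2, axis=0) - mixture_mean**2
  return mixture_mean, mixture_variance
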
def _decompose_from_posterior_marginals(
    model, posterior_means, posterior_covs, parameter_samples):
  """Utility method to decompose a joint posterior into components.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    posterior_means: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps, latent_size])`
      representing the posterior mean over latents in an
      `AdditiveStateSpaceModel`.
    posterior_covs: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps, latent_size,
      latent_size])` representing the posterior marginal covariances over
      latents in an `AdditiveStateSpaceModel`.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.

  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of
      `model.components`) to `tfd.Distribution` instances representing the
      posterior marginal distributions on the process modeled by each
      component. Each distribution has batch shape matching that of
      `posterior_means`/`posterior_covs`, and event shape of
      `[num_timesteps]`.
  """
  try:
    model.components
  except AttributeError:
    raise ValueError('Model decomposed into components must be an instance '
                     'of `tfp.sts.Sum` (passed model {})'.format(model))

  with tf.compat.v1.name_scope('decompose_from_posterior_marginals'):
    # Extract the component means/covs from the joint latent posterior.
    latent_sizes = [component.latent_size for component in model.components]
    component_means = tf.split(posterior_means, latent_sizes, axis=-1)
    component_covs = _split_covariance_into_marginals(
        posterior_covs, latent_sizes)

    # Instantiate per-component state space models, and use them to push the
    # posterior means/covs through the observation model for each component.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=posterior_means))[-2]
    component_ssms = model.make_component_state_space_models(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    component_predictive_dists = collections.OrderedDict()
    for (component, component_ssm, component_mean, component_cov) in zip(
        model.components, component_ssms, component_means, component_covs):
      component_obs_mean, component_obs_cov = (
          component_ssm.latents_to_observations(
              latent_means=component_mean,
              latent_covs=component_cov))

      # Using the observation means and covs, build a mixture distribution
      # that integrates over the posterior draws.
      component_predictive_dists[component] = (
          sts_util.mix_over_posterior_draws(
              means=component_obs_mean[..., 0],
              variances=component_obs_cov[..., 0, 0]))
    return component_predictive_dists
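
# A hedged usage sketch of the private helper above (illustrative only;
# `_example_decompose` is hypothetical): the public
# `tfp.sts.decompose_by_component` computes the latent posterior marginals
# itself and then delegates to this helper, roughly as follows. `model`,
# `observed_time_series`, and `parameter_samples` are assumed to come from a
# fitting step such as `tfp.sts.fit_with_hmc`.
def _example_decompose(model, observed_time_series, parameter_samples):
  """Illustrates decomposing a posterior into per-component marginals."""
  num_timesteps = dist_util.prefer_static_value(
      tf.shape(input=observed_time_series))[-2]
  lgssm = model.make_state_space_model(
      num_timesteps=num_timesteps, param_vals=parameter_samples)
  posterior_means, posterior_covs = lgssm.posterior_marginals(
      observed_time_series)
  return _decompose_from_posterior_marginals(
      model, posterior_means, posterior_covs, parameter_samples)
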
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
  """Runs posterior inference to impute the missing values in a time series.

  This method computes the posterior marginals `p(latent state |
  observations)`, given the time series at observed timesteps (a missingness
  mask should be specified using `tfp.sts.MaskedTimeSeries`). It pushes this
  posterior back through the observation model to impute a predictive
  distribution on the observed time series. At unobserved steps, this is an
  imputed value; at other steps it is interpreted as the model's estimate of
  the underlying noise-free series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing
      `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range
      of values that could be *observed* at each timestep, including any
      i.i.d. observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_timesteps] and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.

  #### Example

  To specify a time series with missing values, use
  `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
      time_series=time_series_with_nans,
      is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  Masked time series can be passed to `tfp.sts` methods in place of an
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
      observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting a model, `impute_missing_values` will return a distribution
  over the time series:

  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
      model, observed_time_series)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```

  """
  with tf.name_scope('impute_missing_values'):
    [observed_time_series,
     mask] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)

    # Run smoothing over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    posterior_means, posterior_covs = lgssm.posterior_marginals(
        observed_time_series, mask=mask)

    observation_means, observation_covs = lgssm.latents_to_observations(
        latent_means=posterior_means,
        latent_covs=posterior_covs)

    if not include_observation_noise:
      # Extract just the variance of observation noise by pushing forward
      # zero-variance latents.
      _, observation_noise_covs = lgssm.latents_to_observations(
          latent_means=posterior_means,
          latent_covs=tf.zeros_like(posterior_covs))
      # Subtract out the observation noise that was added in the original
      # pushforward. Note that this could cause numerical issues if the
      # observation noise is very large. If this becomes an issue we could
      # avoid the subtraction by plumbing `include_observation_noise` through
      # `lgssm.latents_to_observations`.
      observation_covs -= observation_noise_covs

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    return sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
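
# A hedged sketch comparing the two `include_observation_noise` settings
# (illustrative only; `_example_imputation_scales` is hypothetical, and the
# arguments are assumed to come from a fitting step as in the docstring
# above):
def _example_imputation_scales(model, observed_time_series, parameter_samples):
  """Illustrates the effect of `include_observation_noise`."""
  noise_free = impute_missing_values(
      model, observed_time_series, parameter_samples,
      include_observation_noise=False)
  with_noise = impute_missing_values(
      model, observed_time_series, parameter_samples,
      include_observation_noise=True)
  # The noisy stddev is pointwise at least as large as the noise-free one,
  # since it includes the i.i.d. observation-noise variance at each timestep.
  return noise_free.stddev(), with_noise.stddev()
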
def one_step_predictive(model, observed_time_series, parameter_samples):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing
      `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_timesteps] and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.
      The `t`th step represents the forecast distribution
      `p(observed_time_series[t] | observed_time_series[0:t-1],
      parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
  day_of_week = tfp.sts.Seasonal(
      num_seasons=7,
      observed_time_series=observed_time_series,
      name='day_of_week')
  local_linear_trend = tfp.sts.LocalLinearTrend(
      observed_time_series=observed_time_series,
      name='local_linear_trend')
  model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                      observed_time_series=observed_time_series)
  samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
  one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

  predictive_means = one_step_predictive_dist.mean()
  predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast
  using samples from the variational posterior:

  ```python
  (variational_loss,
   variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

  # OMITTED: take steps to optimize variational loss

  samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}

  one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
  from matplotlib import pylab as plt
  def plot_one_step_predictive(observed_time_series,
                               forecast_mean,
                               forecast_scale):
    plt.figure(figsize=(12, 6))
    num_timesteps = forecast_mean.shape[-1]
    c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
    plt.plot(observed_time_series, label="observed time series", color=c1)
    plt.plot(forecast_mean, label="one-step prediction", color=c2)
    plt.fill_between(np.arange(num_timesteps),
                     forecast_mean - 2 * forecast_scale,
                     forecast_mean + 2 * forecast_scale,
                     alpha=0.1, color=c2)
    plt.legend()

  plot_one_step_predictive(observed_time_series,
                           forecast_mean=predictive_means,
                           forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations
  from the mean:

  ```python
  z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
              / predictive_scales[..., :-1])
  anomalous_timesteps = tf.boolean_mask(
      tf.range(1, num_timesteps),
      tf.abs(z_scores) > 2.0)
  ```

  """
  with tf.name_scope('one_step_predictive'):
    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    (_, _, _, _, _, observation_means,
     observation_covs) = lgssm.forward_filter(
         observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    return sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
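
# A hedged sketch of the filtering step above (illustrative only;
# `_example_filtered_moments` is hypothetical): `forward_filter` returns a
# 7-tuple whose last two elements are the one-step-ahead observation means
# (shape `[..., num_timesteps, 1]`) and covariances (shape
# `[..., num_timesteps, 1, 1]`); the trailing observation dims are squeezed
# off before mixing over posterior draws.
def _example_filtered_moments(lgssm, observed_time_series, is_missing=None):
  """Illustrates extracting per-timestep predictive moments from an LGSSM."""
  (log_likelihoods, filtered_means, filtered_covs, predicted_means,
   predicted_covs, observation_means,
   observation_covs) = lgssm.forward_filter(
       observed_time_series, mask=is_missing)
  del log_likelihoods, filtered_means, filtered_covs  # Unused here.
  del predicted_means, predicted_covs  # Unused here.
  return observation_means[..., 0], observation_covs[..., 0, 0]
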
def one_step_predictive(model,
                        observed_time_series,
                        parameter_samples,
                        timesteps_are_event_shape=True):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing
      `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. Any
      `NaN`s are interpreted as missing observations; missingness may also be
      explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    timesteps_are_event_shape: Deprecated, for backwards compatibility only.
      If `False`, the predictive distribution will return per-timestep
      probabilities.
      Default value: `True`.

  Returns:
    predictive_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps] if timesteps_are_event_shape else []` and batch shape
      `concat([sample_shape, model.batch_shape,
      [] if timesteps_are_event_shape else [num_timesteps]])`, with
      `num_posterior_draws` mixture components. The `t`th step represents
      the forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
  day_of_week = tfp.sts.Seasonal(
      num_seasons=7,
      observed_time_series=observed_time_series,
      name='day_of_week')
  local_linear_trend = tfp.sts.LocalLinearTrend(
      observed_time_series=observed_time_series,
      name='local_linear_trend')
  model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                      observed_time_series=observed_time_series)
  samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
  one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

  predictive_means = one_step_predictive_dist.mean()
  predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast
  using samples from the variational posterior:

  ```python
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
  loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_distribution(
          observed_time_series).log_prob,
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
  samples = surrogate_posterior.sample(30)

  one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
  from matplotlib import pylab as plt
  def plot_one_step_predictive(observed_time_series,
                               forecast_mean,
                               forecast_scale):
    plt.figure(figsize=(12, 6))
    num_timesteps = forecast_mean.shape[-1]
    c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
    plt.plot(observed_time_series, label="observed time series", color=c1)
    plt.plot(forecast_mean, label="one-step prediction", color=c2)
    plt.fill_between(np.arange(num_timesteps),
                     forecast_mean - 2 * forecast_scale,
                     forecast_mean + 2 * forecast_scale,
                     alpha=0.1, color=c2)
    plt.legend()

  plot_one_step_predictive(observed_time_series,
                           forecast_mean=predictive_means,
                           forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations
  from the mean:

  ```python
  z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
              / predictive_scales[..., :-1])
  anomalous_timesteps = tf.boolean_mask(
      tf.range(1, num_timesteps),
      tf.abs(z_scores) > 2.0)
  ```

  """
  with tf.name_scope('one_step_predictive'):
    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    lgssm = tfe_util.JitPublicMethods(
        model.make_state_space_model(num_timesteps=num_timesteps,
                                     param_vals=parameter_samples),
        trace_only=True)  # Avoid eager overhead w/o introducing XLA dependence.
    (_, _, _, _, _, observation_means,
     observation_covs) = lgssm.forward_filter(
         observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    predictive_dist = sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
    if timesteps_are_event_shape:
      predictive_dist = tfd.Independent(
          predictive_dist, reinterpreted_batch_ndims=1)
    return predictive_dist
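
# A hedged sketch of the deprecated `timesteps_are_event_shape` flag
# (illustrative only; `_example_per_timestep_log_probs` is hypothetical and
# `observed_time_series` is assumed to be a plain float `Tensor` of shape
# `[num_timesteps, 1]`): with the default `True`, `log_prob` of a series
# returns one scalar per batch member; with `False`, timesteps move into the
# batch shape and `log_prob` returns one value per timestep.
def _example_per_timestep_log_probs(model, observed_time_series,
                                    parameter_samples):
  """Illustrates per-timestep one-step-ahead predictive log probabilities."""
  per_step_dist = one_step_predictive(
      model, observed_time_series, parameter_samples=parameter_samples,
      timesteps_are_event_shape=False)
  # Batch shape `[..., num_timesteps]`, scalar events: one log-prob per step.
  return per_step_dist.log_prob(observed_time_series[..., 0])
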