def test_noise_variance_posterior_matches_expected(self):
  """Checks the noise-variance posterior against an all-zero-weights baseline.

  The baseline is the inverse-gamma posterior obtained when every weight is
  zero. Two samplers are compared with it: one whose slab prior is made
  extremely narrow, and one whose state is initialized with every feature
  marked as zero.
  """
  # Random regression problem with a batch of two tasks.
  n_features = 5
  n_outputs = 20
  task = self._random_regression_task(
      num_features=n_features,
      num_outputs=n_outputs,
      batch_shape=[2],
      seed=test_util.test_seed())
  design_matrix, _, targets = self.evaluate(task)

  prior_concentration = 0.03
  prior_scale = 0.015
  # Both samplers below share the same noise-variance prior.
  noise_prior_kwargs = dict(
      observation_noise_variance_prior_concentration=prior_concentration,
      observation_noise_variance_prior_scale=prior_scale)

  # Reference posterior on the noise variance when all weights are zero.
  half_sum_of_squares = tf.reduce_sum(tf.square(targets), axis=-1) / 2.
  expected_posterior = tfd.InverseGamma(
      concentration=prior_concentration + n_outputs / 2.,
      scale=prior_scale + half_sum_of_squares)

  # Case 1: weights pinned near zero by shrinking the width of the slab.
  # Shrinking the slab (here) and switching every feature off (below) should
  # give equivalent noise posteriors.
  narrow_slab_sampler = spike_and_slab.SpikeSlabSampler(
      design_matrix,
      weights_prior_precision=tf.eye(n_features) * 1e6,
      **noise_prior_kwargs)
  self.assertAllClose(
      narrow_slab_sampler.observation_noise_variance_posterior_concentration,
      expected_posterior.concentration)
  narrow_slab_state = narrow_slab_sampler._initialize_sampler_state(
      targets=targets,
      nonzeros=tf.ones([n_features], dtype=tf.bool))
  self.assertAllClose(
      narrow_slab_state.observation_noise_variance_posterior_scale,
      expected_posterior.scale,
      atol=1e-2)

  # Case 2: default slab, but every feature starts in the spike (zero).
  all_zero_sampler = spike_and_slab.SpikeSlabSampler(
      design_matrix, **noise_prior_kwargs)
  self.assertAllClose(
      all_zero_sampler.observation_noise_variance_posterior_concentration,
      expected_posterior.concentration)
  all_zero_state = all_zero_sampler._initialize_sampler_state(
      targets=targets,
      nonzeros=tf.zeros([n_features], dtype=tf.bool))
  self.assertAllClose(
      all_zero_state.observation_noise_variance_posterior_scale,
      expected_posterior.scale)
def test_updated_state_matches_initial_computation(self):
  """Checks that flipping a feature agrees with initializing from scratch.

  Starts from a state with all three features nonzero, flips feature 0 off
  and compares with a freshly initialized state for that sparsity pattern,
  then flips it back on and compares with the starting state.
  """
  design_matrix, _, targets = self._random_regression_task(
      num_outputs=2,
      num_features=3,
      batch_shape=[],
      seed=test_util.test_seed())
  sampler = spike_and_slab.SpikeSlabSampler(
      design_matrix=design_matrix, nonzero_prior_prob=0.3)

  dense_pattern = tf.convert_to_tensor([True, True, True])
  dense_state = sampler._initialize_sampler_state(
      targets=targets, nonzeros=dense_pattern)

  # Slab -> spike: the incrementally updated state should equal the state
  # built directly from the resulting sparsity pattern.
  state_after_one_flip = sampler._flip_feature(dense_state, idx=0)
  sparse_pattern = tf.convert_to_tensor([False, True, True])
  state_built_directly = sampler._initialize_sampler_state(
      targets=targets, nonzeros=sparse_pattern)
  self.assertAllCloseNested(state_after_one_flip, state_built_directly)

  # Spike -> slab: flipping the same feature again should undo the change.
  state_after_two_flips = sampler._flip_feature(state_after_one_flip, idx=0)
  self.assertAllCloseNested(
      state_after_two_flips, dense_state, atol=1e-4)
def test_sampler_respects_pseudo_observations(self):
  """Checks that the weights prior precision scales with pseudo-observations.

  Two samplers built on the same design matrix with different
  `default_pseudo_observations` must have different prior precisions, and
  those precisions must agree once each is divided by its own
  pseudo-observation count.
  """
  design_matrix = self.evaluate(
      samplers.uniform([2, 20, 5], seed=test_util.test_seed()))

  few_pseudo_obs = 2.
  many_pseudo_obs = 10.
  sampler_few = spike_and_slab.SpikeSlabSampler(
      design_matrix, default_pseudo_observations=few_pseudo_obs)
  sampler_many = spike_and_slab.SpikeSlabSampler(
      design_matrix, default_pseudo_observations=many_pseudo_obs)

  precision_few = sampler_few.weights_prior_precision
  precision_many = sampler_many.weights_prior_precision

  # The raw precisions differ ...
  self.assertNotAllClose(precision_few, precision_many)
  # ... but agree after normalizing by the number of pseudo-observations.
  self.assertAllClose(precision_few / few_pseudo_obs,
                      precision_many / many_pseudo_obs)
def test_posterior_on_nonzero_subset_matches_bayesian_regression(self):
  """Compares the sampler's weight posterior with a restricted regression.

  For a fixed sparsity pattern, the conditional weights mean and posterior
  precision factor held in the sampler state are compared, on the nonzero
  features only, with the conjugate linear-Gaussian update computed on just
  those features.
  """
  # Synthetic regression task with a batch of two problems.
  num_outputs = 20
  design_matrix, _, targets = self.evaluate(
      self._random_regression_task(
          num_features=5,
          num_outputs=num_outputs,
          batch_shape=[2],
          seed=test_util.test_seed()))

  nonzeros = tf.convert_to_tensor([True, False, True, False, True])

  def nonzero_subvector(x):
    """Keeps the entries along the last axis of `x` selected by `nonzeros`."""
    return tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 1)

  def nonzero_submatrix(x):
    """Keeps the selected entries along both of the last two axes of `x`."""
    rows_kept = tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 2)
    return tf.boolean_mask(rows_kept, nonzeros, axis=ps.rank(x) - 1)

  # Sampler state (weight posterior mean and precision) for this pattern.
  sampler = spike_and_slab.SpikeSlabSampler(design_matrix)
  initial_state = sampler._initialize_sampler_state(
      targets=targets, nonzeros=nonzeros)

  # Analytic posterior for the regression restricted to the selected
  # features. Taking a submatrix of the prior *precision* implicitly
  # conditions on the remaining weights being zero (which is what we want
  # here); taking a submatrix of the covariance would instead give the
  # marginal (unconditional) prior on the selected weights.
  restricted_prior_precision = nonzero_submatrix(
      sampler.weights_prior_precision)
  restricted_prior_scale = tf.linalg.cholesky(
      tf.linalg.inv(restricted_prior_precision))
  expected_mean, expected_precision = tfd.mvn_conjugate_linear_update(
      prior_scale=restricted_prior_scale,
      linear_transformation=nonzero_subvector(design_matrix),
      likelihood_scale=tf.eye(num_outputs),
      observation=targets)

  # The sampler's posterior must match that of the restricted problem.
  self.assertAllClose(
      nonzero_subvector(initial_state.conditional_weights_mean),
      expected_mean)
  self.assertAllClose(
      nonzero_submatrix(initial_state.conditional_posterior_precision_chol),
      tf.linalg.cholesky(expected_precision.to_dense()))
def test_samples_from_weights_prior(self):
  """Checks that an all-zero design matrix makes the sampler follow the prior.

  With a zero design matrix the targets carry no information about the
  weights, so the fraction of nonzero sampled weights should match
  `nonzero_prior_prob`, and the mean sampled noise variance should be close
  to the empirical variance of the targets.
  """
  nonzero_prior_prob = 0.7
  num_outputs, num_features = 200, 4

  # Zero design matrix: the targets say nothing about the weights.
  design_matrix = tf.zeros([num_outputs, num_features])
  targets = 0.42 * samplers.normal([num_outputs],
                                   seed=test_util.test_seed())

  sampler = spike_and_slab.SpikeSlabSampler(
      design_matrix=design_matrix,
      weights_prior_precision=tf.eye(num_features),
      nonzero_prior_prob=nonzero_prior_prob)

  # Draw 100 posterior samples. All state needed by the internal feature
  # sweep is a function of the sparsity pattern, so carrying the weights
  # (and hence their sparsity pattern) through the outer loop is enough.
  @tf.function(autograph=False)
  def loop_body(var_weights_seed, _):
    _, previous_weights, carried_seed = var_weights_seed
    step_seed, seed_for_next_step = samplers.split_seed(carried_seed, n=2)
    new_variance, new_weights = sampler.sample_noise_variance_and_weights(
        initial_nonzeros=tf.not_equal(previous_weights, 0.),
        targets=targets,
        seed=step_seed)
    return new_variance, new_weights, seed_for_next_step

  initial_carry = (1.,
                   tf.ones([num_features]),
                   test_util.test_seed(sampler_type='stateless'))
  variance_samples, weight_samples, _ = tf.scan(
      fn=loop_body, initializer=initial_carry, elems=tf.range(100))

  # With the default (relatively uninformative) prior, the noise variance
  # posterior mean should be close to the most-likely value.
  empirical_variance = tf.math.reduce_std(targets)**2
  self.assertAllClose(tf.reduce_mean(variance_samples),
                      empirical_variance,
                      atol=0.03)

  # There is no evidence for the weights, so the sparsity of the samples
  # should match the prior.
  is_nonzero = tf.cast(tf.not_equal(weight_samples, 0.), tf.float32)
  self.assertAllClose(nonzero_prior_prob,
                      tf.reduce_mean(is_nonzero),
                      atol=0.03)
def test_sanity_check_sweep_over_features(self):
  """Runs one sweep over all features and checks the recovered sparsity.

  The task uses weights with a clear sparsity pattern and a tiny prior
  probability of a feature being nonzero. After one sweep the state should
  hold the true pattern and approximately the true weights. Shapes of the
  remaining state components and finiteness of posterior draws are checked
  as well.
  """
  num_outputs = 100
  num_features = 3
  batch_shape = [2]
  # Weights with an unambiguous sparsity pattern, one row per batch member.
  sparse_weights = tf.convert_to_tensor([[10., 0., -10.],
                                         [0., 0., 0.5]])
  design_matrix, true_weights, targets = self.evaluate(
      self._random_regression_task(
          num_outputs=num_outputs,
          num_features=num_features,
          batch_shape=batch_shape,
          weights=sparse_weights,
          seed=test_util.test_seed()))

  # A tiny nonzero prior probability makes keeping an irrelevant feature
  # very unlikely.
  sampler = spike_and_slab.SpikeSlabSampler(
      design_matrix, nonzero_prior_prob=1e-6)

  starting_state = sampler._initialize_sampler_state(
      targets=targets,
      nonzeros=tf.convert_to_tensor([True, True, True]),
      observation_noise_variance=1.)
  final_state = self.evaluate(
      sampler._resample_all_features(
          starting_state, seed=test_util.test_seed()))

  # The true sparsity pattern and approximate weights should be recovered.
  expected_nonzeros = [[True, False, True],
                       [False, False, True]]
  self.assertAllEqual(final_state.nonzeros, expected_nonzeros)
  self.assertAllClose(final_state.conditional_weights_mean,
                      true_weights,
                      rtol=0.05,
                      atol=0.15)

  # Shapes of the other state components.
  square_shape = batch_shape + [num_features, num_features]
  self.assertAllEqual(
      final_state.conditional_prior_precision_chol.shape, square_shape)
  self.assertAllEqual(
      final_state.conditional_posterior_precision_chol.shape, square_shape)
  self.assertAllEqual(
      final_state.observation_noise_variance_posterior_scale.shape,
      batch_shape)

  # Draws from the conditional posterior must be finite.
  posterior = sampler._get_conditional_posterior(final_state)
  sampled_variances, sampled_weights = self.evaluate(
      posterior.sample(10, seed=test_util.test_seed()))
  self.assertAllFinite(sampled_variances)
  self.assertAllFinite(sampled_weights)
def test_deterministic_given_seed(self, use_xla):
  """Checks that sampling twice with the same seed gives the same draw.

  Args:
    use_xla: Forwarded as `jit_compile` to the `tf.function` that wraps the
      sampling call.
  """
  task = self._random_regression_task(
      num_outputs=3,
      num_features=4,
      batch_shape=[],
      seed=test_util.test_seed())
  design_matrix, _, targets = self.evaluate(task)

  sampler = spike_and_slab.SpikeSlabSampler(design_matrix)
  initial_nonzeros = tf.convert_to_tensor([True, False, False, True])
  seed = test_util.test_seed(sampler_type='stateless')

  @tf.function(jit_compile=use_xla)
  def do_sample(sample_seed):
    return sampler.sample_noise_variance_and_weights(
        targets, initial_nonzeros, seed=sample_seed)

  # Two draws using the identical seed.
  first_variance, first_weights = self.evaluate(do_sample(seed))
  second_variance, second_weights = self.evaluate(do_sample(seed))

  self.assertAllFinite(first_variance)
  self.assertAllClose(first_variance, second_variance)
  self.assertAllFinite(first_weights)
  self.assertAllClose(first_weights, second_weights)
def test_updated_state_matches_initial_computation(
    self, num_outputs, num_features, num_flips, batch_shape, use_xla):
  """Checks that a sequence of feature flips matches direct initialization.

  A random initial sparsity pattern is drawn, then `num_flips` distinct
  features are (conditionally, per batch member) flipped. The state produced
  by applying the flips one at a time through `_flip_feature` must agree
  with the state initialized directly from the final sparsity pattern.

  Args:
    num_outputs: Number of outputs in the random regression task; also
      scales the comparison tolerances.
    num_features: Number of features in the random regression task.
    num_flips: Number of distinct feature indices to flip.
    batch_shape: Python list giving the batch shape of the task (may be
      empty).
    use_xla: Forwarded as `jit_compile` to the `tf.function` that performs
      the flips.
  """
  rng = test_util.test_np_rng()
  # Use the builtin `bool` as the dtype: the `np.bool` alias is not
  # available in every NumPy release.
  initial_nonzeros = rng.randint(
      low=0, high=2, size=batch_shape + [num_features]).astype(bool)
  flip_idxs = rng.choice(
      num_features, size=num_flips, replace=False).astype(np.int32)
  if batch_shape:
    # Each batch member independently decides whether to apply each flip.
    should_flip = rng.randint(
        low=0, high=2, size=[num_flips] + batch_shape).astype(bool)
  else:
    should_flip = np.array([True] * num_flips)

  # Compute the expected final sparsity pattern in NumPy: `!=` on booleans
  # acts as XOR, toggling a feature wherever its flip is enabled.
  nonzeros = initial_nonzeros.copy()
  for flip_idx, flip_mask in zip(flip_idxs, should_flip):
    nonzeros[..., flip_idx] = nonzeros[..., flip_idx] != flip_mask

  design_matrix, _, targets = self._random_regression_task(
      num_outputs=num_outputs, num_features=num_features,
      batch_shape=batch_shape, seed=test_util.test_seed())
  sampler = spike_and_slab.SpikeSlabSampler(design_matrix=design_matrix,
                                            nonzero_prior_prob=0.3)

  @tf.function(autograph=False, jit_compile=use_xla)
  def _do_flips():
    state = sampler._initialize_sampler_state(
        targets=targets,
        nonzeros=initial_nonzeros,
        observation_noise_variance=1.)
    def _do_flip(state, i):
      new_state = sampler._flip_feature(state, tf.gather(flip_idxs, i))
      # Keep the flipped state only where this flip is enabled.
      return mcmc_util.choose(tf.gather(should_flip, i), new_state, state)
    return tf.foldl(_do_flip, elems=tf.range(num_flips), initializer=state)

  self.assertAllCloseNested(
      sampler._initialize_sampler_state(targets, nonzeros, 1.),
      _do_flips(),
      atol=num_outputs * 2e-4, rtol=num_outputs * 2e-4)
def _build_sampler_loop_body(model, observed_time_series, is_missing=None):
  """Builds the body of a Gibbs sampling loop for a model and its data.

  Args:
    model: A structural time series model whose first component is a
      `LocalLevel` or `LocalLinearTrend` and whose second component is a
      `LinearRegression` or `SpikeAndSlabSparseLinearRegression`. This must
      be of the form constructed by `build_model_for_gibbs_sampling`.
    observed_time_series: Float `Tensor` time series of shape
      `[..., num_timesteps]`.
    is_missing: Optional `bool` `Tensor` of shape `[..., num_timesteps]`. A
      `True` value indicates that the observation for that timestep is
      missing.

  Returns:
    sampler_loop_body: Python callable that performs a single cycle of Gibbs
      sampling. Its first argument is a `GibbsSamplerState`, and it returns a
      new `GibbsSamplerState`. The second argument (passed by `tf.scan`) is
      ignored.

  Raises:
    ValueError: If the first or second model component has an unexpected
      type, or if the name of the first model parameter does not contain
      'observation_noise'.
  """
  trend_component = model.components[0]
  if not isinstance(trend_component,
                    (sts.LocalLevel, sts.LocalLinearTrend)):
    raise ValueError('Expected the first model component to be an instance of '
                     '`tfp.sts.LocalLevel` or `tfp.sts.LocalLinearTrend`; '
                     'instead saw {}'.format(trend_component))
  model_has_slope = isinstance(trend_component, sts.LocalLinearTrend)

  regression_component = model.components[1]
  if not isinstance(
      regression_component,
      (sts.LinearRegression, SpikeAndSlabSparseLinearRegression)):
    raise ValueError('Expected the second model component to be an instance of '
                     '`tfp.sts.LinearRegression` or '
                     '`SpikeAndSlabSparseLinearRegression`; '
                     'instead saw {}'.format(regression_component))
  model_has_spike_slab_regression = isinstance(
      regression_component, SpikeAndSlabSparseLinearRegression)

  series_has_missing_values = is_missing is not None
  if series_has_missing_values:
    # Ensure series does not contain NaNs.
    observed_time_series = tf.where(is_missing,
                                    tf.zeros_like(observed_time_series),
                                    observed_time_series)

  num_observed_steps = prefer_static.shape(observed_time_series)[-1]
  design_matrix = _get_design_matrix(model).to_dense()[:num_observed_steps]
  if series_has_missing_values:
    # Zero out design-matrix rows at unobserved timesteps so that they do
    # not affect the posterior on weights.
    design_matrix = tf.where(is_missing[..., tf.newaxis],
                             tf.zeros_like(design_matrix),
                             design_matrix)

  # Untransform scale priors -> variance priors by reaching through the
  # bijector (via each prior's `.distribution`).
  observation_noise_param = model.parameters[0]
  if 'observation_noise' not in observation_noise_param.name:
    raise ValueError('Model parameters {} do not match the expected sampler '
                     'state.'.format(model.parameters))
  observation_noise_variance_prior = observation_noise_param.prior.distribution

  if model_has_slope:
    level_scale_variance_prior, slope_scale_variance_prior = [
        param.prior.distribution for param in trend_component.parameters]
  else:
    level_scale_variance_prior = (
        trend_component.parameters[0].prior.distribution)

  if model_has_spike_slab_regression:
    # The upper bound is optional: not every prior exposes one.
    noise_variance_upper_bound = getattr(
        observation_noise_variance_prior, 'upper_bound', None)
    spike_and_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        weights_prior_precision=regression_component._weights_prior_precision,  # pylint: disable=protected-access
        nonzero_prior_prob=regression_component._sparse_weights_nonzero_prob,  # pylint: disable=protected-access
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior.concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior.scale),
        observation_noise_variance_upper_bound=noise_variance_upper_bound)
  else:
    weights_prior_scale = regression_component.parameters[0].prior.scale

  def sampler_loop_body(previous_sample, _):
    """Runs one sampler iteration, resampling all model variables."""

    (weights_seed,
     level_seed,
     observation_noise_scale_seed,
     level_scale_seed,
     loop_seed) = samplers.split_seed(
         previous_sample.seed, n=5, salt='sampler_loop_body')
    # The slope seed is split off separately to preserve
    # backward-compatible seed behavior.
    slope_scale_seed, = samplers.split_seed(
        previous_sample.seed, n=1, salt='sampler_loop_body_slope')

    # Sample the weights first to encourage a reasonable initialization: at
    # the first step they are regressed directly against the observed time
    # series. If the level were sampled first it might 'explain away' some
    # observed variation that we would ultimately prefer to explain through
    # the regression weights, because the level can represent arbitrary
    # variation, while the weights are limited to representing variation in
    # the subspace given by the design matrix.
    residuals_after_level = observed_time_series - previous_sample.level
    if model_has_spike_slab_regression:
      observation_noise_variance, weights = (
          spike_and_slab_sampler.sample_noise_variance_and_weights(
              initial_nonzeros=tf.not_equal(previous_sample.weights, 0.),
              targets=residuals_after_level,
              seed=weights_seed))
      observation_noise_scale = tf.sqrt(observation_noise_variance)
    else:
      weights = _resample_weights(
          design_matrix=design_matrix,
          target_residuals=residuals_after_level,
          observation_noise_scale=previous_sample.observation_noise_scale,
          weights_prior_scale=weights_prior_scale,
          seed=weights_seed)
      # Noise scale will be resampled below.
      observation_noise_scale = previous_sample.observation_noise_scale

    regression_residuals = observed_time_series - tf.linalg.matvec(
        design_matrix, weights)
    previous_slope_scale = (
        previous_sample.slope_scale if model_has_slope else None)
    latents = _resample_latents(
        observed_residuals=regression_residuals,
        level_scale=previous_sample.level_scale,
        slope_scale=previous_slope_scale,
        observation_noise_scale=observation_noise_scale,
        initial_state_prior=trend_component.initial_state_prior,
        is_missing=is_missing,
        seed=level_seed)
    level = latents[..., 0]
    level_residuals = level[..., 1:] - level[..., :-1]
    if model_has_slope:
      slope = latents[..., 1]
      level_residuals -= slope[..., :-1]
      slope_residuals = slope[..., 1:] - slope[..., :-1]

    # Estimate level scale from the empirical changes in level.
    level_scale = _resample_scale(
        prior=level_scale_variance_prior,
        observed_residuals=level_residuals,
        is_missing=None,
        seed=level_scale_seed)
    if model_has_slope:
      slope_scale = _resample_scale(
          prior=slope_scale_variance_prior,
          observed_residuals=slope_residuals,
          is_missing=None,
          seed=slope_scale_seed)
    else:
      # Without a slope component, carry the previous values through.
      slope = previous_sample.slope
      slope_scale = previous_sample.slope_scale

    if not model_has_spike_slab_regression:
      # Estimate noise scale from the residuals.
      observation_noise_scale = _resample_scale(
          prior=observation_noise_variance_prior,
          observed_residuals=regression_residuals - level,
          is_missing=is_missing,
          seed=observation_noise_scale_seed)

    return GibbsSamplerState(
        observation_noise_scale=observation_noise_scale,
        level_scale=level_scale,
        slope_scale=slope_scale,
        weights=weights,
        level=level,
        slope=slope,
        seed=loop_seed)

  return sampler_loop_body