def test_scalar_priors_broadcast(self):
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  # Build a model with scalar Normal(0., 1.) prior.
  linear_regression = LinearRegression(
      design_matrix=design_matrix,
      weights_prior=tfd.Normal(loc=self._build_placeholder(0.),
                               scale=self._build_placeholder(1.)))
  weights_prior = linear_regression.parameters[0].prior

  self.assertAllEqual([num_features],
                      self.evaluate(weights_prior.event_shape_tensor()))
  self.assertAllEqual(batch_shape,
                      self.evaluate(weights_prior.batch_shape_tensor()))

  prior_sampled_weights = weights_prior.sample()
  ssm = linear_regression.make_state_space_model(
      num_timesteps=num_timesteps,
      param_vals={"weights": prior_sampled_weights})

  lp = ssm.log_prob(ssm.sample())
  self.assertAllEqual(batch_shape, self.evaluate(lp).shape)
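# Illustrative sketch (not from the test suite): the broadcast this test
# asserts matches what `tfd.Sample` does to a scalar base distribution, as in
# the `_build_sts` helper below.
def _sketch_scalar_prior_broadcast():
  import tensorflow as tf
  import tensorflow_probability as tfp
  tfd = tfp.distributions
  base = tfd.Normal(loc=tf.zeros([4, 3]), scale=1.)  # batch_shape [4, 3]
  prior = tfd.Sample(base, sample_shape=[2])         # event_shape [2]
  assert prior.batch_shape == [4, 3]
  assert prior.event_shape == [2]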
def _build_sts(self, observed_time_series=None):
  max_timesteps = 100
  num_features = 3
  prior = tfd.Sample(tfd.Laplace(0., 1.), sample_shape=[num_features])

  # LinearRegression components don't currently take an
  # `observed_time_series` argument, so they can't infer a prior batch
  # shape. This means we have to manually set the batch shape expected
  # by the tests.
  dtype = np.float32
  if observed_time_series is not None:
    observed_time_series_tensor, _ = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    batch_shape = tf.shape(observed_time_series_tensor)[:-2]
    dtype = dtype_util.as_numpy_dtype(observed_time_series_tensor.dtype)
    prior = tfd.Sample(tfd.Laplace(tf.zeros(batch_shape, dtype=dtype), 1.),
                       sample_shape=[num_features])

  regression = LinearRegression(
      design_matrix=np.random.randn(
          max_timesteps, num_features).astype(dtype),
      weights_prior=prior)
  return Sum(components=[regression],
             observed_time_series=observed_time_series)
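# Illustrative note (hypothetical sizes): the canonicalized series carries a
# trailing feature dimension, so the `[:-2]` slice above strips the time and
# feature axes, e.g. shape [4, 3, 100, 1] -> batch_shape [4, 3].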
def test_basic_statistics(self):
  # Verify that this model constructs a distribution with mean
  # `matmul(design_matrix, weights)` and stddev 0.
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  linear_regression = LinearRegression(design_matrix=design_matrix)
  true_weights = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_features])))
  predicted_time_series = tf.linalg.matmul(design_matrix,
                                           true_weights[..., tf.newaxis])

  ssm = linear_regression.make_state_space_model(
      num_timesteps=num_timesteps, param_vals={"weights": true_weights})
  self.assertAllEqual(self.evaluate(ssm.mean()), predicted_time_series)
  self.assertAllEqual(
      *self.evaluate((ssm.stddev(), tf.zeros_like(predicted_time_series))))
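# Illustrative sketch (not from the test suite): the asserted mean is a plain
# batched matmul; with the shapes used above,
# [4, 3, 10, 2] @ [4, 3, 2, 1] -> [4, 3, 10, 1].
def _sketch_regression_mean_shapes():
  import numpy as np
  x = np.random.randn(4, 3, 10, 2)   # batched design matrix
  w = np.random.randn(4, 3, 2)       # batched weights
  mean = x @ w[..., np.newaxis]      # append a column axis before matmul
  assert mean.shape == (4, 3, 10, 1)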
def test_simple_regression_correctness(self):
  # Verify that optimizing a simple linear regression by gradient descent
  # recovers the known-correct weights.
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  true_weights = self._build_placeholder([4., -3.])
  predicted_time_series = tf.linalg.matmul(design_matrix,
                                           true_weights[..., tf.newaxis])

  linear_regression = LinearRegression(
      design_matrix=design_matrix,
      weights_prior=tfd.Independent(
          tfd.Cauchy(loc=self._build_placeholder(np.zeros([num_features])),
                     scale=self._build_placeholder(np.ones([num_features]))),
          reinterpreted_batch_ndims=1))
  observation_noise_scale_prior = tfd.LogNormal(
      loc=self._build_placeholder(-2), scale=self._build_placeholder(0.1))
  model = Sum(components=[linear_regression],
              observation_noise_scale_prior=observation_noise_scale_prior)

  learnable_weights = tf.Variable(
      tf.zeros([num_features], dtype=true_weights.dtype))

  def build_loss():
    learnable_ssm = model.make_state_space_model(
        num_timesteps=num_timesteps,
        param_vals={"LinearRegression/_weights": learnable_weights,
                    "observation_noise_scale":
                        observation_noise_scale_prior.mode()})
    return -learnable_ssm.log_prob(predicted_time_series)

  # We provide graph- and eager-mode optimization for TF 2.0 compatibility.
  num_train_steps = 80
  optimizer = tf1.train.AdamOptimizer(learning_rate=0.1)
  if tf.executing_eagerly():
    for _ in range(num_train_steps):
      optimizer.minimize(build_loss)
  else:
    train_op = optimizer.minimize(build_loss())
    self.evaluate(tf1.global_variables_initializer())
    for _ in range(num_train_steps):
      _ = self.evaluate(train_op)
  self.assertAllClose(*self.evaluate((true_weights, learnable_weights)),
                      atol=0.2)
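# Illustrative sketch (not from the test suite): in eager-only TF2 code the
# same fit can be written with `tf.GradientTape` in place of the dual-mode
# branch above; `build_loss` and `learnable_weights` are as defined in the
# test.
def _sketch_eager_training_loop(build_loss, learnable_weights,
                                num_train_steps=80):
  import tensorflow as tf
  optimizer = tf.optimizers.Adam(learning_rate=0.1)
  for _ in range(num_train_steps):
    with tf.GradientTape() as tape:
      loss = build_loss()
    grads = tape.gradient(loss, [learnable_weights])
    optimizer.apply_gradients(zip(grads, [learnable_weights]))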
def test_custom_weights_prior(self):
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  # Build a model with a batch of scalar Exponential(1.) priors.
  linear_regression = LinearRegression(
      design_matrix=design_matrix,
      weights_prior=tfd.Exponential(
          rate=self._build_placeholder(np.ones(batch_shape))))

  # Check that the prior is broadcast to match the shape of the weights.
  weights = linear_regression.parameters[0]
  self.assertAllEqual([num_features],
                      self.evaluate(weights.prior.event_shape_tensor()))
  self.assertAllEqual(batch_shape,
                      self.evaluate(weights.prior.batch_shape_tensor()))

  prior_sampled_weights = weights.prior.sample()
  ssm = linear_regression.make_state_space_model(
      num_timesteps=num_timesteps,
      param_vals={"weights": prior_sampled_weights})

  lp = ssm.log_prob(ssm.sample())
  self.assertAllEqual(batch_shape, self.evaluate(lp).shape)

  # Verify that the bijector enforces the prior constraint that
  # weights must be nonnegative.
  self.assertAllFinite(
      self.evaluate(
          weights.prior.log_prob(
              weights.bijector(
                  tf.random.normal(tf.shape(weights.prior.sample(64)),
                                   seed=test_util.test_seed(),
                                   dtype=self.dtype)))))
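# Illustrative sketch (not from the test suite): the constraint check above
# relies on the parameter's bijector mapping unconstrained reals into the
# prior's support. `tfb.Softplus` is one such positivity-preserving bijector;
# whether it is the exact default used here is an assumption.
def _sketch_positivity_bijector():
  import tensorflow as tf
  import tensorflow_probability as tfp
  tfb = tfp.bijectors
  unconstrained = tf.constant([-5., 0., 5.])
  constrained = tfb.Softplus()(unconstrained)  # maps R into (0, inf)
  assert bool(tf.reduce_all(constrained > 0.))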