def testConstantFunction(self, constant):
  data_dims = 3
  num_samples = 10**6
  effective_mean = 1.5
  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  effective_log_scale = 0.0
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])

  function = lambda x: tf.ones_like(x[:, 0])
  loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  # The integrand is constant, so the pathwise loss does not depend on the
  # distribution parameters and the gradients are None.
  mean_grads = tf.gradients(loss, mean)[0]
  self.assertFalse(mean_grads)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  self.assertFalse(log_scale_grads)
def testQuadraticFunction(self, effective_mean, effective_log_scale):
  data_dims = 20
  num_samples = 10**6

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  function = lambda x: tf.reduce_sum(x**2)

  cv, expected_cv, _, _ = control_variates.control_delta_method(
      dist, dist_samples, function)
  avg_cv = tf.reduce_mean(cv)
  expected_cv_value = tf.reduce_sum(dist_samples**2) / num_samples

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # This is an analytical computation, so the result should be accurate.
    self.assertAllClose(
        sess.run(avg_cv), sess.run(expected_cv_value), rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(expected_cv), sess.run(expected_cv_value), atol=1e-1)
def testNonPolynomialFunction(self, effective_mean, effective_log_scale):
  data_dims = 10
  num_samples = 10**3

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  function = lambda x: tf.reduce_sum(tf.log(x**2))

  cv, expected_cv, _, _ = control_variates.control_delta_method(
      dist, dist_samples, function)
  avg_cv = tf.reduce_mean(cv)

  self.assertTrue(tf.gradients(expected_cv, mean))
  self.assertTrue(tf.gradients(expected_cv, log_scale))

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # Check that the average value of the control variate is close to the
    # expected value.
    self.assertAllClose(
        sess.run(avg_cv), sess.run(expected_cv), rtol=1e-1, atol=1e-3)
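# Illustrative sketch (not part of the tests above): the delta-method control
# variate is the second-order Taylor expansion of the integrand around the
# distribution mean, so for a diagonal Gaussian its expected value is
# f(mu) + 0.5 * sum_i sigma_i**2 * d^2f/dx_i^2(mu). The concrete values of
# mu and log_scale below are placeholders.
import numpy as np

data_dims = 10
mu = 1.0 * np.ones(data_dims)
sigma = np.exp(0.0 * np.ones(data_dims))

# Quadratic integrand f(x) = sum_i x_i**2: the expansion is exact.
quadratic_expected_cv = np.sum(mu**2) + np.sum(sigma**2)

# Non-polynomial integrand f(x) = sum_i log(x_i**2): the expansion is only an
# approximation, hence the loose tolerances in the tests above.
non_polynomial_expected_cv = np.sum(np.log(mu**2) - sigma**2 / mu**2)

print(quadratic_expected_cv, non_polynomial_expected_cv)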
def testApplyZeroSamples(self, batch_size):
  data_dims = 10
  num_samples = 5
  dataset_size = 500

  mean = tf.Variable(
      tf.zeros(shape=(data_dims), dtype=tf.float32), name='mean')
  log_scale = tf.Variable(
      tf.zeros(shape=(data_dims), dtype=tf.float32), name='log_scale')

  # Prior = posterior.
  prior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  posterior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)

  model = bayes_lr.BayesianLogisticRegression(
      prior, posterior, dataset_size=dataset_size, use_analytical_kl=True)

  # Build the data.
  features = tf.random.uniform((batch_size, data_dims))
  targets = tf.ones(batch_size)
  posterior_samples = tf.zeros((num_samples, data_dims))

  model_output = model.apply(
      features, targets, posterior_samples=posterior_samples)

  expected_predictions = np.ones((batch_size, num_samples))
  expected_accuracy = 1.
  expected_data_log_probs = np.log(0.5) * np.ones((batch_size))
  expected_elbo = np.log(0.5) * dataset_size * np.ones((num_samples))

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    self.assertEqual(sess.run(model.analytical_kl), 0)
    self.assertAllEqual(
        sess.run(model_output.predictions), expected_predictions)
    self.assertAllEqual(sess.run(model_output.accuracy), expected_accuracy)
    self.assertAllClose(
        sess.run(model_output.data_log_probs), expected_data_log_probs)
    self.assertAllClose(sess.run(model_output.elbo), expected_elbo)
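# Illustrative sketch (not part of the test above): with all-zero posterior
# samples the logistic-regression logits are zero, every target then has
# probability sigmoid(0) = 0.5, the analytical KL between identical prior and
# posterior is zero, and the per-sample ELBO reduces to dataset_size * log(0.5).
# The shapes and sizes below are placeholders.
import numpy as np

batch_size, data_dims, num_samples, dataset_size = 32, 10, 5, 500
features = np.random.uniform(size=(batch_size, data_dims))
weights = np.zeros((num_samples, data_dims))

logits = features.dot(weights.T)              # All zeros.
log_probs = -np.log1p(np.exp(-logits))        # log(sigmoid(0)) = log(0.5) everywhere.
elbo = dataset_size * log_probs.mean(axis=0)  # One value per posterior sample.

assert np.allclose(elbo, dataset_size * np.log(0.5))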
def testNonPolynomialFunctionConsistencyWithReparam(
    self, effective_mean, effective_log_scale, function, coupling):
  num_samples = 10**5
  effective_mean = np.array(effective_mean)
  effective_log_scale = np.array(effective_log_scale)
  data_dims = len(effective_mean)

  mean = tf.constant(effective_mean, dtype=tf.float32)
  log_scale = tf.constant(effective_log_scale, dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)

  loss, _ = gradient_estimators.measure_valued_loss(
      function, dist_samples, dist, coupling=coupling)
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)
  log_scale_grads = tf.gradients(loss, log_scale)[0]
  log_scale_grads.shape.assert_is_compatible_with(data_dims)

  reparam_loss = gradient_estimators.pathwise_loss(
      function, dist_samples, dist)
  reparam_loss.shape.assert_is_compatible_with([num_samples])
  reparam_loss = tf.reduce_mean(reparam_loss)

  reparam_mean_grads = tf.gradients(reparam_loss, mean)[0]
  reparam_log_scale_grads = tf.gradients(reparam_loss, log_scale)[0]

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    (mean_grads_np, log_scale_grads_np,
     reparam_mean_grads_np, reparam_log_scale_grads_np) = sess.run([
         mean_grads, log_scale_grads,
         reparam_mean_grads, reparam_log_scale_grads])

    self.assertAllClose(
        reparam_mean_grads_np, mean_grads_np, rtol=5e-1, atol=1e-1)
    self.assertAllClose(
        reparam_log_scale_grads_np, log_scale_grads_np, rtol=5e-1, atol=1e-1)
def testWeightedQuadratic(self, effective_mean, effective_log_scale,
                          weights, coupling):
  num_samples = 5 * 10**5
  effective_mean = np.array(effective_mean)
  effective_log_scale = np.array(effective_log_scale)
  weights = np.array(weights)
  data_dims = len(effective_mean)

  mean = tf.constant(effective_mean, dtype=tf.float32)
  log_scale = tf.constant(effective_log_scale, dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  function = lambda x: (tf.reduce_sum(x * weights, axis=1))**2

  loss, _ = gradient_estimators.measure_valued_loss(
      function, dist_samples, dist, coupling=coupling)
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)
  log_scale_grads = tf.gradients(loss, log_scale)[0]
  log_scale_grads.shape.assert_is_compatible_with(data_dims)

  expected_mean_grads = 2 * weights * np.sum(weights * effective_mean)
  effective_scale = np.exp(effective_log_scale)
  expected_scale_grads = 2 * weights**2 * effective_scale
  expected_log_scale_grads = expected_scale_grads * effective_scale

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    mean_grads_np, log_scale_grads_np = sess.run(
        [mean_grads, log_scale_grads])
    self.assertAllClose(
        expected_mean_grads, mean_grads_np, rtol=1e-1, atol=1e-1)
    self.assertAllClose(
        expected_log_scale_grads, log_scale_grads_np, rtol=1e-1, atol=1e-1)
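# Illustrative sketch (not part of the test above): for f(x) = (w . x)**2 with
# x ~ N(mu, diag(sigma**2)), E[f(x)] = (w . mu)**2 + sum_i w_i**2 * sigma_i**2,
# which gives the closed-form gradients the test checks against. The concrete
# values below are placeholders.
import numpy as np

w = np.array([1.0, 2.0, 3.0])
mu = np.array([0.1, -0.2, 0.3])
log_sigma = np.array([0.0, -0.5, 0.5])
sigma = np.exp(log_sigma)

mean_grads = 2.0 * w * np.sum(w * mu)    # dE[f] / dmu
scale_grads = 2.0 * w**2 * sigma         # dE[f] / dsigma
log_scale_grads = scale_grads * sigma    # Chain rule through sigma = exp(log_sigma).
print(mean_grads, scale_grads, log_scale_grads)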
def testConstantFunction(self, constant):
  data_dims = 3
  num_samples = 10**6
  effective_mean = 1.5
  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  effective_log_scale = 0.0
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])

  function = lambda x: tf.ones_like(x[:, 0])
  loss = gradient_estimators.score_function_loss(
      function, dist_samples, dist)

  # Average over the number of samples.
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)
  expected_mean_grads = np.zeros(data_dims, dtype=np.float32)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  expected_log_scale_grads = np.zeros(data_dims, dtype=np.float32)

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    self.assertAllClose(
        sess.run(mean_grads), expected_mean_grads, rtol=1e-1, atol=5e-3)
    self.assertAllClose(
        sess.run(log_scale_grads), expected_log_scale_grads,
        rtol=1e-1, atol=5e-3)
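# Illustrative sketch (not part of the test above): for a constant integrand
# the score-function (REINFORCE) estimator c * d/dtheta log p(x; theta) has
# zero expectation, since E[d/dtheta log p(x; theta)] = d/dtheta E[1] = 0, so
# both gradients should vanish up to Monte Carlo noise. The values below are
# placeholders.
import numpy as np

np.random.seed(0)
mu, sigma, num_samples = 1.5, 1.0, 10**6
x = np.random.normal(mu, sigma, size=num_samples)
score_wrt_mu = (x - mu) / sigma**2  # d/dmu log N(x; mu, sigma**2)
print(np.mean(1.0 * score_wrt_mu))  # Close to 0.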
def testQuadraticFunction(self, effective_mean, effective_log_scale):
  data_dims = 1
  num_samples = 10**6

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  function = lambda x: tf.reduce_sum(x**2, axis=1)

  loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)
  loss.shape.assert_is_compatible_with([])

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)
  expected_mean_grads = 2 * effective_mean * np.ones(
      data_dims, dtype=np.float32)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  log_scale_grads.shape.assert_is_compatible_with(data_dims)
  expected_log_scale_grads = 2 * np.exp(2 * effective_log_scale) * np.ones(
      data_dims, dtype=np.float32)

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    self.assertAllClose(
        sess.run(mean_grads), expected_mean_grads, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(log_scale_grads), expected_log_scale_grads,
        rtol=1e-1, atol=1e-3)
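# Illustrative sketch (not part of the test above): for f(x) = sum_i x_i**2
# with x ~ N(mu, diag(exp(log_scale))**2), E[f(x)] = sum_i (mu_i**2 +
# exp(2 * log_scale_i)), so dE/dmu_i = 2 * mu_i and dE/dlog_scale_i =
# 2 * exp(2 * log_scale_i), which are the targets used above. The concrete
# values below are placeholders.
import numpy as np

mu, log_scale = 1.5, 0.25
expected_value = mu**2 + np.exp(2 * log_scale)
expected_mean_grad = 2 * mu
expected_log_scale_grad = 2 * np.exp(2 * log_scale)
print(expected_value, expected_mean_grad, expected_log_scale_grad)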
def testLinearFunction(self, effective_mean, effective_log_scale):
  data_dims = 3
  num_samples = 10**6

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])
  function = lambda x: tf.reduce_sum(x, axis=1)

  loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)
  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)
  loss.shape.assert_is_compatible_with([])

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)
  expected_mean_grads = np.ones(data_dims, dtype=np.float32)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  expected_log_scale_grads = np.zeros(data_dims, dtype=np.float32)

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # This is an analytical computation, so the result should be accurate.
    self.assertAllClose(
        sess.run(mean_grads), expected_mean_grads, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(log_scale_grads), expected_log_scale_grads, atol=1e-2)
def main(argv):
  del argv

  # Training data.
  features, targets = data_utils.get_sklearn_data_as_tensors(
      batch_size=config.batch_size, dataset_name=config.dataset_name)

  # Eval data.
  eval_features, eval_targets = data_utils.get_sklearn_data_as_tensors(
      batch_size=None, dataset_name=config.dataset_name)
  dataset_size = eval_features.get_shape()[0]
  data_dims = features.shape[1]

  prior = dist_utils.multi_normal(
      loc=tf.zeros(data_dims), log_scale=tf.zeros(data_dims))
  with tf.variable_scope('posterior'):
    posterior = dist_utils.diagonal_gaussian_posterior(data_dims)

  model = bayes_lr.BayesianLogisticRegression(
      prior=prior, posterior=posterior, dataset_size=dataset_size,
      use_analytical_kl=config.use_analytical_kl)

  grad_loss_fn = _get_grad_loss_fn()
  control_variate_fn = _get_control_variate_fn()
  jacobian_parallel_iterations = _jacobian_parallel_iterations()

  def model_loss(features, targets, posterior_samples):
    num_posterior_samples_cv_coeff = config.num_posterior_samples_cv_coeff
    return blr_model_grad_utils.model_surrogate_loss(
        model, features, targets, posterior_samples,
        grad_loss_fn=grad_loss_fn,
        control_variate_fn=control_variate_fn,
        estimate_cv_coeff=config.estimate_cv_coeff,
        num_posterior_samples_cv_coeff=num_posterior_samples_cv_coeff,
        jacobian_parallel_iterations=jacobian_parallel_iterations)

  posterior_samples = posterior.sample(config.num_posterior_samples)
  train_loss, _ = model_loss(features, targets, posterior_samples)
  train_loss = tf.reduce_mean(train_loss)

  num_eval_posterior_samples = config.num_eval_posterior_samples
  eval_posterior_samples = posterior.sample(num_eval_posterior_samples)
  eval_model_output = model.apply(
      eval_features, eval_targets, posterior_samples=eval_posterior_samples)
  _, jacobians = model_loss(
      eval_features, eval_targets, eval_posterior_samples)

  eval_model_metrics = metrics_fetch_dict(eval_model_output)
  jacobians = _pretty_jacobians(jacobians)

  # Compute the surrogate loss without any variance reduction.
  # Used as a sanity check and for debugging.
  if _variance_reduction():
    if control_variate_fn:
      no_var_reduction_grad_fn = grad_loss_fn
      no_var_reduction_prefix = 'no_control_variate'
    elif config.gradient_config.type == 'measure_valued':
      # Compute the loss and stats when not using coupling.
      def no_var_reduction_grad_fn(function, dist_samples, dist):
        return gradient_estimators.measure_valued_loss(
            function, dist_samples, dist, coupling=False)
      no_var_reduction_prefix = 'no_coupling'

    _, no_var_reduction_jacobians = blr_model_grad_utils.model_surrogate_loss(
        model, eval_features, eval_targets, eval_posterior_samples,
        grad_loss_fn=no_var_reduction_grad_fn,
        jacobian_parallel_iterations=jacobian_parallel_iterations)
    no_var_reduction_jacobians = _pretty_jacobians(no_var_reduction_jacobians)
  else:
    # No variance reduction used. No reason for additional logging.
    no_var_reduction_jacobians = {}

  for j in no_var_reduction_jacobians.values():
    assert j.get_shape().as_list()[0] == num_eval_posterior_samples

  start_learning_rate = config.start_learning_rate
  global_step = tf.train.get_or_create_global_step()
  if config.cosine_learning_rate_decay:
    training_steps = config.training_steps
    learning_rate_multiplier = tf.math.cos(
        np.pi / 2 * tf.cast(global_step, tf.float32) / training_steps)
  else:
    learning_rate_multiplier = tf.constant(1.0)
  learning_rate = start_learning_rate * learning_rate_multiplier

  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  train_op = optimizer.minimize(train_loss, global_step=global_step)

  hyper_dict = {
      'start_learning_rate': config.start_learning_rate,
      'num_posterior_samples': config.num_posterior_samples,
      'batch_size': config.batch_size}

  summary_writer = tf.summary.FileWriter(
      os.path.join(config.experiment_dir, 'logs'))

  # Checkpointing.
  hooks = _configure_hooks(train_loss)

  i = -1
  with tf.train.MonitoredSession(hooks=hooks) as sess:
    logging.info('starting training')
    for i in range(config.training_steps):
      sess.run(train_op)
      if (i + 1) % config.report_interval == 0:
        # Training loss and debug ops.
        logging.info('global_step %i', sess.run(global_step))
        logging.info('training loss at step %i: %f', i, sess.run(train_loss))

        # Compute multi batch eval metrics.
        multi_batch_metrics = run_multi_batch_metrics(
            eval_model_metrics, sess, config.num_eval_batches)
        for key, value in multi_batch_metrics.items():
          logging.info('%s at step %i: %f', key, i, value)

        posterior_vars_value = sess.run(
            {v.name: v for v in model.posterior.dist_vars})
        for k, value in posterior_vars_value.items():
          logging.info('%s avg at step %i: %f', k, i, np.mean(value))

        metrics = multi_batch_metrics
        metrics.update({'step': i})
        metrics.update({'learning_rate': sess.run(learning_rate)})
        metrics.update(hyper_dict)

        if (i + 1) % config.grad_report_interval == 0:
          gradient_stats, grad_log_keys = run_gradient_stats(
              jacobians, sess, config.num_eval_batches)
          for key in grad_log_keys:
            logging.info('%s at step %i: %f', key, i, gradient_stats[key])
          metrics.update(gradient_stats)

          if no_var_reduction_jacobians:
            no_var_reduction_grad_stats, grad_log_keys = run_gradient_stats(
                no_var_reduction_jacobians, sess, config.num_eval_batches)
            no_var_reduction_grad_stats = {
                no_var_reduction_prefix + '_' + k: v
                for k, v in no_var_reduction_grad_stats.items()}
            metrics.update(no_var_reduction_grad_stats)

        _add_summaries(summary_writer, metrics)
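# Illustrative sketch (not part of the training script above): the cosine
# schedule scales the base learning rate by cos(pi / 2 * step / training_steps),
# decaying it smoothly from the full rate at step 0 to zero at the final step.
# The concrete values below are placeholders.
import numpy as np

start_learning_rate, training_steps = 0.01, 1000
steps = np.array([0, 250, 500, 750, 1000])
multipliers = np.cos(np.pi / 2 * steps / training_steps)
print(start_learning_rate * multipliers)  # [0.01, ~0.0092, ~0.0071, ~0.0038, ~0.0]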
def testNonPolynomialFunctionConsistency(
    self, effective_mean, effective_log_scale, grad_loss_fn, num_samples):
  """Check that the gradients are consistent between estimators."""
  data_dims = 3

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)
  dist_vars = [mean, log_scale]

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)

  model_loss_fn = lambda x: tf.log(tf.reduce_sum(x**2, axis=1))
  control_variate_fn = control_variates.control_delta_method
  grad_loss_fn = utils.grad_loss_fn_with_jacobians(grad_loss_fn)

  loss, jacobians = control_variates.control_variates_surrogate_loss(
      dist=dist,
      dist_samples=dist_samples,
      dist_vars=dist_vars,
      model_loss_fn=model_loss_fn,
      grad_loss_fn=grad_loss_fn,
      control_variate_fn=control_variate_fn)

  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  mean_jacobians = jacobians[mean]
  mean_jacobians.shape.assert_is_compatible_with([num_samples, data_dims])
  mean_grads_from_jacobian = tf.reduce_mean(mean_jacobians, axis=0)

  log_scale_jacobians = jacobians[log_scale]
  log_scale_jacobians.shape.assert_is_compatible_with(
      [num_samples, data_dims])
  log_scale_grads_from_jacobian = tf.reduce_mean(log_scale_jacobians, axis=0)

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  log_scale_grads.shape.assert_is_compatible_with(data_dims)

  no_cv_loss, _ = grad_loss_fn(model_loss_fn, dist_samples, dist)
  no_cv_loss.shape.assert_is_compatible_with([num_samples])
  no_cv_loss = tf.reduce_mean(no_cv_loss)

  no_cv_mean_grads = tf.gradients(no_cv_loss, mean)[0]
  no_cv_mean_grads.shape.assert_is_compatible_with(data_dims)

  no_cv_log_scale_grads = tf.gradients(no_cv_loss, log_scale)[0]
  no_cv_log_scale_grads.shape.assert_is_compatible_with(data_dims)

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    (mean_grads_from_jacobian_np, mean_grads_np,
     log_scale_grads_from_jacobian_np, log_scale_grads_np,
     no_cv_mean_grads_np, no_cv_log_scale_grads_np) = sess.run([
         mean_grads_from_jacobian, mean_grads,
         log_scale_grads_from_jacobian, log_scale_grads,
         no_cv_mean_grads, no_cv_log_scale_grads])

    self.assertAllClose(
        mean_grads_from_jacobian_np, mean_grads_np, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        log_scale_grads_from_jacobian_np, log_scale_grads_np,
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        mean_grads_np, no_cv_mean_grads_np, rtol=1e-1, atol=1e-1)
    self.assertAllClose(
        log_scale_grads_np, no_cv_log_scale_grads_np, rtol=1e-1, atol=1e-1)
def testQuadraticFunctionWithAnalyticalLoss(
    self, effective_mean, effective_log_scale, grad_loss_fn):
  data_dims = 3
  num_samples = 10**3

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)
  dist_vars = [mean, log_scale]

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)

  model_loss_fn = lambda x: tf.reduce_sum(x**2, axis=1)
  control_variate_fn = control_variates.control_delta_method

  loss, jacobians = control_variates.control_variates_surrogate_loss(
      dist=dist,
      dist_samples=dist_samples,
      dist_vars=dist_vars,
      model_loss_fn=model_loss_fn,
      grad_loss_fn=utils.grad_loss_fn_with_jacobians(grad_loss_fn),
      control_variate_fn=control_variate_fn)

  loss.shape.assert_is_compatible_with([num_samples])
  loss = tf.reduce_mean(loss)

  expected_mean_grads = 2 * effective_mean * np.ones(
      data_dims, dtype=np.float32)
  expected_log_scale_grads = 2 * np.exp(2 * effective_log_scale) * np.ones(
      data_dims, dtype=np.float32)

  mean_jacobians = jacobians[mean]
  mean_jacobians.shape.assert_is_compatible_with([num_samples, data_dims])
  mean_grads_from_jacobian = tf.reduce_mean(mean_jacobians, axis=0)

  log_scale_jacobians = jacobians[log_scale]
  log_scale_jacobians.shape.assert_is_compatible_with(
      [num_samples, data_dims])
  log_scale_grads_from_jacobian = tf.reduce_mean(log_scale_jacobians, axis=0)

  mean_grads = tf.gradients(loss, mean)[0]
  mean_grads.shape.assert_is_compatible_with(data_dims)

  log_scale_grads = tf.gradients(loss, log_scale)[0]
  log_scale_grads.shape.assert_is_compatible_with(data_dims)

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    self.assertAllClose(
        sess.run(mean_grads), expected_mean_grads, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(log_scale_grads), expected_log_scale_grads,
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(mean_grads_from_jacobian), expected_mean_grads,
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(log_scale_grads_from_jacobian), expected_log_scale_grads,
        rtol=1e-1, atol=1e-3)
def testNonPolynomialFunctionWithGradients(self):
  data_dims = 1
  num_samples = 10**3
  effective_mean = 1.
  effective_log_scale = 1.

  mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
  log_scale = effective_log_scale * tf.ones(
      shape=(data_dims), dtype=tf.float32)

  dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  dist_samples = dist.sample(num_samples)
  function = lambda x: tf.reduce_sum(tf.log(x**2))

  (cv, expected_cv,
   surrogate_cv, jacobians) = control_variates.control_delta_method(
       dist, dist_samples, function,
       grad_loss_fn=utils.grad_loss_fn_with_jacobians(
           gradient_estimators.pathwise_loss))

  surrogate_cv = tf.reduce_mean(surrogate_cv)
  mean_cv_grads = tf.gradients(surrogate_cv, mean)[0]
  mean_expected_cv_grads = tf.gradients(expected_cv, mean)[0]

  log_scale_cv_grads = tf.gradients(surrogate_cv, log_scale)[0]
  log_scale_expected_cv_grads = tf.gradients(expected_cv, log_scale)[0]

  # The second-order expansion is log(mu**2) + 1/2 * sigma**2 * (-2 / mu**2),
  # evaluated at mu = 1 and sigma = exp(1).
  expected_cv_val = -np.exp(1.)**2

  # Its gradient wrt mu is 2 / mu + 2 * sigma**2 / mu**3, evaluated at mu = 1
  # and sigma = exp(1).
  expected_cv_mean_grad = 2 + 2 * np.exp(1.)**2

  mean_jacobians = jacobians[mean]
  log_scale_jacobians = jacobians[log_scale]

  with self.test_session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    self.assertAllClose(
        sess.run(tf.reduce_mean(cv)), sess.run(expected_cv),
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(expected_cv), expected_cv_val, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(tf.reduce_mean(cv)), expected_cv_val, rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(mean_expected_cv_grads[0]), expected_cv_mean_grad,
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(mean_cv_grads), sess.run(mean_expected_cv_grads),
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(log_scale_cv_grads), sess.run(log_scale_expected_cv_grads),
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(tf.reduce_mean(mean_jacobians)),
        # Strip the leading dimension of 1.
        sess.run(mean_cv_grads[0]),
        rtol=1e-1, atol=1e-3)
    self.assertAllClose(
        sess.run(tf.reduce_mean(log_scale_jacobians)),
        # Strip the leading dimension of 1.
        sess.run(log_scale_cv_grads[0]),
        rtol=1e-1, atol=1e-3)
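# Illustrative sketch (not part of the test above): the expected values used in
# the test follow from the second-order expansion of f(x) = log(x**2) around mu,
#   E[cv] ~= log(mu**2) - sigma**2 / mu**2,
#   dE[cv]/dmu = 2 / mu + 2 * sigma**2 / mu**3,
# evaluated at mu = 1 and sigma = exp(1).
import numpy as np

mu, sigma = 1.0, np.exp(1.0)
expected_cv = np.log(mu**2) - sigma**2 / mu**2
expected_cv_mean_grad = 2.0 / mu + 2.0 * sigma**2 / mu**3
print(expected_cv, expected_cv_mean_grad)  # -e**2 and 2 + 2 * e**2.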
def testApply(self):
  data_dims = 10
  batch_size = 50
  num_samples = 6
  dataset_size = 500

  assert not batch_size % 2
  assert not num_samples % 2

  mean = tf.Variable(
      tf.zeros(shape=(data_dims), dtype=tf.float32), name='mean')
  log_scale = tf.Variable(
      tf.zeros(shape=(data_dims), dtype=tf.float32), name='log_scale')

  # Prior = posterior.
  prior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
  posterior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)

  model = bayes_lr.BayesianLogisticRegression(
      prior, posterior, dataset_size=dataset_size, use_analytical_kl=True)

  # Build the data.
  features = 3 * tf.ones((batch_size, data_dims), dtype=tf.float32)
  targets = tf.concat(
      [tf.zeros(int(batch_size / 2), dtype=tf.float32),
       tf.ones(int(batch_size / 2), dtype=tf.float32)],
      axis=0)
  posterior_samples = tf.concat(
      [tf.ones((int(num_samples / 2), data_dims), dtype=tf.float32),
       -1 * tf.ones((int(num_samples / 2), data_dims), dtype=tf.float32)],
      axis=0)

  model_output = model.apply(
      features, targets, posterior_samples=posterior_samples)

  expected_logits = 3 * data_dims * np.concatenate(
      [np.ones((batch_size, int(num_samples / 2))),
       -1 * np.ones((batch_size, int(num_samples / 2)))],
      axis=1)

  quarter_ones = np.ones((int(batch_size / 2), int(num_samples / 2)))

  # Compute log probs for the entire batch, for the first half of samples.
  first_half_data_expected_log_probs = np.concatenate(
      [np.log(1 - _sigmoid(3 * data_dims)) * quarter_ones,
       np.log(_sigmoid(3 * data_dims)) * quarter_ones],
      axis=0)

  # Compute log probs for the entire batch, for the second half of samples.
  second_half_data_expected_log_probs = np.concatenate(
      [np.log(1 - _sigmoid(-3 * data_dims)) * quarter_ones,
       np.log(_sigmoid(-3 * data_dims)) * quarter_ones],
      axis=0)

  expected_log_probs = np.concatenate(
      [first_half_data_expected_log_probs,
       second_half_data_expected_log_probs],
      axis=1)

  first_half_expected_elbo = np.log(1 - _sigmoid(3 * data_dims))
  first_half_expected_elbo += np.log(_sigmoid(3 * data_dims))

  second_half_expected_elbo = np.log(_sigmoid(-3 * data_dims))
  second_half_expected_elbo += np.log(1 - _sigmoid(-3 * data_dims))

  expected_elbo = dataset_size / 2. * np.concatenate(
      [first_half_expected_elbo * np.ones((int(num_samples / 2))),
       second_half_expected_elbo * np.ones((int(num_samples / 2)))])

  expected_predictions = np.concatenate(
      [np.ones((batch_size, int(num_samples / 2))),
       np.zeros((batch_size, int(num_samples / 2)))],
      axis=1)
  expected_accuracy = 0.5

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    self.assertEqual(sess.run(model_output.kl), 0)
    self.assertAllEqual(sess.run(model_output.logits), expected_logits)
    self.assertAllEqual(
        sess.run(model_output.predictions), expected_predictions)
    self.assertAllEqual(sess.run(model_output.accuracy), expected_accuracy)
    self.assertAllClose(
        sess.run(model_output.log_probs), expected_log_probs,
        rtol=1e-1, atol=5e-3)
    self.assertAllClose(
        sess.run(model_output.elbo), expected_elbo, rtol=1e-1, atol=5e-3)
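# Illustrative sketch (not part of the test above): the expectations in the
# test follow from logits of +/- 3 * data_dims. For the +1 posterior samples,
# the half of the batch with targets 0 contributes log(1 - sigmoid(3 * data_dims))
# per datapoint and the half with targets 1 contributes log(sigmoid(3 * data_dims));
# the ELBO per posterior sample is dataset_size times the batch-averaged log
# probability, with a zero KL term since prior == posterior.
import numpy as np

def _sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

data_dims, dataset_size = 10, 500
logit = 3.0 * data_dims
batch_avg_log_prob = 0.5 * (
    np.log(_sigmoid(logit)) + np.log(1.0 - _sigmoid(logit)))
elbo_for_positive_samples = dataset_size * batch_avg_log_prob
print(elbo_for_positive_samples)  # Matches dataset_size / 2. * first_half_expected_elbo.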