Example No. 1
    def testConstantFunction(self, constant):
        data_dims = 3
        num_samples = 10**6

        effective_mean = 1.5
        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)

        effective_log_scale = 0.0
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])

        function = lambda x: constant * tf.ones_like(x[:, 0])
        loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)

        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

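        # The function is constant in x, so the pathwise estimator has no
        # gradient path to the distribution parameters and tf.gradients
        # returns [None] for both of them.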
        mean_grads = tf.gradients(loss, mean)[0]
        self.assertFalse(mean_grads)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        self.assertFalse(log_scale_grads)

    def testQuadraticFunction(self, effective_mean, effective_log_scale):
        data_dims = 20
        num_samples = 10**6

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        function = lambda x: tf.reduce_sum(x**2)

        cv, expected_cv, _, _ = control_variates.control_delta_method(
            dist, dist_samples, function)
        avg_cv = tf.reduce_mean(cv)
        expected_cv_value = tf.reduce_sum(dist_samples**2) / num_samples

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            # This should be an analytical computation, so the result needs to
            # be accurate.
            self.assertAllClose(sess.run(avg_cv),
                                sess.run(expected_cv_value),
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(expected_cv),
                                sess.run(expected_cv_value),
                                atol=1e-1)

    def testNonPolynomialFunction(self, effective_mean, effective_log_scale):
        data_dims = 10
        num_samples = 10**3

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        function = lambda x: tf.reduce_sum(tf.log(x**2))

        cv, expected_cv, _, _ = control_variates.control_delta_method(
            dist, dist_samples, function)
        avg_cv = tf.reduce_mean(cv)

        self.assertTrue(tf.gradients(expected_cv, mean))
        self.assertTrue(tf.gradients(expected_cv, log_scale))

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            # Check that the average value of the control variate is close to the
            # expected value.
            self.assertAllClose(sess.run(avg_cv),
                                sess.run(expected_cv),
                                rtol=1e-1,
                                atol=1e-3)
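
Side note (not part of the original test): the closed form that the delta-method control variate reproduces exactly for a quadratic can be checked with a few lines of standalone NumPy. The values below are illustrative, not the test's parameters.

import numpy as np

# For x ~ N(mu, diag(sigma^2)), E[sum_i x_i^2] = sum_i (mu_i^2 + sigma_i^2);
# a second-order (delta method) expansion of a quadratic is exact, which is
# why avg_cv above matches the Monte Carlo estimate expected_cv_value.
rng = np.random.default_rng(0)
data_dims, num_samples = 20, 10**5
mu = 1.5 * np.ones(data_dims)
sigma = np.exp(0.0) * np.ones(data_dims)

samples = rng.normal(mu, sigma, size=(num_samples, data_dims))
mc_estimate = np.mean(np.sum(samples**2, axis=1))
closed_form = np.sum(mu**2 + sigma**2)
print(mc_estimate, closed_form)  # Agree up to Monte Carlo noise.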
Example No. 4
    def testApplyZeroSamples(self, batch_size):
        data_dims = 10
        num_samples = 5
        dataset_size = 500

        mean = tf.Variable(tf.zeros(shape=(data_dims), dtype=tf.float32),
                           name='mean')
        log_scale = tf.Variable(tf.zeros(shape=(data_dims), dtype=tf.float32),
                                name='log_scale')

        # Prior = posterior.
        prior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        posterior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        model = bayes_lr.BayesianLogisticRegression(prior,
                                                    posterior,
                                                    dataset_size=dataset_size,
                                                    use_analytical_kl=True)

        # Build the data
        features = tf.random.uniform((batch_size, data_dims))
        targets = tf.ones(batch_size)

        posterior_samples = tf.zeros((num_samples, data_dims))
        model_output = model.apply(features,
                                   targets,
                                   posterior_samples=posterior_samples)

        expected_predictions = np.ones((batch_size, num_samples))
        expected_accuracy = 1.

        expected_data_log_probs = np.log(0.5) * np.ones((batch_size))
        expected_elbo = np.log(0.5) * dataset_size * np.ones((num_samples))
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())

            self.assertEqual(sess.run(model.analytical_kl), 0)
            self.assertAllEqual(sess.run(model_output.predictions),
                                expected_predictions)
            self.assertAllEqual(sess.run(model_output.accuracy),
                                expected_accuracy)
            self.assertAllClose(sess.run(model_output.data_log_probs),
                                expected_data_log_probs)
            self.assertAllClose(sess.run(model_output.elbo), expected_elbo)
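
A quick sanity check of the constants this test expects, as standalone NumPy (the batch size and the reductions below are illustrative, not the model's exact implementation): with all-zero posterior samples the logits are zero, every Bernoulli likelihood is sigmoid(0) = 0.5, and the KL vanishes because prior and posterior coincide.

import numpy as np

# Zero weights -> zero logits -> every per-example Bernoulli log-likelihood is
# log(sigmoid(0)) = log(0.5), independent of the targets.
batch_size, num_samples, dataset_size = 32, 5, 500
logits = np.zeros((batch_size, num_samples))
log_probs = np.log(1.0 / (1.0 + np.exp(-logits)))   # log(0.5) everywhere

# Per-data-point log-probs and per-posterior-sample ELBO (KL = 0).
data_log_probs = log_probs.mean(axis=1)             # log(0.5) * ones(batch_size)
elbo = dataset_size * log_probs.mean(axis=0)        # log(0.5) * dataset_size * ones(num_samples)
print(data_log_probs[0], elbo[0])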
Example No. 5
    def testNonPolynomialFunctionConsistencyWithReparam(
            self, effective_mean, effective_log_scale, function, coupling):
        num_samples = 10**5

        effective_mean = np.array(effective_mean)
        effective_log_scale = np.array(effective_log_scale)
        data_dims = len(effective_mean)

        mean = tf.constant(effective_mean, dtype=tf.float32)
        log_scale = tf.constant(effective_log_scale, dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)

        loss, _ = gradient_estimators.measure_valued_loss(function,
                                                          dist_samples,
                                                          dist,
                                                          coupling=coupling)

        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        log_scale_grads.shape.assert_is_compatible_with(data_dims)

        reparam_loss = gradient_estimators.pathwise_loss(
            function, dist_samples, dist)

        reparam_loss.shape.assert_is_compatible_with([num_samples])
        reparam_loss = tf.reduce_mean(reparam_loss)

        reparam_mean_grads = tf.gradients(reparam_loss, mean)[0]
        reparam_log_scale_grads = tf.gradients(reparam_loss, log_scale)[0]

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            (mean_grads_np, log_scale_grads_np, reparam_mean_grads_np,
             reparam_log_scale_grads_np) = sess.run([
                 mean_grads, log_scale_grads, reparam_mean_grads,
                 reparam_log_scale_grads
             ])
            self.assertAllClose(reparam_mean_grads_np,
                                mean_grads_np,
                                rtol=5e-1,
                                atol=1e-1)
            self.assertAllClose(reparam_log_scale_grads_np,
                                log_scale_grads_np,
                                rtol=5e-1,
                                atol=1e-1)
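
This test relies on the measure-valued and pathwise estimators both being unbiased for the same gradient, so their Monte Carlo averages can only differ by sampling noise. Below is a minimal NumPy sketch of that logic for the pathwise estimator alone (the measure-valued estimator is not re-implemented here), with an assumed smooth function f(x) = exp(x).

import numpy as np

# Pathwise (reparameterised) estimate of d/dmu E[exp(x)] for x ~ N(mu, sigma^2),
# compared against the analytic gradient exp(mu + sigma^2 / 2).
rng = np.random.default_rng(0)
mu, sigma, num_samples = 0.5, 1.2, 10**5
eps = rng.standard_normal(num_samples)
x = mu + sigma * eps

pathwise_mean_grad = np.mean(np.exp(x))          # E[f'(x)] with f' = exp
analytic_mean_grad = np.exp(mu + sigma**2 / 2)   # d/dmu E[exp(x)]
print(pathwise_mean_grad, analytic_mean_grad)    # Agree up to Monte Carlo noise.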
Example No. 6
    def testWeightedQuadratic(self, effective_mean, effective_log_scale,
                              weights, coupling):
        num_samples = 5 * 10**5

        effective_mean = np.array(effective_mean)
        effective_log_scale = np.array(effective_log_scale)
        weights = np.array(weights)

        data_dims = len(effective_mean)

        mean = tf.constant(effective_mean, dtype=tf.float32)
        log_scale = tf.constant(effective_log_scale, dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)

        function = lambda x: (tf.reduce_sum(x * weights, axis=1))**2
        loss, _ = gradient_estimators.measure_valued_loss(function,
                                                          dist_samples,
                                                          dist,
                                                          coupling=coupling)

        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        log_scale_grads.shape.assert_is_compatible_with(data_dims)

        expected_mean_grads = 2 * weights * np.sum(weights * effective_mean)
        effective_scale = np.exp(effective_log_scale)
        expected_scale_grads = 2 * weights**2 * effective_scale
        expected_log_scale_grads = expected_scale_grads * effective_scale

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            mean_grads_np, log_scale_grads_np = sess.run(
                [mean_grads, log_scale_grads])
            self.assertAllClose(expected_mean_grads,
                                mean_grads_np,
                                rtol=1e-1,
                                atol=1e-1)
            self.assertAllClose(expected_log_scale_grads,
                                log_scale_grads_np,
                                rtol=1e-1,
                                atol=1e-1)
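
The expected gradients used above follow from the closed form E[(w . x)^2] = (w . mu)^2 + sum_i w_i^2 sigma_i^2. A tiny NumPy restatement with illustrative values:

import numpy as np

# d/dmu_i E[(w . x)^2]        = 2 * w_i * (w . mu)
# d/dsigma_i E[(w . x)^2]     = 2 * w_i^2 * sigma_i
# d/dlog_sigma_i (chain rule) = sigma_i * (2 * w_i^2 * sigma_i) = 2 * w_i^2 * sigma_i^2
mu = np.array([1.0, -0.5])
log_sigma = np.array([0.1, 0.2])
w = np.array([0.3, 0.7])
sigma = np.exp(log_sigma)

mean_grads = 2 * w * np.sum(w * mu)
log_scale_grads = 2 * w**2 * sigma**2
print(mean_grads, log_scale_grads)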
Example No. 7
    def testConstantFunction(self, constant):
        data_dims = 3
        num_samples = 10**6

        effective_mean = 1.5
        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)

        effective_log_scale = 0.0
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])

        function = lambda x: constant * tf.ones_like(x[:, 0])
        loss = gradient_estimators.score_function_loss(function, dist_samples,
                                                       dist)

        # Average over the number of samples.
        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)
        expected_mean_grads = np.zeros(data_dims, dtype=np.float32)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        expected_log_scale_grads = np.zeros(data_dims, dtype=np.float32)

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            self.assertAllClose(sess.run(mean_grads),
                                expected_mean_grads,
                                rtol=1e-1,
                                atol=5e-3)
            self.assertAllClose(sess.run(log_scale_grads),
                                expected_log_scale_grads,
                                rtol=1e-1,
                                atol=5e-3)
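
For context (not part of the test): with a constant function, the score-function (REINFORCE) estimator averages f(x) * grad log q(x; theta), whose expectation is zero because E[grad log q] = 0, but individual terms are noisy, which is why the test uses 10**6 samples and loose tolerances. A one-dimensional NumPy sketch with assumed values:

import numpy as np

rng = np.random.default_rng(0)
mu, log_sigma, num_samples = 1.5, 0.0, 10**6
sigma = np.exp(log_sigma)
x = rng.normal(mu, sigma, size=num_samples)

f_x = np.ones(num_samples)                        # constant function
score_mu = (x - mu) / sigma**2                    # d/dmu log N(x; mu, sigma)
score_log_sigma = (x - mu)**2 / sigma**2 - 1.0    # d/dlog_sigma log N(x; mu, sigma)
print(np.mean(f_x * score_mu), np.mean(f_x * score_log_sigma))  # both ~ 0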
Example No. 8
    def testQuadraticFunction(self, effective_mean, effective_log_scale):
        data_dims = 1
        num_samples = 10**6

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        function = lambda x: tf.reduce_sum(x**2, axis=1)

        loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)
        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)
        loss.shape.assert_is_compatible_with([])

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)
        expected_mean_grads = 2 * effective_mean * np.ones(data_dims,
                                                           dtype=np.float32)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        log_scale_grads.shape.assert_is_compatible_with(data_dims)
        expected_log_scale_grads = 2 * np.exp(
            2 * effective_log_scale) * np.ones(data_dims, dtype=np.float32)

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            self.assertAllClose(sess.run(mean_grads),
                                expected_mean_grads,
                                rtol=1e-1,
                                atol=1e-3)
            self.assertAllClose(sess.run(log_scale_grads),
                                expected_log_scale_grads,
                                rtol=1e-1,
                                atol=1e-3)
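
The expected values above come from E[sum_i x_i^2] = sum_i (mu_i^2 + exp(2 * log_sigma_i)): differentiating gives 2 * mu_i for the mean and 2 * exp(2 * log_sigma_i) for the log scale. The same numbers can be recovered with a hand-rolled pathwise estimate in NumPy (illustrative values, single dimension):

import numpy as np

# Pathwise (reparameterised) gradients of f(x) = x^2 with x = mu + sigma * eps:
#   df/dmu        = 2 * x               -> E[2x] = 2 * mu
#   df/dlog_sigma = 2 * x * sigma * eps -> E[.]  = 2 * sigma^2 = 2 * exp(2 * log_sigma)
rng = np.random.default_rng(0)
mu, log_sigma, n = 1.0, 0.5, 10**6
sigma = np.exp(log_sigma)
eps = rng.standard_normal(n)
x = mu + sigma * eps

print(np.mean(2 * x), 2 * mu)                        # ~ 2 * mu
print(np.mean(2 * x * sigma * eps), 2 * sigma**2)    # ~ 2 * exp(2 * log_sigma)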
Example No. 9
    def testLinearFunction(self, effective_mean, effective_log_scale):
        data_dims = 3
        num_samples = 10**6

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        dist_samples.shape.assert_is_compatible_with([num_samples, data_dims])

        function = lambda x: tf.reduce_sum(x, axis=1)
        loss = gradient_estimators.pathwise_loss(function, dist_samples, dist)
        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)
        loss.shape.assert_is_compatible_with([])

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)
        expected_mean_grads = np.ones(data_dims, dtype=np.float32)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        expected_log_scale_grads = np.zeros(data_dims, dtype=np.float32)

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            # This should be an analytical computation, so the result needs to
            # be accurate.
            self.assertAllClose(sess.run(mean_grads),
                                expected_mean_grads,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(log_scale_grads),
                                expected_log_scale_grads,
                                atol=1e-2)
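
For the linear function the expectations are immediate: E[sum_i x_i] = sum_i mu_i, so the gradient is 1 for every mean component and 0 for every log-scale component (the per-sample pathwise gradient w.r.t. log_sigma_i is sigma_i * eps_i, which averages to zero). A one-dimensional NumPy check with assumed values:

import numpy as np

# f(x) = x with x = mu + sigma * eps: df/dmu = 1, df/dlog_sigma = sigma * eps.
rng = np.random.default_rng(0)
sigma, n = np.exp(0.25), 10**6
eps = rng.standard_normal(n)
print(np.mean(np.ones(n)), np.mean(sigma * eps))  # ~ 1.0 and ~ 0.0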
Example No. 10
def main(argv):
  del argv

  # Training data.
  features, targets = data_utils.get_sklearn_data_as_tensors(
      batch_size=config.batch_size,
      dataset_name=config.dataset_name)

  # Eval data.
  eval_features, eval_targets = data_utils.get_sklearn_data_as_tensors(
      batch_size=None,
      dataset_name=config.dataset_name)
  dataset_size = eval_features.get_shape()[0]

  data_dims = features.shape[1]

  prior = dist_utils.multi_normal(
      loc=tf.zeros(data_dims), log_scale=tf.zeros(data_dims))

  with tf.variable_scope('posterior'):
    posterior = dist_utils.diagonal_gaussian_posterior(data_dims)

  model = bayes_lr.BayesianLogisticRegression(
      prior=prior, posterior=posterior,
      dataset_size=dataset_size,
      use_analytical_kl=config.use_analytical_kl)

  grad_loss_fn = _get_grad_loss_fn()
  control_variate_fn = _get_control_variate_fn()
  jacobian_parallel_iterations = _jacobian_parallel_iterations()

  def model_loss(features, targets, posterior_samples):
    num_posterior_samples_cv_coeff = config.num_posterior_samples_cv_coeff
    return blr_model_grad_utils.model_surrogate_loss(
        model,
        features, targets, posterior_samples,
        grad_loss_fn=grad_loss_fn,
        control_variate_fn=control_variate_fn,
        estimate_cv_coeff=config.estimate_cv_coeff,
        num_posterior_samples_cv_coeff=num_posterior_samples_cv_coeff,
        jacobian_parallel_iterations=jacobian_parallel_iterations)

  posterior_samples = posterior.sample(config.num_posterior_samples)
  train_loss, _ = model_loss(features, targets, posterior_samples)
  train_loss = tf.reduce_mean(train_loss)

  num_eval_posterior_samples = config.num_eval_posterior_samples
  eval_posterior_samples = posterior.sample(num_eval_posterior_samples)
  eval_model_output = model.apply(
      eval_features, eval_targets, posterior_samples=eval_posterior_samples)

  _, jacobians = model_loss(
      eval_features, eval_targets, eval_posterior_samples)
  eval_model_metrics = metrics_fetch_dict(eval_model_output)
  jacobians = _pretty_jacobians(jacobians)

  # Compute the surrogate loss without any variance reduction.
  # Used as a sanity check and for debugging.
  if _variance_reduction():
    if control_variate_fn:
      no_var_reduction_grad_fn = grad_loss_fn
      no_var_reduction_prefix = 'no_control_variate'
    elif config.gradient_config.type == 'measure_valued':
      # Compute the loss and stats when not using coupling.
      def no_var_reduction_grad_fn(function, dist_samples, dist):
        return gradient_estimators.measure_valued_loss(
            function, dist_samples, dist, coupling=False)
      no_var_reduction_prefix = 'no_coupling'

    _, no_var_reduction_jacobians = blr_model_grad_utils.model_surrogate_loss(
        model, eval_features, eval_targets, eval_posterior_samples,
        grad_loss_fn=no_var_reduction_grad_fn,
        jacobian_parallel_iterations=jacobian_parallel_iterations)
    no_var_reduction_jacobians = _pretty_jacobians(no_var_reduction_jacobians)
  else:
    # No variance reduction used. No reason for additional logging.
    no_var_reduction_jacobians = {}

  for j in no_var_reduction_jacobians.values():
    assert j.get_shape().as_list()[0] == num_eval_posterior_samples

  start_learning_rate = config.start_learning_rate
  global_step = tf.train.get_or_create_global_step()

  if config.cosine_learning_rate_decay:
    training_steps = config.training_steps
    learning_rate_multiplier = tf.math.cos(
        np.pi / 2 * tf.cast(global_step, tf.float32)  / training_steps)
  else:
    learning_rate_multiplier = tf.constant(1.0)

  learning_rate = start_learning_rate * learning_rate_multiplier
  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  train_op = optimizer.minimize(train_loss, global_step=global_step)

  hyper_dict = {
      'start_learning_rate': config.start_learning_rate,
      'num_posterior_samples': config.num_posterior_samples,
      'batch_size': config.batch_size}

  summary_writer = tf.summary.FileWriter(
      os.path.join(config.experiment_dir, 'logs'))

  # Checkpointing.
  hooks = _configure_hooks(train_loss)

  i = -1
  with tf.train.MonitoredSession(hooks=hooks) as sess:
    logging.info('starting training')
    for i in range(config.training_steps):
      sess.run(train_op)

      if (i + 1) % config.report_interval == 0:
        # Training loss and debug ops.
        logging.info('global_step %i', sess.run(global_step))
        logging.info('training loss at step %i: %f', i, sess.run(train_loss))

        # Compute multi batch eval metrics.
        multi_batch_metrics = run_multi_batch_metrics(
            eval_model_metrics, sess, config.num_eval_batches)

        for key, value in multi_batch_metrics.items():
          logging.info('%s at step %i: %f', key, i, value)

        posterior_vars_value = sess.run(
            {v.name: v for v in model.posterior.dist_vars})
        for k, value in posterior_vars_value.items():
          logging.info('%s avg at step %i: %f', k, i, np.mean(value))

        metrics = multi_batch_metrics
        metrics.update({'step': i})
        metrics.update({'learning_rate': sess.run(learning_rate)})
        metrics.update(hyper_dict)

        if (i + 1) % config.grad_report_interval == 0:
          gradient_stats, grad_log_keys = run_gradient_stats(
              jacobians, sess, config.num_eval_batches)

          for key in grad_log_keys:
            logging.info(
                '%s at step %i: %f', key, i, gradient_stats[key])
          metrics.update(gradient_stats)

          if no_var_reduction_jacobians:
            no_var_reduction_grad_stats, grad_log_keys = run_gradient_stats(
                no_var_reduction_jacobians, sess, config.num_eval_batches)

            no_var_reduction_grad_stats = {
                no_var_reduction_prefix + '_' + k: v
                for k, v in no_var_reduction_grad_stats.items()}

            metrics.update(no_var_reduction_grad_stats)

          _add_summaries(summary_writer, metrics)

    def testNonPolynomialFunctionConsistency(self, effective_mean,
                                             effective_log_scale, grad_loss_fn,
                                             num_samples):
        """Check that the gradients are consistent between estimators."""
        data_dims = 3

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist_vars = [mean, log_scale]
        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)

        model_loss_fn = lambda x: tf.log(tf.reduce_sum(x**2, axis=1))
        control_variate_fn = control_variates.control_delta_method

        grad_loss_fn = utils.grad_loss_fn_with_jacobians(grad_loss_fn)
        loss, jacobians = control_variates.control_variates_surrogate_loss(
            dist=dist,
            dist_samples=dist_samples,
            dist_vars=dist_vars,
            model_loss_fn=model_loss_fn,
            grad_loss_fn=grad_loss_fn,
            control_variate_fn=control_variate_fn)

        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

        mean_jacobians = jacobians[mean]
        mean_jacobians.shape.assert_is_compatible_with(
            [num_samples, data_dims])
        mean_grads_from_jacobian = tf.reduce_mean(mean_jacobians, axis=0)

        log_scale_jacobians = jacobians[log_scale]
        log_scale_jacobians.shape.assert_is_compatible_with(
            [num_samples, data_dims])
        log_scale_grads_from_jacobian = tf.reduce_mean(log_scale_jacobians,
                                                       axis=0)

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        log_scale_grads.shape.assert_is_compatible_with(data_dims)

        no_cv_loss, _ = grad_loss_fn(model_loss_fn, dist_samples, dist)
        no_cv_loss.shape.assert_is_compatible_with([num_samples])
        no_cv_loss = tf.reduce_mean(no_cv_loss)

        no_cv_mean_grads = tf.gradients(no_cv_loss, mean)[0]
        no_cv_mean_grads.shape.assert_is_compatible_with(data_dims)

        no_cv_log_scale_grads = tf.gradients(no_cv_loss, log_scale)[0]
        no_cv_log_scale_grads.shape.assert_is_compatible_with(data_dims)

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            (mean_grads_from_jacobian_np, mean_grads_np,
             log_scale_grads_from_jacobian_np, log_scale_grads_np,
             no_cv_mean_grads_np, no_cv_log_scale_grads_np) = sess.run([
                 mean_grads_from_jacobian, mean_grads,
                 log_scale_grads_from_jacobian, log_scale_grads,
                 no_cv_mean_grads, no_cv_log_scale_grads
             ])

            self.assertAllClose(mean_grads_from_jacobian_np,
                                mean_grads_np,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(log_scale_grads_from_jacobian_np,
                                log_scale_grads_np,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(mean_grads_np,
                                no_cv_mean_grads_np,
                                rtol=1e-1,
                                atol=1e-1)

            self.assertAllClose(log_scale_grads_np,
                                no_cv_log_scale_grads_np,
                                rtol=1e-1,
                                atol=1e-1)

    def testQuadraticFunctionWithAnalyticalLoss(self, effective_mean,
                                                effective_log_scale,
                                                grad_loss_fn):
        data_dims = 3
        num_samples = 10**3

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist_vars = [mean, log_scale]
        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)

        model_loss_fn = lambda x: tf.reduce_sum(x**2, axis=1)
        control_variate_fn = control_variates.control_delta_method

        loss, jacobians = control_variates.control_variates_surrogate_loss(
            dist=dist,
            dist_samples=dist_samples,
            dist_vars=dist_vars,
            model_loss_fn=model_loss_fn,
            grad_loss_fn=utils.grad_loss_fn_with_jacobians(grad_loss_fn),
            control_variate_fn=control_variate_fn)

        loss.shape.assert_is_compatible_with([num_samples])
        loss = tf.reduce_mean(loss)

        expected_mean_grads = 2 * effective_mean * np.ones(data_dims,
                                                           dtype=np.float32)
        expected_log_scale_grads = 2 * np.exp(
            2 * effective_log_scale) * np.ones(data_dims, dtype=np.float32)

        mean_jacobians = jacobians[mean]
        mean_jacobians.shape.assert_is_compatible_with(
            [num_samples, data_dims])
        mean_grads_from_jacobian = tf.reduce_mean(mean_jacobians, axis=0)

        log_scale_jacobians = jacobians[log_scale]
        log_scale_jacobians.shape.assert_is_compatible_with(
            [num_samples, data_dims])
        log_scale_grads_from_jacobian = tf.reduce_mean(log_scale_jacobians,
                                                       axis=0)

        mean_grads = tf.gradients(loss, mean)[0]
        mean_grads.shape.assert_is_compatible_with(data_dims)

        log_scale_grads = tf.gradients(loss, log_scale)[0]
        log_scale_grads.shape.assert_is_compatible_with(data_dims)

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            self.assertAllClose(sess.run(mean_grads),
                                expected_mean_grads,
                                rtol=1e-1,
                                atol=1e-3)
            self.assertAllClose(sess.run(log_scale_grads),
                                expected_log_scale_grads,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(mean_grads_from_jacobian),
                                expected_mean_grads,
                                rtol=1e-1,
                                atol=1e-3)
            self.assertAllClose(sess.run(log_scale_grads_from_jacobian),
                                expected_log_scale_grads,
                                rtol=1e-1,
                                atol=1e-3)

    def testNonPolynomialFunctionWithGradients(self):
        data_dims = 1
        num_samples = 10**3
        effective_mean = 1.
        effective_log_scale = 1.

        mean = effective_mean * tf.ones(shape=(data_dims), dtype=tf.float32)
        log_scale = effective_log_scale * tf.ones(shape=(data_dims),
                                                  dtype=tf.float32)

        dist = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        dist_samples = dist.sample(num_samples)
        function = lambda x: tf.reduce_sum(tf.log(x**2))

        (cv, expected_cv, surrogate_cv,
         jacobians) = control_variates.control_delta_method(
             dist,
             dist_samples,
             function,
             grad_loss_fn=utils.grad_loss_fn_with_jacobians(
                 gradient_estimators.pathwise_loss))

        surrogate_cv = tf.reduce_mean(surrogate_cv)
        mean_cv_grads = tf.gradients(surrogate_cv, mean)[0]
        mean_expected_cv_grads = tf.gradients(expected_cv, mean)[0]

        log_scale_cv_grads = tf.gradients(surrogate_cv, log_scale)[0]
        log_scale_expected_cv_grads = tf.gradients(expected_cv, log_scale)[0]

        # Second-order expansion is log(\mu**2) + 1/2 * \sigma**2 * (-2 / \mu**2).
        expected_cv_val = -np.exp(1.)**2

        # The gradient w.r.t. \mu is 2 / \mu + 2 * \sigma**2 / \mu**3, i.e. 2 + 2 * e**2 here.
        expected_cv_mean_grad = 2 + 2 * np.exp(1.)**2

        mean_jacobians = jacobians[mean]
        log_scale_jacobians = jacobians[log_scale]

        with self.test_session() as sess:
            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            self.assertAllClose(sess.run(tf.reduce_mean(cv)),
                                sess.run(expected_cv),
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(expected_cv),
                                expected_cv_val,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(tf.reduce_mean(cv)),
                                expected_cv_val,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(mean_expected_cv_grads[0]),
                                expected_cv_mean_grad,
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(mean_cv_grads),
                                sess.run(mean_expected_cv_grads),
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(sess.run(log_scale_cv_grads),
                                sess.run(log_scale_expected_cv_grads),
                                rtol=1e-1,
                                atol=1e-3)

            self.assertAllClose(
                sess.run(tf.reduce_mean(mean_jacobians)),
                # Strip the leading dimension of 1.
                sess.run(mean_cv_grads[0]),
                rtol=1e-1,
                atol=1e-3)

            self.assertAllClose(
                sess.run(tf.reduce_mean(log_scale_jacobians)),
                # Strip the leading dimension of 1.
                sess.run(log_scale_cv_grads[0]),
                rtol=1e-1,
                atol=1e-3)
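
The constants in the comments above come from the second-order (delta method) expansion of f(x) = log(x^2) around the mean; with mu = 1 and sigma = e they evaluate to -e^2 and 2 + 2 * e^2. A short NumPy restatement:

import numpy as np

# Delta-method control variate for f(x) = log(x^2):
#   E[f(x)] ~= log(mu^2) + 0.5 * sigma^2 * f''(mu),  with f''(mu) = -2 / mu^2,
# and its gradient w.r.t. mu is 2 / mu + 2 * sigma^2 / mu^3.
mu, sigma = 1.0, np.exp(1.0)
expected_cv_val = np.log(mu**2) - sigma**2 / mu**2        # = -e^2 at mu = 1, sigma = e
expected_cv_mean_grad = 2 / mu + 2 * sigma**2 / mu**3     # = 2 + 2 * e^2
print(expected_cv_val, expected_cv_mean_grad)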
Example No. 14
    def testApply(self):
        data_dims = 10
        batch_size = 50
        num_samples = 6
        dataset_size = 500

        assert not batch_size % 2
        assert not num_samples % 2

        mean = tf.Variable(tf.zeros(shape=(data_dims), dtype=tf.float32),
                           name='mean')
        log_scale = tf.Variable(tf.zeros(shape=(data_dims), dtype=tf.float32),
                                name='log_scale')

        # Prior = posterior.
        prior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        posterior = dist_utils.multi_normal(loc=mean, log_scale=log_scale)
        model = bayes_lr.BayesianLogisticRegression(prior,
                                                    posterior,
                                                    dataset_size=dataset_size,
                                                    use_analytical_kl=True)

        # Build the data
        features = 3 * tf.ones((batch_size, data_dims), dtype=tf.float32)
        targets = tf.concat([
            tf.zeros(int(batch_size / 2), dtype=tf.float32),
            tf.ones(int(batch_size / 2), dtype=tf.float32)
        ],
                            axis=0)

        posterior_samples = tf.concat([
            tf.ones((int(num_samples / 2), data_dims), dtype=tf.float32),
            -1 * tf.ones((int(num_samples / 2), data_dims), dtype=tf.float32)
        ],
                                      axis=0)

        model_output = model.apply(features,
                                   targets,
                                   posterior_samples=posterior_samples)

        expected_logits = 3 * data_dims * np.concatenate([
            np.ones((batch_size, int(num_samples / 2))), -1 * np.ones(
                (batch_size, int(num_samples / 2)))
        ],
                                                         axis=1)

        quarter_ones = np.ones((int(batch_size / 2), int(num_samples / 2)))
        # Compute log probs for the entire batch, for the first half of samples.
        first_half_data_expected_log_probs = np.concatenate([
            np.log(1 - _sigmoid(3 * data_dims)) * quarter_ones,
            np.log(_sigmoid(3 * data_dims)) * quarter_ones
        ],
                                                            axis=0)

        # Compute log probs for the entire batch, for the second half of samples.
        second_half_data_expected_log_probs = np.concatenate([
            np.log(1 - _sigmoid(-3 * data_dims)) * quarter_ones,
            np.log(_sigmoid(-3 * data_dims)) * quarter_ones
        ],
                                                             axis=0)

        expected_log_probs = np.concatenate([
            first_half_data_expected_log_probs,
            second_half_data_expected_log_probs
        ],
                                            axis=1)

        first_half_expected_elbo = np.log(1 - _sigmoid(3 * data_dims))
        first_half_expected_elbo += np.log(_sigmoid(3 * data_dims))

        second_half_expected_elbo = np.log(_sigmoid(-3 * data_dims))
        second_half_expected_elbo += np.log(1 - _sigmoid(-3 * data_dims))

        expected_elbo = dataset_size / 2. * np.concatenate([
            first_half_expected_elbo * np.ones(
                (int(num_samples / 2))), second_half_expected_elbo * np.ones(
                    (int(num_samples / 2)))
        ])

        expected_predictions = np.concatenate([
            np.ones((batch_size, int(num_samples / 2))),
            np.zeros((batch_size, int(num_samples / 2)))
        ],
                                              axis=1)

        expected_accuracy = 0.5

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())

            self.assertEqual(sess.run(model_output.kl), 0)
            self.assertAllEqual(sess.run(model_output.logits), expected_logits)

            self.assertAllEqual(sess.run(model_output.predictions),
                                expected_predictions)
            self.assertAllEqual(sess.run(model_output.accuracy),
                                expected_accuracy)

            self.assertAllClose(sess.run(model_output.log_probs),
                                expected_log_probs,
                                rtol=1e-1,
                                atol=5e-3)

            self.assertAllClose(sess.run(model_output.elbo),
                                expected_elbo,
                                rtol=1e-1,
                                atol=5e-3)
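
How the expected constants in this test arise, as a standalone NumPy sketch (illustrative only; `_sigmoid` is assumed to be the logistic function used by the test file): every feature vector is all 3s and every posterior sample is all +1s or all -1s, so the logit is +/- 3 * data_dims for every data point, the targets are half 0s and half 1s, and the KL is 0 because prior == posterior.

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

data_dims, dataset_size = 10, 500
logit = 3.0 * data_dims   # +30 for the first half of posterior samples, -30 for the second.

# Bernoulli log-likelihoods for the two target halves of the batch.
log_p_target0 = np.log(1 - _sigmoid(logit))
log_p_target1 = np.log(_sigmoid(logit))

# Per-posterior-sample ELBO: dataset_size * average log-likelihood, KL = 0.
first_half_elbo = dataset_size / 2.0 * (log_p_target0 + log_p_target1)
print(first_half_elbo)  # Matches expected_elbo for the first half of samples.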