Example #1
    def testUpdateClipCoeff(self):
        with tf.Graph().as_default(), self.test_session() as sess:
            grads_and_vars = [(tf.constant([[1., 2.], [3., 4.]]), None),
                              (tf.constant([[2., 3.], [4., 5.]]), None)]
            pgrads_and_vars = [(tf.constant([[3., 4.], [5., 6.]]), None),
                               (tf.constant([[7., 8.], [9., 10.]]), None)]
            lrate = 0.1

            # Note: without rescaling, the squared Fisher norm of the update
            # is 1.74
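            # (see testSquaredFisherNorm below: the squared Fisher norm of the
            # preconditioned gradient is 174, and the proposed update is
            # lrate * pgrad, so its squared norm is 0.1**2 * 174 = 1.74.)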

            # If the update already satisfies the norm constraint, there should
            # be no rescaling.
            opt = optimizer.KfacOptimizer(lrate,
                                          0.2,
                                          dummy_layer_collection(),
                                          0.3,
                                          norm_constraint=10.,
                                          name='KFAC_1')
            coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars)
            self.assertAlmostEqual(1., sess.run(coeff), places=5)

            # If the update violates the constraint, it should be rescaled to
            # be on the constraint boundary.
            opt = optimizer.KfacOptimizer(lrate,
                                          0.2,
                                          dummy_layer_collection(),
                                          0.3,
                                          norm_constraint=0.5,
                                          name='KFAC_2')
            coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars)
            sq_norm_pgrad = opt._squared_fisher_norm(grads_and_vars,
                                                     pgrads_and_vars)
            sq_norm_update = lrate**2 * coeff**2 * sq_norm_pgrad
            self.assertAlmostEqual(0.5, sess.run(sq_norm_update), places=5)
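The two assertions above pin down the clipping rule being tested: when the proposed update lrate * pgrad already satisfies the norm constraint, coeff is 1; when it violates the constraint, coeff is scaled so the rescaled update sits exactly on the constraint boundary, i.e. coeff = sqrt(norm_constraint / (lrate**2 * sq_norm_pgrad)). A minimal NumPy sketch checking the constants used in the test (the helper name is illustrative only, not part of the kfac API):

import numpy as np

def expected_clip_coeff(lrate, sq_norm_pgrad, norm_constraint):
    # Squared Fisher norm of the proposed update lrate * pgrad.
    sq_norm_update = lrate**2 * sq_norm_pgrad
    # Rescale only if the update violates the constraint.
    return min(1.0, float(np.sqrt(norm_constraint / sq_norm_update)))

# Constants from the test: sq_norm_pgrad = 174 (see testSquaredFisherNorm), lrate = 0.1.
print(expected_clip_coeff(0.1, 174., 10.))  # 1.0: 1.74 <= 10, no rescaling
print(expected_clip_coeff(0.1, 174., 0.5))  # ~0.536: 0.1**2 * 0.536**2 * 174 ~= 0.5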
Example #2
    def testOptimizerInit(self):
        with tf.Graph().as_default():
            layer_collection = lc.LayerCollection()

            inputs = tf.ones((2, 1)) * 2
            weights_val = np.ones((1, 1), dtype=np.float32) * 3.
            weights = tf.get_variable('w',
                                      initializer=tf.constant(weights_val))
            bias = tf.get_variable('b',
                                   initializer=tf.zeros_initializer(),
                                   shape=(1, 1))
            output = tf.matmul(inputs, weights) + bias

            layer_collection.register_fully_connected((weights, bias), inputs,
                                                      output)

            logits = tf.tanh(output)
            targets = tf.constant([[0.], [1.]])
            output = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=targets))

            layer_collection.register_categorical_predictive_distribution(
                logits)

            optimizer.KfacOptimizer(0.1,
                                    0.2,
                                    layer_collection,
                                    0.3,
                                    momentum=0.5,
                                    momentum_type='regular')
Example #3
    def testOptimizerInitInvalidMomentumRegistration(self):
        with self.assertRaises(ValueError):
            optimizer.KfacOptimizer(0.1,
                                    0.2,
                                    lc.LayerCollection(),
                                    0.3,
                                    momentum_type='foo')
Example #4
    def testSquaredFisherNorm(self):
        with tf.Graph().as_default(), self.test_session() as sess:
            grads_and_vars = [(tf.constant([[1., 2.], [3., 4.]]), None),
                              (tf.constant([[2., 3.], [4., 5.]]), None)]
            pgrads_and_vars = [(tf.constant([[3., 4.], [5., 6.]]), None),
                               (tf.constant([[7., 8.], [9., 10.]]), None)]
            opt = optimizer.KfacOptimizer(0.1, 0.2, dummy_layer_collection(),
                                          0.3)
            sq_norm = opt._squared_fisher_norm(grads_and_vars, pgrads_and_vars)
            self.assertAlmostEqual(174., sess.run(sq_norm), places=5)
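The expected value of 174 is consistent with the squared Fisher norm being the sum, over all parameters, of the elementwise products of each gradient with its preconditioned gradient. A hand check of the constants above (plain NumPy, not the kfac implementation):

import numpy as np

g1, g2 = np.array([[1., 2.], [3., 4.]]), np.array([[2., 3.], [4., 5.]])
p1, p2 = np.array([[3., 4.], [5., 6.]]), np.array([[7., 8.], [9., 10.]])
# sum_i grad_i * pgrad_i, summed over both parameter blocks:
print(np.sum(g1 * p1) + np.sum(g2 * p2))  # 50.0 + 124.0 = 174.0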
Example #5
    def testApplyGradients(self):
        with tf.Graph().as_default(), self.test_session() as sess:
            layer_collection = lc.LayerCollection()

            inputs = tf.ones((2, 1)) * 2
            weights_val = np.ones((1, 1), dtype=np.float32) * 3.
            weights = tf.get_variable('w',
                                      initializer=tf.constant(weights_val))
            bias = tf.get_variable('b',
                                   initializer=tf.zeros_initializer(),
                                   shape=(1, 1))
            output = tf.matmul(inputs, weights) + bias

            layer_collection.register_fully_connected((weights, bias), inputs,
                                                      output)

            logits = tf.tanh(output)
            targets = tf.constant([[0.], [1.]])
            output = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=targets))

            layer_collection.register_categorical_predictive_distribution(
                logits)

            opt = optimizer.KfacOptimizer(0.1,
                                          0.2,
                                          layer_collection,
                                          0.3,
                                          momentum=0.5,
                                          momentum_type='regular')
            (cov_update_thunks,
             inv_update_thunks) = opt.make_vars_and_create_op_thunks()
            cov_update_ops = tuple(thunk() for thunk in cov_update_thunks)
            inv_update_ops = tuple(thunk() for thunk in inv_update_thunks)

            grads_and_vars = opt.compute_gradients(output, [weights, bias])
            all_vars = [grad_and_var[1] for grad_and_var in grads_and_vars]

            op = opt.apply_gradients(grads_and_vars)

            sess.run(tf.global_variables_initializer())
            old_vars = sess.run(all_vars)
            sess.run(cov_update_ops)
            sess.run(inv_update_ops)
            sess.run(op)
            new_vars = sess.run(all_vars)

            for old_var, new_var in zip(old_vars, new_vars):
                self.assertNotEqual(old_var, new_var)
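In an actual training script the same pieces are typically run in a loop rather than once, roughly as sketched below. This is only an illustrative sketch: num_steps and invert_every are hypothetical knobs, and how often the covariance and inverse updates run in practice depends on the model and compute budget.

# Hypothetical training loop reusing the objects built in the test above
# (cov_update_ops, inv_update_ops, and the apply_gradients op `op`).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):        # num_steps: hypothetical
        sess.run(cov_update_ops)         # refresh covariance estimates
        if step % invert_every == 0:     # invert_every: hypothetical
            sess.run(inv_update_ops)     # recompute the expensive inverses
        sess.run(op)                     # apply the K-FAC parameter update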
Example #6
    def testUpdateVelocities(self):
        with tf.Graph().as_default(), self.test_session() as sess:
            layers = lc.LayerCollection()
            layers.register_categorical_predictive_distribution(
                tf.constant([1.0]))
            opt = optimizer.KfacOptimizer(0.1,
                                          0.2,
                                          layers,
                                          0.3,
                                          momentum=0.5,
                                          momentum_type='regular')
            x = tf.get_variable('x', initializer=tf.ones((2, 2)))
            y = tf.get_variable('y', initializer=tf.ones((2, 2)) * 2)
            vec1 = tf.ones((2, 2)) * 3
            vec2 = tf.ones((2, 2)) * 4

            model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            update_op = opt._update_velocities([(vec1, x), (vec2, y)], 0.5)
            opt_vars = [
                v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                if v not in model_vars
            ]

            sess.run(tf.global_variables_initializer())
            old_opt_vars = sess.run(opt_vars)

            # Optimizer vars start out at 0.
            for opt_var in old_opt_vars:
                self.assertAllEqual(sess.run(tf.zeros_like(opt_var)), opt_var)

            sess.run(update_op)
            new_opt_vars = sess.run(opt_vars)
            # After one update, the velocities are equal to the vectors.
            for vec, opt_var in zip([vec1, vec2], new_opt_vars):
                self.assertAllEqual(sess.run(vec), opt_var)

            sess.run(update_op)
            final_opt_vars = sess.run(opt_vars)
            for first, second in zip(new_opt_vars, final_opt_vars):
                self.assertFalse(np.equal(first, second).all())
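The three checkpoints in this test (velocities start at zero, equal the vectors after one update, and change again after a second update with the same inputs) are consistent with a velocity recursion of the form new_velocity = decay * old_velocity + vec with decay = 0.5; the exact internals of _update_velocities are not asserted here. A quick NumPy check of that recursion against the test's expectations:

import numpy as np

decay = 0.5
vec = np.ones((2, 2)) * 3.0          # same constant as vec1 above
velocity = np.zeros((2, 2))          # optimizer slots start at zero

velocity = decay * velocity + vec    # first update
print(np.allclose(velocity, vec))    # True: velocity equals the vector

velocity = decay * velocity + vec    # second update
print(np.allclose(velocity, vec))    # False: velocity is now 1.5 * vec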
Example #7
  def DISABLED_test_rnn_multi(self):
    """Test automatic registration on a static RNN.

    The model tested here is designed for MNIST classification. To classify
    images using a recurrent neural network, we consider every image row as a
    sequence of pixels. Because the MNIST image shape is 28x28 pixels, each
    sample is handled as a sequence of 28 timesteps with 28 features per step.
    """
    with tf.Graph().as_default():
      dtype = tf.float32
      n_input = 28  # MNIST data input (img shape: 28*28)
      n_timesteps = 28  # timesteps
      n_hidden = 128  # number of features in the hidden layer
      n_classes = 10  # MNIST total classes (0-9 digits)

      x = tf.placeholder(dtype, [None, n_timesteps, n_input])
      y = tf.placeholder(tf.int32, [None])
      x_unstack = tf.unstack(x, n_timesteps, 1)

      w_input = tf.get_variable(
          'w_input', shape=[n_input, n_hidden], dtype=dtype)
      b_input = tf.get_variable('b_input', shape=[n_hidden], dtype=dtype)

      w_recurrent = tf.get_variable(
          'w_recurrent', shape=[n_hidden, n_hidden], dtype=dtype)
      b_recurrent = tf.get_variable(
          'b_recurrent', shape=[n_hidden], dtype=dtype)

      w_output = tf.get_variable(
          'w_output', shape=[n_hidden, n_classes], dtype=dtype)
      b_output = tf.get_variable('b_output', shape=[n_classes], dtype=dtype)

      layer_collection_manual = lc.LayerCollection()
      layer_collection_auto = lc.LayerCollection()

      a = tf.zeros([tf.shape(x_unstack[0])[0], n_hidden], dtype=dtype)

      # Here 'a' are the activations, 's' the pre-activations.
      a_list = [a]
      s_input_list = []
      s_recurrent_list = []
      s_list = []
      s_out_list = []
      cost = 0.0

      for i in range(len(x_unstack)):
        input_ = x_unstack[i]

        s_in = tf.matmul(input_, w_input) + b_input
        s_rec = tf.matmul(a, w_recurrent) + b_recurrent
        s = s_in + s_rec

        s_input_list.append(s_in)
        s_recurrent_list.append(s_rec)
        s_list.append(s)

        a = tf.tanh(s)
        a_list.append(a)

        s_out = tf.matmul(a, w_output) + b_output
        s_out_list.append(s_out)

        if i == len(x_unstack) - 1:
          labels = y
        else:
          labels = tf.zeros([tf.shape(y)[0]], dtype=tf.int32)

        cost += tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=s_out, labels=labels))

        layer_collection_manual.register_categorical_predictive_distribution(
            s_out)
        layer_collection_auto.register_categorical_predictive_distribution(
            s_out)

      layer_collection_manual.register_fully_connected_multi(
          (w_input, b_input), x_unstack, s_input_list)
      layer_collection_manual.register_fully_connected_multi(
          (w_recurrent, b_recurrent), a_list[:-1], s_recurrent_list)
      layer_collection_manual.register_fully_connected_multi(
          (w_output, b_output), a_list[1:], s_out_list)

      # Constructing the optimizer performs automatic layer registration.
      auto_optimizer = optimizer.KfacOptimizer(  # pylint: disable=unused-variable
          learning_rate=1,
          cov_ema_decay=1,
          damping=1,
          layer_collection=layer_collection_auto,
          momentum=1)

      assert_fisher_blocks_match(self, layer_collection_manual,
                                 layer_collection_auto)
Example #8
  def test_multitower_examples_model(self):
    """Ensure graph search runs properly on a multitower setup.

    This test uses linear_model from examples/convnets.
    """
    with tf.Graph().as_default():
      def linear_model(images, labels, num_classes):
        """Creates a linear model.

        Args:
          images: The input image tensors, a tensor of size
              (batch_size x height_in x width_in x channels).
          labels: The sparse target labels, a tensor of size (batch_size x 1).
          num_classes: The number of classes, needed for one-hot encoding (int).

        Returns:
          loss: The total loss for this model (0-D tensor).
          logits: Predictions for this model (batch_size x num_classes).
        """
        images = tf.reshape(images, [images.shape[0], -1])
        logits = tf.layers.dense(images, num_classes, name='logits')
        loss = sparse_softmax_cross_entropy(labels, logits, num_classes)
        return loss, logits

      model = linear_model
      layer_collection = lc.LayerCollection()
      num_towers = 2
      batch_size = num_towers
      num_classes = 2

      # Set up data.
      images = tf.random_uniform(shape=[batch_size, 32, 32, 1])
      labels = tf.random_uniform(
          dtype=tf.int64, shape=[batch_size, 1], maxval=num_classes)

      tower_images = tf.split(images, num_towers)
      tower_labels = tf.split(labels, num_towers)

      # Build model.
      losses = []
      logits = []
      for tower_id in range(num_towers):
        tower_name = 'tower%d' % tower_id
        with tf.name_scope(tower_name):
          with tf.variable_scope(tf.get_variable_scope(), reuse=(tower_id > 0)):
            current_loss, current_logits = model(
                tower_images[tower_id], tower_labels[tower_id], num_classes + 1)
            layer_collection.register_categorical_predictive_distribution(
                current_logits, name='logits')
            losses.append(current_loss)
            logits.append(current_logits)

      # Run the graph scanner.
      with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        gs.register_layers(layer_collection, tf.trainable_variables())
      self.assertEqual(len(layer_collection.fisher_blocks), 1)
      fisher_block = list(layer_collection.fisher_blocks.values())[0]
      self.assertIsInstance(fisher_block, fb.FullyConnectedKFACBasicFB)
      self.assertEqual(fisher_block.num_registered_towers, num_towers)

      global_step = tf.train.get_or_create_global_step()
      opt = optimizer.KfacOptimizer(
          learning_rate=0.1,
          cov_ema_decay=0.1,
          damping=0.1,
          layer_collection=layer_collection,
          momentum=0.1)
      cost = tf.reduce_mean(losses)
      (cov_update_thunks,
       inv_update_thunks) = opt.make_vars_and_create_op_thunks()
      cov_update_op = tf.group(*(thunk() for thunk in cov_update_thunks))
      inv_update_op = tf.group(*(thunk() for thunk in inv_update_thunks))
      train_op = opt.minimize(cost, global_step=global_step)
      init = tf.global_variables_initializer()

      # Run a single training step.
      with self.test_session() as sess:
        sess.run(init)
        sess.run([cov_update_op])
        sess.run([inv_update_op])
        sess.run([train_op])