Example #1
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params)
      block.register_additional_minibatch(32)
      grads = (array_ops.constant([2., 3.]), array_ops.constant(4.))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()
      block.register_inverse()
      block._factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(state_ops.assign(block._factor._cov, _make_psd(3)))
      sess.run(block._factor.make_inverse_update_ops())

      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
Example #2
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      input_dim, output_dim = 3, 2
      inputs = array_ops.zeros([32, input_dim])
      outputs = array_ops.zeros([32, output_dim])
      params = array_ops.zeros([input_dim, output_dim])
      block = fb.FullyConnectedKFACBasicFB(
          lc.LayerCollection(), inputs, outputs, has_bias=False)
      grads = outputs**2
      damping = 0.  # This test is only valid without damping.
      block.instantiate_factors((grads,), damping)

      sess.run(state_ops.assign(block._input_factor._cov, _make_psd(3)))
      sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      v_flat = np.arange(6, dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat)

      self.assertAllClose(output_flat, explicit)
Example #3
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = array_ops.zeros((2, 2, 2, 2))
      inputs = array_ops.zeros((2, 2, 2, 2))
      outputs = array_ops.zeros((2, 2, 2, 2))
      block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                                'SAME')
      block.register_additional_minibatch(inputs, outputs)
      grads = outputs**2
      damping = 0.  # This test is only valid without damping.
      block.instantiate_factors(([grads],), damping)

      sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8)))
      sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      v_flat = np.arange(16, dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)), v_flat)

      self.assertAllClose(output_flat, explicit)
Example #4
 def multiply_inverse(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   print("vector_flat: %s" % vector_flat)
   out_flat = self._factor.left_multiply_inverse(
       vector_flat, self._damping)
   print("out_flat: %s" % out_flat)
   return utils.column_to_tensors(vector, out_flat)
Example #5
 def _compute_new_cov(self, idx=0):
   # This will be a very basic rank 1 estimate
   with maybe_colocate_with(self._params_grads[idx]):
     params_grads_flat = utils.tensors_to_column(self._params_grads[idx])
     return ((params_grads_flat * array_ops.transpose(
         params_grads_flat)) / math_ops.cast(self._batch_size,
                                             params_grads_flat.dtype))
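In Example #5, `params_grads_flat` has shape `[n, 1]`, so the elementwise product with its transpose broadcasts to an `[n, n]` outer product, i.e. a rank-1 covariance estimate g gᵀ / batch_size. A minimal NumPy sketch of that computation (values are illustrative, not from the source):

import numpy as np

g = np.array([[2.], [3.], [4.]])  # flattened gradient column, shape [n, 1]
batch_size = 32.

# Broadcasting [n, 1] * [1, n] yields the [n, n] outer product g @ g.T.
cov_rank1 = (g * g.T) / batch_size
assert np.allclose(cov_rank1, np.outer(g.ravel(), g.ravel()) / batch_size)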
Example #6
 def __init__(self, params_grads, batch_size):
   self._batch_size = batch_size
   self._params_grads = tuple(
       utils.tensors_to_column(params_grad) for params_grad in params_grads)
   self._orig_params_grads_name = scope_string_from_params(
       [self._params_grads, self._batch_size])
   super(NaiveDiagonalFactor, self).__init__()
Example #7
  def testTensorsToColumn(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)

      vector = array_ops.constant(np.array([[0., 1.], [2., 3.]]))
      output = utils.tensors_to_column(vector)
      self.assertListEqual([4, 1], output.get_shape().as_list())
      self.assertAllClose(sess.run(output), np.array([0., 1., 2., 3.])[:, None])

      vector = self._fully_connected_layer_params()
      output = utils.tensors_to_column(vector)
      self.assertListEqual([6, 1], output.get_shape().as_list())
      self.assertAllClose(
          sess.run(output), np.array([1., 2., 4., 3., 1., 2.])[:, None])

      vector = list(vector)
      vector.append(array_ops.constant([[6.], [7.], [8.], [9.]]))

      output = utils.tensors_to_column(vector)
      self.assertListEqual([10, 1], output.get_shape().as_list())
      self.assertAllClose(
          sess.run(output),
          np.array([1., 2., 4., 3., 1., 2., 6., 7., 8., 9.])[:, None])
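The assertions in Example #7 pin down what `utils.tensors_to_column` returns: each tensor is flattened in row-major order and the pieces are stacked into a single `[n, 1]` column. A minimal NumPy sketch of that behavior, assuming exactly the semantics the test asserts (the helper name `_flatten_to_column` is hypothetical):

import numpy as np

def _flatten_to_column(tensors):
  # Stand-in for utils.tensors_to_column: flatten each array in row-major
  # order and stack the pieces into one [n, 1] column.
  if not isinstance(tensors, (list, tuple)):
    tensors = [tensors]
  return np.concatenate([np.asarray(t).reshape(-1, 1) for t in tensors], axis=0)

# Mirrors the first assertion above: a [2, 2] matrix becomes a [4, 1] column.
col = _flatten_to_column(np.array([[0., 1.], [2., 3.]]))
assert col.shape == (4, 1)
assert np.allclose(col.ravel(), [0., 1., 2., 3.])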
Example #8
 def __init__(self,
              params_grads,
              batch_size,
              colocate_cov_ops_with_inputs=False):
   self._batch_size = batch_size
   self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
   params_grads_flat = []
   for params_grad in params_grads:
     with _maybe_colocate_with(params_grad,
                               self._colocate_cov_ops_with_inputs):
       col = utils.tensors_to_column(params_grad)
       params_grads_flat.append(col)
   self._params_grads = tuple(params_grads_flat)
   self._orig_params_grads_name = scope_string_from_params(
       [self._params_grads, self._batch_size])
   super(NaiveDiagonalFactor, self).__init__()
Example #9
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.NaiveDiagonalFB(lc.LayerCollection(), params, 32)
      grads = (params[0]**2, math_ops.sqrt(params[1]))
      damping = 0.5
      block.instantiate_factors((grads,), damping)

      cov = array_ops.reshape(array_ops.constant([2., 3., 4.]), [-1, 1])
      sess.run(state_ops.assign(block._factor._cov, cov))
      sess.run(block._factor.make_inverse_update_ops())

      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
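Because Example #9 uses a diagonal Fisher approximation, the explicit reference it computes reduces to an elementwise division: (diag(c) + λI)⁻¹ v is simply v / (c + λ). A NumPy sketch of that equivalence, assuming `full_fisher_block()` returns the diagonal matrix built from the assigned covariance column:

import numpy as np

cov = np.array([2., 3., 4.])   # the assigned diagonal covariance
damping = 0.5
v_flat = np.array([4., 5., 6.])

explicit = np.linalg.inv(np.diag(cov) + damping * np.eye(3)).dot(v_flat)
# For a diagonal matrix, the damped inverse-vector product is elementwise.
assert np.allclose(explicit, v_flat / (cov + damping))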
Example #10
 def multiply_inverse(self, vector):
   inverse = self._factor.get_inverse(self._damping)
   out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector))
   return utils.column_to_tensors(vector, out_flat)
Example #11
 def multiply(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = self._factor.left_multiply(
       vector_flat, self._damping)
   return utils.column_to_tensors(vector, out_flat)
Example #12
 def multiply_matpower(self, vector, exp):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = self._factor.left_multiply_matpower(
       vector_flat, exp, self._damping_func)
   return utils.column_to_tensors(vector, out_flat)
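`multiply_matpower` generalizes the other products: exp = 1 corresponds to `multiply` and exp = -1 to `multiply_inverse`. Assuming the factor applies (C + λI)^exp to the column, a dense NumPy reference via the eigendecomposition of the damped covariance might look like this (a sketch, not the library's implementation):

import numpy as np

def damped_matpower(cov, damping, exp, vector):
  # (C + damping * I)^exp applied to a column vector, via eigendecomposition.
  # cov is assumed symmetric PSD, as a covariance estimate should be.
  evals, evecs = np.linalg.eigh(cov + damping * np.eye(cov.shape[0]))
  return evecs @ (np.power(evals, exp)[:, None] * (evecs.T @ vector))

cov = np.array([[2., 1.], [1., 3.]])
v = np.array([[1.], [2.]])
# exp = -1 recovers the damped inverse-vector product.
assert np.allclose(damped_matpower(cov, 0.5, -1, v),
                   np.linalg.inv(cov + 0.5 * np.eye(2)) @ v)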
Example #13
 def multiply(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = vector_flat * (self._factor.get_cov() + self._damping)
   return utils.column_to_tensors(vector, out_flat)
Example #14
 def multiply(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = (math_ops.matmul(self._factor.get_cov(), vector_flat) +
               self._damping * vector_flat)
   return utils.column_to_tensors(vector, out_flat)
Example #15
 def multiply_inverse(self, vector):
   inverse = self._factor.get_inverse(self._damping)
   out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector))
   return utils.column_to_tensors(vector, out_flat)
Example #16
 def multiply_matpower(self, vector, exp):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = self._factor.left_multiply_matpower(
       vector_flat, exp, self._damping_func)
   return utils.column_to_tensors(vector, out_flat)
Example #17
 def multiply(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = (math_ops.matmul(self._factor.get_cov(), vector_flat) +
               self._damping * vector_flat)
   return utils.column_to_tensors(vector, out_flat)
Example #18
 def _compute_new_cov(self, idx=0):
   with _maybe_colocate_with(self._params_grads[idx]):
     params_grads_flat = utils.tensors_to_column(self._params_grads[idx])
     return (math_ops.square(params_grads_flat) / math_ops.cast(
         self._batch_size, params_grads_flat.dtype))
Example #19
 def _compute_new_cov(self, idx=0):
   with maybe_colocate_with(self._params_grads[idx]):
     params_grads_flat = utils.tensors_to_column(self._params_grads[idx])
     return (math_ops.square(params_grads_flat) / math_ops.cast(
         self._batch_size, params_grads_flat.dtype))
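Unlike the outer-product version in Example #5, Examples #18 and #19 keep only the diagonal of the rank-1 estimate: squaring the flattened gradient elementwise gives exactly diag(g gᵀ). A short NumPy check of that identity (illustrative values):

import numpy as np

g = np.array([[2.], [3.], [4.]])  # flattened gradient column
batch_size = 32.

diag_estimate = np.square(g) / batch_size   # what _compute_new_cov returns here
rank1_estimate = (g * g.T) / batch_size     # Example #5's full outer product
assert np.allclose(diag_estimate.ravel(), np.diag(rank1_estimate))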
Example #20
 def multiply(self, vector):
   vector_flat = utils.tensors_to_column(vector)
   out_flat = vector_flat * (self._factor.get_cov() + self._damping)
   return utils.column_to_tensors(vector, out_flat)