    def testMultiplyInverseAgainstExplicit(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = array_ops.zeros((2, 2, 2, 2))
            inputs = array_ops.zeros((2, 2, 2, 2))
            outputs = array_ops.zeros((2, 2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(), params,
                                      (1, 1, 1, 1), 'SAME')
            block.register_additional_minibatch(inputs, outputs)
            grads = outputs**2
            damping = 0.  # This test is only valid without damping.
            block.instantiate_factors(([grads], ), damping)

            sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8)))
            sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            v_flat = np.arange(16, dtype=np.float32)
            vector = utils.column_to_tensors(params,
                                             array_ops.constant(v_flat))
            output = block.multiply_inverse(vector)
            output_flat = sess.run(utils.tensors_to_column(output)).ravel()

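            # Reference result computed explicitly: (F + damping * I)^{-1} v,
            # where F is the dense Fisher block.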
            full = sess.run(block.full_fisher_block())
            explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)),
                              v_flat)

            self.assertAllClose(output_flat, explicit)
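
# The test above assigns the factors' covariances through a `_make_psd`
# helper that is not shown in this snippet. A minimal sketch of such a
# helper (an assumed definition): it builds a well-conditioned positive
# semi-definite matrix of the given size, i.e. an all-ones matrix plus a
# strictly positive diagonal.
def _make_psd(dim):
    """Constructs a PSD matrix of the given dimension."""
    mat = np.ones((dim, dim), dtype=np.float32)
    mat[np.arange(dim), np.arange(dim)] = 2. + np.arange(dim)
    return array_ops.constant(mat)
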
    def testMultiplyInverseTuple(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = random_ops.random_normal((2, 2, 2, 2))
            inputs = random_ops.random_normal((2, 2, 2, 2))
            outputs = random_ops.random_normal((2, 2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(), params,
                                      (1, 1, 1, 1), 'SAME')
            block.register_additional_minibatch(inputs, outputs)
            grads = outputs**2
            block.instantiate_factors(([grads], ), 0.5)

            # Make sure our inverse is something other than the identity.
            sess.run(tf_variables.global_variables_initializer())
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
                      np.arange(2, 4).reshape(2, 1).astype(np.float32))
            output = block.multiply_inverse(
                (array_ops.constant(vector[0]), array_ops.constant(vector[1])))

            output = sess.run(output)
            self.assertAllClose([0.136455, 0.27291], output[0][0])
            self.assertAllClose([0.27291, 0.409365], output[1])
    def testMultiplyInverseNotTupleWithBias(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = [random_ops.random_normal((2, 2, 2, 2))]
            inputs = random_ops.random_normal((2, 2, 2, 2))
            outputs = random_ops.random_normal((2, 2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(),
                                      params=params,
                                      padding='SAME')
            block.register_additional_tower(inputs, outputs)
            self.assertTrue(block._has_bias)
            grads = outputs**2
            block.instantiate_factors(((grads, ), ), 0.5)
            block._input_factor.instantiate_cov_variables()
            block._output_factor.instantiate_cov_variables()
            block.register_inverse()
            block._input_factor.instantiate_inv_variables()
            block._output_factor.instantiate_inv_variables()

            # Make sure our inverse is something other than the identity.
            sess.run(tf_variables.global_variables_initializer())
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

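            # 9 rows: 8 for the flattened 2x2 filter over 2 input channels
            # (per output channel), plus 1 homogeneous row for the bias.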
            vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
            output = block.multiply_inverse(array_ops.constant(vector))

            self.assertAllClose([0.136455, 0.27291], sess.run(output)[0])
  def register_convolution(self,
                           params,
                           inputs,
                           outputs,
                           padding,
                           strides=None,
                           dilation_rate=None,
                           data_format=None,
                           approx=None,
                           reuse=VARIABLE_SCOPE):
    """Register a call to tf.nn.convolution().

    Args:
      params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
        this layer. Weight matrix should have shape [..filter_spatial_size..,
        in_channels, out_channels].  Bias should have shape [out_channels].
      inputs: Tensor of shape [batch_size, ..input_spatial_size.., in_channels].
        Inputs to layer.
      outputs: Tensor of shape [batch_size, ..output_spatial_size..,
        out_channels].  Output produced by layer.
      padding: string. See tf.nn.conv2d for valid values.
      strides: List of ints of length len(..input_spatial_size..). Strides for
        convolution kernel in spatial dimensions.
      dilation_rate: List of ints of length len(..input_spatial_size..).
        Dilations along spatial dimension.
      data_format: str or None. Format of data.
      approx: str or None. If not None must be one of "kron" or "diagonal".
        The Fisher approximation to use. If None the default value is used.
        (Default: None)
      reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
        create a new FisherBlock.  If "VARIABLE_SCOPE", use
        tf.get_variable_scope().reuse.

    Raises:
      ValueError: If an unsupported value is given for 'approx'.
      KeyError: If reuse == True but no FisherBlock found for 'params'.
      ValueError: If reuse == True and FisherBlock found but of the wrong type.
    """
    # TODO(b/74793309): Have this use _get_block_type like the other
    # registration functions?
    assert approx is None or approx == APPROX_KRONECKER_NAME

    block = self.register_block(
        params,
        fb.ConvKFCBasicFB(
            layer_collection=self,
            params=params,
            padding=padding,
            strides=strides,
            dilation_rate=dilation_rate,
            data_format=data_format),
        reuse=reuse)
    block.register_additional_minibatch(inputs, outputs)

    self._add_uses(params, 1)
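
# Illustrative usage of register_convolution() (an assumed example; the
# variable names and shapes here are not from the original source). Shapes
# follow the docstring: filter [height, width, in_channels, out_channels],
# inputs [batch, height, width, in_channels].
import tensorflow as tf

layer_collection = lc.LayerCollection()
w = tf.get_variable('w', shape=(3, 3, 16, 32))
x = tf.random_normal((8, 32, 32, 16))
y = tf.nn.convolution(x, w, padding='SAME')
layer_collection.register_convolution(w, x, y, padding='SAME')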
  def register_conv2d(self, params, strides, padding, inputs, outputs,
                      approx=APPROX_KRONECKER_NAME):
    if approx == APPROX_KRONECKER_NAME:
      self.register_block(params,
                          fb.ConvKFCBasicFB(self, params, inputs, outputs,
                                            strides, padding))
    elif approx == APPROX_DIAGONAL_NAME:
      self.register_block(params,
                          fb.ConvDiagonalFB(self, params, inputs, outputs,
                                            strides, padding))
    else:
      raise ValueError("Bad value {} for approx.".format(approx))
    def _testConvKFCBasicFBInitParams(self, params):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            if isinstance(params, (list, tuple)):
                params = [array_ops.constant(param) for param in params]
            else:
                params = array_ops.constant(params)
            inputs = random_ops.random_normal((2, 2, 2))
            outputs = random_ops.random_normal((2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, inputs,
                                      outputs, [1, 1, 1], 'SAME')

            self.assertAllEqual(outputs, block.tensors_to_compute_grads())