Exemplo n.º 1
0
 def testConfigWithLearningRateDecay(self):
   """SGD built with a LearningRateSchedule must round-trip the schedule
   through get_config/from_config, both before and after the learning-rate
   variable is materialized by minimize().
   """
   with test_utils.use_gpu():
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
     # Exercise two different schedule types to cover their (de)serialization.
     for decay_schedule in [
         learning_rate_schedule.InverseTimeDecay(
             0.5, decay_steps=1.0, decay_rate=0.1),
         learning_rate_schedule.PiecewiseConstantDecay(
             [5], [1., .5])
     ]:
       step = 10
       opt = gradient_descent.SGD(decay_schedule)
       config = opt.get_config()
       opt2 = gradient_descent.SGD.from_config(config)
       # assert both are equal float values.
       self.assertAllEqual(
           decay_schedule(step),
           opt._get_hyper('learning_rate')(step))
       self.assertAllEqual(
           decay_schedule(step),
           opt2._get_hyper('learning_rate')(step))
       loss = lambda: 3 * var0
       # learning rate variable is created when calling minimize.
       opt.minimize(loss, [var0])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       # Round-trip again now that the hyper has been turned into a variable.
       config = opt.get_config()
       opt3 = gradient_descent.SGD.from_config(config)
       self.assertAllEqual(
           self.evaluate(opt._get_hyper('learning_rate')(step)),
           opt3._get_hyper('learning_rate')(step))
Exemplo n.º 2
0
 def testNoGradientsForAnyVariables_ApplyGradients(self):
   """apply_gradients must reject a pair list whose gradients are all None."""
   for dtype in _DATA_TYPES:
     with test_utils.use_gpu():
       first_var = tf.Variable([1.0, 2.0], dtype=dtype)
       second_var = tf.Variable([3.0, 4.0], dtype=dtype)
       optimizer = gradient_descent.SGD(3.0)
       # Every gradient slot is None, so the optimizer has nothing to apply.
       grads_and_vars = [(None, first_var), (None, second_var)]
       with self.assertRaisesRegex(ValueError,
                                   'No gradients provided for any variable'):
         optimizer.apply_gradients(grads_and_vars)
Exemplo n.º 3
0
 def testGradClipNorm(self):
   """clipnorm=1.0 clips the raw gradient of 3 down to norm 1 before SGD."""
   with test_utils.use_gpu():
     weight = tf.Variable([1.0])

     def objective():
       return 3 * weight

     optimizer = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0)
     train_op = optimizer.minimize(objective, [weight])
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.evaluate(train_op)
     # 1.0 - lr(1.0) * clipped_grad(1.0) == 0.0
     self.assertAllClose([0.], self.evaluate(weight))
Exemplo n.º 4
0
  def testNumericEquivalenceForNesterovMomentum(self):
    """Keras v1 SGD, Keras v2 SGD and tf.compat.v1 MomentumOptimizer, all
    with Nesterov momentum, must train identically from identical weights.
    """
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with test_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = test_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      # Three identical models; each receives a copy of the same weights.
      num_hidden = 5
      model_k_v1 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())
      model_tf = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_tf.set_weights(model_k_v2.get_weights())

      # One optimizer per implementation, all Nesterov with momentum 0.9.
      opt_k_v1 = optimizer_v1.SGD(momentum=0.9, nesterov=True)
      opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True)
      opt_tf = tf.compat.v1.train.MomentumOptimizer(
          learning_rate=0.01, momentum=0.9, use_nesterov=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())
      model_tf.compile(
          opt_tf,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())

      # shuffle=False keeps the batch order identical across the three fits.
      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_tf = model_tf.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      # Final weights, optimizer slot weights, and loss curves must agree.
      self.assertAllClose(model_k_v1.get_weights(), model_tf.get_weights())
      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_tf.history['loss'])
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
Exemplo n.º 5
0
 def testNoGradients(self):
   """minimize raises when the loss is independent of every listed var."""
   for dtype in _DATA_TYPES:
     with test_utils.use_gpu():
       var0 = tf.Variable([1.0, 2.0], dtype=dtype)
       var1 = tf.Variable([3.0, 4.0], dtype=dtype)

       def objective(v=var0):
         # Depends only on var0; binding via default avoids the
         # cell-var-from-loop pitfall.
         return 5 * v

       optimizer = gradient_descent.SGD(3.0)
       with self.assertRaisesRegex(ValueError, 'No gradients'):
         # var1 has no gradient
         optimizer.minimize(objective, var_list=[var1])
Exemplo n.º 6
0
  def testNoGradientsForAnyVariables_Minimize(self):
    """minimize raises when the loss is a constant w.r.t. all variables."""
    for dtype in _DATA_TYPES:
      with test_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)

        def constant_loss():
          # No variable appears in the loss, so no gradient can flow.
          return tf.constant(5.0)

        optimizer = gradient_descent.SGD(3.0)
        with self.assertRaisesRegex(ValueError,
                                    'No gradients provided for any variable'):
          optimizer.minimize(constant_loss, var_list=[var0, var1])
Exemplo n.º 7
0
    def _run_test(self, kwargs, expected_output_shape):
        """Build a Conv1DTranspose layer with `kwargs` and verify its
        output shape against `expected_output_shape`.
        """
        batch = 2
        channels = 3
        length = 6

        with test_utils.use_gpu():
            test_utils.layer_test(
                keras.layers.Conv1DTranspose,
                kwargs=kwargs,
                input_shape=(batch, length, channels),
                expected_output_shape=expected_output_shape)
Exemplo n.º 8
0
    def test_group_conv_depthwise(self):
        """Conv2D with groups == channels should match depthwise_conv2d."""
        if tf.test.is_gpu_available(cuda_only=True):
            with test_utils.use_gpu():
                images = tf.random.uniform(shape=(3, 27, 27, 32))

                grouped = keras.layers.Conv2D(32, 3, groups=32, use_bias=False)
                grouped.build((3, 27, 27, 32))

                # Reinterpret the grouped kernel as a depthwise kernel.
                dw_kernel = tf.reshape(grouped.kernel, [3, 3, 32, 1])
                reference = tf.compat.v1.nn.depthwise_conv2d(
                    images, dw_kernel, strides=[1, 1, 1, 1], padding='VALID')

                self.assertAllClose(grouped(images), reference, rtol=1e-5)
Exemplo n.º 9
0
 def testGradGlobalClipNorm(self):
   """global_clipnorm rescales every grad by 2.0 / global_norm(=5.0)."""
   with test_utils.use_gpu():
     # l2 norm is 5.0
     w1 = tf.Variable([1.0])
     w2 = tf.Variable([2.0])

     def objective():
       return 3 * w1 + 4 * w2

     optimizer = gradient_descent.SGD(learning_rate=1.0, global_clipnorm=2.0)
     train_op = optimizer.minimize(objective, [w1, w2])
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.evaluate(train_op)
     # grad1 = 3.0 * 2.0 / 5.0 = 1.2
     self.assertAllClose([-.2], self.evaluate(w1))
     # grad2 = 4.0 * 2.0 / 5.0 = 1.6
     self.assertAllClose([.4], self.evaluate(w2))
Exemplo n.º 10
0
  def testComputeGradientsWithTensors(self):
    """_compute_gradients accepts plain tensors, but apply_gradients
    refuses to update them.
    """
    with test_utils.use_gpu():
      x = tf.convert_to_tensor(1.0)

      square = lambda: x * x

      sgd = gradient_descent.SGD(3.0)
      grads_and_vars = sgd._compute_gradients(square, [x])
      self.assertLen(grads_and_vars, 1)
      gradient, returned_x = grads_and_vars[0]
      # The "variable" slot holds the very tensor that was passed in.
      self.assertIs(x, returned_x)
      # d(x*x)/dx at x=1 is 2.
      self.assertEqual(2.0, self.evaluate(gradient))

      # Tensors are not variables, so applying the update is unsupported.
      with self.assertRaises(NotImplementedError):
        sgd.apply_gradients(grads_and_vars)
Exemplo n.º 11
0
  def _testOptimizersCompatibility(self, opt_v1, opt_v2, test_weights=True):
    """Check that a v1 optimizer and its v2 counterpart train identically.

    Args:
      opt_v1: Keras v1 optimizer instance.
      opt_v2: Keras v2 optimizer instance expected to match `opt_v1`.
      test_weights: If True, copy `opt_v1`'s slot weights into `opt_v2`
        before the comparison fit.
    """
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with test_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = test_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      # Fit the v1 model one epoch first to populate optimizer slot weights.
      num_hidden = 5
      model_v1 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_v1.compile(
          opt_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())
      model_v1.fit(x, y, batch_size=5, epochs=1)

      # The v2 model starts from the v1 model's post-fit weights.
      model_v2 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_v2.set_weights(model_v1.get_weights())
      model_v2.compile(
          opt_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())
      if not tf.compat.v1.executing_eagerly_outside_functions():
        model_v2._make_train_function()
      if test_weights:
        opt_v2.set_weights(opt_v1.get_weights())

      # shuffle=False keeps the batch order identical between the two fits.
      hist_1 = model_v1.fit(x, y, batch_size=5, epochs=1, shuffle=False)
      hist_2 = model_v2.fit(x, y, batch_size=5, epochs=1, shuffle=False)
      self.assertAllClose(model_v1.get_weights(), model_v2.get_weights(),
                          rtol=1e-5, atol=1e-5)
      self.assertAllClose(hist_1.history['loss'], hist_2.history['loss'],
                          rtol=1e-5, atol=1e-5)
Exemplo n.º 12
0
 def testConfig(self):
   """get_config/from_config must round-trip SGD's learning rate."""
   with test_utils.use_gpu():
     original = gradient_descent.SGD(learning_rate=1.0)
     config = original.get_config()
     clone = gradient_descent.SGD.from_config(config)
     lr_original = original._get_hyper('learning_rate')
     lr_clone = clone._get_hyper('learning_rate')
     self.evaluate(tf.compat.v1.global_variables_initializer())
     # Both hypers must resolve to the same float value.
     self.assertEqual(self.evaluate(lr_original), self.evaluate(lr_clone))
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)

     def loss():
       return 3 * var0

     # The learning-rate variable is created when minimize is called.
     original.minimize(loss, [var0])
     late_clone = gradient_descent.SGD.from_config(config)
     lr_late = late_clone._get_hyper('learning_rate')
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(lr_original), self.evaluate(lr_late))
Exemplo n.º 13
0
  def testBasic(self):
    """One SGD step with lr=3 moves each var by -3 times its gradient."""
    for dtype in _DATA_TYPES:
      with test_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)

        def loss(a=var0, b=var1):
          # Defaults bind the loop-local variables eagerly.
          return 5 * a + 3 * b

        sgd = gradient_descent.SGD(3.0)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Confirm initial values before the update runs.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Take a single optimizer step.
        step = sgd.minimize(loss, var_list=[var0, var1])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(step)
        # New value = old - 3 * grad (grads are 5 and 3 respectively).
        self.assertAllClose([-14., -13.], self.evaluate(var0))
        self.assertAllClose([-6., -5.], self.evaluate(var1))
Exemplo n.º 14
0
  def testWeights(self):
    """get_weights/set_weights must transfer Adam slot state between
    optimizers, and raise ValueError when slot shapes do not match.
    """
    with test_utils.use_gpu():
      opt1 = adam.Adam(learning_rate=1.0)
      var1 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss1 = lambda: 3 * var1
      opt_op_1 = opt1.minimize(loss1, [var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      config = opt1.get_config()
      opt2 = adam.Adam.from_config(config)
      var2 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss2 = lambda: 3 * var2
      opt_op_2 = opt2.minimize(loss2, [var2])
      weights = opt1.get_weights()

      # Assert set_weights and both variables get updated to same value.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_1, opt_op_2])
      self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
      self.assertEqual(1, self.evaluate(opt1.iterations))
      self.assertEqual(1, self.evaluate(opt2.iterations))

      # Minimizing over differently-shaped variables gives opt1 extra slots.
      var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss3 = lambda: 3 * var3 + 5 * var4
      opt_op_3 = opt1.minimize(loss3, [var3, var4])

      # Assert set_weights with ValueError since weight list does not match.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = opt1.get_weights()
      with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
        opt2.set_weights(weights)

      # Assert set_weights and variables get updated to same value.
      var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss4 = lambda: 3 * var5 + 5 * var6
      opt_op_4 = opt2.minimize(loss4, [var5, var6])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_3, opt_op_4])
      self.assertAllClose(
          self.evaluate([var3, var4]), self.evaluate([var5, var6]))
Exemplo n.º 15
0
    def test_group_conv(self, layer_cls, input_shape):
        """A grouped conv must equal per-group convolutions concatenated."""
        if tf.test.is_gpu_available(cuda_only=True):
            with test_utils.use_gpu():
                data = tf.random.uniform(shape=input_shape)

                layer = layer_cls(16, 3, groups=4, use_bias=False)
                layer.build(input_shape)

                # Convolve each of the 4 channel groups independently, then
                # stitch the results back together along the channel axis.
                data_groups = tf.split(data, 4, axis=-1)
                kernel_groups = tf.split(layer.kernel, 4, axis=-1)
                per_group = [
                    tf.nn.convolution(chunk, kernel)
                    for chunk, kernel in zip(data_groups, kernel_groups)
                ]
                reference = tf.concat(per_group, axis=-1)
                self.assertAllClose(
                    layer(data), reference, rtol=3e-5, atol=3e-5)
Exemplo n.º 16
0
  def testConstraint(self):
    """Variable constraints clamp values after each SGD update."""

    def clip_to_neg(x):
      return tf.clip_by_value(x, -0.1, 0.)

    def clip_to_unit(x):
      return tf.clip_by_value(x, 0., 1.)

    with test_utils.use_gpu():
      var0 = tf.Variable([1.0, 2.0], constraint=clip_to_neg)
      var1 = tf.Variable([3.0, 4.0], constraint=clip_to_unit)

      def loss():
        return 5 * var0 + 3 * var1

      sgd = gradient_descent.SGD(3.0)

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Confirm initial values before the update runs.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Take a single optimizer step.
      step = sgd.minimize(loss, var_list=[var0, var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.evaluate(step)
      # The raw updates overshoot the constraint ranges and get clipped.
      self.assertAllClose([-0.1, -0.1], self.evaluate(var0))
      self.assertAllClose([0., 0.], self.evaluate(var1))
Exemplo n.º 17
0
  def testPrecomputedGradient(self):
    """grad_loss scales each computed gradient elementwise before the step."""
    for dtype in _DATA_TYPES:
      with test_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)

        def loss(a=var0, b=var1):
          # Defaults bind the loop-local variables eagerly.
          return 5 * a + 3 * b

        grad_loss = tf.constant([42, -42], dtype=dtype)
        sgd = gradient_descent.SGD(3.0)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Confirm initial values before the update runs.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Take a single optimizer step with the precomputed grad_loss.
        step = sgd.minimize(loss, var_list=[var0, var1], grad_loss=grad_loss)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(step)
        # New value = old - lr * (coefficient * grad_loss).
        self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)],
                            self.evaluate(var0))
        self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)],
                            self.evaluate(var1))
Exemplo n.º 18
0
  def testGradientsAsVariables(self):
    """Gradients copied into tf.Variables can be fed to apply_gradients."""
    for i, dtype in enumerate(_DATA_TYPES):
      with test_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

        sgd = gradient_descent.SGD(3.0)
        grads_and_vars = sgd._compute_gradients(loss, [var0, var1])
        # Convert gradients to tf.Variables
        converted_grads = [
            tf.Variable(
                tf.zeros([2], dtype), name='c_%d_%d' % (i, j))
            for j, gv in enumerate(grads_and_vars)
        ]
        convert_ops = [
            tf.compat.v1.assign(converted_grads[j], gv[0])
            for j, gv in enumerate(grads_and_vars)
        ]

        # Run convert_ops to achieve the gradients converting
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(convert_ops)
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 1 step of sgd through optimizer.  The initializer is re-run
        # here (it resets var0/var1), so convert_ops must run again too.
        converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
        opt_op = sgd.apply_gradients(converted_grads_and_vars)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(convert_ops)
        self.evaluate(opt_op)

        # Validate updated params: x - 3 * grad (grads are 5 and 3).
        self.assertAllClose([-14., -13.], self.evaluate(var0))
        self.assertAllClose([-6., -5.], self.evaluate(var1))
Exemplo n.º 19
0
    def testSparse(self):
        """Sparse (IndexedSlices) RMSprop updates must match the numpy
        reference implementation for every hyperparameter combination in
        _TESTPARAMS, over three update steps.
        """
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (
            dtype,
            learning_rate,
            rho,
            momentum,
            epsilon,
            centered,
        ) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu():  # noqa: E501
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

                # Each gradient touches only a single index of its variable.
                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np),
                    tf.constant(grads0_np_indices),
                    tf.constant([1]),
                )
                grads1_np_indices = np.array([1], dtype=np.int32)
                grads1 = tf.IndexedSlices(
                    tf.constant(grads1_np),
                    tf.constant(grads1_np_indices),
                    tf.constant([1]),
                )
                opt = rmsprop.RMSprop(
                    learning_rate=learning_rate,
                    rho=rho,
                    momentum=momentum,
                    epsilon=epsilon,
                    centered=centered,
                )
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1])
                )
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Slot variables exist only for configurations that use them.
                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    self.assertEqual(mg0 is not None, centered)
                    mg1 = opt.get_slot(var1, "mg")
                    self.assertEqual(mg1 is not None, centered)
                else:
                    mg0 = None
                    mg1 = None
                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)
                if momentum > 0.0:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                # Numpy mirrors of the optimizer's slot state.
                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    (
                        var0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var0_np,
                        grads0_np_indices,
                        grads0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )
                    (
                        var1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var1_np,
                        grads1_np_indices,
                        grads1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0)
                        )
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1)
                        )
                    self.assertAllCloseAccordingToType(
                        rms0_np, self.evaluate(rms0)
                    )
                    self.assertAllCloseAccordingToType(
                        rms1_np, self.evaluate(rms1)
                    )
                    if momentum > 0.0:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0)
                        )
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1)
                        )
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0)
                    )
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1)
                    )
Exemplo n.º 20
0
 def testIterationWithoutMinimize(self):
   """A fresh optimizer reports zero iterations before any update."""
   with test_utils.use_gpu():
     optimizer = gradient_descent.SGD(3.0)
     self.evaluate(optimizer.iterations.initializer)
     self.assertEqual(0, self.evaluate(optimizer.iterations))
Exemplo n.º 21
0
    def test_model_with_crossentropy_losses_channels_first(self):
        """Tests use of all crossentropy losses with `channels_first`.

        Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
        and `binary_crossentropy`.
        Verifies that evaluate gives the same result with either `channels_first`
        or `channels_last` image_data_format.
        """
        def prepare_simple_model(input_tensor, loss_name, target):
            """Build and compile a one-conv-layer model for `loss_name`."""
            # Channel axis position depends on the active image data format.
            axis = 1 if backend.image_data_format() == "channels_first" else -1
            loss = None
            num_channels = None
            activation = None
            if loss_name == "sparse_categorical_crossentropy":
                loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
                    y_true,
                    y_pred,
                    axis=axis)
                num_channels = int(np.amax(target) + 1)
                activation = "softmax"
            elif loss_name == "categorical_crossentropy":
                loss = lambda y_true, y_pred: backend.categorical_crossentropy(  # pylint: disable=g-long-lambda
                    y_true,
                    y_pred,
                    axis=axis)
                num_channels = target.shape[axis]
                activation = "softmax"
            elif loss_name == "binary_crossentropy":
                loss = lambda y_true, y_pred: backend.binary_crossentropy(  # pylint: disable=g-long-lambda, unnecessary-lambda
                    y_true, y_pred)
                num_channels = target.shape[axis]
                activation = "sigmoid"

            predictions = Conv2D(
                num_channels,
                1,
                activation=activation,
                kernel_initializer="ones",
                bias_initializer="ones",
            )(input_tensor)
            simple_model = training.Model(inputs=input_tensor,
                                          outputs=predictions)
            simple_model.compile(optimizer="rmsprop", loss=loss)
            return simple_model

        if tf.test.is_gpu_available(cuda_only=True):
            with test_utils.use_gpu():
                losses_to_test = [
                    "sparse_categorical_crossentropy",
                    "categorical_crossentropy",
                    "binary_crossentropy",
                ]

                data_channels_first = np.array(
                    [[[[8.0, 7.1, 0.0], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]],
                    dtype=np.float32,
                )
                # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
                # categorical_crossentropy, and 2-class binary_crossentropy:
                labels_channels_first = [
                    np.array(
                        [[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32
                    ),  # pylint: disable=line-too-long
                    np.array(
                        [
                            [
                                [[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                                [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                                [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                                [[0, 0, 1], [0, 0, 0], [1, 0, 0]],
                            ]
                        ],
                        dtype=np.float32,
                    ),  # pylint: disable=line-too-long
                    np.array(
                        [
                            [
                                [[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                                [[1, 0, 1], [1, 0, 1], [1, 1, 0]],
                            ]
                        ],
                        dtype=np.float32,
                    ),
                ]  # pylint: disable=line-too-long
                # Compute one loss for each loss function in the list `losses_to_test`:
                loss_channels_last = [0.0, 0.0, 0.0]
                loss_channels_first = [0.0, 0.0, 0.0]

                # Save the global data format so it can be restored at the end.
                old_data_format = backend.image_data_format()

                # Evaluate a simple network with channels last, with all three loss
                # functions:
                backend.set_image_data_format("channels_last")
                data = np.moveaxis(data_channels_first, 1, -1)
                for index, loss_function in enumerate(losses_to_test):
                    labels = np.moveaxis(labels_channels_first[index], 1, -1)
                    inputs = input_layer.Input(shape=(3, 3, 1))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_last[index] = model.evaluate(x=data,
                                                               y=labels,
                                                               batch_size=1,
                                                               verbose=0)

                # Evaluate the same network with channels first, with all three loss
                # functions:
                backend.set_image_data_format("channels_first")
                data = data_channels_first
                for index, loss_function in enumerate(losses_to_test):
                    labels = labels_channels_first[index]
                    inputs = input_layer.Input(shape=(1, 3, 3))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_first[index] = model.evaluate(x=data,
                                                                y=labels,
                                                                batch_size=1,
                                                                verbose=0)

                backend.set_image_data_format(old_data_format)

                np.testing.assert_allclose(
                    loss_channels_first,
                    loss_channels_last,
                    rtol=1e-06,
                    err_msg="{}{}".format(
                        "Computed different losses for ",
                        "channels_first and channels_last",
                    ),
                )