Example No. 1: SGD config round-trip with learning-rate decay schedules
 def testConfigWithLearningRateDecay(self):
   with testing_utils.use_gpu():
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
     for decay_schedule in [
         learning_rate_schedule.InverseTimeDecay(
             0.5, decay_steps=1.0, decay_rate=0.1),
         learning_rate_schedule.PiecewiseConstantDecay(
             [5], [1., .5])
     ]:
       step = 10
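        # A LearningRateSchedule passed as the learning rate is serialized as
        # part of the optimizer config and restored by from_config.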
       opt = gradient_descent.SGD(decay_schedule)
       config = opt.get_config()
       opt2 = gradient_descent.SGD.from_config(config)
        # Assert the original and deserialized schedules evaluate to the same value.
       self.assertAllEqual(
           decay_schedule(step),
           opt._get_hyper('learning_rate')(step))
       self.assertAllEqual(
           decay_schedule(step),
           opt2._get_hyper('learning_rate')(step))
       loss = lambda: 3 * var0
       # learning rate variable is created when calling minimize.
       opt.minimize(loss, [var0])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       config = opt.get_config()
       opt3 = gradient_descent.SGD.from_config(config)
       self.assertAllEqual(
           self.evaluate(opt._get_hyper('learning_rate')(step)),
           opt3._get_hyper('learning_rate')(step))
Example No. 2: apply_gradients raises ValueError when no gradients are provided
 def testNoGradientsForAnyVariables_ApplyGradients(self):
   for dtype in _DATA_TYPES:
     with testing_utils.use_gpu():
       var0 = tf.Variable([1.0, 2.0], dtype=dtype)
       var1 = tf.Variable([3.0, 4.0], dtype=dtype)
       sgd_op = gradient_descent.SGD(3.0)
       with self.assertRaisesRegex(ValueError,
                                   'No gradients provided for any variable'):
         sgd_op.apply_gradients([(None, var0), (None, var1)])
Example No. 3: SGD with clipnorm
 def testGradClipNorm(self):
   with testing_utils.use_gpu():
     var = tf.Variable([1.0])
     loss = lambda: 3 * var
     opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0)
     opt_op = opt.minimize(loss, [var])
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.evaluate(opt_op)
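     # The gradient 3.0 is clipped to norm 1.0, so var = 1.0 - 1.0 * 1.0 = 0.0.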
     self.assertAllClose([0.], self.evaluate(var))
Example No. 4: numeric equivalence of Nesterov momentum across SGD implementations
  def testNumericEquivalenceForNesterovMomentum(self):
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with testing_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = testing_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      num_hidden = 5
      model_k_v1 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())
      model_tf = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_tf.set_weights(model_k_v2.get_weights())
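      # All three models start from identical weights so their training
      # histories can be compared exactly.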

      opt_k_v1 = optimizer_v1.SGD(momentum=0.9, nesterov=True)
      opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True)
      opt_tf = tf.compat.v1.train.MomentumOptimizer(
          learning_rate=0.01, momentum=0.9, use_nesterov=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      model_tf.compile(
          opt_tf,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())

      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_tf = model_tf.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      self.assertAllClose(model_k_v1.get_weights(), model_tf.get_weights())
      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_tf.history['loss'])
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
Example No. 5: minimize raises ValueError when a variable has no gradient
 def testNoGradients(self):
   for dtype in _DATA_TYPES:
     with testing_utils.use_gpu():
       var0 = tf.Variable([1.0, 2.0], dtype=dtype)
       var1 = tf.Variable([3.0, 4.0], dtype=dtype)
       loss = lambda: 5 * var0  # pylint: disable=cell-var-from-loop
       sgd_op = gradient_descent.SGD(3.0)
       with self.assertRaisesRegex(ValueError, 'No gradients'):
         # var1 has no gradient
         sgd_op.minimize(loss, var_list=[var1])
Example No. 6: layer_test helper for Conv1DTranspose
  def _run_test(self, kwargs, expected_output_shape):
    num_samples = 2
    stack_size = 3
    num_col = 6

    with testing_utils.use_gpu():
      testing_utils.layer_test(
          keras.layers.Conv1DTranspose,
          kwargs=kwargs,
          input_shape=(num_samples, num_col, stack_size),
          expected_output_shape=expected_output_shape)
Example No. 7: minimize raises ValueError when no variable has a gradient
  def testNoGradientsForAnyVariables_Minimize(self):
    for dtype in _DATA_TYPES:
      with testing_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        loss = lambda: tf.constant(5.0)

        sgd_op = gradient_descent.SGD(3.0)
        with self.assertRaisesRegex(ValueError,
                                    'No gradients provided for any variable'):
          sgd_op.minimize(loss, var_list=[var0, var1])
Example No. 8: grouped Conv2D matches a depthwise convolution when groups == channels
  def test_group_conv_depthwise(self):
    if tf.test.is_gpu_available(cuda_only=True):
      with testing_utils.use_gpu():
        inputs = tf.random.uniform(shape=(3, 27, 27, 32))

        layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False)
        layer.build((3, 27, 27, 32))
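        # With groups == channels each filter sees a single input channel, so
        # the grouped kernel reshaped to [3, 3, 32, 1] is an equivalent
        # depthwise kernel.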

        weights_dw = tf.reshape(layer.kernel, [3, 3, 32, 1])
        expected_outputs = tf.compat.v1.nn.depthwise_conv2d(
            inputs, weights_dw, strides=[1, 1, 1, 1], padding='VALID')

        self.assertAllClose(layer(inputs), expected_outputs, rtol=1e-5)
Example No. 9: SGD with global_clipnorm
 def testGradGlobalClipNorm(self):
   with testing_utils.use_gpu():
     # The global l2 norm of the gradients [3.0, 4.0] is sqrt(3^2 + 4^2) = 5.0.
     var1 = tf.Variable([1.0])
     var2 = tf.Variable([2.0])
     loss = lambda: 3 * var1 + 4 * var2
     opt = gradient_descent.SGD(learning_rate=1.0, global_clipnorm=2.0)
     opt_op = opt.minimize(loss, [var1, var2])
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.evaluate(opt_op)
     # grad1 = 3.0 * 2.0 / 5.0 = 1.2
     self.assertAllClose([-.2], self.evaluate(var1))
     # grad2 = 4.0 * 2.0 / 5.0 = 1.6
     self.assertAllClose([.4], self.evaluate(var2))
Example No. 10: computing gradients with respect to plain tensors
  def testComputeGradientsWithTensors(self):
    with testing_utils.use_gpu():
      x = tf.convert_to_tensor(1.0)

      def f():
        return x * x

      sgd = gradient_descent.SGD(3.0)
      grads_and_vars = sgd._compute_gradients(f, [x])
      self.assertLen(grads_and_vars, 1)
      grad, x_as_var = grads_and_vars[0]
      self.assertIs(x, x_as_var)
      self.assertEqual(2.0, self.evaluate(grad))

      with self.assertRaises(NotImplementedError):
        sgd.apply_gradients(grads_and_vars)
Example No. 11: grouped convolution matches per-group convolutions
  def test_group_conv(self, layer_cls, input_shape):
    if tf.test.is_gpu_available(cuda_only=True):
      with testing_utils.use_gpu():
        inputs = tf.random.uniform(shape=input_shape)

        layer = layer_cls(16, 3, groups=4, use_bias=False)
        layer.build(input_shape)
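        # A grouped conv convolves each of the 4 input-channel slices with its
        # own slice of the kernel and concatenates the results.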

        input_slices = tf.split(inputs, 4, axis=-1)
        weight_slices = tf.split(layer.kernel, 4, axis=-1)
        expected_outputs = tf.concat([
            tf.nn.convolution(inputs, weights)
            for inputs, weights in zip(input_slices, weight_slices)
        ], axis=-1)
        self.assertAllClose(
            layer(inputs), expected_outputs, rtol=3e-5, atol=3e-5)
Example No. 12: v1/v2 optimizer compatibility helper
  def _testOptimizersCompatibility(self, opt_v1, opt_v2, test_weights=True):
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with testing_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = testing_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      num_hidden = 5
      model_v1 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_v1.compile(
          opt_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      model_v1.fit(x, y, batch_size=5, epochs=1)

      model_v2 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_v2.set_weights(model_v1.get_weights())
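      # model_v2 continues from model_v1's trained weights so the next epoch of
      # each optimizer can be compared.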
      model_v2.compile(
          opt_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      if not tf.compat.v1.executing_eagerly_outside_functions():
        model_v2._make_train_function()
      if test_weights:
        opt_v2.set_weights(opt_v1.get_weights())

      hist_1 = model_v1.fit(x, y, batch_size=5, epochs=1, shuffle=False)
      hist_2 = model_v2.fit(x, y, batch_size=5, epochs=1, shuffle=False)
      self.assertAllClose(model_v1.get_weights(), model_v2.get_weights(),
                          rtol=1e-5, atol=1e-5)
      self.assertAllClose(hist_1.history['loss'], hist_2.history['loss'],
                          rtol=1e-5, atol=1e-5)
Example No. 13: SGD config round-trip
 def testConfig(self):
   with testing_utils.use_gpu():
     opt = gradient_descent.SGD(learning_rate=1.0)
     config = opt.get_config()
     opt2 = gradient_descent.SGD.from_config(config)
     lr = opt._get_hyper('learning_rate')
     lr2 = opt2._get_hyper('learning_rate')
     self.evaluate(tf.compat.v1.global_variables_initializer())
     # Assert both learning rates are equal float values.
     self.assertEqual(self.evaluate(lr), self.evaluate(lr2))
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
     loss = lambda: 3 * var0
     # The learning rate variable is created when calling minimize.
     opt.minimize(loss, [var0])
     opt3 = gradient_descent.SGD.from_config(config)
     lr3 = opt3._get_hyper('learning_rate')
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(lr), self.evaluate(lr3))
Example No. 14: basic SGD update
  def testBasic(self):
    for dtype in _DATA_TYPES:
      with testing_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
        sgd = gradient_descent.SGD(3.0)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Run 1 step of sgd through optimizer
        opt_op = sgd.minimize(loss, var_list=[var0, var1])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(opt_op)
        # Validate updated params
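        # var0 = [1., 2.] - 3.0 * 5.0 = [-14., -13.]
        # var1 = [3., 4.] - 3.0 * 3.0 = [-6., -5.]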
        self.assertAllClose([-14., -13.], self.evaluate(var0))
        self.assertAllClose([-6., -5.], self.evaluate(var1))
Example No. 15: Adam get_weights/set_weights
  def testWeights(self):
    with testing_utils.use_gpu():
      opt1 = adam.Adam(learning_rate=1.0)
      var1 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss1 = lambda: 3 * var1
      opt_op_1 = opt1.minimize(loss1, [var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      config = opt1.get_config()
      opt2 = adam.Adam.from_config(config)
      var2 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss2 = lambda: 3 * var2
      opt_op_2 = opt2.minimize(loss2, [var2])
      weights = opt1.get_weights()

      # Assert that after set_weights both variables are updated to the same value.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_1, opt_op_2])
      self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
      self.assertEqual(1, self.evaluate(opt1.iterations))
      self.assertEqual(1, self.evaluate(opt2.iterations))

      var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss3 = lambda: 3 * var3 + 5 * var4
      opt_op_3 = opt1.minimize(loss3, [var3, var4])

      # Assert that set_weights raises ValueError since the weight lists do not match.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = opt1.get_weights()
      with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
        opt2.set_weights(weights)

      # Assert that after set_weights the variables are updated to the same value.
      var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss4 = lambda: 3 * var5 + 5 * var6
      opt_op_4 = opt2.minimize(loss4, [var5, var6])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_3, opt_op_4])
      self.assertAllClose(
          self.evaluate([var3, var4]), self.evaluate([var5, var6]))
Example No. 16: variable constraints applied after the update
  def testConstraint(self):
    constraint_01 = lambda x: tf.clip_by_value(x, -0.1, 0.)
    constraint_0 = lambda x: tf.clip_by_value(x, 0., 1.)
    with testing_utils.use_gpu():
      var0 = tf.Variable([1.0, 2.0], constraint=constraint_01)
      var1 = tf.Variable([3.0, 4.0], constraint=constraint_0)
      loss = lambda: 5 * var0 + 3 * var1
      sgd = gradient_descent.SGD(3.0)

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Run 1 step of sgd through optimizer
      opt_op = sgd.minimize(loss, var_list=[var0, var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.evaluate(opt_op)
      # Validate updated params
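      # Unconstrained updates would give [-14., -13.] and [-6., -5.]; the
      # constraints then clip var0 into [-0.1, 0.] and var1 into [0., 1.].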
      self.assertAllClose([-0.1, -0.1], self.evaluate(var0))
      self.assertAllClose([0., 0.], self.evaluate(var1))
Example No. 17: minimize with a precomputed grad_loss
  def testPrecomputedGradient(self):
    for dtype in _DATA_TYPES:
      with testing_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
        grad_loss = tf.constant([42, -42], dtype=dtype)
        sgd = gradient_descent.SGD(3.0)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Run 1 step of sgd through optimizer
        opt_op = sgd.minimize(loss, var_list=[var0, var1], grad_loss=grad_loss)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(opt_op)
        # Validate updated params
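        # With grad_loss = [42, -42] the gradient for var0 is 5 * grad_loss and
        # for var1 is 3 * grad_loss, each applied with learning rate 3.0.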
        self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)],
                            self.evaluate(var0))
        self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)],
                            self.evaluate(var1))
Example No. 18: applying gradients stored in variables
  def testGradientsAsVariables(self):
    for i, dtype in enumerate(_DATA_TYPES):
      with testing_utils.use_gpu():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

        sgd = gradient_descent.SGD(3.0)
        grads_and_vars = sgd._compute_gradients(loss, [var0, var1])
        # Convert gradients to tf.Variables
        converted_grads = [
            tf.Variable(
                tf.zeros([2], dtype), name='c_%d_%d' % (i, j))
            for j, gv in enumerate(grads_and_vars)
        ]
        convert_ops = [
            tf.compat.v1.assign(converted_grads[j], gv[0])
            for j, gv in enumerate(grads_and_vars)
        ]

        # Run convert_ops to copy the gradient values into the variables.
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(convert_ops)
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 1 step of sgd through optimizer
        converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
        opt_op = sgd.apply_gradients(converted_grads_and_vars)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(convert_ops)
        self.evaluate(opt_op)

        # Validate updated params
        self.assertAllClose([-14., -13.], self.evaluate(var0))
        self.assertAllClose([-6., -5.], self.evaluate(var1))
Example No. 19: RMSprop dense updates checked against a NumPy reference
    def testDense(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (dtype, learning_rate, rho, momentum, epsilon,
             centered) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(
            ), testing_utils.use_gpu():
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, dtype=dtype)
                var1 = tf.Variable(var1_np, dtype=dtype)
                grads0 = tf.constant(grads0_np, dtype=dtype)
                grads1 = tf.constant(grads1_np, dtype=dtype)
                opt = rmsprop.RMSprop(learning_rate=learning_rate,
                                      rho=rho,
                                      momentum=momentum,
                                      epsilon=epsilon,
                                      centered=centered)

                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    mg1 = opt.get_slot(var1, "mg")
                else:
                    mg0 = None
                    mg1 = None

                if momentum > 0.:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)

                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                        var0_np, grads0_np, mg0_np, rms0_np, mom0_np,
                        learning_rate, rho, momentum, epsilon, centered)
                    var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                        var1_np, grads1_np, mg1_np, rms1_np, mom1_np,
                        learning_rate, rho, momentum, epsilon, centered)

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0))
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1))
                    if momentum > 0.:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0))
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1))
                    self.assertAllCloseAccordingToType(rms0_np,
                                                       self.evaluate(rms0))
                    self.assertAllCloseAccordingToType(rms1_np,
                                                       self.evaluate(rms1))
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example No. 20: iterations counter without minimize
 def testIterationWithoutMinimize(self):
   with testing_utils.use_gpu():
     sgd = gradient_descent.SGD(3.0)
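     # `iterations` exists before any update is applied and starts at 0.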
     self.evaluate(sgd.iterations.initializer)
     self.assertEqual(0, self.evaluate(sgd.iterations))
Example No. 21: crossentropy losses with channels_first vs channels_last
  def test_model_with_crossentropy_losses_channels_first(self):
    """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that evaluate gives the same result with either `channels_first`
    or `channels_last` image_data_format.
    """
    def prepare_simple_model(input_tensor, loss_name, target):
      axis = 1 if backend.image_data_format() == 'channels_first' else -1
      loss = None
      num_channels = None
      activation = None
      if loss_name == 'sparse_categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = int(np.amax(target) + 1)
        activation = 'softmax'
      elif loss_name == 'categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = target.shape[axis]
        activation = 'softmax'
      elif loss_name == 'binary_crossentropy':
        loss = lambda y_true, y_pred: backend.binary_crossentropy(  # pylint: disable=g-long-lambda, unnecessary-lambda
            y_true, y_pred)
        num_channels = target.shape[axis]
        activation = 'sigmoid'

      predictions = Conv2D(num_channels,
                           1,
                           activation=activation,
                           kernel_initializer='ones',
                           bias_initializer='ones')(input_tensor)
      simple_model = training.Model(inputs=input_tensor, outputs=predictions)
      simple_model.compile(optimizer='rmsprop', loss=loss)
      return simple_model

    if tf.test.is_gpu_available(cuda_only=True):
      with testing_utils.use_gpu():
        losses_to_test = ['sparse_categorical_crossentropy',
                          'categorical_crossentropy', 'binary_crossentropy']

        data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55],
                                          [0.9, 4.2, 11.2]]]], dtype=np.float32)
        # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
        # categorical_crossentropy, and 2-class binary_crossentropy:
        labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                                            [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                                            [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                                            [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                                            [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)]  # pylint: disable=line-too-long
        # Compute one loss for each loss function in the list `losses_to_test`:
        loss_channels_last = [0., 0., 0.]
        loss_channels_first = [0., 0., 0.]

        old_data_format = backend.image_data_format()

        # Evaluate a simple network with channels last, with all three loss
        # functions:
        backend.set_image_data_format('channels_last')
        data = np.moveaxis(data_channels_first, 1, -1)
        for index, loss_function in enumerate(losses_to_test):
          labels = np.moveaxis(labels_channels_first[index], 1, -1)
          inputs = input_layer.Input(shape=(3, 3, 1))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_last[index] = model.evaluate(x=data, y=labels,
                                                     batch_size=1, verbose=0)

        # Evaluate the same network with channels first, with all three loss
        # functions:
        backend.set_image_data_format('channels_first')
        data = data_channels_first
        for index, loss_function in enumerate(losses_to_test):
          labels = labels_channels_first[index]
          inputs = input_layer.Input(shape=(1, 3, 3))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_first[index] = model.evaluate(x=data, y=labels,
                                                      batch_size=1, verbose=0)

        backend.set_image_data_format(old_data_format)

        np.testing.assert_allclose(
            loss_channels_first,
            loss_channels_last,
            rtol=1e-06,
            err_msg='{}{}'.format('Computed different losses for ',
                                  'channels_first and channels_last'))