def testConfigWithLearningRateDecay(self):
    """Checks SGD config round-trips when the learning rate is a schedule.

    For each schedule, optimizers rebuilt with `from_config` must report the
    same learning rate at a fixed step as the source, both for a config taken
    before `minimize` and for one taken after `minimize` has been called.
    """
    with testing_utils.use_gpu():
        var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
        schedules = [
            learning_rate_schedule.InverseTimeDecay(
                0.5, decay_steps=1.0, decay_rate=0.1),
            learning_rate_schedule.PiecewiseConstantDecay(
                [5], [1., .5]),
        ]
        for decay_schedule in schedules:
            step = 10
            opt = gradient_descent.SGD(decay_schedule)
            restored = gradient_descent.SGD.from_config(opt.get_config())

            # Both the original and the restored optimizer must agree with
            # the raw schedule value at `step`.
            for candidate in (opt, restored):
                self.assertAllEqual(
                    decay_schedule(step),
                    candidate._get_hyper('learning_rate')(step))

            def loss():
                return 3 * var0

            # learning rate variable is created when calling minimize.
            opt.minimize(loss, [var0])
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # Serialize again now that minimize has run.
            restored_after = gradient_descent.SGD.from_config(
                opt.get_config())
            self.assertAllEqual(
                self.evaluate(opt._get_hyper('learning_rate')(step)),
                restored_after._get_hyper('learning_rate')(step))
def testNoGradientsForAnyVariables_ApplyGradients(self):
    """Checks `apply_gradients` raises when every gradient is `None`."""
    for dtype in _DATA_TYPES:
        with testing_utils.use_gpu():
            variables = [
                tf.Variable([1.0, 2.0], dtype=dtype),
                tf.Variable([3.0, 4.0], dtype=dtype),
            ]
            optimizer = gradient_descent.SGD(3.0)
            # Pair each variable with a missing (None) gradient.
            grads_and_vars = [(None, variable) for variable in variables]
            with self.assertRaisesRegex(
                    ValueError, 'No gradients provided for any variable'):
                optimizer.apply_gradients(grads_and_vars)
def testGradClipNorm(self):
    """Checks one SGD step with `clipnorm=1.0`.

    The loss is `3 * var` with `var` starting at 1.0 and a learning rate of
    1.0; after one step the variable is expected to equal 0.0.
    """
    with testing_utils.use_gpu():
        var = tf.Variable([1.0])

        def loss():
            return 3 * var

        optimizer = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0)
        train_op = optimizer.minimize(loss, [var])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(train_op)
        self.assertAllClose([0.], self.evaluate(var))
def testNumericEquivalenceForNesterovMomentum(self):
    """Checks Nesterov-momentum SGD agrees across three optimizers.

    Three identically initialized small MLPs are trained on the same data
    with `optimizer_v1.SGD`, `gradient_descent.SGD` and
    `tf.compat.v1.train.MomentumOptimizer`; the resulting model weights,
    optimizer weights (v1 vs v2) and loss histories must match.
    Skipped when executing eagerly.
    """
    if tf.executing_eagerly():
        self.skipTest(
            'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with testing_utils.use_gpu():
        input_dim = 3
        num_classes = 2
        (x, y), _ = testing_utils.get_test_data(
            train_samples=20,
            test_samples=10,
            input_shape=(input_dim,),
            num_classes=num_classes)
        y = np_utils.to_categorical(y)

        def make_mlp():
            # All three models share the same architecture.
            return testing_utils.get_small_sequential_mlp(
                num_hidden=5,
                num_classes=num_classes,
                input_dim=input_dim)

        model_k_v1 = make_mlp()
        model_k_v2 = make_mlp()
        model_k_v2.set_weights(model_k_v1.get_weights())
        model_tf = make_mlp()
        model_tf.set_weights(model_k_v2.get_weights())

        opt_k_v1 = optimizer_v1.SGD(momentum=0.9, nesterov=True)
        opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True)
        opt_tf = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=0.01, momentum=0.9, use_nesterov=True)

        # Compile, then fit, in the order v1 -> v2 -> tf.
        models_and_opts = (
            (model_k_v1, opt_k_v1),
            (model_k_v2, opt_k_v2),
            (model_tf, opt_tf),
        )
        for model, optimizer in models_and_opts:
            model.compile(
                optimizer,
                loss='categorical_crossentropy',
                metrics=[],
                run_eagerly=testing_utils.should_run_eagerly())

        hist_k_v1, hist_k_v2, hist_tf = [
            model.fit(x, y, batch_size=5, epochs=10, shuffle=False)
            for model, _ in models_and_opts
        ]

        self.assertAllClose(
            model_k_v1.get_weights(), model_tf.get_weights())
        self.assertAllClose(
            model_k_v1.get_weights(), model_k_v2.get_weights())
        self.assertAllClose(
            opt_k_v1.get_weights(), opt_k_v2.get_weights())
        self.assertAllClose(
            hist_k_v1.history['loss'], hist_tf.history['loss'])
        self.assertAllClose(
            hist_k_v1.history['loss'], hist_k_v2.history['loss'])
def testNoGradients(self):
    """Checks `minimize` raises when the loss does not depend on `var_list`.

    The loss only involves `var0`, while only `var1` is passed as the
    variable to optimize.
    """
    for dtype in _DATA_TYPES:
        with testing_utils.use_gpu():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

            def loss():
                return 5 * var0  # pylint: disable=cell-var-from-loop

            optimizer = gradient_descent.SGD(3.0)
            with self.assertRaisesRegex(ValueError, 'No gradients'):
                # var1 has no gradient
                optimizer.minimize(loss, var_list=[var1])
def _run_test(self, kwargs, expected_output_shape):
    """Runs `testing_utils.layer_test` on `Conv1DTranspose`.

    Args:
      kwargs: Keyword arguments forwarded to `layer_test` as the layer's
        constructor arguments.
      expected_output_shape: Output shape forwarded to `layer_test`.
    """
    # (num_samples, num_col, stack_size)
    input_shape = (2, 6, 3)
    with testing_utils.use_gpu():
        testing_utils.layer_test(
            keras.layers.Conv1DTranspose,
            kwargs=kwargs,
            input_shape=input_shape,
            expected_output_shape=expected_output_shape)
def testNoGradientsForAnyVariables_Minimize(self):
    """Checks `minimize` raises when the loss is a constant.

    The loss does not involve any of the variables in `var_list`.
    """
    for dtype in _DATA_TYPES:
        with testing_utils.use_gpu():
            variables = [
                tf.Variable([1.0, 2.0], dtype=dtype),
                tf.Variable([3.0, 4.0], dtype=dtype),
            ]

            def loss():
                return tf.constant(5.0)

            optimizer = gradient_descent.SGD(3.0)
            with self.assertRaisesRegex(
                    ValueError, 'No gradients provided for any variable'):
                optimizer.minimize(loss, var_list=variables)
def test_group_conv_depthwise(self):
    """Checks `Conv2D` with one group per channel against depthwise conv.

    A `Conv2D` layer with 32 filters and `groups=32` is applied to a random
    32-channel input and compared with `tf.compat.v1.nn.depthwise_conv2d`
    using the same kernel reshaped to `[3, 3, 32, 1]`.

    The test is reported as skipped (instead of silently passing without
    checking anything) when no CUDA GPU is available.
    """
    if not tf.test.is_gpu_available(cuda_only=True):
        self.skipTest('Grouped convolution test requires a CUDA GPU')

    input_shape = (3, 27, 27, 32)
    with testing_utils.use_gpu():
        inputs = tf.random.uniform(shape=input_shape)

        layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False)
        layer.build(input_shape)

        # Reinterpret the grouped kernel as a depthwise kernel.
        weights_dw = tf.reshape(layer.kernel, [3, 3, 32, 1])
        expected_outputs = tf.compat.v1.nn.depthwise_conv2d(
            inputs, weights_dw, strides=[1, 1, 1, 1], padding='VALID')

        self.assertAllClose(layer(inputs), expected_outputs, rtol=1e-5)
def testGradGlobalClipNorm(self):
    """Checks one SGD step with `global_clipnorm=2.0` on two variables.

    The loss is `3 * var1 + 4 * var2`, so the raw gradients are 3.0 and 4.0
    (joint l2 norm 5.0). The expected values below correspond to both
    gradients being scaled by 2.0 / 5.0 before a step with learning rate 1.0.
    """
    with testing_utils.use_gpu():
        # l2 norm is 5.0
        var1 = tf.Variable([1.0])
        var2 = tf.Variable([2.0])

        def loss():
            return 3 * var1 + 4 * var2

        optimizer = gradient_descent.SGD(
            learning_rate=1.0, global_clipnorm=2.0)
        train_op = optimizer.minimize(loss, [var1, var2])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(train_op)

        # grad1 = 3.0 * 2.0 / 5.0 = 1.2
        self.assertAllClose([-.2], self.evaluate(var1))
        # grad2 = 4.0 * 2.0 / 5.0 = 1.6
        self.assertAllClose([.4], self.evaluate(var2))
def testComputeGradientsWithTensors(self):
    """Checks `_compute_gradients` accepts a plain tensor as the "variable".

    The gradient of `x * x` at `x = 1.0` must be 2.0 and the returned
    variable must be the very same tensor object. Applying those gradients
    is expected to raise `NotImplementedError`.
    """
    with testing_utils.use_gpu():
        x = tf.convert_to_tensor(1.0)
        square = lambda: x * x

        optimizer = gradient_descent.SGD(3.0)
        grads_and_vars = optimizer._compute_gradients(square, [x])
        self.assertLen(grads_and_vars, 1)

        grad, returned_var = grads_and_vars[0]
        self.assertIs(x, returned_var)
        self.assertEqual(2.0, self.evaluate(grad))

        with self.assertRaises(NotImplementedError):
            optimizer.apply_gradients(grads_and_vars)
def test_group_conv(self, layer_cls, input_shape):
    """Checks a grouped convolution layer against per-group convolutions.

    A layer with 16 filters and `groups=4` is compared with the result of
    splitting the input and the kernel into 4 slices along the last axis,
    convolving each pair with `tf.nn.convolution`, and concatenating the
    outputs along the last axis.

    The test is reported as skipped (instead of silently passing without
    checking anything) when no CUDA GPU is available.

    Args:
      layer_cls: Convolution layer class to instantiate.
      input_shape: Shape of the random input tensor; also passed to
        `layer.build`.
    """
    if not tf.test.is_gpu_available(cuda_only=True):
        self.skipTest('Grouped convolution test requires a CUDA GPU')

    with testing_utils.use_gpu():
        inputs = tf.random.uniform(shape=input_shape)

        layer = layer_cls(16, 3, groups=4, use_bias=False)
        layer.build(input_shape)

        input_slices = tf.split(inputs, 4, axis=-1)
        weight_slices = tf.split(layer.kernel, 4, axis=-1)
        # Distinct loop names avoid shadowing the outer `inputs` tensor.
        expected_outputs = tf.concat(
            [
                tf.nn.convolution(input_slice, weight_slice)
                for input_slice, weight_slice in zip(
                    input_slices, weight_slices)
            ],
            axis=-1)

        self.assertAllClose(
            layer(inputs), expected_outputs, rtol=3e-5, atol=3e-5)
def _testOptimizersCompatibility(self, opt_v1, opt_v2, test_weights=True):
    """Trains two small MLPs with a v1 and a v2 optimizer and compares them.

    The v1 model is trained for one epoch first; the v2 model then starts
    from the v1 model's weights (and, optionally, the v1 optimizer's
    weights). After one further epoch on each, model weights and loss
    histories must match within 1e-5. Skipped when executing eagerly.

    Args:
      opt_v1: Optimizer used to compile the first model.
      opt_v2: Optimizer used to compile the second model.
      test_weights: If true, copies `opt_v1`'s weights into `opt_v2` before
        the comparison epoch.
    """
    if tf.executing_eagerly():
        self.skipTest(
            'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with testing_utils.use_gpu():
        input_dim = 3
        num_classes = 2
        (x, y), _ = testing_utils.get_test_data(
            train_samples=20,
            test_samples=10,
            input_shape=(input_dim,),
            num_classes=num_classes)
        y = np_utils.to_categorical(y)

        mlp_kwargs = dict(
            num_hidden=5, num_classes=num_classes, input_dim=input_dim)
        compile_kwargs = dict(
            loss='categorical_crossentropy',
            metrics=[],
            run_eagerly=testing_utils.should_run_eagerly())

        model_v1 = testing_utils.get_small_sequential_mlp(**mlp_kwargs)
        model_v1.compile(opt_v1, **compile_kwargs)
        model_v1.fit(x, y, batch_size=5, epochs=1)

        model_v2 = testing_utils.get_small_sequential_mlp(**mlp_kwargs)
        model_v2.set_weights(model_v1.get_weights())
        model_v2.compile(opt_v2, **compile_kwargs)
        if not tf.compat.v1.executing_eagerly_outside_functions():
            model_v2._make_train_function()
        if test_weights:
            opt_v2.set_weights(opt_v1.get_weights())

        hist_1 = model_v1.fit(x, y, batch_size=5, epochs=1, shuffle=False)
        hist_2 = model_v2.fit(x, y, batch_size=5, epochs=1, shuffle=False)

        tolerance = dict(rtol=1e-5, atol=1e-5)
        self.assertAllClose(
            model_v1.get_weights(), model_v2.get_weights(), **tolerance)
        self.assertAllClose(
            hist_1.history['loss'], hist_2.history['loss'], **tolerance)
def testConfig(self):
    """Checks an SGD optimizer rebuilt from its config keeps the learning rate.

    The config is captured once, before `minimize` is called, and is used to
    build one optimizer before and one after `minimize` runs on the source
    optimizer; both must report the same learning rate as the source.
    """

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    with testing_utils.use_gpu():
        opt = gradient_descent.SGD(learning_rate=1.0)
        config = opt.get_config()
        restored = gradient_descent.SGD.from_config(config)

        lr = opt._get_hyper('learning_rate')
        restored_lr = restored._get_hyper('learning_rate')
        init_variables()
        # assert both are equal float values.
        self.assertEqual(self.evaluate(lr), self.evaluate(restored_lr))

        var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)

        def loss():
            return 3 * var0

        # learning rate variable created when calling minimize.
        opt.minimize(loss, [var0])

        restored_after = gradient_descent.SGD.from_config(config)
        restored_after_lr = restored_after._get_hyper('learning_rate')
        init_variables()
        self.assertEqual(
            self.evaluate(lr), self.evaluate(restored_after_lr))
def testBasic(self):
    """Checks a single SGD step for every dtype in `_DATA_TYPES`.

    With loss `5 * var0 + 3 * var1` and learning rate 3.0, `var0` must drop
    by 15 and `var1` by 9 per element.
    """

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    for dtype in _DATA_TYPES:
        with testing_utils.use_gpu():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

            def loss():
                # pylint: disable=cell-var-from-loop
                return 5 * var0 + 3 * var1

            optimizer = gradient_descent.SGD(3.0)

            init_variables()
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 1 step of sgd through optimizer
            train_op = optimizer.minimize(loss, var_list=[var0, var1])
            init_variables()
            self.evaluate(train_op)

            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
def testWeights(self):
    """Checks `get_weights` / `set_weights` on Adam optimizers.

    Covers three situations: copying weights between two optimizers that
    track one variable each, the `ValueError` raised when the weight lists
    do not match, and copying again once both optimizers track the same
    number of variables.
    """

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    with testing_utils.use_gpu():
        opt1 = adam.Adam(learning_rate=1.0)
        var1 = tf.Variable([1.0, 2.0], dtype=tf.float32)
        step_1 = opt1.minimize(lambda: 3 * var1, [var1])
        init_variables()

        opt2 = adam.Adam.from_config(opt1.get_config())
        var2 = tf.Variable([1.0, 2.0], dtype=tf.float32)
        step_2 = opt2.minimize(lambda: 3 * var2, [var2])
        weights = opt1.get_weights()

        # Assert set_weights and both variables get updated to same value.
        init_variables()
        opt2.set_weights(weights)
        self.evaluate([step_1, step_2])
        self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
        self.assertEqual(1, self.evaluate(opt1.iterations))
        self.assertEqual(1, self.evaluate(opt2.iterations))

        var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
        var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
        step_3 = opt1.minimize(lambda: 3 * var3 + 5 * var4, [var3, var4])

        # Assert set_weights with ValueError since weight list does not
        # match.
        init_variables()
        weights = opt1.get_weights()
        with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
            opt2.set_weights(weights)

        # Assert set_weights and variables get updated to same value.
        var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
        var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
        step_4 = opt2.minimize(lambda: 3 * var5 + 5 * var6, [var5, var6])
        init_variables()
        opt2.set_weights(weights)
        self.evaluate([step_3, step_4])
        self.assertAllClose(
            self.evaluate([var3, var4]), self.evaluate([var5, var6]))
def testConstraint(self):
    """Checks variable constraints are reflected after an SGD step.

    `var0` is constrained to [-0.1, 0] and `var1` to [0, 1]; after one step
    the variables must hold the clipped values asserted below.
    """

    def clip_to_neg_tenth_and_zero(x):
        return tf.clip_by_value(x, -0.1, 0.)

    def clip_to_unit_interval(x):
        return tf.clip_by_value(x, 0., 1.)

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    with testing_utils.use_gpu():
        var0 = tf.Variable(
            [1.0, 2.0], constraint=clip_to_neg_tenth_and_zero)
        var1 = tf.Variable([3.0, 4.0], constraint=clip_to_unit_interval)

        def loss():
            return 5 * var0 + 3 * var1

        optimizer = gradient_descent.SGD(3.0)

        init_variables()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 1 step of sgd through optimizer
        train_op = optimizer.minimize(loss, var_list=[var0, var1])
        init_variables()
        self.evaluate(train_op)

        # Validate updated params
        self.assertAllClose([-0.1, -0.1], self.evaluate(var0))
        self.assertAllClose([0., 0.], self.evaluate(var1))
def testPrecomputedGradient(self):
    """Checks `minimize` with an explicit `grad_loss` for every dtype.

    With loss `5 * var0 + 3 * var1`, learning rate 3.0 and
    `grad_loss = [42, -42]`, each variable must change by
    `-3 * coefficient * grad_loss` element-wise.
    """

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    for dtype in _DATA_TYPES:
        with testing_utils.use_gpu():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

            def loss():
                # pylint: disable=cell-var-from-loop
                return 5 * var0 + 3 * var1

            grad_loss = tf.constant([42, -42], dtype=dtype)
            optimizer = gradient_descent.SGD(3.0)

            init_variables()
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 1 step of sgd through optimizer
            train_op = optimizer.minimize(
                loss, var_list=[var0, var1], grad_loss=grad_loss)
            init_variables()
            self.evaluate(train_op)

            # Validate updated params
            expected_var0 = [1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)]
            expected_var1 = [3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)]
            self.assertAllClose(expected_var0, self.evaluate(var0))
            self.assertAllClose(expected_var1, self.evaluate(var1))
def testGradientsAsVariables(self):
    """Checks `apply_gradients` accepts gradients stored in `tf.Variable`s.

    Gradients from `_compute_gradients` are copied into fresh variables via
    assign ops, and those variables are then fed to `apply_gradients`. The
    result must equal a regular SGD step.
    """

    def init_variables():
        self.evaluate(tf.compat.v1.global_variables_initializer())

    for i, dtype in enumerate(_DATA_TYPES):
        with testing_utils.use_gpu():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)

            def loss():
                # pylint: disable=cell-var-from-loop
                return 5 * var0 + 3 * var1

            optimizer = gradient_descent.SGD(3.0)
            grads_and_vars = optimizer._compute_gradients(
                loss, [var0, var1])

            # Convert gradients to tf.Variables
            converted_grads = []
            for j in range(len(grads_and_vars)):
                converted_grads.append(
                    tf.Variable(
                        tf.zeros([2], dtype), name='c_%d_%d' % (i, j)))
            convert_ops = []
            for holder, (grad, _) in zip(converted_grads, grads_and_vars):
                convert_ops.append(tf.compat.v1.assign(holder, grad))

            # Run convert_ops to achieve the gradients converting
            init_variables()
            self.evaluate(convert_ops)
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 1 step of sgd through optimizer
            converted_grads_and_vars = list(
                zip(converted_grads, [var0, var1]))
            train_op = optimizer.apply_gradients(converted_grads_and_vars)
            init_variables()
            self.evaluate(convert_ops)
            self.evaluate(train_op)

            # Validate updated params
            self.assertAllClose([-14., -13.], self.evaluate(var0))
            self.assertAllClose([-6., -5.], self.evaluate(var1))
def testDense(self):
    """Compares dense RMSprop updates with `_rmsprop_update_numpy`.

    For each parameter tuple in `_TESTPARAMS`, three update steps are run
    and, after every step, the optimizer slots ("mg" when centered,
    "momentum" when momentum > 0, and "rms") and the variables are compared
    with the numpy reference values.
    """
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for (dtype, learning_rate, rho, momentum, epsilon,
         centered) in _TESTPARAMS:
        with tf.compat.v1.get_default_graph().as_default(), \
                testing_utils.use_gpu():
            np_dtype = dtype.as_numpy_dtype

            def np_zeros():
                # A fresh array per call so no state buffer is shared.
                return np.array([0.0, 0.0], dtype=np_dtype)

            # Initialize variables for numpy implementation.
            var0_np = np.array([1.0, 2.0], dtype=np_dtype)
            grads0_np = np.array([0.1, 0.2], dtype=np_dtype)
            var1_np = np.array([3.0, 4.0], dtype=np_dtype)
            grads1_np = np.array([0.01, 0.2], dtype=np_dtype)

            var0 = tf.Variable(var0_np, dtype=dtype)
            var1 = tf.Variable(var1_np, dtype=dtype)
            grads0 = tf.constant(grads0_np, dtype=dtype)
            grads1 = tf.constant(grads1_np, dtype=dtype)
            opt = rmsprop.RMSprop(
                learning_rate=learning_rate,
                rho=rho,
                momentum=momentum,
                epsilon=epsilon,
                centered=centered)

            update = opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            uses_momentum = momentum > 0.
            mg0 = opt.get_slot(var0, "mg") if centered else None
            mg1 = opt.get_slot(var1, "mg") if centered else None
            mom0 = opt.get_slot(var0, "momentum") if uses_momentum else None
            mom1 = opt.get_slot(var1, "momentum") if uses_momentum else None

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)

            mg0_np, mg1_np = np_zeros(), np_zeros()
            rms0_np, rms1_np = np_zeros(), np_zeros()
            mom0_np, mom1_np = np_zeros(), np_zeros()

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 3 steps of RMSprop
            for _ in range(3):
                self.evaluate(update)

                (var0_np, mg0_np, rms0_np,
                 mom0_np) = self._rmsprop_update_numpy(
                     var0_np, grads0_np, mg0_np, rms0_np, mom0_np,
                     learning_rate, rho, momentum, epsilon, centered)
                (var1_np, mg1_np, rms1_np,
                 mom1_np) = self._rmsprop_update_numpy(
                     var1_np, grads1_np, mg1_np, rms1_np, mom1_np,
                     learning_rate, rho, momentum, epsilon, centered)

                # Validate updated params
                expected_and_actual = []
                if centered:
                    expected_and_actual += [(mg0_np, mg0), (mg1_np, mg1)]
                if uses_momentum:
                    expected_and_actual += [
                        (mom0_np, mom0), (mom1_np, mom1)]
                expected_and_actual += [
                    (rms0_np, rms0),
                    (rms1_np, rms1),
                    (var0_np, var0),
                    (var1_np, var1),
                ]
                for expected, actual in expected_and_actual:
                    self.assertAllCloseAccordingToType(
                        expected, self.evaluate(actual))
def testIterationWithoutMinimize(self):
    """Checks `iterations` reads 0 on an optimizer that has never stepped."""
    with testing_utils.use_gpu():
        optimizer = gradient_descent.SGD(3.0)
        iterations = optimizer.iterations
        self.evaluate(iterations.initializer)
        self.assertEqual(0, self.evaluate(iterations))
def test_model_with_crossentropy_losses_channels_first(self):
    """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that evaluate gives the same result with either
    `channels_first` or `channels_last` image_data_format.

    The global image data format is always restored, even if building or
    evaluating a model raises. The test is reported as skipped when no CUDA
    GPU is available.
    """

    def prepare_simple_model(input_tensor, loss_name, target):
        """Builds and compiles a single-`Conv2D` model for `loss_name`.

        The channel axis is derived from the current image data format; the
        number of output channels and the activation are derived from
        `loss_name` and `target`.
        """
        axis = 1 if backend.image_data_format() == 'channels_first' else -1
        loss = None
        num_channels = None
        activation = None
        if loss_name == 'sparse_categorical_crossentropy':
            loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
                y_true, y_pred, axis=axis)
            num_channels = int(np.amax(target) + 1)
            activation = 'softmax'
        elif loss_name == 'categorical_crossentropy':
            loss = lambda y_true, y_pred: backend.categorical_crossentropy(  # pylint: disable=g-long-lambda
                y_true, y_pred, axis=axis)
            num_channels = target.shape[axis]
            activation = 'softmax'
        elif loss_name == 'binary_crossentropy':
            loss = lambda y_true, y_pred: backend.binary_crossentropy(  # pylint: disable=g-long-lambda, unnecessary-lambda
                y_true, y_pred)
            num_channels = target.shape[axis]
            activation = 'sigmoid'

        predictions = Conv2D(num_channels,
                             1,
                             activation=activation,
                             kernel_initializer='ones',
                             bias_initializer='ones')(input_tensor)
        simple_model = training.Model(
            inputs=input_tensor, outputs=predictions)
        simple_model.compile(optimizer='rmsprop', loss=loss)
        return simple_model

    if not tf.test.is_gpu_available(cuda_only=True):
        self.skipTest('channels_first crossentropy test requires a CUDA GPU')

    with testing_utils.use_gpu():
        losses_to_test = ['sparse_categorical_crossentropy',
                          'categorical_crossentropy',
                          'binary_crossentropy']

        data_channels_first = np.array(
            [[[[8., 7.1, 0.], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]],
            dtype=np.float32)
        # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
        # categorical_crossentropy, and 2-class binary_crossentropy:
        labels_channels_first = [
            np.array(
                [[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]],
                dtype=np.float32),
            np.array(
                [[[[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                  [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                  [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                  [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]],
                dtype=np.float32),
            np.array(
                [[[[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                  [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]],
                dtype=np.float32),
        ]

        def evaluate_losses(data, labels_per_loss, input_shape):
            """Evaluates one model per loss under the current data format."""
            results = []
            for loss_function, labels in zip(losses_to_test,
                                             labels_per_loss):
                inputs = input_layer.Input(shape=input_shape)
                model = prepare_simple_model(inputs, loss_function, labels)
                results.append(
                    model.evaluate(
                        x=data, y=labels, batch_size=1, verbose=0))
            return results

        old_data_format = backend.image_data_format()
        try:
            # Evaluate a simple network with channels last, with all three
            # loss functions:
            backend.set_image_data_format('channels_last')
            loss_channels_last = evaluate_losses(
                np.moveaxis(data_channels_first, 1, -1),
                [np.moveaxis(labels, 1, -1)
                 for labels in labels_channels_first],
                (3, 3, 1))

            # Evaluate the same network with channels first, with all three
            # loss functions:
            backend.set_image_data_format('channels_first')
            loss_channels_first = evaluate_losses(
                data_channels_first, labels_channels_first, (1, 3, 3))
        finally:
            # Never leak the modified global data format into other tests.
            backend.set_image_data_format(old_data_format)

        np.testing.assert_allclose(
            loss_channels_first,
            loss_channels_last,
            rtol=1e-06,
            err_msg=('Computed different losses for '
                     'channels_first and channels_last'))