def test_batchnorm_non_trainable_with_fit(self):
  # We use the same data shape for all the data in this test. This prevents
  # any tf.functions used from retracing, which helps us verify that changing
  # trainable and recompiling really does update the training loop, rather
  # than a different data shape triggering a retrace.
  data_shape = (100, 3)
  inputs = keras.Input((3,))
  bn = normalization_v2.BatchNormalization()
  outputs = bn(inputs)
  model = keras.Model(inputs, outputs)
  model.compile(
      'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
  model.fit(np.random.random(data_shape), np.random.random(data_shape))

  test_data = np.random.random(data_shape)
  test_targets = np.random.random(data_shape)
  test_loss = model.evaluate(test_data, test_targets)

  bn.trainable = False
  model.compile(
      'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
  train_loss = model.train_on_batch(test_data, test_targets)
  self.assertAlmostEqual(test_loss, train_loss)
def test_batchnorm_non_trainable_with_tf_function(self):
  inputs = keras.Input((3,))
  bn = normalization_v2.BatchNormalization()
  outputs = bn(inputs)
  model = keras.Model(inputs, outputs)
  loss_fn = keras.losses.MeanSquaredError()
  optimizer = rmsprop_v2.RMSprop()

  @def_function.function()
  def train_step(x, y):
    with backprop.GradientTape() as tape:
      y_pred = model(x, training=True)
      loss = loss_fn(y, y_pred)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss

  @def_function.function()
  def test_step(x, y):
    y_pred = model(x, training=False)
    loss = loss_fn(y, y_pred)
    return loss

  train_step(np.random.random((100, 3)), np.random.random((100, 3)))

  test_data = np.random.random((10, 3))
  test_targets = np.random.random((10, 3))
  test_loss = test_step(test_data, test_targets)

  bn.trainable = False
  train_loss = train_step(test_data, test_targets)
  if context.executing_eagerly():
    self.assertAlmostEqual(test_loss.numpy(), train_loss.numpy())
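# Illustrative sketch, not part of the original tests: the two tests above rely
# on the TF2 Keras behavior that once `trainable` is set to False, a
# BatchNormalization layer normalizes with its moving statistics even when
# called with training=True, so its "training" and "inference" outputs
# coincide. The helper name and data shape below are assumptions made only for
# illustration.
def _frozen_batchnorm_uses_moving_stats_sketch():
  bn = normalization_v2.BatchNormalization()
  data = np.random.random((10, 3)).astype('float32')
  bn(data, training=True)  # Updates the moving mean/variance from the batch.
  bn.trainable = False
  frozen_train_out = bn(data, training=True)  # Now normalizes with moving stats.
  inference_out = bn(data, training=False)
  np.testing.assert_allclose(
      frozen_train_out.numpy(), inference_out.numpy(), rtol=1e-6)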
def test_v2_fused_attribute(self):
  norm = normalization_v2.BatchNormalization()
  self.assertEqual(norm.fused, True)
  inp = keras.layers.Input(shape=(4, 4, 4))
  norm(inp)
  self.assertEqual(norm.fused, True)

  norm = normalization_v2.BatchNormalization()
  self.assertEqual(norm.fused, True)
  inp = keras.layers.Input(shape=(4, 4))
  norm(inp)
  self.assertEqual(norm.fused, True)

  norm = normalization_v2.BatchNormalization(virtual_batch_size=2)
  self.assertEqual(norm.fused, False)
  inp = keras.layers.Input(shape=(4, 4, 4))
  norm(inp)
  self.assertEqual(norm.fused, False)

  norm = normalization_v2.BatchNormalization(fused=False)
  self.assertEqual(norm.fused, False)
  inp = keras.layers.Input(shape=(4, 4, 4))
  norm(inp)
  self.assertEqual(norm.fused, False)

  norm = normalization_v2.BatchNormalization(fused=True, axis=[3])
  self.assertEqual(norm.fused, True)
  inp = keras.layers.Input(shape=(4, 4, 4))
  norm(inp)
  self.assertEqual(norm.fused, True)

  with self.assertRaisesRegexp(ValueError, 'fused.*renorm'):
    normalization_v2.BatchNormalization(fused=True, renorm=True)

  with self.assertRaisesRegexp(ValueError, 'fused.*over a single axis'):
    normalization_v2.BatchNormalization(fused=True, axis=[1, 3])

  with self.assertRaisesRegexp(ValueError, 'fused.*virtual_batch_size'):
    normalization_v2.BatchNormalization(fused=True, virtual_batch_size=2)

  with self.assertRaisesRegexp(ValueError, 'fused.*adjustment'):
    normalization_v2.BatchNormalization(
        fused=True, adjustment=lambda _: (1, 0))
def test_bessels_correction(self):
  # Bessel's correction is currently only used in the fused case. In the
  # future, it may be used in the nonfused case as well.

  x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1])
  layer = normalization_v2.BatchNormalization(
      momentum=0.5, moving_variance_initializer='zeros')
  layer(x, training=True)
  self.assertTrue(layer.fused)
  # Since fused is used, Bessel's correction is used. The variance of [0, 2]
  # is 2 with Bessel's correction. Since the momentum is 0.5, the variance is
  # 2 * 0.5 == 1.
  self.assertAllEqual(self.evaluate(layer.moving_variance), [1.])

  x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1, 1])
  layer = normalization_v2.BatchNormalization(
      momentum=0.5, moving_variance_initializer='zeros')
  layer(x, training=True)
  self.assertFalse(layer.fused)
  # Since fused is not used, Bessel's correction is not used. The variance of
  # [0, 2] is 1 without Bessel's correction. Since the momentum is 0.5, the
  # variance is 1 * 0.5 == 0.5.
  self.assertAllEqual(self.evaluate(layer.moving_variance), [0.5])
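# Worked arithmetic behind the two assertions above (illustrative note, not
# part of the original test). The moving variance is updated as
#   moving_var = moving_var * momentum + batch_var * (1 - momentum)
# and starts at 0 here because of moving_variance_initializer='zeros'.
#   fused:     batch variance of [0, 2] with Bessel's correction is 2/(2-1) = 2,
#              so moving_var = 0 * 0.5 + 2 * 0.5 = 1.0
#   nonfused:  batch variance of [0, 2] without the correction is 2/2 = 1,
#              so moving_var = 0 * 0.5 + 1 * 0.5 = 0.5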
def test_batchnorm_non_trainable_with_fit(self):
  inputs = keras.Input((3,))
  bn = normalization_v2.BatchNormalization()
  outputs = bn(inputs)
  model = keras.Model(inputs, outputs)
  model.compile(
      'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
  model.fit(np.random.random((100, 3)), np.random.random((100, 3)))

  test_data = np.random.random((10, 3))
  test_targets = np.random.random((10, 3))
  test_loss = model.evaluate(test_data, test_targets)

  bn.trainable = False
  model.compile(
      'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
  train_loss = model.train_on_batch(test_data, test_targets)
  self.assertAlmostEqual(test_loss, train_loss)
def __init__(self):
  # Initialize the base class before creating sublayers so they are tracked.
  super().__init__()
  self._first_dense = core.Dense(18)
  self._conv = convolutional.Conv2D(2, 2)
  self._norm = normalization_v2.BatchNormalization()
  self._second_dense = core.Dense(1)
def test_basic_batchnorm_v2_none_shape_and_virtual_batch_size(self):
  # Test case for GitHub issue 32380.
  norm = normalization_v2.BatchNormalization(virtual_batch_size=8)
  inp = keras.layers.Input(shape=(None, None, 3))
  _ = norm(inp)
def __init__(self,
             units,
             hidden_units,
             feature_columns,
             activation_fn,
             dropout,
             batch_norm,
             name=None,
             **kwargs):
  super(_DNNModelV2, self).__init__(name=name, **kwargs)

  # Current DenseFeatures is not a pure Keras layer, as it still relies on
  # variable_scope and get_variables. Here we need to manually add 'dnn' (the
  # Keras model name) as a prefix for backward compatibility.
  with ops.name_scope(
      'dnn/input_from_feature_columns') as input_feature_column_scope:
    layer_name = input_feature_column_scope + 'input_layer'
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = feature_column_lib.DenseFeatures(
          feature_columns=feature_columns, name=layer_name)
    else:
      raise ValueError(
          'Received a feature column from TensorFlow v1, but this is a '
          'TensorFlow v2 Estimator. Please either use v2 feature columns '
          '(accessible via tf.feature_column.* in TF 2.x) with this '
          'Estimator, or switch to a v1 Estimator for use with v1 feature '
          'columns (accessible via tf.compat.v1.estimator.* and '
          'tf.compat.v1.feature_column.*, respectively).')

  self._dropout = dropout
  self._batch_norm = batch_norm

  self._hidden_layers = []
  self._dropout_layers = []
  self._batch_norm_layers = []
  self._hidden_layer_scope_names = []
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
      # Get scope name without the trailing slash.
      hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
      hidden_layer = keras_core.Dense(
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_shared_name)
      self._hidden_layer_scope_names.append(hidden_shared_name)
      self._hidden_layers.append(hidden_layer)
      if self._dropout is not None:
        dropout_layer = keras_core.Dropout(rate=self._dropout)
        self._dropout_layers.append(dropout_layer)
      if self._batch_norm:
        batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
        batch_norm_layer = keras_norm.BatchNormalization(
            # The default momentum 0.99 actually crashes on certain
            # problems, so here we use 0.999, which is the default of
            # tf.contrib.layers.batch_norm.
            momentum=0.999,
            trainable=True,
            name=batch_norm_name)
        self._batch_norm_layers.append(batch_norm_layer)

  with ops.name_scope('logits') as logits_scope:
    logits_shared_name = _name_from_scope_name(logits_scope)
    self._logits_layer = keras_core.Dense(
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_shared_name)
    self._logits_scope_name = logits_shared_name
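# Minimal sketch (not the model's actual call method) of how the sublayers
# built above are typically chained in a forward pass: each hidden Dense layer
# is followed by the optional Dropout and BatchNormalization layers, and the
# result feeds the logits layer. The `features`/`training` argument names and
# the dropout/batch-norm ordering are assumptions made only for illustration.
#
#   def call(self, features, training=None):
#     net = self._input_layer(features)
#     for i, hidden_layer in enumerate(self._hidden_layers):
#       net = hidden_layer(net)
#       if self._dropout is not None and training:
#         net = self._dropout_layers[i](net, training=True)
#       if self._batch_norm:
#         net = self._batch_norm_layers[i](net, training=training)
#     return self._logits_layer(net)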