def _test_checkpointing_layer_weights(self, strategy_fn, mixed_prec_when_saving, mixed_prec_when_loading): # In this test, we potentially save with mixed precision enabled and load # with mixed precision disabled, or vice versa. This is possible because # variables are float32 regardless of whether mixed precision is enabled. save_policy = 'mixed_float16' if mixed_prec_when_saving else 'float32' load_policy = 'mixed_float16' if mixed_prec_when_loading else 'float32' save_input_dtype = 'float16' if mixed_prec_when_saving else 'float32' load_input_dtype = 'float16' if mixed_prec_when_loading else 'float32' # Create a layer and save a checkpoint. x = tf.constant([1.]) with strategy_fn().scope(): with policy.policy_scope(save_policy): layer = mp_test_util.MultiplyLayer(assert_type=save_input_dtype) layer(x) # Build layer layer.set_weights([np.array(100.)]) self.assertEqual(self.evaluate(layer(x)), 100.) checkpoint = tf.train.Checkpoint(layer=layer) prefix = os.path.join(self.get_temp_dir(), 'ckpt') save_path = checkpoint.save(prefix) # Create a new layer and restore the checkpoint. x = tf.constant([1.]) with strategy_fn().scope(): with policy.policy_scope(load_policy): layer = mp_test_util.MultiplyLayer(assert_type=load_input_dtype) layer(x) # Build layer layer.set_weights([np.array(200.)]) self.assertEqual(self.evaluate(layer(x)), 200.) checkpoint = tf.train.Checkpoint(layer=layer) checkpoint.restore(save_path).assert_consumed().run_restore_ops() self.assertEqual(layer.get_weights(), [100.]) self.assertEqual(self.evaluate(layer(x)), 100.)
def test_config(self, strategy_fn): x = tf.constant([1.0], dtype=tf.float16) with strategy_fn().scope(): for layer, dtype in ( (mp_test_util.MultiplyLayer(), "float32"), (mp_test_util.MultiplyLayer(dtype="float64"), "float64"), ( mp_test_util.MultiplyLayer(dtype=policy.Policy("float64")), "float64", ), ): config = layer.get_config() self.assertEqual(config["dtype"], dtype) self.assertIsInstance(config["dtype"], str) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, dtype) self.assertEqual(layer(x).dtype, dtype) self.assertEqual(layer.v.dtype, dtype) layer = mp_test_util.MultiplyLayer(dtype="mixed_float16") config = layer.get_config() self.assertEqual( config["dtype"], { "class_name": "Policy", "config": { "name": "mixed_float16" } }, ) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, "float32") self.assertEqual(layer(x).dtype, "float16") self.assertEqual(layer.v.dtype, "float32") config = layer.get_config() self.assertEqual( config["dtype"], { "class_name": "Policy", "config": { "name": "mixed_float16" } }, ) layer = mp_test_util.MultiplyLayer(dtype=policy.Policy("_infer")) config = layer.get_config() self.assertIsNone(config["dtype"]) layer = mp_test_util.MultiplyLayer.from_config(config) # If a layer is serialized with the "_infer" policy, when # deserialized into TF 2 it will have the global policy instead of # "_infer". This is because "_infer" is serialized into None, and # passing dtype=None in TensorFlow 2 indicates to use the global # policy. self.assertEqual(layer.dtype, "float32") self.assertEqual(layer(x).dtype, "float32") self.assertEqual(layer.v.dtype, "float32")
def test_config_policy_v1(self, strategy_fn): x = tf.constant([1.], dtype=tf.float16) with strategy_fn().scope(): layer = mp_test_util.MultiplyLayer(dtype=policy.PolicyV1('mixed_float16', loss_scale=None)) config = layer.get_config() self.assertEqual(config['dtype'], {'class_name': 'PolicyV1', 'config': {'name': 'mixed_float16', 'loss_scale': None}}) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, 'float32') self.assertEqual(layer(x).dtype, 'float16') self.assertEqual(layer.v.dtype, 'float32') # Restoring a PolicyV1 silently converts it to a Policy and drops the loss # scale. self.assertEqual(type(layer.dtype_policy), policy.Policy) config = layer.get_config() # The loss_scale is silently dropped self.assertEqual(config['dtype'], {'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}) layer = mp_test_util.MultiplyLayer(dtype=policy.PolicyV1('float64', loss_scale=2.)) config = layer.get_config() self.assertEqual(config['dtype'], {'class_name': 'PolicyV1', 'config': {'name': 'float64', 'loss_scale': { 'class_name': 'FixedLossScale', 'config': {'loss_scale_value': 2.0}}}}) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, 'float64') self.assertEqual(layer(x).dtype, 'float64') self.assertEqual(layer.v.dtype, 'float64') self.assertEqual(type(layer.dtype_policy), policy.Policy) config = layer.get_config() self.assertEqual(config['dtype'], 'float64') layer = mp_test_util.MultiplyLayer(dtype=policy.PolicyV1('_infer', loss_scale=2.)) config = layer.get_config() self.assertEqual(config['dtype'], {'class_name': 'PolicyV1', 'config': {'name': '_infer', 'loss_scale': { 'class_name': 'FixedLossScale', 'config': {'loss_scale_value': 2.0}}}}) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, None) self.assertEqual(layer(x).dtype, 'float16') self.assertEqual(layer.v.dtype, 'float16') self.assertEqual(type(layer.dtype_policy), policy.Policy) config = layer.get_config() self.assertEqual(config['dtype'], 'float16')
def test_unsupported_strategy(self): strategy = create_central_storage_strategy() with strategy.scope(), self.assertRaisesRegex( ValueError, 'Mixed precision is not supported with the ' 'tf.distribute.Strategy: CentralStorageStrategy. Either ' 'stop using mixed precision by removing the use of the ' '"mixed_float16" policy or use a different Strategy, e.g. ' 'a MirroredStrategy.'): mp_test_util.MultiplyLayer(dtype='mixed_float16') # Non-mixed policies are fine mp_test_util.MultiplyLayer(dtype=policy.Policy('float64'))
def test_from_config_policy_v1(self, strategy_fn): # Test that layers serialized in previous Keras versions with the # now-deleted PolicyV1 can be deserialized. In such cases, the PolicyV1 will # be converted to a Policy, since PolicyV1 no longer exists. Unlike Policy, # PolicyV1 had a "loss_scale" field, which is silently dropped when # deserialized. x = tf.constant([1.], dtype=tf.float16) with strategy_fn().scope(): layer = mp_test_util.MultiplyLayer(dtype='mixed_float16') config = layer.get_config() # Change the serialized dtype policy to a PolicyV1 config['dtype'] = {'class_name': 'PolicyV1', 'config': {'name': 'mixed_float16', 'loss_scale': None}} layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, 'float32') self.assertEqual(layer(x).dtype, 'float16') self.assertEqual(layer.v.dtype, 'float32') config = layer.get_config() # The loss_scale is silently dropped self.assertEqual(config['dtype'], {'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}) layer = mp_test_util.MultiplyLayer(dtype='float64') config = layer.get_config() config['dtype'] = {'class_name': 'PolicyV1', 'config': {'name': 'float64', 'loss_scale': { 'class_name': 'FixedLossScale', 'config': {'loss_scale_value': 2.0}}}} layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, 'float64') self.assertEqual(layer(x).dtype, 'float64') self.assertEqual(layer.v.dtype, 'float64') config = layer.get_config() self.assertEqual(config['dtype'], 'float64') layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer')) config = layer.get_config() config['dtype'] = {'class_name': 'PolicyV1', 'config': {'name': '_infer', 'loss_scale': { 'class_name': 'FixedLossScale', 'config': {'loss_scale_value': 2.0}}}} layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, None) self.assertEqual(layer(x).dtype, 'float16') self.assertEqual(layer.v.dtype, 'float16') self.assertEqual(type(layer.dtype_policy), policy.Policy) config = layer.get_config() self.assertEqual(config['dtype'], 'float16')
def test_save_slot_variables_with_autocast_vars(self, strategy_fn, var_name='v'): p = policy.Policy('mixed_float16') with strategy_fn().scope(), policy.policy_scope(p): x = layers.Input(shape=(2,), batch_size=2) # Having a var_name other than 'v' tests that a fixed bug (b/134713714) # does not reoccur. The bug was that a crash would occur when saving a # checkpoint where an AutoCastVariable with a slot variable would have a # different name than the layer attribute's name (layer.v in this case). layer = mp_test_util.MultiplyLayer(assert_type=tf.float16, var_name=var_name) y = layer(x) model = models.Model(inputs=x, outputs=y) opt = gradient_descent.SGD(1., 1.) opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, initial_scale=1) model.compile( optimizer=opt, loss='mse', run_eagerly=testing_utils.should_run_eagerly()) model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) weights_file = os.path.join(self.get_temp_dir(), 'weights') model.save_weights(weights_file) saved_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) new_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) self.assertNotEqual(new_slot, saved_slot) model.load_weights(weights_file) restored_slot = backend.get_value(opt.get_slot(layer.v, 'momentum')) self.assertEqual(restored_slot, saved_slot)
def test_input_spec_dtype(self): # Test the InputSpec's dtype is compared against the inputs before the layer # casts them, not after. layer = mp_test_util.MultiplyLayer(dtype='float64') layer.input_spec = input_spec.InputSpec(dtype='float16') # Test passing Eager tensors x = tf.ones((2, 2), dtype='float16') layer(x) x = tf.ones((2, 2), dtype='float64') with self.assertRaisesRegex( ValueError, 'expected dtype=float16, found dtype=.*float64'): layer(x) # Test passing symbolic tensors x = layers.Input((2, ), dtype='float16') y = layer(x) model = models.Model(x, y) model(tf.ones((2, 2))) x = layers.Input((2, ), dtype='float64') with self.assertRaisesRegex( ValueError, 'expected dtype=float16, found dtype=.*float64'): # In TF2, the error is only raised when the model is run y = layer(x) model = models.Model(x, y) model(tf.ones((2, 2)))
def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn): x = tf.constant([1.0]) with strategy_fn().scope(): with policy.policy_scope("mixed_float16"): # Test on MultiplyLayer layer = mp_test_util.MultiplyLayer( assert_type=tf.float16, regularizer=mp_test_util.IdentityRegularizer(), ) layer(x) (regularizer_loss, ) = layer.losses self.assertEqual(regularizer_loss.dtype, tf.float32) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertEqual(self.evaluate(regularizer_loss), 1.0) # Test on MultiplyLayerWithoutAutoCast layer = mp_test_util.MultiplyLayerWithoutAutoCast( assert_type=tf.float16, regularizer=mp_test_util.IdentityRegularizer(), ) layer(x) (regularizer_loss, ) = layer.losses self.assertEqual(regularizer_loss.dtype, tf.float32) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertEqual(self.evaluate(regularizer_loss), 1.0)
def test_gradient(self, strategy_fn): x = tf.constant([1.]) with strategy_fn().scope() as strategy: with policy.policy_scope('mixed_float16'): layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) # Learning rate is small enough that if applied to a float16 variable, # the variable will not change. So this tests the learning rate is not # applied to a float16 value, but instead the float32 variable. opt = gradient_descent.SGD(2**-14) def run_fn(): with tf.GradientTape() as tape: y = layer(x) # Divide by num_replicas_in_sync, as the effective total loss is the # sum of each of the replica's losses. y /= strategy.num_replicas_in_sync grad = tape.gradient(y, layer.v) return opt.apply_gradients([(grad, layer.v)]) op = strategy.experimental_run(run_fn) if not tf.executing_eagerly(): self.evaluate(tf.compat.v1.global_variables_initializer()) self.evaluate(op) # The gradient with respective to the variable is 1. Since the # variable is initialized with 1 and the learning rate is 2**-14, the # new variable value should be: init_val - gradient * learning_rate, # which is 1 - 1 * 2**-14 self.assertEqual(self.evaluate(layer.v), 1 - 2**-14)
def test_error_passing_policy_string_to_layer(self): with self.assertRaisesRegex( TypeError, "Cannot convert value 'mixed_float16' to a " 'TensorFlow DType'): # This is not allowed, as otherwise a "mixed_float16" policy could be # created without an API call that has the name "experimental" in it. mp_test_util.MultiplyLayer(dtype='mixed_float16')
def test_config(self, strategy_fn): x = tf.constant([1.], dtype=tf.float16) with strategy_fn().scope(): for layer, dtype in ((mp_test_util.MultiplyLayer(), 'float32'), (mp_test_util.MultiplyLayer(dtype='float64'), 'float64'), (mp_test_util.MultiplyLayer( dtype=policy.Policy('float64')), 'float64')): config = layer.get_config() self.assertEqual(config['dtype'], dtype) self.assertIsInstance(config['dtype'], str) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, dtype) self.assertEqual(layer(x).dtype, dtype) self.assertEqual(layer.v.dtype, dtype) layer = mp_test_util.MultiplyLayer(dtype='mixed_float16') config = layer.get_config() self.assertEqual(config['dtype'], { 'class_name': 'Policy', 'config': { 'name': 'mixed_float16' } }) layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, 'float32') self.assertEqual(layer(x).dtype, 'float16') self.assertEqual(layer.v.dtype, 'float32') config = layer.get_config() self.assertEqual(config['dtype'], { 'class_name': 'Policy', 'config': { 'name': 'mixed_float16' } }) layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer')) config = layer.get_config() self.assertIsNone(config['dtype']) layer = mp_test_util.MultiplyLayer.from_config(config) # If a layer is serialized with the "_infer" policy, when deserialized # into TF 2 it will have the global policy instead of "_infer". This is # because "_infer" is serialized into None, and passing dtype=None in # TensorFlow 2 indicates to use the global policy. self.assertEqual(layer.dtype, 'float32') self.assertEqual(layer(x).dtype, 'float32') self.assertEqual(layer.v.dtype, 'float32')
def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False): # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy # as well. strategy = strategy_fn() if (isinstance(strategy, tf.distribute.MirroredStrategy) and not tf.executing_eagerly()): # TODO(b/121381184): Enable running the test in this case. return # Create and run model. with strategy.scope(): x = layers.Input(shape=(2, ), batch_size=2, dtype=tf.float32) y = mp_test_util.MultiplyLayer()(x) model = models.Model(inputs=x, outputs=y) opt = gradient_descent.SGD(1.) opt = loss_scale_optimizer.LossScaleOptimizer( opt, initial_scale=1., dynamic_growth_steps=2.) model.compile(optimizer=opt, loss='mse', run_eagerly=test_utils.should_run_eagerly()) # Run for 3 steps (6 examples with a batch size of 2) model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1) (weight, ) = model.trainable_weights orig_weight = backend.get_value(weight) # Save model weights. save_path = os.path.join(self.get_temp_dir(), 'model') model.save(save_path, save_format='h5' if h5 else 'tf') # Run model again for 1 step (2 examples with a batch size of 2) model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) new_weight = backend.get_value(weight) self.assertNotEqual(new_weight, orig_weight) self.assertEqual(backend.get_value(opt.loss_scale), 4) self.assertEqual(backend.get_value(opt.dynamic_counter), 0) # Load model weights and ensure loss scale weights are restored. model = save.load_model( save_path, custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer}) (weight, ) = model.trainable_weights loaded_weight = backend.get_value(weight) self.assertEqual(loaded_weight, orig_weight) # Currently the loss scale isn't always saved when the model is saved with # Model.save(). So we assert the loss scale either has the value when it was # saved, or the value it was initialized with. # TODO(reedwm): Always save/restore the loss scale with Model.save(). self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2)) self.assertIn(backend.get_value(model.optimizer.dynamic_counter), (0, 1)) # Test optimizer attributes and type self.assertEqual(model.optimizer.initial_scale, 1.) self.assertEqual(model.optimizer.dynamic_growth_steps, 2.) self.assertEqual(type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer)
def test_functional_model_loss_dtype(self): with policy.policy_scope("float16"): x = layers.Input(shape=(1,)) y = mp_test_util.MultiplyLayer()(x) model = models.Model(x, y) model.add_loss(tf.cast(y, "float32")) # The loss should not be casted to the policy's dtype. self.assertEqual(model.losses[0].dtype, "float32")
def test_loss_scale_optimizer_overrides_policy_v1_loss_scale(self): with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)): opt = gradient_descent.SGD(1.) opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False, initial_scale=5.) x = layers.Input(shape=(1,)) y = mp_test_util.MultiplyLayer()(x) model = models.Model(x, y) model.compile(opt, loss='mse') self.assertEqual(self.evaluate(model.optimizer.loss_scale), 5.)
def test_build_and_call_layer_in_function(self): layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16')) @tf.function def f(): return layer(1.) y = f() self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertEqual(y.dtype, 'float16') self.assertEqual(layer.v.dtype, 'float32') self.assertEqual(self.evaluate(y), 1.)
def test_policy_v1_without_loss_scale(self): with policy.policy_scope(policy.PolicyV1('mixed_float16', loss_scale=None)): opt = gradient_descent.SGD(1.) x = layers.Input(shape=(1,)) y = mp_test_util.MultiplyLayer()(x) model = models.Model(x, y) model.compile(opt, loss='mse') self.assertNotIsInstance(model.optimizer, loss_scale_optimizer.LossScaleOptimizer)
def test_pass_invalid_optimizer_with_loss_scaling(self): with policy.policy_scope(policy.Policy("mixed_float16")): x = layers.Input(shape=(1,)) y = mp_test_util.MultiplyLayer()(x) model = models.Model(x, y) if tf.executing_eagerly(): error_msg = "Use a `tf.keras` Optimizer instead" else: error_msg = 'optimizer" must be an instance of ' with self.assertRaisesRegex(ValueError, error_msg): model.compile(optimizer_v1.SGD(1.0), "mse")
def test_passing_policy_to_layer(self, strategy_fn): x = tf.constant([1.], dtype=tf.float16) with strategy_fn().scope(): # Passing a Policy to 'dtype' sets the policy for that layer. layer = mp_test_util.MultiplyLayer( assert_type=tf.float16, dtype=policy.Policy('mixed_float16')) # layer.dtype refers to the variable dtype self.assertEqual(layer.dtype, tf.float32) layer(x) self.assertEqual(layer.v.dtype, tf.float32) with policy.policy_scope('mixed_float16'): # Passing a Policy to dtype overrides the global Policy layer = mp_test_util.MultiplyLayer( assert_type=tf.float64, dtype=policy.Policy('float64')) self.assertEqual(layer.dtype_policy.name, 'float64') self.assertIsInstance(layer.dtype_policy, policy.Policy) self.assertEqual(layer.compute_dtype, tf.float64) self.assertEqual(layer.dtype, tf.float64) self.assertEqual(layer.variable_dtype, tf.float64) self.assertEqual(layer(x).dtype, tf.float64) self.assertEqual(layer.v.dtype, tf.float64)
def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn): strategy = strategy_fn() if ( isinstance(strategy, tf.distribute.MirroredStrategy) and not tf.executing_eagerly() ): # TODO(b/121381184): Enable running the test in this case. return # Create and run model. with strategy.scope(): x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) y = mp_test_util.MultiplyLayer(assert_type=tf.float32)(x) model = models.Model(inputs=x, outputs=y) opt = gradient_descent.SGD(1.0) opt = loss_scale_optimizer.LossScaleOptimizer( opt, initial_scale=1.0, dynamic_growth_steps=2.0 ) model.compile( optimizer=opt, loss="mse", run_eagerly=test_utils.should_run_eagerly(), ) # Run for 3 steps (6 examples with a batch size of 2) model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1) # Save model weights. save_prefix = os.path.join(self.get_temp_dir(), "ckpt") model.save_weights(save_prefix) # Run model again for 1 step (2 examples with a batch size of 2) model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 4) self.assertEqual(backend.get_value(opt.dynamic_counter), 0) # Load model weights and ensure loss scale weights are restored. model.load_weights(save_prefix) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
def test_compile_wraps_with_loss_scale_optimizer(self): x = layers.Input(shape=(1,)) y = mp_test_util.MultiplyLayer()(x) with policy.policy_scope("mixed_float16"): # Test optimizer is automatically wrapped with LSO model = models.Model(x, y) model.compile(gradient_descent.SGD(1.0), "mse") self.assertIsInstance( model.optimizer, loss_scale_optimizer.LossScaleOptimizer ) self.assertEqual( backend.get_value(model.optimizer.learning_rate), 1.0 ) # Test optimizer specified as string is automatically wrapped in LSO model = models.Model(x, y) model.compile("sgd", "mse") self.assertIsInstance( model.optimizer, loss_scale_optimizer.LossScaleOptimizer ) # Test if an LSO is passed, optimizer is not automatically wrapped with # another LSO model = models.Model(x, y) optimizer = loss_scale_optimizer.LossScaleOptimizer( gradient_descent.SGD(1.0), dynamic_growth_steps=2 ) model.compile(optimizer, "mse") self.assertIsInstance( model.optimizer, loss_scale_optimizer.LossScaleOptimizer ) self.assertEqual(model.optimizer.dynamic_growth_steps, 2) with policy.policy_scope("mixed_bfloat16"): # Test mixed_bfloat16 models are not automatically wrapped with LSO model = models.Model(x, y) model.compile(gradient_descent.SGD(1.0), "mse") self.assertNotIsInstance( model.optimizer, loss_scale_optimizer.LossScaleOptimizer ) self.assertIsInstance(model.optimizer, gradient_descent.SGD)
def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False): with strategy_fn().scope(): with policy.policy_scope("mixed_float16"): x = layers.Input(shape=(1,), batch_size=2) layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) y = layer(x) model = models.Model(inputs=x, outputs=y) model.set_weights([np.array(100.0)]) x = np.ones((2, 1)) self.assertAllClose(backend.get_value(model(x)), x * 100.0) suffix = ".h5" if h5 else "" weights_file = os.path.join(self.get_temp_dir(), "weights" + suffix) model.save_weights(weights_file) model.set_weights([np.array(200.0)]) self.assertAllClose(backend.get_value(model(x)), x * 200.0) model.load_weights(weights_file) self.assertAllClose(backend.get_value(model(x)), x * 100.0) self.assertEqual(model.get_weights(), [np.array(100.0)])
def test_mixed_policies_(self, strategy_fn): strategy = strategy_fn() for dtype in 'float16', 'bfloat16': x = tf.constant([1.]) policy_name = 'mixed_' + dtype with strategy.scope(), policy.policy_scope(policy_name): layer = mp_test_util.MultiplyLayer(assert_type=dtype) self.assertEqual(layer.dtype, tf.float32) self.assertEqual(layer.dtype_policy.name, policy_name) y = layer(x) self.assertEqual(layer.v.dtype, tf.float32) self.assertEqual(y.dtype, dtype) self.assertEqual(layer.dtype_policy.name, policy_name) self.assertIsInstance(layer.dtype_policy, policy.Policy) self.assertEqual(layer.compute_dtype, dtype) self.assertEqual(layer.dtype, tf.float32) self.assertEqual(layer.variable_dtype, tf.float32) self.assertEqual(layer.dtype_policy.name, policy_name) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertEqual(self.evaluate(y), 1.)
def test_fixed_loss_scaling(self, strategy_fn): # Note: We do not test mixed precision in this method, only loss scaling. loss_scale = 8.0 batch_size = 4 with strategy_fn().scope(): x = layers.Input(shape=(1,), batch_size=batch_size) layer = mp_test_util.MultiplyLayer() y = layer(x) # The gradient of 'y' at this point is 1. With loss scaling, the gradient # is 'loss_scale'. We divide by the batch size since the loss is averaged # across batch elements. expected_gradient = loss_scale / batch_size identity_with_grad_check_fn = ( mp_test_util.create_identity_with_grad_check_fn( [expected_gradient] ) ) y = core.Lambda(identity_with_grad_check_fn)(y) model = models.Model(inputs=x, outputs=y) def loss_fn(y_true, y_pred): del y_true return tf.reduce_mean(y_pred) opt = gradient_descent.SGD(1.0) opt = loss_scale_optimizer.LossScaleOptimizer( opt, dynamic=False, initial_scale=loss_scale ) model.compile( opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly() ) self.assertEqual(backend.eval(layer.v), 1) x = np.ones((batch_size, 1)) y = np.ones((batch_size, 1)) dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) model.fit(dataset) # Variable starts at 1, and should have gradient of 1 subtracted from it. expected = 0 self.assertEqual(backend.eval(layer.v), expected)
def test_from_config_policy_v1(self, strategy_fn): # Test that layers serialized in previous Keras versions with the # now-deleted PolicyV1 can be deserialized. In such cases, the PolicyV1 will # be converted to a Policy, since PolicyV1 no longer exists. Unlike Policy, # PolicyV1 had a "loss_scale" field, which is silently dropped when # deserialized. x = tf.constant([1.0], dtype=tf.float16) with strategy_fn().scope(): layer = mp_test_util.MultiplyLayer(dtype="mixed_float16") config = layer.get_config() # Change the serialized dtype policy to a PolicyV1 config["dtype"] = { "class_name": "PolicyV1", "config": { "name": "mixed_float16", "loss_scale": None }, } layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, "float32") self.assertEqual(layer(x).dtype, "float16") self.assertEqual(layer.v.dtype, "float32") config = layer.get_config() # The loss_scale is silently dropped self.assertEqual( config["dtype"], { "class_name": "Policy", "config": { "name": "mixed_float16" } }, ) layer = mp_test_util.MultiplyLayer(dtype="float64") config = layer.get_config() config["dtype"] = { "class_name": "PolicyV1", "config": { "name": "float64", "loss_scale": { "class_name": "FixedLossScale", "config": { "loss_scale_value": 2.0 }, }, }, } layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, "float64") self.assertEqual(layer(x).dtype, "float64") self.assertEqual(layer.v.dtype, "float64") config = layer.get_config() self.assertEqual(config["dtype"], "float64") layer = mp_test_util.MultiplyLayer(dtype=policy.Policy("_infer")) config = layer.get_config() config["dtype"] = { "class_name": "PolicyV1", "config": { "name": "_infer", "loss_scale": { "class_name": "FixedLossScale", "config": { "loss_scale_value": 2.0 }, }, }, } layer = mp_test_util.MultiplyLayer.from_config(config) self.assertEqual(layer.dtype, None) self.assertEqual(layer(x).dtype, "float16") self.assertEqual(layer.v.dtype, "float16") self.assertEqual(type(layer.dtype_policy), policy.Policy) config = layer.get_config() self.assertEqual(config["dtype"], "float16")
def test_advanced_model(self, strategy_fn, use_loss_scaling=False): # The advanced model tests mixed-precision-related features that would occur # in a resnet50 model. It tests a model that has: # * Multiple layers, some which use auto-cast variables and some which do # not # * Regularization on some variables and not others. # * A fixed loss scale (if use_loss_scaling is True) strategy = strategy_fn() if use_loss_scaling: loss_scale = 8.0 learning_rate = 2**-14 with strategy.scope(): with policy.policy_scope(policy.Policy("mixed_float16")): x = layers.Input(shape=(1,), batch_size=2) layer1 = mp_test_util.MultiplyLayer( assert_type=tf.float16, regularizer=mp_test_util.IdentityRegularizer(), use_operator=True, ) layer2 = mp_test_util.MultiplyLayerWithoutAutoCast( assert_type=tf.float16, use_operator=True ) layer3 = mp_test_util.MultiplyLayer( assert_type=tf.float16, use_operator=False ) layer4 = mp_test_util.MultiplyLayerWithoutAutoCast( assert_type=tf.float16, regularizer=mp_test_util.IdentityRegularizer(), use_operator=False, ) y = layer1(x) y = layer2(y) y = layer3(y) y = layer4(y) if use_loss_scaling: # The gradient of 'y' at this point is 1. With loss scaling, the # gradient is 'loss_scale'. We divide by the batch size of 2 since the # loss is averaged across batch elements. expected_gradient = loss_scale / 2 identity_with_grad_check_fn = ( mp_test_util.create_identity_with_grad_check_fn( expected_dtype=tf.float16, expected_gradient=[expected_gradient], ) ) y = core.Lambda(identity_with_grad_check_fn)(y) model = models.Model(inputs=x, outputs=y) def loss_fn(y_true, y_pred): del y_true return tf.reduce_mean(y_pred) opt = gradient_descent.SGD(learning_rate) if use_loss_scaling: opt = loss_scale_optimizer.LossScaleOptimizer( opt, dynamic=False, initial_scale=loss_scale ) model.compile( opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly(), ) x = np.ones((2, 1)) y = np.ones((2, 1)) dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) model.fit(dataset) for layer in (layer1, layer2, layer3, layer4): if layer.losses: # Layer has weight regularizer self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate) else: # Layer does not have weight regularizer self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
def test_model( self, strategy_fn, use_operator=False, use_regularizer=False, policy_name="mixed_float16", get_config=False, save_format=None, use_input_spec=False, ): self._skip_if_strategy_unsupported(strategy_fn) self._skip_if_save_format_unsupported(save_format) if use_regularizer: weight_regularizer = mp_test_util.IdentityRegularizer() activity_regularizer = mp_test_util.ReduceSumRegularizer() else: weight_regularizer = activity_regularizer = None with strategy_fn().scope(): with policy.policy_scope(policy_name): layer = mp_test_util.MultiplyLayer( assert_type=tf.float16, use_operator=use_operator, regularizer=weight_regularizer, activity_regularizer=activity_regularizer, input_shape=(1,), ) if use_input_spec: layer.input_spec = input_spec.InputSpec(shape=(None, 1)) model = test_utils.get_model_from_layers( [layer], input_shape=(1,), input_dtype=tf.float16 ) if get_config: config = model.get_config() model = model.__class__.from_config( config, custom_objects={ "MultiplyLayer": mp_test_util.MultiplyLayer }, ) (layer,) = ( layer for layer in model.layers if isinstance(layer, mp_test_util.MultiplyLayer) ) def loss_fn(y_true, y_pred): del y_true return tf.reduce_mean(y_pred) # Learning rate is small enough that if applied to a float16 variable, # the variable will not change. So this tests the learning rate not # applied to a float16 value, but instead the float32 variable. opt = gradient_descent.SGD(2**-14) # Use a fixed loss scale, as this test will fail if gradients are # skipped for a step due to dynamic loss scaling. opt = loss_scale_optimizer.LossScaleOptimizer( opt, dynamic=False, initial_scale=8 ) model.compile( opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly(), ) x = np.ones((2, 1)) y = np.ones((2, 1)) dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2) model.fit(dataset) # Variable starts at 1, and should have gradient of 2 ** -14 subtracted # from it. expected = 1 - 2**-14 if use_regularizer: # Weight and activity regularizer each add another 2 ** -14 to the # gradient. expected -= 2 * 2**-14 self.assertEqual(backend.eval(layer.v), expected) if save_format: with generic_utils.CustomObjectScope( { "MultiplyLayer": mp_test_util.MultiplyLayer, "loss_fn": loss_fn, } ): self._test_saving(model, dataset, save_format, use_regularizer)
def test_dynamic_loss_scaling(self, strategy_fn, get_config=False): strategy = strategy_fn() initial_loss_scale = 2.0 batch_size = 4 expected_gradient = backend.variable( [initial_loss_scale / batch_size], dtype=tf.float16 ) # If this variable is set to True, the model below will have NaN gradients have_nan_gradients = backend.variable(False, dtype=tf.bool) with strategy.scope(): opt = gradient_descent.SGD(1.0) opt = loss_scale_optimizer.LossScaleOptimizer( opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2 ) with policy.policy_scope("mixed_float16"): x = layers.Input( shape=(1,), batch_size=batch_size, dtype=tf.float16 ) layer = mp_test_util.MultiplyLayer(assert_type=tf.float16) y = layer(x) identity_with_nan_grads = ( mp_test_util.create_identity_with_nan_gradients_fn( have_nan_gradients ) ) y = core.Lambda(identity_with_nan_grads)(y) identity_with_grad_check_fn = ( mp_test_util.create_identity_with_grad_check_fn( expected_dtype=tf.float16, expected_gradient=expected_gradient, ) ) y = core.Lambda(identity_with_grad_check_fn)(y) model = models.Model(inputs=x, outputs=y) if get_config: config = model.get_config() model = model.__class__.from_config( config, custom_objects={ "MultiplyLayer": mp_test_util.MultiplyLayer }, ) (layer,) = ( layer for layer in model.layers if isinstance(layer, mp_test_util.MultiplyLayer) ) def loss_fn(y_true, y_pred): del y_true return tf.reduce_mean(y_pred) model.compile( opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly(), ) self.assertEqual(backend.eval(layer.v), 1) x = np.ones((batch_size, 1)) y = np.ones((batch_size, 1)) dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size) model.fit(dataset) # The variables starts with 1 and has a gradient of 1, so will go down by 1 # each step. self.assertEqual(backend.eval(layer.v), 0) model.fit(dataset) self.assertEqual(backend.eval(layer.v), -1) # There have been two steps without NaNs, so the loss scale will double backend.set_value( expected_gradient, backend.get_value(expected_gradient * 2) ) model.fit(dataset) self.assertEqual(backend.eval(layer.v), -2) # Next test with NaN gradients. backend.set_value(have_nan_gradients, True) model.fit(dataset) # Variable should not be updated self.assertEqual(backend.eval(layer.v), -2) # Test with finite gradients again backend.set_value(have_nan_gradients, False) # The loss scale will be halved due to the NaNs, so the gradient will also # be halved backend.set_value( expected_gradient, backend.get_value(expected_gradient / 2) ) model.fit(dataset) self.assertEqual(backend.eval(layer.v), -3)