def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
  x = constant_op.constant([1.])
  with strategy_fn().scope():
    with policy.policy_scope('mixed_float16'):
      # Test on MultiplyLayer
      layer = mp_test_util.MultiplyLayer(
          assert_type=dtypes.float16,
          regularizer=mp_test_util.IdentityRegularizer())
      layer(x)
      (regularizer_loss,) = layer.losses
      self.assertEqual(regularizer_loss.dtype, dtypes.float32)
      self.evaluate(variables.global_variables_initializer())
      self.assertEqual(self.evaluate(regularizer_loss), 1.)

      # Test on MultiplyLayerWithoutAutoCast
      layer = MultiplyLayerWithoutAutoCast(
          assert_type=dtypes.float16,
          regularizer=mp_test_util.IdentityRegularizer())
      layer(x)
      (regularizer_loss,) = layer.losses
      self.assertEqual(regularizer_loss.dtype, dtypes.float32)
      self.evaluate(variables.global_variables_initializer())
      self.assertEqual(self.evaluate(regularizer_loss), 1.)
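# --- Illustrative sketch, not part of the original test suite ---
# The assertions above rely on the fact that under the 'mixed_float16' policy
# a layer's variables are created in float32 (only the compute dtype is
# float16), so a regularizer, which runs on the variables, produces a float32
# loss. A minimal standalone check of the same property, assuming the public
# TF 2.4+ mixed-precision API rather than this file's test utilities:
def _sketch_regularizer_loss_dtype():
  import tensorflow as tf
  tf.keras.mixed_precision.set_global_policy('mixed_float16')
  try:
    layer = tf.keras.layers.Dense(
        1, kernel_regularizer=tf.keras.regularizers.L2(1.))
    layer(tf.ones((1, 1)))
    assert layer.kernel.dtype == tf.float32  # variable stays float32
    assert layer.losses[0].dtype == tf.float32  # so the regularizer loss does too
  finally:
    tf.keras.mixed_precision.set_global_policy('float32')  # reset global state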
def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
  # The advanced model tests mixed-precision-related features that would occur
  # in a resnet50 model. It tests a model that has:
  #  * Multiple layers, some of which use auto-cast variables and some of
  #    which do not
  #  * Regularization on some variables and not others
  #  * A fixed loss scale (if use_loss_scaling is True)
  strategy = strategy_fn()
  if use_loss_scaling:
    loss_scale = 8.
  else:
    loss_scale = None
  learning_rate = 2 ** -14

  with strategy.scope():
    with policy.policy_scope(
        policy.Policy('mixed_float16', loss_scale=loss_scale)):
      x = layers.Input(shape=(1,), batch_size=2)
      layer1 = mp_test_util.MultiplyLayer(
          assert_type=dtypes.float16,
          regularizer=mp_test_util.IdentityRegularizer(),
          use_operator=True)
      layer2 = MultiplyLayerWithoutAutoCast(
          assert_type=dtypes.float16, use_operator=True)
      layer3 = mp_test_util.MultiplyLayer(
          assert_type=dtypes.float16, use_operator=False)
      layer4 = MultiplyLayerWithoutAutoCast(
          assert_type=dtypes.float16,
          regularizer=mp_test_util.IdentityRegularizer(),
          use_operator=False)
      y = layer1(x)
      y = layer2(y)
      y = layer3(y)
      y = layer4(y)
      if use_loss_scaling:
        # The gradient of 'y' at this point is 1. With loss scaling, the
        # gradient is 'loss_scale'. We divide by the batch size of 2 since the
        # loss is averaged across batch elements.
        expected_gradient = loss_scale / 2
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=dtypes.float16,
                expected_gradient=[expected_gradient]))
        y = core.Lambda(identity_with_grad_check_fn)(y)
      model = models.Model(inputs=x, outputs=y)

      def loss_fn(y_true, y_pred):
        del y_true
        return math_ops.reduce_mean(y_pred)

      opt = gradient_descent.SGD(learning_rate)
      model.compile(
          opt, loss=loss_fn, run_eagerly=testing_utils.should_run_eagerly())

  x = np.ones((2, 1))
  y = np.ones((2, 1))
  dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
  model.fit(dataset)
  for layer in (layer1, layer2, layer3, layer4):
    if layer.losses:
      # Layer has a weight regularizer, which contributes an extra gradient
      # of 1, so the variable takes two learning-rate steps.
      self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
    else:
      # Layer does not have a weight regularizer.
      self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
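# --- Illustrative sketch, not part of the original test suite ---
# Worked check of the expected_gradient arithmetic above: the loss is the mean
# over a batch of 2, so dloss/dy is 1/2 per element, and multiplying the loss
# by a fixed loss scale of 8 scales that to 8 / 2 = 4. Assumes TF 2.x eager
# execution; the function name is hypothetical.
def _sketch_scaled_gradient():
  import tensorflow as tf
  loss_scale = 8.
  y = tf.ones((2, 1))
  with tf.GradientTape() as tape:
    tape.watch(y)
    loss = tf.reduce_mean(y) * loss_scale
  grad = tape.gradient(loss, y)
  # Each element's gradient is loss_scale / batch_size = 8 / 2 = 4.
  assert grad.numpy().tolist() == [[4.0], [4.0]]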
def test_model(self,
               strategy_fn,
               use_operator=False,
               use_regularizer=False,
               policy_name='mixed_float16',
               get_config=False,
               save_format=None,
               use_input_spec=False):
  self._skip_if_strategy_unsupported(strategy_fn)
  self._skip_if_save_format_unsupported(save_format)
  regularizer = (mp_test_util.IdentityRegularizer()
                 if use_regularizer else None)
  with strategy_fn().scope():
    # Pass loss_scale=None, as this test will fail if the DynamicLossScale
    # skips applying gradients for a step.
    with policy.policy_scope(policy.Policy(policy_name, loss_scale=None)):
      layer = mp_test_util.MultiplyLayer(
          assert_type=dtypes.float16,
          use_operator=use_operator,
          regularizer=regularizer,
          input_shape=(1,))
      if use_input_spec:
        layer.input_spec = input_spec.InputSpec(shape=(2, 1))
      model = testing_utils.get_model_from_layers(
          [layer], input_shape=(1,), input_dtype=dtypes.float16)
      if get_config:
        config = model.get_config()
        model = model.__class__.from_config(
            config,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (layer,) = (
            layer for layer in model.layers
            if isinstance(layer, mp_test_util.MultiplyLayer))

      def loss_fn(y_true, y_pred):
        del y_true
        return math_ops.reduce_mean(y_pred)

      # The learning rate is small enough that, if applied to a float16
      # variable, the update would be lost to rounding. So this tests that the
      # update is applied to the float32 variable, not to a float16 value.
      opt = gradient_descent.SGD(2 ** -14)
      model.compile(
          opt, loss=loss_fn, run_eagerly=testing_utils.should_run_eagerly())

  x = np.ones((2, 1))
  y = np.ones((2, 1))
  dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
  model.fit(dataset)
  # The variable starts at 1 and has a gradient of 1, so one learning-rate
  # step of 2 ** -14 should be subtracted from it.
  expected = 1 - 2 ** -14
  if use_regularizer:
    # The regularizer adds 1 to the gradient, subtracting another 2 ** -14
    # from the variable.
    expected -= 2 ** -14
  self.assertEqual(backend.eval(layer.v), expected)

  if save_format:
    with generic_utils.CustomObjectScope({
        'MultiplyLayer': mp_test_util.MultiplyLayer,
        'loss_fn': loss_fn
    }):
      self._test_saving(model, dataset, save_format, use_regularizer)
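# --- Illustrative sketch, not part of the original test suite ---
# Why a learning rate of 2 ** -14 distinguishes float32 from float16 updates:
# just below 1.0 the spacing between float16 values is 2 ** -11, so subtracting
# 2 ** -14 from a float16 1.0 rounds back to 1.0, while float32 represents
# 1 - 2 ** -14 exactly. (np is numpy, as used elsewhere in this file.)
def _sketch_float16_update_rounds_away():
  step = 2 ** -14
  assert np.float16(1.) - np.float16(step) == 1.  # update lost in float16
  assert np.float32(1.) - np.float32(step) == 1 - step  # retained in float32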