Example #1
    def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
        x = constant_op.constant([1.])
        with strategy_fn().scope():
            with policy.policy_scope('mixed_float16'):
                # Test on MultiplyLayer
                layer = mp_test_util.MultiplyLayer(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer())
                layer(x)
                (regularizer_loss, ) = layer.losses
                self.assertEqual(regularizer_loss.dtype, dtypes.float32)
                self.evaluate(variables.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.)

                # Test on MultiplyLayerWithoutAutoCast
                layer = MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer())
                layer(x)
                (regularizer_loss, ) = layer.losses
                self.assertEqual(regularizer_loss.dtype, dtypes.float32)
                self.evaluate(variables.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.)
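
A minimal standalone sketch of the behavior tested above, assuming the public tf.keras.mixed_precision API (TF 2.4+) in place of the internal test modules: under the mixed_float16 policy a layer computes in float16, but its variables, and therefore its regularizer losses, stay float32.

    import tensorflow as tf

    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # A Dense layer with an L2 regularizer stands in for MultiplyLayer +
    # IdentityRegularizer; the dtype behavior is the same.
    layer = tf.keras.layers.Dense(
        1, kernel_regularizer=tf.keras.regularizers.l2(1.))
    y = layer(tf.ones((1, 1)))

    print(y.dtype)                # float16: computation in the compute dtype
    print(layer.kernel.dtype)     # float32: variables keep the variable dtype
    print(layer.losses[0].dtype)  # float32: regularizer runs on float32 vars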
Example #2
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some
        #    of which do not
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        else:
            loss_scale = None
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(
                    policy.Policy('mixed_float16', loss_scale=loss_scale)):
                x = layers.Input(shape=(1, ), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True)
                layer2 = MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16, use_operator=True)
                layer3 = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                                    use_operator=False)
                layer4 = MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=dtypes.float16,
                            expected_gradient=[expected_gradient]))
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
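
For context on the expected_gradient arithmetic above: the loss is reduce_mean over a batch of 2, so the per-element gradient at y is 1/2, and loss scaling multiplies it by loss_scale=8, giving 8 / 2 = 4 through the Lambda check. Below is a hedged sketch of fixed loss scaling using the public tf.keras.mixed_precision.LossScaleOptimizer (the Policy(loss_scale=...) constructor used above was removed in newer TF releases):

    import tensorflow as tf

    opt = tf.keras.mixed_precision.LossScaleOptimizer(
        tf.keras.optimizers.SGD(learning_rate=2**-14),
        dynamic=False, initial_scale=8.)

    v = tf.Variable(1.)
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(v * tf.ones((2,)))  # d(loss)/dv == 1
        scaled_loss = opt.get_scaled_loss(loss)   # loss * 8
    scaled_grads = tape.gradient(scaled_loss, [v])    # [8.0]: carries the scale
    grads = opt.get_unscaled_gradients(scaled_grads)  # [1.0]: scale removed
    opt.apply_gradients(zip(grads, [v]))  # v -> 1 - 2**-14, as if never scaled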
Example #3
    def test_model(self,
                   strategy_fn,
                   use_operator=False,
                   use_regularizer=False,
                   policy_name='mixed_float16',
                   get_config=False,
                   save_format=None,
                   use_input_spec=False):
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        regularizer = (mp_test_util.IdentityRegularizer()
                       if use_regularizer else None)
        with strategy_fn().scope():
            # Pass loss_scale=None, as this test will fail if the DynamicLossScale
            # skips applying gradients for a step
            with policy.policy_scope(
                    policy.Policy(policy_name, loss_scale=None)):
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                                   use_operator=use_operator,
                                                   regularizer=regularizer,
                                                   input_shape=(1, ))
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(2, 1))
                model = testing_utils.get_model_from_layers(
                    [layer], input_shape=(1, ), input_dtype=dtypes.float16)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                # The learning rate is small enough that, if it were applied to
                # a float16 variable, the variable would not change due to
                # rounding. So this tests that the learning rate is applied to
                # the float32 variable rather than a float16 value.
                opt = gradient_descent.SGD(2**-14)
                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so the update of
        # learning_rate * gradient = 2 ** -14 should be subtracted from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # The regularizer adds 1 to the gradient, subtracting another
            # 2 ** -14 from the variable.
            expected -= 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope({
                    'MultiplyLayer':
                    mp_test_util.MultiplyLayer,
                    'loss_fn':
                    loss_fn
            }):
                self._test_saving(model, dataset, save_format, use_regularizer)
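
Why a learning rate of 2**-14 distinguishes the two variable dtypes is pure rounding: float16 values just below 1.0 are spaced 2**-11 apart, so an update of 2**-14 rounds away to nothing, while float32 resolves it exactly. A quick NumPy check:

    import numpy as np

    # float16 cannot represent 1 - 2**-14; the subtraction rounds back to 1.0.
    print(np.float16(1) - np.float16(2**-14))  # 1.0 (update lost to rounding)
    print(np.float32(1) - np.float32(2**-14))  # 0.99993896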