    def testSerializationWithBuiltInOptimizer(self, use_v1):
        opt = gradient_descent.SGD(2., momentum=0.5)
        if use_v1:
            loss_scale = tf_loss_scale_module.DynamicLossScale(
                initial_loss_scale=2., increment_period=3.)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
        else:
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=2., dynamic_growth_steps=3.)
        config = optimizers.serialize(opt)
        opt = optimizers.deserialize(config)
        # Force hyperparameters to be created
        opt.lr  # pylint: disable=pointless-statement
        self.evaluate(variables.global_variables_initializer())

        self.assertEqual(self.evaluate(opt.lr), 2.)
        self.assertEqual(self.evaluate(opt.inner_optimizer.momentum), 0.5)
        self.assertEqual(self.evaluate(opt.loss_scale), 2.)
        self.assertEqual(opt.dynamic_growth_steps, 3.)
        self.assertTrue(opt.dynamic)
        # Deserializing a LossScaleOptimizer always results in a V2
        # LossScaleOptimizer, even if serialized with a LossScaleOptimizerV1.
        self.assertAllEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer)

        # Ensure the optimizer can be used
        var = variables.Variable([5.0])
        run_op = self._run_fn_with_grad_check(
            distribution_strategy_context.get_strategy(), var, opt, 2)()
        self.evaluate(variables.global_variables_initializer())
        self._run_if_in_graph_mode(run_op)
        self.assertEqual(self.evaluate(var), [3.])
        self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
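
# A minimal sketch (not part of the test above) of the same serialize/deserialize
# round-trip through the public tf.keras API, assuming TF 2.4+ where
# LossScaleOptimizer accepts initial_scale/dynamic_growth_steps directly.
import tensorflow as tf

sgd = tf.keras.optimizers.SGD(2., momentum=0.5)
lso = tf.keras.mixed_precision.LossScaleOptimizer(
    sgd, initial_scale=2., dynamic_growth_steps=3)
config = tf.keras.optimizers.serialize(lso)
restored = tf.keras.optimizers.deserialize(config)
assert isinstance(restored, tf.keras.mixed_precision.LossScaleOptimizer)
assert restored.initial_scale == 2.
assert restored.dynamic_growth_steps == 3
assert restored.dynamic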
Example #2
  def testGetConfigFixed(self, get_config, from_config):
    # Get a config from LossScaleOptimizerV1, LossScaleOptimizer, or the
    # LossScaleOptimizer from TF 2.3. Then restore the config into a
    # LossScaleOptimizerV1 or LossScaleOptimizer
    opt = gradient_descent.SGD(2., momentum=0.5)
    if get_config == 'v1':
      opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, 2)
      config = opt.get_config()
    elif get_config == 'v2':
      opt = loss_scale_optimizer.LossScaleOptimizer(
          opt, dynamic=False, initial_scale=2)
      config = opt.get_config()
    else:
      self.assertEqual(get_config, 'tf2_3')
      config = {
          'optimizer': {
              'class_name': 'SGD',
              'config': {
                  'learning_rate': 2.0,
                  'momentum': 0.5,
                  'decay': 0.0,
                  'nesterov': False,
                  'name': 'SGD',
              }
          },
          'loss_scale': {
              'class_name': 'FixedLossScale',
              'config': {'loss_scale_value': 2.0}
          },
      }

    if from_config == 'v1':
      opt = loss_scale_optimizer.LossScaleOptimizerV1.from_config(config)
    else:
      self.assertEqual(from_config, 'v2')
      opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config)

    # Force hyperparameters to be created
    opt.lr  # pylint: disable=pointless-statement
    self.evaluate(variables.global_variables_initializer())

    # Test attributes on the optimizer
    self.assertEqual(self.evaluate(opt.lr), 2.)
    self.assertEqual(self.evaluate(opt.inner_optimizer.lr), 2.)
    self.assertEqual(self.evaluate(opt.momentum), 0.5)
    self.assertEqual(self.evaluate(opt.loss_scale), 2.)
    self.assertEqual(opt.initial_scale, 2.)
    self.assertIsNone(opt.dynamic_growth_steps)
    self.assertIsNone(opt.dynamic_counter)
    self.assertFalse(opt.dynamic)

    # Ensure the optimizer can be used
    var = variables.Variable([5.0])
    run_op = self._run_fn_with_grad_check(
        distribution_strategy_context.get_strategy(), var, opt, 2)()
    self.evaluate(variables.global_variables_initializer())
    self._run_if_in_graph_mode(run_op)
    self.assertEqual(self.evaluate(var), [3.])
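
# Sketch of the fixed-scale config round-trip via the public TF 2.4+ API; the
# test above suggests the older TF 2.3 'loss_scale' config key is likewise
# accepted by from_config and mapped onto the initial_scale/dynamic fields.
import tensorflow as tf

LossScaleOptimizer = tf.keras.mixed_precision.LossScaleOptimizer
lso = LossScaleOptimizer(tf.keras.optimizers.SGD(2., momentum=0.5),
                         dynamic=False, initial_scale=2)
restored = LossScaleOptimizer.from_config(lso.get_config())
assert float(restored.loss_scale) == 2.  # fixed scale equals initial_scale
assert not restored.dynamic
assert restored.dynamic_growth_steps is None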
  def test_optimizer_errors(self):
    opt = gradient_descent_v2.SGD(1.0)
    opt = loss_scale_optimizer_v2.LossScaleOptimizerV1(opt, 'dynamic')
    with self.assertRaisesRegex(
        ValueError, '"opt" must not already be an instance of a '
        'LossScaleOptimizer.'):
      enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(config.get_optimizer_experimental_options().get(
        'auto_mixed_precision', False))
    def testPassingV1LossScale(self, strategy_fn):
        strategy = strategy_fn()
        learning_rate = 2.
        with strategy.scope():
            # Test FixedLossScale
            var = variables.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            loss_scale = tf_loss_scale_module.FixedLossScale(2.)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
            self.assertIsInstance(opt.loss_scale, ops.Tensor)
            self.evaluate(variables.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2)
            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 2 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient is 1,
            # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
            self.assertAllClose([3.], self.evaluate(var))

            # Test DynamicLossScale
            var = variables.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            loss_scale = tf_loss_scale_module.DynamicLossScale(
                initial_loss_scale=4, increment_period=1, multiplier=2)
            loss_scale._current_loss_scale.assign(2)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
            self.assertEqual(opt.initial_scale, 4)
            self.assertEqual(opt.dynamic_growth_steps, 1)
            self.evaluate(variables.global_variables_initializer())
            # The current loss scale is not copied, so the loss scale is
            # reinitialized to 4
            self.assertEqual(self.evaluate(opt.loss_scale), 4)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)

            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 4 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            self.assertAllClose([3.], self.evaluate(var))
    def testV1Optimizer(self, strategy_fn):
        strategy = strategy_fn()
        learning_rate = 2.
        with strategy.scope():
            # Test FixedLossScale
            var = variables.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale=2)
            self.assertIsInstance(opt.loss_scale, ops.Tensor)
            self.evaluate(variables.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2)
            self.assertEqual(opt.initial_scale, 2)
            self.assertIsNone(opt.dynamic_growth_steps)
            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 2 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient is 1,
            # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
            self.assertAllClose([3.], self.evaluate(var))

            # Test DynamicLossScale
            var = variables.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, 'dynamic')
            self.assertEqual(opt.initial_scale, 2**15)
            self.assertEqual(opt.dynamic_growth_steps, 2000)
            self.evaluate(variables.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2**15)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)

            loss = lambda: var * float('NaN')
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            self.assertAllClose([5.], self.evaluate(var))
            self.assertEqual(self.evaluate(opt.loss_scale), 2**14)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)
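
# Eager-mode sketch of the equivalent TF 2.4+ construction: a dynamic
# LossScaleOptimizer defaults to initial_scale=2**15 and
# dynamic_growth_steps=2000, and a non-finite gradient skips the update and
# halves the loss scale.
import tensorflow as tf

var = tf.Variable([5.0])
lso = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.SGD(2.))
assert lso.initial_scale == 2. ** 15
assert lso.dynamic_growth_steps == 2000

lso.minimize(lambda: var * float('nan'), var_list=[var])
assert float(lso.loss_scale) == 2. ** 14  # scale halved after NaN gradients
assert var.numpy()[0] == 5.0              # update was skipped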
    def testPassingV1LossScaleErrors(self):
        opt = gradient_descent.SGD()
        loss_scale = tf_loss_scale_module.DynamicLossScale(multiplier=4)
        with self.assertRaisesRegex(
                ValueError, 'When passing a DynamicLossScale to "loss_scale", '
                'DynamicLossScale.multiplier must be 2. Got: '
                'DynamicLossScale'):
            loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)

        class MyLossScale(tf_loss_scale_module.LossScale):
            def __call__(self):
                return 1.

            def update(self, grads):
                return None, True

            def get_config(self):
                return {}

        with self.assertRaisesRegex(
                TypeError,
                'Passing a LossScale that is not a FixedLossScale or a '
                'DynamicLossScale is no longer supported. Got:'):
            loss_scale_optimizer.LossScaleOptimizerV1(opt, MyLossScale())
Example #7
    def test_save_model_with_dynamic_loss_scaling(
            self, strategy_fn, h5=False, use_v1_loss_scale_optimizer=False):
        # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
        # as well.
        strategy = strategy_fn()
        if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                and not context.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2, ), batch_size=2, dtype=dtypes.float32)
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.)
            if use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=1., increment_period=2.)
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, initial_scale=1., dynamic_growth_steps=2.)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
        (weight, ) = model.trainable_weights
        orig_weight = backend.get_value(weight)

        # Save model weights.
        save_path = os.path.join(self.get_temp_dir(), 'model')
        model.save(save_path, save_format='h5' if h5 else 'tf')

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_weight = backend.get_value(weight)
        self.assertNotEqual(new_weight, orig_weight)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model = save.load_model(
            save_path,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (weight, ) = model.trainable_weights
        loaded_weight = backend.get_value(weight)
        self.assertEqual(loaded_weight, orig_weight)
        # Currently the loss scale isn't always saved when the model is saved with
        # Model.save(). So we assert the loss scale either has the value when it was
        # saved, or the value it was initialized with.
        # TODO(reedwm): Always save/restore the loss scale with Model.save().
        self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
        self.assertIn(backend.get_value(model.optimizer.dynamic_counter),
                      (0, 1))

        # Test optimizer attributes and type
        self.assertEqual(model.optimizer.initial_scale, 1.)
        self.assertEqual(model.optimizer.dynamic_growth_steps, 2.)
        self.assertEqual(type(model.optimizer),
                         loss_scale_optimizer.LossScaleOptimizer)
Example #8
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False,
                                  use_v1_loss_scale_optimizer=False):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            if pass_loss_scale_to_policy:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.PolicyV1('mixed_float16', loss_scale=loss_scale)
            elif use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt,
                    initial_scale=initial_loss_scale,
                    dynamic_growth_steps=2)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
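
# When training without Model.fit, the documented pattern (sketched here,
# assuming TF 2.4+ eager execution) is to scale the loss and unscale the
# gradients manually around a GradientTape.
import tensorflow as tf

lso = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.SGD(1.))
var = tf.Variable(1.0)

def train_step():
    with tf.GradientTape() as tape:
        loss = var * var
        # Multiply the loss by the current loss scale before differentiating.
        scaled_loss = lso.get_scaled_loss(loss)
    scaled_grads = tape.gradient(scaled_loss, [var])
    # Divide the gradients by the loss scale before applying them.
    grads = lso.get_unscaled_gradients(scaled_grads)
    # apply_gradients skips the step and halves the scale if grads are non-finite.
    lso.apply_gradients(zip(grads, [var]))

train_step()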