Example #1
    def testDynamicLossScaleWithSlots(self, strategy_fn):
        with strategy_fn().scope() as strategy:
            var = variables.Variable([1.0, 2.0])
            # An SGD optimizer with momentum has slot variables.
            opt = momentum.MomentumOptimizer(1.0, momentum=1.)
            initial_loss_scale = 2.
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=initial_loss_scale,
                increment_period=1,
                multiplier=4)
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)
            loss = lambda: var / strategy.num_replicas_in_sync
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The momentum accumulator starts at 0 and the gradient is 1. The
            # accumulator is incremented by the gradient, so it is now 1. Then
            # the accumulator (1) is subtracted from the variable.
            self.assertAllClose([0.0, 1.0], self.evaluate(var))
            self.assertEqual(self.evaluate(opt._loss_scale()),
                             initial_loss_scale * 4)

            run_op = strategy.experimental_run(run_fn)
            self._run_if_in_graph_mode(run_op)
            # The momentum accumulator was 1 before this step and the gradient
            # is 1. The accumulator is incremented by the gradient, so it is
            # now 2. Then the accumulator (2) is subtracted from the variable.
            self.assertAllClose([-2., -1.], self.evaluate(var))
            self.assertEqual(self.evaluate(opt._loss_scale()),
                             initial_loss_scale * 16)
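For reference, the accumulator arithmetic described in the comments above can be reproduced in a few lines of plain Python. This is only an illustrative sketch under the test's assumptions (learning rate 1, momentum 1, a constant per-element gradient of 1); it does not use TensorFlow.

# Plain-Python sketch of the momentum update asserted in the test above
# (assumes lr=1, momentum=1, and a constant per-element gradient of 1).
var = [1.0, 2.0]
accumulator = [0.0, 0.0]
for step in range(2):  # two minimize() calls
    grad = [1.0, 1.0]
    # accumulator = momentum * accumulator + gradient
    accumulator = [1.0 * a + g for a, g in zip(accumulator, grad)]
    # var = var - lr * accumulator
    var = [v - 1.0 * a for v, a in zip(var, accumulator)]
    print(step + 1, var)  # step 1 -> [0.0, 1.0], step 2 -> [-2.0, -1.0]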
Example #2
  def test_optimizer_errors(self):
    opt = 1
    if tf2.enabled():
      expected_regex = ('"opt" must be an instance of a '
                        'tf.keras.optimizers.Optimizer, but got')
    else:
      expected_regex = ('"opt" must be an instance of a tf.train.Optimizer or '
                        'a tf.keras.optimizers.Optimizer, but got')
    with self.assertRaisesRegexp(ValueError, expected_regex):
      enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(config.get_optimizer_experimental_options()
                     .get('auto_mixed_precision', False))

    opt = gradient_descent_v1.GradientDescentOptimizer(1.0)
    opt = loss_scale_optimizer_v1.MixedPrecisionLossScaleOptimizer(opt,
                                                                   'dynamic')
    with self.assertRaisesRegexp(ValueError,
                                 '"opt" must not already be an instance of a '
                                 'MixedPrecisionLossScaleOptimizer.'):
      enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(config.get_optimizer_experimental_options()
                     .get('auto_mixed_precision', False))

    opt = gradient_descent_v2.SGD(1.0)
    opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt, 'dynamic')
    with self.assertRaisesRegexp(ValueError,
                                 '"opt" must not already be an instance of a '
                                 'LossScaleOptimizer.'):
      enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(config.get_optimizer_experimental_options()
                     .get('auto_mixed_precision', False))
Example #3
    def testDynamicUpdate(self, strategy_fn):
        with strategy_fn().scope() as strategy:
            var = variables.Variable([1.0, 2.0])
            opt = gradient_descent.GradientDescentOptimizer(1.0)
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=2, increment_period=1, multiplier=2)
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)

            # Test optimizer with finite gradients
            loss = lambda: var * 2.0 / strategy.num_replicas_in_sync
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # Gradient is 2, so variable will have 2 subtracted from it
            self.assertAllClose([-1.0, 0.0], self.evaluate(var))
            # Loss scale has doubled from 2 to 4
            self.assertEqual(4., self.evaluate(opt._loss_scale()))

            # Test optimizer with NaN gradients
            loss = lambda: var * float('NaN')
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self._run_if_in_graph_mode(run_op)
            # Variable should not change from before, due to NaN gradients.
            self.assertAllClose(self.evaluate(var), [-1.0, 0.0])
            # Loss scale should be halved due to the NaN gradients.
            self.assertEqual(2., self.evaluate(opt._loss_scale()))
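The doubling and halving asserted above follow the usual dynamic loss-scaling rule: grow the scale after a run of finite-gradient steps, shrink it when a non-finite gradient appears. Below is a minimal, TensorFlow-free sketch of that rule under the test's settings (increment_period=1, multiplier=2); the floor of 1 is an assumption of the sketch, not something the test exercises.

import math

def dynamic_loss_scale_step(scale, grads, multiplier=2.0):
    # Sketch: multiply the scale when all gradients are finite, otherwise
    # divide it (with an assumed floor of 1) and skip the variable update.
    # increment_period=1 assumed, so every finite step grows the scale.
    if all(math.isfinite(g) for g in grads):
        return scale * multiplier
    return max(scale / multiplier, 1.0)

scale = 2.0
scale = dynamic_loss_scale_step(scale, [2.0, 2.0])           # finite step -> 4.0
scale = dynamic_loss_scale_step(scale, [float('nan'), 2.0])  # NaN step    -> 2.0
print(scale)  # 2.0, matching the final assertion in the test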
Example #4
    def testDynamicLossScale(self, strategy_fn):
        strategy = strategy_fn()
        learning_rate = 2.
        expected_gradient = resource_variable_ops.ResourceVariable(
            learning_rate / strategy.num_replicas_in_sync)
        with strategy.scope():
            var = variables.Variable([5.0])
            opt = gradient_descent.GradientDescentOptimizer(learning_rate)
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=2, increment_period=1, multiplier=2)
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)
            self.assertEqual(
                loss_scale.initial_loss_scale % strategy.num_replicas_in_sync,
                0)

            run_fn = self._run_fn_with_grad_check(strategy, var, opt,
                                                  expected_gradient)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient is 1,
            # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
            self.assertAllClose([3.], self.evaluate(var))

            # Loss scale will have doubled, so the expected gradient is also doubled.
            self.evaluate(
                expected_gradient.assign(2 * learning_rate /
                                         strategy.num_replicas_in_sync))
            run_op = strategy.experimental_run(run_fn)
            self._run_if_in_graph_mode(run_op)
            # As before, 2 is subtracted from the variable, making its new
            # value 1.
            self.assertAllClose([1.], self.evaluate(var))
Example #5
    def testFixedLossScaleAppliedToLossWithGetGradients(self):
        var = variables.Variable([2.0])
        opt = gradient_descent.GradientDescentOptimizer(1.0)
        loss_scale = 10.
        opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
            opt, loss_scale)
        grad_check_fn = create_identity_with_grad_check_fn(loss_scale)
        loss = grad_check_fn(var)
        run_op = get_gradients(opt, loss, [var])
        self.evaluate(variables.global_variables_initializer())
        # This will cause an assertion to run, as
        # create_identity_with_grad_check_fn added an assertion op.
        self.evaluate(run_op)
Example #6
def _wrap_optimizer(opt, loss_scale, use_v1_behavior):
    """Wraps an optimizer with a LossScaleOptimizer."""

    if isinstance(opt,
                  loss_scale_optimizer_v1.MixedPrecisionLossScaleOptimizer):
        raise ValueError('"opt" must not already be an instance of a '
                         'MixedPrecisionLossScaleOptimizer. '
                         '`enable_mixed_precision_graph_rewrite` will '
                         'automatically wrap the optimizer with a '
                         'MixedPrecisionLossScaleOptimizer.')
    # To avoid a circular dependency, we cannot depend on tf.keras. Because
    # LossScaleOptimizer is in Keras, we cannot use isinstance, so instead check
    # the class name.
    if opt.__class__.__name__ == 'LossScaleOptimizer':
        raise ValueError('"opt" must not already be an instance of a '
                         'LossScaleOptimizer. '
                         '`enable_mixed_precision_graph_rewrite` will '
                         'automatically wrap the optimizer with a '
                         'LossScaleOptimizer.')

    if isinstance(opt, optimizer.Optimizer):
        # For convenience, we allow the V2 version of this function to wrap the V1
        # optimizer, even though we do not document this.
        return loss_scale_optimizer_v1.MixedPrecisionLossScaleOptimizer(
            opt, loss_scale)

    # Because we cannot depend on tf.keras, we see if `opt` is an instance of the
    # Keras OptimizerV2 class by checking the subclass names.
    base_classes = tf_inspect.getmro(opt.__class__)
    base_class_names = [cls.__name__ for cls in base_classes]
    is_loss_scale_optimizer_v2 = 'OptimizerV2' in base_class_names

    if is_loss_scale_optimizer_v2:
        # Because we cannot depend on tf.keras, we cannot unconditionally do this
        # import. But since `opt` is a Keras OptimizerV2, we know keras is
        # importable, so it is safe to do this import. (Technically, it's possible
        # to have a dependency on OptimizerV2 and not LossScaleOptimizer, but this
        # is not done in practice).
        from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as loss_scale_optimizer_v2  # pylint: disable=g-import-not-at-top
        return loss_scale_optimizer_v2.LossScaleOptimizer(opt, loss_scale)

    if use_v1_behavior:
        raise ValueError(
            '"opt" must be an instance of a tf.train.Optimizer or a '
            'tf.keras.optimizers.Optimizer, but got: %s' % opt)
    else:
        raise ValueError('"opt" must be an instance of a '
                         'tf.keras.optimizers.Optimizer, but got: %s' % opt)
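As an aside, the MRO-name check used above to detect a Keras OptimizerV2 subclass without importing tf.keras can be illustrated with the standard library alone. The classes below are hypothetical stand-ins, and inspect.getmro takes the place of tf_inspect.getmro; this is only a sketch of the idea, not the library code.

import inspect

class OptimizerV2:           # stand-in for the Keras base class
    pass

class FakeSGD(OptimizerV2):  # stand-in for a concrete Keras optimizer
    pass

def looks_like_keras_optimizer_v2(opt):
    # Walk the class's MRO and compare class names, mirroring the check in
    # _wrap_optimizer that avoids a direct dependency on tf.keras.
    base_class_names = [cls.__name__ for cls in inspect.getmro(opt.__class__)]
    return 'OptimizerV2' in base_class_names

print(looks_like_keras_optimizer_v2(FakeSGD()))  # True
print(looks_like_keras_optimizer_v2(object()))   # False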
Example #7
        def model_fn():
          """Simple model to test mixed precision."""
          x = np.ones((1, 1))
          loss = model(x, training=True)

          if ((task_type == 'worker' and task_id == 0) or
              task_type is task_id is None):
            loss *= loss_multiplier_for_first_worker
          # Learning rate is small enough that if applied to a float16 variable,
          # the variable will not change. So this tests that the update is
          # applied to the float32 variable rather than to a float16 value.
          optimizer = gradient_descent.GradientDescentOptimizer(2 ** -14)
          optimizer = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
              optimizer, loss_scale)
          train_op = optimizer.minimize(
              loss, training_util.get_or_create_global_step())
          return train_op
Example #8
    def testFixedLossScaleAppliedToLossWithMinimize(self, strategy_fn):
        with strategy_fn().scope() as strategy:
            var = variables.Variable([5.0])
            opt = gradient_descent.GradientDescentOptimizer(2.0)
            loss_scale = 10.
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)
            # We need num_replicas_in_sync to divide loss_scale, otherwise
            # loss_scale / strategy.num_replicas_in_sync will not be exact,
            # which could lead to assertion failures due to rounding issues.
            self.assertEqual(loss_scale % strategy.num_replicas_in_sync, 0)
            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, loss_scale / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient
            # is 1, and so the variable will be
            # init_val - grad * lr == 5 - 1 * 2 == 3.
            self.assertAllClose([3.], self.evaluate(var))
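The expected value passed to _run_fn_with_grad_check comes from the scaled-gradient arithmetic: the loss is multiplied by the loss scale before the gradient is computed, and the gradient is divided by it again before being applied. A short sketch of that arithmetic, assuming a single replica and the identity loss used in the test:

# Fixed-loss-scale arithmetic behind the assertions above (single replica assumed).
loss_scale = 10.0
lr = 2.0
var = 5.0

grad = 1.0                                # d(var)/d(var) for the identity loss
scaled_grad = grad * loss_scale           # gradient of the scaled loss: 10.0
unscaled_grad = scaled_grad / loss_scale  # optimizer unscales before applying: 1.0
var -= lr * unscaled_grad
print(var)  # 3.0, matching assertAllClose([3.], ...)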
Example #9
    def testCheckpoint(self, strategy_fn):
        strategy = strategy_fn()
        if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                and not context.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        with self.test_session(), strategy.scope():
            # Build and run a simple model.
            var = variables.Variable([2.0])
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=1., increment_period=2., multiplier=2.)
            opt = momentum.MomentumOptimizer(1.0, momentum=1.)
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)
            run_fn = lambda: opt.minimize(lambda: var + 1., var_list=[var])
            opt_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self.evaluate(opt_op)
            self.assertEqual(self.evaluate(loss_scale()), 1.)
            self.assertEqual(self.evaluate(loss_scale._num_good_steps), 1)

            # Save a checkpoint.
            checkpoint = trackable_utils.Checkpoint(optimizer=opt)
            prefix = os.path.join(self.get_temp_dir(), 'ckpt')
            save_path = checkpoint.save(prefix)

            # Run model again.
            self.evaluate(strategy.experimental_run(run_fn))
            self.assertEqual(self.evaluate(loss_scale()), 2.)
            self.assertEqual(self.evaluate(loss_scale._num_good_steps), 0)

            # Load checkpoint and ensure loss scale is back to its original value.
            status = checkpoint.restore(save_path)
            status.assert_consumed()
            status.run_restore_ops()
            self.assertEqual(self.evaluate(loss_scale()), 1.)
            self.assertEqual(self.evaluate(loss_scale._num_good_steps), 1)
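Why the loss scale stays at 1 after the first step but reaches 2 after the second comes down to the increment_period=2 counter. The following TensorFlow-free sketch of that counter is an illustration only (all steps assumed to have finite gradients):

# Sketch of the good-step counter asserted above
# (initial_loss_scale=1, increment_period=2, multiplier=2, all gradients finite).
loss_scale, num_good_steps = 1.0, 0
for step in range(2):
    num_good_steps += 1
    if num_good_steps >= 2:   # increment_period reached
        loss_scale *= 2.0     # multiplier
        num_good_steps = 0
    print(step + 1, loss_scale, num_good_steps)
# step 1 -> (1.0, 1): the state saved in the checkpoint
# step 2 -> (2.0, 0): the state before restore; restore brings back (1.0, 1)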
Example #10
    def testPassingNoneToLossScale(self):
        opt = gradient_descent.GradientDescentOptimizer(1.0)
        with self.assertRaisesRegex(ValueError, r'loss_scale cannot be None'):
            loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(opt, None)