Example #1
    def testJitCompile(self, strategy):
        # Test that the optimizer yields the same numerical results whether
        # jit_compile is on or off.
        with strategy.scope():
            optimizer_1 = adam_new.Adam(ema_option=optimizer_lib.EMAOption(
                use_ema=True, ema_overwrite_frequency=1))
            optimizer_2 = adam_new.Adam(jit_compile=True,
                                        ema_option=optimizer_lib.EMAOption(
                                            use_ema=True,
                                            ema_overwrite_frequency=1))
            model_1 = keras.Sequential([
                keras.layers.Input(shape=(2, )),
                keras.layers.Dense(5),
                keras.layers.Dense(1)
            ])
            model_2 = keras.models.clone_model(model_1)
            model_2.set_weights(model_1.get_weights())

            def per_worker_dataset_fn():
                def dataset_fn(_):
                    x = np.random.rand(6, 2)
                    y = [1, 1, 1, 0, 0, 0]
                    ds = tf.data.Dataset.from_tensor_slices((x, y))
                    ds = ds.repeat().batch(6)
                    return ds

                return strategy.distribute_datasets_from_function(dataset_fn)

            ds = per_worker_dataset_fn()

            @tf.function
            def train_step(ds):
                def replica_fn(data):
                    features, labels = data
                    with tf.GradientTape() as tape:
                        output_1 = model_1(features)
                        loss_1 = keras.losses.MeanSquaredError(
                            reduction=losses_utils.ReductionV2.NONE)(labels,
                                                                     output_1)
                    grads_1 = tape.gradient(loss_1,
                                            model_1.trainable_variables)
                    optimizer_1.apply_gradients(
                        zip(grads_1, model_1.trainable_variables))

                    with tf.GradientTape() as tape:
                        output_2 = model_2(features)
                        loss_2 = keras.losses.MeanSquaredError(
                            reduction=losses_utils.ReductionV2.NONE)(labels,
                                                                     output_2)
                    grads_2 = tape.gradient(loss_2,
                                            model_2.trainable_variables)
                    optimizer_2.apply_gradients(
                        zip(grads_2, model_2.trainable_variables))

                strategy.run(replica_fn, args=(next(iter(ds)), ))

            for _ in range(3):
                train_step(ds)
                self.assertAllClose(model_1.trainable_variables[0][0],
                                    model_2.trainable_variables[0][0])
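
In the surrounding test file, a test like this normally receives its `strategy` argument from a distribution-strategy combination decorator applied to the test class. A minimal sketch of that wiring, assuming the strategy list in Example #6 is bound to a hypothetical name `STRATEGIES` and that `ds_combinations` aliases `tf.__internal__.distribute.combinations` (both are assumptions; neither is shown in the snippets):

from absl.testing import parameterized
import tensorflow as tf

ds_combinations = tf.__internal__.distribute.combinations

# Hypothetical wiring: runs each test method once per strategy in STRATEGIES,
# passing the strategy instance as the `strategy` argument seen above.
@ds_combinations.generate(
    tf.__internal__.test.combinations.combine(strategy=STRATEGIES, mode=["eager"]))
class DistributedOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    ...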
Example #2
 def test_invalid_ema_option(self):
     ema_option = optimizer_lib.EMAOption(use_ema=True,
                                          ema_momentum=0.5,
                                          ema_overwrite_frequency=50)
     self.assertEqual(ema_option.ema_momentum, 0.5)
     self.assertEqual(ema_option.ema_overwrite_frequency, 50)
     with self.assertRaisesRegex(ValueError,
                                 "`ema_momentum` must be in the*"):
         _ = optimizer_lib.EMAOption(use_ema=True, ema_momentum=-1)
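
A minimal stand-in for the validation this test exercises, assuming `EMAOption` range-checks `ema_momentum` in its constructor (the simplified class below is illustrative, not the library implementation, and the exact error message may differ):

class EMAOptionSketch:
    """Simplified stand-in mirroring the invariant checked by test_invalid_ema_option."""

    def __init__(self, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None):
        if ema_momentum < 0 or ema_momentum > 1:
            raise ValueError(
                "`ema_momentum` must be in the range [0, 1], but received "
                f"ema_momentum={ema_momentum}.")
        self.use_ema = use_ema
        self.ema_momentum = ema_momentum
        self.ema_overwrite_frequency = ema_overwrite_frequency


# EMAOptionSketch(use_ema=True, ema_momentum=-1) raises ValueError, matching the
# regex "`ema_momentum` must be in the*" asserted above.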
Example #3
 def testGetAndFromConfig(self):
     gradients_clip_option = optimizer_lib.GradientsClipOption(clipnorm=0.5)
     ema_option = optimizer_lib.EMAOption(use_ema=True,
                                          ema_momentum=0.5,
                                          ema_overwrite_frequency=50)
     optimizer = adam_new.Adam(learning_rate=np.float64(0.05),
                               beta_1=0.7,
                               beta_2=0.77,
                               amsgrad=True,
                               epsilon=0.001,
                               gradients_clip_option=gradients_clip_option,
                               ema_option=ema_option)
     config = optimizer.get_config()
     self.assertDictEqual(
         config, {
             "learning_rate": np.float32(0.05),
             "beta_1": 0.7,
             "beta_2": 0.77,
             "epsilon": 0.001,
             "amsgrad": True,
             "gradients_clip_option": {
                 "clipnorm": 0.5,
                 "global_clipnorm": None,
                 "clipvalue": None,
             },
             "ema_option": {
                 "use_ema": True,
                 "ema_momentum": 0.5,
                 "ema_overwrite_frequency": 50,
             }
         })
     restored_optimizer = adam_new.Adam.from_config(config)
     self.assertDictEqual(restored_optimizer.get_config(),
                          optimizer.get_config())
Example #4
  def testMovingAverageOptimizer(self):
    # We use Polyak averaging with ema_momentum = 1 so that the moving
    # average always keeps the original value of the variables.
    ema_option = optimizer_lib.EMAOption(
        use_ema=True, ema_momentum=1, ema_overwrite_frequency=2)
    optimizer = adam_new.Adam(ema_option=ema_option)
    x = tf.Variable([1.0, 2.0], dtype=tf.float32)
    x_origin = tf.Variable(x)
    grads = tf.convert_to_tensor([1.0, 2.0])
    # First iteration: the moving average is stored, but the model variables are
    # not overwritten yet.
    optimizer.apply_gradients(zip([grads], [x]))
    self.assertAllEqual(optimizer._model_variables_moving_average[0], x_origin)
    self.assertNotAllEqual(x, x_origin)

    # Second iteration: the moving average is updated and, because
    # ema_overwrite_frequency=2, the model variables are overwritten with it.
    optimizer.apply_gradients(zip([grads], [x]))
    self.assertAllEqual(x, x_origin)
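
The assertions above rely on the exponential-moving-average update rule, which in Keras-style optimizers is essentially average = ema_momentum * average + (1 - ema_momentum) * var; with ema_momentum = 1 the stored average never moves away from the original weights, so overwriting on the second step restores them. A self-contained sketch of that rule (the helper name is hypothetical):

import tensorflow as tf


def ema_update_sketch(average, var, ema_momentum):
    """Applies the moving-average rule the test above depends on."""
    average.assign(ema_momentum * average + (1.0 - ema_momentum) * var)


average = tf.Variable([1.0, 2.0])
var = tf.Variable([0.5, 1.5])
ema_update_sketch(average, var, ema_momentum=1.0)
print(average.numpy())  # [1. 2.] -- unchanged, because ema_momentum is 1.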
Example #5
  @classmethod
  def from_config(cls, config):
    """Creates an optimizer from its config.

    This method is the reverse of `get_config`, capable of instantiating the
    same optimizer from the config dictionary.

    Args:
        config: A Python dictionary, typically the output of `get_config`.

    Returns:
        An optimizer instance.
    """
    if "learning_rate" in config:
      if isinstance(config["learning_rate"], dict):
        config["learning_rate"] = learning_rate_schedule.deserialize(
            config["learning_rate"])
    if "gradients_clip_option" in config:
      config["gradients_clip_option"] = optimizer_lib.GradientsClipOption(
          **config["gradients_clip_option"])
    if "ema_option" in config:
      config["ema_option"] = optimizer_lib.EMAOption(**config["ema_option"])
    return cls(**config)
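
A short usage sketch of the round trip this method enables, using the optimizer and option classes from the examples above (the learning-rate and momentum values here are arbitrary):

ema_option = optimizer_lib.EMAOption(use_ema=True, ema_momentum=0.9)
optimizer = adam_new.Adam(learning_rate=0.01, ema_option=ema_option)

config = optimizer.get_config()               # nested plain-Python dict, as in Example #3
restored = adam_new.Adam.from_config(config)  # rebuilds EMAOption from its nested dict

assert restored.get_config() == optimizer.get_config()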
Example #6
    # TODO(b/202992598): Add PSS strategy once the XLA issue is resolved.
    ds_combinations.one_device_strategy,
    ds_combinations.mirrored_strategy_with_cpu_1_and_2,
    ds_combinations.mirrored_strategy_with_two_gpus,
    ds_combinations.tpu_strategy,
    ds_combinations.cloud_tpu_strategy,
    ds_combinations.multi_worker_mirrored_2x1_cpu,
    ds_combinations.multi_worker_mirrored_2x2_gpu,
    ds_combinations.central_storage_strategy_with_two_gpus,
]

adadelta_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladadelta",
    lambda: adadelta_new.Adadelta(  # pylint: disable=g-long-lambda
        0.002,
        ema_option=optimizer_lib.EMAOption(use_ema=True,
                                           ema_overwrite_frequency=None)))
adagrad_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002))
adam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladam", lambda: adam_new.Adam(0.002))
rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002))
sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalsgdaverage",
    lambda: sgd_new.SGD(  # pylint: disable=g-long-lambda
        0.002,
        ema_option=optimizer_lib.EMAOption(use_ema=True,
                                           ema_overwrite_frequency=1)))

OPTIMIZER_FN = [
    adadelta_new_fn,