def testJitCompile(self, strategy):
    # Test that the optimizer yields the same numerical results with
    # jit_compile on and off.
    with strategy.scope():
      optimizer_1 = adam_new.Adam(
          ema_option=optimizer_lib.EMAOption(
              use_ema=True, ema_overwrite_frequency=1))
      optimizer_2 = adam_new.Adam(
          jit_compile=True,
          ema_option=optimizer_lib.EMAOption(
              use_ema=True, ema_overwrite_frequency=1))
      model_1 = keras.Sequential([
          keras.layers.Input(shape=(2,)),
          keras.layers.Dense(5),
          keras.layers.Dense(1)
      ])
      model_2 = keras.models.clone_model(model_1)
      model_2.set_weights(model_1.get_weights())

      def per_worker_dataset_fn():

        def dataset_fn(_):
          x = np.random.rand(6, 2)
          y = [1, 1, 1, 0, 0, 0]
          ds = tf.data.Dataset.from_tensor_slices((x, y))
          ds = ds.repeat().batch(6)
          return ds

        return strategy.distribute_datasets_from_function(dataset_fn)

      ds = per_worker_dataset_fn()

      @tf.function
      def train_step(ds):

        def replica_fn(data):
          features, labels = data
          with tf.GradientTape() as tape:
            output_1 = model_1(features)
            loss_1 = keras.losses.MeanSquaredError(
                reduction=losses_utils.ReductionV2.NONE)(labels, output_1)
          grads_1 = tape.gradient(loss_1, model_1.trainable_variables)
          optimizer_1.apply_gradients(
              zip(grads_1, model_1.trainable_variables))

          with tf.GradientTape() as tape:
            output_2 = model_2(features)
            loss_2 = keras.losses.MeanSquaredError(
                reduction=losses_utils.ReductionV2.NONE)(labels, output_2)
          grads_2 = tape.gradient(loss_2, model_2.trainable_variables)
          optimizer_2.apply_gradients(
              zip(grads_2, model_2.trainable_variables))

        strategy.run(replica_fn, args=(next(iter(ds)),))

      for _ in range(3):
        train_step(ds)
      self.assertAllClose(model_1.trainable_variables[0][0],
                          model_2.trainable_variables[0][0])
  def test_invalid_ema_option(self):
    ema_option = optimizer_lib.EMAOption(
        use_ema=True, ema_momentum=0.5, ema_overwrite_frequency=50)
    self.assertEqual(ema_option.ema_momentum, 0.5)
    self.assertEqual(ema_option.ema_overwrite_frequency, 50)
    with self.assertRaisesRegex(ValueError, "`ema_momentum` must be in the*"):
      _ = optimizer_lib.EMAOption(use_ema=True, ema_momentum=-1)
  def testGetAndFromConfig(self):
    gradients_clip_option = optimizer_lib.GradientsClipOption(clipnorm=0.5)
    ema_option = optimizer_lib.EMAOption(
        use_ema=True, ema_momentum=0.5, ema_overwrite_frequency=50)
    optimizer = adam_new.Adam(
        learning_rate=np.float64(0.05),
        beta_1=0.7,
        beta_2=0.77,
        amsgrad=True,
        epsilon=0.001,
        gradients_clip_option=gradients_clip_option,
        ema_option=ema_option)
    config = optimizer.get_config()
    self.assertDictEqual(
        config, {
            "learning_rate": np.float32(0.05),
            "beta_1": 0.7,
            "beta_2": 0.77,
            "epsilon": 0.001,
            "amsgrad": True,
            "gradients_clip_option": {
                "clipnorm": 0.5,
                "global_clipnorm": None,
                "clipvalue": None,
            },
            "ema_option": {
                "use_ema": True,
                "ema_momentum": 0.5,
                "ema_overwrite_frequency": 50,
            }
        })
    restored_optimizer = adam_new.Adam.from_config(config)
    self.assertDictEqual(restored_optimizer.get_config(),
                         optimizer.get_config())
  def testMovingAverageOptimizer(self):
    # We use Polyak averaging with ema_momentum = 1 so that the moving average
    # never moves (average = 1 * average + 0 * var) and always keeps the
    # original value of the variables.
    ema_option = optimizer_lib.EMAOption(
        use_ema=True, ema_momentum=1, ema_overwrite_frequency=2)
    optimizer = adam_new.Adam(ema_option=ema_option)
    x = tf.Variable([1.0, 2.0], dtype=tf.float32)
    x_origin = tf.Variable(x)
    grads = tf.convert_to_tensor([1.0, 2.0])

    # First iteration: update the moving average, but do not overwrite the
    # model variables.
    optimizer.apply_gradients(zip([grads], [x]))
    self.assertAllEqual(optimizer._model_variables_moving_average[0], x_origin)
    self.assertNotAllEqual(x, x_origin)

    # Second iteration: update the moving average and overwrite the model
    # variables (ema_overwrite_frequency=2).
    optimizer.apply_gradients(zip([grads], [x]))
    self.assertAllEqual(x, x_origin)
  @classmethod
  def from_config(cls, config):
    """Creates an optimizer from its config.

    This method is the reverse of `get_config`, capable of instantiating the
    same optimizer from the config dictionary.

    Args:
      config: A Python dictionary, typically the output of `get_config`.

    Returns:
      An optimizer instance.
    """
    if "learning_rate" in config:
      if isinstance(config["learning_rate"], dict):
        config["learning_rate"] = learning_rate_schedule.deserialize(
            config["learning_rate"])
    if "gradients_clip_option" in config:
      config["gradients_clip_option"] = optimizer_lib.GradientsClipOption(
          **config["gradients_clip_option"])
    if "ema_option" in config:
      config["ema_option"] = optimizer_lib.EMAOption(**config["ema_option"])
    return cls(**config)
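  # Illustrative round-trip sketch (not part of the original source; the
  # argument values below are assumptions for demonstration only).
  # `from_config` rebuilds the nested `gradients_clip_option` and `ema_option`
  # dicts produced by `get_config` into option objects before calling the
  # constructor:
  #
  #   optimizer = adam_new.Adam(
  #       learning_rate=0.05,
  #       gradients_clip_option=optimizer_lib.GradientsClipOption(clipnorm=0.5),
  #       ema_option=optimizer_lib.EMAOption(use_ema=True, ema_momentum=0.9))
  #   config = optimizer.get_config()  # nested options serialized as dicts
  #   restored = adam_new.Adam.from_config(config)  # options rebuilt as objects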
    # TODO(b/202992598): Add PSS strategy once the XLA issue is resolved.
    ds_combinations.one_device_strategy,
    ds_combinations.mirrored_strategy_with_cpu_1_and_2,
    ds_combinations.mirrored_strategy_with_two_gpus,
    ds_combinations.tpu_strategy,
    ds_combinations.cloud_tpu_strategy,
    ds_combinations.multi_worker_mirrored_2x1_cpu,
    ds_combinations.multi_worker_mirrored_2x2_gpu,
    ds_combinations.central_storage_strategy_with_two_gpus,
]

adadelta_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladadelta",
    lambda: adadelta_new.Adadelta(  # pylint: disable=g-long-lambda
        0.002,
        ema_option=optimizer_lib.EMAOption(
            use_ema=True, ema_overwrite_frequency=None)))
adagrad_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002))
adam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladam", lambda: adam_new.Adam(0.002))
rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002))
sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalsgdaverage",
    lambda: sgd_new.SGD(  # pylint: disable=g-long-lambda
        0.002,
        ema_option=optimizer_lib.EMAOption(
            use_ema=True, ema_overwrite_frequency=1)))

OPTIMIZER_FN = [
    adadelta_new_fn,