def test_fit_simple_linear_model(self):
    """Train one Dense unit on noisy linear data and check averaged weights.

    Synthetic targets are ``x . w`` plus small Gaussian noise. After fitting
    with ``MovingAverage('adam')`` the averaged variables are written back
    into the model, and predictions on fresh inputs must stay within 1e-3 of
    the noise-free targets.
    """
    rng_seed = 0x2019
    np.random.seed(rng_seed)
    tf.random.set_seed(rng_seed)

    # Draw order (inputs, weights, noise) matters for reproducibility.
    sample_count = 50000
    train_inputs = np.random.standard_normal((sample_count, 3))
    w = np.random.standard_normal((3, 1))
    noise = np.random.standard_normal((sample_count, 1)) * 1e-4
    train_targets = np.dot(train_inputs, w) + noise

    dense = tf.keras.layers.Dense(input_shape=(3, ), units=1)
    model = tf.keras.models.Sequential()
    model.add(dense)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    averaging_opt = MovingAverage('adam')
    model.compile(averaging_opt, loss='mse')
    model.fit(train_inputs, train_targets, epochs=10)
    # Replace the trained weights with their moving averages before predicting.
    averaging_opt.assign_average_vars(model.variables)

    eval_inputs = np.random.standard_normal((100, 3))
    expected = np.dot(eval_inputs, w)
    predicted = model.predict(eval_inputs)

    worst_error = np.abs(predicted - expected).max()
    self.assertLess(worst_error, 1e-3)
def test_fit_simple_linear_model():
    """Train one Dense unit on noisy linear data and check averaged weights.

    Synthetic targets are ``x . w`` plus small Gaussian noise. After fitting
    with ``MovingAverage("sgd")`` the averaged variables are written back
    into the model, and predictions on fresh inputs must stay within 5e-3 of
    the noise-free targets.
    """
    rng_seed = 0x2019
    np.random.seed(rng_seed)
    tf.random.set_seed(rng_seed)

    # Draw order (inputs, weights, noise) matters for reproducibility.
    sample_count = 5000
    train_inputs = np.random.standard_normal((sample_count, 3))
    w = np.random.standard_normal((3, 1))
    noise = np.random.standard_normal((sample_count, 1)) * 1e-4
    train_targets = np.dot(train_inputs, w) + noise

    dense = tf.keras.layers.Dense(input_shape=(3, ), units=1)
    model = tf.keras.models.Sequential()
    model.add(dense)

    averaging_opt = MovingAverage("sgd")
    model.compile(averaging_opt, loss="mse")
    model.fit(train_inputs, train_targets, epochs=5)
    # Replace the trained weights with their moving averages before predicting.
    averaging_opt.assign_average_vars(model.variables)

    eval_inputs = np.random.standard_normal((100, 3))
    expected = np.dot(eval_inputs, w)
    predicted = model.predict(eval_inputs)

    worst_error = np.abs(predicted - expected).max()
    assert worst_error < 5e-3
def test_no_average_slot():
    """Check that ``assign_average_vars`` accepts a model with a vectorizer.

    The model contains a ``TextVectorization`` layer ahead of an embedding
    and a dense layer. The test makes no assertions; it passes as long as
    fitting and the final ``assign_average_vars(model.variables)`` call do
    not raise.
    """
    vocab_size = 5000
    sequence_length = 4
    embedding_width = 2

    # Some preprocessing layers have TrackableWeightHandler.
    # They are returned when using model.variables
    # but it's unable to assign average slot to them.
    text_vectorization_cls = (
        tf.keras.layers.experimental.preprocessing.TextVectorization
    )
    vectorize_layer = text_vectorization_cls(
        max_tokens=vocab_size,
        output_mode="int",
        output_sequence_length=sequence_length,
    )
    vectorize_layer.adapt(["foo", "bar", "baz"])

    # Layers are created in the same order they are stacked.
    stack = [
        tf.keras.Input(shape=(1,), dtype=tf.string),
        vectorize_layer,
        tf.keras.layers.Embedding(vocab_size + 1, embedding_width),
        tf.keras.layers.Dense(1),
    ]
    model = tf.keras.models.Sequential(stack)

    averaging_opt = MovingAverage("sgd")
    model.compile(averaging_opt, loss="mse")
    model.fit(x=["foo", "bar", "baz"], y=[0.0, 1.0, 2.0], epochs=1)
    averaging_opt.assign_average_vars(model.variables)
def test_run(self):
    """Apply two SGD steps through MovingAverage and check variables/averages.

    Runs once for each value of ``sequential_update``. The variable values
    after two updates are always checked; the averaged values (read from the
    ``"average"`` slot) are only asserted when ``sequential_update`` is True.
    """
    for sequential_update in [True, False]:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])

        grads0 = tf.constant([0.1, 0.1])
        grads1 = tf.constant([0.01, 0.01])
        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())

        # `learning_rate` is the documented argument name; `lr` is only a
        # deprecated alias.
        opt = MovingAverage(
            tf.keras.optimizers.SGD(learning_rate=2.0),
            sequential_update=sequential_update,
            average_decay=0.5,
        )

        if not tf.executing_eagerly():
            # Graph mode: build the update op once, initialize, run it twice.
            update = opt.apply_gradients(grads_and_vars)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(update)
            self.evaluate(update)
        else:
            opt.apply_gradients(grads_and_vars)
            opt.apply_gradients(grads_and_vars)

        # Two steps of size learning_rate * grad: 2 * 2.0 * 0.1 = 0.4 and
        # 2 * 2.0 * 0.01 = 0.04.
        self.assertAllClose(var0.read_value(), [0.6, 1.6])
        self.assertAllClose(var1.read_value(), [2.96, 3.96])

        ema_var0 = opt.get_slot(var0, "average")
        ema_var1 = opt.get_slot(var1, "average")

        if sequential_update:
            self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
            self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])

        # Copy the averaged values into the variables themselves.
        assign = opt.assign_average_vars([var0, var1])
        self.evaluate(assign)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [0.75, 1.75])
            self.assertAllClose(var1.read_value(), [2.975, 3.975])

        # Perturb variables and averages by different amounts; the checks
        # below expect each to move independently of the other.
        perturb = tf.group([
            var0.assign_add([1.0, 1.0]),
            var1.assign_add([2.0, 2.0]),
            ema_var0.assign_add([3.0, 3.0]),
            ema_var1.assign_add([4.0, 4.0]),
        ])
        self.evaluate(perturb)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [1.75, 2.75])
            self.assertAllClose(var1.read_value(), [4.975, 5.975])
            self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
            self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
def test_run(self):
    """Apply two SGD steps through MovingAverage and check variables/averages.

    Currently skipped unconditionally via ``self.skipTest``; see the issue
    URL in the skip message. When enabled, the body runs once for each value
    of ``sequential_update`` and only asserts the averaged values when it is
    True.
    """
    self.skipTest(
        "Wait for https://github.com/tensorflow/tensorflow/issues/31582")

    for sequential_update in [True, False]:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])

        grads0 = tf.constant([0.1, 0.1])
        grads1 = tf.constant([0.01, 0.01])
        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

        # `learning_rate` is the documented argument name; `lr` is only a
        # deprecated alias.
        opt = MovingAverage(
            tf.keras.optimizers.SGD(learning_rate=2.0),
            average_decay=0.5,
            sequential_update=sequential_update)

        if not tf.executing_eagerly():
            # Graph mode: build the update op once, initialize, run it twice.
            update = opt.apply_gradients(grads_and_vars)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(update)
            self.evaluate(update)
        else:
            opt.apply_gradients(grads_and_vars)
            opt.apply_gradients(grads_and_vars)

        # Two steps of size learning_rate * grad: 2 * 2.0 * 0.1 = 0.4 and
        # 2 * 2.0 * 0.01 = 0.04.
        self.assertAllClose(var0.read_value(), [0.6, 1.6])
        self.assertAllClose(var1.read_value(), [2.96, 3.96])

        ema_var0 = opt._ema.average(var0)  # pylint: disable=protected-access
        ema_var1 = opt._ema.average(var1)  # pylint: disable=protected-access

        if sequential_update:
            self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
            self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])

        # Copy the averaged values into the variables themselves.
        assign = opt.assign_average_vars([var0, var1])
        self.evaluate(assign)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [0.75, 1.75])
            self.assertAllClose(var1.read_value(), [2.975, 3.975])

        # Perturb variables and averages by different amounts; the checks
        # below expect each to move independently of the other.
        perturb = tf.group([
            var0.assign_add([1.0, 1.0]),
            var1.assign_add([2.0, 2.0]),
            ema_var0.assign_add([3.0, 3.0]),
            ema_var1.assign_add([4.0, 4.0])
        ])
        self.evaluate(perturb)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [1.75, 2.75])
            self.assertAllClose(var1.read_value(), [4.975, 5.975])
            self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
            self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
def test_model_weights_update():
    """Check a Keras model's weight after one update and after averaging.

    A bias-free single-unit Dense layer starts with kernel ``[[1.0]]``. One
    SGD step (learning rate 2.0, gradient 0.1) must leave it at ``[[0.8]]``,
    and ``assign_average_vars`` must then set it to ``[[0.9]]``.
    """
    grad = tf.Variable([[0.1]])
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
            use_bias=False,
        )
    ])
    model.build(input_shape=[1, 1])

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=2.0), average_decay=0.5
    )

    _ = opt.apply_gradients(list(zip([grad], model.variables)))
    # 1.0 - 2.0 * 0.1 = 0.8
    np.testing.assert_allclose(model.variables[0].read_value(), [[0.8]])

    _ = opt.assign_average_vars(model.variables)
    # Expected average with decay 0.5: midway between 1.0 and 0.8.
    np.testing.assert_allclose(model.variables[0].read_value(), [[0.9]])
def test_run(sequential_update):
    """Apply two SGD steps through MovingAverage and check variables/averages.

    Args:
        sequential_update: Forwarded to ``MovingAverage``. The averaged
            values are only asserted when this is truthy. Presumably
            supplied by a pytest parametrization or fixture that is not
            visible in this block -- confirm against the test module.
    """
    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])

    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([0.01, 0.01])
    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=2.0),
        sequential_update=sequential_update,
        average_decay=0.5,
    )

    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    # Two steps of size learning_rate * grad: 2 * 2.0 * 0.1 = 0.4 and
    # 2 * 2.0 * 0.01 = 0.04.
    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    np.testing.assert_allclose(var1.read_value(), [2.96, 3.96])

    ema_var0 = opt.get_slot(var0, "average")
    ema_var1 = opt.get_slot(var1, "average")

    if sequential_update:
        np.testing.assert_allclose(ema_var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(ema_var1.read_value(), [2.975, 3.975])

    # Copy the averaged values into the variables themselves.
    _ = opt.assign_average_vars([var0, var1])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(var1.read_value(), [2.975, 3.975])

    # Perturb variables and averages by different amounts; the checks below
    # expect each to move independently of the other. These are plain
    # statements: the previous trailing commas turned each call into a
    # throwaway one-element tuple expression.
    var0.assign_add([1.0, 1.0])
    var1.assign_add([2.0, 2.0])
    ema_var0.assign_add([3.0, 3.0])
    ema_var1.assign_add([4.0, 4.0])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [1.75, 2.75])
        np.testing.assert_allclose(var1.read_value(), [4.975, 5.975])
        np.testing.assert_allclose(ema_var0.read_value(), [3.75, 4.75])
        np.testing.assert_allclose(ema_var1.read_value(), [6.975, 7.975])
def test_model_weights_update(self):
    """Check a Keras model's weight after one update and after averaging.

    A bias-free single-unit Dense layer starts with kernel ``[[1.0]]``. One
    SGD step (learning rate 2.0, gradient 0.1) must leave it at ``[[0.8]]``,
    and ``assign_average_vars`` must then set it to ``[[0.9]]``.
    """
    grad = tf.Variable([[0.1]])
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
            use_bias=False)
    ])
    model.build(input_shape=[1, 1])

    # The decay is passed by keyword so it cannot bind to a different
    # parameter if the constructor's positional order differs.
    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=2.0), average_decay=0.5)

    update = opt.apply_gradients(list(zip([grad], model.variables)))
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(update)
    # 1.0 - 2.0 * 0.1 = 0.8
    self.assertAllClose(model.variables[0].read_value(), [[0.8]])

    mean_update = opt.assign_average_vars(model.variables)
    self.evaluate(mean_update)
    # Expected average with decay 0.5: midway between 1.0 and 0.8.
    self.assertAllClose(model.variables[0].read_value(), [[0.9]])