def test_swap_weights(device):
    """Swapping twice is an involution: vars and EMA slots both round-trip."""
    with device.scope():
        var = tf.Variable([1.0, 2.0])
        grads = tf.constant([0.1, 0.1])
        opt = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5,)

    @tf.function
    def apply_gradients():
        opt.apply_gradients([(grads, var)])

    device.run(apply_gradients)

    # One SGD step: var = 1.0 - 2.0 * 0.1; EMA = 0.5 * init + 0.5 * var.
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    ema_var = opt.get_slot(var, "average")
    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])

    # First swap: model weights and averaged weights exchange values.
    with device.scope():
        opt.shadow_copy([var])
        opt.swap_weights()
    np.testing.assert_allclose(ema_var.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(var.read_value(), [0.85, 1.85])

    # Second swap restores the original assignment.
    with device.scope():
        opt.swap_weights()
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])
def test_run(self):
    """MovingAverage matches hand-computed SGD + EMA values in graph and eager modes.

    Runs the same scenario for both ``sequential_update`` settings; EMA slot
    values are only deterministic when the update is sequential, so those
    checks are gated on the flag.
    """
    for sequential_update in [True, False]:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])

        grads0 = tf.constant([0.1, 0.1])
        grads1 = tf.constant([0.01, 0.01])
        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())

        opt = MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0),
            sequential_update=sequential_update,
            average_decay=0.5,
        )

        # Graph mode builds the op once and evaluates it twice; eager mode
        # simply applies the gradients twice.
        if not tf.executing_eagerly():
            update = opt.apply_gradients(grads_and_vars)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(update)
            self.evaluate(update)
        else:
            opt.apply_gradients(grads_and_vars)
            opt.apply_gradients(grads_and_vars)

        # Two SGD steps with lr=2.0.
        self.assertAllClose(var0.read_value(), [0.6, 1.6])
        self.assertAllClose(var1.read_value(), [2.96, 3.96])

        ema_var0 = opt.get_slot(var0, "average")
        ema_var1 = opt.get_slot(var1, "average")

        if sequential_update:
            self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
            self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])

        # Copy the averaged values back into the model variables.
        assign = opt.assign_average_vars([var0, var1])
        self.evaluate(assign)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [0.75, 1.75])
            self.assertAllClose(var1.read_value(), [2.975, 3.975])

        # Perturb everything to verify vars and slots are independent.
        perturb = tf.group([
            var0.assign_add([1.0, 1.0]),
            var1.assign_add([2.0, 2.0]),
            ema_var0.assign_add([3.0, 3.0]),
            ema_var1.assign_add([4.0, 4.0]),
        ])
        self.evaluate(perturb)

        if sequential_update:
            self.assertAllClose(var0.read_value(), [1.75, 2.75])
            self.assertAllClose(var1.read_value(), [4.975, 5.975])
            self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
            self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
def test_dynamic_decay():
    """With dynamic_decay the effective decay ramps up over the first steps."""
    var0 = tf.Variable([1.0, 2.0])
    grads0 = tf.constant([0.1, 0.1])
    grads_and_vars = [(grads0, var0)]

    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5, dynamic_decay=True,
    )

    # Apply the same gradient twice.
    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    # Two SGD steps: 1.0 - 2*0.2 = 0.6; EMA reflects the step-dependent decay.
    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    ema_var0 = opt.get_slot(var0, "average")
    np.testing.assert_allclose(ema_var0.read_value(), [0.64, 1.64])
def test_run(sequential_update):
    """Eager MovingAverage test: SGD steps, EMA slots, and assign_average_vars.

    Fix: the four ``assign_add`` calls previously ended in stray trailing
    commas (leftovers from a ``tf.group([...])`` list), turning each statement
    into a throwaway one-element tuple. The commas are removed; behavior is
    unchanged.
    """
    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])

    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([0.01, 0.01])
    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=2.0),
        sequential_update=sequential_update,
        average_decay=0.5,
    )

    # Two optimizer steps with the same gradients.
    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    # Two SGD steps with lr=2.0.
    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    np.testing.assert_allclose(var1.read_value(), [2.96, 3.96])

    ema_var0 = opt.get_slot(var0, "average")
    ema_var1 = opt.get_slot(var1, "average")

    # EMA values are only deterministic under sequential updates.
    if sequential_update:
        np.testing.assert_allclose(ema_var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(ema_var1.read_value(), [2.975, 3.975])

    # Copy the averaged values back into the model variables.
    _ = opt.assign_average_vars([var0, var1])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(var1.read_value(), [2.975, 3.975])

    # Perturb vars and slots independently to confirm they do not alias.
    var0.assign_add([1.0, 1.0])
    var1.assign_add([2.0, 2.0])
    ema_var0.assign_add([3.0, 3.0])
    ema_var1.assign_add([4.0, 4.0])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [1.75, 2.75])
        np.testing.assert_allclose(var1.read_value(), [4.975, 5.975])
        np.testing.assert_allclose(ema_var0.read_value(), [3.75, 4.75])
        np.testing.assert_allclose(ema_var1.read_value(), [6.975, 7.975])
def test_start_step():
    """Averaging begins only once the optimizer reaches ``start_step``."""
    var0 = tf.Variable([1.0, 2.0])
    grads0 = tf.constant([0.1, 0.1])
    grads_and_vars = [(grads0, var0)]

    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=1.0), average_decay=0.5, start_step=1,
    )

    # Step 1: plain SGD step, no averaging yet.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var0.read_value(), [0.9, 1.9])

    ema_var0 = opt.get_slot(var0, "average")

    # Step 2: averaging kicks in; EMA is the midpoint of the last two values.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var0.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(ema_var0.read_value(), [0.85, 1.85])