def test_step(self):
  """Tests one grafted step combining SGD magnitude with Adam direction.

  Hand derivation for gradient value [2, 4]:
    Adam: lr_1 = 0.5 * (1 - 0.6)^0.5 / (1 - 0.5) = 0.63245553203 (magnitude
          does not matter after normalization)
          m_1 = 0.5 * g = [1, 2]
          v_1 = 0.4 * g^2 = [1.6, 6.4]
          adam_step = lr_1 * m_1 / (sqrt(v_1) + eps) = [0.5, 0.5]
          normalized adam_step = [1.0, 1.0]
    SGD:  sgd_step = [0.6, 1.2], per-variable norms = [0.6, 1.2]
    Graft: total_step = 0.9 * [0.6, 1.2], so new vars = [1.46, 1.92].
  """
  magnitude_opt = tf.train.GradientDescentOptimizer(0.3)
  direction_opt = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.6)
  grafted_opt = adagraft.AdaGraftOptimizer(0.9, magnitude_opt, direction_opt)
  with self.cached_session() as sess:
    var0 = tf.Variable(2.0, name="var0")
    var1 = tf.Variable(3.0, name="var1")
    loss = (var0 - 1) * (var0 - 1) + (var1 - 1) * (var1 - 1)
    train_op = grafted_opt.minimize(loss)
    self.evaluate(tf.global_variables_initializer())
    # Expected [loss, var0, var1] after each of the first two steps.
    expected = [[1.058, 1.46, 1.92], [0.22387284, 1.2116001, 1.4232]]
    for expected_values in expected:
      sess.run(train_op)
      observed_values = sess.run([loss, var0, var1])
      print(observed_values)
      self.assertAllClose(expected_values, observed_values)
def GetOptimizer(self, lr):
  """Builds an AdaGraft optimizer from the two configured sub-optimizers.

  Args:
    lr: Learning rate for the magnitude optimizer; also used for the
      direction optimizer unless params.direction_optimizer_lr overrides it.

  Returns:
    An adagraft.AdaGraftOptimizer wrapping the magnitude and direction
    TF optimizers.
  """
  p = self.params
  # Fall back to the shared lr when no dedicated direction lr is configured.
  direction_lr = (
      lr if p.direction_optimizer_lr is None else p.direction_optimizer_lr)
  return adagraft.AdaGraftOptimizer(
      1.0,
      p.magnitude_optimizer.GetOptimizer(lr=lr),
      p.direction_optimizer.GetOptimizer(lr=direction_lr),
      use_global_norm=p.use_global_norm,
      diagnostic=p.diagnostic)
def test_identity(self):
  """Checks that AdaGraft(1.0, opt, opt) reproduces a plain opt step."""
  grafted_opt = adagraft.AdaGraftOptimizer(
      1.0,
      tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5),
      tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5))
  reference_opt = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5)
  with self.cached_session() as sess:
    var0 = tf.Variable(2.0, name="var0")
    var1 = tf.Variable(3.0, name="var1")
    loss = (var0 - 1) * (var0 - 1) + (var1 - 1) * (var1 - 1)
    grafted_op = grafted_opt.minimize(loss)
    reference_op = reference_opt.minimize(loss)
    self.evaluate(tf.global_variables_initializer())
    # One grafted step from the initial point.
    sess.run(grafted_op)
    grafted_result = sess.run([loss, var0, var1])
    print(grafted_result)
    # Reset the variables and take one plain Adam step from the same point.
    sess.run([tf.assign(var0, 2.0), tf.assign(var1, 3.0)])
    sess.run(reference_op)
    reference_result = sess.run([loss, var0, var1])
    print(reference_result)
    self.assertAllClose(grafted_result, reference_result)