def test_default_initialization(self):
  maml_inner_loop_instance = maml_inner_loop.MAMLInnerLoopGradientDescent(
      learning_rate=LEARNING_RATE)
  with self.session() as sess:
    features, labels = create_inputs()
    # We have not populated our custom getter scope yet.
    with self.assertRaises(ValueError):
      maml_inner_loop_instance._compute_and_apply_gradients(None)
    # Our custom getter variable cache has not been populated.
    self.assertEmpty(maml_inner_loop_instance._custom_getter_variable_cache)
    with tf.variable_scope(
        'init_variables',
        custom_getter=maml_inner_loop_instance._create_variable_getter_fn()):
      outputs = inference_network_fn(features=features)
    loss = model_train_fn(
        features=features, labels=labels, inference_outputs=outputs)
    # Now we have variables cached in our custom getter cache.
    self.assertNotEmpty(
        maml_inner_loop_instance._custom_getter_variable_cache)
    # Initially we have nothing in the variable cache.
    self.assertEmpty(maml_inner_loop_instance._variable_cache)
    maml_inner_loop_instance._compute_and_apply_gradients(loss)
    # _compute_and_apply_gradients has populated the cache.
    self.assertLen(maml_inner_loop_instance._variable_cache, 1)
    sess.run(tf.global_variables_initializer())
    variables = sess.run(maml_inner_loop_instance._variable_cache[0])
    self.assertEqual(variables['init_variables/x'], X_INIT)
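
# A minimal sketch (an illustrative assumption, not the actual
# MAMLInnerLoopGradientDescent implementation) of what a caching
# custom_getter such as _create_variable_getter_fn can look like: every
# variable resolved under the scope is recorded by name, which is the
# mechanism the cache assertions above rely on.
def _caching_getter_sketch(variable_cache):
  """Returns a custom_getter that records every variable it resolves."""

  def _getter(getter, name, *args, **kwargs):
    variable = getter(name, *args, **kwargs)
    variable_cache[name] = variable
    return variable

  return _getter
# For example, creating 'x' under tf.variable_scope('init_variables',
# custom_getter=_caching_getter_sketch(cache)) leaves a cache entry
# 'init_variables/x', mirroring the lookup at the end of the test above.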
def test_inner_loop(self, learn_inner_lr):
  tensors = []
  # We iterate over the two options to make sure that the second order graph
  # is indeed larger than the "first order" graph.
  for use_second_order in [False, True]:
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
      maml_inner_loop_instance = maml_inner_loop.MAMLInnerLoopGradientDescent(
          learning_rate=LEARNING_RATE,
          use_second_order=use_second_order,
          learn_inner_lr=learn_inner_lr)
      inputs = create_inputs()
      features, labels = inputs
      outputs, _, _ = maml_inner_loop_instance.inner_loop(
          [inputs, inputs, inputs], inference_network_fn, model_train_fn)
      # outputs[0] is unconditioned, outputs[1] is conditioned.
      outputs = outputs[1]
      outer_loss = model_train_fn(
          features=features, labels=labels, inference_outputs=outputs)
      # Now we optimize the outer loop.
      optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
      train_op = optimizer.minimize(outer_loss)
      # Initialize the variables manually.
      sess.run(tf.global_variables_initializer())
      # We make sure we can run the inner loop.
      sess.run(outputs, feed_dict={features[COEFF_A]: [COEFF_A_VALUE]})
      # We make sure we can run the outer loop optimization step.
      sess.run(train_op, feed_dict={features[COEFF_A]: [COEFF_A_VALUE]})
      # We know that the x sequence converges; the loss might not go down
      # monotonically due to the inner loop though.
      x_previous = sess.run(
          list(maml_inner_loop_instance._variable_cache[0].values())[0])
      for _ in range(10):
        sess.run([train_op], feed_dict={features[COEFF_A]: [COEFF_A_VALUE]})
        x_new = sess.run(
            list(maml_inner_loop_instance._variable_cache[0].values())[0])
        self.assertLess(x_new, x_previous)
        x_previous = x_new
      tensors.append(
          tf.contrib.graph_editor.get_tensors(tf.get_default_graph()))
  # When we use second order, we have a larger graph due to the additional
  # required computation nodes.
  self.assertLess(len(tensors[0]), len(tensors[1]))
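
# A hedged sketch of the distinction the test above exercises: one
# MAML-style inner gradient-descent step. With use_second_order=True the
# gradient tensor stays differentiable, so the outer optimizer
# backpropagates through the inner update and the graph grows; with False,
# tf.stop_gradient cuts that path, which yields first-order MAML. This
# mirrors the idea only; the real class applies such updates through its
# variable cache rather than this standalone function.
def _inner_gradient_step_sketch(loss, variable, learning_rate,
                                use_second_order):
  (gradient,) = tf.gradients(loss, [variable])
  if not use_second_order:
    # Treat the gradient as a constant w.r.t. the outer loop.
    gradient = tf.stop_gradient(gradient)
  # The update is a tensor, not a variable assignment, so the outer loss
  # can be built on top of the adapted value.
  return variable - learning_rate * gradient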
def test_inner_loop_internals(self, learn_inner_lr):
  tensors = []
  # We iterate over the two options to make sure that the second order graph
  # is indeed larger than the "first order" graph.
  for use_second_order in [False, True]:
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
      maml_inner_loop_instance = maml_inner_loop.MAMLInnerLoopGradientDescent(
          learning_rate=LEARNING_RATE,
          use_second_order=use_second_order,
          learn_inner_lr=learn_inner_lr)
      inputs = create_inputs()
      features, _ = inputs
      outer_loss, inner_losses = dummy_inner_loop(
          [inputs, inputs, inputs], maml_inner_loop_instance)
      sess.run(tf.global_variables_initializer())
      # Here we check that we actually improved with our gradient descent
      # steps.
      np_inner_losses, np_outer_loss = sess.run(
          [inner_losses, outer_loss],
          feed_dict={features[COEFF_A]: [COEFF_A_VALUE]})
      # Verify that we make progress in the inner loss with every step.
      # We know this is true for the first sequence. The learning rate is
      # small enough such that we do not overshoot.
      previous_loss_value = np_inner_losses[0]
      for loss_value in np_inner_losses[1:]:
        self.assertLess(loss_value, previous_loss_value)
        previous_loss_value = loss_value
      # The last inner loss is computed one gradient step earlier than the
      # outer loss, which is why its value should be higher.
      self.assertLess(np_outer_loss, np_inner_losses[-1])
      # Now we optimize the outer loop.
      optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
      train_op = optimizer.minimize(outer_loss)
      # Again, we know that the x sequence converges; the loss might not go
      # down monotonically due to the inner loop though.
      x_previous = sess.run(
          list(maml_inner_loop_instance._variable_cache[0].values())[0])
      for _ in range(10):
        sess.run([train_op], feed_dict={features[COEFF_A]: [COEFF_A_VALUE]})
        x_new = sess.run(
            list(maml_inner_loop_instance._variable_cache[0].values())[0])
        self.assertLess(x_new, x_previous)
        x_previous = x_new
      tensors.append(
          tf.contrib.graph_editor.get_tensors(tf.get_default_graph()))
  # When we use second order, we have a larger graph due to the additional
  # required computation nodes.
  self.assertLess(len(tensors[0]), len(tensors[1]))
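
# A self-contained numeric sketch of the property asserted above, using a
# hypothetical quadratic loss f(x) = (a * x)**2 (the real loss comes from
# dummy_inner_loop / model_train_fn): with a sufficiently small learning
# rate every gradient step strictly decreases the loss, so the recorded
# inner losses decrease monotonically and the loss after the final step
# (the outer loss in the test) is lower than the last inner loss.
def _monotonic_inner_losses_sketch(a=2.0, x=1.0, learning_rate=0.01,
                                   num_steps=3):
  inner_losses = []
  for _ in range(num_steps):
    inner_losses.append((a * x)**2)  # Loss before the gradient step.
    x -= learning_rate * 2 * a * a * x  # df/dx = 2 * a**2 * x.
  final_loss = (a * x)**2  # One gradient step more than inner_losses[-1].
  return inner_losses, final_loss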
def test_inner_loop_reuse(self, learn_inner_lr):
  # The inner loop should create as many trainable vars in the 'inner_loop'
  # scope as a direct call to inference_network_fn would. Learned learning
  # rates and learned loss variables should be created *outside* the
  # 'inner_loop' scope since they do not adapt.
  graph = tf.Graph()
  with tf.Session(graph=graph):
    inputs = create_inputs()
    features, _ = inputs
    # Record how many trainable vars a call to inference_network_fn creates.
    with tf.variable_scope('test_scope'):
      inference_network_fn(features)
    expected_num_train_vars = len(tf.trainable_variables(scope='test_scope'))
    maml_inner_loop_instance = maml_inner_loop.MAMLInnerLoopGradientDescent(
        learning_rate=LEARNING_RATE, learn_inner_lr=learn_inner_lr)
    maml_inner_loop_instance.inner_loop(
        [inputs, inputs, inputs], inference_network_fn,
        learned_model_train_fn)
    num_train_vars = len(tf.trainable_variables(scope='inner_loop'))
    self.assertEqual(expected_num_train_vars, num_train_vars)
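
# A hedged sketch of the scoping convention the test above verifies: the
# model variables that adapt in the inner loop live under the 'inner_loop'
# scope, while a learned inner learning rate is created in a separate scope
# so it is meta-learned by the outer optimizer but never adapted. The scope
# and variable names below are illustrative assumptions, not the exact
# names used by MAMLInnerLoopGradientDescent.
def _scoped_variables_sketch():
  with tf.variable_scope('inner_loop'):
    # Counted by tf.trainable_variables(scope='inner_loop') above.
    weights = tf.get_variable('weights', shape=[1])
  with tf.variable_scope('learned_inner_lr'):
    # Outer-loop-only parameter; deliberately outside 'inner_loop'.
    inner_lr = tf.get_variable('lr', initializer=tf.constant(LEARNING_RATE))
  return weights, inner_lr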