def testTrainingSubsetsOfVariablesOnlyUpdatesThoseVariables(self):
  # First, train only the weights of the model.
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    total_loss = self.ModelLoss()
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    weights, biases = variables_lib.get_variables()

    train_op = training.create_train_op(total_loss, optimizer)
    train_weights = training.create_train_op(
        total_loss, optimizer, variables_to_train=[weights])
    train_biases = training.create_train_op(
        total_loss, optimizer, variables_to_train=[biases])

    with self.cached_session() as session:
      # Initialize the variables.
      session.run(variables_lib2.global_variables_initializer())

      # Get the initial weights and biases values.
      weights_values, biases_values = session.run([weights, biases])
      self.assertGreater(np.linalg.norm(weights_values), 0)
      self.assertAlmostEqual(np.linalg.norm(biases_values), 0)

      # Update weights and biases.
      loss = session.run(train_op)
      self.assertGreater(loss, .45)
      new_weights, new_biases = session.run([weights, biases])

      # Check that the weights and biases have been updated.
      self.assertGreater(np.linalg.norm(weights_values - new_weights), 0)
      self.assertGreater(np.linalg.norm(biases_values - new_biases), 0)

      weights_values, biases_values = new_weights, new_biases

      # Update only weights.
      loss = session.run(train_weights)
      self.assertGreater(loss, .45)
      new_weights, new_biases = session.run([weights, biases])

      # Check that the weights have been updated, but biases have not.
      self.assertGreater(np.linalg.norm(weights_values - new_weights), 0)
      self.assertAlmostEqual(np.linalg.norm(biases_values - new_biases), 0)
      weights_values = new_weights

      # Update only biases.
      loss = session.run(train_biases)
      self.assertGreater(loss, .45)
      new_weights, new_biases = session.run([weights, biases])

      # Check that the biases have been updated, but weights have not.
      self.assertAlmostEqual(np.linalg.norm(weights_values - new_weights), 0)
      self.assertGreater(np.linalg.norm(biases_values - new_biases), 0)

def _train_model(self, checkpoint_dir, num_steps):
  """Trains a simple classification model.

  Note that the data has been configured such that after around 300 steps,
  the model has memorized the dataset (i.e. we can expect 100% accuracy).

  Args:
    checkpoint_dir: The directory where the checkpoint is written to.
    num_steps: The number of steps to train for.
  """
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = logistic_classifier(tf_inputs)
    loss = loss_ops.log_loss(tf_labels, tf_predictions)

    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

    train_op = training.create_train_op(loss, optimizer)

    loss = training.train(
        train_op,
        checkpoint_dir,
        hooks=[basic_session_run_hooks.StopAtStepHook(num_steps)])

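# A minimal sketch of how the helper above is typically driven by a test case.
# The test name, directory suffix, and step count below are illustrative
# assumptions, not taken from the original suite:
#
#   def testTrainWritesCheckpoint(self):
#     checkpoint_dir = tempfile.mkdtemp('train_model_example')
#     # 300 steps is enough for the model to memorize the small dataset.
#     self._train_model(checkpoint_dir, num_steps=300)
#     # A later evaluation or resumed run can then restore the variables that
#     # training.train checkpointed into `checkpoint_dir`.
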
def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0):
  tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
  tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

  tf_predictions = logistic_classifier(tf_inputs)
  losses.log_loss(tf_labels, tf_predictions)
  total_loss = losses.get_total_loss()

  optimizer = gradient_descent.GradientDescentOptimizer(
      learning_rate=learning_rate)

  def transform_grads_fn(grads):
    if gradient_multiplier != 1.0:
      variables = variables_lib2.trainable_variables()
      gradient_multipliers = {var: gradient_multiplier for var in variables}

      with ops.name_scope('multiply_grads'):
        return training.multiply_gradients(grads, gradient_multipliers)
    else:
      return grads

  return training.create_train_op(
      total_loss, optimizer, transform_grads_fn=transform_grads_fn)

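# With plain gradient descent, scaling every gradient by a constant `m` before
# apply_gradients is equivalent to scaling the learning rate by `m`, which is
# what the helper above relies on. A hedged sketch of that equivalence (the
# values are illustrative, not from the original tests):
#
#   train_op_a = self.create_train_op(learning_rate=1.0, gradient_multiplier=0.5)
#   train_op_b = self.create_train_op(learning_rate=0.5)
#   # Starting from identical initial variables, one run of train_op_a applies
#   # the same update as one run of train_op_b: -1.0 * (0.5 * grad) == -0.5 * grad.
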
def testTrainWithLocalVariable(self):
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    local_multiplier = variables_lib.local_variable(1.0)

    tf_predictions = logistic_classifier(tf_inputs) * local_multiplier
    losses.log_loss(tf_labels, tf_predictions)
    total_loss = losses.get_total_loss()
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

    train_op = training.create_train_op(total_loss, optimizer)

    loss = training.train(
        train_op,
        None,
        hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)],
        save_summaries_steps=None,
        save_checkpoint_secs=None)
    self.assertIsNotNone(loss)
    self.assertLess(loss, .015)

def testGlobalStepNotIncrementedWhenSetToNone(self):
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = batchnorm_classifier(tf_inputs)
    loss = losses.log_loss(tf_labels, tf_predictions)
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    train_op = training.create_train_op(loss, optimizer, global_step=None)

    global_step = variables_lib.get_or_create_global_step()

    with self.cached_session() as session:
      # Initialize all variables.
      session.run(variables_lib2.global_variables_initializer())

      for _ in range(10):
        session.run(train_op)

      # Since train_op doesn't use global_step, it shouldn't change.
      self.assertAllClose(global_step.eval(), 0)

def testEmptyUpdateOps(self):
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = batchnorm_classifier(tf_inputs)
    loss = losses.log_loss(tf_labels, tf_predictions)
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    train_op = training.create_train_op(loss, optimizer, update_ops=[])

    moving_mean = variables_lib.get_variables_by_name('moving_mean')[0]
    moving_variance = variables_lib.get_variables_by_name('moving_variance')[0]

    with self.cached_session() as session:
      # Initialize all variables.
      session.run(variables_lib2.global_variables_initializer())
      mean, variance = session.run([moving_mean, moving_variance])

      # After initialization, moving_mean == 0 and moving_variance == 1.
      self.assertAllClose(mean, [0] * 4)
      self.assertAllClose(variance, [1] * 4)

      for _ in range(10):
        session.run(train_op)

      mean = moving_mean.eval()
      variance = moving_variance.eval()

      # Since we skip update_ops, the moving_vars are not updated.
      self.assertAllClose(mean, [0] * 4)
      self.assertAllClose(variance, [1] * 4)

def testTrainOpInCollection(self):
  with ops.Graph().as_default():
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = batchnorm_classifier(tf_inputs)
    loss = losses.log_loss(tf_labels, tf_predictions)
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    train_op = training.create_train_op(loss, optimizer)

    # Make sure the training op was recorded in the proper collection.
    self.assertIn(train_op, ops.get_collection(ops.GraphKeys.TRAIN_OP))

def testResumeTrainAchievesRoughlyTheSameLoss(self):
  number_of_steps = [300, 1, 5]
  logdir = tempfile.mkdtemp('resume_train_same_loss')
  for i in range(len(number_of_steps)):
    with ops.Graph().as_default():
      random_seed.set_random_seed(i)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = logistic_classifier(tf_inputs)
      losses.log_loss(tf_labels, tf_predictions)
      total_loss = losses.get_total_loss()

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = training.create_train_op(total_loss, optimizer)

      saver = saver_lib.Saver()

      loss = training.train(
          train_op,
          logdir,
          hooks=[
              basic_session_run_hooks.StopAtStepHook(
                  num_steps=number_of_steps[i]),
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir, save_steps=50, saver=saver),
          ],
          save_checkpoint_secs=None,
          save_summaries_steps=None)
      self.assertIsNotNone(loss)
      self.assertLess(loss, .015)

def testTrainWithNoInitAssignCanAchieveZeroLoss(self):
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = batchnorm_classifier(tf_inputs)
    losses.log_loss(tf_labels, tf_predictions)
    total_loss = losses.get_total_loss()

    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

    train_op = training.create_train_op(total_loss, optimizer)

    loss = training.train(
        train_op,
        None,
        hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)],
        save_summaries_steps=None,
        save_checkpoint_secs=None)
    self.assertLess(loss, .1)

def testTrainAllVarsHasLowerLossThanTrainSubsetOfVars(self):
  logdir = tempfile.mkdtemp('tmp_logs3/')
  if gfile.Exists(logdir):  # For running on jenkins.
    gfile.DeleteRecursively(logdir)

  # First, train only the weights of the model.
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)
    total_loss = self.ModelLoss()
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    weights = variables_lib.get_variables_by_name('weights')

    train_op = training.create_train_op(
        total_loss, optimizer, variables_to_train=weights)

    saver = saver_lib.Saver()
    loss = training.train(
        train_op,
        logdir,
        hooks=[
            basic_session_run_hooks.CheckpointSaverHook(
                logdir, save_steps=200, saver=saver),
            basic_session_run_hooks.StopAtStepHook(num_steps=200),
        ],
        save_checkpoint_secs=None,
        save_summaries_steps=None)
    self.assertGreater(loss, .015)
    self.assertLess(loss, .05)

  # Next, train the biases of the model.
  with ops.Graph().as_default():
    random_seed.set_random_seed(1)
    total_loss = self.ModelLoss()
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
    biases = variables_lib.get_variables_by_name('biases')

    train_op = training.create_train_op(
        total_loss, optimizer, variables_to_train=biases)

    saver = saver_lib.Saver()
    loss = training.train(
        train_op,
        logdir,
        hooks=[
            basic_session_run_hooks.CheckpointSaverHook(
                logdir, save_steps=300, saver=saver),
            basic_session_run_hooks.StopAtStepHook(num_steps=300),
        ],
        save_checkpoint_secs=None,
        save_summaries_steps=None)
    self.assertGreater(loss, .015)
    self.assertLess(loss, .05)

  # Finally, train both weights and biases to get a lower loss.
  with ops.Graph().as_default():
    random_seed.set_random_seed(2)
    total_loss = self.ModelLoss()
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

    train_op = training.create_train_op(total_loss, optimizer)
    saver = saver_lib.Saver()
    loss = training.train(
        train_op,
        logdir,
        hooks=[
            basic_session_run_hooks.StopAtStepHook(num_steps=400),
        ],
        save_checkpoint_secs=None,
        save_summaries_steps=None)
    self.assertIsNotNone(loss)
    self.assertLess(loss, .015)

def create_train_op(total_loss,
                    optimizer,
                    global_step=_USE_GLOBAL_STEP,
                    update_ops=None,
                    variables_to_train=None,
                    clip_gradient_norm=0,
                    summarize_gradients=False,
                    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False,
                    gradient_multipliers=None,
                    check_numerics=True):
  """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `_USE_GLOBAL_STEP`, then tf.train.global_step() is used.
    update_ops: An optional list of updates to execute. If `update_ops` is
      `None`, then the update ops are set to the contents of the
      `tf.GraphKeys.UPDATE_OPS` collection. If `update_ops` is not `None`, but
      it doesn't contain all of the update ops in `tf.GraphKeys.UPDATE_OPS`, a
      warning will be displayed.
    variables_to_train: an optional list of variables to train. If None, it
      will default to all tf.compat.v1.trainable_variables().
    clip_gradient_norm: If greater than 0, the gradients will be clipped by
      this value.
    summarize_gradients: Whether or not to add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.
    gradient_multipliers: A dictionary of either `Variables` or `Variable` op
      names to the coefficient by which the associated gradient should be
      scaled.
    check_numerics: Whether or not we apply check_numerics.

  Returns:
    A `Tensor` that, when evaluated, computes the gradients and returns the
      total loss value.
  """

  def transform_grads_fn(grads):
    if gradient_multipliers:
      with ops.name_scope('multiply_grads'):
        grads = multiply_gradients(grads, gradient_multipliers)

    # Clip gradients.
    if clip_gradient_norm > 0:
      with ops.name_scope('clip_grads'):
        grads = clip_gradient_norms(grads, clip_gradient_norm)
    return grads

  return training.create_train_op(
      total_loss=total_loss,
      optimizer=optimizer,
      global_step=global_step,
      update_ops=update_ops,
      variables_to_train=variables_to_train,
      transform_grads_fn=transform_grads_fn,
      summarize_gradients=summarize_gradients,
      gate_gradients=gate_gradients,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops,
      check_numerics=check_numerics)
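
# A minimal usage sketch for the wrapper above, assuming an existing
# `total_loss` tensor inside a graph and an active `session`. The variable
# name in `gradient_multipliers`, the multiplier, and the clip norm are
# illustrative assumptions, not values from this module:
#
#   optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
#   train_op = create_train_op(
#       total_loss,
#       optimizer,
#       clip_gradient_norm=4.0,  # clip each gradient to norm 4 before applying
#       gradient_multipliers={'conv1/weights': 0.1},  # rescale one gradient
#       summarize_gradients=True)
#   # The returned tensor both applies the gradients and evaluates to the loss:
#   loss_value = session.run(train_op)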