def testBadSummaries(self):
  with ops.Graph().as_default() as g, self.test_session(graph=g):
    _, _, loss, global_step = _setup_model()
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=0.1, optimizer="SGD",
          summaries=["loss", "bad_summary"])
def _make_training_op(training_loss):
  """Training op for the DNN linear combined model."""
  train_ops = []
  if dnn_logits is not None:
    train_ops.append(
        optimizers.optimize_loss(
            loss=training_loss,
            global_step=contrib_variables.get_global_step(),
            learning_rate=_DNN_LEARNING_RATE,
            optimizer=_get_optimizer(dnn_optimizer),
            gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                embedding_lr_multipliers, dnn_parent_scope,
                dnn_input_scope.name),
            clip_gradients=gradient_clip_norm,
            variables=ops.get_collection(dnn_parent_scope),
            name=dnn_parent_scope,
            # Empty summaries, because head already logs "loss" summary.
            summaries=[]))
  if linear_logits is not None:
    train_ops.append(
        optimizers.optimize_loss(
            loss=training_loss,
            global_step=contrib_variables.get_global_step(),
            learning_rate=_linear_learning_rate(len(linear_feature_columns)),
            optimizer=_get_optimizer(linear_optimizer),
            clip_gradients=gradient_clip_norm,
            variables=ops.get_collection(linear_parent_scope),
            name=linear_parent_scope,
            # Empty summaries, because head already logs "loss" summary.
            summaries=[]))
  return control_flow_ops.group(*train_ops)
def testWrongOptimizer(self):
  optimizers = ["blah", variables.Variable, object(), lambda x: None]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g):
        _, _, loss, global_step = _setup_model()
        with self.assertRaises(ValueError):
          optimizers_lib.optimize_loss(
              loss, global_step, learning_rate=0.1, optimizer=optimizer)
def testInvalidLoss(self):
  with ops.Graph().as_default() as g, self.test_session(graph=g):
    _, _, _, global_step = _setup_model()
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          None, global_step, learning_rate=0.1, optimizer="SGD")
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          [[1.0]], global_step, learning_rate=0.1, optimizer="SGD")
def testIgnoreVariablesWithNoGradients(self):
  _, _, loss, global_step = _setup_model()
  unused_variable = variable_scope.get_variable("ignore_me", [])
  optimizers_lib.optimize_loss(
      loss,
      global_step,
      learning_rate=0.1,
      optimizer="SGD",
      gradient_noise_scale=10.0,
      gradient_multipliers={unused_variable: 1.},
      clip_gradients=10.0)
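# Note: several snippets above and below unpack
# `x, var, loss, global_step = _setup_model()`, a module-level helper that is
# not shown in this section. A minimal, hypothetical reconstruction, inferred
# from how the tests feed `x` and assert on `var` (names, shapes, and initial
# values here are assumptions), could look like:
def _setup_model():
  x = array_ops.placeholder(dtypes.float32, [])
  var = variable_scope.get_variable(
      "test", [], initializer=init_ops.constant_initializer(10))
  loss = math_ops.abs(var * x)
  global_step = variable_scope.get_variable(
      "global_step", [],
      trainable=False,
      dtype=dtypes.int64,
      initializer=init_ops.constant_initializer(0, dtype=dtypes.int64))
  return x, var, loss, global_step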
def _dynamic_rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    initial_state = features.get(initial_state_key)
    sequence_length = features.get(sequence_length_key)
    sequence_input = build_sequence_input(features,
                                          sequence_feature_columns,
                                          context_feature_columns)
    if mode == model_fn.ModeKeys.TRAIN:
      cell_for_mode = apply_dropout(
          cell, input_keep_probability, output_keep_probability)
    else:
      cell_for_mode = cell
    rnn_activations, final_state = construct_rnn(
        initial_state,
        sequence_input,
        cell_for_mode,
        target_column.num_label_columns,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    loss = None  # Created below for modes TRAIN and EVAL.
    if prediction_type == PredictionType.MULTIPLE_VALUE:
      prediction_dict = _multi_value_predictions(
          rnn_activations, target_column, predict_probabilities)
      if mode != model_fn.ModeKeys.INFER:
        loss = _multi_value_loss(
            rnn_activations, labels, sequence_length, target_column, features)
    elif prediction_type == PredictionType.SINGLE_VALUE:
      prediction_dict = _single_value_predictions(
          rnn_activations, sequence_length, target_column,
          predict_probabilities)
      if mode != model_fn.ModeKeys.INFER:
        loss = _single_value_loss(
            rnn_activations, labels, sequence_length, target_column, features)
    prediction_dict[RNNKeys.FINAL_STATE_KEY] = final_state

    eval_metric_ops = None
    if mode != model_fn.ModeKeys.INFER:
      eval_metric_ops = _get_eval_metric_ops(
          problem_type, prediction_type, sequence_length, prediction_dict,
          labels)

    train_op = None
    if mode == model_fn.ModeKeys.TRAIN:
      train_op = optimizers.optimize_loss(
          loss=loss,
          global_step=None,  # Get it internally.
          learning_rate=learning_rate,
          optimizer=optimizer,
          clip_gradients=gradient_clipping_norm,
          summaries=optimizers.OPTIMIZER_SUMMARIES)

    return model_fn.ModelFnOps(mode=mode,
                               predictions=prediction_dict,
                               loss=loss,
                               train_op=train_op,
                               eval_metric_ops=eval_metric_ops)
def linear_model_fn_with_model_fn_ops(features, labels, mode):
  """Same as linear_model_fn, but returns `ModelFnOps`."""
  assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                  model_fn.ModeKeys.INFER)
  prediction, loss = (models.linear_regression_zero_init(features, labels))
  train_op = optimizers.optimize_loss(
      loss, variables.get_global_step(), optimizer='Adagrad',
      learning_rate=0.1)
  return model_fn.ModelFnOps(
      mode=mode, predictions=prediction, loss=loss, train_op=train_op)
def _train_op_fn(loss):
  """Returns the op to optimize the loss."""
  return optimizers.optimize_loss(
      loss=loss,
      global_step=contrib_variables.get_global_step(),
      learning_rate=learning_rate,
      optimizer=optimizer,
      name=parent_scope,
      # Empty summaries to prevent optimizers from logging the training_loss.
      summaries=[])
def logistic_model_no_mode_fn(features, labels):
  features = extract(features, 'input')
  labels = extract(labels, 'labels')
  labels = array_ops.one_hot(labels, 3, 1, 0)
  prediction, loss = (models.logistic_regression_zero_init(features, labels))
  train_op = optimizers.optimize_loss(
      loss, variables.get_global_step(), optimizer='Adagrad',
      learning_rate=0.1)
  return {
      'class': math_ops.argmax(prediction, 1),
      'prob': prediction
  }, loss, train_op
def _train_op_fn(loss):
  """Returns the op to optimize the loss."""
  return optimizers.optimize_loss(
      loss=loss,
      global_step=contrib_variables.get_global_step(),
      learning_rate=_LEARNING_RATE,
      optimizer=_get_optimizer(optimizer),
      clip_gradients=gradient_clip_norm,
      name=parent_scope,
      # Empty summaries to prevent optimizers from logging the training_loss.
      summaries=[])
def linear_model_fn(features, labels, mode):
  features = extract(features, 'input')
  labels = extract(labels, 'labels')
  assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                  model_fn.ModeKeys.INFER)
  if isinstance(features, dict):
    (_, features), = features.items()
  prediction, loss = (models.linear_regression_zero_init(features, labels))
  train_op = optimizers.optimize_loss(
      loss, variables.get_global_step(), optimizer='Adagrad',
      learning_rate=0.1)
  return prediction, loss, train_op
def _dynamic_rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    initial_state = features.get(initial_state_key)
    sequence_length = features.get(sequence_length_key)
    sequence_input = build_sequence_input(features,
                                          sequence_feature_columns,
                                          context_feature_columns)
    if mode == model_fn.ModeKeys.TRAIN:
      cell_for_mode = apply_dropout(cell, input_keep_probability,
                                    output_keep_probability)
    else:
      cell_for_mode = cell
    rnn_activations, final_state = construct_rnn(
        initial_state,
        sequence_input,
        cell_for_mode,
        target_column.num_label_columns,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    if prediction_type == PredictionType.MULTIPLE_VALUE:
      prediction_dict = _multi_value_predictions(
          rnn_activations, target_column, predict_probabilities)
      loss = _multi_value_loss(rnn_activations, labels, sequence_length,
                               target_column, features)
    elif prediction_type == PredictionType.SINGLE_VALUE:
      prediction_dict = _single_value_predictions(
          rnn_activations, sequence_length, target_column,
          predict_probabilities)
      loss = _single_value_loss(rnn_activations, labels, sequence_length,
                                target_column, features)

    # TODO(roumposg): Return eval_metric_ops here, instead of default_metrics.
    default_metrics = _get_default_metrics(problem_type, prediction_type,
                                           sequence_length)
    prediction_dict[RNNKeys.FINAL_STATE_KEY] = final_state
    eval_metric_ops = estimator._make_metrics_ops(  # pylint: disable=protected-access
        default_metrics, features, labels, prediction_dict)
    train_op = optimizers.optimize_loss(
        loss=loss,
        global_step=None,
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=gradient_clipping_norm,
        summaries=optimizers.OPTIMIZER_SUMMARIES)
    return model_fn.ModelFnOps(mode=mode,
                               predictions=prediction_dict,
                               loss=loss,
                               train_op=train_op,
                               eval_metric_ops=eval_metric_ops)
def _dnn_train_op_fn(loss):
  """Returns the op to optimize the loss."""
  return optimizers.optimize_loss(
      loss=loss,
      global_step=training_util.get_global_step(),
      learning_rate=_DNN_LEARNING_RATE,
      optimizer=_get_optimizer(dnn_optimizer),
      name=dnn_parent_scope,
      variables=ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
      # Empty summaries to prevent optimizers from logging training_loss.
      summaries=[])
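# Note: the estimator snippets pass `_get_optimizer(...)` into optimize_loss;
# that helper is not shown in this section. A minimal, hypothetical sketch of
# what it presumably does: resolve a user-supplied optimizer that may be a
# zero-argument callable (optimize_loss itself already accepts strings,
# Optimizer classes, and Optimizer instances).
def _get_optimizer(optimizer):
  if callable(optimizer):
    return optimizer()
  return optimizer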
def linear_model_params_fn(features, labels, mode, params):
  features = extract(features, 'input')
  labels = extract(labels, 'labels')
  assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                  model_fn.ModeKeys.INFER)
  prediction, loss = (models.linear_regression_zero_init(features, labels))
  train_op = optimizers.optimize_loss(
      loss,
      variables.get_global_step(),
      optimizer='Adagrad',
      learning_rate=params['learning_rate'])
  return prediction, loss, train_op
def _make_training_op(training_loss):
  """Training op for the DNN linear combined model."""
  train_ops = []
  global_step = training_util.get_global_step()
  if dnn_logits is not None:
    train_ops.append(
        optimizers.optimize_loss(
            loss=training_loss,
            global_step=global_step,
            learning_rate=_DNN_LEARNING_RATE,
            optimizer=dnn_optimizer,
            gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                embedding_lr_multipliers, dnn_parent_scope,
                dnn_input_scope.name),
            clip_gradients=gradient_clip_norm,
            variables=ops.get_collection(dnn_parent_scope),
            name=dnn_parent_scope,
            # Empty summaries, because head already logs "loss" summary.
            summaries=[],
            increment_global_step=not fix_global_step_increment_bug))
  if linear_logits is not None:
    train_ops.append(
        optimizers.optimize_loss(
            loss=training_loss,
            global_step=global_step,
            learning_rate=_linear_learning_rate(len(linear_feature_columns)),
            optimizer=linear_optimizer,
            clip_gradients=gradient_clip_norm,
            variables=ops.get_collection(linear_parent_scope),
            name=linear_parent_scope,
            # Empty summaries, because head already logs "loss" summary.
            summaries=[],
            increment_global_step=not fix_global_step_increment_bug))

  train_op = control_flow_ops.group(*train_ops)
  if fix_global_step_increment_bug:
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1).op
  return train_op
def testInvalidGlobalStep(self):
  with ops.Graph().as_default() as g, self.session(graph=g):
    x = array_ops.placeholder(dtypes.float32, [])
    var = variable_scope.get_variable(
        "test", [], initializer=init_ops.constant_initializer(10))
    loss = math_ops.abs(var * x)
    with self.assertRaises(AttributeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=constant_op.constant(43, dtype=dtypes.int64),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(TypeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [],
              trainable=False,
              dtype=dtypes.float64,
              initializer=init_ops.constant_initializer(
                  0.0, dtype=dtypes.float64)),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [1],
              trainable=False,
              dtype=dtypes.int64,
              initializer=init_ops.constant_initializer(
                  [0], dtype=dtypes.int64)),
          learning_rate=0.1,
          optimizer="SGD")
def testInvalidGlobalStep(self):
  with ops.Graph().as_default() as g, self.test_session(graph=g):
    x = array_ops.placeholder(dtypes.float32, [])
    var = variable_scope.get_variable(
        "test", [], initializer=init_ops.constant_initializer(10))
    loss = math_ops.abs(var * x)
    with self.assertRaises(AttributeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=constant_op.constant(43, dtype=dtypes.int64),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(TypeError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [],
              trainable=False,
              dtype=dtypes.float64,
              initializer=init_ops.constant_initializer(
                  0.0, dtype=dtypes.float64)),
          learning_rate=0.1,
          optimizer="SGD")
    with self.assertRaises(ValueError):
      optimizers_lib.optimize_loss(
          loss,
          global_step=variable_scope.get_variable(
              "global_step", [1],
              trainable=False,
              dtype=dtypes.int64,
              initializer=init_ops.constant_initializer(
                  [0], dtype=dtypes.int64)),
          learning_rate=0.1,
          optimizer="SGD")
def testGradientClip(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=0.1)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.98999, 4)
    self.assertEqual(global_step_value, 1)
def _train_op_fn(loss):
  """Returns the op to optimize the loss."""
  return optimizers.optimize_loss(
      loss=loss,
      global_step=contrib_variables.get_global_step(),
      learning_rate=_LEARNING_RATE,
      optimizer=_get_optimizer(optimizer),
      gradient_multipliers=(
          dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
              embedding_lr_multipliers, parent_scope, input_layer_scope)),
      clip_gradients=gradient_clip_norm,
      name=parent_scope,
      # Empty summaries to prevent optimizers from logging the training_loss.
      summaries=[])
def _train(features):
  """Add training ops to the graph."""
  with variable_scope.variable_scope("model"):
    model_outputs = state_manager.define_loss(
        model, features, estimator_lib.ModeKeys.TRAIN)

  train_op = optimizers.optimize_loss(
      model_outputs.loss,
      global_step=variables.get_global_step(),
      optimizer=optimizer,
      # Learning rate is set in the Optimizer object
      learning_rate=None)
  return estimator_lib.EstimatorSpec(
      loss=model_outputs.loss,
      mode=estimator_lib.ModeKeys.TRAIN,
      train_op=train_op)
def _logistic_regression_model_fn(features, labels, mode):
  _ = mode
  logits = layers.linear(
      features,
      1,
      weights_initializer=init_ops.zeros_initializer(),
      # Intentionally uses really awful initial values so that
      # AUC/precision/recall/etc will change meaningfully even on a toy
      # dataset.
      biases_initializer=init_ops.constant_initializer(-10.0))
  predictions = math_ops.sigmoid(logits)
  loss = losses.sigmoid_cross_entropy(labels, logits)
  train_op = optimizers.optimize_loss(
      loss, training_util.get_global_step(), optimizer='Adagrad',
      learning_rate=0.1)
  return predictions, loss, train_op
def testNoGlobalStepWithDecay(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      with self.assertRaisesRegexp(
          ValueError, "global_step is required for learning_rate_decay_fn"):
        optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
            optimizer=optimizer,
            update_ops=[update_op])
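# Note: the test above references `_no_op_learning_rate_decay_fn`, which is
# not shown in this section. A plausible, hypothetical sketch: a
# learning_rate_decay_fn receives (learning_rate, global_step) and returns the
# (possibly decayed) learning rate; this no-op version returns it unchanged.
def _no_op_learning_rate_decay_fn(lr, global_step):
  assert lr is not None
  assert global_step is not None
  return lr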
def testGradientClip(self):
  with self.test_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=0.1)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.98999, 4)
    self.assertEqual(global_step_value, 1)
def _logistic_regression_model_fn(features, labels, mode):
  _ = mode
  logits = layers.linear(
      features,
      1,
      weights_initializer=init_ops.zeros_initializer(),
      # Intentionally uses really awful initial values so that
      # AUC/precision/recall/etc will change meaningfully even on a toy
      # dataset.
      biases_initializer=init_ops.constant_initializer(-10.0))
  predictions = math_ops.sigmoid(logits)
  loss = loss_ops.sigmoid_cross_entropy(logits, labels)
  train_op = optimizers.optimize_loss(
      loss, variables.get_global_step(), optimizer='Adagrad',
      learning_rate=0.1)
  return predictions, loss, train_op
def _train_ops(self, features):
  """Add training ops to the graph."""
  mode = estimator_lib.ModeKeys.TRAIN
  with variable_scope.variable_scope("model"):
    model_outputs = self.create_loss(features, mode)

  train_op = optimizers.optimize_loss(
      model_outputs.loss,
      global_step=training_util.get_global_step(),
      optimizer=self.optimizer,
      # Learning rate is set in the Optimizer object
      learning_rate=None)
  return estimator_lib.EstimatorSpec(
      loss=model_outputs.loss, mode=mode, train_op=train_op)
def _train_ops(self, features):
  """Add training ops to the graph."""
  with variable_scope.variable_scope("model"):
    model_outputs = self.state_manager.define_loss(
        self.model, features, estimator_lib.ModeKeys.TRAIN)

  train_op = optimizers.optimize_loss(
      model_outputs.loss,
      global_step=variables.get_global_step(),
      optimizer=self.optimizer,
      # Learning rate is set in the Optimizer object
      learning_rate=None)
  return estimator_lib.EstimatorSpec(
      loss=model_outputs.loss,
      mode=estimator_lib.ModeKeys.TRAIN,
      train_op=train_op)
def testNoLrCallable(self):

  def optimizer_fn():
    return gradient_descent.GradientDescentOptimizer(learning_rate=0.1)

  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=None, optimizer=optimizer_fn)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertEqual(var_value, 9.5)
      self.assertEqual(global_step_value, 1)
def testGradientNoise(self):
  random_seed.set_random_seed(42)
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    # Due to randomness the following number may change if graph is different.
    self.assertAlmostEqual(var_value, 9.86912, 4)
    self.assertEqual(global_step_value, 1)
def testGradientNoiseWithClipping(self):
  random_seed.set_random_seed(42)
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0,
        clip_gradients=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.86912, 4)
    self.assertEqual(global_step_value, 1)
def testGradientNoiseWithClipping(self):
  random_seed.set_random_seed(42)
  with self.test_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0,
        clip_gradients=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.0, 4)
    self.assertEqual(global_step_value, 1)
def testGradientMultiply(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_multipliers={var: 7.})
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    # var(0) = 10, x = 5, var(0)/dx = 5,
    # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
    self.assertAlmostEqual(var_value, 6.5, 4)
    self.assertEqual(global_step_value, 1)
def testGradientMultiply(self):
  with self.test_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_multipliers={var: 7.})
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    # var(0) = 10, x = 5, var(0)/dx = 5,
    # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
    self.assertAlmostEqual(var_value, 6.5, 4)
    self.assertEqual(global_step_value, 1)
def testGradientNoise(self):
  random_seed.set_random_seed(42)
  with self.test_session() as session:
    x, var, loss, global_step = _setup_model()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    # Due to randomness the following number may change if graph is different.
    self.assertAlmostEqual(var_value, 8.5591021, 4)
    self.assertEqual(global_step_value, 1)
def testSGDOptimizer(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1),
      lambda lr: gradient_descent.GradientDescentOptimizer(learning_rate=lr)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer=optimizer)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, global_step_value = session.run([var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(global_step_value, 1)
def testGradientMultiplyInt64Tensor(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    v = array_ops.placeholder(dtypes.float64, [])
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_multipliers={var: v})
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5, v: 7.})
    var_value, global_step_value = session.run([var, global_step])
    # var(0) = 10, x = 5, var(0)/dx = 5,
    # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
    self.assertAlmostEqual(var_value, 6.5, 4)
    self.assertEqual(global_step_value, 1)
def _dynamic_rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    initial_state = features.get(initial_state_key)
    sequence_length = features.get(sequence_length_key)
    sequence_input = build_sequence_input(features,
                                          sequence_feature_columns,
                                          context_feature_columns)
    rnn_activations, final_state = construct_rnn(
        initial_state,
        sequence_input,
        cell,
        target_column.num_label_columns,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    if prediction_type == PredictionType.MULTIPLE_VALUE:
      prediction_dict = _multi_value_predictions(
          rnn_activations, target_column, predict_probabilities)
      loss = _multi_value_loss(rnn_activations, labels, sequence_length,
                               target_column, features)
    elif prediction_type == PredictionType.SINGLE_VALUE:
      prediction_dict = _single_value_predictions(
          rnn_activations, sequence_length, target_column,
          predict_probabilities)
      loss = _single_value_loss(rnn_activations, labels, sequence_length,
                                target_column, features)
    default_metrics = _get_default_metrics(problem_type, prediction_type,
                                           sequence_length)
    prediction_dict[RNNKeys.FINAL_STATE_KEY] = final_state
    training_op = optimizers.optimize_loss(
        loss=loss,
        global_step=None,
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=gradient_clipping_norm,
        summaries=optimizers.OPTIMIZER_SUMMARIES)
    return estimator.ModelFnOps(mode=mode,
                                predictions=prediction_dict,
                                loss=loss,
                                training_op=training_op,
                                default_metrics=default_metrics)
def _train_ops(self, features):
  """Add training ops to the graph."""
  mode = estimator_lib.ModeKeys.TRAIN
  with variable_scope.variable_scope(
      "model",
      # Use ResourceVariables to avoid race conditions.
      use_resource=True):
    model_outputs = self.create_loss(features, mode)

  train_op = optimizers.optimize_loss(
      model_outputs.loss,
      global_step=training_util.get_global_step(),
      optimizer=self.optimizer,
      # Learning rate is set in the Optimizer object
      learning_rate=None)
  return estimator_lib.EstimatorSpec(
      loss=model_outputs.loss, mode=mode, train_op=train_op)
def testAdaptiveGradientClip(self):
  with self.cached_session() as session:
    x, var, loss, global_step = _setup_model()
    clip_gradients = optimizers_lib.adaptive_clipping_fn()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=clip_gradients)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.8916, 4)
    self.assertEqual(global_step_value, 1)
    var_count = 0
    for var in variables.global_variables():
      if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
        var_count += 1
    self.assertEqual(2, var_count)
def _train_ops(self, features):
  """Add training ops to the graph."""
  with variable_scope.variable_scope(
      "model",
      # Use ResourceVariables to avoid race conditions.
      use_resource=True):
    model_outputs = self.state_manager.define_loss(
        self.model, features, estimator_lib.ModeKeys.TRAIN)

  train_op = optimizers.optimize_loss(
      model_outputs.loss,
      global_step=training_util.get_global_step(),
      optimizer=self.optimizer,
      # Learning rate is set in the Optimizer object
      learning_rate=None)
  return estimator_lib.EstimatorSpec(
      loss=model_outputs.loss,
      mode=estimator_lib.ModeKeys.TRAIN,
      train_op=train_op)
def testAdaptiveGradientClip(self):
  with self.test_session() as session:
    x, var, loss, global_step = _setup_model()
    clip_gradients = optimizers_lib.adaptive_clipping_fn()
    train = optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        clip_gradients=clip_gradients)
    variables.global_variables_initializer().run()
    session.run(train, feed_dict={x: 5})
    var_value, global_step_value = session.run([var, global_step])
    self.assertAlmostEqual(var_value, 9.8916, 4)
    self.assertEqual(global_step_value, 1)
    var_count = 0
    for var in variables.global_variables():
      if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
        var_count += 1
    self.assertEqual(2, var_count)
def testUpdateOpFromCollection(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
      train = optimizers_lib.optimize_loss(
          loss, global_step, learning_rate=0.1, optimizer=optimizer)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, update_var_value, global_step_value = session.run(
          [var, update_var, global_step])
      self.assertEqual(var_value, 9.5)
      self.assertEqual(update_var_value, 20)
      self.assertEqual(global_step_value, 1)
def testUpdateOp(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      x, var, loss, global_step = _setup_model()
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer=optimizer,
          update_ops=[update_op])
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      self.assertEqual(9.5, var.eval())
      self.assertEqual(20, update_var.eval())
      self.assertEqual(1, global_step.eval())
def testNoGlobalStep(self):
  optimizers = [
      "SGD", gradient_descent.GradientDescentOptimizer,
      gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
  ]
  for optimizer in optimizers:
    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      update_var = variable_scope.get_variable(
          "update", [], initializer=init_ops.constant_initializer(10))
      update_op = state_ops.assign(update_var, 20)
      train = optimizers_lib.optimize_loss(
          loss,
          global_step=None,
          learning_rate=0.1,
          optimizer=optimizer,
          update_ops=[update_op])
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      self.assertEqual(9.5, var.eval())
      self.assertEqual(20, update_var.eval())
def _dynamic_rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    sequence_length = features.get(sequence_length_key)
    sequence_input = build_sequence_input(features,
                                          sequence_feature_columns,
                                          context_feature_columns)
    dropout = (dropout_keep_probabilities
               if mode == model_fn.ModeKeys.TRAIN
               else None)
    # This class promises to use the cell type selected by that function.
    cell = rnn_common.construct_rnn_cell(num_units, cell_type, dropout)
    initial_state = dict_to_state_tuple(features, cell)
    rnn_activations, final_state = construct_rnn(
        initial_state,
        sequence_input,
        cell,
        target_column.num_label_columns,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    loss = None  # Created below for modes TRAIN and EVAL.
    if prediction_type == rnn_common.PredictionType.MULTIPLE_VALUE:
      prediction_dict = rnn_common.multi_value_predictions(
          rnn_activations, target_column, problem_type, predict_probabilities)
      if mode != model_fn.ModeKeys.INFER:
        loss = _multi_value_loss(
            rnn_activations, labels, sequence_length, target_column, features)
    elif prediction_type == rnn_common.PredictionType.SINGLE_VALUE:
      prediction_dict = _single_value_predictions(
          rnn_activations, sequence_length, target_column, problem_type,
          predict_probabilities)
      if mode != model_fn.ModeKeys.INFER:
        loss = _single_value_loss(
            rnn_activations, labels, sequence_length, target_column, features)
    state_dict = state_tuple_to_dict(final_state)
    prediction_dict.update(state_dict)

    eval_metric_ops = None
    if mode != model_fn.ModeKeys.INFER:
      eval_metric_ops = rnn_common.get_eval_metric_ops(
          problem_type, prediction_type, sequence_length, prediction_dict,
          labels)

    train_op = None
    if mode == model_fn.ModeKeys.TRAIN:
      train_op = optimizers.optimize_loss(
          loss=loss,
          global_step=None,  # Get it internally.
          learning_rate=learning_rate,
          optimizer=optimizer,
          clip_gradients=gradient_clipping_norm,
          summaries=optimizers.OPTIMIZER_SUMMARIES)

    output_alternatives = _get_output_alternatives(prediction_type,
                                                   problem_type,
                                                   prediction_dict)

    return model_fn.ModelFnOps(mode=mode,
                               predictions=prediction_dict,
                               loss=loss,
                               train_op=train_op,
                               eval_metric_ops=eval_metric_ops,
                               output_alternatives=output_alternatives)
def _rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    if mode == model_fn.ModeKeys.TRAIN:
      cell_for_mode = apply_dropout(cell, input_keep_probability,
                                    output_keep_probability)
    else:
      cell_for_mode = cell
    batch = _read_batch(
        cell=cell_for_mode,
        features=features,
        labels=labels,
        mode=mode,
        num_unroll=num_unroll,
        num_layers=num_layers,
        batch_size=batch_size,
        input_key_column_name=input_key_column_name,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        num_threads=num_threads,
        queue_capacity=queue_capacity)
    sequence_features = batch.sequences
    context_features = batch.context
    if mode != model_fn.ModeKeys.INFER:
      labels = sequence_features.pop(RNNKeys.LABELS_KEY)
    inputs = _prepare_inputs_for_rnn(sequence_features, context_features,
                                     sequence_feature_columns, num_unroll)
    state_name = _get_lstm_state_names(num_layers)
    rnn_activations, final_state = construct_state_saving_rnn(
        cell=cell_for_mode,
        inputs=inputs,
        num_label_columns=target_column.num_label_columns,
        state_saver=batch,
        state_name=state_name)

    loss = None  # Created below for modes TRAIN and EVAL.
    prediction_dict = _multi_value_predictions(rnn_activations, target_column,
                                               predict_probabilities)
    if mode != model_fn.ModeKeys.INFER:
      loss = _multi_value_loss(rnn_activations, labels, batch.length,
                               target_column, features)

    eval_metric_ops = None
    if mode != model_fn.ModeKeys.INFER:
      default_metrics = _get_default_metrics(problem_type, batch.length)
      eval_metric_ops = estimator._make_metrics_ops(  # pylint: disable=protected-access
          default_metrics, features, labels, prediction_dict)
    state_dict = state_tuple_to_dict(final_state)
    prediction_dict.update(state_dict)

    train_op = None
    if mode == model_fn.ModeKeys.TRAIN:
      train_op = optimizers.optimize_loss(
          loss=loss,
          global_step=None,  # Get it internally.
          learning_rate=learning_rate,
          optimizer=optimizer,
          clip_gradients=gradient_clipping_norm,
          summaries=optimizers.OPTIMIZER_SUMMARIES)

    return model_fn.ModelFnOps(mode=mode,
                               predictions=prediction_dict,
                               loss=loss,
                               train_op=train_op,
                               eval_metric_ops=eval_metric_ops)
def _rnn_model_fn(features, labels, mode):
  """The model to be passed to an `Estimator`."""
  with ops.name_scope(name):
    dropout = (dropout_keep_probabilities
               if mode == model_fn.ModeKeys.TRAIN
               else None)
    cell = rnn_common.construct_rnn_cell(num_units, cell_type, dropout)

    batch = _read_batch(
        cell=cell,
        features=features,
        labels=labels,
        mode=mode,
        num_unroll=num_unroll,
        batch_size=batch_size,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        num_threads=num_threads,
        queue_capacity=queue_capacity,
        seed=seed)
    sequence_features = batch.sequences
    context_features = batch.context
    if mode != model_fn.ModeKeys.INFER:
      labels = sequence_features.pop(rnn_common.RNNKeys.LABELS_KEY)
    inputs = _prepare_inputs_for_rnn(sequence_features, context_features,
                                     sequence_feature_columns, num_unroll)
    state_name = _get_state_names(cell)
    rnn_activations, final_state = construct_state_saving_rnn(
        cell=cell,
        inputs=inputs,
        num_label_columns=target_column.num_label_columns,
        state_saver=batch,
        state_name=state_name)

    loss = None  # Created below for modes TRAIN and EVAL.
    prediction_dict = rnn_common.multi_value_predictions(
        rnn_activations, target_column, problem_type, predict_probabilities)
    if mode != model_fn.ModeKeys.INFER:
      loss = _multi_value_loss(rnn_activations, labels, batch.length,
                               target_column, features)

    eval_metric_ops = None
    if mode != model_fn.ModeKeys.INFER:
      eval_metric_ops = rnn_common.get_eval_metric_ops(
          problem_type, rnn_common.PredictionType.MULTIPLE_VALUE,
          batch.length, prediction_dict, labels)
    state_dict = state_tuple_to_dict(final_state)
    prediction_dict.update(state_dict)

    train_op = None
    if mode == model_fn.ModeKeys.TRAIN:
      train_op = optimizers.optimize_loss(
          loss=loss,
          global_step=None,  # Get it internally.
          learning_rate=learning_rate,
          optimizer=optimizer,
          clip_gradients=gradient_clipping_norm,
          summaries=optimizers.OPTIMIZER_SUMMARIES)

    return model_fn.ModelFnOps(mode=mode,
                               predictions=prediction_dict,
                               loss=loss,
                               train_op=train_op,
                               eval_metric_ops=eval_metric_ops)
def _dnn_classifier_model_fn(features, labels, mode, params):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters. The following hyperparameters are
      expected:
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * n_classes: number of label classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are clipped
          to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns
          the residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  activation_fn = params["activation_fn"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  enable_centered_bias = params["enable_centered_bias"]
  num_ps_replicas = params["num_ps_replicas"]

  features = _get_feature_dict(features)
  parent_scope = "dnn"
  num_label_columns = 1 if n_classes == 2 else n_classes
  if n_classes == 2:
    loss_fn = loss_ops.sigmoid_cross_entropy
  else:
    loss_fn = loss_ops.sparse_softmax_cross_entropy

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas, min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))
    _add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      parent_scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        num_label_columns,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)
  _add_hidden_layer_summary(logits, scope.name)

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  if mode == estimator.ModeKeys.TRAIN:
    labels = _reshape_labels(labels)
    weights = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, labels, weights=weights)
    loss = _rescale_eval_loss(training_loss, weights)

    train_ops = [
        optimizers.optimize_loss(
            loss=training_loss,
            global_step=contrib_variables.get_global_step(),
            learning_rate=_LEARNING_RATE,
            optimizer=_get_optimizer(optimizer),
            clip_gradients=gradient_clip_norm,
            name=parent_scope,
            # Empty summaries to prevent optimizers from logging the training_loss.
            summaries=[])
    ]
    if enable_centered_bias:
      train_ops.append(_centered_bias_step(labels, loss_fn, num_label_columns))

    summary.scalar("loss", loss)

    return None, loss, control_flow_ops.group(*train_ops)

  elif mode == estimator.ModeKeys.EVAL:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    labels = _reshape_labels(labels)
    weights = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, labels, weights=weights)
    loss = _rescale_eval_loss(training_loss, weights)

    return predictions, loss, []

  else:  # mode == estimator.ModeKeys.INFER:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    return predictions, None, []