def _create_linear_model(self, feature_cols, partitioner):
  """Builds a one-unit linear model and returns the variables it registered.

  Args:
    feature_cols: Feature columns forwarded to `fc.linear_model`.
    partitioner: Partitioner installed on the root variable scope while the
      model is being built.

  Returns:
    The dict that `fc.linear_model` populated through its `cols_to_vars`
    argument.
  """
  column_variables = {}
  # The model output is discarded: the call is made only so that the
  # variables are created under the partitioned root scope.
  with variable_scope.variable_scope("", partitioner=partitioner):
    fc.linear_model(
        features=self._create_dummy_inputs(),
        feature_columns=feature_cols,
        units=1,
        cols_to_vars=column_variables)
  return column_variables
def _create_linear_model(self, feature_cols, partitioner):
  """Creates linear-model variables and hands back the column-to-variable map.

  Args:
    feature_cols: Feature columns forwarded to `fc.linear_model`.
    partitioner: Partitioner applied to the root variable scope in which the
      model is built.

  Returns:
    The dict filled in by `fc.linear_model` via `cols_to_vars`.
  """
  created_vars = {}
  with variable_scope.variable_scope("", partitioner=partitioner):
    # Only the side effect matters here (variable creation); the returned
    # predictions are intentionally ignored.
    fc.linear_model(
        features=self._create_dummy_inputs(),
        feature_columns=feature_cols,
        units=1,
        cols_to_vars=created_vars)
  return created_vars
def _create_linear_model(self, feature_cols, partitioner):
  """Builds a one-unit linear model and returns its per-column variables.

  Args:
    feature_cols: Feature columns forwarded to `fc.linear_model`.
    partitioner: Partitioner installed on the root variable scope while the
      model is being built.

  Returns:
    The dict populated by `fc.linear_model` through `cols_to_vars`, with the
    "bias" entry removed.

  Raises:
    KeyError: If no "bias" entry was recorded.
  """
  column_variables = {}
  # The model output is discarded: the call is made only so that the
  # variables are created under the partitioned root scope.
  with variable_scope.variable_scope("", partitioner=partitioner):
    fc.linear_model(
        features=self._create_dummy_inputs(),
        feature_columns=feature_cols,
        units=1,
        cols_to_vars=column_variables)
  # Besides the columns, a "bias" key is also filled in; callers only want
  # the column entries.
  del column_variables["bias"]
  return column_variables
def _create_linear_model(self, feature_cols, partitioner):
  """Creates linear-model variables and returns the map without its bias key.

  Args:
    feature_cols: Feature columns forwarded to `fc.linear_model`.
    partitioner: Partitioner applied to the root variable scope in which the
      model is built.

  Returns:
    The dict filled in by `fc.linear_model` via `cols_to_vars`, minus the
    "bias" entry.

  Raises:
    KeyError: If no "bias" entry was recorded.
  """
  created_vars = {}
  with variable_scope.variable_scope("", partitioner=partitioner):
    # Only the side effect matters here (variable creation); the returned
    # predictions are intentionally ignored.
    fc.linear_model(
        features=self._create_dummy_inputs(),
        feature_columns=feature_cols,
        units=1,
        cols_to_vars=created_vars)
  # The "bias" key is filled alongside the columns; strip it so that the
  # result maps columns only.
  del created_vars["bias"]
  return created_vars
def linear_logit_fn(features):
  """Computes linear-model logits and records bias / sparsity summaries.

  Relies on `feature_columns`, `units` and `sparse_combiner` from the
  enclosing scope.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor`
      or `dict` of same.

  Returns:
    A `Tensor` representing the logits.
  """
  column_variables = {}
  logits = feature_column_lib.linear_model(
      features=features,
      feature_columns=feature_columns,
      units=units,
      sparse_combiner=sparse_combiner,
      cols_to_vars=column_variables)

  # Take the bias out of the map so that only the remaining entries feed the
  # zero-weight fraction below.
  bias_entry = column_variables.pop('bias')
  if units <= 1:
    # With a single unit the bias value is a length-1 list of a scalar
    # Tensor, so a scalar summary is the appropriate form.
    summary.scalar('bias', bias_entry[0][0])
  else:
    summary.histogram('bias', bias_entry)

  zero_fraction = _compute_fraction_of_zero(column_variables)
  summary.scalar('fraction_of_zero_weights', zero_fraction)
  return logits
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """Builds the estimator spec for a gradient-trained linear model.

  Args:
    features: dict of `Tensor`. Any other type is rejected.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance; supplies `logits_dimension` and builds the
      returned spec.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. When falsy, the result of
      `_get_default_optimizer(feature_columns)` is used instead.
    partitioner: Partitioner for variables. When falsy, a
      `min_max_variable_partitioner` bounded by the number of parameter
      server replicas, with `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    ValueError: If `features` is not a dict.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  # Resolve the optimizer, computing the default only when none was given.
  optimizer_spec = (
      optimizer if optimizer else _get_default_optimizer(feature_columns))
  optimizer_instance = optimizers.get_optimizer_instance(
      optimizer_spec, learning_rate=_LEARNING_RATE)

  if config:
    num_ps_replicas = config.num_ps_replicas
  else:
    num_ps_replicas = 0
  if not partitioner:
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

  feature_tensors = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'linear', values=feature_tensors, partitioner=partitioner):
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    step = training_util.get_global_step()
    return optimizer_instance.minimize(loss, global_step=step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _linear_model_fn(features, labels, mode, params, config):
  """Builds the estimator spec for a gradient-trained linear model.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters. The following keys are read:
      * head: Required. A `Head` instance.
      * feature_columns: Required. An iterable containing all the feature
        columns used by the model; it is materialized into a tuple.
      * optimizer: Optional. string, `Optimizer` object, or callable that
        defines the optimizer to use for training. When missing or falsy,
        the result of `_get_default_optimizer` is used.
      * partitioner: Optional. Partitioner for variables. When missing or
        falsy, a `min_max_variable_partitioner` bounded by the number of
        parameter server replicas, with `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    KeyError: If `params` lacks 'head' or 'feature_columns'.
  """
  head = params['head']
  feature_columns = tuple(params['feature_columns'])

  # Resolve the optimizer, computing the default only when none was given.
  optimizer_spec = params.get('optimizer')
  if not optimizer_spec:
    optimizer_spec = _get_default_optimizer(feature_columns)
  optimizer = optimizers.get_optimizer_instance(
      optimizer_spec, learning_rate=_LEARNING_RATE)

  if config:
    num_ps_replicas = config.num_ps_replicas
  else:
    num_ps_replicas = 0
  partitioner = params.get('partitioner')
  if not partitioner:
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

  feature_tensors = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'linear', values=feature_tensors, partitioner=partitioner):
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    step = training_util.get_global_step()
    return optimizer.minimize(loss, global_step=step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """A model_fn for linear models trained with a gradient-based optimizer.

  Args:
    features: dict of `Tensor`. Any other type is rejected.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance; supplies `logits_dimension` and builds the
      returned spec.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. When falsy, the result of
      `_get_default_optimizer(feature_columns)` is used instead.
    partitioner: Partitioner for variables. When falsy, a
      `min_max_variable_partitioner` bounded by the number of parameter
      server replicas, with `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    ValueError: If `features` is not a dict.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  # The default optimizer is looked up lazily, only when none was supplied.
  chosen_optimizer = (
      optimizer if optimizer else _get_default_optimizer(feature_columns))
  trainer = optimizers.get_optimizer_instance(
      chosen_optimizer, learning_rate=_LEARNING_RATE)

  num_ps_replicas = 0
  if config:
    num_ps_replicas = config.num_ps_replicas
  if not partitioner:
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

  scope_inputs = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'linear', values=scope_inputs, partitioner=partitioner):
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    current_step = training_util.get_global_step()
    return trainer.minimize(loss, global_step=current_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _linear_model_fn(features, labels, mode, params, config):
  """A model_fn for linear models trained with a gradient-based optimizer.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters. The following keys are read:
      * head: Required. A `Head` instance.
      * feature_columns: Required. An iterable containing all the feature
        columns used by the model; it is materialized into a tuple.
      * optimizer: Optional. string, `Optimizer` object, or callable that
        defines the optimizer to use for training. When missing or falsy,
        the result of `_get_default_optimizer` is used.
      * partitioner: Optional. Partitioner for variables. When missing or
        falsy, a `min_max_variable_partitioner` bounded by the number of
        parameter server replicas, with `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    KeyError: If `params` lacks 'head' or 'feature_columns'.
  """
  head = params['head']
  feature_columns = tuple(params['feature_columns'])

  # The default optimizer is looked up lazily, only when none was supplied.
  chosen_optimizer = params.get('optimizer')
  if not chosen_optimizer:
    chosen_optimizer = _get_default_optimizer(feature_columns)
  trainer = optimizers.get_optimizer_instance(
      chosen_optimizer, learning_rate=_LEARNING_RATE)

  num_ps_replicas = 0
  if config:
    num_ps_replicas = config.num_ps_replicas
  partitioner = params.get('partitioner')
  if not partitioner:
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

  scope_inputs = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'linear', values=scope_inputs, partitioner=partitioner):
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    current_step = training_util.get_global_step()
    return trainer.minimize(loss, global_step=current_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def linear_logit_fn(features):
  """Computes the logits of the linear model for `features`.

  Relies on `feature_columns` and `units` from the enclosing scope.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor`
      or `dict` of same.

  Returns:
    A `Tensor` representing the logits.
  """
  logits = feature_column_lib.linear_model(
      features=features,
      feature_columns=feature_columns,
      units=units)
  return logits
def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds a DNN tower and/or a linear tower (whichever has feature columns),
  sums their logits when both exist, and trains each tower with its own
  optimizer over the trainable variables of its own scope.

  Args:
    features: dict of `Tensor`. Any other type is rejected.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.
    head: A `Head` instance; supplies `logits_dimension` and builds the
      returned spec.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model. When falsy, no linear tower is built.
    linear_optimizer: string, `Optimizer` object, or callable that defines
      the optimizer to use for training the Linear model.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model. When falsy, no DNN tower is built.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model.
    dnn_hidden_units: List of hidden units per DNN layer. Required when
      `dnn_feature_columns` is given.
    dnn_activation_fn: Activation function passed to each hidden dense layer.
    dnn_dropout: When not `None`, the dropout rate applied after each hidden
      layer during training.
    input_layer_partitioner: Partitioner for the DNN input layer and for the
      linear tower. When falsy, a `min_max_variable_partitioner` bounded by
      the number of parameter server replicas, with
      `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    ValueError: If `features` is not a dict, if both
      `linear_feature_columns` and `dnn_feature_columns` are empty, or if
      `dnn_feature_columns` is given without `dnn_hidden_units`.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not (linear_feature_columns or dnn_feature_columns):
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  if config:
    num_ps_replicas = config.num_ps_replicas
  else:
    num_ps_replicas = 0
  if not input_layer_partitioner:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

  # DNN tower.
  dnn_parent_scope = 'dnn'
  dnn_logits = None
  if dnn_feature_columns:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    hidden_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=hidden_partitioner):
      # The input layer gets its own partitioner, distinct from the one used
      # for the hidden layers.
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        activations = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(activations,)) as hidden_scope:
          activations = core_layers.dense(
              activations,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=hidden_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            activations = core_layers.dropout(
                activations, rate=dnn_dropout, training=True)
        _add_layer_summary(activations, hidden_scope.name)

      with variable_scope.variable_scope(
          'logits', values=(activations,)) as logits_scope:
        dnn_logits = core_layers.dense(
            activations,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope)
      _add_layer_summary(dnn_logits, logits_scope.name)

  # Linear tower.
  linear_parent_scope = 'linear'
  linear_logits = None
  if linear_feature_columns:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as linear_scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, linear_scope.name)

  # Combine whichever towers were built; at least one exists because of the
  # feature-column check above.
  if dnn_logits is None:
    logits = linear_logits
  elif linear_logits is None:
    logits = dnn_logits
  else:
    logits = dnn_logits + linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    tower_train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      dnn_vars = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope)
      tower_train_ops.append(dnn_optimizer.minimize(loss, var_list=dnn_vars))
    if linear_logits is not None:
      linear_vars = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope)
      tower_train_ops.append(
          linear_optimizer.minimize(loss, var_list=linear_vars))

    # Neither minimize() call receives the global step, so it is advanced
    # exactly once here, after every tower update has run.
    grouped_update = control_flow_ops.group(*tower_train_ops)
    with ops.control_dependencies([grouped_update]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def testWarmStartEmbeddingColumnLinearModel(self):
  # Scenario: an embedding column inside a linear model is warm-started from
  # a checkpoint written with a smaller, differently ordered vocabulary.
  old_vocab_file = self._write_vocab(
      ["apple", "banana", "guava", "orange"], "old_vocab")
  new_vocab_file = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Write the checkpoint that will be warm-started from.
  with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/embedding_weights",
        initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/weights",
        initializer=[[0.69], [0.71]])
    self._write_checkpoint(sess)

  def _two_way_partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first dimension into at most 2 slices; leave the other
    # dimensions whole.
    slices_per_dim = [1] * len(shape)
    slices_per_dim[0] = min(2, shape[0].value)
    return slices_per_dim

  # Feature columns built on top of the new vocabulary.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_file, vocabulary_size=6)
  emb_vocab = fc.embedding_column(categorical_column=sc_vocab, dimension=2)
  linear_cols = [emb_vocab]

  # Expected values, one array per variable partition. Embedding rows follow
  # the new vocab order; the two words absent from the old vocab
  # ("raspberry", "blueberry") take the backup initializer value 0.42.
  expected_parts = [
      # embedding_weights part 0.
      np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
      # embedding_weights part 1.
      np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]),
      # linear weights part 0.
      np.array([[0.69]]),
      # linear weights part 1.
      np.array([[0.71]]),
  ]

  # Fresh graph and session in which the warm-start takes place.
  with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
    column_variables = {}
    with variable_scope.variable_scope("", partitioner=_two_way_partitioner):
      # Called only to create the variables.
      fc.linear_model(
          features=self._create_dummy_inputs(),
          feature_columns=linear_cols,
          cols_to_vars=column_variables)

    # Vocab remapping info for the embedding weights.
    vocab_info = ws_util.VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=old_vocab_file,
        # Can't use constant_initializer with load_and_remap. In practice,
        # use a truncated normal initializer.
        backup_initializer=init_ops.random_uniform_initializer(
            minval=0.42, maxval=0.42))
    remap_by_var_name = {
        "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
    }
    ws_settings = ws_util.WarmStartSettings(
        self.get_temp_dir(),
        vars_to_warm_start=".*sc_vocab.*",
        var_name_to_vocab_info=remap_by_var_name)
    ws_util._warm_start(ws_settings)
    sess.run(variables.global_variables_initializer())

    # The variables behind emb_vocab must hold the remapped checkpoint
    # values.
    self._assert_cols_to_vars(
        column_variables, {emb_vocab: expected_parts}, sess)
def testWarmStartEmbeddingColumnLinearModel(self):
  # Scenario: an embedding column inside a linear model is warmstarted from
  # a checkpoint written with a smaller, differently ordered vocabulary.
  old_vocab_file = self._write_vocab(
      ["apple", "banana", "guava", "orange"], "old_vocab")
  new_vocab_file = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Write the checkpoint that will be warmstarted from.
  with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/embedding_weights",
        initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/weights",
        initializer=[[0.69], [0.71]])
    self._write_checkpoint(sess)

  def _two_way_partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first dimension into at most 2 slices; leave the other
    # dimensions whole.
    slices_per_dim = [1] * len(shape)
    slices_per_dim[0] = min(2, shape[0].value)
    return slices_per_dim

  # Feature columns built on top of the new vocabulary.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_file, vocabulary_size=6)
  emb_vocab = fc.embedding_column(categorical_column=sc_vocab, dimension=2)
  linear_cols = [emb_vocab]

  # Expected values, one array per variable partition. Embedding rows follow
  # the new vocab order; the two words absent from the old vocab
  # ("raspberry", "blueberry") take the backup initializer value 0.42.
  expected_parts = [
      # embedding_weights part 0.
      np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
      # embedding_weights part 1.
      np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]),
      # linear weights part 0.
      np.array([[0.69]]),
      # linear weights part 1.
      np.array([[0.71]]),
  ]

  # Fresh graph and session in which the warmstart takes place.
  with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
    column_variables = {}
    with variable_scope.variable_scope("", partitioner=_two_way_partitioner):
      # Called only to create the variables.
      fc.linear_model(
          features=self._create_dummy_inputs(),
          feature_columns=linear_cols,
          cols_to_vars=column_variables)

    # Vocab remapping info for the embedding weights.
    vocab_info = ws_util._VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=old_vocab_file,
        # Can't use constant_initializer with load_and_remap. In practice,
        # use a truncated normal initializer.
        backup_initializer=init_ops.random_uniform_initializer(
            minval=0.42, maxval=0.42))
    remap_by_var_name = {
        "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
    }
    ws_settings = ws_util._WarmStartSettings(
        self.get_temp_dir(),
        vars_to_warmstart=".*sc_vocab.*",
        var_name_to_vocab_info=remap_by_var_name)
    ws_util._warmstart(ws_settings)
    sess.run(variables.global_variables_initializer())

    # The variables behind emb_vocab must hold the remapped checkpoint
    # values.
    self._assert_cols_to_vars(
        column_variables, {emb_vocab: expected_parts}, sess)
def _dnn_linear_combined_model_fn(
    features, labels, mode, head,
    linear_feature_columns=None, linear_optimizer='Ftrl',
    dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds a DNN tower and/or a linear tower (whichever has feature columns),
  sums their logits when both exist, and trains each tower with its own
  optimizer over the trainable variables of its own scope.

  Args:
    features: dict of `Tensor`. Both towers iterate over its values, so any
      other type is rejected up front.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.
    head: A `Head` instance; supplies `logits_dimension` and builds the
      returned spec.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model. When falsy, no linear tower is built.
    linear_optimizer: string, `Optimizer` object, or callable that defines
      the optimizer to use for training the Linear model.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model. When falsy, no DNN tower is built.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model.
    dnn_hidden_units: List of hidden units per DNN layer. Required when
      `dnn_feature_columns` is given.
    dnn_activation_fn: Activation function passed to each hidden dense layer.
    dnn_dropout: When not `None`, the dropout rate applied after each hidden
      layer during training.
    input_layer_partitioner: Partitioner for the DNN input layer and for the
      linear tower. When falsy, a `min_max_variable_partitioner` bounded by
      the number of parameter server replicas, with
      `min_slice_size=64 << 20`, is used.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_estimator_spec`.

  Raises:
    ValueError: If `features` is not a dict, if both
      `linear_feature_columns` and `dnn_feature_columns` are empty, or if
      `dnn_feature_columns` is given without `dnn_hidden_units`.
  """
  # Fail early with a clear message: every tower below calls
  # six.itervalues(features), which only works on a dict.
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      # The input layer gets its own partitioner, distinct from the one used
      # for the hidden layers.
      with variable_scope.variable_scope('input',
                                         partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns)

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          'logits',
          values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear Logits.
  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model. At least one tower exists because
  # of the feature-column check above.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    # Neither minimize() call receives the global step, so it is advanced
    # exactly once here, after every tower update has run.
    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
      A non-dict value is wrapped as `{"": features}`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters. The following keys are read:
      * head: Required. A `Head` instance.
      * feature_columns: Required. An iterable containing all the feature
        columns used by the model.
      * optimizer: Optional. string, `Optimizer` object, or callable that
        defines the optimizer to use for training. When missing or falsy,
        the result of `_get_default_optimizer` is used.
      * gradient_clip_norm: Optional. When truthy, gradients are clipped to
        this global norm before being applied.
      * joint_weights: Optional, defaults to False. When truthy (and every
        column is a `feature_column._FeatureColumn`), the logits are built
        with `layers.joint_weighted_sum_from_feature_columns` instead of
        `layers.weighted_sum_from_feature_columns`.
    config: `RunConfig` object to configure the runtime settings. When
      falsy, zero parameter server replicas are assumed.

  Returns:
    The result of `head.create_model_fn_ops`.

  Raises:
    KeyError: If `params` lacks "head" or "feature_columns".
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer")
  if not optimizer:
    optimizer = _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm")
  if config:
    num_ps_replicas = config.num_ps_replicas
  else:
    num_ps_replicas = 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  # The scope name doubles as the collection in which the model weights are
  # registered and from which the train op later retrieves them.
  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas, min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    # pylint: disable=protected-access
    column_type_checks = [
        isinstance(column, feature_column._FeatureColumn)
        for column in feature_columns
    ]
    # pylint: enable=protected-access
    if not all(column_type_checks):
      # At least one column is not a `_FeatureColumn`: use the core
      # linear_model builder.
      logits = fc_core.linear_model(
          features=features,
          feature_columns=feature_columns,
          units=head.logits_dimension,
          weight_collections=[parent_scope])
    else:
      layer_fn = (
          layers.joint_weighted_sum_from_feature_columns
          if joint_weights else layers.weighted_sum_from_feature_columns)
      logits, _, _ = layer_fn(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=head.logits_dimension,
          weight_collections=[parent_scope],
          scope=scope)

  def _train_op_fn(loss):
    """Returns the op that applies (optionally clipped) gradients."""
    global_step = training_util.get_global_step()
    model_vars = ops.get_collection(parent_scope)
    grads = gradients.gradients(loss, model_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    resolved_optimizer = _get_optimizer(optimizer)
    return resolved_optimizer.apply_gradients(
        zip(grads, model_vars), global_step=global_step)

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds DNN logits under the "dnn" variable scope and linear logits under
  the "linear" variable scope, adds together whichever of the two exist, and
  passes the result to the `Head` found in `params`. Each sub-model is
  trained by its own optimizer over the variables in its own collection.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer. Required
          whenever `dnn_feature_columns` is given.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate. Dropout is only applied in `ModeKeys.TRAIN`.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer. When
          absent or falsy, a `min_max_variable_partitioner` with
          `max_partitions=num_ps_replicas` and `min_slice_size=64 << 20` is
          used.
      * fix_global_step_increment_bug: Optional, defaults to `True`. When
          true, neither `optimize_loss` call increments the global step and
          it is incremented exactly once after the grouped train op. When
          false, every `optimize_loss` call is asked to increment it.
    config: `RunConfig` object to configure the runtime settings. Only
      `num_ps_replicas` is read here; a falsy `config` counts as 0.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or if `dnn_feature_columns` is given
      without `dnn_hidden_units`.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  # A missing partitioner is not an error: fall back to a default one.
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  # See `_make_training_op` below for how this flag changes the train op.
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  # The result is iterated with `six.itervalues` below, so it is expected to
  # be a dict.
  features = _get_feature_dict(features)

  # Resolve both optimizer specs before any graph building.
  # NOTE(review): judging by its name, `_check_no_sync_replicas_optimizer`
  # rejects SyncReplicasOptimizer instances; confirm against its definition.
  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      # The input layer gets its own partitioner; its scope is kept because
      # the training op needs `dnn_input_scope.name` for the embedding
      # learning-rate multipliers.
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        # Use the contrib input layer only when every column is a contrib
        # `_FeatureColumn`; otherwise use the `fc_core` input layer.
        if all([
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ]):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      # One fully connected layer per entry of `dnn_hidden_units`; all of
      # their variables are added to the "dnn" collection.
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      # Final projection to the head's logits dimension, without activation.
      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      # Same dispatch as for the DNN input layer: contrib weighted sums when
      # every column is a contrib `_FeatureColumn`, `fc_core` otherwise.
      if all([isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
              for fc in linear_feature_columns]):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  # At least one of the two is not None here, because the case where both
  # column lists are empty was rejected above.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    # Each sub-model is optimized separately, over its own variable
    # collection, against the same `training_loss`.
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      # Neither optimize_loss call above touched the global step in this
      # mode, so bump it exactly once, after both updates have run.
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
def linear_classifier_model_fn(features, labels, mode, params, config=None):
  """A model_fn for a linear classifier over real-valued feature columns.

  The classification head is built here from `params`, and one real-valued
  feature column is derived from every key of `features`. The optimizer is
  always the one returned by `_get_default_optimizer` for those columns.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
      A value that is not a dict is wrapped into a one-entry dict keyed by
      the empty string before the feature columns are derived from its keys.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters. The following keys are read:
      * num_classes: Number of classes, passed to `multi_class_head`
        (required).
      * weight_column_name: Passed to `multi_class_head` (required).
      * enable_centered_bias: Passed to `multi_class_head` (required).
      * label_keys: Passed to `multi_class_head`; only read, and therefore
        only required, when `num_classes > 2`.
      * gradient_clip_norm: Optional. A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio.
      * joint_weights: Optional, defaults to False. If True, the weights for
        all columns will be stored in a single (possibly partitioned)
        variable.
    config: `RunConfig` object to configure the runtime settings. Only
      `num_ps_replicas` is read; a falsy `config` counts as 0 replicas.

  Returns:
    A `ModelFnOps` instance, as produced by `head.create_model_fn_ops`.

  Raises:
    KeyError: If one of the required keys is missing from `params`.
  """
  num_classes = params["num_classes"]
  if num_classes > 2:
    # Beyond binary classification the head also gets the label vocabulary
    # and is asked for per-class metrics for every class id.
    head = head_lib.multi_class_head(
        num_classes,
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params["enable_centered_bias"],
        label_keys=params["label_keys"],
        metric_class_ids=range(0, num_classes))
  else:
    head = head_lib.multi_class_head(
        num_classes,
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params["enable_centered_bias"])

  # Normalize to a dict BEFORE looking at the keys: a bare `Tensor` has no
  # `.keys()`, so deriving the columns first made this branch unreachable
  # for exactly the inputs it exists to handle.
  if not isinstance(features, dict):
    features = {"": features}

  feature_columns = [layers.real_valued_column(name) for name in features]
  optimizer = _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    # NOTE(review): every column above comes from `layers.real_valued_column`,
    # so this check presumably always holds; the fallback branch is kept
    # unchanged rather than assumed dead.
    if all([
        isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
        for fc in feature_columns
    ]):
      if joint_weights:
        layer_fn = layers.joint_weighted_sum_from_feature_columns
      else:
        layer_fn = layers.weighted_sum_from_feature_columns
      logits, _, _ = layer_fn(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=head.logits_dimension,
          weight_collections=[parent_scope],
          scope=scope)
    else:
      logits = fc_core.linear_model(
          features=features,
          feature_columns=feature_columns,
          units=head.logits_dimension,
          weight_collections=[parent_scope])

    def _train_op_fn(loss):
      """Returns the op applying (optionally clipped) gradients of `loss`."""
      global_step = contrib_variables.get_global_step()
      # Only the variables registered in the "linear" collection are trained.
      my_vars = ops.get_collection(parent_scope)
      grads = gradients.gradients(loss, my_vars)
      if gradient_clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
      return (_get_optimizer(optimizer).apply_gradients(
          zip(grads, my_vars), global_step=global_step))

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds DNN logits under the "dnn" variable scope and linear logits under
  the "linear" variable scope, adds together whichever of the two exist, and
  passes the result to the `Head` found in `params`. Each sub-model is
  trained by its own optimizer over the variables in its own collection.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer. Required
          whenever `dnn_feature_columns` is given.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate. Dropout is only applied in `ModeKeys.TRAIN`.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer. When
          absent or falsy, a `min_max_variable_partitioner` with
          `max_partitions=num_ps_replicas` and `min_slice_size=64 << 20` is
          used.
      * fix_global_step_increment_bug: Optional, defaults to `True`. When
          true, neither `optimize_loss` call increments the global step and
          it is incremented exactly once after the grouped train op. When
          false, every `optimize_loss` call is asked to increment it.
    config: `RunConfig` object to configure the runtime settings. Only
      `num_ps_replicas` is read here; a falsy `config` counts as 0.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or if `dnn_feature_columns` is given
      without `dnn_hidden_units`.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  # A missing partitioner is not an error: fall back to a default one.
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  # See `_make_training_op` below for how this flag changes the train op.
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  # The result is iterated with `six.itervalues` below, so it is expected to
  # be a dict.
  features = _get_feature_dict(features)

  # Resolve both optimizer specs before any graph building.
  # NOTE(review): judging by its name, `_check_no_sync_replicas_optimizer`
  # rejects SyncReplicasOptimizer instances; confirm against its definition.
  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      # The input layer gets its own partitioner; its scope is kept because
      # the training op needs `dnn_input_scope.name` for the embedding
      # learning-rate multipliers.
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        # Use the contrib input layer only when every column is a contrib
        # `_FeatureColumn`; otherwise use the `fc_core` input layer.
        if all([
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ]):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      # One fully connected layer per entry of `dnn_hidden_units`; all of
      # their variables are added to the "dnn" collection.
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      # Final projection to the head's logits dimension, without activation.
      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      # Same dispatch as for the DNN input layer: contrib weighted sums when
      # every column is a contrib `_FeatureColumn`, `fc_core` otherwise.
      if all([isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
              for fc in linear_feature_columns]):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  # At least one of the two is not None here, because the case where both
  # column lists are empty was rejected above.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    # Each sub-model is optimized separately, over its own variable
    # collection, against the same `training_loss`.
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      # Neither optimize_loss call above touched the global step in this
      # mode, so bump it exactly once, after both updates have run.
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)