def __init__(self,
             model_dir=None,
             linear_feature_columns=None,
             linear_optimizer='Ftrl',
             dnn_feature_columns=None,
             dnn_optimizer='Adagrad',
             dnn_hidden_units=None,
             dnn_activation_fn=tf.nn.relu,
             dnn_dropout=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             input_layer_partitioner=None,
             config=None,
             warm_start_from=None,
             loss_reduction=tf.compat.v1.losses.Reduction.SUM,
             batch_norm=False,
             linear_sparse_combiner='sum'):
  self._feature_columns = _validate_feature_columns(
      linear_feature_columns=linear_feature_columns,
      dnn_feature_columns=dnn_feature_columns)
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)
  estimator._canned_estimator_api_gauge.get_cell('Classifier').set(
      'DNNLinearCombined')  # pylint: disable=protected-access

  def _model_fn(features, labels, mode, config):
    """Call the _dnn_linear_combined_model_fn."""
    return _dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        dnn_activation_fn=dnn_activation_fn,
        dnn_dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        batch_norm=batch_norm,
        linear_sparse_combiner=linear_sparse_combiner)

  super(DNNLinearCombinedClassifier, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      warm_start_from=warm_start_from)
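# Example usage (a minimal sketch, not from this file: the column names and
# train_input_fn are hypothetical; constructor arguments match the signature
# above):
#
#   categorical_col = tf.feature_column.categorical_column_with_hash_bucket(
#       'a', hash_bucket_size=100)
#   numeric_col = tf.feature_column.numeric_column('b')
#   estimator = DNNLinearCombinedClassifier(
#       # Wide part: sparse columns trained with FTRL.
#       linear_feature_columns=[categorical_col],
#       linear_optimizer='Ftrl',
#       # Deep part: dense columns plus an embedding of the sparse column.
#       dnn_feature_columns=[
#           numeric_col,
#           tf.feature_column.embedding_column(categorical_col, dimension=8),
#       ],
#       dnn_optimizer='Adagrad',
#       dnn_hidden_units=[128, 64])
#   estimator.train(input_fn=train_input_fn, steps=1000)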
def _init_baseline_classifier(n_classes, weight_column, label_vocabulary,
                              optimizer, loss_reduction):
  """Helper function for the initialization of BaselineClassifier."""
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _baseline_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        optimizer=optimizer,
        weight_column=weight_column,
        config=config)

  return _model_fn
def __init__(
    self,
    hidden_units,
    feature_columns,
    model_dir=None,
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Adagrad',
    activation_fn=tf.nn.relu,
    dropout=None,
    input_layer_partitioner=None,
    config=None,
    warm_start_from=None,
    loss_reduction=tf.compat.v1.losses.Reduction.SUM,
    batch_norm=False,
):
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)
  estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')

  def _model_fn(features, labels, mode, config):
    """Call the defined shared _dnn_model_fn."""
    return _dnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        hidden_units=hidden_units,
        feature_columns=tuple(feature_columns or []),
        optimizer=optimizer,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        batch_norm=batch_norm)

  super(DNNClassifier, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      warm_start_from=warm_start_from)
def __init__(self,
             feature_columns,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Ftrl',
             config=None,
             partitioner=None,
             warm_start_from=None,
             loss_reduction=losses.Reduction.SUM,
             sparse_combiner='sum'):
  _validate_linear_sdca_optimizer_for_linear_classifier(
      feature_columns=feature_columns,
      n_classes=n_classes,
      optimizer=optimizer,
      sparse_combiner=sparse_combiner)
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    """Call the defined shared _linear_model_fn."""
    return _linear_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        feature_columns=tuple(feature_columns or []),
        optimizer=optimizer,
        partitioner=partitioner,
        config=config,
        sparse_combiner=sparse_combiner)

  super(LinearClassifier, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      warm_start_from=warm_start_from)
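# Example usage (a minimal sketch; the column names and train_input_fn are
# hypothetical, constructor arguments follow the signature above):
#
#   occupation = tf.feature_column.categorical_column_with_vocabulary_list(
#       'occupation', ['tech', 'sales', 'other'])
#   age = tf.feature_column.numeric_column('age')
#   classifier = LinearClassifier(
#       feature_columns=[occupation, age],
#       optimizer='Ftrl',
#       # 'mean' normalizes multivalent categorical columns per example.
#       sparse_combiner='mean')
#   classifier.train(input_fn=train_input_fn, steps=100)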
def __init__(self,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Ftrl',
             config=None,
             loss_reduction=losses.Reduction.SUM):
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _baseline_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        optimizer=optimizer,
        weight_column=weight_column,
        config=config)

  super(BaselineClassifier, self).__init__(
      model_fn=_model_fn, model_dir=model_dir, config=config)
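# Example usage (a minimal sketch; train_input_fn and eval_input_fn are
# hypothetical). The baseline ignores feature values and learns only a
# per-class bias, which makes it a useful sanity-check lower bound for the
# other classifiers in this module:
#
#   baseline = BaselineClassifier(n_classes=3, model_dir='/tmp/baseline')
#   baseline.train(input_fn=train_input_fn, steps=100)
#   metrics = baseline.evaluate(input_fn=eval_input_fn)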
def _init_dnn_linear_combined_classifier(
    linear_feature_columns, linear_optimizer, dnn_feature_columns,
    dnn_optimizer, dnn_hidden_units, dnn_activation_fn, dnn_dropout, n_classes,
    weight_column, label_vocabulary, input_layer_partitioner, loss_reduction,
    batch_norm, linear_sparse_combiner):
  """Helper function for the initialization of DNNLinearCombinedClassifier."""
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    """Call the _dnn_linear_combined_model_fn."""
    return _dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        dnn_activation_fn=dnn_activation_fn,
        dnn_dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        batch_norm=batch_norm,
        linear_sparse_combiner=linear_sparse_combiner)

  return feature_columns, _model_fn
def _init_dnn_classifier(hidden_units, feature_columns, n_classes,
                         weight_column, label_vocabulary, optimizer,
                         activation_fn, dropout, input_layer_partitioner,
                         loss_reduction, batch_norm):
  """Helper function for the initialization of DNNClassifier."""
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    """Call the defined shared _dnn_model_fn."""
    return _dnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        hidden_units=hidden_units,
        feature_columns=tuple(feature_columns or []),
        optimizer=optimizer,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        batch_norm=batch_norm)

  return _model_fn
def __init__(self,
             model_collections,
             feature_columns,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             input_layer_partitioner=None,
             config=None):
  """Initializes a `MultiDNNClassifier` instance.

  Args:
    model_collections: An iterable of `DNN` instances, one per sub-network.
      Each sub-network produces its own logits, and the logits are summed.
    feature_columns: An iterable containing all the feature columns used by
      the model. All items in the set should be instances of classes derived
      from `_FeatureColumn`.
    model_dir: Directory to save model parameters, graph and etc. This can
      also be used to load checkpoints from the directory into an estimator
      to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training. It will be multiplied by the loss of the example. If
      it is a string, it is used as a key to fetch the weight tensor from
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and take any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes` > 2. An error is
      raised if the vocabulary is not provided and labels are strings.
    optimizer: An instance of `tf.Optimizer` used to train the model.
      Defaults to the Adagrad optimizer.
    input_layer_partitioner: Optional. Partitioner for the input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.
  """
  if not model_collections:
    raise ValueError('Empty model collections, must fill DNN model instance.')
  assert isinstance(model_collections, (list, tuple)), (
      'model_collections must be a list or tuple')
  for model in model_collections:
    if not isinstance(model, DNN):
      raise ValueError(
          'model_collections element must be an instance of class DNN')
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes,
      weight_column=weight_column,
      label_vocabulary=label_vocabulary,
      loss_reduction=losses.Reduction.SUM)

  def _dnn_model_fn(features,
                    labels,
                    mode,
                    head,
                    optimizer='Adagrad',
                    input_layer_partitioner=None,
                    config=None):
    """Deep Neural Net model_fn.

    Args:
      features: dict of `Tensor`.
      labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels
        of dtype `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      head: A `head_lib._Head` instance.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. If not specified, will use the Adagrad
        optimizer with a default learning rate of 0.05.
      input_layer_partitioner: Partitioner for the input layer. Defaults to
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.

    Returns:
      An `EstimatorSpec` holding the predictions, loss and train op.

    Raises:
      ValueError: If features has the wrong type.
    """
    if not isinstance(features, dict):
      raise ValueError('features should be a dictionary of `Tensor`s. '
                       'Given type: {}'.format(type(features)))
    optimizer = _get_optimizer_instance(optimizer, learning_rate=0.05)
    num_ps_replicas = config.num_ps_replicas if config else 0
    partitioner = tf.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with tf.variable_scope('dnn',
                           values=tuple(iter(features.values())),
                           partitioner=partitioner):
      input_layer_partitioner = input_layer_partitioner or (
          tf.min_max_variable_partitioner(
              max_partitions=num_ps_replicas, min_slice_size=64 << 20))
      # Each sub-network emits logits of shape
      # (batch_size, head.logits_dimension).
      logits = []
      for idx, m in enumerate(model_collections):
        logits.append(
            _dnn_logit_fn(features, mode, idx + 1, head.logits_dimension,
                          m.hidden_units, m.connected_layers, feature_columns,
                          input_layer_partitioner))
      # Summing the per-network logits is equivalent to concatenating the
      # layers before the logit layer and applying a single logit layer.
      logits = tf.add_n(logits)

      def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        return optimizer.minimize(loss,
                                  global_step=tf.train.get_global_step())

      return head.create_estimator_spec(features=features,
                                        mode=mode,
                                        labels=labels,
                                        train_op_fn=_train_op_fn,
                                        logits=logits)

  def _model_fn(features, labels, mode, config):
    return _dnn_model_fn(features=features,
                         labels=labels,
                         mode=mode,
                         head=head,
                         optimizer=optimizer,
                         input_layer_partitioner=input_layer_partitioner,
                         config=config)

  super(MultiDNNClassifier, self).__init__(model_fn=_model_fn,
                                           model_dir=model_dir,
                                           config=config)
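# Example usage (a sketch only: the `DNN` constructor shown here is
# hypothetical -- the code above only requires that each element exposes
# `hidden_units` and `connected_layers`; column names and train_input_fn are
# likewise hypothetical):
#
#   dnn_1 = DNN(hidden_units=[128, 64], connected_layers=None)
#   dnn_2 = DNN(hidden_units=[256, 128, 64], connected_layers=None)
#   classifier = MultiDNNClassifier(
#       model_collections=[dnn_1, dnn_2],
#       feature_columns=[tf.feature_column.numeric_column('x', shape=(10,))],
#       n_classes=2)
#   # Per-sub-network logits are summed via tf.add_n before the head.
#   classifier.train(input_fn=train_input_fn, steps=1000)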
def __init__(
    self,
    hidden_units,
    feature_columns,
    model_dir=None,
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Adagrad',
    activation_fn=nn.relu,
    dropout=None,
    input_layer_partitioner=None,
    config=None,
    warm_start_from=None,
    loss_reduction=losses.Reduction.SUM,
    batch_norm=False,
):
  """Initializes a `DNNClassifier` instance.

  Args:
    hidden_units: Iterable of number of hidden units per layer. All layers
      are fully connected. Ex. `[64, 32]` means the first layer has 64 nodes
      and the second one has 32.
    feature_columns: An iterable containing all the feature columns used by
      the model. All items in the set should be instances of classes derived
      from `_FeatureColumn`.
    model_dir: Directory to save model parameters, graph and etc. This can
      also be used to load checkpoints from the directory into an estimator
      to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training. It will be multiplied by the loss of the example. If
      it is a string, it is used as a key to fetch the weight tensor from
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and take any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes` > 2. An error is
      raised if the vocabulary is not provided and labels are strings.
    optimizer: An instance of `tf.Optimizer` used to train the model. Can
      also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'),
      or a callable. Defaults to the Adagrad optimizer.
    activation_fn: Activation function applied to each layer. If `None`,
      will use `tf.nn.relu`.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Optional. Partitioner for the input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    warm_start_from: A string filepath to a checkpoint to warm-start from, or
      a `WarmStartSettings` object to fully configure warm-starting. If the
      string filepath is provided instead of a `WarmStartSettings`, then all
      weights are warm-started, and it is assumed that vocabularies and
      Tensor names are unchanged.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.
    batch_norm: Whether to use batch normalization after each hidden layer.
  """
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    """Call the defined shared _dnn_model_fn."""
    return _dnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        hidden_units=hidden_units,
        feature_columns=tuple(feature_columns or []),
        optimizer=optimizer,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        batch_norm=batch_norm)

  super(DNNClassifier, self).__init__(model_fn=_model_fn,
                                      model_dir=model_dir,
                                      config=config,
                                      warm_start_from=warm_start_from)
def __init__(self,
             model_type=None,
             model_dir=None,
             with_cnn=False,
             cnn_optimizer='Adagrad',
             linear_feature_columns=None,
             linear_optimizer='Ftrl',
             dnn_feature_columns=None,
             dnn_optimizer='Adagrad',
             dnn_hidden_units=None,
             dnn_connected_mode=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             input_layer_partitioner=None,
             loss_reduction=losses.Reduction.SUM,
             linear_sparse_combiner='sum',
             warm_start_from=None,
             config=None):
  """Initializes a WideAndDeepClassifier instance.

  Args:
    model_type: The structure to train. Must be one of `wide`, `deep`, or
      `wide_deep`.
    model_dir: Directory to save model parameters, graph and etc. This can
      also be used to load checkpoints from the directory into an estimator
      to continue training a previously saved model.
    with_cnn: Whether to add a CNN part to the model. Defaults to False.
    cnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
      the CNN part of the model. Defaults to the Adagrad optimizer.
    linear_feature_columns: An iterable containing all the feature columns
      used by the linear part of the model. All items in the set must be
      instances of classes derived from `FeatureColumn`.
    linear_optimizer: An instance of `tf.Optimizer` used to apply gradients
      to the linear part of the model. Defaults to the FTRL optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the deep part of the model. All items in the set must be instances
      of classes derived from `FeatureColumn`.
    dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
      the deep part of the model. Defaults to the Adagrad optimizer.
    dnn_hidden_units: List of hidden units per layer. All layers are fully
      connected.
    dnn_connected_mode: Specifies how the hidden layers of the deep part are
      connected; forwarded to `_wide_deep_combined_model_fn`.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training. It will be multiplied by the loss of the example. If
      it is a string, it is used as a key to fetch the weight tensor from
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and take any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes` > 2. An error is
      raised if the vocabulary is not provided and labels are strings.
    input_layer_partitioner: Partitioner for the input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.
    linear_sparse_combiner: A string specifying how to reduce the linear
      model if a categorical column is multivalent. One of "mean", "sqrtn",
      and "sum" -- these are effectively different ways to do example-level
      normalization, which can be useful for bag-of-words features. For more
      details, see `tf.feature_column.linear_model`.
    warm_start_from: A string filepath to a checkpoint to warm-start from, or
      a `WarmStartSettings` object to fully configure warm-starting. If the
      string filepath is provided instead of a `WarmStartSettings`, then all
      weights are warm-started, and it is assumed that vocabularies and
      Tensor names are unchanged.
    config: RunConfig object to configure the runtime settings.

  Raises:
    ValueError: If both linear_feature_columns and dnn_feature_columns are
      empty at the same time.
  """
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')
  if model_type is None:
    raise ValueError('model_type must be defined, one of `wide`, `deep`, '
                     '`wide_deep`.')
  assert model_type in {'wide', 'deep', 'wide_deep'}, (
      'Invalid model type, must be one of `wide`, `deep`, `wide_deep`.')
  if model_type == 'wide' and not linear_feature_columns:
    raise ValueError('linear_feature_columns must be defined for wide model.')
  elif model_type == 'deep' and not dnn_feature_columns:
    raise ValueError('dnn_feature_columns must be defined for deep model.')
  if dnn_feature_columns and not dnn_hidden_units:
    raise ValueError('dnn_hidden_units must be defined when '
                     'dnn_feature_columns is specified.')
  head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
      n_classes, weight_column, label_vocabulary, loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _wide_deep_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        model_type=model_type,
        with_cnn=with_cnn,
        cnn_optimizer=cnn_optimizer,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_connected_mode=dnn_connected_mode,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        input_layer_partitioner=input_layer_partitioner,
        linear_sparse_combiner=linear_sparse_combiner,
        config=config)

  super(WideAndDeepClassifier, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      warm_start_from=warm_start_from)