def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
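# A minimal usage sketch, not part of the source above: it assumes the public
# tf.estimator.DNNLinearCombinedClassifier API (which this __init__ mirrors) and
# hypothetical feature columns. The wide (linear) part gets the sparse column,
# the deep (DNN) part gets its embedding plus a numeric column.
import tensorflow as tf

occupation = tf.feature_column.categorical_column_with_hash_bucket(
    'occupation', hash_bucket_size=1000)
age = tf.feature_column.numeric_column('age')

wide_deep = tf.estimator.DNNLinearCombinedClassifier(
    model_dir='/tmp/wide_deep_model',
    linear_feature_columns=[occupation],
    dnn_feature_columns=[
        age,
        tf.feature_column.embedding_column(occupation, dimension=8),
    ],
    dnn_hidden_units=[64, 32],
    n_classes=2)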
Example #2
def _init_baseline_classifier(n_classes, weight_column, label_vocabulary,
                              optimizer, loss_reduction):
    """Helper function for the initialization of BaselineClassifier."""
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
        return _baseline_model_fn(features=features,
                                  labels=labels,
                                  mode=mode,
                                  head=head,
                                  optimizer=optimizer,
                                  weight_column=weight_column,
                                  config=config)

    return _model_fn
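# Sketch of how the model_fn returned by the helper above could be consumed
# (an assumption, not shown here): wrap it in a generic tf.estimator.Estimator,
# which is how canned estimators are typically assembled.
import tensorflow as tf

baseline_model_fn = _init_baseline_classifier(
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Ftrl',
    loss_reduction=tf.compat.v1.losses.Reduction.SUM)

baseline = tf.estimator.Estimator(
    model_fn=baseline_model_fn,
    model_dir='/tmp/baseline_model')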
Example #3
  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      n_classes=2,
      weight_column=None,
      label_vocabulary=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      input_layer_partitioner=None,
      config=None,
      warm_start_from=None,
      loss_reduction=tf.compat.v1.losses.Reduction.SUM,
      batch_norm=False,
  ):
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    super(DNNClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
Example #4
  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None,
               loss_reduction=losses.Reduction.SUM,
               sparse_combiner='sum'):
    _validate_linear_sdca_optimizer_for_linear_classifier(
        feature_columns=feature_columns,
        n_classes=n_classes,
        optimizer=optimizer,
        sparse_combiner=sparse_combiner)

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config,
          sparse_combiner=sparse_combiner)

    super(LinearClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
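# A minimal usage sketch, assuming the public tf.estimator.LinearClassifier API
# (which this __init__ mirrors) and a hypothetical multivalent categorical column;
# sparse_combiner='mean' normalizes the column at the example level, which is
# useful for bag-of-words style features.
import tensorflow as tf

terms = tf.feature_column.categorical_column_with_vocabulary_list(
    'terms', vocabulary_list=['sports', 'politics', 'tech'])

linear = tf.estimator.LinearClassifier(
    feature_columns=[terms],
    model_dir='/tmp/linear_model',
    n_classes=2,
    sparse_combiner='mean')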
Example #5
    def __init__(self,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Ftrl',
                 config=None,
                 loss_reduction=losses.Reduction.SUM):
        head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
            n_classes, weight_column, label_vocabulary, loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _baseline_model_fn(features=features,
                                      labels=labels,
                                      mode=mode,
                                      head=head,
                                      optimizer=optimizer,
                                      weight_column=weight_column,
                                      config=config)

        super(BaselineClassifier, self).__init__(model_fn=_model_fn,
                                                 model_dir=model_dir,
                                                 config=config)
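# A minimal usage sketch, assuming the public tf.estimator.BaselineClassifier API
# (which this __init__ mirrors). The baseline ignores feature values and learns
# only the label distribution, making it a useful sanity-check model.
import numpy as np
import tensorflow as tf

def baseline_input_fn():
    # Features are ignored by the baseline model; only the labels matter.
    features = {'x': np.zeros((4, 1), dtype=np.float32)}
    labels = np.array([0, 1, 1, 1], dtype=np.int32)
    return tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(4)

baseline = tf.estimator.BaselineClassifier(model_dir='/tmp/baseline', n_classes=2)
baseline.train(input_fn=baseline_input_fn, max_steps=100)

Example #6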
def _init_dnn_linear_combined_classifier(
        linear_feature_columns, linear_optimizer, dnn_feature_columns,
        dnn_optimizer, dnn_hidden_units, dnn_activation_fn, dnn_dropout,
        n_classes, weight_column, label_vocabulary, input_layer_partitioner,
        loss_reduction, batch_norm, linear_sparse_combiner):
    """Helper function for the initialization of DNNLinearCombinedClassifier."""
    linear_feature_columns = linear_feature_columns or []
    dnn_feature_columns = dnn_feature_columns or []
    feature_columns = (list(linear_feature_columns) +
                       list(dnn_feature_columns))
    if not feature_columns:
        raise ValueError(
            'Either linear_feature_columns or dnn_feature_columns '
            'must be defined.')
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
        """Call the _dnn_linear_combined_model_fn."""
        return _dnn_linear_combined_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            linear_feature_columns=linear_feature_columns,
            linear_optimizer=linear_optimizer,
            dnn_feature_columns=dnn_feature_columns,
            dnn_optimizer=dnn_optimizer,
            dnn_hidden_units=dnn_hidden_units,
            dnn_activation_fn=dnn_activation_fn,
            dnn_dropout=dnn_dropout,
            input_layer_partitioner=input_layer_partitioner,
            config=config,
            batch_norm=batch_norm,
            linear_sparse_combiner=linear_sparse_combiner)

    return feature_columns, _model_fn
Example #7
def _init_dnn_classifier(hidden_units, feature_columns, n_classes,
                         weight_column, label_vocabulary, optimizer,
                         activation_fn, dropout, input_layer_partitioner,
                         loss_reduction, batch_norm):
    """Helper function for the initialization of DNNClassifier."""
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
        """Call the defined shared _dnn_model_fn."""
        return _dnn_model_fn(features=features,
                             labels=labels,
                             mode=mode,
                             head=head,
                             hidden_units=hidden_units,
                             feature_columns=tuple(feature_columns or []),
                             optimizer=optimizer,
                             activation_fn=activation_fn,
                             dropout=dropout,
                             input_layer_partitioner=input_layer_partitioner,
                             config=config,
                             batch_norm=batch_norm)

    return _model_fn
Example #8
    def __init__(self,
                 model_collections,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `DNNClassifier` instance.

            Args:
               model_collections: A list or tuple of `DNN` instances, one per
                 sub-network. Each element provides `hidden_units` and
                 `connected_layers`; the logits of all sub-networks are summed.
               feature_columns: An iterable containing all the feature columns used by
                 the model. All items in the set should be instances of classes derived
                 from `_FeatureColumn`.
               model_dir: Directory to save model parameters, graph, etc. This can
                 also be used to load checkpoints from the directory into an estimator to
                 continue training a previously saved model.
               n_classes: Number of label classes. Defaults to 2, namely binary
                 classification. Must be > 1.
               weight_column: A string or a `_NumericColumn` created by
                 `tf.feature_column.numeric_column` defining feature column representing
                 weights. It is used to down weight or boost examples during training. It
                 will be multiplied by the loss of the example. If it is a string, it is
                 used as a key to fetch weight tensor from the `features`. If it is a
                 `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
                 then weight_column.normalizer_fn is applied on it to get weight tensor.
               label_vocabulary: A list of strings represents possible label values. If
                 given, labels must be string type and have any value in
                 `label_vocabulary`. If it is not given, that means labels are
                 already encoded as integer or float within [0, 1] for `n_classes=2` and
                 encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
                 Also there will be errors if vocabulary is not provided and labels are
                 string.
               optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
                 to Adagrad optimizer.
               input_layer_partitioner: Optional. Partitioner for input layer. Defaults
                 to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
               config: `RunConfig` object to configure the runtime settings.
        """
        if not model_collections:
            raise ValueError(
                'Empty model collections, must fill DNN model instance.')
        assert isinstance(
            model_collections,
            (list, tuple)), "model_collections must be a list or tuple"
        for model in model_collections:
            if not isinstance(model, DNN):
                raise ValueError(
                    "model_collections element must be an instance of class DNN"
                )
        head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
            n_classes,
            weight_column=weight_column,
            label_vocabulary=label_vocabulary,
            loss_reduction=losses.Reduction.SUM)

        def _dnn_model_fn(features,
                          labels,
                          mode,
                          head,
                          optimizer='Adagrad',
                          input_layer_partitioner=None,
                          config=None):
            """Deep Neural Net model_fn.

            Args:
              features: dict of `Tensor`.
              labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
                dtype `int32` or `int64` in the range `[0, n_classes)`.
              mode: Defines whether this is training, evaluation or prediction.
                See `ModeKeys`.
              head: A `head_lib._Head` instance.
              optimizer: String, `tf.Optimizer` object, or callable that creates the
                optimizer to use for training. If not specified, will use the Adagrad
                optimizer with a default learning rate of 0.05.
              input_layer_partitioner: Partitioner for input layer. Defaults
                to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
              config: `RunConfig` object to configure the runtime settings.
            Returns:
              predictions: A dict of `Tensor` objects.
              loss: A scalar containing the loss of the step.
              train_op: The op for training.
            Raises:
              ValueError: If features has the wrong type.
            """
            if not isinstance(features, dict):
                raise ValueError(
                    'features should be a dictionary of `Tensor`s. '
                    'Given type: {}'.format(type(features)))
            optimizer = _get_optimizer_instance(optimizer, learning_rate=0.05)
            num_ps_replicas = config.num_ps_replicas if config else 0

            partitioner = tf.min_max_variable_partitioner(
                max_partitions=num_ps_replicas)
            with tf.variable_scope('dnn',
                                   values=tuple(iter(features.values())),
                                   partitioner=partitioner):
                input_layer_partitioner = input_layer_partitioner or (
                    tf.min_max_variable_partitioner(
                        max_partitions=num_ps_replicas,
                        min_slice_size=64 << 20))
                # Each sub-network produces logits of shape (batch_size, head.logits_dimension).
                logits = []
                for idx, m in enumerate(model_collections):
                    logits.append(
                        _dnn_logit_fn(features, mode, idx + 1,
                                      head.logits_dimension, m.hidden_units,
                                      m.connected_layers, feature_columns,
                                      input_layer_partitioner))
                # Summing the per-network logits is equivalent to concatenating
                # the pre-logit layers and learning a single logit layer on top.
                logits = tf.add_n(logits)

                def _train_op_fn(loss):
                    """Returns the op to optimize the loss."""
                    return optimizer.minimize(
                        loss, global_step=tf.train.get_global_step())

            return head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              train_op_fn=_train_op_fn,
                                              logits=logits)

        def _model_fn(features, labels, mode, config):
            return _dnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                optimizer=optimizer,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(MultiDNNClassifier, self).__init__(model_fn=_model_fn,
                                                 model_dir=model_dir,
                                                 config=config)
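# A rough usage sketch for the MultiDNNClassifier defined above. The `DNN`
# configuration class is not shown in this snippet, so the constructor arguments
# below (hidden_units, connected_layers) are assumptions inferred from how the
# attributes are read inside _dnn_model_fn; treat them as hypothetical.
import tensorflow as tf

price = tf.feature_column.numeric_column('price')

multi_dnn = MultiDNNClassifier(
    model_collections=[
        DNN(hidden_units=[128, 64], connected_layers=None),       # hypothetical ctor
        DNN(hidden_units=[256, 128, 64], connected_layers=None),  # hypothetical ctor
    ],
    feature_columns=[price],
    model_dir='/tmp/multi_dnn_model',
    n_classes=2)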
Example #9
File: dnn.py  Project: yupbank/estimator
    def __init__(
        self,
        hidden_units,
        feature_columns,
        model_dir=None,
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        optimizer='Adagrad',
        activation_fn=nn.relu,
        dropout=None,
        input_layer_partitioner=None,
        config=None,
        warm_start_from=None,
        loss_reduction=losses.Reduction.SUM,
        batch_norm=False,
    ):
        """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of number hidden units per layer. All layers are
        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings represents possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
        Also there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
        callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and Tensor
        names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """
        head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
            n_classes, weight_column, label_vocabulary, loss_reduction)

        def _model_fn(features, labels, mode, config):
            """Call the defined shared _dnn_model_fn."""
            return _dnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                hidden_units=hidden_units,
                feature_columns=tuple(feature_columns or []),
                optimizer=optimizer,
                activation_fn=activation_fn,
                dropout=dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config,
                batch_norm=batch_norm)

        super(DNNClassifier, self).__init__(model_fn=_model_fn,
                                            model_dir=model_dir,
                                            config=config,
                                            warm_start_from=warm_start_from)
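# A minimal end-to-end sketch, assuming the public tf.estimator.DNNClassifier API
# that this __init__ corresponds to, trained on a tiny random in-memory dataset.
import numpy as np
import tensorflow as tf

def train_input_fn():
    features = {'x': np.random.rand(32, 4).astype(np.float32)}
    labels = np.random.randint(0, 2, size=(32,)).astype(np.int32)
    return (tf.data.Dataset.from_tensor_slices((features, labels))
            .shuffle(32).repeat().batch(8))

dnn = tf.estimator.DNNClassifier(
    hidden_units=[64, 32],
    feature_columns=[tf.feature_column.numeric_column('x', shape=[4])],
    model_dir='/tmp/dnn_model',
    n_classes=2,
    dropout=0.1)
dnn.train(input_fn=train_input_fn, max_steps=200)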
Example #10
    def __init__(self,
                 model_type=None,
                 model_dir=None,
                 with_cnn=False,
                 cnn_optimizer='Adagrad',
                 linear_feature_columns=None,
                 linear_optimizer='Ftrl',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 dnn_connected_mode=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 input_layer_partitioner=None,
                 loss_reduction=losses.Reduction.SUM,
                 linear_sparse_combiner='sum',
                 warm_start_from=None,
                 config=None):
        """Initializes a WideDeepCombinedClassifier instance.

        Args:
            model_type: A string, one of `wide`, `deep`, or `wide_deep`, selecting
                which parts of the combined model are built.
            model_dir: Directory to save model parameters, graph, etc. This can
                also be used to load checkpoints from the directory into an estimator
                to continue training a previously saved model.
            with_cnn: Whether to include the CNN part of the model. Defaults to False.
            cnn_optimizer: An instance of `tf.Optimizer` (or an optimizer name) used to
                apply gradients to the CNN part of the model. Defaults to Adagrad.
            linear_feature_columns: An iterable containing all the feature columns
                used by linear part of the model. All items in the set must be
                instances of classes derived from `FeatureColumn`.
            linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
                the linear part of the model. Defaults to FTRL optimizer.
            dnn_feature_columns: An iterable containing all the feature columns used
                by deep part of the model. All items in the set must be instances of
                classes derived from `FeatureColumn`.
            dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
                the deep part of the model. Defaults to Adagrad optimizer.
            dnn_hidden_units: List of hidden units per layer. All layers are fully
                connected.
            dnn_connected_mode: Specifies how the DNN hidden layers are connected;
                passed through to `_wide_deep_combined_model_fn`.
            n_classes: Number of label classes. Defaults to 2, namely binary
                classification. Must be > 1.
            weight_column: A string or a `_NumericColumn` created by
                `tf.feature_column.numeric_column` defining feature column representing
                weights. It is used to down weight or boost examples during training. It
                will be multiplied by the loss of the example. If it is a string, it is
                used as a key to fetch weight tensor from the `features`. If it is a
                `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
                then weight_column.normalizer_fn is applied on it to get weight tensor.
            label_vocabulary: A list of strings represents possible label values. If
                given, labels must be string type and have any value in
                `label_vocabulary`. If it is not given, that means labels are
                already encoded as integer or float within [0, 1] for `n_classes=2` and
                encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
                Also there will be errors if vocabulary is not provided and labels are
                string.
            input_layer_partitioner: Partitioner for input layer. Defaults to
                `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
            warm_start_from: A string filepath to a checkpoint to warm-start from, or
                a `WarmStartSettings` object to fully configure warm-starting.  If the
                string filepath is provided instead of a `WarmStartSettings`, then all
                weights are warm-started, and it is assumed that vocabularies and Tensor
                names are unchanged.
            loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
                to reduce training loss over batch. Defaults to `SUM`.
            linear_sparse_combiner: A string specifying how to reduce the linear model
                if a categorical column is multivalent.  One of "mean", "sqrtn", and
                "sum" -- these are effectively different ways to do example-level
                normalization, which can be useful for bag-of-words features.  For more
                details, see `tf.feature_column.linear_model`.
            config: RunConfig object to configure the runtime settings.

        Raises:
            ValueError: If both linear_feature_columns and dnn_feature_columns are
                empty at the same time.
        """
        if not linear_feature_columns and not dnn_feature_columns:
            raise ValueError(
                'Either linear_feature_columns or dnn_feature_columns must be defined.')
        if model_type is None:
            raise ValueError(
                'Model type must be defined: one of `wide`, `deep`, `wide_deep`.')
        else:
            assert model_type in {'wide', 'deep', 'wide_deep'}, (
                'Invalid model type, must be one of `wide`, `deep`, `wide_deep`.')
            if model_type == 'wide' and not linear_feature_columns:
                raise ValueError(
                    'linear_feature_columns must be defined for the wide model.')
            elif model_type == 'deep' and not dnn_feature_columns:
                raise ValueError(
                    'dnn_feature_columns must be defined for the deep model.')
        if dnn_feature_columns and not dnn_hidden_units:
            raise ValueError('dnn_hidden_units must be defined when dnn_feature_columns is specified.')

        head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
            n_classes, weight_column, label_vocabulary, loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _wide_deep_combined_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                model_type=model_type,
                with_cnn=with_cnn,
                cnn_optimizer=cnn_optimizer,
                linear_feature_columns=linear_feature_columns,
                linear_optimizer=linear_optimizer,
                dnn_feature_columns=dnn_feature_columns,
                dnn_connected_mode=dnn_connected_mode,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                input_layer_partitioner=input_layer_partitioner,
                linear_sparse_combiner=linear_sparse_combiner,
                config=config)
        super(WideAndDeepClassifier, self).__init__(
            model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from)
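# A usage sketch for the WideAndDeepClassifier defined above (hypothetical values):
# model_type selects which branches are built, and the matching feature columns
# must be supplied, mirroring the validation at the top of __init__.
import tensorflow as tf

occupation = tf.feature_column.categorical_column_with_hash_bucket(
    'occupation', hash_bucket_size=1000)
age = tf.feature_column.numeric_column('age')

wide_deep = WideAndDeepClassifier(
    model_type='wide_deep',
    model_dir='/tmp/wide_deep',
    linear_feature_columns=[occupation],
    dnn_feature_columns=[
        age,
        tf.feature_column.embedding_column(occupation, dimension=8),
    ],
    dnn_hidden_units=[128, 64],
    dnn_connected_mode=None,  # assumption: pass-through value consumed by the model_fn
    n_classes=2)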