Example 1
def testFitAndEvaluateDontThrowExceptionWithCore(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        classifier = estimator.DNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            # Use core feature columns
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            tree_feature_columns=[],
            use_core_versions=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
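Note: the tests in this listing reference `_train_input_fn` and `_eval_input_fn` without showing them. A minimal sketch of what such input functions could look like follows; the concrete feature values and labels are assumptions for illustration, not the original test fixtures.

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

def _train_input_fn():
  # One numeric feature "x" and binary integer labels (values are made up).
  features = {"x": constant_op.constant([[2.], [1.], [1.]])}
  labels = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
  return features, labels

def _eval_input_fn():
  features = {"x": constant_op.constant([[1.], [2.], [2.]])}
  labels = constant_op.constant([[0], [1], [1]], dtype=dtypes.int32)
  return features, labels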
Example 2
  def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        tree_feature_columns=[])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
Example 3
    def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreDNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            tree_feature_columns=[])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        res = est.evaluate(input_fn=_eval_input_fn, steps=1)
        self.assertLess(0.5, res["auc"])
        est.predict(input_fn=_eval_input_fn)
Example 4
    def testRankingDontThrowExceptionForForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        est = estimator.CoreGradientBoostedDecisionTreeRanker(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[
                core_feature_column.numeric_column("f1"),
                core_feature_column.numeric_column("f2")
            ],
            ranking_model_pair_keys=("a", "b"))

        # Train for a few steps.
        est.train(input_fn=_ranking_train_input_fn, steps=1000)
        est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
        est.predict(input_fn=_infer_ranking_train_input_fn)
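The ranking test relies on `_ranking_train_input_fn` and `_infer_ranking_train_input_fn`, which are not shown here. With `ranking_model_pair_keys=("a", "b")` each feature column has to be fed once per pair member; below is a hedged sketch, where the `"<pair_key>.<feature>"` key naming and all values are assumptions.

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

def _ranking_train_input_fn():
  # Each example provides features for document "a" and document "b"; the
  # label says which member of the pair should rank higher (illustrative).
  features = {
      "a.f1": constant_op.constant([[3.], [0.3], [1.]]),
      "a.f2": constant_op.constant([[0.1], [3.], [1.]]),
      "b.f1": constant_op.constant([[13.], [0.4], [5.]]),
      "b.f2": constant_op.constant([[1.], [3.], [0.01]]),
  }
  labels = constant_op.constant([[0], [0], [1]], dtype=dtypes.int32)
  return features, labels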
Example 5
  def testRankingDontThrowExceptionForForEstimator(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    est = estimator.CoreGradientBoostedDecisionTreeRanker(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[
            core_feature_column.numeric_column("f1"),
            core_feature_column.numeric_column("f2")
        ],
        ranking_model_pair_keys=("a", "b"))

    # Train for a few steps.
    est.train(input_fn=_ranking_train_input_fn, steps=1000)
    est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
    est.predict(input_fn=_infer_ranking_train_input_fn)
Example 6
  def testFitAndEvaluateDontThrowExceptionWithCore(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    # Use core head
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

    classifier = estimator.DNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        # Use core feature columns
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        tree_feature_columns=[],
        use_core_versions=True)

    classifier.fit(input_fn=_train_input_fn, steps=15)
    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example 7
    def __init__(self,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Ftrl',
                 config=None,
                 loss_reduction=losses.Reduction.SUM):
        """Initializes a BaselineClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        It must be greater than 1. Note: Class labels are integers representing
        the class index (i.e. values from 0 to n_classes-1). For arbitrary
        label values (e.g. string labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
         weights. It will be multiplied by the loss of the example.
      label_vocabulary: Optional list of strings with size `[n_classes]`
        defining the label vocabulary. Only supported for `n_classes` > 2.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. If not specified, will use
        `FtrlOptimizer` with a default learning rate of 0.3.
      config: `RunConfig` object to configure the runtime settings.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
    Returns:
      A `BaselineClassifier` estimator.

    Raises:
      ValueError: If `n_classes` < 2.
    """
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _baseline_model_fn(features=features,
                                      labels=labels,
                                      mode=mode,
                                      head=head,
                                      optimizer=optimizer,
                                      weight_column=weight_column,
                                      config=config)

        super(BaselineClassifier, self).__init__(model_fn=_model_fn,
                                                 model_dir=model_dir,
                                                 config=config)
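A minimal usage sketch for the class above (the feature name, the data, and the core `train()`/`evaluate()` API are assumptions); a baseline classifier ignores its features and learns only the prior distribution of the labels.

import tensorflow as tf

def input_fn():
  # The feature values are irrelevant to the baseline model; only labels count.
  features = {"x": tf.constant([[1.], [2.], [3.], [4.]])}
  labels = tf.constant([[0], [0], [0], [1]])
  return features, labels

classifier = BaselineClassifier(n_classes=2)
classifier.train(input_fn=input_fn, steps=10)
print(classifier.evaluate(input_fn=input_fn, steps=1))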
Example 8
  def __init__(self,
               hidden_units,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_feature_key=None,
               optimizer='Adagrad',
               activation_fn=nn.relu,
               dropout=None,
               input_layer_partitioner=None,
               config=None):
    """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of the number of hidden units per layer. All
        layers are fully connected. Ex. `[64, 32]` means the first layer has
        64 nodes and the second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_feature_key: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
    """
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_feature_key=weight_feature_key)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_feature_key=weight_feature_key)
    def _model_fn(features, labels, mode, config):
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config)
    super(DNNClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
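A hedged usage sketch for this DNNClassifier variant (the feature column, data, and the core `train()` API are assumptions):

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column("x", shape=(2,))]

def input_fn():
  features = {"x": tf.constant([[0., 1.], [1., 0.], [1., 1.]])}
  labels = tf.constant([[1], [1], [0]])
  return features, labels

classifier = DNNClassifier(hidden_units=[8, 4],
                           feature_columns=feature_columns,
                           dropout=0.1)
classifier.train(input_fn=input_fn, steps=20)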
Example 9
  def __init__(self,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               loss_reduction=losses.Reduction.SUM):
    """Initializes a BaselineClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        It must be greater than 1. Note: Class labels are integers representing
        the class index (i.e. values from 0 to n_classes-1). For arbitrary
        label values (e.g. string labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
         weights. It will be multiplied by the loss of the example.
      label_vocabulary: Optional list of strings with size `[n_classes]`
        defining the label vocabulary. Only supported for `n_classes` > 2.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. If not specified, will use
        `FtrlOptimizer` with a default learning rate of 0.3.
      config: `RunConfig` object to configure the runtime settings.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
    Returns:
      A `BaselineClassifier` estimator.

    Raises:
      ValueError: If `n_classes` < 2.
    """
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    def _model_fn(features, labels, mode, config):
      return _baseline_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          optimizer=optimizer,
          weight_column=weight_column,
          config=config)
    super(BaselineClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config)
Example 10
 def build_estimator_spec(self):
     '''Build EstimatorSpec'''
     my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
         loss_reduction=losses.Reduction.SUM)
     return my_head.create_estimator_spec(features=self.features,
                                          mode=self.mode,
                                          labels=self.labels,
                                          train_op_fn=self.train_op_fn,
                                          logits=self.logits)
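Here `create_estimator_spec` receives a `train_op_fn` rather than an optimizer: the head computes the loss and calls the function back to obtain the training op. A hedged sketch of such a callback (the optimizer choice and learning rate are assumptions):

import tensorflow as tf

def train_op_fn(loss):
  # Receives the reduced training loss from the head and returns the op that
  # applies one optimization step and advances the global step.
  optimizer = tf.train.AdagradOptimizer(learning_rate=0.05)
  return optimizer.minimize(loss, global_step=tf.train.get_global_step())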
Example 11
def binary_classification_head(weight_column=None,
                               thresholds=None,
                               label_vocabulary=None,
                               loss_reduction=losses.Reduction.SUM,
                               name=None):
    """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
  In many applications, the shape is `[batch_size, 1]`.

  `labels` must be a dense `Tensor` with shape matching `logits`, namely
  `[D0, D1, ... DN, 1]`. If `label_vocabulary` is given, `labels` must be a string
  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
  `labels` must be float `Tensor` with values in the interval `[0, 1]`.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.

  The loss is the weighted sum over the input dimensions. Namely, if the input
  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
  `batch_size`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, labels must be float with values within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch. Defaults to `SUM`.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
    ValueError: If `loss_reduction` is invalid.
  """
    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
        weight_column=weight_column,
        thresholds=thresholds,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction,
        name=name)
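A hedged sketch of how such a head is typically wired into a custom `model_fn` (the feature column, the single-unit logits layer, and the optimizer are illustrative assumptions):

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column("x")]

def model_fn(features, labels, mode):
  net = tf.feature_column.input_layer(features, feature_columns)
  logits = tf.layers.dense(net, units=1, activation=None)  # shape [batch, 1]
  head = binary_classification_head()

  def train_op_fn(loss):
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    return optimizer.minimize(loss, global_step=tf.train.get_global_step())

  return head.create_estimator_spec(
      features=features, mode=mode, labels=labels,
      train_op_fn=train_op_fn, logits=logits)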
Example 12
  def testWeightedCategoricalColumn(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    feature_columns = [
        core_feature_column.weighted_categorical_column(
            categorical_column=core_feature_column.
            categorical_column_with_vocabulary_list(
                key="word", vocabulary_list=["the", "cat", "dog"]),
            weight_feature_key="weight")
    ]

    labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

    def _make_input_fn():

      def _input_fn():
        features_dict = {}
        # Sparse tensor representing:
        #   example 0: "the", "cat"
        #   example 1: "dog"
        #   example 2: (empty)
        #   example 3: "the"
        # Word weights: the - 1, cat - 5, dog - 6.
        features_dict["word"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=constant_op.constant(
                ["the", "cat", "dog", "the"], dtype=dtypes.string),
            dense_shape=[4, 3])
        features_dict["weight"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=[1., 5., 6., 1.],
            dense_shape=[4, 3])
        return features_dict, labels

      return _input_fn

    est = estimator.CoreGradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=feature_columns)

    input_fn = _make_input_fn()
    est.train(input_fn=input_fn, steps=100)
    est.evaluate(input_fn=input_fn, steps=1)
    est.predict(input_fn=input_fn)
Example 13
  def testWeightedCategoricalColumn(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    feature_columns = [
        core_feature_column.weighted_categorical_column(
            categorical_column=core_feature_column
            .categorical_column_with_vocabulary_list(
                key="word", vocabulary_list=["the", "cat", "dog"]),
            weight_feature_key="weight")
    ]

    labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

    def _make_input_fn():

      def _input_fn():
        features_dict = {}
        # Sparse tensor representing:
        #   example 0: "the", "cat"
        #   example 1: "dog"
        #   example 2: (empty)
        #   example 3: "the"
        # Word weights: the - 1, cat - 5, dog - 6.
        features_dict["word"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=constant_op.constant(["the", "cat", "dog", "the"],
                                        dtype=dtypes.string),
            dense_shape=[4, 3])
        features_dict["weight"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=[1., 5., 6., 1.],
            dense_shape=[4, 3])
        return features_dict, labels

      return _input_fn

    est = estimator.CoreGradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=feature_columns)

    input_fn = _make_input_fn()
    est.train(input_fn=input_fn, steps=100)
    est.evaluate(input_fn=input_fn, steps=1)
    est.predict(input_fn=input_fn)
Example 14
def binary_classification_head(
    weight_column=None, thresholds=None, label_vocabulary=None,
    loss_reduction=losses.Reduction.SUM, name=None):
  """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
  In many applications, the shape is `[batch_size, 1]`.

  `labels` must be a dense `Tensor` with shape matching `logits`, namely
  `[D0, D1, ... DN, 1]`. If `label_vocabulary` is given, `labels` must be a string
  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
  `labels` must be float `Tensor` with values in the interval `[0, 1]`.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.

  The loss is the weighted sum over the input dimensions. Namely, if the input
  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
  `batch_size`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, labels must be float with values within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch. Defaults to `SUM`.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
    ValueError: If `loss_reduction` is invalid.
  """
  return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
      weight_column=weight_column,
      thresholds=thresholds,
      label_vocabulary=label_vocabulary,
      loss_reduction=loss_reduction,
      name=name)
Example 15
 def build_estimator_spec(self):
     '''Build EstimatorSpec (returns an EstimatorSpec).'''
     my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
         loss_reduction=losses.Reduction.SUM)
     optimizer = tf.train.AdagradOptimizer(
         learning_rate=self.params['LEARNING_RATE'])
     return my_head.create_estimator_spec(features=self.features,
                                          mode=self.mode,
                                          labels=self.labels,
                                          optimizer=optimizer,
                                          logits=self.logits)
Example 16
    def __init__(self,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_feature_key=None,
                 optimizer=None,
                 config=None,
                 partitioner=None):
        """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_feature_key: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: The optimizer used to train the model. If specified, it should
        be either an instance of `tf.Optimizer` or the SDCAOptimizer. If `None`,
        the Ftrl optimizer will be used.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_feature_key=weight_feature_key)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_feature_key=weight_feature_key)
        super(LinearClassifier, self).__init__(model_fn=_linear_model_fn,
                                               model_dir=model_dir,
                                               config=config,
                                               params={
                                                   'head': head,
                                                   'feature_columns':
                                                   feature_columns,
                                                   'optimizer': optimizer,
                                                   'partitioner': partitioner,
                                               })
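A hedged usage sketch for this LinearClassifier variant (the feature column, data, and the core `train()` API are assumptions):

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column("age")]

def input_fn():
  features = {"age": tf.constant([[23.], [31.], [54.]])}
  labels = tf.constant([[0], [0], [1]])
  return features, labels

classifier = LinearClassifier(feature_columns=feature_columns)
classifier.train(input_fn=input_fn, steps=20)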
Example 17
    def __init__(
        self,
        hidden_units,
        linear_feature_columns,
        dnn_feature_columns,
        fm_feature_columns,
        model_dir=None,
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        optimizer='Adagrad',
        activation_fn=nn.relu,
        dropout=None,
        input_layer_partitioner=None,
        config=None,
        warm_start_from=None,
        loss_reduction=losses.Reduction.SUM,
    ):
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            """Call the defined shared _dnn_model_fn."""
            return _dfm_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                hidden_units=hidden_units,
                dnn_feature_columns=dnn_feature_columns,
                linear_feature_columns=linear_feature_columns,
                fm_feature_columns=fm_feature_columns,
                optimizer=optimizer,
                activation_fn=activation_fn,
                dropout=dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DeepFMClassifier, self).__init__(model_fn=_model_fn,
                                               model_dir=model_dir,
                                               config=config,
                                               warm_start_from=warm_start_from)
Example 18
  def __init__(self,
               num_workers,
               model_dir=None,
               linear_feature_columns=None,
               dnn_feature_columns=None,
               dnn_hidden_units=None,
               dnn_activation_fn=nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None):

    linear_feature_columns = linear_feature_columns or []
    dnn_feature_columns = dnn_feature_columns or []
    self._feature_columns = (
        list(linear_feature_columns) + list(dnn_feature_columns))
    if not self._feature_columns:
      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                       'must be defined.')
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes,
          weight_column=weight_column,
          label_vocabulary=label_vocabulary)

    self.optimizer = self.create_optimizer(num_workers, linear_feature_columns)

    def _model_fn(features, labels, mode, config):
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          dnn_feature_columns=dnn_feature_columns,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          num_workers=num_workers,
          opt=self.optimizer)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
Example 19
def _create_classification_head(n_classes,
                                weight_column=None,
                                label_vocabulary=None):
  """Creates a classification head. Refer to canned.head for details on args."""
  # TODO(nponomareva): Support multi-class cases.
  if n_classes == 2:
    # pylint: disable=protected-access
    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
    # pylint: enable=protected-access
  else:
    raise ValueError('For now only binary classification is supported. '
                     'n_classes given as {}'.format(n_classes))
Example 20
def _create_classification_head(n_classes,
                                weight_column=None,
                                label_vocabulary=None):
  """Creates a classification head. Refer to canned.head for details on args."""
  # TODO(nponomareva): Support multi-class cases.
  if n_classes == 2:
    # pylint: disable=protected-access
    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
    # pylint: enable=protected-access
  else:
    raise ValueError('For now only binary classification is supported. '
                     'n_classes given as {}'.format(n_classes))
Example 21
  def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        predict_with_tree_only=True,
        dnn_to_tree_distillation_param=(0.5, None),
        tree_feature_columns=[])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
    serving_input_fn = (
        export.build_parsing_serving_input_receiver_fn(
            feature_spec={"x": parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32)}))
    base_exporter = exporter.FinalExporter(
        name="Servo",
        serving_input_receiver_fn=serving_input_fn,
        assets_extra=None)
    export_path = os.path.join(model_dir, "export")
    base_exporter.export(
        est,
        export_path=export_path,
        checkpoint_path=None,
        eval_result={},
        is_the_final_export=True)
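After `FinalExporter.export` runs, a timestamped SavedModel directory should appear under `export_path`. A hedged sanity-check sketch (the directory layout follows standard SavedModel conventions and is an assumption here):

import os

export_subdirs = sorted(os.listdir(export_path))
saved_model_dir = os.path.join(export_path, export_subdirs[-1])
print("SavedModel written to:", saved_model_dir)
print(os.listdir(saved_model_dir))  # expect saved_model.pb and variables/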
Example 22
def binary_classification_head(weight_column=None,
                               thresholds=None,
                               label_vocabulary=None,
                               name=None):
    """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  This head expects to be fed float labels of shape `(batch_size, 1)`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list of strings representing possible label values. If
      it is not given, labels are assumed to be already encoded within [0, 1].
      If given, labels must be of string type and take any value in
      `label_vocabulary`. Errors will be raised if the vocabulary is not
      provided and the labels are strings.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: if `thresholds` contains a value outside of `(0, 1)`.
  """
    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
        weight_column=weight_column,
        thresholds=thresholds,
        label_vocabulary=label_vocabulary,
        name=name)
Example 23
  def testTrainEvaluateInferDoesNotThrowError(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreGradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    est.evaluate(input_fn=_eval_input_fn, steps=1)
    est.predict(input_fn=_eval_input_fn)
Example 24
    def testTrainEvaluateInferDoesNotThrowError(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreGradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        est.evaluate(input_fn=_eval_input_fn, steps=1)
        est.predict(input_fn=_eval_input_fn)
Example 25
def binary_classification_head(
    weight_column=None, thresholds=None, label_vocabulary=None, name=None):
  """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  This head expects to be fed float labels of shape `(batch_size, 1)`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list of strings representing possible label values. If
      it is not given, labels are assumed to be already encoded within [0, 1].
      If given, labels must be of string type and take any value in
      `label_vocabulary`. Errors will be raised if the vocabulary is not
      provided and the labels are strings.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: if `thresholds` contains a value outside of `(0, 1)`.
  """
  return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
      weight_column=weight_column,
      thresholds=thresholds,
      label_vocabulary=label_vocabulary,
      name=name)
Example 26
  def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    # Use core head
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

    model = estimator.GradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")],
        use_core_libs=True)

    model.fit(input_fn=_train_input_fn, steps=15)
    model.evaluate(input_fn=_eval_input_fn, steps=1)
    model.export(self._export_dir_base)
Example 27
    def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        model = estimator.GradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        model.fit(input_fn=_train_input_fn, steps=15)
        model.evaluate(input_fn=_eval_input_fn, steps=1)
        model.export(self._export_dir_base)
Example 28
    def __init__(self,
                 model_dir=None,
                 linear_feature_columns=None,
                 linear_optimizer='Ftrl',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 dnn_activation_fn=nn.relu,
                 dnn_dropout=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 input_layer_partitioner=None,
                 config=None,
                 warm_start_from=None,
                 loss_reduction=losses.Reduction.SUM):
        """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and take any value in
        `label_vocabulary`. If it is not given, labels are assumed to be
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        as integer values in {0, 1, ..., n_classes-1} for `n_classes` > 2.
        Errors will be raised if the vocabulary is not provided and the labels
        are strings.
      input_layer_partitioner: Partitioner for input layer. Defaults to
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and Tensor
        names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_features_columns are
        empty at the same time.
    """
        linear_feature_columns = linear_feature_columns or []
        dnn_feature_columns = dnn_feature_columns or []
        self._feature_columns = (list(linear_feature_columns) +
                                 list(dnn_feature_columns))
        if not self._feature_columns:
            raise ValueError(
                'Either linear_feature_columns or dnn_feature_columns '
                'must be defined.')
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            """Call the _dnn_linear_combined_model_fn."""
            return _dnn_linear_combined_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                linear_feature_columns=linear_feature_columns,
                linear_optimizer=linear_optimizer,
                dnn_feature_columns=dnn_feature_columns,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                dnn_activation_fn=dnn_activation_fn,
                dnn_dropout=dnn_dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DNNLinearCombinedClassifier,
              self).__init__(model_fn=_model_fn,
                             model_dir=model_dir,
                             config=config,
                             warm_start_from=warm_start_from)
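A hedged usage sketch of this wide-and-deep classifier (the feature columns, data, and hidden layer sizes are assumptions):

import tensorflow as tf

wide_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        "city", vocabulary_list=["sf", "nyc"])
]
deep_columns = [tf.feature_column.numeric_column("age")]

def input_fn():
  features = {
      "city": tf.constant([["sf"], ["nyc"], ["sf"]]),
      "age": tf.constant([[23.], [31.], [54.]]),
  }
  labels = tf.constant([[0], [1], [1]])
  return features, labels

classifier = DNNLinearCombinedClassifier(
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[8, 4])
classifier.train(input_fn=input_fn, steps=20)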
Example 29
    def __init__(self,
                 model_dir=None,
                 fm_first_feature_columns=None,
                 fm_second_feature_columns=None,
                 linear_optimizer='Ftrl',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 embedding_size=None,
                 field_size=None,
                 dnn_activation_fn=nn.relu,
                 dnn_dropout=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 input_layer_partitioner=None,
                 config=None,
                 warm_start_from=None,
                 loss_reduction=losses.Reduction.SUM):
        fm_first_feature_columns = fm_first_feature_columns or []
        dnn_feature_columns = dnn_feature_columns or []
        fm_second_feature_columns = fm_second_feature_columns or []
        self._feature_columns = (list(fm_first_feature_columns) +
                                 list(dnn_feature_columns) +
                                 list(fm_second_feature_columns))
        if not self._feature_columns:
            raise ValueError(
                'Either fm_first_feature_columns or fm_second_feature_columns or dnn_feature_columns '
                'must be defined.')
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            """Call the _deepfm_model_fn."""
            return _deepfm_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                fm_first_feature_columns=fm_first_feature_columns,
                fm_second_feature_columns=fm_second_feature_columns,
                embedding_size=embedding_size,
                field_size=field_size,
                linear_optimizer=linear_optimizer,
                dnn_feature_columns=dnn_feature_columns,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                dnn_activation_fn=dnn_activation_fn,
                dnn_dropout=dnn_dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DeepFM, self).__init__(model_fn=_model_fn,
                                     model_dir=model_dir,
                                     config=config,
                                     warm_start_from=warm_start_from)
Example 30
  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=nn.relu,
               dnn_dropout=None,
               n_classes=2,
               input_layer_partitioner=None,
               config=None):
    """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      input_layer_partitioner: Partitioner for input layer. Defaults to
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: RunConfig object to configure the runtime settings.

    Raises:
      ValueError: If both linear_feature_columns and dnn_features_columns are
        empty at the same time.
    """
    linear_feature_columns = linear_feature_columns or []
    dnn_feature_columns = dnn_feature_columns or []
    self._feature_columns = linear_feature_columns + dnn_feature_columns
    if not self._feature_columns:
      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                       'must be defined.')
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()  # pylint: disable=protected-access
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes)

    def _model_fn(features, labels, mode, config):
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
Example 31
  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               loss_reduction=losses.Reduction.SUM):
    """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
        Also there will be errors if vocabulary is not provided and labels are
        string.
      input_layer_partitioner: Partitioner for input layer. Defaults to
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and Tensor
        names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
    """
    linear_feature_columns = linear_feature_columns or []
    dnn_feature_columns = dnn_feature_columns or []
    self._feature_columns = (
        list(linear_feature_columns) + list(dnn_feature_columns))
    if not self._feature_columns:
      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                       'must be defined.')
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes,
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config,
        warm_start_from=warm_start_from)
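
A sketch of how the extra arguments of this variant might be used; the checkpoint path, feature keys, and weight column are hypothetical. It warm-starts from a previous run and accepts string labels through `label_vocabulary`:

```python
# Hypothetical usage of the warm-startable DNNLinearCombinedClassifier above.
import tensorflow as tf

price = tf.feature_column.numeric_column("price")
category = tf.feature_column.categorical_column_with_vocabulary_list(
    "category", ["books", "games", "toys"])

classifier = DNNLinearCombinedClassifier(
    model_dir="/tmp/wd_v2",
    linear_feature_columns=[category],
    dnn_feature_columns=[price,
                         tf.feature_column.embedding_column(category, 4)],
    dnn_hidden_units=[8],
    weight_column="example_weight",             # per-example loss weights
    label_vocabulary=["negative", "positive"],  # labels arrive as strings
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
    warm_start_from="/tmp/wd_v1")               # reuse weights from an older run
```
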
Example 32
def wide_and_deep(features=None, params=None):
    ###############
    WIDE_CATE_COLS = params['WIDE_CATE_COLS']
    CONTINUOUS_COLS = params['CONTINUOUS_COLS']
    DEEP_EMBEDDING_COLS = params['DEEP_EMBEDDING_COLS']
    WIDE_CROSS_COLS = params['WIDE_CROSS_COLS']
    DEEP_SHARED_EMBEDDING_COLS = params['DEEP_SHARED_EMBEDDING_COLS']
    _HIDDEN_UNITS = params['_HIDDEN_UNITS']
    _LINEAR_LEARNING_RATE = params['_LINEAR_LEARNING_RATE']
    _DNN_LEARNING_RATE = params['_DNN_LEARNING_RATE']

    wide_logits = None
    linear_absolute_scope = None
    if params['WIDE']:
        wide_sum = []
        with variable_scope.variable_scope(
                'linear', values=tuple(six.itervalues(features))) as scope:
            linear_absolute_scope = scope.name
            for col, size in WIDE_CATE_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cate" + col)
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], size, name="wide_hash_" + col)
                wide_sum.append(
                    tf.nn.embedding_lookup(w_wide,
                                           indices,
                                           name="wide_cat_lookup_" + col))
            # for col, size in WIDE_BUCKET_COLS:
            #     w_wide = tf.get_variable(shape=[size, 1], initializer=init_ops.zeros_initializer, trainable=True,
            #                              name="Wide_Part_Weights_Bucket" + col)
            #     indices = string_ops.string_to_hash_bucket_fast(
            #         features[col], size, name="wide_hash_" + col)
            #     wide_sum.append(tf.nn.embedding_lookup(w_wide, indices, name="wide_bucket_lookup_" + col))
            for col1, col2, size in WIDE_CROSS_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cross" + col1 + '_' + col2)
                # cross_input = tf.as_string(tf.string_to_number(features[col1],_dtypes.int64)*tf.string_to_number(features[col2],_dtypes.int64))
                cross_input = tf.string_join([features[col1], features[col2]],
                                             separator="_")
                indices = string_ops.string_to_hash_bucket_fast(
                    cross_input, size, name="wide_hash_" + col1 + '_' + col2)
                wide_sum.append(
                    tf.nn.embedding_lookup(w_wide,
                                           indices,
                                           name="wide_cross_lookup_" + col1 +
                                           '_' + col2))

            w_wide = tf.get_variable(shape=[len(CONTINUOUS_COLS), 1],
                                     initializer=init_ops.zeros_initializer,
                                     trainable=True,
                                     name="Wide_Part_Weights_Continus")
            bias = tf.get_variable(shape=[1],
                                   initializer=init_ops.zeros_initializer,
                                   trainable=True,
                                   name="Wide_Part_Bias")
            x = tf.concat([
                tf.expand_dims(tf.to_float(features[col]), -1)
                for col in CONTINUOUS_COLS
            ],
                          1,
                          name='continus_concat')
            continue_logits = tf.matmul(x, w_wide) + bias

            wide_logits = tf.reduce_sum(wide_sum, 0)
            wide_logits += continue_logits
    ##################
    deep_logits = None
    dnn_absolute_scope = None
    if params['DEEP']:
        # with tf.variable_scope('Deep_model'):
        with variable_scope.variable_scope(
                'Deep_model',
                values=tuple(six.itervalues(features)),
        ) as scope:
            dnn_absolute_scope = scope.name
            # Convert categorical (string) values to embeddings
            deep_sum = []
            for col, vals, embedding_size, col_type in DEEP_EMBEDDING_COLS:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                # embed_initializer = tf.truncated_normal_initializer(
                #     stddev=(1.0 / tf.sqrt(float(embedding_size))))
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_embedding_" + col)

                if col_type != 'int':
                    indices = string_ops.string_to_hash_bucket_fast(
                        features[col], bucket_size, name="deep_hash_" + col)
                else:
                    table = tf.contrib.lookup.index_table_from_tensor(vals)
                    indices = table.lookup(features[col])
                seq_emb = tf.nn.embedding_lookup(embeddings,
                                                 indices,
                                                 name="deep_lookup_" + col)
                if col_type == 'seq':
                    print("test my seq:", col)
                    seq_emb = tf.reduce_mean(seq_emb, 1)
                deep_sum.append(seq_emb)
            for cols, vals, embedding_size, col_type, shared_flag in DEEP_SHARED_EMBEDDING_COLS:

                def get_indices(col, embedding_size, bucket_size):
                    if col_type != 'int':
                        indices = string_ops.string_to_hash_bucket_fast(
                            features[col],
                            bucket_size,
                            name="deep_shared_hash_" + col + str(shared_flag))
                    else:
                        # Build the id lookup table from the shared
                        # vocabulary list (as in the non-shared branch).
                        table = tf.contrib.lookup.index_table_from_tensor(
                            vals)
                        indices = table.lookup(features[col])
                    return indices

                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_shared_embedding_" + '_'.join(c for c in cols) +
                    str(shared_flag))
                for col in cols:
                    indices = get_indices(col, embedding_size, bucket_size)
                    seq_emb = tf.nn.embedding_lookup(
                        embeddings,
                        indices,
                        name="deep_shared_lookup_" + col + str(shared_flag))
                    if col.endswith('seq'):
                        seq_emb = tf.reduce_mean(seq_emb, 1)
                    deep_sum.append(seq_emb)
            for col in CONTINUOUS_COLS:
                deep_sum.append(
                    tf.expand_dims(tf.to_float(features[col]),
                                   -1,
                                   name='continuous_' + col))
            curr_layer = tf.concat(deep_sum, 1, name="deep_inputs_layer")

            # Build the DNN

            for index, layer_size in enumerate(_HIDDEN_UNITS):
                curr_layer = tf.layers.dense(
                    curr_layer,
                    layer_size,
                    activation=tf.nn.relu,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_hidden_layer" + str(index))
            deep_logits = tf.layers.dense(curr_layer,
                                          units=1,
                                          name="deep_logits")
    ####################################

    my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        loss_reduction=losses.Reduction.SUM)
    print(my_head.logits_dimension)

    if deep_logits is not None and wide_logits is not None:
        logits = deep_logits + wide_logits
    elif deep_logits is not None:
        logits = deep_logits
    else:
        logits = wide_logits

    dnn_optimizer = optimizers.get_optimizer_instance(
        'Adagrad', learning_rate=_DNN_LEARNING_RATE)

    def _linear_learning_rate(num_linear_feature_columns):
        default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
        return min(_LINEAR_LEARNING_RATE, default_learning_rate)

    linear_optimizer = optimizers.get_optimizer_instance(
        'Ftrl', learning_rate=_linear_learning_rate(len(WIDE_CATE_COLS)))

    def _train_op_fn(loss):
        train_ops = []
        global_step = training_util.get_global_step()
        if deep_logits is not None:
            train_ops.append(
                dnn_optimizer.minimize(loss,
                                       var_list=ops.get_collection(
                                           ops.GraphKeys.TRAINABLE_VARIABLES,
                                           scope=dnn_absolute_scope)))
        if wide_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=linear_absolute_scope)))

        train_op = control_flow_ops.group(*train_ops)
        with ops.control_dependencies([train_op]):
            return state_ops.assign_add(global_step, 1).op

    return my_head, logits, _train_op_fn
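
`wide_and_deep` returns a `(head, logits, train_op_fn)` triple rather than an `EstimatorSpec`, so a thin `model_fn` is still needed to wire it into `tf.estimator.Estimator`. A minimal sketch, assuming `params` carries the same keys the function reads above:

```python
# Sketch only: wrap wide_and_deep() into a model_fn for tf.estimator.Estimator.
import tensorflow as tf

def model_fn(features, labels, mode, params):
    my_head, logits, train_op_fn = wide_and_deep(features=features,
                                                 params=params)
    # The head turns the raw logits into loss, metrics, predictions and the
    # train op produced by train_op_fn.
    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=train_op_fn)

# estimator = tf.estimator.Estimator(model_fn=model_fn, params=my_params)
```
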
Example 33
    def __init__(self,
                 model_dir=None,
                 linear_feature_columns=None,
                 linear_optimizer='Ftrl',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 dnn_activation_fn=nn.relu,
                 dnn_dropout=None,
                 n_classes=2,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      input_layer_partitioner: Partitioner for input layer. Defaults to
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: RunConfig object to configure the runtime settings.

    Raises:
      ValueError: If both linear_feature_columns and dnn_feature_columns are
        empty at the same time.
    """
        linear_feature_columns = linear_feature_columns or []
        dnn_feature_columns = dnn_feature_columns or []
        self._feature_columns = linear_feature_columns + dnn_feature_columns
        if not self._feature_columns:
            raise ValueError(
                'Either linear_feature_columns or dnn_feature_columns '
                'must be defined.')
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            )  # pylint: disable=protected-access
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes)

        def _model_fn(features, labels, mode, config):
            return _dnn_linear_combined_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                linear_feature_columns=linear_feature_columns,
                linear_optimizer=linear_optimizer,
                dnn_feature_columns=dnn_feature_columns,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                dnn_activation_fn=dnn_activation_fn,
                dnn_dropout=dnn_dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DNNLinearCombinedClassifier, self).__init__(model_fn=_model_fn,
                                                          model_dir=model_dir,
                                                          config=config)
Example 34
def binary_classification_head(
        weight_column=None,
        thresholds=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
        loss_fn=None,
        name=None):
    """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
  In many applications, the shape is `[batch_size, 1]`.

  `labels` must be a dense `Tensor` with shape matching `logits`, namely
  `[D0, D1, ... DN, 1]`. If `label_vocabulary` is given, `labels` must be a string
  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
  `labels` must be float `Tensor` with values in the interval `[0, 1]`.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.

  The loss is the weighted sum over the input dimensions. Namely, if the input
  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
  `batch_size`.

  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
  `(labels, logits, features)` as arguments and returns unreduced loss with
  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
  the input labels before passing them to `loss_fn`.

  The head can be used with a canned estimator. Example:

  ```python
  my_head = tf.contrib.estimator.binary_classification_head()
  my_estimator = tf.contrib.estimator.DNNEstimator(
      head=my_head,
      hidden_units=...,
      feature_columns=...)
  ```

  It can also be used with a custom `model_fn`. Example:

  ```python
  def _my_model_fn(features, labels, mode):
    my_head = tf.contrib.estimator.binary_classification_head()
    logits = tf.keras.Model(...)(features)

    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
        logits=logits)

  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
  ```

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, labels must be float with values within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
    loss_fn: Optional loss function.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
    ValueError: If `loss_reduction` is invalid.
  """
    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
        weight_column=weight_column,
        thresholds=thresholds,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction,
        loss_fn=loss_fn,
        name=name)
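
The docstring above describes `loss_fn` but does not show one. A minimal sketch of a custom unreduced loss passed to the head; the 2x positive weighting is illustrative only:

```python
# Sketch of a custom loss_fn for binary_classification_head.
# It must return an unreduced loss with the same shape as the logits.
import tensorflow as tf

def my_loss_fn(labels, logits):
    # labels arrive as floats in [0, 1]; the head has already applied
    # label_vocabulary if one was given.
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits)
    # Up-weight positive examples by 2x (illustrative choice).
    return per_example * (1.0 + labels)

head = binary_classification_head(
    loss_fn=my_loss_fn,
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
```
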
Example 35
    def __init__(self,
                 sequence_feature_columns,
                 context_feature_columns=None,
                 num_units=None,
                 cell_type=USE_DEFAULT,
                 rnn_cell_fn=None,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `RNNClassifier` instance.

    Args:
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `_DenseColumn` such as
        `numeric_column`, not the sequential variants.
      num_units: Iterable of integer number of hidden units per RNN layer. If
        set, `cell_type` must also be specified and `rnn_cell_fn` must be
        `None`.
      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
        must be `None`.
      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
        This is for advanced users who need additional customization beyond
        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
        needed for stacked RNNs.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
        Also there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
        type. Defaults to Adagrad optimizer.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.

    Raises:
      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
        compatible.
    """
        rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)

        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _rnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                rnn_cell_fn=rnn_cell_fn,
                sequence_feature_columns=tuple(sequence_feature_columns or []),
                context_feature_columns=tuple(context_feature_columns or []),
                optimizer=optimizer,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(RNNClassifier, self).__init__(model_fn=_model_fn,
                                            model_dir=model_dir,
                                            config=config)
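
A usage sketch for the classifier above, assuming the contrib sequence feature columns are available; the feature key and sizes are hypothetical:

```python
# Hypothetical usage of the RNNClassifier defined above.
import tensorflow as tf

token_ids = tf.contrib.feature_column.sequence_categorical_column_with_identity(
    "token_ids", num_buckets=10000)
token_emb = tf.feature_column.embedding_column(token_ids, dimension=32)

classifier = RNNClassifier(
    sequence_feature_columns=[token_emb],
    num_units=[32, 16],   # two stacked RNN layers
    cell_type="lstm",     # rnn_cell_fn must stay None when these are set
    n_classes=2,
    model_dir="/tmp/rnn_clf")
```
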
Example 36
    def __init__(self,
                 model_dir=None,
                 linear_feature_columns=None,
                 linear_optimizer='Ftrl',
                 linear_sparse_combiner='sum',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 dnn_activation_fn=nn.relu,
                 dnn_dropout=None,
                 fm_embedding_size=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 input_layer_partitioner=None,
                 config=None,
                 warm_start_from=None,
                 loss_reduction=tf.losses.Reduction.SUM,
                 batch_norm=False):
        """
        仿照wide_deep官方模型写法的deepFM模型,使用feature_column作特征处理。
        * 不用将数据集转换为libsvm格式.
        * 支持multi-hot类型特征,例如用户的历史消费物品序列.
        * 暂不支持数值型特征的交叉.
        【特征说明】
        参考train.py.
        1. 原始特征分为两种:
        * 数值特征(dense):没啥好说的
        * 离散特征(sparse):id类特征(vocabulary size特别大,使用特征hash),非id类特征(vocabulary size在几个到几百个之间,直接one-hot编码)
        特别的,当多个不定长离散特征组成一个序列,直接编码就是multi-hot特征,非常常见。
        2. 简要叙述官方wide_deep
        原生的wide部分由于其实现方式,直接支持sparse tensor的输入;
        而dnn部分只支持dense tensor输入。
        这里直接copy官方写法,也分为两部分输入。
        【DeepFM简要说明】
        * 按理说,deepFM的一阶项和二阶项应该都是相同的输入,分为两部分是仿照wide_deep写法,能够直接复用linear model代码。
        * 数值特征目前还不支持。
        * 将离散特征直接传给linear_feature_columns,作为一阶项的输入。
        * 将离散特征通过embedding变为dense tensor后传给dnn_feature_columns,作为二阶项和dnn的输入。
        * 实质上,一阶项和二阶项的输入是相同的。
        * 为了稍微利用数值特征,将数值特征分桶后用在了linear部分,而二阶项和dnn部分目前没有使用数值特征(todo)
        """

        linear_feature_columns = linear_feature_columns or []
        dnn_feature_columns = dnn_feature_columns or []
        self._feature_columns = (list(linear_feature_columns) +
                                 list(dnn_feature_columns))
        if not self._feature_columns:
            raise ValueError('empty columns.')

        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _DeepFM_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                linear_feature_columns=linear_feature_columns,
                linear_optimizer=linear_optimizer,
                linear_sparse_combiner=linear_sparse_combiner,
                dnn_feature_columns=dnn_feature_columns,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                dnn_activation_fn=dnn_activation_fn,
                dnn_dropout=dnn_dropout,
                fm_embedding_size=fm_embedding_size,
                input_layer_partitioner=input_layer_partitioner,
                config=config,
                batch_norm=batch_norm)

        super(DeepFM, self).__init__(model_fn=_model_fn,
                                     model_dir=model_dir,
                                     config=config,
                                     warm_start_from=warm_start_from)
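
A usage sketch for the estimator above: the same categorical features feed the first-order (linear) part directly and the second-order/DNN part through embeddings of a shared size. Feature keys and sizes are hypothetical:

```python
# Hypothetical usage of the DeepFM estimator defined above.
import tensorflow as tf

user_id = tf.feature_column.categorical_column_with_hash_bucket("user_id", 10000)
item_id = tf.feature_column.categorical_column_with_hash_bucket("item_id", 50000)

fm_dim = 8
model = DeepFM(
    model_dir="/tmp/deepfm",
    linear_feature_columns=[user_id, item_id],   # first-order term
    dnn_feature_columns=[                        # second-order term + DNN
        tf.feature_column.embedding_column(user_id, fm_dim),
        tf.feature_column.embedding_column(item_id, fm_dim),
    ],
    dnn_hidden_units=[64, 32],
    fm_embedding_size=fm_dim)
```
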
Example 37
def binary_classification_head(
    weight_column=None,
    thresholds=None,
    label_vocabulary=None,
    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
    loss_fn=None,
    name=None):
  """Creates a `_Head` for single label binary classification.

  This head uses `sigmoid_cross_entropy_with_logits` loss.

  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
  In many applications, the shape is `[batch_size, 1]`.

  `labels` must be a dense `Tensor` with shape matching `logits`, namely
  `[D0, D1, ... DN, 1]`. If `label_vocabulary` is given, `labels` must be a string
  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
  `labels` must be float `Tensor` with values in the interval `[0, 1]`.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.

  The loss is the weighted sum over the input dimensions. Namely, if the input
  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
  `batch_size`.

  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
  `(labels, logits, features)` as arguments and returns unreduced loss with
  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
  the input labels before passing them to `loss_fn`.

  The head can be used with a canned estimator. Example:

  ```python
  my_head = tf.contrib.estimator.binary_classification_head()
  my_estimator = tf.contrib.estimator.DNNEstimator(
      head=my_head,
      hidden_units=...,
      feature_columns=...)
  ```

  It can also be used with a custom `model_fn`. Example:

  ```python
  def _my_model_fn(features, labels, mode):
    my_head = tf.contrib.estimator.binary_classification_head()
    logits = tf.keras.Model(...)(features)

    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
        logits=logits)

  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
  ```

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`).
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, labels must be float with values within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
    loss_fn: Optional loss function.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
    ValueError: If `loss_reduction` is invalid.
  """
  return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
      weight_column=weight_column,
      thresholds=thresholds,
      label_vocabulary=label_vocabulary,
      loss_reduction=loss_reduction,
      loss_fn=loss_fn,
      name=name)
    def __init__(self,
                 model_collections,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `DNNClassifier` instance.
            Args:
               feature_columns: An iterable containing all the feature columns used by
                 the model. All items in the set should be instances of classes derived
                 from `_FeatureColumn`.
               model_dir: Directory to save model parameters, graph, etc. This can
                 also be used to load checkpoints from the directory into an estimator
                 to continue training a previously saved model.
               n_classes: Number of label classes. Defaults to 2, namely binary
                 classification. Must be > 1.
               weight_column: A string or a `_NumericColumn` created by
                 `tf.feature_column.numeric_column` defining feature column representing
                 weights. It is used to down weight or boost examples during training. It
                 will be multiplied by the loss of the example. If it is a string, it is
                 used as a key to fetch weight tensor from the `features`. If it is a
                 `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
                 then weight_column.normalizer_fn is applied on it to get weight tensor.
               label_vocabulary: A list of strings representing possible label values. If
                 given, labels must be string type and have any value in
                 `label_vocabulary`. If it is not given, that means labels are
                 already encoded as integer or float within [0, 1] for `n_classes=2` and
                 encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
                 Also there will be errors if vocabulary is not provided and labels are
                 string.
               optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
                 to Adagrad optimizer.
               activation_fn: Activation function applied to each layer. If `None`, will
                 use `tf.nn.relu`.
               dropout: When not `None`, the probability we will drop out a given
                 coordinate.
               input_layer_partitioner: Optional. Partitioner for input layer. Defaults
                 to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
               config: `RunConfig` object to configure the runtime settings.
        """
        if not model_collections:
            raise ValueError(
                'Empty model collections, must fill DNN model instance.')
        assert isinstance(
            model_collections,
            (list, tuple)), "model_collections must be a list or tuple"
        for model in model_collections:
            if not isinstance(model, DNN):
                raise ValueError(
                    "model_collections element must be an instance of class DNN"
                )
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)

        def _dnn_model_fn(features,
                          labels,
                          mode,
                          head,
                          optimizer='Adagrad',
                          input_layer_partitioner=None,
                          config=None):
            """Deep Neural Net model_fn.
            Args:
              features: dict of `Tensor`.
              labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
                dtype `int32` or `int64` in the range `[0, n_classes)`.
              mode: Defines whether this is training, evaluation or prediction.
                See `ModeKeys`.
              head: A `head_lib._Head` instance.
              hidden_units: Iterable of integer number of hidden units per layer.
              feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
              optimizer: String, `tf.Optimizer` object, or callable that creates the
                optimizer to use for training. If not specified, will use the Adagrad
                optimizer with a default learning rate of 0.05.
              activation_fn: Activation function applied to each layer.
              dropout: When not `None`, the probability we will drop out a given
                coordinate.
              input_layer_partitioner: Partitioner for input layer. Defaults
                to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
              config: `RunConfig` object to configure the runtime settings.
            Returns:
              predictions: A dict of `Tensor` objects.
              loss: A scalar containing the loss of the step.
              train_op: The op for training.
            Raises:
              ValueError: If features has the wrong type.
            """
            if not isinstance(features, dict):
                raise ValueError(
                    'features should be a dictionary of `Tensor`s. '
                    'Given type: {}'.format(type(features)))
            optimizer = get_optimizer_instance(optimizer, learning_rate=0.05)
            num_ps_replicas = config.num_ps_replicas if config else 0

            partitioner = tf.compat.v1.min_max_variable_partitioner(
                max_partitions=num_ps_replicas)
            with tf.compat.v1.variable_scope('dnn',
                                             values=tuple(
                                                 six.itervalues(features)),
                                             partitioner=partitioner):
                input_layer_partitioner = input_layer_partitioner or (
                    tf.compat.v1.min_max_variable_partitioner(
                        max_partitions=num_ps_replicas,
                        min_slice_size=64 << 20))
                # unit is num_classes, shape(batch_size, num_classes)
                logits = []
                for idx, m in enumerate(model_collections):
                    logits.append(
                        _dnn_logit_fn(features, mode, idx + 1,
                                      head.logits_dimension, m.hidden_units,
                                      m.connected_layers, feature_columns,
                                      input_layer_partitioner))
                logits = tf.add_n(
                    logits
                )  # summing the per-model logits is equivalent to concatenating the layers before the logit layer

                def _train_op_fn(loss):
                    """Returns the op to optimize the loss."""
                    return optimizer.minimize(
                        loss, global_step=tf.compat.v1.train.get_global_step())

            return head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              train_op_fn=_train_op_fn,
                                              logits=logits)

        def _model_fn(features, labels, mode, config):
            return _dnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                optimizer=optimizer,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(MultiDNNClassifier, self).__init__(model_fn=_model_fn,
                                                 model_dir=model_dir,
                                                 config=config)
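
A sketch of constructing the classifier above with two sub-networks whose logits are summed before the head. The `DNN` config class is only assumed here to take the `hidden_units` and `connected_layers` attributes the model_fn reads:

```python
# Hypothetical usage of MultiDNNClassifier; the DNN constructor is assumed.
import tensorflow as tf

age = tf.feature_column.numeric_column("age")
income = tf.feature_column.numeric_column("income")

classifier = MultiDNNClassifier(
    model_collections=[
        DNN(hidden_units=[64, 32], connected_layers=None),       # assumed ctor
        DNN(hidden_units=[128, 64, 32], connected_layers=None),  # assumed ctor
    ],
    feature_columns=[age, income],
    n_classes=2,
    model_dir="/tmp/multi_dnn")
```
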
Example 39
  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
        Also there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
        to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights and biases are warm-started, and it is assumed that vocabularies
        and Tensor names are unchanged.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_column=weight_column,
          label_vocabulary=label_vocabulary)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn and possibly warm-start."""
      estimator_spec = _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config)
      # pylint: disable=protected-access
      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
          warm_start_from)
      if warm_start_settings:
        warm_starting_util._warm_start(warm_start_settings)
      # pylint: enable=protected-access

      return estimator_spec

    super(LinearClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config)
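
A short sketch showing the string-label and warm-start paths of the constructor above; the feature key and checkpoint path are hypothetical:

```python
# Hypothetical usage of the LinearClassifier defined above.
import tensorflow as tf

country = tf.feature_column.categorical_column_with_vocabulary_list(
    "country", ["us", "ca", "mx"])

classifier = LinearClassifier(
    feature_columns=[country],
    n_classes=2,
    label_vocabulary=["no", "yes"],    # labels arrive as strings
    warm_start_from="/tmp/linear_v1")  # reuse weights from a previous run
```
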
Example 40
def build_estimator(output_dir, first_layer_size, num_layers, dropout,
                    learning_rate, save_checkpoints_steps):
    """Builds and returns a DNN Estimator, defined by input parameters.

  Args:
    output_dir: string, directory to save Estimator.
    first_layer_size: int, size of first hidden layer of DNN.
    num_layers: int, number of hidden layers.
    dropout: float, dropout rate used in training.
    learning_rate: float, learning_rate used in training.
    save_checkpoints_steps: int, training steps to save Estimator.

  Returns:
    `Estimator` instance.
  """

    # Sets head to default head for DNNClassifier with two classes.
    model_params = {
        'head':
        head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(),
        'feature_columns': [
            tf.feature_column.numeric_column(c, shape=[])
            for c in constants.FEATURE_COLUMNS
        ],
        'hidden_units': [
            max(int(first_layer_size / (pow(2, i))), 2)
            for i in range(int(num_layers))
        ],
        'dropout':
        dropout,
        'optimizer':
        tf.train.AdagradOptimizer(learning_rate)
    }

    def _model_fn(features, labels, mode, params):
        """Build TF graph based on canned DNN classifier."""

        key_column = features.pop(constants.KEY_COLUMN, None)
        if key_column is None:
            raise ValueError('Key is missing from features.')

        spec = _dnn_model_fn(features=features,
                             labels=labels,
                             mode=mode,
                             **params)

        predictions = spec.predictions
        if predictions:
            predictions[
                constants.KEY_COLUMN] = tf.convert_to_tensor_or_sparse_tensor(
                    key_column)
            spec = spec._replace(predictions=predictions)
            spec = spec._replace(
                export_outputs={
                    'classes': tf.estimator.export.PredictOutput(predictions)
                })
        return spec

    config = tf.estimator.RunConfig(
        save_checkpoints_steps=save_checkpoints_steps)
    return tf.estimator.Estimator(model_fn=_model_fn,
                                  model_dir=output_dir,
                                  config=config,
                                  params=model_params)
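
For reference, the `hidden_units` comprehension above halves the layer size for each additional layer, floored at 2; `first_layer_size=64, num_layers=4` yields `[64, 32, 16, 8]`. A hypothetical call:

```python
# Hypothetical call to build_estimator; the directory and hyperparameters are
# illustrative only.
estimator = build_estimator(
    output_dir="/tmp/dnn_with_keys",
    first_layer_size=64,
    num_layers=4,               # hidden_units becomes [64, 32, 16, 8]
    dropout=0.2,
    learning_rate=0.05,
    save_checkpoints_steps=1000)

# Each prediction also carries the example key popped from the features, so
# batched predictions can be joined back to their input rows.
```
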
Example 41
  def __init__(self,
               sequence_feature_columns,
               context_feature_columns=None,
               num_units=None,
               cell_type=USE_DEFAULT,
               rnn_cell_fn=None,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Adagrad',
               loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
               input_layer_partitioner=None,
               config=None):
    """Initializes a `RNNClassifier` instance.

    Args:
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `_DenseColumn` such as
        `numeric_column`, not the sequential variants.
      num_units: Iterable of integer number of hidden units per RNN layer. If
        set, `cell_type` must also be specified and `rnn_cell_fn` must be
        `None`.
      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
        must be `None`.
      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
        This is for advanced users who need additional customization beyond
        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
        needed for stacked RNNs.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are
        already encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
        Also there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
        type. Defaults to Adagrad optimizer.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.

    Raises:
      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
        compatible.
    """
    rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)

    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes,
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      return _rnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          rnn_cell_fn=rnn_cell_fn,
          sequence_feature_columns=tuple(sequence_feature_columns or []),
          context_feature_columns=tuple(context_feature_columns or []),
          optimizer=optimizer,
          input_layer_partitioner=input_layer_partitioner,
          config=config)
    super(RNNClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
Example 42
    def __init__(self,
                 model_type=None,
                 model_dir=None,
                 with_cnn=False,
                 cnn_optimizer='Adagrad',
                 linear_feature_columns=None,
                 linear_optimizer='Ftrl',
                 dnn_feature_columns=None,
                 dnn_optimizer='Adagrad',
                 dnn_hidden_units=None,
                 dnn_connected_mode=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a WideDeepCombinedClassifier instance.
        Args:
            model_dir: Directory to save model parameters, graph, etc. This can
                also be used to load checkpoints from the directory into an estimator
                to continue training a previously saved model.
            linear_feature_columns: An iterable containing all the feature columns
                used by linear part of the model. All items in the set must be
                instances of classes derived from `FeatureColumn`.
            linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
                the linear part of the model. Defaults to FTRL optimizer.
            dnn_feature_columns: An iterable containing all the feature columns used
                by deep part of the model. All items in the set must be instances of
                classes derived from `FeatureColumn`.
            dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
                the deep part of the model. Defaults to Adagrad optimizer.
            dnn_hidden_units: List of hidden units per layer. All layers are fully
                connected.
            dnn_activation_fn: Activation function applied to each layer. If None,
                will use `tf.nn.relu`.
            dnn_dropout: When not None, the probability we will drop out
                a given coordinate.
            n_classes: Number of label classes. Defaults to 2, namely binary
                classification. Must be > 1.
            weight_column: A string or a `_NumericColumn` created by
                `tf.feature_column.numeric_column` defining feature column representing
                weights. It is used to down weight or boost examples during training. It
                will be multiplied by the loss of the example. If it is a string, it is
                used as a key to fetch weight tensor from the `features`. If it is a
                `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
                then weight_column.normalizer_fn is applied on it to get weight tensor.
            label_vocabulary: A list of strings represents possible label values. If
                given, labels must be string type and have any value in
                `label_vocabulary`. If it is not given, that means labels are
                already encoded as integer or float within [0, 1] for `n_classes=2` and
                encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
                Also there will be errors if vocabulary is not provided and labels are
                string.
            input_layer_partitioner: Partitioner for input layer. Defaults to
                `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
            config: RunConfig object to configure the runtime settings.
        Raises:
            ValueError: If both linear_feature_columns and dnn_features_columns are
                empty at the same time.
        """
        if not linear_feature_columns and not dnn_feature_columns:
            raise ValueError('Either linear_feature_columns or dnn_feature_columns must be defined.')
        if model_type is None:
            raise ValueError("model_type must be defined; one of `wide`, `deep`, `wide_deep`.")
        else:
            assert model_type in {'wide', 'deep', 'wide_deep'}, (
                "Invalid model type, must be one of `wide`, `deep`, `wide_deep`.")
            if model_type == 'wide':
                if not linear_feature_columns:
                    raise ValueError('linear_feature_columns must be defined for the wide model.')
            elif model_type == 'deep':
                if not dnn_feature_columns:
                    raise ValueError('dnn_feature_columns must be defined for the deep model.')
            elif model_type == 'wide_deep':
                if not (linear_feature_columns and dnn_feature_columns):
                    raise ValueError('Both linear_feature_columns and dnn_feature_columns '
                                     'must be defined for the wide_deep model.')
        if dnn_feature_columns and not dnn_hidden_units:
            raise ValueError('dnn_hidden_units must be defined when dnn_feature_columns is specified.')

        if n_classes == 2:
            # units = 1
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)
        else:
            # units = n_classes
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)

        def _model_fn(features, labels, mode, config):
            return _wide_deep_combined_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                model_type=model_type,
                with_cnn=with_cnn,
                cnn_optimizer=cnn_optimizer,
                linear_feature_columns=linear_feature_columns,
                linear_optimizer=linear_optimizer,
                dnn_feature_columns=dnn_feature_columns,
                dnn_connected_mode=dnn_connected_mode,
                dnn_optimizer=dnn_optimizer,
                dnn_hidden_units=dnn_hidden_units,
                input_layer_partitioner=input_layer_partitioner,
                config=config)
        super(WideAndDeepClassifier, self).__init__(
            model_fn=_model_fn, model_dir=model_dir, config=config)
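
A hedged usage sketch for the combined wide/deep estimator above. The class name follows the `super()` call (`WideAndDeepClassifier`); feature names, input data, and the model directory are illustrative assumptions.

# Hypothetical usage (TF 1.x): the wide part gets the sparse column, the deep
# part the dense and embedded columns; model_type='wide_deep' needs both.
import numpy as np
import tensorflow as tf

gender = tf.feature_column.categorical_column_with_vocabulary_list(
    'gender', ['male', 'female'])
age = tf.feature_column.numeric_column('age')
gender_emb = tf.feature_column.embedding_column(gender, dimension=4)

model = WideAndDeepClassifier(
    model_type='wide_deep',
    linear_feature_columns=[gender],
    dnn_feature_columns=[age, gender_emb],
    dnn_hidden_units=[16, 8],
    n_classes=2,
    model_dir='/tmp/wide_deep_model')  # placeholder path

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'age': np.array([25., 40., 31.], dtype=np.float32),
       'gender': np.array(['male', 'female', 'female'])},
    y=np.array([0, 1, 1]),
    batch_size=3, num_epochs=None, shuffle=True)

model.train(input_fn=train_input_fn, steps=10)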
Example No. 43
    def __init__(self,
                 hidden_units,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_feature_key=None,
                 optimizer='Adagrad',
                 activation_fn=nn.relu,
                 dropout=None,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of the number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means the first layer has 64 nodes
        and the second has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_feature_key: A string defining the feature column name representing
        weights. It is used to down-weight or boost examples during training and
        is multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
    """
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_feature_key=weight_feature_key)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_feature_key=weight_feature_key)

        def _model_fn(features, labels, mode, config):
            return _dnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                hidden_units=hidden_units,
                feature_columns=tuple(feature_columns or []),
                optimizer=optimizer,
                activation_fn=activation_fn,
                dropout=dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DNNClassifier, self).__init__(model_fn=_model_fn,
                                            model_dir=model_dir,
                                            config=config)
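
A short sketch (assumption-laden: class importable as `DNNClassifier`, feature names are placeholders) showing how `weight_feature_key` is fed: the key names an entry of the `features` dict whose values scale each example's loss.

# Hypothetical usage (TF 1.x).
import numpy as np
import tensorflow as tf

classifier = DNNClassifier(
    hidden_units=[16, 8],
    feature_columns=[tf.feature_column.numeric_column('x')],
    weight_feature_key='weight',   # looked up in the features dict below
    n_classes=2)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array([[1.], [2.], [3.]], dtype=np.float32),
       # The third example counts twice as much in the training loss.
       'weight': np.array([[1.], [1.], [2.]], dtype=np.float32)},
    y=np.array([0, 1, 1]),
    batch_size=3, num_epochs=None, shuffle=True)

classifier.train(input_fn=input_fn, steps=5)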
Example No. 44
    def __init__(self,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Ftrl',
                 config=None,
                 partitioner=None):
        """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column that
        represents weights. It is used to down-weight or boost examples during
        training and is multiplied by the loss of the example. If it is a
        string, it is used as a key to fetch the weight tensor from `features`.
        If it is a `_NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
        to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
        raised if the vocabulary is not provided and the labels are strings.
      optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
        to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)

        def _model_fn(features, labels, mode, config):
            return _linear_model_fn(features=features,
                                    labels=labels,
                                    mode=mode,
                                    head=head,
                                    feature_columns=tuple(feature_columns
                                                          or []),
                                    optimizer=optimizer,
                                    partitioner=partitioner,
                                    config=config)

        super(LinearClassifier, self).__init__(model_fn=_model_fn,
                                               model_dir=model_dir,
                                               config=config)
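
A sketch (not from the source) of the `label_vocabulary` behaviour documented above: with a vocabulary supplied, string labels can be fed directly instead of integer class indices. Class availability, feature names, and data are assumptions.

# Hypothetical usage (TF 1.x).
import numpy as np
import tensorflow as tf

classifier = LinearClassifier(
    feature_columns=[tf.feature_column.numeric_column('x')],
    n_classes=2,
    label_vocabulary=['cat', 'dog'])  # 'cat' maps to class 0, 'dog' to class 1

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array([[1.], [2.]], dtype=np.float32)},
    y=np.array([['cat'], ['dog']]),
    batch_size=2, num_epochs=None, shuffle=True)

classifier.train(input_fn=input_fn, steps=5)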
Example No. 45
    def __init__(self,
                 hidden_units,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 activation_fn=nn.relu,
                 dropout=None,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of the number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means the first layer has 64 nodes
        and the second has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column that
        represents weights. It is used to down-weight or boost examples during
        training and is multiplied by the loss of the example. If it is a
        string, it is used as a key to fetch the weight tensor from `features`.
        If it is a `_NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
        to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
        raised if the vocabulary is not provided and the labels are strings.
      optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
        to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
    """
        if n_classes == 2:
            head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)
        else:
            head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
                n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary)

        def _model_fn(features, labels, mode, config):
            return _dnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                hidden_units=hidden_units,
                feature_columns=tuple(feature_columns or []),
                optimizer=optimizer,
                activation_fn=activation_fn,
                dropout=dropout,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(DNNClassifier, self).__init__(model_fn=_model_fn,
                                            model_dir=model_dir,
                                            config=config)
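
A sketch of the `_NumericColumn` form of `weight_column` described in the docstring: the raw 'example_weight' tensor is pulled from `features` and the `normalizer_fn` is applied before the weights multiply the per-example loss. Names are illustrative, not from the source.

# Hypothetical usage (TF 1.x).
import tensorflow as tf

weight_col = tf.feature_column.numeric_column(
    'example_weight', normalizer_fn=lambda w: w / 10.0)

classifier = DNNClassifier(
    hidden_units=[16, 8],
    feature_columns=[tf.feature_column.numeric_column('x')],
    weight_column=weight_col,   # weights = features['example_weight'] / 10.0
    n_classes=2)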
Example No. 46
  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None,
               loss_reduction=losses.Reduction.SUM,
               sparse_combiner='sum'):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column that
        represents weights. It is used to down-weight or boost examples during
        training and is multiplied by the loss of the example. If it is a
        string, it is used as a key to fetch the weight tensor from `features`.
        If it is a `_NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
        to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
        raised if the vocabulary is not provided and the labels are strings.
      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
        callable. Defaults to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights and biases are warm-started, and it is assumed that vocabularies
        and Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
      sparse_combiner: A string specifying how to reduce if a categorical column
        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
        effectively different ways to do example-level normalization, which can
        be useful for bag-of-words features. For more details, see
        `tf.feature_column.linear_model`.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config,
          sparse_combiner=sparse_combiner)

    super(LinearClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
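
A sketch of the warm-start and sparse_combiner options added in this variant: start a fresh LinearClassifier from an earlier checkpoint and average (rather than sum) multivalent categorical features. Paths and feature names are placeholders.

# Hypothetical usage (TF 1.x).
import tensorflow as tf

terms = tf.feature_column.categorical_column_with_hash_bucket(
    'terms', hash_bucket_size=1000)

classifier = LinearClassifier(
    feature_columns=[terms],
    sparse_combiner='mean',                 # per-example mean over the bag of terms
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
    warm_start_from='/tmp/previous_model',  # checkpoint dir or WarmStartSettings
    model_dir='/tmp/new_model')             # placeholder paths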
Example No. 47
  def __init__(self,
               hidden_units,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Adagrad',
               activation_fn=nn.relu,
               dropout=None,
               input_layer_partitioner=None,
               config=None):
    """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of the number of hidden units per layer. All layers
        are fully connected. Ex. `[64, 32]` means the first layer has 64 nodes
        and the second has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column that
        represents weights. It is used to down-weight or boost examples during
        training and is multiplied by the loss of the example. If it is a
        string, it is used as a key to fetch the weight tensor from `features`.
        If it is a `_NumericColumn`, the raw tensor is fetched by key
        `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
        to get the weight tensor.
      label_vocabulary: A list of strings representing possible label values. If
        given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
        raised if the vocabulary is not provided and the labels are strings.
      optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
        to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
    """
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_column=weight_column,
          label_vocabulary=label_vocabulary)
    def _model_fn(features, labels, mode, config):
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config)
    super(DNNClassifier, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
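
Finally, a sketch of the multi-class branch above: any `n_classes > 2` selects the softmax cross-entropy head instead of the sigmoid binary head. Values below are illustrative.

# Hypothetical usage (TF 1.x).
import tensorflow as tf

multi_class = DNNClassifier(
    hidden_units=[32, 16],
    feature_columns=[tf.feature_column.numeric_column('x', shape=(4,))],
    n_classes=3,                       # > 2, so the softmax multi-class head is used
    label_vocabulary=['low', 'mid', 'high'])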