def test_loss_reduction(self):
  """Tests loss reduction.

  Uses the `loss` method in eager execution, otherwise
  `create_estimator_spec` in TRAIN mode.

  logits = [[[2., 3., 4.], [5., -0.5, 0.]],
            [[-1.0, 2.0, 0.5], [_]]]
  labels = [[0, 1], [2, _]]
  weights = [[0.5, 0.2], [0.3, _]]
  loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
  """
  static_head = multi_head_lib.MultiClassHead(
      n_classes=3, weight_column='weights')
  head = seq_head_lib.SequentialHeadWrapper(
      static_head, 'sequence_mask', 'weights')
  expected_loss = 0.942783
  features = {
      'weights':
          tf.sparse.SparseTensor(
              indices=((0, 0), (0, 1), (1, 0)),
              values=(0.5, 0.2, 0.3),
              dense_shape=(2, 2)),
      'sequence_mask':
          ops.convert_to_tensor([[1, 1], [1, 0]])
  }
  logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                  [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
  labels = tf.sparse.SparseTensor(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0, 1, 2),
      dense_shape=(2, 2))

  class _Optimizer(tf.keras.optimizers.Optimizer):

    def get_updates(self, loss, params):
      del params, loss
      return [tf.constant('op')]

    def get_config(self):
      return super(_Optimizer, self).get_config()

  if tf.executing_eagerly():
    loss = head.loss(logits=logits, labels=labels, features=features)
  else:
    spec = head.create_estimator_spec(
        features,
        ModeKeys.TRAIN,
        logits,
        labels=labels,
        optimizer=_Optimizer('my_optimizer'),
        trainable_variables=[
            tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
        ])
    with self.cached_session() as sess:
      loss = sess.run(spec.loss)
  self.assertAllClose(loss, expected_loss, atol=1e-4)
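The expected value in the docstring can be reproduced by hand: each unpadded step contributes its softmax cross-entropy, weighted by the corresponding weight and averaged over the three valid steps. A minimal NumPy sketch of that arithmetic (illustration only, not part of the test):

import numpy as np

logits = np.array([[2., 3., 4.], [5., -0.5, 0.], [-1.0, 2.0, 0.5]])
labels = np.array([0, 1, 2])
weights = np.array([0.5, 0.2, 0.3])

# Log-softmax via the identity log p_i = x_i - log(sum_j exp(x_j)), then
# pick out the label class at each step.
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
step_losses = -log_probs[np.arange(3), labels]  # ~[2.408, 5.511, 1.741]
print((weights * step_losses).sum() / 3)        # ~0.942783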
def test_loss_reduction(self):
  """Tests loss reduction.

  Uses the `loss` method in eager execution, otherwise
  `create_estimator_spec` in TRAIN mode.

  logits = [[[2., 3., 4.], [5., -0.5, 0.]],
            [[-1.0, 2.0, 0.5], [_]]]
  labels = [[0, 1], [2, _]]
  weights = [[0.5, 0.2], [0.3, _]]
  loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
  """
  static_head = multi_head_lib.MultiClassHead(
      n_classes=3, weight_column='weights')
  head = seq_head_lib.SequentialHeadWrapper(
      static_head, 'sequence_mask', 'weights')
  expected_loss = 0.942783
  features = {
      'weights':
          sparse_tensor.SparseTensor(
              indices=((0, 0), (0, 1), (1, 0)),
              values=(0.5, 0.2, 0.3),
              dense_shape=(2, 2)),
      'sequence_mask':
          ops.convert_to_tensor([[1, 1], [1, 0]])
  }
  logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                  [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
  labels = sparse_tensor.SparseTensor(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0, 1, 2),
      dense_shape=(2, 2))

  class _Optimizer(object):

    def minimize(self, loss, global_step):
      del global_step, loss
      return constant_op.constant('op')

  if context.executing_eagerly():
    loss = head.loss(logits=logits, labels=labels, features=features)
  else:
    spec = head.create_estimator_spec(
        features, ModeKeys.TRAIN, logits, labels, optimizer=_Optimizer())
    with self.cached_session() as sess:
      loss = sess.run(spec.loss)
  self.assertAllClose(loss, expected_loss, atol=1e-4)
def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
                               loss_reduction):
  """Creates either a binary or a multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or as
      integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how
      to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    A `Head` instance.
  """
  if n_classes == 2:
    # TODO(b/117517419): Update binary_class_head when it's fully
    # implemented.
    head = head_v1._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_class_head.MultiClassHead(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  return head
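A hypothetical call, for illustration only (the feature key and vocabulary are made up; `tf` is assumed to be TensorFlow as in the surrounding snippets). `n_classes > 2` selects `MultiClassHead`, while `n_classes == 2` falls back to the v1 binary logistic head:

head = binary_or_multi_class_head(
    n_classes=4,
    weight_column='example_weights',  # hypothetical feature key
    label_vocabulary=['a', 'b', 'c', 'd'],
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)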
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             return_sequences=False,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
             sequence_mask='sequence_mask',
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either
      be sequence columns (e.g. `sequence_numeric_column`) or constructed
      from one (e.g. `embedding_column` with
      `sequence_categorical_column_*` as input).
    context_feature_columns: An iterable containing the `FeatureColumn`s
      for contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must
      be instances of classes derived from `DenseColumn` such as
      `numeric_column`, not the sequential variants.
    units: Iterable of integer number of hidden units per RNN layer. If
      set, `cell_type` must also be specified and `rnn_cell_fn` must be
      `None`.
    cell_type: A class producing a RNN cell or a string specifying the
      cell type. Supported strings are: `'simple_rnn'`, `'lstm'`, and
      `'gru'`. If set, `units` must also be specified and `rnn_cell_fn`
      must be `None`.
    rnn_cell_fn: A function that returns a RNN cell instance that will be
      used to construct the RNN. If set, `units` and `cell_type` cannot be
      set. This is for advanced users who need additional customization
      beyond `units` and `cell_type`. Note that
      `tf.keras.layers.StackedRNNCells` is needed for stacked RNNs.
    return_sequences: A boolean indicating whether to return the full
      output sequence (`True`) or only the last output (`False`). Note
      that if `True`, `weight_column` must be `None` or a string.
    model_dir: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label
      values. If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or
      as integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.
    sequence_mask: A string with the name of the sequence mask tensor. If
      `sequence_mask` is in the features dictionary, the provided tensor
      is used, otherwise the sequence mask is computed from the length of
      sequential features. The sequence mask is used in evaluation and
      training mode to aggregate loss and metrics computation while
      excluding padding steps. It is also added to the predictions
      dictionary in prediction mode to indicate which steps are padding.
    config: `RunConfig` object to configure the runtime settings.

  Note that a RNN cell has:
    - a `call` method.
    - a `state_size` attribute.
    - an `output_size` attribute.
    - a `get_initial_state` method.
  See the documentation on `tf.keras.layers.RNN` for more details.

  Raises:
    ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  if return_sequences:
    logging.info('Converting head to sequential head with '
                 '`SequentialHeadWrapper` to allow sequential predictions.')
    head = seq_head_lib.SequentialHeadWrapper(
        head,
        sequence_length_mask=sequence_mask,
        feature_columns=weight_column)
  super(RNNClassifier, self).__init__(
      head=head,
      sequence_feature_columns=sequence_feature_columns,
      context_feature_columns=context_feature_columns,
      units=units,
      cell_type=cell_type,
      rnn_cell_fn=rnn_cell_fn,
      return_sequences=return_sequences,
      model_dir=model_dir,
      optimizer=optimizer,
      config=config)
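A hypothetical construction, for illustration only (the feature name and sizes are made up), showing a sequential-prediction classifier that exercises the `SequentialHeadWrapper` path above:

token_ids = tf.feature_column.sequence_categorical_column_with_hash_bucket(
    'tokens', hash_bucket_size=1000)  # 'tokens' is a hypothetical feature
classifier = RNNClassifier(
    sequence_feature_columns=[
        tf.feature_column.embedding_column(token_ids, dimension=16)
    ],
    units=[32],
    cell_type='lstm',
    return_sequences=True,  # wraps the head in `SequentialHeadWrapper`
    n_classes=2)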
def _dnn_estimator_classifier_fn(n_classes=3, **kwargs):
  return dnn.DNNEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)
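For illustration, a hypothetical call (argument values made up; `tf` assumed to be TensorFlow); the remaining `DNNEstimatorV2` arguments such as `hidden_units` and `feature_columns` pass through `**kwargs`:

classifier = _dnn_estimator_classifier_fn(
    n_classes=3,
    hidden_units=[8, 4],
    feature_columns=[tf.feature_column.numeric_column('x', shape=[2])])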
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             num_units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
             input_layer_partitioner=None,
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either
      be sequence columns (e.g. `sequence_numeric_column`) or constructed
      from one (e.g. `embedding_column` with
      `sequence_categorical_column_*` as input).
    context_feature_columns: An iterable containing the `FeatureColumn`s
      for contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must
      be instances of classes derived from `_DenseColumn` such as
      `numeric_column`, not the sequential variants.
    num_units: Iterable of integer number of hidden units per RNN layer.
      If set, `cell_type` must also be specified and `rnn_cell_fn` must be
      `None`.
    cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string
      specifying the cell type. Supported strings are: `'basic_rnn'`,
      `'lstm'`, and `'gru'`. If set, `num_units` must also be specified
      and `rnn_cell_fn` must be `None`.
    rnn_cell_fn: A function that takes one argument, a
      `tf.estimator.ModeKeys`, and returns an object of type
      `tf.nn.rnn_cell.RNNCell` that will be used to construct the RNN. If
      set, `num_units` and `cell_type` cannot be set. This is for advanced
      users who need additional customization beyond `num_units` and
      `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is needed for
      stacked RNNs.
    model_dir: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched
      by key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label
      values. If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or
      as integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.
    input_layer_partitioner: Optional. Partitioner for the input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Raises:
    ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _rnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        rnn_cell_fn=rnn_cell_fn,
        sequence_feature_columns=tuple(sequence_feature_columns or []),
        context_feature_columns=tuple(context_feature_columns or []),
        return_sequences=False,
        optimizer=optimizer,
        input_layer_partitioner=input_layer_partitioner,
        config=config)

  super(RNNClassifier, self).__init__(
      model_fn=_model_fn, model_dir=model_dir, config=config)
def _linear_estimator_classifier_fn(n_classes=3, **kwargs):
  return linear.LinearEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)
def train_and_evaluate_estimator():
  """Runs Estimator distributed training."""
  # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
  # variables during construction.
  # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
  # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
  config = tf.estimator.RunConfig(
      tf_random_seed=42,
      model_dir=FLAGS.model_dir,
      session_config=tf_compat.v1.ConfigProto(
          log_device_placement=False,
          # Ignore other workers; only talk to parameter servers.
          # Otherwise, when a chief/worker terminates, the others will hang.
          device_filters=["/job:ps"]))
  kwargs = {
      "max_iteration_steps": 100,
      "force_grow": True,
      "delay_secs_per_worker": .2,
      "max_worker_delay_secs": 1,
      "worker_wait_secs": .5,
      # Set a low timeout to reduce wait time for failures.
      "worker_wait_timeout_secs": 60,
      "config": config
  }
  head = regression_head.RegressionHead(
      loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
  features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
  labels = [[1.], [0.], [1.], [0.]]
  if FLAGS.placement_strategy == "round_robin":
    kwargs["experimental_placement_strategy"] = RoundRobinStrategy()
  if FLAGS.estimator_type == "autoensemble":
    feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
    candidate_pool = {
        "linear":
            tf.estimator.LinearEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001)),
        "dnn":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[3]),
        "dnn2":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[5]),
    }
    estimator = AutoEnsembleEstimator(
        head=head, candidate_pool=candidate_pool, **kwargs)
  elif FLAGS.estimator_type == "estimator":
    subnetwork_generator = SimpleGenerator([
        _DNNBuilder("dnn1", config, layer_size=3),
        _DNNBuilder("dnn2", config, layer_size=4),
        _DNNBuilder("dnn3", config, layer_size=5),
    ])
    estimator = Estimator(
        head=head, subnetwork_generator=subnetwork_generator, **kwargs)
  elif FLAGS.estimator_type == "autoensemble_trees_multiclass":
    n_classes = 3
    head = multi_class_head.MultiClassHead(
        n_classes=n_classes, loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)

    def tree_loss_fn(labels, logits):
      result = bt_losses.per_example_maxent_loss(
          labels=labels, logits=logits, num_classes=n_classes, weights=None)
      return result[0]

    tree_head = multi_class_head.MultiClassHead(
        loss_fn=tree_loss_fn,
        n_classes=n_classes,
        loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
    labels = [[1], [0], [1], [2]]
    feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
    candidate_pool = lambda config: {  # pylint: disable=g-long-lambda
        "linear":
            tf.estimator.LinearEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=tf.keras.optimizers.Adam(lr=.001),
                config=config),
        "gbdt":
            CoreGradientBoostedDecisionTreeEstimator(
                head=tree_head,
                learner_config=learner_pb2.LearnerConfig(
                    num_classes=n_classes),
                examples_per_layer=8,
                num_trees=None,
                center_bias=False,  # Required for multi-class.
                feature_columns=feature_columns,
                config=config),
    }
    estimator = AutoEnsembleEstimator(
        head=head, candidate_pool=candidate_pool, **kwargs)

  def input_fn():
    input_features = {"x": tf.constant(features, name="x")}
    input_labels = tf.constant(labels, name="y")
    return input_features, input_labels

  train_hooks = [
      tf.estimator.ProfilerHook(save_steps=50, output_dir=FLAGS.model_dir)
  ]
  # Train for three iterations.
  train_spec = tf.estimator.TrainSpec(
      input_fn=input_fn, max_steps=300, hooks=train_hooks)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=input_fn, steps=1, start_delay_secs=.5, throttle_secs=.5)
  # Calling train_and_evaluate is the official way to perform distributed
  # training with an Estimator. Calling Estimator#train directly results
  # in an error when TF_CONFIG is set up for a cluster.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
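Since `RunConfig` reads the cluster layout from the `TF_CONFIG` environment variable, a minimal hypothetical environment for running this script (host addresses are made up) would look like:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "chief": ["host0:2222"],   # coordinates training, saves checkpoints
        "worker": ["host1:2222"],
        "ps": ["host2:2222"],      # matches the "/job:ps" device filter
    },
    "task": {"type": "worker", "index": 0},
})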
def __init__(self,
             subnetwork_generator,
             max_iteration_steps,
             logits_dimension=1,
             ensemblers=None,
             ensemble_strategies=None,
             evaluator=None,
             adanet_loss_decay=.9,
             filepath=None):
  """Initializes an `adanet.keras.Model`.

  Args:
    subnetwork_generator: The :class:`adanet.subnetwork.Generator` which
      defines the candidate subnetworks to train and evaluate at every
      AdaNet iteration.
    max_iteration_steps: Total number of steps for which to train
      candidates per iteration. If :class:`OutOfRange` or
      :class:`StopIteration` occurs in the middle, training stops before
      `max_iteration_steps` steps. When :code:`None`, it will train the
      current iteration forever.
    logits_dimension: The dimension of the final layer of any subnetworks.
    ensemblers: An iterable of :class:`adanet.ensemble.Ensembler` objects
      that define how to ensemble a group of subnetworks. If there are
      multiple, each should have a different `name` property.
    ensemble_strategies: An iterable of :class:`adanet.ensemble.Strategy`
      objects that define the candidate ensembles of subnetworks to
      explore at each iteration.
    evaluator: An :class:`adanet.Evaluator` for candidate selection after
      all subnetworks are done training. When :code:`None`, candidate
      selection uses a moving average of their :class:`adanet.Ensemble`
      AdaNet loss during training instead. In order to use the *AdaNet
      algorithm* as described in [Cortes et al., '17], the given
      :class:`adanet.Evaluator` must be created with the same dataset
      partition used during training. Otherwise, this framework will
      perform *AdaNet.HoldOut*, which uses a holdout set for candidate
      selection but does not benefit from learning guarantees.
    adanet_loss_decay: Float decay for the exponential moving average of
      the AdaNet objective throughout training. This moving average is a
      data-driven way of tracking the best candidate with only the
      training set.
    filepath: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
  """
  logging.warning("The AdaNet Keras API is currently experimental.")
  self._subnetwork_generator = subnetwork_generator
  self._max_iteration_steps = max_iteration_steps
  self._logits_dimension = logits_dimension
  self._ensemblers = ensemblers
  self._ensemble_strategies = ensemble_strategies
  self._evaluator = evaluator
  self._adanet_loss_decay = adanet_loss_decay
  self._filepath = filepath
  self._model = None
  # Use lambdas to defer initialization of Head.
  self._loss_head_map = {
      "binary_crossentropy":
          lambda: binary_class_head.BinaryClassHead(),  # pylint: disable=unnecessary-lambda
      "mse":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "mean_squared_error":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "sparse_categorical_crossentropy":
          lambda: multi_class_head.MultiClassHead(self._logits_dimension),
  }
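A hypothetical instantiation for illustration (`my_generator` is a made-up name for a user-supplied generator); a loss string supplied later, such as "mse", selects the matching deferred head from `_loss_head_map`:

model = Model(
    subnetwork_generator=my_generator,  # hypothetical adanet.subnetwork.Generator
    max_iteration_steps=100,
    logits_dimension=1)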