Example #1
  def test_loss_reduction(self):
    """Tests loss reduction.

    Use `loss` method in eager execution, else `create_estimator_spec` in TRAIN
    mode.

    logits = [[[2., 3., 4.], [5., -0.5, 0.]],
              [[-1.0, 2.0, 0.5], [_]]],
    labels = [[0, 1],
              [2, _]]
    weights = [[0.5, 0.2],
               [0.3, _]]
    loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
    """
    static_head = multi_head_lib.MultiClassHead(
        n_classes=3, weight_column='weights')
    head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                              'weights')
    expected_loss = 0.942783
    features = {
        'weights':
            tf.sparse.SparseTensor(
                indices=((0, 0), (0, 1), (1, 0)),
                values=(0.5, 0.2, 0.3),
                dense_shape=(2, 2)),
        'sequence_mask':
            ops.convert_to_tensor([[1, 1], [1, 0]])
    }
    logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                    [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
    labels = tf.sparse.SparseTensor(
        indices=((0, 0), (0, 1), (1, 0)), values=(0, 1, 2), dense_shape=(2, 2))

    class _Optimizer(tf.keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params, loss
        return [tf.constant('op')]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    if tf.executing_eagerly():
      loss = head.loss(logits=logits, labels=labels, features=features)
    else:
      spec = head.create_estimator_spec(
          features,
          ModeKeys.TRAIN,
          logits,
          labels=labels,
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
      with self.cached_session() as sess:
        loss = sess.run(spec.loss)
    self.assertAllClose(loss, expected_loss, atol=1e-4)
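The weighted reduction worked out in the docstring can be checked by hand. Below is a self-contained NumPy sketch (the `xent` helper is purely illustrative, not part of the test):

import numpy as np

def xent(logits, label):
  # Softmax cross-entropy for a single step.
  p = np.exp(logits) / np.sum(np.exp(logits))
  return -np.log(p[label])

step_losses = [xent([2., 3., 4.], 0),      # ~2.41
               xent([5., -0.5, 0.], 1),    # ~5.51
               xent([-1.0, 2.0, 0.5], 2)]  # ~1.74
weights = [0.5, 0.2, 0.3]
print(np.dot(weights, step_losses) / 3)    # ~0.9428; the padded step is masked out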
Example #2
    def test_loss_reduction(self):
        """Tests loss reduction.

    Use `loss` method in eager execution, else `create_estimator_spec` in TRAIN
    mode.

    logits = [[[2., 3., 4.], [5., -0.5, 0.]],
              [[-1.0, 2.0, 0.5], [_]]],
    labels = [[0, 1],
              [2, _]]
    weights = [[0.5, 0.2],
               [0.3, _]]
    loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
    """
        static_head = multi_head_lib.MultiClassHead(n_classes=3,
                                                    weight_column='weights')
        head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                                  'weights')
        expected_loss = 0.942783
        features = {
            'weights':
            sparse_tensor.SparseTensor(indices=((0, 0), (0, 1), (1, 0)),
                                       values=(0.5, 0.2, 0.3),
                                       dense_shape=(2, 2)),
            'sequence_mask':
            ops.convert_to_tensor([[1, 1], [1, 0]])
        }
        logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                        [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
        labels = sparse_tensor.SparseTensor(indices=((0, 0), (0, 1), (1, 0)),
                                            values=(0, 1, 2),
                                            dense_shape=(2, 2))

        class _Optimizer(object):
            def minimize(self, loss, global_step):
                del global_step, loss
                return constant_op.constant('op')

        if context.executing_eagerly():
            loss = head.loss(logits=logits, labels=labels, features=features)
        else:
            spec = head.create_estimator_spec(features,
                                              ModeKeys.TRAIN,
                                              logits,
                                              labels,
                                              optimizer=_Optimizer())
            with self.cached_session() as sess:
                loss = sess.run(spec.loss)
        self.assertAllClose(loss, expected_loss, atol=1e-4)
Example #3
def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
                               loss_reduction):
    """Creates either binary or multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`,
      then weight_column.normalizer_fn is applied on it to get weight tensor.
    label_vocabulary: A list of strings representing possible label values. If
      given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be encoded as
      an integer or float within [0, 1] for `n_classes=2`, or as integer values
      in {0, 1, ..., n_classes-1} for `n_classes` > 2. Errors will also be
      raised if a vocabulary is not provided and the labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how
      to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    A `Head` instance.
  """
    if n_classes == 2:
        # TODO(b/117517419): Update binary_class_head when it's fully implemented.
        head = head_v1._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
            weight_column=weight_column,
            label_vocabulary=label_vocabulary,
            loss_reduction=loss_reduction)
    else:
        head = multi_class_head.MultiClassHead(
            n_classes,
            weight_column=weight_column,
            label_vocabulary=label_vocabulary,
            loss_reduction=loss_reduction)
    return head
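A minimal usage sketch of the helper above: the branch depends only on `n_classes`. The `tf` import and the reduction value below are assumptions consistent with the TF1-era code in this example, not part of the original snippet.

import tensorflow as tf

# n_classes == 2 selects the binary logistic head; larger values select MultiClassHead.
binary = binary_or_multi_class_head(
    n_classes=2, weight_column=None, label_vocabulary=None,
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
multi = binary_or_multi_class_head(
    n_classes=4, weight_column=None, label_vocabulary=['a', 'b', 'c', 'd'],
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)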
Example #4
    def __init__(self,
                 sequence_feature_columns,
                 context_feature_columns=None,
                 units=None,
                 cell_type=USE_DEFAULT,
                 rnn_cell_fn=None,
                 return_sequences=False,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                 sequence_mask='sequence_mask',
                 config=None):
        """Initializes a `RNNClassifier` instance.

    Args:
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      units: Iterable of integer number of hidden units per RNN layer. If
        set, `cell_type` must also be specified and `rnn_cell_fn` must be
        `None`.
      cell_type: A class producing a RNN cell or a string specifying the cell
        type. Supported strings are: `'simple_rnn'`, `'lstm'`, and `'gru'`. If
        set, `units` must also be specified and `rnn_cell_fn` must be `None`.
      rnn_cell_fn: A function that returns a RNN cell instance that will be used
        to construct the RNN. If set, `units` and `cell_type` cannot be set.
        This is for advanced users who need additional customization beyond
        `units` and `cell_type`. Note that `tf.keras.layers.StackedRNNCells` is
        needed for stacked RNNs.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence. Note that if True,
        `weight_column` must be None or a string.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
        weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes` > 2. Errors will
        also be raised if a vocabulary is not provided and the labels are
        strings.
      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
        type. Defaults to Adagrad optimizer.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      sequence_mask: A string with the name of the sequence mask tensor. If
        `sequence_mask` is in the features dictionary, the provided tensor is
        used, otherwise the sequence mask is computed from the length of
        sequential features. The sequence mask is used in evaluation and
        training mode to aggregate loss and metrics computation while excluding
        padding steps. It is also added to the predictions dictionary in
        prediction mode to indicate which steps are padding.
      config: `RunConfig` object to configure the runtime settings.

    Note that an RNN cell has:
      - a `call` method.
      - a `state_size` attribute.
      - an `output_size` attribute.
      - a `get_initial_state` method.
    See the documentation on `tf.keras.layers.RNN` for more details.

    Raises:
      ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not
        compatible.
    """
        if n_classes == 2:
            head = binary_head_lib.BinaryClassHead(
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = multi_head_lib.MultiClassHead(
                n_classes=n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        if return_sequences:
            logging.info(
                'Converting head to sequential head with '
                '`SequentialHeadWrapper` to allow sequential predictions.')
            head = seq_head_lib.SequentialHeadWrapper(
                head,
                sequence_length_mask=sequence_mask,
                feature_columns=weight_column)

        super(RNNClassifier,
              self).__init__(head=head,
                             sequence_feature_columns=sequence_feature_columns,
                             context_feature_columns=context_feature_columns,
                             units=units,
                             cell_type=cell_type,
                             rnn_cell_fn=rnn_cell_fn,
                             return_sequences=return_sequences,
                             model_dir=model_dir,
                             optimizer=optimizer,
                             config=config)
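A hedged construction sketch for the class above. The column name, bucket count, embedding size, and layer sizes are illustrative assumptions; it only assumes the `RNNClassifier` defined above is importable and that `tensorflow` is available as `tf`.

import tensorflow as tf

# A hypothetical vocabulary-indexed token sequence, embedded before the RNN.
token_ids = tf.feature_column.sequence_categorical_column_with_identity(
    'token_ids', num_buckets=1000)
sequence_columns = [tf.feature_column.embedding_column(token_ids, dimension=8)]

classifier = RNNClassifier(
    sequence_feature_columns=sequence_columns,
    units=[16, 8],           # two stacked layers, so cell_type must be set too
    cell_type='lstm',
    n_classes=3,
    return_sequences=False)  # one prediction per sequence rather than per step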
Example #5
def _dnn_estimator_classifier_fn(n_classes=3, **kwargs):
  return dnn.DNNEstimatorV2(
      head=multi_class_head.MultiClassHead(
          n_classes=n_classes),
      **kwargs)
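A hypothetical call to this factory: the `hidden_units`, `feature_columns`, and `model_dir` arguments are illustrative and are simply forwarded to `DNNEstimatorV2` through `**kwargs` (assumes `tensorflow` is imported as `tf`).

estimator = _dnn_estimator_classifier_fn(
    n_classes=3,
    hidden_units=[4],
    feature_columns=[tf.feature_column.numeric_column('x', shape=[2])],
    model_dir='/tmp/dnn_multiclass')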
Example #6
    def __init__(self,
                 sequence_feature_columns,
                 context_feature_columns=None,
                 num_units=None,
                 cell_type=USE_DEFAULT,
                 rnn_cell_fn=None,
                 model_dir=None,
                 n_classes=2,
                 weight_column=None,
                 label_vocabulary=None,
                 optimizer='Adagrad',
                 loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
                 input_layer_partitioner=None,
                 config=None):
        """Initializes a `RNNClassifier` instance.

    Args:
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `_DenseColumn` such as
        `numeric_column`, not the sequential variants.
      num_units: Iterable of integer number of hidden units per RNN layer. If
        set, `cell_type` must also be specified and `rnn_cell_fn` must be
        `None`.
      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
        must be `None`.
      rnn_cell_fn: A function that takes one argument, a `tf.estimator.ModeKeys`,
        and returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used
        to construct the RNN. If set, `num_units` and `cell_type` cannot be set.
        This is for advanced users who need additional customization beyond
        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
        needed for stacked RNNs.
      model_dir: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example. If it is a string, it is
        used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as an integer or float within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1, ..., n_classes-1} for `n_classes` > 2. Errors will
        also be raised if a vocabulary is not provided and the labels are
        strings.
      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
        type. Defaults to Adagrad optimizer.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.

    Raises:
      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
        compatible.
    """
        rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)

        if n_classes == 2:
            head = binary_head_lib.BinaryClassHead(
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)
        else:
            head = multi_head_lib.MultiClassHead(
                n_classes=n_classes,
                weight_column=weight_column,
                label_vocabulary=label_vocabulary,
                loss_reduction=loss_reduction)

        def _model_fn(features, labels, mode, config):
            return _rnn_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=head,
                rnn_cell_fn=rnn_cell_fn,
                sequence_feature_columns=tuple(sequence_feature_columns or []),
                context_feature_columns=tuple(context_feature_columns or []),
                return_sequences=False,
                optimizer=optimizer,
                input_layer_partitioner=input_layer_partitioner,
                config=config)

        super(RNNClassifier, self).__init__(model_fn=_model_fn,
                                            model_dir=model_dir,
                                            config=config)
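The docstring above notes that `tf.nn.rnn_cell.MultiRNNCell` is needed for stacked RNNs when `rnn_cell_fn` is used. A minimal TF1-style sketch of such a function (layer sizes are arbitrary, and the function name is hypothetical):

import tensorflow as tf

def stacked_lstm_cell_fn(mode):
  # `mode` is a tf.estimator.ModeKeys value; it is unused in this simple sketch.
  del mode
  return tf.nn.rnn_cell.MultiRNNCell(
      [tf.nn.rnn_cell.BasicLSTMCell(16),
       tf.nn.rnn_cell.BasicLSTMCell(8)])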
Example #7
def _linear_estimator_classifier_fn(n_classes=3, **kwargs):
  return linear.LinearEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)
Example #8
def train_and_evaluate_estimator():
    """Runs Estimator distributed training."""

    # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
    # variables during construction.
    # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
    # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
    config = tf.estimator.RunConfig(
        tf_random_seed=42,
        model_dir=FLAGS.model_dir,
        session_config=tf_compat.v1.ConfigProto(
            log_device_placement=False,
            # Ignore other workers; only talk to parameter servers.
            # Otherwise, when a chief/worker terminates, the others will hang.
            device_filters=["/job:ps"]))

    kwargs = {
        "max_iteration_steps": 100,
        "force_grow": True,
        "delay_secs_per_worker": .2,
        "max_worker_delay_secs": 1,
        "worker_wait_secs": .5,
        # Set low timeout to reduce wait time for failures.
        "worker_wait_timeout_secs": 60,
        "config": config
    }
    head = regression_head.RegressionHead(
        loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
    features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
    labels = [[1.], [0.], [1.], [0.]]
    if FLAGS.placement_strategy == "round_robin":
        kwargs["experimental_placement_strategy"] = RoundRobinStrategy()
    if FLAGS.estimator_type == "autoensemble":
        feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
        candidate_pool = {
            "linear":
            tf.estimator.LinearEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001)),
            "dnn":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[3]),
            "dnn2":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[5]),
        }

        estimator = AutoEnsembleEstimator(head=head,
                                          candidate_pool=candidate_pool,
                                          **kwargs)
    elif FLAGS.estimator_type == "estimator":
        subnetwork_generator = SimpleGenerator([
            _DNNBuilder("dnn1", config, layer_size=3),
            _DNNBuilder("dnn2", config, layer_size=4),
            _DNNBuilder("dnn3", config, layer_size=5),
        ])

        estimator = Estimator(head=head,
                              subnetwork_generator=subnetwork_generator,
                              **kwargs)
    elif FLAGS.estimator_type == "autoensemble_trees_multiclass":
        n_classes = 3
        head = multi_class_head.MultiClassHead(
            n_classes=n_classes, loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)

        def tree_loss_fn(labels, logits):
            result = bt_losses.per_example_maxent_loss(labels=labels,
                                                       logits=logits,
                                                       num_classes=n_classes,
                                                       weights=None)
            return result[0]

        tree_head = multi_class_head.MultiClassHead(
            loss_fn=tree_loss_fn,
            n_classes=n_classes,
            loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
        labels = [[1], [0], [1], [2]]
        feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
        candidate_pool = lambda config: {  # pylint: disable=g-long-lambda
            "linear":
                tf.estimator.LinearEstimator(
                    head=head,
                    feature_columns=feature_columns,
                    optimizer=tf.keras.optimizers.Adam(lr=.001),
                    config=config),
            "gbdt":
                CoreGradientBoostedDecisionTreeEstimator(
                    head=tree_head,
                    learner_config=learner_pb2.LearnerConfig(num_classes=n_classes),
                    examples_per_layer=8,
                    num_trees=None,
                    center_bias=False,  # Required for multi-class.
                    feature_columns=feature_columns,
                    config=config),
        }

        estimator = AutoEnsembleEstimator(head=head,
                                          candidate_pool=candidate_pool,
                                          **kwargs)

    def input_fn():
        input_features = {"x": tf.constant(features, name="x")}
        input_labels = tf.constant(labels, name="y")
        return input_features, input_labels

    train_hooks = [
        tf.estimator.ProfilerHook(save_steps=50, output_dir=FLAGS.model_dir)
    ]
    # Train for three iterations.
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn,
                                        max_steps=300,
                                        hooks=train_hooks)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn,
                                      steps=1,
                                      start_delay_secs=.5,
                                      throttle_secs=.5)

    # Calling train_and_evaluate is the official way to perform distributed
    # training with an Estimator. Calling Estimator#train directly results
    # in an error when the TF_CONFIG is setup for a cluster.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
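For reference, `tf.estimator.RunConfig` reads the cluster layout from the `TF_CONFIG` environment variable mentioned in the comments above. A hedged illustration of its shape, with placeholder hosts and ports:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "chief": ["host0:2222"],
        "worker": ["host1:2222"],
        "ps": ["host2:2222"],
    },
    # This process acts as worker 0; each process in the cluster sets its own task.
    "task": {"type": "worker", "index": 0},
})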
Example #9
  def __init__(self,
               subnetwork_generator,
               max_iteration_steps,
               logits_dimension=1,
               ensemblers=None,
               ensemble_strategies=None,
               evaluator=None,
               adanet_loss_decay=.9,
               filepath=None):
    """Initializes an `adanet.keras.Model`.

    Args:
      subnetwork_generator: The :class:`adanet.subnetwork.Generator` which
        defines the candidate subnetworks to train and evaluate at every AdaNet
        iteration.
      max_iteration_steps: Total number of steps for which to train candidates
        per iteration. If :class:`OutOfRange` or :class:`StopIteration` occurs
        in the middle, training stops before `max_iteration_steps` steps. When
        :code:`None`, it will train the current iteration forever.
      logits_dimension: The dimension of the final layer of any subnetworks.
      ensemblers: An iterable of :class:`adanet.ensemble.Ensembler` objects that
        define how to ensemble a group of subnetworks. If there are multiple,
        each should have a different `name` property.
      ensemble_strategies: An iterable of :class:`adanet.ensemble.Strategy`
        objects that define the candidate ensembles of subnetworks to explore at
        each iteration.
      evaluator: An :class:`adanet.Evaluator` for candidate selection after all
        subnetworks are done training. When :code:`None`, candidate selection
        uses a moving average of their :class:`adanet.Ensemble` AdaNet loss
        during training instead. In order to use the *AdaNet algorithm* as
        described in [Cortes et al., '17], the given :class:`adanet.Evaluator`
        must be created with the same dataset partition used during training.
        Otherwise, this framework will perform *AdaNet.HoldOut* which uses a
        holdout set for candidate selection, but does not benefit from learning
        guarantees.
      adanet_loss_decay: Float decay for the exponential-moving-average of the
        AdaNet objective throughout training. This moving average is a data-
        driven way of tracking the best candidate with only the training set.
      filepath: Directory in which to save model parameters, graph, etc. This
        can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
    """

    logging.warning("""The AdaNet Keras API is currently experimental.""")

    self._subnetwork_generator = subnetwork_generator
    self._max_iteration_steps = max_iteration_steps
    self._logits_dimension = logits_dimension
    self._ensemblers = ensemblers
    self._ensemble_strategies = ensemble_strategies
    self._evaluator = evaluator
    self._adanet_loss_decay = adanet_loss_decay
    self._filepath = filepath
    self._model = None
    # Use lambdas to defer initialization of Head.
    self._loss_head_map = {
        "binary_crossentropy":
            lambda: binary_class_head.BinaryClassHead(),  # pylint: disable=unnecessary-lambda
        "mse":
            lambda: regression_head.RegressionHead(self._logits_dimension),
        "mean_squared_error":
            lambda: regression_head.RegressionHead(self._logits_dimension),
        "sparse_categorical_crossentropy":
            lambda: multi_class_head.MultiClassHead(self._logits_dimension),
    }
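A small sketch of how a deferred map like `_loss_head_map` could be resolved; the `_head_for_loss` helper is hypothetical and not part of the original class:

def _head_for_loss(loss_head_map, loss_name):
  # Resolve one of the deferred constructors above; calling the stored lambda
  # builds the Head only when it is actually needed.
  if loss_name not in loss_head_map:
    raise ValueError("Unsupported loss: %s" % loss_name)
  return loss_head_map[loss_name]()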