def test_loss_reduction(self):
  """Tests loss reduction.

  Uses the `loss` method in eager execution, otherwise
  `create_estimator_spec` in TRAIN mode.

  logits = [[[2., 3., 4.], [5., -0.5, 0.]],
            [[-1.0, 2.0, 0.5], [_]]]
  labels = [[0, 1], [2, _]]
  weights = [[0.5, 0.2], [0.3, _]]
  loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
  """
  static_head = multi_head_lib.MultiClassHead(
      n_classes=3, weight_column='weights')
  head = seq_head_lib.SequentialHeadWrapper(
      static_head, 'sequence_mask', 'weights')
  expected_loss = 0.942783
  features = {
      'weights':
          tf.sparse.SparseTensor(
              indices=((0, 0), (0, 1), (1, 0)),
              values=(0.5, 0.2, 0.3),
              dense_shape=(2, 2)),
      'sequence_mask':
          ops.convert_to_tensor([[1, 1], [1, 0]])
  }
  logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                  [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
  labels = tf.sparse.SparseTensor(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0, 1, 2),
      dense_shape=(2, 2))

  class _Optimizer(tf.keras.optimizers.Optimizer):

    def get_updates(self, loss, params):
      del params, loss
      return [tf.constant('op')]

    def get_config(self):
      return super(_Optimizer, self).get_config()

  if tf.executing_eagerly():
    loss = head.loss(logits=logits, labels=labels, features=features)
  else:
    spec = head.create_estimator_spec(
        features,
        ModeKeys.TRAIN,
        logits,
        labels=labels,
        optimizer=_Optimizer('my_optimizer'),
        trainable_variables=[
            tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
        ])
    with self.cached_session() as sess:
      loss = sess.run(spec.loss)
  self.assertAllClose(loss, expected_loss, atol=1e-4)
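The expected value in the docstring can be reproduced by hand: each unpadded step contributes its softmax cross-entropy, weighted by the corresponding weight and averaged over the three valid steps. A minimal NumPy sketch of that arithmetic (illustration only, not part of the test):

import numpy as np

logits = np.array([[2., 3., 4.], [5., -0.5, 0.], [-1.0, 2.0, 0.5]])
labels = np.array([0, 1, 2])
weights = np.array([0.5, 0.2, 0.3])

# Log-softmax via the identity log p_i = x_i - log(sum_j exp(x_j)), then
# pick out the label class at each step.
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
step_losses = -log_probs[np.arange(3), labels]  # ~[2.408, 5.511, 1.741]
print((weights * step_losses).sum() / 3)        # ~0.942783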
def test_loss_reduction(self):
  """Tests loss reduction.

  Uses the `loss` method in eager execution, otherwise
  `create_estimator_spec` in TRAIN mode.

  logits = [[[2., 3., 4.], [5., -0.5, 0.]],
            [[-1.0, 2.0, 0.5], [_]]]
  labels = [[0, 1], [2, _]]
  weights = [[0.5, 0.2], [0.3, _]]
  loss = [0.5*2.40 + 0.2*5.51 + 0.3*1.74] / 3 = 0.94
  """
  static_head = multi_head_lib.MultiClassHead(
      n_classes=3, weight_column='weights')
  head = seq_head_lib.SequentialHeadWrapper(
      static_head, 'sequence_mask', 'weights')
  expected_loss = 0.942783
  features = {
      'weights':
          sparse_tensor.SparseTensor(
              indices=((0, 0), (0, 1), (1, 0)),
              values=(0.5, 0.2, 0.3),
              dense_shape=(2, 2)),
      'sequence_mask':
          ops.convert_to_tensor([[1, 1], [1, 0]])
  }
  logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                  [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
  labels = sparse_tensor.SparseTensor(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0, 1, 2),
      dense_shape=(2, 2))

  class _Optimizer(object):

    def minimize(self, loss, global_step):
      del global_step, loss
      return constant_op.constant('op')

  if context.executing_eagerly():
    loss = head.loss(logits=logits, labels=labels, features=features)
  else:
    spec = head.create_estimator_spec(
        features, ModeKeys.TRAIN, logits, labels, optimizer=_Optimizer())
    with self.cached_session() as sess:
      loss = sess.run(spec.loss)
  self.assertAllClose(loss, expected_loss, atol=1e-4)
def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
                               loss_reduction):
  """Creates either a binary or a multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or as
      integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how
      to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    A `Head` instance.
  """
  if n_classes == 2:
    # TODO(b/117517419): Update binary_class_head when it's fully
    # implemented.
    head = head_v1._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_class_head.MultiClassHead(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  return head
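A hypothetical call, for illustration only (the feature key and vocabulary are made up; `tf` is assumed to be TensorFlow as in the surrounding snippets). `n_classes > 2` selects `MultiClassHead`, while `n_classes == 2` falls back to the v1 binary logistic head:

head = binary_or_multi_class_head(
    n_classes=4,
    weight_column='example_weights',  # hypothetical feature key
    label_vocabulary=['a', 'b', 'c', 'd'],
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)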
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             return_sequences=False,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
             sequence_mask='sequence_mask',
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either
      be sequence columns (e.g. `sequence_numeric_column`) or constructed
      from one (e.g. `embedding_column` with
      `sequence_categorical_column_*` as input).
    context_feature_columns: An iterable containing the `FeatureColumn`s
      for contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must
      be instances of classes derived from `DenseColumn` such as
      `numeric_column`, not the sequential variants.
    units: Iterable of integer number of hidden units per RNN layer. If
      set, `cell_type` must also be specified and `rnn_cell_fn` must be
      `None`.
    cell_type: A class producing a RNN cell or a string specifying the
      cell type. Supported strings are: `'simple_rnn'`, `'lstm'`, and
      `'gru'`. If set, `units` must also be specified and `rnn_cell_fn`
      must be `None`.
    rnn_cell_fn: A function that returns a RNN cell instance that will be
      used to construct the RNN. If set, `units` and `cell_type` cannot be
      set. This is for advanced users who need additional customization
      beyond `units` and `cell_type`. Note that
      `tf.keras.layers.StackedRNNCells` is needed for stacked RNNs.
    return_sequences: A boolean indicating whether to return the full
      output sequence (`True`) or only the last output (`False`). Note
      that if `True`, `weight_column` must be `None` or a string.
    model_dir: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label
      values. If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or
      as integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.
    sequence_mask: A string with the name of the sequence mask tensor. If
      `sequence_mask` is in the features dictionary, the provided tensor
      is used, otherwise the sequence mask is computed from the length of
      sequential features. The sequence mask is used in evaluation and
      training mode to aggregate loss and metrics computation while
      excluding padding steps. It is also added to the predictions
      dictionary in prediction mode to indicate which steps are padding.
    config: `RunConfig` object to configure the runtime settings.

  Note that a RNN cell has:
    - a `call` method.
    - a `state_size` attribute.
    - an `output_size` attribute.
    - a `get_initial_state` method.
  See the documentation on `tf.keras.layers.RNN` for more details.

  Raises:
    ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  if return_sequences:
    logging.info('Converting head to sequential head with '
                 '`SequentialHeadWrapper` to allow sequential predictions.')
    head = seq_head_lib.SequentialHeadWrapper(
        head,
        sequence_length_mask=sequence_mask,
        feature_columns=weight_column)
  super(RNNClassifier, self).__init__(
      head=head,
      sequence_feature_columns=sequence_feature_columns,
      context_feature_columns=context_feature_columns,
      units=units,
      cell_type=cell_type,
      rnn_cell_fn=rnn_cell_fn,
      return_sequences=return_sequences,
      model_dir=model_dir,
      optimizer=optimizer,
      config=config)
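A hypothetical construction, for illustration only (the feature name and sizes are made up), showing a sequential-prediction classifier that exercises the `SequentialHeadWrapper` path above:

token_ids = tf.feature_column.sequence_categorical_column_with_hash_bucket(
    'tokens', hash_bucket_size=1000)  # 'tokens' is a hypothetical feature
classifier = RNNClassifier(
    sequence_feature_columns=[
        tf.feature_column.embedding_column(token_ids, dimension=16)
    ],
    units=[32],
    cell_type='lstm',
    return_sequences=True,  # wraps the head in `SequentialHeadWrapper`
    n_classes=2)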
def _dnn_estimator_classifier_fn(n_classes=3, **kwargs):
  return dnn.DNNEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)
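For illustration, a hypothetical call (argument values made up; `tf` assumed to be TensorFlow); the remaining `DNNEstimatorV2` arguments such as `hidden_units` and `feature_columns` pass through `**kwargs`:

classifier = _dnn_estimator_classifier_fn(
    n_classes=3,
    hidden_units=[8, 4],
    feature_columns=[tf.feature_column.numeric_column('x', shape=[2])])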
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             num_units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
             input_layer_partitioner=None,
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either
      be sequence columns (e.g. `sequence_numeric_column`) or constructed
      from one (e.g. `embedding_column` with
      `sequence_categorical_column_*` as input).
    context_feature_columns: An iterable containing the `FeatureColumn`s
      for contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must
      be instances of classes derived from `_DenseColumn` such as
      `numeric_column`, not the sequential variants.
    num_units: Iterable of integer number of hidden units per RNN layer.
      If set, `cell_type` must also be specified and `rnn_cell_fn` must be
      `None`.
    cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string
      specifying the cell type. Supported strings are: `'basic_rnn'`,
      `'lstm'`, and `'gru'`. If set, `num_units` must also be specified
      and `rnn_cell_fn` must be `None`.
    rnn_cell_fn: A function that takes one argument, a
      `tf.estimator.ModeKeys`, and returns an object of type
      `tf.nn.rnn_cell.RNNCell` that will be used to construct the RNN. If
      set, `num_units` and `cell_type` cannot be set. This is for advanced
      users who need additional customization beyond `num_units` and
      `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is needed for
      stacked RNNs.
    model_dir: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column
      representing weights. It is used to down weight or boost examples
      during training, and is multiplied by the loss of the example. If it
      is a string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched
      by key `weight_column.key`, then `weight_column.normalizer_fn` is
      applied to it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label
      values. If given, labels must be of string type and take values in
      `label_vocabulary`. If it is not given, labels must already be
      encoded as an integer or float within [0, 1] for `n_classes=2`, or
      as integer values in {0, 1, ..., n_classes-1} for `n_classes>2`. An
      error is raised if the vocabulary is not provided and labels are
      strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over the batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.
    input_layer_partitioner: Optional. Partitioner for the input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Raises:
    ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _rnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        rnn_cell_fn=rnn_cell_fn,
        sequence_feature_columns=tuple(sequence_feature_columns or []),
        context_feature_columns=tuple(context_feature_columns or []),
        return_sequences=False,
        optimizer=optimizer,
        input_layer_partitioner=input_layer_partitioner,
        config=config)

  super(RNNClassifier, self).__init__(
      model_fn=_model_fn, model_dir=model_dir, config=config)
def _linear_estimator_classifier_fn(n_classes=3, **kwargs):
  return linear.LinearEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)
def train_and_evaluate_estimator():
  """Runs Estimator distributed training."""
  # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
  # variables during construction.
  # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
  # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
  config = tf.estimator.RunConfig(
      tf_random_seed=42,
      model_dir=FLAGS.model_dir,
      session_config=tf_compat.v1.ConfigProto(
          log_device_placement=False,
          # Ignore other workers; only talk to parameter servers.
          # Otherwise, when a chief/worker terminates, the others will hang.
          device_filters=["/job:ps"]))
  kwargs = {
      "max_iteration_steps": 100,
      "force_grow": True,
      "delay_secs_per_worker": .2,
      "max_worker_delay_secs": 1,
      "worker_wait_secs": .5,
      # Set a low timeout to reduce wait time for failures.
      "worker_wait_timeout_secs": 60,
      "config": config
  }
  head = regression_head.RegressionHead(
      loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
  features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
  labels = [[1.], [0.], [1.], [0.]]
  if FLAGS.placement_strategy == "round_robin":
    kwargs["experimental_placement_strategy"] = RoundRobinStrategy()
  if FLAGS.estimator_type == "autoensemble":
    feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
    candidate_pool = {
        "linear":
            tf.estimator.LinearEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001)),
        "dnn":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[3]),
        "dnn2":
            tf.estimator.DNNEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=lambda: tf.keras.optimizers.Adam(lr=.001),
                hidden_units=[5]),
    }
    estimator = AutoEnsembleEstimator(
        head=head, candidate_pool=candidate_pool, **kwargs)
  elif FLAGS.estimator_type == "estimator":
    subnetwork_generator = SimpleGenerator([
        _DNNBuilder("dnn1", config, layer_size=3),
        _DNNBuilder("dnn2", config, layer_size=4),
        _DNNBuilder("dnn3", config, layer_size=5),
    ])
    estimator = Estimator(
        head=head, subnetwork_generator=subnetwork_generator, **kwargs)
  elif FLAGS.estimator_type == "autoensemble_trees_multiclass":
    n_classes = 3
    head = multi_class_head.MultiClassHead(
        n_classes=n_classes, loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)

    def tree_loss_fn(labels, logits):
      result = bt_losses.per_example_maxent_loss(
          labels=labels, logits=logits, num_classes=n_classes, weights=None)
      return result[0]

    tree_head = multi_class_head.MultiClassHead(
        loss_fn=tree_loss_fn,
        n_classes=n_classes,
        loss_reduction=tf_compat.SUM_OVER_BATCH_SIZE)
    labels = [[1], [0], [1], [2]]
    feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
    candidate_pool = lambda config: {  # pylint: disable=g-long-lambda
        "linear":
            tf.estimator.LinearEstimator(
                head=head,
                feature_columns=feature_columns,
                optimizer=tf.keras.optimizers.Adam(lr=.001),
                config=config),
        "gbdt":
            CoreGradientBoostedDecisionTreeEstimator(
                head=tree_head,
                learner_config=learner_pb2.LearnerConfig(
                    num_classes=n_classes),
                examples_per_layer=8,
                num_trees=None,
                center_bias=False,  # Required for multi-class.
                feature_columns=feature_columns,
                config=config),
    }
    estimator = AutoEnsembleEstimator(
        head=head, candidate_pool=candidate_pool, **kwargs)

  def input_fn():
    input_features = {"x": tf.constant(features, name="x")}
    input_labels = tf.constant(labels, name="y")
    return input_features, input_labels

  train_hooks = [
      tf.estimator.ProfilerHook(save_steps=50, output_dir=FLAGS.model_dir)
  ]
  # Train for three iterations.
  train_spec = tf.estimator.TrainSpec(
      input_fn=input_fn, max_steps=300, hooks=train_hooks)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=input_fn, steps=1, start_delay_secs=.5, throttle_secs=.5)
  # Calling train_and_evaluate is the official way to perform distributed
  # training with an Estimator. Calling Estimator#train directly results
  # in an error when TF_CONFIG is set up for a cluster.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
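Since `RunConfig` reads the cluster layout from the `TF_CONFIG` environment variable, a minimal hypothetical environment for running this script (host addresses are made up) would look like:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "chief": ["host0:2222"],   # coordinates training, saves checkpoints
        "worker": ["host1:2222"],
        "ps": ["host2:2222"],      # matches the "/job:ps" device filter
    },
    "task": {"type": "worker", "index": 0},
})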
def __init__(self,
             subnetwork_generator,
             max_iteration_steps,
             logits_dimension=1,
             ensemblers=None,
             ensemble_strategies=None,
             evaluator=None,
             adanet_loss_decay=.9,
             filepath=None):
  """Initializes an `adanet.keras.Model`.

  Args:
    subnetwork_generator: The :class:`adanet.subnetwork.Generator` which
      defines the candidate subnetworks to train and evaluate at every
      AdaNet iteration.
    max_iteration_steps: Total number of steps for which to train
      candidates per iteration. If :class:`OutOfRange` or
      :class:`StopIteration` occurs in the middle, training stops before
      `max_iteration_steps` steps. When :code:`None`, it will train the
      current iteration forever.
    logits_dimension: The dimension of the final layer of any subnetworks.
    ensemblers: An iterable of :class:`adanet.ensemble.Ensembler` objects
      that define how to ensemble a group of subnetworks. If there are
      multiple, each should have a different `name` property.
    ensemble_strategies: An iterable of :class:`adanet.ensemble.Strategy`
      objects that define the candidate ensembles of subnetworks to
      explore at each iteration.
    evaluator: An :class:`adanet.Evaluator` for candidate selection after
      all subnetworks are done training. When :code:`None`, candidate
      selection uses a moving average of their :class:`adanet.Ensemble`
      AdaNet loss during training instead. In order to use the *AdaNet
      algorithm* as described in [Cortes et al., '17], the given
      :class:`adanet.Evaluator` must be created with the same dataset
      partition used during training. Otherwise, this framework will
      perform *AdaNet.HoldOut*, which uses a holdout set for candidate
      selection but does not benefit from learning guarantees.
    adanet_loss_decay: Float decay for the exponential moving average of
      the AdaNet objective throughout training. This moving average is a
      data-driven way of tracking the best candidate with only the
      training set.
    filepath: Directory to save model parameters, graph, etc. This can
      also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
  """
  logging.warning("The AdaNet Keras API is currently experimental.")
  self._subnetwork_generator = subnetwork_generator
  self._max_iteration_steps = max_iteration_steps
  self._logits_dimension = logits_dimension
  self._ensemblers = ensemblers
  self._ensemble_strategies = ensemble_strategies
  self._evaluator = evaluator
  self._adanet_loss_decay = adanet_loss_decay
  self._filepath = filepath
  self._model = None
  # Use lambdas to defer initialization of Head.
  self._loss_head_map = {
      "binary_crossentropy":
          lambda: binary_class_head.BinaryClassHead(),  # pylint: disable=unnecessary-lambda
      "mse":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "mean_squared_error":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "sparse_categorical_crossentropy":
          lambda: multi_class_head.MultiClassHead(self._logits_dimension),
  }
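A hypothetical instantiation for illustration (`my_generator` is a made-up name for a user-supplied generator); a loss string supplied later, such as "mse", selects the matching deferred head from `_loss_head_map`:

model = Model(
    subnetwork_generator=my_generator,  # hypothetical adanet.subnetwork.Generator
    max_iteration_steps=100,
    logits_dimension=1)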