def testWarmStartInputLayerEmbeddingColumn(self):
  # Create old and new vocabs for embedding column "sc_vocab".
  prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                      "old_vocab")
  new_vocab_path = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Save checkpoint from which to warm-start.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      _ = variable_scope.get_variable(
          "input_layer/sc_vocab_embedding/embedding_weights",
          initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
      self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Partition each var into 2 equal slices.
    partitions = [1] * len(shape)
    partitions[0] = min(2, shape[0].value)
    return partitions

  # Create feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  emb_vocab = fc.embedding_column(
      categorical_column=sc_vocab,
      dimension=2,
      # Can't use constant_initializer with load_and_remap. In practice,
      # use a truncated normal initializer.
      initializer=init_ops.random_uniform_initializer(
          minval=0.42, maxval=0.42))
  all_deep_cols = [emb_vocab]

  # New graph, new session with warmstarting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = {}
      with variable_scope.variable_scope("", partitioner=_partitioner):
        # Create the variables.
        fc.input_layer(
            features=self._create_dummy_inputs(),
            feature_columns=all_deep_cols,
            cols_to_vars=cols_to_vars)
      ws_settings = ws_util._WarmStartSettings(
          self.get_temp_dir(), col_to_prev_vocab={
              emb_vocab: prev_vocab_path
          })
      ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warmstarted. Var corresponding to
      # emb_vocab should be correctly warmstarted after vocab remapping.
      # Missing values are filled in with the EmbeddingColumn's initializer.
      self._assert_cols_to_vars(
          cols_to_vars, {
              emb_vocab: [
                  np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
                  np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
              ]
          }, sess)
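# Not part of the test above. A minimal sketch of how a comparable
# vocab-remapping warm-start is typically expressed through the public
# tf.estimator API in TF 1.x. The checkpoint directory, vocab file names,
# and the exact embedding variable name are placeholders/assumptions; the
# variable name in particular depends on the estimator's variable scoping.
import tensorflow as tf

sc_vocab = tf.feature_column.categorical_column_with_vocabulary_file(
    "sc_vocab", vocabulary_file="new_vocab.txt", vocabulary_size=6)
emb_vocab = tf.feature_column.embedding_column(sc_vocab, dimension=2)

vocab_info = tf.train.VocabInfo(
    new_vocab="new_vocab.txt",
    new_vocab_size=6,
    num_oov_buckets=0,
    old_vocab="old_vocab.txt",
    old_vocab_size=4,
    # Rows with no counterpart in the old vocab fall back to this initializer.
    backup_initializer=tf.truncated_normal_initializer(stddev=0.1))

ws = tf.estimator.WarmStartSettings(
    ckpt_to_initialize_from="/tmp/prev_model",  # placeholder path
    vars_to_warm_start=".*sc_vocab_embedding.*",
    var_name_to_vocab_info={
        # Assumed variable name; verify against the previous checkpoint.
        "dnn/input_from_feature_columns/input_layer/"
        "sc_vocab_embedding/embedding_weights": vocab_info,
    })

estimator = tf.estimator.DNNClassifier(
    hidden_units=[16], feature_columns=[emb_vocab], warm_start_from=ws)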
def add_feature_columns_to_dataset(self, dataset: tf.data.Dataset,
                                   input_columns, output_columns):
  dataset = dataset.map(
      lambda x: (
          tuple(input_layer(x, column) for column in input_columns),
          tuple(input_layer(x, column) for column in output_columns)
      ),
      num_parallel_calls=4
  ).apply(tf.data.experimental.unbatch())
  return dataset
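# A standalone usage sketch of the transformation above, assuming
# `input_layer` refers to tf.feature_column.input_layer and that dataset
# elements are feature dicts keyed by "age" and "label" (both placeholders).
import tensorflow as tf

raw = tf.data.Dataset.from_tensor_slices(
    {"age": [[21.0], [35.0]], "label": [[0.0], [1.0]]}).batch(2)
input_cols = [tf.feature_column.numeric_column("age")]
output_cols = [tf.feature_column.numeric_column("label")]

# Each element becomes (inputs_tuple, outputs_tuple) of dense tensors, one
# per column; unbatch() then splits the batch dimension back out.
ds = raw.map(
    lambda x: (
        tuple(tf.feature_column.input_layer(x, [c]) for c in input_cols),
        tuple(tf.feature_column.input_layer(x, [c]) for c in output_cols)),
    num_parallel_calls=4).apply(tf.data.experimental.unbatch())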
def interaction_logit_fn(features):
  """Interaction model logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.

  Returns:
    A `Tensor` representing the logits.
  """
  # shape (batch_size, column_num * embedding_size)
  flat_val = feature_column.input_layer(features, feature_columns)
  vals = gen_array_ops.reshape(flat_val, (-1, column_num, dimension),
                               "interaction_embeddings")
  # sum-square part
  summed_val = math_ops.reduce_sum(vals, 1)
  summed_square_val = math_ops.square(summed_val)
  # square-sum part
  squared_val = math_ops.square(vals)
  squared_sum_val = math_ops.reduce_sum(squared_val, 1)
  # second-order interactions
  logits = math_ops.reduce_sum(
      0.5 * math_ops.subtract(summed_square_val, squared_sum_val), -1)
  return logits
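# A small NumPy check (illustrative only) of the factorization-machine
# identity used above: the pairwise interaction term equals
# 0.5 * sum_k((sum_i v_ik)^2 - sum_i v_ik^2).
import numpy as np

rng = np.random.RandomState(0)
vals = rng.randn(3, 5, 4)  # (batch, column_num, dimension)

# Vectorized form, mirroring the reduce_sum/square ops above.
summed_square = np.square(vals.sum(axis=1))
squared_sum = np.square(vals).sum(axis=1)
fast = 0.5 * (summed_square - squared_sum).sum(axis=-1)

# Naive pairwise form: sum over i < j of <v_i, v_j>.
naive = np.zeros(vals.shape[0])
for i in range(vals.shape[1]):
  for j in range(i + 1, vals.shape[1]):
    naive += (vals[:, i, :] * vals[:, j, :]).sum(axis=-1)

assert np.allclose(fast, naive)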
def dnn_logit_fn(features, mode):
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)

  return logits
def _parse_features_if_necessary(features, feature_columns):
  """Helper function to convert the input points into a usable format.

  Args:
    features: The input features.
    feature_columns: An optional iterable containing all the feature columns
      used by the model. All items in the set should be feature column
      instances that can be passed to `tf.feature_column.input_layer`. If this
      is None, all features will be used.

  Returns:
    If `features` is a dict of `k` features (optionally filtered by
    `feature_columns`), each of which is a vector of `n` scalars, the return
    value is a Tensor of shape `(n, k)` representing `n` input points, where
    the items in the `k` dimension are sorted lexicographically by `features`
    key. If `features` is not a dict, it is returned unmodified.
  """
  if not isinstance(features, dict):
    return features

  if feature_columns:
    return fc.input_layer(features, feature_columns)

  keys = sorted(features.keys())
  with ops.colocate_with(features[keys[0]]):
    return array_ops.concat([features[k] for k in keys], axis=1)
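# Illustrative sketch of the no-feature_columns path above: a dict of k
# feature vectors of length n becomes an (n, k) tensor with columns ordered
# by sorted key. Uses public TF 1.x ops rather than the internal aliases.
import tensorflow as tf

features = {
    "b": tf.constant([[1.0], [2.0], [3.0]]),
    "a": tf.constant([[10.0], [20.0], [30.0]]),
}
keys = sorted(features.keys())                            # ["a", "b"]
points = tf.concat([features[k] for k in keys], axis=1)   # shape (3, 2)

with tf.Session() as sess:
  print(sess.run(points))  # [[10. 1.] [20. 2.] [30. 3.]]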
def test_sequence_example_into_input_layer(self):
  examples = [_make_sequence_example().SerializeToString()] * 100
  ctx_cols, seq_cols = self._build_feature_columns()

  def _parse_example(example):
    ctx, seq = parsing_ops.parse_single_sequence_example(
        example,
        context_features=fc.make_parse_example_spec(ctx_cols),
        sequence_features=fc.make_parse_example_spec(seq_cols))
    ctx.update(seq)
    return ctx

  ds = dataset_ops.Dataset.from_tensor_slices(examples)
  ds = ds.map(_parse_example)
  ds = ds.batch(20)

  # Test on a single batch.
  features = ds.make_one_shot_iterator().get_next()

  # Tile the context features across the sequence features.
  seq_layer, _ = sfc.sequence_input_layer(features, seq_cols)
  ctx_layer = fc.input_layer(features, ctx_cols)
  input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)

  rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
  output = rnn_layer(input_layer)

  with self.cached_session() as sess:
    sess.run(variables.global_variables_initializer())
    features_r = sess.run(features)
    self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
    output_r = sess.run(output)
    self.assertAllEqual(output_r.shape, [20, 10])
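# Standalone sketch of the shape behavior of concatenate_context_input used
# above: the per-example context is tiled over the time axis and joined with
# the sequence features along the feature axis. The exact ordering of
# context vs. sequence features follows the library implementation; this
# sketch only illustrates the tiling and resulting shapes.
import tensorflow as tf

batch, timesteps, d_seq, d_ctx = 20, 3, 6, 4
seq_layer = tf.zeros([batch, timesteps, d_seq])   # [20, 3, 6]
ctx_layer = tf.zeros([batch, d_ctx])              # [20, 4]

tiled_ctx = tf.tile(tf.expand_dims(ctx_layer, 1), [1, timesteps, 1])
combined = tf.concat([tiled_ctx, seq_layer], axis=-1)  # [20, 3, 10]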
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) if isinstance(units, int): with variable_scope.variable_scope( 'logits', values=(net,)) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) else: logits = [] for head_index, logits_dimension in enumerate(units): with variable_scope.variable_scope( 'logits_head_{}'.format(head_index), values=(net,)) as logits_scope: these_logits = core_layers.dense( net, units=logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(these_logits, logits_scope.name) logits.append(these_logits) return logits
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ is_training = mode == model_fn.ModeKeys.TRAIN with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and is_training: net = core_layers.dropout(net, rate=dropout, training=True) if batch_norm: # TODO(hjm): In future, if this becomes popular, we can enable # customization of the batch normalization params by accepting a # list of `BatchNormalization` instances as `batch_norm`. net = normalization.batch_normalization( net, # The default momentum 0.99 actually crashes on certain # problem, so here we use 0.999, which is the default of # tf.contrib.layers.batch_norm. momentum=0.999, training=is_training, name='batchnorm_%d' % layer_id) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net, )) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def dnn_logit_fn(features, mode): with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): inputs = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) dense = inputs for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'dense_layer_%d' % layer_id, values=(dense, )) as hidden_layer_scope: dense = core_layers.dense( dense, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: dense = core_layers.dropout(dense, rate=dropout, training=True) _add_hidden_layer_summary(dense, hidden_layer_scope.name) with variable_scope.variable_scope( 'fm_layer', values=(inputs, )) as cross_layer_scope: builder = feature_column_lib._LazyBuilder(features) fm_outputs = [] for col_pair in fm_feature_columns: column1, column2 = col_pair tensor1 = column1._get_dense_tensor(builder, trainable=True) num_elements = column1._variable_shape.num_elements() batch_size = array_ops.shape(tensor1)[0] tensor2 = column2._get_dense_tensor(builder, trainable=True) tensor1 = array_ops.reshape(tensor1, shape=(batch_size, num_elements)) tensor2 = array_ops.reshape(tensor2, shape=(batch_size, num_elements)) fm_outputs.append(matmul(tensor1, tensor2)) fm_outputs = tf.convert_to_tensor(fm_outputs) _add_hidden_layer_summary(fm_outputs, cross_layer_scope.name) with variable_scope.variable_scope( 'logits', values=(dense, fm_outputs)) as logits_scope: dense_cross = concat([dense, fm_outputs], axis=1) logits = core_layers.dense( dense_cross, units=1, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def encode_features(features,
                    feature_columns,
                    mode=model_fn.ModeKeys.TRAIN,
                    scope=None):
  """Returns dense tensors from features using feature columns.

  This function encodes the feature column transformation on the 'raw'
  `features`.

  Args:
    features: (dict) mapping feature names to feature values, possibly obtained
      from input_fn.
    feature_columns: (list) list of feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    (dict) A mapping from columns to dense tensors.
  """
  # Having scope here for backward compatibility.
  del scope
  trainable = (mode == model_fn.ModeKeys.TRAIN)
  cols_to_tensors = {}

  if hasattr(feature_column_lib, "is_feature_column_v2"
            ) and feature_column_lib.is_feature_column_v2(feature_columns):
    dense_layer = feature_column_lib.DenseFeatures(
        feature_columns=feature_columns,
        name="encoding_layer",
        trainable=trainable)
    dense_layer(features, cols_to_output_tensors=cols_to_tensors)
  else:
    feature_column.input_layer(
        features=features,
        feature_columns=feature_columns,
        trainable=trainable,
        cols_to_output_tensors=cols_to_tensors)

  return cols_to_tensors
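# A minimal sketch of the per-column output mapping that encode_features
# relies on, using the public TF 1.x input_layer (the else-branch above).
# Column names and feature values are placeholders.
import tensorflow as tf

age = tf.feature_column.numeric_column("age")
bkt = tf.feature_column.bucketized_column(age, boundaries=[30, 50])
features = {"age": tf.constant([[25.0], [45.0]])}

cols_to_tensors = {}
tf.feature_column.input_layer(
    features, [age, bkt],
    trainable=False,
    cols_to_output_tensors=cols_to_tensors)
# cols_to_tensors[age] -> shape (2, 1); cols_to_tensors[bkt] -> shape (2, 3)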
def rnn_logit_fn(features, mode): """Recurrent Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A tuple of `Tensor` objects representing the logits and the sequence length mask. """ # Can't import from tf.contrib at the module level, otherwise you # can hit a circular import issue if tf_estimator.contrib is # imported before tf.contrib. from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc # pylint: disable=g-import-not-at-top with variable_scope.variable_scope( 'sequence_input_layer', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): sequence_input, sequence_length = seq_fc.sequence_input_layer( features=features, feature_columns=sequence_feature_columns) summary.histogram('sequence_length', sequence_length) if context_feature_columns: context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) sequence_input = _concatenate_context_input( sequence_input, context_input) cell = rnn_cell_fn(mode) # Ignore output state. rnn_outputs, _ = rnn.dynamic_rnn(cell=cell, inputs=sequence_input, sequence_length=sequence_length, dtype=dtypes.float32, time_major=False) if not return_sequences: rnn_outputs = _select_last_activations(rnn_outputs, sequence_length) with variable_scope.variable_scope('logits', values=(rnn_outputs, )): logits = core_layers.dense( rnn_outputs, units=output_units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer()) sequence_length_mask = array_ops.sequence_mask(sequence_length) return logits, sequence_length_mask
def call(self, inputs, **kwargs): # item_input = {} for feature_name in self.params["item_feature_list"]: feature = inputs.get(feature_name) if isinstance(feature, tf.SparseTensor): feature = tf.sparse_tensor_to_dense(feature, default_value='pad') shape = tf.shape(feature) feature = tf.reshape(feature, [-1, 1]) feature = input_layer({feature_name: feature}, [self.columns[feature_name]]) feature = tf.reshape(feature, [shape[0], shape[1], 10]) inputs[feature_name] = feature #[batch_size,history_len,seq_len,embedding_size] key = self.params["user_history_item_title"] em, len, _, _ = self.common_embedding_layer(inputs[key]) inputs[key] = em inputs["history_item_title_len"] = len #[batch_size,recommand_len,seq_len,embedding_size] key = self.params["item_title"] em, len, _, _ = self.common_embedding_layer(inputs[key]) inputs[key] = em inputs["recommand_title_len"] = len # history = inputs[self.params["user_history_item_title"]] # recomm = inputs[self.params["item_title"]] # # history_len = history.dense_shape[1] if isinstance(history,SparseTensor) else tf.shape(history)[1] # recomm_len = recomm.dense_shape[1] if isinstance(recomm,SparseTensor) else tf.shape(recomm)[1] # # history_recommand = tf.sparse_concat(sp_inputs=[history,recomm] ,axis=1,expand_nonconcat_dim = True) \ # if isinstance(history,SparseTensor) else tf.concat([history,recomm] ,axis=1) # # em,len ,_,_ = self.common_embedding_layer(history_recommand) # inputs["history_recommand"] = em # inputs["titles_len"] = len # inputs["history_len"] = history_len # inputs["recomm_len"] = recomm_len # inputs[self.params.item_relevance] = input_layer([inputs[self.params.item_relevance]],[self.item_relevance_col]) # key =self.params["item_hot_score"] # value = tf.sparse_tensor_to_dense(inputs[key]) # inputs[key] = value#input_layer({key:value},[self.item_hot_score_col]) # # key = self.params["user_car_serial"] # value = inputs[key] # inputs[self.params["user_car_serial"]] = input_layer({key:value},[self.car_serial_col]) return inputs
def dnn_logit_fn(features, mode):
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    inputs = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)

  dense = inputs
  cross = inputs
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'dense_layer_%d' % layer_id, values=(dense,)) as hidden_layer_scope:
      dense = core_layers.dense(
          dense,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        dense = core_layers.dropout(dense, rate=dropout, training=True)
    _add_hidden_layer_summary(dense, hidden_layer_scope.name)

  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'cross_layer_%d' % layer_id, values=(cross,)) as cross_layer_scope:
      cross = cross_layer(cross, layer_id, inputs, name=cross_layer_scope)
    _add_hidden_layer_summary(cross, cross_layer_scope.name)

  with variable_scope.variable_scope(
      'logits', values=(dense, cross)) as logits_scope:
    # Concatenate the deep and cross towers before the final projection.
    dense_cross = concat([dense, cross], axis=1)
    logits = core_layers.dense(
        dense_cross,
        units=1,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)

  return logits
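# The snippet above calls a `cross_layer(cross, layer_id, inputs, name=...)`
# helper that is not shown. A hypothetical, minimal sketch of a DCN-style
# cross layer, assuming that is what is intended:
#   x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l
import tensorflow as tf

def cross_layer(x_l, layer_id, x_0, name=None):
  input_dim = x_0.get_shape().as_list()[-1]
  with tf.variable_scope(name, default_name="cross_layer_%d" % layer_id):
    w = tf.get_variable("kernel", shape=[input_dim, 1],
                        initializer=tf.glorot_uniform_initializer())
    b = tf.get_variable("bias", shape=[input_dim],
                        initializer=tf.zeros_initializer())
    # (batch, 1): scalar projection of the current layer onto w.
    xl_w = tf.matmul(x_l, w)
    # Broadcast the scalar back over the input, add bias and residual.
    return x_0 * xl_w + b + x_l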
def rnn_logit_fn(features, mode): """Recurrent Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits. """ with variable_scope.variable_scope( 'sequence_input_layer', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): sequence_input, sequence_length = seq_fc.sequence_input_layer( features=features, feature_columns=sequence_feature_columns) summary.histogram('sequence_length', sequence_length) if context_feature_columns: context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) sequence_input = _concatenate_context_input( sequence_input, context_input) cell = rnn_cell_fn(mode) # Ignore output state. rnn_outputs, _ = rnn.dynamic_rnn(cell=cell, inputs=sequence_input, sequence_length=sequence_length, dtype=dtypes.float32, time_major=False) if not return_sequences: rnn_outputs = _select_last_activations(rnn_outputs, sequence_length) with variable_scope.variable_scope('logits', values=(rnn_outputs, )): logits = core_layers.dense( rnn_outputs, units=output_units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer()) return logits
def rnn_logit_fn(features, mode): """Recurrent Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits. """ with variable_scope.variable_scope( 'sequence_input_layer', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): sequence_input, sequence_length = seq_fc.sequence_input_layer( features=features, feature_columns=sequence_feature_columns) summary.histogram('sequence_length', sequence_length) if context_feature_columns: context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) sequence_input = seq_fc.concatenate_context_input( context_input, sequence_input) cell = rnn_cell_fn(mode) # Ignore output state. rnn_outputs, _ = rnn.dynamic_rnn( cell=cell, inputs=sequence_input, sequence_length=sequence_length, dtype=dtypes.float32, time_major=False) last_activations = _select_last_activations(rnn_outputs, sequence_length) with variable_scope.variable_scope('logits', values=(rnn_outputs,)): logits = core_layers.dense( last_activations, units=output_units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer()) return logits
def _get_exogenous_embedding_shape(self):
  """Computes the shape of the vector returned by _process_exogenous_features.

  Returns:
    The shape as a list. Does not include a batch dimension.
  """
  if not self._exogenous_feature_columns:
    return (0,)
  with ops.Graph().as_default():
    parsed_features = (
        feature_column.make_parse_example_spec(
            self._exogenous_feature_columns))
    placeholder_features = parsing_ops.parse_example(
        serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string),
        features=parsed_features)
    embedded = feature_column.input_layer(
        features=placeholder_features,
        feature_columns=self._exogenous_feature_columns)
    return embedded.get_shape().as_list()[1:]
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))

  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column_a = fc.indicator_column(categorical_column_a)

  with self.assertRaisesRegexp(
      ValueError,
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.'):
    _ = fc.input_layer(
        features={'aaa': sparse_input},
        feature_columns=[indicator_column_a])
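# For contrast with the error case above: sequence categorical columns are
# consumed through sequence_input_layer rather than input_layer. A hedged
# sketch reusing the `sfc`, `fc`, and `sparse_tensor` aliases already
# imported by this test module; support for indicator columns in
# sequence_input_layer is assumed from the contrib sequence API.
ids = sparse_tensor.SparseTensorValue(
    indices=((0, 0), (1, 0), (1, 1)),
    values=(2, 0, 1),
    dense_shape=(2, 2))
seq_cat = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=3)
seq_indicator = fc.indicator_column(seq_cat)
# Returns a (batch, max_sequence_length, num_buckets) dense tensor plus the
# true per-example sequence lengths.
sequence_input, sequence_length = sfc.sequence_input_layer(
    {'aaa': ids}, [seq_indicator])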
def input_from_feature_column(columns_to_tensors,
                              feature_column,
                              dtype=dtypes.float32):
  """Convert one feature_column to `Tensor`, making necessary transformations.

  DenseColumns are taken as is, see `tf.feature_column.input_layer`.
  CategoricalColumns are assumed to be exclusive and it takes only the value
  of the category.

  Args:
    columns_to_tensors: Returned by input_fn. Consider processing first by
      `layers.transform_features(columns_to_tensors, feature_columns)`, since
      it may share tf ops for different FeatureColumns. This function
      transforms one at a time.
    feature_column: feature_column to transform to `Tensor`.
    dtype: `_CategoricalColumn`s are converted to this type.

  Returns:
    Tensor with transformed feature column for calibration consumption.

  Raises:
    ValueError: if type of FeatureColumn is unknown, and this function doesn't
      know how to handle it.
  """
  # pylint: disable=protected-access
  if isinstance(feature_column, feature_column_lib._DenseColumn):
    return feature_column_lib.input_layer(
        features=columns_to_tensors, feature_columns=set([feature_column]))
  elif isinstance(feature_column, feature_column_lib._CategoricalColumn):
    categorical_ids = math_ops.cast(
        feature_column._transform_feature(columns_to_tensors).values, dtype)
    return array_ops.stack([categorical_ids], axis=1)
  # pylint: enable=protected-access

  raise ValueError('Cannot handle FeatureColumn {}: only _DenseColumn and '
                   '_CategoricalColumn are implemented, consider converting '
                   'your column to float32 until this FeatureColumn is '
                   'supported'.format(feature_column))
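# Illustrative sketch of the two branches above, written against the public
# TF 1.x names (the internal feature_column_lib aliases resolve to the same
# symbols). The categorical branch mirrors the snippet's use of the private
# _transform_feature method and is an assumption, not a supported API.
import tensorflow as tf

features = {
    "age": tf.constant([[25.0], [40.0]]),
    "city": tf.SparseTensor(indices=[[0, 0], [1, 0]],
                            values=["sf", "nyc"],
                            dense_shape=[2, 1]),
}
dense_col = tf.feature_column.numeric_column("age")
cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
    "city", ["sf", "nyc", "la"])

# _DenseColumn branch: passes through input_layer unchanged.
age_tensor = tf.feature_column.input_layer(features, [dense_col])
# _CategoricalColumn branch: take the raw category ids as a float tensor.
city_ids = tf.cast(
    cat_col._transform_feature(features).values, tf.float32)  # pylint: disable=protected-access
city_ids = tf.stack([city_ids], axis=1)  # shape (2, 1)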
def extract_features(features, feature_columns): """Extracts columns from a dictionary of features. Args: features: `dict` of `Tensor` objects. feature_columns: A list of feature_columns. Returns: Seven values: - A list of all feature column names. - A list of dense floats. - A list of sparse float feature indices. - A list of sparse float feature values. - A list of sparse float feature shapes. - A list of sparse int feature indices. - A list of sparse int feature values. - A list of sparse int feature shapes. Raises: ValueError: if features is not valid. """ if not features: raise ValueError("Features dictionary must be specified.") # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. features = copy.copy(features) if feature_columns: scope = "gbdt" with variable_scope.variable_scope(scope): feature_columns = list(feature_columns) transformed_features = {} for fc in feature_columns: # pylint: disable=protected-access if isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( features, [fc], weight_collections=[scope]) else: result = feature_column_ops.transform_features(features, [fc]) if len(result) > 1: raise ValueError("Unexpected number of output features") transformed_features[fc.name] = result[list(result.keys())[0]] features = transformed_features dense_float_names = [] dense_floats = [] sparse_float_names = [] sparse_float_indices = [] sparse_float_values = [] sparse_float_shapes = [] sparse_int_names = [] sparse_int_indices = [] sparse_int_values = [] sparse_int_shapes = [] for key in sorted(features.keys()): tensor = features[key] if isinstance(tensor, sparse_tensor.SparseTensor): if tensor.values.dtype == dtypes.float32: sparse_float_names.append(key) sparse_float_indices.append(tensor.indices) sparse_float_values.append(tensor.values) sparse_float_shapes.append(tensor.dense_shape) elif tensor.values.dtype == dtypes.int64: sparse_int_names.append(key) sparse_int_indices.append(tensor.indices) sparse_int_values.append(tensor.values) sparse_int_shapes.append(tensor.dense_shape) else: raise ValueError("Unsupported sparse feature %s with dtype %s." % (tensor.indices.name, tensor.dtype)) else: if tensor.dtype == dtypes.float32: if len(tensor.shape) > 1 and tensor.shape[1] > 1: unstacked = array_ops.unstack(tensor, axis=1) for i in xrange(len(unstacked)): dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i)) dense_floats.append(array_ops.reshape(unstacked[i], [-1, 1])) else: dense_float_names.append(key) dense_floats.append(tensor) else: raise ValueError("Unsupported dense feature %s with dtype %s." % (tensor.name, tensor.dtype)) # Feature columns are logically organized into incrementing slots starting # from dense floats, then sparse floats then sparse ints. fc_names = (dense_float_names + sparse_float_names + sparse_int_names) return (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, sparse_int_shapes)
def _dnn_linear_combined_model_fn( features, labels, mode, head, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. linear_feature_columns: An iterable containing all the feature columns used by the Linear model. linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. dnn_hidden_units: List of hidden units per DNN layer. dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. input_layer_partitioner: Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing. """ if not linear_feature_columns and not dnn_feature_columns: raise ValueError( 'Either linear_feature_columns or dnn_feature_columns must be defined.') num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. 
dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope('input', partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as dnn_hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=dnn_activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dnn_dropout, training=True) _add_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( 'logits', values=(net,)) as dnn_logits_scope: dnn_logits = core_layers.dense( net, units=head.logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=dnn_logits_scope) _add_layer_summary(dnn_logits, dnn_logits_scope.name) linear_parent_scope = 'linear' if not linear_feature_columns: linear_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_linear_learning_rate(len(linear_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: linear_logits = feature_column_lib.linear_model( features=features, feature_columns=linear_feature_columns, units=head.logits_dimension) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))) if linear_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1) return head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) if rnn_feature_columns != None: rnn_features_embedding = feature_column_lib.input_layer( features=features, feature_columns=rnn_feature_columns) rnn_features_embedding = tf.reshape( rnn_features_embedding, [-1, FLAGS.rnn_length, FLAGS.rnn_input_size]) cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.rnn_hidden_size) att_wrapper = tf.contrib.rnn.AttentionCellWrapper( cell=cell, attn_length=10) outputs, _ = tf.nn.dynamic_rnn(att_wrapper, rnn_features_embedding, dtype=tf.float32) outputs = tf.reshape( outputs, [-1, FLAGS.rnn_length * FLAGS.rnn_hidden_size]) net = array_ops.concat([net, outputs], 1) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net, )) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, dnn_to_tree_distillation_param=None, use_core_versions=False, output_type=model.ModelBuilderOutputType.MODEL_FN_OPS): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. predict_with_tree_only: Whether to use only the tree model output as the final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the float defines the weight of the distillation loss, and the loss_fn, for computing distillation loss, takes dnn_logits, tree_logits and weight tensor. If the entire tuple is None, no distillation will be applied. If only the loss_fn is None, we will take the sigmoid/softmax cross entropy loss be default. When distillation is applied, `predict_with_tree_only` will be set to True. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") if dnn_to_tree_distillation_param: if not predict_with_tree_only: logging.warning("update predict_with_tree_only to True since distillation" "is specified.") predict_with_tree_only = True # Build DNN Logits. 
dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer,)) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer,)) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append(layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features, use_core_columns=use_core_versions) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access features, head.weight_column_name) dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: # we create the loss_fn similar to the head loss_fn for # multi_class_head used previously as the default one. n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn( n_classes) dnn_to_tree_distillation_loss = loss_weight * loss_fn( dnn_logits_fixed, tree_logits, weight_tensor) summary.scalar("dnn_to_tree_distillation_loss", dnn_to_tree_distillation_loss) loss += dnn_to_tree_distillation_loss update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op if predict_with_tree_only: if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER: tree_train_logits = tree_logits else: tree_train_logits = control_flow_ops.cond( global_step > dnn_steps_to_train, lambda: tree_logits, lambda: dnn_logits) else: tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: if use_core_versions: model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ 
trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ]) return model_fn_ops elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC: fusion_spec = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_spec = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) training_hooks = [ trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train, tree_spec.train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ] fusion_spec = fusion_spec._replace(training_hooks=training_hooks + list(fusion_spec.training_hooks)) return fusion_spec
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = ( params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope( parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer( features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, 
hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope.name)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _process_exogenous_features(self, times, features): """Create a single vector from exogenous features. Args: times: A [batch size, window size] vector of times for this batch, primarily used to check the shape information of exogenous features. features: A dictionary of exogenous features corresponding to the columns in self._exogenous_feature_columns. Each value should have a shape prefixed by [batch size, window size]. Returns: A Tensor with shape [batch size, window size, exogenous dimension], where the size of the exogenous dimension depends on the exogenous feature columns passed to the model's constructor. Raises: ValueError: If an exogenous feature has an unknown rank. """ if self._exogenous_feature_columns: exogenous_features_single_batch_dimension = {} for name, tensor in features.items(): if tensor.get_shape().ndims is None: # input_from_feature_columns does not support completely unknown # feature shapes, so we save on a bit of logic and provide a better # error message by checking that here. raise ValueError( ("Features with unknown rank are not supported. Got shape {} for " "feature {}.").format(tensor.get_shape(), name)) tensor_shape_dynamic = array_ops.shape(tensor) tensor = array_ops.reshape( tensor, array_ops.concat([[tensor_shape_dynamic[0] * tensor_shape_dynamic[1]], tensor_shape_dynamic[2:]], axis=0)) # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank. if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string: exogenous_features_single_batch_dimension[name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( feature_column.input_layer( features=exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( embedded_exogenous_features_single_batch_dimension, array_ops.concat( [ array_ops.shape(times), array_ops.shape( embedded_exogenous_features_single_batch_dimension)[1:] ], axis=0)) exogenous_regressors.set_shape(times.get_shape().concatenate( embedded_exogenous_features_single_batch_dimension.get_shape()[1:])) exogenous_regressors = math_ops.cast( exogenous_regressors, dtype=self.dtype) else: # Not having any exogenous features is a special case so that models can # avoid superfluous updates, which may not be free of side effects due to # bias terms in transformations. exogenous_regressors = None return exogenous_regressors
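# A NumPy sketch of the reshape round-trip performed by _process_exogenous_features:
# each feature with a [batch, window, ...] shape prefix is flattened to
# [batch * window, ...], embedded, and the result is reshaped back to
# [batch, window, dimension]. The identity "embedding" below is only a stand-in for
# feature_column.input_layer; the shapes are illustrative.
import numpy as np

batch_size, window_size = 4, 3
feature = np.random.randn(batch_size, window_size, 2)       # one exogenous feature

flattened = feature.reshape(batch_size * window_size, -1)   # collapse batch/window dims
embedded = flattened                                         # stand-in for input_layer
exogenous_regressors = embedded.reshape(batch_size, window_size, -1)

assert exogenous_regressors.shape == (batch_size, window_size, 2)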
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_partitioner: Optional. Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") or "Ftrl" joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") or "Adagrad" dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = params.get("input_layer_partitioner") or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) fix_global_step_increment_bug = params.get( "fix_global_step_increment_bug", True) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined.") features = _get_feature_dict(features) linear_optimizer = _get_optimizer(linear_optimizer) _check_no_sync_replicas_optimizer(linear_optimizer) dnn_optimizer = _get_optimizer(dnn_optimizer) _check_no_sync_replicas_optimizer(dnn_optimizer) # Build DNN Logits. 
dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: if not dnn_hidden_units: raise ValueError( "dnn_hidden_units must be defined when dnn_feature_columns is " "specified.") dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as dnn_input_scope: if all([ isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in dnn_feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=dnn_input_scope) else: net = fc_core.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as dnn_hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as dnn_logits_scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=dnn_logits_scope) _add_layer_summary(dnn_logits, dnn_logits_scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=linear_partitioner) as scope: if all([isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in linear_feature_columns]): if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits = fc_core.linear_model( features=features, feature_columns=linear_feature_columns, units=head.logits_dimension, weight_collections=[linear_parent_scope]) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. 
if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_DNN_LEARNING_RATE, optimizer=dnn_optimizer, gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, dnn_input_scope.name), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=linear_optimizer, clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) train_op = control_flow_ops.group(*train_ops) if fix_global_step_increment_bug: with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1).op return train_op return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_make_training_op, logits=logits)
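# A framework-free sketch of why the `fix_global_step_increment_bug` flag exists in
# the combined model_fn above: with two branches (DNN and linear), letting each
# optimize_loss call bump the global step advances it twice per training step. With
# the fix, the per-branch ops leave the step alone and a single +1 runs after the
# grouped train op. The counter below is a stand-in for the global step variable.
global_step = 0

def branch_train_op(increment_global_step):
    """Stand-in for one optimizers.optimize_loss call."""
    global global_step
    if increment_global_step:
        global_step += 1

fix_global_step_increment_bug = True
for _ in ("dnn", "linear"):
    branch_train_op(increment_global_step=not fix_global_step_increment_bug)
if fix_global_step_increment_bug:
    global_step += 1       # single explicit increment after both branch ops

assert global_step == 1    # one step per training iteration, not two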
def _dnn_linear_combined_model_fn(features, labels, mode, head, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: dict of `Tensor`. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. linear_feature_columns: An iterable containing all the feature columns used by the Linear model. linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. dnn_hidden_units: List of hidden units per DNN layer. dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. input_layer_partitioner: Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( 'Either linear_feature_columns or dnn_feature_columns must be defined.' ) num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = (partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple( six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( 'input', partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as dnn_hidden_layer_scope: net = core_layers.dense(net, units=num_hidden_units, activation=dnn_activation_fn, kernel_initializer=init_ops. 
glorot_uniform_initializer(), name=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dnn_dropout, training=True) _add_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( 'logits', values=(net, )) as dnn_logits_scope: dnn_logits = core_layers.dense( net, units=head.logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=dnn_logits_scope) _add_layer_summary(dnn_logits, dnn_logits_scope.name) linear_parent_scope = 'linear' if not linear_feature_columns: linear_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_linear_learning_rate(len(linear_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: linear_logits = feature_column_lib.linear_model( features=features, feature_columns=linear_feature_columns, units=head.logits_dimension) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize(loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))) if linear_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1) return head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
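# A framework-free sketch of the per-branch var_list idea in _train_op_fn above: the
# same loss is minimized by two optimizers, but each one only updates variables whose
# names live under its own scope ("dnn/..." vs "linear/..."). The names and the update
# rule are illustrative stand-ins for optimizer.minimize(loss, var_list=...).
variables = {"dnn/hiddenlayer_0/kernel": 1.0, "linear/linear_model/weights": 2.0}

def minimize(scope, step=0.1):
    """Apply a fixed-size update to every variable under `scope`."""
    for name in variables:
        if name.startswith(scope + "/"):
            variables[name] -= step

minimize("dnn")      # only the DNN weights move
minimize("linear")   # only the linear weights move
assert variables["dnn/hiddenlayer_0/kernel"] == 0.9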
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns, optimizer='Adagrad', activation_fn=nn.relu, dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net model_fn. Args: features: Dict of `Tensor` (depends on data passed to `train`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `head_lib._Head` instance. hidden_units: Iterable of integer number of hidden units per layer. feature_columns: Iterable of `feature_column._FeatureColumn` model inputs. optimizer: String, `tf.Optimizer` object, or callable that creates the optimizer to use for training. If not specified, will use the Adagrad optimizer with a default learning rate of 0.05. activation_fn: Activation function applied to each layer. dropout: When not `None`, the probability we will drop out a given coordinate. input_layer_partitioner: Partitioner for input layer. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ optimizer = optimizers.get_optimizer_instance(optimizer, learning_rate=_LEARNING_RATE) num_ps_replicas = config.num_ps_replicas if config else 0 partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope('dnn', values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net, )) as logits_scope: logits = core_layers.dense( net, units=head.logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizer.minimize( loss, global_step=training_util.get_global_step()) return head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
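# The model_fns in this file use two different dropout parameterizations: contrib's
# layers.dropout takes keep_prob (the probability of keeping a unit), while the core
# layers used above take rate (the probability of dropping one). A small sketch of
# the relationship, with an illustrative dropout value.
dropout = 0.3

keep_prob = 1.0 - dropout   # what contrib layers.dropout expects
rate = dropout              # what core_layers.dropout expects

assert abs((1.0 - keep_prob) - rate) < 1e-12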
def _dnn_model_fn( features, labels, mode, head, hidden_units, feature_columns, optimizer='Adagrad', activation_fn=nn.relu, dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net model_fn. Args: features: Dict of `Tensor` (depends on data passed to `train`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `head_lib._Head` instance. hidden_units: Iterable of integer number of hidden units per layer. feature_columns: Iterable of `feature_column._FeatureColumn` model inputs. optimizer: String, `tf.Optimizer` object, or callable that creates the optimizer to use for training. If not specified, will use the Adagrad optimizer with a default learning rate of 0.05. activation_fn: Activation function applied to each layer. dropout: When not `None`, the probability we will drop out a given coordinate. input_layer_partitioner: Partitioner for input layer. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ optimizer = optimizers.get_optimizer_instance( optimizer, learning_rate=_LEARNING_RATE) num_ps_replicas = config.num_ps_replicas if config else 0 partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope( 'dnn', values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope( 'logits', values=(net,)) as logits_scope: logits = core_layers.dense( net, units=head.logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizer.minimize( loss, global_step=training_util.get_global_step()) return head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
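# A small sketch of the variable-scope naming produced by the hidden-layer loop
# above: each entry in hidden_units yields one dense layer under
# "dnn/hiddenlayer_<layer_id>". The layer sizes are hypothetical.
hidden_units = [256, 128, 64]
scopes = ["dnn/hiddenlayer_%d" % layer_id for layer_id, _ in enumerate(hidden_units)]

assert scopes == ["dnn/hiddenlayer_0", "dnn/hiddenlayer_1", "dnn/hiddenlayer_2"]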
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, dnn_to_tree_distillation_param=None, use_core_versions=False, output_type=model.ModelBuilderOutputType.MODEL_FN_OPS, override_global_step_value=None): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. predict_with_tree_only: Whether to use only the tree model output as the final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the float defines the weight of the distillation loss, and the loss_fn, for computing distillation loss, takes dnn_logits, tree_logits and weight tensor. If the entire tuple is None, no distillation will be applied. If only the loss_fn is None, we will take the sigmoid/softmax cross entropy loss be default. When distillation is applied, `predict_with_tree_only` will be set to True. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec (new interface). override_global_step_value: If after the training is done, global step value must be reset to this value. This is particularly useful for hyper parameter tuning, which can't recognize early stopping due to the number of trees. If None, no override of global step will happen. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. 
""" if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") if dnn_to_tree_distillation_param: if not predict_with_tree_only: logging.warning( "update predict_with_tree_only to True since distillation" "is specified.") predict_with_tree_only = True # Build DNN Logits. dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features, use_core_columns=use_core_versions) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access features, head.weight_column_name) dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: # we create the loss_fn similar to the head loss_fn for # multi_class_head used previously as the default one. n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn( n_classes) dnn_to_tree_distillation_loss = loss_weight * loss_fn( dnn_logits_fixed, tree_logits, weight_tensor) summary.scalar("dnn_to_tree_distillation_loss", dnn_to_tree_distillation_loss) loss += dnn_to_tree_distillation_loss update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op if predict_with_tree_only: if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER: tree_train_logits = tree_logits else: tree_train_logits = control_flow_ops.cond( global_step > dnn_steps_to_train, lambda: tree_logits, lambda: dnn_logits) else: tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: if use_core_versions: model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ 
trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ]) return model_fn_ops elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC: fusion_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec(features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) training_hooks = [ trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train, tree_spec.train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ] fusion_spec = fusion_spec._replace(training_hooks=training_hooks + list(fusion_spec.training_hooks)) return fusion_spec
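# A framework-free sketch of the two-phase schedule the SwitchTrainOp and
# StopAfterNTrees hooks implement above: train the DNN branch for dnn_steps_to_train
# steps, then hand training over to the GBDT branch until num_trees trees are
# finalized. The loop and counters are illustrative stand-ins for the hooks.
dnn_steps_to_train = 5
num_trees = 3

trees_finalized = 0
for step in range(100):
    if step < dnn_steps_to_train:
        active_branch = "dnn"        # SwitchTrainOp runs the DNN train op
    else:
        active_branch = "tree"       # after the switch, the GBDT train op runs
        trees_finalized += 1         # stand-in for finalizing one more tree
    if trees_finalized >= num_trees:
        break                        # StopAfterNTrees ends training here

assert (step, trees_finalized) == (7, 3)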
def _dnn_tree_combined_model_fn(features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") # Build DNN Logits. 
dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if use_core_versions: model_fn_ops = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() model_fn_ops.training_hooks.extend([ trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ]) return model_fn_ops
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = (params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" # Synthetic minority over-sampling technique # to overcome the lack of B and S signals in the training data if mode == model_fn.ModeKeys.TRAIN: sm = SMOTE(ratio=0.1, k=5, kind='regular', random_state=10) sess = tf.Session(config=tf.ConfigProto(operation_timeout_in_ms=500)) with sess: features, labels = sm.fit_sample(features, labels.eval().ravel()) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope(parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer(features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 
"hiddenlayer_%d" % layer_id, values=(net, )) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope("logits", values=(net, )) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope.name)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. Note that a string containing the unqualified name of the op may also be provided, e.g., "relu", "tanh", or "sigmoid". * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = nn.relu dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") features = _get_feature_dict(features) parent_scope = "dnn" with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(features.values())) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer(features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) with variable_scope.variable_scope(parent_scope, values=tuple(features.values())): for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net, )) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope("logits", values=(net, )) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=optimizer(), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
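# The docstring above allows activation_fn to be passed as an unqualified op name
# ("relu", "tanh", "sigmoid") as well as a callable. A minimal sketch of resolving
# such a string to a function; the lookup table is hypothetical and only covers the
# documented names.
import math

_ACTIVATIONS = {
    "relu": lambda x: max(0.0, x),
    "tanh": math.tanh,
    "sigmoid": lambda x: 1.0 / (1.0 + math.exp(-x)),
}

def resolve_activation(activation_fn):
    """Return activation_fn unchanged if it is callable, else look the name up."""
    return activation_fn if callable(activation_fn) else _ACTIVATIONS[activation_fn]

assert resolve_activation("relu")(-2.0) == 0.0
assert resolve_activation(math.tanh) is math.tanh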
def extract_features(features, feature_columns): """Extracts columns from a dictionary of features. Args: features: `dict` of `Tensor` objects. feature_columns: A list of feature_columns. Returns: Seven values: - A list of all feature column names. - A list of dense floats. - A list of sparse float feature indices. - A list of sparse float feature values. - A list of sparse float feature shapes. - A list of sparse int feature indices. - A list of sparse int feature values. - A list of sparse int feature shapes. Raises: ValueError: if features is not valid. """ if not features: raise ValueError("Features dictionary must be specified.") # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. features = copy.copy(features) if feature_columns: scope = "gbdt" with variable_scope.variable_scope(scope): feature_columns = list(feature_columns) transformed_features = {} for fc in feature_columns: # pylint: disable=protected-access if isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( features, [fc], weight_collections=[scope]) else: result = feature_column_ops.transform_features( features, [fc]) if len(result) > 1: raise ValueError( "Unexpected number of output features") transformed_features[fc.name] = result[list( result.keys())[0]] features = transformed_features dense_float_names = [] dense_floats = [] sparse_float_names = [] sparse_float_indices = [] sparse_float_values = [] sparse_float_shapes = [] sparse_int_names = [] sparse_int_indices = [] sparse_int_values = [] sparse_int_shapes = [] for key in sorted(features.keys()): tensor = features[key] if isinstance(tensor, sparse_tensor.SparseTensor): if tensor.values.dtype == dtypes.float32: sparse_float_names.append(key) sparse_float_indices.append(tensor.indices) sparse_float_values.append(tensor.values) sparse_float_shapes.append(tensor.dense_shape) elif tensor.values.dtype == dtypes.int64: sparse_int_names.append(key) sparse_int_indices.append(tensor.indices) sparse_int_values.append(tensor.values) sparse_int_shapes.append(tensor.dense_shape) else: raise ValueError( "Unsupported sparse feature %s with dtype %s." % (tensor.indices.name, tensor.dtype)) else: if tensor.dtype == dtypes.float32: if len(tensor.shape) > 1 and tensor.shape[1] > 1: unstacked = array_ops.unstack(tensor, axis=1) for i in range(len(unstacked)): dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i)) dense_floats.append( array_ops.reshape(unstacked[i], [-1, 1])) else: dense_float_names.append(key) dense_floats.append(tensor) else: raise ValueError( "Unsupported dense feature %s with dtype %s." % (tensor.name, tensor.dtype)) # Feature columns are logically organized into incrementing slots starting # from dense floats, then sparse floats then sparse ints. fc_names = (dense_float_names + sparse_float_names + sparse_int_names) return (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, sparse_int_shapes)
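# A NumPy sketch of how extract_features splits a multi-column dense float feature:
# a [batch, k] tensor with k > 1 is unstacked into k single-column features, each
# reshaped to [-1, 1]. The "<key>_<i>" naming below is an assumption standing in for
# _FEATURE_NAME_TEMPLATE.
import numpy as np

key, tensor = "pixels", np.arange(6.0).reshape(3, 2)    # [batch=3, k=2]

dense_float_names, dense_floats = [], []
for i in range(tensor.shape[1]):
    dense_float_names.append("%s_%d" % (key, i))
    dense_floats.append(tensor[:, i].reshape(-1, 1))

assert dense_float_names == ["pixels_0", "pixels_1"]
assert dense_floats[0].shape == (3, 1)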