def call(self, features, cols_to_output_tensors=None):
  """Returns a dense tensor corresponding to the `feature_columns`.

  Args:
    features: A mapping from key to tensors. `FeatureColumn`s look up via
      these keys. For example `numeric_column('price')` will look at 'price'
      key in this dict. Values can be a `SparseTensor` or a `Tensor`,
      depending on the corresponding `FeatureColumn`.
    cols_to_output_tensors: If not `None`, this will be filled with a dict
      mapping feature columns to output tensors created.

  Returns:
    A `Tensor` which represents input layer of a model. Its shape
    is (batch_size, first_layer_dimension) and its dtype is `float32`.
    first_layer_dimension is determined based on given `feature_columns`.

  Raises:
    ValueError: If features are not a dictionary.
  """
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  transformation_cache = fc.FeatureTransformationCache(features)
  output_tensors = []
  for column in self._feature_columns:
    with ops.name_scope(column.name):
      tensor = column.get_dense_tensor(transformation_cache,
                                       self._state_manager)
      processed_tensors = self._process_dense_tensor(column, tensor)
      if cols_to_output_tensors is not None:
        cols_to_output_tensors[column] = processed_tensors
      output_tensors.append(processed_tensors)
  return self._verify_and_concat_tensors(output_tensors)
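# Usage sketch (not part of the original file): drives the `call` above
# through the public `tf.compat.v1.keras.layers.DenseFeatures` wrapper.
# The column names and feature values are illustrative assumptions.
def _example_dense_features_usage():
  import tensorflow as tf

  price = tf.feature_column.numeric_column('price')
  size = tf.feature_column.numeric_column('size')
  feature_layer = tf.compat.v1.keras.layers.DenseFeatures([price, size])

  features = {'price': tf.constant([[1.0], [2.0]]),
              'size': tf.constant([[3.0], [4.0]])}
  cols_to_output = {}
  # Each column's processed output lands in `cols_to_output`; the return
  # value is the (batch_size, total_dimension) float32 concatenation.
  dense_tensor = feature_layer(features, cols_to_output_tensors=cols_to_output)
  return dense_tensor, cols_to_output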
def _get_sequence_dense_tensor_state(column, features):
  # Creates the column's variables via a state manager owned by a throwaway
  # DenseFeatures layer, then resolves the sequence dense tensor.
  state_manager = fc._StateManagerImpl(
      fc_lib.DenseFeatures(column), trainable=True)
  column.create_state(state_manager)
  dense_tensor, lengths = column.get_sequence_dense_tensor(
      fc.FeatureTransformationCache(features), state_manager)
  return dense_tensor, lengths, state_manager
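# Illustrative driver for the helper above, built only from public
# tf.feature_column APIs; the 'tokens' key and its values are made up
# for the example.
def _example_sequence_state_usage():
  import tensorflow as tf

  categorical = tf.feature_column.sequence_categorical_column_with_identity(
      'tokens', num_buckets=10)
  embedding = tf.feature_column.embedding_column(categorical, dimension=4)
  features = {
      'tokens': tf.sparse.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0]],
          values=[3, 7, 1],
          dense_shape=[2, 2]),
  }
  # dense_tensor: float32 [2, 2, 4]; lengths: [2, 1].
  dense_tensor, lengths, state_manager = _get_sequence_dense_tensor_state(
      embedding, features)
  return dense_tensor, lengths, state_manager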
def call(self, features, cols_to_output_tensors=None, training=None):
  """Returns a dense tensor corresponding to the `feature_columns`.

  Example usage:

  >>> t1 = tf.feature_column.embedding_column(
  ...     tf.feature_column.categorical_column_with_hash_bucket("t1", 2),
  ...     dimension=8)
  >>> t2 = tf.feature_column.numeric_column('t2')
  >>> feature_layer = tf.compat.v1.keras.layers.DenseFeatures([t1, t2])
  >>> features = {"t1": tf.constant(["a", "b"]), "t2": tf.constant([1, 2])}
  >>> dense_tensor = feature_layer(features, training=True)

  Args:
    features: A mapping from key to tensors. `FeatureColumn`s look up via
      these keys. For example `numeric_column('price')` will look at 'price'
      key in this dict. Values can be a `SparseTensor` or a `Tensor`,
      depending on the corresponding `FeatureColumn`.
    cols_to_output_tensors: If not `None`, this will be filled with a dict
      mapping feature columns to output tensors created.
    training: Python boolean or None, indicating whether the layer is being
      run in training mode. This argument is passed to the call method of
      any `FeatureColumn` that takes a `training` argument. For example, if
      a `FeatureColumn` performed dropout, the column could expose a
      `training` argument to control whether the dropout should be applied.
      If `None`, defaults to `tf.keras.backend.learning_phase()`.

  Returns:
    A `Tensor` which represents input layer of a model. Its shape
    is (batch_size, first_layer_dimension) and its dtype is `float32`.
    first_layer_dimension is determined based on given `feature_columns`.

  Raises:
    ValueError: If features are not a dictionary.
  """
  if training is None:
    training = backend.learning_phase()
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  transformation_cache = fc.FeatureTransformationCache(features)
  output_tensors = []
  for column in self._feature_columns:
    with backend.name_scope(column.name):
      try:
        tensor = column.get_dense_tensor(
            transformation_cache, self._state_manager, training=training)
      except TypeError:
        tensor = column.get_dense_tensor(transformation_cache,
                                         self._state_manager)
      processed_tensors = self._process_dense_tensor(column, tensor)
      if cols_to_output_tensors is not None:
        cols_to_output_tensors[column] = processed_tensors
      output_tensors.append(processed_tensors)
  return self._verify_and_concat_tensors(output_tensors)
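# The try/except TypeError above implements a capability probe: columns whose
# `get_dense_tensor` accepts a `training` kwarg receive it, older columns are
# called without it. A self-contained sketch of the same pattern (both column
# classes below are hypothetical, not real tf.feature_column types):
def _example_training_kwarg_dispatch():
  class ModernColumn:
    def get_dense_tensor(self, cache, state_manager, training=None):
      return 'dense (training=%s)' % training

  class LegacyColumn:
    def get_dense_tensor(self, cache, state_manager):
      return 'dense'

  outputs = []
  for column in (ModernColumn(), LegacyColumn()):
    try:
      outputs.append(column.get_dense_tensor(None, None, training=True))
    except TypeError:
      # The legacy signature rejects the kwarg; fall back to the old call.
      outputs.append(column.get_dense_tensor(None, None))
  return outputs  # ['dense (training=True)', 'dense']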
def call(self, features, training=None):
  """Returns sequence input corresponding to the `feature_columns`.

  Args:
    features: A dict mapping keys to tensors.
    training: Python boolean or None, indicating whether the layer is being
      run in training mode. This argument is passed to the call method of
      any `FeatureColumn` that takes a `training` argument. For example, if
      a `FeatureColumn` performed dropout, the column could expose a
      `training` argument to control whether the dropout should be applied.
      If `None`, defaults to `tf.keras.backend.learning_phase()`.

  Returns:
    An `(input_layer, sequence_length)` tuple where:
    - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. `T` is
      the maximum sequence length for this batch, which could differ from
      batch to batch. `D` is the sum of `num_elements` for all
      `feature_columns`.
    - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
      length for each example.

  Raises:
    ValueError: If features are not a dictionary.
  """
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  if training is None:
    training = backend.learning_phase()
  transformation_cache = fc.FeatureTransformationCache(features)
  output_tensors = []
  sequence_lengths = []
  for column in self._feature_columns:
    with backend.name_scope(column.name):
      try:
        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
            transformation_cache, self._state_manager, training=training)
      except TypeError:
        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
            transformation_cache, self._state_manager)
      # Flattens the final dimension to produce a 3D Tensor.
      output_tensors.append(self._process_dense_tensor(column, dense_tensor))
      sequence_lengths.append(sequence_length)

  # Check and process sequence lengths.
  fc._verify_static_batch_size_equality(sequence_lengths,
                                        self._feature_columns)
  sequence_length = _assert_all_equal_and_return(sequence_lengths)

  return self._verify_and_concat_tensors(output_tensors), sequence_length
def call(self, features):
  """Returns sequence input corresponding to the `feature_columns`.

  Args:
    features: A dict mapping keys to tensors.

  Returns:
    An `(input_layer, sequence_length)` tuple where:
    - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. `T` is
      the maximum sequence length for this batch, which could differ from
      batch to batch. `D` is the sum of `num_elements` for all
      `feature_columns`.
    - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
      length for each example.

  Raises:
    ValueError: If features are not a dictionary.
  """
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  transformation_cache = fc.FeatureTransformationCache(features)
  output_tensors = []
  sequence_lengths = []
  for column in self._feature_columns:
    with ops.name_scope(column.name):
      dense_tensor, sequence_length = column.get_sequence_dense_tensor(
          transformation_cache, self._state_manager)
      # Flattens the final dimension to produce a 3D Tensor.
      output_tensors.append(self._process_dense_tensor(column, dense_tensor))
      sequence_lengths.append(sequence_length)

  # Check and process sequence lengths.
  fc._verify_static_batch_size_equality(sequence_lengths,
                                        self._feature_columns)
  sequence_length = _assert_all_equal_and_return(sequence_lengths)

  return self._verify_and_concat_tensors(output_tensors), sequence_length
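# Usage sketch for the sequence `call` variants above, via the public
# `tf.keras.experimental.SequenceFeatures` layer. The 'rating' column and
# its values are illustrative assumptions.
def _example_sequence_features_usage():
  import tensorflow as tf

  rating = tf.feature_column.sequence_numeric_column('rating')
  sequence_layer = tf.keras.experimental.SequenceFeatures([rating])
  features = {
      'rating': tf.sparse.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0]],
          values=[1.0, 2.0, 3.0],
          dense_shape=[2, 2]),
  }
  # sequence_input: float32 [batch=2, T=2, D=1]; sequence_length: [2, 1].
  sequence_input, sequence_length = sequence_layer(features)
  return sequence_input, sequence_length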
def call(self, features, cols_to_output_tensors=None):
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  using_features = self.filter_not_used_features(features)
  transformation_cache = fc.FeatureTransformationCache(using_features)
  self.sparse_pulling_features = self.get_sparse_pulling_feature(
      using_features)
  # Pull the latest values for the sparse features before the dense lookup.
  pulled_mapping_values = self._state_manager.pull(
      self.sparse_pulling_features)
  output_tensors = []
  for column in self._feature_columns:
    if column.categorical_column.name not in pulled_mapping_values:
      raise ValueError('Column %s not found in pulled_mapping_values.' %
                       column.categorical_column.name)
    mapping_value = pulled_mapping_values[column.categorical_column.name]
    # Ensure the pull completes before the lookup ops run.
    with ops.control_dependencies([mapping_value]):
      tensor = column.get_dense_tensor(transformation_cache,
                                       self._state_manager)
      processed_tensors = self._process_dense_tensor(column, tensor)
      if cols_to_output_tensors is not None:
        cols_to_output_tensors[column] = processed_tensors
      output_tensors.append(processed_tensors)
  if self.is_concat:
    return self._verify_and_concat_tensors(output_tensors)
  else:
    return output_tensors
def _get_sparse_tensors(column, features):
  return column.get_sparse_tensors(
      fc.FeatureTransformationCache(features), None)
def _get_sequence_dense_tensor(column, features):
  return column.get_sequence_dense_tensor(
      fc.FeatureTransformationCache(features), None)
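# Illustrative call of the helpers above. Passing `None` as the state manager
# works only for stateless columns such as sequence_numeric_column; the 'x'
# feature name and values are assumptions for the example.
def _example_stateless_helpers():
  import tensorflow as tf

  column = tf.feature_column.sequence_numeric_column('x')
  features = {
      'x': tf.sparse.SparseTensor(
          indices=[[0, 0], [1, 0], [1, 1]],
          values=[5.0, 6.0, 7.0],
          dense_shape=[2, 2]),
  }
  # Returns a (dense_tensor [2, 2, 1], sequence_length [1, 2]) pair.
  return _get_sequence_dense_tensor(column, features)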
def fc_fn(tensors):
  # The transformed output is intentionally discarded; only building the
  # transformation matters here.
  fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)
def get_train_step(self, state_manager, weight_column_name, loss_type,
                   feature_columns, features, targets, bias_var, global_step):
  """Returns the training operation of an SdcaModel optimizer."""
  batch_size = array_ops.shape(targets)[0]
  cache = feature_column_v2.FeatureTransformationCache(features)

  # Iterate over all feature columns and create appropriate lists for dense
  # and sparse features as well as dense and sparse weights (variables) for
  # SDCA.
  dense_features, dense_feature_weights = [], []
  sparse_feature_with_values, sparse_feature_with_values_weights = [], []
  for column in sorted(feature_columns, key=lambda x: x.name):
    if isinstance(column, feature_column_v2.CategoricalColumn):
      id_weight_pair = column.get_sparse_tensors(cache, state_manager)
      sparse_feature_with_values.append(
          self._prune_and_unique_sparse_ids(id_weight_pair))
      # If a partitioner was used during variable creation, we will have a
      # list here containing more than one Variable.
      sparse_feature_with_values_weights.append(
          state_manager.get_variable(column, 'weights'))
    elif isinstance(column, feature_column_v2.DenseColumn):
      if column.variable_shape.ndims != 1:
        raise ValueError('Column %s has rank %d, larger than 1.' %
                         (type(column).__name__, column.variable_shape.ndims))
      dense_features.append(column.get_dense_tensor(cache, state_manager))
      # For real valued columns, the variables list contains exactly one
      # element.
      dense_feature_weights.append(
          state_manager.get_variable(column, 'weights'))
    else:
      raise ValueError('LinearSDCA does not support column type %s.' %
                       type(column).__name__)

  # Add the bias column.
  dense_features.append(array_ops.ones([batch_size, 1]))
  dense_feature_weights.append(bias_var)

  example_weights = array_ops.reshape(
      features[weight_column_name],
      shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
  example_ids = features[self._example_id_column]
  training_examples = dict(
      sparse_features=sparse_feature_with_values,
      dense_features=dense_features,
      example_labels=math_ops.to_float(
          array_ops.reshape(targets, shape=[-1])),
      example_weights=example_weights,
      example_ids=example_ids)
  training_variables = dict(
      sparse_features_weights=sparse_feature_with_values_weights,
      dense_features_weights=dense_feature_weights)
  sdca_model = sdca_ops._SDCAModel(  # pylint: disable=protected-access
      examples=training_examples,
      variables=training_variables,
      options=dict(
          symmetric_l1_regularization=self._symmetric_l1_regularization,
          symmetric_l2_regularization=self._symmetric_l2_regularization,
          adaptive=self._adaptive,
          num_loss_partitions=self._num_loss_partitions,
          num_table_shards=self._num_table_shards,
          loss_type=loss_type))
  train_op = sdca_model.minimize(global_step=global_step)
  return sdca_model, train_op
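# The "bias column" step above appends a constant ones feature so the bias is
# optimized as an ordinary dense weight. A standalone NumPy illustration of
# that identity (not part of the SDCA code; all values are made up):
def _example_bias_as_ones_column():
  import numpy as np

  x = np.array([[1.0, 2.0], [3.0, 4.0]])            # [batch, d] features
  w = np.array([0.5, -0.25])                        # dense weights
  b = 0.1                                           # bias

  x_aug = np.hstack([x, np.ones((x.shape[0], 1))])  # append ones column
  w_aug = np.append(w, b)                           # bias joins the weights
  # x_aug @ w_aug reproduces x @ w + b exactly.
  assert np.allclose(x_aug @ w_aug, x @ w + b)
  return x_aug, w_aug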