def _tensor_to_sparse_feature_column(dense_tensor):
    """Returns SparseFeatureColumn for the input dense_tensor.

    Entries equal to 0.0 (cast to the dtype of `dense_tensor`) are ignored;
    every remaining entry contributes its coordinates and its value to the
    returned column.

    Args:
      dense_tensor: Dense tensor to sparsify. Assumed to be rank 2, because the
        coordinates of its kept entries are split into exactly two index
        columns -- TODO confirm against callers.

    Returns:
      An `sdca_ops.SparseFeatureColumn` built from three flattened tensors:
      the first coordinate of each kept entry, the second coordinate of each
      kept entry, and the kept values converted to float.
    """
    ignore_value = 0.0
    sparse_indices = array_ops.where(math_ops.not_equal(
        dense_tensor, math_ops.cast(ignore_value, dense_tensor.dtype)))
    sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
    # Build the split op once and reuse both halves, instead of adding the
    # same split to the graph twice just to pick a different element from it.
    # NOTE(review): column 0 is presumably the example (row) index and column 1
    # the feature index -- confirm against SparseFeatureColumn's signature.
    index_columns = array_ops.split(1, 2, sparse_indices)
    # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Makes this efficient, as now SDCA supports
    # very sparse features with weights and not weights.
    return sdca_ops.SparseFeatureColumn(
        array_ops.reshape(index_columns[0], [-1]),
        array_ops.reshape(index_columns[1], [-1]),
        array_ops.reshape(math_ops.to_float(sparse_values), [-1]))
def _training_examples_and_variables():
    """Builds the SDCA example and variable dictionaries.

    Reads `targets`, `features`, `columns_to_variables`, `weight_column_name`
    and `self._example_id_column` from the enclosing scope.

    Returns:
      A pair `(examples, sdca_variables)`. `examples` has the keys
      `sparse_features`, `dense_features`, `example_labels`, `example_weights`
      and `example_ids`. `sdca_variables` has the keys
      `sparse_features_weights` and `dense_features_weights`. In both sparse
      lists, the entries for bucketized and weighted sparse columns come
      first, followed by the entries for crossed and plain sparse columns.

    Raises:
      ValueError: If a column is not one of the column types handled below.
    """
    batch_size = targets.get_shape()[0]

    # pylint: disable=protected-access
    fc = layers.feature_column

    def _first_index_column(sparse_tensor):
        """Returns the first of the two index columns, flattened to 1-D."""
        # NOTE(review): assumes `indices` has exactly two columns -- confirm.
        leading = array_ops.split(1, 2, sparse_tensor.indices)[0]
        return array_ops.reshape(leading, [-1])

    # Dense and sparse inputs are gathered separately together with their
    # matching weights (variables), because SDCA takes them as separate lists.
    # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
    # dict as 1-dimensional tensors.
    dense_inputs = []
    dense_weights = []
    valued_sparse_inputs = []
    valued_sparse_weights = []
    plain_sparse_inputs = []
    plain_sparse_weights = []

    for column in sorted(columns_to_variables.keys(), key=lambda c: c.key):
        column_tensor = features[column]

        if isinstance(column, fc._RealValuedColumn):
            # Real-valued columns map to SDCA dense features. Their transformed
            # tensor has rank 2 (typically [batch_size, column.dimension]), so
            # it is handed to SDCA unchanged.
            dense_inputs.append(column_tensor)
            # Exactly one variable is stored for a real-valued column.
            dense_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, fc._BucketizedColumn):
            # Bucketized columns map to SDCA sparse features: the one-hot
            # encoding of the bucketized feature is converted into a
            # SparseFeatureColumn.
            one_hot_buckets = layers.input_from_feature_columns(
                {column: column_tensor}, [column])
            valued_sparse_inputs.append(
                _tensor_to_sparse_feature_column(one_hot_buckets))
            # Exactly one variable is stored for a bucketized column.
            valued_sparse_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, (fc._CrossedColumn, fc._SparseColumn)):
            # No per-feature values for these columns, hence the trailing None.
            plain_sparse_inputs.append(sdca_ops.SparseFeatureColumn(
                _first_index_column(column_tensor),
                array_ops.reshape(column_tensor.values, [-1]),
                None))
            plain_sparse_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, fc._WeightedSparseColumn):
            ids = column.id_tensor(column_tensor)
            id_weights = column.weight_tensor(column_tensor)
            valued_sparse_inputs.append(sdca_ops.SparseFeatureColumn(
                _first_index_column(ids),
                array_ops.reshape(ids.values, [-1]),
                array_ops.reshape(id_weights.values, [-1])))
            valued_sparse_weights.append(columns_to_variables[column][0])
            continue

        raise ValueError('SDCAOptimizer does not support column type %s.' %
                         type(column).__name__)
    # pylint: enable=protected-access

    # Without a weight column every example gets weight 1.
    if weight_column_name:
        example_weights = array_ops.reshape(
            features[weight_column_name], shape=[-1])
    else:
        example_weights = array_ops.ones([batch_size])
    example_ids = features[self._example_id_column]
    example_labels = math_ops.to_float(array_ops.reshape(targets, shape=[-1]))

    examples = {
        'sparse_features': valued_sparse_inputs + plain_sparse_inputs,
        'dense_features': dense_inputs,
        'example_labels': example_labels,
        'example_weights': example_weights,
        'example_ids': example_ids,
    }
    sdca_variables = {
        'sparse_features_weights': valued_sparse_weights + plain_sparse_weights,
        'dense_features_weights': dense_weights,
    }
    return examples, sdca_variables
def _training_examples_and_variables():
    """Builds the SDCA example and variable dictionaries.

    Reads `targets`, `features`, `linear_feature_columns`,
    `columns_to_variables`, `weight_column_name` and
    `self._example_id_column` from the enclosing scope.

    Returns:
      A pair `(examples, sdca_variables)`. `examples` has the keys
      `sparse_features`, `dense_features`, `example_labels`, `example_weights`
      and `example_ids`. `sdca_variables` has the keys
      `sparse_features_weights` and `dense_features_weights`. In both sparse
      lists, the entries for bucketized and weighted sparse columns come
      first, followed by the entries for crossed and plain sparse columns.

    Raises:
      ValueError: If a real-valued column has a dimension other than 1, or if
        a column is not one of the column types handled below.
    """
    batch_size = targets.get_shape()[0]

    # pylint: disable=protected-access
    fc = layers.feature_column

    def _first_index_column(sparse_tensor):
        """Returns the first of the two index columns, flattened to 1-D."""
        # NOTE(review): assumes `indices` has exactly two columns -- confirm.
        leading = array_ops.split(1, 2, sparse_tensor.indices)[0]
        return array_ops.reshape(leading, [-1])

    # Dense and sparse inputs are gathered separately together with their
    # matching weights (variables), because SDCA takes them as separate lists.
    # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
    # dict as 1-dimensional tensors.
    dense_inputs = []
    dense_weights = []
    valued_sparse_inputs = []
    valued_sparse_weights = []
    plain_sparse_inputs = []
    plain_sparse_weights = []

    # Duplicates are dropped via set(); sorting by key fixes the visiting order.
    for column in sorted(set(linear_feature_columns), key=lambda c: c.key):
        column_tensor = features[column]

        if isinstance(column, fc._RealValuedColumn):
            # Real-valued columns map to SDCA dense features.
            if column.dimension != 1:
                raise ValueError(
                    "Invalid column dimension %d for column %s. SDCAOptimizer "
                    "supports only 1-dimensional dense feature columns." %
                    (column.dimension, column.name))
            # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): SDCA supports efficient dense representation.
            # Perhaps concat dense features for efficiency.
            dense_inputs.append(array_ops.reshape(column_tensor, shape=[-1, 1]))
            # Exactly one variable is stored for a real-valued column.
            dense_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, fc._BucketizedColumn):
            # Bucketized columns map to SDCA sparse features: the one-hot
            # encoding of the bucketized feature is converted into a
            # SparseFeatureColumn.
            one_hot_buckets = column.to_dnn_input_layer(column_tensor)
            valued_sparse_inputs.append(
                _tensor_to_sparse_feature_column(one_hot_buckets))
            # Exactly one variable is stored for a bucketized column.
            valued_sparse_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, (fc._CrossedColumn, fc._SparseColumn)):
            # No per-feature values for these columns, hence the trailing None.
            plain_sparse_inputs.append(sdca_ops.SparseFeatureColumn(
                _first_index_column(column_tensor),
                array_ops.reshape(column_tensor.values, [-1]),
                None))
            plain_sparse_weights.append(columns_to_variables[column][0])
            continue

        if isinstance(column, fc._WeightedSparseColumn):
            ids = column.id_tensor(column_tensor)
            id_weights = column.weight_tensor(column_tensor)
            valued_sparse_inputs.append(sdca_ops.SparseFeatureColumn(
                _first_index_column(ids),
                array_ops.reshape(ids.values, [-1]),
                array_ops.reshape(id_weights.values, [-1])))
            valued_sparse_weights.append(columns_to_variables[column][0])
            continue

        raise ValueError(
            "SDCAOptimizer does not support column type %s." %
            type(column).__name__)
    # pylint: enable=protected-access

    # Without a weight column every example gets weight 1.
    if weight_column_name:
        example_weights = array_ops.reshape(
            features[weight_column_name], shape=[-1])
    else:
        example_weights = array_ops.ones([batch_size])
    example_ids = features[self._example_id_column]
    example_labels = math_ops.to_float(array_ops.reshape(targets, shape=[-1]))

    examples = {
        "sparse_features": valued_sparse_inputs + plain_sparse_inputs,
        "dense_features": dense_inputs,
        "example_labels": example_labels,
        "example_weights": example_weights,
        "example_ids": example_ids,
    }
    sdca_variables = {
        "sparse_features_weights": valued_sparse_weights + plain_sparse_weights,
        "dense_features_weights": dense_weights,
    }
    return examples, sdca_variables