Example #1
 def _tensor_to_sparse_feature_column(dense_tensor):
   """Returns SparseFeatureColumn for the input dense_tensor."""
   ignore_value = 0.0
   sparse_indices = array_ops.where(math_ops.not_equal(
       dense_tensor, math_ops.cast(ignore_value, dense_tensor.dtype)))
   sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
   # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Make this more efficient, as SDCA now
   # supports very sparse features with and without weights.
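   # sparse_indices has shape [num_nonzero, 2]; the split(1, 2, ...) calls
   # below use the older split(split_dim, num_split, value) signature to
   # separate the example (row) indices from the feature (column) indices
   # that SparseFeatureColumn expects.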
   return sdca_ops.SparseFeatureColumn(
       array_ops.reshape(
           array_ops.split(1, 2, sparse_indices)[0], [-1]),
       array_ops.reshape(
           array_ops.split(1, 2, sparse_indices)[1], [-1]),
       array_ops.reshape(
           math_ops.to_float(sparse_values), [-1]))
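As a reading aid, the snippet below reproduces the same transformation with plain
NumPy (an illustrative sketch only; the example above works on tensors via
array_ops/math_ops, and SparseFeatureColumn is the tf.contrib SDCA container):
the nonzero coordinates of the dense tensor become the example and feature
indices, and the nonzero entries become the values.

import numpy as np

dense = np.array([[0., 1., 0.],
                  [0., 0., 2.]])            # e.g. a one-hot bucketized batch
nonzero = np.argwhere(dense != 0.0)         # analogue of array_ops.where(...)
example_ids = nonzero[:, 0]                 # first half of the index split
feature_ids = nonzero[:, 1]                 # second half of the index split
values = dense[dense != 0.0].astype(np.float32)

print(example_ids)  # [0 1]
print(feature_ids)  # [1 2]
print(values)       # [1. 2.]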
Example #2
    def _training_examples_and_variables():
      """Returns dictionaries for training examples and variables."""
      batch_size = targets.get_shape()[0]

      # Iterate over all feature columns and create appropriate lists for dense
      # and sparse features as well as dense and sparse weights (variables) for
      # SDCA.
      # TODO(sibyl-vie3Poto): Reshape variables stored as values in the
      # columns_to_variables dict into 1-dimensional tensors.
      dense_features, sparse_features, sparse_feature_with_values = [], [], []
      dense_feature_weights = []
      sparse_feature_weights, sparse_feature_with_values_weights = [], []
      # pylint: disable=protected-access
      for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):
          # A real-valued column corresponds to a dense feature in SDCA. A
          # transformed tensor corresponding to a RealValuedColumn has rank 2
          # (its shape is typically [batch_size, column.dimension]) and so it
          # can be passed to SDCA as is.
          dense_features.append(transformed_tensor)
          # For real valued columns, the variables list contains exactly one
          # element.
          dense_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):
          # A bucketized column corresponds to a sparse feature in SDCA. The
          # bucketized feature is "sparsified" for SDCA by converting it to a
          # SparseFeatureColumn representing the one-hot encoding of the
          # bucketized feature.
          dense_bucket_tensor = layers.input_from_feature_columns(
              {column: transformed_tensor}, [column])
          sparse_feature_column = _tensor_to_sparse_feature_column(
              dense_bucket_tensor)
          sparse_feature_with_values.append(sparse_feature_column)
          # For bucketized columns, the variables list contains exactly one
          # element.
          sparse_feature_with_values_weights.append(
              columns_to_variables[column][0])
        elif isinstance(column, (layers.feature_column._CrossedColumn,
                                 layers.feature_column._SparseColumn)):
          sparse_features.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(1, 2, transformed_tensor.indices)[0], [-1]),
              array_ops.reshape(transformed_tensor.values, [-1]), None))
          sparse_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
          id_tensor = column.id_tensor(transformed_tensor)
          weight_tensor = column.weight_tensor(transformed_tensor)
          sparse_feature_with_values.append(sdca_ops.SparseFeatureColumn(
              array_ops.reshape(
                  array_ops.split(1, 2, id_tensor.indices)[0], [-1]),
              array_ops.reshape(id_tensor.values, [-1]), array_ops.reshape(
                  weight_tensor.values, [-1])))
          sparse_feature_with_values_weights.append(
              columns_to_variables[column][0])
        else:
          raise ValueError('SDCAOptimizer does not support column type %s.' %
                           type(column).__name__)
      # pylint: enable=protected-access

      example_weights = array_ops.reshape(
          features[weight_column_name],
          shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
      example_ids = features[self._example_id_column]
      sparse_feature_with_values.extend(sparse_features)
      sparse_feature_with_values_weights.extend(sparse_feature_weights)
      examples = dict(sparse_features=sparse_feature_with_values,
                      dense_features=dense_features,
                      example_labels=math_ops.to_float(array_ops.reshape(
                          targets, shape=[-1])),
                      example_weights=example_weights,
                      example_ids=example_ids)
      sdca_variables = dict(
          sparse_features_weights=sparse_feature_with_values_weights,
          dense_features_weights=dense_feature_weights)
      return examples, sdca_variables
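The two dicts returned here are consumed by the surrounding get_train_step logic,
which is not part of this example. Below is a hedged sketch of that hand-off,
assuming the SdcaModel(examples, variables, options) constructor from
tf.contrib.linear_optimizer of the same era; the attribute and argument names
(self._symmetric_l1_regularization, loss_type, global_step) come from the
enclosing optimizer and should be treated as assumptions here.

examples, sdca_variables = _training_examples_and_variables()
sdca_model = sdca_ops.SdcaModel(
    examples=examples,           # sparse/dense features, labels, weights, ids
    variables=sdca_variables,    # matching sparse/dense weight variables
    options=dict(
        symmetric_l1_regularization=self._symmetric_l1_regularization,
        symmetric_l2_regularization=self._symmetric_l2_regularization,
        loss_type=loss_type))    # e.g. 'logistic_loss' or 'squared_loss'
train_op = sdca_model.minimize(global_step=global_step)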
Example #3
        def _training_examples_and_variables():
            """Returns dictionaries for training examples and variables."""
            batch_size = targets.get_shape()[0]

            # Iterate over all feature columns and create appropriate lists for dense
            # and sparse features as well as dense and sparse weights (variables) for
            # SDCA.
            # TODO(sibyl-vie3Poto): Reshape variables stored as values in the
            # columns_to_variables dict into 1-dimensional tensors.
            dense_features, sparse_features, sparse_feature_with_values = [], [], []
            dense_feature_weights = []
            sparse_feature_weights, sparse_feature_with_values_weights = [], []
            # pylint: disable=protected-access
            for column in sorted(set(linear_feature_columns),
                                 key=lambda x: x.key):
                transformed_tensor = features[column]
                if isinstance(column, layers.feature_column._RealValuedColumn):
                    # A real-valued column corresponds to a dense feature in SDCA.
                    if column.dimension != 1:
                        raise ValueError(
                            "Invalid column dimension %d for column %s. SDCAOptimizer "
                            "supports only 1-dimensional dense feature columns."
                            % (column.dimension, column.name))

                    # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): SDCA supports an efficient dense
                    # representation; consider concatenating dense features for efficiency.
                    dense_features.append(
                        array_ops.reshape(transformed_tensor, shape=[-1, 1]))
                    # For real valued columns, the variables list contains exactly one
                    # element.
                    dense_feature_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._BucketizedColumn):
                    # A bucketized column corresponds to a sparse feature in SDCA. The
                    # bucketized feature is "sparsified" for SDCA by converting it to a
                    # SparseFeatureColumn representing the one-hot encoding of the
                    # bucketized feature.
                    dense_bucket_tensor = column.to_dnn_input_layer(
                        transformed_tensor)
                    sparse_feature_column = _tensor_to_sparse_feature_column(
                        dense_bucket_tensor)
                    sparse_feature_with_values.append(sparse_feature_column)
                    # For bucketized columns, the variables list contains exactly one
                    # element.
                    sparse_feature_with_values_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column, (layers.feature_column._CrossedColumn,
                                         layers.feature_column._SparseColumn)):
                    sparse_features.append(
                        sdca_ops.SparseFeatureColumn(
                            array_ops.reshape(
                                array_ops.split(1, 2,
                                                transformed_tensor.indices)[0],
                                [-1]),
                            array_ops.reshape(transformed_tensor.values, [-1]),
                            None))
                    sparse_feature_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._WeightedSparseColumn):
                    id_tensor = column.id_tensor(transformed_tensor)
                    weight_tensor = column.weight_tensor(transformed_tensor)
                    sparse_feature_with_values.append(
                        sdca_ops.SparseFeatureColumn(
                            array_ops.reshape(
                                array_ops.split(1, 2,
                                                id_tensor.indices)[0], [-1]),
                            array_ops.reshape(id_tensor.values, [-1]),
                            array_ops.reshape(weight_tensor.values, [-1])))
                    sparse_feature_with_values_weights.append(
                        columns_to_variables[column][0])
                else:
                    raise ValueError(
                        "SDCAOptimizer does not support column type %s." %
                        type(column).__name__)
            # pylint: enable=protected-access

            example_weights = array_ops.reshape(
                features[weight_column_name],
                shape=[-1]) if weight_column_name else array_ops.ones(
                    [batch_size])
            example_ids = features[self._example_id_column]
            sparse_feature_with_values.extend(sparse_features)
            sparse_feature_with_values_weights.extend(sparse_feature_weights)
            examples = dict(sparse_features=sparse_feature_with_values,
                            dense_features=dense_features,
                            example_labels=math_ops.to_float(
                                array_ops.reshape(targets, shape=[-1])),
                            example_weights=example_weights,
                            example_ids=example_ids)
            sdca_variables = dict(
                sparse_features_weights=sparse_feature_with_values_weights,
                dense_features_weights=dense_feature_weights)
            return examples, sdca_variables
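Compared with Example #2, this variant only accepts 1-dimensional real-valued
columns and reshapes the transformed tensor into an explicit [batch_size, 1]
matrix before adding it to dense_features. A minimal NumPy sketch of just that
reshape (illustration only):

import numpy as np

transformed = np.array([0.3, 1.7, 2.2])     # shape [batch_size]
dense_feature = transformed.reshape(-1, 1)  # shape [batch_size, 1], mirroring
                                            # array_ops.reshape(transformed_tensor,
                                            #                   shape=[-1, 1])
print(dense_feature.shape)                  # (3, 1)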