Example #1
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
  """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

    occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
    occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                     combiner="sum")
    age = real_valued_column("age")
    age_buckets = bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    occupation_x_age = crossed_column(columns=[occupation, age_buckets],
                                      hash_bucket_size=10000)

    feature_columns=[occupation_emb, occupation_x_age]

  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
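    # Sort the columns by key so variables are created in a deterministic
    # order across runs.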
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      with variable_scope.variable_scope(
          None,
          default_name=column.name,
          values=columns_to_tensors.values()):
        try:
          transformed_tensor = transformer.transform(column)
          predictions, variable = column.to_weighted_sum(transformed_tensor,
                                                         num_outputs,
                                                         weight_collections,
                                                         trainable)
        except ValueError as e:
          raise ValueError('Error creating weighted sum for column: {}.\n'
                           '{}'.format(column.name, e))
      output_tensors.append(predictions)
      column_to_variable[column] = variable
      _log_variable(variable)

    # Sum the per-column predictions, then add a shared bias term.
    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer,
        collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
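
The returned triple can be fed straight into a loss, and the per-column
variable map is handy for column-wise inspection or regularization. A minimal,
hypothetical usage sketch follows (the contrib-era TF 1.x graph-mode API and
the `tf.contrib.layers` namespace are assumed; feature names and shapes are
made up for illustration):

    import tensorflow as tf
    from tensorflow.contrib import layers

    age = layers.real_valued_column("age")
    age_buckets = layers.bucketized_column(age, boundaries=[18, 25, 35, 65])
    feature_columns = [age_buckets]

    features = {"age": tf.placeholder(tf.float32, shape=[None, 1])}
    labels = tf.placeholder(tf.float32, shape=[None, 1])

    logits, column_to_variable, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1))

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))

    # Column-wise L2 penalty built from the returned variable map. Depending
    # on the TF version, each value may be a Variable or a list of Variables.
    l2_terms = []
    for v in column_to_variable.values():
        for var in (v if isinstance(v, (list, tuple)) else [v]):
            l2_terms.append(tf.nn.l2_loss(var))
    loss += 0.01 * tf.add_n(l2_terms)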
Example #2
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
    """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

    occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
    occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                     combiner="sum")
    age = real_valued_column("age")
    age_buckets = bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    occupation_x_age = crossed_column(columns=[occupation, age_buckets],
                                      hash_bucket_size=10000)

    feature_columns=[occupation_emb, occupation_x_age]

  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        output_tensors = []
        column_to_variable = dict()
        transformer = _Transformer(columns_to_tensors)
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
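            # Fast path: sparse columns can be converted directly into
            # embedding-lookup arguments for the weighted sum.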
            try:
                embedding_lookup_arguments = column._to_embedding_lookup_arguments(  # pylint: disable=protected-access
                    transformed_tensor)
                variable, predictions = _create_embedding_lookup(
                    column, columns_to_tensors, embedding_lookup_arguments,
                    num_outputs, trainable, weight_collections)
            except NotImplementedError:
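                # Dense fallback: densify the column and multiply it by a
                # zero-initialized weight matrix.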
                with variable_scope.variable_scope(
                        None,
                        default_name=column.name,
                        values=columns_to_tensors.values()):
                    tensor = column._to_dense_tensor(transformed_tensor)  # pylint: disable=protected-access
                    variable = [
                        contrib_variables.model_variable(
                            name='weight',
                            shape=[tensor.get_shape()[1], num_outputs],
                            initializer=init_ops.zeros_initializer,
                            collections=weight_collections)
                    ]
                    predictions = math_ops.matmul(tensor,
                                                  variable[0],
                                                  name='matmul')
            except ValueError as ee:
                raise ValueError(
                    'Error creating weighted sum for column: {}.\n'
                    '{}'.format(column.name, ee))
            output_tensors.append(predictions)
            column_to_variable[column] = variable
            _log_variable(variable)
            # Warm-start this column's weights from a checkpoint, if the
            # column was configured with a checkpoint path.
            _maybe_restore_from_checkpoint(column._checkpoint_path(), variable)  # pylint: disable=protected-access

        predictions_no_bias = math_ops.add_n(output_tensors)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer,
            collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, column_to_variable, bias
Example #3
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
    """A restricted linear prediction builder based on FeatureColumns.

  Provided that all feature columns are unweighted sparse columns, this
  computes the prediction of a linear model which stores all weights in a
  single variable.

  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A list of Variables storing the weights.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.

  """
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='joint_weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        transformer = _Transformer(columns_to_tensors)
        embedding_lookup_arguments = []
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments.append(
                    column._to_embedding_lookup_arguments(transformed_tensor))  # pylint: disable=protected-access
            except NotImplementedError:
                raise NotImplementedError(
                    'Real-valued columns are not supported. '
                    'Use weighted_sum_from_feature_columns '
                    'instead, or bucketize these columns.')

        variable, predictions_no_bias = _create_joint_embedding_lookup(
            columns_to_tensors, embedding_lookup_arguments, num_outputs,
            trainable, weight_collections)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer,
            collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, variable, bias
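
Unlike the per-column builder above, the joint variant concatenates all sparse
ids into a single lookup, so every weight lives in one (possibly partitioned)
variable. A minimal, hypothetical sketch of calling it (the contrib-era TF 1.x
API and the `tf.contrib.layers` namespace are assumed):

    import tensorflow as tf
    from tensorflow.contrib import layers

    # Two unweighted sparse columns -- the only kind this builder accepts.
    occupation = layers.sparse_column_with_hash_bucket(
        "occupation", hash_bucket_size=1000)
    country = layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=100)

    features = {
        "occupation": tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                      values=["engineer", "doctor"],
                                      dense_shape=[2, 1]),
        "country": tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                   values=["us", "ca"],
                                   dense_shape=[2, 1]),
    }

    logits, weights, bias = layers.joint_weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=[occupation, country],
        num_outputs=1)
    # `weights` is a list holding the joint weight variable(s), rather than
    # one variable per column.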
Example #4
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      name=None,
                                      trainable=True):
  """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

      query_word = sparse_column_with_hash_bucket(
        'query_word', hash_bucket_size=int(1e6))
      query_embedding = embedding_column(query_word, dimension=16)
      age_bucket = bucketized_column(real_valued_column('age'),
                                     boundaries=[18, 21, 30, 50, 70])
      query_age = crossed_column([query_word, age_bucket],
                                 hash_bucket_size=int(1e6))

      feature_columns=[query_embedding, query_age]


  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "weighted_sum_from_feature_columns" will be
      created. See `tf.variable_op_scope` for details.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  with variable_scope.variable_op_scope(columns_to_tensors.values(), name,
                                        'weighted_sum_from_feature_columns'):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      predictions, variable = column.to_weighted_sum(transformed_tensor,
                                                     num_outputs,
                                                     weight_collections,
                                                     trainable)
      output_tensors.append(predictions)
      column_to_variable[column] = variable
      _log_variable(variable)

    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = variables.Variable(
        array_ops.zeros([num_outputs]),
        collections=fc._add_variable_collection(weight_collections),  # pylint: disable=protected-access
        name='bias_weight')
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
Example #5
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      name=None,
                                      trainable=True):
  """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

    occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
    occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                     combiner="sum")
    age = real_valued_column("age")
    age_buckets = bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    occupation_x_age = crossed_column(columns=[occupation, age_buckets],
                                      hash_bucket_size=10000)

    feature_columns=[occupation_emb, occupation_x_age]

  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "weighted_sum_from_feature_columns" will be
      created. See `tf.variable_op_scope` for details.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_op_scope(columns_to_tensors.values(), name,
                                        'weighted_sum_from_feature_columns'):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      predictions, variable = column.to_weighted_sum(transformed_tensor,
                                                     num_outputs,
                                                     weight_collections,
                                                     trainable)
      output_tensors.append(predictions)
      column_to_variable[column] = variable
      _log_variable(variable)

    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = variables.Variable(
        array_ops.zeros([num_outputs]),
        collections=fc._add_variable_collection(weight_collections),  # pylint: disable=protected-access
        name='bias_weight')
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
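
These earlier revisions take a `name` argument (routed through
`variable_op_scope`) instead of `scope`. A hypothetical call against this
older signature, reusing the feature setup from the sketch under Example #1,
would look like:

    logits, column_to_variable, bias = weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        num_outputs=1,
        name="linear_model")  # older API: `name`, not `scope`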
Example #6
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
  """A restricted linear prediction builder based on FeatureColumns.

  Provided that all feature columns are unweighted sparse columns, this
  computes the prediction of a linear model which stores all weights in a
  single variable.

  Args:
    columns_to_tensors: A mapping from feature columns to tensors. A string key
      denotes a base (untransformed) feature. A FeatureColumn key means that
      the column has already been transformed by the input pipeline (for
      example, `inflow` may have handled the transformations).
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A list of Variables storing the weights.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.

  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='joint_weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    transformer = _Transformer(columns_to_tensors)
    embedding_lookup_arguments = []
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      try:
        embedding_lookup_arguments.append(
            column._to_embedding_lookup_arguments(transformed_tensor))   # pylint: disable=protected-access
      except NotImplementedError:
        raise NotImplementedError('Real-valued columns are not supported. '
                                  'Use weighted_sum_from_feature_columns '
                                  'instead, or bucketize these columns.')

    variable, predictions_no_bias = _create_joint_embedding_lookup(
        columns_to_tensors,
        embedding_lookup_arguments,
        num_outputs,
        trainable,
        weight_collections)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer,
        collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, variable, bias
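
As the NotImplementedError message suggests, a real-valued column can still
feed the joint builder once it is bucketized, since bucketization yields a
sparse categorical representation. A hypothetical sketch (contrib-era TF 1.x
API assumed; whether a given version accepts bucketized columns here should be
verified against its feature_column implementation):

    import tensorflow as tf
    from tensorflow.contrib import layers

    # Bucketizing converts the dense column into a sparse one (per the
    # error message above, this is the suggested workaround).
    age = layers.real_valued_column("age")
    age_buckets = layers.bucketized_column(
        age, boundaries=[18, 25, 35, 50, 65])

    features = {"age": tf.constant([[23.0], [41.0]])}
    logits, weights, bias = layers.joint_weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=[age_buckets],
        num_outputs=1)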