Example No. 1
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5,
                                 target_core=False,
                                 strip_default_attrs=False):
    # pylint: disable=line-too-long
    """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from `tf.estimator.Estimator`
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel.  Each key should give the destination
      path (including the filename) relative to the assets.extra directory.
      The corresponding value gives the full path of the source file to be
      copied.  For example, the simple case of copying a single file without
      renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.
    target_core: If True, prepare an ExportStrategy for use with
      tensorflow.python.estimator.*.  If False (default), prepare an
      ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
      removed from the NodeDefs. For a detailed guide, see
      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
    # pylint: enable=line-too-long
    feature_spec = feature_column.create_feature_spec_for_parsing(
        feature_columns)
    if target_core:
        serving_input_fn = (
            core_export.build_parsing_serving_input_receiver_fn(feature_spec))
    else:
        serving_input_fn = (
            input_fn_utils.build_parsing_serving_input_fn(feature_spec))
    return make_export_strategy(
        serving_input_fn,
        default_output_alternative_key=default_output_alternative_key,
        assets_extra=assets_extra,
        as_text=as_text,
        exports_to_keep=exports_to_keep,
        strip_default_attrs=strip_default_attrs)
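
For orientation, here is a minimal usage sketch of the strategy returned above (TF 1.x contrib APIs; `my_estimator` and the input functions are hypothetical placeholders, not part of the example):

```python
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils

# Feature columns describing what serving requests must provide.
feature_columns = [
    tf.contrib.layers.real_valued_column("age"),
    tf.contrib.layers.sparse_column_with_hash_bucket(
        "occupation", hash_bucket_size=1000),
]

# Bundles a parsing serving_input_fn with export bookkeeping.
export_strategy = saved_model_export_utils.make_parsing_export_strategy(
    feature_columns=feature_columns, exports_to_keep=5)

# An Experiment would then invoke the strategy after training, e.g.:
# experiment = tf.contrib.learn.Experiment(
#     estimator=my_estimator,            # hypothetical estimator
#     train_input_fn=my_train_input_fn,  # hypothetical input_fn
#     eval_input_fn=my_eval_input_fn,
#     export_strategies=[export_strategy])
```
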
Example No. 2
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5,
                                 target_core=False,
                                 strip_default_attrs=False):
  # pylint: disable=line-too-long
  """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from `tf.estimator.Estimator`
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel.  Each key should give the destination
      path (including the filename) relative to the assets.extra directory.
      The corresponding value gives the full path of the source file to be
      copied.  For example, the simple case of copying a single file without
      renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.
    target_core: If True, prepare an ExportStrategy for use with
      tensorflow.python.estimator.*.  If False (default), prepare an
      ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
      removed from the NodeDefs. For a detailed guide, see
      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  # pylint: enable=line-too-long
  feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  if target_core:
    serving_input_fn = (
        core_export.build_parsing_serving_input_receiver_fn(feature_spec))
  else:
    serving_input_fn = (
        input_fn_utils.build_parsing_serving_input_fn(feature_spec))
  return make_export_strategy(
      serving_input_fn,
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep,
      strip_default_attrs=strip_default_attrs)
Example No. 3
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
    """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys(column_name="native_country",
                                    keys=["US", "BRA", ...])
  country_emb = embedding_column(sparse_id_column=country, dimension=3,
                                 combiner="sum")
  occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
  occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                   combiner="sum")
  occupation_x_country = crossed_column(columns=[occupation, country],
                                        hash_bucket_size=10000)
  age = real_valued_column("age")
  age_buckets = bucketized_column(
      source_column=age,
      boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

  my_features = [occupation_emb, age_buckets, country_emb]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
    check_feature_columns(feature_columns)
    columns_to_tensors = parsing_ops.parse_example(
        serialized=serialized,
        features=fc.create_feature_spec_for_parsing(feature_columns),
        name=name,
        example_names=example_names)

    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
        transformer.transform(column)
    return columns_to_tensors
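
A self-contained sketch of calling this wrapper (TF 1.x graph mode; the feature names and values are illustrative): two tf.Examples are serialized in-process and parsed back into tensors.

```python
import tensorflow as tf

layers = tf.contrib.layers

age = layers.real_valued_column("age")
country = layers.sparse_column_with_keys("native_country", keys=["US", "BRA"])

def _serialized_example(age_value, country_value):
  # Build and serialize a single tf.Example proto.
  return tf.train.Example(features=tf.train.Features(feature={
      "age": tf.train.Feature(
          float_list=tf.train.FloatList(value=[age_value])),
      "native_country": tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[country_value])),
  })).SerializeToString()

serialized = tf.constant(
    [_serialized_example(25.0, b"US"), _serialized_example(40.0, b"BRA")])
columns_to_tensors = layers.parse_feature_columns_from_examples(
    serialized=serialized, feature_columns=[age, country])

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  # Per the docstring, the dict is keyed by the FeatureColumn objects.
  print(sess.run(columns_to_tensors[age]))  # [[25.], [40.]]
```
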
Example No. 4
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys(column_name="native_country",
                                    keys=["US", "BRA", ...])
  country_emb = embedding_column(sparse_id_column=country, dimension=3,
                                 combiner="sum")
  occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
  occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                   combiner="sum")
  occupation_x_country = crossed_column(columns=[occupation, country],
                                        hash_bucket_size=10000)
  age = real_valued_column("age")
  age_buckets = bucketized_column(
      source_column=age,
      boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

  my_features = [occupation_emb, age_buckets, country_emb]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Example No. 5
def parse_feature_columns_from_sequence_examples(
    serialized,
    context_feature_columns,
    sequence_feature_columns,
    name=None,
    example_name=None):
  """Parses tf.SequenceExamples to extract tensors for given `FeatureColumn`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single serialized
      `SequenceExample` proto.
    context_feature_columns: An iterable containing the feature columns for
      context features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    sequence_feature_columns: An iterable containing the feature columns for
      sequence features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    name: A name for this operation (optional).
    example_name: A scalar (0-D Tensor) of type string (optional), the names of
      the serialized proto.

  Returns:
    A tuple consisting of (context_features, sequence_features)

    *  context_features: a dict mapping `FeatureColumns` from
        `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
    *  sequence_features: a dict mapping `FeatureColumns` from
        `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
  """
  # Sequence example parsing requires a single (scalar) example.
  try:
    serialized = array_ops.reshape(serialized, [])
  except ValueError as e:
    raise ValueError(
        'serialized must contain a single sequence example. Batching must be '
        'done after parsing for sequence examples. Error: {}'.format(e))

  if context_feature_columns is None:
    context_feature_columns = []
  if sequence_feature_columns is None:
    sequence_feature_columns = []

  check_feature_columns(context_feature_columns)
  context_feature_spec = fc.create_feature_spec_for_parsing(
      context_feature_columns)

  check_feature_columns(sequence_feature_columns)
  sequence_feature_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
      sequence_feature_columns, allow_missing_by_default=False)

  return parsing_ops.parse_single_sequence_example(serialized,
                                                   context_feature_spec,
                                                   sequence_feature_spec,
                                                   example_name,
                                                   name)
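
A hedged sketch of the sequence wrapper in use (TF 1.x; illustrative feature names, and assuming the function is reachable as `tf.contrib.layers.parse_feature_columns_from_sequence_examples`), parsing one serialized tf.SequenceExample:

```python
import tensorflow as tf

layers = tf.contrib.layers

seq_example = tf.train.SequenceExample(
    context=tf.train.Features(feature={
        "length": tf.train.Feature(
            float_list=tf.train.FloatList(value=[3.0]))}),
    feature_lists=tf.train.FeatureLists(feature_list={
        "token_score": tf.train.FeatureList(feature=[
            tf.train.Feature(float_list=tf.train.FloatList(value=[v]))
            for v in (0.1, 0.5, 0.9)])}))

context, sequences = layers.parse_feature_columns_from_sequence_examples(
    serialized=tf.constant(seq_example.SerializeToString()),
    context_feature_columns=[layers.real_valued_column("length")],
    sequence_feature_columns=[layers.real_valued_column("token_score")])

with tf.Session() as sess:
  print(sess.run(context["length"]))         # [3.]
  print(sess.run(sequences["token_score"]))  # approx. [[0.1], [0.5], [0.9]]
```
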
Example No. 6
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
    """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:
  ```
  columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys("country", ["US", "BRA", ...])
  country_embedding = embedding_column(country, dimension=3, combiner="sum")
  query_word = sparse_column_with_hash_bucket(
    "query_word", hash_bucket_size=int(1e6))
  query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
  age_bucket = bucketized_column(real_valued_column("age"),
                                 boundaries=[18+i*5 for i in range(10)])

  my_features = [query_embedding, age_bucket, country_embedding]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """

    columns_to_tensors = parsing_ops.parse_example(
        serialized=serialized,
        features=fc.create_feature_spec_for_parsing(feature_columns),
        name=name,
        example_names=example_names)

    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
        transformer.transform(column)
    return columns_to_tensors
Example No. 7
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:
  ```
  columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys("country", ["US", "BRA", ...])
  country_embedding = embedding_column(country, dimension=3, combiner="sum")
  query_word = sparse_column_with_hash_bucket(
    "query_word", hash_bucket_size=int(1e6))
  query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
  age_bucket = bucketized_column(real_valued_column("age"),
                                 boundaries=[18+i*5 for i in range(10)])

  my_features = [query_embedding, age_bucket, country_embedding]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """

  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Example No. 8
  def testCreateFeatureSpec_ExperimentalColumns(self):
    real_valued_col0 = fc._real_valued_var_len_column(
        "real_valued_column0", is_sparse=True)
    real_valued_col1 = fc._real_valued_var_len_column(
        "real_valued_column1", dtype=dtypes.int64, default_value=0,
        is_sparse=False)
    feature_columns = set([real_valued_col0, real_valued_col1])
    expected_config = {
        "real_valued_column0": parsing_ops.VarLenFeature(dtype=dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenSequenceFeature(
                [], dtype=dtypes.int64, allow_missing=True, default_value=0),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)
Example No. 9
  def testCreateFeatureSpec_ExperimentalColumns(self):
    real_valued_col0 = fc._real_valued_var_len_column(
        "real_valued_column0", is_sparse=True)
    real_valued_col1 = fc._real_valued_var_len_column(
        "real_valued_column1", dtype=dtypes.int64, default_value=0,
        is_sparse=False)
    feature_columns = set([real_valued_col0, real_valued_col1])
    expected_config = {
        "real_valued_column0": parsing_ops.VarLenFeature(dtype=dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenSequenceFeature(
                [], dtype=dtypes.int64, allow_missing=True, default_value=0),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)
Example No. 10
def _build_estimator_for_resource_export_test():
    def _input_fn():
        iris = base.load_iris()
        return {
            'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
        }, constant_op.constant(iris.target, shape=[150], dtype=dtypes.int32)

    feature_columns = [
        feature_column_lib.real_valued_column('feature', dimension=4)
    ]

    def resource_constant_model_fn(unused_features, unused_labels, mode):
        """A model_fn that loads a constant from a resource and serves it."""
        assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                        model_fn.ModeKeys.INFER)

        const = constant_op.constant(-1, dtype=dtypes.int64)
        table = lookup.MutableHashTable(dtypes.string,
                                        dtypes.int64,
                                        const,
                                        name='LookupTableModel')
        if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
            key = constant_op.constant(['key'])
            value = constant_op.constant([42], dtype=dtypes.int64)
            train_op_1 = table.insert(key, value)
            training_state = lookup.MutableHashTable(
                dtypes.string,
                dtypes.int64,
                const,
                name='LookupTableTrainingState')
            training_op_2 = training_state.insert(key, value)
            return const, const, control_flow_ops.group(
                train_op_1, training_op_2)
        if mode == model_fn.ModeKeys.INFER:
            key = constant_op.constant(['key'])
            prediction = table.lookup(key)
            return prediction, const, control_flow_ops.no_op()

    est = estimator.Estimator(model_fn=resource_constant_model_fn)
    est.fit(input_fn=_input_fn, steps=1)

    feature_spec = feature_column_lib.create_feature_spec_for_parsing(
        feature_columns)
    serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(
        feature_spec)
    return est, serving_input_fn
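
The resource exported by this test is a MutableHashTable; a minimal standalone sketch of the insert/lookup pattern it relies on (TF 1.x `tf.contrib.lookup`):

```python
import tensorflow as tf

# Keys absent from the table resolve to the default value (-1 here).
table = tf.contrib.lookup.MutableHashTable(
    key_dtype=tf.string, value_dtype=tf.int64, default_value=-1)
insert_op = table.insert(
    tf.constant(["key"]), tf.constant([42], dtype=tf.int64))
values = table.lookup(tf.constant(["key", "missing"]))

with tf.Session() as sess:
  sess.run(insert_op)
  print(sess.run(values))  # [42 -1]
```
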
Example No. 11
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
   real_valued_col1 = fc.real_valued_column(
       "real_valued_column1", default_value=2)
   real_valued_col2 = fc.real_valued_column(
       "real_valued_column2", 5, default_value=4)
   real_valued_col3 = fc.real_valued_column(
       "real_valued_column3", default_value=[8])
   real_valued_col4 = fc.real_valued_column(
       "real_valued_column4", 3, default_value=[1, 0, 6])
   real_valued_col5 = fc._real_valued_var_len_column(
       "real_valued_column5", default_value=2, is_sparse=True)
   real_valued_col6 = fc._real_valued_var_len_column(
       "real_valued_column6",
       dtype=dtypes.int64,
       default_value=1,
       is_sparse=False)
   feature_columns = [
       real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4,
       real_valued_col5, real_valued_col6
   ]
   config = fc.create_feature_spec_for_parsing(feature_columns)
   self.assertEqual(6, len(config))
   self.assertDictEqual(
       {
           "real_valued_column1":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[2.]),
           "real_valued_column2":
               parsing_ops.FixedLenFeature(
                   [5],
                   dtype=dtypes.float32,
                   default_value=[4., 4., 4., 4., 4.]),
           "real_valued_column3":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[8.]),
           "real_valued_column4":
               parsing_ops.FixedLenFeature(
                   [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
           "real_valued_column5":
               parsing_ops.VarLenFeature(dtype=dtypes.float32),
           "real_valued_column6":
               parsing_ops.FixedLenSequenceFeature(
                   [], dtype=dtypes.int64, allow_missing=True, default_value=1)
       },
       config)
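
One behavior this test pins down is that a scalar `default_value` is broadcast across the column's declared dimension. In standalone form (a sketch, TF 1.x contrib):

```python
import tensorflow as tf

col = tf.contrib.layers.real_valued_column("ratings", 3, default_value=4)
spec = tf.contrib.layers.create_feature_spec_for_parsing([col])
print(spec["ratings"])
# FixedLenFeature(shape=[3], dtype=tf.float32, default_value=[4.0, 4.0, 4.0])
```
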
Example No. 12
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
   real_valued_col1 = fc.real_valued_column(
       "real_valued_column1", default_value=2)
   real_valued_col2 = fc.real_valued_column(
       "real_valued_column2", 5, default_value=4)
   real_valued_col3 = fc.real_valued_column(
       "real_valued_column3", default_value=[8])
   real_valued_col4 = fc.real_valued_column(
       "real_valued_column4", 3, default_value=[1, 0, 6])
   real_valued_col5 = fc._real_valued_var_len_column(
       "real_valued_column5", default_value=2, is_sparse=True)
   real_valued_col6 = fc._real_valued_var_len_column(
       "real_valued_column6", dtype=dtypes.int64, default_value=1,
       is_sparse=False)
   feature_columns = [
       real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4,
       real_valued_col5, real_valued_col6
   ]
   config = fc.create_feature_spec_for_parsing(feature_columns)
   self.assertEqual(6, len(config))
   self.assertDictEqual(
       {
           "real_valued_column1":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[2.]),
           "real_valued_column2":
               parsing_ops.FixedLenFeature(
                   [5],
                   dtype=dtypes.float32,
                   default_value=[4., 4., 4., 4., 4.]),
           "real_valued_column3":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[8.]),
           "real_valued_column4":
               parsing_ops.FixedLenFeature(
                   [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
           "real_valued_column5":
               parsing_ops.VarLenFeature(dtype=dtypes.float32),
           "real_valued_column6":
               parsing_ops.FixedLenSequenceFeature(
                   [], dtype=dtypes.int64, allow_missing=True,
                   default_value=1)
       },
       config)
Example No. 13
def _build_estimator_for_resource_export_test():

  def _input_fn():
    iris = base.load_iris()
    return {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }, constant_op.constant(
        iris.target, shape=[150], dtype=dtypes.int32)

  feature_columns = [
      feature_column_lib.real_valued_column('feature', dimension=4)
  ]

  def resource_constant_model_fn(unused_features, unused_labels, mode):
    """A model_fn that loads a constant from a resource and serves it."""
    assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                    model_fn.ModeKeys.INFER)

    const = constant_op.constant(-1, dtype=dtypes.int64)
    table = lookup.MutableHashTable(
        dtypes.string, dtypes.int64, const, name='LookupTableModel')
    update_global_step = variables.get_global_step().assign_add(1)
    if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
      key = constant_op.constant(['key'])
      value = constant_op.constant([42], dtype=dtypes.int64)
      train_op_1 = table.insert(key, value)
      training_state = lookup.MutableHashTable(
          dtypes.string, dtypes.int64, const, name='LookupTableTrainingState')
      training_op_2 = training_state.insert(key, value)
      return (const, const,
              control_flow_ops.group(train_op_1, training_op_2,
                                     update_global_step))
    if mode == model_fn.ModeKeys.INFER:
      key = constant_op.constant(['key'])
      prediction = table.lookup(key)
      return prediction, const, update_global_step

  est = estimator.Estimator(model_fn=resource_constant_model_fn)
  est.fit(input_fn=_input_fn, steps=1)

  feature_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  return est, serving_input_fn
Example No. 14
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5):
    """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from `tf.estimator.Estimator`
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel.  Each key should give the destination
      path (including the filename) relative to the assets.extra directory.
      The corresponding value gives the full path of the source file to be
      copied.  For example, the simple case of copying a single file without
      renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
    feature_spec = feature_column.create_feature_spec_for_parsing(
        feature_columns)
    serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(
        feature_spec)
    return make_export_strategy(
        serving_input_fn,
        default_output_alternative_key=default_output_alternative_key,
        assets_extra=assets_extra,
        as_text=as_text,
        exports_to_keep=exports_to_keep)
Example No. 15
def make_parsing_export_strategy(feature_columns, exports_to_keep=5):
  """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  return make_export_strategy(serving_input_fn, exports_to_keep=exports_to_keep)
Example No. 16
def make_parsing_export_strategy(feature_columns, exports_to_keep=5):
  """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  return make_export_strategy(serving_input_fn, exports_to_keep=exports_to_keep)
Example No. 17
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5):
  """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from `tf.estimator.Estimator`
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel.  Each key should give the destination
      path (including the filename) relative to the assets.extra directory.
      The corresponding value gives the full path of the source file to be
      copied.  For example, the simple case of copying a single file without
      renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  return make_export_strategy(
      serving_input_fn,
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep)
Example No. 18
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
     real_valued_col1 = fc.real_valued_column("real_valued_column1",
                                              default_value=2)
     real_valued_col2 = fc.real_valued_column("real_valued_column2",
                                              5,
                                              default_value=4)
     real_valued_col3 = fc.real_valued_column("real_valued_column3",
                                              default_value=[8])
     real_valued_col4 = fc.real_valued_column("real_valued_column4",
                                              3,
                                              default_value=[1, 0, 6])
     real_valued_col5 = fc.real_valued_column("real_valued_column5",
                                              dimension=None,
                                              default_value=2)
     feature_columns = [
         real_valued_col1, real_valued_col2, real_valued_col3,
         real_valued_col4, real_valued_col5
     ]
     config = fc.create_feature_spec_for_parsing(feature_columns)
     self.assertEqual(5, len(config))
     self.assertDictEqual(
         {
             "real_valued_column1":
             parsing_ops.FixedLenFeature(
                 [1], dtype=dtypes.float32, default_value=[2.]),
             "real_valued_column2":
              parsing_ops.FixedLenFeature(
                  [5], dtype=dtypes.float32,
                  default_value=[4., 4., 4., 4., 4.]),
             "real_valued_column3":
             parsing_ops.FixedLenFeature(
                 [1], dtype=dtypes.float32, default_value=[8.]),
             "real_valued_column4":
             parsing_ops.FixedLenFeature(
                 [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
             "real_valued_column5":
             parsing_ops.VarLenFeature(dtype=dtypes.float32)
         }, config)
Example No. 19
def _build_estimator_for_export_tests(tmpdir):

  def _input_fn():
    iris = base.load_iris()
    return {
        'feature': constant_op.constant(
            iris.data, dtype=dtypes.float32)
    }, constant_op.constant(
        iris.target, shape=[150], dtype=dtypes.int32)

  feature_columns = [
      feature_column_lib.real_valued_column(
          'feature', dimension=4)
  ]

  est = linear.LinearRegressor(feature_columns)
  est.fit(input_fn=_input_fn, steps=20)

  feature_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)

  # hack in an op that uses an asset, in order to test asset export.
  # this is not actually valid, of course.
  def serving_input_fn_with_asset():
    features, labels, inputs = serving_input_fn()

    vocab_file_name = os.path.join(tmpdir, 'my_vocab_file')
    vocab_file = gfile.GFile(vocab_file_name, mode='w')
    vocab_file.write(VOCAB_FILE_CONTENT)
    vocab_file.close()
    hashtable = lookup.HashTable(
        lookup.TextFileStringTableInitializer(vocab_file_name), 'x')
    features['bogus_lookup'] = hashtable.lookup(
        math_ops.to_int64(features['feature']))

    return input_fn_utils.InputFnOps(features, labels, inputs)

  return est, serving_input_fn_with_asset
Example No. 20
def _build_estimator_for_export_tests(tmpdir):

  def _input_fn():
    iris = base.load_iris()
    return {
        'feature': constant_op.constant(
            iris.data, dtype=dtypes.float32)
    }, constant_op.constant(
        iris.target, shape=[150], dtype=dtypes.int32)

  feature_columns = [
      feature_column_lib.real_valued_column(
          'feature', dimension=4)
  ]

  est = linear.LinearRegressor(feature_columns)
  est.fit(input_fn=_input_fn, steps=20)

  feature_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)

  # hack in an op that uses an asset, in order to test asset export.
  # this is not actually valid, of course.
  def serving_input_fn_with_asset():
    features, labels, inputs = serving_input_fn()

    vocab_file_name = os.path.join(tmpdir, 'my_vocab_file')
    vocab_file = gfile.GFile(vocab_file_name, mode='w')
    vocab_file.write(VOCAB_FILE_CONTENT)
    vocab_file.close()
    hashtable = lookup.HashTable(
        lookup.TextFileStringTableInitializer(vocab_file_name), 'x')
    features['bogus_lookup'] = hashtable.lookup(
        math_ops.to_int64(features['feature']))

    return input_fn_utils.InputFnOps(features, labels, inputs)

  return est, serving_input_fn_with_asset
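
For reference, `build_parsing_serving_input_fn` follows the usual contrib pattern: it creates a single string placeholder and parses it with the given feature spec. A minimal sketch (TF 1.x; the export path is a hypothetical placeholder):

```python
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils

feature_spec = {"feature": tf.FixedLenFeature([4], dtype=tf.float32)}
serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)

# Calling it yields InputFnOps(features, labels, default_inputs); the string
# placeholder receives serialized tf.Examples at serving time.
features, labels, default_inputs = serving_input_fn()

# A contrib.learn estimator export would consume it as, e.g.:
# est.export_savedmodel("/tmp/export", serving_input_fn)  # hypothetical path
```
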
Example No. 21
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
   real_valued_col1 = fc.real_valued_column(
       "real_valued_column1", default_value=2)
   real_valued_col2 = fc.real_valued_column(
       "real_valued_column2", 5, default_value=4)
   real_valued_col3 = fc.real_valued_column(
       "real_valued_column3", default_value=[8])
   real_valued_col4 = fc.real_valued_column(
       "real_valued_column4", 3, default_value=[1, 0, 6])
   real_valued_col5 = fc.real_valued_column(
       "real_valued_column5", dimension=None, default_value=2)
   feature_columns = [
       real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4,
       real_valued_col5
   ]
   config = fc.create_feature_spec_for_parsing(feature_columns)
   self.assertEqual(5, len(config))
   self.assertDictEqual(
       {
           "real_valued_column1":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[2.]),
           "real_valued_column2":
               parsing_ops.FixedLenFeature(
                   [5],
                   dtype=dtypes.float32,
                   default_value=[4., 4., 4., 4., 4.]),
           "real_valued_column3":
               parsing_ops.FixedLenFeature(
                   [1], dtype=dtypes.float32, default_value=[8.]),
           "real_valued_column4":
               parsing_ops.FixedLenFeature(
                   [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
           "real_valued_column5":
               parsing_ops.VarLenFeature(dtype=dtypes.float32)
       },
       config)
Example No. 22
    def testCreateFeatureSpec(self):
        sparse_col = fc.sparse_column_with_hash_bucket("sparse_column",
                                                       hash_bucket_size=100)
        embedding_col = fc.embedding_column(fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
                                            dimension=4)
        sparse_id_col = fc.sparse_column_with_keys(
            "id_column", ["marlo", "omar", "stringer"])
        weighted_id_col = fc.weighted_sparse_column(sparse_id_col,
                                                    "id_weights_column")
        real_valued_col1 = fc.real_valued_column("real_valued_column1")
        real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
        real_valued_col3 = fc.real_valued_column("real_valued_column3",
                                                 dimension=None)
        bucketized_col1 = fc.bucketized_column(
            fc.real_valued_column("real_valued_column_for_bucketization1"),
            [0, 4])
        bucketized_col2 = fc.bucketized_column(
            fc.real_valued_column("real_valued_column_for_bucketization2", 4),
            [0, 4])
        a = fc.sparse_column_with_hash_bucket("cross_aaa",
                                              hash_bucket_size=100)
        b = fc.sparse_column_with_hash_bucket("cross_bbb",
                                              hash_bucket_size=100)
        cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
        feature_columns = set([
            sparse_col, embedding_col, weighted_id_col, real_valued_col1,
            real_valued_col2, real_valued_col3, bucketized_col1,
            bucketized_col2, cross_col
        ])
        expected_config = {
            "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
            "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
            "id_column":
            parsing_ops.VarLenFeature(dtypes.string),
            "id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
            "real_valued_column1":
            parsing_ops.FixedLenFeature([1], dtype=dtypes.float32),
            "real_valued_column2":
            parsing_ops.FixedLenFeature([5], dtype=dtypes.float32),
            "real_valued_column3":
            parsing_ops.VarLenFeature(dtype=dtypes.float32),
            "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature([1], dtype=dtypes.float32),
            "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature([4], dtype=dtypes.float32),
            "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
            "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string)
        }

        config = fc.create_feature_spec_for_parsing(feature_columns)
        self.assertDictEqual(expected_config, config)

        # Test that the same config is parsed out if we pass a dictionary.
        feature_columns_dict = {
            str(i): val
            for i, val in enumerate(feature_columns)
        }
        config = fc.create_feature_spec_for_parsing(feature_columns_dict)
        self.assertDictEqual(expected_config, config)
Example No. 23
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
    """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.io.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
    check_feature_columns(feature_columns)
    columns_to_tensors = parsing_ops.parse_example(
        serialized=serialized,
        features=fc.create_feature_spec_for_parsing(feature_columns),
        name=name,
        example_names=example_names)

    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
        transformer.transform(column)
    return columns_to_tensors
Example No. 24
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Example No. 25
  def testCreateFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    sparse_id_col = fc.sparse_column_with_keys("id_column",
                                               ["marlo", "omar", "stringer"])
    weighted_id_col = fc.weighted_sparse_column(sparse_id_col,
                                                "id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column1")
    real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
    real_valued_col3 = fc.real_valued_column(
        "real_valued_column3", dimension=None)
    bucketized_col1 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
    bucketized_col2 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization2", 4),
        [0, 4])
    a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
    b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
    cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, real_valued_col1,
        real_valued_col2, real_valued_col3, bucketized_col1, bucketized_col2,
        cross_col
    ])
    expected_config = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=dtypes.float32),
        "real_valued_column3":
            parsing_ops.VarLenFeature(dtype=dtypes.float32),
        "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature(
                [4], dtype=dtypes.float32),
        "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
        "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string)
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)

    # Test that the same config is parsed out if we pass a dictionary.
    feature_columns_dict = {
        str(i): val
        for i, val in enumerate(feature_columns)
    }
    config = fc.create_feature_spec_for_parsing(feature_columns_dict)
    self.assertDictEqual(expected_config, config)
Example No. 26
  def testCreateFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    str_sparse_id_col = fc.sparse_column_with_keys(
        "str_id_column", ["marlo", "omar", "stringer"])
    int32_sparse_id_col = fc.sparse_column_with_keys(
        "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
    int64_sparse_id_col = fc.sparse_column_with_keys(
        "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
    weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
                                                "str_id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column1")
    real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
    bucketized_col1 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
    bucketized_col2 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization2", 4),
        [0, 4])
    a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
    b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
    cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
    one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
        "sparse_column_for_one_hot", hash_bucket_size=100))
    scattered_embedding_col = fc.scattered_embedding_column(
        "scattered_embedding_column", size=100, dimension=10, hash_key=1)
    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
        int64_sparse_id_col, real_valued_col1, real_valued_col2,
        bucketized_col1, bucketized_col2, cross_col, one_hot_col,
        scattered_embedding_col
    ])
    expected_config = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "str_id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "int32_id_column":
            parsing_ops.VarLenFeature(dtypes.int32),
        "int64_id_column":
            parsing_ops.VarLenFeature(dtypes.int64),
        "str_id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=dtypes.float32),
        "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature(
                [4], dtype=dtypes.float32),
        "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
        "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_one_hot":
            parsing_ops.VarLenFeature(dtypes.string),
        "scattered_embedding_column":
            parsing_ops.VarLenFeature(dtypes.string),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)

    # Tests that contrib feature columns work with core library:
    config_core = fc_core.make_parse_example_spec(feature_columns)
    self.assertDictEqual(expected_config, config_core)

    # Test that the same config is parsed out if we pass a dictionary.
    feature_columns_dict = {
        str(i): val
        for i, val in enumerate(feature_columns)
    }
    config = fc.create_feature_spec_for_parsing(feature_columns_dict)
    self.assertDictEqual(expected_config, config)
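
The `config_core` check above can be reproduced standalone: per this test, core's `make_parse_example_spec` accepts these contrib columns and produces an identical spec (a sketch assuming a TF 1.x build where that interop exists):

```python
import tensorflow as tf

cols = [
    tf.contrib.layers.real_valued_column("x", 2),
    tf.contrib.layers.sparse_column_with_hash_bucket("s", hash_bucket_size=10),
]

contrib_spec = tf.contrib.layers.create_feature_spec_for_parsing(cols)
core_spec = tf.feature_column.make_parse_example_spec(cols)
assert contrib_spec == core_spec  # same parsing spec from both libraries
```
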
Example No. 27
  def testCreateFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    str_sparse_id_col = fc.sparse_column_with_keys(
        "str_id_column", ["marlo", "omar", "stringer"])
    int32_sparse_id_col = fc.sparse_column_with_keys(
        "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
    int64_sparse_id_col = fc.sparse_column_with_keys(
        "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
    weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
                                                "str_id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column1")
    real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
    real_valued_col3 = fc._real_valued_var_len_column(
        "real_valued_column3", is_sparse=True)
    real_valued_col4 = fc._real_valued_var_len_column(
        "real_valued_column4", dtype=dtypes.int64, default_value=0,
        is_sparse=False)
    bucketized_col1 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
    bucketized_col2 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization2", 4),
        [0, 4])
    a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
    b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
    cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
    one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
        "sparse_column_for_one_hot", hash_bucket_size=100))
    scattered_embedding_col = fc.scattered_embedding_column(
        "scattered_embedding_column", size=100, dimension=10, hash_key=1)
    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
        int64_sparse_id_col, real_valued_col1, real_valued_col2,
        real_valued_col3, real_valued_col4, bucketized_col1, bucketized_col2,
        cross_col, one_hot_col, scattered_embedding_col
    ])
    expected_config = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "str_id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "int32_id_column":
            parsing_ops.VarLenFeature(dtypes.int32),
        "int64_id_column":
            parsing_ops.VarLenFeature(dtypes.int64),
        "str_id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=dtypes.float32),
        "real_valued_column3":
            parsing_ops.VarLenFeature(dtype=dtypes.float32),
        "real_valued_column4":
            parsing_ops.FixedLenSequenceFeature(
                [], dtype=dtypes.int64, allow_missing=True, default_value=0),
        "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature(
                [4], dtype=dtypes.float32),
        "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
        "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_one_hot":
            parsing_ops.VarLenFeature(dtypes.string),
        "scattered_embedding_column":
            parsing_ops.VarLenFeature(dtypes.string),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)

    # Test that the same config is parsed out if we pass a dictionary.
    feature_columns_dict = {
        str(i): val
        for i, val in enumerate(feature_columns)
    }
    config = fc.create_feature_spec_for_parsing(feature_columns_dict)
    self.assertDictEqual(expected_config, config)
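Note the asymmetry the expected config encodes for the two variable-length columns: is_sparse=True maps to a VarLenFeature (parsed as a SparseTensor), while is_sparse=False maps to a FixedLenSequenceFeature with allow_missing=True, whose default_value fills in entries an example omits. A minimal sketch of the parse-time difference (assuming TF 1.x; the example proto below is illustrative):

# Sketch of the parse-time difference between the two var-len specs.
# Assumes TF 1.x; the example proto contents are illustrative.
import tensorflow as tf

spec = {
    # is_sparse=True side: ragged values come back as a SparseTensor.
    "real_valued_column3": tf.VarLenFeature(dtype=tf.float32),
    # is_sparse=False side: values come back as a dense Tensor, with
    # default_value=0 filling in where an example omits the feature.
    "real_valued_column4": tf.FixedLenSequenceFeature(
        [], dtype=tf.int64, allow_missing=True, default_value=0),
}

example = tf.train.Example(features=tf.train.Features(feature={
    "real_valued_column3": tf.train.Feature(
        float_list=tf.train.FloatList(value=[1.0, 2.0])),
}))

parsed = tf.parse_example([example.SerializeToString()], spec)
# parsed["real_valued_column3"] is a SparseTensor;
# parsed["real_valued_column4"] is a dense int64 Tensor.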
Example No. 28
  def testCreateFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    str_sparse_id_col = fc.sparse_column_with_keys(
        "str_id_column", ["marlo", "omar", "stringer"])
    int32_sparse_id_col = fc.sparse_column_with_keys(
        "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
    int64_sparse_id_col = fc.sparse_column_with_keys(
        "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
    weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
                                                "str_id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column1")
    real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
    bucketized_col1 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
    bucketized_col2 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization2", 4),
        [0, 4])
    a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
    b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
    cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
    one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
        "sparse_column_for_one_hot", hash_bucket_size=100))
    scattered_embedding_col = fc.scattered_embedding_column(
        "scattered_embedding_column", size=100, dimension=10, hash_key=1)
    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
        int64_sparse_id_col, real_valued_col1, real_valued_col2,
        bucketized_col1, bucketized_col2, cross_col, one_hot_col,
        scattered_embedding_col
    ])
    expected_config = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "str_id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "int32_id_column":
            parsing_ops.VarLenFeature(dtypes.int32),
        "int64_id_column":
            parsing_ops.VarLenFeature(dtypes.int64),
        "str_id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=dtypes.float32),
        "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature(
                [4], dtype=dtypes.float32),
        "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
        "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_one_hot":
            parsing_ops.VarLenFeature(dtypes.string),
        "scattered_embedding_column":
            parsing_ops.VarLenFeature(dtypes.string),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)

    # Tests that contrib feature columns work with core library:
    config_core = fc_core.make_parse_example_spec(feature_columns)
    self.assertDictEqual(expected_config, config_core)

    # Test that the same config is parsed out if we pass a dictionary.
    feature_columns_dict = {
        str(i): val
        for i, val in enumerate(feature_columns)
    }
    config = fc.create_feature_spec_for_parsing(feature_columns_dict)
    self.assertDictEqual(expected_config, config)
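The last two assertions make two compatibility claims: core's make_parse_example_spec accepts contrib columns and yields an identical spec, and create_feature_spec_for_parsing accepts a dict of columns, using only the values (the keys are ignored). A small sketch of the dict form (assuming TF 1.x contrib; key names are illustrative):

# Sketch: the dict form ignores keys and uses only the column values,
# so arbitrary string keys yield the same spec. Assumes TF 1.x contrib.
from tensorflow.contrib.layers.python.layers import feature_column as fc

columns = [
    fc.sparse_column_with_hash_bucket("sparse_column", hash_bucket_size=100),
    fc.real_valued_column("real_valued_column1"),
]
spec_from_list = fc.create_feature_spec_for_parsing(columns)
spec_from_dict = fc.create_feature_spec_for_parsing(
    {"any_key_%d" % i: col for i, col in enumerate(columns)})
assert spec_from_list == spec_from_dict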