def testCreateSequenceFeatureSpec(self):
    """Checks the sequence parsing spec built from a mixed set of columns.

    The expectation encoded below is that sparse, embedding and weighted
    sparse columns map to `VarLenFeature`s, while real-valued columns map to
    `FixedLenSequenceFeature`s whose `allow_missing` flag is only set for the
    column that was given a default value.
    """
    layers = tf.contrib.layers

    # Columns backed by sparse inputs.
    hashed = layers.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_source = layers.sparse_column_with_hash_bucket(
        "sparse_column_for_embedding", hash_bucket_size=10)
    embedded = layers.embedding_column(embedding_source, dimension=4)
    keyed = layers.sparse_column_with_keys(
        "id_column", ["marlo", "omar", "stringer"])
    weighted = layers.weighted_sparse_column(keyed, "id_weights_column")

    # Dense real-valued columns, one without and one with a default value.
    real_plain = layers.real_valued_column("real_valued_column", dimension=2)
    real_defaulted = layers.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)

    columns = {hashed, embedded, weighted, real_plain, real_defaulted}
    actual_spec = fc._create_sequence_feature_spec_for_parsing(columns)

    expected_spec = {
        "sparse_column": tf.VarLenFeature(tf.string),
        "sparse_column_for_embedding": tf.VarLenFeature(tf.string),
        "id_column": tf.VarLenFeature(tf.string),
        "id_weights_column": tf.VarLenFeature(tf.float32),
    }
    expected_spec["real_valued_column"] = tf.FixedLenSequenceFeature(
        shape=[2], dtype=tf.float32, allow_missing=False)
    expected_spec["real_valued_default_column"] = tf.FixedLenSequenceFeature(
        shape=[5], dtype=tf.float32, allow_missing=True)

    self.assertDictEqual(expected_spec, actual_spec)
示例#2
0
  def testCreateSequenceFeatureSpec(self):
    """Verifies the per-key parsing spec produced for sequence features.

    Builds one column of each supported flavour (hashed sparse, embedding,
    weighted sparse, and real-valued with/without a default) and compares the
    generated spec against a hand-written dictionary.
    """
    contrib_layers = tf.contrib.layers

    hash_bucket_col = contrib_layers.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_input_col = contrib_layers.sparse_column_with_hash_bucket(
        "sparse_column_for_embedding", hash_bucket_size=10)
    embedded_col = contrib_layers.embedding_column(
        embedding_input_col, dimension=4)
    keys_col = contrib_layers.sparse_column_with_keys(
        "id_column", ["marlo", "omar", "stringer"])
    weighted_col = contrib_layers.weighted_sparse_column(
        keys_col, "id_weights_column")
    dense_col = contrib_layers.real_valued_column(
        "real_valued_column", dimension=2)
    dense_default_col = contrib_layers.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)

    all_columns = {
        hash_bucket_col,
        embedded_col,
        weighted_col,
        dense_col,
        dense_default_col,
    }

    # Expected spec, listed in the same order as the columns above.
    want = {}
    want["sparse_column"] = tf.VarLenFeature(tf.string)
    want["sparse_column_for_embedding"] = tf.VarLenFeature(tf.string)
    want["id_column"] = tf.VarLenFeature(tf.string)
    want["id_weights_column"] = tf.VarLenFeature(tf.float32)
    want["real_valued_column"] = tf.FixedLenSequenceFeature(
        shape=[2], dtype=tf.float32, allow_missing=False)
    want["real_valued_default_column"] = tf.FixedLenSequenceFeature(
        shape=[5], dtype=tf.float32, allow_missing=True)

    got = fc._create_sequence_feature_spec_for_parsing(all_columns)

    self.assertDictEqual(want, got)
示例#3
0
def parse_feature_columns_from_sequence_examples(
    serialized,
    context_feature_columns,
    sequence_feature_columns,
    name=None,
    example_name=None):
  """Parses tf.SequenceExamples to extract tensors for given `FeatureColumn`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single serialized
      `SequenceExample` proto.
    context_feature_columns: An iterable containing the feature columns for
      context features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    sequence_feature_columns: An iterable containing the feature columns for
      sequence features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    name: A name for this operation (optional).
    example_name: A scalar (0-D Tensor) of type string (optional), the names of
      the serialized proto.

  Returns:
    A tuple consisting of (context_features, sequence_features)

    *  context_features: a dict mapping `FeatureColumns` from
        `context_feature_columns` to their parsed `Tensor`s/`SparseTensor`s.
    *  sequence_features: a dict mapping `FeatureColumns` from
        `sequence_feature_columns` to their parsed `Tensor`s/`SparseTensor`s.

  Raises:
    ValueError: If reshaping `serialized` to a scalar raises a `ValueError`;
      the original error text is appended to the message.
  """
  # Sequence example parsing requires a single (scalar) example.
  try:
    serialized = array_ops.reshape(serialized, [])
  except ValueError as e:
    raise ValueError(
        'serialized must contain a single sequence example. Batching must be '
        'done after parsing for sequence examples. Error: {}'.format(e))

  # `None` is accepted for either column collection and treated as "no
  # columns of this kind".
  if context_feature_columns is None:
    context_feature_columns = []
  if sequence_feature_columns is None:
    sequence_feature_columns = []

  check_feature_columns(context_feature_columns)
  context_feature_spec = fc.create_feature_spec_for_parsing(
      context_feature_columns)

  check_feature_columns(sequence_feature_columns)
  sequence_feature_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
      sequence_feature_columns, allow_missing_by_default=False)

  # `example_name` and `name` are passed by keyword: the parsing op takes them
  # in the opposite order to this function's own signature.
  return parsing_ops.parse_single_sequence_example(serialized,
                                                   context_feature_spec,
                                                   sequence_feature_spec,
                                                   example_name=example_name,
                                                   name=name)
  def testCreateSequenceFeatureSpec(self):
    """Checks the sequence parsing spec built from a mixed set of columns.

    Besides the sparse, embedding, weighted and fixed-size real-valued
    columns, this covers the variable-length real-valued column in both its
    sparse and dense forms.
    """
    # Columns backed by sparse inputs.
    hashed = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_source = fc.sparse_column_with_hash_bucket(
        "sparse_column_for_embedding", hash_bucket_size=10)
    embedded = fc.embedding_column(embedding_source, dimension=4)
    keyed = fc.sparse_column_with_keys(
        "id_column", ["marlo", "omar", "stringer"])
    weighted = fc.weighted_sparse_column(keyed, "id_weights_column")

    # Fixed-size real-valued columns, without and with a default value.
    real_plain = fc.real_valued_column("real_valued_column", dimension=2)
    real_defaulted = fc.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)

    # Variable-length real-valued columns in sparse and dense form.
    var_len_sparse = fc._real_valued_var_len_column(
        "real_valued_var_len_column", default_value=3.0, is_sparse=True)
    var_len_dense = fc._real_valued_var_len_column(
        "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

    columns = {
        hashed, embedded, weighted, real_plain, real_defaulted,
        var_len_sparse, var_len_dense,
    }
    actual_spec = fc._create_sequence_feature_spec_for_parsing(columns)

    # The three string-valued sparse keys share the same expected feature.
    expected_spec = {
        key: parsing_ops.VarLenFeature(dtypes.string)
        for key in ("sparse_column", "sparse_column_for_embedding",
                    "id_column")
    }
    expected_spec["id_weights_column"] = parsing_ops.VarLenFeature(
        dtypes.float32)
    expected_spec["real_valued_column"] = parsing_ops.FixedLenSequenceFeature(
        shape=[2], dtype=dtypes.float32, allow_missing=False)
    expected_spec["real_valued_default_column"] = (
        parsing_ops.FixedLenSequenceFeature(
            shape=[5], dtype=dtypes.float32, allow_missing=True))
    expected_spec["real_valued_var_len_column"] = parsing_ops.VarLenFeature(
        dtype=dtypes.float32)
    expected_spec["real_valued_var_len_dense_column"] = (
        parsing_ops.FixedLenSequenceFeature(
            shape=[], dtype=dtypes.float32, allow_missing=True,
            default_value=4.0))

    self.assertDictEqual(expected_spec, actual_spec)
示例#5
0
  def testCreateSequenceFeatureSpec(self):
    """Verifies the per-key parsing spec produced for sequence features.

    One column of each flavour is created (hashed sparse, embedding, weighted
    sparse, fixed-size real-valued with/without a default, and sparse/dense
    variable-length real-valued) and the generated spec is compared against a
    hand-written dictionary.
    """
    hash_bucket_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_input_col = fc.sparse_column_with_hash_bucket(
        "sparse_column_for_embedding", hash_bucket_size=10)
    embedded_col = fc.embedding_column(embedding_input_col, dimension=4)
    keys_col = fc.sparse_column_with_keys(
        "id_column", ["marlo", "omar", "stringer"])
    weighted_col = fc.weighted_sparse_column(keys_col, "id_weights_column")
    dense_col = fc.real_valued_column("real_valued_column", dimension=2)
    dense_default_col = fc.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)
    sparse_var_len_col = fc._real_valued_var_len_column(
        "real_valued_var_len_column", default_value=3.0, is_sparse=True)
    dense_var_len_col = fc._real_valued_var_len_column(
        "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

    all_columns = {
        hash_bucket_col,
        embedded_col,
        weighted_col,
        dense_col,
        dense_default_col,
        sparse_var_len_col,
        dense_var_len_col,
    }

    # Expected spec, listed in the same order as the columns above.
    want = {}
    want["sparse_column"] = parsing_ops.VarLenFeature(dtypes.string)
    want["sparse_column_for_embedding"] = parsing_ops.VarLenFeature(
        dtypes.string)
    want["id_column"] = parsing_ops.VarLenFeature(dtypes.string)
    want["id_weights_column"] = parsing_ops.VarLenFeature(dtypes.float32)
    want["real_valued_column"] = parsing_ops.FixedLenSequenceFeature(
        shape=[2], dtype=dtypes.float32, allow_missing=False)
    want["real_valued_default_column"] = parsing_ops.FixedLenSequenceFeature(
        shape=[5], dtype=dtypes.float32, allow_missing=True)
    want["real_valued_var_len_column"] = parsing_ops.VarLenFeature(
        dtype=dtypes.float32)
    want["real_valued_var_len_dense_column"] = (
        parsing_ops.FixedLenSequenceFeature(
            shape=[], dtype=dtypes.float32, allow_missing=True,
            default_value=4.0))

    got = fc._create_sequence_feature_spec_for_parsing(all_columns)

    self.assertDictEqual(want, got)