def testCreateSequenceFeatureSpec(self):
  """Checks the sequence parsing spec built from a mixed set of columns.

  Covers hashed sparse, embedding, weighted sparse and real-valued columns
  (with and without a default value) and compares the generated spec with
  a hand-written expectation.
  """
  layers = tf.contrib.layers

  hashed_col = layers.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  hashed_col_for_embedding = layers.sparse_column_with_hash_bucket(
      "sparse_column_for_embedding", hash_bucket_size=10)
  embedded_col = layers.embedding_column(
      hashed_col_for_embedding, dimension=4)
  keyed_col = layers.sparse_column_with_keys(
      "id_column", ["marlo", "omar", "stringer"])
  weighted_col = layers.weighted_sparse_column(keyed_col, "id_weights_column")
  dense_col = layers.real_valued_column("real_valued_column", dimension=2)
  dense_col_with_default = layers.real_valued_column(
      "real_valued_default_column", dimension=5, default_value=3.0)

  columns = {
      hashed_col,
      embedded_col,
      weighted_col,
      dense_col,
      dense_col_with_default,
  }
  actual_spec = fc._create_sequence_feature_spec_for_parsing(columns)

  wanted_spec = {
      # Sparse-backed columns are expected to parse as variable-length
      # features.
      "sparse_column": tf.VarLenFeature(tf.string),
      "sparse_column_for_embedding": tf.VarLenFeature(tf.string),
      "id_column": tf.VarLenFeature(tf.string),
      "id_weights_column": tf.VarLenFeature(tf.float32),
      # The expectation differs only in `allow_missing`: False without a
      # default value, True with one.
      "real_valued_column": tf.FixedLenSequenceFeature(
          shape=[2], dtype=tf.float32, allow_missing=False),
      "real_valued_default_column": tf.FixedLenSequenceFeature(
          shape=[5], dtype=tf.float32, allow_missing=True),
  }
  self.assertDictEqual(wanted_spec, actual_spec)
def parse_feature_columns_from_sequence_examples(
    serialized,
    context_feature_columns,
    sequence_feature_columns,
    name=None,
    example_name=None):
  """Parses tf.SequenceExamples to extract tensors for given `FeatureColumn`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single serialized
      `SequenceExample` proto. It is reshaped to a scalar before parsing.
    context_feature_columns: An iterable containing the feature columns for
      context features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`, which is treated as no columns.
    sequence_feature_columns: An iterable containing the feature columns for
      sequence features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`, which is treated as no columns.
    name: A name for this operation (optional).
    example_name: A scalar (0-D Tensor) of type string (optional), the name of
      the serialized proto.

  Returns:
    A tuple consisting of (context_features, sequence_features)

    * context_features: a dict mapping `FeatureColumns` from
      `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
    * sequence_features: a dict mapping `FeatureColumns` from
      `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.

  Raises:
    ValueError: If `serialized` cannot be reshaped to a scalar.
  """
  # Sequence example parsing requires a single (scalar) example.
  try:
    serialized = array_ops.reshape(serialized, [])
  except ValueError as e:
    raise ValueError(
        'serialized must contain a single sequence example. Batching must be '
        'done after parsing for sequence examples.\nError: {}'.format(e))

  if context_feature_columns is None:
    context_feature_columns = []
  if sequence_feature_columns is None:
    sequence_feature_columns = []

  check_feature_columns(context_feature_columns)
  context_feature_spec = fc.create_feature_spec_for_parsing(
      context_feature_columns)

  check_feature_columns(sequence_feature_columns)
  sequence_feature_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
      sequence_feature_columns, allow_missing_by_default=False)

  # Keyword arguments make the mapping explicit: the parser takes
  # `example_name` before `name`, the reverse of this function's signature.
  return parsing_ops.parse_single_sequence_example(
      serialized,
      context_features=context_feature_spec,
      sequence_features=sequence_feature_spec,
      example_name=example_name,
      name=name)
def testCreateSequenceFeatureSpec(self):
  """Checks the sequence parsing spec built from a mixed set of columns.

  Covers hashed sparse, embedding, weighted sparse, fixed-dimension
  real-valued and variable-length real-valued columns, and compares the
  generated spec with a hand-written expectation.
  """
  hashed_col = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  hashed_col_for_embedding = fc.sparse_column_with_hash_bucket(
      "sparse_column_for_embedding", hash_bucket_size=10)
  embedded_col = fc.embedding_column(hashed_col_for_embedding, dimension=4)
  keyed_col = fc.sparse_column_with_keys(
      "id_column", ["marlo", "omar", "stringer"])
  weighted_col = fc.weighted_sparse_column(keyed_col, "id_weights_column")
  dense_col = fc.real_valued_column("real_valued_column", dimension=2)
  dense_col_with_default = fc.real_valued_column(
      "real_valued_default_column", dimension=5, default_value=3.0)
  var_len_sparse_col = fc._real_valued_var_len_column(
      "real_valued_var_len_column", default_value=3.0, is_sparse=True)
  var_len_dense_col = fc._real_valued_var_len_column(
      "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

  columns = {
      hashed_col,
      embedded_col,
      weighted_col,
      dense_col,
      dense_col_with_default,
      var_len_sparse_col,
      var_len_dense_col,
  }
  actual_spec = fc._create_sequence_feature_spec_for_parsing(columns)

  wanted_spec = {
      # Sparse-backed columns are expected to parse as variable-length
      # features.
      "sparse_column":
          parsing_ops.VarLenFeature(dtypes.string),
      "sparse_column_for_embedding":
          parsing_ops.VarLenFeature(dtypes.string),
      "id_column":
          parsing_ops.VarLenFeature(dtypes.string),
      "id_weights_column":
          parsing_ops.VarLenFeature(dtypes.float32),
      # The expectation differs only in `allow_missing`: False without a
      # default value, True with one.
      "real_valued_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[2], dtype=dtypes.float32, allow_missing=False),
      "real_valued_default_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[5], dtype=dtypes.float32, allow_missing=True),
      # The is_sparse=True variant is expected as a VarLenFeature; the
      # is_sparse=False variant as a scalar-shaped sequence feature that
      # carries its default value.
      "real_valued_var_len_column":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_var_len_dense_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[],
              dtype=dtypes.float32,
              allow_missing=True,
              default_value=4.0),
  }
  self.assertDictEqual(wanted_spec, actual_spec)