def test_encode_features(self):
  """Checks `encode_features` on two id features sharing one embedding.

  Builds two identity categorical columns ("aaa" and "bbb") wrapped in
  shared embedding columns whose initializer returns a fixed table, encodes
  the padded id inputs in EVAL mode, and compares the embedding variable
  and both encoded outputs against hand-computed values.
  """
  with tf.Graph().as_default():
    num_ids = 4
    embed_dim = 2

    # Padded id matrices; -1 entries are ignored.
    ids_a = np.array([
        [3, -1, -1],  # example 0, ids [3]
        [0, 1, -1],  # example 1, ids [0, 1]
    ])
    ids_b = np.array([
        [0, -1, -1],  # example 0, ids [0]
        [-1, -1, -1],  # example 1, ids []
    ])
    features = {"aaa": ids_a, "bbb": ids_b}

    # Fixed embedding table: row i is the embedding of id i.
    table = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (9., 13.),  # id 3
    )

    # Expected per-example lookups, using combiner='mean'.
    want_a = (
        (9., 13.),  # example 0: ids [3] -> [9, 13]
        (2., 3.5),  # example 1: ids [0, 1] -> mean([1, 2], [3, 5])
    )
    want_b = (
        (1., 2.),  # example 0: ids [0] -> [1, 2]
        (0., 0.),  # example 1: ids [] -> [0, 0]
    )

    def _fixed_table_initializer(shape, dtype, partition_info):
      """Ignores its arguments and always yields the fixed table."""
      del shape, dtype, partition_info  # Unused.
      return table

    # Build the categorical columns, then the shared embedding columns.
    cat_a = feature_column.categorical_column_with_identity(
        key="aaa", num_buckets=num_ids)
    cat_b = feature_column.categorical_column_with_identity(
        key="bbb", num_buckets=num_ids)
    shared_columns = feature_column.shared_embedding_columns(
        [cat_a, cat_b],
        dimension=embed_dim,
        initializer=_fixed_table_initializer,
        shared_embedding_collection_name="custom_collection_name")
    column_a, column_b = shared_columns
    columns_by_key = {"aaa": column_a, "bbb": column_b}

    encoded = feature_lib.encode_features(
        features,
        columns_by_key.values(),
        mode=tf.estimator.ModeKeys.EVAL)
    got_a = encoded[columns_by_key["aaa"]]
    got_b = encoded[columns_by_key["bbb"]]

    # The first global variable is taken to be the embedding table.
    all_global_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    table_var = all_global_vars[0]

    with tf.compat.v1.Session() as sess:
      for init_op in (tf.compat.v1.global_variables_initializer(),
                      tf.compat.v1.tables_initializer()):
        sess.run(init_op)
      self.assertAllEqual(table, table_var.eval())
      self.assertAllEqual(want_a, got_a.eval())
      self.assertAllEqual(want_b, got_b.eval())
def test_encode_features_sequence_column(self):
  """Checks `encode_features` on sequence embedding and numeric columns.

  Feeds a padded id sequence ("seq_ids") through a sequence identity column
  wrapped in an embedding column with a fixed initializer, and a sparse
  numeric sequence ("seq_nums") through a sequence numeric column, both in
  EVAL mode. The embedding variable and both encoded outputs are evaluated
  in a session and compared against hand-computed values.
  """
  with tf.Graph().as_default():
    # Inputs.
    vocabulary_size = 4
    # Sequence of ids. -1 values are ignored.
    input_seq_ids = np.array([
        [3, -1, -1],  # example 0
        [0, 1, -1],  # example 1
    ])
    # Sequence of numeric values, given sparsely. Dense equivalent:
    #   [1.],      # example 0
    #   [2., 3.],  # example 1
    input_seq_nums = tf.sparse.SparseTensor(
        indices=[[0, 0], [1, 0], [1, 1]],
        values=[1., 2., 3.],
        dense_shape=(2, 3))
    input_features = {
        "seq_ids": input_seq_ids,
        "seq_nums": input_seq_nums,
    }

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (9., 13.),  # id 3
    )

    # Expected sequence embeddings for input_seq_ids.
    expected_seq_embed = [
        # example 0:
        [[9., 13.], [0., 0.], [0., 0.]],
        # example 1:
        [[1., 2.], [3., 5.], [0., 0.]],
    ]
    # Expected encoded values for input_seq_nums.
    expected_seq_nums = [
        # example 0:
        [[1.], [0.], [0.]],
        # example 1:
        [[2.], [3.], [0.]],
    ]

    # Build columns.
    seq_categorical_column = (
        feature_column.sequence_categorical_column_with_identity(
            key="seq_ids", num_buckets=vocabulary_size))
    seq_embed_column = feature_column.embedding_column(
        seq_categorical_column,
        dimension=embedding_dimension,
        initializer=lambda shape, dtype, partition_info: embedding_values)
    seq_numeric_column = feature_column.sequence_numeric_column("seq_nums")

    cols_to_tensors = feature_lib.encode_features(
        input_features, [seq_embed_column, seq_numeric_column],
        mode=tf.estimator.ModeKeys.EVAL)
    actual_seq_embed = cols_to_tensors[seq_embed_column]
    actual_seq_nums = cols_to_tensors[seq_numeric_column]

    # Assert embedding variable and encoded sequence features.
    global_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    # The first global variable is taken to be the embedding table.
    embedding_var = global_vars[0]
    with tf.compat.v1.Session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.tables_initializer())
      self.assertAllEqual(embedding_values, embedding_var.eval())
      # Evaluate the encoded tensors explicitly in this session rather than
      # handing symbolic graph tensors to assertAllEqual, so the comparison
      # is made on concrete arrays.
      self.assertAllEqual(expected_seq_embed, actual_seq_embed.eval())
      self.assertAllEqual(expected_seq_nums, actual_seq_nums.eval())