# Imports assumed by this excerpt (exact module paths may differ in the
# surrounding source tree):
import numpy as np
import tensorflow as tf

from tensorflow.python import keras
from tensorflow.python.keras import metrics as metrics_module
from tensorflow.python.keras.feature_column import dense_features as df


def _test_dense_features(self, trainable=True):
  # Inputs.
  vocabulary_size = 3
  sparse_input_a = tf.compat.v1.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 4)),
      values=(2, 0, 1),
      dense_shape=(2, 5))
  sparse_input_b = tf.compat.v1.SparseTensorValue(
      # example 0, ids [0]
      # example 1, ids []
      indices=((0, 0),),
      values=(0,),
      dense_shape=(2, 5))
  sparse_input_c = tf.compat.v1.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 1), (1, 1), (1, 3)),
      values=(2, 0, 1),
      dense_shape=(2, 5))
  sparse_input_d = tf.compat.v1.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids []
      indices=((0, 1),),
      values=(2,),
      dense_shape=(2, 5))

  # Embedding variable.
  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info=None):
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(tf.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  # Expected lookup result, using combiner='mean'.
  expected_lookups = (
      # example 0:
      # A ids [2], embedding = [7, 11]
      # B ids [0], embedding = [1, 2]
      # C ids [2], embedding = [7, 11]
      # D ids [2], embedding = [7, 11]
      (7., 11., 1., 2., 7., 11., 7., 11.),
      # example 1:
      # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
      # B ids [], embedding = [0, 0]
      # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
      # D ids [], embedding = [0, 0]
      (2., 3.5, 0., 0., 2., 3.5, 0., 0.),
  )

  # Build columns.
  categorical_column_a = tf.feature_column.categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  categorical_column_b = tf.feature_column.categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  categorical_column_c = tf.feature_column.categorical_column_with_identity(
      key='ccc', num_buckets=vocabulary_size)
  categorical_column_d = tf.feature_column.categorical_column_with_identity(
      key='ddd', num_buckets=vocabulary_size)
  embedding_column_a, embedding_column_b = tf.feature_column.shared_embeddings(
      [categorical_column_a, categorical_column_b],
      dimension=embedding_dimension,
      initializer=_initializer,
      trainable=trainable)
  embedding_column_c, embedding_column_d = tf.feature_column.shared_embeddings(
      [categorical_column_c, categorical_column_d],
      dimension=embedding_dimension,
      initializer=_initializer,
      trainable=trainable)

  features = {
      'aaa': sparse_input_a,
      'bbb': sparse_input_b,
      'ccc': sparse_input_c,
      'ddd': sparse_input_d
  }

  # Provide sparse input and get dense result.
  dense_features = df.DenseFeatures(
      feature_columns=(embedding_column_b, embedding_column_a,
                       embedding_column_c, embedding_column_d))(features)

  # Assert expected embedding variable and lookups.
  global_vars = tf.compat.v1.get_collection(
      tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
  self.assertCountEqual(
      ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
      tuple([v.name for v in global_vars]))
  for v in global_vars:
    self.assertIsInstance(v, tf.Variable)
  trainable_vars = tf.compat.v1.get_collection(
      tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
  if trainable:
    self.assertCountEqual(
        ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
        tuple([v.name for v in trainable_vars]))
  else:
    self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
  shared_embedding_vars = global_vars

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(tf.compat.v1.tables_initializer())

  self.assertAllEqual(embedding_values,
                      self.evaluate(shared_embedding_vars[0]))
  self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
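# The helper above is parameterized on `trainable`; thin wrappers such as the
# following (a sketch -- these wrapper names are illustrative, not taken from
# the source) exercise both settings:


def test_dense_features_shared_embedding(self):
  self._test_dense_features()


def test_dense_features_shared_embedding_not_trainable(self):
  self._test_dense_features(trainable=False)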
def test_raises_if_empty_feature_columns(self):
  with self.assertRaisesRegex(ValueError,
                              'feature_columns must not be empty'):
    df.DenseFeatures(feature_columns=[])(features={})
def test_dense_features_not_trainable(self):
  # Inputs.
  vocabulary_size = 3
  sparse_input = tf.compat.v1.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 4), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 5))

  # Embedding variable.
  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info=None):
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(tf.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  # Expected lookup result, using combiner='mean'.
  expected_lookups = (
      # example 0, ids [2], embedding = [7, 11]
      (7., 11.),
      # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
      (2., 3.5),
      # example 2, ids [], embedding = [0, 0]
      (0., 0.),
      # example 3, ids [1], embedding = [3, 5]
      (3., 5.),
  )

  # Build columns.
  categorical_column = tf.feature_column.categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = tf.feature_column.embedding_column(
      categorical_column,
      dimension=embedding_dimension,
      initializer=_initializer,
      trainable=False)

  # Provide sparse input and get dense result.
  dense_features = df.DenseFeatures((embedding_column,))({
      'aaa': sparse_input
  })

  # Assert expected embedding variable and lookups.
  global_vars = tf.compat.v1.get_collection(
      tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
  self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',),
                        tuple([v.name for v in global_vars]))
  self.assertCountEqual([],
                        tf.compat.v1.get_collection(
                            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(tf.compat.v1.tables_initializer())

  self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
  self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
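# `test_dense_features` below takes `use_safe_embedding_lookup` and
# `partition_variables` as arguments, so it is presumably driven by a
# parameterized-test decorator. A sketch of the assumed wiring, using absl's
# `parameterized` (requires `from absl.testing import parameterized`; the
# case names here are illustrative, not from the source):
@parameterized.named_parameters(
    ('safe_lookup', True, False),
    ('unsafe_lookup', False, False),
    ('safe_lookup_partitioned', True, True),
    ('unsafe_lookup_partitioned', False, True))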
def test_dense_features(self, use_safe_embedding_lookup, partition_variables):
  # Inputs.
  vocabulary_size = 4
  sparse_input = tf.compat.v1.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 4), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 5))

  # Embedding variable.
  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.),  # id 2
      (9., 13.)  # id 3
  )

  def _initializer(shape, dtype, partition_info=None):
    if partition_variables:
      self.assertEqual([vocabulary_size, embedding_dimension],
                       partition_info.full_shape)
      self.assertAllEqual((2, embedding_dimension), shape)
    else:
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertIsNone(partition_info)

    self.assertEqual(tf.float32, dtype)
    return embedding_values

  # Expected lookup result, using combiner='mean'.
  expected_lookups = (
      # example 0, ids [2], embedding = [7, 11]
      (7., 11.),
      # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
      (2., 3.5),
      # example 2, ids [], embedding = [0, 0]
      (0., 0.),
      # example 3, ids [1], embedding = [3, 5]
      (3., 5.),
  )

  # Build columns.
  categorical_column = tf.feature_column.categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  partitioner = None
  if partition_variables:
    partitioner = tf.compat.v1.fixed_size_partitioner(2, axis=0)
  with tf.compat.v1.variable_scope('vars', partitioner=partitioner):
    embedding_column = tf.feature_column.embedding_column(
        categorical_column,
        dimension=embedding_dimension,
        initializer=_initializer,
        use_safe_embedding_lookup=use_safe_embedding_lookup)

    # Provide sparse input and get dense result.
    l = df.DenseFeatures((embedding_column,))
    dense_features = l({'aaa': sparse_input})

  # Assert expected embedding variable and lookups.
  global_vars = tf.compat.v1.get_collection(
      tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
  if partition_variables:
    self.assertCountEqual(
        ('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
         'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
        tuple([v.name for v in global_vars]))
  else:
    self.assertCountEqual(
        ('vars/dense_features/aaa_embedding/embedding_weights:0',),
        tuple([v.name for v in global_vars]))
  for v in global_vars:
    self.assertIsInstance(v, tf.Variable)
  trainable_vars = tf.compat.v1.get_collection(
      tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
  if partition_variables:
    self.assertCountEqual(
        ('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
         'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
        tuple([v.name for v in trainable_vars]))
  else:
    self.assertCountEqual(
        ('vars/dense_features/aaa_embedding/embedding_weights:0',),
        tuple([v.name for v in trainable_vars]))

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(tf.compat.v1.tables_initializer())

  self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
  self.assertAllEqual(expected_lookups, self.evaluate(dense_features))

  if use_safe_embedding_lookup:
    self.assertIn(
        'SparseFillEmptyRows',
        [x.type for x in tf.compat.v1.get_default_graph().get_operations()])
  else:
    self.assertNotIn(
        'SparseFillEmptyRows',
        [x.type for x in tf.compat.v1.get_default_graph().get_operations()])
def test_retrieving_input(self):
  features = {'a': [0.]}
  dense_features = df.DenseFeatures(tf.feature_column.numeric_column('a'))
  inputs = self.evaluate(dense_features(features))
  self.assertAllClose([[0.]], inputs)
def __init__(self, feature_columns, units, name=None, **kwargs):
  super(TestDNNModel, self).__init__(name=name, **kwargs)
  self._input_layer = df.DenseFeatures(feature_columns, name='input_layer')
  self._dense_layer = keras.layers.Dense(units, name='dense_layer')
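# `__init__` above only builds the layers; a minimal `call` wiring them
# together would look like the sketch below (an assumption -- the excerpt
# omits the method TestDNNModel actually uses):
def call(self, features):
  net = self._input_layer(features)
  net = self._dense_layer(net)
  return net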
def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
  col_a = tf.feature_column.numeric_column('a')
  col_b = tf.feature_column.numeric_column('b')
  col_c = tf.feature_column.numeric_column('c')

  fc1 = df.DenseFeatures([col_a, col_b], name='fc1')
  fc2 = df.DenseFeatures([col_b, col_c], name='fc2')
  dense = keras.layers.Dense(4)

  # This seems problematic.... We probably need something for DenseFeatures
  # the way Input is for InputLayer.
  output = dense(fc1) + dense(fc2)
  model = keras.models.Model([fc1, fc2], [output])

  optimizer = 'rmsprop'
  loss = 'mse'
  loss_weights = [1., 0.5]
  model.compile(
      optimizer,
      loss,
      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
      loss_weights=loss_weights)

  data_list = ([{
      'a': np.arange(10),
      'b': np.arange(10)
  }, {
      'b': np.arange(10),
      'c': np.arange(10)
  }], np.arange(10, 100))
  model.fit(*data_list, epochs=1)

  data_bloated_list = ([{
      'a': np.arange(10),
      'b': np.arange(10),
      'c': np.arange(10)
  }, {
      'a': np.arange(10),
      'b': np.arange(10),
      'c': np.arange(10)
  }], np.arange(10, 100))
  model.fit(*data_bloated_list, epochs=1)

  data_dict = ({
      'fc1': {
          'a': np.arange(10),
          'b': np.arange(10)
      },
      'fc2': {
          'b': np.arange(10),
          'c': np.arange(10)
      }
  }, np.arange(10, 100))
  model.fit(*data_dict, epochs=1)

  data_bloated_dict = ({
      'fc1': {
          'a': np.arange(10),
          'b': np.arange(10),
          'c': np.arange(10)
      },
      'fc2': {
          'a': np.arange(10),
          'b': np.arange(10),
          'c': np.arange(10)
      }
  }, np.arange(10, 100))
  model.fit(*data_bloated_dict, epochs=1)