Example #1
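These snippets are test methods lifted from a `tf.test.TestCase` subclass, so `self.evaluate` and the `assert*` helpers come from the TensorFlow test harness and the methods will not run standalone. They also assume a shared preamble along these lines (the exact import path for `df` is an assumption; it should point at the module that defines the `DenseFeatures` layer):

import numpy as np
import tensorflow as tf

import keras
from keras import metrics as metrics_module

# Assumed alias: the module under test that provides the DenseFeatures layer.
from keras.feature_column import dense_features as df
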
    def _test_dense_features(self, trainable=True):
        # Inputs.
        vocabulary_size = 3
        sparse_input_a = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 4)),
            values=(2, 0, 1),
            dense_shape=(2, 5))
        sparse_input_b = tf.compat.v1.SparseTensorValue(
            # example 0, ids [0]
            # example 1, ids []
            indices=((0, 0), ),
            values=(0, ),
            dense_shape=(2, 5))
        sparse_input_c = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 1), (1, 1), (1, 3)),
            values=(2, 0, 1),
            dense_shape=(2, 5))
        sparse_input_d = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids []
            indices=((0, 1), ),
            values=(2, ),
            dense_shape=(2, 5))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(tf.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0:
            # A ids [2], embedding = [7, 11]
            # B ids [0], embedding = [1, 2]
            # C ids [2], embedding = [7, 11]
            # D ids [2], embedding = [7, 11]
            (7., 11., 1., 2., 7., 11., 7., 11.),
            # example 1:
            # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            # B ids [], embedding = [0, 0]
            # C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            # D ids [], embedding = [0, 0]
            (2., 3.5, 0., 0., 2., 3.5, 0., 0.),
        )

        # Build columns.
        categorical_column_a = tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        categorical_column_b = tf.feature_column.categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size)
        categorical_column_c = tf.feature_column.categorical_column_with_identity(
            key='ccc', num_buckets=vocabulary_size)
        categorical_column_d = tf.feature_column.categorical_column_with_identity(
            key='ddd', num_buckets=vocabulary_size)

        embedding_column_a, embedding_column_b = tf.feature_column.shared_embeddings(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            initializer=_initializer,
            trainable=trainable)
        embedding_column_c, embedding_column_d = tf.feature_column.shared_embeddings(
            [categorical_column_c, categorical_column_d],
            dimension=embedding_dimension,
            initializer=_initializer,
            trainable=trainable)

        features = {
            'aaa': sparse_input_a,
            'bbb': sparse_input_b,
            'ccc': sparse_input_c,
            'ddd': sparse_input_d
        }

        # Provide sparse input and get dense result.
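        # Note: DenseFeatures concatenates its outputs with the feature
        # columns sorted by name, so the result order is A, B, C, D even
        # though the columns are passed as (b, a, c, d).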
        dense_features = df.DenseFeatures(
            feature_columns=(embedding_column_b, embedding_column_a,
                             embedding_column_c, embedding_column_d))(features)

        # Assert expected embedding variable and lookups.
        global_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
        self.assertCountEqual(
            ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
            tuple([v.name for v in global_vars]))
        for v in global_vars:
            self.assertIsInstance(v, tf.Variable)
        trainable_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        if trainable:
            self.assertCountEqual(
                ['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
                tuple([v.name for v in trainable_vars]))
        else:
            self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
        shared_embedding_vars = global_vars

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(tf.compat.v1.tables_initializer())

        self.assertAllEqual(embedding_values,
                            self.evaluate(shared_embedding_vars[0]))
        self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
Example #2
    def test_raises_if_empty_feature_columns(self):
        with self.assertRaisesRegex(ValueError,
                                    'feature_columns must not be empty'):
            df.DenseFeatures(feature_columns=[])(features={})
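Outside the test harness the same guard fires as soon as the layer is applied to features; a minimal sketch (reusing the assumed `df` alias from the preamble above):

try:
    df.DenseFeatures(feature_columns=[])(features={})
except ValueError as e:
    print(e)  # message contains 'feature_columns must not be empty'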
Example #3
    def test_dense_features_not_trainable(self):
        # Inputs.
        vocabulary_size = 3
        sparse_input = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 4), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 5))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(tf.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0, ids [2], embedding = [7, 11]
            (7., 11.),
            # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            (2., 3.5),
            # example 2, ids [], embedding = [0, 0]
            (0., 0.),
            # example 3, ids [1], embedding = [3, 5]
            (3., 5.),
        )

        # Build columns.
        categorical_column = tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        embedding_column = tf.feature_column.embedding_column(
            categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer,
            trainable=False)

        # Provide sparse input and get dense result.
        dense_features = df.DenseFeatures((embedding_column,))(
            {'aaa': sparse_input})

        # Assert expected embedding variable and lookups.
        global_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
        self.assertCountEqual(
            ('dense_features/aaa_embedding/embedding_weights:0', ),
            tuple([v.name for v in global_vars]))
        self.assertCountEqual([],
                              tf.compat.v1.get_collection(
                                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(tf.compat.v1.tables_initializer())

        self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
        self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
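The combiner='mean' arithmetic behind expected_lookups is easy to verify by hand; a small numpy-only sketch:

import numpy as np

table = np.array([[1., 2.], [3., 5.], [7., 11.]])  # the embedding_values above
print(table[[2]].mean(axis=0))     # example 0, ids [2]    -> [ 7. 11.]
print(table[[0, 1]].mean(axis=0))  # example 1, ids [0, 1] -> [2.  3.5]
# example 2 has no ids, so the lookup falls back to zeros: [0., 0.]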
Example #4
    def test_dense_features(self, use_safe_embedding_lookup,
                            partition_variables):
        # Inputs.
        vocabulary_size = 4
        sparse_input = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 4), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 5))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (9., 13.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            if partition_variables:
                self.assertEqual([vocabulary_size, embedding_dimension],
                                 partition_info.full_shape)
                self.assertAllEqual((2, embedding_dimension), shape)
            else:
                self.assertAllEqual((vocabulary_size, embedding_dimension),
                                    shape)
                self.assertIsNone(partition_info)

            self.assertEqual(tf.float32, dtype)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0, ids [2], embedding = [7, 11]
            (7., 11.),
            # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            (2., 3.5),
            # example 2, ids [], embedding = [0, 0]
            (0., 0.),
            # example 3, ids [1], embedding = [3, 5]
            (3., 5.),
        )

        # Build columns.
        categorical_column = tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        partitioner = None
        if partition_variables:
            partitioner = tf.compat.v1.fixed_size_partitioner(2, axis=0)
        with tf.compat.v1.variable_scope('vars', partitioner=partitioner):
            embedding_column = tf.feature_column.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_initializer,
                use_safe_embedding_lookup=use_safe_embedding_lookup)

            # Provide sparse input and get dense result.
            layer = df.DenseFeatures((embedding_column,))
            dense_features = layer({'aaa': sparse_input})

        # Assert expected embedding variable and lookups.
        global_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
        if partition_variables:
            self.assertCountEqual((
                'vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
                'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'
            ), tuple([v.name for v in global_vars]))
        else:
            self.assertCountEqual(
                ('vars/dense_features/aaa_embedding/embedding_weights:0', ),
                tuple([v.name for v in global_vars]))
        for v in global_vars:
            self.assertIsInstance(v, tf.Variable)
        trainable_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        if partition_variables:
            self.assertCountEqual((
                'vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
                'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'
            ), tuple([v.name for v in trainable_vars]))
        else:
            self.assertCountEqual(
                ('vars/dense_features/aaa_embedding/embedding_weights:0', ),
                tuple([v.name for v in trainable_vars]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(tf.compat.v1.tables_initializer())

        self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
        self.assertAllEqual(expected_lookups, self.evaluate(dense_features))

        if use_safe_embedding_lookup:
            self.assertIn('SparseFillEmptyRows', [
                x.type
                for x in tf.compat.v1.get_default_graph().get_operations()
            ])
        else:
            self.assertNotIn('SparseFillEmptyRows', [
                x.type
                for x in tf.compat.v1.get_default_graph().get_operations()
            ])
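What fixed_size_partitioner(2, axis=0) does to the embedding table can be seen in isolation; a graph-mode sketch (the scope and variable names here are illustrative, not from the test):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
with tf.variable_scope('demo',
                       partitioner=tf.fixed_size_partitioner(2, axis=0)):
    table = tf.get_variable('table', shape=(4, 2), dtype=tf.float32)

# The (4, 2) table is split along axis 0 into two (2, 2) shards, which is
# why _initializer sees shape (2, 2) and a full_shape of [4, 2] when
# partition_variables is True.
print([v.name for v in table])
# -> ['demo/table/part_0:0', 'demo/table/part_1:0']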
Example #5
    def test_retrieving_input(self):
        features = {'a': [0.]}
        dense_features = df.DenseFeatures(
            tf.feature_column.numeric_column('a'))
        inputs = self.evaluate(dense_features(features))
        self.assertAllClose([[0.]], inputs)

    def __init__(self, feature_columns, units, name=None, **kwargs):
        super(TestDNNModel, self).__init__(name=name, **kwargs)
        self._input_layer = df.DenseFeatures(feature_columns,
                                             name='input_layer')
        self._dense_layer = keras.layers.Dense(units, name='dense_layer')
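Only the constructor of the TestDNNModel helper survives above; its forward pass is missing. A plausible `call`, consistent with the two layers the constructor builds (the body is an assumption, not the original method):

    def call(self, features):
        # Assumed wiring: DenseFeatures first, then the Dense head.
        net = self._input_layer(features)
        return self._dense_layer(net)
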
    def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
        col_a = tf.feature_column.numeric_column('a')
        col_b = tf.feature_column.numeric_column('b')
        col_c = tf.feature_column.numeric_column('c')

        fc1 = df.DenseFeatures([col_a, col_b], name='fc1')
        fc2 = df.DenseFeatures([col_b, col_c], name='fc2')
        dense = keras.layers.Dense(4)

        # This seems problematic... We probably need something for DenseFeatures
        # the way Input is for InputLayer.
        output = dense(fc1) + dense(fc2)

        model = keras.models.Model([fc1, fc2], [output])

        optimizer = 'rmsprop'
        loss = 'mse'
        loss_weights = [1., 0.5]
        model.compile(optimizer,
                      loss,
                      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
                      loss_weights=loss_weights)

        data_list = ([{
            'a': np.arange(10),
            'b': np.arange(10)
        }, {
            'b': np.arange(10),
            'c': np.arange(10)
        }], np.arange(10, 100))
        model.fit(*data_list, epochs=1)

        data_bloated_list = ([{
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }, {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }], np.arange(10, 100))
        model.fit(*data_bloated_list, epochs=1)

        data_dict = ({
            'fc1': {
                'a': np.arange(10),
                'b': np.arange(10)
            },
            'fc2': {
                'b': np.arange(10),
                'c': np.arange(10)
            }
        }, np.arange(10, 100))
        model.fit(*data_dict, epochs=1)

        data_bloated_dict = ({
            'fc1': {
                'a': np.arange(10),
                'b': np.arange(10),
                'c': np.arange(10)
            },
            'fc2': {
                'a': np.arange(10),
                'b': np.arange(10),
                'c': np.arange(10)
            }
        }, np.arange(10, 100))
        model.fit(*data_bloated_dict, epochs=1)