def test_serialization_sequence_features(self):
    rating = fc.sequence_numeric_column('rating')
    sequence_feature = fc.SequenceFeatures([rating])
    config = keras.layers.serialize(sequence_feature)

    revived = keras.layers.deserialize(config)
    self.assertIsInstance(revived, fc.SequenceFeatures)
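A standalone version of this round trip with the public TF 2.x API could look roughly like the sketch below; the tf.keras.experimental.SequenceFeatures path is an assumption (the layer was moved and later deprecated, so the exact endpoint depends on the release):

import tensorflow as tf

rating = tf.feature_column.sequence_numeric_column('rating')
sequence_feature = tf.keras.experimental.SequenceFeatures([rating])

# Round-trip the layer through the generic Keras (de)serialization helpers.
config = tf.keras.layers.serialize(sequence_feature)
revived = tf.keras.layers.deserialize(config)
assert isinstance(revived, tf.keras.experimental.SequenceFeatures)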
Example #2
    def test_saving_with_sequence_features(self):
        cols = [
            feature_column_lib.sequence_numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b':
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name='b',
                               dtype='string')
        }

        fc_layer, _ = feature_column_lib.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        values_b = np.zeros(10, dtype=np.str_)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not context.executing_eagerly():
                self.evaluate(lookup_ops.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }, steps=1), batch_size)
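The TODO in the snippet above sketches masking the GRU with the sequence lengths that SequenceFeatures returns as its second output. A minimal standalone sketch of that idea, assuming the public tf.keras.experimental.SequenceFeatures endpoint and that tf.sequence_mask dispatches cleanly on Keras symbolic tensors (neither is guaranteed across TF versions):

import tensorflow as tf

cols = [tf.feature_column.sequence_numeric_column('a')]
inputs = {'a': tf.keras.Input(shape=(None, 1), sparse=True, name='a')}

# SequenceFeatures yields the padded dense sequence plus per-example lengths.
seq_output, seq_length = tf.keras.experimental.SequenceFeatures(cols)(inputs)
mask = tf.sequence_mask(seq_length)  # (batch, timesteps) boolean mask
x = tf.keras.layers.GRU(32)(seq_output, mask=mask)
model = tf.keras.Model(inputs, tf.keras.layers.Dense(10)(x))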
Example #3
    def test_non_v1_feature_column(self):
        parsing_spec = self._parse_example_fn(
            feature_columns=[fc.sequence_numeric_column('a')], label_key='b')
        expected_spec = {
            'a': parsing_ops.VarLenFeature(dtype=dtypes.float32),
            'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
        }
        self.assertDictEqual(expected_spec, parsing_spec)
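The VarLenFeature expectation for 'a' comes directly from the sequence column's own parsing spec; a quick sketch of checking that with the public API (assuming TF 2.x; the label entry for 'b' is added by the estimator's parsing function, not by the column itself):

import tensorflow as tf

col = tf.feature_column.sequence_numeric_column('a')
spec = tf.feature_column.make_parse_example_spec([col])
# Expected: {'a': VarLenFeature(dtype=tf.float32)}
print(spec)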
Example #4
    def test_encode_features_sequence_column(self):
        with tf.Graph().as_default():
            # Inputs.
            vocabulary_size = 4
            # Sequence of ids. -1 values are ignored.
            input_seq_ids = np.array([
                [3, -1, -1],  # example 0
                [0, 1, -1],  # example 1
            ])
            # Sequence of numeric values.
            # input_seq_nums = [
            #  [1.],  # example 0.
            #  [2., 3.],  # example 1
            # ]
            input_seq_nums = tf.sparse.SparseTensor(indices=[[0, 0], [1, 0],
                                                             [1, 1]],
                                                    values=[1., 2., 3.],
                                                    dense_shape=(2, 3))

            input_features = {
                "seq_ids": input_seq_ids,
                "seq_nums": input_seq_nums
            }

            # Embedding variable.
            embedding_dimension = 2
            embedding_values = (
                (1., 2.),  # id 0
                (3., 5.),  # id 1
                (7., 11.),  # id 2
                (9., 13.)  # id 3
            )

            # Expected sequence embeddings for input_seq_ids.
            expected_seq_embed = [
                # example 0:
                [[9., 13.], [0., 0.], [0., 0.]],
                # example 1:
                [[1., 2.], [3., 5.], [0., 0.]],
            ]
            expected_seq_nums = [
                # example 0:
                [[1.], [0.], [0.]],
                # example 1:
                [[2.], [3.], [0.]],
            ]

            # Build columns.
            seq_categorical_column = (
                feature_column.sequence_categorical_column_with_identity(
                    key="seq_ids", num_buckets=vocabulary_size))
            seq_embed_column = feature_column.embedding_column(
                seq_categorical_column,
                dimension=embedding_dimension,
                initializer=lambda shape, dtype, partition_info:
                embedding_values)
            seq_numeric_column = feature_column.sequence_numeric_column(
                "seq_nums")

            cols_to_tensors = feature_lib.encode_features(
                input_features, [seq_embed_column, seq_numeric_column],
                mode=tf.estimator.ModeKeys.EVAL)
            actual_seq_embed = cols_to_tensors[seq_embed_column]
            actual_seq_nums = cols_to_tensors[seq_numeric_column]

            # Assert embedding variable and encoded sequence features.
            global_vars = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
            embedding_var = global_vars[0]
            with tf.compat.v1.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                sess.run(tf.compat.v1.tables_initializer())
                self.assertAllEqual(embedding_values, embedding_var.eval())
                self.assertAllEqual(expected_seq_embed, actual_seq_embed)
                self.assertAllEqual(expected_seq_nums, actual_seq_nums)
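A similar padded layout (zero vectors at padded steps, per-example lengths alongside) can be reproduced with the Keras SequenceFeatures layer; the sketch below is a rough equivalent that uses random embedding initialization rather than the fixed embedding_values above, and assumes tf.keras.experimental.SequenceFeatures is available:

import tensorflow as tf

seq_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 0], [1, 1]], values=[3, 0, 1], dense_shape=(2, 3))
embed_col = tf.feature_column.embedding_column(
    tf.feature_column.sequence_categorical_column_with_identity(
        'seq_ids', num_buckets=4),
    dimension=2)

# Returns the padded (batch, timesteps, dimension) embeddings and the
# per-example sequence lengths ([1, 2] here); padded steps are zero vectors.
dense_embed, lengths = tf.keras.experimental.SequenceFeatures([embed_col])(
    {'seq_ids': seq_ids})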
Example #5
    def test_save_load_with_sequence_features(self):
        cols = [
            feature_column_lib.sequence_numeric_column("a"),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a":
            keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b":
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name="b",
                               dtype="string"),
        }

        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=keras.losses.MSE,
            optimizer="rmsprop",
            metrics=[keras.metrics.categorical_accuracy],
        )

        tiledb_uri = os.path.join(self.get_temp_dir(), "model_array")
        tiledb_model_obj = TensorflowTileDB(uri=tiledb_uri)
        tiledb_model_obj.save(model=model, include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(model.optimizer.weights)
        loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)

        # Assert optimizer weights are equal
        for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                     loaded_opt_weights):
            np.testing.assert_array_equal(weight_model, weight_loaded_model)

        batch_size = 10
        timesteps = 1

        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        values_b = np.zeros(10, dtype=np.str_)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        # Assert model predictions are equal
        np.testing.assert_array_equal(
            loaded_model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
            model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
        )
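batch_get_value above is presumably the Keras backend helper of the same name; with the public API the optimizer-weight comparison reduces to something like this sketch (the helper function name is illustrative, not from the codebase):

import numpy as np
import tensorflow as tf

def assert_same_optimizer_weights(model_a, model_b):
    # Fetch concrete values for every optimizer variable (iterations, slots).
    weights_a = tf.keras.backend.batch_get_value(model_a.optimizer.weights)
    weights_b = tf.keras.backend.batch_get_value(model_b.optimizer.weights)
    for w_a, w_b in zip(weights_a, weights_b):
        np.testing.assert_array_equal(w_a, w_b)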