Example #1
    def test_trace_features_layer(self):
        columns = [feature_column_lib.numeric_column('x')]
        model = sequential.Sequential(
            [feature_column_lib.DenseFeatures(columns)])
        model_input = {'x': constant_op.constant([[1.]])}
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({'output_1': [[1.]]}, fn({'x': [[1.]]}))

        columns = [
            feature_column_lib.numeric_column('x'),
            feature_column_lib.numeric_column('y')
        ]
        model = sequential.Sequential(
            [feature_column_lib.DenseFeatures(columns)])
        model_input = {
            'x': constant_op.constant([[1.]]),
            'y': constant_op.constant([[2.]])
        }
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({'output_1': [[1., 2.]]},
                            fn({
                                'x': [[1.]],
                                'y': [[2.]]
                            }))
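
For orientation, a minimal sketch of the same behavior against the public TF 2.x API (an assumption here, not the test harness above). DenseFeatures sorts its columns by name and concatenates their dense representations:

import numpy as np
import tensorflow as tf

# Columns are sorted by name, so 'x' precedes 'y' in the output.
columns = [tf.feature_column.numeric_column('x'),
           tf.feature_column.numeric_column('y')]
layer = tf.keras.layers.DenseFeatures(columns)
out = layer({'x': np.array([[1.]], dtype=np.float32),
             'y': np.array([[2.]], dtype=np.float32)})
# out is a float32 tensor of shape [1, 2]: [[1., 2.]]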
Example #2
    def test_invalid_cases(self, shared):

        # Inputs.
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=3)

        # Training on TPU with CPU embedding lookups is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        dense_features = fc_lib.DenseFeatures(embedding_column)
        with self.assertRaisesRegexp(
                ValueError,
                r'.*embedding_lookup_device=\"cpu\" during training is not'):
            dense_features(input_features)

        # Inference with TPU embedding hardware is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()
        dense_features = fc_lib.DenseFeatures(embedding_column)
        with self.assertRaisesRegexp(
                ValueError,
                r'Using embedding_lookup_device=tpu_embedding_core during inference is '
        ):
            dense_features(input_features)
        context.Exit()
Example #3
    def test_sequential_model_with_ds_input(self):
        if testing_utils.should_run_distributed():
            self.skipTest('b/137397816')
        columns = [fc.numeric_column('a')]
        model = keras.models.Sequential([
            fc.DenseFeatures(columns),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(20, activation='softmax')
        ])
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'],
                      run_eagerly=testing_utils.should_run_eagerly(),
                      run_distributed=testing_utils.should_run_distributed())

        y = np.random.randint(20, size=(100, 1))
        y = keras.utils.to_categorical(y, num_classes=20)
        x = {'a': np.random.random((100, 1))}
        ds1 = dataset_ops.Dataset.from_tensor_slices(x)
        ds2 = dataset_ops.Dataset.from_tensor_slices(y)
        ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
        model.fit(ds, steps_per_epoch=1)
        model.fit(ds, steps_per_epoch=1)
        model.evaluate(ds, steps=1)
        model.predict(ds, steps=1)
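
A hedged sketch of the dataset plumbing this test relies on, using only the public tf.data API: zipping a dataset of feature dicts with a dataset of labels yields (features, labels) pairs that Keras consumes directly.

import numpy as np
import tensorflow as tf

x = {'a': np.random.random((100, 1)).astype(np.float32)}
y = np.random.randint(20, size=(100, 1))
# Each element of `ds` is a ({'a': feature_batch}, label_batch) pair.
ds = tf.data.Dataset.zip((
    tf.data.Dataset.from_tensor_slices(x),
    tf.data.Dataset.from_tensor_slices(y),
)).batch(5)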
Example #4
    def rnn_logit_fn(features, mode):
        """Recurrent Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.
      mode: Optional. Specifies if this is training, evaluation or prediction.
            See `ModeKeys`.

    Returns:
      A tuple of `Tensor` objects representing the logits and the sequence
      length mask.
    """
        with ops.name_scope('sequence_input_layer'):
            sequence_input, sequence_length = fc.SequenceFeatures(
                sequence_feature_columns)(features)
            summary.histogram('sequence_length', sequence_length)

            if context_feature_columns:
                context_input = fc.DenseFeatures(context_feature_columns)(
                    features)
                sequence_input = fc.concatenate_context_input(
                    context_input, sequence_input=sequence_input)

        # Ignore output state.
        sequence_length_mask = array_ops.sequence_mask(sequence_length)
        rnn_layer = rnn_layer_fn()
        rnn_outputs = rnn_layer(sequence_input,
                                mask=sequence_length_mask,
                                training=(mode == model_fn.ModeKeys.TRAIN))

        logits = keras_layers.Dense(units=output_units,
                                    name='logits')(rnn_outputs)
        return logits, sequence_length_mask
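
The fc.concatenate_context_input call above replicates each example's context vector across timesteps; a minimal sketch of that idea in plain TF ops (the shapes are illustrative assumptions, not values from the snippet):

import tensorflow as tf

sequence_input = tf.random.normal([2, 4, 3])  # [batch, timesteps, d_seq]
context_input = tf.random.normal([2, 5])      # [batch, d_ctx]
# Tile the context once per timestep, then concatenate on the feature
# axis, giving [batch, timesteps, d_seq + d_ctx].
tiled = tf.tile(tf.expand_dims(context_input, 1), [1, 4, 1])
combined = tf.concat([sequence_input, tiled], axis=-1)  # shape [2, 4, 8]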
Example #5
    def model_fn(features, labels, mode, params):
        del params

        dense_features = fc_lib.DenseFeatures(feature_columns)
        input_layer = dense_features(features)
        hidden_layer = tf.layers.dense(
            input_layer,
            HIDDEN_LAYER_SIZE,
            kernel_initializer=tf.constant_initializer(KERNEL_INIT_VALUE),
            bias_initializer=tf.constant_initializer(BIAS_INIT_VALUE))

        last_layer = tf.reduce_sum(hidden_layer, axis=1)

        logits = tf.reshape(last_layer, [-1])
        labels = tf.reshape(labels, [-1])
        losses = tf.square(labels - logits)

        # Use reduce_mean to match the CrossShardOptimizer reduction.
        loss = tf.reduce_mean(losses)
        if optimizer_type == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                LEARNING_RATE, initial_accumulator_value=ADADGRAD_INIT_VALUE)
        elif optimizer_type == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        else:
            raise ValueError('{} is not supported.'.format(optimizer_type))
        # Default reduction=tf.losses.Reduction.MEAN
        optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
Example #6
    def test_saving_with_dense_features(self):
        cols = [
            feature_column_lib.numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(1, ), name='a'),
            'b': keras.layers.Input(shape=(1, ), name='b', dtype='string')
        }

        fc_layer = feature_column_lib.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer=keras.optimizers.RMSprop(lr=0.0001),
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype('str')

        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        self.assertLen(loaded_model.predict({
            'a': inputs_a,
            'b': inputs_b
        }), 10)
Example #7
def _get_sequence_dense_tensor_state(column, features):
    state_manager = fc._StateManagerImpl(fc_lib.DenseFeatures(column),
                                         trainable=True)
    column.create_state(state_manager)
    dense_tensor, lengths = column.get_sequence_dense_tensor(
        fc.FeatureTransformationCache(features), state_manager)
    return dense_tensor, lengths, state_manager
Example #8
def encode_features(features,
                    feature_columns,
                    mode=tf.estimator.ModeKeys.TRAIN,
                    scope=None):
  """Returns dense tensors from features using feature columns.

  This function encodes the feature column transformation on the 'raw'
  `features`.

  Args:
    features: (dict) mapping feature names to feature values, possibly obtained
      from input_fn.
    feature_columns: (list)  list of feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    (dict) A mapping from columns to dense tensors.
  """
  # Having scope here for backward compatibility.
  del scope
  trainable = (mode == tf.estimator.ModeKeys.TRAIN)
  cols_to_tensors = {}

  # TODO: Ensure only v2 Feature Columns are used.
  if (hasattr(feature_column_lib, "is_feature_column_v2") and
      feature_column_lib.is_feature_column_v2(feature_columns)):
    dense_feature_columns = [
        col for col in feature_columns if not _is_sequence_column_v2(col)
    ]
    sequence_feature_columns = [
        col for col in feature_columns if _is_sequence_column_v2(col)
    ]

    if dense_feature_columns:
      dense_layer = feature_column_lib.DenseFeatures(
          feature_columns=dense_feature_columns,
          name="encoding_layer",
          trainable=trainable)
      dense_layer(features, cols_to_output_tensors=cols_to_tensors)

    for col in sequence_feature_columns:
      sequence_feature_layer = tf.keras.experimental.SequenceFeatures(col)
      sequence_input, _ = sequence_feature_layer(features)
      cols_to_tensors[col] = sequence_input
  else:
    tf.compat.v1.feature_column.input_layer(
        features=features,
        feature_columns=feature_columns,
        trainable=trainable,
        cols_to_output_tensors=cols_to_tensors)

  return cols_to_tensors
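
A minimal sketch of the cols_to_output_tensors mechanism this function leans on, assuming the TF 2.x Keras DenseFeatures layer (whose call accepts that keyword): the layer fills the supplied dict with one dense tensor per column as a side effect of the call.

import tensorflow as tf

col = tf.feature_column.numeric_column('age')
layer = tf.keras.layers.DenseFeatures([col])
cols_to_tensors = {}
# The return value is the concatenated output; the dict maps each column
# object to its individual dense tensor.
layer({'age': tf.constant([[42.0]])}, cols_to_output_tensors=cols_to_tensors)
# cols_to_tensors[col] is a [1, 1] float32 tensor.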
Example #9
    def test_serialization_dense_features(self):
        dense_feature = fc.DenseFeatures([fc.numeric_column('a')])
        config = keras.layers.serialize(dense_feature)
        self.assertEqual(config['class_name'], 'DenseFeatures')

        revived = keras.layers.deserialize(config)
        if tf2.enabled():
            self.assertIsInstance(revived, dense_features_v2.DenseFeatures)
        else:
            self.assertIsInstance(revived, fc.DenseFeatures)
            self.assertNotIsInstance(revived, dense_features_v2.DenseFeatures)
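
For reference, a sketch of the same round trip with public TF 2.x APIs (keras.layers.serialize/deserialize delegate to the layer's get_config/from_config):

import tensorflow as tf

layer = tf.keras.layers.DenseFeatures([tf.feature_column.numeric_column('a')])
config = tf.keras.layers.serialize(layer)   # {'class_name': 'DenseFeatures', ...}
revived = tf.keras.layers.deserialize(config)
assert type(revived).__name__ == 'DenseFeatures'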
Example #10
    def __init__(self,
                 rnn_layer,
                 units,
                 sequence_feature_columns,
                 context_feature_columns=None,
                 activation=None,
                 return_sequences=False,
                 **kwargs):
        """Initializes a RNNModel instance.

    Args:
      rnn_layer: A Keras RNN layer.
      units: An int indicating the dimension of the logit layer, and of the
        model output.
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      activation: Activation function to apply to the logit layer (for instance
        `tf.keras.activations.sigmoid`). If you don't specify anything, no
        activation is applied.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      **kwargs: Additional arguments.

    Raises:
      ValueError: If `units` is not an int.
    """
        super(RNNModel, self).__init__(**kwargs)
        if not isinstance(units, int):
            raise ValueError('units must be an int.  Given type: {}'.format(
                type(units)))
        self._return_sequences = return_sequences
        self._sequence_feature_columns = sequence_feature_columns
        self._context_feature_columns = context_feature_columns
        self._sequence_features_layer = fc.SequenceFeatures(
            sequence_feature_columns)
        self._dense_features_layer = None
        if context_feature_columns:
            self._dense_features_layer = fc.DenseFeatures(
                context_feature_columns)
        self._rnn_layer = rnn_layer
        self._logits_layer = keras_layers.Dense(units=units,
                                                activation=activation,
                                                name='logits')
Example #11
 def testDenseFeatures(self):
   features = {
       "text_a": ["hello world", "pair-programming"],
       "text_b": ["hello world", "oov token"],
   }
   feature_columns = [
       hub.text_embedding_column("text_a", self.spec, trainable=False),
       hub.text_embedding_column("text_b", self.spec, trainable=False),
   ]
   with tf.Graph().as_default():
     feature_layer = feature_column_lib.DenseFeatures(feature_columns)
     feature_layer_out = feature_layer(features)
     with tf_v1.train.MonitoredSession() as sess:
       output = sess.run(feature_layer_out)
       self.assertAllEqual(
           output, [[1, 2, 3, 4, 1, 2, 3, 4], [5, 5, 5, 5, 0, 0, 0, 0]])
Example #12
    def test_sequential_model(self):
        columns = [fc.numeric_column('a')]
        model = keras.models.Sequential([
            fc.DenseFeatures(columns),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(20, activation='softmax')
        ])
        model.compile(optimizer=rmsprop.RMSPropOptimizer(1e-3),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        x = {'a': np.random.random((10, 1))}
        y = np.random.randint(20, size=(10, 1))
        y = keras.utils.to_categorical(y, num_classes=20)
        model.fit(x, y, epochs=1, batch_size=5)
        model.fit(x, y, epochs=1, batch_size=5)
        model.evaluate(x, y, batch_size=5)
        model.predict(x, batch_size=5)
Example #13
 def testDenseFeatures(self):
     features = {
         "image_a": [[[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]],
                     [[[0.7, 0.7, 0.7], [0.1, 0.2, 0.3]]]],
         "image_b": [[[[0.1, 0.2, 0.1], [0.2, 0.1, 0.2]]],
                     [[[0.1, 0.2, 0.3], [0.3, 0.2, 0.1]]]],
     }
     feature_columns = [
         hub.image_embedding_column("image_a", self.spec),
         hub.image_embedding_column("image_b", self.spec),
     ]
     with tf.Graph().as_default():
         feature_layer = feature_column_lib.DenseFeatures(feature_columns)
         feature_layer_out = feature_layer(features)
         with tf_v1.train.MonitoredSession() as sess:
             output = sess.run(feature_layer_out)
             self.assertAllClose(output, [[0.5, 0.7, 0.9, 0.3, 0.3, 0.3],
                                          [0.8, 0.9, 1.0, 0.4, 0.4, 0.4]])
Example #14
  def test_string_input(self):
    x = {'age': np.random.random((1024, 1)),
         'cabin': np.array(['a'] * 1024)}
    y = np.random.randint(2, size=(1024, 1))
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    dataset = dataset_ops.Dataset.zip((ds1, ds2)).batch(4)
    categorical_cols = [fc.categorical_column_with_hash_bucket('cabin', 10)]
    feature_cols = ([fc.numeric_column('age')]
                    + [fc.indicator_column(cc) for cc in categorical_cols])
    layers = [fc.DenseFeatures(feature_cols),
              keras.layers.Dense(128),
              keras.layers.Dense(1)]

    model = keras.models.Sequential(layers)
    model.compile(optimizer='sgd',
                  loss=keras.losses.BinaryCrossentropy())
    model.fit(dataset)
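
A hedged sketch of the string-handling path used above: each string is hashed into one of 10 buckets, and indicator_column one-hot encodes the bucket id so DenseFeatures can emit a dense tensor.

import tensorflow as tf

cabin = tf.feature_column.indicator_column(
    tf.feature_column.categorical_column_with_hash_bucket('cabin', 10))
layer = tf.keras.layers.DenseFeatures([cabin])
# Each row is a multi-hot vector over the 10 hash buckets.
out = layer({'cabin': tf.constant([['a'], ['b']])})  # shape [2, 10]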
Example #15
def encode_features(features,
                    feature_columns,
                    mode=model_fn.ModeKeys.TRAIN,
                    scope=None):
  """Returns dense tensors from features using feature columns.

  This function encodes the feature column transformation on the 'raw'
  `features`.

  Args:
    features: (dict) mapping feature names to feature values, possibly obtained
      from input_fn.
    feature_columns: (list)  list of feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    (dict) A mapping from columns to dense tensors.
  """
  # Having scope here for backward compatibility.
  del scope
  trainable = (mode == model_fn.ModeKeys.TRAIN)
  cols_to_tensors = {}

  if (hasattr(feature_column_lib, "is_feature_column_v2") and
      feature_column_lib.is_feature_column_v2(feature_columns)):
    dense_layer = feature_column_lib.DenseFeatures(
        feature_columns=feature_columns,
        name="encoding_layer",
        trainable=trainable)
    dense_layer(features, cols_to_output_tensors=cols_to_tensors)
  else:
    feature_column.input_layer(
        features=features,
        feature_columns=feature_columns,
        trainable=trainable,
        cols_to_output_tensors=cols_to_tensors)

  return cols_to_tensors
Example #16
    def test_sequential_model_with_ds_input(self):
        columns = [fc.numeric_column('a')]
        model = keras.models.Sequential([
            fc.DenseFeatures(columns),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(20, activation='softmax')
        ])
        model.compile(optimizer=rmsprop.RMSPropOptimizer(1e-3),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        y = np.random.randint(20, size=(100, 1))
        y = keras.utils.to_categorical(y, num_classes=20)
        x = {'a': np.random.random((100, 1))}
        ds1 = dataset_ops.Dataset.from_tensor_slices(x)
        ds2 = dataset_ops.Dataset.from_tensor_slices(y)
        ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
        model.fit(ds, steps_per_epoch=1)
        model.fit(ds, steps_per_epoch=1)
        model.evaluate(ds, steps=1)
        model.predict(ds, steps=1)
Example #17
  def test_sequential_model(self):
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        fc.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly(),
        experimental_run_tf_function=testing_utils.should_run_tf_function())

    x = {'a': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    model.fit(x, y, epochs=1, batch_size=5)
    model.fit(x, y, epochs=1, batch_size=5)
    model.evaluate(x, y, batch_size=5)
    model.predict(x, batch_size=5)
Example #18
    def test_sequential_model_with_crossed_column(self):
        feature_columns = []
        age_buckets = fc.bucketized_column(
            fc.numeric_column('age'),
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
        feature_columns.append(age_buckets)

        # indicator cols
        thal = fc.categorical_column_with_vocabulary_list(
            'thal', ['fixed', 'normal', 'reversible'])

        crossed_feature = fc.crossed_column([age_buckets, thal],
                                            hash_bucket_size=1000)
        crossed_feature = fc.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature)

        feature_layer = fc.DenseFeatures(feature_columns)

        model = keras.models.Sequential([
            feature_layer,
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        age_data = np.random.randint(10, 100, size=100)
        thal_data = np.random.choice(['fixed', 'normal', 'reversible'],
                                     size=100)
        inp_x = {'age': age_data, 'thal': thal_data}
        inp_y = np.random.randint(0, 1, size=100)
        ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        model.fit(ds, epochs=1)
        model.fit(ds, epochs=1)
        model.evaluate(ds)
        model.predict(ds)
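
A minimal sketch of what the crossed column contributes (the boundaries and bucket sizes below are illustrative, not the test's values): each (age bucket, thal) pair is hashed into hash_bucket_size buckets, and indicator_column one-hot encodes the result.

import tensorflow as tf

age_buckets = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column('age'), boundaries=[30, 50])
thal = tf.feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
crossed = tf.feature_column.indicator_column(
    tf.feature_column.crossed_column([age_buckets, thal], hash_bucket_size=10))
layer = tf.keras.layers.DenseFeatures([crossed])
# One bucket fires per example: shape [1, 10].
out = layer({'age': tf.constant([[25.]]), 'thal': tf.constant([['fixed']])})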
Example #19
    def DISABLED_test_function_model_feature_layer_input(self):
        col_a = fc.numeric_column('a')
        col_b = fc.numeric_column('b')

        feature_layer = fc.DenseFeatures([col_a, col_b], name='fc')
        dense = keras.layers.Dense(4)

        # This seems problematic.... We probably need something for DenseFeatures
        # the way Input is for InputLayer.
        output = dense(feature_layer)

        model = keras.models.Model([feature_layer], [output])

        optimizer = 'rmsprop'
        loss = 'mse'
        loss_weights = [1., 0.5]
        model.compile(optimizer,
                      loss,
                      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
                      loss_weights=loss_weights)

        data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20))
        model.fit(*data, epochs=1)
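
A hedged sketch of the working alternative the comment hints at: give the model explicit Input placeholders per raw feature and pass that dict to DenseFeatures, rather than treating the layer itself as a model input.

import numpy as np
import tensorflow as tf

col_a = tf.feature_column.numeric_column('a')
col_b = tf.feature_column.numeric_column('b')
inputs = {'a': tf.keras.Input(shape=(1,), name='a'),
          'b': tf.keras.Input(shape=(1,), name='b')}
x = tf.keras.layers.DenseFeatures([col_a, col_b])(inputs)
output = tf.keras.layers.Dense(4)(x)
model = tf.keras.Model(inputs, output)
model.compile('rmsprop', 'mse')
model.fit({'a': np.arange(10.).reshape(10, 1),
           'b': np.arange(10.).reshape(10, 1)},
          np.zeros((10, 4)), epochs=1)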
Example #20
    def test_feature_layer_cpu(self):
        # Inputs.
        vocabulary_size = 3
        input_a = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        input_b = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(3, 2))
        input_features = {'aaa': input_a, 'bbb': input_b}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups_a = (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
        )
        expected_lookups_b = (
            # example 0:
            (
                (7., 11.),
                (0., 0.),
            ),  # ids [2], embedding = [[7, 11], [0, 0]]
            # example 1:
            (
                (1., 2.),
                (3., 5.),
            ),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
            # example 2:
            (
                (0., 0.),
                (0., 0.),
            ),  # ids [], embedding = [[0, 0], [0, 0]]
        )

        # Build columns.
        categorical_column_a = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size)
        embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_lengths=[0, 2])

        # Provide sparse input and get dense result.
        dense_features = fc_lib.DenseFeatures([embedding_column_a])
        sequence_features = fc_lib.SequenceFeatures([embedding_column_b])
        embedding_lookup_a = dense_features(input_features)
        embedding_lookup_b = sequence_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual(('aaa_bbb_shared_embedding:0', ),
                              tuple([v.name for v in global_vars]))
        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
            self.assertAllEqual(expected_lookups_b,
                                embedding_lookup_b[0].eval())
Example #21
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = feature_column_lib.DenseFeatures(
          feature_columns=feature_columns, name='input_layer')
    else:
      self._input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)

    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        hidden_layer = core_layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope,
            _scope=hidden_layer_scope)
        self._add_layer(hidden_layer, hidden_layer_scope.name)
        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = core_layers.Dropout(rate=self._dropout)
          self._add_layer(dropout_layer, dropout_layer.name)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_layer = normalization.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name='batchnorm_%d' % layer_id,
              _scope='batchnorm_%d' % layer_id)
          self._add_layer(batch_norm_layer, batch_norm_layer.name)
          self._batch_norm_layers.append(batch_norm_layer)

    with variable_scope.variable_scope('logits') as logits_scope:
      self._logits_layer = core_layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._add_layer(self._logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner
Example #22
context_features, example_features = tfr.feature.encode_listwise_features(
    features=parsed_features,
    context_feature_columns=context_feature_columns(),
    example_feature_columns=example_feature_columns(),
    mode=tf.estimator.ModeKeys.TRAIN,
    input_size=_INPUT_SIZE)

# Encoded feature column
example_features['encoded_clust_index'].numpy()

#%% Try to convert features to dense values for inspection

from tensorflow.python.feature_column import feature_column_lib

dense_layer = feature_column_lib.DenseFeatures(
    feature_columns=list(example_feature_columns().values()),
    name='encoding_layer',
    trainable=True)

dense_layer(features)

parsed_features['clust_index']._values
parsed_features['clust_index']._dense_shape
parsed_features['clust_index'].dense_shape
parsed_features['clust_index'].values

#%% Did I save my features (strings) incorrectly?

# For text or sequence problems, the embedding layer takes
# a 2D tensor of integers of shape (samples, sequence_length),
# where each entry is a sequence of integers.
# It can embed sequences of variable length.
Example #23
 def __init__(self, feature_columns, units, name=None, **kwargs):
   super(TestDNNModel, self).__init__(name=name, **kwargs)
   self._input_layer = fc.DenseFeatures(feature_columns, name='input_layer')
   self._dense_layer = keras.layers.Dense(units, name='dense_layer')
Example #24
  def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
    col_a = fc.numeric_column_v2('a')
    col_b = fc.numeric_column_v2('b')
    col_c = fc.numeric_column_v2('c')

    fc1 = fc.DenseFeatures([col_a, col_b], name='fc1')
    fc2 = fc.DenseFeatures([col_b, col_c], name='fc2')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(fc1) + dense(fc2)

    model = keras.models.Model([fc1, fc2], [output])

    optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    data_list = ([{
        'a': np.arange(10),
        'b': np.arange(10)
    }, {
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    print(model.fit(*data_list, epochs=1))

    data_bloated_list = ([{
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }, {
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    print(model.fit(*data_bloated_list, epochs=1))

    data_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10)
        },
        'fc2': {
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    print(model.fit(*data_dict, epochs=1))

    data_bloated_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        },
        'fc2': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    print(model.fit(*data_bloated_dict, epochs=1))
Example #25
    def __init__(self,
                 units,
                 hidden_units,
                 feature_columns,
                 activation_fn,
                 dropout,
                 batch_norm,
                 name=None,
                 **kwargs):
        super(_DNNModelV2, self).__init__(name=name, **kwargs)

        # Add this name_scope for backward compatibility, as it was previously
        # used in variable_scope.
        with ops.name_scope(
                'input_from_feature_columns') as input_feature_column_scope:
            layer_name = input_feature_column_scope + 'input_layer'
            if feature_column_lib.is_feature_column_v2(feature_columns):
                self._input_layer = feature_column_lib.DenseFeatures(
                    feature_columns=feature_columns, name=layer_name)
            else:
                self._input_layer = feature_column.InputLayer(
                    feature_columns=feature_columns,
                    name=layer_name,
                    create_scope_now=False)

        self._add_layer(self._input_layer, self._input_layer.name)

        self._dropout = dropout
        self._batch_norm = batch_norm

        self._hidden_layers = []
        self._dropout_layers = []
        self._batch_norm_layers = []
        self._hidden_layer_scope_names = []
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with ops.name_scope('hiddenlayer_%d' %
                                layer_id) as hidden_layer_scope:
                # Get scope name without the trailing slash.
                hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
                hidden_layer = core_layers.Dense(
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_shared_name)
                self._add_layer(hidden_layer, hidden_shared_name)
                self._hidden_layer_scope_names.append(hidden_shared_name)
                self._hidden_layers.append(hidden_layer)
                if self._dropout is not None:
                    dropout_layer = core_layers.Dropout(rate=self._dropout)
                    self._add_layer(dropout_layer, dropout_layer.name)
                    self._dropout_layers.append(dropout_layer)
                if self._batch_norm:
                    batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
                    batch_norm_layer = normalization.BatchNormalization(
                        # The default momentum 0.99 actually crashes on certain
                        # problems, so here we use 0.999, which is the default of
                        # tf.contrib.layers.batch_norm.
                        momentum=0.999,
                        trainable=True,
                        name=batch_norm_name)
                    self._add_layer(batch_norm_layer, batch_norm_name)
                    self._batch_norm_layers.append(batch_norm_layer)

        with ops.name_scope('logits') as logits_scope:
            logits_shared_name = _name_from_scope_name(logits_scope)
            self._logits_layer = core_layers.Dense(
                units=units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_shared_name)
            self._add_layer(self._logits_layer, logits_shared_name)
            self._logits_scope_name = logits_shared_name
Example #26
    def test_empty_row(self):
        # Inputs.
        vocabulary_size = 3
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            # example 0, ids []
            # example 1, ids [0, 1, 3]
            indices=((1, 0), (1, 1), (1, 4)),
            values=(0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (13., 17.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=vocabulary_size)

        # Set tensor_core_shape to [None, 3] to ensure some padding and a
        # dynamic batch size.
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column_input,
            dimension=embedding_dimension,
            initializer=_initializer,
            combiner='mean',
            embedding_lookup_device='tpu_tensor_core',
            tensor_core_shape=[None, 3])

        # Run in a TPUInferenceContext so that we hit the intended densification case.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()
        with tpu_function.tpu_shard_context(1):
            dense_features = fc_lib.DenseFeatures(embedding_column)
            expected_lookups = (
                # example 0:
                (0., 0.),  # ids [], embedding = [0, 0]
                # example 1:
                (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            )

            embedding_lookup = dense_features(input_features)

            # Assert expected embedding variable and lookups.
            global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
            self.assertCountEqual(
                ('dense_features/inp_embedding/embedding_weights:0', ),
                tuple([v.name for v in global_vars]))

            embedding_var = global_vars[0]
            with _initialized_session():
                self.assertAllEqual(embedding_values, embedding_var)
                eval_res = embedding_lookup.eval()
                self.assertAllEqual(expected_lookups, eval_res)
            context.Exit()
Example #27
    def test_dense_embedding_lookup(self, shared, combiner):
        # Inputs.
        vocabulary_size = 3
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1, 3]
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (13., 17.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=vocabulary_size)

        # Set tensor_core_shape to [None, 3] to ensure some padding and a
        # dynamic batch size.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])

        # Run in TPUInferenceContext so that we hit the intended densification case.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()

        dense_features = fc_lib.DenseFeatures(embedding_column)
        # Sqrtn combiner not supported for now.
        if combiner == 'sqrtn':
            with self.assertRaisesRegexp(
                    ValueError,
                    'Dense TPU Embedding does not support combiner'):
                embedding_lookup = dense_features(input_features)
            return
        if combiner == 'mean':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            )
        elif combiner == 'sum':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (4., 7.),  # ids [0, 1], embedding = sum([1, 2] + [3, 5]) = [4, 7]
            )

        embedding_lookup = dense_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        if shared:
            self.assertCountEqual(('inp_shared_embedding:0', ),
                                  tuple([v.name for v in global_vars]))
        else:
            self.assertCountEqual(
                ('dense_features/inp_embedding/embedding_weights:0', ),
                tuple([v.name for v in global_vars]))

        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            eval_res = embedding_lookup.eval()
            self.assertAllEqual(expected_lookups, eval_res)
        context.Exit()
Example #28
    def test_feature_layer_cpu(self):
        # Inputs.
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 1), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 2))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0, ids [2], embedding = [7, 11]
            (7., 11.),
            # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            (2., 3.5),
            # example 2, ids [], embedding = [0, 0]
            (0., 0.),
            # example 3, ids [1], embedding = [3, 5]
            (3., 5.),
        )
        expected_lookups_sequence = (
            # example 0, ids [2], embedding = [[7, 11], [0, 0]]
            (
                (7., 11.),
                (0., 0.),
            ),
            # example 1, ids [0, 1], embedding = [[1, 2], [3, 5]]
            (
                (1., 2.),
                (3., 5.),
            ),
            # example 2, ids [], embedding = [[0, 0], [0, 0]]
            (
                (0., 0.),
                (0., 0.),
            ),
            # example 3, ids [1], embedding = [[3, 5], [0, 0]]
            (
                (3., 5.),
                (0., 0.),
            ),
        )

        # Build columns.
        categorical_column = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        sequence_categorical_column = (
            fc_lib.sequence_categorical_column_with_identity(
                key='bbb', num_buckets=vocabulary_size))
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer)
        sequence_embedding_column = tpu_fc.embedding_column_v2(
            sequence_categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_length=2)

        # Provide sparse input and get dense result.
        features = {'aaa': sparse_input, 'bbb': sparse_input}
        dense_features = fc_lib.DenseFeatures([embedding_column])
        sequence_features = fc_lib.SequenceFeatures(
            [sequence_embedding_column])
        embedding_lookup = dense_features(features)
        sequence_embedding_lookup = sequence_features(features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual((
            'dense_features/aaa_embedding/embedding_weights:0',
            'sequence_features/bbb_embedding/embedding_weights:0',
        ), tuple([v.name for v in global_vars]))
        with _initialized_session():
            self.assertAllEqual(embedding_values, global_vars[0].eval())
            self.assertAllEqual(expected_lookups, embedding_lookup.eval())
            self.assertAllEqual(expected_lookups_sequence,
                                sequence_embedding_lookup[0].eval())
Example #29
    def __init__(self,
                 units,
                 hidden_units,
                 feature_columns,
                 activation_fn,
                 dropout,
                 batch_norm,
                 name=None,
                 **kwargs):
        super(_DNNModelV2, self).__init__(name=name, **kwargs)

        # Current DenseFeatures is not a pure Keras layer, as it still relies on
        # variable_scope and get_variables. Here we need to manually add 'dnn' (the
        # Keras model name) as a prefix for backward compatibility.
        with ops.name_scope('dnn/input_from_feature_columns'
                            ) as input_feature_column_scope:
            layer_name = input_feature_column_scope + 'input_layer'
            if feature_column_lib.is_feature_column_v2(feature_columns):
                self._input_layer = feature_column_lib.DenseFeatures(
                    feature_columns=feature_columns, name=layer_name)
            else:
                raise ValueError(
                    'Received a feature column from TensorFlow v1, but this is a '
                    'TensorFlow v2 Estimator. Please either use v2 feature columns '
                    '(accessible via tf.feature_column.* in TF 2.x) with this '
                    'Estimator, or switch to a v1 Estimator for use with v1 feature '
                    'columns (accessible via tf.compat.v1.estimator.* and '
                    'tf.compat.v1.feature_column.*, respectively).')

        self._dropout = dropout
        self._batch_norm = batch_norm

        self._hidden_layers = []
        self._dropout_layers = []
        self._batch_norm_layers = []
        self._hidden_layer_scope_names = []
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with ops.name_scope('hiddenlayer_%d' %
                                layer_id) as hidden_layer_scope:
                # Get scope name without the trailing slash.
                hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
                hidden_layer = keras_core.Dense(
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_shared_name)
                self._hidden_layer_scope_names.append(hidden_shared_name)
                self._hidden_layers.append(hidden_layer)
                if self._dropout is not None:
                    dropout_layer = keras_core.Dropout(rate=self._dropout)
                    self._dropout_layers.append(dropout_layer)
                if self._batch_norm:
                    batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
                    batch_norm_layer = keras_norm.BatchNormalization(
                        # The default momentum 0.99 actually crashes on certain
                        # problems, so here we use 0.999, which is the default of
                        # tf.contrib.layers.batch_norm.
                        momentum=0.999,
                        trainable=True,
                        name=batch_norm_name)
                    self._batch_norm_layers.append(batch_norm_layer)

        with ops.name_scope('logits') as logits_scope:
            logits_shared_name = _name_from_scope_name(logits_scope)
            self._logits_layer = keras_core.Dense(
                units=units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_shared_name)
            self._logits_scope_name = logits_shared_name