def test_sequence_length_not_equal(self):
  """Checks that mismatched sequence lengths across features raise an error."""
  features = {
      # 'aaa' has two entries in example 0 and one in example 1, i.e.
      # sequence_length = [2, 1].
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (0, 1), (1, 0)),
          values=(0., 1., 10.),
          dense_shape=(2, 2)),
      # 'bbb' has one entry per example, i.e. sequence_length = [1, 1].
      'bbb': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0)),
          values=(1., 10.),
          dense_shape=(2, 2)),
  }
  columns = [
      sfc.sequence_numeric_column('aaa'),
      sfc.sequence_numeric_column('bbb'),
  ]

  _, sequence_length = sfc.sequence_input_layer(
      features=features, feature_columns=columns)

  expected_error = (
      r'\[Condition x == y did not hold element-wise:\] '
      r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] '
      r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]')
  with monitored_session.MonitoredSession() as sess:
    # The length check only fires when the tensor is actually evaluated.
    with self.assertRaisesRegexp(errors.InvalidArgumentError, expected_error):
      sess.run(sequence_length)
def test_sequence_example_into_input_layer(self):
  """Feeds parsed SequenceExamples through the input layers into an RNN.

  Serialized examples are parsed inside a dataset pipeline, batched by 20,
  turned into sequence and context input layers, concatenated, and passed to
  a SimpleRNN layer with 10 units. The test checks the dense shape of the
  parsed 'int_list' feature and the shape of the RNN output.
  """
  serialized = [_make_sequence_example().SerializeToString()] * 100
  ctx_cols, seq_cols = self._build_feature_columns()

  def _parse_example(example):
    """Parses one serialized example into a single merged feature dict."""
    context_spec = fc.make_parse_example_spec(ctx_cols)
    sequence_spec = fc.make_parse_example_spec(seq_cols)
    parsed_context, parsed_sequence = (
        parsing_ops.parse_single_sequence_example(
            example,
            context_features=context_spec,
            sequence_features=sequence_spec))
    parsed_context.update(parsed_sequence)
    return parsed_context

  dataset = dataset_ops.Dataset.from_tensor_slices(serialized)
  dataset = dataset.map(_parse_example).batch(20)

  # Only a single batch is pulled from the pipeline.
  features = dataset.make_one_shot_iterator().get_next()

  # The context features get combined with the sequence features before
  # they reach the RNN.
  seq_layer, _ = sfc.sequence_input_layer(features, seq_cols)
  ctx_layer = fc.input_layer(features, ctx_cols)
  rnn_input = sfc.concatenate_context_input(ctx_layer, seq_layer)

  rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
  output = rnn_layer(rnn_input)

  with self.cached_session() as sess:
    sess.run(variables.global_variables_initializer())
    features_r = sess.run(features)
    self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])

    output_r = sess.run(output)
    self.assertAllEqual(output_r.shape, [20, 10])
def test_numeric_column_multi_dim(self):
  """Checks sequence_input_layer with a numeric column of shape (2, 2)."""
  # Example 0 carries eight values:
  #   [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
  # Example 1 carries four values:
  #   [[[10., 11.], [12., 13.]]]
  example_0_indices = tuple((0, col) for col in range(8))
  example_1_indices = tuple((1, col) for col in range(4))
  sparse_input = sparse_tensor.SparseTensorValue(
      indices=example_0_indices + example_1_indices,
      values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
      dense_shape=(2, 8))

  # Each (2, 2) element is expected to come out flattened to four values,
  # with the missing second step of example 1 filled with zeros.
  expected_input_layer = [
      [[0., 1., 2., 3.], [4., 5., 6., 7.]],
      [[10., 11., 12., 13.], [0., 0., 0., 0.]],
  ]
  expected_sequence_length = [2, 1]

  column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
  input_layer, sequence_length = sfc.sequence_input_layer(
      features={'aaa': sparse_input}, feature_columns=[column])

  with monitored_session.MonitoredSession() as sess:
    actual_input_layer = input_layer.eval(session=sess)
    self.assertAllEqual(expected_input_layer, actual_input_layer)
    actual_sequence_length = sequence_length.eval(session=sess)
    self.assertAllEqual(expected_sequence_length, actual_sequence_length)
def test_sequence_length_not_equal(self):
  """Verifies an error is raised when features disagree on sequence length."""
  # First feature: sequence_length = [2, 1].
  lengths_2_1 = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  # Second feature: sequence_length = [1, 1].
  lengths_1_1 = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0)),
      values=(1., 10.),
      dense_shape=(2, 2))

  column_a = sfc.sequence_numeric_column('aaa')
  column_b = sfc.sequence_numeric_column('bbb')
  layer_and_length = sfc.sequence_input_layer(
      features={'aaa': lengths_2_1, 'bbb': lengths_1_1},
      feature_columns=[column_a, column_b])
  _, sequence_length = layer_and_length

  with monitored_session.MonitoredSession() as sess:
    # Evaluating the length tensor is what triggers the equality check.
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        r'\[Condition x == y did not hold element-wise:\] '
        r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] '
        r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'):
      sess.run(sequence_length)
def test_shared_embedding_column_with_non_sequence_categorical(self):
  """Checks that non-sequence shared embedding columns are rejected."""
  num_ids = 3

  def _make_ids():
    """Returns a sparse id input: example 0 -> [2], example 1 -> [0, 1]."""
    return sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

  features = {'aaa': _make_ids(), 'bbb': _make_ids()}

  # Plain (non-sequence) identity columns are used on purpose here.
  plain_column_a = fc.categorical_column_with_identity(
      key='aaa', num_buckets=num_ids)
  plain_column_b = fc.categorical_column_with_identity(
      key='bbb', num_buckets=num_ids)
  shared_embedding_columns = fc.shared_embedding_columns(
      [plain_column_a, plain_column_b], dimension=2)

  expected_error = (
      r'In embedding_column: aaa_shared_embedding\. categorical_column must '
      r'be of type _SequenceCategoricalColumn to use sequence_input_layer\.')
  with self.assertRaisesRegexp(ValueError, expected_error):
    _, _ = sfc.sequence_input_layer(
        features=features, feature_columns=shared_embedding_columns)
def test_numeric_column_multi_dim(self):
  """Exercises sequence_input_layer on a multi-dimensional numeric column."""
  # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
  # example 1, [[[10., 11.], [12., 13.]]]
  indices = (
      (0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
      (1, 0), (1, 1), (1, 2), (1, 3),
  )
  values = (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.)
  features = {
      'aaa': sparse_tensor.SparseTensorValue(
          indices=indices, values=values, dense_shape=(2, 8)),
  }

  # The dense tensor produced for the column is expected to be flattened:
  # every step holds four values and example 1 is zero-padded to two steps.
  want_layer = [
      [[0., 1., 2., 3.], [4., 5., 6., 7.]],
      [[10., 11., 12., 13.], [0., 0., 0., 0.]],
  ]
  want_length = [2, 1]

  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features,
      feature_columns=[sfc.sequence_numeric_column('aaa', shape=(2, 2))])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))
def rnn_logit_fn(features, mode):
  """Builds the recurrent network and returns logits plus a length mask.

  Relies on names bound outside this function (`sequence_feature_columns`,
  `context_feature_columns`, `rnn_cell_fn`, `return_sequences`,
  `output_units`, `input_layer_partitioner`).

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this training, evaluation or prediction. See
      `ModeKeys`.

  Returns:
    A tuple of `Tensor` objects representing the logits and the sequence
    length mask.
  """
  # Can't import from tf.contrib at the module level, otherwise you
  # can hit a circular import issue if tf_estimator.contrib is
  # imported before tf.contrib.
  from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc  # pylint: disable=g-import-not-at-top

  scope_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=scope_values,
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)

    # NOTE(review): the context branch is assumed to live inside the
    # 'sequence_input_layer' scope -- confirm against the original layout.
    if context_feature_columns:
      context_tensor = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = _concatenate_context_input(
          sequence_input, context_tensor)

  rnn_cell = rnn_cell_fn(mode)
  # Only the per-step outputs are needed; the final state is discarded.
  outputs_and_state = rnn.dynamic_rnn(
      cell=rnn_cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  rnn_outputs = outputs_and_state[0]

  if not return_sequences:
    rnn_outputs = _select_last_activations(rnn_outputs, sequence_length)

  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    logits = core_layers.dense(
        rnn_outputs,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())

  sequence_length_mask = array_ops.sequence_mask(sequence_length)
  return logits, sequence_length_mask
def rnn_logit_fn(features, mode):
  """Builds the recurrent network and returns its logits.

  Relies on names bound outside this function (`sequence_feature_columns`,
  `context_feature_columns`, `rnn_cell_fn`, `return_sequences`,
  `output_units`, `input_layer_partitioner`).

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this training, evaluation or prediction. See
      `ModeKeys`.

  Returns:
    A `Tensor` representing the logits.
  """
  scope_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=scope_values,
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)

    # NOTE(review): the context branch is assumed to live inside the
    # 'sequence_input_layer' scope -- confirm against the original layout.
    if context_feature_columns:
      context_tensor = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = _concatenate_context_input(
          sequence_input, context_tensor)

  rnn_cell = rnn_cell_fn(mode)
  # Only the per-step outputs are needed; the final state is discarded.
  outputs_and_state = rnn.dynamic_rnn(
      cell=rnn_cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  rnn_outputs = outputs_and_state[0]

  if not return_sequences:
    rnn_outputs = _select_last_activations(rnn_outputs, sequence_length)

  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    logits = core_layers.dense(
        rnn_outputs,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())
  return logits
def rnn_logit_fn(features, mode):
  """Builds the recurrent network and returns logits for the last step.

  Relies on names bound outside this function (`sequence_feature_columns`,
  `context_feature_columns`, `rnn_cell_fn`, `output_units`,
  `input_layer_partitioner`).

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this training, evaluation or prediction. See
      `ModeKeys`.

  Returns:
    A `Tensor` representing the logits.
  """
  scope_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=scope_values,
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)

    # NOTE(review): the context branch is assumed to live inside the
    # 'sequence_input_layer' scope -- confirm against the original layout.
    if context_feature_columns:
      context_tensor = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = seq_fc.concatenate_context_input(
          context_tensor, sequence_input)

  rnn_cell = rnn_cell_fn(mode)
  # Only the per-step outputs are needed; the final state is discarded.
  outputs_and_state = rnn.dynamic_rnn(
      cell=rnn_cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  rnn_outputs = outputs_and_state[0]
  final_step_outputs = _select_last_activations(rnn_outputs, sequence_length)

  # The scope is opened on the full outputs, while the dense layer consumes
  # only the selected last activations.
  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    logits = core_layers.dense(
        final_step_outputs,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())
  return logits
def test_indicator_column(self):
  """Checks sequence_input_layer output for two sequence indicator columns."""
  vocabulary_size_a = 3
  vocabulary_size_b = 2
  features = {
      # example 0, ids [2]
      # example 1, ids [0, 1]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2)),
      # example 0, ids [1]
      # example 1, ids [1, 0]
      'bbb': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(1, 1, 0),
          dense_shape=(2, 2)),
  }

  # Per step: three one-hot slots for 'aaa' followed by two for 'bbb'.
  want_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  want_length = [1, 2]

  indicator_a = sfc._sequence_indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size_a))
  indicator_b = sfc._sequence_indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size_b))

  # Columns are passed in reverse order to test that they are reordered
  # alphabetically.
  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features, feature_columns=[indicator_b, indicator_a])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))
def test_indicator_column(self):
  """Checks sequence_input_layer output for indicator columns over sequences."""
  vocabulary_size_a = 3
  vocabulary_size_b = 2
  features = {
      # example 0, ids [2]
      # example 1, ids [0, 1]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2)),
      # example 0, ids [1]
      # example 1, ids [1, 0]
      'bbb': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(1, 1, 0),
          dense_shape=(2, 2)),
  }

  # Per step: three one-hot slots for 'aaa' followed by two for 'bbb'.
  want_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  want_length = [1, 2]

  indicator_a = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size_a))
  indicator_b = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size_b))

  # Columns are passed in reverse order to test that they are reordered
  # alphabetically.
  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features, feature_columns=[indicator_b, indicator_a])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))
def test_indicator_column_with_non_sequence_categorical(self):
  """Checks that an indicator over a non-sequence column is rejected."""
  vocabulary_size = 3
  features = {
      # example 0, ids [2]
      # example 1, ids [0, 1]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2)),
  }

  # A plain (non-sequence) identity column is wrapped on purpose here.
  plain_column = fc.categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator = fc.indicator_column(plain_column)

  expected_error = (
      r'In indicator_column: aaa_indicator\. categorical_column must be of '
      r'type _SequenceCategoricalColumn to use sequence_input_layer\.')
  with self.assertRaisesRegexp(ValueError, expected_error):
    _, _ = sfc.sequence_input_layer(
        features=features, feature_columns=[indicator])
def test_numeric_column(self):
  """Checks sequence_input_layer output for a scalar sequence numeric column."""
  features = {
      # example 0, values [[0.], [1]]
      # example 1, [[10.]]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (0, 1), (1, 0)),
          values=(0., 1., 10.),
          dense_shape=(2, 2)),
  }
  # Example 1 has a single step, so its second step is expected to be zero.
  want_layer = [
      [[0.], [1.]],
      [[10.], [0.]],
  ]
  want_length = [2, 1]

  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features,
      feature_columns=[sfc.sequence_numeric_column('aaa')])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))
def test_numeric_column(self):
  """Verifies the dense layer and lengths built from one numeric column."""
  # example 0, values [[0.], [1]]
  # example 1, [[10.]]
  sparse_values = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))

  column = sfc.sequence_numeric_column('aaa')
  layer_and_length = sfc.sequence_input_layer(
      features={'aaa': sparse_values}, feature_columns=[column])
  input_layer, sequence_length = layer_and_length

  with monitored_session.MonitoredSession() as sess:
    # Example 1 has one step only; its second step is expected to be zero.
    actual_layer = input_layer.eval(session=sess)
    self.assertAllEqual([[[0.], [1.]], [[10.], [0.]]], actual_layer)
    actual_length = sequence_length.eval(session=sess)
    self.assertAllEqual([2, 1], actual_length)
def test_embedding_column(self):
  """Checks sequence_input_layer output for two sequence embedding columns."""
  vocabulary_size = 3
  features = {
      # example 0, ids [2]
      # example 1, ids [0, 1]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2)),
      # example 0, ids [1]
      # example 1, ids [2, 0]
      'bbb': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(1, 2, 0),
          dense_shape=(2, 2)),
  }

  embedding_dimension_a = 2
  embedding_values_a = (
      (1., 2.),  # id 0
      (3., 4.),  # id 1
      (5., 6.)  # id 2
  )
  embedding_dimension_b = 3
  embedding_values_b = (
      (11., 12., 13.),  # id 0
      (14., 15., 16.),  # id 1
      (17., 18., 19.)  # id 2
  )

  def _make_initializer(dimension, table):
    """Returns an initializer that checks its arguments and yields `table`."""

    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return table

    return _initializer

  # Per step: the two 'aaa' embedding values followed by the three 'bbb' ones.
  want_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [2, 0]
      [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
  ]
  want_length = [1, 2]

  categorical_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_a = sfc._sequence_embedding_column(
      categorical_a,
      dimension=embedding_dimension_a,
      initializer=_make_initializer(embedding_dimension_a, embedding_values_a))
  categorical_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_b = sfc._sequence_embedding_column(
      categorical_b,
      dimension=embedding_dimension_b,
      initializer=_make_initializer(embedding_dimension_b, embedding_values_b))

  # Columns are passed in reverse order to test that they are reordered
  # alphabetically.
  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features, feature_columns=[embedding_b, embedding_a])

  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  want_var_names = (
      'sequence_input_layer/aaa_embedding/embedding_weights:0',
      'sequence_input_layer/bbb_embedding/embedding_weights:0')
  self.assertItemsEqual(want_var_names, tuple(v.name for v in global_vars))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
    self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))
def test_embedding_column(self):
  """Checks sequence_input_layer output for embeddings over sequence columns."""
  vocabulary_size = 3
  features = {
      # example 0, ids [2]
      # example 1, ids [0, 1]
      'aaa': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2)),
      # example 0, ids [1]
      # example 1, ids [2, 0]
      'bbb': sparse_tensor.SparseTensorValue(
          indices=((0, 0), (1, 0), (1, 1)),
          values=(1, 2, 0),
          dense_shape=(2, 2)),
  }

  embedding_dimension_a = 2
  embedding_values_a = (
      (1., 2.),  # id 0
      (3., 4.),  # id 1
      (5., 6.)  # id 2
  )
  embedding_dimension_b = 3
  embedding_values_b = (
      (11., 12., 13.),  # id 0
      (14., 15., 16.),  # id 1
      (17., 18., 19.)  # id 2
  )

  def _make_initializer(dimension, table):
    """Returns an initializer that checks its arguments and yields `table`."""

    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return table

    return _initializer

  # Per step: the two 'aaa' embedding values followed by the three 'bbb' ones.
  want_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [2, 0]
      [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
  ]
  want_length = [1, 2]

  categorical_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_a = fc.embedding_column(
      categorical_a,
      dimension=embedding_dimension_a,
      initializer=_make_initializer(embedding_dimension_a, embedding_values_a))
  categorical_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_b = fc.embedding_column(
      categorical_b,
      dimension=embedding_dimension_b,
      initializer=_make_initializer(embedding_dimension_b, embedding_values_b))

  # Columns are passed in reverse order to test that they are reordered
  # alphabetically.
  input_layer, sequence_length = sfc.sequence_input_layer(
      features=features, feature_columns=[embedding_b, embedding_a])

  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  want_var_names = (
      'sequence_input_layer/aaa_embedding/embedding_weights:0',
      'sequence_input_layer/bbb_embedding/embedding_weights:0')
  self.assertItemsEqual(want_var_names, tuple(v.name for v in global_vars))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
    self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
    self.assertAllEqual(want_layer, input_layer.eval(session=sess))
    self.assertAllEqual(want_length, sequence_length.eval(session=sess))