def test_invalid_cases(self, shared): # Inputs. input_sparse_tensor = sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1), (1, 4)), values=(2, 0, 1, 3), dense_shape=(2, 5)) input_features = {'inp': input_sparse_tensor} # Build columns. categorical_column_input = fc_lib.categorical_column_with_identity( key='inp', num_buckets=3) # Training on TPU with cpu embedding lookups is not supported. if shared: embedding_column = tpu_fc.shared_embedding_columns_v2( [categorical_column_input], dimension=2, embedding_lookup_device='cpu', tensor_core_shape=[None, 3]) else: embedding_column = tpu_fc.embedding_column_v2( categorical_column_input, dimension=2, embedding_lookup_device='cpu', tensor_core_shape=[None, 3]) dense_features = fc_lib.DenseFeatures(embedding_column) with self.assertRaisesRegexp( ValueError, r'.*embedding_lookup_device=\"cpu\" during training is not'): dense_features(input_features) # Inference on with TPU Embedding Hardware is not supported. if shared: embedding_column = tpu_fc.shared_embedding_columns_v2( [categorical_column_input], dimension=2, embedding_lookup_device='tpu_embedding_core', tensor_core_shape=[None, 3]) else: embedding_column = tpu_fc.embedding_column_v2( categorical_column_input, dimension=2, embedding_lookup_device='tpu_embedding_core', tensor_core_shape=[None, 3]) context = tpu._TPUInferenceContext('tpu_inference') context.Enter() dense_features = fc_lib.DenseFeatures(embedding_column) with self.assertRaisesRegexp( ValueError, r'Using embedding_lookup_device=tpu_embedding_core during inference is ' ): dense_features(input_features) context.Exit()
def test_deepcopy(self): categorical_column = fc_lib.categorical_column_with_identity( key='aaa', num_buckets=3) embedding_column = tpu_fc.embedding_column_v2(categorical_column, dimension=2) embedding_column_copy = copy.deepcopy(embedding_column) self.assertEqual(embedding_column.dimension, embedding_column_copy.dimension) self.assertEqual(embedding_column._max_sequence_length, embedding_column_copy._max_sequence_length)
def test_defaults(self): categorical_column = fc_lib.categorical_column_with_identity( key='aaa', num_buckets=3) embedding_dimension = 2 embedding_column = tpu_fc.embedding_column_v2( categorical_column, dimension=embedding_dimension) # Can't test default initializer as it's a random function. self.assertIs(categorical_column, embedding_column.categorical_column) self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('mean', embedding_column.combiner) self.assertEqual('aaa_embedding', embedding_column.name) self.assertEqual((embedding_dimension,), embedding_column.variable_shape)
def get_feature_columns(): initializer = tf.zeros_initializer() column = fc_lib.categorical_column_with_identity(key=KEY_NAME, num_buckets=BUCKET_SIZE) embedding_fc = tpu_fc.embedding_column_v2(column, dimension=EMBEDDING_DIM, combiner='mean', initializer=initializer) all_fc = [embedding_fc] return all_fc
def test_all_constructor_args(self): categorical_column = fc_lib.categorical_column_with_identity( key='aaa', num_buckets=3) embedding_dimension = 2 embedding_column = tpu_fc.embedding_column_v2( categorical_column, dimension=embedding_dimension, combiner='my_combiner', initializer=lambda: 'my_initializer') self.assertIs(categorical_column, embedding_column.categorical_column) self.assertEqual(embedding_dimension, embedding_column.dimension) self.assertEqual('my_combiner', embedding_column.combiner) self.assertEqual('my_initializer', embedding_column.initializer()) self.assertEqual('aaa_embedding', embedding_column.name) self.assertEqual((embedding_dimension, ), embedding_column.variable_shape) self.assertEqual({'aaa': parsing_ops.VarLenFeature(dtypes.int64)}, embedding_column._parse_example_spec)
def test_feature_layer_cpu(self): # Inputs. vocabulary_size = 3 sparse_input = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] indices=((0, 0), (1, 0), (1, 1), (3, 0)), values=(2, 0, 1, 1), dense_shape=(4, 2)) # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.) # id 2 ) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) return embedding_values # Expected lookup result, using combiner='mean'. expected_lookups = ( # example 0, ids [2], embedding = [7, 11] (7., 11.), # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] (2., 3.5), # example 2, ids [], embedding = [0, 0] (0., 0.), # example 3, ids [1], embedding = [3, 5] (3., 5.), ) expected_lookups_sequence = ( # example 0, ids [2], embedding = [[7, 11], [0, 0]] ( (7., 11.), (0., 0.), ), # example 1, ids [0, 1], embedding = [[1, 2], [3. 5]] ( (1., 2.), (3., 5.), ), # example 2, ids [], embedding = [0, 0] ( (0., 0.), (0., 0.), ), # example 3, ids [1], embedding = [3, 5] ( (3., 5.), (0., 0.), ), ) # Build columns. categorical_column = fc_lib.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) sequence_categorical_column = ( fc_lib.sequence_categorical_column_with_identity( key='bbb', num_buckets=vocabulary_size)) embedding_column = tpu_fc.embedding_column_v2( categorical_column, dimension=embedding_dimension, initializer=_initializer) sequence_embedding_column = tpu_fc.embedding_column_v2( sequence_categorical_column, dimension=embedding_dimension, initializer=_initializer, max_sequence_length=2) # Provide sparse input and get dense result. features = {'aaa': sparse_input, 'bbb': sparse_input} dense_features = fc_lib.DenseFeatures([embedding_column]) sequence_features = fc_lib.SequenceFeatures( [sequence_embedding_column]) embedding_lookup = dense_features(features) sequence_embedding_lookup = sequence_features(features) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertItemsEqual(( 'dense_features/aaa_embedding/embedding_weights:0', 'sequence_features/bbb_embedding/embedding_weights:0', ), tuple([v.name for v in global_vars])) with _initialized_session(): self.assertAllEqual(embedding_values, global_vars[0].eval()) self.assertAllEqual(expected_lookups, embedding_lookup.eval()) self.assertAllEqual(expected_lookups_sequence, sequence_embedding_lookup[0].eval())
def test_dense_embedding_lookup(self, shared, combiner): # Inputs. vocabulary_size = 3 input_sparse_tensor = sparse_tensor.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1, 3] indices=((0, 0), (1, 0), (1, 1), (1, 4)), values=(2, 0, 1, 3), dense_shape=(2, 5)) input_features = {'inp': input_sparse_tensor} # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.), # id 2 (13., 17.) # id 3 ) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) return embedding_values # Build columns. categorical_column_input = fc_lib.categorical_column_with_identity( key='inp', num_buckets=vocabulary_size) # Set tensor_core_shape to be [None, 20] to ensure some padding and # dynamic batch size. if shared: embedding_column = tpu_fc.shared_embedding_columns_v2( [categorical_column_input], dimension=embedding_dimension, initializer=_initializer, combiner=combiner, embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 3]) else: embedding_column = tpu_fc.embedding_column_v2( categorical_column_input, dimension=embedding_dimension, initializer=_initializer, combiner=combiner, embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 3]) # Run in TPUInferenceContext so that we hit the intended densification case. context = tpu._TPUInferenceContext('tpu_inference') context.Enter() dense_features = fc_lib.DenseFeatures(embedding_column) # Sqrtn combiner not supported for now. if combiner == 'sqrtn': with self.assertRaisesRegexp( ValueError, 'Dense TPU Embedding does not support combiner'): embedding_lookup = dense_features(input_features) return if combiner == 'mean': expected_lookups = ( # example 0: (7., 11.), # ids [2], embedding = [7, 11] # example 1: (2., 3.5 ), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] ) elif combiner == 'sum': expected_lookups = ( # example 0: (7., 11.), # ids [2], embedding = [7, 11] # example 1: (4., 7 ), # ids [0, 1], embedding = sum([1, 2] + [3, 5]) = [4, 7] ) embedding_lookup = dense_features(input_features) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) if shared: self.assertCountEqual(('inp_shared_embedding:0', ), tuple([v.name for v in global_vars])) else: self.assertCountEqual( ('dense_features/inp_embedding/embedding_weights:0', ), tuple([v.name for v in global_vars])) embedding_var = global_vars[0] with _initialized_session(): self.assertAllEqual(embedding_values, embedding_var.eval()) eval_res = embedding_lookup.eval() self.assertAllEqual(expected_lookups, eval_res) context.Exit()
def test_empty_row(self): # Inputs. vocabulary_size = 3 input_sparse_tensor = sparse_tensor.SparseTensorValue( # example 0, ids [] # example 1, ids [0, 1, 3] indices=((1, 0), (1, 1), (1, 4)), values=(0, 1, 3), dense_shape=(2, 5)) input_features = {'inp': input_sparse_tensor} # Embedding variable. embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.), # id 2 (13., 17.) # id 3 ) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) return embedding_values # Build columns. categorical_column_input = fc_lib.categorical_column_with_identity( key='inp', num_buckets=vocabulary_size) # Set tensor_core_shape to be [None, 20] to ensure some padding and # dynamic batch size. embedding_column = tpu_fc.embedding_column_v2( categorical_column_input, dimension=embedding_dimension, initializer=_initializer, combiner='mean', embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 3]) # Run in TPUContexts so that we hit the intended densification case. context = tpu._TPUInferenceContext('tpu_inference') context.Enter() with tpu_function.tpu_shard_context(1): dense_features = fc_lib.DenseFeatures(embedding_column) expected_lookups = ( # example 0: (0., 0.), # ids [], embedding = [0, 0] # example 1: (2., 3.5 ), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] ) embedding_lookup = dense_features(input_features) # Assert expected embedding variable and lookups. global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertCountEqual( ('dense_features/inp_embedding/embedding_weights:0', ), tuple([v.name for v in global_vars])) embedding_var = global_vars[0] with _initialized_session(): self.assertAllEqual(embedding_values, embedding_var) eval_res = embedding_lookup.eval() self.assertAllEqual(expected_lookups, eval_res) context.Exit()