def test_cpu_sparse_lookup_with_non_sparse_weights(self):
  mid_level = self._create_mid_level()
  features = self._get_sparse_tensors()
  weights = self._get_dense_tensors(dtype=dtypes.float32)
  with self.assertRaisesRegex(
      ValueError, 'but it does not match type of the input which is'):
    tpu_embedding_v2.cpu_embedding_lookup(
        features,
        weights=weights,
        tables=mid_level.embedding_tables,
        feature_config=self.feature_config)
def test_cpu_invalid_structure_for_features(self):
  mid_level = self._create_mid_level()
  # Remove one element of the tuple; self.feature_config has 3 features, so
  # we must pass 3.
  features = tuple(self._get_sparse_tensors()[:2])
  with self.assertRaises(ValueError):
    tpu_embedding_v2.cpu_embedding_lookup(
        features,
        weights=None,
        tables=mid_level.embedding_tables,
        feature_config=self.feature_config)
def test_cpu_dense_lookup_with_weights(self):
  mid_level = self._create_mid_level()
  features = self._get_dense_tensors()
  weights = self._get_dense_tensors(dtype=dtypes.float32)
  with self.assertRaisesRegex(
      ValueError, 'Weight specified for .*, but input is dense.'):
    tpu_embedding_v2.cpu_embedding_lookup(
        features,
        weights=weights,
        tables=mid_level.embedding_tables,
        feature_config=self.feature_config)
def test_cpu_sequence_lookup(self):
  feature_config = (
      tpu_embedding_v2_utils.FeatureConfig(
          table=self.table_video, name='watched', max_sequence_length=2),)
  optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
  mid_level = tpu_embedding_v2.TPUEmbedding(
      feature_config=feature_config, optimizer=optimizer)
  features = tuple(self._get_sparse_tensors()[:1])
  with self.assertRaisesRegex(
      ValueError, 'Sequence features unsupported at this time.'):
    tpu_embedding_v2.cpu_embedding_lookup(
        features,
        weights=None,
        tables=mid_level.embedding_tables,
        feature_config=feature_config)
def test_cpu_sparse_lookup_with_weights(self):
  mid_level = self._create_mid_level()
  features = self._get_sparse_tensors()
  weights = self._get_sparse_tensors(dtype=dtypes.float32)
  results = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=weights,
      tables=mid_level.embedding_tables,
      feature_config=self.feature_config)
  weighted_sum = []
  for feature, weight, config in zip(nest.flatten(features),
                                     nest.flatten(weights),
                                     self.feature_config):
    table = mid_level.embedding_tables[config.table].numpy()
    # Expand dims so the per-value weights broadcast across the embedding
    # dimension in the multiplication below.
    weight = np.expand_dims(weight.values.numpy(), axis=1)
    all_lookups = table[feature.values.numpy()] * weight
    # With row starts we can use np.add.reduceat. Get the row starts from the
    # ragged tensor API.
    row_starts = ragged_tensor.RaggedTensor.from_sparse(feature).row_starts()
    row_starts = row_starts.numpy()
    weighted_sum.append(np.add.reduceat(all_lookups, row_starts))
    if config.table.combiner == 'mean':
      weighted_sum[-1] /= np.add.reduceat(weight, row_starts)
  self.assertAllClose(results, nest.pack_sequence_as(results, weighted_sum))
def test_cpu_sequence_lookup_ragged(self):
  feature_config = (
      tpu_embedding_v2_utils.FeatureConfig(
          table=self.table_video, name='watched', max_sequence_length=2),)
  optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
  mid_level = tpu_embedding_v2.TPUEmbedding(
      feature_config=feature_config, optimizer=optimizer)
  features = self._get_ragged_tensors()[:1]
  result = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=None,
      tables=mid_level.embedding_tables,
      feature_config=feature_config)
  sparse_ver = features[0].to_sparse()
  golden = self._numpy_sequence_lookup(
      mid_level.embedding_tables[self.table_video].numpy(),
      sparse_ver.indices.numpy(),
      sparse_ver.values.numpy(),
      self.data_batch_size,
      feature_config[0].max_sequence_length,
      self.table_video.dim)
  self.assertAllClose(result[0], golden)
def test_cpu_high_dimensional_invalid_lookup_ragged(self):
  # The product of the output shape is not a factor of the data batch size,
  # so an error should be raised.
  feature_config = (
      tpu_embedding_v2_utils.FeatureConfig(
          table=self.table_user, name='friends', output_shape=[3]),)
  optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
  mid_level = tpu_embedding_v2.TPUEmbedding(
      feature_config=feature_config, optimizer=optimizer)
  features = self._get_ragged_tensors()[2:3]
  with self.assertRaisesRegex(
      ValueError,
      'Output shape set in the FeatureConfig should be the factor'):
    tpu_embedding_v2.cpu_embedding_lookup(
        features,
        weights=None,
        tables=mid_level.embedding_tables,
        feature_config=feature_config)
def test_cpu_dense_lookup(self):
  mid_level = self._create_mid_level()
  features = self._get_dense_tensors()
  results = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=None,
      tables=mid_level.embedding_tables,
      feature_config=self.feature_config)
  all_lookups = []
  for feature, config in zip(nest.flatten(features), self.feature_config):
    table = mid_level.embedding_tables[config.table].numpy()
    all_lookups.append(table[feature.numpy()])
  self.assertAllClose(results, nest.pack_sequence_as(results, all_lookups))
def test_cpu_high_dimensional_lookup_ragged(self):
  feature_config = (
      tpu_embedding_v2_utils.FeatureConfig(
          table=self.table_user, name='friends', output_shape=[2, 2]),)
  optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
  mid_level = tpu_embedding_v2.TPUEmbedding(
      feature_config=feature_config, optimizer=optimizer)
  features = self._get_ragged_tensors()[2:3]
  result = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=None,
      tables=mid_level.embedding_tables,
      feature_config=feature_config)
  self.assertAllClose(result[0].shape, (2, 2, 2))
def test_cpu_high_dimensional_sequence_lookup_ragged(self):
  # The product of the output shape is a factor of the data batch size;
  # the quotient becomes the sequence length.
  feature_config = (
      tpu_embedding_v2_utils.FeatureConfig(
          table=self.table_user, name='friends', output_shape=[2, 4]),)
  optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
  mid_level = tpu_embedding_v2.TPUEmbedding(
      feature_config=feature_config, optimizer=optimizer)
  features = self._get_ragged_tensors()[2:3]
  result = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=None,
      tables=mid_level.embedding_tables,
      feature_config=feature_config)
  self.assertAllClose(result[0].shape, (2, 4, 2))
def test_cpu_sparse_lookup(self):
  mid_level = self._create_mid_level()
  features = self._get_sparse_tensors()
  results = tpu_embedding_v2.cpu_embedding_lookup(
      features,
      weights=None,
      tables=mid_level.embedding_tables,
      feature_config=self.feature_config)
  reduced = []
  for feature, config in zip(nest.flatten(features), self.feature_config):
    table = mid_level.embedding_tables[config.table].numpy()
    all_lookups = table[feature.values.numpy()]
    # With row starts we can use np.add.reduceat. Get the row starts from the
    # ragged tensor API.
    ragged = ragged_tensor.RaggedTensor.from_sparse(feature)
    row_starts = ragged.row_starts().numpy()
    reduced.append(np.add.reduceat(all_lookups, row_starts))
    if config.table.combiner == 'mean':
      # For the 'mean' combiner, divide by the row lengths.
      reduced[-1] /= np.expand_dims(ragged.row_lengths().numpy(), axis=1)
  self.assertAllClose(results, nest.pack_sequence_as(results, reduced))
def serve_tensors(features):
  features = tpu_embedding_v2.cpu_embedding_lookup(
      features, None, self.cpu_mid_level.embedding_tables,
      self.cpu_mid_level._feature_config)
  return features[0]