def _get_dense_tensor_internal(self, transformation_cache, state_manager):
  """Private method that follows the signature of _get_dense_tensor_internal."""
  if (tpu.under_tpu_inference_context() and
      self._embedding_lookup_device == EmbeddingDevice.TPU_EMBEDDING_CORE):
    raise ValueError(
        'Using embedding_lookup_device=tpu_embedding_core '
        'during inference is not supported.')
  if self._embedding_lookup_device == EmbeddingDevice.CPU:
    if tpu.under_tpu_inference_context():
      return super(_TPUSharedDeviceSpecificEmbeddingColumnV2,
                   self)._get_dense_tensor_internal(transformation_cache,
                                                    state_manager)
    else:
      raise ValueError(
          'Using TPUSharedEmbeddingColumn with '
          'embedding_lookup_device="cpu" during training is not supported.')
  sparse_tensor = transformation_cache.get(self.categorical_column.name,
                                           state_manager)

  # Use outside compile to densify and pad the input tensors.
  def host_computation():
    return pad_sparse_embedding_lookup_indices(sparse_tensor,
                                               self._tensor_core_shape[1])

  values, mask = tpu.outside_compilation(host_computation)

  # Do a dense embedding lookup on TensorCore.
  embedding_weights = self.shared_embedding_column_creator.embedding_weights
  embedding = sparse_embedding_aggregate_slice(embedding_weights,
                                               (values, mask),
                                               self.get_combiner())
  return embedding
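# Illustrative sketch only (not part of this module): the TensorCore lookup
# above pads each example's sparse ids to a fixed width with a validity mask
# on the host, then gathers rows and combines the valid ones on device.
# `_masked_mean_lookup` below is a hypothetical NumPy stand-in for a 'mean'
# combiner over (values, mask); its name, shapes, and behavior are assumptions
# for illustration, not the real pad_sparse_embedding_lookup_indices /
# sparse_embedding_aggregate_slice APIs.
def _masked_mean_lookup(embedding_weights, padded_ids, mask):
  """Mean-combines embedding rows picked by `padded_ids`, ignoring padding.

  Args:
    embedding_weights: float array of shape [vocab_size, dim].
    padded_ids: int array of shape [batch, pad_len]; padded slots may hold
      any in-range id.
    mask: float array of shape [batch, pad_len]; 1.0 marks valid slots.
  """
  import numpy as np  # Local import so the sketch stays self-contained.
  gathered = embedding_weights[padded_ids]          # [batch, pad_len, dim]
  weights = mask[..., np.newaxis]                   # [batch, pad_len, 1]
  summed = (gathered * weights).sum(axis=1)         # [batch, dim]
  counts = np.maximum(weights.sum(axis=1), 1e-12)   # Guard empty examples.
  return summed / counts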
def _check_invalid_cases(embedding_lookup_device):
  """Checks for invalid embedding_lookup_device configurations."""
  if (tpu.under_tpu_inference_context() and
      embedding_lookup_device == EmbeddingDevice.TPU_EMBEDDING_CORE):
    raise ValueError(
        'Using embedding_lookup_device=tpu_embedding_core during inference '
        'is not supported.')
  if embedding_lookup_device == EmbeddingDevice.CPU:
    if not tpu.under_tpu_inference_context():
      raise ValueError(
          'Using TPUEmbeddingColumn with embedding_lookup_device="cpu" '
          'during training is not supported.')
def get_dense_tensor(self, transformation_cache, state_manager):
  """Private method that follows the signature of get_dense_tensor."""
  # If we aren't inferencing on TensorCore, just delegate to the parent.
  if not tpu.under_tpu_inference_context() or not self._tensor_core_shape:
    return super(_TPUDeviceSpecificEmbeddingColumnV2,
                 self).get_dense_tensor(transformation_cache, state_manager)
  sparse_tensor = transformation_cache.get(self.categorical_column.name,
                                           state_manager)

  # Use outside compile to densify and pad the input tensors.
  def host_computation():
    return pad_sparse_embedding_lookup_indices(sparse_tensor,
                                               self._tensor_core_shape[1])

  values, mask = tpu.outside_compilation(host_computation)

  # Do a dense embedding lookup on TensorCore.
  embedding_weights = state_manager.get_variable(self, 'embedding_weights')
  embedding = sparse_embedding_aggregate_slice(embedding_weights,
                                               (values, mask),
                                               self.get_combiner())
  return embedding
def create_state(self, state_manager):
  if (tpu.under_tpu_inference_context() and
      self._embedding_lookup_device == EmbeddingDevice.TPU_EMBEDDING_CORE):
    raise ValueError(
        'Using embedding_lookup_device=tpu_embedding_core during inference '
        'is not supported.')
  if self._embedding_lookup_device == EmbeddingDevice.CPU:
    if tpu.under_tpu_inference_context():
      return fc_lib.EmbeddingColumn.create_state(self, state_manager)
    else:
      raise ValueError(
          'Using TPUEmbeddingColumn with embedding_lookup_device="cpu" '
          'during training is not supported.')
  return super(_TPUDeviceSpecificEmbeddingColumnV2,
               self).create_state(state_manager)
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(
      self.get_embedding_var_name(),
      'embedding_weights',
      is_shared_embedding=True)

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def get_sequence_dense_tensor(self, transformation_cache, state_manager):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc_lib.SharedEmbeddingColumn.get_sequence_dense_tensor(
          self, transformation_cache, state_manager)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc_lib.SharedEmbeddingColumn.get_sequence_dense_tensor(
        self, transformation_cache, state_manager)

  tensor = fc_lib.SharedEmbeddingColumn._dense_tensor_internal(
      self, transformation_cache, state_manager)
  tensor_lengths = transformation_cache.get(
      self.get_sequence_length_feature_key_name(), state_manager)

  # FeatureTransformationCache expands rank 1 tensors (like sequence length)
  # to rank 2. We need to undo this to match the standard CPU sequence
  # embedding.
  tensor_lengths = array_ops.squeeze(tensor_lengths, -1)

  return fc_lib.SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._EmbeddingColumn._get_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._EmbeddingColumn._get_dense_tensor(
        self, inputs, weight_collections, trainable)

  # TPU mode
  # Get the embeddings from the LazyBuilder.
  tensor = inputs.get(self.get_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')
  return tensor
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc._SharedEmbeddingColumn._get_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc._SharedEmbeddingColumn._get_dense_tensor(
        self, inputs, weight_collections, trainable)

  # TPU mode
  # Get the embeddings from the LazyBuilder.
  tensor = inputs.get(self.get_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(
      self.get_embedding_var_name(),
      'embedding_weights',
      is_shared_embedding=True)
  return tensor
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc_lib.EmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc_lib.EmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # inputs is a _LazyBuilder and for rank 1 tensors, it calls expand_dims(-1).
  # We need to undo this to match the standard CPU sequence embedding.
  tensor_lengths = array_ops.squeeze(tensor_lengths, -1)

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')

  return fc_lib.SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc._SharedEmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(
      self.get_embedding_var_name(),
      'embedding_weights',
      is_shared_embedding=True)

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_sequence_dense_tensor(
    self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('Export saved model does not support MOD '
                                'sharded embeddings.')

    def host_computation():
      return fc._EmbeddingColumn._get_sequence_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    if self._partition_strategy == 'mod':
      raise NotImplementedError('TPUEmbedding on CPU does not support MOD '
                                'sharded embeddings.')
    return fc._EmbeddingColumn._get_sequence_dense_tensor(
        self, inputs, weight_collections, trainable)

  tensor = inputs.get(self.get_feature_key_name())
  tensor_lengths = inputs.get(self.get_sequence_length_feature_key_name())

  # inputs is a _LazyBuilder and for rank 1 tensors, it calls expand_dims(-1).
  # We need to undo this to match the standard CPU sequence embedding.
  tensor_lengths = array_ops.squeeze(tensor_lengths, -1)

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')

  return fc._SequenceDenseColumn.TensorSequenceLengthPair(
      dense_tensor=tensor, sequence_length=tensor_lengths)
def _get_dense_tensor_internal(self, transformation_cache, state_manager):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc_lib.SharedEmbeddingColumn._get_dense_tensor_internal(
          self, transformation_cache, state_manager)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc_lib.SharedEmbeddingColumn._get_dense_tensor_internal(
        self, transformation_cache, state_manager)

  # TPU mode
  # Get the embeddings from the FeatureTransformationCache.
  tensor = transformation_cache.get(self.get_feature_key_name(),
                                    state_manager)

  # Add to collection for _create_tpu_embedding_variables_and_ops
  # Note that in Feature Column V2, shared embeddings have no scope.
  _record_variable_scope_and_name(
      self.get_embedding_var_name(),
      self.shared_embedding_column_creator._name,
      is_shared_embedding=True)
  return tensor
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  _check_invalid_cases(self._embedding_lookup_device)
  # CPU Case.
  is_cpu = self._embedding_lookup_device == EmbeddingDevice.CPU
  is_cpu = is_cpu or _is_running_on_cpu()
  if is_cpu:
    return super(_TPUDeviceSpecificEmbeddingColumnV2,
                 self)._get_dense_tensor(inputs, weight_collections,
                                         trainable)
  # TPU_EMBEDDING_CORE case.
  elif self._embedding_lookup_device == EmbeddingDevice.TPU_EMBEDDING_CORE:
    return super(_TPUDeviceSpecificEmbeddingColumnV2,
                 self)._get_dense_tensor(inputs, weight_collections,
                                         trainable)

  # TensorCore case: do the embedding lookup densely on TensorCore.
  if tpu.under_tpu_inference_context():
    # For inference, use outside compile to densify and pad the input
    # tensors.
    sparse_tensor = inputs.get(self.get_feature_key_name())

    def host_computation():
      return pad_sparse_embedding_lookup_indices(sparse_tensor,
                                                 self._tensor_core_shape[1])

    values, mask = tpu.outside_compilation(host_computation)
  else:
    # For training, the inputs should already have been densified and padded.
    values = inputs.get(self.get_feature_key_name())
    mask = inputs.get(self.get_feature_key_name() +
                      _TENSOR_CORE_MASK_KEY_SUFFIX)

  embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
  if (weight_collections and
      ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections):
    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
  embedding_weights = variable_scope.get_variable(
      name='embedding_weights',
      shape=embedding_shape,
      dtype=dtypes.float32,
      initializer=self.initializer,
      trainable=self.trainable and trainable,
      collections=weight_collections)

  return sparse_embedding_aggregate_slice(embedding_weights, (values, mask),
                                          self.get_combiner())
def get_dense_tensor(self, transformation_cache, state_manager):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc_lib.EmbeddingColumn.get_dense_tensor(
          self, transformation_cache, state_manager)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc_lib.EmbeddingColumn.get_dense_tensor(
        self, transformation_cache, state_manager)

  # TPU mode
  # Get the embeddings from the FeatureTransformationCache.
  tensor = transformation_cache.get(self.get_feature_key_name(),
                                    state_manager)
  return tensor
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
  if tpu.under_tpu_inference_context():

    def host_computation():
      return fc._EmbeddingColumn._get_dense_tensor(
          self, inputs, weight_collections, trainable)

    return tpu.outside_compilation(host_computation)

  if _is_running_on_cpu():
    return fc._EmbeddingColumn._get_dense_tensor(
        self, inputs, weight_collections, trainable)

  # TPU mode
  # Get the embeddings from the LazyBuilder.
  tensor = inputs.get(self.get_feature_key_name())

  # Add to collection for _create_tpu_embedding_variables_and_ops
  _record_variable_scope_and_name(self.get_embedding_var_name(),
                                  'embedding_weights')
  return tensor
def _get_dense_tensor_internal(self, transformation_cache, state_manager):
  """Private method that follows the signature of _get_dense_tensor_internal."""
  _check_invalid_cases(self._embedding_lookup_device)
  # CPU Case.
  is_cpu = self._embedding_lookup_device == EmbeddingDevice.CPU
  is_cpu = is_cpu or _is_running_on_cpu()
  if is_cpu:
    return super(_TPUSharedDeviceSpecificEmbeddingColumnV2,
                 self)._get_dense_tensor_internal(transformation_cache,
                                                  state_manager)
  # TPU_EMBEDDING_CORE case.
  if self._embedding_lookup_device == EmbeddingDevice.TPU_EMBEDDING_CORE:
    return super(_TPUSharedDeviceSpecificEmbeddingColumnV2,
                 self)._get_dense_tensor_internal(transformation_cache,
                                                  state_manager)

  # TensorCore case: do the embedding lookup densely on TensorCore.
  if tpu.under_tpu_inference_context():
    # For inference, use outside compile to densify and pad the input
    # tensors.
    sparse_tensor = transformation_cache.get(self.categorical_column.name,
                                             state_manager)

    def host_computation():
      return pad_sparse_embedding_lookup_indices(sparse_tensor,
                                                 self._tensor_core_shape[1])

    values, mask = tpu.outside_compilation(host_computation)
  else:
    # For training, the inputs should already have been densified and padded.
    values = transformation_cache.get(self.categorical_column.name,
                                      state_manager)
    mask = transformation_cache.get(
        self.categorical_column.name + _TENSOR_CORE_MASK_KEY_SUFFIX,
        state_manager)

  # Do a dense embedding lookup on TensorCore.
  embedding_weights = self.shared_embedding_column_creator.embedding_weights
  return sparse_embedding_aggregate_slice(embedding_weights, (values, mask),
                                          self.get_combiner())