def _get_sequence_dense_tensor(
      self, inputs, weight_collections=None, trainable=None):
    # Do nothing with weight_collections and trainable since no variables are
    # created in this function.
    del weight_collections
    del trainable
    sp_tensor = inputs.get(self)
    dense_tensor = sparse_ops.sparse_tensor_to_dense(
        sp_tensor, default_value=self.default_value)
    # Reshape into [batch_size, T, variable_shape].
    dense_shape = array_ops.concat(
        [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
        axis=0)
    dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

    # Get the number of timesteps per example
    # For the 2D case, the raw values are grouped according to num_elements;
    # for the 3D case, the grouping happens in the third dimension, and
    # sequence length is not affected.
    num_elements = (self._variable_shape.num_elements()
                    if sp_tensor.shape.ndims == 2 else 1)
    seq_length = fc_utils.sequence_length_from_sparse_tensor(
        sp_tensor, num_elements=num_elements)

    return fc._SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=seq_length)
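A minimal standalone sketch (not part of the column implementation; tensor values are made up for illustration) of the 2D case described in the comments above: raw values arrive flattened as [batch_size, T * num_elements] and are regrouped into [batch_size, T, variable_shape], while the sequence length is the number of raw values per row divided by num_elements.

import tensorflow as tf

# Two examples with variable_shape = (2,), so num_elements = 2.
# Row 0 has 3 timesteps (6 raw values); row 1 has 1 timestep (2 raw values).
sp = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [1, 0], [1, 1]],
    values=[1., 2., 3., 4., 5., 6., 7., 8.],
    dense_shape=[2, 6])
dense = tf.sparse.to_dense(sp)                            # shape [2, 6]
dense = tf.reshape(dense, [tf.shape(dense)[0], -1, 2])    # shape [2, 3, 2]
# Sequence lengths: raw values per row / num_elements -> [3, 1]; row 1 is
# zero-padded in the dense tensor beyond its real length.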
Example #2
    def _get_sequence_dense_tensor(self,
                                   inputs,
                                   weight_collections=None,
                                   trainable=None):
        # Do nothing with weight_collections and trainable since no variables are
        # created in this function.
        del weight_collections
        del trainable
        sp_tensor = inputs.get(self)
        dense_tensor = sparse_ops.sparse_tensor_to_dense(
            sp_tensor, default_value=self.default_value)
        # Reshape into [batch_size, T, variable_shape].
        dense_shape = array_ops.concat(
            [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
            axis=0)
        dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

        # Get the number of timesteps per example
        # For the 2D case, the raw values are grouped according to num_elements;
        # for the 3D case, the grouping happens in the third dimension, and
        # sequence length is not affected.
        num_elements = (self._variable_shape.num_elements()
                        if sp_tensor.shape.ndims == 2 else 1)
        seq_length = fc_utils.sequence_length_from_sparse_tensor(
            sp_tensor, num_elements=num_elements)

        return fc._SequenceDenseColumn.TensorSequenceLengthPair(
            dense_tensor=dense_tensor, sequence_length=seq_length)
Example #3
    def get_sequence_dense_tensor(self, transformation_cache, state_manager):
        """Returns a `TensorSequenceLengthPair`.

    Args:
      transformation_cache: A `FeatureTransformationCache` object to access
        features.
      state_manager: A `StateManager` to create / access resources such as
        lookup tables.
    """
        sp_tensor = transformation_cache.get(self, state_manager)
        dense_tensor = sparse_ops.sparse_tensor_to_dense(
            sp_tensor, default_value=self.default_value)
        # Reshape into [batch_size, T, variable_shape].
        dense_shape = array_ops.concat(
            [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape],
            axis=0)
        dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

        # Get the number of timesteps per example
        # For the 2D case, the raw values are grouped according to num_elements;
        # for the 3D case, the grouping happens in the third dimension, and
        # sequence length is not affected.
        if sp_tensor.shape.ndims == 2:
            num_elements = self.variable_shape.num_elements()
        else:
            num_elements = 1
        seq_length = fc_utils.sequence_length_from_sparse_tensor(
            sp_tensor, num_elements=num_elements)

        return fc.SequenceDenseColumn.TensorSequenceLengthPair(
            dense_tensor=dense_tensor, sequence_length=seq_length)
def split_inputs(ctx, features, labels, num_cores_per_batch=1):
  """Splits the dense and sparse tensors inside the features and labels."""
  enqueue_datas = collections.OrderedDict()

  if ctx.embedding_config:
    tpu_embedding_ = ctx.embedding_config.tpu_embedding
    for feature_key in tpu_embedding_.feature_to_config_dict:
      sparse_feature = _get_sparse_feature_from_feature(feature_key, features)
      max_sequence_length = tpu_embedding_.feature_to_config_dict[
          feature_key].max_sequence_length
      combiner = tpu_embedding_._table_to_config_dict[
          tpu_embedding_._feature_to_config_dict[feature_key].table_id].combiner
      if max_sequence_length > 0:
        length_feature_name = (
            tpu_fc.get_sequence_length_feature_key_name_from_feature_key_name(
                feature_key))
        length_feature = tf.math.minimum(
            fc_utils.sequence_length_from_sparse_tensor(sparse_feature),
            max_sequence_length)
        length_feature.set_shape(ctx.batch_size_for_input_fn)
        features[length_feature_name] = length_feature
      weight_key = tpu_embedding_.feature_to_config_dict[feature_key].weight_key
      sparse_feature_split = _split_tensor(
          sparse_feature, num_cores_per_batch)
      if combiner is None and not isinstance(sparse_feature,
                                             tf.sparse.SparseTensor):
        # A dense tensor with no combiner was provided so we assume that each
        # of the embedding_indices belongs to a different sample (setting
        # sample_indices to None).
        if weight_key is not None:
          raise ValueError(
              'Found weights {} for weighted_categorical_column, which is not '
              'compatible with sparse feature {} enqueued as dense tensor.'
              .format(weight_key, feature_key))
        enqueue_data = []
        for i in range(num_cores_per_batch):
          enqueue_data.append(tpu_embedding.EnqueueData(
              sparse_feature_split[i]))
      else:
        weights = None
        if isinstance(sparse_feature, tf.sparse.SparseTensor):
          weights = _get_weights_from_features(weight_key, features)
          weights_split = _split_tensor(weights, num_cores_per_batch)
        enqueue_data = []
        for i in range(num_cores_per_batch):
          split_weights = weights_split[i] if weights is not None else None
          enqueue_data.append(
              tpu_embedding.EnqueueData.from_sparse_tensor(
                  _maybe_dense_to_sparse(sparse_feature_split[i]),
                  weights=split_weights))
      enqueue_datas[feature_key] = enqueue_data

  # Transpose the enqueue_datas dict into a list of dicts
  enqueue_datas_list = []
  for i in range(num_cores_per_batch):
    enqueue_data = {}
    for key, value in enqueue_datas.items():
      enqueue_data[key] = value[i]
    enqueue_datas_list.append(enqueue_data)
  return features, labels, enqueue_datas_list
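For illustration only, the final loop above transposes a dict of per-core lists into one dict per core. A toy sketch with placeholder values (hypothetical feature names, num_cores_per_batch = 2):

enqueue_datas = {'feature_a': ['a_core0', 'a_core1'], 'feature_b': ['b_core0', 'b_core1']}
enqueue_datas_list = []
for i in range(2):  # num_cores_per_batch
    enqueue_data = {key: value[i] for key, value in enqueue_datas.items()}
    enqueue_datas_list.append(enqueue_data)
# -> [{'feature_a': 'a_core0', 'feature_b': 'b_core0'},
#     {'feature_a': 'a_core1', 'feature_b': 'b_core1'}]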
  def get_sequence_dense_tensor(self, transformation_cache, state_manager):
    """Returns a `TensorSequenceLengthPair`.

    Args:
      transformation_cache: A `FeatureTransformationCache` object to access
        features.
      state_manager: A `StateManager` to create / access resources such as
        lookup tables.
    """
    sp_tensor = transformation_cache.get(self, state_manager)
    dense_tensor = sparse_ops.sparse_tensor_to_dense(
        sp_tensor, default_value=self.default_value)
    # Reshape into [batch_size, T, variable_shape].
    dense_shape = array_ops.concat(
        [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape],
        axis=0)
    dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

    # Get the number of timesteps per example
    # For the 2D case, the raw values are grouped according to num_elements;
    # for the 3D case, the grouping happens in the third dimension, and
    # sequence length is not affected.
    if sp_tensor.shape.ndims == 2:
      num_elements = self.variable_shape.num_elements()
    else:
      num_elements = 1
    seq_length = fc_utils.sequence_length_from_sparse_tensor(
        sp_tensor, num_elements=num_elements)

    return fc.SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=seq_length)
Example #6
def _get_sequence_dense_tensor(self,
                               inputs,
                               weight_collections=None,
                               trainable=None):
    # Do nothing with weight_collections and trainable since no variables are
    # created in this function.
    del weight_collections
    del trainable
    if not isinstance(
            self.categorical_column,
            (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
        raise ValueError(
            'In indicator_column: {}. '
            'categorical_column must be of type _SequenceCategoricalColumn '
            'to use SequenceFeatures. '
            'Suggested fix: Use one of sequence_categorical_column_with_*. '
            'Given (type {}): {}'.format(self.name,
                                         type(self.categorical_column),
                                         self.categorical_column))
    # Feature has been already transformed. Return the intermediate
    # representation created by _transform_feature.
    dense_tensor = inputs.get(self)
    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
    sequence_length = fc_utils.sequence_length_from_sparse_tensor(
        sparse_tensors.id_tensor)
    return SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=sequence_length)
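A hedged usage sketch of the constraint checked above: with SequenceFeatures, an indicator column must wrap one of the sequence_categorical_column_with_* columns, as the error message suggests. Feature names and data below are illustrative, not from the original project.

import tensorflow as tf

tokens = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
    'tokens', vocabulary_list=['a', 'b', 'c'])
indicator = tf.feature_column.indicator_column(tokens)
sequence_features = tf.keras.experimental.SequenceFeatures([indicator])

features = {'tokens': tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]], values=['a', 'c', 'b'], dense_shape=[2, 2])}
sequence_input, sequence_length = sequence_features(features)
# Expected: sequence_input has shape [2, 2, 3] (one-hot per timestep),
# sequence_length is [2, 1].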
Example #7
def sparse_tensor_len():
    # sequence_length_from_sparse_tensor returns the length of each document.
    # The sparse tensor holds batch_size documents; because of the sparse format,
    # each document can have a different length, and the sparse tensor's dense
    # shape is the length of the longest document.
    # This op computes the length of each document.
    with tf.Graph().as_default():
        sess = tf.Session()
        sp_tensor = create_sparse_tensor()
        seq_len = sequence_length_from_sparse_tensor(sp_tensor)
        print(sess.run(seq_len))
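The helper create_sparse_tensor above is defined elsewhere in that project. A self-contained sketch of the same idea (assuming the helper lives in tensorflow.python.feature_column.utils, as in the other examples on this page):

import tensorflow.compat.v1 as tf
from tensorflow.python.feature_column import utils as fc_utils

with tf.Graph().as_default():
    # Batch of 3 documents with 2, 3 and 1 values; dense_shape pads to the longest.
    sp_tensor = tf.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]],
        values=[1, 2, 3, 4, 5, 6],
        dense_shape=[3, 3])
    seq_len = fc_utils.sequence_length_from_sparse_tensor(sp_tensor)
    with tf.Session() as sess:
        print(sess.run(seq_len))  # expected: [2 3 1]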
Example #8
def tfhub_embedding(x, embedding_layer, embedding_size):
    sp_tensor = tf.compat.v1.string_split(x, sep=' ')
    seq_length = sequence_length_from_sparse_tensor(sp_tensor)

    dense_tensor = tf.sparse.to_dense(sp_tensor)
    batch_size = tf.shape(dense_tensor)[0]

    flatten_embedding = embedding_layer(tf.reshape(dense_tensor, (-1, )))
    seq_embedding = tf.reshape(flatten_embedding,
                               (batch_size, -1, embedding_size))

    return seq_embedding, seq_length
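A possible invocation of the function above. The TF-Hub module URL and the 50-dimensional output size are illustrative assumptions, and the sketch assumes sequence_length_from_sparse_tensor was imported into the module (e.g. from tensorflow.python.feature_column.utils, as in the other examples on this page).

import tensorflow as tf
import tensorflow_hub as hub

# Hypothetical token-level embedding: each whitespace token is embedded to 50 dims.
embedding_layer = hub.KerasLayer('https://tfhub.dev/google/nnlm-en-dim50/2')
sentences = tf.constant(['the quick brown fox', 'hello world'])
seq_embedding, seq_length = tfhub_embedding(sentences, embedding_layer, embedding_size=50)
# Expected: seq_embedding has shape [2, 4, 50] (padded to the longest sentence),
# seq_length is [4, 2].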
Example #9
def split_inputs(ctx, features, labels, num_cores_per_batch=1):
    """Splits the dense and sparse tensors inside the features and labels."""
    enqueue_datas = collections.OrderedDict()

    if ctx.embedding_config:
        tpu_embedding_ = ctx.embedding_config.tpu_embedding
        for feature_key in tpu_embedding_.feature_to_config_dict:
            sparse_feature = _get_sparse_feature_from_feature(
                feature_key, features)
            max_sequence_length = tpu_embedding_.feature_to_config_dict[
                feature_key].max_sequence_length
            if max_sequence_length > 0:
                length_feature_name = (
                    tpu_fc.
                    get_sequence_length_feature_key_name_from_feature_key_name(
                        feature_key))
                length_feature = math_ops.minimum(
                    fc_utils.sequence_length_from_sparse_tensor(
                        sparse_feature), max_sequence_length)
                length_feature.set_shape(ctx.batch_size_for_input_fn)
                features[length_feature_name] = length_feature
            weight_key = tpu_embedding_.feature_to_config_dict[
                feature_key].weight_key
            sparse_feature_split = _split_tensor(sparse_feature,
                                                 num_cores_per_batch)
            if isinstance(sparse_feature, sparse_tensor.SparseTensor):
                weights = _get_weights_from_features(weight_key, features)
                weights_split = _split_tensor(weights, num_cores_per_batch)
                enqueue_data = []
                for i in range(num_cores_per_batch):
                    enqueue_data.append(
                        tpu_embedding.EnqueueData.from_sparse_tensor(
                            sparse_feature_split[i], weights_split[i]))
            else:
                if weight_key is not None:
                    raise ValueError(
                        'Found weights {} for weighted_categorical_column, which is not '
                        'compatible with sparse feature {} enqueued as dense tensor.'
                        .format(weight_key, feature_key))
                enqueue_data = []
                for i in range(num_cores_per_batch):
                    enqueue_data.append(
                        tpu_embedding.EnqueueData(sparse_feature_split[i]))
            enqueue_datas[feature_key] = enqueue_data

    # Transpose the enqueue_datas dict into a list of dicts
    enqueue_datas_list = []
    for i in range(num_cores_per_batch):
        enqueue_data = {}
        for key, value in enqueue_datas.items():
            enqueue_data[key] = value[i]
        enqueue_datas_list.append(enqueue_data)
    return features, labels, enqueue_datas_list
Example #10
def get_sequence_dense_tensor(self, transformation_cache, state_manager):
    """See `SequenceDenseColumn` base class."""
    if not isinstance(self.categorical_column, SequenceCategoricalColumn):
        raise ValueError(
            'In indicator_column: {}. '
            'categorical_column must be of type SequenceCategoricalColumn '
            'to use SequenceFeatures. '
            'Suggested fix: Use one of sequence_categorical_column_with_*. '
            'Given (type {}): {}'.format(self.name,
                                         type(self.categorical_column),
                                         self.categorical_column))
    # Feature has been already transformed. Return the intermediate
    # representation created by transform_feature.
    dense_tensor = transformation_cache.get(self, state_manager)
    sparse_tensors = self.categorical_column.get_sparse_tensors(
        transformation_cache, state_manager)
    sequence_length = fc_utils.sequence_length_from_sparse_tensor(
        sparse_tensors.id_tensor)
    return SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=sequence_length)
def split_inputs(ctx, features, labels):
    """Splits the dense and sparse tensors inside the features and labels."""
    enqueue_datas = collections.OrderedDict()
    if ctx.embedding_config:
        tpu_embedding_ = ctx.embedding_config.tpu_embedding
        feature_to_weight_key_name_dict = (
            ctx.embedding_config.feature_to_weight_key_name_dict)
        for feature_key in tpu_embedding_.feature_to_config_dict:
            sparse_feature = _get_sparse_feature_from_feature(
                feature_key, features)
            max_sequence_length = tpu_embedding_.feature_to_config_dict[
                feature_key].max_sequence_length
            if max_sequence_length > 0:
                length_feature_name = (
                    tpu_fc.
                    get_sequence_length_feature_key_name_from_feature_key_name(
                        feature_key))
                length_feature = math_ops.minimum(
                    fc_utils.sequence_length_from_sparse_tensor(
                        sparse_feature), max_sequence_length)
                length_feature.set_shape(ctx.batch_size_for_input_fn)
                features[length_feature_name] = length_feature
            weight_key_name = feature_to_weight_key_name_dict[feature_key]
            if isinstance(sparse_feature, sparse_tensor.SparseTensor):
                weights = _get_weights_from_features(weight_key_name, features)
                enqueue_data = tpu_embedding.EnqueueData.from_sparse_tensor(
                    sparse_feature, weights)
            else:
                if weight_key_name is not None:
                    raise ValueError(
                        'Found weights {} for weighted_categorical_column, which is not '
                        'compatible with sparse feature {} enqueued as dense tensor.'
                        .format(weight_key_name, feature_key))
                enqueue_data = tpu_embedding.EnqueueData(sparse_feature)
            enqueue_datas[feature_key] = enqueue_data

    return features, labels, enqueue_datas