Example #1
  def call(self, inputs, training=None):
    """Transforms the features into dense context features and example features.

    This is the Keras equivalent of `tfr.feature.encode_listwise_features`.

    Args:
      inputs: (dict) Features with a mix of context (2D) and example features
        (3D).
      training: (bool) whether in train or inference mode.

    Returns:
      context_features: (dict) context feature names to dense 2D tensors of
        shape [batch_size, feature_dims].
      example_features: (dict) example feature names to dense 3D tensors of
        shape [batch_size, list_size, feature_dims].
    """
    features = inputs
    context_features = {}
    if self._context_feature_columns:
      context_cols_to_tensors = {}
      self._context_dense_layer(
          features,
          training=training,
          cols_to_output_tensors=context_cols_to_tensors)
      context_features = {
          name: context_cols_to_tensors[col]
          for name, col in six.iteritems(self._context_feature_columns)
      }
    example_features = {}
    if self._example_feature_columns:
      # Compute example_features. Note that the key in `example_feature_columns`
      # dict can be different from the key in the `features` dict. We only need
      # to reshape the per-example tensors in `features`. To obtain the keys for
      # per-example features, we use the parsing feature specs.
      example_specs = tf.feature_column.make_parse_example_spec(
          list(six.itervalues(self._example_feature_columns)))
      example_name = next(six.iterkeys(example_specs))
      batch_size = tf.shape(input=features[example_name])[0]
      list_size = tf.shape(input=features[example_name])[1]
      reshaped_example_features = {}
      for name in example_specs:
        if name not in features:
          continue
        reshaped_example_features[name] = utils.reshape_first_ndims(
            features[name], 2, [batch_size * list_size])

      example_cols_to_tensors = {}
      self._example_dense_layer(
          reshaped_example_features,
          training=training,
          cols_to_output_tensors=example_cols_to_tensors)
      example_features = {
          name: utils.reshape_first_ndims(example_cols_to_tensors[col], 1,
                                          [batch_size, list_size])
          for name, col in six.iteritems(self._example_feature_columns)
      }
    return context_features, example_features
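
Most of these examples revolve around `utils.reshape_first_ndims`, which collapses the first n dimensions of a tensor into a new prefix shape while leaving the remaining dimensions untouched. A minimal standalone sketch of the dense case (the helper name here is illustrative, not the library implementation):

import tensorflow as tf

def collapse_first_two_dims(tensor, new_first_dim):
  # Keep every dimension after the first two unchanged.
  remaining_dims = tf.shape(tensor)[2:]
  new_shape = tf.concat([[new_first_dim], remaining_dims], axis=0)
  return tf.reshape(tensor, new_shape)

x = tf.zeros([4, 5, 16])               # [batch_size, list_size, feature_dim]
y = collapse_first_two_dims(x, 4 * 5)  # [batch_size * list_size, feature_dim]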
Example #2
File: dnn.py Project: yizhiru/toyML
    def compute_logits(self,
                       context_features=None,
                       example_features=None,
                       training=True,
                       mask=None):
        tensor = next(six.itervalues(example_features))
        batch_size = tf.shape(tensor)[0]
        list_size = tf.shape(tensor)[1]
        if mask is None:
            mask = tf.ones(shape=[batch_size, list_size], dtype=tf.bool)
        nd_indices, nd_mask = utils.padded_nd_indices(is_valid=mask)

        # Expand query features to be of [batch_size, list_size, ...].
        large_batch_context_features = {}
        for name, tensor in six.iteritems(context_features):
            x = tf.expand_dims(input=tensor, axis=1)
            x = tf.gather(x, tf.zeros([list_size], tf.int32), axis=1)
            large_batch_context_features[name] = utils.reshape_first_ndims(
                x, 2, [batch_size * list_size])

        large_batch_example_features = {}
        for name, tensor in six.iteritems(example_features):
            # Replace invalid example features with valid ones.
            padded_tensor = tf.gather_nd(tensor, nd_indices)
            large_batch_example_features[name] = utils.reshape_first_ndims(
                padded_tensor, 2, [batch_size * list_size])

        # Get scores for large batch.
        sparse_input, dense_input = [], []
        for name in large_batch_context_features:
            if name in self._sparse_embed_layers:
                sparse_input.append(self._sparse_embed_layers[name](large_batch_context_features[name]))
            else:
                dense_input.append(large_batch_context_features[name])
        for name in large_batch_example_features:
            if name in self._sparse_embed_layers:
                sparse_input.append(self._sparse_embed_layers[name](large_batch_example_features[name]))
            else:
                dense_input.append(large_batch_example_features[name])
        sparse_input = [tf.keras.layers.Flatten()(inpt) for inpt in sparse_input]

        inputs = tf.concat(sparse_input + dense_input, 1)
        outputs = inputs
        for layer in self._scoring_layers:
            outputs = layer(outputs, training=training)

        scores = self._output_score_layer(outputs, training=training)
        logits = tf.reshape(
            scores, shape=[batch_size, list_size])

        # Apply nd_mask to zero out invalid entries.
        logits = tf.where(nd_mask, logits, tf.zeros_like(logits))
        return logits
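
The closing `tf.where(nd_mask, logits, tf.zeros_like(logits))` step recurs throughout these examples; a tiny self-contained illustration of the masking behavior:

import tensorflow as tf

logits = tf.constant([[0.5, -1.2, 2.0],
                      [1.1, 0.3, -0.7]])
mask = tf.constant([[True, True, False],
                    [True, False, False]])
# Entries for invalid examples are zeroed so they cannot affect downstream ops.
masked = tf.where(mask, logits, tf.zeros_like(logits))
# masked == [[0.5, -1.2, 0.0], [1.1, 0.0, 0.0]]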
Example #3
    def call(
        self, inputs: Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor],
                            tf.Tensor]
    ) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor]]:
        """Call FlattenList layer to flatten context_features and example_features.

    Args:
      inputs: A tuple of (context_features, example_features, list_mask), which
        are described below:
      * `context_features`: A map of context features to 2D tensors of shape
        [batch_size, feature_dim].
      * `example_features`: A map of example features to 3D tensors of shape
        [batch_size, list_size, feature_dim].
      * `list_mask`: A Tensor of shape [batch_size, list_size] to mask out the
        invalid examples.

    Returns:
      A tuple of (flattened_context_features, flattened_example_features) where
      the former is a dict of context features to 2D tensors of shape
      [batch_size * list_size, feature_dim] and the latter is a dict of example
      features to 2D tensors of shape [batch_size * list_size, feature_dim].

    Raises:
      ValueError: If `example_features` is empty dict or None.
    """
        context_features, example_features, list_mask = inputs
        if not example_features:
            raise ValueError('Need a valid example feature.')
        batch_size = tf.shape(list_mask)[0]
        list_size = tf.shape(list_mask)[1]
        # Expand context features to be of [batch_size, list_size, ...].
        flattened_context_features = {}
        for name, tensor in context_features.items():
            expanded_tensor = tf.repeat(tf.expand_dims(tensor, axis=1),
                                        repeats=[list_size],
                                        axis=1)
            flattened_context_features[name] = utils.reshape_first_ndims(
                expanded_tensor, 2, [batch_size * list_size])

        nd_indices = None
        if self._circular_padding:
            nd_indices, _ = utils.padded_nd_indices(is_valid=list_mask)

        flattened_example_features = {}
        for name, tensor in example_features.items():
            if nd_indices is not None:
                # Replace invalid example features with valid ones.
                tensor = tf.gather_nd(tensor, nd_indices)
            flattened_example_features[name] = utils.reshape_first_ndims(
                tensor, 2, [batch_size * list_size])

        return flattened_context_features, flattened_example_features
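
The context-expansion step above can be checked in isolation; a small sketch of `tf.repeat` followed by flattening, assuming a feature_dim of 3:

import tensorflow as tf

context = tf.constant([[1., 2., 3.],
                       [4., 5., 6.]])   # [batch_size=2, feature_dim=3]
list_size = 4
# [2, 3] -> [2, 1, 3] -> [2, 4, 3]: each query's context is copied per example.
expanded = tf.repeat(tf.expand_dims(context, axis=1), repeats=[list_size], axis=1)
# Flatten to [batch_size * list_size, feature_dim] = [8, 3].
flattened = tf.reshape(expanded, [-1, 3])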
Example #4
    def compute_logits(self,
                       context_features=None,
                       example_features=None,
                       training=None,
                       mask=None):
        """Scores context and examples to return a score per document.

    Args:
      context_features: (dict) context feature names to 2D tensors of shape
        [batch_size, feature_dims].
      example_features: (dict) example feature names to 3D tensors of shape
        [batch_size, list_size, feature_dims].
      training: (bool) whether in train or inference mode.
      mask: (tf.Tensor) Mask is a tensor of shape [batch_size, list_size], which
        is True for a valid example and False for an invalid one. If mask is
        None, all entries are valid.

    Returns:
      (tf.Tensor) A score tensor of shape [batch_size, list_size].
    """
        tensor = next(six.itervalues(example_features))
        batch_size = tf.shape(tensor)[0]
        list_size = tf.shape(tensor)[1]
        if mask is None:
            mask = tf.ones(shape=[batch_size, list_size], dtype=tf.bool)
        nd_indices, nd_mask = utils.padded_nd_indices(is_valid=mask)

        # Expand query features to be of [batch_size, list_size, ...].
        large_batch_context_features = {}
        for name, tensor in six.iteritems(context_features):
            x = tf.expand_dims(input=tensor, axis=1)
            x = tf.gather(x, tf.zeros([list_size], tf.int32), axis=1)
            large_batch_context_features[name] = utils.reshape_first_ndims(
                x, 2, [batch_size * list_size])

        large_batch_example_features = {}
        for name, tensor in six.iteritems(example_features):
            # Replace invalid example features with valid ones.
            padded_tensor = tf.gather_nd(tensor, nd_indices)
            large_batch_example_features[name] = utils.reshape_first_ndims(
                padded_tensor, 2, [batch_size * list_size])

        # Get scores for large batch.
        scores = self.score(context_features=large_batch_context_features,
                            example_features=large_batch_example_features,
                            training=training)
        logits = tf.reshape(scores, shape=[batch_size, list_size])

        # Apply nd_mask to zero out invalid entries.
        logits = tf.where(nd_mask, logits, tf.zeros_like(logits))
        return logits
Example #5
  def test_reshape_first_ndims_dense_tensor(self):
    # Batch size = 2, list size = 5, embedding size = 10.
    tensor = tf.reshape(tf.range(100), shape=(2, 5, 10))
    target_tensor = tf.reshape(tf.range(100), shape=(10, 10))
    reshaped_tensor = utils.reshape_first_ndims(tensor, 2, [10])
    self.assertAllEqual(reshaped_tensor.get_shape().as_list(), [10, 10])

    with tf.compat.v1.Session() as sess:
      reshaped, target = sess.run([reshaped_tensor, target_tensor])
      self.assertAllEqual(reshaped, target)
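
The test above runs under a TF1 `Session`; for reference, a sketch of the equivalent eager-mode check (assuming the `tensorflow_ranking.python.utils` import path):

import tensorflow as tf
from tensorflow_ranking.python import utils  # assumed import path

tensor = tf.reshape(tf.range(100), shape=(2, 5, 10))
reshaped = utils.reshape_first_ndims(tensor, 2, [10])
target = tf.reshape(tf.range(100), shape=(10, 10))
assert reshaped.shape.as_list() == [10, 10]
tf.debugging.assert_equal(reshaped, target)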
Example #6
File: data.py Project: seulrq/ranking
    def parse(self, serialized):
        """See `_RankingDataParser`."""
        (serialized_context, serialized_list,
         sizes) = self._decode_as_serialized_example_list(serialized)

        # Use static batch size whenever possible.
        batch_size = serialized_context.get_shape().as_list()[0] or tf.shape(
            input=serialized_list)[0]
        cur_list_size = tf.shape(input=serialized_list)[1]
        list_size = self._list_size

        if self._shuffle_examples:
            is_valid = tf.sequence_mask(sizes, cur_list_size)
            indices = utils.shuffle_valid_indices(is_valid, seed=self._seed)
            serialized_list = tf.gather_nd(serialized_list, indices)

        # Apply truncation or padding to align tensor shape.
        if list_size:

            def truncate_fn():
                return tf.slice(serialized_list, [0, 0],
                                [batch_size, list_size])

            def pad_fn():
                return tf.pad(tensor=serialized_list,
                              paddings=[[0, 0], [0,
                                                 list_size - cur_list_size]],
                              constant_values="")

            serialized_list = tf.cond(pred=cur_list_size > list_size,
                                      true_fn=truncate_fn,
                                      false_fn=pad_fn)
            cur_list_size = list_size

        features = {}
        example_features = tf.compat.v1.io.parse_example(
            tf.reshape(serialized_list, [-1]), self._example_feature_spec)
        for k, v in six.iteritems(example_features):
            features[k] = utils.reshape_first_ndims(
                v, 1, [batch_size, cur_list_size])

        if self._context_feature_spec:
            features.update(
                tf.compat.v1.io.parse_example(
                    tf.reshape(serialized_context, [batch_size]),
                    self._context_feature_spec))

        # Add example list sizes to features, if needed.
        if self._size_feature_name:
            features[self._size_feature_name] = sizes
        return features
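
The truncate-or-pad branch can be exercised on a plain string tensor; a minimal sketch with made-up values:

import tensorflow as tf

serialized_list = tf.constant([[b"a", b"b", b"c"]])  # [batch_size, cur_list_size]
batch_size = tf.shape(serialized_list)[0]
cur_list_size = tf.shape(serialized_list)[1]
list_size = 5
aligned = tf.cond(
    pred=cur_list_size > list_size,
    true_fn=lambda: tf.slice(serialized_list, [0, 0], [batch_size, list_size]),
    false_fn=lambda: tf.pad(serialized_list,
                            paddings=[[0, 0], [0, list_size - cur_list_size]],
                            constant_values=""))
# aligned == [[b"a", b"b", b"c", b"", b""]]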
Example #7
 def _expand_dims_for_example_features(features):
   """Converts example features for listwise inference."""
   # NOTE: our current design of `model` has three layers: (a) input layer,
   # (b) GenerateMask, and (c) RankingNetwork. So we can access RankingNetwork
   # with model.layers[-1], but we may need to revisit this when the design is
   # changed in the future.
   example_feature_columns = model.layers[-1].example_feature_columns
   example_specs = tf.feature_column.make_parse_example_spec(
       list(six.itervalues(example_feature_columns)))
   # Expand dimension of example features for listwise inference.
   for name in example_specs:
     if name not in features:
       continue
     features[name] = utils.reshape_first_ndims(features[name], 1, [-1, 1])
   return features
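
`reshape_first_ndims(features[name], 1, [-1, 1])` turns a pointwise batch into single-example lists; a tiny equivalent in plain TF ops:

import tensorflow as tf

feature = tf.constant([[1., 2.], [3., 4.], [5., 6.]])  # [num_examples, feature_dim]
# [3, 2] -> [3, 1, 2]: each example becomes a list of size 1.
listwise = tf.reshape(feature, tf.concat([[-1, 1], tf.shape(feature)[1:]], axis=0))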
Example #8
    def parse(self, serialized):
        """See `_RankingDataParser`."""
        (serialized_context,
         serialized_list) = self._decode_as_serialized_example_list(serialized)
        # Use static batch size whenever possible.
        batch_size = serialized_context.get_shape().as_list()[0] or tf.shape(
            serialized_list)[0]
        cur_list_size = tf.shape(serialized_list)[1]
        list_size = self._list_size

        # Apply truncation or padding to align tensor shape.
        if list_size:

            def truncate_fn():
                return tf.slice(serialized_list, [0, 0],
                                [batch_size, list_size])

            def pad_fn():
                return tf.pad(tensor=serialized_list,
                              paddings=[[0, 0], [0,
                                                 list_size - cur_list_size]],
                              constant_values="")

            serialized_list = tf.cond(pred=cur_list_size > list_size,
                                      true_fn=truncate_fn,
                                      false_fn=pad_fn)
            cur_list_size = list_size

        features = {}
        example_features = tf.compat.v1.io.parse_example(
            tf.reshape(serialized_list, [-1]), self._example_feature_spec)
        for k, v in six.iteritems(example_features):
            features[k] = utils.reshape_first_ndims(
                v, 1, [batch_size, cur_list_size])

        if self._context_feature_spec:
            features.update(
                tf.compat.v1.io.parse_example(
                    tf.reshape(serialized_context, [batch_size]),
                    self._context_feature_spec))

        return features
Example #9
 def test_reshape_first_ndims_sparse_tensor(self):
   # Batch size = 2, list size = 3, embedding size = 3.
   # Tensor:
   # [[[1, 0, 0], [0, 2, 0], [0, 0, 3]], [[4, 0, 0], [0, 5, 0], [0, 0, 6]]].
   # Reshaped :
   # [[[1, 0, 0], [0, 2, 0], [0, 0, 3], [4, 0, 0], [0, 5, 0], [0, 0, 6]]].
   sparse_tensor = tf.SparseTensor(
       indices=[[0, 0, 0], [0, 1, 1], [0, 2, 2], [1, 0, 0], [1, 1, 1],
                [1, 2, 2]],
       values=[1, 2, 3, 4, 5, 6],
       dense_shape=[2, 3, 3])
   target = tf.SparseTensor(
       indices=[[0, 0], [1, 1], [2, 2], [3, 0], [4, 1], [5, 2]],
       values=[1, 2, 3, 4, 5, 6],
       dense_shape=[6, 3])
   reshaped = utils.reshape_first_ndims(sparse_tensor, 2, [6])
   with tf.compat.v1.Session() as sess:
     reshaped_array, target_array = sess.run([reshaped, target])
     self.assertAllEqual(reshaped_array.indices, target_array.indices)
     self.assertAllEqual(reshaped_array.values, target_array.values)
     self.assertAllEqual(reshaped_array.dense_shape, target_array.dense_shape)
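
For sparse tensors the same collapse can be written directly with `tf.sparse.reshape`; a sketch of the eager-mode equivalent of the test above:

import tensorflow as tf

sp = tf.SparseTensor(
    indices=[[0, 0, 0], [0, 1, 1], [0, 2, 2], [1, 0, 0], [1, 1, 1], [1, 2, 2]],
    values=[1, 2, 3, 4, 5, 6],
    dense_shape=[2, 3, 3])
# Collapse the first two dims: [2, 3, 3] -> [6, 3].
reshaped = tf.sparse.reshape(sp, [6, 3])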
Example #10
    def call(
        self,
        context_features: Dict[str, tf.Tensor],
        example_features: Dict[str, tf.Tensor],
        list_mask: tf.Tensor,
    ) -> tf.Tensor:
        """Call method for ConcatFeatures layer.

    Args:
      context_features: A dict of `Tensor`s with shape [batch_size, ...].
      example_features:  A dict of `Tensor`s with shape [batch_size, list_size,
        ...].
      list_mask: A boolean tensor of shape [batch_size, list_size], which is
        True for a valid example and False for an invalid one.

    Returns:
      A `Tensor` of shape [batch_size, list_size, ...].
    """
        (flattened_context_features,
         flattened_example_features) = self._flatten_list(
             context_features=context_features,
             example_features=example_features,
             list_mask=list_mask)
        # Concatenate flattened context and example features along the feature dim.
        context_input = [
            tf.keras.layers.Flatten()(flattened_context_features[name])
            for name in sorted(flattened_context_features)
        ]
        example_input = [
            tf.keras.layers.Flatten()(flattened_example_features[name])
            for name in sorted(flattened_example_features)
        ]
        flattened_concat_features = tf.concat(context_input + example_input, 1)

        # Reshape to 3D.
        batch_size = tf.shape(list_mask)[0]
        list_size = tf.shape(list_mask)[1]
        return utils.reshape_first_ndims(flattened_concat_features, 1,
                                         [batch_size, list_size])
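
Iterating feature names in sorted order keeps the concatenation layout deterministic across calls; a tiny illustration with hypothetical feature names:

import tensorflow as tf

flattened = {
    'title_emb': tf.zeros([8, 16]),  # [batch_size * list_size, 16]
    'body_emb': tf.zeros([8, 32]),   # [batch_size * list_size, 32]
}
# sorted() guarantees 'body_emb' always precedes 'title_emb' in the concat.
inputs = tf.concat([flattened[name] for name in sorted(flattened)], axis=1)
# inputs.shape == [8, 48]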
Example #11
    def call(
        self, inputs: Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor],
                            tf.Tensor]
    ) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor]]:
        """Call layer to flatten context_features and example_features.

    Args:
      inputs: A tuple of (context_features, example_features, list_mask), which
        are described below:
      * `context_features`: A map of context features to 2D tensors of shape
        [batch_size, feature_dim].
      * `example_features`: A map of example features to 3D tensors of shape
        [batch_size, list_size, feature_dim].
      * `list_mask`: A Tensor of shape [batch_size, list_size] to mask out the
        invalid examples.

    Returns:
      A tensor of shape [batch_size, list_size, concat_feature_dim].
    """
        context_features, example_features, list_mask = inputs
        (flattened_context_features,
         flattened_example_features) = self._flatten_list(
             (context_features, example_features, list_mask))
        # Concatenate flattened context and example features along the feature dim.
        context_input = [
            tf.keras.layers.Flatten()(flattened_context_features[name])
            for name in sorted(flattened_context_features)
        ]
        example_input = [
            tf.keras.layers.Flatten()(flattened_example_features[name])
            for name in sorted(flattened_example_features)
        ]
        flattened_concat_features = tf.concat(context_input + example_input, 1)

        # Reshape to 3D.
        batch_size = tf.shape(list_mask)[0]
        list_size = tf.shape(list_mask)[1]
        return utils.reshape_first_ndims(flattened_concat_features, 1,
                                         [batch_size, list_size])
Example #12
    def sample(self, labels, logits, weights=None):
        """Samples scores from Concrete(logits).

    Args:
      labels: A `Tensor` with shape [batch_size, list_size] same as `logits`,
        representing graded relevance. Or in the diversity tasks, a `Tensor`
        with shape [batch_size, list_size, subtopic_size]. Each value represents
        relevance to a subtopic: 1 for a relevant subtopic, 0 for an irrelevant
        one, and -1 for padding. When the actual number of subtopics of a query
        is smaller than `subtopic_size`, `labels` will be padded to
        `subtopic_size` with -1.
      logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
        ranking score of the corresponding item.
      weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
        weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
        weights. If None, the weight of a list in the mini-batch is set to the
        sum of the labels of the items in that list.

    Returns:
      A tuple of expanded labels, logits, and weights, where the first dimension
      is now batch_size * sample_size. Logits are sampled from Concrete(logits),
      while labels and weights are simply tiled so that the resulting Tensors
      have the updated dimensions.
    """
        with tf.compat.v1.name_scope(self._name, 'gumbel_softmax_sample',
                                     (labels, logits, weights)):
            batch_size = tf.shape(input=labels)[0]
            list_size = tf.shape(input=labels)[1]

            # Expand labels.
            expanded_labels = tf.expand_dims(labels, 1)
            expanded_labels = tf.repeat(expanded_labels, [self._sample_size],
                                        axis=1)
            expanded_labels = utils.reshape_first_ndims(
                expanded_labels, 2, [batch_size * self._sample_size])

            # Sample logits from Concrete(logits).
            sampled_logits = tf.expand_dims(logits, 1)
            sampled_logits = tf.tile(sampled_logits, [1, self._sample_size, 1])
            sampled_logits += _sample_gumbel(
                [batch_size, self._sample_size, list_size], seed=self._seed)
            sampled_logits = tf.reshape(
                sampled_logits, [batch_size * self._sample_size, list_size])

            is_label_valid = utils.is_label_valid(expanded_labels)
            if is_label_valid.shape.rank > 2:
                is_label_valid = tf.reduce_any(is_label_valid, axis=-1)
            sampled_logits = tf.compat.v1.where(
                is_label_valid, sampled_logits / self._temperature,
                tf.math.log(1e-20) * tf.ones_like(sampled_logits))
            sampled_logits = tf.math.log(tf.nn.softmax(sampled_logits) + 1e-20)

            expanded_weights = weights
            if expanded_weights is not None:
                true_fn = lambda: tf.expand_dims(
                    tf.expand_dims(expanded_weights, 1), 1)
                false_fn = lambda: tf.expand_dims(expanded_weights, 1)
                expanded_weights = tf.cond(pred=tf.math.equal(
                    tf.rank(expanded_weights), 1),
                                           true_fn=true_fn,
                                           false_fn=false_fn)
                expanded_weights = tf.tile(expanded_weights,
                                           [1, self._sample_size, 1])
                expanded_weights = tf.reshape(
                    expanded_weights, [batch_size * self._sample_size, -1])

            return expanded_labels, sampled_logits, expanded_weights
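
`_sample_gumbel` is not shown in this example; the standard construction draws uniform noise and applies a double negative log. A sketch under that assumption:

import tensorflow as tf

def sample_gumbel(shape, seed=None, eps=1e-20):
  """Gumbel(0, 1) noise via the inverse CDF of uniform samples."""
  u = tf.random.uniform(shape, minval=0.0, maxval=1.0, seed=seed)
  return -tf.math.log(-tf.math.log(u + eps) + eps)

noise = sample_gumbel([2, 4, 3])  # [batch_size, sample_size, list_size]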
Example #13
    def parse(self, serialized):
        """See `_RankingDataParser`."""
        (serialized_context,
         serialized_list) = self._decode_as_serialized_example_list(serialized)
        # Use static batch size whenever possible.
        batch_size = serialized_context.get_shape().as_list()[0] or tf.shape(
            input=serialized_list)[0]
        cur_list_size = tf.shape(input=serialized_list)[1]
        list_size = self._list_size

        # Apply truncation or padding to align tensor shape.
        if list_size:

            def truncate_fn():
                return tf.slice(serialized_list, [0, 0],
                                [batch_size, list_size])

            def pad_fn():
                # Create feature spec for tf.train.Example to append
                pad_spec = {}
                # Default values are 0 or an empty byte string depending on
                # original serialized data type
                dtype_map = {
                    tf.float32:
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[0.0])),
                    tf.int32:
                    tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
                    tf.string:
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[bytes('', encoding='UTF-8')]))
                }
                # Create the feature spec
                for key, item in self._example_feature_spec.items():
                    dtype = item.dtype
                    pad_spec[key] = dtype_map[dtype]
                # Make and serialize example to append
                constant_values = tf.train.Example(features=tf.train.Features(
                    feature=pad_spec))
                constant_val_str = constant_values.SerializeToString()

                # Add serialized padding to end of list
                return tf.pad(tensor=serialized_list,
                              paddings=[[0, 0], [0,
                                                 list_size - cur_list_size]],
                              constant_values=constant_val_str)

            serialized_list = tf.cond(pred=cur_list_size > list_size,
                                      true_fn=truncate_fn,
                                      false_fn=pad_fn)
            cur_list_size = list_size

        features = {}
        example_features = tf.compat.v1.io.parse_example(
            tf.reshape(serialized_list, [-1]), self._example_feature_spec)
        for k, v in six.iteritems(example_features):
            features[k] = utils.reshape_first_ndims(
                v, 1, [batch_size, cur_list_size])

        if self._context_feature_spec:
            features.update(
                tf.compat.v1.io.parse_example(
                    tf.reshape(serialized_context, [batch_size]),
                    self._context_feature_spec))

        return features
Example #14
    def _compute_logits_impl(self, context_features, example_features, labels,
                             mode, params, config):
        # Scatter/Gather per-example scores through groupwise comparison. Each
        # instance in a mini-batch will form a number of groups. Each group of
        # examples are scored by `_score_fn` and scores for individual examples are
        # accumulated into logits.
        with tf.compat.v1.name_scope('groupwise_dnn_v2'):
            batch_size, list_size, is_valid = _infer_sizes(
                example_features, labels)
            # For each example feature, assuming the shape is [batch_size, list_size,
            # feature_size], the groups are formed along the 2nd dim. Each group has a
            # 'group_size' number of indices in [0, list_size). Based on these
            # indices, we can gather the example feature into a sub-tensor for each
            # group. The total number of groups we have for a mini-batch is batch_size
            # * num_groups. Inside each group, we have a 'group_size' number of
            # examples.
            self._update_scatter_gather_indices(is_valid, mode, params)
            num_groups = tf.shape(input=self._indices_mask)[1]

            with tf.compat.v1.name_scope('group_features'):
                # For context features, we have shape [batch_size * num_groups, ...].
                large_batch_context_features = {}
                for name, value in six.iteritems(context_features):
                    # [batch_size, 1, ...].
                    value = tf.expand_dims(value, axis=1)
                    # [batch_size, num_groups, ...].
                    value = tf.gather(value,
                                      tf.zeros([num_groups], tf.int32),
                                      axis=1)
                    # [batch_size * num_groups, ...]
                    large_batch_context_features[
                        name] = utils.reshape_first_ndims(
                            value, 2, [batch_size * num_groups])

                # For example feature, we have shape [batch_size * num_groups,
                # group_size, ...].
                large_batch_group_features = {}
                for name, value in six.iteritems(example_features):
                    # [batch_size, num_groups, group_size, ...].
                    value = tf.gather_nd(value, self._feature_gather_indices)
                    # [batch_size * num_groups, group_size, ...].
                    large_batch_group_features[
                        name] = utils.reshape_first_ndims(
                            value, 3,
                            [batch_size * num_groups, self._group_size])

            # Do the inference and get scores for the large batch of [batch_size *
            # num_groups, logits_size] and reshape them to [batch_size, num_groups,
            # logits_size].
            with tf.compat.v1.variable_scope('group_score'):
                scores = self._score_fn(large_batch_context_features,
                                        large_batch_group_features, mode,
                                        params, config)
                scores = tf.reshape(scores,
                                    tf.shape(self._score_scatter_indices)[0:3])

            with tf.compat.v1.name_scope('accumulate_scores'):
                # Reset invalid scores to 0 based on mask.
                scores_mask = tf.gather(tf.expand_dims(self._indices_mask, 2),
                                        tf.zeros([tf.shape(scores)[2]],
                                                 tf.int32),
                                        axis=2)
                scores = tf.where(scores_mask, scores, tf.zeros_like(scores))
                # Scatter scores from [batch_size, num_groups, logits_size] to
                # [batch_size, list_size].
                logits = tf.scatter_nd(self._score_scatter_indices, scores,
                                       [batch_size, list_size])
                counts = tf.scatter_nd(self._score_scatter_indices,
                                       tf.cast(scores_mask, tf.float32),
                                       [batch_size, list_size])
                # Use average.
                logits = tf.compat.v1.div_no_nan(logits, counts)
        return logits
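
The scatter-and-average step relies on `tf.scatter_nd` summing updates at duplicate indices; a small standalone illustration:

import tensorflow as tf

# Two groups over a list of size 3; position [0, 1] is scored by both groups.
indices = tf.constant([[[0, 0], [0, 1]],
                       [[0, 1], [0, 2]]])  # [num_groups, group_size, 2]
scores = tf.constant([[1.0, 2.0],
                      [4.0, 6.0]])         # [num_groups, group_size]
logits = tf.scatter_nd(indices, scores, shape=[1, 3])  # duplicates are summed
counts = tf.scatter_nd(indices, tf.ones_like(scores), shape=[1, 3])
averaged = tf.math.divide_no_nan(logits, counts)
# logits == [[1.0, 6.0, 6.0]], counts == [[1.0, 2.0, 1.0]], averaged == [[1.0, 3.0, 6.0]]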
Example #15
def listwise_scoring(scorer,
                     context_features,
                     example_features,
                     training=None,
                     mask=None):
    """Listwise scoring op for context and example features.

  Args:
    scorer: A callable (e.g., a Keras layer instance or a function) for scoring
      with the following signature:
      * Args:
        `context_features`: (dict) A dict of Tensors with the shape [batch_size,
          ...].
        `example_features`: (dict) A dict of Tensors with the shape [batch_size,
          ...].
        `training`: (bool) whether in training or inference mode.
      * Returns: The computed logits, a Tensor of shape [batch_size,
        output_size].
    context_features: (dict) context feature names to dense 2D tensors of shape
      [batch_size, ...].
    example_features: (dict) example feature names to dense 3D tensors of shape
      [batch_size, list_size, ...].
    training: (bool) whether in train or inference mode.
    mask: (tf.Tensor) Mask is a tensor of shape [batch_size, list_size], which
      is True for a valid example and False for an invalid one.

  Returns:
    (tf.Tensor) A score tensor of shape [batch_size, list_size, output_size].

  Raises:
    ValueError: If `example_features` is None or an empty dict.
  """
    # Raise error if example features is None or empty dict.
    if not example_features:
        raise ValueError('Need a valid example feature.')

    tensor = next(six.itervalues(example_features))
    batch_size = tf.shape(tensor)[0]
    list_size = tf.shape(tensor)[1]
    if mask is None:
        mask = tf.ones(shape=[batch_size, list_size], dtype=tf.bool)
    nd_indices, nd_mask = utils.padded_nd_indices(is_valid=mask)

    # Expand context features to be of [batch_size, list_size, ...].
    large_batch_context_features = {}
    for name, tensor in six.iteritems(context_features):
        x = tf.expand_dims(input=tensor, axis=1)
        x = tf.gather(x, tf.zeros([list_size], tf.int32), axis=1)
        large_batch_context_features[name] = utils.reshape_first_ndims(
            x, 2, [batch_size * list_size])

    large_batch_example_features = {}
    for name, tensor in six.iteritems(example_features):
        # Replace invalid example features with valid ones.
        padded_tensor = tf.gather_nd(tensor, nd_indices)
        large_batch_example_features[name] = utils.reshape_first_ndims(
            padded_tensor, 2, [batch_size * list_size])

    # Get scores for large batch.
    scores = scorer(large_batch_context_features,
                    large_batch_example_features,
                    training=training)
    scores = tf.reshape(scores, shape=[batch_size, list_size, -1])

    # Apply nd_mask to zero out invalid entries.
    # Expand dimension and use broadcasting for filtering.
    expanded_nd_mask = tf.expand_dims(nd_mask, axis=2)
    scores = tf.where(expanded_nd_mask, scores, tf.zeros_like(scores))

    return scores
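
A minimal usage sketch for `listwise_scoring` with a toy scorer; all names and shapes here are illustrative:

import tensorflow as tf

def toy_scorer(context_features, example_features, training=None):
  # One score per flattened example: sum all features together.
  tensors = list(context_features.values()) + list(example_features.values())
  inputs = tf.concat([tf.reshape(t, [tf.shape(t)[0], -1]) for t in tensors], axis=1)
  return tf.reduce_sum(inputs, axis=1, keepdims=True)  # [batch * list, 1]

context = {'ctx': tf.ones([2, 4])}     # [batch_size, feature_dim]
examples = {'ex': tf.ones([2, 3, 5])}  # [batch_size, list_size, feature_dim]
scores = listwise_scoring(toy_scorer, context, examples)
# scores.shape == [2, 3, 1]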
Example #16
def encode_listwise_features(features,
                             input_size,
                             context_feature_columns,
                             example_feature_columns,
                             mode=model_fn.ModeKeys.TRAIN,
                             scope=None):
  """Returns dense tensors from features using feature columns.

  Args:
    features: (dict) mapping feature names (str) to feature values (`tf.Tensor`
      or `tf.SparseTensor`), possibly obtained from input_fn. For context
      features, the tensors are 2-D, while for example features the tensors are
      3-D.
    input_size: (int) number of examples per query. This is the size of second
      dimension of the Tensor corresponding to one of the example feature
      columns.
    context_feature_columns: (dict) context feature names to columns.
    example_feature_columns: (dict) example feature names to columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    context_features: (dict) A mapping from context feature names to dense
    2-D tensors of shape [batch_size, ...].
    example_features: (dict) A mapping from example feature names to dense
    3-D tensors of shape [batch_size, input_size, ...].

  Raises:
    ValueError: If `input size` is not equal to 2nd dimension of example
    tensors.
  """
  context_features = {}
  if context_feature_columns:
    context_cols_to_tensors = encode_features(
        features, context_feature_columns.values(), mode=mode, scope=scope)
    context_features = {
        name: context_cols_to_tensors[col]
        for name, col in six.iteritems(context_feature_columns)
    }

  example_features = {}
  if example_feature_columns:
    # Reshape [batch_size, input_size, ...] to [batch_size * input_size, ...]
    # so that features are encoded.
    batch_size = None
    reshaped_features = {}
    for name in example_feature_columns:
      if name not in features:
        continue
      batch_size = array_ops.shape(features[name])[0]
      try:
        reshaped_features[name] = utils.reshape_first_ndims(
            features[name], 2, [batch_size * input_size])
      except Exception:
        raise ValueError(
            "2nd dimension of tensor must be equal to input size: {}, "
            "but found feature {} with shape {}.".format(
                input_size, name, features[name].get_shape()))
    example_cols_to_tensors = encode_features(
        reshaped_features,
        example_feature_columns.values(),
        mode=mode,
        scope=scope)
    example_features = {
        name: utils.reshape_first_ndims(example_cols_to_tensors[col], 1,
                                        [batch_size, input_size])
        for name, col in six.iteritems(example_feature_columns)
    }

  return context_features, example_features
Example #17
File: network.py Project: yizhiru/toyML
    def compute_logits(self,
                       context_features=None,
                       example_features=None,
                       training=None,
                       mask=None):
        """Scores context and examples to return a score per example.

        Args:
          context_features: (dict) context feature names to 2D tensors of shape
            [batch_size, feature_dims].
          example_features: (dict) example feature names to 3D tensors of shape
            [batch_size, list_size, feature_dims].
          training: (bool) whether in train or inference mode.
          mask: (tf.Tensor) Mask is a tensor of shape [batch_size, list_size],
            which is True for a valid example and False for an invalid one. If
            mask is None, all entries are valid.

        Returns:
          (tf.Tensor) A score tensor of shape [batch_size, list_size].

        Raises:
          ValueError: If `scorer` does not return a scalar output.

        """

        if not example_features:
            raise ValueError('Need a valid example feature.')

        tensor = next(six.itervalues(example_features))
        batch_size = tf.shape(tensor)[0]
        list_size = tf.shape(tensor)[1]
        if mask is None:
            mask = tf.ones(shape=[batch_size, list_size], dtype=tf.bool)
        nd_indices, nd_mask = utils.padded_nd_indices(is_valid=mask)

        # Expand context features to be of [batch_size, list_size, ...].
        batch_context_features = {}
        for name, tensor in six.iteritems(context_features):
            x = tf.expand_dims(input=tensor, axis=1)
            x = tf.gather(x, tf.zeros([list_size], tf.int32), axis=1)
            batch_context_features[name] = utils.reshape_first_ndims(
                x, 2, [batch_size, list_size])

        batch_example_features = {}
        for name, tensor in six.iteritems(example_features):
            # Replace invalid example features with valid ones.
            padded_tensor = tf.gather_nd(tensor, nd_indices)
            batch_example_features[name] = utils.reshape_first_ndims(
                padded_tensor, 2, [batch_size, list_size])

        sparse_inputs, dense_inputs = [], []
        for name in batch_context_features:
            if name in self._sparse_embed_layers:
                sparse_inputs.append(self._sparse_embed_layers[name](
                    batch_context_features[name]))
            else:
                dense_inputs.append(batch_context_features[name])
        for name in batch_example_features:
            if name in self._sparse_embed_layers:
                sparse_inputs.append(self._sparse_embed_layers[name](
                    batch_example_features[name]))
            else:
                dense_inputs.append(batch_example_features[name])
        sparse_inputs = [tf.squeeze(inpt, axis=2) for inpt in sparse_inputs]
        inputs = tf.concat(sparse_inputs + dense_inputs, axis=-1)

        scores = self.score(inputs, nd_mask, training=training)
        scores = tf.reshape(scores, shape=[batch_size, list_size, -1])

        # Apply nd_mask to zero out invalid entries.
        # Expand dimension and use broadcasting for filtering.
        expanded_nd_mask = tf.expand_dims(nd_mask, axis=2)
        scores = tf.where(expanded_nd_mask, scores, tf.zeros_like(scores))
        # Remove last dimension of shape = 1.
        try:
            logits = tf.squeeze(scores, axis=2)
        except Exception:
            raise ValueError(
                'Logits not of shape: [batch_size, list_size, 1]. '
                'This could occur if the `scorer` does not return '
                'a scalar output.')
        return logits
Example #18
File: model.py Project: zhengd07/ranking
    def _groupwise_dnn_v2(features, labels, mode, params, config):
        """Defines the dnn for groupwise scoring functions."""
        with ops.name_scope('transform'):
            context_features, per_example_features = _call_transform_fn(
                features, mode)

        def _score_fn(context_features, group_features, reuse):
            with variable_scope.variable_scope('group_score', reuse=reuse):
                return group_score_fn(context_features, group_features, mode,
                                      params, config)

        # Scatter/Gather per-example scores through groupwise comparison. Each
        # instance in a mini-batch will form a number of groups. Each group of
        # examples is scored by `_score_fn`, and scores for individual examples
        # are accumulated over groups.
        with ops.name_scope('groupwise_dnn_v2'):
            with ops.name_scope('infer_sizes'):
                if labels is not None:
                    batch_size, list_size = array_ops.unstack(
                        array_ops.shape(labels))
                    is_valid = utils.is_label_valid(labels)
                else:
                    # Infer batch_size and list_size from a feature.
                    example_tensor_shape = array_ops.shape(
                        next(six.itervalues(per_example_features)))
                    batch_size = example_tensor_shape[0]
                    list_size = example_tensor_shape[1]
                    is_valid = utils.is_label_valid(
                        array_ops.ones([batch_size, list_size]))
            if batch_size is None or list_size is None:
                raise ValueError('Invalid batch_size=%s or list_size=%s' %
                                 (batch_size, list_size))

            # For each example feature, assume the shape is [batch_size, list_size,
            # feature_size], the groups are formed along the 2nd dim. Each group has a
            # 'group_size' number of indices in [0, list_size). Based on these
            # indices, we can gather the example feature into a sub-tensor for each
            # group. The total number of groups we have for a mini-batch is batch_size
            # * num_groups. Inside each group, we have a 'group_size' number of
            # examples.
            indices, mask = _form_group_indices_nd(
                is_valid,
                group_size,
                shuffle=(mode != model_fn.ModeKeys.PREDICT))
            num_groups = array_ops.shape(mask)[1]

            with ops.name_scope('group_features'):
                # For context features, we have shape [batch_size * num_groups, ...].
                large_batch_context_features = {}
                for name, value in six.iteritems(context_features):
                    # [batch_size, 1, ...].
                    value = array_ops.expand_dims(value, axis=1)
                    # [batch_size, num_groups, ...].
                    value = array_ops.gather(value,
                                             array_ops.zeros([num_groups],
                                                             dtypes.int32),
                                             axis=1)
                    # [batch_size * num_groups, ...]
                    large_batch_context_features[
                        name] = utils.reshape_first_ndims(
                            value, 2, [batch_size * num_groups])

                # For example feature, we have shape [batch_size * num_groups,
                # group_size, ...].
                large_batch_group_features = {}
                for name, value in six.iteritems(per_example_features):
                    # [batch_size, num_groups, group_size, ...].
                    value = array_ops.gather_nd(value, indices)
                    # [batch_size * num_groups, group_size, ...].
                    large_batch_group_features[
                        name] = utils.reshape_first_ndims(
                            value, 3, [batch_size * num_groups, group_size])

            # Do the inference and get scores for the large batch.
            # [batch_size * num_groups, group_size].
            scores = _score_fn(large_batch_context_features,
                               large_batch_group_features,
                               reuse=False)

            with ops.name_scope('accumulate_scores'):
                scores = array_ops.reshape(
                    scores, [batch_size, num_groups, group_size])
                # Reset invalid scores to 0 based on mask.
                scores = array_ops.where(
                    array_ops.gather(array_ops.expand_dims(mask, 2),
                                     array_ops.zeros([group_size],
                                                     dtypes.int32),
                                     axis=2), scores,
                    array_ops.zeros_like(scores))
                # Scatter the group scores into [batch_size, list_size].
                list_scores = array_ops.scatter_nd(indices, scores,
                                                   [batch_size, list_size])
                # Use average.
                list_scores /= math_ops.to_float(group_size)

        if mode == model_fn.ModeKeys.PREDICT:
            return list_scores
        else:
            features.update(context_features)
            features.update(per_example_features)
            return list_scores
Example #19
def encode_listwise_features(features,
                             context_feature_columns,
                             example_feature_columns,
                             input_size=None,
                             mode=tf_estimator.ModeKeys.TRAIN,
                             scope=None):
    """Returns dense tensors from features using feature columns.

  Args:
    features: (dict) mapping feature names (str) to feature values (`tf.Tensor`
      or `tf.SparseTensor`), possibly obtained from input_fn. For context
      features, the tensors are 2-D, while for example features the tensors are
      3-D.
    context_feature_columns: (dict) context feature names to columns.
    example_feature_columns: (dict) example feature names to columns.
    input_size: (int) [DEPRECATED: Use without this argument.] number of
      examples per query. If this is None, input_size is inferred as the size
      of second dimension of the Tensor corresponding to one of the example
      feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    context_features: (dict) A mapping from context feature names to dense
    2-D tensors of shape [batch_size, ...].
    example_features: (dict) A mapping from example feature names to dense
    3-D tensors of shape [batch_size, input_size, ...].

  Raises:
    ValueError: If `input size` is not equal to 2nd dimension of example
    tensors.
  """
    context_features = {}
    if context_feature_columns:
        context_cols_to_tensors = encode_features(
            features, context_feature_columns.values(), mode=mode, scope=scope)
        context_features = {
            name: context_cols_to_tensors[col]
            for name, col in six.iteritems(context_feature_columns)
        }

    # Compute example_features. Note that the keys in `example_feature_columns`
    # dict can be different from the keys in the `features` dict. We only need to
    # reshape the per-example tensors in `features`. To obtain the keys for
    # per-example features, we use the parsing feature specs.
    example_features = {}
    if example_feature_columns:
        if feature_column_lib.is_feature_column_v2(
                example_feature_columns.values()):
            example_specs = tf.compat.v2.feature_column.make_parse_example_spec(
                example_feature_columns.values())
        else:
            example_specs = tf.compat.v1.feature_column.make_parse_example_spec(
                example_feature_columns.values())
        example_name = next(six.iterkeys(example_specs))
        batch_size = tf.shape(input=features[example_name])[0]
        if input_size is None:
            input_size = tf.shape(input=features[example_name])[1]
        # Reshape [batch_size, input_size, ...] to [batch_size * input_size, ...]
        # so that features are encoded.
        reshaped_features = {}
        for name in example_specs:
            if name not in features:
                tf.compat.v1.logging.warn(
                    "Feature {} is not found.".format(name))
                continue
            try:
                reshaped_features[name] = utils.reshape_first_ndims(
                    features[name], 2, [batch_size * input_size])
            except Exception:
                raise ValueError(
                    "2nd dimension of tensor must be equal to input size: {}, "
                    "but found feature {} with shape {}.".format(
                        input_size, name, features[name].get_shape()))

        example_cols_to_tensors = encode_features(
            reshaped_features,
            example_feature_columns.values(),
            mode=mode,
            scope=scope)
        example_features = {
            name: utils.reshape_first_ndims(example_cols_to_tensors[col], 1,
                                            [batch_size, input_size])
            for name, col in six.iteritems(example_feature_columns)
        }

    return context_features, example_features