示例#1
0
  def test_compute_logits(self, mode):
    """Checks groupwise logits and gather-index shapes with and without params.

    The dummy scorer adds the leading dimension of its reshaped output to every
    score, which makes the expected logits depend on the number of shuffles.
    """
    group_size = 2
    shuffle_params = dict(
        num_shuffles_train=2,
        num_shuffles_eval=2,
        num_shuffles_predict=2,
    )

    def _dummy_score_fn(context_features, group_features, mode, params, config):
      del mode, params, config  # Unused by this scorer.
      # 'context': [batch_size * num_groups, 1]
      # 'example_f1': [batch_size * num_groups, group_size, 1]
      context = tf.expand_dims(context_features['context'], axis=1)
      scores = tf.reshape(
          context + group_features['example_f1'], [-1, group_size])
      # Add the shape of the logits to differentiate number of shuffles.
      num_rows = tf.cast(tf.shape(scores)[0], tf.float32)
      return scores + num_rows

    with tf.Graph().as_default():
      tf.compat.v1.set_random_seed(1)
      with tf.compat.v1.Session() as sess:
        identity_fn = feature.make_identity_transform_fn(['context'])
        ranking_model = model._GroupwiseRankingModel(
            _dummy_score_fn,
            group_size=group_size,
            transform_fn=identity_fn,
        )

        # batch_size = 1, list_size = 3, is_valid = [True, True, False]
        partly_valid = (
            {'context': [[1.]], 'example_f1': [[[1.], [2.], [3.]]]},
            [[1., 0, -1]],
        )
        # batch_size = 1, list_size = 3, is_valid = [True, True, True]
        all_valid = (
            {'context': [[1.]], 'example_f1': [[[1.], [2.], [0.]]]},
            [[1., 0, 1]],
        )

        # Each case: (inputs, params, gather-indices shape, expected logits).
        # The first case runs without params; the other two trigger params.
        cases = [
            (partly_valid, None, [1, 3, 2, 2], [[5., 6., 0.]]),
            (partly_valid, shuffle_params, [1, 6, 2, 2], [[8., 9., 0.]]),
            (all_valid, shuffle_params, [1, 6, 2, 2], [[8., 9., 7.]]),
        ]
        for (features, labels), case_params, indices_shape, expected in cases:
          logits = sess.run(
              ranking_model.compute_logits(
                  features, labels, mode, case_params, None))
          self.assertEqual(
              ranking_model._feature_gather_indices.get_shape().as_list(),
              indices_shape)
          self.assertAllEqual(logits, expected)
示例#2
0
 def setUp(self):
   """Resets the default graph and builds the groupwise `Estimator` fixture.

   The estimator writes to a fresh temp model directory and uses a pairwise
   hinge loss weighted by the 'weight' feature with an Adagrad optimizer.
   """
   super(GroupwiseRankingEstimatorTest, self).setUp()
   ops.reset_default_graph()
   self._model_dir = test.get_temp_dir()
   gfile.MakeDirs(self._model_dir)

   # Build the pieces in the same order the nested call would evaluate them.
   identity_transform = feature.make_identity_transform_fn(
       ['context', 'weight'])
   hinge_loss = losses.make_loss_fn(
       losses.RankingLossKey.PAIRWISE_HINGE_LOSS,
       weights_feature_name='weight')
   adagrad = training.AdagradOptimizer(learning_rate=0.1)
   pairwise_head = head.create_ranking_head(
       loss_fn=hinge_loss, optimizer=adagrad)
   groupwise_model_fn = model.make_groupwise_ranking_fn(
       _group_score_fn,
       group_size=2,
       transform_fn=identity_transform,
       ranking_head=pairwise_head)

   self._estimator = estimator.Estimator(groupwise_model_fn, self._model_dir)
示例#3
0
    def test_make_identity_transform_fn(self):
        """Checks the identity transform's context/per-example split."""
        context_value = [[1.0, 1.0], [1.0, 1.0]]
        per_example_value = [[[10.0]], [[10.0]]]
        features = {
            # Input size: (batch_size=2, num_features=2).
            "context": ops.convert_to_tensor(context_value),
            "per_example": ops.convert_to_tensor(per_example_value),
        }
        with session.Session() as sess:
            transform_fn = feature_lib.make_identity_transform_fn(["context"])
            transformed = transform_fn(features, 1)
            context_out, per_example_out = sess.run(transformed)

            # Only the declared context feature lands in the context dict.
            self.assertEqual(["context"], sorted(context_out))
            self.assertAllEqual(context_value, context_out["context"])

            # Everything else is returned as a per-example feature.
            self.assertEqual(["per_example"], sorted(per_example_out))
            self.assertAllEqual(per_example_value,
                                per_example_out["per_example"])
示例#4
0
  def test_make_identity_transform_fn(self):
    """Checks the identity transform's context/per-example split."""
    context_value = [[1.0, 1.0], [1.0, 1.0]]
    per_example_value = [[[10.0]], [[10.0]]]
    with tf.Graph().as_default():
      features = {
          # Input size: (batch_size=2, num_features=2).
          "context": tf.convert_to_tensor(value=context_value),
          "per_example": tf.convert_to_tensor(value=per_example_value),
      }
      with tf.compat.v1.Session() as sess:
        transform_fn = feature_lib.make_identity_transform_fn(["context"])
        transformed = transform_fn(features, 1)
        context_out, per_example_out = sess.run(transformed)

        # Only the declared context feature lands in the context dict.
        self.assertCountEqual(["context"], context_out)
        self.assertAllEqual(context_value, context_out["context"])

        # Everything else is returned as a per-example feature.
        self.assertCountEqual(["per_example"], per_example_out)
        self.assertAllEqual(per_example_value,
                            per_example_out["per_example"])
示例#5
0
文件: model.py 项目: we1559/ranking
  def __init__(self, transform_fn=None):
    """Sets up the feature transformation shared by all ranking models.

    Args:
      transform_fn: (function) Optional user-supplied function that turns raw
        features into dense Tensors. When None, the transform returned by
        `feature.make_identity_transform_fn({})` is used instead. Expected
        signature:
        * Args:
          `features`: A dict of Tensors or SparseTensors holding the raw
            features from an input_fn.
          `mode`: Optional. See estimator `ModeKeys`.
        * Returns:
          `context_features`: A dict of `Tensor`s with shape [batch_size, ...]
          `example_features`: A dict of `Tensor`s with shape [batch_size,
            list_size, ...]
    """
    use_default = transform_fn is None
    self._transform_fn = (
        feature.make_identity_transform_fn({}) if use_default else transform_fn)
示例#6
0
文件: model.py 项目: zhengd07/ranking
def make_groupwise_ranking_fn(group_score_fn,
                              group_size,
                              ranking_head,
                              transform_fn=None):
    """Builds an `Estimator` model_fn for groupwise comparison ranking models.

    Args:
      group_score_fn: Scoring function for a group of examples with
        `group_size` that returns a score per example. It has to follow
        signature:
        * Args:
          `context_features`: A dict of `Tensor`s with shape [batch_size, ...].
          `per_example_features`: A dict of `Tensor`s with shape [batch_size,
            group_size, ...]
          `mode`: Optional. Specifies if this is training, evaluation or
            inference. See `ModeKeys`.
          `params`: Optional dict of hyperparameters, same value passed in the
            `Estimator` constructor.
          `config`: Optional configuration object, same value passed in the
            `Estimator` constructor.
        * Returns: Tensor of shape [batch_size, group_size] containing
          per-example scores.
      group_size: An integer denoting the number of examples in
        `group_score_fn`. Must be positive.
      ranking_head: A `head._RankingHead` object.
      transform_fn: Function transforming the raw features into dense tensors.
        Defaults to `feature.make_identity_transform_fn({})` when None. It has
        the following signature:
        * Args:
          `features`: A dict of `Tensor`s contains the raw input.
          `mode`: Optional. See estimator `ModeKeys`.
        * Returns:
          `context_features`: A dict of `Tensor`s with shape [batch_size, ...]
          `per_example_features`: A dict of `Tensor`s with shape [batch_size,
            list_size, ...]

    Returns:
      An `Estimator` `model_fn` (see estimator.py) with the following
      signature:
      * Args:
        * `features`: dict of Tensors of shape [batch_size, list_size, ...] for
          per-example features and shape [batch_size, ...] for non-example
          context features.
        * `labels`: Tensor with shape [batch_size, list_size] denoting
          relevance.
        * `mode`: No difference.
        * `params`: No difference.
        * `config`: No difference.
      * Returns:
        `EstimatorSpec`

    Raises:
      ValueError: when group_size is invalid.
    """
    if group_size <= 0:
        raise ValueError('Invalid group_size %d' % group_size)
    if transform_fn is None:
        transform_fn = feature.make_identity_transform_fn({})

    def _call_transform_fn(features, mode):
        """Calls `transform_fn`, passing `mode` only if it accepts it."""
        if 'mode' in function_utils.fn_args(transform_fn):
            return transform_fn(features, mode=mode)
        return transform_fn(features)

    def _groupwise_dnn_v2(features, labels, mode, params, config):
        """Builds the groupwise scoring graph and returns per-example logits.

        Outside of PREDICT mode, `features` is updated in place with the
        transformed context and per-example features.
        """
        with ops.name_scope('transform'):
            context_features, per_example_features = _call_transform_fn(
                features, mode)

        def _score_fn(context_features, group_features, reuse):
            with variable_scope.variable_scope('group_score', reuse=reuse):
                return group_score_fn(context_features, group_features, mode,
                                      params, config)

        # Scatter/Gather per-example scores through groupwise comparison. Each
        # instance in a mini-batch will form a number of groups. Each group of
        # examples is scored by 'score_fn' and scores for individual examples
        # are accumulated over groups.
        with ops.name_scope('groupwise_dnn_v2'):
            with ops.name_scope('infer_sizes'):
                if labels is not None:
                    batch_size, list_size = array_ops.unstack(
                        array_ops.shape(labels))
                    is_valid = utils.is_label_valid(labels)
                else:
                    # Infer batch_size and list_size from a feature.
                    example_tensor_shape = array_ops.shape(
                        next(six.itervalues(per_example_features)))
                    batch_size = example_tensor_shape[0]
                    list_size = example_tensor_shape[1]
                    is_valid = utils.is_label_valid(
                        array_ops.ones([batch_size, list_size]))
            # NOTE(review): both sizes come from `array_ops.shape`, so this
            # guard looks like it cannot fire -- kept as-is, confirm before
            # removing.
            if batch_size is None or list_size is None:
                raise ValueError('Invalid batch_size=%s or list_size=%s' %
                                 (batch_size, list_size))

            # For each example feature, assume the shape is [batch_size, list_size,
            # feature_size], the groups are formed along the 2nd dim. Each group has a
            # 'group_size' number of indices in [0, list_size). Based on these
            # indices, we can gather the example feature into a sub-tensor for each
            # group. The total number of groups we have for a mini-batch is batch_size
            # * num_groups. Inside each group, we have a 'group_size' number of
            # examples.
            indices, mask = _form_group_indices_nd(
                is_valid,
                group_size,
                shuffle=(mode != model_fn.ModeKeys.PREDICT))
            num_groups = array_ops.shape(mask)[1]

            with ops.name_scope('group_features'):
                # For context features, We have shape [batch_size * num_groups, ...].
                large_batch_context_features = {}
                for name, value in six.iteritems(context_features):
                    # [batch_size, 1, ...].
                    value = array_ops.expand_dims(value, axis=1)
                    # [batch_size, num_groups, ...].
                    value = array_ops.gather(value,
                                             array_ops.zeros([num_groups],
                                                             dtypes.int32),
                                             axis=1)
                    # [batch_size * num_groups, ...]
                    large_batch_context_features[
                        name] = utils.reshape_first_ndims(
                            value, 2, [batch_size * num_groups])

                # For example feature, we have shape [batch_size * num_groups,
                # group_size, ...].
                large_batch_group_features = {}
                for name, value in six.iteritems(per_example_features):
                    # [batch_size, num_groups, group_size, ...].
                    value = array_ops.gather_nd(value, indices)
                    # [batch_size * num_groups, group_size, ...].
                    large_batch_group_features[
                        name] = utils.reshape_first_ndims(
                            value, 3, [batch_size * num_groups, group_size])

            # Do the inference and get scores for the large batch.
            # [batch_size * num_groups, group_size].
            scores = _score_fn(large_batch_context_features,
                               large_batch_group_features,
                               reuse=False)

            with ops.name_scope('accumulate_scores'):
                # [batch_size, num_groups, group_size].
                scores = array_ops.reshape(
                    scores, [batch_size, num_groups, group_size])
                # Reset invalid scores to 0 based on mask.
                scores = array_ops.where(
                    array_ops.gather(array_ops.expand_dims(mask, 2),
                                     array_ops.zeros([group_size],
                                                     dtypes.int32),
                                     axis=2), scores,
                    array_ops.zeros_like(scores))
                # Scatter group scores back onto the list: [batch_size, list_size].
                list_scores = array_ops.scatter_nd(indices, scores,
                                                   [batch_size, list_size])
                # Use average. `math_ops.to_float` is deprecated in favor of
                # an explicit float32 cast.
                list_scores /= math_ops.cast(group_size, dtypes.float32)

        # Expose the transformed features to the head for training and
        # evaluation; PREDICT leaves the caller's `features` untouched.
        if mode != model_fn.ModeKeys.PREDICT:
            features.update(context_features)
            features.update(per_example_features)
        return list_scores

    def _model_fn(features, labels, mode, params, config):
        """Defines an `Estimator` model_fn."""
        params = params or {}

        tf_logging.info('Use groupwise dnn v2.')
        logits = _groupwise_dnn_v2(features, labels, mode, params, config)

        return ranking_head.create_estimator_spec(features=features,
                                                  mode=mode,
                                                  logits=logits,
                                                  labels=labels)

    return _model_fn