def test_make_sigmoid_cross_entropy_loss_fn(self):
        scores = [[0.2, 0.5, 0.3], [0.2, 0.3, 0.5]]
        labels = [[0., 0., 1.], [0., 0., 1.]]
        weights = [[2.], [1.]]
        weights_feature_name = 'weights'
        features = {weights_feature_name: weights}
        with self.cached_session():
            loss_fn_simple = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.SIGMOID_CROSS_ENTROPY_LOSS)
            self.assertAlmostEqual(
                loss_fn_simple(labels, scores, features).eval(),
                (_sigmoid_cross_entropy(labels[0], scores[0]) +
                 _sigmoid_cross_entropy(labels[1], scores[1])) / 6.,
                places=5)

            loss_fn_weighted = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.SIGMOID_CROSS_ENTROPY_LOSS,
                weights_feature_name=weights_feature_name)
            self.assertAlmostEqual(
                loss_fn_weighted(labels, scores, features).eval(),
                (_sigmoid_cross_entropy(labels[0], scores[0]) * 2.0 +
                 _sigmoid_cross_entropy(labels[1], scores[1])) / 6.,
                places=5)

            # Test loss reduction method.
            # Two reduction methods should return different loss values.
            loss_fn_1 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.SIGMOID_CROSS_ENTROPY_LOSS,
                reduction=core_losses.Reduction.SUM)
            loss_fn_2 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.SIGMOID_CROSS_ENTROPY_LOSS,
                reduction=core_losses.Reduction.MEAN)
            self.assertNotAlmostEqual(
                loss_fn_1(labels, scores, features).eval(),
                loss_fn_2(labels, scores, features).eval())
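
# A minimal pure-Python sketch of the `_sigmoid_cross_entropy` helper the
# assertions above assume: the numerically stable per-item form
# max(x, 0) - x * z + log(1 + exp(-|x|)), summed over one list, so the test
# divides by the total item count (2 lists x 3 items = 6).
import math

def _sigmoid_cross_entropy(labels, logits):
  def per_item(label, logit):
    return max(logit, 0.) - logit * label + math.log(1. + math.exp(-abs(logit)))
  return sum(per_item(l, s) for l, s in zip(labels, logits))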
Example #2
  def test_make_softmax_loss_fn(self):
    scores = [[1., 3., 2.], [1., 2., 3.]]
    labels = [[0., 0., 1.], [0., 0., 2.]]
    weights = [[2.], [1.]]
    weights_feature_name = 'weights'
    features = {weights_feature_name: weights}
    with self.cached_session():
      loss_fn_simple = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.SOFTMAX_LOSS)
      self.assertAlmostEqual(
          loss_fn_simple(labels, scores, features).eval(),
          -(math.log(_softmax(scores[0])[2]) +
            math.log(_softmax(scores[1])[2]) * 2.) / 2.,
          places=5)

      loss_fn_weighted = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.SOFTMAX_LOSS,
          weights_feature_name=weights_feature_name)
      self.assertAlmostEqual(
          loss_fn_weighted(labels, scores, features).eval(),
          -(math.log(_softmax(scores[0])[2]) * 2. +
            math.log(_softmax(scores[1])[2]) * 2. * 1.) / 2.,
          places=5)

      # Test loss reduction method.
      # Two reduction methods should return different loss values.
      loss_fn_1 = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.SOFTMAX_LOSS,
          reduction=tf.compat.v1.losses.Reduction.SUM)
      loss_fn_2 = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.SOFTMAX_LOSS,
          reduction=tf.compat.v1.losses.Reduction.MEAN)
      self.assertNotAlmostEqual(
          loss_fn_1(labels, scores, features).eval(),
          loss_fn_2(labels, scores, features).eval())
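
# A sketch of the `_softmax` helper assumed by the expectations above: a plain
# softmax over one score list (no temperature or max-shift stabilization,
# which is fine for these small inputs).
import math

def _softmax(values):
  exps = [math.exp(v) for v in values]
  total = sum(exps)
  return [e / total for e in exps]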
Example #3
  def test_make_mean_squared_loss_fn(self):
    scores = [[0.2, 0.5, 0.3], [0.2, 0.3, 0.5]]
    labels = [[0., 0., 1.], [0., 0., 1.]]
    weights = [[2.], [1.]]
    weights_feature_name = 'weights'
    features = {weights_feature_name: weights}
    with self.cached_session():
      loss_fn_simple = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.MEAN_SQUARED_LOSS)
      self.assertAlmostEqual(
          loss_fn_simple(labels, scores, features).eval(),
          (_mean_squared_error(labels[0], scores[0]) +
           _mean_squared_error(labels[1], scores[1])) / 6.,
          places=5)

      loss_fn_weighted = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.MEAN_SQUARED_LOSS,
          weights_feature_name=weights_feature_name)
      self.assertAlmostEqual(
          loss_fn_weighted(labels, scores, features).eval(),
          (_mean_squared_error(labels[0], scores[0]) * 2.0 +
           _mean_squared_error(labels[1], scores[1])) / 6.,
          places=5)

      # Test loss reduction method.
      # Two reduction methods should return different loss values.
      loss_fn_1 = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.MEAN_SQUARED_LOSS,
          reduction=tf.compat.v1.losses.Reduction.SUM)
      loss_fn_2 = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.MEAN_SQUARED_LOSS,
          reduction=tf.compat.v1.losses.Reduction.MEAN)
      self.assertNotAlmostEqual(
          loss_fn_1(labels, scores, features).eval(),
          loss_fn_2(labels, scores, features).eval())
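
# A sketch of the `_mean_squared_error` helper assumed above: the sum (not
# the mean) of per-item squared errors for one list, which is why the test
# divides by the total item count (2 lists x 3 items = 6).
def _mean_squared_error(labels, scores):
  return sum((l - s)**2 for l, s in zip(labels, scores))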
Example #4
  def test_make_list_mle_loss_fn(self):
    with tf.Graph().as_default():
      scores = [[0., ln(3), ln(2)], [0., ln(2), ln(3)]]
      labels = [[0., 2., 1.], [1., 0., 2.]]
      weights = [[2.], [1.]]
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      with self.cached_session():
        loss_fn_simple = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.LIST_MLE_LOSS)
        self.assertAlmostEqual(
            loss_fn_simple(labels, scores, features).eval(),
            -((ln(3. / (3 + 2 + 1)) + ln(2. / (2 + 1)) + ln(1. / 1)) +
              (ln(3. / (3 + 2 + 1)) + ln(1. / (1 + 2)) + ln(2. / 2))) / 2,
            places=5)
        loss_fn_weighted = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.LIST_MLE_LOSS,
            weights_feature_name=weights_feature_name)
        self.assertAlmostEqual(
            loss_fn_weighted(labels, scores, features).eval(),
            -(2 * (ln(3. / (3 + 2 + 1)) + ln(2. / (2 + 1)) + ln(1. / 1)) + 1 *
              (ln(3. / (3 + 2 + 1)) + ln(1. / (1 + 2)) + ln(2. / 2))) / 2,
            places=5)

        # Test loss reduction method.
        # Two reduction methods should return different loss values.
        loss_fn_1 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.LIST_MLE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.SUM)
        loss_fn_2 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.LIST_MLE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN)
        self.assertNotAlmostEqual(
            loss_fn_1(labels, scores, features).eval(),
            loss_fn_2(labels, scores, features).eval())
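
# The expected values above are Plackett-Luce log-likelihoods of sorting each
# list by label. A hedged pure-Python sketch (the `_list_mle` name is ours,
# not the library's):
import math

def _list_mle(labels, scores):
  # Visit items in decreasing-label order; each step is a softmax choice
  # among the items not yet placed.
  order = sorted(range(len(labels)), key=lambda i: -labels[i])
  exp_scores = [math.exp(scores[i]) for i in order]
  log_lik = sum(
      math.log(exp_scores[k] / sum(exp_scores[k:]))
      for k in range(len(exp_scores)))
  return -log_lik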
Example #5
    def test_make_gumbel_approx_ndcg_fn(self):
        with tf.Graph().as_default():
            scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
            labels = [[0., 2., 1.], [1., 0., 3.], [1., 0., 0.]]
            weights = [[2.], [1.], [1.]]
            # sampled_scores = [[-.291, -1.643, -2.826],
            #                   [-.0866, -2.924, -3.530],
            #                   [-12.42, -9.492, -7.939e-5],
            #                   [-8.859, -6.830, -1.223e-3],
            #                   [-.8930, -.5266, -45.80183],
            #                   [-.6650, -.7220, -45.94149]]
            # sampled_rank = [[1, 2, 3],
            #                 [1, 2, 3],
            #                 [3, 2, 1],
            #                 [3, 2, 1],
            #                 [2, 1, 3],
            #                 [1, 2, 3]]
            # expanded_labels = [[0., 2., 1.], [0., 2., 1.],
            #                    [1., 0., 3.], [1., 0., 3.],
            #                    [1., 0., 0.], [1., 0., 0.]]
            # expanded_weights = [[2.], [2.], [1.], [1.], [1.], [1.]]

            weights_feature_name = 'weights'
            features = {weights_feature_name: weights}
            with self.cached_session():
                loss_fn_simple = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.GUMBEL_APPROX_NDCG_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.SUM,
                    params={'temperature': 0.001},
                    gumbel_params={
                        'sample_size': 2,
                        'seed': 1
                    })
                self.assertAlmostEqual(
                    loss_fn_simple(labels, scores, features).eval(),
                    -((2 / (3 / ln(2) + 1 / ln(3))) * (3 / ln(3) + 1 / ln(4)) +
                      (2 / (7 / ln(2) + 1 / ln(3))) * (7 / ln(2) + 1 / ln(4)) +
                      (1 / (1 / ln(2))) * (1 / ln(3)) + 1),
                    places=5)

                loss_fn_weighted = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.GUMBEL_APPROX_NDCG_LOSS,
                    weights_feature_name=weights_feature_name,
                    reduction=tf.compat.v1.losses.Reduction.SUM,
                    params={'temperature': 0.001},
                    gumbel_params={
                        'sample_size': 2,
                        'seed': 1
                    })
                self.assertAlmostEqual(
                    loss_fn_weighted(labels, scores, features).eval(),
                    -(2 * (2 / (3 / ln(2) + 1 / ln(3))) *
                      (3 / ln(3) + 1 / ln(4)) + 1 * (2 /
                                                     (7 / ln(2) + 1 / ln(3))) *
                      (7 / ln(2) + 1 / ln(4)) + 1 * (1 / (1 / ln(2))) *
                      (1 / ln(3)) + 1 * 1),
                    places=5)
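
# The Gumbel variant first draws `sample_size` perturbed copies of each score
# list (the commented sampled_scores, which exponentiate to probabilities
# summing to one) and then applies ApproxNDCG to the expanded batch. A hedged
# sketch of that sampling step; the library's exact seeding and
# parameterization will differ:
import math
import random

def _gumbel_log_softmax_sample(scores, sample_size, temperature=1.0):
  samples = []
  for _ in range(sample_size):
    # Gumbel(0, 1) noise: -log(-log(U)) with U ~ Uniform(0, 1).
    noisy = [(s - math.log(-math.log(random.random()))) / temperature
             for s in scores]
    log_z = math.log(sum(math.exp(v) for v in noisy))
    samples.append([v - log_z for v in noisy])
  return samples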
Example #6
  def _model_fn(self):
    """Returns a model_fn."""

    def _train_op_fn(loss):
      """Defines train op used in ranking head."""
      update_ops = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.UPDATE_OPS)
      minimize_op = self._optimizer.minimize(
          loss=loss, global_step=tf.compat.v1.train.get_global_step())
      train_op = tf.group([update_ops, minimize_op])
      return train_op

    ranking_head = head.create_ranking_head(
        loss_fn=losses.make_loss_fn(
            self._hparams.get("loss"),
            weights_feature_name=self._hparams.get(_LOSS_WEIGHT),
            reduction=self._loss_reduction),
        eval_metric_fns=self._eval_metric_fns(),
        train_op_fn=_train_op_fn)

    return model.make_groupwise_ranking_fn(
        group_score_fn=self._group_score_fn,
        group_size=1,
        transform_fn=self._transform_fn,
        ranking_head=ranking_head)
Example #7
  def test_make_loss_metric_fn(self):
    scores = [[1., 3., 2.], [1., 2., 3.]]
    labels = [[0., 0., 1.], [0., 0., 2.]]
    weights = [[2.], [1.]]
    weights_feature_name = 'weights'
    features = {
        weights_feature_name: weights,
    }
    m = ranking_losses.make_loss_metric_fn(
        ranking_losses.RankingLossKey.SOFTMAX_LOSS)
    m_w = ranking_losses.make_loss_metric_fn(
        ranking_losses.RankingLossKey.SOFTMAX_LOSS,
        weights_feature_name=weights_feature_name)
    self._check_metrics([
        (m(labels, scores,
           features), -(math.log(_softmax(scores[0])[2]) +
                        math.log(_softmax(scores[1])[2]) * 2.) / 3.),
        (m_w(labels, scores,
             features), -(math.log(_softmax(scores[0])[2]) * 2. +
                          math.log(_softmax(scores[1])[2]) * 2. * 1.) / 4.),
    ])

    # Value of loss metric is the same as loss with MEAN reduction.
    with self.cached_session():
      loss_fn_mean = ranking_losses.make_loss_fn(
          ranking_losses.RankingLossKey.SOFTMAX_LOSS,
          reduction=tf.compat.v1.losses.Reduction.MEAN)
      loss_mean = loss_fn_mean(labels, scores, features).eval()
    self._check_metrics([
        (m(labels, scores, features), loss_mean),
    ])
Example #8
  def test_make_pairwise_mse_loss(self):
    with tf.Graph().as_default():
      scores = [[1., 3., 2.], [1., 2., 3.]]
      labels = [[0., 0., 1.], [0., 0., 2.]]
      weights = [[1.], [2.]]
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      with self.cached_session():
        loss_fn_simple = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.PAIRWISE_MSE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN)
        expected = (((2. - 3.) - (1. - 0.))**2 + ((2. - 1.) - (1. - 0.))**2 +
                    ((3. - 1.) - (0. - 0.))**2 + ((3. - 2.) - (2. - 0.))**2 +
                    ((3. - 1.) - (2. - 0.))**2 + ((2. - 1.) -
                                                  (0. - 0.))**2) / 6.

        self.assertAlmostEqual(
            loss_fn_simple(labels, scores, features).eval(), expected, places=5)

        loss_fn_weighted = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.PAIRWISE_MSE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN,
            weights_feature_name=weights_feature_name)
        expected = (((2. - 3.) - (1. - 0.))**2 + ((2. - 1.) - (1. - 0.))**2 +
                    ((3. - 1.) - (0. - 0.))**2 + 2 * ((3. - 2.) -
                                                      (2. - 0.))**2 + 2 *
                    ((3. - 1.) - (2. - 0.))**2 + 2 * ((2. - 1.) -
                                                      (0. - 0.))**2) / 9.
        self.assertAlmostEqual(
            loss_fn_weighted(labels, scores, features).eval(),
            expected,
            places=5)

        # Test loss reduction method.
        # Two reduction methods should return different loss values.
        loss_fn_1 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.PAIRWISE_MSE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.SUM)
        loss_fn_2 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.PAIRWISE_MSE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN)
        self.assertNotAlmostEqual(
            loss_fn_1(labels, scores, features).eval(),
            loss_fn_2(labels, scores, features).eval())
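
# The expectations above sum, over every unordered pair, the squared error
# between the score difference and the label difference (3 pairs per list,
# 6 terms under MEAN reduction). A sketch of the per-list terms:
def _pairwise_mse_terms(labels, scores):
  n = len(labels)
  return [((scores[i] - scores[j]) - (labels[i] - labels[j]))**2
          for i in range(n) for j in range(i + 1, n)]

# e.g. the unweighted `expected` equals
# sum(_pairwise_mse_terms([0., 0., 1.], [1., 3., 2.]) +
#     _pairwise_mse_terms([0., 0., 2.], [1., 2., 3.])) / 6.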
Example #9
    def test_make_neural_sort_cross_entropy_loss_fn(self):
        with tf.Graph().as_default():
            scores = [[0.2, 0.5, 0.3], [0.2, 0.3, 0.5]]
            labels = [[0., 0., 1.], [0., 0., 1.]]
            weights = [[2.], [1.]]
            p_scores = _neural_sort(scores)
            p_labels = _neural_sort(labels)
            weights_feature_name = 'weights'
            features = {weights_feature_name: weights}
            with self.cached_session():
                loss_fn_simple = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.
                    NEURAL_SORT_CROSS_ENTROPY_LOSS)
                self.assertAlmostEqual(
                    loss_fn_simple(labels, scores, features).eval(),
                    (_softmax_cross_entropy(p_labels[0], p_scores[0]) +
                     _softmax_cross_entropy(p_labels[1], p_scores[1])) / 6.,
                    places=5)

                loss_fn_weighted = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.
                    NEURAL_SORT_CROSS_ENTROPY_LOSS,
                    weights_feature_name=weights_feature_name)
                self.assertAlmostEqual(
                    loss_fn_weighted(labels, scores, features).eval(),
                    (_softmax_cross_entropy(p_labels[0], p_scores[0]) * 2.0 +
                     _softmax_cross_entropy(p_labels[1], p_scores[1])) / 6.,
                    places=5)

                # Test loss reduction method.
                # Two reduction methods should return different loss values.
                loss_fn_1 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.
                    NEURAL_SORT_CROSS_ENTROPY_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.SUM)
                loss_fn_2 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.
                    NEURAL_SORT_CROSS_ENTROPY_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.MEAN)
                self.assertNotAlmostEqual(
                    loss_fn_1(labels, scores, features).eval(),
                    loss_fn_2(labels, scores, features).eval())
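
# Sketches of the two helpers assumed above. `_neural_sort` is the relaxed
# permutation matrix of Grover et al. (2019), row i = softmax(((n + 1 - 2i)
# * s - A.1) / tau) with A[j][k] = |s_j - s_k|; `_softmax_cross_entropy` is a
# plausible row-wise cross entropy between two such matrices, summed so that
# the test's division by 6 averages over the 6 rows. `_softmax` is the sketch
# shown earlier.
import math

def _neural_sort(batch_scores, temperature=1.0):
  perms = []
  for s in batch_scores:
    n = len(s)
    abs_diff_rowsum = [sum(abs(sj - sk) for sk in s) for sj in s]
    perms.append([
        _softmax([((n + 1 - 2 * i) * s[j] - abs_diff_rowsum[j]) / temperature
                  for j in range(n)])
        for i in range(1, n + 1)
    ])
  return perms

def _softmax_cross_entropy(label_rows, pred_rows):
  return sum(-sum(l * math.log(p) for l, p in zip(lr, pr))
             for lr, pr in zip(label_rows, pred_rows))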
Example #10
  def test_make_circle_loss(self):
    with tf.Graph().as_default():
      scores = [[0.1, 0.3, 0.2], [0.1, 0.2, 0.3]]
      labels = [[0., 0., 1.], [0., 1., 2.]]
      weights = [[2.], [1.]]
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      with self.cached_session():
        loss_fn_simple = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.CIRCLE_LOSS)
        loss_0, _, _ = _circle_loss(labels[0], scores[0])
        loss_1, _, _ = _circle_loss(labels[1], scores[1])
        expected = (math.log1p(loss_0) + math.log1p(loss_1)) / 2.
        self.assertAlmostEqual(
            loss_fn_simple(labels, scores, features).eval(),
            expected, places=5)

        loss_fn_weighted = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.CIRCLE_LOSS,
            weights_feature_name=weights_feature_name)
        loss_0, _, _ = _circle_loss(labels[0], scores[0])
        loss_1, _, _ = _circle_loss(labels[1], scores[1])
        expected = (math.log1p(loss_0) * 2. + math.log1p(loss_1) * 1.) / 2.
        self.assertAlmostEqual(
            loss_fn_weighted(labels, scores, features).eval(),
            expected, places=5)

        # Test loss reduction method.
        # Two reduction methods should return different loss values.
        loss_fn_1 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.CIRCLE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.SUM)
        loss_fn_2 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.CIRCLE_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN)
        self.assertNotAlmostEqual(
            loss_fn_1(labels, scores, features).eval(),
            loss_fn_2(labels, scores, features).eval())
Example #11
  def setUp(self):
    super(GroupwiseRankingEstimatorTest, self).setUp()
    ops.reset_default_graph()
    self._model_dir = test.get_temp_dir()
    gfile.MakeDirs(self._model_dir)
    model_fn = model.make_groupwise_ranking_fn(
        _group_score_fn,
        group_size=2,
        transform_fn=feature.make_identity_transform_fn(['context', 'weight']),
        ranking_head=head.create_ranking_head(
            loss_fn=losses.make_loss_fn(
                losses.RankingLossKey.PAIRWISE_HINGE_LOSS,
                weights_feature_name='weight'),
            optimizer=training.AdagradOptimizer(learning_rate=0.1)))
    self._estimator = estimator.Estimator(model_fn, self._model_dir)
Example #12
  def test_make_neural_sort_ndcg_fn(self):
    with tf.Graph().as_default():
      scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
      labels = [[0., 2., 1.], [1., 0., 3.], [0., 0., 0.]]
      weights = [[2.], [1.], [1.]]
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      with self.cached_session():
        loss_fn_simple = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            params={'temperature': 0.1},
            reduction=tf.compat.v1.losses.Reduction.SUM)
        self.assertAlmostEqual(
            loss_fn_simple(labels, scores, features).eval(),
            -((1 / (3 / ln(2) + 1 / ln(3))) * (3 / ln(4) + 1 / ln(3)) +
              (1 / (7 / ln(2) + 1 / ln(3))) * (7 / ln(2) + 1 / ln(4))),
            places=5)

        loss_fn_weighted = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            params={'temperature': 0.1},
            weights_feature_name=weights_feature_name,
            reduction=tf.compat.v1.losses.Reduction.SUM)
        self.assertAlmostEqual(
            loss_fn_weighted(labels, scores, features).eval(),
            -(2 * (1 / (3 / ln(2) + 1 / ln(3))) * (3 / ln(4) + 1 / ln(3)) + 1 *
              (1 / (7 / ln(2) + 1 / ln(3))) * (7 / ln(2) + 1 / ln(4))),
            places=5)

        # Test different temperatures.
        loss_fn_1 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            params={'temperature': 0.1})
        loss_fn_2 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            params={'temperature': 100.})
        self.assertNotAlmostEqual(
            loss_fn_1(labels, scores, features).eval(),
            loss_fn_2(labels, scores, features).eval())

        # Test loss reduction method.
        # Two reduction methods should return different loss values.
        loss_fn_1 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            reduction=tf.compat.v1.losses.Reduction.SUM)
        loss_fn_2 = ranking_losses.make_loss_fn(
            ranking_losses.RankingLossKey.NEURAL_SORT_NDCG_LOSS,
            reduction=tf.compat.v1.losses.Reduction.MEAN)
        self.assertNotAlmostEqual(
            loss_fn_1(labels, scores, features).eval(),
            loss_fn_2(labels, scores, features).eval())
Example #13
    def test_make_approx_ndcg_fn(self):
        scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
        labels = [[0., 2., 1.], [1., 0., 3.], [0., 0., 0.]]
        weights = [[2.], [1.], [1.]]
        weights_feature_name = 'weights'
        features = {weights_feature_name: weights}
        with self.cached_session():
            loss_fn_simple = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                reduction=core_losses.Reduction.SUM)
            self.assertAlmostEqual(
                loss_fn_simple(labels, scores, features).eval(),
                -((1 / (3 / ln(2) + 1 / ln(3))) * (3 / ln(4) + 1 / ln(3)) +
                  (1 / (7 / ln(2) + 1 / ln(3))) * (7 / ln(2) + 1 / ln(4))),
                places=5)

            loss_fn_weighted = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                weights_feature_name=weights_feature_name,
                reduction=core_losses.Reduction.SUM)
            self.assertAlmostEqual(
                loss_fn_weighted(labels, scores, features).eval(),
                -(2 * (1 /
                       (3 / ln(2) + 1 / ln(3))) * (3 / ln(4) + 1 / ln(3)) + 1 *
                  (1 / (7 / ln(2) + 1 / ln(3))) * (7 / ln(2) + 1 / ln(4))),
                places=5)

            # Test different alphas.
            loss_fn_1 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                extra_args={'alpha': 0.1})
            loss_fn_2 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                extra_args={'alpha': 100.})
            self.assertNotAlmostEqual(
                loss_fn_1(labels, scores, features).eval(),
                loss_fn_2(labels, scores, features).eval())

            # Test loss reduction method.
            # Two reduction methods should return different loss values.
            loss_fn_1 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                reduction=core_losses.Reduction.SUM)
            loss_fn_2 = ranking_losses.make_loss_fn(
                ranking_losses.RankingLossKey.APPROX_NDCG_LOSS,
                reduction=core_losses.Reduction.MEAN)
            self.assertNotAlmostEqual(
                loss_fn_1(labels, scores, features).eval(),
                loss_fn_2(labels, scores, features).eval())
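
# How the expected numbers arise, sketched: the loss is the negated NDCG with
# gain 2**label - 1 and natural-log discount 1/ln(1 + rank); the all-zero
# third list contributes nothing. `ranks` below are the positions items take
# when sorted by score, a hard-ranking simplification of the differentiable
# approximation the real loss uses (controlled by `alpha`):
import math

def _ndcg(labels, ranks):
  dcg = sum((2.**l - 1.) / math.log(1. + r) for l, r in zip(labels, ranks))
  ideal = sum((2.**l - 1.) / math.log(1. + r)
              for r, l in enumerate(sorted(labels, reverse=True), start=1))
  return dcg / ideal if ideal > 0. else 0.

# First list: labels [0., 2., 1.] with score-sorted ranks [1, 3, 2] give
# (3 / ln(4) + 1 / ln(3)) / (3 / ln(2) + 1 / ln(3)), matching the test.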
Example #14
    def test_make_approx_mrr_fn(self):
        with tf.Graph().as_default():
            scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
            labels = [[0., 0., 1.], [1., 0., 1.], [0., 0., 0.]]
            weights = [[2.], [1.], [1.]]
            weights_feature_name = 'weights'
            features = {weights_feature_name: weights}
            with self.cached_session():
                loss_fn_simple = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.SUM)
                self.assertAlmostEqual(
                    loss_fn_simple(labels, scores, features).eval(),
                    -((1 / 2.) + 1 / 2. * (1 / 3. + 1 / 1.)),
                    places=5)

                loss_fn_weighted = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    weights_feature_name=weights_feature_name,
                    reduction=tf.compat.v1.losses.Reduction.SUM)
                self.assertAlmostEqual(
                    loss_fn_weighted(labels, scores, features).eval(),
                    -(2 * 1 / 2. + 1 * 1 / 2. * (1 / 3. + 1 / 1.)),
                    places=5)

                # Test different temperatures.
                loss_fn_1 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    params={'temperature': 10})
                loss_fn_2 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    params={'temperature': 0.01})
                self.assertNotAlmostEqual(
                    loss_fn_1(labels, scores, features).eval(),
                    loss_fn_2(labels, scores, features).eval())

                # Test loss reduction method.
                # Two reduction methods should return different loss values.
                loss_fn_1 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.SUM)
                loss_fn_2 = ranking_losses.make_loss_fn(
                    ranking_losses.RankingLossKey.APPROX_MRR_LOSS,
                    reduction=tf.compat.v1.losses.Reduction.MEAN)
                self.assertNotAlmostEqual(
                    loss_fn_1(labels, scores, features).eval(),
                    loss_fn_2(labels, scores, features).eval())
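
# The expectations, sketched: with ranks taken from sorting by score, the
# quantity behind APPROX_MRR_LOSS here averages reciprocal ranks over each
# list's relevant items (the all-zero third list contributes nothing):
def _mean_reciprocal_rank(labels, ranks):
  recips = [1. / r for l, r in zip(labels, ranks) if l > 0.]
  return sum(recips) / len(recips) if recips else 0.

# List 1: one relevant item at rank 2 -> 1/2. List 2: relevant items at
# ranks 3 and 1 -> (1/3 + 1/1) / 2. Negated and summed, this matches the test.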
Example #15
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])

    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    if "feed_features" in special_flags:
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
    else:
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    if "new_pooling" in special_flags:
        pooled = mimic_pooling(model.get_sequence_output(), bert_config.hidden_size, bert_config.initializer_range)
    else:
        pooled = model.get_pooled_output()

    if train_config.checkpoint_type != "bert_nli" and train_config.use_old_logits:
        tf_logging.info("Use old version of logistic regression")
        logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(pooled)
    else:
        tf_logging.info("Use fixed version of logistic regression")
        output_weights = tf.compat.v1.get_variable(
            "output_weights", [3, bert_config.hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02)
        )

        output_bias = tf.compat.v1.get_variable(
            "output_bias", [3],
            initializer=tf.compat.v1.zeros_initializer()
        )

        if is_training:
            pooled = dropout(pooled, 0.1)

        logits = tf.matmul(pooled, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

    # TODO given topic_ids, reorder logits to [num_group, num_max_items]
    ndcg_ranking_loss = make_loss_fn(RankingLossKey.APPROX_NDCG_LOSS)
    one_hot_labels = tf.one_hot(label_ids, 3)
    # logits: [batch_size, num_classes]. Transpose so the class axis plays the
    # role of the ranked list, then bias the logits toward the one-hot labels.
    logits_t = tf.transpose(logits, [1, 0])
    one_hot_labels_t = tf.transpose(one_hot_labels, [1, 0])
    fake_logit = logits_t * 1e-5 + one_hot_labels_t
    loss = ndcg_ranking_loss(fake_logit, one_hot_labels_t, {})
    tvars = tf.compat.v1.trainable_variables()

    initialized_variable_names = {}

    if train_config.checkpoint_type == "bert":
        assignment_fn = tlm.training.assignment_map.get_bert_assignment_map
    elif train_config.checkpoint_type == "v2":
        assignment_fn = tlm.training.assignment_map.assignment_map_v2_to_v2
    elif train_config.checkpoint_type == "bert_nli":
        assignment_fn = tlm.training.assignment_map.get_bert_nli_assignment_map
    elif train_config.checkpoint_type == "attention_bert":
        assignment_fn = tlm.training.assignment_map.bert_assignment_only_attention
    elif train_config.checkpoint_type == "attention_bert_v2":
        assignment_fn = tlm.training.assignment_map.assignment_map_v2_to_v2_only_attention
    elif train_config.checkpoint_type == "wo_attention_bert":
        assignment_fn = tlm.training.assignment_map.bert_assignment_wo_attention
    elif train_config.checkpoint_type == "as_is":
        assignment_fn = tlm.training.assignment_map.get_assignment_map_as_is
    else:
        if not train_config.init_checkpoint:
            pass
        else:
            raise Exception("init_checkpoint exists, but checkpoint_type is not specified")

    scaffold_fn = None
    if train_config.init_checkpoint:
      assignment_map, initialized_variable_names = assignment_fn(tvars, train_config.init_checkpoint)

      def init_fn():
        tf.compat.v1.train.init_from_checkpoint(train_config.init_checkpoint, assignment_map)
      scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss, train_config.learning_rate, train_config.use_tpu)
        else:
            train_op = optimization.create_optimizer_from_config(loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [
            logits, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
                "input_ids": input_ids,
                "logits": logits
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=predictions,
                scaffold_fn=scaffold_fn)


    return output_spec
Example #16
  def test_make_loss_fn(self):
    scores = [[0.2, 0.5, 0.3], [0.2, 0.3, 0.5]]
    labels = [[0., 0., 1.], [0., 0., 1.]]
    weights = [[2.], [1.]]
    weights_1d = [2., 1.]
    weights_3d = [[[2.], [1.], [0.]], [[0.], [1.], [2.]]]
    weights_feature_name = 'weights'
    weights_1d_feature_name = 'weights_1d'
    weights_3d_feature_name = 'weights_3d'
    features = {
        weights_feature_name: weights,
        weights_1d_feature_name: weights_1d,
        weights_3d_feature_name: weights_3d
    }
    with self.cached_session():
      pairwise_hinge_loss = ranking_losses._pairwise_hinge_loss(labels,
                                                                scores).eval()
      pairwise_hinge_loss_weighted = ranking_losses._pairwise_hinge_loss(
          labels, scores, weights=weights).eval()
      pairwise_hinge_loss_itemwise_weighted = (
          ranking_losses._pairwise_hinge_loss(
              labels, scores, weights=tf.squeeze(weights_3d)).eval())
      mean_squared_loss = ranking_losses._mean_squared_loss(labels,
                                                            scores).eval()
      mean_squared_loss_weighted = ranking_losses._mean_squared_loss(
          labels, scores, weights=weights).eval()
      mean_squared_loss_itemwise_weighted = ranking_losses._mean_squared_loss(
          labels, scores, weights=tf.squeeze(weights_3d)).eval()

      loss_keys = [
          ranking_losses.RankingLossKey.PAIRWISE_HINGE_LOSS,
          ranking_losses.RankingLossKey.MEAN_SQUARED_LOSS
      ]
      loss_fn_simple = ranking_losses.make_loss_fn(loss_keys)
      self.assertAlmostEqual(
          loss_fn_simple(labels, scores, features).eval(),
          pairwise_hinge_loss + mean_squared_loss,
          places=5)

      # With 2-d list-wise weighted examples.
      loss_fn_weighted_example = ranking_losses.make_loss_fn(
          loss_keys, weights_feature_name=weights_feature_name)
      self.assertAlmostEqual(
          loss_fn_weighted_example(labels, scores, features).eval(),
          pairwise_hinge_loss_weighted + mean_squared_loss_weighted,
          places=5)

      # With 1-d list-wise weighted examples.
      loss_fn_weighted_example = ranking_losses.make_loss_fn(
          loss_keys, weights_feature_name=weights_1d_feature_name)
      self.assertAlmostEqual(
          loss_fn_weighted_example(labels, scores, features).eval(),
          pairwise_hinge_loss_weighted + mean_squared_loss_weighted,
          places=5)

      # With 3-d item-wise weighted examples.
      loss_fn_weighted_example = ranking_losses.make_loss_fn(
          loss_keys, weights_feature_name=weights_3d_feature_name)
      self.assertAlmostEqual(
          loss_fn_weighted_example(labels, scores, features).eval(),
          pairwise_hinge_loss_itemwise_weighted +
          mean_squared_loss_itemwise_weighted,
          places=5)

      # With both weighted loss and weighted examples.
      loss_weights = [3., 2.]
      weighted_loss_fn_weighted_example = ranking_losses.make_loss_fn(
          loss_keys, loss_weights, weights_feature_name=weights_feature_name)
      self.assertAlmostEqual(
          weighted_loss_fn_weighted_example(labels, scores, features).eval(),
          pairwise_hinge_loss_weighted * loss_weights[0] +
          mean_squared_loss_weighted * loss_weights[1],
          places=5)

      # Test loss reduction method.
      # Two reduction methods should return different loss values.
      loss_fn_1 = ranking_losses.make_loss_fn(
          loss_keys, reduction=tf.compat.v1.losses.Reduction.SUM)
      loss_fn_2 = ranking_losses.make_loss_fn(
          loss_keys, reduction=tf.compat.v1.losses.Reduction.MEAN)
      self.assertNotAlmostEqual(
          loss_fn_1(labels, scores, features).eval(),
          loss_fn_2(labels, scores, features).eval())

      # Test invalid inputs.
      with self.assertRaisesRegexp(ValueError,
                                   r'loss_keys cannot be None or empty.'):
        ranking_losses.make_loss_fn([])

      with self.assertRaisesRegexp(ValueError,
                                   r'loss_keys cannot be None or empty.'):
        ranking_losses.make_loss_fn('')

      with self.assertRaisesRegexp(
          ValueError, r'loss_keys and loss_weights must have the same size.'):
        ranking_losses.make_loss_fn(loss_keys, [2.0])

      invalid_loss_fn = ranking_losses.make_loss_fn(['invalid_key'])
      with self.assertRaisesRegexp(ValueError,
                                   r'Invalid loss_key: invalid_key.'):
        invalid_loss_fn(labels, scores, features).eval()
Example #17
  def _check_make_pairwise_loss(self, loss_key):
    """Helper function to test `make_loss_fn`."""
    scores = [[1., 3., 2.], [1., 2., 3.]]
    labels = [[0., 0., 1.], [0., 0., 2.]]
    listwise_weights = [[2.], [1.]]
    listwise_weights_expanded = [[2.] * 3, [1.] * 3]
    itemwise_weights = [[2., 3., 4.], [1., 1., 1.]]
    default_weights = [1.] * 3
    weights_feature_name = 'weights'
    list_size = 3.
    features = {}

    loss_fn = ranking_losses.make_loss_fn(loss_key)
    with self.cached_session():
      # Individual lists.
      self.assertAlmostEqual(
          loss_fn([labels[0]], [scores[0]], features).eval(),
          _batch_aggregation(
              [_pairwise_loss(labels[0], scores[0], default_weights,
                              loss_key)]),
          places=5)
      self.assertAlmostEqual(
          loss_fn([labels[1]], [scores[1]], features).eval(),
          _batch_aggregation(
              [_pairwise_loss(labels[1], scores[1], default_weights,
                              loss_key)]),
          places=5)

      # Itemwise weights.
      loss_fn = ranking_losses.make_loss_fn(
          loss_key, weights_feature_name=weights_feature_name)
      features[weights_feature_name] = [itemwise_weights[0]]
      self.assertAlmostEqual(
          loss_fn([labels[0]], [scores[0]], features).eval(),
          _batch_aggregation([
              _pairwise_loss(labels[0], scores[0], itemwise_weights[0],
                             loss_key)
          ]),
          places=5)

      features[weights_feature_name] = [itemwise_weights[1]]
      self.assertAlmostEqual(
          loss_fn([labels[1]], [scores[1]], features).eval(),
          _batch_aggregation([
              _pairwise_loss(labels[1], scores[1], itemwise_weights[1],
                             loss_key)
          ]),
          places=5)

      # Multiple lists.
      features[weights_feature_name] = listwise_weights
      self.assertAlmostEqual(
          loss_fn(labels, scores, features).eval(),
          _batch_aggregation([
              _pairwise_loss(labels[0], scores[0], listwise_weights_expanded[0],
                             loss_key),
              _pairwise_loss(labels[1], scores[1], listwise_weights_expanded[1],
                             loss_key)
          ]),
          places=5)

      # Test LambdaWeight.
      lambda_weight = ranking_losses.DCGLambdaWeight(
          rank_discount_fn=lambda r: 1. / tf.math.log1p(r), smooth_fraction=1.)
      loss_fn = ranking_losses.make_loss_fn(
          loss_key,
          weights_feature_name=weights_feature_name,
          lambda_weight=lambda_weight)
      self.assertAlmostEqual(
          loss_fn(labels, scores, features).eval(),
          _batch_aggregation([
              _pairwise_loss(
                  labels[0],
                  scores[0],
                  listwise_weights_expanded[0],
                  loss_key,
                  rank_discount_form='LOG'),
              _pairwise_loss(
                  labels[1],
                  scores[1],
                  listwise_weights_expanded[1],
                  loss_key,
                  rank_discount_form='LOG')
          ]) * list_size,
          places=5)

      # Test loss reduction method.
      # Two reduction methods should return different loss values.
      loss_fn_1 = ranking_losses.make_loss_fn(
          loss_key, reduction=tf.compat.v1.losses.Reduction.SUM)
      loss_fn_2 = ranking_losses.make_loss_fn(
          loss_key, reduction=tf.compat.v1.losses.Reduction.MEAN)
      self.assertNotAlmostEqual(
          loss_fn_1(labels, scores, features).eval(),
          loss_fn_2(labels, scores, features).eval())
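
# A hedged sketch of what `_pairwise_loss` computes for the hinge key: a sum
# of weighted hinge terms over pairs with label_i > label_j, taking the weight
# of the higher-labeled item (an assumption; the real helper also handles the
# other pairwise keys, batch aggregation, and the optional rank-discount form
# used with LambdaWeight):
def _pairwise_hinge(labels, scores, weights):
  total = 0.
  for i in range(len(labels)):
    for j in range(len(labels)):
      if labels[i] > labels[j]:
        total += weights[i] * max(0., 1. - (scores[i] - scores[j]))
  return total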