Exemplo n.º 1
0
def test_err_perfect_first_trumps_many_good():
    """Check that a "perfect" hit at rank 1 outweighs everything below it.

    This is the motivating example from [1]: a ranking whose first entry
    is a "perfect" document (one that almost certainly satisfies the
    user) must score higher than a ranking that only has a "good"
    document in first place, no matter how the remaining positions are
    filled. Once the first result is sufficient, the later ones are
    unlikely to be looked at.

    References
    ----------
        [1] Chapelle, Olivier, et al. "Expected reciprocal rank for graded
        relevance." Proceedings of the 18th ACM conference on Information and
        knowledge management. ACM, 2009. http://olivier.chapelle.cc/pub/err.pdf
    """
    n_objects = 20
    y_true = ranking_ordering_conversion([range(n_objects)])

    # Object 0 (the "perfect" one) is placed correctly; every other
    # object is in reversed order: [0, 19, 18, ..., 2, 1].
    perfect_first_ordering = [0] + list(range(n_objects - 1, 0, -1))
    perfect_first = ranking_ordering_conversion([perfect_first_ordering])

    # Every other object is only one position off, but the "perfect"
    # object is pushed to the very end: [1, 2, ..., 19, 0].
    all_good_ordering = list(range(1, n_objects)) + [0]
    all_good = ranking_ordering_conversion([all_good_ordering])

    err_perfect_first = K.eval(err(y_true, perfect_first))
    err_all_good = K.eval(err(y_true, all_good))
    assert err_perfect_first > err_all_good
Exemplo n.º 2
0
def test_err_against_manually_verified_example():
    """Check ``err`` against a value that was worked out by hand.

    With three objects the relevance-gain mapping used in the derivation
    below turns a true rank ``r`` into the satisfaction probability
    ``(2**(2 - r) - 1) / 2**2``, and a result shown at (zero-based)
    position ``p`` has utility ``1 / (p + 1)``.
    """
    truth = ranking_ordering_conversion([[1, 2, 0]])
    prediction = ranking_ordering_conversion([[2, 1, 0]])

    # Walking through the predicted ordering [2, 1, 0]:
    #
    #   position 0: object 2, true rank 1
    #       probability (2**(2-1) - 1) / 2**2 = 1/4, utility 1/(0+1) = 1
    #   position 1: object 1, true rank 0
    #       probability (2**(2-0) - 1) / 2**2 = 3/4, utility 1/(1+1) = 1/2
    #   position 2: object 0, true rank 2
    #       probability (2**(2-2) - 1) / 2**2 = 0,   utility 1/(2+1) = 1/3
    #
    # A position only contributes if none of the earlier ones already
    # satisfied the user, so the expected utility is
    #
    #   1/4 * 1
    #   + (1 - 1/4) * 3/4 * 1/2
    #   + (1 - 1/4) * (1 - 3/4) * 0 * 1/3
    #   = 8/32 + 9/32 + 0
    #   = 17/32
    expected = 17 / 32

    actual = K.eval(err(truth, prediction))
    # ``approx`` because exact float equality is inherently error-prone.
    assert actual == approx(expected)
Exemplo n.º 3
0
def test_err_implementations_equivalent():
    """Spot-check that ``err`` and the reference ``err_np`` agree.

    The ground truth is a simple grading in which every grade occurs
    exactly once. Checking all 5! = 120 permutations of it would be
    excessive, so only every 20th permutation is compared (6 checks).
    """
    grades = np.array([4, 3, 2, 1, 0])
    y_true = grades.reshape(1, -1)

    # Lazily pick permutations number 0, 20, 40, ... without building
    # the full list of 120 first.
    sampled_permutations = itertools.islice(
        itertools.permutations(grades), 0, None, 20
    )
    for candidate in sampled_permutations:
        y_pred = np.reshape(candidate, (1, -1))
        actual = K.eval(err(y_true, y_pred))
        expected = err_np(y_true, y_pred)
        assert actual == approx(expected)