def test_cascade_first_stage_applies_cutoff():
    """Single-stage cascade with cutoff 2 yields a constant kappa per group.

    After one booster update and a predict pass, every kappa entry in the
    first query group's slice must match the pinned reference value.
    """
    features, _, query_ids = fixtures.train_data()
    single_stage = factories.cascade(num_stages=1, cutoffs=[2])
    first_ranker = single_stage.rankers[0]
    first_ranker.booster.update()
    start, stop = next(group_offsets(query_ids))

    single_stage.predict(features, query_ids)

    # One identical kappa entry is expected per document in the group.
    group_size = stop - start
    np.testing.assert_almost_equal(
        first_ranker.kappa[start:stop],
        [0.01948363] * group_size,
    )
def test_cascade_score_mask_does_not_appear_in_first_stage():
    """First-stage predictions must not contain ``Cascade.SCORE_MASK``.

    Builds a two-stage cascade with cutoffs [4, 2], runs a training-mode
    predict, and checks that the mask value is absent from the first
    ranker's ``predict``.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_one = cascade.rankers[0]

    cascade.predict(X, qid, is_train=True)

    assert Cascade.SCORE_MASK not in ranker_one.predict
def test_cascade_second_stage_applies_cutoff():
    """Stage-two kappa equals the second-highest stage-two prediction.

    Uses a two-stage cascade with cutoffs [4, 2] and inspects only the first
    query group: every kappa entry in that slice must match the runner-up
    score of the second ranker's predictions for the same slice.
    """
    features, _, query_ids = fixtures.train_data()
    two_stage = factories.cascade(num_stages=2, cutoffs=[4, 2])
    two_stage.update()
    second_ranker = two_stage.rankers[1]
    start, stop = next(group_offsets(query_ids))

    two_stage.predict(features, query_ids)

    # Index 1 of the descending sort is the score sitting at cutoff 2.
    descending = sorted(second_ranker.predict[start:stop], reverse=True)
    runner_up = descending[1]
    np.testing.assert_almost_equal(
        second_ranker.kappa[start:stop],
        [runner_up] * (stop - start),
    )
def test_cascade_second_stage_applies_mask():
    """Each stage of a two-stage cascade exposes the expected mask.

    With cutoffs [4, 2], the first query group's mask must be
    [1, 0, 1, 1, 1] on the first ranker and [0, 0, 1, 1, 0] on the second.
    """
    features, _, query_ids = fixtures.train_data()
    two_stage = factories.cascade(num_stages=2, cutoffs=[4, 2])
    two_stage.update()
    first_ranker, second_ranker = two_stage.rankers[0], two_stage.rankers[1]
    start, stop = next(group_offsets(query_ids))

    two_stage.predict(features, query_ids)

    first_stage_mask = [1, 0, 1, 1, 1]
    second_stage_mask = [0, 0, 1, 1, 0]
    np.testing.assert_almost_equal(
        first_ranker.mask[start:stop], first_stage_mask
    )
    np.testing.assert_almost_equal(
        second_ranker.mask[start:stop], second_stage_mask
    )
def test_cascade_uses_score_mask():
    """As per previous implementation, always use the SCORE_MASK during predict
    regardless of whether we are doing training or inference.

    Runs predict on a two-stage cascade (cutoffs [4, 2]) once with
    ``is_train=True`` and once with ``is_train=False``; after each run the
    second ranker's ``predict`` must contain ``Cascade.SCORE_MASK``.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_two = cascade.rankers[1]

    for is_train in [True, False]:
        cascade.predict(X, qid, is_train=is_train)

        assert Cascade.SCORE_MASK in ranker_two.predict
def test_cascade_computed_kappa_when_training():
    """Training-mode cutoff sets kappa and returns a distinct scores object.

    One five-document query group, cutoff 2, third document masked out by the
    previous stage. Expects kappa to be 0.5 (the second-highest raw score)
    for every document in the group, and the value returned by
    ``ranker_apply_cutoff`` not to be the ``ranker.predict`` object itself.
    """
    qid = np.array([1, 1, 1, 1, 1])
    a, b = next(group_offsets(qid))
    cascade = factories.dummy_cascade()
    ranker = factories.ranker()
    ranker.cutoff = 2
    prev_mask = [1, 1, 0, 1, 1]
    raw_scores = np.array([0.1, 1.0, -0.03, 0.5, 0.25])
    ranker.predict = np.copy(raw_scores)
    # Mirror prev_mask in the stored predictions: index 2 is masked out.
    ranker.predict[2] = Cascade.SCORE_MASK

    scores = cascade.ranker_apply_cutoff(
        ranker, raw_scores, prev_mask, qid, is_train=True
    )

    np.testing.assert_almost_equal(ranker.kappa[a:b], [0.5] * 5)
    assert scores is not ranker.predict