Пример #1
0
 def testBalancedInterleave(self):
     """Check BalancedInterleave list construction and outcome inference.

     In the assertions below an outcome of -1 is reported as a win for the
     first ranking (l1), 1 as a win for the second ranking (l2) and 0 as a
     tie.
     """
     bi = BalancedInterleave()
     r1 = DeterministicRankingFunction(None, self.weights_1)
     r2 = DeterministicRankingFunction(None, self.weights_2)
     # A length of 10 is requested, yet only four-document lists are
     # accepted as valid results.
     (interleaved_list, assignments) = bi.interleave(r1, r2, self.query, 10)
     self.assertIn(interleaved_list.tolist(),
                   [[0, 1, 2, 3], [1, 0, 2, 3], [0, 1, 3, 2], [1, 0, 3, 2]])
     self.assertEqual(assignments[0].tolist(), [1, 3, 2, 0])
     self.assertEqual(assignments[1].tolist(), [0, 1, 3, 2])
     # infer_outcome arguments: shown list, [l1, l2] rankings, click vector
     # (one entry per shown position), query.
     o = bi.infer_outcome([1, 0, 3, 2], [[1, 3, 2, 0], [0, 1, 3, 2]],
                          [1, 0, 0, 0], self.query)
     self.assertEqual(o, -1, "l1 should win (1), o = %g" % o)
     o = bi.infer_outcome([1, 0, 3, 2], [[1, 3, 2, 0], [0, 1, 3, 2]],
                          [1, 0, 1, 0], self.query)
     self.assertEqual(o, -1, "l1 should win (2), o = %g" % o)
     o = bi.infer_outcome([1, 0, 3, 2], [[1, 2, 3, 0], [0, 1, 3, 2]],
                          [1, 0, 1, 0], self.query)
     self.assertEqual(o, 0, "The rankers should tie (1), o = %g" % o)
     o = bi.infer_outcome([0, 1, 2, 3], [[0, 1, 2, 3], [1, 2, 3, 0]],
                          [0, 1, 0, 1], self.query)
     # The expected outcome is 1, i.e. the second ranking wins; the message
     # now says so instead of naming l1.
     self.assertEqual(o, 1, "l2 should win, o = %g" % o)
     o = bi.infer_outcome([1, 0, 2, 3], [[0, 1, 2, 3], [1, 2, 3, 0]],
                          [0, 1, 0, 1], self.query)
     self.assertEqual(o, 0, "The rankers should tie (2), o = %g" % o)
     o = bi.infer_outcome([0, 2, 1, 3], [[3, 0, 1, 2], [1, 3, 2, 0]],
                          [1, 0, 1, 0], self.query)
     self.assertEqual(o, -1, "l1 should win (3), o = %g" % o)
     # Here the second ranking contains a document (4) that does not occur
     # in the shown list.
     o = bi.infer_outcome([0, 2, 1, 3], [[3, 0, 1, 2], [4, 3, 2, 0]],
                          [1, 0, 1, 0], self.query)
     self.assertEqual(o, -1, "l1 should win (4), o = %g" % o)
Пример #2
0
 def testHistBalancedInterleave(self):
     """Exercise HistBalancedInterleave assignments and outcome inference.

     Outcomes are asserted as -1 (l1 wins), 1 (l2 wins) or 0 (tie), as the
     failure messages state.
     """
     comparison = HistBalancedInterleave()
     first = DeterministicRankingFunction(None, self.weights_1)
     first_twin = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)

     # (ranker pair, index into the returned assignment, expected ranking)
     assignment_cases = (
         ((first, first_twin), 0, [1, 3, 2, 0]),
         ((first, first_twin), 1, [1, 3, 2, 0]),  # r1
         ((first, second), 1, [0, 1, 3, 2]),  # r2
     )
     for (left, right), slot, expected in assignment_cases:
         assignment = comparison._get_assignment(left, right, self.query, 4)
         self.assertEqual(assignment[slot].tolist(), expected)

     # (shown list, clicks, target ranker 1, target ranker 2,
     #  expected outcome, failure message template)
     outcome_cases = (
         ([1, 0, 3, 2], [1, 0, 0, 0], first, second, -1,
          "Same as original, l1 should win, o = %g"),
         ([1, 0, 3, 2], [1, 0, 0, 0], second, first, 1,
          "Different from original, l2 should win, o = %g"),
         ([1, 0, 3, 2], [1, 0, 0, 0], first_twin, first, 0,
          "Same ranking - tie (1), o = %g"),
         ([2, 0, 3, 1], [1, 1, 0, 0], first, second, 0,
          "Same ranking - tie (2), o = %g"),
         ([2, 0, 3, 4], [1, 1, 0, 0], first, second, 0,
          "Same ranking - tie (3), o = %g"),
     )
     for shown, clicks, left, right, expected, template in outcome_cases:
         # The second argument is a placeholder; a fresh list is passed on
         # every call.
         outcome = comparison.infer_outcome(
             shown, ["src a is ignored"], clicks, left, right, self.query)
         self.assertEqual(outcome, expected, template % outcome)
Пример #3
0
 def testProbabilisticInterleaveWithDeterministicRankers(self):
     """Check ProbabilisticInterleave with two deterministic rankers.

     Three things are verified: every list in ``test_lists`` is produced at
     least once within MAX_TRIALS draws (and no other list is produced),
     ``infer_outcome`` returns the expected values for a few click
     patterns, and ``get_probability_of_list`` returns 0.25 for one list.
     """
     pi = ProbabilisticInterleave(None)
     # test a few possible interleavings
     r1 = DeterministicRankingFunction(None, self.weights_1)
     r2 = DeterministicRankingFunction(None, self.weights_2)
     test_lists = {"0,1,3,2": 0, "1,0,3,2": 0, "1,3,0,2": 0, "1,3,2,0": 0}
     trials = 0
     MAX_TRIALS = 10000
     # Stop early as soon as every expected list has been observed.
     while trials < MAX_TRIALS and 0 in test_lists.values():
         trials += 1
         (interleaved, _context) = pi.interleave(r1, r2, self.query, 10)
         list_str = ",".join(str(doc) for doc in interleaved.tolist())
         self.assertIn(list_str, test_lists)
         test_lists[list_str] += 1
     for list_str, count in test_lists.items():
         self.assertNotEqual(0, count,
                             "Interleave failed for: %s" % list_str)
     # test interleaving outcomes
     context = (None, r1, r2)
     self.assertEqual(
         pi.infer_outcome([0, 1, 2, 3], context, [0, 0, 0, 0], self.query),
         0, "No clicks, outcome should be 0.")
     self.assertEqual(
         pi.infer_outcome([0, 1, 2, 3], context, [1, 0, 0, 0], self.query),
         0, "No possible assignment, outcome should be 0.")
     # assertAlmostEqual replaces the deprecated alias assertAlmostEquals;
     # the values are compared to 4 decimal places.
     o = pi.infer_outcome([1, 0, 3, 2], context, [1, 0, 0, 0], self.query)
     self.assertAlmostEqual(o, -0.0625, 4,
                            "Ranker 1 should win (o = %.4f)." % o)
     o = pi.infer_outcome([0, 1, 3, 2], context, [1, 0, 0, 0], self.query)
     self.assertAlmostEqual(o, 0.0625, 4,
                            "Ranker 2 should win (o = %.4f)." % o)
     # test get_probability_of_list
     p = pi.get_probability_of_list([1, 0, 3, 2], context, self.query)
     self.assertEqual(p, 0.25, "Probability of the most "
                      "likely list. p = %g" % p)
Пример #4
0
 def testTeamDraftInterleave(self):
     """Check that TeamDraft yields an accepted list and team assignment."""
     method = TeamDraft(None)
     first = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)

     # The first two positions may come in either order, and so may the
     # last two.
     acceptable_rankings = [
         [0, 1, 2, 3],
         [1, 0, 2, 3],
         [0, 1, 3, 2],
         [1, 0, 3, 2],
     ]
     # One 0/1 entry per position of the interleaved list.
     acceptable_teams = [
         [0, 1, 0, 1],
         [1, 0, 1, 0],
         [1, 0, 0, 1],
         [0, 1, 1, 0],
     ]

     (ranking, teams) = method.interleave(first, second, self.query, 10)
     self.assertIn(ranking.tolist(), acceptable_rankings)
     self.assertIn(teams.tolist(), acceptable_teams)
Пример #5
0
 def testHistTeamDraft_getPossibleAssignment(self):
     """Check the assignments HistTeamDraft reconstructs for a shown list.

     For two of the lists one of two assignments is accepted; for the third
     list ``_get_possible_assignment`` is expected to return None.
     """
     r1 = DeterministicRankingFunction(None, self.weights_1)
     r2 = DeterministicRankingFunction(None, self.weights_2)
     htd = HistTeamDraft(None)
     shown = [0, 1, 3, 2]
     self.assertIn(htd._get_possible_assignment(shown, r1, r2, self.query),
                   [[1, 0, 0, 1], [1, 0, 1, 0]])
     shown = [1, 0, 3, 2]
     self.assertIn(htd._get_possible_assignment(shown, r1, r2, self.query),
                   [[0, 1, 0, 1], [0, 1, 1, 0]])
     shown = [1, 0, 2, 3]
     # assertIsNone replaces the deprecated alias assertEquals(..., None).
     self.assertIsNone(
         htd._get_possible_assignment(shown, r1, r2, self.query))
Пример #6
0
 def testDocumentConstraints(self):
     """Exercise DocumentConstraints interleaving and outcome inference.

     Outcomes are asserted as -1 (l1 wins), 1 (l2 wins) or 0 (no winner),
     first for a default instance and then for instances built with
     "--constraints 1" and "--constraints 2".
     """
     comparison = DocumentConstraints()
     first = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)
     (ranking, assignments) = comparison.interleave(
         first, second, self.query, 10)
     self.assertIn(ranking.tolist(),
                   [[0, 1, 2, 3], [1, 0, 2, 3], [0, 1, 3, 2], [1, 0, 3, 2]])
     self.assertIn(assignments[0].tolist(), [[1, 2, 3, 0], [1, 3, 2, 0]])
     self.assertIn(assignments[1].tolist(), [[0, 1, 2, 3], [0, 1, 3, 2]])

     def check(method, rankings, clicks, expected, template):
         # Every case uses the same shown list; a fresh copy is built for
         # each call.
         outcome = method.infer_outcome([1, 0, 3, 2], rankings, clicks,
                                        self.query)
         self.assertEqual(outcome, expected, template % outcome)

     # Each case: ([l1, l2] rankings, clicks, expected outcome, message).
     default_cases = (
         ([[1, 3, 2, 0], [0, 1, 3, 2]], [1, 0, 0, 0], -1,
          "l1 should win (1), o = %g"),
         ([[1, 3, 2, 0], [0, 1, 3, 2]], [0, 0, 0, 1], -1,
          "l1 should win (2), o = %g"),
         ([[1, 3, 2, 0], [0, 1, 3, 2]], [0, 1, 0, 0], 1,
          "l2 should win (1), o = %g"),
         # constraints: 0 > 1, 0 > 3
         ([[1, 0, 2, 3], [0, 1, 3, 2]], [0, 1, 0, 0], 1,
          "l2 should win (2), o = %g"),
         # constraints: 0 > 1, 3 > 1, 0 > 2, 3 > 2
         ([[1, 2, 0, 3], [1, 0, 2, 3]], [0, 1, 1, 0], 1,
          "l2 should win (3), o = %g"),
         ([[1, 3, 2, 0], [0, 1, 3, 2]], [0, 0, 0, 0], 0,
          "No winner when there are no clicks o = %g"),
         ([[1, 3, 2, 0], [0, 1, 3, 2]], [1, 1, 1, 1], 0,
          "No winner when all are clicked o = %g"),
     )
     for case in default_cases:
         check(comparison, *case)

     comparison = DocumentConstraints("--constraints 1")
     # constraint: 0 > 1
     check(comparison, [[1, 0, 2, 3], [3, 0, 1, 2]], [0, 1, 0, 0], 1,
           "l2 should win with one constraint, o = %g")

     comparison = DocumentConstraints("--constraints 2")
     two_type_cases = (
         ([[1, 0, 2, 3], [3, 0, 1, 2]], [0, 1, 0, 0], 0,
          "Tie with two constraint types (1), o = %g"),
         # constraints: 0 > 1, 3 > 1, 3 > 2
         ([[1, 0, 2, 3], [1, 2, 0, 3]], [0, 1, 1, 0], 0,
          "Tie with two constraint types (2), o = %g"),
         ([[1, 0, 4, 3], [1, 0, 3, 2]], [0, 1, 1, 0], 0,
          "Tie with two constraint types (3), o = %g"),
         ([[1, 0, 4, 3], [1, 0, 2, 3]], [0, 1, 1, 0], -1,
          "l1 should win with two constr., o = %g"),
     )
     for case in two_type_cases:
         check(comparison, *case)
Пример #7
0
    def setUp(self):
        """Create the ranker and query fixture and record the initial ranking.

        Sets ``self.ranker``, ``self.query`` and ``self.ranking``.
        """
        n_features = 5

        # Ranker under test. The positional arguments are the ranker
        # argument list, the tie setting and the feature count.
        self.ranker = DRF(['3'], 'random', n_features, sample=None,
                          init='random')

        # Inline fixture: six feature lines, all tagged qid:373.
        raw_queries = """
        1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
        0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
        0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
        0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
        0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
        0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
        """
        stream = cStringIO.StringIO(raw_queries)
        parsed = query.Queries(stream, n_features)
        self.query = parsed['373']
        stream.close()

        # Walk the ranker once and keep the order it produced.
        self.ranker.init_ranking(self.query)
        n_documents = self.ranker.document_count()
        original_order = []
        for _ in range(n_documents):
            original_order.append(self.ranker.next())
        self.ranking = original_order
Пример #8
0
    def setUp(self):
        """Prepare ``self.ranker``, ``self.query`` and ``self.ranking``."""
        feature_total = 5
        query_id = '373'

        # Build the ranker from keyword-style settings gathered in one place.
        ranker_options = {'sample': None, 'init': 'random'}
        self.ranker = DRF(['3'], 'random', feature_total, **ranker_options)

        # Query fixture: every line below belongs to the same qid.
        fixture_text = """
        1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
        0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
        0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
        0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
        0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
        0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
        """
        handle = cStringIO.StringIO(fixture_text)
        all_queries = query.Queries(handle, feature_total)
        self.query = all_queries[query_id]
        handle.close()

        # Remember the ranking the ranker yields before any perturbation.
        self.ranker.init_ranking(self.query)
        remaining = self.ranker.document_count()
        initial_ranking = []
        while remaining > 0:
            initial_ranking.append(self.ranker.next())
            remaining -= 1
        self.ranking = initial_ranking
Пример #9
0
 def testHistTeamDraft(self):
     """Check HistTeamDraft outcomes for one shown list and varying inputs.

     The same shown list is reused for every call; only the clicks and the
     order of the target rankers change.
     """
     first = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)
     shown = [0, 1, 3, 2]
     method = HistTeamDraft()

     # (clicks, target ranker 1, target ranker 2, expected outcome, message)
     cases = (
         ([0, 0, 0, 0], first, second, 0, "No clicks."),
         ([1, 0, 0, 0], first, second, 1,
          "Target rankers are the same as the original rankers, "
          "so ranker 2 has to win."),
         ([1, 0, 0, 0], second, first, -1,
          "Target rankers are switched, so ranker 1 has to win."),
     )
     for clicks, left, right, expected, message in cases:
         outcome = method.infer_outcome(shown, None, clicks, left, right,
                                        self.query)
         self.assertEqual(outcome, expected, message)
Пример #10
0
 def testHistTeamDraft_getPossibleAssignment_randomization(self):
     """Check that both accepted assignments are eventually returned.

     ``_get_possible_assignment`` is called up to 1000 times for the same
     list; every result must be one of the two expected assignments and
     each of them must be seen at least once.
     """
     first = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)
     method = HistTeamDraft(None)
     shown = [0, 1, 3, 2]
     seen = {"1,0,0,1": 0, "1,0,1,0": 0}
     attempts_left = 1000
     # Stop as soon as every expected assignment has a non-zero count.
     while attempts_left > 0 and 0 in seen.values():
         attempts_left -= 1
         drawn = method._get_possible_assignment(shown, first, second,
                                                 self.query)
         key = ",".join(map(str, drawn))
         self.assertIn(key, seen.keys())
         seen[key] += 1
     for key, hits in seen.items():
         self.assertNotEqual(0, hits, "Test failed for: %s" % key)
Пример #11
0
 def testHistDocumentConstraints(self):
     """Check HistDocumentConstraints outcomes for several lists and clicks.

     Outcomes are asserted as -1 (l1 wins) or 0 (no winner).
     """
     comparison = HistDocumentConstraints()
     first = DeterministicRankingFunction(None, self.weights_1)
     second = DeterministicRankingFunction(None, self.weights_2)
     # results in assignments l1 = [1, 2, 3, 0] or [1, 3, 2, 0]
     # and l2 = [0, 1, 2, 3] or [0, 1, 3, 2]

     # (shown list, clicks, expected outcome, failure message template)
     cases = (
         ([1, 0, 3, 2], [1, 0, 0, 0], -1, "l1 should win, o = %g"),
         ([2, 1, 3, 0], [1, 0, 0, 0], 0,
          "No winner, both have 1 > 2 (1), o = %g"),
         ([2, 1, 4, 0], [1, 0, 0, 0], 0,
          "No winner, both have 1 > 2 (2), o = %g"),
         ([2, 1, 3, 0], [0, 0, 0, 0], 0,
          "No winner when none are clicked, o = %g"),
         ([2, 1, 3, 0], [1, 1, 1, 1], 0,
          "No winner when all are clicked, o = %g"),
     )
     for shown, clicks, expected, template in cases:
         outcome = comparison.infer_outcome(shown, None, clicks, first,
                                            second, self.query)
         self.assertEqual(outcome, expected, template % outcome)
Пример #12
0
class TestPerturbation(unittest.TestCase):
    """Tests for ProbabilisticPerturbator using one fixed query fixture."""

    # Number of perturbations drawn per generated probability.
    TRIES = 1000
    # Allowed excess of the observed swap rate over the requested
    # probability. The float literal keeps this from collapsing to 0 under
    # Python 2 integer division when true division is not enabled.
    MARGIN = 5.0 / TRIES

    def setUp(self):
        """Create the ranker and query fixture and record the ranking."""
        # Create ranker to test with
        ranker_args = ['3']
        ranker_tie = 'random'
        init_weights = 'random'
        feature_count = 5
        self.ranker = DRF(ranker_args,
                          ranker_tie,
                          feature_count,
                          sample=None,
                          init=init_weights)

        # Create queries to test with
        test_queries = """
        1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
        0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
        0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
        0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
        0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
        0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
        """
        query_fh = cStringIO.StringIO(test_queries)
        self.query = query.Queries(query_fh, feature_count)['373']
        query_fh.close()

        # Save the original ranking
        self.ranker.init_ranking(self.query)
        self.ranking = [
            self.ranker.next() for _ in range(self.ranker.document_count())
        ]

    @staticmethod
    def count_swaps(single_start, list1, list2):
        """
        Count the number of adjacent pairs that are swapped between two lists.

        Pairs are taken at positions (i, i + 1) for i = single_start,
        single_start + 2, ...

        Args:
            single_start: index of the first pair; when truthy the first
                element is unpaired and must be equal in both lists.
            list1: first list.
            list2: second list, the same length as list1.

        Returns:
            The number of pairs whose two elements are exchanged.

        Raises:
            ValueError: if the lists differ in length, if an unpaired first
                or last element differs, or if a pair is neither identical
                nor exchanged.
        """
        lislen = len(list1)
        if lislen != len(list2):
            raise ValueError("Lists are not the same size")

        if single_start and not list1[0] == list2[0]:
            raise ValueError("First element not the same")
        # Parses as (bool(single_start) ^ (lislen % 2)) and (...): true
        # when pairing from single_start leaves one element unpaired.
        if bool(single_start) ^ lislen % 2 and list1[-1] != list2[-1]:
            # in this case there is a left over item at the end of the list
            raise ValueError("Last element not the same")

        swapped = 0
        # range is used instead of the Python 2-only xrange.
        for i in range(single_start, lislen - 1, 2):
            if list1[i] == list2[i + 1] and list2[i] == list1[i + 1]:
                swapped += 1
            elif list1[i] != list2[i] or list2[i + 1] != list1[i + 1]:
                raise ValueError("Impossible swap")
        return swapped

    def test_prob_0(self):
        """
        Test perturbing with a probability of 0
        """
        new_ranked, single_start = ProbabilisticPerturbator(0).perturb(
            self.ranker, self.query)
        self.assertEqual(new_ranked, self.ranking)

    def test_prob_1(self):
        """
        Test perturbing with a probability of 1
        """
        new_ranked, single_start = ProbabilisticPerturbator(1).perturb(
            self.ranker, self.query)
        self.assertNotEqual(new_ranked, self.ranking)
        swaps = self.count_swaps(single_start, new_ranked, self.ranking)
        # Every pair available from single_start onwards must be swapped.
        self.assertEqual(swaps, (len(self.ranking) - single_start) // 2)

    @given(floats(0, 1))
    def test_prob_hypothesis(self, probability):
        """
        Test that the observed swap rate does not exceed the probability
        by more than MARGIN.
        """
        perturb = ProbabilisticPerturbator(probability).perturb
        total_swaps = 0
        for _ in range(self.TRIES):
            new_ranked, single_start = perturb(self.ranker, self.query)
            total_swaps += self.count_swaps(single_start, new_ranked,
                                            self.ranking)
        # float() forces true division even without
        # `from __future__ import division`.
        observed = float(total_swaps) / self.TRIES * 2 / len(self.ranking)
        # NOTE(review): only an upper bound is checked; a swap rate far
        # below the requested probability also passes. Confirm that this
        # is intended.
        self.assertLessEqual(observed - probability, self.MARGIN)
Пример #13
0
class TestPerturbation(unittest.TestCase):
    """Tests for ProbabilisticPerturbator using one fixed query fixture."""

    # Number of perturbations drawn per generated probability.
    TRIES = 1000
    # Allowed excess of the observed swap rate over the requested
    # probability. The float literal keeps this from collapsing to 0 under
    # Python 2 integer division when true division is not enabled.
    MARGIN = 5.0 / TRIES

    def setUp(self):
        """Create the ranker and query fixture and record the ranking."""
        # Create ranker to test with
        ranker_args = ['3']
        ranker_tie = 'random'
        init_weights = 'random'
        feature_count = 5
        self.ranker = DRF(
            ranker_args,
            ranker_tie,
            feature_count,
            sample=None,
            init=init_weights
        )

        # Create queries to test with
        test_queries = """
        1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
        0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
        0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
        0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
        0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
        0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
        """
        query_fh = cStringIO.StringIO(test_queries)
        self.query = query.Queries(query_fh, feature_count)['373']
        query_fh.close()

        # Save the original ranking
        self.ranker.init_ranking(self.query)
        self.ranking = [
            self.ranker.next() for _ in range(self.ranker.document_count())
        ]

    @staticmethod
    def count_swaps(single_start, list1, list2):
        """
        Count the number of adjacent pairs that are swapped between two lists.

        Pairs are taken at positions (i, i + 1) for i = single_start,
        single_start + 2, ...

        Args:
            single_start: index of the first pair; when truthy the first
                element is unpaired and must be equal in both lists.
            list1: first list.
            list2: second list, the same length as list1.

        Returns:
            The number of pairs whose two elements are exchanged.

        Raises:
            ValueError: if the lists differ in length, if an unpaired first
                or last element differs, or if a pair is neither identical
                nor exchanged.
        """
        lislen = len(list1)
        if lislen != len(list2):
            raise ValueError("Lists are not the same size")

        if single_start and not list1[0] == list2[0]:
            raise ValueError("First element not the same")
        # Parses as (bool(single_start) ^ (lislen % 2)) and (...): true
        # when pairing from single_start leaves one element unpaired.
        if bool(single_start) ^ lislen % 2 and list1[-1] != list2[-1]:
            # in this case there is a left over item at the end of the list
            raise ValueError("Last element not the same")

        swapped = 0
        # range is used instead of the Python 2-only xrange.
        for i in range(single_start, lislen - 1, 2):
            if list1[i] == list2[i+1] and list2[i] == list1[i+1]:
                swapped += 1
            elif list1[i] != list2[i] or list2[i+1] != list1[i+1]:
                raise ValueError("Impossible swap")
        return swapped

    def test_prob_0(self):
        """
        Test perturbing with a probability of 0
        """
        new_ranked, single_start = ProbabilisticPerturbator(0).perturb(
            self.ranker,
            self.query
        )
        self.assertEqual(new_ranked, self.ranking)

    def test_prob_1(self):
        """
        Test perturbing with a probability of 1
        """
        new_ranked, single_start = ProbabilisticPerturbator(1).perturb(
            self.ranker,
            self.query
        )
        self.assertNotEqual(new_ranked, self.ranking)
        swaps = self.count_swaps(single_start, new_ranked, self.ranking)
        # Every pair available from single_start onwards must be swapped.
        self.assertEqual(swaps, (len(self.ranking) - single_start) // 2)

    @given(floats(0, 1))
    def test_prob_hypothesis(self, probability):
        """
        Test that the observed swap rate does not exceed the probability
        by more than MARGIN.
        """
        perturb = ProbabilisticPerturbator(probability).perturb
        total_swaps = 0
        for _ in range(self.TRIES):
            new_ranked, single_start = perturb(self.ranker, self.query)
            total_swaps += self.count_swaps(
                single_start,
                new_ranked,
                self.ranking
            )
        # float() forces true division even without
        # `from __future__ import division`.
        observed = float(total_swaps) / self.TRIES * 2 / len(self.ranking)
        # NOTE(review): only an upper bound is checked; a swap rate far
        # below the requested probability also passes. Confirm that this
        # is intended.
        self.assertLessEqual(observed - probability, self.MARGIN)