Exemplo n.º 1
0
    def testPairs(self):
        """Check weighted union and intersection over every pairing of inputs.

        Two IIBTrees and IIBucket copies of them are combined pairwise, in
        both argument orders and under several weight pairs.  Each result is
        compared with an expectation computed directly from the operands.
        """
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        operands = (t1, t2, b1, b2)
        weight_pairs = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in operands:
            # Sanity check: no operand holds a key outside allkeys.
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in operands:
                for w1, w2 in weight_pairs:
                    # Both argument orders must produce the same result.
                    orderings = ([(x, w1), (y, w2)], [(y, w2), (x, w1)])

                    # Union: keys found on either side; a side lacking the
                    # key contributes 0 to the weighted sum.
                    expected = sorted(
                        [(key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                         for key in allkeys
                         if x.has_key(key) or y.has_key(key)])
                    for pairs in orderings:
                        got = mass_weightedUnion(pairs)
                        self.assertEqual(expected, list(got.items()))

                    # Intersection: only keys found on both sides.
                    expected = sorted(
                        [(key, x[key] * w1 + y[key] * w2)
                         for key in allkeys
                         if x.has_key(key) and y.has_key(key)])
                    for pairs in orderings:
                        got = mass_weightedIntersection(pairs)
                        self.assertEqual(expected, list(got.items()))
Exemplo n.º 2
0
    def testPairs(self):
        """Exercise the mass set operations on all pairs of trees and buckets.

        Every ordered pair drawn from two IIBTrees and their IIBucket copies
        is fed to mass_weightedUnion and mass_weightedIntersection with a
        handful of weight pairs, in both argument orders.  The results are
        checked against values computed key by key.
        """
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        containers = (t1, t2, b1, b2)
        weightings = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in containers:
            # Every key stored in a container must be one of allkeys.
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in containers:
                for w1, w2 in weightings:
                    forward = [(x, w1), (y, w2)]
                    backward = [(y, w2), (x, w1)]

                    # Expected union: weighted sum over keys in x or y, with
                    # a missing entry counted as 0.
                    expected = sorted(
                        [(key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                         for key in allkeys
                         if x.has_key(key) or y.has_key(key)])
                    for pairs in (forward, backward):
                        got = mass_weightedUnion(pairs)
                        self.assertEqual(expected, list(got.items()))

                    # Expected intersection: weighted sum over keys in both.
                    expected = sorted(
                        [(key, x[key] * w1 + y[key] * w2)
                         for key in allkeys
                         if x.has_key(key) and y.has_key(key)])
                    for pairs in (forward, backward):
                        got = mass_weightedIntersection(pairs)
                        self.assertEqual(expected, list(got.items()))
Exemplo n.º 3
0
 def executeQuery(self, index):
     """Return the weighted union of the results of all child nodes.

     Each node from ``self.getValue()`` is executed against ``index``, and
     every non-None result enters the union with weight 1.
     """
     results = [node.executeQuery(index) for node in self.getValue()]
     # A None result technically matches every doc, but it is dropped here
     # as if it matched none, so that
     #     real_word OR stop_word
     # behaves like plain real_word.
     return mass_weightedUnion([(r, 1) for r in results if r is not None])
Exemplo n.º 4
0
 def executeQuery(self, index):
     """Execute every child node and merge the results by weighted union.

     Children come from ``self.getValue()``; each contributes its result
     with weight 1 unless that result is None.
     """
     child_results = [child.executeQuery(index) for child in self.getValue()]
     # None technically means "matches every doc"; treating it as "matches
     # none" makes
     #     real_word OR stop_word
     # act like plain real_word.
     weighted = [(res, 1) for res in child_results if res is not None]
     return mass_weightedUnion(weighted)
Exemplo n.º 5
0
    def testMany(self):
        """Check the mass union and intersection of many weighted IIBTrees.

        Builds N trees with overlapping key ranges plus one key stored in
        every tree, shuffles them, and compares mass_weightedUnion and
        mass_weightedIntersection against sums computed key by key.
        """
        import random
        N = 15  # number of IIBTrees to feed in
        L = []
        commonkey = N * 1000
        allkeys = {commonkey: 1}
        for i in range(N):
            t = IIBTree()
            t[commonkey] = i
            for j in range(N - i):
                key = i + j
                allkeys[key] = 1
                t[key] = N * i + j
            L.append((t, i + 1))
        random.shuffle(L)
        # sorted() yields a sorted list of the keys on Python 2 and 3 alike;
        # calling .sort() on the result of keys() fails on Python 3, where
        # keys() returns a view object rather than a list.
        allkeys = sorted(allkeys)

        # Test the union.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
            expected.append((key, total))
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
                else:
                    break
            else:
                # We didn't break out of the loop so it's in the intersection.
                expected.append((key, total))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
Exemplo n.º 6
0
    def testMany(self):
        """Feed many weighted IIBTrees to the mass union and intersection.

        N trees are created with overlapping key ranges and one key that is
        stored in all of them.  After shuffling, the outputs of
        mass_weightedUnion and mass_weightedIntersection are compared with
        weighted sums accumulated directly from the trees.
        """
        import random
        N = 15  # number of IIBTrees to feed in
        L = []
        commonkey = N * 1000
        allkeys = {commonkey: 1}
        for i in range(N):
            t = IIBTree()
            t[commonkey] = i
            for j in range(N - i):
                key = i + j
                allkeys[key] = 1
                t[key] = N * i + j
            L.append((t, i + 1))
        random.shuffle(L)
        # Use sorted() rather than keys() followed by .sort(): on Python 3
        # keys() returns a view with no .sort() method, while sorted() gives
        # a sorted list of the keys on both Python 2 and Python 3.
        allkeys = sorted(allkeys)

        # Test the union.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
            expected.append((key, total))
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
                else:
                    break
            else:
                # We didn't break out of the loop so it's in the intersection.
                expected.append((key, total))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
Exemplo n.º 7
0
 def executeQuery(self, index):
     """Intersect the positive subnode results, then remove the NOT results.

     Each subnode from ``self.getValue()`` is executed against ``index``.
     For a subnode whose ``nodeType()`` is "NOT", the query is run on that
     subnode's own value and the result is kept apart.  All non-None results
     carry weight 1.
     """
     positives = []
     negatives = []
     for subnode in self.getValue():
         if subnode.nodeType() == "NOT":
             # None here technically matches every doc, but it is treated
             # as matching none, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             bucket, query_node = negatives, subnode.getValue()
         else:
             # None here technically matches every doc, so it needn't be
             # included in the intersection.
             bucket, query_node = positives, subnode
         r = query_node.executeQuery(index)
         if r is not None:
             bucket.append((r, 1))
     result = mass_weightedIntersection(positives)
     if negatives:
         result = difference(result, mass_weightedUnion(negatives))
     return result
Exemplo n.º 8
0
 def executeQuery(self, index):
     """Combine subnode results: intersection minus the union of the NOTs.

     Subnodes come from ``self.getValue()``.  A subnode reporting
     ``nodeType() == "NOT"`` has the query executed on its wrapped value and
     its result goes to the exclusion list.  Every other subnode is executed
     directly and goes to the inclusion list.  Results that are None are
     skipped; the rest get weight 1.
     """
     included = []
     excluded = []
     for subnode in self.getValue():
         if subnode.nodeType() == "NOT":
             # A None result technically matches every doc; it is handled
             # as if it matched none, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             destination, runner = excluded, subnode.getValue()
         else:
             # A None result technically matches every doc, so leaving it
             # out of the intersection changes nothing.
             destination, runner = included, subnode
         r = runner.executeQuery(index)
         if r is not None:
             destination.append((r, 1))
     matches = mass_weightedIntersection(included)
     if excluded:
         matches = difference(matches, mass_weightedUnion(excluded))
     return matches
Exemplo n.º 9
0
 def testEmptyLists(self):
     """Both mass set operations return an empty result for an empty list."""
     intersection = mass_weightedIntersection([])
     self.assertEqual(len(intersection), 0)
     union = mass_weightedUnion([])
     self.assertEqual(len(union), 0)
Exemplo n.º 10
0
 def search_glob(self, pattern):
     """Return the weighted union of the results for words matching a glob.

     The lexicon turns ``pattern`` into word ids, which are filtered through
     ``_remove_oov_wids`` (presumably dropping out-of-vocabulary ids; confirm
     against that helper) before being searched.
     """
     matching_wids = self._lexicon.globToWordIds(pattern)
     known_wids = self._remove_oov_wids(matching_wids)
     per_word_results = self._search_wids(known_wids)
     return mass_weightedUnion(per_word_results)
Exemplo n.º 11
0
 def search(self, term):
     """Return the weighted union of the results for the words of ``term``.

     Returns None when the lexicon yields no word ids for ``term``; None
     stands for "all docs match".
     """
     wids = self._lexicon.termToWordIds(term)
     if wids:
         known_wids = self._remove_oov_wids(wids)
         return mass_weightedUnion(self._search_wids(known_wids))
     return None # All docs match
Exemplo n.º 12
0
 def testEmptyLists(self):
     """An empty input list gives a zero-length intersection and union."""
     empty_intersection = mass_weightedIntersection([])
     self.assertEqual(len(empty_intersection), 0)
     empty_union = mass_weightedUnion([])
     self.assertEqual(len(empty_union), 0)
Exemplo n.º 13
0
 def search_glob(self, pattern):
     """Search for all words matching ``pattern`` and union the results.

     Word ids come from the lexicon's ``globToWordIds`` and pass through
     ``_remove_oov_wids`` (presumably removing out-of-vocabulary ids; verify
     in that helper) before ``_search_wids`` is applied to them.
     """
     filtered_wids = self._remove_oov_wids(
         self._lexicon.globToWordIds(pattern))
     return mass_weightedUnion(self._search_wids(filtered_wids))
Exemplo n.º 14
0
 def search(self, term):
     """Look up ``term`` and return the weighted union of its word results.

     When the lexicon produces no word ids for ``term`` the method returns
     None, which means "all docs match".
     """
     term_wids = self._lexicon.termToWordIds(term)
     if term_wids:
         searchable = self._remove_oov_wids(term_wids)
         word_results = self._search_wids(searchable)
         return mass_weightedUnion(word_results)
     return None  # All docs match