Exemplo n.º 1
0
 def testAllSameScore(self):
     inputs = [(i, 0) for i in range(10)]
     for n in range(1, 12):
         nb = NBest(n)
         nb.addmany(inputs)
         outputs = nb.getbest()
         self.assertEqual(outputs, inputs[:len(outputs)])
Exemplo n.º 2
0
    def testConstructor(self):
        self.assertRaises(ValueError, NBest, 0)
        self.assertRaises(ValueError, NBest, -1)

        for n in range(1, 11):
            nb = NBest(n)
            self.assertEqual(len(nb), 0)
            self.assertEqual(nb.capacity(), n)
Exemplo n.º 3
0
def main(rt):
    index = rt["index"]
    files = rt["files"]
    times = {}
    ITERS = range(50)
    for i in range(11):
        for q in QUERIES:
            terms = q.split()
            for c in " OR ", " AND ":
                query = c.join(terms)
                t0 = clock()
                if TEXTINDEX:
                    if c == " OR ":
                        op = Or
                    else:
                        op = And
                    _q = " ".join(terms)
                    for _ in ITERS:
                        b = index.query(_q, op).bucket()
                        num = len(b)
                        chooser = NBest(10)
                        chooser.addmany(b.items())
                        results = chooser.getbest()

                else:
                    try:
                        for _ in ITERS:
                            results, num = index.query(query)
                    except:
                        continue
                t1 = clock()
                print "<p>Query: \"%s\"" % query
                print "<br>Num results: %d" % num
                print "<br>time.clock(): %s" % (t1 - t0)
                key = query
                if i == 0:
                    print "<ol>"
                    for docid, score in results:
                        url = path2url(files[docid])
                        fmt = '<li><a href="%s">%s</A> score = %s'
                        print fmt % (url, url, score)
                    print "</ol>"
                    continue
                l = times.setdefault(key, [])
                l.append(t1 - t0)

    l = times.keys()
    l.sort()
    print "<hr>"
    for k in l:
        v = times[k]
        print "<p>Query: \"%s\"" % k
        print "<br>Min time: %s" % min(v)
        print "<br>All times: %s" % " ".join(map(str, v))
Exemplo n.º 4
0
def mass_weightedUnion(L, family=BTrees.family32):
    "A list of (mapping, weight) pairs -> their weightedUnion IFBucket."
    if len(L) < 2:
        return _trivial(L, family)
    # Balance unions as closely as possible, smallest to largest.
    merge = NBest(len(L))
    for x, weight in L:
        merge.add((x, weight), len(x))
    while len(merge) > 1:
        # Merge the two smallest so far, and add back to the queue.
        (x, wx), dummy = merge.pop_smallest()
        (y, wy), dummy = merge.pop_smallest()
        dummy, z = family.IF.weightedUnion(x, y, wx, wy)
        merge.add((z, 1), len(z))
    (result, weight), dummy = merge.pop_smallest()
    return result
Exemplo n.º 5
0
def mass_weightedUnion(L, family=BTrees.family32):
    "A list of (mapping, weight) pairs -> their weightedUnion IFBucket."
    if len(L) < 2:
        return _trivial(L, family)
    # Balance unions as closely as possible, smallest to largest.
    merge = NBest(len(L))
    for x, weight in L:
        merge.add((x, weight), len(x))
    while len(merge) > 1:
        # Merge the two smallest so far, and add back to the queue.
        (x, wx), dummy = merge.pop_smallest()
        (y, wy), dummy = merge.pop_smallest()
        dummy, z = family.IF.weightedUnion(x, y, wx, wy)
        merge.add((z, 1), len(z))
    (result, weight), dummy = merge.pop_smallest()
    return result
Exemplo n.º 6
0
    def testMany(self):
        import random
        inputs = [(-i, i) for i in range(50)]

        reversed_inputs = inputs[:]
        reversed_inputs.reverse()

        # Test the N-best for a variety of n (1, 6, 11, ... 50).
        for n in range(1, len(inputs)+1, 5):
            expected = inputs[-n:]
            expected.reverse()

            random_inputs = inputs[:]
            random.shuffle(random_inputs)

            for source in inputs, reversed_inputs, random_inputs:
                # Try feeding them one at a time.
                nb = NBest(n)
                for item, score in source:
                    nb.add(item, score)
                self.assertEqual(len(nb), n)
                self.assertEqual(nb.capacity(), n)
                self.assertEqual(nb.getbest(), expected)

                # And again in one gulp.
                nb = NBest(n)
                nb.addmany(source)
                self.assertEqual(len(nb), n)
                self.assertEqual(nb.capacity(), n)
                self.assertEqual(nb.getbest(), expected)

                for i in range(1, n+1):
                    self.assertEqual(nb.pop_smallest(), expected[-i])
                self.assertRaises(IndexError, nb.pop_smallest)
Exemplo n.º 7
0
    def testOne(self):
        nb = NBest(1)
        nb.add('a', 0)
        self.assertEqual(nb.getbest(), [('a', 0)])

        nb.add('b', 1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('b', 1)])

        nb.add('c', -1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('b', 1)])

        nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('f', 5)])
Exemplo n.º 8
0
    def testOne(self):
        nb = NBest(1)
        nb.add("a", 0)
        self.assertEqual(nb.getbest(), [("a", 0)])

        nb.add("b", 1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [("b", 1)])

        nb.add("c", -1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [("b", 1)])

        nb.addmany([("d", 3), ("e", -6), ("f", 5), ("g", 4)])
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [("f", 5)])