def testAllSameScore(self):
    """Check getbest() when every item is added with the same score.

    For each capacity from 1 through 11, ten items that all score 0 are
    added in one call, and the result must equal a leading slice of the
    input list that is as long as the result itself.
    """
    tied = [(item, 0) for item in range(10)]
    for capacity in range(1, 12):
        chooser = NBest(capacity)
        chooser.addmany(tied)
        best = chooser.getbest()
        # Only the prefix property is asserted, not the result length.
        prefix = tied[:len(best)]
        self.assertEqual(best, prefix)
def testConstructor(self):
    """Check constructor argument validation and the initial state.

    Capacities of 0 and -1 must raise ValueError; capacities 1 through 10
    must yield an empty container that reports the requested capacity.
    """
    for invalid in (0, -1):
        self.assertRaises(ValueError, NBest, invalid)

    for capacity in range(1, 11):
        fresh = NBest(capacity)
        self.assertEqual(len(fresh), 0)
        self.assertEqual(fresh.capacity(), capacity)
def main(rt):
    """Time OR/AND queries against an index and print an HTML report.

    ``rt`` is a mapping that must provide ``rt["index"]`` (the object whose
    ``query`` method is timed) and ``rt["files"]`` (indexed by docid to get
    the path handed to ``path2url``).

    Eleven passes are made over the module-level ``QUERIES``.  Each query
    string is split into terms and re-joined with " OR " and with " AND ";
    each variant is run 50 times between two ``clock()`` readings.  Pass 0
    prints the result list for every query and records no timing; the
    remaining passes record the elapsed time per query string.  Finally the
    minimum and all recorded times are printed per query, in sorted order.

    Output goes to stdout as HTML fragments.  ``print(...)`` is always
    called with a single argument so the output is identical under
    Python 2 and Python 3.
    """
    index = rt["index"]
    files = rt["files"]
    times = {}
    iterations = range(50)
    for i in range(11):
        for q in QUERIES:
            terms = q.split()
            for c in " OR ", " AND ":
                query = c.join(terms)
                t0 = clock()
                if TEXTINDEX:
                    op = Or if c == " OR " else And
                    _q = " ".join(terms)
                    for _ in iterations:
                        b = index.query(_q, op).bucket()
                        num = len(b)
                        chooser = NBest(10)
                        chooser.addmany(b.items())
                        results = chooser.getbest()
                else:
                    try:
                        for _ in iterations:
                            results, num = index.query(query)
                    except Exception:
                        # Best effort: a query the index cannot handle is
                        # skipped.  Catching Exception (not a bare except)
                        # lets KeyboardInterrupt/SystemExit stop the run.
                        continue
                t1 = clock()
                elapsed = t1 - t0
                print("<p>Query: \"%s\"" % query)
                print("<br>Num results: %d" % num)
                print("<br>time.clock(): %s" % elapsed)
                if i == 0:
                    # Pass 0 shows the hits and is excluded from the
                    # timing statistics.
                    print("<ol>")
                    for docid, score in results:
                        url = path2url(files[docid])
                        fmt = '<li><a href="%s">%s</A> score = %s'
                        print(fmt % (url, url, score))
                    print("</ol>")
                    continue
                times.setdefault(query, []).append(elapsed)

    print("<hr>")
    # sorted() works whether keys() returns a list or a view.
    for k in sorted(times):
        v = times[k]
        print("<p>Query: \"%s\"" % k)
        print("<br>Min time: %s" % min(v))
        print("<br>All times: %s" % " ".join(map(str, v)))
def mass_weightedUnion(L, family=BTrees.family32):
    """Combine a list of (mapping, weight) pairs into one weighted union.

    Lists with fewer than two pairs are delegated to ``_trivial``.
    Otherwise the pairs are queued by mapping length and repeatedly merged
    two at a time with ``family.IF.weightedUnion`` until one mapping is
    left, which is returned.
    """
    if len(L) < 2:
        return _trivial(L, family)

    # Queue every operand scored by its size, so the two smallest are
    # always merged next and the unions stay as balanced as possible.
    queue = NBest(len(L))
    for mapping, weight in L:
        queue.add((mapping, weight), len(mapping))

    while len(queue) > 1:
        (left, left_weight), _ = queue.pop_smallest()
        (right, right_weight), _ = queue.pop_smallest()
        _, merged = family.IF.weightedUnion(
            left, right, left_weight, right_weight)
        # The merged mapping goes back into the queue with weight 1.
        queue.add((merged, 1), len(merged))

    (union, _), _ = queue.pop_smallest()
    return union
def testMany(self):
    """Check N-best selection over 50 items for several capacities.

    Items are ``(-i, i)`` pairs, so the expected result for capacity n is
    the last n inputs in reverse order.  Each capacity is exercised with
    the inputs in original, reversed and shuffled order, fed both one item
    at a time and in a single addmany() call.
    """
    import random

    inputs = [(-i, i) for i in range(50)]
    reversed_inputs = inputs[::-1]

    # Capacities tried: n = 1, 6, 11, ..., 46.
    for n in range(1, len(inputs) + 1, 5):
        expected = inputs[-n:][::-1]
        random_inputs = list(inputs)
        random.shuffle(random_inputs)

        for source in (inputs, reversed_inputs, random_inputs):
            # Feed the items individually.
            one_by_one = NBest(n)
            for item, score in source:
                one_by_one.add(item, score)
            self.assertEqual(len(one_by_one), n)
            self.assertEqual(one_by_one.capacity(), n)
            self.assertEqual(one_by_one.getbest(), expected)

            # Feed the same items in a single call.
            bulk = NBest(n)
            bulk.addmany(source)
            self.assertEqual(len(bulk), n)
            self.assertEqual(bulk.capacity(), n)
            self.assertEqual(bulk.getbest(), expected)

            # Popping must return the expected items from last to first,
            # then raise once the container is empty.
            for wanted in reversed(expected):
                self.assertEqual(bulk.pop_smallest(), wanted)
            self.assertRaises(IndexError, bulk.pop_smallest)
def testOne(self):
    """Check that a capacity-1 NBest keeps only the top-scoring item."""
    nb = NBest(1)

    def check_single(best):
        # Size and capacity stay at 1; only the retained item may change.
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [best])

    nb.add('a', 0)
    self.assertEqual(nb.getbest(), [('a', 0)])

    # A higher score replaces the current item.
    nb.add('b', 1)
    check_single(('b', 1))

    # A lower score leaves it alone.
    nb.add('c', -1)
    check_single(('b', 1))

    # Adding several at once keeps the highest score among them.
    nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
    check_single(('f', 5))
def testOne(self):
    """Check that an NBest of capacity 1 always holds the single best item."""
    nb = NBest(1)
    nb.add("a", 0)
    self.assertEqual(nb.getbest(), [("a", 0)])

    # Each step: the method to call, its arguments, and the item that
    # must be the sole survivor afterwards.
    steps = (
        (nb.add, ("b", 1), ("b", 1)),
        (nb.add, ("c", -1), ("b", 1)),
        (nb.addmany, ([("d", 3), ("e", -6), ("f", 5), ("g", 4)],), ("f", 5)),
    )
    for feed, args, survivor in steps:
        feed(*args)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [survivor])