def test_termindex():
    """Round-trip a fixed list of terms through a term index file.

    Each ``(fieldname, text)`` term is written with a ``FileTermInfo`` built
    from weight ``1.0`` and the term's position in the list. The file is then
    read back and the test checks that the keys come out in the same order
    and that ``weight()`` / ``doc_frequency()`` return the values written.
    """
    terms = [
        ("a", "alfa"), ("a", "bravo"), ("a", "charlie"), ("a", "delta"),
        ("b", "able"), ("b", "baker"), ("b", "dog"), ("b", "easy"),
    ]
    st = RamStorage()

    tw = TermIndexWriter(st.create_file("test.trm"))
    for i, t in enumerate(terms):
        tw.add(t, FileTermInfo(1.0, i))
    tw.close()

    tr = TermIndexReader(st.open_file("test.trm"))
    try:
        # zip() stops at the shorter input, so check the key count explicitly;
        # otherwise a reader that returns too few keys would still pass.
        keys = list(tr.keys())
        assert_equal(len(keys), len(terms))

        for i, (t1, t2) in enumerate(zip(keys, terms)):
            assert_equal(t1, t2)
            ti = tr.get(t1)
            assert_equal(ti.weight(), 1.0)
            assert_equal(ti.doc_frequency(), i)
    finally:
        # Close the reader even when an assertion fails.
        tr.close()
def test_termindex():
    """Round-trip a fixed list of terms through a term index file.

    Each ``(fieldname, text)`` term is written with a ``FileTermInfo`` built
    from weight ``1.0`` and the term's position in the list. The file is then
    read back and the test checks that the keys come out in the same order
    and that ``weight()`` / ``doc_frequency()`` return the values written.
    """
    terms = [
        ("a", "alfa"), ("a", "bravo"), ("a", "charlie"), ("a", "delta"),
        ("b", "able"), ("b", "baker"), ("b", "dog"), ("b", "easy"),
    ]
    st = RamStorage()

    tw = TermIndexWriter(st.create_file("test.trm"))
    for i, t in enumerate(terms):
        tw.add(t, FileTermInfo(1.0, i))
    tw.close()

    tr = TermIndexReader(st.open_file("test.trm"))
    try:
        # zip() stops at the shorter input, so check the key count explicitly;
        # otherwise a reader that returns too few keys would still pass.
        keys = list(tr.keys())
        assert_equal(len(keys), len(terms))

        for i, (t1, t2) in enumerate(zip(keys, terms)):
            assert_equal(t1, t2)
            ti = tr.get(t1)
            assert_equal(ti.weight(), 1.0)
            assert_equal(ti.doc_frequency(), i)
    finally:
        # Close the reader even when an assertion fails.
        tr.close()
def test_random_termkeys():
    """Write 1000 random ``(fieldname, token)`` terms and check each is found.

    Field names are 19 random uppercase ASCII letters; tokens are 19 random
    characters with code points in ``0..0xD7FF`` (everything below the
    surrogate block that starts at U+D800). The random source is not seeded,
    so the generated terms differ from run to run.
    """

    def random_fieldname():
        # xrange(1, 20) yields 19 values, so the name is 19 characters long.
        return "".join(chr(random.randint(65, 90)) for _ in xrange(1, 20))

    def random_token():
        return "".join(unichr(random.randint(0, 0xd7ff))
                       for _ in xrange(1, 20))

    # NOTE(review): terms are added in sorted order, presumably because the
    # writer expects that -- confirm against TermIndexWriter.
    domain = sorted([(random_fieldname(), random_token())
                     for _ in xrange(1000)])

    st = RamStorage()
    tw = TermIndexWriter(st.create_file("test.trm"))
    for term in domain:
        tw.add(term, FileTermInfo(1.0, 1))
    tw.close()

    tr = TermIndexReader(st.open_file("test.trm"))
    try:
        for term in domain:
            assert term in tr
    finally:
        # Close the reader even when a lookup assertion fails.
        tr.close()
def test_random_termkeys():
    """Write 1000 random ``(fieldname, token)`` terms and check each is found.

    Field names are 19 random uppercase ASCII letters; tokens are 19 random
    characters with code points in ``0..0xD7FF`` (everything below the
    surrogate block that starts at U+D800). The random source is not seeded,
    so the generated terms differ from run to run.
    """

    def random_fieldname():
        # xrange(1, 20) yields 19 values, so the name is 19 characters long.
        return "".join(chr(random.randint(65, 90)) for _ in xrange(1, 20))

    def random_token():
        return "".join(unichr(random.randint(0, 0xd7ff))
                       for _ in xrange(1, 20))

    # NOTE(review): terms are added in sorted order, presumably because the
    # writer expects that -- confirm against TermIndexWriter.
    domain = sorted([(random_fieldname(), random_token())
                     for _ in xrange(1000)])

    st = RamStorage()
    tw = TermIndexWriter(st.create_file("test.trm"))
    for term in domain:
        tw.add(term, FileTermInfo(1.0, 1))
    tw.close()

    tr = TermIndexReader(st.open_file("test.trm"))
    try:
        for term in domain:
            assert term in tr
    finally:
        # Close the reader even when a lookup assertion fails.
        tr.close()
def test_termkey():
    """Check that terms with non-ASCII text survive a write/read round trip.

    Three ``(fieldname, text)`` terms are written to a term index inside a
    temporary storage, then the file is reopened and each term is tested for
    membership in the reader.
    """
    # Each term literal appears once here so the write and lookup phases
    # cannot drift apart.
    entries = [
        (("alfa", u("bravo")), FileTermInfo(1.0, 3)),
        (("alfa", u('\xc3\xa6\xc3\xaf\xc5\ufffd\xc3\xba')),
         FileTermInfo(4.0, 6)),
        (("text", u('\xe6\u2014\xa5\xe6\u0153\xac\xe8\xaa\u017e')),
         FileTermInfo(7.0, 9)),
    ]

    with TempStorage("termkey") as st:
        tw = TermIndexWriter(st.create_file("test.trm"))
        for term, info in entries:
            tw.add(term, info)
        tw.close()

        tr = TermIndexReader(st.open_file("test.trm"))
        try:
            for term, _ in entries:
                assert term in tr
        finally:
            # Close the reader even when an assertion fails, so the file is
            # not left open when the TempStorage context exits.
            tr.close()
def test_termkey():
    """Check that terms with non-ASCII text survive a write/read round trip.

    Three ``(fieldname, text)`` terms are written to a term index inside a
    temporary storage, then the file is reopened and each term is tested for
    membership in the reader.
    """
    # Each term literal appears once here so the write and lookup phases
    # cannot drift apart.
    entries = [
        (("alfa", u("bravo")), FileTermInfo(1.0, 3)),
        (("alfa", u('\xc3\xa6\xc3\xaf\xc5\ufffd\xc3\xba')),
         FileTermInfo(4.0, 6)),
        (("text", u('\xe6\u2014\xa5\xe6\u0153\xac\xe8\xaa\u017e')),
         FileTermInfo(7.0, 9)),
    ]

    with TempStorage("termkey") as st:
        tw = TermIndexWriter(st.create_file("test.trm"))
        for term, info in entries:
            tw.add(term, info)
        tw.close()

        tr = TermIndexReader(st.open_file("test.trm"))
        try:
            for term, _ in entries:
                assert term in tr
        finally:
            # Close the reader even when an assertion fails, so the file is
            # not left open when the TempStorage context exits.
            tr.close()