Example #1
def test_compatibility():
    from whoosh.scoring import Weighting
    
    # This is the old way of doing a custom weighting model, check that
    # it's still supported...
    class LegacyWeighting(Weighting):
        use_final = True
        
        def score(self, searcher, fieldname, text, docnum, weight):
            return weight + 0.5
        
        def final(self, searcher, docnum, score):
            return score * 1.5
    
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = "alfa bravo charlie delta".split()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()
    
    s = ix.searcher(weighting=LegacyWeighting())
    r = s.search(query.Term("text", u("bravo")))
    assert_equal(r.score(0), 2.25)
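The snippets on this page come from the Whoosh test suite and omit their module-level imports. A minimal sketch of the preamble they rely on is shown below; module paths are for Whoosh 2.x, assert_equal/assert_raises come from nose (which these older tests used), and TempIndex lives in whoosh.support.testing in older releases and whoosh.util.testing in newer ones:

from itertools import permutations

from nose.tools import assert_equal, assert_raises
from whoosh import analysis, fields, formats, query
from whoosh.compat import text_type, u, xrange
from whoosh.filedb.filestore import RamStorage
from whoosh.query import And, Or, Ordered, Phrase, Term
from whoosh.util.testing import TempIndex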
Example #2
def test_deleteall():
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(
                query.Or([
                    query.Term("text", u("alfa")),
                    query.Term("text", u("bravo"))
                ]))
            assert_equal(len(r), 0)

        ix.optimize()
        assert_equal(ix.doc_count_all(), 0)

        with ix.reader() as r:
            assert_equal(list(r), [])
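As the comment in the snippet says, looping over every docnum with delete_document only exercises deletion; it is not how you would empty an index in real code. A hedged sketch of the more direct route, assuming Whoosh 2.x where whoosh.writing.CLEAR and the mergetype argument to commit() are available:

from whoosh import writing

w = ix.writer()
# Committing with the CLEAR merge policy drops all existing segments,
# keeping only documents added to this writer (none, in this case).
w.commit(mergetype=writing.CLEAR)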
Example #3
def test_excludematcher():
    schema = fields.Schema(content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "charlie", "delta")

    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(domain):
            w.add_document(content=u(" ").join(ls))
        w.commit(merge=False)

    w = ix.writer()
    w.delete_document(5)
    w.delete_document(10)
    w.delete_document(28)
    w.commit(merge=False)

    q = Term("content", "bravo")
    with ix.searcher() as s:
        m = q.matcher(s)
        while m.is_active():
            content = s.stored_fields(m.id())["content"].split()
            spans = m.spans()
            for span in spans:
                assert_equal(content[span.start], "bravo")
            m.next()
Example #4
def test_boost_phrase():
    schema = fields.Schema(title=fields.TEXT(field_boost=5.0, stored=True), text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        t = u(" ").join(ls)
        w.add_document(title=t, text=t)
    w.commit()

    q = Or([Term("title", u("alfa")), Term("title", u("bravo")), Phrase("text", [u("bravo"), u("charlie"), u("delta")])])

    def boost_phrases(q):
        if isinstance(q, Phrase):
            q.boost *= 1000.0
            return q
        else:
            return q.apply(boost_phrases)
    q = boost_phrases(q)

    with ix.searcher() as s:
        r = s.search(q, limit=None)
        for hit in r:
            if "bravo charlie delta" in hit["title"]:
                assert hit.score > 100.0
Example #5
def test_deleteall():
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(query.Or([query.Term("text", u("alfa")), query.Term("text", u("bravo"))]))
            assert_equal(len(r), 0)

        ix.optimize()
        assert_equal(ix.doc_count_all(), 0)

        with ix.reader() as r:
            assert_equal(list(r), [])
Example #6
def test_spelling_field_order():
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT, b=fields.TEXT(analyzer=ana),
                           c=fields.TEXT, d=fields.TEXT(analyzer=ana),
                           e=fields.TEXT(analyzer=ana), f=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        value = " ".join(ls)
        w.add_document(a=value, b=value, c=value, d=value, e=value, f=value)
    w.commit()
Example #7
def test_spelling_field_order():
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT,
                           b=fields.TEXT(analyzer=ana),
                           c=fields.TEXT,
                           d=fields.TEXT(analyzer=ana),
                           e=fields.TEXT(analyzer=ana),
                           f=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        value = " ".join(ls)
        w.add_document(a=value, b=value, c=value, d=value, e=value, f=value)
    w.commit()
Example #8
def test_current_terms():
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for ls in permutations(domain, 3):
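        # _stored_text overrides the stored value for the "text" field, so the
        # word tuple is stored while the joined string is what gets indexed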
        w.add_document(text=" ".join(ls), _stored_text=ls)
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("text", "alfa"), query.Term("text", "charlie")])
        m = q.matcher(s)

        while m.is_active():
            assert_equal(sorted(m.matching_terms()), [("text", "alfa"), ("text", "charlie")])
            m.next()
Example #9
def get_index():
    global _ix
    
    if _ix is not None:
        return _ix
    
    charfield = fields.FieldType(formats.Characters(), analysis.SimpleAnalyzer(),
                                 scorable=True, stored=True)
    schema = fields.Schema(text=charfield)
    st = RamStorage()
    _ix = st.create_index(schema)
    
    w = _ix.writer()
    for ls in permutations(domain, 4):
        w.add_document(text=u(" ").join(ls), _stored_text=ls)
    w.commit()
    
    return _ix
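The two get_index snippets on this page refer to module-level names that are not shown: the _ix cache and the domain word list. A minimal sketch of what the surrounding module presumably defines; the exact word list is an assumption, chosen only so that permutations(domain, 4) has enough words to draw from:

_ix = None
domain = u("alfa bravo charlie delta echo foxtrot").split()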
Example #10
def test_stability():
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("text", u("bravo"))
        last = []
        for i in xrange(s.doc_frequency("text", u("bravo"))):
            # Only un-optimized results are stable
            r = s.search(q, limit=i + 1, optimize=False)
            docnums = [hit.docnum for hit in r]
            assert_equal(docnums[:-1], last)
            last = docnums
Example #11
def test_phrase_order():
    tfield = fields.TEXT(stored=True, analyzer=analysis.SimpleAnalyzer())
    schema = fields.Schema(text=tfield)
    storage = RamStorage()
    ix = storage.create_index(schema)

    writer = ix.writer()
    for ls in permutations(["ape", "bay", "can", "day"], 4):
        writer.add_document(text=u(" ").join(ls))
    writer.commit()

    with ix.searcher() as s:
        def result(q):
            r = s.search(q, limit=None, sortedby=None)
            return sorted([d['text'] for d in r])

        q = Phrase("text", ["bay", "can", "day"])
        assert_equal(result(q), [u('ape bay can day'), u('bay can day ape')])
Example #12
def test_current_terms():
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=" ".join(ls), _stored_text=ls)
    w.commit()

    with ix.searcher() as s:
        q = query.And(
            [query.Term("text", "alfa"),
             query.Term("text", "charlie")])
        m = q.matcher(s)

        while m.is_active():
            assert_equal(sorted(m.matching_terms()), [("text", "alfa"),
                                                      ("text", "charlie")])
            m.next()
Example #13
def test_phrase_multi():
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = u("alfa bravo charlie delta echo").split()
    w = None
    for i, ls in enumerate(permutations(domain)):
        if w is None:
            w = ix.writer()
        w.add_document(id=i, text=u(" ").join(ls))
        if not i % 30:
            w.commit()
            w = None
    if w is not None:
        w.commit()

    with ix.searcher() as s:
        q = Phrase("text", ["alfa", "bravo"])
        _ = s.search(q)
Example #14
def test_lengths2():
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    count = 0
    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(u("alfa bravo charlie").split()):
            if "bravo" in ls and "charlie" in ls:
                count += 1
            w.add_document(text=u(" ").join(ls))
        w.commit(merge=False)

    with ix.searcher() as s:
        q = query.Or([query.Term("text", u("bravo")), query.Term("text", u("charlie"))])
        r = s.search(q, limit=None)
        assert_equal(len(r), count)

        r = s.search(q, limit=3)
        assert_equal(len(r), count)
Example #15
def get_index():
    global _ix

    if _ix is not None:
        return _ix

    charfield = fields.FieldType(formats.Characters(),
                                 analysis.SimpleAnalyzer(),
                                 scorable=True,
                                 stored=True)
    schema = fields.Schema(text=charfield)
    st = RamStorage()
    _ix = st.create_index(schema)

    w = _ix.writer()
    for ls in permutations(domain, 4):
        w.add_document(text=u(" ").join(ls), _stored_text=ls)
    w.commit()

    return _ix
Example #16
def test_lengths2():
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    count = 0
    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(u("alfa bravo charlie").split()):
            if "bravo" in ls and "charlie" in ls:
                count += 1
            w.add_document(text=u(" ").join(ls))
        w.commit(merge=False)

    with ix.searcher() as s:
        q = query.Or(
            [query.Term("text", u("bravo")),
             query.Term("text", u("charlie"))])
        r = s.search(q, limit=None)
        assert_equal(len(r), count)

        r = s.search(q, limit=3)
        assert_equal(len(r), count)
Example #17
def test_ordered():
    domain = u("alfa bravo charlie delta echo foxtrot").split(" ")

    schema = fields.Schema(f=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for ls in permutations(domain):
        writer.add_document(f=u(" ").join(ls))
    writer.commit()

    with ix.searcher() as s:
        q = Ordered([Term("f", u("alfa")), Term("f", u("charlie")), Term("f", "echo")])
        r = s.search(q)
        for hit in r:
            ls = hit["f"].split()
            assert "alfa" in ls
            assert "charlie" in ls
            assert "echo" in ls
            a = ls.index("alfa")
            c = ls.index("charlie")
            e = ls.index("echo")
            assert a < c and c < e, repr(ls)
Example #18
def test_resultspage():
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "bravo", "charlie", "delta")
    w = ix.writer()
    for i, lst in enumerate(permutations(domain, 3)):
        w.add_document(id=text_type(i), content=u(" ").join(lst))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("content", u("bravo"))
        r = s.search(q, limit=10)
        tops = list(r)

        rp = s.search_page(q, 1, pagelen=5)
        assert_equal(rp.scored_length(), 5)
        assert_equal(list(rp), tops[0:5])
        assert_equal(rp[10:], [])

        rp = s.search_page(q, 2, pagelen=5)
        assert_equal(list(rp), tops[5:10])

        rp = s.search_page(q, 1, pagelen=10)
        assert_equal(len(rp), 54)
        assert_equal(rp.pagecount, 6)
        rp = s.search_page(q, 6, pagelen=10)
        assert_equal(len(list(rp)), 4)
        assert rp.is_last_page()

        assert_raises(ValueError, s.search_page, q, 0)
        assert_raises(ValueError, s.search_page, q, 7)

        rp = s.search_page(query.Term("content", "glonk"), 1)
        assert_equal(len(rp), 0)
        assert rp.is_last_page()