Пример #1
0
def test_sorting():
    """Check the order of search results for several ``sortedby`` values.

    Four documents are indexed whose ``name`` and ``size`` values sort in
    different orders; the stored ``id`` of each hit is then used to verify
    the order of the results when sorting by field name and by a reversed
    ``FieldFacet``.
    """
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED,
                           name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)

    # (name, size) pairs; the position of each pair is used as the stored id.
    rows = ((u("bravo"), 10),
            (u("alfa"), 9),
            (u("delta"), 8),
            (u("charlie"), 7))
    with ix.writer() as writer:
        for docid, (name, size) in enumerate(rows):
            writer.add_document(id=docid, name=name, size=size)

    def hit_ids(results):
        # Reduce a results object to the list of stored ids, in hit order.
        return [hit["id"] for hit in results]

    with ix.searcher() as searcher:
        everything = query.Every()

        by_name = searcher.search(everything, sortedby="name")
        assert_equal(hit_ids(by_name), [1, 0, 3, 2])

        by_size = searcher.search(everything, sortedby="size")
        assert_equal(hit_ids(by_size), [3, 2, 1, 0])

        descending_size = sorting.FieldFacet("size", reverse=True)
        by_size_desc = searcher.search(everything, sortedby=descending_size)
        assert_equal(hit_ids(by_size_desc), [0, 1, 2, 3])
Пример #2
0
def test_missing_term_docfreq():
    """Check doc_frequency() for an unknown field and for an unknown term.

    The schema only defines ``id``, so a lookup in the ``content`` field is
    expected to raise TermNotFound, while a term that does not occur in
    ``id`` is expected to report a document frequency of zero.
    """
    ix = RamIndex(fields.Schema(id=fields.ID))
    for value in (u("alfa bravo charlie"),
                  u("charlie delta echo"),
                  u("sierra tango xray")):
        ix.add_document(id=value)

    assert_raises(TermNotFound, ix.doc_frequency, "content", "foo")
    assert_equal(ix.doc_frequency("id", "foo"), 0)
Пример #3
0
def test_update():
    """Check that update_document() replaces documents sharing a unique id.

    Two batches of four words are indexed, each word keyed by its first
    letter.  The second batch reuses the keys of the first, so afterwards
    the index is expected to report document numbers 0-3 as deleted and to
    hold only the four documents of the second batch.
    """
    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           text=fields.ID(stored=True))
    ix = RamIndex(schema)

    batches = (u("alfa bravo charlie delta"),
               u("apple burrito cat dollhouse"))
    for batch in batches:
        for word in batch.split():
            # The first letter is the unique key, the whole word the payload.
            ix.update_document(id=word[0], text=word)

    assert ix.has_deletions()
    assert_equal(ix.deleted, set(range(4)))
    assert_equal(ix.doc_count(), 4)

    stored = [(doc["id"], doc["text"]) for doc in ix.all_stored_fields()]
    expected = [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                ("d", "dollhouse")]
    assert_equal(stored, expected)
Пример #4
0
def test_update():
    """Verify that updating by a unique key supersedes the older documents.

    Words from two sentences are written with ``update_document``, using the
    first letter of each word as the unique ``id``.  Both sentences produce
    the keys a, b, c and d, so only the words of the second sentence are
    expected to remain, with the first four document numbers deleted.
    """
    id_field = fields.ID(unique=True, stored=True)
    text_field = fields.ID(stored=True)
    ix = RamIndex(fields.Schema(id=id_field, text=text_field))

    def upsert_words(sentence):
        # Write every word of the sentence under its first letter.
        for token in sentence.split():
            ix.update_document(id=token[0], text=token)

    upsert_words(u("alfa bravo charlie delta"))
    upsert_words(u("apple burrito cat dollhouse"))

    assert ix.has_deletions()
    assert_equal(ix.deleted, set([0, 1, 2, 3]))
    assert_equal(ix.doc_count(), 4)

    remaining = [(fs["id"], fs["text"]) for fs in ix.all_stored_fields()]
    assert (remaining
            == [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                ("d", "dollhouse")])
Пример #5
0
def test_threaded():
    """Update a RamIndex from one thread while another thread searches it.

    The writer thread calls ``update_document`` 1000 times with the same
    value for the unique ``key`` field, so each update is expected to
    replace the previous document.  The reader thread keeps searching for
    that key until it is told to stop.  Once both threads have finished, the
    index is expected to contain exactly one document.
    """
    from threading import Thread

    class TWriter(Thread):
        """Thread that repeatedly updates the document with key "a"."""

        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix

        def run(self):
            ix = self.ix
            for i in xrange(1000):
                ix.update_document(id=text_type(i), key=u("a"))

    class TReader(Thread):
        """Thread that searches for key "a" until ``go`` is set to False."""

        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix
            self.go = True

        def run(self):
            s = self.ix.searcher()
            while self.go:
                r = s.search(query.Term("key", u("a")))
                # NOTE: an AssertionError raised here only terminates this
                # thread; it is not re-raised in the thread running the test.
                assert_equal(len(r), 1)

    schema = fields.Schema(id=fields.ID(stored=True),
                           key=fields.ID(unique=True, stored=True))
    ix = RamIndex(schema)
    tw = TWriter(ix)
    tr = TReader(ix)
    tw.start()
    tr.start()
    try:
        tw.join()
    finally:
        # Always tell the reader to stop, even if waiting for the writer is
        # interrupted; otherwise its search loop would never terminate.
        tr.go = False
        tr.join()

    assert_equal(ix.doc_count(), 1)
    with ix.searcher() as s:
        assert_equal(len(list(s.documents(key="a"))), 1)
Пример #6
0
def test_missing_term_docfreq():
    """Verify how doc_frequency() reports fields and terms it cannot find.

    ``content`` is not part of the schema, and the lookup is expected to
    raise TermNotFound.  ``foo`` never occurs in the existing ``id`` field,
    and that lookup is expected to return 0 instead of raising.
    """
    schema = fields.Schema(id=fields.ID)
    index = RamIndex(schema)

    contents = [u("alfa bravo charlie"),
                u("charlie delta echo"),
                u("sierra tango xray")]
    for text in contents:
        index.add_document(id=text)

    unknown_field_lookup = ("content", "foo")
    assert_raises(TermNotFound, index.doc_frequency, *unknown_field_lookup)

    frequency = index.doc_frequency("id", "foo")
    assert_equal(frequency, 0)
Пример #7
0
def test_threaded():
    """Exercise concurrent updates and searches on a single RamIndex.

    A writer thread performs 1000 ``update_document`` calls that all share
    the unique ``key`` value "a", while a reader thread searches for that
    key in a loop.  After the writer is done and the reader has been
    stopped, the index is expected to hold a single document for the key.
    """
    from threading import Thread

    class TWriter(Thread):
        """Writer thread: 1000 updates of the same unique key."""

        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix

        def run(self):
            ix = self.ix
            for i in xrange(1000):
                ix.update_document(id=text_type(i), key=u("a"))

    class TReader(Thread):
        """Reader thread: searches until its ``go`` flag is cleared."""

        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix
            self.go = True

        def run(self):
            s = self.ix.searcher()
            while self.go:
                r = s.search(query.Term("key", u("a")))
                # NOTE: a failure of this assertion ends only the reader
                # thread; the exception does not propagate to the test.
                assert_equal(len(r), 1)

    schema = fields.Schema(id=fields.ID(stored=True),
                           key=fields.ID(unique=True, stored=True))
    ix = RamIndex(schema)
    tw = TWriter(ix)
    tr = TReader(ix)
    tw.start()
    tr.start()
    try:
        tw.join()
    finally:
        # Clear the flag unconditionally so the reader loop ends even when
        # the wait for the writer is interrupted or raises.
        tr.go = False
        tr.join()

    assert_equal(ix.doc_count(), 1)
    with ix.searcher() as s:
        assert_equal(len(list(s.documents(key="a"))), 1)
Пример #8
0
def test_sorting():
    """Verify result order for ``sortedby`` given as a name or a facet.

    The documents are stored with ids 0-3.  Each search matches every
    document, and the sequence of stored ids in the results is compared
    with the expected order for sorting by ``name``, by ``size`` and by a
    reversed ``FieldFacet`` on ``size``.
    """
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED, name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)

    docs = [
        dict(id=0, name=u("bravo"), size=10),
        dict(id=1, name=u("alfa"), size=9),
        dict(id=2, name=u("delta"), size=8),
        dict(id=3, name=u("charlie"), size=7),
    ]
    with ix.writer() as w:
        for doc in docs:
            w.add_document(**doc)

    with ix.searcher() as s:
        match_all = query.Every()

        # Sorting by the name field.
        results = s.search(match_all, sortedby="name")
        ids = [hit["id"] for hit in results]
        assert_equal(ids, [1, 0, 3, 2])

        # Sorting by the numeric size field.
        results = s.search(match_all, sortedby="size")
        ids = [hit["id"] for hit in results]
        assert_equal(ids, [3, 2, 1, 0])

        # Sorting by size again, this time through a reversed facet.
        reversed_size = sorting.FieldFacet("size", reverse=True)
        results = s.search(match_all, sortedby=reversed_size)
        ids = [hit["id"] for hit in results]
        assert_equal(ids, [0, 1, 2, 3])
Пример #9
0
def test_block_info():
    """Check the block-quality statistics of a term's postings.

    Five documents are indexed into a KEYWORD field and the postings for
    the term "bravo" are inspected.  The term appears in documents holding
    3, 2 and 8 space-separated words, which matches the expected minimum
    and maximum block lengths of 2 and 8.
    """
    schema = fields.Schema(key=fields.KEYWORD)
    ix = RamIndex(schema)
    ix.add_document(key=u("alfa bravo charlie"))
    ix.add_document(key=u("bravo delta"))
    ix.add_document(key=u("charlie delta echo foxtrot"))
    ix.add_document(key=u("delta echo foxtrot golf hotel india"))
    ix.add_document(key=u("echo foxtrot golf hotel india juliet alfa bravo"))

    # Use the searcher as a context manager so it is closed when the test
    # finishes, including when one of the assertions fails.
    with ix.searcher() as s:
        p = s.postings("key", "bravo")
        assert p.supports_block_quality()
        assert_equal(p.block_min_length(), 2)
        assert_equal(p.block_max_length(), 8)
        # NOTE(review): "wol" presumably means weight over length (one
        # occurrence in the two-word document gives 0.5) -- confirm.
        assert_equal(p.block_max_wol(), 0.5)
Пример #10
0
def test_missing_postings():
    """Check that postings() raises TermNotFound for missing fields/terms."""
    ix = RamIndex(fields.Schema(id=fields.ID))
    ix.add_document(id=u("one"))

    # First a field that is not in the schema, then an existing field in
    # which the requested term was never indexed.
    for fieldname in ("content", "id"):
        assert_raises(TermNotFound, ix.postings, fieldname, "foo")
Пример #11
0
def test_empty_field():
    """Add documents whose fields are omitted, None or empty strings.

    The test contains no assertions; it passes as long as none of the
    ``add_document`` calls raises.
    """
    schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
    ix = RamIndex(schema)

    documents = [
        dict(id=u("alfa"), text=u("bravo")),    # both fields present
        dict(id=u("charlie")),                  # text omitted
        dict(text=u("delta")),                  # id omitted
        dict(id=u("echo"), text=None),          # text is None
        dict(id=None, text=u("foxtrot")),       # id is None
        dict(id=u("golf"), text=u("")),         # text is empty
        dict(id=u(""), text=u("hotel")),        # id is empty
        dict(id=u(""), text=u("")),             # both empty
    ]
    for doc in documents:
        ix.add_document(**doc)
Пример #12
0
def make_index():
    """Build and return a RamIndex holding seven sample documents.

    The schema has a stored ``id``, a ``text`` field that uses a
    StandardAnalyzer created with ``stoplist=None`` and a ``Frequency``
    vector format, and a stored integer ``subs`` field.
    """
    analyzer = analysis.StandardAnalyzer(stoplist=None)
    text_field = fields.TEXT(analyzer=analyzer, vector=formats.Frequency())
    schema = fields.Schema(id=fields.ID(stored=True),
                           text=text_field,
                           subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(schema)

    # (id, text, subs) for every sample document, in insertion order.
    samples = (
        (u("fieldtype"),
         u("The FieldType object supports the following attributes"),
         56),
        (u("format"),
         u("the storage format for the field contents"),
         100),
        (u("vector"),
         u("the storage format for the field vectors (forward index)"),
         23),
        (u("scorable"),
         u("whether searches against this field may be scored."),
         34),
        (u("stored"),
         u("whether the content of this field is stored for each document."),
         575),
        (u("unique"),
         u("whether this field value is unique to each document."),
         2),
        (u("const"),
         u("The constructor for the base field type simply"),
         58204),
    )
    for docid, text, subs in samples:
        ix.add_document(id=docid, text=text, subs=subs)
    return ix
Пример #13
0
def test_block_info():
    """Verify block-quality information reported for the term "bravo".

    The KEYWORD field receives five values.  "bravo" occurs in the values
    with 3, 2 and 8 space-separated words, in line with the expected block
    minimum length of 2 and maximum length of 8.
    """
    schema = fields.Schema(key=fields.KEYWORD)
    ix = RamIndex(schema)
    ix.add_document(key=u("alfa bravo charlie"))
    ix.add_document(key=u("bravo delta"))
    ix.add_document(key=u("charlie delta echo foxtrot"))
    ix.add_document(key=u("delta echo foxtrot golf hotel india"))
    ix.add_document(key=u("echo foxtrot golf hotel india juliet alfa bravo"))

    # The with-block closes the searcher on exit, whether the assertions
    # pass or fail.
    with ix.searcher() as s:
        p = s.postings("key", "bravo")
        assert p.supports_block_quality()
        assert_equal(p.block_min_length(), 2)
        assert_equal(p.block_max_length(), 8)
        # NOTE(review): 0.5 looks like weight / length for the two-word
        # document -- confirm the meaning of "wol" against the matcher API.
        assert_equal(p.block_max_wol(), 0.5)
Пример #14
0
def test_missing_postings():
    """Verify that asking for absent postings raises TermNotFound.

    Two lookups are made: one in ``content``, which the schema does not
    define, and one for a term that was never added to the ``id`` field.
    """
    id_only_schema = fields.Schema(id=fields.ID)
    index = RamIndex(id_only_schema)
    index.add_document(id=u("one"))

    get_postings = index.postings
    assert_raises(TermNotFound, get_postings, "content", "foo")
    assert_raises(TermNotFound, get_postings, "id", "foo")
Пример #15
0
def test_empty_field():
    """Index documents with absent, None and empty-string field values.

    Nothing is asserted explicitly: the test succeeds when every
    ``add_document`` call completes without raising.
    """
    index = RamIndex(fields.Schema(id=fields.ID(stored=True),
                                   text=fields.TEXT))
    add = index.add_document

    # Regular document with both fields filled in.
    add(id=u("alfa"), text=u("bravo"))

    # One of the two fields left out entirely.
    add(id=u("charlie"))
    add(text=u("delta"))

    # One of the two fields explicitly set to None.
    add(id=u("echo"), text=None)
    add(id=None, text=u("foxtrot"))

    # Empty strings in one field or in both.
    add(id=u("golf"), text=u(""))
    add(id=u(""), text=u("hotel"))
    add(id=u(""), text=u(""))
Пример #16
0
def make_index():
    """Create the sample RamIndex used as a fixture and return it.

    Seven documents are added.  Each has a stored ``id``, a ``text`` value
    (analyzed by a StandardAnalyzer built with ``stoplist=None`` and indexed
    with a ``Frequency`` vector) and a stored integer ``subs`` value.
    """
    ana = analysis.StandardAnalyzer(stoplist=None)
    sc = fields.Schema(
        id=fields.ID(stored=True),
        text=fields.TEXT(analyzer=ana, vector=formats.Frequency()),
        subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(sc)

    def add(docid, text, subs):
        # Thin wrapper so each sample below fits on a few short lines.
        ix.add_document(id=docid, text=text, subs=subs)

    add(u("fieldtype"),
        u("The FieldType object supports the following attributes"),
        56)
    add(u("format"),
        u("the storage format for the field contents"),
        100)
    add(u("vector"),
        u("the storage format for the field vectors (forward index)"),
        23)
    add(u("scorable"),
        u("whether searches against this field may be scored."),
        34)
    add(u("stored"),
        u("whether the content of this field is stored for each document."),
        575)
    add(u("unique"),
        u("whether this field value is unique to each document."),
        2)
    add(u("const"),
        u("The constructor for the base field type simply"),
        58204)
    return ix