示例#1
0
def test_ordered_closest():
    """Check closest-key lookup and ordered iteration of an ordered hash file.

    Writes a sorted list of byte keys (each mapped to the ASCII-encoded length
    of the key) and then verifies ``closest_key()``, ``keys()``, ``values()``
    and ``keys_from()`` on the reader.
    """
    words = ("alfa bravo charlie delta echo foxtrot golf "
             "hotel india juliet kilo lima mike november").split()
    # Make into bytes for Python 3
    keys = [b(word) for word in words]
    values = [str(len(key)).encode("ascii") for key in keys]

    with TempStorage("orderedclosest") as st:
        writer = OrderedHashWriter(st.create_file("test.hsh"))
        writer.add_all(zip(keys, values))
        writer.close()

        reader = OrderedHashReader.open(st, "test.hsh")

        # (probe, key expected back from closest_key): a probe that is not a
        # key itself is expected to resolve to the next key in sort order.
        expectations = (
            ('', 'alfa'),
            (' ', 'alfa'),
            ('alfa', 'alfa'),
            ('bravot', 'charlie'),
            ('charlie', 'charlie'),
            ('kiloton', 'lima'),
        )
        for probe, expected in expectations:
            assert reader.closest_key(b(probe)) == b(expected)
        # A probe sorting after the last key has no closest key
        assert reader.closest_key(b('oskar')) is None

        assert list(reader.keys()) == keys
        assert list(reader.values()) == values
        # 'foxtrot' (index 5) is the first key at or after b'f'
        assert list(reader.keys_from(b('f'))) == keys[5:]
        reader.close()
def test_closed_searcher():
    """Verify that a closed searcher rejects use and a fresh one still works.

    After ``close()``, reading a hit from previously obtained results, asking
    the reader for a column reader, and calling ``suggest()`` must each raise
    ``ReaderClosed``.  A newly opened searcher on the same index must then
    answer the same operations normally.
    """
    from whoosh.reading import ReaderClosed

    schema = fields.Schema(key=fields.KEYWORD(stored=True, sortable=True))

    with TempStorage() as st:
        ix = st.create_index(schema)
        with ix.writer() as w:
            w.add_document(key=u"alfa")
            w.add_document(key=u"bravo")
            w.add_document(key=u"charlie")
            w.add_document(key=u"delta")
            w.add_document(key=u"echo")

        # Run a search, then close the searcher before touching the results
        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        s.close()
        assert s.is_closed
        with pytest.raises(ReaderClosed):
            assert r[0]["key"] == "bravo"
        with pytest.raises(ReaderClosed):
            s.reader().column_reader("key")
        with pytest.raises(ReaderClosed):
            s.suggest("key", "brovo")

        # The same operations succeed on a fresh, open searcher
        s = ix.searcher()
        try:
            r = s.search(query.TermRange("key", "b", "d"))
            assert r[0]
            assert r[0]["key"] == "bravo"
            c = s.reader().column_reader("key")
            assert c[1] == "bravo"
            assert s.suggest("key", "brovo") == ["bravo"]
        finally:
            # Release the searcher before the temporary storage is torn down
            s.close()
示例#3
0
def test_huge_postfile():
    """Write postings beyond the 5 GB offset of a file and read them back.

    The posting file is first advanced past 5 GB so that the posting writer
    starts at a large offset; ten postings are then written and re-read from
    the offset returned by ``start()``, checking id, weight and value of each.
    """
    with TempStorage("hugeindex") as st:
        pf = st.create_file("test.pst")

        gb5 = 5 * 1024 * 1024 * 1024
        pf.seek(gb5)
        # Bytes literal: the storage file is binary, so a text string would
        # be rejected on Python 3 (on Python 2 the two spellings are equal).
        pf.write(b"\x00\x00\x00\x00")
        assert_equal(pf.tell(), gb5 + 4)

        fpw = FilePostingWriter(pf)
        # Named to avoid shadowing the ``format`` builtin
        postformat = formats.Frequency(None)
        offset = fpw.start(postformat)
        for i in xrange(10):
            fpw.write(i, float(i), struct.pack("!I", i), 10)
        posttotal = fpw.finish()
        assert_equal(posttotal, 10)
        fpw.close()

        pf = st.open_file("test.pst")
        pfr = FilePostingReader(pf, offset, postformat)
        i = 0
        while pfr.is_active():
            assert_equal(pfr.id(), i)
            assert_equal(pfr.weight(), float(i))
            assert_equal(pfr.value(), struct.pack("!I", i))
            pfr.next()
            i += 1
        # Guard against the loop passing vacuously: every posting written
        # above must have been visited.
        assert_equal(i, 10)
        pf.close()
示例#4
0
def test_bigtable():
    """Round-trip a large random key/value table through a hash file.

    Builds a dict from 100000 random string pairs, writes it with
    ``HashWriter``, then checks every key lookup (in shuffled order) and the
    complete item set read back through ``HashReader``.
    """
    with TempStorage("bigtable") as st:

        def randstring(shortest, longest):
            # Random length first, then one random character per position
            length = randint(shortest, longest)
            return "".join(chr(randint(1, 255)) for _ in xrange(length))

        count = 100000
        samp = {}
        for _ in xrange(count):
            # Key is generated before its value; duplicate keys overwrite
            key = randstring(1, 50)
            samp[key] = randstring(1, 50)

        writer = HashWriter(st.create_file("big.hsh"))
        writer.add_all(iteritems(samp))
        writer.close()

        reader = HashReader(st.open_file("big.hsh"))

        # Individual lookups, in random order
        lookup_order = list(samp)
        shuffle(lookup_order)
        for key in lookup_order:
            assert_equal(samp[key], reader[key])

        # Whole-table comparison
        expected_items = set(iteritems(samp))
        stored_items = set(reader.items())
        assert_equal(expected_items, stored_items)

        reader.close()
def test_random_multistream():
    """Interleave writes to many compound sub-files and read them all back.

    One hundred randomly named 2500-byte payloads are written to a
    ``CompoundWriter`` in 1000-byte chunks, picking the sub-file to advance at
    random each time.  The result is saved as a single compound file and each
    sub-file is read back and compared with its original payload.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"

    def randstring(n):
        text = "".join(random.choice(letters) for _ in xrange(n))
        return text.encode("latin1")

    # name (bytes) -> payload (bytes)
    domain = {}
    for _ in xrange(100):
        name = randstring(random.randint(5, 10))
        domain[name] = randstring(2500)

    # Payloads still to be written, each wrapped as a readable stream
    pending = {}
    for name, value in domain.items():
        pending[name] = BytesIO(value)

    with TempStorage() as st:
        msw = compound.CompoundWriter(st, buffersize=1024)
        targets = {}
        for name in domain:
            targets[name] = msw.create_file(name)

        while pending:
            name = random.choice(list(pending))
            chunk = pending[name].read(1000)
            targets[name].write(chunk)
            # A short read means this payload has been fully consumed
            if len(chunk) < 1000:
                del pending[name]

        msw.save_as_compound(st.create_file("test"))

        msr = compound.CompoundStorage(st.open_file("test"))
        for name, value in domain.items():
            assert msr.open_file(name).read() == value
        msr.close()
示例#6
0
def test_threaded_filelock():
    """Check that a second thread obtains a file lock once it is released.

    The main thread holds the lock while a worker thread repeatedly tries to
    acquire a lock of the same name; after the main thread releases it, the
    worker is expected to succeed and record that fact.
    """
    with TempStorage("threadedfilelock") as st:
        acquired = []

        def contend():
            # Runs in the worker thread: try to take the lock, then quit.
            # try_for presumably retries for up to 1.0 s at 0.1 s intervals
            # -- confirm against its definition.
            contender = st.lock("testlock")
            if try_for(contender.acquire, 1.0, 0.1):
                acquired.append(True)
                contender.release()

        holder = st.lock("testlock")
        worker = threading.Thread(target=contend)

        # Take the lock here first so the worker initially finds it held
        holder.acquire()
        worker.start()
        # Keep holding it briefly while the worker is trying
        time.sleep(0.15)
        holder.release()
        # Let the worker finish before inspecting what it recorded
        worker.join()

        # The worker appends True only if it managed to acquire the lock
        assert acquired == [True]
示例#7
0
def test_random_hash():
    """Round-trip random byte-string pairs through a hash file.

    Keys and values are random-length samples (without repetition) of the
    ASCII letters; every key is then looked up in shuffled order.
    """
    from string import ascii_letters as domain

    times = 1000
    minlen = 1
    maxlen = len(domain)

    def randstring():
        size = random.randint(minlen, maxlen)
        return b("".join(random.sample(domain, size)))

    with TempStorage("randomhash") as st:
        samp = {}
        for _ in xrange(times):
            # Key is generated before its value; duplicate keys overwrite
            key = randstring()
            samp[key] = randstring()

        writer = HashWriter(st.create_file("test.hsh"))
        for key, value in iteritems(samp):
            writer.add(key, value)
        writer.close()

        lookup_order = list(samp)
        random.shuffle(lookup_order)
        reader = HashReader.open(st, "test.hsh")
        for key in lookup_order:
            assert reader[key] == samp[key]
        reader.close()
示例#8
0
def test_hash_contents():
    """Verify lookups and the keys/values/items views of a small hash file."""
    # Consecutive words form (key, value) pairs: alfa->bravo, charlie->delta...
    words = ("alfa bravo charlie delta echo foxtrot golf hotel "
             "india juliet kilo lima mike november oskar papa "
             "quebec romeo sierra tango ultra victor whiskey xray").split()
    # Convert to bytes
    samp = set((b(k), b(v)) for k, v in zip(words[::2], words[1::2]))

    with TempStorage("hashcontents") as st:
        writer = HashWriter(st.create_file("test.hsh"))
        writer.add_all(samp)
        writer.close()

        reader = HashReader.open(st, "test.hsh")

        # Look every pair up individually, in random order
        probes = list(samp)
        random.shuffle(probes)
        for key, value in probes:
            assert reader[key] == value

        expected_keys = set(pair[0] for pair in samp)
        expected_values = set(pair[1] for pair in samp)
        assert set(reader.keys()) == expected_keys
        assert set(reader.values()) == expected_values
        assert set(reader.items()) == samp

        reader.close()
示例#9
0
def test_words():
    """Write a word list to a graph and check it flattens back unchanged."""
    expected = enlist("alfa alpaca amtrak bellow fellow fiona zebulon")
    with TempStorage() as st:
        gwrite(expected, st)
        reader = greader(st)
        cursor = fst.Cursor(reader)
        flattened = list(cursor.flatten_strings())
        assert flattened == expected
        reader.close()
示例#10
0
def test_within():
    """Check which stored keys ``fst.within`` reports for "01" with k=1.

    NOTE(review): k is presumably the maximum edit distance -- confirm
    against the ``fst.within`` documentation.
    """
    vocabulary = enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111")
    with TempStorage() as st:
        gwrite(vocabulary, st)
        reader = greader(st)
        found = set(fst.within(reader, "01", k=1))
        reader.close()

    expected = set(
        ["0", "00", "01", "011", "010", "001", "10", "101", "1", "11"])
    assert found == expected
示例#11
0
def test_stored_fields():
    """Write stored fields for four documents and read them back.

    Documents 0, 1 and 3 get stored fields; document 2 is started and
    finished without any.  The per-document reader must then report four
    documents, return each document's stored fields (including when accessed
    out of order) and yield all of them, in order, from
    ``all_stored_fields()``.
    """
    codec = default_codec()
    fieldobj = fields.TEXT(stored=True)
    with TempStorage("storedfields") as st:
        seg = codec.new_segment(st, "test")

        dw = codec.per_document_writer(st, seg)
        dw.start_doc(0)
        dw.add_field("a", fieldobj, "hello", 1)
        dw.add_field("b", fieldobj, "there", 1)
        dw.finish_doc()

        dw.start_doc(1)
        dw.add_field("a", fieldobj, "one", 1)
        dw.add_field("b", fieldobj, "two", 1)
        dw.add_field("c", fieldobj, "three", 1)
        dw.finish_doc()

        # Document with no fields at all
        dw.start_doc(2)
        dw.finish_doc()

        dw.start_doc(3)
        dw.add_field("a", fieldobj, "alfa", 1)
        dw.add_field("b", fieldobj, "bravo", 1)
        dw.finish_doc()

        dw.close()
        seg.set_doc_count(4)

        pdr = codec.per_document_reader(st, seg)
        assert pdr.doc_count_all() == 4
        assert pdr.stored_fields(0) == {"a": "hello", "b": "there"}
        # Note: access out of order
        # (must be ``==``: with a comma the dict would only be the assert
        # message and the stored fields would never be compared)
        assert pdr.stored_fields(3) == {"a": "alfa", "b": "bravo"}
        assert pdr.stored_fields(1) == {"a": "one", "b": "two", "c": "three"}

        sfs = list(pdr.all_stored_fields())
        assert len(sfs) == 4
        assert sfs == [
            {
                "a": "hello",
                "b": "there"
            },
            {
                "a": "one",
                "b": "two",
                "c": "three"
            },
            {},
            {
                "a": "alfa",
                "b": "bravo"
            },
        ]
        pdr.close()
示例#12
0
def test_hash():
    """Store two entries in a hash file; check a hit and a miss via get()."""
    with TempStorage("hash") as st:
        writer = HashWriter(st.create_file("test.hsh"))
        for key, value in (("foo", "bar"), ("glonk", "baz")):
            writer.add(b(key), b(value))
        writer.close()

        reader = HashReader.open(st, "test.hsh")
        # Present key returns its value
        assert reader.get(b("foo")) == b("bar")
        # "baz" was only ever stored as a value, so as a key it is missing
        assert reader.get(b("baz")) is None
        reader.close()
示例#13
0
def test_random_access():
    """Write 1000 sequential entries and look them up in random order."""
    times = 1000

    def key_for(n):
        # Zero-padded 8-digit hex rendering of n, as bytes
        return b("%08x" % n)

    def value_for(n):
        # Decimal rendering of n, as bytes
        return b(str(n))

    with TempStorage("orderedhash") as st:
        writer = HashWriter(st.create_file("test.hsh"))
        writer.add_all((key_for(n), value_for(n)) for n in xrange(times))
        writer.close()

        numbers = list(range(times))
        random.shuffle(numbers)
        reader = HashReader.open(st, "test.hsh")
        for n in numbers:
            assert reader[key_for(n)] == value_for(n)
        reader.close()
示例#14
0
def test_filelock_simple():
    """Check mutual exclusion between two lock objects with the same name."""
    with TempStorage("simplefilelock") as st:
        first = st.lock("testlock")
        second = st.lock("testlock")
        # Each call hands out a distinct lock object
        assert first is not second

        # While the first lock is held, the lock file exists and the second
        # lock cannot be acquired
        assert first.acquire()
        assert st.file_exists("testlock")
        assert not second.acquire()
        first.release()

        # Roles reversed once the first lock has been released
        assert second.acquire()
        assert not first.acquire()
        second.release()
示例#15
0
def _fst_roundtrip(domain, t):
    """Write (key, value) pairs to a graph and assert they read back equal.

    :param domain: list of ``(key, value)`` pairs to insert, in order.  It is
        compared with ``==`` against a list, so it must itself be a list.
    :param t: value type passed as ``vtype`` to both the graph writer and the
        graph reader.
    """
    with TempStorage() as st:
        outfile = st.create_file("test")
        writer = fst.GraphWriter(outfile, vtype=t)
        writer.start_field("_")
        for key, value in domain:
            writer.insert(key, value)
        writer.finish_field()
        writer.close()

        infile = st.open_file("test")
        reader = fst.GraphReader(infile, vtype=t)
        cursor = fst.Cursor(reader)
        recovered = list(cursor.flatten_v())
        assert recovered == domain
        infile.close()
示例#16
0
def test_readwrite():
    """Exercise one writer thread and many searcher threads on one index.

    A writer thread creates the index and commits 50 batches of random
    documents while 20 searcher threads, started half a second apart, each
    open the index ten times and run a random term query.  The test passes if
    all threads run to completion; exceptions raised inside a thread are not
    propagated to the test.
    """
    schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    with TempStorage("threading") as st:
        domain = ("alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
                  "golf", "hotel", "india", "juliet", "kilo", "lima", "mike",
                  "november", "oscar", "papa", "quebec", "romeo", "sierra",
                  "tango", "uniform", "victor", "whiskey", "xray", "yankee",
                  "zulu")

        class WriterThread(threading.Thread):
            def run(self):
                # create_index takes the schema; the previous call passed the
                # ``dir`` builtin as its first argument by mistake
                ix = st.create_index(schema)
                num = 0

                for i in xrange(50):
                    print(i)
                    w = ix.writer()
                    for _ in xrange(random.randint(1, 100)):
                        content = u(" ").join(
                            random.sample(domain, random.randint(5, 20)))
                        w.add_document(id=text_type(num), content=content)
                        num += 1
                    w.commit()

                    time.sleep(0.1)

        class SearcherThread(threading.Thread):
            def run(self):
                print(self.name + " starting")
                for _ in xrange(10):
                    ix = st.open_index()
                    s = ix.searcher()
                    q = query.Term("content", random.choice(domain))
                    s.search(q, limit=10)
                    s.close()
                    ix.close()
                    time.sleep(0.1)
                print(self.name + " done")

        wt = WriterThread()
        wt.start()
        # Give the writer a head start so the index exists before searching
        time.sleep(0.5)
        searchers = []
        for _ in xrange(20):
            st_thread = SearcherThread()
            st_thread.start()
            searchers.append(st_thread)
            time.sleep(0.5)
        wt.join()
        # Wait for every searcher too, so none is still using the storage
        # when the ``with`` block tears it down
        for st_thread in searchers:
            st_thread.join()
示例#17
0
def test_version_in():
    """Check index existence, emptiness and the recorded version info."""
    with TempStorage("versionin") as st:
        # Nothing has been created in the fresh storage yet
        assert not index.exists(st)

        ix = st.create_index(fields.Schema(text=fields.TEXT))
        assert index.exists(st)
        assert ix.is_empty()

        # First item: library version; second item: current TOC version
        version_info = index.version(st)
        assert version_info[0] == __version__
        assert version_info[1] == index._CURRENT_TOC_VERSION

        with ix.writer() as writer:
            writer.add_document(text=u("alfa"))

        # Committing a document makes the index non-empty
        assert not ix.is_empty()
示例#18
0
def test_fields():
    """Write two fields into one graph file and read each back separately."""
    with TempStorage() as st:
        writer = fst.GraphWriter(st.create_file("test"))

        writer.start_field("f1")
        for key in ("a", "aa", "ab"):
            writer.insert(key)
        writer.finish_field()

        writer.start_field("f2")
        for key in ("ba", "baa", "bab"):
            writer.insert(key)
        # NOTE(review): "f2" is closed without an explicit finish_field();
        # presumably close() finishes the open field -- confirm.
        writer.close()

        reader = fst.GraphReader(st.open_file("test"))
        # One cursor per field, each starting from that field's root
        f1_cursor = fst.Cursor(reader, reader.root("f1"))
        f2_cursor = fst.Cursor(reader, reader.root("f2"))
        assert list(f1_cursor.flatten_strings()) == ["a", "aa", "ab"]
        assert list(f2_cursor.flatten_strings()) == ["ba", "baa", "bab"]
        reader.close()
示例#19
0
def _roundtrip(content, format_, astype, ana=None):
    """Index ``content`` into a one-field segment and read the postings back.

    :param content: the value passed to the field's ``index()`` method.
    :param format_: posting format used for the field and for the matchers.
    :param astype: name passed to each matcher's ``value_as()``.
    :param ana: analyzer for the field; a ``StandardAnalyzer`` is used when
        this is omitted (or otherwise falsy).
    :returns: a list of ``(term, value)`` tuples, one per term yielded by the
        terms reader, where ``value`` is the matcher's value decoded as
        ``astype``.
    """
    with TempStorage("roundtrip") as st:
        codec = default_codec()
        seg = codec.new_segment(st, "")
        if not ana:
            ana = analysis.StandardAnalyzer()
        field = fields.FieldType(format=format_, analyzer=ana)

        # Write every posting of the content under field "f1", document 0
        writer = codec.field_writer(st, seg)
        writer.start_field("f1", field)
        postings = sorted(field.index(content))
        for termbytes, _, weight, valuestring in postings:
            writer.start_term(termbytes)
            writer.add(0, weight, valuestring, None)
            writer.finish_term()
        writer.finish_field()
        writer.close()

        reader = codec.terms_reader(st, seg)

        def decode(fieldname, btext):
            # Open the term's matcher first, then decode term and value
            matcher = reader.matcher(fieldname, btext, format_)
            return (field.from_bytes(btext), matcher.value_as(astype))

        results = [decode(fieldname, btext)
                   for fieldname, btext in reader.terms()]
        reader.close()
        return results
示例#20
0
def test_random():
    """Store random byte-string keys in a graph and check they are found.

    Verifies that flattening the graph yields the sorted unique keys, and
    that ``find_path()`` positions a reset cursor on each key.
    """
    def randstring():
        # 1 to 5 random bytes
        length = random.randint(1, 5)
        octets = array("B", (random.randint(0, 255) for _ in xrange(length)))
        return array_tobytes(octets)

    keys = sorted(randstring() for _ in xrange(100))

    with TempStorage() as st:
        gwrite(keys, st)
        reader = greader(st)
        cursor = fst.Cursor(reader)

        # Pairwise comparison of the flattened graph with the unique keys
        stored = cursor.flatten()
        unique = sorted(set(keys))
        for position, pair in enumerate(zip(stored, unique)):
            got, wanted = pair
            assert got == wanted, "%s: %r != %r" % (position, got, wanted)

        # Look each key up directly, in random order
        shuffled = list(keys)
        random.shuffle(shuffled)
        for key in shuffled:
            cursor.reset()
            cursor.find_path(key)
            assert cursor.prefix_bytes() == key
        reader.close()
示例#21
0
def test_simple_compound_mmap():
    """Run the shared simple-compound checks on mmap-capable storage."""
    with TempStorage("compound") as storage:
        # The storage must report mmap support for this variant of the test
        assert storage.supports_mmap
        _test_simple_compound(storage)