def test_random_multistream():
    """Round-trip randomly interleaved sub-file writes through a compound file.

    Up to 100 randomly named sub-files (name collisions collapse in the
    dict) are each given 2500 random bytes.  The payloads are fed to a
    ``CompoundWriter`` in 1000-byte chunks, picking a random sub-file for
    every chunk, then everything is saved as one compound file and each
    sub-file is read back and compared with what was written.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"

    def randstring(n):
        # n random lowercase letters, returned as bytes
        s = "".join(random.choice(letters) for _ in xrange(n))
        return s.encode("latin1")

    # Expected content, keyed by sub-file name
    domain = {}
    for _ in xrange(100):
        name = randstring(random.randint(5, 10))
        value = randstring(2500)
        domain[name] = value

    # One readable stream per sub-file so the payload can be consumed in
    # chunks; an entry is dropped once its stream is exhausted
    outfiles = dict((name, BytesIO(value)) for name, value in domain.items())

    with TempStorage() as st:
        msw = compound.CompoundWriter(st, buffersize=1024)
        mfiles = dict((name, msw.create_file(name)) for name in domain)
        while outfiles:
            name = random.choice(list(outfiles))
            v = outfiles[name].read(1000)
            mfiles[name].write(v)
            # A short read means this stream has nothing left to give
            if len(v) < 1000:
                del outfiles[name]
        f = st.create_file("test")
        msw.save_as_compound(f)

        f = st.open_file("test")
        msr = compound.CompoundStorage(f)
        try:
            for name, value in domain.items():
                assert msr.open_file(name).read() == value
        finally:
            # Close the reader even when an assertion fails, so the storage
            # is not left with an open file when the ``with`` block exits
            msr.close()
def test_multistream():
    """Check that interleaved writes to sub-files are reassembled per file.

    Chunks for the sub-files "a", "b" and "c" are written in mixed order
    through a ``CompoundWriter``; after saving as a compound file each
    sub-file must read back as the concatenation of its own chunks in the
    order they were written.
    """
    # (sub-file name, chunk) pairs in the order they are written
    domain = [("a", "12345"), ("b", "abc"), ("c", "AaBbC"),
              ("a", "678"), ("c", "cDdEeF"), ("b", "defgh"),
              ("b", "ijk"), ("c", "fGgHh"), ("a", "9abc")]

    st = RamStorage()
    msw = compound.CompoundWriter(st)
    files = dict((name, msw.create_file(name)) for name in "abc")
    for name, data in domain:
        files[name].write(b(data))
    f = st.create_file("test")
    msw.save_as_compound(f)

    f = st.open_file("test")
    msr = compound.CompoundStorage(f)
    try:
        assert msr.open_file("a").read() == b("123456789abc")
        assert msr.open_file("b").read() == b("abcdefghijk")
        assert msr.open_file("c").read() == b("AaBbCcDdEeFfGgHh")
    finally:
        # Release the reader whether or not the assertions pass
        msr.close()
예제 #3
0
    def __init__(self, codec, storage, segment):
        """Initialise the writer's state and its column storage.

        Keeps references to the three arguments, asks ``storage`` for a
        temporary storage named ``"<segment.indexname>.tmp"``, wraps it in a
        ``compound.CompoundWriter`` and creates the ``"_stored"`` column.

        :param codec: kept as ``self._codec``; not otherwise used here.
        :param storage: must provide ``temp_storage(name)``.
        :param segment: must provide an ``indexname`` attribute.
        """
        self._codec = codec
        self._storage = storage
        self._segment = segment

        # Column data goes to a temporary storage named after the segment
        tempst = storage.temp_storage("%s.tmp" % segment.indexname)
        self._cols = compound.CompoundWriter(tempst)
        # Starts empty; presumably filled by _create_column -- TODO confirm
        self._colwriters = {}
        self._create_column("_stored", STORED_COLUMN)

        # Missing keys read as 0
        self._fieldlengths = defaultdict(int)
        self._doccount = 0
        # No document is in progress yet
        self._docnum = None
        self._storedfields = None
        self._indoc = False
        self.is_closed = False

        # We'll wait to create the vector file until someone actually tries
        # to add a vector
        self._vpostfile = None