Пример #1
0
    def read(cls, storage, indexname, gen=None, schema=None):
        """Load a TOC file from ``storage`` and build a ``cls`` instance.

        :param storage: object providing ``open_file(name)`` for the TOC file.
        :param indexname: index name; combined with the generation by
            ``cls._filename`` to form the TOC file name.
        :param gen: generation number to load. If ``None``, the result of
            ``cls._latest_generation(storage, indexname)`` is used.
        :param schema: passed through to the loader function; the schema the
            loader returns is the one used for the resulting object.
        :returns: ``cls(schema, segments, gen, version=..., release=...)``.
        :raises EmptyIndexError: if ``gen`` is ``None`` and the latest
            generation is negative.
        :raises IndexVersionError: if the TOC version is not the current one
            and ``toc_loaders`` has no loader for it.
        :raises Exception: if a current-version TOC's stored checksum differs
            from the checksum computed while reading it.
        """
        if gen is None:
            gen = cls._latest_generation(storage, indexname)
            if gen < 0:
                raise EmptyIndexError("Index %r does not exist in %r"
                                      % (indexname, storage))

        tocfilename = cls._filename(indexname, gen)
        stream = ChecksumFile(storage.open_file(tocfilename))

        # Everything below can raise; the finally clause guarantees the file
        # is released on error paths too, not only on success.
        try:
            # Do general sanity checks at the beginning and read the version
            # numbers
            toc_version, release = cls._read_preamble(stream)

            if toc_version == _CURRENT_TOC_VERSION:
                schema, segments = cls._read_info(stream, gen, schema,
                                                  toc_version)
                # Take the running checksum *before* reading the stored one,
                # so the stored value itself is not part of the comparison.
                file_check = stream.checksum()
                orig_check = stream.read_uint()
                if file_check != orig_check:
                    raise Exception("TOC checksum does not match %d != %d"
                                    % (file_check, orig_check))
            elif toc_version in toc_loaders:
                # A backwards-compatible loader exists for this version; use
                # it to load the rest of the TOC. No checksum comparison is
                # performed on this path.
                loader = toc_loaders[toc_version]
                schema, segments = loader(stream, gen, schema, toc_version)
            else:
                raise IndexVersionError("Can't read format %s" % toc_version,
                                        toc_version)
        finally:
            stream.close()

        return cls(schema, segments, gen, version=toc_version, release=release)
Пример #2
0
    def write(self, storage, indexname):
        """Write this TOC into ``storage`` under the name for ``indexname``.

        The data is first written to a temporary file (the final file name
        with the current ``time()`` appended) and then renamed onto the final
        name, so the final name only appears once the content is complete.

        Layout written, in order: the preamble (``self._write_preamble``), the
        pickled schema as a length-prefixed string, a varint segment count,
        then for each segment its ``module.ClassName`` string followed by its
        pickle, and finally the checksum of everything written before it.

        :param storage: object providing ``create_file`` and ``rename_file``.
        :param indexname: index name used to derive the TOC file name.
        :raises pickle.PicklingError: if the schema cannot be pickled. When a
            single offending field can be identified, the message names it.

        If writing fails, the temporary stream is closed and the rename is
        skipped; the temporary file itself is not removed here.
        """
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = ChecksumFile(storage.create_file(tempfilename))

        # Any failure below must still close the stream; only a fully written
        # file is renamed into place.
        try:
            # Write the sanity checks and version numbers
            self._write_preamble(stream)

            # Write pickles as strings to allow them to be skipped
            try:
                stream.write_string(pickle.dumps(schema, -1))
            except pickle.PicklingError:
                # Try to narrow down the error to a single field
                for fieldname, field in schema.items():
                    try:
                        pickle.dumps(field)
                    except pickle.PicklingError:
                        e = sys.exc_info()[1]
                        raise pickle.PicklingError("%s %s=%r"
                                                   % (e, fieldname, field))
                # Otherwise, re-raise the original exception
                raise

            # Write the list of segments
            stream.write_varint(len(self.segments))
            for segment in self.segments:
                # Write the segment's module and class name before the pickle
                # to possibly allow later versions to load the segment
                # differently based on the class (for backwards compatibility)
                segtype = segment.__class__
                typename = "%s.%s" % (segtype.__module__, segtype.__name__)
                stream.write_string(typename.encode("latin1"))
                stream.write_string(pickle.dumps(segment, -1))

            # Trailer: checksum of everything written so far
            stream.write_uint(stream.checksum())
        finally:
            stream.close()

        storage.rename_file(tempfilename, tocfilename, safe=True)
Пример #3
0
def test_checksum_file():
    """Check that ChecksumFile's checksum matches zlib's CRC32 of the data.

    The same payload is written once through a plain file (to compute the
    reference CRC32 of the resulting bytes) and once through a ChecksumFile
    wrapper; the wrapper's checksum must equal the reference both after
    writing the payload and after reading it back.
    """
    from whoosh.filedb.structfile import ChecksumFile
    from zlib import crc32

    def emit_payload(out):
        # Fixed mix of raw bytes and structured values
        out.write(b("Testing"))
        out.write_int(-100)
        out.write_varint(10395)
        out.write_string(b("Hello"))
        out.write_ushort(32959)

    storage = RamStorage()

    # Reference copy, written without any checksumming
    plain = storage.create_file("control")
    emit_payload(plain)
    plain.close()

    # CRC32 of the reference bytes, masked to an unsigned 32-bit value
    plain = storage.open_file("control")
    payload = plain.read()
    expected_crc = crc32(payload) & 0xffffffff
    plain.close()

    # Same payload written through the checksumming wrapper
    raw = storage.create_file("test")
    summed = ChecksumFile(raw)
    emit_payload(summed)
    assert summed.checksum() == expected_crc
    raw.close()

    # Read it back through the wrapper, value by value
    raw = storage.open_file("test")
    summed = ChecksumFile(raw)
    assert summed.read(7) == b("Testing")
    assert summed.read_int() == -100
    assert summed.read_varint() == 10395
    assert summed.read_string() == b("Hello")
    assert summed.read_ushort() == 32959
    assert summed.checksum() == expected_crc
    summed.close()
Пример #4
0
def test_checksum_file():
    """Verify ChecksumFile against an independently computed CRC32.

    Steps: write a fixed payload to an ordinary file and CRC32 its bytes;
    write the same payload through ChecksumFile and compare checksums; then
    read the payload back through ChecksumFile, checking each value and the
    final checksum.
    """
    from whoosh.filedb.structfile import ChecksumFile
    from zlib import crc32

    def write_sample(dest):
        # The sample data: raw bytes, an int, a varint, a string, a ushort
        dest.write(b("Testing"))
        dest.write_int(-100)
        dest.write_varint(10395)
        dest.write_string(b("Hello"))
        dest.write_ushort(32959)

    ram = RamStorage()

    # Step 1: un-checksummed control file
    control = ram.create_file("control")
    write_sample(control)
    control.close()

    # Step 2: reference checksum of the control file's contents
    control = ram.open_file("control")
    control_bytes = control.read()
    reference = crc32(control_bytes) & 0xffffffff
    control.close()

    # Step 3: write the sample through a ChecksumFile
    backing = ram.create_file("test")
    wrapper = ChecksumFile(backing)
    write_sample(wrapper)
    written_sum = wrapper.checksum()
    assert written_sum == reference
    backing.close()

    # Step 4: read the sample back through a ChecksumFile
    backing = ram.open_file("test")
    wrapper = ChecksumFile(backing)
    assert wrapper.read(7) == b("Testing")
    assert wrapper.read_int() == -100
    assert wrapper.read_varint() == 10395
    assert wrapper.read_string() == b("Hello")
    assert wrapper.read_ushort() == 32959
    read_sum = wrapper.checksum()
    assert read_sum == reference
    wrapper.close()