Example #1
    def write(self, storage, indexname):
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write.
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = storage.create_file(tempfilename)

        stream.write_varint(_INT_SIZE)
        stream.write_varint(_LONG_SIZE)
        stream.write_varint(_FLOAT_SIZE)
        stream.write_int(-12345)

        stream.write_int(_INDEX_VERSION)
        for num in __version__[:3]:
            stream.write_varint(num)

        stream.write_string(pickle.dumps(schema, -1))
        stream.write_int(self.generation)
        stream.write_int(0)  # Unused
        stream.write_pickle(self.segments)
        stream.close()

        # Rename temporary file to the proper filename
        storage.rename_file(tempfilename, tocfilename, safe=True)
Example #2
    def write(self, storage, indexname):
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write.
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = storage.create_file(tempfilename)

        stream.write_varint(_INT_SIZE)
        stream.write_varint(_LONG_SIZE)
        stream.write_varint(_FLOAT_SIZE)
        stream.write_int(-12345)

        stream.write_int(_INDEX_VERSION)
        for num in __version__[:3]:
            stream.write_varint(num)

        stream.write_string(pickle.dumps(schema, -1))
        stream.write_int(self.generation)
        stream.write_int(0)  # Unused
        stream.write_pickle(self.segments)
        stream.close()

        # Rename temporary file to the proper filename
        storage.rename_file(tempfilename, tocfilename, safe=True)
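For context, write above is an instance method that is normally called with a Storage backend and an index name. Below is a minimal sketch of driving it directly, assuming the Whoosh 2.x layout where the TOC class lives in whoosh.index and FileStorage in whoosh.filedb.filestore; the directory name, field names, and the "MAIN" index name are only illustrative:

    import os
    from whoosh.fields import Schema, TEXT, ID
    from whoosh.filedb.filestore import FileStorage
    from whoosh.index import TOC  # assumed import path (Whoosh 2.x layout)

    schema = Schema(path=ID(stored=True), body=TEXT)
    if not os.path.exists("indexdir"):   # illustrative directory name
        os.mkdir("indexdir")
    storage = FileStorage("indexdir")

    # Generation 0 with an empty segment list, as the create() classmethod
    # shown further below does.
    TOC(schema, [], 0).write(storage, "MAIN")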
Example #3
    def read(cls, storage, indexname, gen=None, schema=None):
        if gen is None:
            gen = cls._latest_generation(storage, indexname)
            if gen < 0:
                raise EmptyIndexError("Index %r does not exist in %r" %
                                      (indexname, storage))

        # Read the content of this index from the .toc file.
        tocfilename = cls._filename(indexname, gen)
        stream = storage.open_file(tocfilename)

        def check_size(name, target):
            sz = stream.read_varint()
            if sz != target:
                raise IndexError("Index was created on different architecture:"
                                 " saved %s = %s, this computer = %s" %
                                 (name, sz, target))

        check_size("int", _INT_SIZE)
        check_size("long", _LONG_SIZE)
        check_size("float", _FLOAT_SIZE)

        if not stream.read_int() == -12345:
            raise IndexError("Number misread: byte order problem")

        version = stream.read_int()
        release = (stream.read_varint(), stream.read_varint(),
                   stream.read_varint())

        if version != _CURRENT_TOC_VERSION:
            if version in toc_loaders:
                loader = toc_loaders[version]
                schema, segments = loader(stream, gen, schema, version)
            else:
                raise IndexVersionError("Can't read format %s" % version,
                                        version)
        else:
            # If the user supplied a schema object with the constructor, don't
            # load the pickled schema from the saved index.
            if schema:
                stream.skip_string()
            else:
                schema = pickle.loads(stream.read_string())
            schema = ensure_schema(schema)

            # Generation
            index_gen = stream.read_int()
            assert gen == index_gen

            _ = stream.read_int()  # Unused
            segments = stream.read_pickle()

        stream.close()
        return cls(schema, segments, gen, version=version, release=release)
Example #4
    def read(cls, storage, indexname, gen=None, schema=None):
        if gen is None:
            gen = cls._latest_generation(storage, indexname)
            if gen < 0:
                raise EmptyIndexError("Index %r does not exist in %r"
                                      % (indexname, storage))

        # Read the content of this index from the .toc file.
        tocfilename = cls._filename(indexname, gen)
        stream = storage.open_file(tocfilename)

        def check_size(name, target):
            sz = stream.read_varint()
            if sz != target:
                raise IndexError("Index was created on different architecture:"
                                 " saved %s = %s, this computer = %s"
                                 % (name, sz, target))

        check_size("int", _INT_SIZE)
        check_size("long", _LONG_SIZE)
        check_size("float", _FLOAT_SIZE)

        if not stream.read_int() == -12345:
            raise IndexError("Number misread: byte order problem")

        version = stream.read_int()
        release = (stream.read_varint(), stream.read_varint(),
                   stream.read_varint())

        if version != _CURRENT_TOC_VERSION:
            if version in toc_loaders:
                loader = toc_loaders[version]
                schema, segments = loader(stream, gen, schema, version)
            else:
                raise IndexVersionError("Can't read format %s" % version,
                                        version)
        else:
            # If the user supplied a schema object with the constructor, don't
            # load the pickled schema from the saved index.
            if schema:
                stream.skip_string()
            else:
                schema = pickle.loads(stream.read_string())
            schema = ensure_schema(schema)

            # Generation
            index_gen = stream.read_int()
            assert gen == index_gen

            _ = stream.read_int()  # Unused
            segments = stream.read_pickle()

        stream.close()
        return cls(schema, segments, gen, version=version, release=release)
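The read side mirrors the write layout field for field. A minimal round-trip sketch, assuming the storage and index name used in the earlier write sketch; read is a classmethod, and passing a schema object makes it skip the pickled schema stored in the file:

    from whoosh.filedb.filestore import FileStorage
    from whoosh.index import TOC  # assumed import path (Whoosh 2.x layout)

    storage = FileStorage("indexdir")      # illustrative directory name
    toc = TOC.read(storage, "MAIN")        # gen=None reads the latest generation
    print(toc.generation, len(toc.segments))

    # Supplying a schema object skips unpickling the stored schema string.
    toc2 = TOC.read(storage, "MAIN", schema=toc.schema)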
Example #5
    def create(cls, storage, schema, indexname=_DEF_INDEX_NAME):
        schema = ensure_schema(schema)

        # Clear existing files
        prefix = "_%s_" % indexname
        for filename in storage:
            if filename.startswith(prefix):
                storage.delete_file(filename)

        # Write a TOC file with an empty list of segments
        toc = cls(schema, [], 0)
        toc.write(storage, indexname)
Example #6
    def create(cls, storage, schema, indexname=_DEF_INDEX_NAME):
        schema = ensure_schema(schema)

        # Clear existing files
        prefix = "_%s_" % indexname
        for filename in storage:
            if filename.startswith(prefix):
                storage.delete_file(filename)

        # Write a TOC file with an empty list of segments
        toc = cls(schema, [], 0)
        toc.write(storage, indexname)
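In normal use this create() path is reached through Whoosh's public helper rather than called directly. A minimal sketch using the documented API (directory and field names are illustrative):

    import os
    from whoosh import index
    from whoosh.fields import Schema, TEXT, ID

    schema = Schema(path=ID(stored=True), body=TEXT)
    if not os.path.exists("indexdir"):
        os.mkdir("indexdir")

    # create_in() wraps a FileStorage and ultimately writes the generation-0
    # TOC with an empty segment list, as in the classmethod above.
    ix = index.create_in("indexdir", schema)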
Example #7
    def __init__(self, storage, schema=None, indexname=_DEF_INDEX_NAME):
        if not isinstance(storage, Storage):
            raise ValueError("%r is not a Storage object" % storage)
        if not isinstance(indexname, string_type):
            raise ValueError("indexname %r is not a string" % indexname)

        if schema:
            schema = ensure_schema(schema)

        self.storage = storage
        self._schema = schema
        self.indexname = indexname

        # Try reading the TOC to verify the index exists and can be opened
        TOC.read(self.storage, self.indexname, schema=self._schema)
Example #8
    def __init__(self, storage, schema=None, indexname=_DEF_INDEX_NAME):
        if not isinstance(storage, Storage):
            raise ValueError("%r is not a Storage object" % storage)
        if not isinstance(indexname, string_type):
            raise ValueError("indexname %r is not a string" % indexname)

        if schema:
            schema = ensure_schema(schema)

        self.storage = storage
        self._schema = schema
        self.indexname = indexname

        # Try reading the TOC to verify the index exists and can be opened
        TOC.read(self.storage, self.indexname, schema=self._schema)
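The constructor above validates its arguments and then probes the TOC once, so a missing or incompatible index fails at open time rather than on first use. A minimal sketch of the usual entry point, assuming the directory created in the earlier sketches (the directory name is illustrative; the index name defaults to "MAIN"):

    from whoosh import index

    # open_dir() builds a FileStorage for the directory and calls this
    # constructor; EmptyIndexError or index version errors surface here.
    ix = index.open_dir("indexdir", indexname="MAIN")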
Example #9
    def write(self, storage, indexname):
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write.
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = storage.create_file(tempfilename)

        stream.write_varint(_INT_SIZE)
        stream.write_varint(_LONG_SIZE)
        stream.write_varint(_FLOAT_SIZE)
        stream.write_int(-12345)

        stream.write_int(_CURRENT_TOC_VERSION)
        for num in __version__[:3]:
            stream.write_varint(num)

        try:
            stream.write_string(pickle.dumps(schema, 2))
        except pickle.PicklingError:
            # Try to narrow down the error to a single field
            for fieldname, field in schema.items():
                try:
                    pickle.dumps(field)
                except pickle.PicklingError:
                    e = sys.exc_info()[1]
                    raise pickle.PicklingError("%s %s=%r" %
                                               (e, fieldname, field))
                except TypeError:
                    e = sys.exc_info()[1]
                    raise TypeError("%s %s=%r" % (e, fieldname, field))
            # Otherwise, re-raise the original exception
            raise

        stream.write_int(self.generation)
        stream.write_int(0)  # Unused
        stream.write_pickle(self.segments)
        stream.close()

        # Rename temporary file to the proper filename
        storage.rename_file(tempfilename, tocfilename, safe=True)
Example #10
    def write(self, storage, indexname):
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = storage.create_file(tempfilename)
        stream = ChecksumFile(stream)

        # Write the sanity checks and version numbers
        self._write_preamble(stream)

        # Write pickles as strings to allow them to be skipped
        try:
            stream.write_string(pickle.dumps(schema, -1))
        except pickle.PicklingError:
            # Try to narrow down the error to a single field
            for fieldname, field in schema.items():
                try:
                    pickle.dumps(field)
                except pickle.PicklingError:
                    e = sys.exc_info()[1]
                    raise pickle.PicklingError("%s %s=%r"
                                               % (e, fieldname, field))
            # Otherwise, re-raise the original exception
            raise

        # Write the list of segments
        stream.write_varint(len(self.segments))
        for segment in self.segments:
            # Write the segment's module and class name before the pickle to
            # possibly allow later versions to load the segment differently
            # based on the class (for backwards compatibility)
            segtype = segment.__class__
            typename = "%s.%s" % (segtype.__module__, segtype.__name__)
            stream.write_string(typename.encode("latin1"))
            stream.write_string(pickle.dumps(segment, -1))

        stream.write_uint(stream.checksum())
        stream.close()
        storage.rename_file(tempfilename, tocfilename, safe=True)
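The ChecksumFile wrapper in the listing above lets a reader detect a truncated or corrupted TOC: everything written before the final write_uint is folded into a running checksum, and the stored value can be recomputed and compared when the file is read back. A generic sketch of the idea using zlib.crc32 (not the ChecksumFile implementation itself; the payload bytes are a stand-in):

    import zlib

    payload = b"...serialized TOC body..."              # stand-in for the written bytes
    blob = payload + zlib.crc32(payload).to_bytes(4, "big")

    body, stored = blob[:-4], int.from_bytes(blob[-4:], "big")
    assert zlib.crc32(body) == stored                    # mismatch would mean corruption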
Example #11
    def write(self, storage, indexname):
        schema = ensure_schema(self.schema)
        schema.clean()

        # Use a temporary file for atomic write.
        tocfilename = self._filename(indexname, self.generation)
        tempfilename = '%s.%s' % (tocfilename, time())
        stream = storage.create_file(tempfilename)

        stream.write_varint(_INT_SIZE)
        stream.write_varint(_LONG_SIZE)
        stream.write_varint(_FLOAT_SIZE)
        stream.write_int(-12345)

        stream.write_int(_CURRENT_TOC_VERSION)
        for num in __version__[:3]:
            stream.write_varint(num)

        try:
            stream.write_string(pickle.dumps(schema, 2))
        except pickle.PicklingError:
            # Try to narrow down the error to a single field
            for fieldname, field in schema.items():
                try:
                    pickle.dumps(field)
                except pickle.PicklingError:
                    e = sys.exc_info()[1]
                    raise pickle.PicklingError("%s %s=%r" % (e, fieldname, field))
                except TypeError:
                    e = sys.exc_info()[1]
                    raise TypeError("%s %s=%r" % (e, fieldname, field))
            # Otherwise, re-raise the original exception
            raise

        stream.write_int(self.generation)
        stream.write_int(0)  # Unused
        stream.write_pickle(self.segments)
        stream.close()

        # Rename temporary file to the proper filename
        storage.rename_file(tempfilename, tocfilename, safe=True)
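The try/except around pickle.dumps in the last three listings exists to point at the specific unpicklable field instead of reporting a generic failure. A small self-contained illustration of the failure mode being narrowed down (the BadField class and its analyzer attribute are hypothetical, not part of Whoosh; the exact exception type raised for an unpicklable attribute varies across Python versions):

    import pickle

    class BadField(object):
        def __init__(self):
            # A lambda stored on the instance cannot be pickled.
            self.analyzer = lambda text: text.split()

    try:
        pickle.dumps(BadField(), 2)
    except Exception as e:
        # PicklingError, TypeError, or AttributeError depending on the Python
        # version; the TOC code re-raises it annotated with the field name.
        print(type(e).__name__, e)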