def write(self, storage, indexname):
    """Serialize this TOC (schema + segment list) to a ``.toc`` file in
    ``storage`` under ``indexname`` for the current generation.

    The file is first written to a temporary name and then renamed so
    readers never observe a half-written TOC. The exact order of writes
    below defines the on-disk format and must match the reader.

    :param storage: storage object providing ``create_file``/``rename_file``.
    :param indexname: name of the index this TOC belongs to.
    """
    schema = ensure_schema(self.schema)
    schema.clean()

    # Use a temporary file for atomic write.
    tocfilename = self._filename(indexname, self.generation)
    tempfilename = '%s.%s' % (tocfilename, time())
    stream = storage.create_file(tempfilename)

    # Architecture sanity header: native int/long/float sizes, so a reader
    # on a different platform can detect an incompatible index.
    stream.write_varint(_INT_SIZE)
    stream.write_varint(_LONG_SIZE)
    stream.write_varint(_FLOAT_SIZE)
    # Byte-order canary; the reader checks this exact value.
    stream.write_int(-12345)

    # NOTE(review): this variant stamps _INDEX_VERSION while the other
    # write() variants in this file stamp _CURRENT_TOC_VERSION -- confirm
    # which constant is the current format number.
    stream.write_int(_INDEX_VERSION)
    # Library release number (major, minor, micro).
    for num in __version__[:3]:
        stream.write_varint(num)

    # Pickled schema is length-prefixed so readers can skip it.
    stream.write_string(pickle.dumps(schema, -1))
    stream.write_int(self.generation)
    stream.write_int(0)  # Unused
    stream.write_pickle(self.segments)
    stream.close()

    # Rename temporary file to the proper filename
    storage.rename_file(tempfilename, tocfilename, safe=True)
def read(cls, storage, indexname, gen=None, schema=None):
    """Load and return a TOC object from the ``.toc`` file in ``storage``.

    :param storage: storage object providing ``open_file``.
    :param indexname: name of the index to read.
    :param gen: generation number to read; if None, the latest generation
        found in the storage is used.
    :param schema: optional schema object supplied by the caller; when
        given, the pickled schema stored in the TOC file is skipped.
    :raises EmptyIndexError: if no generation exists for ``indexname``.
    :raises IndexError: on architecture-size or byte-order mismatch.
    :raises IndexVersionError: if the format version is unknown.
    """
    if gen is None:
        gen = cls._latest_generation(storage, indexname)
        if gen < 0:
            raise EmptyIndexError("Index %r does not exist in %r"
                                  % (indexname, storage))

    # Read the content of this index from the .toc file.
    tocfilename = cls._filename(indexname, gen)
    stream = storage.open_file(tocfilename)

    # Verify that the native type sizes recorded at write time match this
    # machine; the index is not portable across differing architectures.
    def check_size(name, target):
        sz = stream.read_varint()
        if sz != target:
            raise IndexError("Index was created on different architecture:"
                             " saved %s = %s, this computer = %s"
                             % (name, sz, target))

    check_size("int", _INT_SIZE)
    check_size("long", _LONG_SIZE)
    check_size("float", _FLOAT_SIZE)

    # Byte-order canary written by write(); a misread means endianness
    # differs between writer and reader.
    if not stream.read_int() == -12345:
        raise IndexError("Number misread: byte order problem")

    version = stream.read_int()
    # Release number of the library that wrote the file (major, minor, micro).
    release = (stream.read_varint(), stream.read_varint(),
               stream.read_varint())

    if version != _CURRENT_TOC_VERSION:
        # Older format: delegate the rest of the stream to a registered
        # version-specific loader, if one exists.
        if version in toc_loaders:
            loader = toc_loaders[version]
            schema, segments = loader(stream, gen, schema, version)
        else:
            raise IndexVersionError("Can't read format %s" % version,
                                    version)
    else:
        # If the user supplied a schema object with the constructor, don't
        # load the pickled schema from the saved index.
        if schema:
            stream.skip_string()
        else:
            schema = pickle.loads(stream.read_string())
        schema = ensure_schema(schema)

        # Generation
        index_gen = stream.read_int()
        assert gen == index_gen

        _ = stream.read_int()  # Unused
        segments = stream.read_pickle()

    stream.close()
    return cls(schema, segments, gen, version=version, release=release)
def create(cls, storage, schema, indexname=_DEF_INDEX_NAME):
    """Create a brand-new, empty index named ``indexname`` in ``storage``.

    Any files belonging to a previous index of the same name are deleted
    first, then a generation-0 TOC with no segments is written.

    :param storage: storage object providing iteration and ``delete_file``.
    :param schema: schema for the new index (validated via ensure_schema).
    :param indexname: name of the index; defaults to _DEF_INDEX_NAME.
    """
    schema = ensure_schema(schema)

    # Remove leftovers from any earlier index with this name; its files
    # all share the "_<indexname>_" prefix.
    stale_prefix = "_%s_" % indexname
    for name in storage:
        if not name.startswith(stale_prefix):
            continue
        storage.delete_file(name)

    # Write a generation-0 TOC with an empty segment list.
    cls(schema, [], 0).write(storage, indexname)
def __init__(self, storage, schema=None, indexname=_DEF_INDEX_NAME):
    """Open an existing index in ``storage``.

    :param storage: a Storage instance holding the index files.
    :param schema: optional schema object; when given it overrides the
        schema pickled inside the index's TOC file.
    :param indexname: name of the index; defaults to _DEF_INDEX_NAME.
    :raises ValueError: if ``storage`` is not a Storage or ``indexname``
        is not a string.
    """
    # Validate argument types up front with clear error messages.
    if not isinstance(storage, Storage):
        raise ValueError("%r is not a Storage object" % storage)
    if not isinstance(indexname, string_type):
        raise ValueError("indexname %r is not a string" % indexname)

    self.storage = storage
    self._schema = ensure_schema(schema) if schema else schema
    self.indexname = indexname

    # Fail fast: reading the TOC raises if the index is missing or
    # unreadable, so a bad open surfaces here rather than on first use.
    TOC.read(self.storage, self.indexname, schema=self._schema)
def write(self, storage, indexname):
    """Serialize this TOC to a ``.toc`` file in ``storage``, narrowing any
    schema-pickling failure down to the offending field.

    Written to a temporary name first and then renamed so readers never
    see a partial TOC. The write order defines the on-disk format.

    :param storage: storage object providing ``create_file``/``rename_file``.
    :param indexname: name of the index this TOC belongs to.
    :raises pickle.PicklingError: if the schema (or one of its fields)
        cannot be pickled; the message names the bad field when possible.
    """
    schema = ensure_schema(self.schema)
    schema.clean()

    # Use a temporary file for atomic write.
    tocfilename = self._filename(indexname, self.generation)
    tempfilename = '%s.%s' % (tocfilename, time())
    stream = storage.create_file(tempfilename)

    # Architecture sanity header: native type sizes plus a byte-order
    # canary, checked by the reader.
    stream.write_varint(_INT_SIZE)
    stream.write_varint(_LONG_SIZE)
    stream.write_varint(_FLOAT_SIZE)
    stream.write_int(-12345)

    stream.write_int(_CURRENT_TOC_VERSION)
    # Library release number (major, minor, micro).
    for num in __version__[:3]:
        stream.write_varint(num)

    try:
        # Protocol 2 here (other writers in this file use -1); length-
        # prefixed so readers can skip the pickled schema.
        stream.write_string(pickle.dumps(schema, 2))
    except pickle.PicklingError:
        # Try to narrow down the error to a single field
        for fieldname, field in schema.items():
            try:
                pickle.dumps(field)
            except pickle.PicklingError:
                # sys.exc_info()[1] instead of "as e" keeps this code
                # compatible with both Python 2 and 3 syntax.
                e = sys.exc_info()[1]
                raise pickle.PicklingError("%s %s=%r" % (e, fieldname, field))
            except TypeError:
                e = sys.exc_info()[1]
                raise TypeError("%s %s=%r" % (e, fieldname, field))
        # Otherwise, re-raise the original exception
        raise

    stream.write_int(self.generation)
    stream.write_int(0)  # Unused
    stream.write_pickle(self.segments)
    stream.close()

    # Rename temporary file to the proper filename
    storage.rename_file(tempfilename, tocfilename, safe=True)
def write(self, storage, indexname):
    """Serialize this TOC to a checksummed ``.toc`` file in ``storage``.

    The stream is wrapped in a ChecksumFile so a running checksum of all
    written bytes can be appended at the end for integrity checking.
    Written to a temporary name and renamed so readers never observe a
    partial file. The write order defines the on-disk format.

    :param storage: storage object providing ``create_file``/``rename_file``.
    :param indexname: name of the index this TOC belongs to.
    :raises pickle.PicklingError: if the schema (or one of its fields)
        cannot be pickled; the message names the bad field when possible.
    """
    schema = ensure_schema(self.schema)
    schema.clean()

    # Use a temporary file for atomic write
    tocfilename = self._filename(indexname, self.generation)
    tempfilename = '%s.%s' % (tocfilename, time())
    stream = storage.create_file(tempfilename)
    # Wrap the stream so every byte written is folded into a checksum.
    stream = ChecksumFile(stream)

    # Write the sanity checks and version numbers
    self._write_preamble(stream)

    # Write pickles as strings to allow them to be skipped
    try:
        stream.write_string(pickle.dumps(schema, -1))
    except pickle.PicklingError:
        # Try to narrow down the error to a single field
        for fieldname, field in schema.items():
            try:
                pickle.dumps(field)
            except pickle.PicklingError:
                # sys.exc_info()[1] keeps Python 2/3-compatible syntax.
                e = sys.exc_info()[1]
                raise pickle.PicklingError("%s %s=%r" % (e, fieldname, field))
        # Otherwise, re-raise the original exception
        raise

    # Write the list of segments
    stream.write_varint(len(self.segments))
    for segment in self.segments:
        # Write the segment's module and class name before the pickle to
        # possibly allow later versions to load the segment differently
        # based on the class (for backwards compatibility)
        segtype = segment.__class__
        typename = "%s.%s" % (segtype.__module__, segtype.__name__)
        stream.write_string(typename.encode("latin1"))
        stream.write_string(pickle.dumps(segment, -1))

    # Trailing checksum over everything written above.
    stream.write_uint(stream.checksum())
    stream.close()
    storage.rename_file(tempfilename, tocfilename, safe=True)