def mv(src, out):
    """Stream the temp file *src* into *out*, then delete the source.

    Copies in ``buf_size`` chunks (flushing after each) so large temp
    files never need to fit in memory, and brackets the transfer with
    ``begin_move``/``end_move`` observer events.
    """
    source_name = src.name
    self._fire_event('begin_move', source_name)
    with fopen(source_name, mode='rb') as source:
        while True:
            chunk = source.read(buf_size)
            if not chunk:
                break
            out.write(chunk)
            out.flush()
    # Source has been fully appended to the output; drop the temp file.
    os.remove(source_name)
    self._fire_event('end_move', source_name)
def set_tag_value(filename, name, value):
    """Overwrite the value of tag *name* in slob file *filename* in place.

    Seeks past the magic bytes and the 16-byte UUID, validates the header
    encoding, skips the compression name, then walks the tag table.  Tag
    values are stored as fixed-width editable tiny text, so the new value
    is written directly over the old one.

    Raises UnknownEncoding if the header names an unknown codec, and
    TagNotFound if no tag called *name* exists.
    """
    with fopen(filename, 'rb+') as fileobj:
        # Header layout: MAGIC, 16-byte UUID, then the encoding name.
        fileobj.seek(len(MAGIC) + 16)
        encoding = read_byte_string(fileobj, U_CHAR).decode(UTF8)
        if encodings.search_function(encoding) is None:
            raise UnknownEncoding(encoding)
        writer = StructWriter(
            StructReader(fileobj, encoding=encoding),
            encoding=encoding)
        writer.read_tiny_text()  # compression name — not needed here
        tag_count = writer.read_byte()
        for _ in range(tag_count):
            key = writer.read_tiny_text()
            if key == name:
                writer.write_tiny_text(value, editable=True)
                return
            # Not the tag we want: consume its value to reach the next key.
            writer.read_tiny_text()
    raise TagNotFound(name)
def _sort(self):
    """Rewrite the ref-positions file so refs are in collation-key order.

    Computes a sorted permutation of ref indices (IDENTICAL-strength sort
    key over each ref's key), writes the corresponding positions into a
    fresh temp file, then swaps it in for the old positions file and
    reopens it for appending.
    """
    self._fire_event('begin_sort')
    sorted_positions = self._wbfopen('ref-positions-sorted')
    self.f_refs.flush()
    self.f_ref_positions.close()
    with MultiFileReader(self.f_ref_positions.name, self.f_refs.name) as reader:
        refs = RefList(reader, self.encoding, count=self.ref_count)
        keyfunc = sortkey(IDENTICAL)
        # Argsort: indices ordered by the collation key of each ref's key.
        order = sorted(range(len(refs)), key=lambda idx: keyfunc(refs[idx].key))
        for idx in order:
            sorted_positions.write_long(refs.pos(idx))
    sorted_positions.close()
    # Replace the unsorted positions file with the sorted one.
    os.remove(self.f_ref_positions.name)
    os.rename(sorted_positions.name, self.f_ref_positions.name)
    self.f_ref_positions = StructWriter(
        fopen(self.f_ref_positions.name, 'ab'),
        encoding=self.encoding)
    self._fire_event('end_sort')
def __init__(self, *args):
    """Present several files as one contiguous read-only byte stream.

    Each positional argument is either a single file name (a string) or
    an iterable of file names; all names are flattened in order.  Every
    file is opened immediately and assigned a half-open byte range within
    the combined stream, and the cursor is positioned at offset 0.
    """
    names = []
    for arg in args:
        # A bare string is one file name; anything else is a collection.
        if isinstance(arg, str):
            names.append(arg)
        else:
            names.extend(arg)
    open_files = []
    byte_ranges = []
    total = 0
    for name in names:
        length = os.stat(name).st_size
        byte_ranges.append(range(total, total + length))
        open_files.append(fopen(name, 'rb'))
        total += length
    self.size = total
    self._ranges = byte_ranges
    self._files = open_files
    self._fcount = len(open_files)
    self._offset = -1
    self.seek(0)
def _wbfopen(self, name):
    """Open *name* inside this writer's temp dir for binary writing,
    wrapped in a StructWriter that uses the writer's encoding."""
    path = os.path.join(self.tmpdir.name, name)
    return StructWriter(fopen(path, 'wb'), encoding=self.encoding)
def __init__(
        self,
        filename,
        workdir=None,
        encoding=UTF8,
        compression=DEFAULT_COMPRESSION,
        min_bin_size=512 * 1024,
        max_redirects=5,
        observer=None,
):
    """Create a writer that assembles a new slob file at *filename*.

    Refuses to overwrite an existing file (raises SystemExit), validates
    *encoding* against the codec registry, and stages all intermediate
    data (ref positions, refs, store positions, store) in a temporary
    directory that is merged into the final file by finalize().

    :param filename: path of the slob file to create; must not exist.
    :param workdir: directory for the temporary staging dir (None = system default).
    :param encoding: text encoding for keys/values written to the file.
    :param compression: name of a registered compression, or None for none.
    :param min_bin_size: minimum content-bin size before a bin is flushed.
    :param max_redirects: alias-resolution depth; when truthy, a nested
        alias writer is created.
    :param observer: callback receiving progress events via _fire_event.
    """
    self.filename = filename
    self.observer = observer
    if os.path.exists(self.filename):
        raise SystemExit('File %r already exists' % self.filename)
    # make sure we can write
    with fopen(self.filename, 'wb'):
        pass
    self.encoding = encoding
    # search_function returns None for unknown codec names.
    if encodings.search_function(self.encoding) is None:
        raise UnknownEncoding(self.encoding)
    self.workdir = workdir
    # Staging area; cleaned up explicitly in finalize().
    self.tmpdir = tmpdir = tempfile.TemporaryDirectory(
        prefix='{0}-'.format(os.path.basename(filename)),
        dir=workdir)
    self.f_ref_positions = self._wbfopen('ref-positions')
    self.f_store_positions = self._wbfopen('store-positions')
    self.f_refs = self._wbfopen('refs')
    self.f_store = self._wbfopen('store')
    self.max_redirects = max_redirects
    if max_redirects:
        # Aliases are collected by a nested Writer (itself with no alias
        # support) and resolved later.
        # NOTE(review): aliases_path/f_aliases are only defined when
        # max_redirects is truthy — presumably all alias-handling code
        # guards on max_redirects; verify before relying on these attrs.
        self.aliases_path = os.path.join(tmpdir.name, 'aliases')
        self.f_aliases = Writer(
            self.aliases_path,
            workdir=tmpdir.name,
            max_redirects=0,
            compression=None,
        )
    # None is normalized to '' (the "no compression" entry).
    if compression is None:
        compression = ''
    if compression not in COMPRESSIONS:
        raise UnknownCompression(compression)
    else:
        self.compress = COMPRESSIONS[compression].compress
    self.compression = compression
    self.content_types = {}
    self.min_bin_size = min_bin_size
    self.current_bin = None
    self.blob_count = 0
    self.ref_count = 0
    self.bin_count = 0
    # Provenance tags baked into every new file.
    self._tags = {
        'version.python': sys.version.replace('\n', ' '),
        'version.pyicu': icu.VERSION,
        'version.icu': icu.ICU_VERSION,
        'created.at': datetime.now(timezone.utc).isoformat()
    }
    # Read-only view; mutation goes through dedicated tag methods.
    self.tags = MappingProxyType(self._tags)
def finalize(self):
    """Assemble the final slob file from the staged temp files.

    Flushes the last bin, sorts refs, resolves aliases, then writes the
    header (magic, UUID, encoding, compression, tags, content types),
    the precomputed store offset and total file size, and finally
    appends the four staged files in order.  The byte-offset arithmetic
    below must match the write order exactly.
    """
    self._fire_event('begin_finalize')
    if self.current_bin is not None:
        self._write_current_bin()
    self._sort()
    if self.max_redirects:
        self._resolve_aliases()
    # Order matters: it is the order the files are appended below.
    files = (
        self.f_ref_positions,
        self.f_refs,
        self.f_store_positions,
        self.f_store,
    )
    for f in files:
        f.close()
    buf_size = 10 * 1024 * 1024
    with fopen(self.filename, mode='wb') as output_file:
        out = StructWriter(output_file, self.encoding)
        out.write(MAGIC)
        out.write(uuid4().bytes)
        # Encoding name itself is always stored in UTF-8.
        out.write_tiny_text(self.encoding, encoding=UTF8)
        out.write_tiny_text(self.compression)

        def write_tags(tags, f):
            # Tag count byte, then key/value pairs; values are written
            # editable (fixed width) so set_tag_value can patch in place.
            f.write(pack(U_CHAR, len(tags)))
            for key, value in tags.items():
                f.write_tiny_text(key)
                f.write_tiny_text(value, editable=True)
        write_tags(self.tags, out)

        def write_content_types(content_types, f):
            # Emitted in id order so stored ids index this list directly.
            count = len(content_types)
            f.write(pack(U_CHAR, count))
            types = sorted(content_types.items(), key=lambda x: x[1])
            for content_type, _ in types:
                f.write_text(content_type)
        write_content_types(self.content_types, out)

        out.write_int(self.blob_count)
        # Absolute offset of the store section: everything written so
        # far plus the two long/int header fields still to come plus the
        # ref-positions and refs payloads.
        store_offset = (
            out.tell() +
            U_LONG_LONG_SIZE +  # this value
            U_LONG_LONG_SIZE +  # file size value
            U_INT_SIZE +  # ref count value
            os.stat(self.f_ref_positions.name).st_size +
            os.stat(self.f_refs.name).st_size)
        out.write_long(store_offset)
        out.flush()
        # Total file size: header so far + remaining fixed fields + the
        # four staged payload files.
        file_size = (
            out.tell() +  # bytes written so far
            U_LONG_LONG_SIZE +  # file size value
            2 * U_INT_SIZE  # ref count and bin count
        )
        file_size += sum((os.stat(f.name).st_size for f in files))
        out.write_long(file_size)

        def mv(src, out):
            # Append staged file *src* to *out* (parameter shadows the
            # enclosing `out` on purpose) in buf_size chunks, then delete
            # it; fires begin_move/end_move progress events.
            fname = src.name
            self._fire_event('begin_move', fname)
            with fopen(fname, mode='rb') as f:
                while True:
                    data = f.read(buf_size)
                    if len(data) == 0:
                        break
                    out.write(data)
                    out.flush()
            os.remove(fname)
            self._fire_event('end_move', fname)

        out.write_int(self.ref_count)
        mv(self.f_ref_positions, out)
        mv(self.f_refs, out)
        out.write_int(self.bin_count)
        mv(self.f_store_positions, out)
        mv(self.f_store, out)
    self.tmpdir.cleanup()
    self._fire_event('end_finalize')