def stream_regular_file(self, filepath, tarinfo_buf, file_info):
    try:
        file_system = files.get_fs_by_file_path(filepath)
        with file_system.open(filepath, 'rb') as fd:
            # Read the file in CHUNKSIZE pieces; iter() stops at the empty sentinel.
            f_iter = iter(lambda: fd.read(self.CHUNKSIZE), b'')  # pylint: disable=cell-var-from-loop
            try:
                yield tarinfo_buf
                chunk = b''
                for chunk in f_iter:
                    yield chunk
                # Pad the member data out to a full 512-byte tar block.
                if len(chunk) % self.BLOCKSIZE != 0:
                    yield (self.BLOCKSIZE - (len(chunk) % self.BLOCKSIZE)) * b'\0'
            except (IOError, fs.errors.OperationFailed):
                msg = ("Error while sending file content in archive stream, "
                       "file path: %s, container: %s/%s, archive path: %s" % file_info)
                self.log.critical(msg)
                self.abort(500, msg)
    except (fs.errors.ResourceNotFound, fs.errors.OperationFailed, IOError):
        self.log.critical(
            "Couldn't find the file while creating the archive stream: %s, "
            "container: %s/%s, archive path: %s" % file_info)
        # Emit a placeholder header so the archive still records the missing file.
        tarinfo = TarInfo()
        tarinfo.name = file_info[3] + '.MISSING'
        yield tarinfo.tobuf()
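# Illustration (not from the source): why the generator above pads each member's
# data to a 512-byte boundary. Tar readers expect every member body to occupy
# whole blocks, followed by a two-block end-of-archive marker. Stdlib only;
# tar_member_bytes() is a hypothetical helper mirroring the header/data/padding
# sequence yielded by stream_regular_file().
import io
import tarfile
from tarfile import TarInfo, BLOCKSIZE

def tar_member_bytes(name, payload):
    info = TarInfo(name)
    info.size = len(payload)
    yield info.tobuf()   # 512-byte header block
    yield payload        # raw member data
    remainder = len(payload) % BLOCKSIZE
    if remainder:
        yield b'\0' * (BLOCKSIZE - remainder)  # NUL padding to a full block

blob = b''.join(tar_member_bytes('hello.txt', b'hi')) + b'\0' * (BLOCKSIZE * 2)
with tarfile.open(fileobj=io.BytesIO(blob)) as tf:
    assert tf.extractfile('hello.txt').read() == b'hi'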
def compute(self, conn, data=None):
    tarinfo = TarInfo()
    tarinfo.name = self.name
    tarinfo.mode = 0o700
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)
    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
    else:
        tarinfo.size = int(entry['length'])
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)
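# Sketch (assumption, not the project's code): compute() above relies on
# PAX_FORMAT carrying arbitrary string key/value pairs in tarinfo.pax_headers.
# 'user.policy' below is a made-up key standing in for the SCHILY-prefixed
# object properties; the round trip shows such headers survive a write/read.
import io
import tarfile
from tarfile import TarInfo, PAX_FORMAT

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode='w', format=PAX_FORMAT) as tf:
    info = TarInfo('obj')
    info.pax_headers['user.policy'] = 'THREECOPIES'  # hypothetical property
    tf.addfile(info)

buf.seek(0)
with tarfile.open(fileobj=buf) as tf:
    assert tf.getmember('obj').pax_headers['user.policy'] == 'THREECOPIES'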
def archivestream(self, ticket):
    stream = cStringIO.StringIO()
    with tarfile.open(mode='w|', fileobj=stream):
        for filepath, arcpath, cont_name, cont_id, f_size, f_modified in ticket['target']:
            # Build the tar header for this member ourselves; the file body
            # is streamed separately by the content generator below.
            tarinfo = TarInfo()
            tarinfo.name = arcpath.lstrip('/')
            tarinfo.size = f_size
            tarinfo.mtime = datetime_to_epoch(f_modified)
            tarinfo_buf = tarinfo.tobuf()

            signed_url = None
            try:
                signed_url = files.get_signed_url(filepath, config.fs)
            except fs.errors.ResourceNotFound:
                pass

            if signed_url:
                content_generator = self.stream_file_signed_url(
                    signed_url, tarinfo_buf,
                    (filepath, cont_name, cont_id, arcpath))
            else:
                content_generator = self.stream_regular_file(
                    filepath, tarinfo_buf,
                    (filepath, cont_name, cont_id, arcpath))

            for chunk in content_generator:
                yield chunk

            # log download
            self.log_user_access(
                AccessType.download_file, cont_name=cont_name,
                cont_id=cont_id, filename=os.path.basename(arcpath),
                origin_override=ticket['origin'],
                download_ticket=ticket['_id'])
    yield stream.getvalue()  # get tar stream trailer
    stream.close()
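# Sketch of the trailer trick (stdlib only, my reading of the code above):
# archivestream() never calls addfile(); it emits headers and data itself and
# keeps the empty stream-mode tarfile around solely so that leaving the `with`
# block writes the end-of-archive marker into `stream`: two zero blocks,
# padded out to a full 10240-byte record.
import io
import tarfile

stream = io.BytesIO()
with tarfile.open(mode='w|', fileobj=stream):
    pass  # no members added through the tarfile object itself
assert stream.getvalue() == b'\0' * tarfile.RECORDSIZE  # 20 NUL blocks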
def compute(self, conn, data=None):
    tarinfo = TarInfo()
    tarinfo.name = self.name
    tarinfo.mode = 0o700
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)
    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name,
                                   properties=False)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
        self._checksums = {}
        # format the MD5s to match the layout used for multi-chunk objects
        offset = 0
        for idx, ck in enumerate(self._slo):
            self._checksums[idx] = {
                'hash': ck['hash'].upper(),
                'size': ck['bytes'],
                'offset': offset
            }
            offset += ck['bytes']
    else:
        tarinfo.size = int(entry['length'])
        meta, chunks = conn.object_locate(self.acct, self.ref, self.name,
                                          properties=False)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(chunks, storage_method.ec)
        # keep only the first chunk per position, stripped of location fields
        for idx in chunks:
            chunks[idx] = chunks[idx][0]
            del chunks[idx]['url']
            del chunks[idx]['score']
            del chunks[idx]['pos']
        self._checksums = chunks
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)
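# Trivial worked example of the offset bookkeeping above, with made-up SLO
# segment data: cumulative offsets are just a running sum of segment sizes.
segments = [{'hash': 'aa11', 'bytes': 1024}, {'hash': 'bb22', 'bytes': 512}]
checksums = {}
offset = 0
for idx, seg in enumerate(segments):
    checksums[idx] = {'hash': seg['hash'].upper(), 'size': seg['bytes'],
                      'offset': offset}
    offset += seg['bytes']
assert checksums[1]['offset'] == 1024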
from os import fsdecode
from pathlib import Path
from sys import stdout
from tarfile import (TarInfo, BLOCKSIZE, GNU_FORMAT, REGTYPE, LNKTYPE, SYMTYPE,
                     CHRTYPE, BLKTYPE, DIRTYPE, FIFOTYPE)

from fruitbak import Fruitbak  # project import, assumed from the enclosing module

def tar(host, backup, share, path):
    binary_stdout = stdout.buffer
    fbak = Fruitbak(confdir=Path('/dev/shm/conf'))
    backup = fbak[host][backup]
    if path is None:
        share, path = backup.locate_path(share)
    else:
        share = backup[share]

    def iterator():
        # Queue up the content hashes of every regular file for readahead.
        for dentry in share.find(path):
            if dentry.is_file and not dentry.is_hardlink:
                yield from dentry.hashes

    with fbak.pool.agent().readahead(iterator()) as reader:
        for dentry in share.find(path):
            name = dentry.name or b'.'
            i = TarInfo(fsdecode(bytes(name)))
            i.mode = dentry.mode & 0o7777
            i.uid = dentry.uid
            i.gid = dentry.gid
            i.mtime = dentry.mtime // 1000000000  # nanoseconds to seconds
            if dentry.is_hardlink:
                i.type = LNKTYPE
                hardlink = dentry.hardlink or b'.'
                i.linkname = fsdecode(bytes(hardlink))
            elif dentry.is_file:
                i.type = REGTYPE
                i.size = dentry.size
            elif dentry.is_symlink:
                i.type = SYMTYPE
                i.linkname = fsdecode(bytes(dentry.symlink))
            elif dentry.is_chardev:
                i.type = CHRTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_blockdev:
                i.type = BLKTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_directory:
                i.type = DIRTYPE
            elif dentry.is_fifo:
                i.type = FIFOTYPE
            else:
                continue
            binary_stdout.write(i.tobuf(GNU_FORMAT))
            if dentry.is_file and not dentry.is_hardlink:
                for hash in dentry.hashes:
                    action = next(reader)
                    if action.exception:
                        raise action.exception[1]
                    binary_stdout.write(action.value)
                # Pad the member data to a 512-byte block boundary.
                padding = -i.size % BLOCKSIZE
                if padding:
                    binary_stdout.write(bytes(padding))
    binary_stdout.write(b'\0' * (BLOCKSIZE * 2))  # end-of-archive marker
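# Aside (illustrative, not from Fruitbak): Python's floored modulo makes
# `-size % BLOCKSIZE` return exactly the NUL padding a tar member needs,
# including 0 when the size is already block-aligned.
from tarfile import BLOCKSIZE

for size, expected in ((0, 0), (1, 511), (512, 0), (513, 511), (1000, 24)):
    assert -size % BLOCKSIZE == expected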