import tarfile
from datetime import datetime
from tarfile import TarFile, TarInfo
from zipfile import ZipFile


def zip2tar(zip_file: str, tar_file, tar_mode: str = 'w:gz'):
    """Repack a zip archive into a tar archive.

    :param zip_file: path to the source zip file
    :param tar_file: writable binary file object for the tar output
    :param tar_mode: open mode, see `tarfile.TarFile.open`
    :return: None
    """
    zip_file = ZipFile(file=zip_file, mode='r')
    tar_file = TarFile.open(fileobj=tar_file, mode=tar_mode)
    try:
        for zip_info in zip_file.infolist():
            tar_info = TarInfo(name=zip_info.filename)
            tar_info.size = zip_info.file_size
            tar_info.mtime = datetime.now().timestamp()
            # The upper 16 bits of external_attr hold the Unix mode.
            # https://stackoverflow.com/a/434689/11722440
            tar_info.mode = zip_info.external_attr >> 16
            # https://stackoverflow.com/a/18432983/11722440
            # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
            # TODO(whg): preserve non-regular entries (e.g. symbolic links)
            # instead of converting them to regular files in the tar.
            if zip_info.filename.endswith('/'):
                tar_info.type = tarfile.DIRTYPE
            else:
                tar_info.type = tarfile.REGTYPE
            infile = zip_file.open(zip_info.filename)
            tar_file.addfile(tar_info, infile)
    finally:
        tar_file.close()
        zip_file.close()

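# A minimal usage sketch for zip2tar above (not from the original source);
# 'archive.zip' and 'archive.tar.gz' are placeholder paths.
with open('archive.tar.gz', 'wb') as out:
    zip2tar('archive.zip', out, tar_mode='w:gz')
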
def _add_entry(self, name, type, mode, size, data):
    info = TarInfo(name)
    info.type = type
    info.mode = mode
    info.size = size
    info.mtime = time.time()
    self._tarfile.addfile(info, data)

def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0o755) == '-rwxr-xr-x'
            info.mode = 0o755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, six.text_type):
        encoding = self.getEncoding() or 'utf-8'
        text = text.encode(encoding)
    if isinstance(text, six.binary_type):
        stream = BytesIO(text)
        info.size = len(text)
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile(info, stream)

from os import path
from tarfile import DIRTYPE, TarInfo
from time import time


def GetTarInfo(filename, filetype=DIRTYPE, mode=0o755):
    """Create information for tar files"""
    tarinfo = TarInfo(path.basename(filename))
    tarinfo.type = filetype
    tarinfo.mode = mode
    tarinfo.mtime = time()
    return tarinfo

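# A minimal usage sketch for GetTarInfo above (not from the original
# source); 'out.tar' and the 'build' entry name are placeholders.
from tarfile import TarFile

with TarFile.open('out.tar', mode='w') as tar:
    tar.addfile(GetTarInfo('build', DIRTYPE, 0o755))
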
def writeDataFile( self, filename, text, content_type, subdir=None ):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join( ( subdir, filename ) )

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0o755) == '-rwxr-xr-x'
            info.mode = 0o755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, basestring):
        stream = StringIO(text)
        info.size = len(text)
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile( info, stream )

def _build_image_tar(self, target_path):
    with tarfile.open(target_path, "w:gz") as image_tar:
        for directory in [
            ["./", "var", "lib", "dpkg", "info"],
            ["./", "var", "log"],
        ]:
            info = TarInfo("./" + Path(*directory).as_posix())
            info.type = tarfile.DIRTYPE
            image_tar.addfile(info)
        for file in [["var", "log", "dpkg.log"]]:
            image_tar.addfile(TarInfo("./" + Path(*file).as_posix()))
        status_file = io.BytesIO()
        for deb_file in self.files:
            deb_file.unpack_into_tar(image_tar, status_file)
        status_info = TarInfo(
            "./" + Path("var", "lib", "dpkg", "status").as_posix())
        status_info.size = status_file.getbuffer().nbytes
        status_file.seek(0)
        image_tar.addfile(status_info, status_file)
        status_file.close()

def writeDataFile( self, filename, text, content_type, subdir=None ):
    """ See IExportContext.
    """
    mod_time = time.time()

    if subdir is not None:
        elements = subdir.split('/')
        parents = filter(None, elements)
        while parents:
            dirname = os.path.join(*parents)
            try:
                self._archive.getmember(dirname+'/')
            except KeyError:
                info = TarInfo(dirname)
                info.size = 0
                info.mode = 509  # 509 == 0o775 (rwxrwxr-x)
                info.mtime = mod_time
                info.type = DIRTYPE
                self._archive.addfile(info, StringIO())
            parents = parents[:-1]
        filename = '/'.join( ( subdir, filename ) )

    stream = StringIO( text )
    info = TarInfo( filename )
    info.size = len( text )
    info.mode = 436  # 436 == 0o664 (rw-rw-r--)
    info.mtime = mod_time
    self._archive.addfile( info, stream )

def writeDataFile( self, filename, text, content_type, subdir=None ):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join( ( subdir, filename ) )

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0o755) == '-rwxr-xr-x'
            info.mode = 0o755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, str):
        stream = StringIO(text)
        info.size = len(text)
    elif isinstance(text, unicode):
        raise ValueError("Unicode text is not supported, even if it only "
                         "contains ascii. Please encode your data. See "
                         "GS 1.7.0 changes for more")
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile( info, stream )

def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0o755) == '-rwxr-xr-x'
            info.mode = 0o755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, str):
        stream = StringIO(text)
        info.size = len(text)
    elif isinstance(text, unicode):
        raise ValueError("Unicode text is not supported, even if it only "
                         "contains ascii. Please encode your data")
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile(info, stream)

def file_filter(info: tarfile.TarInfo):
    # Normalize member metadata so the resulting archive is deterministic;
    # `executable` is captured from the enclosing scope.
    info.mode = 0o00777 if executable else 0o00666
    info.mtime = 0
    info.type = tarfile.REGTYPE
    info.uid = info.gid = 0
    info.uname = info.gname = "root"
    info.pax_headers = {}
    return info

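# A minimal usage sketch for file_filter above (not from the original
# source): filters like this are passed to TarFile.add() to normalize
# member metadata for reproducible archives. Assumes file_filter and the
# `executable` flag share a scope; 'out.tar' and 'hello.txt' are
# placeholders.
import tarfile

executable = False
with tarfile.open('out.tar', 'w') as tar:
    tar.add('hello.txt', filter=file_filter)
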
def create_dir(self, path):
    """Create a directory within the tarfile.

    :param path: the path to put the directory at.
    """
    tarinfo = TarInfo(name=path)
    tarinfo.type = DIRTYPE
    tarinfo.mode = 0o755
    self._set_defaults(tarinfo)
    self.addfile(tarinfo)

from datetime import datetime, timezone
from io import BytesIO
from tarfile import REGTYPE, TarFile, TarInfo


def add_to_tar(tar: TarFile, data: bytes, filename: str):
    tarinfo = TarInfo(name=filename)
    tarinfo.size = len(data)
    # Use an aware UTC datetime; timestamp() on a naive utcnow() value
    # would interpret it as local time.
    tarinfo.mtime = int(datetime.now(timezone.utc).timestamp())
    tarinfo.mode = 436  # 436 == 0o664 (rw-rw-r--)
    tarinfo.type = REGTYPE  # b'0': regular file
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = "0"
    tar.addfile(tarinfo, BytesIO(data))

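# A minimal usage sketch for add_to_tar above (not from the original
# source), reusing the imports above: writes one member into an
# in-memory tar stream.
buf = BytesIO()
with TarFile.open(fileobj=buf, mode='w') as tar:
    add_to_tar(tar, b'hello world\n', 'hello.txt')
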
def _addMember(path, data, modtime):
    from tarfile import DIRTYPE

    # Add each parent directory (cumulative path, not the bare component,
    # and excluding the file name itself) before the member.
    elements = path.split('/')[:-1]
    current = ''
    for element in filter(None, elements):
        current += element + '/'
        info = TarInfo()
        info.name = current
        info.size = 0
        info.mtime = modtime  # was `mod_time`, an undefined name
        info.type = DIRTYPE
        archive.addfile(info, StringIO())
    _addOneMember(path, data, modtime)

def _materialize_layers(self, directory, idx):
    for layer in reversed(self.layers[-idx:]):
        self._load_layer(directory, layer)
    count = sum(
        [len(layer.getnames()) for layer in self.loaded_layers.values()])
    with tqdm(total=count, desc="Slimming down image") as t:
        with tarfile.open(os.path.join(directory, "image.tar"),
                          "w:gz") as image_tar:
            for layer in reversed(self.layers[-idx:]):
                with tarfile.open(
                        os.path.join(directory, layer,
                                     "layer.tar")) as layer_tar:
                    for member in layer_tar:
                        if self._is_doc(
                                member.name
                        ) or member.name in image_tar.getnames():
                            continue
                        if member.islnk() or member.issym():
                            if member.linkname.startswith(
                                    "usr") or member.linkname.startswith(
                                        "bin"):
                                name = member.linkname
                            else:
                                name = Path(
                                    Path(member.name).parent,
                                    member.linkname)
                            target = os.path.normpath(name).lstrip("/")
                            if target not in (layer_tar.getnames() +
                                              image_tar.getnames()):
                                self._find_file(directory, target,
                                                image_tar)
                            image_tar.addfile(member)
                        elif member.isfile():
                            fileobj = layer_tar.extractfile(member)
                            self._check_binary(directory, layer_tar,
                                               image_tar, fileobj)
                            image_tar.addfile(member, fileobj)
                        else:
                            image_tar.addfile(member)
                        t.update(1)
            for base_dir in ["tmp", "var", "sys", "proc", "run"]:
                if base_dir not in image_tar.getnames():
                    info = TarInfo(base_dir)
                    info.type = tarfile.DIRTYPE
                    image_tar.addfile(info)
    self.client.api.import_image(os.path.join(directory, "image.tar"),
                                 repository=self.image)
    for layer in self.loaded_layers.values():
        layer.close()

def _addMember(filename, data, modtime):
    from tarfile import DIRTYPE

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            info.mtime = modtime
            archive.addfile(info)
        parents.pop()
    _addOneMember(filename, data, modtime)

def _add_entry(
    self,
    name: str,
    type: bytes,
    mode: int,
    mtime: int,
    size: int,
    data: Optional[IO[bytes]],
    linkname: str = "",
) -> None:
    info = TarInfo(name)
    info.type = type
    info.mode = mode
    info.size = size
    info.mtime = mtime
    info.linkname = linkname
    return self._inner.addfile(info, data)

def compute(self, conn, data=None):
    tarinfo = TarInfo()
    tarinfo.name = self.name
    tarinfo.mode = 0o700  # was `tarinfo.mod`, which TarInfo's __slots__ rejects
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)

    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
    else:
        tarinfo.size = int(entry['length'])
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)

def get_image(self, image):
    if not image:
        raise APIError(HTTPError('500 Server Error'), None,
                       explanation='Usage: image_export IMAGE [IMAGE...]')

    layers = []
    next_layer_id = image
    while next_layer_id:
        layer = normalizeimage(self._findlayer(next_layer_id), copy=True)
        layers.append(layer)
        next_layer_id = layers[-1][':parent_id']

    image_file = BytesIO()
    mtime = time()
    with tarfile_open(mode='w', fileobj=image_file) as image_tar_file:
        for layer in layers:
            ti_dir = TarInfo(layer[':id'])
            ti_dir.mtime = mtime
            ti_dir.mode = 0o755
            ti_dir.type = DIRTYPE
            image_tar_file.addfile(ti_dir)

            layer_tar_src_path = ospath_join(self._my_dir, 'data',
                                             layer[':short_id'], 'layer.tar')
            with open(layer_tar_src_path, 'rb') as layer_tar_src_file:
                layer_tar_dst_path = '{}/layer.tar'.format(layer[':id'])
                ti_layer = image_tar_file.gettarinfo(layer_tar_src_path,
                                                     layer_tar_dst_path)
                ti_layer.mtime = mtime
                ti_layer.mode = 0o644
                ti_layer.uid = ti_layer.gid = 0
                ti_layer.uname = ti_layer.gname = ''
                image_tar_file.addfile(ti_layer,
                                       fileobj=layer_tar_src_file)

    image_file.seek(0)
    return image_file

def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    stream = StringIO(text)
    info = TarInfo(filename)
    info.size = len(text)
    info.mtime = time.time()
    self._archive.addfile(info, stream)

def uploadDF(dataflowName):
    dataflowStr = None
    udfs = {}
    dataflowPath = os.path.join(path, "dataflows", dataflowName)
    with open(os.path.join(dataflowPath, "dataflowInfo.json"), 'r') as df:
        dataflowStr = df.read()
    if os.path.exists(dataflowPath + "/udfs/"):
        for udf in os.listdir(os.path.join(dataflowPath, "udfs")):
            with open(os.path.join(dataflowPath, "udfs", udf), 'r') as udfFile:
                udfs[udf] = udfFile.read()

    retinaBuf = io.BytesIO()
    with tarfile.open(fileobj=retinaBuf, mode="w:gz") as tar:
        # Size must be the byte length, which can exceed the character
        # count for non-ASCII content, so encode once and reuse.
        dataflowBytes = dataflowStr.encode("utf-8")
        info = TarInfo("dataflowInfo.json")
        info.size = len(dataflowBytes)
        tar.addfile(info, io.BytesIO(dataflowBytes))

        # udfs directory
        if udfs:
            info = TarInfo("udfs")
            info.type = tarfile.DIRTYPE
            info.mode = 0o755
            tar.addfile(info)

            # Add each udf to the directory above
            for udfName, udfCode in udfs.items():
                udfBytes = udfCode.encode("utf-8")
                info = TarInfo(name="udfs/" + udfName)
                info.size = len(udfBytes)
                info.mode = 0o755
                tar.addfile(info, io.BytesIO(udfBytes))

    try:
        retina.delete(dataflowName)
    except Exception:
        print("Dataflow deletion failed!", dataflowName, availableRetinas)
    retina.add(dataflowName, retinaBuf.getvalue())

def compute(self, conn, data=None):
    tarinfo = TarInfo()
    tarinfo.name = self.name
    tarinfo.mode = 0o700  # was `tarinfo.mod`, which TarInfo's __slots__ rejects
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)

    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name,
                                   properties=False)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)

        self._checksums = {}
        # format MD5 to share same format as multi chunks object
        offset = 0
        for idx, ck in enumerate(self._slo):
            self._checksums[idx] = {
                'hash': ck['hash'].upper(),
                'size': ck['bytes'],
                'offset': offset
            }
            offset += ck['bytes']
    else:
        tarinfo.size = int(entry['length'])

        meta, chunks = conn.object_locate(self.acct, self.ref, self.name,
                                          properties=False)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(chunks, storage_method.ec)
        for idx in chunks:
            chunks[idx] = chunks[idx][0]
            del chunks[idx]['url']
            del chunks[idx]['score']
            del chunks[idx]['pos']
        self._checksums = chunks
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)

def tar(host, backup, share, path):
    binary_stdout = stdout.buffer

    fbak = Fruitbak(confdir=Path('/dev/shm/conf'))
    backup = fbak[host][backup]
    if path is None:
        share, path = backup.locate_path(share)
    else:
        share = backup[share]

    def iterator():
        for dentry in share.find(path):
            if dentry.is_file and not dentry.is_hardlink:
                yield from dentry.hashes

    with fbak.pool.agent().readahead(iterator()) as reader:
        for dentry in share.find(path):
            name = dentry.name or b'.'
            i = TarInfo(fsdecode(bytes(name)))
            i.mode = dentry.mode & 0o7777
            i.uid = dentry.uid
            i.gid = dentry.gid
            i.mtime = dentry.mtime // 1000000000
            if dentry.is_hardlink:
                i.type = LNKTYPE
                hardlink = dentry.hardlink or b'.'
                i.linkname = fsdecode(bytes(hardlink))
            elif dentry.is_file:
                i.type = REGTYPE
                i.size = dentry.size
            elif dentry.is_symlink:
                i.type = SYMTYPE
                i.linkname = fsdecode(bytes(dentry.symlink))
            elif dentry.is_chardev:
                i.type = CHRTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_blockdev:
                i.type = BLKTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_directory:
                i.type = DIRTYPE
            elif dentry.is_fifo:
                i.type = FIFOTYPE
            else:
                continue

            binary_stdout.write(i.tobuf(GNU_FORMAT))

            if dentry.is_file and not dentry.is_hardlink:
                for hash in dentry.hashes:
                    action = next(reader)
                    if action.exception:
                        raise action.exception[1]
                    binary_stdout.write(action.value)
                padding = -i.size % BLOCKSIZE
                if padding:
                    binary_stdout.write(bytes(padding))

    # Two zero-filled blocks mark the end of a tar archive.
    binary_stdout.write(b'\0' * (BLOCKSIZE * 2))