def from_file(path):
    ''' Creates a package object from a package file '''
    path = abspath(path)

    try:
        pkg = open_tarfile(path)
    except:
        raise BuildError(_('Could not open package: {0}').format(path))

    try:
        pkginfo = pkg.extractfile('.PKGINFO').read().decode('utf8')
    except:
        raise BuildError(
            _('Could not read package info: {0}').format(path))
    finally:
        pkg.close()

    info = PkginfoParser(pkginfo).parse()
    info['pgpsig'] = isfile(path + Package.SIGEXT)

    try:
        info['csize'] = getsize(path)
        data = open(path, 'rb').read()
        info['md5sum'] = md5(data).hexdigest()
        info['sha256sum'] = sha256(data).hexdigest()
    except OSError:
        raise BuildError(
            _('Could not determine package size: {0}').format(path))
    except:
        raise BuildError(
            _('Could not calculate package checksums: {0}').format(path))

    return Package(info['name'], info['version'], path, info)
def load_from_db(self):
    """ Loads the package list from a repo database file """
    if not isfile(self._db):
        return {}

    try:
        db = open_tarfile(self._db)
    except:
        raise DbError(_("Could not open database: {0}").format(self._db))

    packages = {}
    for member in (m for m in db.getmembers()
                   if m.isfile() and m.name.endswith("desc")):
        desc = db.extractfile(member).read().decode("utf8")
        try:
            info = DescParser(desc).parse()
        except ParserError as e:
            raise DbError(_("Invalid db entry: {0}: {1}").format(member.name, e.message))
        path = join(self._path, info["filename"])
        packages[info["name"]] = Package(info["name"], info["version"], path, info)

    db.close()
    return packages
def load_from_db(self):
    ''' Loads the package list from a repo database file '''
    if not isfile(self._db):
        return {}

    try:
        db = open_tarfile(self._db)
    except:
        raise DbError(_('Could not open database: {0}').format(self._db))

    packages = {}
    for member in (m for m in db.getmembers()
                   if m.isfile() and basename(m.name) == Repo.DESC):
        try:
            desc = db.extractfile(member).read().decode('utf8')
            info = DescParser(desc).parse()
        except ParserError as e:
            raise DbError(
                _('Invalid db entry: {0}: {1}').format(member.name, e.message))
        except:
            raise DbError(
                _('Could not read db entry: {0}').format(member.name))
        path = join(self._path, info['filename'])
        packages[info['name']] = Package(info['name'], info['version'], path, info)

    try:
        db.close()
    except:
        raise DbError(_('Could not close database: {0}').format(self._db))

    return packages
def copy_fetched_databases_to_corresponding_tmp_dirs(
        self, database_dirs, hosts_config, database_dirs_files_counter):
    for host, _ in hosts_config.items():
        host_dir = self.temporary_dir + '/' + host
        open_tarfile(host_dir + '/passwords.tar.gz').extractall(host_dir)
        remove(host_dir + '/passwords.tar.gz')
        db_files = [
            f for f in listdir(host_dir) if isfile(join(host_dir, f))
        ]
        if self.debug:
            print(host + ' : ', db_files)
        for db_file in db_files:
            db_file_no_ext = splitext(db_file)[0]
            counter = database_dirs_files_counter[db_file_no_ext]
            database_dirs_files_counter[db_file_no_ext] += 1
            rename(
                host_dir + '/' + db_file,
                self.temporary_dir + '/' + db_file_no_ext + '/db_' + str(counter))
def main():
    from optparse import OptionParser
    o = OptionParser()
    options, args = o.parse_args()
    for path in args:
        with closing(open_tarfile(path)) as tar:
            process_file(path, tar)
def download(
    self,
    executable,
    url,
    basedir,
    overwrite=False,
    no_subdir=False,
    name=None,
    resource_type='file',
    path_in_archive='.',
    progress_cb=None,
    size=None,
):
    """
    Download a resource by URL
    """
    log = getLogger('ocrd.resource_manager.download')
    destdir = Path(basedir) if no_subdir else Path(basedir, executable)
    if not name:
        url_parsed = urlparse(url)
        name = Path(unquote(url_parsed.path)).name
    fpath = Path(destdir, name)
    is_url = url.startswith('https://') or url.startswith('http://')
    if fpath.exists() and not overwrite:
        log.info(
            "%s to be %s to %s which already exists and overwrite is False" %
            (url, 'downloaded' if is_url else 'copied', fpath))
        return fpath
    destdir.mkdir(parents=True, exist_ok=True)
    if resource_type == 'file':
        if is_url:
            self._download_impl(url, fpath, progress_cb)
        else:
            self._copy_impl(url, fpath, progress_cb)
    elif resource_type == 'tarball':
        with pushd_popd(tempdir=True) as tempdir:
            if is_url:
                self._download_impl(url, 'download.tar.xx', progress_cb, size)
            else:
                self._copy_impl(url, 'download.tar.xx', progress_cb)
            Path('out').mkdir()
            with pushd_popd('out'):
                log.info("Extracting tarball to %s/out" % tempdir)
                with open_tarfile('../download.tar.xx', 'r:*') as tar:
                    tar.extractall()
                log.info(
                    "Copying '%s' from extracted tarball %s/out to %s" %
                    (path_in_archive, tempdir, fpath))
                copytree(path_in_archive, str(fpath))
    # TODO
    # elif resource_type == 'github-dir':
    return fpath
def from_file(path):
    ''' Creates a package object from a package file '''
    path = abspath(path)

    # AAAAARRRGG
    #
    # The current version of tarfile (0.9) does not support lzma compressed archives.
    # The next version will: http://hg.python.org/cpython/file/default/Lib/tarfile.py
    #try:
    #    pkg = open_tarfile(path)
    #except:
    #    raise BuildError(_('Could not open package: {0}').format(path))
    #
    #try:
    #    pkginfo = pkg.extractfile('.PKGINFO').read().decode('utf8')
    #except:
    #    raise BuildError(_('Could not read package info: {0}').format(path))
    #finally:
    #    pkg.close()

    # Begin workaround
    if not isfile(path):
        raise BuildError(_('File does not exist: {0}').format(path))

    if is_tarfile(path):
        pkg = open_tarfile(path)
        try:
            pkginfo = pkg.extractfile(Package.PKGINFO).read().decode('utf8')
        except:
            raise BuildError(_('Could not read package info: {0}').format(path))
        finally:
            pkg.close()
    else:
        # Handling lzma compressed archives (.pkg.tar.xz)
        tmpdir = Package.get_tmpdir()
        if call(['tar', '-xJf', path, '-C', tmpdir, Package.PKGINFO]) != 0:
            raise BuildError(_('An error occurred in tar'))
        pkginfo = open(join(tmpdir, Package.PKGINFO)).read()
    # End workaround

    info = PkginfoParser(pkginfo).parse()
    info['csize'] = stat(path).st_size
    data = open(path, 'rb').read()
    info['md5sum'] = md5(data).hexdigest()
    info['sha256sum'] = sha256(data).hexdigest()

    return Package(info['name'], info['version'], path, info)
def list_tar(path, max_files=None):
    result = []
    for idx, item in enumerate(open_tarfile(path, 'r:*')):
        if max_files is not None and idx >= max_files:
            result.append({
                T_TRUNCATED: 0,
                T_TYPE: 'X',
            })
            break

        name = item.name
        letter = ''

        if item.islnk():
            name = name + ' => ' + item.linkname
            letter = 'L'
        elif item.issym():
            name = name + ' -> ' + item.linkname
            letter = 'L'
        elif item.isdir():
            letter = 'D'
        elif item.isfifo():
            letter = 'F'
        elif item.isblk():
            letter = 'B'
        elif item.ischr():
            letter = 'C'

        result.append({
            T_NAME: name,
            T_TYPE: letter,
            T_MODE: item.mode,
            T_SPEC: special_to_letter(item.mode),
            T_UID: item.uid,
            T_GID: item.gid,
            T_SIZE: item.size,
            T_TIMESTAMP: item.mtime,
            T_HAS_XATTR: False,
        })

    return result
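A minimal usage sketch for list_tar above, assuming the T_* keys are string constants defined alongside it in the same module; the archive path and the limit are hypothetical:

entries = list_tar('backup.tar.gz', max_files=100)
for entry in entries:
    if entry[T_TYPE] == 'X':
        # truncation marker appended by list_tar when max_files is reached
        print('listing truncated after 100 entries')
        break
    print(entry[T_NAME], entry[T_SIZE])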
def from_tarball(path, force=False):
    ''' Extracts a pkgbuild tarball and forwards it to the package builder '''
    path = abspath(path)

    try:
        archive = open_tarfile(path)
    except:
        raise BuildError(_('Could not open tarball: {0}').format(path))

    tmpdir = Package.get_tmpdir()
    root = None

    for member in archive.getmembers():
        # Reject absolute paths and members that would escape tmpdir.
        if isabs(member.name) or not normpath(join(
                tmpdir, member.name)).startswith(tmpdir):
            raise BuildError(
                _('Tarball contains bad member: {0}').format(member.name))
        if root is False:
            continue
        # Track whether all members share a single top-level directory;
        # root becomes False as soon as that assumption breaks.
        name = normpath(member.name)
        _root = name.split('/')[0]
        if member.isfile() and _root == name:
            root = False
        elif root is None:
            root = _root
        elif root != _root:
            root = False

    if not root:
        tmpdir = mkdtemp(dir=tmpdir)

    try:
        archive.extractall(tmpdir)
        archive.close()
    except:
        raise BuildError(_('Could not extract tarball: {0}').format(path))

    return Package.from_pkgbuild(join(tmpdir, root) if root else tmpdir,
                                 force=force)
def from_tarball(path):
    ''' Extracts a pkgbuild tarball and forwards it to the package builder '''
    path = abspath(path)

    try:
        archive = open_tarfile(path)
    except:
        raise BuildError(_('Could not open tarball: {0}').format(path))

    tmpdir = Package.get_tmpdir()
    root = None

    for member in archive.getmembers():
        if isabs(member.name) or not normpath(join(tmpdir, member.name)).startswith(tmpdir):
            raise BuildError(_('Tarball contains bad member: {0}').format(member.name))
        if root is False:
            continue
        name = normpath(member.name)
        _root = name.split('/')[0]
        if member.isfile() and _root == name:
            root = False
        elif root is None:
            root = _root
        elif root != _root:
            root = False

    if not root:
        tmpdir = mkdtemp(dir=tmpdir)

    try:
        archive.extractall(tmpdir)
        archive.close()
    except:
        raise BuildError(_('Could not extract tarball: {0}').format(path))

    return Package.from_pkgbuild(join(tmpdir, root) if root else tmpdir)
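The root-detection loop in the two from_tarball variants above is terse, so the sketch below restates the same rule in isolation (the helper name and the example member lists are hypothetical, and the path-traversal check is left out): an archive keeps its top-level directory as root only when every member sits under one shared directory and no regular file lies at the archive root.

def _single_root(members):
    # members: iterable of (name, is_file) pairs, mirroring the loop above
    # (names are assumed to be already normalized).
    root = None
    for name, is_file in members:
        top = name.split('/')[0]
        if is_file and top == name:   # a regular file at the archive root
            return None               # -> extract into the tmpdir itself
        if root is None:
            root = top
        elif root != top:
            return None
    return root

# e.g. _single_root([('pkg/PKGBUILD', True), ('pkg/foo.patch', True)]) -> 'pkg'
#      _single_root([('PKGBUILD', True), ('foo.patch', True)])         -> None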
def _pack_any(self, filepath):
    try:
        supported_archive = self._is_supported_archive(filepath)
        if supported_archive:
            archive, archive_filepath, archive_subpath = supported_archive
            if archive == 'zip':
                with ZipFile(archive_filepath) as zf:
                    for item in zf.infolist():
                        if item.filename == archive_subpath or \
                                item.filename.startswith(archive_subpath+'/'):
                            try:
                                archive_filename = item.filename.decode(
                                    sys.getfilesystemencoding())
                            except UnicodeDecodeError:
                                archive_filename = item.filename

                            yield {
                                F_TYPE: T_FILE,
                                F_PATH: '/'.join(
                                    [archive_filepath, archive_filename])
                            }

                            for portion in self._pack_fileobj(zf.open(item)):
                                yield portion

            elif archive == 'tar':
                with open_tarfile(archive_filepath) as tf:
                    for item in tf:
                        # For now support only simple files extraction, same as zip
                        if not item.isfile():
                            continue

                        if item.name == archive_subpath or \
                                item.name.startswith(archive_subpath+'/'):
                            try:
                                archive_filename = item.name.decode(
                                    sys.getfilesystemencoding())
                            except UnicodeDecodeError:
                                archive_filename = item.name

                            yield {
                                F_TYPE: T_FILE,
                                F_PATH: u'/'.join(
                                    [archive_filepath, archive_filename])
                            }

                            for portion in self._pack_fileobj(
                                    tf.extractfile(item)):
                                yield portion

        elif path.isfile(filepath):
            root = path.dirname(filepath)
            basename = path.basename(filepath)

            portions = self._pack_file(basename, top=root)
            header = next(portions)

            if self.follow_symlinks:
                filestat = stat(filepath)
            else:
                filestat = lstat(filepath)

            header.update({
                F_TYPE: T_FILE,
                F_STAT: self._stat_to_vec(filestat),
                F_ROOT: root,
            })

            yield header

            for portion in portions:
                yield portion
                del portion

        elif path.isdir(filepath):
            if self.find_size:
                for portion in self._size(filepath):
                    yield portion
                    del portion

            for portion in self._pack_path(filepath):
                yield portion
                del portion

        else:
            yield {
                F_TYPE: T_EXC,
                F_EXC: 'No download target',
                F_DATA: filepath
            }
            return

        yield {F_TYPE: T_FINISH, F_DATA: filepath}

    except Exception as e:
        yield {
            F_TYPE: T_EXC,
            F_EXC: str(type(e)),
            F_DATA: str(e) + traceback.format_exc(limit=20)
        }
def _open_tarfile(self):
    archive = StringIO(self.to_str())
    return open_tarfile(mode=self.class_mode, fileobj=archive)
def create_fetched_databases_backup_tarball(self, backup_tarball_file,
                                            database_dirs_files_counter):
    with open_tarfile(self.temporary_dir + '/' + backup_tarball_file,
                      'w:xz') as backup_tarball:
        for db_dir in database_dirs_files_counter.keys():
            backup_tarball.add(self.temporary_dir + '/' + db_dir)
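A small round-trip sketch of the 'w:xz' mode used above, written against the standard tarfile module (open_tarfile throughout these snippets appears to be tarfile.open imported under another name); the paths are placeholders, and the 'w:xz'/'r:xz' modes require Python 3.3+ for lzma support:

import tarfile

# Write an xz-compressed tarball from a placeholder directory, then list it back.
with tarfile.open('/tmp/backup.tar.xz', 'w:xz') as tar:
    tar.add('/tmp/dbs/host1')
with tarfile.open('/tmp/backup.tar.xz', 'r:xz') as tar:
    print(tar.getnames())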
def _open_tarfile(self):
    name = self.database.fs.get_basename(self.key)
    archive = StringIO(self.to_str())
    return open_tarfile(name, self.class_mode, fileobj=archive)
def search_in_archive(self, path):
    any_file = not self.name or self.path
    # We don't support extended search in archives

    if is_zipfile(path):
        zf = ZipFile(path)
        try:
            for item in zf.infolist():
                if self.terminate and self.terminate.is_set():
                    break

                name = os.path.basename(item.filename)
                if (self.name and self.name.match(name)) or \
                        (self.path and self.path.match(item.filename)) or \
                        any_file:
                    try:
                        archive_filename = item.filename.decode(
                            sys.getfilesystemencoding())
                    except UnicodeDecodeError:
                        archive_filename = item.filename

                    if self.strings:
                        for match in self.search_string_in_fileobj(
                                zf.open(item),
                                filename='zip:' + path + ':' + item.filename):
                            yield ('zip:' + path + ':' + archive_filename, match)
                    elif not any_file:
                        yield u'zip:' + path + u':' + archive_filename
        finally:
            zf.close()

    elif is_tarfile(path):
        tf = open_tarfile(path, 'r:*')
        try:
            for item in tf:
                if self.terminate and self.terminate.is_set():
                    break

                name = os.path.basename(item.name)
                if (self.name and self.name.match(name)) or \
                        (self.path and self.path.match(item.name)) or \
                        any_file:
                    try:
                        archive_filename = item.name.decode(
                            sys.getfilesystemencoding())
                    except UnicodeDecodeError:
                        archive_filename = item.name

                    if self.strings and item.isfile():
                        for match in self.search_string_in_fileobj(
                                tf.extractfile(item),
                                filename='tar:+' + archive_filename + ':' + path):
                            yield ('tar:' + path + ':' + archive_filename, match)
                    elif not any_file:
                        yield u'tar:' + path + u':' + archive_filename
        finally:
            tf.close()