Пример #1
0
    def fetch_input_file(self, file):
        """Download one named input file into ``self.input_cache_dir``.

        The cache directory is created when it does not exist yet.  Which
        source is contacted depends on ``file``:

        * MNXref tables (``reac_xref.tsv``, ``chem_xref.tsv``,
          ``chem_prop.tsv``, ``comp_xref.tsv``) are fetched directly.
        * ``rr_compounds.tsv`` / ``rxn_recipes.tsv`` are both taken out of
          the ``rr02_more_data`` archive.
        * ``rules_rall.tsv`` is taken out of the ``retrorules_rr02_rp3_hs``
          archive.

        Any other name is silently ignored.

        :param file: base name of the file to fetch.
        """
        cache_dir = self.input_cache_dir
        if not os.path.isdir(cache_dir):
            os.mkdir(cache_dir)

        #url = 'https://www.metanetx.org/cgi-bin/mnxget/mnxref/'
        url = 'ftp://ftp.vital-it.ch/databases/metanetx/MNXref/3.2/'

        # 3xCommon + rpReader
        if file in [
                'reac_xref.tsv', 'chem_xref.tsv', 'chem_prop.tsv',
                'comp_xref.tsv'
        ]:
            urllib_request_urlretrieve(url + file, cache_dir + '/' + file)

        #TODO: need to add this file to the git or another location
        if file in ['rr_compounds.tsv', 'rxn_recipes.tsv']:
            archive_path = cache_dir + '/rr02_more_data.tar.gz'
            extracted_dir = cache_dir + '/rr02_more_data'
            urllib_request_urlretrieve(
                'https://retrorules.org/dl/this/is/not/a/secret/path/rr02',
                archive_path)
            tar = tarfile_open(archive_path, 'r:gz')
            try:
                tar.extractall(cache_dir)
            finally:
                # Release the handle even when extraction fails.
                tar.close()
            shutil_move(extracted_dir + '/compounds.tsv',
                        cache_dir + '/rr_compounds.tsv')
            # Explicit destination file name so that a re-fetch replaces a
            # previously cached copy instead of failing because the target
            # already exists inside the directory.
            shutil_move(extracted_dir + '/rxn_recipes.tsv',
                        cache_dir + '/rxn_recipes.tsv')
            # The separator was missing here before, so the cleanup pointed
            # at paths that do not exist.
            os.remove(archive_path)
            shutil_rmtree(extracted_dir)

        if file == 'rules_rall.tsv':
            archive_path = cache_dir + '/retrorules_rr02_rp3_hs.tar.gz'
            extracted_dir = cache_dir + '/retrorules_rr02_rp3_hs'
            urllib_request_urlretrieve(
                'https://retrorules.org/dl/preparsed/rr02/rp3/hs',
                archive_path)
            tar = tarfile_open(archive_path, 'r:gz')
            try:
                tar.extractall(cache_dir)
            finally:
                tar.close()
            shutil_move(extracted_dir + '/retrorules_rr02_flat_all.tsv',
                        cache_dir + '/rules_rall.tsv')
            os.remove(archive_path)
            shutil_rmtree(extracted_dir)
Пример #2
0
 def write(self, path: str, mode: str):
     """Yield a tar archive whose output goes into an LZ4 frame file.

     The frame file at ``path`` is opened with the first character of
     ``mode``; the tar layer receives the full ``mode`` together with
     ``self.lz4_kwargs``.  The archive is closed once the consumer is done
     with the yielded object, before the frame file itself is closed.
     """
     with lz4.LZ4FrameFile(path, mode=mode[0]) as frame:
         tar = tarfile_open(mode=mode, fileobj=frame, **self.lz4_kwargs)
         try:
             yield tar
         finally:
             tar.close()
Пример #3
0
def copy_csharp_parser():
    """Make sure the C# parser executable is present, downloading it on request.

    When the executable is missing or its md5 digest differs from the
    expected one, the user is asked for confirmation on stdin; on "yes" the
    release tarball is downloaded, verified against its md5 digest and
    unpacked into ``kenja/lib/csharp``.  If the executable exists
    afterwards, the directory contents are appended to ``data_files``.

    Exits the process with status 1 when the downloaded tarball fails the
    digest check.
    """
    parser_path = 'kenja/lib/csharp/kenja-csharp-parser.exe'
    parser_digest = 'a3dfac7de0406e961d0c47f95d9bc522'
    parser_location = 'https://github.com/sdlab-naist/kenja-csharp-parser/releases/download/0.1/kenja-csharp-parser-0.1.tar.gz'
    parser_tar_digest = '583013bc78b3f1f158f094baa37b0808'

    def _md5_of(path):
        # Always hash the raw bytes: text mode can alter or truncate binary
        # content on some platforms and would yield a wrong digest.
        with open(path, 'rb') as stream:
            return hashlib.md5(stream.read()).hexdigest()

    confirm_text = None
    if not os.path.exists(parser_path):
        confirm_text = "{0} does not exist. Do you want to download it?[y/n]".format(parser_path)
    elif _md5_of(parser_path) != parser_digest:
        confirm_text = "{0} is different from designated parser script. Do you want to overwrite it?[y/n]".format(parser_path)

    if confirm_text is not None:
        print(confirm_text)
        choice = raw_input().lower()
        yes = set(['yes', 'y', 'ye'])
        if choice in yes:
            (filename, _) = urllib.urlretrieve(parser_location)
            digest = _md5_of(filename)
            if parser_tar_digest != digest:
                print("md5 hash of {0} is incorrect! remove it and try again.".format(filename))
                sys.exit(1)

            archive = tarfile_open(filename, 'r')
            try:
                archive.extractall('kenja/lib/csharp')
            finally:
                archive.close()

    if not os.path.exists(parser_path):
        # These messages previously mentioned the java parser, although this
        # function installs the C# one.
        print("csharp parser will not be installed.")
        print("You should disable csharp parser when you run kenja")
    else:
        data_files.append(("kenja/lib/csharp", glob.glob("kenja/lib/csharp/*")))
 def open_write(self, path: str) -> IO[bytes]:
     """Yield a temporary file whose content becomes the single archive member.

     A streaming tar archive using ``self.compression`` is opened at
     ``path``.  The caller writes into the yielded temporary file; once
     control comes back, the file is rewound and added to the archive under
     the member name ``self.filename``.
     """
     stream_mode = 'w|{}'.format(self.compression)
     with tarfile_open(path, stream_mode) as tar, NamedTemporaryFile() as scratch:
         yield scratch
         scratch.seek(0)
         tar.add(scratch.name, self.filename)
Пример #5
0
def logexplore(self, params):
    """Scan log tarballs below the current directory for failing stdout files.

    Every file under ``.`` whose name contains both ``log`` and ``tgz`` is
    opened as a tar archive.  Members whose path contains ``stdout`` and
    whose content contains ``ERROR`` or ``EXCEPTION`` are extracted below
    ``tmp`` and listed in a summary printed at the end.

    ``self`` and ``params`` are accepted but not used.
    """
    from os import walk
    from os.path import join as pjoin
    from tarfile import open as tarfile_open
    from contextlib import closing

    # Collect the candidates first so files extracted to ``tmp`` later are
    # never picked up by the walk.
    to_process = [
        pjoin(path, filename)
        for path, dirs, files in walk('.')
        for filename in files
        if "log" in filename and "tgz" in filename
    ]

    bad_files = []
    for f in to_process:
        with closing(tarfile_open(f)) as tar:
            for member in tar.getmembers():
                if "stdout" not in member.path:
                    continue
                extracted = tar.extractfile(member)
                if extracted is None:
                    # Directories and other non-file members carry no data.
                    continue
                with closing(extracted):
                    contents = extracted.read()
                # Bytes literals behave identically on Python 2 and also
                # match the bytes returned by Python 3.
                if not (b"ERROR" in contents or b"EXCEPTION" in contents):
                    continue
                tar.extract(member, path="tmp")
                bad_files.append((f, member, contents))

    if not bad_files:
        print("No bad files found")
    else:
        print("%d bad files found" % len(bad_files))

    for f, member, contents in bad_files:
        print("%s %s %d" % (f, member.path, len(contents)))
Пример #6
0
def download_certbot_config(config_bucket: str, config_key: str, certbot_config_dir: str, certbot_work_dir: str) -> None:
    """
    Download the configuration tar file from S3 and extract it to the certbot config directory.

    The object is spooled into a temporary file created in ``certbot_work_dir`` and then unpacked into
    ``certbot_config_dir``. Nothing is extracted (and no error is raised) when the key does not exist or when the
    object starts with the ZIP local-file signature. Any other S3 client error is re-raised.
    """
    with TemporaryFile(prefix="config", suffix=".tar.gz", dir=certbot_work_dir) as fd:
        try:
            result = s3.get_object(Bucket=config_bucket, Key=config_key)
        except ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
                return
            raise

        body = result["Body"]
        first_chunk = True

        while True:
            chunk = body.read(65536)
            if not chunk:
                break

            if first_chunk:
                if chunk[:4] == b'\x50\x4b\x03\x04':
                    # Legacy ZIP file -- don't use
                    return
                first_chunk = False

            fd.write(chunk)

        fd.seek(0)

        # The temporary file is an open file object, not a path, so it has to be handed over as ``fileobj``;
        # passed positionally it would be taken as the archive *name*.
        with tarfile_open(fileobj=fd, mode="r") as tf:
            tf.extractall(certbot_config_dir)
Пример #7
0
 def read(self, path: str, mode: str):
     """Yield a tar archive that reads from the LZ4 frame file at ``path``.

     The tar layer is opened on top of the frame file with ``mode`` and
     ``self.lz4_kwargs`` and is closed once the consumer is done with the
     yielded object, before the frame file itself is closed.
     """
     with lz4.LZ4FrameFile(path) as frame:
         tar = tarfile_open(mode=mode, fileobj=frame, **self.lz4_kwargs)
         try:
             yield tar
         finally:
             tar.close()
Пример #8
0
 def compress(self, fobj: IO[bytes]) -> IO[bytes]:
     """Yield a temporary file whose content is archived into ``fobj``.

     A streaming tar archive using ``self.compression`` is written to
     ``fobj``.  After the caller has filled the yielded temporary file it is
     rewound and stored in the archive under the name ``self.filename``.
     """
     stream_mode = 'w|{}'.format(self.compression)
     with tarfile_open(fileobj=fobj, mode=stream_mode) as tar, \
             NamedTemporaryFile() as scratch:
         yield scratch
         scratch.seek(0)
         tar.add(scratch.name, self.filename)
Пример #9
0
def compress_tar(inpt: str):  ## {{{
    """Create ``<inpt>.tar`` next to ``inpt``.

    For a directory, each entry of the directory is stored at the top level
    of the archive; for anything else the path is stored under its base
    name.

    The archive is built from explicit paths and archive names instead of
    changing the working directory, so relative inputs (including a bare
    name with no directory part) work and the process working directory is
    left untouched.
    """
    from os import path, listdir
    from tarfile import open as tarfile_open
    inpt = remove_trailing_slash(inpt)
    root, base = path.split(inpt)

    # ``path.join('', name)`` is just ``name``, so a bare input lands in the
    # current directory.
    dest_tar = path.join(root, f'{base}.tar')
    with tarfile_open(dest_tar, 'w') as new_tar:
        if path.isdir(inpt):
            for entry in listdir(inpt):
                new_tar.add(path.join(inpt, entry), arcname=entry)
        else:
            new_tar.add(inpt, arcname=base)
Пример #10
0
    def download_parser(self):
        """Download the parser tarball, verify it and unpack it.

        The tarball is fetched from ``self.parser_location`` and checked
        against ``self.parser_tar_digest`` via ``validate_md5sum``; on a
        mismatch the process exits with status 1.  Otherwise the archive is
        extracted into ``kenja/lib/csharp``.
        """
        (filename, _) = urllib.request.urlretrieve(self.parser_location)
        if not validate_md5sum(self.parser_tar_digest, filename):
            print("md5 hash of downloaded file is incorrect! try again.")
            sys.exit(1)

        # The context manager releases the archive handle after extraction.
        with tarfile_open(filename, 'r') as archive:
            archive.extractall('kenja/lib/csharp')
Пример #11
0
    def download_parser(self):
        """Download the parser tarball, verify it and unpack it.

        The tarball is fetched from ``self.parser_location`` and checked
        against ``self.parser_tar_digest`` via ``validate_md5sum``; on a
        mismatch the process exits with status 1.  Otherwise the archive is
        extracted into ``kenja/lib/csharp``.
        """
        (filename, _) = urllib.urlretrieve(self.parser_location)
        if not validate_md5sum(self.parser_tar_digest, filename):
            print("md5 hash of downloaded file is incorrect! try again.")
            sys.exit(1)

        # The context manager releases the archive handle after extraction.
        with tarfile_open(filename, 'r') as archive:
            archive.extractall('kenja/lib/csharp')
Пример #12
0
 def _open_archive(cls, path: str, mode: str) -> TarFile:
     extension_index = path.rfind('.')
     if extension_index > -1:
         compression = path[extension_index + 1:]
     else:
         compression = cls._compression
     mode = '{}|{}'.format(mode, compression)
     return tarfile_open(path, mode)
Пример #13
0
def download_python(ver) -> (str, str):
    """Download the source tarball for ``ver`` and extract it into ``PYTHONS``.

    ``download_info`` resolves the requested version to the concrete version
    and its download URL.  The whole tarball is read into memory before it
    is unpacked.

    Returns the resolved version and the path ``PYTHONS/Python-<version>``.
    """
    ver, url = download_info(ver)
    source_path = PYTHONS + '/Python-' + ver
    print(f'{url=}')
    # Close the HTTP response and the archive deterministically instead of
    # leaving them to the garbage collector.
    with urlopen(url) as response:
        payload = BytesIO(response.read())
    with tarfile_open(fileobj=payload) as tar_file:
        tar_file.extractall(PYTHONS)
    return ver, source_path
Пример #14
0
def extract_tarball(url, data):
    """Return the content of the first ``bin/ec-*`` file in a tarball.

    ``data`` holds the raw archive bytes; it is only inspected when ``url``
    contains ``.tar.``.  An ``AssertionError`` is raised when the URL does
    not look like a tarball or no matching regular file is found.
    """
    with BytesIO(data) as stream:
        if '.tar.' in url:
            with tarfile_open(fileobj=stream) as archive:
                candidates = (
                    member for member in archive.getmembers()
                    if member.isfile() and member.name.startswith('bin/ec-')
                )
                for member in candidates:
                    return archive.extractfile(member).read()

    raise AssertionError('unreachable `extract` function')
Пример #15
0
 def _given_resource(self, resource_id: str, name: str, lines: bytes):
     client = self._storage._file_storage._client
     buffer = BytesIO()
     with tarfile_open(mode='w:gz', fileobj=buffer) as archive:
         tarinfo = TarInfo(name)
         tarinfo.size = len(lines)
         archive.addfile(tarinfo, BytesIO(lines))
     buffer.seek(0)
     client.upload_object_via_stream(buffer, resource_id)
Пример #16
0
def tar_cz_relative(*path):
    """tar_cz_relative(*path) -> bytes
    Compress a sequence of files or directories in memory.
    Every given path is stored under the archive name ``./``, so the content
    of a directory ends up relative to the archive root.
    The resulting string could be stored as a .tgz file."""
    file_out = io_BytesIO()
    # The context manager releases the archive even when adding a path fails.
    with tarfile_open(mode="w:gz", fileobj=file_out) as tar:
        for p in path:
            tar.add(p, arcname='./')
    return file_out.getvalue()
Пример #17
0
def xtract_tar(inpt: str):  ## {{{
    """Extract the tar archive ``inpt`` into a new sibling directory.

    The destination is ``inpt`` with its last extension removed; it is
    created with ``mkdir``, so an already existing directory raises.
    """
    from os import path, mkdir
    from tarfile import open as tarfile_open
    inpt = remove_trailing_slash(inpt)
    dest_dir = path.splitext(inpt)[0]
    mkdir(dest_dir)

    with tarfile_open(inpt) as archive:
        archive.extractall(dest_dir)
Пример #18
0
def extractlayers(dc, args, layers, top_most_layer_id):
    """Open the output target and write the flattened layers to it as a tar.

    The target is either standard output (when ``args.target`` equals
    ``_TARGET_STDOUT``) or a file opened with ``os_open``.  The tarfile mode
    is chosen from whether the target is seekable and from
    ``args.compression``; the actual layer extraction is delegated to
    ``dimgx_extractlayers``.
    """
    target_path = args.target
    flags = O_WRONLY

    if target_path == _TARGET_STDOUT:
        target_fd = stdout.fileno()
    else:
        flags |= O_CREAT | O_TRUNC

        # Without --force, refuse to overwrite an existing file.
        if not args.force:
            flags |= O_EXCL

        # NOTE(review): logexception presumably calls os_open with the given
        # arguments and logs the message on failure -- confirm against its
        # definition.
        target_fd = logexception(
            _LOGGER, ERROR,
            'unable to open target file "{}": {{e}}'.format(target_path),
            os_open, target_path, flags, 0o666)

    with fdopen(target_fd, 'wb') as target_file:
        if hasattr(target_file, 'seekable'):
            seekable = target_file.seekable()
        else:
            # Fallback for file objects without a seekable() method: probe
            # the descriptor directly.
            try:
                seekable = not lseek(target_fd, 0, SEEK_CUR) < 0 \
                    and S_ISREG(fstat(target_fd).st_mode)
            except OSError as e:
                # ESPIPE means "not seekable"; anything else is unexpected.
                if errorcode.get(e.errno) != 'ESPIPE':
                    raise

                seekable = False

        open_args = {'fileobj': target_file}

        # 'w' / 'w:<comp>' are the random-access modes, 'w|' / 'w|<comp>'
        # the stream modes used for non-seekable targets.
        if args.compression is None:
            open_args['mode'] = 'w' if seekable else 'w|'
        else:
            if seekable:
                mode = 'w:{}'
                open_args['compresslevel'] = args.compress_level
                _, ext = ospath_splitext(target_path)

                if ext.lower() != '{}{}'.format(ospath_extsep,
                                                args.compression):
                    _LOGGER.warning(
                        'target name "%s" doesn\'t match compression type ("%s")',
                        target_path, args.compression)
            else:
                # The compression level is only passed in the seekable case.
                mode = 'w|{}'
                _LOGGER.warning(
                    'target "%s" is not seekable, ignoring compression level (%d)',
                    target_path, args.compress_level)

            open_args['mode'] = mode.format(args.compression)

        with tarfile_open(**open_args) as tar_file:
            dimgx_extractlayers(dc, layers, tar_file, top_most_layer_id)
Пример #19
0
 def write(self, path: str, mode: str):
     """Yield a tar archive that ends up zstd-compressed at ``path``.

     The archive is first written uncompressed to a named temporary file.
     After the consumer is done and the archive is closed, the temporary
     file is rewound and streamed through a ``ZstdCompressor`` built from
     ``self.zstd_kwargs`` into ``path``.
     """
     with NamedTemporaryFile() as scratch:
         tar = tarfile_open(scratch.name, mode=mode)
         try:
             yield tar
         finally:
             tar.close()
         # Re-read the finished, uncompressed archive from the start.
         scratch.seek(0)
         with open(path, 'wb') as sink:
             compressor = zstandard.ZstdCompressor(**self.zstd_kwargs)
             compressor.copy_stream(scratch, sink)
Пример #20
0
 def read(self, path: str, mode: str):
     """Yield a tar archive read from the zstd-compressed file at ``path``.

     The file is decompressed completely into a named temporary file with a
     ``ZstdDecompressor`` built from ``self.zstd_kwargs``; the tar archive
     is then opened on that temporary file with ``mode`` and closed once
     the consumer is done.
     """
     with NamedTemporaryFile() as scratch:
         with open(path, 'rb') as source:
             decompressor = zstandard.ZstdDecompressor(**self.zstd_kwargs)
             decompressor.copy_stream(source, scratch)
         # Rewind so tarfile starts reading at the first header.
         scratch.seek(0)
         tar = tarfile_open(mode=mode, fileobj=scratch)
         try:
             yield tar
         finally:
             tar.close()
Пример #21
0
def try_tarfile(filename, pattern):
    """Yield opened ROOT files found inside the tar archive ``filename``.

    A member is selected when its path contains ``.root`` and, if
    ``pattern`` is non-empty, also contains ``pattern``.  Each selected
    member is extracted into a fresh temporary directory, opened with
    ``safe_root_open`` and yielded; the temporary directory is removed when
    the generator moves on.
    """
    with closing(tarfile_open(filename)) as archive:
        for member in archive.getmembers():
            name = member.path
            if ".root" not in name:
                continue
            if pattern and pattern not in name:
                continue
            print(" - %s" % name)
            scratch_dir = mkdtemp()
            try:
                archive.extract(name, scratch_dir)
                yield safe_root_open(scratch_dir + "/" + name)
            finally:
                rmtree(scratch_dir)
Пример #22
0
def try_tarfile(filename, pattern):
    """Yield opened ROOT files found inside the tar archive ``filename``.

    A member is selected when its path contains ``.root`` and, if
    ``pattern`` is non-empty, also contains ``pattern``.  Each selected
    member is extracted into a fresh temporary directory, opened with
    ``safe_root_open`` and yielded; the temporary directory is removed when
    the generator moves on.
    """
    with closing(tarfile_open(filename)) as archive:
        for member in archive.getmembers():
            name = member.path
            if ".root" not in name:
                continue
            if pattern and pattern not in name:
                continue
            print(" - %s" % name)
            scratch_dir = mkdtemp()
            try:
                archive.extract(name, scratch_dir)
                yield safe_root_open(scratch_dir + "/" + name)
            finally:
                rmtree(scratch_dir)
Пример #23
0
        def tar_gz_decompress(self, destination):
            """
            Extract the archive ``self.file`` into the given destination.

            Nothing happens when the destination is not a string.

            Argument:
                - destination: str
                    The directory that receives the extracted content.
            """

            # ``None`` is not a ``str`` either, so one check covers both cases.
            if not isinstance(destination, str):
                return

            with tarfile_open(self.file) as archive:
                archive.extractall(path=destination)
        def tar_gz_compress(self, destination):
            """
            Store ``self.file`` in a new gzip-compressed tar archive.

            Nothing happens when the destination is not a string.

            Argument:
                - destination: str
                    The path of the archive to create.
            """

            # ``None`` is not a ``str`` either, so one check covers both cases.
            if not isinstance(destination, str):
                return

            with tarfile_open(destination, "w:gz") as archive:
                archive.add(self.file)
Пример #25
0
    def tar_gz_decompress(self, destination):
        """
        Decompress the given file into the given destination.

        :param str destination: The destination of the decompression.

        :raise ValueError: When the destination is not a string.
        """

        if destination is not None and isinstance(destination, str):
            with tarfile_open(self.file) as thetar:
                thetar.extractall(path=destination)
        else:
            # Report the expected type itself; ``type(str)`` would render as
            # the metaclass (``<class 'type'>``) rather than ``str``.
            raise ValueError("{0} expected. {1} given.".format(
                str, type(destination)))
Пример #26
0
def read(self, fileobj):
    """Read the LDEO ASEP files contained in an NODC accession tarball.

    ``fileobj`` is opened as a gzip-compressed tar.  The data directory is
    located from the first member whose name contains ``0-data``; members
    below it are read with ``ldeo_asep.read``, skipping PDFs, ``_ros.``
    files and ``_ctd_U.`` upcasts.  The resulting data files are stored in
    ``self.files`` ordered by ``lexico`` of their ``STNNBR`` global.
    """
    # NOTE(review): the two nested helpers below are not referenced anywhere
    # in this function body.
    def is_fname_ok(fname):
        if '.csv' not in fname:
            return False
        if fname.find('/') > -1:
            raise ValueError(
                u'CTD Exchange Zip files should not contain directories.')
        return True

    def reader(dfile, fileobj, retain_order, header_only):
        ctdex.read(dfile, fileobj, retain_order, header_only)
        dfile.globals['_FILENAME'] = fileobj.name

    dfiles = []

    datapath = None
    datadirname = '0-data'
    with tarfile_open(mode='r:gz', fileobj=fileobj) as archive:
        for member in archive.getmembers():
            name = member.name

            # Skip everything until a member reveals the data directory.
            if datapath is None:
                if datadirname not in name:
                    continue
                datapath = name.split(datadirname)[0] + datadirname + '/'
                log.info('NODC accession data path: {0}'.format(datapath))

            if not name.startswith(datapath):
                continue

            bname = os.path.basename(name)
            # PDFs, '_ros.' files and upcasts are not wanted.
            if bname.endswith('pdf') or '_ros.' in bname or '_ctd_U.' in bname:
                continue

            dfile = DataFile()
            extracted = archive.extractfile(member)
            if extracted is None:
                log.error(u'Unable to extract file {0!r}'.format(member))
                continue
            ldeo_asep.read(dfile, extracted)
            dfiles.append(dfile)

    def station_key(dfile):
        return lexico(dfile.globals['STNNBR'])

    self.files = sorted(dfiles, key=station_key)
Пример #27
0
def xtarfile_open(path: str, mode: str, **kwargs):
    """Open a tar archive, delegating non-native compressions to a handler.

    ``get_compression`` determines the compression from ``path`` and
    ``mode``.  No compression or one listed in ``_NATIVE_FORMATS`` goes
    straight to ``tarfile_open``.  Otherwise the handler class registered in
    ``_HANDLERS`` is instantiated with ``kwargs`` and asked to ``read`` or
    ``write`` using the first two characters of ``mode``.

    Raises ``NotImplementedError`` when no handler is registered or the mode
    starts with neither ``r`` nor ``w``.
    """
    compression = get_compression(path, mode)

    if not compression or compression in _NATIVE_FORMATS:
        return tarfile_open(path, mode, **kwargs)

    handler_class = _HANDLERS.get(compression)
    if handler_class is None:
        raise NotImplementedError

    handler = handler_class(**kwargs)
    if mode.startswith('r'):
        return handler.read(path, mode[:2])
    if mode.startswith('w'):
        return handler.write(path, mode[:2])

    raise NotImplementedError
Пример #28
0
def extractlayers(dc, args, layers, top_most_layer_id):
    """Open the output target and write the flattened layers to it as a tar.

    The target is standard output when ``args.target`` equals
    ``_TARGET_STDOUT`` and a file opened through ``os_open`` otherwise.  The
    tarfile mode depends on whether the target is seekable and on
    ``args.compression``; the layer extraction itself is delegated to
    ``dimgx_extractlayers``.
    """
    target_path = args.target

    if target_path == _TARGET_STDOUT:
        target_fd = stdout.fileno()
    else:
        flags = O_WRONLY | O_CREAT | O_TRUNC

        # Without --force, refuse to overwrite an existing file.
        if not args.force:
            flags |= O_EXCL

        open_error = 'unable to open target file "{}": {{e}}'.format(target_path)
        target_fd = logexception(_LOGGER, ERROR, open_error, os_open, target_path, flags, 0o666)

    with fdopen(target_fd, 'wb') as target_file:
        if hasattr(target_file, 'seekable'):
            seekable = target_file.seekable()
        else:
            # Fallback for file objects without seekable(): probe the
            # descriptor directly.
            try:
                seekable = not lseek(target_fd, 0, SEEK_CUR) < 0 \
                    and S_ISREG(fstat(target_fd).st_mode)
            except OSError as e:
                if errorcode.get(e.errno) != 'ESPIPE':
                    raise

                seekable = False

        open_args = {'fileobj': target_file}
        compression = args.compression

        if compression is None:
            open_args['mode'] = 'w' if seekable else 'w|'
        elif seekable:
            open_args['compresslevel'] = args.compress_level
            _, ext = ospath_splitext(target_path)

            if ext.lower() != '{}{}'.format(ospath_extsep, compression):
                _LOGGER.warning('target name "%s" doesn\'t match compression type ("%s")', target_path, compression)

            open_args['mode'] = 'w:{}'.format(compression)
        else:
            # Stream mode: the compression level is not passed on.
            _LOGGER.warning('target "%s" is not seekable, ignoring compression level (%d)', target_path, args.compress_level)
            open_args['mode'] = 'w|{}'.format(compression)

        with tarfile_open(**open_args) as tar_file:
            dimgx_extractlayers(dc, layers, tar_file, top_most_layer_id)
Пример #29
0
def create_config_tarfile(config_dir: str, config_tarfile: str) -> CertbotCertificate:
    """
    Create the configuration tar file for storage in S3 and return a CertbotCertificate holding the certificate,
    chain, full chain and private key found while archiving.

    Every file below ``config_dir`` is added to the gzip-compressed archive ``config_tarfile`` under its path relative
    to ``config_dir``. The relative path is computed with ``os.path.relpath`` so that a ``config_dir`` given with a
    trailing slash no longer truncates the names of files in sub-directories.

    Raises ValueError when any of the four certificate files is missing.
    """
    from os.path import join as path_join, relpath as path_relpath

    certificate = None
    chain = None
    full_chain = None
    private_key = None

    with tarfile_open(config_tarfile, "w:gz") as tf:
        for path, _, filenames in walk(config_dir):
            for filename in filenames:
                pathname = path_join(path, filename)
                relpath = path_relpath(pathname, config_dir)
                print(f"Adding {relpath} to archive")
                tf.add(pathname, relpath, recursive=False)

                if fnmatch(relpath, CERT_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        certificate = fd.read()
                elif fnmatch(relpath, CHAIN_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        chain = fd.read()
                elif fnmatch(relpath, FULLCHAIN_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        full_chain = fd.read()
                elif fnmatch(relpath, KEY_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        private_key = fd.read()

    if certificate is None:
        raise ValueError(f"Did not find live certificate in {config_dir}")

    if chain is None:
        raise ValueError(f"Did not find intermediate certificate in {config_dir}")

    if full_chain is None:
        raise ValueError(f"Did not find full certificate chain in {config_dir}")

    if private_key is None:
        raise ValueError(f"Did not find private key in {config_dir}")

    return CertbotCertificate(certificate=certificate, chain=chain, full_chain=full_chain, private_key=private_key)
Пример #30
0
    def get_image(self, image):
        """Return an in-memory tar of ``image`` and all of its parent layers.

        Starting at ``image``, the ``':parent_id'`` links are followed until
        a layer without a parent is reached.  For every layer the archive
        receives a directory entry named after the layer id and a
        ``<id>/layer.tar`` entry read from the ``data`` directory below
        ``self._my_dir``.  The returned ``BytesIO`` is positioned at the
        start.

        Raises ``APIError`` when ``image`` is empty.
        """
        if not image:
            raise APIError(HTTPError('500 Server Error'),
                           None,
                           explanation='Usage: image_export IMAGE [IMAGE...]')

        # Walk from the requested image up through its ancestors.
        layers = []
        current_id = image

        while current_id:
            layer = normalizeimage(self._findlayer(current_id), copy=True)
            layers.append(layer)
            current_id = layer[':parent_id']

        archive_buffer = BytesIO()
        stamp = time()

        with tarfile_open(mode='w', fileobj=archive_buffer) as archive:
            for layer in layers:
                layer_id = layer[':id']

                dir_entry = TarInfo(layer_id)
                dir_entry.mtime = stamp
                dir_entry.mode = 0o755
                dir_entry.type = DIRTYPE
                archive.addfile(dir_entry)

                source_path = ospath_join(self._my_dir, 'data',
                                          layer[':short_id'],
                                          'layer.tar')

                with open(source_path, 'rb') as source_file:
                    member_name = '{}/layer.tar'.format(layer_id)
                    file_entry = archive.gettarinfo(source_path, member_name)
                    # Normalise the metadata so it does not depend on the
                    # file on disk.
                    file_entry.mtime = stamp
                    file_entry.mode = 0o644
                    file_entry.uid = file_entry.gid = 0
                    file_entry.uname = file_entry.gname = ''
                    archive.addfile(file_entry, fileobj=source_file)

        archive_buffer.seek(0)

        return archive_buffer
Пример #31
0
    def __init__(self, name, path, sign_key=None, sudo=False):
        """ :param name: The name of this repository.
        :param path: Path to this repository.
        :param sign_key: GPG key to sign. None means no signing.
        :param sudo: Whether to modify this repository using sudo(1) or not.

        The repository database ``<name>.db.tar.gz`` is created with
        ``repo-add`` when it does not exist yet; the packages are then read
        from the directory entries of that database.
        """
        self.name = name
        self.directory = path
        self.sign_key = sign_key
        self.sign_parameters = ['-s', '-k', sign_key] if sign_key else []
        self.sudo = sudo

        self.db_path = join(path, name + '.db.tar.gz')
        if not exists(self.db_path):
            run(['repo-add', self.db_path], sudo=sudo, capture=False)

        # Close the database archive as soon as the member list was read
        # instead of leaking the open file handle.
        with tarfile_open(self.db_path) as database:
            packages = [PackageTinyInfo.from_repodb_directory_name(member.name)
                        for member in database.getmembers() if member.isdir()]
        self.packages = {package.name: package for package in packages}
Пример #32
0
 def decompress(self, fobj: IO[bytes]) -> IO[bytes]:
     """Yield the archive member named ``self.filename`` read from ``fobj``.

     ``fobj`` is read as a streaming tar archive using ``self.compression``.
     Members are scanned in order until the wanted name is found.  Both the
     yielded member file and the archive are closed afterwards.

     Raises ``FileNotFoundError`` when the member is not available.
     """
     stream_mode = 'r|{}'.format(self.compression)
     archive = tarfile_open(fileobj=fobj, mode=stream_mode)
     try:
         extracted = None
         # ``archive.next()`` returns None once the archive is exhausted.
         for member in iter(archive.next, None):
             if member.name == self.filename:
                 extracted = archive.extractfile(member)
                 break
         if extracted is None:
             raise FileNotFoundError('{} not found'.format(self.filename))
         try:
             yield extracted
         finally:
             extracted.close()
     finally:
         archive.close()
Пример #33
0
def mergeall(self, params):
    """Merge same-named members of several tar archives in parallel.

    Every archive in ``params.files`` is scanned and, for each member path,
    the set of archives containing it is recorded.  One ``mp_merge`` task
    per member path is then run on a process pool with one worker per CPU.
    """
    from hmerge import merge_files
    from tarfile import open as tarfile_open
    from contextlib import closing

    print("Input: %s" % (params.files,))

    # member path -> set of archives that contain it
    output_files = {}

    for f in params.files:
        with closing(tarfile_open(f)) as tar:
            for rootfile in tar.getmembers():
                output_files.setdefault(rootfile.path, set()).add(f)

    from multiprocessing import Pool, cpu_count
    pool = Pool(cpu_count())
    to_merge = [(output, sorted(inputs), output)
                for output, inputs in sorted(output_files.iteritems())]
    try:
        pool.map(mp_merge, to_merge)
    finally:
        # Shut the workers down instead of leaving them to interpreter exit.
        pool.close()
        pool.join()
Пример #34
0
 def write(self, path: str, mode: str):
     """Yield a tar archive that ends up zstd-compressed at ``path``.

     The archive is first written uncompressed to a named temporary file
     created with ``delete=False``.  After the consumer is done and the
     archive is closed, the temporary file is rewound and streamed through
     a ``ZstdCompressor`` built from ``self.zstd_kwargs`` into ``path``.
     The temporary file is removed explicitly at the end.
     """
     # NOTE(review): if NamedTemporaryFile() itself raises, ``decompressed``
     # is unbound when the outer ``finally`` runs -- confirm whether that
     # case matters.
     try:
         with NamedTemporaryFile(delete=False) as decompressed:
             archive = tarfile_open(decompressed.name, mode=mode)
             try:
                 yield archive
             finally:
                 archive.close()
             # Re-read the finished, uncompressed archive from the start.
             decompressed.seek(0)
             with open(path, 'wb') as compressed:
                 zstd = zstandard.ZstdCompressor(**self.zstd_kwargs)
                 zstd.copy_stream(decompressed, compressed)
     finally:
         # We delete it manually because otherwise on Windows
         # it gets deleted before we move it to the output file location.
         # This is because on Windows, file handles with the O_TEMPORARY
         # flag (which is set if we pass `delete=True`) are deleted as
         # soon as they're closed.
         decompressed.close()
         os_remove(decompressed.name)
def read_correlate_copynumber_vs_mrnaseq(tar_gz_file_path, genes):
    """Read the sample count and per-gene correlations from a tarball.

    The first member whose name ends with ``qa.txt`` is read as a
    tab-separated table indexed by its first column; the value at row
    ``sample``, column ``comm`` is returned as the first result.  The first
    member ending with ``cors.txt`` is read indexed by its second column,
    and its ``cor`` column restricted to ``genes`` is returned as a dict.
    """
    with tarfile_open(tar_gz_file_path) as archive:

        qa_member = [
            member for member in archive if member.name.endswith("qa.txt")
        ][0]
        qa_table = read_csv(
            archive.extractfile(qa_member), sep="\t", index_col=0
        )
        n = qa_table.loc["sample", "comm"]

        cors_member = [
            member for member in archive if member.name.endswith("cors.txt")
        ][0]
        cors_table = read_csv(
            archive.extractfile(cors_member), sep="\t", index_col=1
        )

        return n, cors_table.loc[genes, "cor"].to_dict()
 def open_read(self, path: str) -> IO[bytes]:
     """Yield the archive member named ``self.filename`` from ``path``.

     ``path`` is read as a streaming tar archive using ``self.compression``.
     Members are scanned in order until the wanted name is found.  Both the
     yielded member file and the archive are closed afterwards.

     Raises ``FileNotFoundError`` when the member is not available.
     """
     stream_mode = 'r|{}'.format(self.compression)
     archive = tarfile_open(path, stream_mode)
     try:
         extracted = None
         # ``archive.next()`` returns None once the archive is exhausted.
         for member in iter(archive.next, None):
             if member.name == self.filename:
                 extracted = archive.extractfile(member)
                 break
         if extracted is None:
             raise FileNotFoundError('{} not found in {}'.format(
                 self.filename, path))
         try:
             yield extracted
         finally:
             extracted.close()
     finally:
         archive.close()
Пример #37
0
    def get_image(self, image):
        """Return an in-memory tar of ``image`` and all of its parent layers.

        Starting at ``image``, the ``':parent_id'`` links are followed until
        a layer without a parent is reached.  For every layer the archive
        receives a directory entry named after the layer id and a
        ``<id>/layer.tar`` entry read from the ``data`` directory below
        ``self._my_dir``.  The returned ``BytesIO`` is positioned at the
        start.

        Raises ``APIError`` when ``image`` is empty.
        """
        if not image:
            raise APIError(HTTPError('500 Server Error'), None, explanation='Usage: image_export IMAGE [IMAGE...]')

        # Walk from the requested image up through its ancestors.
        layers = []
        current_id = image

        while current_id:
            layer = normalizeimage(self._findlayer(current_id), copy=True)
            layers.append(layer)
            current_id = layer[':parent_id']

        archive_buffer = BytesIO()
        stamp = time()

        with tarfile_open(mode='w', fileobj=archive_buffer) as archive:
            for layer in layers:
                layer_id = layer[':id']

                dir_entry = TarInfo(layer_id)
                dir_entry.mtime = stamp
                dir_entry.mode = 0o755
                dir_entry.type = DIRTYPE
                archive.addfile(dir_entry)

                source_path = ospath_join(self._my_dir, 'data', layer[':short_id'], 'layer.tar')

                with open(source_path, 'rb') as source_file:
                    member_name = '{}/layer.tar'.format(layer_id)
                    file_entry = archive.gettarinfo(source_path, member_name)
                    # Normalise the metadata so it does not depend on the
                    # file on disk.
                    file_entry.mtime = stamp
                    file_entry.mode = 0o644
                    file_entry.uid = file_entry.gid = 0
                    file_entry.uname = file_entry.gname = ''
                    archive.addfile(file_entry, fileobj=source_file)

        archive_buffer.seek(0)

        return archive_buffer
Пример #38
0
# Check to see if we have the codegen json file in this directory
if not exists(CODEGEN_JSON):

    # Retrieve the codegen archive
    print("Downloading codegen JSON file to %s." % CODEGEN_JSON)
    handle = urlopen(CODEGEN_JSON_URL)
    try:
        bzip2_tarball = handle.read()
    finally:
        handle.close()

    # Write the file out to a temp file
    tempfile = NamedTemporaryFile(delete=False)
    try:
        tempfile.write(bzip2_tarball)
        tempfile.close()

        # Extract the CODEGEN_JSON file to this directory
        tarball = tarfile_open(tempfile.name, 'r:*')
        try:
            archived_file = 'rabbitmq-codegen-default/' + CODEGEN_JSON.split('/')[-1]
            json_data = tarball.extractfile(archived_file)
            json_bytes = json_data.read()
        finally:
            tarball.close()
    finally:
        # Remove the tempfile, also when the download could not be unpacked
        tempfile.close()
        unlink(tempfile.name)

    # Write out the JSON file; the archive member is raw bytes, so copy it
    # in binary mode
    with open(CODEGEN_JSON, 'wb') as handle:
        handle.write(json_bytes)

# Read in the codegen JSON file
with open(CODEGEN_JSON, 'r') as handle:
    amqp = load(handle)

# Check to see if we have the codegen xml file in this directory
Пример #39
0
# Check to see if we have the codegen json file in this directory
if not exists(CODEGEN_JSON):

    # Retrieve the codegen archive
    print("Downloading codegen JSON file to %s." % CODEGEN_JSON)
    handle = urlopen(CODEGEN_JSON_URL)
    try:
        bzip2_tarball = handle.read()
    finally:
        handle.close()

    # Write the file out to a temp file
    tempfile = NamedTemporaryFile(delete=False)
    try:
        tempfile.write(bzip2_tarball)
        tempfile.close()

        # Extract the CODEGEN_JSON file to this directory
        tarball = tarfile_open(tempfile.name, "r:*")
        try:
            archived_file = "rabbitmq-codegen-default/" + CODEGEN_JSON.split("/")[-1]
            json_data = tarball.extractfile(archived_file)
            json_bytes = json_data.read()
        finally:
            tarball.close()
    finally:
        # Remove the tempfile, also when the download could not be unpacked
        tempfile.close()
        unlink(tempfile.name)

    # Write out the JSON file; the archive member is raw bytes, which a
    # text-mode file rejects on Python 3, so copy it in binary mode
    with open(CODEGEN_JSON, "wb") as handle:
        handle.write(json_bytes)

# Read in the codegen JSON file
with open(CODEGEN_JSON, "r") as handle:
    amqp = load(handle)

# Check to see if we have the codegen xml file in this directory
Пример #40
0
 def __init__(self, archpath):
     """Open the tar archive at ``archpath`` and pass it to the base initialiser."""
     archive = tarfile_open(archpath)
     super(TARArchive, self).__init__(archive)
Пример #41
0
 def __init__(self, archpath):
     """Open the tar archive at ``archpath`` and pass it to ``ArchiveBase``."""
     archive = tarfile_open(archpath)
     ArchiveBase.__init__(self, archive)
Пример #42
0
def extractlayers(dc, layers, tar_file, top_most_layer=0):
    """
    :param dc: a |docker.Client|_

    :param layers: a sequence of inspection objects (likely retrieved with
        :func:`inspectlayers`) corresponding to the layers to extract and
        flatten in order of precedence

    :param tar_file: a :class:`~tarfile.TarFile` open for writing to which
        to write the flattened layer archive

    :param top_most_layer: an image ID or an index into :obj:`layers`
        indicating the most recent layer to retrieve (the default of ``0``
        references the first item in :obj:`layers`; see below)

    :raises docker.errors.APIError: on failure interacting with Docker
        (e.g., failed connection, Docker not running, etc.)

    :raises docker.errors.DockerException: on failure interacting with
        Docker (e.g., bad image ID, etc.)

    :raises UnsafeTarPath: - probably indicative of a bug in Docker

    Retrieves the layers corresponding to the :obj:`layers` parameter and
    extracts them into :obj:`tar_file`. Changes from layers corresponding
    to smaller indexes in :obj:`layers` will overwrite or block those from
    larger ones.

    Callers will need to set the :obj:`top_most_layer` parameter if
    :obj:`layers` is not in descending order. It is always safe to provide
    the same value as the :obj:`image_spec` parameter to
    :func:`inspectlayers`, but this may be inefficient if that layer does
    not appear in :obj:`layers`.

    Nothing is written (and a warning is logged) when :obj:`layers` is
    empty. The image export is unpacked into a temporary directory that is
    removed again before returning, whether or not an error occurred.
    """
    if not layers:
        _LOGGER.warning('nothing to extract')

        return

    image_spec = top_most_layer if not isinstance(top_most_layer, int) else layers[top_most_layer][':id']
    tmp_dir = path_realpath(mkdtemp())

    try:
        # The temp dir with a trailing separator: a bare startswith(tmp_dir)
        # would also accept sibling paths such as "<tmp_dir>-x/..."
        tmp_dir_pfx = path_join(tmp_dir, '')

        # NOTE(review): logexception presumably invokes
        # dc.get_image(image_spec), logging (and propagating) any failure;
        # confirm against its definition
        image = logexception(_LOGGER, ERROR, 'unable to retrieve image layers from "{}": {{e}}'.format(image_spec), dc.get_image, image_spec)

        # Unpack the streamed image export into the temp dir
        with tarfile_open(mode='r|*', fileobj=image) as image_tar_file:
            next_info = image_tar_file.next()

            while next_info:
                next_path = path_realpath(path_join(tmp_dir, next_info.name))

                if next_path != tmp_dir and not next_path.startswith(tmp_dir_pfx):
                    exc = UnsafeTarPath('unsafe path: "{}"'.format(next_info.name))
                    # NOTE(review): this relies on logexception raising exc;
                    # if it merely logs, the entry below is still extracted
                    logexception(_LOGGER, ERROR, 'unable to retrieve entry from export of "{}": {{e}}'.format(image_spec), exc)

                image_tar_file.extract(next_info, tmp_dir)
                next_info = image_tar_file.next()

        # Names already written to tar_file
        seen = set()
        # ( path, reason ) pairs; each path hides itself and everything below
        # it in the layers processed afterwards
        hides_subtrees = set()

        # Look through each layer's archive (newest to oldest)
        for layer in layers:
            layer_id = layer[':id']
            layer_tar_path = path_join(tmp_dir, layer_id, 'layer.tar')

            with tarfile_open(layer_tar_path) as layer_tar_file:
                next_info = layer_tar_file.next()

                while next_info:
                    next_dirname = posixpath_dirname(next_info.name)
                    next_basename = posixpath_basename(next_info.name)

                    if next_basename.startswith(_WHITEOUT_PFX):
                        # A whiteout entry marks the sibling path named by
                        # the rest of its basename as removed
                        removed_path = posixpath_join(next_dirname, next_basename[_WHITEOUT_PFX_LEN:])
                        hides_subtrees.add(( removed_path, 'removal' ))

                        if removed_path in seen:
                            _LOGGER.debug('skipping removal "%s"', removed_path)
                        else:
                            _LOGGER.debug('hiding "%s" as removed', removed_path)
                    elif next_info.name in seen:
                        _LOGGER.debug('skipping "%s" as overwritten', next_info.name)
                    else:
                        hidden = None

                        for h, deverbal in hides_subtrees: # https://en.wikipedia.org/wiki/deverbal
                            # h hides the entry only if it IS the entry or
                            # one of its ancestors; a character-wise common
                            # prefix is not enough ("a/bc" must not hide
                            # "a/b/d")
                            if next_info.name == h \
                                    or next_info.name.startswith(h + posixpath_sep):
                                hidden = deverbal, h
                                break

                        if hidden:
                            _LOGGER.debug('skipping "%s" hidden by %s of %s', next_info.name, *hidden)
                        else:
                            mtime = naturaltime(datetime.utcfromtimestamp(next_info.mtime).replace(tzinfo=TZ_UTC))
                            _LOGGER.info('writing "%s" from "%s" to archive (size: %s; mode: %o; mtime: %s)', next_info.name, layer_id, naturalsize(next_info.size), next_info.mode, mtime)

                            if next_info.linkname:
                                # TarFile.extractfile() tries to do
                                # something weird when its parameter
                                # represents a link (see the docs)
                                fileobj = None
                            else:
                                fileobj = layer_tar_file.extractfile(next_info)

                            tar_file.addfile(next_info, fileobj)
                            seen.add(next_info.name)

                            if not next_info.isdir():
                                # A non-directory blocks anything an older
                                # layer stored beneath the same path
                                hides_subtrees.add(( next_info.name, 'presence' ))

                    next_info = layer_tar_file.next()
    finally:
        rmtree(tmp_dir, ignore_errors=True)