示例#1
0
def metadata(distribution):
    """
    Return the ``(name, version)`` pair for a distribution archive.

    The archive wrapper class is selected from the file extension.  When the
    wrapper's ``pkg_info()`` call succeeds, the values come from the first
    ``Name: `` and ``Version: `` lines of that text (either may stay ``None``
    if no such line is present).  When it raises ``AttributeError`` instead,
    both values are split out of the archive's file name.
    """
    extension = splitext(distribution)[1]
    archive_types = {
        '.zip': ZipArchive,
        '.tar.gz': TarArchive,
        '.tar.bz2': TarArchive,
        '.tar.tgz': TarArchive,
        '.tar': TarArchive,
        '.tgz': TarArchive,
    }
    archive = archive_types[extension](distribution)

    with closing(archive) as arc:
        try:
            pkg_info = arc.pkg_info()
        except AttributeError:
            # Fallback: treat the base file name as "<name>-<version>".
            base_name = arc.file_name.rsplit('/')[-1]
            stem = splitext(base_name)[0]
            name, version = stem.rsplit('-', 1)
            return name, version

        # Collect the second whitespace-separated token of each header line.
        found = {'Name: ': None, 'Version: ': None}
        for line in pkg_info.split('\n'):
            for prefix in ('Name: ', 'Version: '):
                if line.startswith(prefix):
                    found[prefix] = line.split()[1]
                    break
            if found['Name: '] and found['Version: ']:
                break
        return found['Name: '], found['Version: ']
示例#2
0
def is_archive_file(name):
    """Tell whether `name` ends in one of the recognised archive extensions.

    The extension is compared case-insensitively.
    """
    extension = splitext(name)[1].lower()
    return extension in (
        '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle',
    )
示例#3
0
def is_archive_file(name):
    """Report whether the extension of `name` marks it as an archive.

    Matching ignores the case of the extension.
    """
    known_extensions = (
        ".zip",
        ".tar.gz",
        ".tar.bz2",
        ".tgz",
        ".tar",
        ".pybundle",
    )
    lowered = splitext(name)[1].lower()
    if lowered not in known_extensions:
        return False
    return True
示例#4
0
def get_requirement_from_url(url):
    """Build a requirement from `url`.

    The link's ``#egg`` fragment is preferred; when it is empty the link's
    file name with its extension removed is used instead.
    """
    link = Link(url)
    egg_info = link.egg_fragment or splitext(link.filename)[0]
    return package_to_requirement(egg_info)
示例#5
0
def get_requirement_from_url(url):
    """Derive a requirement from `url`.

    Uses the ``#egg`` fragment of the link when there is one, otherwise the
    extension-less file name of the link.
    """
    link = Link(url)
    fragment = link.egg_fragment
    egg_info = fragment if fragment else splitext(link.filename)[0]
    return package_to_requirement(egg_info)
示例#6
0
 def parse_package_and_version(path):
     """Parse the package name and version number from a path.

     The extension is stripped, the remainder is split on ``-`` and the first
     component that starts with a digit marks where the version begins.

     Returns a ``(name, version)`` tuple.  The name is the lower-cased leading
     components joined with ``_``; the version is the remaining components
     joined with ``-``.  ``(None, None)`` is returned when the name contains
     no ``-`` or no component starts with a digit.
     """
     filename = splitext(path)[0]
     if '-' not in filename:
         return None, None
     path_components = filename.split('-')
     for i, comp in enumerate(path_components):
         # Slice instead of indexing: doubled or trailing dashes yield empty
         # components, and ``''[0]`` would raise IndexError.
         if comp[:1].isdigit():
             return ('_'.join(path_components[:i]).lower(),
                     '-'.join(path_components[i:]))
     return None, None
示例#7
0
 def parse_package_and_version(path):
     """Parse the package name and version number from a path.

     After the extension is removed, the name is split on ``-``; the version
     starts at the first component whose first character is a digit.

     Returns ``(name, version)`` where the name is the preceding components
     joined with ``_`` and lower-cased, and the version is the rest joined
     with ``-``.  Returns ``(None, None)`` if there is no ``-`` at all or no
     component begins with a digit.
     """
     filename = splitext(path)[0]
     if '-' not in filename:
         return None, None
     path_components = filename.split('-')
     for i, comp in enumerate(path_components):
         # Empty components (from "--" or a trailing "-") cannot start a
         # version; skipping them avoids an IndexError on ``comp[0]``.
         if comp and comp[0].isdigit():
             return ('_'.join(path_components[:i]).lower(),
                     '-'.join(path_components[i:]))
     return None, None
示例#8
0
        def _find_cached_match(spec):
            """Resolve `spec` to a ``(version, source)`` pair.

            ``source`` is ``'link cache'`` when the answer came from
            ``self._link_cache`` and ``'PyPI'`` when the finder was queried.
            ``self`` and ``specline`` are taken from the enclosing scope.
            """
            # Disabled shortcut, kept for reference:
            #if spec.is_pinned:
                ## If this is a pinned spec, we can take a shortcut: if it is
                ## found in the dependency cache, we can safely assume it has
                ## been downloaded before, and thus must exist.  We can know
                ## this without ever reaching out to PyPI and avoid the
                ## network overhead.
                #name, version = spec.name, first(spec.preds)[1]
                #if (name, version) in self._dep_cache:
                    #source = 'dependency cache'
                    #return version, source
            version = None
            overrides = self.overrides.get(spec.name)

            ## Try the link cache, and otherwise, try PyPI
            # NOTE(review): `overrides` is part of the cache key, so it must
            # be hashable -- confirm the type stored in self.overrides.
            if (spec.no_extra, overrides) in self._link_cache:
                link, version = self._link_cache[(spec.no_extra, overrides)]
                source = 'link cache'
            else:
                try:
                    requirement = InstallRequirement.from_line(specline)
                    link = self.finder.find_requirement(requirement, False)
                except DistributionNotFound:
                    # Second attempt, this time allowing pre-releases.
                    requirement = InstallRequirement.from_line(
                        specline, prereleases=True)
                    link = self.finder.find_requirement(requirement, False)

                # The hook may substitute the link and supply a version.
                link, version = self._link_hook(overrides, spec, link)

                # Hack to make pickle work
                link.comes_from = None
                source = 'PyPI'

                if link.egg_fragment:
                    # Version is whatever follows the last '-' of the fragment;
                    # the link is rebuilt with a fragment from self.get_hash().
                    version = link.egg_fragment.rsplit('-', 1)[1]
                    link = Link(
                        link.url_without_fragment + "#%s=%s" % self.get_hash(link)
                    )
                elif not version:
                    # No version from the hook: take it from the file name.
                    _, version = splitext(link.filename)[0].rsplit('-', 1)

                # It's more reliable to get version from pinned spec than filename
                if spec.is_pinned:
                    version = spec.pinned

                assert version, "Version must be set!"
                self._link_cache[(spec.no_extra, overrides)] = (link, version)

                # Take this moment to smartly insert the pinned variant of this
                # spec into the link_cache, too
                pinned_spec = Spec.from_pinned(spec.name, version)
                self._link_cache[pinned_spec.fullname] = (link, version)

            return version, source
示例#9
0
        def _find_cached_match(spec):
            """Resolve `spec` to a ``(version, source)`` pair.

            ``source`` is ``'link cache'`` when the answer came from
            ``self._link_cache`` and ``'PyPI'`` when the finder was queried.
            ``self`` and ``specline`` are taken from the enclosing scope.
            """
            # Disabled shortcut, kept for reference:
            #if spec.is_pinned:
            ## If this is a pinned spec, we can take a shortcut: if it is
            ## found in the dependency cache, we can safely assume it has
            ## been downloaded before, and thus must exist.  We can know
            ## this without ever reaching out to PyPI and avoid the
            ## network overhead.
            #name, version = spec.name, first(spec.preds)[1]
            #if (name, version) in self._dep_cache:
            #source = 'dependency cache'
            #return version, source
            version = None
            overrides = self.overrides.get(spec.name)

            ## Try the link cache, and otherwise, try PyPI
            # NOTE(review): `overrides` is part of the cache key, so it must
            # be hashable -- confirm the type stored in self.overrides.
            if (spec.no_extra, overrides) in self._link_cache:
                link, version = self._link_cache[(spec.no_extra, overrides)]
                source = 'link cache'
            else:
                try:
                    requirement = InstallRequirement.from_line(specline)
                    link = self.finder.find_requirement(requirement, False)
                except DistributionNotFound:
                    # Second attempt, this time allowing pre-releases.
                    requirement = InstallRequirement.from_line(
                        specline, prereleases=True)
                    link = self.finder.find_requirement(requirement, False)

                # The hook may substitute the link and supply a version.
                link, version = self._link_hook(overrides, spec, link)

                # Hack to make pickle work
                link.comes_from = None
                source = 'PyPI'

                if link.egg_fragment:
                    # Version is whatever follows the last '-' of the fragment;
                    # the link is rebuilt with a fragment from self.get_hash().
                    version = link.egg_fragment.rsplit('-', 1)[1]
                    link = Link(link.url_without_fragment +
                                "#%s=%s" % self.get_hash(link))
                elif not version:
                    # No version from the hook: take it from the file name.
                    _, version = splitext(link.filename)[0].rsplit('-', 1)

                # It's more reliable to get version from pinned spec than filename
                if spec.is_pinned:
                    version = spec.pinned

                assert version, "Version must be set!"
                self._link_cache[(spec.no_extra, overrides)] = (link, version)

                # Take this moment to smartly insert the pinned variant of this
                # spec into the link_cache, too
                pinned_spec = Spec.from_pinned(spec.name, version)
                self._link_cache[pinned_spec.fullname] = (link, version)

            return version, source
示例#10
0
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch the file behind `link` and unpack (or copy) it to `location`.

    Arguments:
      link -- object providing ``url``, ``filename`` and ``md5_hash``.
      location -- destination passed to ``unpack_file`` / ``_copy_file``.
      download_cache -- cache directory, or a false value to disable caching.
      only_download -- when true the file is copied instead of unpacked.

    A cache entry is used only if both the cached file and its
    ``.content-type`` companion exist.  Otherwise the URL is fetched into a
    fresh temporary directory.  When the link carries an md5 hash it is
    verified before the file is used.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        try:
            # Close the handle even if reading fails.
            content_type = fp.read().strip()
        finally:
            fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            header_filename = params.get('filename')
            # An empty value from the header must not replace the fallback.
            if header_filename:
                # The header is server-controlled: keep only the last path
                # component so it cannot point outside temp_dir.
                filename = os.path.basename(header_filename) or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Last resort: borrow the extension of the URL reported by the
            # response when it differs from the link's URL.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
示例#11
0
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch the file behind `link` and unpack (or copy) it to `location`.

    Arguments:
      link -- object providing ``url``, ``filename`` and ``md5_hash``.
      location -- destination passed to ``unpack_file`` / ``_copy_file``.
      download_cache -- cache directory, or a false value to disable caching.
      only_download -- when true the file is copied instead of unpacked.

    A cache entry is used only if both the cached file and its
    ``.content-type`` companion exist.  Otherwise the URL is fetched into a
    fresh temporary directory.  When the link carries an md5 hash it is
    verified before the file is used.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        try:
            # Close the handle even if reading fails.
            content_type = fp.read().strip()
        finally:
            fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            header_filename = params.get('filename')
            # An empty value from the header must not replace the fallback.
            if header_filename:
                # The header is server-controlled: keep only the last path
                # component so it cannot point outside temp_dir.
                filename = os.path.basename(header_filename) or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Last resort: borrow the extension of the URL reported by the
            # response when it differs from the link's URL.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
示例#12
0
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch the file behind `link` and unpack (or copy) it to `location`.

    Arguments:
      link -- object providing ``url``, ``filename`` and ``md5_hash``.
      location -- destination passed to ``unpack_file`` / ``_copy_file``.
      download_cache -- cache directory, or a false value to disable caching.
      only_download -- when true the file is copied instead of unpacked.

    A cache entry is used only if both the cached file and its
    ``.content-type`` companion exist.  Otherwise the URL is fetched into a
    fresh temporary directory.  When the link carries an md5 hash it is
    verified before the file is used.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file+'.content-type')):
        fp = open(target_file+'.content-type')
        try:
            # Close the handle even if reading fails.
            content_type = fp.read().strip()
        finally:
            fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            # No extension on the link's file name: guess from content type.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Last resort: borrow the extension of the URL reported by the
            # response when it differs from the link's URL.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
示例#13
0
    def _extract_dependencies(self, file_name):
        """Return the requirement lines shipped inside a cached archive.

        `file_name` is looked up under ``self.cache_directory``.  Only names
        containing ``.tar.gz``, ``.tar.bz2`` or ``.zip`` are opened; anything
        else yields an empty list.  The archive is searched for
        ``<package>/requirements.txt`` first and ``<package>/tools/pip-requires``
        second, where ``<package>`` is `file_name` without its extension.

        The matching member is extracted into a relative ``tmp`` directory,
        which is removed again before returning.  Blank lines are skipped and
        every other line is returned stripped of surrounding whitespace.
        """
        archive = None
        dependencies = []

        if file_name.find('.tar.gz') > 0 or file_name.find('.tar.bz2') > 0:
            archive = tarfile.TarFile.open(
                os.path.join(self.cache_directory, file_name))
        elif file_name.find('.zip') > 0:
            archive = zipfile.ZipFile(
                os.path.join(self.cache_directory, file_name))

        if archive is None:
            return dependencies

        try:
            if isinstance(archive, zipfile.ZipFile):
                names = archive.namelist()
            else:
                names = archive.getnames()

            package_requires = None
            package_name = str(splitext(file_name)[0]).strip()

            # First candidate present in the archive wins.
            for relative in ("requirements.txt", "tools/pip-requires"):
                member = os.path.join(package_name, relative)
                if member in names:
                    archive.extract(member, "tmp")
                    package_requires = os.path.join("tmp", package_name,
                                                    relative)
                    break

            if package_requires is not None:
                # ``with`` closes the requirements file; the original left
                # the handle open.
                with open(package_requires) as req_file:
                    stripped = (line.strip() for line in req_file)
                    dependencies.extend(req for req in stripped if req)
        finally:
            # Clean up even when extraction or reading fails.
            try:
                if os.path.isdir("tmp"):
                    shutil.rmtree("tmp")
            finally:
                archive.close()

        return dependencies
示例#14
0
    def _link_hook(self, overrides, spec, link):
        """Apply a ``src`` link override for `spec`, if one is configured.

        Returns ``(link, None)`` unchanged when `overrides` has no truthy
        ``"src"`` entry.  Otherwise the version is read from the part of the
        link's file name after the last ``-``, the ``src`` template is
        rendered with a spec pinned to that version, and the result is
        returned as ``(new_link, pinned_version)``.
        """
        overrides = overrides or {}
        src_template = overrides.get("src")
        if not src_template:
            return link, None

        logger.info(
            '===> Link override %s found for package %s',
            overrides, spec)

        stem = splitext(link.filename)[0]
        _, version = stem.rsplit('-', 1)
        pinned_spec = Spec.from_pinned(name=spec.name, version=version)
        rendered = env.from_string(src_template).render({"spec": pinned_spec})
        override_link = Link(rendered)

        # Hack to make pickle work
        override_link.comes_from = None

        return override_link, pinned_spec.pinned
示例#15
0
    def _extract_dependencies(self, file_name):
        """Return the requirements declared inside a cached archive.

        `file_name` is looked up under ``self.cache_directory``.  Names
        containing ``.tar.gz``, ``.tar.bz2`` or ``.zip`` are opened; anything
        else yields an empty list.  ``<package>/requirements.txt`` is
        preferred over ``<package>/tools/pip-requires``.  The chosen member
        is extracted into a relative ``tmp`` directory, parsed with
        ``parse_requirements``, and ``tmp`` is removed before returning.
        """
        if file_name.find('.tar.gz') > 0 or file_name.find('.tar.bz2') > 0:
            archive = tarfile.TarFile.open(
                os.path.join(self.cache_directory, file_name))
            names = archive.getnames()
        elif file_name.find('.zip') > 0:
            archive = zipfile.ZipFile(
                os.path.join(self.cache_directory, file_name))
            names = archive.namelist()
        else:
            return []

        dependencies = []
        package_name = str(splitext(file_name)[0]).strip()

        # First candidate present in the archive wins.
        for relative in ("requirements.txt", "tools/pip-requires"):
            member = os.path.join(package_name, relative)
            if member not in names:
                continue
            archive.extract(member, "tmp")
            package_requires = os.path.join("tmp", package_name, relative)
            install_reqs = parse_requirements(package_requires)
            dependencies = [str(ir.req) for ir in install_reqs]
            break

        if os.path.isdir("tmp"):
            shutil.rmtree("tmp")

        archive.close()
        return dependencies
示例#16
0
    def _extract_dependencies(self, file_name):
        """Return the requirement lines shipped inside a cached archive.

        `file_name` is looked up under ``self.cache_directory``.  Only names
        containing ``.tar.gz``, ``.tar.bz2`` or ``.zip`` are opened; anything
        else yields an empty list.  The archive is searched for
        ``<package>/requirements.txt`` first and ``<package>/tools/pip-requires``
        second, where ``<package>`` is `file_name` without its extension.

        The matching member is extracted into a relative ``tmp`` directory,
        which is removed again before returning.  Blank lines are skipped and
        every other line is returned stripped of surrounding whitespace.
        """
        archive = None
        dependencies = []

        if file_name.find(".tar.gz") > 0 or file_name.find(".tar.bz2") > 0:
            archive = tarfile.TarFile.open(os.path.join(self.cache_directory, file_name))
        elif file_name.find(".zip") > 0:
            archive = zipfile.ZipFile(os.path.join(self.cache_directory, file_name))

        if archive is None:
            return dependencies

        try:
            if isinstance(archive, zipfile.ZipFile):
                names = archive.namelist()
            else:
                names = archive.getnames()

            package_requires = None
            package_name = str(splitext(file_name)[0]).strip()

            # First candidate present in the archive wins.
            for relative in ("requirements.txt", "tools/pip-requires"):
                member = os.path.join(package_name, relative)
                if member in names:
                    archive.extract(member, "tmp")
                    package_requires = os.path.join("tmp", package_name, relative)
                    break

            if package_requires is not None:
                # ``with`` closes the requirements file; the original left
                # the handle open.
                with open(package_requires) as req_file:
                    for line in req_file:
                        requirement = line.strip()
                        # Truthiness test replaces ``len(...) is 0``, which
                        # compared an int by identity.
                        if requirement:
                            dependencies.append(requirement)
        finally:
            # Clean up even when extraction or reading fails.
            try:
                if os.path.isdir("tmp"):
                    shutil.rmtree("tmp")
            finally:
                archive.close()

        return dependencies
示例#17
0
 def splitext(self):
     """Split the last component of ``self.path`` into base and extension.

     Trailing slashes are dropped first; the split itself is delegated to
     the module-level ``splitext`` function.
     """
     trimmed_path = self.path.rstrip('/')
     last_component = posixpath.basename(trimmed_path)
     return splitext(last_component)
示例#18
0
文件: download.py 项目: 912/M-new
def unpack_http_url(link, location, download_cache, download_dir=None, session=None):
    """Obtain the file behind `link` and unpack it into `location`.

    Arguments:
      link -- object providing ``url``, ``filename``, ``hash`` and
          ``hash_name``.
      location -- destination passed to ``unpack_file``.
      download_cache -- cache directory, or a false value to disable caching.
      download_dir -- optional directory that may already hold the file and
          that receives a copy when it does not.
      session -- HTTP session; a new ``PipSession`` is created when omitted.

    Sources are tried in this order: an existing file in `download_dir`,
    the download cache, and finally an HTTP download into a temporary
    directory.  A local copy whose hash does not match is deleted and the
    next source is tried.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    temp_location = None
    target_url = link.url.split("#", 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ""))
        cache_content_type_file = cache_file + ".content-type"
        already_cached = os.path.exists(cache_file) and os.path.exists(cache_content_type_file)
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify("File was already downloaded %s" % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Previously-downloaded file %s has bad hash, " "re-downloading." % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify("Using download cache from %s" % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Cached file %s has bad hash, " "re-downloading." % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" % (exc.response.status_code, link))
            raise

        content_type = resp.headers.get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get("content-disposition")
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            header_filename = params.get("filename")
            # An empty value from the header must not replace the fallback.
            if header_filename:
                # The header is server-controlled: keep only the last path
                # component so it cannot point outside temp_dir.
                filename = os.path.basename(header_filename) or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # Last resort: borrow the extension of the response's URL when
            # it differs from the link's URL.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a freshly downloaded temporary file is removed here.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
示例#19
0
 def splitext(self):
     """Return the base/extension split of the final part of ``self.path``.

     Any trailing ``/`` characters are removed before the base name is
     taken; the module-level ``splitext`` function performs the split.
     """
     base_name = posixpath.basename(self.path.rstrip('/'))
     return splitext(base_name)
示例#20
0
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Obtain the file behind `link` and unpack it into `location`.

    Arguments:
      link -- object providing ``url``, ``filename``, ``hash`` and
          ``hash_name``.
      location -- destination passed to ``unpack_file``.
      download_cache -- cache directory, or a false value to disable caching.
      download_dir -- optional directory that may already hold the file and
          that receives a copy when it does not.

    Sources are tried in this order: an existing file in `download_dir`,
    the download cache, and finally a download into a temporary directory.
    A local copy whose hash does not match is deleted and the next source
    is tried.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file)
                          and os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            header_filename = params.get('filename')
            # An empty value from the header must not replace the fallback.
            if header_filename:
                # The header is server-controlled: keep only the last path
                # component so it cannot point outside temp_dir.
                filename = os.path.basename(header_filename) or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Last resort: borrow the extension of the URL reported by the
            # response when it differs from the link's URL.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    # Only a freshly downloaded temporary file is removed here.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
示例#21
0
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Obtain the archive behind ``link`` and unpack it into ``location``.

    Sources are tried in this order: an existing file in ``download_dir``,
    an entry in ``download_cache``, and finally an HTTP download into a
    freshly created temporary directory.  A local copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    :param link: link object; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read.
    :param location: target passed to ``unpack_file``.
    :param download_cache: cache directory, or a false value to disable the
        cache.  It is created when it does not exist yet.
    :param download_dir: optional directory that is searched for an existing
        copy and that receives a copy of a newly obtained archive.
    :param session: object with a requests-style ``get`` method; a new
        ``PipSession`` is created when ``None``.
    :raises requests.HTTPError: re-raised (after logging) when the request
        fails or ``raise_for_status`` rejects the response.

    A hash failure on the freshly downloaded file is not caught here and
    propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # The fragment (e.g. "#md5=...") is not part of the resource to request.
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding we're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir when it is joined below.
            suggested = os.path.basename(params.get('filename') or '')
            # Empty or purely relative names are useless; keep the fallback.
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        # Still no extension: after a redirect the final URL may carry one.
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a file we downloaded ourselves lives in temp_dir and may be removed.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
示例#22
0
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Obtain the archive behind ``link`` and unpack it into ``location``.

    Sources are tried in this order: an entry in ``download_cache`` (the
    file plus its ``.content-type`` companion), an existing file in
    ``download_dir``, and finally a download into a freshly created
    temporary directory.  When the link carries both ``hash`` and
    ``hash_name`` the hash is checked once, whichever source was used.

    :param link: link object; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read.
    :param location: target passed to ``unpack_file``.
    :param download_cache: cache directory, or a false value to disable the
        cache.  It is created when it does not exist yet.
    :param download_dir: optional directory that is searched for an existing
        copy and that receives a copy of a newly obtained archive.
    """
    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    # The fragment (e.g. "#md5=...") is not part of the resource to request.
    target_url = link.url.split("#", 1)[0]
    target_file = None
    content_type_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache, urllib.quote(target_url, ""))
        content_type_file = target_file + ".content-type"
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if target_file and os.path.exists(target_file) and os.path.exists(content_type_file):
        # The context manager closes the file even if reading fails.
        with open(content_type_file) as fp:
            content_type = fp.read().strip()
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(target_file, link)
        temp_location = target_file
        logger.notify("Using download cache from %s" % target_file)
    elif already_downloaded:
        temp_location = already_downloaded
        # guess_type() returns a (type, encoding) tuple; only the type is a
        # content type.
        content_type = mimetypes.guess_type(already_downloaded)[0]
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
        logger.notify("File was already downloaded %s" % already_downloaded)
    else:
        resp = _get_response_from_url(target_url, link)
        # A response without a Content-Type header must not abort the
        # download with a KeyError; fall back to an empty content type.
        content_type = resp.info().get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get("content-disposition")
        if content_disposition:
            _, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir when it is joined below.
            suggested = os.path.basename(params.get("filename") or "")
            # Empty or purely relative names are useless; keep the fallback.
            if suggested not in ("", ".", ".."):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        # Still no extension: after a redirect the final URL may carry one.
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.hash and link.hash_name:
        _check_hash(download_hash, link)
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    # Store the archive in the cache unless it was served from the cache.
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    # NOTE(review): with a cache configured the temporary file is not removed
    # here; presumably cache_download() takes care of it -- confirm.
    if target_file is None and not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
示例#23
0
def unpack_http_url(link, location, download_dir=None, session=None):
    """Obtain the archive behind ``link`` and unpack it into ``location``.

    An existing file in ``download_dir`` is reused when present; if the
    link carries a hash and the check raises ``HashMismatch`` that file is
    deleted.  Otherwise the archive is downloaded into a freshly created
    temporary directory.

    :param link: link object; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read.
    :param location: target passed to ``unpack_file``.
    :param download_dir: optional directory that is searched for an existing
        copy and that receives a copy of a newly downloaded archive.
    :param session: object with a requests-style ``get`` method.  Required
        despite the ``None`` default.
    :raises TypeError: when ``session`` is ``None``.
    :raises requests.HTTPError: re-raised (after logging) when the request
        fails or ``raise_for_status`` rejects the response.

    A hash failure on the freshly downloaded file is not caught here and
    propagates from ``_check_hash``.
    """
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'")

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # The fragment (e.g. "#md5=...") is not part of the resource to request.
    target_url = link.url.split('#', 1)[0]

    download_hash = None

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding we're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir when it is joined below.
            suggested = os.path.basename(params.get('filename') or '')
            # Empty or purely relative names are useless; keep the fallback.
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        # Still no extension: after a redirect the final URL may carry one.
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # Only a file we downloaded ourselves lives in temp_dir and may be removed.
    if not already_downloaded:
        os.unlink(temp_location)

    os.rmdir(temp_dir)
示例#24
0
def unpack_http_url(link,
                    location,
                    download_cache,
                    download_dir=None,
                    session=None):
    """Obtain the archive behind ``link`` and unpack it into ``location``.

    Sources are tried in this order: an existing file in ``download_dir``,
    an entry in ``download_cache``, and finally an HTTP download into a
    freshly created temporary directory.  A local copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    :param link: link object; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read.
    :param location: target passed to ``unpack_file``.
    :param download_cache: cache directory, or a false value to disable the
        cache.  It is created when it does not exist yet.
    :param download_dir: optional directory that is searched for an existing
        copy and that receives a copy of a newly obtained archive.
    :param session: object with a requests-style ``get`` method; a new
        ``PipSession`` is created when ``None``.
    :raises requests.HTTPError: re-raised (after logging) when the request
        fails or ``raise_for_status`` rejects the response.

    A hash failure on the freshly downloaded file is not caught here and
    propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # The fragment (e.g. "#md5=...") is not part of the resource to request.
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file)
                          and os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir when it is joined below.
            suggested = os.path.basename(params.get('filename') or '')
            # Empty or purely relative names are useless; keep the fallback.
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        # Still no extension: after a redirect the final URL may carry one.
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a file we downloaded ourselves lives in temp_dir and may be removed.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
示例#25
0
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Obtain the archive behind ``link`` and unpack it into ``location``.

    Sources are tried in this order: an existing file in ``download_dir``,
    an entry in ``download_cache``, and finally an HTTP download into a
    freshly created temporary directory.  A local copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    :param link: link object; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read.
    :param location: target passed to ``unpack_file``.
    :param download_cache: cache directory, or a false value to disable the
        cache.  It is created when it does not exist yet.
    :param download_dir: optional directory that is searched for an existing
        copy and that receives a copy of a newly obtained archive.
    :param session: object with a requests-style ``get`` method; a new
        ``PipSession`` is created when ``None``.
    :raises requests.HTTPError: re-raised (after logging) when the request
        fails or ``raise_for_status`` rejects the response.

    A hash failure on the freshly downloaded file is not caught here and
    propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # The fragment (e.g. "#md5=...") is not part of the resource to request.
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
            )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                    )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                    )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir when it is joined below.
            suggested = os.path.basename(params.get('filename') or '')
            # Empty or purely relative names are useless; keep the fallback.
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        # Still no extension: after a redirect the final URL may carry one.
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # A download dir is specified; copy the archive there.
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    # If using a download cache, store the archive there when needed.
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    # Only a file we downloaded ourselves lives in temp_dir and may be removed.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)