def extract_credentials(self, url):
    """
    Extracts user/password from a url.

    Returns a tuple: (url-without-auth, username, password)
    """
    if isinstance(url, urllib2.Request):
        result = urlparse.urlsplit(url.get_full_url())
    else:
        result = urlparse.urlsplit(url)
    scheme, netloc, path, query, frag = result

    username, password = self.parse_credentials(netloc)
    if username is None:
        return url, None, None
    elif password is None and self.prompting:
        # remove the auth credentials from the url part
        netloc = netloc.replace('%s@' % username, '', 1)
        # prompt for the password
        prompt = 'Password for %s@%s: ' % (username, netloc)
        password = urllib.quote(getpass.getpass(prompt))
    else:
        # remove the auth credentials from the url part
        netloc = netloc.replace('%s:%s@' % (username, password), '', 1)

    target_url = urlparse.urlunsplit((scheme, netloc, path, query, frag))
    return target_url, username, password
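# A minimal standalone sketch of the credential-stripping behaviour above
# (assumption: Python 2 stdlib names, as in the original; the class's own
# parse_credentials helper is inlined here for illustration and is not
# pip's actual implementation):
import urlparse


def _strip_credentials(url):
    scheme, netloc, path, query, frag = urlparse.urlsplit(url)
    if '@' not in netloc:
        return url, None, None
    userinfo, host = netloc.rsplit('@', 1)
    username, _, password = userinfo.partition(':')
    new_url = urlparse.urlunsplit((scheme, host, path, query, frag))
    return new_url, username, password or None

# _strip_credentials('https://user:secret@example.com/simple/')
# -> ('https://example.com/simple/', 'user', 'secret')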
def test_unpack_http_url_bad_cache_checksum(mock_get_response,
                                            mock_unpack_file):
    """
    If cached download has bad checksum, re-download.
    """
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    response = mock_get_response.return_value = MockResponse(contents)
    response.info = lambda: {'content-type': 'application/x-tar'}
    response.geturl = lambda: base_url

    cache_dir = mkdtemp()
    try:
        cache_file = os.path.join(cache_dir, urllib.quote(base_url, ''))
        cache_ct_file = cache_file + '.content-type'
        _write_file(cache_file, 'some contents')
        _write_file(cache_ct_file, 'application/x-tar')

        unpack_http_url(link, 'location', download_cache=cache_dir)

        # despite existence of cached file with bad hash, downloaded again
        mock_get_response.assert_called_once_with(base_url, link)

        # cached file is replaced with newly downloaded file
        with open(cache_file) as fh:
            assert fh.read() == 'downloaded'
    finally:
        rmtree(cache_dir)
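# For reference, the '#sha1=' fragment the test builds is just the hex digest
# of the payload; checking a candidate body against such a link can be
# sketched as follows (illustrative only, not pip's Link API):
import hashlib

base_url = 'http://www.example.com/somepackage.tgz'
contents = b'downloaded'
link_url = '%s#sha1=%s' % (base_url, hashlib.sha1(contents).hexdigest())
assert hashlib.sha1(contents).hexdigest() == link_url.split('#sha1=')[1]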
def test_file_index_url_quoting(script, data):
    """
    Test url quoting of file index url with a space
    """
    index_url = data.index_url(urllib.quote("in dex"))
    result = script.pip('install', '-vvv', '--index-url', index_url, 'simple',
                        expect_error=False)
    assert (script.site_packages / 'simple') in result.files_created, (
        str(result.stdout))
    assert (
        script.site_packages / 'simple-1.0-py%s.egg-info' % pyversion
    ) in result.files_created, str(result)
def test_file_index_url_quoting():
    """
    Test url quoting of file index url with a space
    """
    index_url = path_to_url(os.path.join(tests_data, 'indexes',
                                         urllib.quote('in dex')))
    env = reset_env()
    result = run_pip('install', '-vvv', '--index-url', index_url, 'simple',
                     expect_error=False)
    assert (env.site_packages / 'simple') in result.files_created, (
        str(result.stdout))
    assert (
        env.site_packages / 'simple-1.0-py%s.egg-info' % pyversion
    ) in result.files_created, str(result)
def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute.
    """
    path = os.path.normcase(os.path.abspath(path))
    if _drive_re.match(path):
        path = path[0] + '|' + path[2:]
    url = urllib.quote(path)
    url = url.replace(os.path.sep, '/')
    url = url.lstrip('/')
    return 'file:///' + url
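# The same transformation by hand, for a POSIX path with a space (Windows
# drive letters take the `C|` branch above instead); a quick illustrative
# check, assuming Python 2's urllib:
import os
import urllib

path = os.path.abspath('/tmp/in dex')
url = 'file:///' + urllib.quote(path).replace(os.path.sep, '/').lstrip('/')
assert url == 'file:///tmp/in%20dex'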
def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
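# How the download cache above keys its entries: quoting with safe=''
# percent-encodes every reserved character (including '/'), so each URL
# collapses to a single flat filename inside download_cache. A quick check,
# assuming Python 2's urllib:
import urllib

assert (urllib.quote('http://www.example.com/somepackage.tgz', '')
        == 'http%3A%2F%2Fwww.example.com%2Fsomepackage.tgz')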
def path_to_url2(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    drive, path = os.path.splitdrive(path)
    filepath = path.split(os.path.sep)
    url = '/'.join([urllib.quote(part) for part in filepath])
    if not drive:
        url = url.lstrip('/')
    return 'file:///' + drive + url
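# Unlike path_to_url above, path_to_url2 quotes each path segment separately,
# so the separator itself is never percent-encoded and Windows drive letters
# survive unquoted. Expected mappings, for illustration:
#   path_to_url2('/tmp/in dex')  -> 'file:///tmp/in%20dex'   (POSIX)
#   path_to_url2('C:\\in dex')   -> 'file:///C:/in%20dex'    (Windows)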
def get_response(self, url, username=None, password=None):
    """
    Does the dirty work of actually getting the response object using
    urllib2 and its HTTP auth builtins.
    """
    scheme, netloc, path, query, frag = urlparse.urlsplit(url)
    req = self.get_request(url)

    stored_username, stored_password = \
        self.passman.find_user_password(None, netloc)
    # see if we have a password stored
    if stored_username is None:
        if username is None and self.prompting:
            username = urllib.quote(raw_input('User for %s: ' % netloc))
            password = urllib.quote(getpass.getpass('Password: '))
        if username and password:
            self.passman.add_password(None, netloc, username, password)
        stored_username, stored_password = \
            self.passman.find_user_password(None, netloc)
    authhandler = urllib2.HTTPBasicAuthHandler(self.passman)
    opener = urllib2.build_opener(authhandler)
    # FIXME: should catch a 401 and offer to let the user reenter credentials
    return opener.open(req)
def test_file_index_url_quoting(script, data):
    """
    Test url quoting of file index url with a space
    """
    index_url = data.index_url(urllib.quote("in dex"))
    result = script.pip('install', '-vvv', '--index-url', index_url, 'simple',
                        expect_error=False)
    assert (script.site_packages / 'simple') in result.files_created, str(
        result.stdout)
    assert (
        script.site_packages / 'simple-1.0-py%s.egg-info' % pyversion
    ) in result.files_created, str(result)
def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def test_cache_proxy():
    url = 'http://example.com'
    here = os.path.dirname(os.path.abspath(__file__))
    filepath = os.path.join(here, urllib.quote(url, ''))
    if os.path.exists(filepath):
        os.remove(filepath)
    response = pip.backwardcompat.urllib2.urlopen(url)
    r = CachedResponse(url, here)
    try:
        assert_equal(r.code, response.code)
        assert_equal(r.msg, response.msg)
        assert_equal(r.read(), response.read())
        assert_equal(r.url, response.url)
        assert_equal(r.geturl(), response.geturl())
        assert_equal(set(r.headers.keys()), set(response.headers.keys()))
        assert_equal(set(r.info().keys()), set(response.info().keys()))
        assert_equal(r.headers['content-length'],
                     response.headers['content-length'])
    finally:
        os.remove(filepath)
def _set_all_fields(self, folder):
    filename = os.path.join(folder, urllib.quote(self.url, ''))
    if not os.path.exists(filename):
        self._cache_url(filename)
    fp = open(filename, 'rb')
    try:
        line = fp.readline().strip()
        self.code, self.msg = line.split(None, 1)
    except ValueError:
        raise ValueError('Bad field line: %r' % line)
    self.code = int(self.code)
    self.msg = u(self.msg)
    for line in fp:
        if line == b('\n'):
            break
        key, value = line.split(b(': '), 1)
        self.headers[u(key)] = u(value.strip())
    for line in fp:
        self._body += line
    fp.close()
def get_proxy(self, proxystr=''):
    """
    Get the proxy given the option passed on the command line.
    If an empty string is passed it looks at the HTTP_PROXY
    environment variable.
    """
    if not proxystr:
        proxystr = os.environ.get('HTTP_PROXY', '')
    if proxystr:
        if '@' in proxystr:
            user_password, server_port = proxystr.split('@', 1)
            if ':' in user_password:
                user, password = user_password.split(':', 1)
            else:
                user = user_password
                prompt = 'Password for %s@%s: ' % (user, server_port)
                password = urllib.quote(getpass.getpass(prompt))
            return '%s:%s@%s' % (user, password, server_port)
        else:
            return proxystr
    else:
        return None
def get_proxy(self, proxystr=""): """ Get the proxy given the option passed on the command line. If an empty string is passed it looks at the HTTP_PROXY environment variable. """ if not proxystr: proxystr = os.environ.get("HTTP_PROXY", "") if proxystr: if "@" in proxystr: user_password, server_port = proxystr.split("@", 1) if ":" in user_password: user, password = user_password.split(":", 1) else: user = user_password prompt = "Password for %s@%s: " % (user, server_port) password = urllib.quote(getpass.getpass(prompt)) return "%s:%s@%s" % (user, password, server_port) else: return proxystr else: return None
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file)
            and os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding we're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    target_url = link.url.split("#", 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ""))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + ".content-type")):
        fp = open(target_file + ".content-type")
        content_type = fp.read().strip()
        fp.close()
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(target_file, link)
        temp_location = target_file
        logger.notify("Using download cache from %s" % target_file)
    elif already_downloaded:
        temp_location = already_downloaded
        # guess_type returns a (type, encoding) tuple; we only want the type
        content_type = mimetypes.guess_type(already_downloaded)[0]
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
        logger.notify("File was already downloaded %s" % already_downloaded)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()["content-type"]
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get("filename") or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None and not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
from pip.backwardcompat import urllib
from pip.req import InstallRequirement
from pip.index import PackageFinder
from tests.path import Path
from tests.test_pip import here

find_links = "file://" + urllib.quote(
    str(Path(here).abspath / "packages").replace("\\", "/"))


def test_no_mpkg():
    """Finder skips zipfiles with "macosx10" in the name."""
    finder = PackageFinder([find_links], [])
    req = InstallRequirement.from_line("pkgwithmpkg")
    found = finder.find_requirement(req, False)
    assert found.url.endswith("pkgwithmpkg-1.0.tar.gz"), found
def url_name(self):
    if self.req is None:
        return None
    return urllib.quote(self.req.unsafe_name)
from pip.backwardcompat import urllib
from tests.test_pip import here, reset_env, run_pip, pyversion
from tests.path import Path

index_url = 'file://' + urllib.quote(
    str(Path(here).abspath / 'in dex').replace('\\', '/'))


def test_install():
    """
    Test installing from a local index.
    """
    env = reset_env()
    result = run_pip('install', '-vvv', '--index-url', index_url, 'FSPkg',
                     expect_error=False)
    assert (env.site_packages / 'fspkg') in result.files_created, str(
        result.stdout)
    assert (env.site_packages / 'FSPkg-0.1dev-py%s.egg-info' %
            pyversion) in result.files_created, str(result)
from pip.backwardcompat import urllib
from pip.req import InstallRequirement
from pip.index import PackageFinder
from tests.path import Path
from tests.test_pip import here

find_links = 'file://' + urllib.quote(
    str(Path(here).abspath / 'packages').replace('\\', '/'))


def test_no_mpkg():
    """Finder skips zipfiles with "macosx10" in the name."""
    finder = PackageFinder([find_links], [])
    req = InstallRequirement.from_line("pkgwithmpkg")
    found = finder.find_requirement(req, False)
    assert found.url.endswith("pkgwithmpkg-1.0.tar.gz"), found


def test_no_partial_name_match():
    """Finder requires the full project name to match, not just beginning."""
    finder = PackageFinder([find_links], [])
    req = InstallRequirement.from_line("gmpy")
    found = finder.find_requirement(req, False)
    assert found.url.endswith("gmpy-1.15.tar.gz"), found
from pip.backwardcompat import urllib
from tests.test_pip import here, reset_env, run_pip, pyversion
from tests.path import Path

index_url = 'file://' + urllib.quote(
    str(Path(here).abspath / 'in dex').replace('\\', '/'))


def test_install():
    """
    Test installing from a local index.
    """
    env = reset_env()
    result = run_pip('install', '-vvv', '--index-url', index_url, 'FSPkg',
                     expect_error=False)
    assert (env.site_packages / 'fspkg') in result.files_created, str(
        result.stdout)
    assert (env.site_packages / 'FSPkg-0.1dev-py%s.egg-info' %
            pyversion) in result.files_created, str(result)
from pip.backwardcompat import urllib
from pip.req import InstallRequirement
from pip.index import PackageFinder
from tests.path import Path
from tests.test_pip import here

find_links = 'file://' + urllib.quote(
    str(Path(here).abspath / 'packages').replace('\\', '/'))


def test_no_mpkg():
    """Finder skips zipfiles with "macosx10" in the name."""
    finder = PackageFinder([find_links], [])
    req = InstallRequirement.from_line("pkgwithmpkg")
    found = finder.find_requirement(req, False)
    assert found.url.endswith("pkgwithmpkg-1.0.tar.gz"), found


def test_no_partial_name_match():
    """Finder requires the full project name to match, not just beginning."""
    finder = PackageFinder([find_links], [])
    req = InstallRequirement.from_line("gmpy")
    found = finder.find_requirement(req, False)
    assert found.url.endswith("gmpy-1.15.tar.gz"), found
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file)
            and os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only
    # downloading archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    temp_location = None
    target_url = link.url.split("#", 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ""))
        cache_content_type_file = cache_file + ".content-type"
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify("File was already downloaded %s" % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Previously-downloaded file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify("Using download cache from %s" % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Cached file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get("filename") or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only
    # downloading archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)