def _get_html_response(url: str, session: PipSession) -> Response: """Access an HTML page with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotHTML` if it is not HTML. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got HTML, and raise `_NotHTML` otherwise. """ if is_archive_file(Link(url).filename): _ensure_html_response(url, session=session) logger.debug("Getting page %s", redact_auth_from_url(url)) resp = session.get( url, headers={ "Accept": "text/html", # We don't want to blindly returned cached data for # /simple/, because authors generally expecting that # twine upload && pip install will function, but if # they've done a pip install in the last ~10 minutes # it won't. Thus by setting this to zero we will not # blindly use any cached data, however the benefit of # using max-age=0 instead of no-cache, is that we will # still support conditional requests, so we will still # minimize traffic sent in cases where the page hasn't # changed at all, we will just always incur the round # trip for the conditional GET now instead of only # once per 10 minutes. # For more information, please see pypa/pip#5670. "Cache-Control": "max-age=0", }, ) raise_for_status(resp) # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. _ensure_html_header(resp) return resp
def get_file_content(url: str, session: PipSession) -> Tuple[str, str]: """Gets the content of a file; it may be a filename, file: URL, or http: URL. Returns (location, content). Content is unicode. Respects # -*- coding: declarations on the retrieved files. :param url: File path or url. :param session: PipSession instance. """ scheme = get_url_scheme(url) # Pip has special support for file:// URLs (LocalFSAdapter). if scheme in ["http", "https", "file"]: resp = session.get(url) raise_for_status(resp) return resp.url, resp.text # Assume this is a bare path. try: with open(url, "rb") as f: content = auto_decode(f.read()) except OSError as exc: raise InstallationError(f"Could not open requirements file: {exc}") return url, content
def _http_get_download(session: PipSession, link: Link) -> Response: target_url = link.url.split("#", 1)[0] resp = session.get(target_url, headers=HEADERS, stream=True) raise_for_status(resp) return resp