def is_archive_file(name): # type: (str) -> bool """Return True if `name` is a considered as an archive file.""" ext = splitext(name)[1].lower() if ext in ARCHIVE_EXTENSIONS: return True return False
def _download_http_url(link, session, temp_dir, hashes, progress_bar): """Download link url into temp_dir using provided session""" target_url = link.url.split('#', 1)[0] try: resp = session.get( target_url, # We use Accept-Encoding: identity here because requests # defaults to accepting compressed responses. This breaks in # a variety of ways depending on how the server is configured. # - Some servers will notice that the file isn't a compressible # file and will leave the file alone and with an empty # Content-Encoding # - Some servers will notice that the file is already # compressed and will leave the file alone and will add a # Content-Encoding: gzip header # - Some servers won't notice anything at all and will take # a file that's already been compressed and compress it again # and set the Content-Encoding: gzip header # By setting this to request only the identity encoding We're # hoping to eliminate the third case. Hopefully there does not # exist a server which when given a file will notice it is # already compressed and that you're not asking for a # compressed file and will then decompress it before sending # because if that's the case I don't think it'll ever be # possible to make this work. headers={"Accept-Encoding": "identity"}, stream=True, ) resp.raise_for_status() except requests.HTTPError as exc: logger.critical( "HTTP error %s while getting %s", exc.response.status_code, link, ) raise content_type = resp.headers.get('content-type', '') filename = link.filename # fallback # Have a look at the Content-Disposition header for a better guess content_disposition = resp.headers.get('content-disposition') if content_disposition: type, params = cgi.parse_header(content_disposition) # We use ``or`` here because we don't want to use an "empty" value # from the filename param. filename = params.get('filename') or filename ext = splitext(filename)[1] if not ext: ext = mimetypes.guess_extension(content_type) if ext: filename += ext if not ext and link.url != resp.url: ext = os.path.splitext(resp.url)[1] if ext: filename += ext file_path = os.path.join(temp_dir, filename) with open(file_path, 'wb') as content_file: _download_url(resp, link, content_file, hashes, progress_bar) return file_path, content_type
def splitext(self): return splitext(posixpath.basename(self.path.rstrip('/')))
def splitext(self): # type: () -> Tuple[str, str] return splitext(posixpath.basename(self.path.rstrip('/')))
def _download_http_url( link, # type: Link session, # type: PipSession temp_dir, # type: str hashes, # type: Optional[Hashes] progress_bar # type: str ): # type: (...) -> Tuple[str, str] """Download link url into temp_dir using provided session""" target_url = link.url.split('#', 1)[0] try: resp = session.get( target_url, # We use Accept-Encoding: identity here because requests # defaults to accepting compressed responses. This breaks in # a variety of ways depending on how the server is configured. # - Some servers will notice that the file isn't a compressible # file and will leave the file alone and with an empty # Content-Encoding # - Some servers will notice that the file is already # compressed and will leave the file alone and will add a # Content-Encoding: gzip header # - Some servers won't notice anything at all and will take # a file that's already been compressed and compress it again # and set the Content-Encoding: gzip header # By setting this to request only the identity encoding We're # hoping to eliminate the third case. Hopefully there does not # exist a server which when given a file will notice it is # already compressed and that you're not asking for a # compressed file and will then decompress it before sending # because if that's the case I don't think it'll ever be # possible to make this work. headers={"Accept-Encoding": "identity"}, stream=True, ) resp.raise_for_status() except requests.HTTPError as exc: logger.critical( "HTTP error %s while getting %s", exc.response.status_code, link, ) raise content_type = resp.headers.get('content-type', '') filename = link.filename # fallback # Have a look at the Content-Disposition header for a better guess content_disposition = resp.headers.get('content-disposition') if content_disposition: filename = parse_content_disposition(content_disposition, filename) ext = splitext(filename)[1] # type: Optional[str] if not ext: ext = mimetypes.guess_extension(content_type) if ext: filename += ext if not ext and link.url != resp.url: ext = os.path.splitext(resp.url)[1] if ext: filename += ext file_path = os.path.join(temp_dir, filename) with open(file_path, 'wb') as content_file: _download_url(resp, link, content_file, hashes, progress_bar) return file_path, content_type
def is_archive_file(name): """Return True if `name` is a considered as an archive file.""" ext = splitext(name)[1].lower() if ext in ARCHIVE_EXTENSIONS: return True return False
def ext(self): # type: () -> str return splitext()[1]
def splitext(self) -> Tuple[str, str]: return splitext(posixpath.basename(self.path.rstrip("/")))
def is_archive_file(name): # type: (str) -> bool """Return True if `name` is a considered as an archive file.""" ext = splitext(name)[1].lower() return ext in ARCHIVE_EXTENSIONS