def pipe_to_file(response, path, size=0): """Pull the data off an HTTP response, shove it in a new file, and show progress. :arg response: A file-like object to read from :arg path: The path of the new file :arg size: The expected size, in bytes, of the download. 0 for unknown or to suppress progress indication (as for cached downloads) """ def response_chunks(chunk_size): while True: chunk = response.read(chunk_size) if not chunk: break yield chunk print('Downloading %s%s...' % (self._req.req, (' (%sK)' % (size / 1000)) if size > 1000 else '')) progress_indicator = (DownloadProgressBar( max=size).iter if size else DownloadProgressSpinner().iter) with open(path, 'wb') as file: for chunk in progress_indicator(response_chunks(4096), 4096): file.write(chunk)
def _download_url(resp, link, content_file, hashes): try: total_length = int(resp.headers['content-length']) except (ValueError, KeyError, TypeError): total_length = 0 cached_resp = getattr(resp, "from_cache", False) if logger.getEffectiveLevel() > logging.INFO: show_progress = False elif cached_resp: show_progress = False elif total_length > (40 * 1000): show_progress = True elif not total_length: show_progress = True else: show_progress = False show_url = link.show_url def resp_read(chunk_size): try: # Special case for urllib3. for chunk in resp.raw.stream( chunk_size, # We use decode_content=False here because we don't # want urllib3 to mess with the raw bytes we get # from the server. If we decompress inside of # urllib3 then we cannot verify the checksum # because the checksum will be of the compressed # file. This breakage will only occur if the # server adds a Content-Encoding header, which # depends on how the server was configured: # - Some servers will notice that the file isn't a # compressible file and will leave the file alone # and with an empty Content-Encoding # - Some servers will notice that the file is # already compressed and will leave the file # alone and will add a Content-Encoding: gzip # header # - Some servers won't notice anything at all and # will take a file that's already been compressed # and compress it again and set the # Content-Encoding: gzip header # # By setting this not to decode automatically we # hope to eliminate problems with the second case. decode_content=False): yield chunk except AttributeError: # Standard file-like object. while True: chunk = resp.raw.read(chunk_size) if not chunk: break yield chunk def written_chunks(chunks): for chunk in chunks: content_file.write(chunk) yield chunk progress_indicator = _progress_indicator if link.netloc == PyPI.netloc: url = show_url else: url = link.url_without_fragment if show_progress: # We don't show progress on cached responses if total_length: logger.info("Downloading %s (%s)", url, format_size(total_length)) progress_indicator = DownloadProgressBar(max=total_length).iter else: logger.info("Downloading %s", url) progress_indicator = DownloadProgressSpinner().iter elif cached_resp: logger.info("Using cached %s", url) else: logger.info("Downloading %s", url) logger.debug('Downloading from URL %s', link) downloaded_chunks = written_chunks( progress_indicator( resp_read(CONTENT_CHUNK_SIZE), CONTENT_CHUNK_SIZE ) ) if hashes: hashes.check_against_chunks(downloaded_chunks) else: consume(downloaded_chunks)
def _download_url(resp, link, content_file): download_hash = None if link.hash and link.hash_name: try: download_hash = hashlib.new(link.hash_name) except ValueError: logger.warning( "Unsupported hash name %s for package %s", link.hash_name, link, ) try: total_length = int(resp.headers['content-length']) except (ValueError, KeyError, TypeError): total_length = 0 cached_resp = getattr(resp, "from_cache", False) if logger.getEffectiveLevel() > logging.INFO: show_progress = False elif cached_resp: show_progress = False elif total_length > (40 * 1000): show_progress = True elif not total_length: show_progress = True else: show_progress = False show_url = link.show_url try: def resp_read(chunk_size): try: # Special case for urllib3. for chunk in resp.raw.stream( chunk_size, # We use decode_content=False here because we do # want urllib3 to mess with the raw bytes we get # from the server. If we decompress inside of # urllib3 then we cannot verify the checksum # because the checksum will be of the compressed # file. This breakage will only occur if the # server adds a Content-Encoding header, which # depends on how the server was configured: # - Some servers will notice that the file isn't a # compressible file and will leave the file alone # and with an empty Content-Encoding # - Some servers will notice that the file is # already compressed and will leave the file # alone and will add a Content-Encoding: gzip # header # - Some servers won't notice anything at all and # will take a file that's already been compressed # and compress it again and set the # Content-Encoding: gzip header # # By setting this not to decode automatically we # hope to eliminate problems with the second case. decode_content=False): yield chunk except AttributeError: # Standard file-like object. while True: chunk = resp.raw.read(chunk_size) if not chunk: break yield chunk progress_indicator = lambda x, *a, **k: x if link.netloc == PyPI.netloc: url = show_url else: url = link.url_without_fragment if show_progress: # We don't show progress on cached responses if total_length: logger.info( "Downloading %s (%s)", url, format_size(total_length), ) progress_indicator = DownloadProgressBar( max=total_length, ).iter else: logger.info("Downloading %s", url) progress_indicator = DownloadProgressSpinner().iter elif cached_resp: logger.info("Using cached %s", url) else: logger.info("Downloading %s", url) logger.debug('Downloading from URL %s', link) for chunk in progress_indicator(resp_read(4096), 4096): if download_hash is not None: download_hash.update(chunk) content_file.write(chunk) finally: if link.hash and link.hash_name: _check_hash(download_hash, link) return download_hash
if link.netloc == PyPI.netloc: url = show_url else: url = link.url_without_fragment if show_progress: # We don't show progress on cached responses if total_length: logger.info( "Downloading %s (%s)", url, format_size(total_length), ) progress_indicator = DownloadProgressBar( max=total_length, ).iter else: logger.info("Downloading %s", url) progress_indicator = DownloadProgressSpinner().iter elif cached_resp: logger.info("Using cached %s", url) else: logger.info("Downloading %s", url) logger.debug('Downloading from URL %s', link) for chunk in progress_indicator(resp_read(4096), 4096): if download_hash is not None: download_hash.update(chunk) content_file.write(chunk) if link.hash and link.hash_name: _check_hash(download_hash, link) return download_hash