def download_with_resume(url: str, path: str, cb: Callback = None) -> bool:
    """
    Download a file pointed by url to a local path.

    The size of the file is first read from a HEAD request, then the content
    is fetched with GET requests carrying a Range header that starts at the
    number of bytes already received, until the expected size is reached.

    Any file already present at path is removed before starting. When the
    download fails or is cancelled, a partially written file may remain.

    @param url: URL to download
    @param path: Local file to be saved
    @param cb: Callback object, polled with is_cancelled() and notified with
               progress() (percentage between 0 and 100)
    @return: True if the file was completely downloaded
    """
    logging.debug("Downloading {} to {}".format(url, path))

    # Clean existing file
    if os.path.exists(path):
        os.remove(path)

    if cb and cb.is_cancelled():
        return False

    try:
        r = requests.head(url, allow_redirects=True)
    except requests.exceptions.RequestException as e:
        # requests.exceptions is a module; RequestException is the base class
        # of the errors raised by requests
        logging.error(e)
        return False

    try:
        if r.status_code < 200 or r.status_code > 302:
            logging.error("Failed to reach {}, status is {}".format(url, r.status_code))
            return False
        content_length = r.headers.get("content-length")
    finally:
        r.close()

    # The resume loop needs the total size; without it there is no way to
    # tell when the file is complete
    try:
        expected_size = int(content_length)
    except (TypeError, ValueError):
        logging.error("No usable content-length for {} (got {!r})".format(url, content_length))
        return False

    if cb and cb.is_cancelled():
        return False

    chunk_size = 2**20
    last_byte = 0
    with open(path, 'wb') as f:
        while last_byte < expected_size:
            if cb and cb.is_cancelled():
                return False
            logging.debug("{} vs {}".format(last_byte, expected_size))
            logging.debug("Starting download with already {}% of the file".
                          format((100*last_byte)/expected_size))
            resume_header = {'Range': 'bytes=%d-' % last_byte}
            received = 0
            # The with-statement closes the response on every exit path,
            # including the early returns below
            with requests.get(url, headers=resume_header, stream=True,
                              verify=True, allow_redirects=True) as resume_request:
                status = resume_request.status_code
                if status >= 400:
                    # Do not write an error page into the target file
                    logging.error("Failed to download {}, status is {}".format(url, status))
                    return False
                if last_byte > 0 and status != 206:
                    # The server ignored the Range header: appending this
                    # body to the partial file would corrupt it
                    logging.error("Cannot resume {} at byte {}, status is {}"
                                  .format(url, last_byte, status))
                    return False
                for data in resume_request.iter_content(chunk_size):
                    last_byte += len(data)
                    received += len(data)
                    if cb and cb.is_cancelled():
                        return False
                    if cb:
                        cb.progress(100 * (last_byte / expected_size))
                    f.write(data)
            if received == 0:
                # No progress was made: stop instead of looping forever
                logging.error("No data received from {} at byte {}".format(url, last_byte))
                return False

    if cb and cb.is_cancelled():
        return False
    if cb:
        cb.progress(100)
    return True
def download_mp3(self, cb: Callback = None, dry_run: bool = False):
    """
    Will get the list of MP3s and download them into the specified folder.

    Files that already exist in the folder are skipped unless overwrite() is
    set. The overall progress reported to cb is split evenly between the
    episodes.

    @param cb: Callback object
    @param dry_run: Will not actually download anything (for test purposes only)
    @return: None
    @raise BulkDownloaderException: if no folder is defined
    """
    if not self.folder():
        err_str = 'No folder is defined for the download'
        logging.error(err_str)
        raise BulkDownloaderException(err_str)

    to_download = self.list_mp3(cb)
    logging.info('{} files will be downloaded'.format(len(to_download)))
    if cb and cb.is_cancelled():
        return
    if cb:
        cb.progress(0)

    count = 0
    downloads_successful = 0
    downloads_skipped = 0
    nb_downloads = len(to_download)
    # Share of the overall progress owned by one episode; guarded so that an
    # empty list does not divide by zero
    step = 100. / nb_downloads if nb_downloads else 0.

    for episode in to_download:
        if cb:
            if cb.is_cancelled():
                # Nothing more to do once cancelled
                return
            cb.progress(count * step)

        # Getting the name and path
        path = os.path.join(self.folder(), episode.get_filename())

        # Check if we should skip the file
        if not self.overwrite() and os.path.isfile(path):
            logging.info('Skipping {} as the file already exists at {}'
                         .format(episode.get_filename(), path))
            downloads_skipped += 1
            count += 1
            continue

        # Download file
        logging.info('Saving {} to {} from {}'.format(episode.get_filename(),
                                                      path,
                                                      episode.url()))
        if cb:
            # Presumably set_function installs a transform applied to progress
            # values: map the per-file percentage onto this episode's slice.
            # The current count is bound as a default to avoid late binding.
            cb.set_function(lambda x, done=count: (done + x / 100) * step)
        try:
            if not dry_run and try_download(episode.url(), path, cb=cb):
                downloads_successful += 1
        finally:
            # Restore the identity mapping even if the download raised
            if cb:
                cb.set_function(lambda x: x)
        count += 1

    if cb and cb.is_cancelled():
        return
    if cb:
        cb.progress(100)

    logging.info('{}/{} episodes were successfully downloaded'.format(downloads_successful,
                                                                      nb_downloads))
    logging.info('{}/{} episodes were skipped because files already existed'
                 .format(downloads_skipped, nb_downloads))