def download_binary(url, rel_file, prog, handler_id):
    """
    Downloads the given URL into a binary file, updating the provided status as it goes.

    :param url: The URL to download
    :param rel_file: The RelFile to save to
    :param prog: The progress object to update.
    :param handler_id: The ID of the controlling Handler, for printout & Errors.
    :return: a HandlerResponse object, with the download outcome.
    """
    req = None
    # noinspection PyBroadException
    try:
        req = open_request(url, stream=True)
        if not req or req.status_code != 200:
            return HandlerResponse(
                success=False,
                handler=handler_id,
                failure_reason="Server Error: %s->%s" % (url, req.status_code if req is not None else None))
        size = req.headers.get('content-length')
        if size:
            size = int(size)
        downloaded_size = 0
        ext = _guess_media_mimetype(req)
        if not ext:
            return HandlerResponse(
                success=False,
                handler=handler_id,
                failure_reason="Unable to determine MIME Type.")
        rel_file.set_ext(ext)
        rel_file.mkdirs()
        prog.set_status("Downloading file...")
        prog.set_file(rel_file.relative())
        with open(rel_file.absolute(), 'wb') as f:
            # Stream in 4 MB chunks to keep memory usage bounded for large files.
            for data in req.iter_content(chunk_size=1024 * 1024 * 4):
                downloaded_size += len(data)
                f.write(data)
                if size:
                    # Only report percentage when the server sent a content-length.
                    prog.set_percent(round(100 * (downloaded_size / size)))
        return HandlerResponse(success=True, rel_file=rel_file, handler=handler_id)
    except Exception as ex:
        print(ex)
        # Remove any partially-written file so a failed download doesn't leave corrupt output behind.
        if rel_file.exists():
            rel_file.delete_file()
        return HandlerResponse(success=False, handler=handler_id, failure_reason="Error Downloading: %s" % ex)
    finally:
        # BUG FIX: the streaming response was never closed, leaking the underlying
        # connection on every early-return and error path.
        if req is not None:
            req.close()
def handle(task, progress):
    """
    Handle an Imgur URL: resolve albums/galleries into their member image URLs,
    otherwise download the single image directly.

    :param task: The download task; its `.url` and `.file` attributes are used here.
    :param progress: Progress tracker, passed through to the binary downloader.
    :return: False if this isn't an imgur link, otherwise a HandlerResponse
             (either album_urls for multi-image albums, or the download outcome).
    """
    url = task.url
    if not url or not is_imgur(url):
        return False
    # Check for an album/gallery.
    if is_gallery(url):
        if 'i.' in url:
            # Imgur redirects this, but we correct for posterity.
            # NOTE(review): replaces every 'i.' occurrence in the URL, not just the
            # 'i.imgur.com' subdomain — assumed safe for imgur URLs; TODO confirm.
            url = url.replace('i.', '')
        urls = []
        try:
            # First attempt: scrape the album page directly.
            album = ImgurAlbumDownloader(url)
            urls = album.get_urls()
        except ImgurAlbumException:
            pass  # It's possible an image incorrectly has a Gallery location prepended. Ignore error.
        if not len(urls):
            # Try using the imgur API to locate this album.
            try:
                # fallback to imgur API client, if enabled, for hidden albums.
                client = make_api_client()
                if not client:
                    return HandlerResponse(success=False, handler=tag, failure_reason="Could not locate hidden album, and API client is disabled.")
                items = client.get_album_images(extract_id(url))

                def best(o):
                    # Find the best-quality link available within the given Imgur object.
                    # Preference order: mp4 > gifv > gif > plain link.
                    for b in ['mp4', 'gifv', 'gif', 'link']:
                        if hasattr(o, b):
                            return getattr(o, b)
                # Skip advertisement entries the API may include in album listings.
                urls = [best(i) for i in items if not getattr(i, 'is_ad', False)]
            except Exception as e:
                print('Imgur API:', e)
                pass  # It's possible an image incorrectly has a Gallery location prepended. Ignore error.
        if len(urls) == 1:
            url = urls[0]  # For single-image albums, set up to directly download the image.
        elif len(urls):
            # Multi-image album: hand the URL list back for the caller to enqueue.
            return HandlerResponse(success=True, handler=tag, album_urls=urls)
    url = build_direct_link(url)
    # Do some pre-processing, mostly to screen filetypes.
    ext, stat = http_downloader.is_media_url(url, return_status=True)
    if not ext or stat != 200:
        return HandlerResponse(success=False, handler=tag, failure_reason="Unable to determine imgur filetype: HTTP %s: %s" % (stat, url))
    if ext in imgur_animation_exts:
        # Imgur serves animated formats as MP4; swap the file extension before download.
        url = '.'.join(url.split('.')[:-1]) + '.mp4'
    return http_downloader.download_binary(url, task.file, prog=progress, handler_id=tag)
def handle(task, progress):
    """
    Handle an Imgur URL: albums resolve to their member URLs; anything else
    (including a mis-labeled gallery link) falls back to a direct download.
    """
    cleaned = clean_imgur_url(task.url)
    if not cleaned:
        return False
    # Strip album/gallery path components to form a candidate direct-image URL.
    stripped = cleaned.replace('/gallery/', '/').replace('/a/', '/')
    progress.set_status("Parsing url & verifying format...")
    failure = None
    if 'gallery' in cleaned or '/a/' in cleaned:
        target = cleaned.replace('i.', '') if 'i.' in cleaned else cleaned
        try:
            return HandlerResponse(success=True, handler=tag, album_urls=ImgurAlbumDownloader(target).get_urls())
        except ImgurAlbumException as ex:
            # An image may incorrectly carry a Gallery prefix, which Imgur can resolve.
            # Remember the error and fall through to a direct-download attempt.
            failure = ex
    direct = get_direct_link(stripped)
    if direct:
        return http_downloader.download_binary(direct, task.file, prog=progress, handler_id=tag)
    # Unable to parse a proper URL; surface the album error (if any) for debugging.
    if failure:
        print("ImgurAlbumException:", failure)
    return False
def handle(task, progress):  # !cover
    """Reject Reddit links outright; this handler never downloads anything."""
    link = task.url
    is_reddit = 'reddit.com' in link or link.strip().startswith(('/r/', '/u/'))
    if is_reddit:
        return HandlerResponse(success=False, handler=tag, failure_reason="Reddit links are disabled.")
    return False
def handle(task, progress):  # !cover
    """Reject any URL whose host matches an entry on the disabled-domains list."""
    host = urlparse(task.url).netloc.lower()
    # Case-insensitive substring match against the host portion only.
    blocked = next((d for d in disabled_list if d.lower() in host), None)
    if blocked is not None:
        return HandlerResponse(success=False, handler=tag, failure_reason="%s links are disabled." % blocked)
    return False
def handle(task, progress):
    """Attempt a download through youtube-dl; returns False on any failure so other handlers can try."""
    # noinspection PyBroadException
    try:
        result = YTDLWrapper(progress).run(task.url, task.file)
        return HandlerResponse(success=True, rel_file=result, handler=tag)
    except Exception as ex:
        # "unsupported url" just means YTDL can't handle this link — only log unexpected errors.
        if 'unsupported url' not in str(ex).lower():
            print('YTDL:', ex, task.url, file=sys.stderr, flush=True)
        # Don't allow the script to crash due to a YTDL exception.
        return False
def handle(task, progress):
    """Attempt a download through the YTDL wrapper, swallowing errors so the pipeline keeps running."""
    # noinspection PyBroadException
    try:
        downloader = YTDLWrapper(progress)
        saved = downloader.run(task.url, task.file)
        return HandlerResponse(success=True, rel_file=saved, handler=tag)
    except YTDLError:
        # Expected for URLs YTDL doesn't support — let another handler try.
        return False
    except Exception as ex:
        print('YTDL Handler:', ex, ' URL:', task.url, file=sys.stderr, flush=True)
        # Don't allow the script to crash due to a YTDL exception.
        return False
def handle(task, progress):
    """Download the URL directly if it points straight at a recognizable media file."""
    link = task.url
    progress.set_status("Checking for direct url...")
    extension, status = http_downloader.is_media_url(link, return_status=True)
    if status != 200:
        # The server didn't serve the URL cleanly — report the failure.
        return HandlerResponse(success=False, handler=tag, failure_reason="URL Responded: %s" % status)
    if not extension:
        # Reachable, but not recognizable media; let another handler try.
        return False
    progress.set_status("Downloading direct media...")
    return http_downloader.download_binary(link, task.file, prog=progress, handler_id=tag)
def handle(task, progress):
    """
    Download media from a Tumblr post URL.

    :param task: The download task; its `.url` must match the module regex.
    :param progress: Progress tracker, passed to the binary downloader.
    :return: False when the URL can't be handled, a HandlerResponse with
             album_urls for multi-media posts, or the download outcome otherwise.
    """
    m = re.match(regex, task.url)
    # '.media.tumblr' URLs are direct media links, handled elsewhere.
    if m is None or '.media.tumblr' in task.url:
        return False
    gr = m.groups()
    progress.set_status("Parsing Tumblr page...")
    try:
        urls = get_media_urls(gr[0], gr[1])
        if not urls:
            # CONSISTENCY FIX: was `return None` — every other handler in this file
            # signals "cannot handle" with False. Both are falsy, so truthiness-based
            # callers behave identically.
            return False
        if len(urls) > 1:
            return HandlerResponse(success=True, handler=tag, album_urls=urls)
        return http_downloader.download_binary(urls[0], task.file, progress, tag)
    except Exception as ex:
        print('Tumblr: ERROR:', ex, task.url, file=sys.stderr, flush=True)
        return False
def handle(task, progress):  # !cover
    """Block all GitHub links; this handler never downloads anything."""
    if 'github.com' not in task.url:
        return False
    return HandlerResponse(success=False, handler=tag, failure_reason="Github links are disabled.")