def _generate_packages(self):
    """
    Generate new packages from self.links.

    If the url pattern captured an explicit package name (`N` group), all
    links go into a single package of that name; otherwise the core API is
    asked to group the (sanitized) links into packages.
    """
    pkg_name = self.info["pattern"].get("N")

    if pkg_name is None:
        #: No explicit name: let the core decide how to group the links.
        sanitized = [self.fixurl(url) for url in self.links]
        grouped = self.pyload.api.generate_packages(sanitized)
        new_packages = [
            (pname, purls, parse_name(pname))
            for pname, purls in grouped.items()
        ]
    else:
        new_packages = [(pkg_name, self.links, parse_name(pkg_name))]

    self.packages.extend(new_packages)
def _generate_packages(self):
    """
    Generate new packages from self.links.

    With a captured package name (`N` pattern group) a single package is
    created; otherwise the links are sanitized and grouped by the core API.
    """
    name = self.info["pattern"].get("N")

    if name is not None:
        #: Explicit package name from the url pattern: one package only.
        self.packages.extend([(name, self.links, parse_name(name))])
        return

    urls = [self.fixurl(url) for url in self.links]
    grouped = self.pyload.api.generate_packages(urls)
    self.packages.extend(
        (pack_name, pack_links, parse_name(pack_name))
        for pack_name, pack_links in grouped.items()
    )
def isresource(self, url, redirect=True, resumable=None):
    """
    Follow redirects for `url` (headers only) and decide whether it points
    to a downloadable resource rather than an html page.

    :param url: url to probe
    :param redirect: True to follow redirects up to the configured limit,
        an int for an explicit redirect limit, False to not follow at all
    :param resumable: overrides self.resume_download when not None
    :return: the final resource url, or False if not a resource
    """
    resource = False
    maxredirs = 5

    if resumable is None:
        resumable = self.resume_download

    #: BUGFIX: `bool` is a subclass of `int`, so a plain isinstance check
    #: matched `redirect=True` and clamped maxredirs to max(True, 1) == 1,
    #: making the config-based limit below unreachable. Exclude bools.
    if isinstance(redirect, int) and not isinstance(redirect, bool):
        maxredirs = max(redirect, 1)
    elif redirect:
        maxredirs = (
            int(self.config.get("maxredirs", plugin="UserAgentSwitcher"))
            or maxredirs
        )  # TODO: Remove `int` in 0.6.x

    header = self.load(url, just_header=True)
    for i in range(1, maxredirs):
        if not redirect or header.get("connection") == "close":
            resumable = False

        if "content-disposition" in header:
            #: Server explicitly serves an attachment.
            resource = url

        elif header.get("location"):
            location = self.fixurl(header.get("location"), url)
            code = header.get("code")
            if code in (301, 302) or resumable:
                self.log_debug(f"Redirect #{i} to: {location}")
                header = self.load(location, just_header=True)
                url = location
                continue

        else:
            #: No disposition and no redirect: judge by mime type.
            contenttype = header.get("content-type")
            extension = os.path.splitext(parse_name(url))[-1]

            if contenttype:
                mimetype = contenttype.split(";")[0].strip()
            elif extension:
                mimetype = (
                    mimetypes.guess_type(extension, False)[0]
                    or "application/octet-stream"
                )
            else:
                mimetype = None

            if mimetype and (resource or "html" not in mimetype):
                resource = url
            else:
                resource = False

        return resource
def isresource(self, url, redirect=True, resumable=None):
    """
    Check whether `url` leads to a downloadable resource, following
    redirects with header-only requests.

    :param url: url to probe
    :param redirect: True to follow redirects (limit from config), an int
        for an explicit redirect limit, False to disable following
    :param resumable: overrides self.resume_download when not None
    :return: final resource url, or False when `url` is not a resource
    """
    resource = False
    maxredirs = 5

    if resumable is None:
        resumable = self.resume_download

    #: BUGFIX: isinstance(True, int) is True in Python, so `redirect=True`
    #: previously hit this branch and forced maxredirs to 1; bools must be
    #: excluded so the UserAgentSwitcher config limit is actually used.
    if isinstance(redirect, int) and not isinstance(redirect, bool):
        maxredirs = max(redirect, 1)
    elif redirect:
        maxredirs = (
            int(self.config.get("maxredirs", plugin="UserAgentSwitcher"))
            or maxredirs
        )  # TODO: Remove `int` in 0.6.x

    header = self.load(url, just_header=True)
    for i in range(1, maxredirs):
        if not redirect or header.get("connection") == "close":
            resumable = False

        if "content-disposition" in header:
            resource = url  #: explicit file attachment

        elif header.get("location"):
            location = self.fixurl(header.get("location"), url)
            code = header.get("code")
            if code in (301, 302) or resumable:
                self.log_debug(f"Redirect #{i} to: {location}")
                header = self.load(location, just_header=True)
                url = location
                continue

        else:
            #: Fall back to mime-type detection (header, then extension).
            contenttype = header.get("content-type")
            extension = os.path.splitext(parse_name(url))[-1]

            if contenttype:
                mimetype = contenttype.split(";")[0].strip()
            elif extension:
                mimetype = (
                    mimetypes.guess_type(extension, False)[0]
                    or "application/octet-stream"
                )
            else:
                mimetype = None

            if mimetype and (resource or "html" not in mimetype):
                resource = url
            else:
                resource = False

        return resource
def _create_packages(self):
    """
    Create new packages from self.packages.

    Each entry is a (name, links, folder) tuple; links are sanitized,
    the package is registered through the core API, and password/folder
    metadata is copied or derived from the source package.
    """
    source_pkg = self.pyfile.package()
    pack_folder = source_pkg.folder
    pack_password = source_pkg.password
    pack_queue = source_pkg.queue

    use_subfolder = self.config.get("folder_per_package", "Default")
    if use_subfolder == "Default":
        use_subfolder = self.pyload.config.get("general", "folder_per_package")
    else:
        use_subfolder = use_subfolder == "Yes"

    for name, links, folder in self.packages:
        self.log_info(
            self._("Create package: {}").format(name),
            self._("{} links").format(len(links)),
        )

        links = [self.fixurl(url) for url in links]
        self.log_debug("LINKS for package " + name, links)

        pid = self.pyload.api.add_package(name, links, pack_queue)

        if pack_password:
            self.pyload.api.set_package_data(pid, {"password": pack_password})

        if not use_subfolder:
            folder = pack_folder
        elif not folder or folder == name:
            folder = parse_name(name)

        self.log_info(
            self._("Save package `{name}` to folder: {folder}").format(
                name=name, folder=folder
            )
        )
        #: Workaround to do not break API add_package method
        self.pyload.api.set_package_data(pid, {"folder": safename(folder or "")})
def _create_packages(self):
    """
    Create new packages from self.packages.

    Copies password/queue from the current package and resolves the target
    folder according to the `folder_per_package` configuration.
    """
    src = self.pyfile.package()
    default_folder = src.folder
    password = src.password
    queue = src.queue

    per_package = self.config.get("folder_per_package", "Default")
    if per_package == "Default":
        per_package = self.pyload.config.get("general", "folder_per_package")
    else:
        per_package = per_package == "Yes"

    for name, links, folder in self.packages:
        self.log_info(
            self._("Create package: {}").format(name),
            self._("{} links").format(len(links)),
        )

        links = [self.fixurl(url) for url in links]
        self.log_debug("LINKS for package " + name, links)

        pid = self.pyload.api.add_package(name, links, queue)

        if password:
            self.pyload.api.set_package_data(pid, {"password": password})

        def store_folder(value):
            #: Workaround to do not break API add_package method
            return self.pyload.api.set_package_data(
                pid, {"folder": safename(value or "")}
            )

        if not per_package:
            folder = default_folder
        elif not folder or folder == name:
            folder = parse_name(name)

        self.log_info(
            self._("Save package `{name}` to folder: {folder}").format(
                name=name, folder=folder
            )
        )
        store_folder(folder)
def get_info(cls, url="", html=""):
    """
    Build the default info dict for `url`.

    Status is 7 (starting) for a non-empty url, 8 (unknown) otherwise;
    the `pattern` dict holds any named groups from cls.__pattern__.
    """
    url = fixurl(url, unquote=True)
    info = {
        "name": parse_name(url),
        "hash": {},
        "pattern": {},
        "size": 0,
        "status": 7 if url else 8,
        "url": replace_patterns(url, cls.URL_REPLACEMENTS),
    }

    m = None
    try:
        m = re.match(cls.__pattern__, url)
    except Exception:
        #: Invalid/missing plugin pattern: keep the empty pattern dict.
        pass
    if m is not None:
        info["pattern"] = m.groupdict()

    return info
def get_info(cls, url="", html=""):
    """
    Collect status and name info for `url`, optionally parsing `html`.

    Fetches the page when no html was supplied and the status is still
    undecided, then checks the offline / temp-offline / name patterns.
    """
    info = super(SimpleDecrypter, cls).get_info(url)
    info.update(cls.api_info(url))

    if not html and info["status"] != 2:
        if not url:
            info["error"] = "missing url"
            info["status"] = 1
        elif info["status"] in (3, 7):
            try:
                html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING)
            except BadHeader as exc:
                info["error"] = "{}: {}".format(exc.code, exc.content)
            except Exception:
                #: Best-effort fetch: leave status untouched on failure.
                pass

    if html:
        if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None:
            info["status"] = 1
        elif (
            cls.TEMP_OFFLINE_PATTERN
            and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None
        ):
            info["status"] = 6
        elif cls.NAME_PATTERN:
            name_match = re.search(cls.NAME_PATTERN, html)
            if name_match is not None:
                info["status"] = 2
                info["pattern"].update(name_match.groupdict())

    #: `N` may come from the url pattern or api_info as well, so this is
    #: checked regardless of whether html was parsed.
    if "N" in info["pattern"]:
        info["name"] = parse_name(
            replace_patterns(info["pattern"]["N"], cls.NAME_REPLACEMENTS)
        )

    return info
def adult_confirmation(self, pyfile):
    """
    Detect the adult-content disclaimer and confirm it when present.

    Reloads the page through pornfile.cz after submitting the disclaimer
    form, updating self.data and the pyfile name.

    :return: True when a confirmation was needed, False otherwise
    """
    if not re.search(self.ADULT_PATTERN, self.data):
        return False

    self.log_info(self._("Adult content confirmation needed"))

    url = pyfile.url.replace("ulozto.net", "pornfile.cz")
    self.load(
        "https://pornfile.cz/p**n-disclaimer",
        post={"agree": "Confirm", "_do": "porn_disclaimer-submit"},
    )

    html = self.load(url)
    self.pyfile.name = parse_name(re.search(self.NAME_PATTERN, html).group(2))
    self.data = html
    return True
def adult_confirmation(self, pyfile):
    """
    Confirm the adult-content disclaimer if the current page requires it.

    :return: True when the disclaimer was detected and confirmed
    """
    needs_confirmation = re.search(self.ADULT_PATTERN, self.data) is not None

    if needs_confirmation:
        self.log_info(self._("Adult content confirmation needed"))

        target_url = pyfile.url.replace("ulozto.net", "pornfile.cz")
        self.load(
            "https://pornfile.cz/p**n-disclaimer",
            post={"agree": "Confirm", "_do": "porn_disclaimer-submit"},
        )

        page = self.load(target_url)
        m = re.search(self.NAME_PATTERN, page)
        self.pyfile.name = parse_name(m.group(2))
        self.data = page

    return needs_confirmation
def process(self, pyfile):
    """
    Download a file over FTP, or add a package of links when the url
    turns out to be a directory listing.

    Resolves credentials from the account store, the package password
    (`user:pass`), or falls back to anonymous login.
    """
    p_url = urllib.parse.urlparse(pyfile.url)
    netloc = p_url.netloc

    pyfile.name = parse_name(p_url.path.rpartition("/")[2])

    if "@" not in netloc:
        #: No credentials embedded in the url: look them up.
        # TODO: Recheck in 0.6.x
        if self.account:
            servers = [x["login"] for x in self.account.get_all_accounts()]
        else:
            servers = []

        if netloc in servers:
            self.log_debug(f"Logging on to {netloc}")
            self.req.add_auth(self.account.get_login("password"))
        else:
            pwd = self.get_password()
            if ":" in pwd:
                self.log_debug(f"Logging on to {netloc}")
                self.req.add_auth(pwd)
            else:
                self.log_debug("Using anonymous logon")

    try:
        headers = self.load(pyfile.url, just_header=True)
    except pycurl.error as exc:
        if "530" in exc.args[1]:
            self.fail(self._("Authorization required"))
        else:
            #: BUGFIX: `exc.args` is a tuple (code, message); it must be
            #: unpacked, otherwise the second placeholder raises IndexError.
            self.fail(self._("Error {}: {}").format(*exc.args))

    self.req.http.c.setopt(pycurl.NOBODY, 0)
    self.log_debug(self.req.http.header)

    if "content-length" in headers:
        pyfile.size = headers.get("content-length")
        self.download(pyfile.url)
    else:
        #: Naive ftp directory listing
        if re.search(r'^25\d.*?"', self.req.http.header, re.M):
            pyfile.url = pyfile.url.rstrip("/")

            pkgname = "/".join(
                [
                    pyfile.package().name,
                    urllib.parse.urlparse(pyfile.url).path.rpartition("/")[2],
                ]
            )

            pyfile.url += "/"
            self.req.http.c.setopt(48, 1)  #: CURLOPT_DIRLISTONLY

            res = self.load(pyfile.url, decode=False)
            links = [pyfile.url + x for x in res.splitlines()]
            self.log_debug("LINKS", links)

            self.pyload.api.add_package(pkgname, links)
        else:
            self.fail(self._("Unexpected server response"))
def download(
    self,
    url,
    get=None,
    post=None,
    ref=True,
    cookies=True,
    disposition=True,
    resume=None,
    chunks=None,
    fixurl=True,
):
    """
    Downloads the content at url to download folder.

    :param url: url to retrieve
    :param get: query string parameters (dict)
    :param post: POST data (dict)
    :param ref: send a referer header
    :param cookies: use cookies
    :param disposition: if True and server provides content-disposition header\
the filename will be changed if needed
    :param resume: resume a partial download when possible
    :param chunks: maximum number of connections
    :param fixurl: sanitize `url` before downloading
    :return: The location where the file was saved
    """
    #: BUGFIX: mutable default arguments (`get={}`, `post={}`) are shared
    #: across all calls; use None sentinels and create fresh dicts.
    get = {} if get is None else get
    post = {} if post is None else post

    self.check_status()

    if self.pyload.debug:
        self.log_debug(
            "DOWNLOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    dl_url = self.fixurl(url) if fixurl else url
    dl_basename = parse_name(self.pyfile.name)

    self.pyfile.name = dl_basename

    self.check_duplicates()

    self.pyfile.set_status("downloading")

    dl_folder = self.pyload.config.get("general", "storage_folder")
    dl_dirname = safejoin(dl_folder, self.pyfile.package().folder)
    dl_filename = safejoin(dl_dirname, dl_basename)

    os.makedirs(dl_dirname, exist_ok=True)
    self.set_permissions(dl_dirname)

    self.pyload.addon_manager.dispatch_event(
        "download_start", self.pyfile, dl_url, dl_filename
    )
    self.check_status()

    newname = self._download(
        dl_url, dl_filename, get, post, ref, cookies, disposition, resume, chunks
    )

    # TODO: Recheck in 0.6.x
    if disposition and newname:
        #: Renamed local (was `safename`) to stop shadowing the
        #: module-level `safename()` helper.
        safe_name = parse_name(newname.split(" filename*=")[0])

        if safe_name != newname:
            try:
                old_file = os.path.join(dl_dirname, newname)
                new_file = os.path.join(dl_dirname, safe_name)
                os.rename(old_file, new_file)

            except OSError as exc:
                self.log_warning(
                    self._("Error renaming `{}` to `{}`").format(newname, safe_name),
                    exc,
                )
                safe_name = newname

            self.log_info(
                self._("`{}` saved as `{}`").format(self.pyfile.name, safe_name)
            )

        self.pyfile.name = safe_name
        dl_filename = os.path.join(dl_dirname, safe_name)

    self.set_permissions(dl_filename)

    self.last_download = dl_filename

    return dl_filename
def get_info(cls, url="", html=""):
    """
    Collect status, name, size and hash info for `url`.

    Fetches the page when no html was supplied and the status is still
    undecided, then evaluates the plugin's info patterns.
    """
    info = super(SimpleDownloader, cls).get_info(url)
    info.update(cls.api_info(url))

    if not html and info["status"] != 2:
        if not url:
            info["error"] = "missing url"
            info["status"] = 1
        elif info["status"] in (3, 7):
            try:
                html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING)
            except BadHeader as exc:
                info["error"] = "{}: {}".format(exc.code, exc.content)
            except Exception:
                #: Best-effort fetch: leave status untouched on failure.
                pass

    if html:
        if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None:
            info["status"] = 1
        elif (
            cls.TEMP_OFFLINE_PATTERN
            and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None
        ):
            info["status"] = 6
        else:
            for pattern in (
                "INFO_PATTERN",
                "NAME_PATTERN",
                "SIZE_PATTERN",
                "HASHSUM_PATTERN",
            ):
                try:
                    attr = getattr(cls, pattern)
                    pdict = re.search(attr, html).groupdict()
                    #: BUGFIX: the previous guard
                    #: `all(True for k in pdict if k not in info["pattern"])`
                    #: was vacuously True for every pdict, so the update is
                    #: unconditional (behavior unchanged, dead check removed).
                    info["pattern"].update(pdict)
                except Exception:
                    #: Pattern missing or not matching: try the next one.
                    continue
                else:
                    info["status"] = 2

    if "N" in info["pattern"]:
        name = replace_patterns(info["pattern"]["N"], cls.NAME_REPLACEMENTS)
        info["name"] = parse_name(name)

    if "S" in info["pattern"]:
        size = replace_patterns(
            info["pattern"]["S"] + info["pattern"]["U"]
            if "U" in info["pattern"]
            else info["pattern"]["S"],
            cls.SIZE_REPLACEMENTS,
        )
        info["size"] = parse_size(size)

    elif isinstance(info["size"], str):
        unit = info["units"] if "units" in info else ""
        info["size"] = parse_size(info["size"], unit)

    if "H" in info["pattern"]:
        hash_type = info["pattern"]["H"].strip("-").upper()
        info["hash"][hash_type] = info["pattern"]["D"]

    return info
def download(
    self,
    url,
    get=None,
    post=None,
    ref=True,
    cookies=True,
    disposition=True,
    resume=None,
    chunks=None,
    fixurl=True,
):
    """
    Downloads the content at url to download folder.

    :param url: url to retrieve
    :param get: query string parameters (dict)
    :param post: POST data (dict)
    :param ref: send a referer header
    :param cookies: use cookies
    :param disposition: if True and server provides content-disposition header\
the filename will be changed if needed
    :param resume: resume a partial download when possible
    :param chunks: maximum number of connections
    :param fixurl: sanitize `url` before downloading
    :return: The location where the file was saved
    """
    #: BUGFIX: `get={}` / `post={}` were mutable defaults shared between
    #: calls; normalize None sentinels to fresh dicts instead.
    get = {} if get is None else get
    post = {} if post is None else post

    self.check_status()

    if self.pyload.debug:
        self.log_debug(
            "DOWNLOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    dl_url = self.fixurl(url) if fixurl else url
    dl_basename = parse_name(self.pyfile.name)

    self.pyfile.name = dl_basename

    self.check_duplicates()

    self.pyfile.set_status("downloading")

    dl_folder = self.pyload.config.get("general", "storage_folder")
    dl_dirname = safejoin(dl_folder, self.pyfile.package().folder)
    dl_filename = safejoin(dl_dirname, dl_basename)

    os.makedirs(dl_dirname, exist_ok=True)
    self.set_permissions(dl_dirname)

    self.pyload.addon_manager.dispatch_event(
        "download_start", self.pyfile, dl_url, dl_filename
    )
    self.check_status()

    newname = self._download(
        dl_url, dl_filename, get, post, ref, cookies, disposition, resume, chunks
    )

    # TODO: Recheck in 0.6.x
    if disposition and newname:
        #: Local renamed from `safename` so it no longer shadows the
        #: module-level `safename()` helper.
        disposition_name = parse_name(newname.split(" filename*=")[0])

        if disposition_name != newname:
            try:
                old_file = os.path.join(dl_dirname, newname)
                new_file = os.path.join(dl_dirname, disposition_name)
                os.rename(old_file, new_file)

            except OSError as exc:
                self.log_warning(
                    self._("Error renaming `{}` to `{}`").format(
                        newname, disposition_name
                    ),
                    exc,
                )
                disposition_name = newname

            self.log_info(
                self._("`{}` saved as `{}`").format(
                    self.pyfile.name, disposition_name
                )
            )

        self.pyfile.name = disposition_name
        dl_filename = os.path.join(dl_dirname, disposition_name)

    self.set_permissions(dl_filename)

    self.last_download = dl_filename

    return dl_filename
def get_info(cls, url="", html=""):
    """
    Build the info dict (status, name, size, hash) for `url`, fetching
    and parsing the page when necessary.
    """
    info = super(SimpleDownloader, cls).get_info(url)
    info.update(cls.api_info(url))

    if not html and info["status"] != 2:
        if not url:
            info["error"] = "missing url"
            info["status"] = 1
        elif info["status"] in (3, 7):
            try:
                html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING)
            except BadHeader as exc:
                info["error"] = "{}: {}".format(exc.code, exc.content)
            except Exception:
                #: Fetch is best-effort only.
                pass

    if html:
        if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None:
            info["status"] = 1
        elif (
            cls.TEMP_OFFLINE_PATTERN
            and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None
        ):
            info["status"] = 6
        else:
            for pattern in (
                "INFO_PATTERN",
                "NAME_PATTERN",
                "SIZE_PATTERN",
                "HASHSUM_PATTERN",
            ):
                try:
                    attr = getattr(cls, pattern)
                    pdict = re.search(attr, html).groupdict()
                    #: BUGFIX: dropped the vacuous guard
                    #: `all(True for k in pdict if k not in info["pattern"])`
                    #: which evaluated to True for every pdict; the update
                    #: was and remains unconditional.
                    info["pattern"].update(pdict)
                except Exception:
                    continue
                else:
                    info["status"] = 2

    if "N" in info["pattern"]:
        name = replace_patterns(info["pattern"]["N"], cls.NAME_REPLACEMENTS)
        info["name"] = parse_name(name)

    if "S" in info["pattern"]:
        size = replace_patterns(
            info["pattern"]["S"] + info["pattern"]["U"]
            if "U" in info["pattern"]
            else info["pattern"]["S"],
            cls.SIZE_REPLACEMENTS,
        )
        info["size"] = parse_size(size)

    elif isinstance(info["size"], str):
        unit = info["units"] if "units" in info else ""
        info["size"] = parse_size(info["size"], unit)

    if "H" in info["pattern"]:
        hash_type = info["pattern"]["H"].strip("-").upper()
        info["hash"][hash_type] = info["pattern"]["D"]

    return info
def process(self, pyfile):
    """
    Process an ftp url: download the file, or enumerate a directory
    listing into a new package.

    Credentials come from the url itself, a stored account, or the
    package password (`user:pass`); otherwise an anonymous login is used.
    """
    parsed = urllib.parse.urlparse(pyfile.url)
    netloc = parsed.netloc

    pyfile.name = parse_name(parsed.path.rpartition("/")[2])

    if "@" not in netloc:
        #: Url carries no inline credentials: resolve them.
        # TODO: Recheck in 0.6.x
        if self.account:
            servers = [x["login"] for x in self.account.get_all_accounts()]
        else:
            servers = []

        if netloc in servers:
            self.log_debug(f"Logging on to {netloc}")
            self.req.add_auth(self.account.get_login("password"))
        else:
            pwd = self.get_password()
            if ":" in pwd:
                self.log_debug(f"Logging on to {netloc}")
                self.req.add_auth(pwd)
            else:
                self.log_debug("Using anonymous logon")

    try:
        headers = self.load(pyfile.url, just_header=True)
    except pycurl.error as exc:
        if "530" in exc.args[1]:
            self.fail(self._("Authorization required"))
        else:
            #: BUGFIX: unpack `exc.args` (a (code, message) tuple); passing
            #: the tuple as one argument left the second placeholder unfilled
            #: and raised IndexError instead of reporting the error.
            self.fail(self._("Error {}: {}").format(*exc.args))

    self.req.http.c.setopt(pycurl.NOBODY, 0)
    self.log_debug(self.req.http.header)

    if "content-length" in headers:
        pyfile.size = headers.get("content-length")
        self.download(pyfile.url)
    else:
        #: Naive ftp directory listing
        if re.search(r'^25\d.*?"', self.req.http.header, re.M):
            pyfile.url = pyfile.url.rstrip("/")

            pkgname = "/".join(
                [
                    pyfile.package().name,
                    urllib.parse.urlparse(pyfile.url).path.rpartition("/")[2],
                ]
            )

            pyfile.url += "/"
            self.req.http.c.setopt(48, 1)  #: CURLOPT_DIRLISTONLY

            res = self.load(pyfile.url, decode=False)
            links = [pyfile.url + x for x in res.splitlines()]
            self.log_debug("LINKS", links)

            self.pyload.api.add_package(pkgname, links)
        else:
            self.fail(self._("Unexpected server response"))