Example #1
    def _generate_packages(self):
        """
        Generate new packages from self.links.
        """
        name = self.info["pattern"].get("N")
        if name is None:
            links = [self.fixurl(url) for url in self.links]
            pdict = self.pyload.api.generate_packages(links)
            packages = [(name, links, parse_name(name))
                        for name, links in pdict.items()]

        else:
            packages = [(name, self.links, parse_name(name))]

        self.packages.extend(packages)
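
A note on the two branches above: without a name captured from the URL pattern, the fixed links are handed to the API and come back grouped into packages; with an "N" capture, everything goes into a single package named after it. A minimal standalone sketch of that shape, using a simplified stand-in for parse_name and a hypothetical group-by-basename rule in place of the real generate_packages API:

    import os
    import urllib.parse


    def parse_name_stub(url):
        # Simplified stand-in for pyload's parse_name: basename of the URL path.
        return os.path.basename(urllib.parse.urlparse(url).path) or url


    def generate_packages_sketch(links, name=None):
        # Mirrors the branch logic above. NOTE: grouping by basename is an
        # assumption for illustration; the real API uses its own grouping.
        if name is None:
            grouped = {}
            for url in links:
                grouped.setdefault(parse_name_stub(url), []).append(url)
            return [(n, urls, parse_name_stub(n)) for n, urls in grouped.items()]
        return [(name, links, parse_name_stub(name))]


    print(generate_packages_sketch(["http://example.com/a.rar", "http://example.com/b.rar"]))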
Example #2
    def _generate_packages(self):
        """
        Generate new packages from self.links.
        """
        name = self.info["pattern"].get("N")
        if name is None:
            links = [self.fixurl(url) for url in self.links]
            pdict = self.pyload.api.generate_packages(links)
            packages = [
                (name, links, parse_name(name)) for name, links in pdict.items()
            ]

        else:
            packages = [(name, self.links, parse_name(name))]

        self.packages.extend(packages)
Example #3
    def isresource(self, url, redirect=True, resumable=None):
        resource = False
        maxredirs = 5

        if resumable is None:
            resumable = self.resume_download

        if isinstance(redirect, int):
            maxredirs = max(redirect, 1)

        elif redirect:
            maxredirs = (
                int(self.config.get("maxredirs", plugin="UserAgentSwitcher"))
                or maxredirs
            )  # TODO: Remove `int` in 0.6.x

        header = self.load(url, just_header=True)

        for i in range(1, maxredirs):
            if not redirect or header.get("connection") == "close":
                resumable = False

            if "content-disposition" in header:
                resource = url

            elif header.get("location"):
                location = self.fixurl(header.get("location"), url)
                code = header.get("code")

                if code in (301, 302) or resumable:
                    self.log_debug(f"Redirect #{i} to: {location}")
                    header = self.load(location, just_header=True)
                    url = location
                    continue

            else:
                contenttype = header.get("content-type")
                extension = os.path.splitext(parse_name(url))[-1]

                if contenttype:
                    mimetype = contenttype.split(";")[0].strip()

                elif extension:
                    mimetype = (
                        mimetypes.guess_type(extension, False)[0]
                        or "application/octet-stream"
                    )

                else:
                    mimetype = None

                if mimetype and (resource or "html" not in mimetype):
                    resource = url
                else:
                    resource = False

            return resource
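
The final branch above decides whether a URL points at a downloadable resource: a content-disposition header wins outright, otherwise the mimetype (taken from content-type, or guessed from the file extension) must not be HTML. A self-contained sketch of just that decision, assuming the response headers are already available as a dict:

    import mimetypes


    def looks_like_file(url, headers):
        # Hypothetical helper mirroring the checks above.
        if "content-disposition" in headers:
            return True

        contenttype = headers.get("content-type", "")
        if contenttype:
            mimetype = contenttype.split(";")[0].strip()
        else:
            # Fall back to guessing from the file extension in the URL.
            mimetype = mimetypes.guess_type(url)[0]

        return bool(mimetype) and "html" not in mimetype


    print(looks_like_file("http://example.com/file.zip", {"content-type": "application/zip"}))       # True
    print(looks_like_file("http://example.com/page", {"content-type": "text/html; charset=utf-8"}))  # False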
Example #4
    def isresource(self, url, redirect=True, resumable=None):
        resource = False
        maxredirs = 5

        if resumable is None:
            resumable = self.resume_download

        if isinstance(redirect, int):
            maxredirs = max(redirect, 1)

        elif redirect:
            maxredirs = (
                int(self.config.get("maxredirs", plugin="UserAgentSwitcher"))
                or maxredirs)  # TODO: Remove `int` in 0.6.x

        header = self.load(url, just_header=True)

        for i in range(1, maxredirs):
            if not redirect or header.get("connection") == "close":
                resumable = False

            if "content-disposition" in header:
                resource = url

            elif header.get("location"):
                location = self.fixurl(header.get("location"), url)
                code = header.get("code")

                if code in (301, 302) or resumable:
                    self.log_debug(f"Redirect #{i} to: {location}")
                    header = self.load(location, just_header=True)
                    url = location
                    continue

            else:
                contenttype = header.get("content-type")
                extension = os.path.splitext(parse_name(url))[-1]

                if contenttype:
                    mimetype = contenttype.split(";")[0].strip()

                elif extension:
                    mimetype = (mimetypes.guess_type(extension, False)[0]
                                or "application/octet-stream")

                else:
                    mimetype = None

                if mimetype and (resource or "html" not in mimetype):
                    resource = url
                else:
                    resource = False

            return resource
Example #5
    def _create_packages(self):
        """
        Create new packages from self.packages.
        """
        pack_folder = self.pyfile.package().folder
        pack_password = self.pyfile.package().password
        pack_queue = self.pyfile.package().queue

        folder_per_package = self.config.get("folder_per_package", "Default")

        if folder_per_package == "Default":
            folder_per_package = self.pyload.config.get("general", "folder_per_package")
        else:
            folder_per_package = folder_per_package == "Yes"

        for name, links, folder in self.packages:
            self.log_info(
                self._("Create package: {}").format(name),
                self._("{} links").format(len(links)),
            )

            links = [self.fixurl(url) for url in links]
            self.log_debug("LINKS for package " + name, links)

            pid = self.pyload.api.add_package(name, links, pack_queue)

            if pack_password:
                self.pyload.api.set_package_data(pid, {"password": pack_password})

            #: Workaround to avoid breaking the API add_package method
            def set_folder(x):
                return self.pyload.api.set_package_data(
                    pid, {"folder": safename(x or "")}
                )

            if not folder_per_package:
                folder = pack_folder

            elif not folder or folder == name:
                folder = parse_name(name)

            self.log_info(
                self._("Save package `{name}` to folder: {folder}").format(
                    name=name, folder=folder
                )
            )

            set_folder(folder)
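
The folder choice above reduces to three cases. Isolated as a plain function, with a trivial stand-in for parse_name:

    def pick_folder(folder_per_package, pack_folder, folder, name, parse_name=lambda n: n):
        # Mirrors the branches above: without per-package folders, reuse the parent
        # package's folder; otherwise replace an empty folder, or one equal to the
        # raw name, with the parsed package name.
        if not folder_per_package:
            return pack_folder
        if not folder or folder == name:
            return parse_name(name)
        return folder


    print(pick_folder(False, "downloads", "", "My.Package"))  # downloads
    print(pick_folder(True, "downloads", "", "My.Package"))   # My.Package (via the stand-in parse_name)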
Example #6
    def _create_packages(self):
        """
        Create new packages from self.packages.
        """
        pack_folder = self.pyfile.package().folder
        pack_password = self.pyfile.package().password
        pack_queue = self.pyfile.package().queue

        folder_per_package = self.config.get("folder_per_package", "Default")

        if folder_per_package == "Default":
            folder_per_package = self.pyload.config.get(
                "general", "folder_per_package")
        else:
            folder_per_package = folder_per_package == "Yes"

        for name, links, folder in self.packages:
            self.log_info(
                self._("Create package: {}").format(name),
                self._("{} links").format(len(links)),
            )

            links = [self.fixurl(url) for url in links]
            self.log_debug("LINKS for package " + name, links)

            pid = self.pyload.api.add_package(name, links, pack_queue)

            if pack_password:
                self.pyload.api.set_package_data(pid,
                                                 {"password": pack_password})

            #: Workaround to avoid breaking the API add_package method
            def set_folder(x):
                return self.pyload.api.set_package_data(
                    pid, {"folder": safename(x or "")})

            if not folder_per_package:
                folder = pack_folder

            elif not folder or folder == name:
                folder = parse_name(name)

            self.log_info(
                self._("Save package `{name}` to folder: {folder}").format(
                    name=name, folder=folder))

            set_folder(folder)
Example #7
File: hoster.py  Project: tboh002/pyload
    def get_info(cls, url="", html=""):
        url = fixurl(url, unquote=True)
        info = {
            "name": parse_name(url),
            "hash": {},
            "pattern": {},
            "size": 0,
            "status": 7 if url else 8,
            "url": replace_patterns(url, cls.URL_REPLACEMENTS),
        }

        try:
            info["pattern"] = re.match(cls.__pattern__, url).groupdict()

        except Exception:
            pass

        return info
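
info["pattern"] above is filled by matching the plugin's __pattern__ regex, which uses named groups, against the URL. A tiny illustration with a made-up hoster pattern:

    import re

    # Hypothetical pattern; real plugins define __pattern__ with named groups such as N.
    PATTERN = r"https?://(?:www\.)?example\.com/files/(?P<ID>\d+)/(?P<N>[^/?#]+)"

    url = "https://example.com/files/42/report.pdf"
    m = re.match(PATTERN, url)
    print(m.groupdict() if m else {})  # {'ID': '42', 'N': 'report.pdf'}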
Example #8
    def get_info(cls, url="", html=""):
        info = super(SimpleDecrypter, cls).get_info(url)

        info.update(cls.api_info(url))

        if not html and info["status"] != 2:
            if not url:
                info["error"] = "missing url"
                info["status"] = 1

            elif info["status"] in (3, 7):
                try:
                    html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING)

                except BadHeader as exc:
                    info["error"] = "{}: {}".format(exc.code, exc.content)

                except Exception:
                    pass

        if html:
            if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None:
                info["status"] = 1

            elif (
                cls.TEMP_OFFLINE_PATTERN
                and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None
            ):
                info["status"] = 6

            elif cls.NAME_PATTERN:
                m = re.search(cls.NAME_PATTERN, html)
                if m is not None:
                    info["status"] = 2
                    info["pattern"].update(m.groupdict())

        if "N" in info["pattern"]:
            name = replace_patterns(info["pattern"]["N"], cls.NAME_REPLACEMENTS)
            info["name"] = parse_name(name)

        return info
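
The status handling above inspects the page in a fixed order: the offline pattern first, then the temporarily-offline pattern, then the name pattern that marks the file as online. A compact sketch of that ordering with hypothetical patterns:

    import re

    # Hypothetical patterns; real plugins define these as class attributes.
    OFFLINE_PATTERN = r"File not found"
    TEMP_OFFLINE_PATTERN = r"Server maintenance"
    NAME_PATTERN = r"<title>(?P<N>[^<]+)</title>"


    def classify(html, status=3):
        # Offline beats temporarily-offline, which beats "online with a name"
        # (status 2); the incoming status is kept when nothing matches.
        if re.search(OFFLINE_PATTERN, html):
            return 1, {}
        if re.search(TEMP_OFFLINE_PATTERN, html):
            return 6, {}
        m = re.search(NAME_PATTERN, html)
        if m is not None:
            return 2, m.groupdict()
        return status, {}


    print(classify("<title>my.archive.rar</title>"))  # (2, {'N': 'my.archive.rar'})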
Example #9
File: UlozTo.py  Project: pyload/pyload
    def adult_confirmation(self, pyfile):
        if re.search(self.ADULT_PATTERN, self.data):
            adult = True
            self.log_info(self._("Adult content confirmation needed"))

            url = pyfile.url.replace("ulozto.net", "pornfile.cz")
            self.load(
                "https://pornfile.cz/p**n-disclaimer",
                post={"agree": "Confirm", "_do": "porn_disclaimer-submit"},
            )

            html = self.load(url)
            name = re.search(self.NAME_PATTERN, html).group(2)

            self.pyfile.name = parse_name(name)
            self.data = html

        else:
            adult = False

        return adult
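
The confirmation above amounts to a domain swap plus a form post before re-fetching the page; the swap on its own, with a hypothetical file URL:

    url = "https://ulozto.net/file/abcd1234/example.avi"  # hypothetical file URL
    print(url.replace("ulozto.net", "pornfile.cz"))  # https://pornfile.cz/file/abcd1234/example.avi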
Example #10
    def adult_confirmation(self, pyfile):
        if re.search(self.ADULT_PATTERN, self.data):
            adult = True
            self.log_info(self._("Adult content confirmation needed"))

            url = pyfile.url.replace("ulozto.net", "pornfile.cz")
            self.load(
                "https://pornfile.cz/p**n-disclaimer",
                post={
                    "agree": "Confirm",
                    "_do": "porn_disclaimer-submit"
                },
            )

            html = self.load(url)
            name = re.search(self.NAME_PATTERN, html).group(2)

            self.pyfile.name = parse_name(name)
            self.data = html

        else:
            adult = False

        return adult
Example #11
File: Ftp.py  Project: pyload/pyload
    def process(self, pyfile):
        p_url = urllib.parse.urlparse(pyfile.url)
        netloc = p_url.netloc

        pyfile.name = parse_name(p_url.path.rpartition("/")[2])

        if "@" not in netloc:
            # TODO: Recheck in 0.6.x
            if self.account:
                servers = [x["login"] for x in self.account.get_all_accounts()]
            else:
                servers = []

            if netloc in servers:
                self.log_debug(f"Logging on to {netloc}")
                self.req.add_auth(self.account.get_login("password"))

            else:
                pwd = self.get_password()
                if ":" in pwd:
                    self.log_debug(f"Logging on to {netloc}")
                    self.req.add_auth(pwd)
                else:
                    self.log_debug("Using anonymous logon")

        try:
            headers = self.load(pyfile.url, just_header=True)

        except pycurl.error as exc:
            if "530" in exc.args[1]:
                self.fail(self._("Authorization required"))
            else:
                self.fail(self._("Error {}: {}").format(exc.args))

        self.req.http.c.setopt(pycurl.NOBODY, 0)
        self.log_debug(self.req.http.header)

        if "content-length" in headers:
            pyfile.size = headers.get("content-length")
            self.download(pyfile.url)

        else:
            #: Naive ftp directory listing
            if re.search(r'^25\d.*?"', self.req.http.header, re.M):
                pyfile.url = pyfile.url.rstrip("/")
                pkgname = "/".join(
                    [
                        pyfile.package().name,
                        urllib.parse.urlparse(pyfile.url).path.rpartition("/")[2],
                    ]
                )

                pyfile.url += "/"

                self.req.http.c.setopt(48, 1)  #: CURLOPT_DIRLISTONLY
                res = self.load(pyfile.url, decode=False)

                links = [pyfile.url + x for x in res.splitlines()]
                self.log_debug("LINKS", links)

                self.pyload.api.add_package(pkgname, links)

            else:
                self.fail(self._("Unexpected server response"))
Example #12
    def download(
        self,
        url,
        get={},
        post={},
        ref=True,
        cookies=True,
        disposition=True,
        resume=None,
        chunks=None,
        fixurl=True,
    ):
        """
        Downloads the content at url to the download folder.

        :param url:
        :param get:
        :param post:
        :param ref:
        :param cookies:
        :param disposition: if True and server provides content-disposition header\
        the filename will be changed if needed
        :return: The location where the file was saved
        """
        self.check_status()

        if self.pyload.debug:
            self.log_debug(
                "DOWNLOAD URL " + url,
                *[
                    "{}={}".format(key, value)
                    for key, value in locals().items()
                    if key not in ("self", "url", "_[1]")
                ],
            )

        dl_url = self.fixurl(url) if fixurl else url
        dl_basename = parse_name(self.pyfile.name)

        self.pyfile.name = dl_basename

        self.check_duplicates()

        self.pyfile.set_status("downloading")

        dl_folder = self.pyload.config.get("general", "storage_folder")
        dl_dirname = safejoin(dl_folder, self.pyfile.package().folder)
        dl_filename = safejoin(dl_dirname, dl_basename)

        os.makedirs(dl_dirname, exist_ok=True)
        self.set_permissions(dl_dirname)

        self.pyload.addon_manager.dispatch_event(
            "download_start", self.pyfile, dl_url, dl_filename
        )
        self.check_status()

        newname = self._download(
            dl_url, dl_filename, get, post, ref, cookies, disposition, resume, chunks
        )

        # TODO: Recheck in 0.6.x
        if disposition and newname:
            safename = parse_name(newname.split(" filename*=")[0])

            if safename != newname:
                try:
                    old_file = os.path.join(dl_dirname, newname)
                    new_file = os.path.join(dl_dirname, safename)
                    os.rename(old_file, new_file)

                except OSError as exc:
                    self.log_warning(
                        self._("Error renaming `{}` to `{}`").format(newname, safename),
                        exc,
                    )
                    safename = newname

                self.log_info(
                    self._("`{}` saved as `{}`").format(self.pyfile.name, safename)
                )

            self.pyfile.name = safename

            dl_filename = os.path.join(dl_dirname, safename)

        self.set_permissions(dl_filename)

        self.last_download = dl_filename

        return dl_filename
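
The rename block above accepts the server-supplied name only after dropping any trailing RFC 5987 "filename*=" part and running the result through parse_name. The same sanitation as a standalone snippet, with a simplified stand-in for parse_name:

    import os
    import urllib.parse


    def parse_name_stub(name):
        # Simplified stand-in for pyload's parse_name: unquote and keep the basename.
        return os.path.basename(urllib.parse.unquote(name).strip())


    raw = "report final.pdf filename*=UTF-8''report%20final.pdf"  # hypothetical disposition leftover
    print(parse_name_stub(raw.split(" filename*=")[0]))  # report final.pdf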
Example #13
    def get_info(cls, url="", html=""):
        info = super(SimpleDownloader, cls).get_info(url)
        info.update(cls.api_info(url))

        if not html and info["status"] != 2:
            if not url:
                info["error"] = "missing url"
                info["status"] = 1

            elif info["status"] in (3, 7):
                try:
                    html = get_url(url,
                                   cookies=cls.COOKIES,
                                   decode=cls.TEXT_ENCODING)

                except BadHeader as exc:
                    info["error"] = "{}: {}".format(exc.code, exc.content)

                except Exception:
                    pass

        if html:
            if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN,
                                                 html) is not None:
                info["status"] = 1

            elif (cls.TEMP_OFFLINE_PATTERN
                  and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None):
                info["status"] = 6

            else:
                for pattern in (
                        "INFO_PATTERN",
                        "NAME_PATTERN",
                        "SIZE_PATTERN",
                        "HASHSUM_PATTERN",
                ):
                    try:
                        attr = getattr(cls, pattern)
                        pdict = re.search(attr, html).groupdict()

                        if all(True for k in pdict
                               if k not in info["pattern"]):
                            info["pattern"].update(pdict)

                    except Exception:
                        continue

                    else:
                        info["status"] = 2

        if "N" in info["pattern"]:
            name = replace_patterns(info["pattern"]["N"],
                                    cls.NAME_REPLACEMENTS)
            info["name"] = parse_name(name)

        if "S" in info["pattern"]:
            size = replace_patterns(
                info["pattern"]["S"] + info["pattern"]["U"]
                if "U" in info["pattern"] else info["pattern"]["S"],
                cls.SIZE_REPLACEMENTS,
            )
            info["size"] = parse_size(size)

        elif isinstance(info["size"], str):
            unit = info["units"] if "units" in info else ""
            info["size"] = parse_size(info["size"], unit)

        if "H" in info["pattern"]:
            hash_type = info["pattern"]["H"].strip("-").upper()
            info["hash"][hash_type] = info["pattern"]["D"]

        return info
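
The size handling above concatenates the "S" and "U" capture groups before parsing, or falls back to a separately captured unit. A minimal stand-in for that parsing step (the real parse_size helper covers more formats):

    import re

    UNITS = {"B": 1, "KB": 1 << 10, "MB": 1 << 20, "GB": 1 << 30}  # assumed binary units


    def parse_size_stub(size, unit=""):
        # Simplified stand-in for pyload's parse_size: a number plus an optional
        # unit, either embedded in the string or passed separately.
        m = re.match(r"\s*([\d.,]+)\s*([A-Za-z]*)", str(size))
        value = float(m.group(1).replace(",", ""))
        return int(value * UNITS.get((m.group(2) or unit or "B").upper(), 1))


    print(parse_size_stub("1.5 GB"))     # 1610612736
    print(parse_size_stub("500", "KB"))  # 512000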
Example #14
    def download(
        self,
        url,
        get={},
        post={},
        ref=True,
        cookies=True,
        disposition=True,
        resume=None,
        chunks=None,
        fixurl=True,
    ):
        """
        Downloads the content at url to the download folder.

        :param url:
        :param get:
        :param post:
        :param ref:
        :param cookies:
        :param disposition: if True and server provides content-disposition header\
        the filename will be changed if needed
        :return: The location where the file was saved
        """
        self.check_status()

        if self.pyload.debug:
            self.log_debug(
                "DOWNLOAD URL " + url,
                *[
                    "{}={}".format(key, value)
                    for key, value in locals().items()
                    if key not in ("self", "url", "_[1]")
                ],
            )

        dl_url = self.fixurl(url) if fixurl else url
        dl_basename = parse_name(self.pyfile.name)

        self.pyfile.name = dl_basename

        self.check_duplicates()

        self.pyfile.set_status("downloading")

        dl_folder = self.pyload.config.get("general", "storage_folder")
        dl_dirname = safejoin(dl_folder, self.pyfile.package().folder)
        dl_filename = safejoin(dl_dirname, dl_basename)

        os.makedirs(dl_dirname, exist_ok=True)
        self.set_permissions(dl_dirname)

        self.pyload.addon_manager.dispatch_event("download_start", self.pyfile,
                                                 dl_url, dl_filename)
        self.check_status()

        newname = self._download(dl_url, dl_filename, get, post, ref, cookies,
                                 disposition, resume, chunks)

        # TODO: Recheck in 0.6.x
        if disposition and newname:
            safename = parse_name(newname.split(" filename*=")[0])

            if safename != newname:
                try:
                    old_file = os.path.join(dl_dirname, newname)
                    new_file = os.path.join(dl_dirname, safename)
                    os.rename(old_file, new_file)

                except OSError as exc:
                    self.log_warning(
                        self._("Error renaming `{}` to `{}`").format(
                            newname, safename),
                        exc,
                    )
                    safename = newname

                self.log_info(
                    self._("`{}` saved as `{}`").format(
                        self.pyfile.name, safename))

            self.pyfile.name = safename

            dl_filename = os.path.join(dl_dirname, safename)

        self.set_permissions(dl_filename)

        self.last_download = dl_filename

        return dl_filename
Example #15
    def get_info(cls, url="", html=""):
        info = super(SimpleDownloader, cls).get_info(url)
        info.update(cls.api_info(url))

        if not html and info["status"] != 2:
            if not url:
                info["error"] = "missing url"
                info["status"] = 1

            elif info["status"] in (3, 7):
                try:
                    html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING)

                except BadHeader as exc:
                    info["error"] = "{}: {}".format(exc.code, exc.content)

                except Exception:
                    pass

        if html:
            if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None:
                info["status"] = 1

            elif (
                cls.TEMP_OFFLINE_PATTERN
                and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None
            ):
                info["status"] = 6

            else:
                for pattern in (
                    "INFO_PATTERN",
                    "NAME_PATTERN",
                    "SIZE_PATTERN",
                    "HASHSUM_PATTERN",
                ):
                    try:
                        attr = getattr(cls, pattern)
                        pdict = re.search(attr, html).groupdict()

                        if all(True for k in pdict if k not in info["pattern"]):
                            info["pattern"].update(pdict)

                    except Exception:
                        continue

                    else:
                        info["status"] = 2

        if "N" in info["pattern"]:
            name = replace_patterns(info["pattern"]["N"], cls.NAME_REPLACEMENTS)
            info["name"] = parse_name(name)

        if "S" in info["pattern"]:
            size = replace_patterns(
                info["pattern"]["S"] + info["pattern"]["U"]
                if "U" in info["pattern"]
                else info["pattern"]["S"],
                cls.SIZE_REPLACEMENTS,
            )
            info["size"] = parse_size(size)

        elif isinstance(info["size"], str):
            unit = info["units"] if "units" in info else ""
            info["size"] = parse_size(info["size"], unit)

        if "H" in info["pattern"]:
            hash_type = info["pattern"]["H"].strip("-").upper()
            info["hash"][hash_type] = info["pattern"]["D"]

        return info
Example #16
File: Ftp.py  Project: tboh002/pyload
    def process(self, pyfile):
        p_url = urllib.parse.urlparse(pyfile.url)
        netloc = p_url.netloc

        pyfile.name = parse_name(p_url.path.rpartition("/")[2])

        if "@" not in netloc:
            # TODO: Recheck in 0.6.x
            if self.account:
                servers = [x["login"] for x in self.account.get_all_accounts()]
            else:
                servers = []

            if netloc in servers:
                self.log_debug(f"Logging on to {netloc}")
                self.req.add_auth(self.account.get_login("password"))

            else:
                pwd = self.get_password()
                if ":" in pwd:
                    self.log_debug(f"Logging on to {netloc}")
                    self.req.add_auth(pwd)
                else:
                    self.log_debug("Using anonymous logon")

        try:
            headers = self.load(pyfile.url, just_header=True)

        except pycurl.error as exc:
            if "530" in exc.args[1]:
                self.fail(self._("Authorization required"))
            else:
                self.fail(self._("Error {}: {}").format(exc.args))

        self.req.http.c.setopt(pycurl.NOBODY, 0)
        self.log_debug(self.req.http.header)

        if "content-length" in headers:
            pyfile.size = headers.get("content-length")
            self.download(pyfile.url)

        else:
            #: Naive ftp directory listing
            if re.search(r'^25\d.*?"', self.req.http.header, re.M):
                pyfile.url = pyfile.url.rstrip("/")
                pkgname = "/".join([
                    pyfile.package().name,
                    urllib.parse.urlparse(pyfile.url).path.rpartition("/")[2],
                ])

                pyfile.url += "/"

                self.req.http.c.setopt(48, 1)  #: CURLOPT_DIRLISTONLY
                res = self.load(pyfile.url, decode=False)

                links = [pyfile.url + x for x in res.splitlines()]
                self.log_debug("LINKS", links)

                self.pyload.api.add_package(pkgname, links)

            else:
                self.fail(self._("Unexpected server response"))