示例#1
0
    def parse_domains(self, list):
        """
        Reduce every entry of ``list`` to its host part.

        The pattern is anchored at the start of the entry and skips an optional
        ``http(s)://`` scheme, an optional ``www.`` and any leading ``word.`` labels.
        It captures either a dotted quad of 1-3 digit numbers or a 3-63 character
        name followed by one or two alphabetic suffixes, each with an optional
        ``:port``.

        :param list: iterable of URL / domain strings.
        :return: whatever ``self.replace_domains`` returns for the de-duplicated,
            lower-cased captures.
        """
        host_pattern = re.compile(
            r"^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:(?:\d{1,3}\.){3}\d{1,3}|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)",
            re.I | re.U,
        )

        hosts = []
        for entry in list:
            #: The pattern has a single group, so findall yields plain strings
            for captured in host_pattern.findall(entry):
                hosts.append(decode(captured).strip().lower())

        unique_hosts = uniqify(hosts)
        return self.replace_domains(unique_hosts)
示例#2
0
    def parse_domains(self, list):
        """
        Extract the normalized host of each URL in ``list``.

        Each entry is matched from its start: scheme, ``www.`` and leading ``word.``
        labels are skipped, and the remaining IPv4-looking or ``name.tld`` part (with
        an optional ``:port``) is captured, decoded, stripped and lower-cased.

        :param list: iterable of URL / domain strings.
        :return: result of ``self.replace_domains`` applied to the unique hosts.
        """
        matcher = re.compile(
            r"^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:(?:\d{1,3}\.){3}\d{1,3}|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)",
            re.I | re.U,
        )

        collected = []
        for address in list:
            hits = matcher.findall(address)
            collected.extend(decode(hit).strip().lower() for hit in hits)

        return self.replace_domains(uniqify(collected))
示例#3
0
    def add_password(self, password):
        """
        Adds a password to saved list.

        The password is placed in front of ``self.passwords``, the list is
        de-duplicated with ``uniqify`` and then written, one entry per line, to the
        file named by the ``passwordfile`` config option.

        I/O errors are reported through ``self.log_error`` and not re-raised.

        :param password: password to remember.
        """
        try:
            self.passwords = uniqify([password] + self.passwords)

            path = os.fsdecode(self.config.get("passwordfile"))
            #: Text mode is required: each entry is a str (it is concatenated with
            #: a str newline) and a binary-mode file rejects str with TypeError,
            #: which the IOError handler below would not catch.
            with open(path, mode="w") as fp:
                for pw in self.passwords:
                    fp.write(pw + "\n")

        except IOError as exc:
            self.log_error(exc)
示例#4
0
    def add_password(self, password):
        """
        Adds a password to saved list.

        ``password`` becomes the first entry of ``self.passwords`` (duplicates are
        dropped by ``uniqify``) and the complete list is rewritten to the file
        configured as ``passwordfile``, one password per line.

        I/O errors are logged with ``self.log_error`` instead of being raised.

        :param password: password to remember.
        """
        try:
            self.passwords = uniqify([password] + self.passwords)

            password_file = os.fsdecode(self.config.get("passwordfile"))
            #: Open in text mode: the entries written below are str, and writing
            #: str to a file opened with "wb" raises TypeError (not IOError).
            with open(password_file, mode="w") as fp:
                for pw in self.passwords:
                    fp.write(pw + "\n")

        except IOError as exc:
            self.log_error(exc)
示例#5
0
    def get_links(self):
        """
        Collect the image links of the gallery.

        Links embedded in the HTML (``self.data``) are gathered first; afterwards
        ``get_indirect_links`` is asked for the images that are only lazy-loaded.

        :return: all links found, or an empty list when they were queued in
            ``self.packages`` under the gallery name instead.
        """
        def to_full_url(fragment):
            #: Drop an "s" that follows seven word characters right before a dot
            cleaned = re.sub(r"(\w{7})s\.", r"\1.", fragment)
            return "http://" + cleaned

        embedded = re.findall(self.LINK_PATTERN, self.data)
        direct_links = uniqify(to_full_url(hit) for hit in embedded)

        #: An Imgur gallery may hold more images than the page shows initially
        try:
            indirect_links = self.get_indirect_links(direct_links)
            self.log_debug(f"Found {len(indirect_links)} additional links")

        except (TypeError, KeyError, ValueError) as exc:
            #: Not fatal, the direct links are still usable
            template = self._("Processing of additional links unsuccessful - {}: {}")
            self.log_error(template.format(type(exc).__name__, exc))
            indirect_links = []

        #: Tell the user when the gallery announced more images than were found
        num_images_found = len(direct_links) + len(indirect_links)
        if num_images_found < self.total_num_images:
            template = self._("Could not save all images of this gallery: {}/{}")
            self.log_error(template.format(num_images_found, self.total_num_images))

        #: Without a gallery name the links are simply handed back
        if not self.gallery_name:
            return direct_links + indirect_links

        #: With a gallery name the links go into a dedicated package
        package_links = direct_links + indirect_links
        self.packages.append((self.gallery_name, package_links, self.gallery_name))
        return []
示例#6
0
    def _extract(self, pyfile, archive, password):
        """
        Test, optionally repair, and extract a single archive.

        Candidate passwords are checked with ``archive.verify`` first. Afterwards the
        archive is extracted and, when the ``delete`` option is set, the files
        reported by ``archive.chunks()`` are removed or moved to trash.

        :param pyfile: file object whose status and progress are updated.
        :param archive: extractor instance that is verified, repaired and extracted.
        :param password: password that is tried first; may be empty.
        :return: ``archive.files``, or ``archive.list()`` when that is empty.
        :raises Exception: "Extract failed", after the error was logged and the
            ``archive_extract_failed`` event was dispatched.
        """
        name = os.path.basename(archive.filename)

        pyfile.set_status("processing")

        encrypted = False
        try:
            self.log_debug(f"Password: {password or None}")
            #: Candidates: the given password first, followed by the saved ones
            #: when the "usepasswordfile" option is enabled
            passwords = (
                uniqify([password] + self.get_passwords(False))
                if self.config.get("usepasswordfile")
                else [password]
            )

            #: Test the archive with each candidate until one of them verifies
            for pw in passwords:
                try:
                    pyfile.set_custom_status(self._("archive testing"))
                    pyfile.set_progress(0)
                    archive.verify(pw)
                    pyfile.set_progress(100)

                except PasswordError:
                    #: Candidate rejected: flag the archive as encrypted (logged
                    #: once only) and go on with the next candidate
                    if not encrypted:
                        self.log_info(name, self._("Password protected"))
                        encrypted = True

                except CRCError as exc:
                    self.log_debug(name, exc)
                    self.log_info(name, self._("CRC Error"))

                    if not self.repair:
                        raise CRCError("Archive damaged")

                    else:
                        self.log_warning(name, self._("Repairing..."))
                        pyfile.set_custom_status(self._("archive repairing"))
                        pyfile.set_progress(0)
                        repaired = archive.repair()
                        pyfile.set_progress(100)

                        #: A failed repair is only tolerated with "keepbroken"
                        if not repaired and not self.config.get("keepbroken"):
                            raise CRCError("Archive damaged")

                        else:
                            self.add_password(pw)
                            break

                except ArchiveError as exc:
                    raise ArchiveError(exc)

                else:
                    #: Verification passed: remember the password, stop testing
                    self.add_password(pw)
                    break

            pyfile.set_custom_status(self._("archive extracting"))
            pyfile.set_progress(0)

            #: No candidate was rejected during testing, or the password file is
            #: disabled: extract with the given password only
            if not encrypted or not self.config.get("usepasswordfile"):
                self.log_debug(
                    "Extracting using password: {}".format(password or "None")
                )
                archive.extract(password)
            else:
                #: Otherwise try every non-empty candidate until extraction works
                for pw in [
                    f for f in uniqify([password] + self.get_passwords(False)) if f
                ]:
                    try:
                        self.log_debug(f"Extracting using password: {pw}")

                        archive.extract(pw)
                        self.add_password(pw)
                        break

                    except PasswordError:
                        self.log_debug("Password was wrong")
                else:
                    #: Loop ended without break: no candidate worked
                    raise PasswordError

            pyfile.set_progress(100)
            pyfile.set_status("processing")

            extracted_files = archive.files or archive.list()

            #: The archive's own chunk files are the deletion candidates
            delfiles = archive.chunks()
            self.log_debug("Would delete: " + ", ".join(delfiles))

            if self.config.get("delete"):
                self.log_info(self._("Deleting {} files").format(len(delfiles)))

                deltotrash = self.config.get("deltotrash")
                for f in delfiles:
                    file = os.fsdecode(f)
                    if not exists(file):
                        continue

                    if not deltotrash:
                        os.remove(file)

                    else:
                        try:
                            send2trash.send2trash(file)

                        #: NOTE(review): presumably raised when the send2trash
                        #: import is unavailable, as the log message suggests - confirm
                        except AttributeError:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)
                                ),
                                self._("Send2Trash lib not found"),
                            )

                        except Exception as exc:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)
                                ),
                                exc,
                            )

                        else:
                            self.log_info(
                                self._("Moved {} to trash").format(os.path.basename(f))
                            )

            self.log_info(name, self._("Extracting finished"))

            return extracted_files

        #: Every failure is logged here and then falls through to the common
        #: failure path after the handlers
        except PasswordError:
            self.log_error(
                name, self._("Wrong password" if password else "No password found")
            )

        except CRCError as exc:
            self.log_error(name, self._("CRC mismatch"), exc)

        except ArchiveError as exc:
            self.log_error(name, self._("Archive error"), exc)

        except Exception as exc:
            self.log_error(name, self._("Unknown error"), exc)

        #: Only reached after one of the handlers above has run
        self.m.dispatch_event("archive_extract_failed", pyfile, archive)

        raise Exception(self._("Extract failed"))
示例#7
0
    def extract(
        self, ids, thread=None
    ):  # TODO: Use pypack, not pid to improve method usability
        """
        Extract the archives of the packages with the given ids.

        For every package the downloaded files are offered to each extractor in
        ``self.extractors``; matching targets are extracted through ``_extract``.
        With the ``recursive`` option, extracted files are checked for archives too.
        Every processed id is removed from ``self.queue``.

        :param ids: package ids to process.
        :param thread: object providing ``add_active`` and ``finish_file``.
            NOTE(review): defaults to None but is used unconditionally for every
            target, so passing None makes each extraction fail - confirm callers.
        :return: False when ``ids`` is empty or a package failed, True otherwise.
        """
        if not ids:
            return False

        extracted = []
        failed = []

        def to_list(value):
            #: Remove all spaces, then split on ",", ";" or "|"
            return value.replace(" ", "").replace(",", "|").replace(";", "|").split("|")

        destination = self.config.get("destination")
        subfolder = self.config.get("subfolder")
        fullpath = self.config.get("fullpath")
        overwrite = self.config.get("overwrite")
        priority = self.config.get("priority")
        recursive = self.config.get("recursive")
        keepbroken = self.config.get("keepbroken")

        #: Extensions are normalized to lower case without leading dots
        extensions = [
            x.lstrip(".").lower() for x in to_list(self.config.get("extensions"))
        ]
        excludefiles = to_list(self.config.get("excludefiles"))

        #: NOTE(review): to_list never returns an empty list (splitting an empty
        #: string yields [""]), so this check looks always true - confirm intent
        if extensions:
            self.log_debug(f"Use for extensions: .{'|.'.join(extensions)}")

        #: Reload from txt file
        self.reload_passwords()

        dl_folder = self.pyload.config.get("general", "storage_folder")

        #: Iterate packages -> extractors -> targets
        for pid in ids:
            pypack = self.pyload.files.get_package(pid)

            #: Unknown package: just drop it from the queue
            if not pypack:
                self.queue.remove(pid)
                continue

            self.log_info(self._("Check package: {}").format(pypack.name))

            pack_dl_folder = os.path.join(
                dl_folder, pypack.folder, ""
            )  #: Force trailing slash

            #: Determine output folder
            extract_folder = os.path.join(
                pack_dl_folder, destination, ""
            )  #: Force trailing slash

            if subfolder:
                extract_folder = os.path.join(
                    extract_folder,
                    pypack.folder or safename(pypack.name.replace("http://", "")),
                )

            os.makedirs(extract_folder, exist_ok=True)
            if subfolder:
                self.set_permissions(extract_folder)

            matched = False
            success = True
            #: Tuples of (file id, downloaded file path, output folder), keyed by
            #: file name so that a repeated name keeps only its last entry
            files_ids = list(
                {
                    fdata["name"]: (
                        fdata["id"],
                        (os.path.join(pack_dl_folder, fdata["name"])),
                        extract_folder,
                    )
                    for fdata in pypack.get_children().values()
                }.values()
            )  #: Remove duplicates

            #: Check as long there are unseen files
            while files_ids:
                new_files_ids = []

                if extensions:  #: Include only specified archive types
                    files_ids = [
                        file_id
                        for file_id in files_ids
                        if any(
                            [
                                Extractor.archivetype(file_id[1]) in extensions
                                for Extractor in self.extractors
                            ]
                        )
                    ]

                #: Sort by filename to ensure (or at least try) that a multivolume archive is targeted by its first part
                #: This is important because, for example, UnRar ignores preceding parts in listing mode
                files_ids.sort(key=lambda file_id: file_id[1])

                for Extractor in self.extractors:
                    targets = Extractor.get_targets(files_ids)
                    if targets:
                        self.log_debug(
                            "Targets for {}: {}".format(Extractor.__name__, targets)
                        )
                        matched = True

                        for fid, fname, fout in targets:
                            name = os.path.basename(fname)

                            if not exists(fname):
                                self.log_debug(name, "File not found")
                                continue

                            self.log_info(name, self._("Extract to: {}").format(fout))
                            try:
                                pyfile = self.pyload.files.get_file(fid)
                                archive = Extractor(
                                    pyfile,
                                    fname,
                                    fout,
                                    fullpath,
                                    overwrite,
                                    excludefiles,
                                    priority,
                                    keepbroken,
                                )

                                thread.add_active(pyfile)
                                archive.init()

                                #: Save for removal from file processing list, which happens after deletion.
                                #: So archive.chunks() would just return an empty list.
                                chunks = archive.chunks()

                                try:
                                    new_files = self._extract(
                                        pyfile, archive, pypack.password
                                    )

                                finally:
                                    pyfile.set_progress(100)
                                    thread.finish_file(pyfile)

                            #: Any error marks the package as failed; the remaining
                            #: targets are still processed
                            except Exception as exc:
                                self.log_error(name, exc)
                                success = False
                                continue

                            #: Remove processed file and related multiparts from list
                            files_ids = [
                                (fid, fname, fout)
                                for fid, fname, fout in files_ids
                                if fname not in chunks
                            ]
                            self.log_debug(f"Extracted files: {new_files}")

                            #: Apply permissions to the new folders and files
                            new_folders = uniqify(os.path.dirname(f) for f in new_files)
                            for foldername in new_folders:
                                self.set_permissions(
                                    os.path.join(extract_folder, foldername)
                                )

                            for filename in new_files:
                                self.set_permissions(
                                    os.path.join(extract_folder, filename)
                                )

                            #: Queue extracted files for another pass when "recursive" is set
                            for filename in new_files:
                                file = os.fsdecode(
                                    os.path.join(
                                        os.path.dirname(archive.filename), filename
                                    )
                                )
                                if not exists(file):
                                    self.log_debug(
                                        "New file {} does not exists".format(filename)
                                    )
                                    continue

                                if recursive and os.path.isfile(file):
                                    new_files_ids.append(
                                        (fid, filename, os.path.dirname(filename))
                                    )  #: Append as new target

                            self.m.dispatch_event("archive_extracted", pyfile, archive)

                files_ids = new_files_ids  #: Also check extracted files

            if matched:
                if success:
                    #: Delete empty pack folder if extract_folder resides outside download folder
                    if self.config.get("delete") and self.pyload.config.get(
                        "general", "folder_per_package"
                    ):
                        if not extract_folder.startswith(pack_dl_folder):
                            if len(os.listdir(pack_dl_folder)) == 0:
                                try:
                                    os.rmdir(pack_dl_folder)
                                    self.log_debug(
                                        "Successfully deleted pack folder {}".format(
                                            pack_dl_folder
                                        )
                                    )

                                except OSError:
                                    self.log_warning(
                                        "Unable to delete pack folder {}".format(
                                            pack_dl_folder
                                        )
                                    )

                            else:
                                self.log_warning(
                                    "Not deleting pack folder {}, folder not empty".format(
                                        pack_dl_folder
                                    )
                                )

                    extracted.append(pid)
                    self.m.dispatch_event("package_extracted", pypack)

                else:
                    failed.append(pid)
                    self.m.dispatch_event("package_extract_failed", pypack)

                    self.failed.add(pid)
            else:
                self.log_info(self._("No files found to extract"))

            #: NOTE(review): "and" binds tighter than "or", so this reads as
            #: not matched or (not success and subfolder) - confirm that
            #: (not matched or not success) and subfolder was not intended
            if not matched or not success and subfolder:
                #: Best effort: os.rmdir only removes an empty folder
                try:
                    os.rmdir(extract_folder)

                except OSError:
                    pass

            self.queue.remove(pid)

        return True if not failed else False
示例#8
0
    def _extract(self, pyfile, archive, password):
        """
        Test, optionally repair, and extract a single archive.

        Candidate passwords are checked with ``archive.verify`` first. Afterwards the
        archive is extracted and, when the ``delete`` option is set, the files
        reported by ``archive.chunks()`` are removed or moved to trash.

        :param pyfile: file object whose status and progress are updated.
        :param archive: extractor instance that is verified, repaired and extracted.
        :param password: password that is tried first; may be empty.
        :return: ``archive.files``, or ``archive.list()`` when that is empty.
        :raises Exception: "Extract failed", after the error was logged and the
            ``archive_extract_failed`` event was dispatched.
        """
        name = os.path.basename(archive.filename)

        pyfile.set_status("processing")

        encrypted = False
        try:
            self.log_debug(f"Password: {password or None}")
            #: Candidates: the given password first, followed by the saved ones
            #: when the "usepasswordfile" option is enabled
            passwords = (uniqify([password] + self.get_passwords(False))
                         if self.config.get("usepasswordfile") else [password])

            #: Test the archive with each candidate until one of them verifies
            for pw in passwords:
                try:
                    pyfile.set_custom_status(self._("archive testing"))
                    pyfile.set_progress(0)
                    archive.verify(pw)
                    pyfile.set_progress(100)

                except PasswordError:
                    #: Candidate rejected: flag the archive as encrypted (logged
                    #: once only) and go on with the next candidate
                    if not encrypted:
                        self.log_info(name, self._("Password protected"))
                        encrypted = True

                except CRCError as exc:
                    self.log_debug(name, exc)
                    self.log_info(name, self._("CRC Error"))

                    if not self.repair:
                        raise CRCError("Archive damaged")

                    else:
                        self.log_warning(name, self._("Repairing..."))
                        pyfile.set_custom_status(self._("archive repairing"))
                        pyfile.set_progress(0)
                        repaired = archive.repair()
                        pyfile.set_progress(100)

                        #: A failed repair is only tolerated with "keepbroken"
                        if not repaired and not self.config.get("keepbroken"):
                            raise CRCError("Archive damaged")

                        else:
                            self.add_password(pw)
                            break

                except ArchiveError as exc:
                    raise ArchiveError(exc)

                else:
                    #: Verification passed: remember the password, stop testing
                    self.add_password(pw)
                    break

            pyfile.set_custom_status(self._("archive extracting"))
            pyfile.set_progress(0)

            #: No candidate was rejected during testing, or the password file is
            #: disabled: extract with the given password only
            if not encrypted or not self.config.get("usepasswordfile"):
                self.log_debug("Extracting using password: {}".format(
                    password or "None"))
                archive.extract(password)
            else:
                #: Otherwise try every non-empty candidate until extraction works
                for pw in [
                        f for f in uniqify([password] +
                                           self.get_passwords(False)) if f
                ]:
                    try:
                        self.log_debug(f"Extracting using password: {pw}")

                        archive.extract(pw)
                        self.add_password(pw)
                        break

                    except PasswordError:
                        self.log_debug("Password was wrong")
                else:
                    #: Loop ended without break: no candidate worked
                    raise PasswordError

            pyfile.set_progress(100)
            pyfile.set_status("processing")

            extracted_files = archive.files or archive.list()

            #: The archive's own chunk files are the deletion candidates
            delfiles = archive.chunks()
            self.log_debug("Would delete: " + ", ".join(delfiles))

            if self.config.get("delete"):
                self.log_info(
                    self._("Deleting {} files").format(len(delfiles)))

                deltotrash = self.config.get("deltotrash")
                for f in delfiles:
                    file = os.fsdecode(f)
                    if not exists(file):
                        continue

                    if not deltotrash:
                        os.remove(file)

                    else:
                        try:
                            send2trash.send2trash(file)

                        #: NOTE(review): presumably raised when the send2trash
                        #: import is unavailable, as the log message suggests - confirm
                        except AttributeError:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)),
                                self._("Send2Trash lib not found"),
                            )

                        except Exception as exc:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)),
                                exc,
                            )

                        else:
                            self.log_info(
                                self._("Moved {} to trash").format(
                                    os.path.basename(f)))

            self.log_info(name, self._("Extracting finished"))

            return extracted_files

        #: Every failure is logged here and then falls through to the common
        #: failure path after the handlers
        except PasswordError:
            self.log_error(
                name,
                self._("Wrong password" if password else "No password found"))

        except CRCError as exc:
            self.log_error(name, self._("CRC mismatch"), exc)

        except ArchiveError as exc:
            self.log_error(name, self._("Archive error"), exc)

        except Exception as exc:
            self.log_error(name, self._("Unknown error"), exc)

        #: Only reached after one of the handlers above has run
        self.m.dispatch_event("archive_extract_failed", pyfile, archive)

        raise Exception(self._("Extract failed"))
示例#9
0
    def extract(self,
                ids,
                thread=None
                ):  # TODO: Use pypack, not pid to improve method usability
        """
        Extract the archives of the packages with the given ids.

        For every package the downloaded files are offered to each extractor in
        ``self.extractors``; matching targets are extracted through ``_extract``.
        With the ``recursive`` option, extracted files are checked for archives too.
        Every processed id is removed from ``self.queue``.

        :param ids: package ids to process.
        :param thread: object providing ``add_active`` and ``finish_file``.
            NOTE(review): defaults to None but is used unconditionally for every
            target, so passing None makes each extraction fail - confirm callers.
        :return: False when ``ids`` is empty or a package failed, True otherwise.
        """
        if not ids:
            return False

        extracted = []
        failed = []

        def to_list(value):
            #: Remove all spaces, then split on ",", ";" or "|"
            return value.replace(" ", "").replace(",",
                                                  "|").replace(";",
                                                               "|").split("|")

        destination = self.config.get("destination")
        subfolder = self.config.get("subfolder")
        fullpath = self.config.get("fullpath")
        overwrite = self.config.get("overwrite")
        priority = self.config.get("priority")
        recursive = self.config.get("recursive")
        keepbroken = self.config.get("keepbroken")

        #: Extensions are normalized to lower case without leading dots
        extensions = [
            x.lstrip(".").lower()
            for x in to_list(self.config.get("extensions"))
        ]
        excludefiles = to_list(self.config.get("excludefiles"))

        #: NOTE(review): to_list never returns an empty list (splitting an empty
        #: string yields [""]), so this check looks always true - confirm intent
        if extensions:
            self.log_debug(f"Use for extensions: .{'|.'.join(extensions)}")

        #: Reload from txt file
        self.reload_passwords()

        dl_folder = self.pyload.config.get("general", "storage_folder")

        #: Iterate packages -> extractors -> targets
        for pid in ids:
            pypack = self.pyload.files.get_package(pid)

            #: Unknown package: just drop it from the queue
            if not pypack:
                self.queue.remove(pid)
                continue

            self.log_info(self._("Check package: {}").format(pypack.name))

            pack_dl_folder = os.path.join(dl_folder, pypack.folder,
                                          "")  #: Force trailing slash

            #: Determine output folder
            extract_folder = os.path.join(pack_dl_folder, destination,
                                          "")  #: Force trailing slash

            if subfolder:
                extract_folder = os.path.join(
                    extract_folder,
                    pypack.folder
                    or safename(pypack.name.replace("http://", "")),
                )

            os.makedirs(extract_folder, exist_ok=True)
            if subfolder:
                self.set_permissions(extract_folder)

            matched = False
            success = True
            #: Tuples of (file id, downloaded file path, output folder), keyed by
            #: file name so that a repeated name keeps only its last entry
            files_ids = list({
                fdata["name"]: (
                    fdata["id"],
                    (os.path.join(pack_dl_folder, fdata["name"])),
                    extract_folder,
                )
                for fdata in pypack.get_children().values()
            }.values())  #: Remove duplicates

            #: Check as long there are unseen files
            while files_ids:
                new_files_ids = []

                if extensions:  #: Include only specified archive types
                    files_ids = [
                        file_id for file_id in files_ids if any([
                            Extractor.archivetype(file_id[1]) in extensions
                            for Extractor in self.extractors
                        ])
                    ]

                #: Sort by filename to ensure (or at least try) that a multivolume archive is targeted by its first part
                #: This is important because, for example, UnRar ignores preceding parts in listing mode
                files_ids.sort(key=lambda file_id: file_id[1])

                for Extractor in self.extractors:
                    targets = Extractor.get_targets(files_ids)
                    if targets:
                        self.log_debug("Targets for {}: {}".format(
                            Extractor.__name__, targets))
                        matched = True

                        for fid, fname, fout in targets:
                            name = os.path.basename(fname)

                            if not exists(fname):
                                self.log_debug(name, "File not found")
                                continue

                            self.log_info(
                                name,
                                self._("Extract to: {}").format(fout))
                            try:
                                pyfile = self.pyload.files.get_file(fid)
                                archive = Extractor(
                                    pyfile,
                                    fname,
                                    fout,
                                    fullpath,
                                    overwrite,
                                    excludefiles,
                                    priority,
                                    keepbroken,
                                )

                                thread.add_active(pyfile)
                                archive.init()

                                #: Save for removal from file processing list, which happens after deletion.
                                #: So archive.chunks() would just return an empty list.
                                chunks = archive.chunks()

                                try:
                                    new_files = self._extract(
                                        pyfile, archive, pypack.password)

                                finally:
                                    pyfile.set_progress(100)
                                    thread.finish_file(pyfile)

                            #: Any error marks the package as failed; the remaining
                            #: targets are still processed
                            except Exception as exc:
                                self.log_error(name, exc)
                                success = False
                                continue

                            #: Remove processed file and related multiparts from list
                            files_ids = [(fid, fname, fout)
                                         for fid, fname, fout in files_ids
                                         if fname not in chunks]
                            self.log_debug(f"Extracted files: {new_files}")

                            #: Apply permissions to the new folders and files
                            new_folders = uniqify(
                                os.path.dirname(f) for f in new_files)
                            for foldername in new_folders:
                                self.set_permissions(
                                    os.path.join(extract_folder, foldername))

                            for filename in new_files:
                                self.set_permissions(
                                    os.path.join(extract_folder, filename))

                            #: Queue extracted files for another pass when "recursive" is set
                            for filename in new_files:
                                file = os.fsdecode(
                                    os.path.join(
                                        os.path.dirname(archive.filename),
                                        filename))
                                if not exists(file):
                                    self.log_debug(
                                        "New file {} does not exists".format(
                                            filename))
                                    continue

                                if recursive and os.path.isfile(file):
                                    new_files_ids.append(
                                        (fid, filename,
                                         os.path.dirname(filename)
                                         ))  #: Append as new target

                            self.m.dispatch_event("archive_extracted", pyfile,
                                                  archive)

                files_ids = new_files_ids  #: Also check extracted files

            if matched:
                if success:
                    #: Delete empty pack folder if extract_folder resides outside download folder
                    if self.config.get("delete") and self.pyload.config.get(
                            "general", "folder_per_package"):
                        if not extract_folder.startswith(pack_dl_folder):
                            if len(os.listdir(pack_dl_folder)) == 0:
                                try:
                                    os.rmdir(pack_dl_folder)
                                    self.log_debug(
                                        "Successfully deleted pack folder {}".
                                        format(pack_dl_folder))

                                except OSError:
                                    self.log_warning(
                                        "Unable to delete pack folder {}".
                                        format(pack_dl_folder))

                            else:
                                self.log_warning(
                                    "Not deleting pack folder {}, folder not empty"
                                    .format(pack_dl_folder))

                    extracted.append(pid)
                    self.m.dispatch_event("package_extracted", pypack)

                else:
                    failed.append(pid)
                    self.m.dispatch_event("package_extract_failed", pypack)

                    self.failed.add(pid)
            else:
                self.log_info(self._("No files found to extract"))

            #: NOTE(review): "and" binds tighter than "or", so this reads as
            #: not matched or (not success and subfolder) - confirm that
            #: (not matched or not success) and subfolder was not intended
            if not matched or not success and subfolder:
                #: Best effort: os.rmdir only removes an empty folder
                try:
                    os.rmdir(extract_folder)

                except OSError:
                    pass

            self.queue.remove(pid)

        return True if not failed else False