Example #1

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        contentElements = soup.select("p")

        contentElements[0].decompose()
        contentElements[1].decompose()

        contentElements[-1].decompose()
        contentElements[-2].decompose()
        contentElements[-3].decompose()

        return Chapter(title=None, content=Stringify(soup))
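
A note on the technique above: BeautifulSoup's decompose() removes an element from the document tree but not from a previously collected Python list, so removing boilerplate paragraphs by position, as Example #1 does, is safe. A minimal sketch (the markup is invented for illustration):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        "<p>header</p><p>one</p><p>two</p><p>footer</p>",
        "html.parser")

    paragraphs = soup.select("p")
    paragraphs[0].decompose()   # Drop the leading boilerplate paragraph.
    paragraphs[-1].decompose()  # Drop the trailing boilerplate paragraph.

    print(soup)  # <p>one</p><p>two</p>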
Example #2

    def _InternallyExtractChapter(
        self,
        URL: str,
        soup: Optional[BeautifulSoup]
    ) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Locate relevant page elements.

        titleElement = soup.select_one("div#chapter-outer > div.caption > div > h4")
        # No error-checking here. Not sure if every chapter has to have a title on WW.

        contentElement = soup.select_one("div#chapter-content")
        if not contentElement:
            logging.error("Content element not found.")
            return None

        # Return.

        return Chapter(
            titleElement.get_text().strip() if titleElement else "",
            Stringify(contentElement.encode_contents())
        )
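
Stringify() is project-specific and never defined in these snippets; judging by its use on the bytes returned by encode_contents(), a plausible minimal stand-in (an assumption, not the project's actual code) could look like this:

    def Stringify(value) -> str:

        # encode_contents() yields the inner HTML as UTF-8 bytes by default;
        # decode those, and fall back to str() for anything else.

        if value is None:
            return ""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return str(value)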
Example #3

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Extract the title.

        title = None

        titleElement = soup.select_one("p.highlighted-image__title > a")
        if titleElement:
            title = titleElement.get_text().strip()

        # Extract the content.

        contentElement = soup.select_one("div.storytext-container")
        if not contentElement:
            logging.error("Could find the content element.")
            return None

        # Return.

        return Chapter(title=title,
                       content=Stringify(contentElement.encode_contents()))
Example #4

    def ExtractChapter(self, index: int) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param index The index of the chapter to be extracted.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        if 1 == self.Story.Metadata.ChapterCount:

            titleElement = None

            contentElement = self._storySoup.select_one(
                "div#chapters div.userstuff")
            if not contentElement:
                logging.error("Content element not found.")
                return None

            if (landmarkElement := contentElement.select_one("h3#work")):
                landmarkElement.decompose()

            return Chapter(title=titleElement.get_text().strip()
                           if titleElement else None,
                           content=Stringify(contentElement.encode_contents()))
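
The walrus-guarded decompose() in Example #4 is a recurring idiom in these extractors: remove an element only if it exists. In isolation (markup invented):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        "<div><h3 id='work'>Work Text:</h3><p>Hello.</p></div>",
        "html.parser")

    if (landmarkElement := soup.select_one("h3#work")):
        landmarkElement.decompose()  # Only runs when the landmark exists.

    print(soup)  # <div><p>Hello.</p></div>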
Example #5

    def _InternallyExtractChapter(
        self,
        URL: str,
        soup: Optional[BeautifulSoup]
    ) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Locate relevant page elements.

        titleElement = soup.select_one("h2#quizSubtitle")
        if not titleElement:
            logging.error("Title element not found.")
            return None

        contentElement = soup.select_one("#rescontent")
        if not contentElement:
            logging.error("Content element not found.")
            return None

        # Return.

        return Chapter(
            titleElement.get_text().strip(),
            Stringify(contentElement.encode_contents())
        )
Example #6

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Extract the content.

        contentElement = soup.select_one("div#story")
        if not contentElement:
            logging.error("Couldn't find the content element.")
            return None

        # Return.

        return Chapter(title=self._chapterTitles.get(URL),
                       content=Stringify(contentElement.encode_contents()))
Example #7

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Read the chapter.

        chapterText = self._webSession.Get(URL, textEncoding="ascii")
        if not chapterText:
            logging.error("Failed to download a chapter.")
            return None

        chapterText = chapterText.splitlines()
        if len(chapterText) < 4:
            logging.error("Invalid chapter format.")
            return None

        chapterText = chapterText[3:]

        # Format the content.

        chapterCode = ""
        currentParagraphCode = ""

        for line in chapterText:

            if not line:

                chapterCode += f"<p>{currentParagraphCode}</p>"
                currentParagraphCode = ""

            else:

                currentParagraphCode += f" {line.strip()}"

        # Flush a trailing paragraph that isn't followed by a blank line.

        if currentParagraphCode:
            chapterCode += f"<p>{currentParagraphCode}</p>"

        # Return.

        return Chapter(content=chapterCode)
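
The blank-line-delimited paragraph reflow above reappears in Example #13; factored into a standalone helper (hypothetical name, same logic including the trailing-paragraph flush):

    def BuildParagraphs(lines) -> str:

        # Consecutive non-empty lines are joined into one <p> element;
        # a blank line terminates the current paragraph.

        chapterCode = ""
        currentParagraphCode = ""

        for line in lines:
            if not line:
                chapterCode += f"<p>{currentParagraphCode}</p>"
                currentParagraphCode = ""
            else:
                currentParagraphCode += f" {line.strip()}"

        if currentParagraphCode:
            chapterCode += f"<p>{currentParagraphCode}</p>"

        return chapterCode

    print(BuildParagraphs(["One.", "still one.", "", "Two."]))
    # <p> One. still one.</p><p> Two.</p>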
Example #8

    def ExtractChapter(self, index: int) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param index The index of the chapter to be extracted.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        if (not self._chapters) or (index < 1) or (index > len(self._chapters)):
            return None

        return Chapter(content="".join(self._chapters[index - 1]))
Example #9

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        rowElements = soup.select("div#contentdata > table > tr")
        if (not rowElements) or len(rowElements) < 3:
            logging.error("Chapter page doesn't conform to expected format.")

        return Chapter(title=None,
                       content=Stringify(rowElements[2].encode_contents()))
Example #10

    def _ProcessURL(self, URL: str) -> Optional[Story]:

        ##
        #
        # Processes a URL, in text mode.
        #
        # @param URL The URL to be processed.
        #
        # @return The Story object if the URL has been processed successfully, **None** otherwise.
        #
        ##

        # Locate a working extractor.

        self._interface.Process("Creating the extractor...", section=True)

        extractor = CreateExtractor(URL)
        if not extractor:
            logging.error("No matching extractor found.")
            return None

        self._interface.Comment(
            f'Extractor created: "{type(extractor).__name__}".')

        # Authenticate the user (if supported by the extractor).

        if self._arguments.Authenticate and extractor.SupportsAuthentication():

            self._interface.Process("Logging-in...", section=True)

            authenticationResult = extractor.Authenticate(self._interface)

            if Extractor.AuthenticationResult.FAILURE == authenticationResult:
                self._interface.Error("Failed to authenticate.")
            elif Extractor.AuthenticationResult.ABANDONED == authenticationResult:
                self._interface.Comment("Proceeding without logging-in...")
            else:
                self._interface.Comment("Authenticated successfully.")

        # Scan the story.

        self._interface.Process("Scanning the story...", section=True)

        if not extractor.ScanStory():
            logging.error("Failed to scan the story.")
            return None

        self._PrintMetadata(extractor.Story)

        # Check whether the output files already exist.

        outputFilePaths = self._GetOutputPaths(self._arguments.Output,
                                               extractor.Story)

        if (not self._arguments.Force) and all(
                x.is_file() for x in outputFilePaths.values()):
            self._interface.Comment("This story has been downloaded already.",
                                    section=True)
            return extractor.Story

        elif self._arguments.Force:
            for filePath in outputFilePaths.values():
                if filePath.is_file():
                    filePath.unlink()

        # Extract content.

        self._interface.Process("Extracting content...", section=True)

        for index in range(1, extractor.Story.Metadata.ChapterCount + 1):

            # Generate cache identifiers.

            cacheOwnerName = extractor.Story.Metadata.URL
            cacheTitleName = f"{index}-Title"
            cacheContentName = f"{index}-Content"

            # Retrieve chapter data, either from cache or by downloading it.

            retrievedFromCache = False

            chapter = Chapter(title=Stringify(
                self._cache.RetrieveItem(cacheOwnerName, cacheTitleName)),
                              content=Stringify(
                                  self._cache.RetrieveItem(
                                      cacheOwnerName, cacheContentName)))

            if chapter:

                retrievedFromCache = True

            else:

                chapter = extractor.ExtractChapter(index)

                if not chapter:

                    if (1 != index) and (extractor.Story.Metadata.ChapterCount
                                         != index):
                        logging.error("Failed to extract story content.")
                        return None

                    else:
                        self._interface.Error(
                            "Failed to extract the last chapter - it doesn't seem to exist."
                        )
                        continue

            extractor.Story.Chapters.append(chapter)

            # Add the chapter to cache.

            if not retrievedFromCache:
                self._cache.AddItem(cacheOwnerName, cacheTitleName,
                                    chapter.Title)
                self._cache.AddItem(cacheOwnerName, cacheContentName,
                                    chapter.Content)

            # Notify the user, then sleep for a while.

            self._interface.ProgressBar(
                index, extractor.Story.Metadata.ChapterCount,
                Configuration.ProgressBarLength,
                f"# Extracted chapter {index}/{extractor.Story.Metadata.ChapterCount}",
                True)

            if extractor.Story.Metadata.ChapterCount == index:
                self._interface.EmptyLine()

            if (not retrievedFromCache
                    and extractor.RequiresBreaksBetweenRequests()):
                sleep(Configuration.PostChapterSleepTime)

        # Locate and download images.

        if self._arguments.Images:

            self._interface.Process("Downloading images...", section=True)

            # Locate the images.

            for chapter in extractor.Story.Chapters:
                extractor.Story.Images.extend(FindImagesInCode(
                    chapter.Content))

            storySiteURL = GetSiteURL(extractor.Story.Metadata.URL)
            for image in extractor.Story.Images:
                image.URL = MakeURLAbsolute(image.URL, storySiteURL)

            self._interface.Comment(
                f"Found {len(extractor.Story.Images)} image(s).")

            # Download them.

            if extractor.Story.Images:

                imageCount = len(extractor.Story.Images)
                downloadedImageCount = 0

                previousImageFailedToDownload = False

                for index, image in enumerate(extractor.Story.Images, start=1):

                    retrievedFromCache = False
                    imageData = self._cache.RetrieveItem(
                        extractor.Story.Metadata.URL, image.URL)

                    if not image.CreateFromData(
                            imageData, Configuration.MaximumImageSideLength):

                        imageData = extractor.ExtractMedia(image.URL)

                        if imageData:
                            image.CreateFromData(
                                imageData,
                                Configuration.MaximumImageSideLength)

                    else:

                        retrievedFromCache = True

                    if image:

                        if not retrievedFromCache:
                            self._cache.AddItem(extractor.Story.Metadata.URL,
                                                image.URL, image.Data)

                        self._interface.ProgressBar(
                            index, imageCount, Configuration.ProgressBarLength,
                            f"# Downloaded image {index}/{imageCount}", True)

                        if imageCount == index:
                            print()

                        downloadedImageCount += 1
                        previousImageFailedToDownload = False

                    else:

                        if (index > 1) and (not previousImageFailedToDownload):
                            print()

                        errorMessage = (
                            f'Failed to download image {index}/{imageCount}: "{image.URL}".'
                            if not imageData else
                            f'Failed to process/re-encode image {index}/{imageCount}: "{image.URL}".')

                        self._interface.Error(errorMessage)

                        previousImageFailedToDownload = True

                self._interface.Comment(
                    f"Successfully downloaded {downloadedImageCount}/{imageCount} image(s)."
                )

        # Process content.

        self._interface.Process("Processing content...", section=True)

        extractor.Story.Process()

        for index, chapter in enumerate(extractor.Story.Chapters, start=1):

            # Store original content.

            if self._arguments.Debug:

                fileName = GetSanitizedFileName(f"{index} - Original.html")
                fileSubdirectoryName = GetSanitizedFileName(
                    extractor.Story.Metadata.Title)

                WriteTextFile(
                    Configuration.DebugDirectoryPath / fileSubdirectoryName /
                    fileName, chapter.Content)

            # The sanitizer is used twice - once before any other processing, once after every other
            # processor. The first time is required to clean up the story (remove empty tags and tag
            # trees, for example), the second to guarantee that the story is actually sanitized.

            chapter.Content = SanitizerProcessor().Process(chapter.Content)
            chapter.Content = TypographyProcessor().Process(chapter.Content)
            chapter.Content = SanitizerProcessor().Process(chapter.Content)

            # Store processed content.

            if self._arguments.Debug:

                fileName = GetSanitizedFileName(f"{index} - Processed.html")
                fileSubdirectoryName = GetSanitizedFileName(
                    extractor.Story.Metadata.Title)

                WriteTextFile(
                    Configuration.DebugDirectoryPath / fileSubdirectoryName /
                    fileName, chapter.Content)

        if not extractor.Story.Metadata.WordCount:
            extractor.Story.Metadata.WordCount = (
                extractor.Story.CalculateWordCount())

        self._interface.Comment("Content processed.")

        # Return.

        return extractor.Story
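
The cache interface used throughout Example #10 (RetrieveItem()/AddItem()) is project-specific; a minimal in-memory stand-in with the same call shape (an assumption for illustration, not the project's actual cache, which presumably persists to disk):

    class SimpleCache:

        # Items are keyed by an owner name (here, the story URL) and an
        # item name (e.g. "1-Title", "1-Content").

        def __init__(self) -> None:
            self._items = {}

        def AddItem(self, ownerName: str, itemName: str, data) -> None:
            self._items[(ownerName, itemName)] = data

        def RetrieveItem(self, ownerName: str, itemName: str):
            return self._items.get((ownerName, itemName))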
            logging.error("Content element not found.")
            return None

        if (unwantedElement :=
                contentElement.select_one("span.rt-reading-time")):
            unwantedElement.replaceWith("")

        if (unwantedElement :=
                contentElement.select_one("div.wpcm-subscribe")):
            unwantedElement.replaceWith("")

        if (unwantedElement := contentElement.select_one("rating-form")):
            unwantedElement.replaceWith("")

        return Chapter(title=SeparateSubtitle(
            self._CleanStoryTitle(titleElement.get_text().strip())),
                       content=Stringify(contentElement.encode_contents()))

    def _FindAllStoriesByAuthor(self, authorName: str):

        # Download author's page.

        authorsPageURL = f"https://najlepszaerotyka.com.pl/author/{authorName}/"

        soup = self._webSession.GetSoup(authorsPageURL)
        if not soup:
            logging.error("Failed to download page: \"{authorsPageURL\".")
            return None

        # Get the number of subpages.
Example #12

                logging.error(f'Failed to download page: "{pageURL}".')
                return None

            contentElement = soup.select_one(
                "div.b-story-body-x > div") or soup.select_one(
                    "div.panel.article")
            if not contentElement:
                logging.error("Story content element not found.")
                return None

            content += "<br/><br/>" + Stringify(
                contentElement.encode_contents())

        # Return.

        return Chapter(title=None, content=content)

    def _GetNormalizedStoryURL(self, URL: str) -> str:

        ##
        #
        # Returns a normalized story URL, i.e. one that can be used for anything.
        #
        # @param URL Input URL (given by the user).
        #
        # @return Normalized URL.
        #
        ##

        if not URL:
            return URL
Example #13

    def _InternallyExtractChapter(
        self,
        URL: str,
        soup: Optional[BeautifulSoup]
    ) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Define usual story endings.

        USUAL_ENDINGS = [
            "~~~",
            "~ ~ ~ ",
            "the end",
            "end",
        ]

        # Locate and cut the end.

        text = self._storyText.splitlines()

        separatorLineIndices = []
        endLineIndices = []

        for index, line in enumerate(text):

            strippedLine = line.strip()

            if strippedLine.startswith("***") or strippedLine.startswith("------"):
                separatorLineIndices.append(index)

            lowercaseLine = strippedLine.lower()

            for ending in USUAL_ENDINGS:
                if lowercaseLine.startswith(ending):
                    endLineIndices.append(index)
                    break

        firstLineIndex = (separatorLineIndices[1]
                          if len(separatorLineIndices) >= 2 else -1)
        lastLineIndex = endLineIndices[-1] if endLineIndices else -1

        if -1 == firstLineIndex:
            logging.error("Invalid story content format.")
            return None

        if -1 == lastLineIndex:
            text = text[firstLineIndex + 1:]
        else:
            text = text[firstLineIndex + 1:lastLineIndex]

        # Format the content.

        chapterCode = ""
        currentParagraphCode = ""

        for line in text:

            if not line:

                chapterCode += f"<p>{currentParagraphCode}</p>"
                currentParagraphCode = ""

            else:

                currentParagraphCode += f" {line.strip()}"

        # Flush a trailing paragraph that isn't followed by a blank line.

        if currentParagraphCode:
            chapterCode += f"<p>{currentParagraphCode}</p>"

        # Return.

        return Chapter(content=chapterCode)
Example #14

                    f"Trying to extract chapter {index}. "
                    f"Only {len(chapterElements)} chapter(s) located. "
                    f"The story supposedly has {self.Story.Metadata.ChapterCount} chapter(s)."
                )
                return None

            currentChapterElement = chapterElements[index - 1]

            titleElement = currentChapterElement.select_one("h3.title")
            contentElement = currentChapterElement.select_one("div.userstuff")

            if (landmarkElement := contentElement.select_one("h3#work")):
                landmarkElement.decompose()

            return Chapter(title=titleElement.get_text().strip()
                           if titleElement else None,
                           content=Stringify(contentElement.encode_contents()))

    def _ScanWorks(self, URL: str) -> Optional[List[str]]:

        ##
        #
        # Scans a list of works: generates the list of story URLs.
        #
        # @param URL The URL.
        #
        # @return **None** when the scan fails, a list of story URLs when it doesn't fail.
        #
        ##

        # Check the arguments.
Example #15

        if not contentElement:
            logging.error("Content element not found.")
            return None

        if (element := contentElement.select_one("div#storyHeader")):
            element.decompose()

        if (element := contentElement.select_one("div#authorNotes")):
            element.decompose()

        for element in contentElement.select("form"):
            element.decompose()

        # Return.

        return Chapter(content=Stringify(contentElement.encode_contents()))

    @staticmethod
    def _GetStoryID(URL: str) -> Optional[str]:

        ##
        #
        # Retrieves story ID from story URL.
        #
        # @param URL The URL of the story.
        #
        # @return The ID of the story, or **None** on failure.
        #
        ##

        if not URL:
Example #16

        if (selectedChapterElement := soup.find("option", {"selected": True})):
            title = selectedChapterElement.text.strip()

        if title and (titleMatch := re.search(r"\d+\. (.*)", title)):
            title = titleMatch.group(1)

        # Read the content.

        storyTextElement = soup.find(id="storytext")
        if not storyTextElement:
            logging.error("Story text element not found.")
            return None

        # Create the Chapter and return it.

        return Chapter(title=title,
                       content=Stringify(storyTextElement.encode_contents()))

    @staticmethod
    def _GetStoryID(URL: str) -> Optional[str]:

        if not URL:
            return None

        storyIDMatch = re.search(r"/s/(\d+)/", URL)
        if not storyIDMatch:
            return None

        return storyIDMatch.group(1)

    @staticmethod
    def _ReformatDate(date: str) -> Optional[str]:
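
The story-ID regex in _GetStoryID() above is easy to check in isolation (the URL below is invented, but has the /s/<id>/ shape the pattern expects):

    import re

    storyIDMatch = re.search(r"/s/(\d+)/", "https://www.example.com/s/1234567/1/")
    print(storyIDMatch.group(1) if storyIDMatch else None)  # 1234567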
class ExtractorHentaiFoundry(Extractor):
    def __init__(self) -> None:

        ##
        #
        # The constructor.
        #
        ##

        super().__init__()

    def GetSupportedHostnames(self) -> List[str]:

        ##
        #
        # Returns a list of hostnames supposed to be supported by the extractor.
        #
        # @return A list of supported hostnames.
        #
        ##

        return ["hentai-foundry.com"]

    def ScanChannel(self, URL: str) -> Optional[List[str]]:

        ##
        #
        # Scans the channel: generates the list of story URLs.
        #
        # @return **None** when the scan fails, a list of story URLs when it doesn't fail.
        #
        ##

        if (not URL) or (GetHostname(URL) not in self.GetSupportedHostnames()):
            return None

        usernameStoryIDMatch = re.search(r"/user/([a-zA-Z0-9_]+)/(\d+)", URL)
        if usernameStoryIDMatch:
            return None

        usernameMatch = re.search(r"/user/([a-zA-Z0-9_]+)", URL)
        if not usernameMatch:
            return None

        username = usernameMatch.group(1)
        normalizedURL = f"http://www.hentai-foundry.com/stories/user/{username}/"

        pageSoup = self._webSession.GetSoup(self._GetAdultView(normalizedURL))
        if not pageSoup:
            return None

        pageCountDescriptionElement = pageSoup.select_one(
            ".galleryHeader > .summary")
        if not pageCountDescriptionElement:
            logging.error("Failed to locate the page count description.")
            return None

        pageCountDescription = pageCountDescriptionElement.get_text().strip()

        pageCountDescriptionMatch = re.search(
            r"Displaying (\d+)-(\d+) of (\d+) results", pageCountDescription)

        if not pageCountDescriptionMatch:
            logging.error("Failed to retrieve page count of the Stories tab.")
            return None

        storiesPerPage = int(pageCountDescriptionMatch.group(2))
        storiesInTotal = int(pageCountDescriptionMatch.group(3))

        if not storiesPerPage:
            return None

        pageCount = ceil(storiesInTotal / storiesPerPage)

        storyURLs = []
        for pageIndex in range(1, pageCount + 1):

            pageURL = self._GetAdultView(
                f"http://www.hentai-foundry.com/stories/user/{username}?page={pageIndex}"
            )

            pageSoup = self._webSession.GetSoup(pageURL)
            if not pageSoup:
                return None

            storyLinkElements = pageSoup.select(
                ".items > .storyRow > .titlebar > a")

            for linkElement in storyLinkElements:

                if not linkElement.has_attr("href"):
                    continue

                storyURLs.append(self._baseURL + linkElement["href"])

        return storyURLs
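
    # Sanity check for the pagination arithmetic above (invented numbers):
    # with 53 stories in total, displayed 25 per page, ceil(53 / 25) == 3,
    # so three subpages would be scanned.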

    def _InternallyScanStory(self, URL: str,
                             soup: Optional[BeautifulSoup]) -> bool:

        ##
        #
        # Scans the story: generates the list of chapter URLs and retrieves the
        # metadata.
        #
        # @param URL  The URL of the story.
        # @param soup The tag soup.
        #
        # @return **False** when the scan fails, **True** when it doesn't fail.
        #
        ##

        # Locate metadata.

        titleElement = soup.select_one(".titlebar a")
        if not titleElement:
            logging.error("Title element not found.")
            return False

        authorElement = soup.select_one(".storyInfo > .col1 > a")
        if not authorElement:
            logging.error("Author element not found.")
            return False

        datesElements = soup.select(".storyInfo > .col2 > .indent")
        if (not datesElements) or (len(datesElements) < 2):
            logging.error("Dates elements not found.")
            return False

        datePublishedElement = datesElements[0]
        dateUpdatedElement = datesElements[1]

        summaryElement = soup.select_one(".storyDescript")
        if not summaryElement:
            logging.error("Summary element not found.")
            return False

        chapterCountWordCountElement = soup.select_one(".storyInfo > .col3")
        if not chapterCountWordCountElement:
            logging.error("Chapter/word count elements not found.")
            return False

        # Extract and save metadata.

        self.Story.Metadata.Title = titleElement.get_text().strip()
        self.Story.Metadata.Author = authorElement.get_text().strip()

        rawDatePublished = datePublishedElement.get_text().strip()
        rawDateUpdated = dateUpdatedElement.get_text().strip()

        self.Story.Metadata.DatePublished = self._ReformatDate(
            rawDatePublished)
        self.Story.Metadata.DateUpdated = self._ReformatDate(rawDateUpdated)

        chapterCountWordCountDescription = StripHTML(
            chapterCountWordCountElement.get_text().strip())
        chapterCountMatch = re.search(r"Chapters:\s+(\d+)",
                                      chapterCountWordCountDescription)
        if not chapterCountMatch:
            logging.error("Chapter count not found.")
            return False

        wordCountMatch = re.search(r"Words:\s+([0-9,]+)",
                                   chapterCountWordCountDescription)
        if not wordCountMatch:
            logging.error("Word count not found.")
            return False

        self.Story.Metadata.ChapterCount = int(chapterCountMatch.group(1))
        self.Story.Metadata.WordCount = self._ReadWordCount(
            wordCountMatch.group(1))

        self.Story.Metadata.Summary = StripHTML(
            summaryElement.get_text().strip())

        # Retrieve chapter URLs.

        chapterLinkElements = soup.select(".boxbody > p > a")
        if not chapterLinkElements:
            logging.error("No chapter links found.")
            return False

        for linkElement in chapterLinkElements:

            if not linkElement.has_attr("href"):
                continue

            self._chapterURLs.append(self._baseURL + linkElement["href"])

        # Return.

        return True
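
    # NOTE: _ReadWordCount() is not included in this listing. Given the
    # "[0-9,]+" capture above, a plausible minimal implementation (an
    # assumption, not necessarily the project's actual code):

    @staticmethod
    def _ReadWordCount(rawWordCount: str) -> int:

        # Convert a comma-separated count like "12,345" to the integer 12345.

        return int(rawWordCount.replace(",", ""))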

    def _InternallyExtractChapter(
            self, URL: str,
            soup: Optional[BeautifulSoup]) -> Optional[Chapter]:

        ##
        #
        # Extracts specific chapter.
        #
        # @param URL  The URL of the page containing the chapter.
        # @param soup The tag soup of the page containing the chapter.
        #
        # @return The Chapter object if the chapter is extracted correctly, **None** otherwise.
        #
        ##

        # Read the title.

        chapterTitle = None

        if (titleElement := soup.select_one("#viewChapter > .boxheader")):

            chapterTitle = titleElement.get_text().strip()

        # Read the content.

        storyTextElement = soup.select_one("#viewChapter > .boxbody")
        if not storyTextElement:
            logging.error("Story text element not found.")
            return None

        return Chapter(title=chapterTitle,
                       content=Stringify(storyTextElement.encode_contents()))