示例#1
0
    def parse(self, page_html):
        """Extract title, author, summary and universes from a story page.

        The fields are looked up one after another. The first lookup that
        comes back empty stops parsing and yields a ``ParseResult.failure``
        naming the missing field; otherwise a ``ParseResult.success``
        carrying all four fields is returned.
        """
        document = BeautifulSoup(page_html, "html.parser")

        # (keyword for the result, extractor, message used when it is empty)
        extractors = (
            ("title", FanfictionNetParser._get_title,
             "Could not find story title"),
            ("author", FanfictionNetParser._get_author,
             "Could not find author name"),
            ("summary", FanfictionNetParser._get_summary,
             "Could not find summary"),
            # TODO: Handle "misc"?
            ("universes", self._get_universes,
             "Could not find universes"),
        )

        found = {}
        for keyword, extract, missing_message in extractors:
            value = extract(document)
            if not value:
                # Bail out at the first missing field; later extractors
                # are never invoked.
                return ParseResult.failure(missing_message)
            found[keyword] = value

        return ParseResult.success(**found)
示例#2
0
    def parse(self, page_contents):
        """Parse the html in page_contents for story information.

        Args:
            page_contents: HTML markup expected to contain an ``li.work``
                entry with heading, summary and fandom elements.

        Returns:
            ``ParseResult.success`` with ``title``, ``author``, ``summary``
            and ``universes`` (a frozenset of the fandom link strings,
            possibly empty) when the title, author and summary elements
            are present; otherwise ``ParseResult.failure`` with a message
            naming the first element that could not be found.
        """
        soup = BeautifulSoup(page_contents, "html.parser")

        # The first link inside the work heading is used as the title.
        title_tags = soup.select("li.work > div.header > h4.heading > a")
        if not title_tags:
            # Report a failure instead of raising IndexError on [0].
            return ParseResult.failure("Could not find story title")
        title = title_tags[0].string

        author_tags = soup.select(
            "li.work > div.header > h4.heading > a[rel=\"author\"]")
        if not author_tags:
            return ParseResult.failure("Could not find author name")
        author = author_tags[0].string

        # Only the first summary paragraph is used.
        summary_tags = soup.select("li.work > blockquote.summary > p")
        if not summary_tags:
            return ParseResult.failure("Could not find summary")
        summary = summary_tags[0].string

        universes_tags = soup.select(
            "li.work > div.header > h5.fandoms > a")
        # TODO: Handle crossovers better
        universes = frozenset(t.string for t in universes_tags)

        return ParseResult.success(
            title=title,
            author=author,
            summary=summary,
            universes=universes)