def parse(self, page_html):
    """Extract story metadata from the HTML in page_html.

    The page is parsed with BeautifulSoup's "html.parser" backend and the
    title, author, summary and universes are looked up in that order.  The
    first field whose extractor returns a falsy value stops the parse and
    yields ParseResult.failure(...) with a message naming the field;
    otherwise ParseResult.success(...) is returned carrying all four
    values as keyword arguments.
    """
    document = BeautifulSoup(page_html, "html.parser")

    # (keyword passed to ParseResult.success, extractor, failure message),
    # listed in the order the fields must be checked.
    steps = (
        ("title", FanfictionNetParser._get_title,
         "Could not find story title"),
        ("author", FanfictionNetParser._get_author,
         "Could not find author name"),
        ("summary", FanfictionNetParser._get_summary,
         "Could not find summary"),
        ("universes", self._get_universes,
         "Could not find universes"),
    )

    found = {}
    for field, extract, failure_message in steps:
        value = extract(document)
        if not value:
            return ParseResult.failure(failure_message)
        found[field] = value

    # TODO: Handle "misc"?
    return ParseResult.success(**found)
def parse(self, page_contents):
    """Parse the html in page_contents for story information.

    The markup is parsed with BeautifulSoup's "html.parser" backend and
    queried with CSS selectors rooted at ``li.work``.  Title, author and
    summary are each taken from the ``.string`` of the first element
    matching their selector; universes are the strings of every matching
    fandom link, collected into a frozenset.

    Returns ParseResult.success(title=..., author=..., summary=...,
    universes=...) when every field is found.  If a selector matches
    nothing, or the matched element has no single string, the method
    returns ParseResult.failure(...) with a message naming the missing
    field instead of raising IndexError or passing None along.
    """
    soup = BeautifulSoup(page_contents, "html.parser")

    def first_string(selector):
        """Return .string of the first match for selector, or None."""
        matches = soup.select(selector)
        # select() returns an empty list when nothing matches, so guard
        # the index rather than letting it raise IndexError.
        return matches[0].string if matches else None

    title = first_string("li.work > div.header > h4.heading > a")
    if not title:
        return ParseResult.failure("Could not find story title")

    author = first_string(
        "li.work > div.header > h4.heading > a[rel=\"author\"]")
    if not author:
        return ParseResult.failure("Could not find author name")

    summary = first_string("li.work > blockquote.summary > p")
    if not summary:
        return ParseResult.failure("Could not find summary")

    universes_tags = soup.select(
        "li.work > div.header > h5.fandoms > a")
    # TODO: Handle crossovers better
    # .string is None for a tag without exactly one string child; skip
    # those so the set never contains None.
    universes = frozenset(t.string for t in universes_tags if t.string)
    if not universes:
        return ParseResult.failure("Could not find universes")

    return ParseResult.success(
        title=title,
        author=author,
        summary=summary,
        universes=universes)