示例#1
0
    def _parse(self, course: NavigableString) -> ParseType:
        """Parses course to get its link & icon url, title, description
        counts and stores.
        :course: BeautifulSoup Object"""

        info = {
            "link": "",
            "icon": "",
            "title": "",
            "description": "",
            "counts": {}
        }

        info["link"] = course.a["href"]
        info["icon"] = course.a.img["src"]

        description: NavigableString = course.a.div
        info["title"] = description.div.get_text()
        info["description"] = description.p.get_text()

        counts: NavigableString = course.find("div", {"class": "courseCounts"})
        counts_data: ResultSet = counts.find_all("li")
        for data in counts_data:
            name: str = data.span.get_text().lower()
            val: str = data.find("p").get_text()
            info["counts"][name] = int(val.replace(",", ""))

        return info
示例#2
0
    def _parse_details(self, code: NavigableString) -> ParseType:
        """Parses a codeContainer and extracts all the info."""

        # Format of details ->
        # {votes: 1184, answers: 24077, post_link: <PostLink> title: <Title>,
        #  tags: [<Tags>, ...], author_name: <AuthorName>, author_link: <Link>,
        #  data_date: <DateTime>, avatar_link: <Link>}
        details: ParseType = {}
        post_stats: NavigableString = code.find("div", {"class": "postStats"})
        post_stats_children = list(post_stats.children)

        details["votes"] = post_stats_children[1].p.string
        # Note the spelling of <a class='postAnsewers'.
        details["answers"] = post_stats_children[3].p.string

        post_details: NavigableString = code.find("div",
                                                  {"class": "postDetails"})

        details["post_link"] = post_details.p.a["href"]
        details["title"] = post_details.p.a.string

        tags_wrapper: NavigableString = list(post_details.children)[3]
        tags: ResultSet = tags_wrapper.find_all("span")
        tag_list: List[str] = []
        for tag in tags:
            tag_list.append(tag.string)

        details["tags"] = tag_list

        author_details: NavigableString = code.find("div",
                                                    {"class": "authorDetails"})
        details["author_name"] = author_details.div.a.string
        details["author_link"] = author_details.div.a["href"]
        details["data_date"] = author_details.p["data-date"]
        details["avatar_link"] = list(author_details.children)[3].img["src"]

        return details
示例#3
0
 def unwrap_ul(li: element.NavigableString) -> WebToonChapter:
     """Build a WebToonChapter from a single list item.

     The chapter URL is the ``href`` of the ``<a>`` located via
     ``li.find('a')`` and the display name is the ``alt`` attribute of the
     ``<img>`` located via ``li.find('img')``; both are handed to
     ``WebToonChapter.from_url``.
     """
     chapter_url = li.find('a')['href']
     display_name = li.find('img')['alt']
     return WebToonChapter.from_url(chapter_url, display_name)