Exemplo n.º 1
0
    def fetchItemList(self):
        """Parse ``self.mainHtml`` and append one article per listing entry to ``self.itemList``.

        The listing is looked up as the element with class ``items_area``
        nested inside the element with class ``alllist``; every descendant
        with class ``item`` is treated as one entry. For each entry a
        ``CnbetaArticle`` is filled with:

        * ``title`` -- text of the ``<a>`` inside the ``title`` element
        * ``url``   -- ``self.targetMainUrl`` concatenated with that link's ``href``
        * ``cover`` -- ``src`` of the ``<img>`` inside ``div.pic > a``
        * ``brief`` -- inner markup of the ``<p>`` inside ``span.newsinfo``

        Entries missing any of these pieces are skipped (best effort) and the
        reason is logged. If the listing container itself is absent, nothing
        is appended and the method returns after logging.

        Returns:
            None. Results are accumulated in ``self.itemList``.
        """
        htmlSoup = BeautifulSoup(self.mainHtml, "html.parser")

        # find() returns None when nothing matches, so each level is checked
        # before descending instead of chaining the calls blindly.
        allListDiv = htmlSoup.find(attrs={"class": "alllist"})
        listDiv = None
        if allListDiv is not None:
            listDiv = allListDiv.find(attrs={"class": "items_area"})
        if listDiv is None:
            logutil.log("CnbetaSpider", "getItemList: item list container not found")
            return

        for item in listDiv.find_all(attrs={"class": "item"}):
            try:
                titleTag = item.find(attrs={"class": "title"}).find("a")
                contentTag = item.find("span", attrs={"class": "newsinfo"}).find("p")
                coverTag = item.find("div", attrs={"class": "pic"}).find("a").find("img")

                # get_text() also copes with nested markup inside the link,
                # where joining .contents would fail on non-string children.
                title = titleTag.get_text()
                url = self.targetMainUrl + titleTag["href"]
                cover = coverTag["src"]
                # Inner markup of the paragraph as text, without a bytes round-trip.
                brief = contentTag.decode_contents()
            except (AttributeError, KeyError, TypeError) as e:
                # AttributeError/TypeError: an expected tag was missing (None);
                # KeyError: the tag exists but lacks href/src.
                logutil.log("CnbetaSpider", "getItemList skipped item: %r" % (e,))
                continue

            article = CnbetaArticle()
            article.title = title
            article.url = url
            article.cover = cover
            article.brief = brief
            self.itemList.append(article)

        logutil.log("CnbetaSpider", "getItemList finished")