Пример #1
0
    def sectionInfo(self, bookInfo, muluUrl, bookMuluSoup):
        """Build the chapter list of a book from its catalogue ("mulu") page.

        Finds the first ``<ul>`` whose leading CSS class is ``mulu_list`` and
        registers one chapter for every child node that contains an ``<a>``
        link with both a resolvable ``href`` and a text title.

        Args:
            bookInfo: Book description; stored on the returned model unchanged.
            muluUrl: URL of the catalogue page; handed to
                ``Utils.absoluteUrl`` together with each link's ``href``.
            bookMuluSoup: Parsed catalogue page (assumed to be a
                BeautifulSoup object -- TODO confirm against the caller).

        Returns:
            A ``SectionInfoModel`` with at least one chapter, or ``None`` when
            the list is missing or empty, or when no child yields a usable
            link.
        """
        Log.I("[I] on get sectionInfo")
        model = SectionInfoModel()
        model.bookInfo = bookInfo

        muluList = bookMuluSoup.find(
            lambda tag: tag.name == "ul"
            and tag.has_attr("class")
            and tag["class"][0] == "mulu_list"
        )
        # Identity test: "== None" would go through the node's own equality.
        if muluList is None or not muluList.contents:
            return None

        added = False
        for child in muluList.contents:
            anchor = child.find("a")
            # Text nodes are plain strings, so find() is str.find and yields an
            # int (-1 or an index); element nodes yield None when they hold no
            # <a>. Neither is a link, and indexing them would raise TypeError.
            if anchor is None or isinstance(anchor, int):
                continue

            # .get() avoids a KeyError on an <a> that has no href attribute.
            rawHref = anchor.get("href")
            href = None
            if rawHref is not None:
                href = Utils.absoluteUrl(rawHref, muluUrl, None)
            title = anchor.string

            if href is not None and title is not None:
                model.addChapter(str(href), str(title))
                added = True
            else:
                Log.W(" on getSection found invalid tag " + str(anchor) + ", href=" + str(href) + ",title=" + str(title))

        if not added:
            return None

        return model
Пример #2
0
    def visit(self, url):
        """Crawl one page: record its links and download each book page found.

        Marks ``url`` as being visited, fetches it through ``Utils.soupUrl``,
        passes the parsed page to ``addUrlsFromSoup``, filters the result with
        ``addBookPageUrls`` and calls ``downloadBook`` for every match. The
        visiting mark is removed only when the whole sequence completes.

        NOTE(review): both early exits below return without calling
        ``removeVisitUrl`` -- confirm that leaving the mark in place is
        intended.
        """
        urlText = str(url)
        Log.I("[I] on visit() " + urlText)
        self.setVisitingUrl(url)

        pageSoup = Utils.soupUrl(url)
        if not pageSoup:
            Log.W("[W] on visit() soup is None " + urlText)
            return
        Log.I("[I] on visit() did get soup")

        # Record every URL found on this page (the original note says they are
        # put into the database).
        foundUrls = self.addUrlsFromSoup(pageSoup, url)
        if not Utils.isValidArr(foundUrls):
            Log.W("[W] on visit() urls not found")
            return

        # Pick out the URLs that match book pages.
        bookPages = self.addBookPageUrls(foundUrls)

        # Walk through the book pages.
        if len(bookPages) > 0:
            for page in bookPages:
                self.downloadBook(page)

        self.removeVisitUrl(url)
        Log.I("[I] on visit() finished " + urlText)
Пример #3
0
    def _checkTLJTag(self, tag, model):
        """Copy the first recognised value found in ``tag`` onto ``model``.

        Each known label is tried in turn through ``_getIntFromTLJTag``. The
        first label that yields a value has that value stored on ``model``
        under the attribute name paired with the label, and the search stops.
        A warning is logged when no label yields a value.
        """
        # (label text searched for in the tag, attribute set on the model)
        labelToAttr = (
            ("总点击数:", "clickCount"),
            ("本月点击:", "monthClickCount"),
            ("本周点击:", "weekClickCount"),
            ("收 藏 数:", "collectionCount"),
            ("总推荐数:", "recommendCount"),
            ("本月推荐:", "monthRecommendCount"),
            ("本周推荐:", "weekRecommendCount"),
            ("文章状态:", "status"),
        )

        for label, attrName in labelToAttr:
            parsed = self._getIntFromTLJTag(tag, label)
            if parsed is None:
                continue
            # Only the first matching label is applied.
            setattr(model, attrName, parsed)
            return

        Log.W("[W] 未完全设置tljtag " + str(tag))
Пример #4
0
    def _checkMetaTag(self, tag, model):
        """Store the content of a recognised ``og:`` meta tag on ``model``.

        ``tag["property"]`` selects the destination attribute. The update time
        is converted with ``Utils.getTimestamp`` and the category with
        ``self.bookCategory``; every other value is stored unchanged. A
        warning is logged when the property is not one of the known names.
        """
        # meta "property" value -> attribute set on the model
        propertyToAttr = {
            "og:novel:category": "category",
            "og:novel:book_name": "title",
            "og:novel:author": "author",
            "og:description": "des",
            "og:novel:update_time": "updateTime",
            "og:image": "bookImg",
        }

        prop = tag["property"]
        attrName = next(
            (attr for name, attr in propertyToAttr.items() if prop == name),
            None,
        )
        if attrName is None:
            Log.W("[W] 未完全设置metatag " + str(tag))
            return

        content = tag["content"]
        if attrName == "updateTime":
            content = Utils.getTimestamp(content, "%Y-%m-%d %H:%M")
        elif attrName == "category":
            content = self.bookCategory(content)
        setattr(model, attrName, content)