def analyse_index(self):
    """Collect chapter links from the index page and return the novel title.

    Reads ``self.page``.  The ``href`` value of every
    ``<li><a href="...">`` tag is appended to ``self.chapters`` in page
    order.  The title is taken from the first ``articlename='...'``
    occurrence, decoded as GBK (undecodable bytes ignored), re-encoded as
    UTF-8, reported through ``out.printstr`` and suffixed with ``<br/>``.

    Returns:
        Whatever ``html_translate.translate`` returns for the title
        markup, or for an empty string when no title is present.
    """
    ret = ""
    out.printstr("Analysing index...\n")

    # Get links
    url_exp = re.compile("<li><a href=\"\\S+?\">", re.I | re.S)
    # finditer scans the page once; re-slicing the page after every match
    # copied the remaining text each time.
    for m in url_exp.finditer(self.page):
        # Drop the 13-character '<li><a href="' prefix and the '">' suffix.
        self.chapters.append(m.group()[13:-2])

    # Get title
    title_exp = re.compile("articlename='\\S+?'", re.I | re.S)
    m = title_exp.search(self.page)
    if m is not None:
        # Drop the 13-character "articlename='" prefix and the closing quote.
        ret = m.group()[13:-1]
        ret = ret.decode('gbk', 'ignore').encode('utf-8')
        out.printstr("Novel title : " + ret + "\n")
        # NOTE(review): the "<br/>" suffix is only added when a title was
        # found - confirm this is the intended nesting.
        ret = ret + "<br/>"
    return html_translate.translate(ret)
def analyse_chapter(self,index):
    """Extract one chapter (title plus body) from ``self.page``.

    The title is the content of the first ``<h1>...</h1>`` element.  The
    body is the text between the ``htmlContent`` container div and the
    following ``ad00`` div.  Both are decoded as GBK (undecodable bytes
    ignored) and re-encoded as UTF-8.

    Args:
        index: Chapter number, formatted with ``%i`` into the heading
            prefix placed in front of the title.

    Returns:
        Whatever ``html_translate.translate`` returns for the assembled
        markup, or ``None`` when the title or either body marker cannot
        be found in the page.
    """
    page = self.page

    # Get title
    title_exp = re.compile("<h1>.+?</h1>", re.I | re.S)
    m = title_exp.search(page)
    if m is None:
        return None
    # Strip the surrounding "<h1>" (4 chars) and "</h1>" (5 chars).
    title = m.group()[4:-5]
    ret = title.decode('gbk', 'ignore').encode('utf-8')
    out.printstr("Chapter title : " + ret + "\n")
    ret = "第%i章 " % (index) + ret
    ret = ret + "<br/>"
    # NOTE(review): as written this replaces a space with a space - confirm
    # whether the search text was meant to be an entity or a different
    # whitespace character.
    ret = ret.replace(" ", " ")
    page = page[m.end():]

    # Get chapter
    chapter_exp = re.compile("<div id=\"htmlContent\" class=\"contentbox\">", re.I | re.S)
    m = chapter_exp.search(page)
    if m is None:
        # Content container missing: report failure the same way as a
        # missing title instead of raising AttributeError on None.
        return None
    page = page[m.end():]

    # The "()" in this pattern is an empty regex group, not literal
    # parentheses; only the start of the match is used below.
    chapter_exp = re.compile("<div class=\"ad00\"><script>show_style()", re.I | re.S)
    m = chapter_exp.search(page)
    if m is None:
        # End marker missing: the body cannot be delimited.
        return None
    page = page[:m.start()]

    ret = ret + page.decode('gbk', 'ignore').encode('utf-8')
    ret = ret + "<br/>"
    out.printstr("Decoding...")
    ret = ret.replace(" ", "")
    ret = html_translate.translate(ret)
    return ret
def get_data(self,index):
    """Gather the text found inside the ``<cc>`` blocks of ``self.page``.

    The page is consumed from one ``<cc>`` tag to the next.  Inside each
    block the first ``<div ...>`` tag found in the remaining text is
    skipped, and the text up to the matching ``</cc>`` is appended to
    the result after a ``<br>`` separator.  After each block the
    accumulated text is cut at its first ``</div>``, and every occurrence
    of the first ``<a ...>`` tag and of ``</a>`` is removed.  When a
    ``<cc>`` has no closing tag, the rest of the page is appended as-is
    and scanning stops.

    Args:
        index: Accepted but not used by this method.

    Returns:
        Whatever ``html_translate.translate`` returns for the
        accumulated text.
    """
    cc_open = re.compile("<cc>", re.I | re.S)
    cc_close = re.compile("</cc>", re.I | re.S)
    div_open = re.compile("<div.*?>", re.I | re.S)
    div_close = re.compile("</div>", re.I | re.S)
    anchor_open = re.compile("<a.*?>", re.I | re.S)
    anchor_close = re.compile("</a>", re.I | re.S)

    collected = ""
    remaining = self.page

    # Get data
    while True:
        # cc
        opening = cc_open.search(remaining)
        if opening is None:
            break
        # The +1 drops one extra character after the tag.
        remaining = remaining[opening.end() + 1:]

        # div
        inner_div = div_open.search(remaining)
        if inner_div is not None:
            remaining = remaining[inner_div.end() + 1:]

        # /cc
        closing = cc_close.search(remaining)
        if closing is None:
            collected += remaining
            break
        # The -1 drops the character just before "</cc>".
        # NOTE(review): if "</cc>" is at offset 0 the slice end becomes -1
        # and keeps all but the last character - confirm this is acceptable.
        collected += "<br>" + remaining[:closing.start() - 1]
        remaining = remaining[closing.end() + 1:]

        # NOTE(review): the clean-up below is assumed to run once per
        # <cc> block (inside the loop) - confirm the nesting.
        # /div
        cut = div_close.search(collected)
        if cut is not None:
            collected = collected[:cut.start()]

        # a
        tag = anchor_open.search(collected)
        if tag is not None:
            collected = collected.replace(tag.group(), "")

        # /a
        tag = anchor_close.search(collected)
        if tag is not None:
            collected = collected.replace(tag.group(), "")

    out.printstr("Decoding...")
    return html_translate.translate(collected)