示例#1
0
	def analyse_index(self):
		"""Collect chapter links and the novel title from the index page.

		Every target of a ``<li><a href="...">`` tag found in ``self.page``
		is appended to ``self.chapters`` in document order. The title is
		taken from the first ``articlename='...'`` occurrence, transcoded
		from GBK to UTF-8 (undecodable bytes are dropped) and echoed through
		``out.printstr``.

		Returns:
			The result of ``html_translate.translate`` applied to the title
			followed by ``<br/>``, or applied to an empty string when no
			title is found.
		"""
		out.printstr("Analysing index...\n")

		page = self.page

		#Get links
		# A capture group plus finditer() walks the page once; cutting the
		# page after each hit copied the remaining text on every iteration,
		# and the capture group replaces the hand-counted [13:-2] offsets.
		url_exp = re.compile("<li><a href=\"(\\S+?)\">",re.I|re.S)
		for link_match in url_exp.finditer(page):
			self.chapters.append(link_match.group(1))

		#Get title
		ret = ""
		title_exp = re.compile("articlename='(\\S+?)'",re.I|re.S)
		title_match = title_exp.search(page)
		if title_match is not None:
			ret = title_match.group(1).decode('gbk','ignore').encode('utf-8')
			out.printstr("Novel title : " + ret + "\n")
			ret = ret + "<br/>"

		return html_translate.translate(ret)
示例#2
0
	def analyse_chapter(self,index):
		"""Extract one chapter (heading plus body text) from ``self.page``.

		The heading is the content of the first ``<h1>`` element. The body is
		the text between the ``htmlContent`` div and the following ``ad00``
		div. Both parts are transcoded from GBK to UTF-8 (undecodable bytes
		are dropped).

		Args:
			index: Chapter number, formatted with ``%i`` into the heading
				prefix.

		Returns:
			The result of ``html_translate.translate`` on the assembled
			markup, or ``None`` when the heading, the opening content div or
			the closing ad div cannot be found in the page.
		"""
		page = self.page

		#Get title
		title_exp = re.compile("<h1>(.+?)</h1>",re.I|re.S)
		m = title_exp.search(page)
		if m is None:
			return None
		title = m.group(1).decode('gbk','ignore').encode('utf-8')
		out.printstr("Chapter title : " + title + "\n")
		ret = "第%i章 "%(index) + title + "<br/>"
		# Only the heading line has its spaces turned into &nbsp;; the body
		# is appended after this replacement.
		ret = ret.replace(" ","&nbsp;")
		page = page[m.end() :]

		#Get chapter
		body_start_exp = re.compile("<div id=\"htmlContent\" class=\"contentbox\">",re.I|re.S)
		m = body_start_exp.search(page)
		if m is None:
			# Missing content container: report failure the same way as a
			# missing heading instead of raising AttributeError on None.
			return None
		page = page[m.end() :]
		# NOTE(review): the "()" in this pattern is an empty regex group, not
		# literal parentheses, so it matches anything starting with
		# "show_style". Kept unchanged because only the match start is used.
		body_end_exp = re.compile("<div class=\"ad00\"><script>show_style()",re.I|re.S)
		m = body_end_exp.search(page)
		if m is None:
			return None
		body = page[0 : m.start()]
		ret = ret + body.decode('gbk','ignore').encode('utf-8')
		ret = ret + "<br/>"

		out.printstr("Decoding...")
		# Drop runs of four &nbsp; entities from the whole chapter markup
		# (presumably paragraph indentation - TODO confirm).
		ret = ret.replace("&nbsp;&nbsp;&nbsp;&nbsp;","")
		ret = html_translate.translate(ret)

		return ret
示例#3
0
	def get_data(self,index):
		"""Collect the text of every ``<cc>...</cc>`` section of ``self.page``.

		Each section is appended to the result behind a ``<br>`` separator.
		After every section the accumulated text is cut at its first
		``</div>`` and has the text of its first ``<a ...>`` and first
		``</a>`` match removed wherever that text occurs. When a ``<cc>`` has
		no closing ``</cc>``, the rest of the page is appended as-is and the
		scan stops without that clean-up pass.

		Args:
			index: Not used by this method.

		Returns:
			The result of ``html_translate.translate`` applied to the
			collected text.
		"""
		ret = ""
		# Named `remaining` rather than `next` so the builtin is not shadowed.
		remaining = self.page
		cc = re.compile("<cc>",re.I|re.S)
		cc_end = re.compile("</cc>",re.I|re.S)
		div = re.compile("<div.*?>",re.I|re.S)
		div_end = re.compile("</div>",re.I|re.S)
		a = re.compile("<a.*?>",re.I|re.S)
		a_end = re.compile("</a>",re.I|re.S)

		#Get data
		while True:
			#cc
			start = cc.search(remaining)
			if start is None:
				break
			# Every cut skips one extra character next to the tag
			# (presumably a line break in the source markup - TODO confirm).
			remaining = remaining[start.end() + 1 :]
			#div
			# NOTE(review): this search is not limited to the current <cc>
			# section; it jumps past the next <div ...> anywhere in the
			# remaining page. Left as in the original - verify intent.
			start = div.search(remaining)
			if start is not None:
				remaining = remaining[start.end() + 1 :]

			#/cc
			end = cc_end.search(remaining)
			if end is None:
				ret = ret + remaining
				break
			ret = ret + "<br>"
			# Clamp the stop index: with </cc> at offset 0 the unclamped
			# value is -1, which would slice in almost the whole remaining
			# page instead of an empty section.
			ret = ret + remaining[0 : max(end.start() - 1, 0)]
			remaining = remaining[end.end() + 1:]

			#/div
			end = div_end.search(ret)
			if end is not None:
				ret = ret[0 : end.start()]

			#a
			place = a.search(ret)
			if place is not None:
				ret = ret.replace(place.group(0),"")

			#/a
			place = a_end.search(ret)
			if place is not None:
				ret = ret.replace(place.group(0),"")
		out.printstr("Decoding...")
		ret = html_translate.translate(ret)
		return ret