Пример #1
0
	def getDetailPage(self):
		"""Fetch each article listed in ``self.saveFile`` and dump it to a text file.

		Every line of ``self.saveFile`` is expected to look like ``title|url``.
		For each such line the page at ``url`` is downloaded, the date/source
		spans, the review block and the body paragraphs are extracted, and the
		result is written to ``data/finance<id>.txt`` where ``<id>`` is derived
		from the part of the URL after its first comma.

		Lines without a ``|`` separator (e.g. blank lines) are skipped.
		Returns nothing; the output is the set of files written.
		"""
		content = FileUtil.readlines(self.saveFile)
		for s in content:
			fields = s.split('|')
			if len(fields) < 2:
				# Blank or malformed line: there is no URL to fetch, so skip it
				# instead of aborting the whole run with an IndexError.
				continue
			title = fields[0]  # title
			url = fields[1]  # url
			print(url)
			page = HttpUtil.getPage(url)
			arr = HtmlUtil.select_all(page, '.newText .Info span')
			date = ''
			source = ''
			# Pick the publication time / source out of the info spans.
			for k in arr:
				if k is not None:
					text = str(k)
					if "年" in text:
						date = text
					if "来源" in text:
						source = text
			content_review = HtmlUtil.select_v(page, '#ContentBody .c_review')

			if content_review is None:
				content_review = ''
			arr = HtmlUtil.select_text(page, '#ContentBody p')
			# Write the record to a file.
			# NOTE(review): assumes the URL contains a comma and that its last
			# six characters (presumably extension plus trailing newline) are
			# not part of the article id -- confirm against the saved URL list.
			newFile = "data/finance" + url.split(',')[1][:-6] + ".txt"
			# Presumably creates/empties the output file before appending -- confirm.
			FileUtil.put(newFile, '')
			FileUtil.appendline(newFile, title + "\n")
			FileUtil.appendline(newFile, url)
			FileUtil.appendline(newFile, date + "\n")
			FileUtil.appendline(newFile, source + "\n")
			FileUtil.appendline(newFile, content_review + "\n")
			for k in arr:
				try:
					FileUtil.appendline(newFile, str(k))
				except Exception:
					# Best effort: a paragraph that cannot be converted or
					# written is skipped, but KeyboardInterrupt/SystemExit
					# are no longer swallowed.
					continue
Пример #2
0
	def getDetailPage(self):
		srcFile=TimeUtil.prefix()+".txt"
		content=FileUtil.readlines(srcFile)
		for str in content:
			url=str.split('|')[1]
			print url