Exemplo n.º 1
0
	def run(self):
		print self.city.name, "线程开始抓取"
		for i in xrange(1, 2):
			articles = []
			url = host + url_args.format(fid=str(self.city.fid), page=str(i))
			home_page = Base.get_home(url)
			article_infos = Base.get_article_infos(home_page)
			for article_info in article_infos:
				time_at = do_time.str2datetime(article_info['time_at'], "%Y-%m-%d %H:%M")
				tid = int(article_info['tid'])
				if time_at <= self.latest_time_at and article_info['_type'] != 'hot':
					Article.add_all(articles)
					print self.city.name, "结束"
					return
				else:
					if tid not in self.tids:
						print "抓取", self.city.name, i, "页", "帖子", article_info['title']
						content = Base.get_content(article_info['url'])
						articles.append(
							Article(
								city_id=self.city.id,
								tid=tid,
								type=article_info['_type'],
								title=article_info['title'],
								time_at=time_at,
								content=content,
								author=article_info['author'],
								reply_number=article_info['reply_number'],
								read_number=article_info['read_number'],
								url=article_info['url'],
							))
			Article.add_all(articles)
		print self.city.name, "结束"