def get_licaishi_viewpoint_list(self, user_id):
    """Crawl the viewpoint list of a financial adviser (licaishi) and store it.

    Pages are fetched one at a time starting from page 1. Every article
    parsed from a page is written to MySQL via its ``to_mysql()`` method.
    Crawling stops when either

    * the last article on the current page is older than the newest
      publish time already recorded for this user, or
    * ``min(page_count, self.max_page_count)`` pages have been processed.

    :param user_id: identifier substituted into ``self.url`` and passed to
        the page parser and the "latest publish time" lookup.
    :return: None. Results are persisted as a side effect.
    """
    url = self.url.format(user_id=user_id, pid=1)
    r = get_requests(url, has_proxy=False)
    soup = bs(r.text, "lxml")

    # Total number of pages, read from the first page.
    page_count = self._get_page_count(soup)

    # Newest publish time already stored for this user. It is compared as a
    # string below, so convert once here instead of on every iteration.
    # NOTE(review): if the lookup can return None this becomes the literal
    # "None" -- confirm how an empty table is represented.
    newest_stored = str(
        self._get_lastest_publish_time(mysql_table_licaishi_viewpoint, user_id)
    )

    # Never crawl past the site's page count nor past the configured cap.
    last_page = min(page_count, self.max_page_count)

    current_page = 1
    while current_page <= last_page:
        print("Page:%d / %d" % (current_page, page_count))
        articles = self._get_licaishi_viewpoint_list_in_one_page(soup, user_id)

        # Persist every article. Per the original author's note, no
        # existence check is needed: inserting a duplicate raises and the
        # row is simply not inserted (to_mysql is not visible here -- verify
        # that it handles that exception itself).
        for archive in articles:
            archive.to_mysql()

        # If the last article on this page is older than what is already
        # stored, everything further back has been crawled before.
        # Presumably publish_time is a string timestamp that sorts
        # lexicographically -- TODO confirm.
        if articles and articles[-1].publish_time < newest_stored:
            print(encode_wrap("{}:已经获取到最新的微博了".format(user_id)))
            break

        current_page += 1
        if current_page > last_page:
            # No further page will be processed, so skip the delay and the
            # download of a page that would never be read.
            break

        # Pause between requests before moving on.
        wait_time = self._get_wait_time()
        time.sleep(wait_time)
        print("Page:{} Wait time:{}".format(current_page, wait_time))

        # Fetch the next page.
        url = self.url.format(user_id=user_id, pid=current_page)
        r = get_requests(url, has_proxy=False)
        soup = bs(r.text, "lxml")