Пример #1
0
    def get_licaishi_viewpoint_list(self, user_id):
        """
        Crawl the viewpoint pages of a licaishi (financial adviser) user.

        Starting at page 1, each page is parsed with
        ``_get_licaishi_viewpoint_list_in_one_page`` and every returned item
        is persisted through its ``to_mysql()`` method.  Crawling stops when
        the last allowed page has been processed, or as soon as the last item
        of a page compares older than the newest publish time returned by
        ``_get_lastest_publish_time``.

        :param user_id: id substituted into ``self.url`` to build page URLs
        :return: None
        """
        # Fetch the first page; it is also used to read the page count.
        url = self.url.format(user_id=user_id, pid=1)
        r = get_requests(url, has_proxy=False)
        soup = bs(r.text, "lxml")

        # Total number of pages, as extracted from the first page.
        page_count = self._get_page_count(soup)

        # Newest publish time already known for this user (per the original
        # author's comment this comes from the database).
        publish_time_lastest = self._get_lastest_publish_time(mysql_table_licaishi_viewpoint, user_id)

        # Never go past the site's page count nor the configured cap.
        last_page = min(page_count, self.max_page_count)

        current_page = 1
        while current_page <= last_page:
            print("Page:%d / %d" % (current_page, page_count))
            article_list_one_page = self._get_licaishi_viewpoint_list_in_one_page(soup, user_id)

            # Store into MySQL.  Per the original author's note, no existence
            # check is needed because a duplicate is expected to be rejected
            # inside to_mysql() -- not verifiable from this method alone.
            for archive in article_list_one_page:
                archive.to_mysql()

            # Stop once this page reaches articles that are already stored.
            if article_list_one_page:
                oldest_on_page = article_list_one_page[-1]
                # NOTE(review): this is a plain string comparison; it assumes
                # publish_time is a sortable timestamp string and that
                # publish_time_lastest is not None -- TODO confirm.
                if oldest_on_page.publish_time < str(publish_time_lastest):
                    print(encode_wrap("{}:已经获取到最新的微博了".format(user_id)))
                    break

            current_page += 1
            if current_page > last_page:
                # Last page already handled: skip the pointless sleep and the
                # request for a page that would never be parsed.
                break

            wait_time = self._get_wait_time()
            time.sleep(wait_time)
            print("Page:{}   Wait time:{}".format(current_page, wait_time))

            # Move on to the next page.
            url = self.url.format(user_id=user_id, pid=current_page)
            r = get_requests(url, has_proxy=False)
            soup = bs(r.text, "lxml")