Exemplo n.º 1
0
    def get_blog_by_page(self, containerid, page, name):
        """Collect blog info for every card on the first ``page`` result pages.

        For each page index in ``range(page)`` the container index endpoint is
        requested with ``self.headers``, and every card in
        ``res['data']['cards']`` is handed to ``get_blog_info``. Whatever that
        helper returns is appended to the result unconditionally.

        Args:
            containerid: Value substituted into the ``containerid`` query
                parameter of the request URL.
            page: Number of pages to request; values above 50 are rejected.
            name: Passed through unchanged to ``get_blog_info``.

        Returns:
            ``False`` (after printing a message) when ``page`` is greater than
            50, otherwise the list of values returned by ``get_blog_info``.

        Raises:
            Any exception raised by the request, the JSON decoding, the payload
            lookup or ``get_blog_info`` propagates to the caller unchanged.
        """
        # Guard clause: refuse oversized requests before touching the network.
        if page > 50:
            print("页数不能大于50")
            return False

        url_template = 'https://m.weibo.cn/api/container/getIndex?containerid=%s&page=%s'
        collected = []
        # NOTE(review): page indices run 0 .. page-1, so the first request asks
        # for page=0 -- confirm this is what the endpoint expects.
        for page_index in range(page):
            response = requests.get(url_template % (containerid, page_index), headers=self.headers)
            payload = response.json()
            page_cards = payload['data']['cards']
            for card_index in range(len(page_cards)):
                progress = "-----正在爬取第" + str(page_index) + "页,第" + str(card_index + 1) + "条微博------"
                print(progress)
                collected.append(get_blog_info(page_cards, card_index, name, page_index))
        return collected
Exemplo n.º 2
0
    def get_blog_by_text(self, containerid, blog_text, name, max_retries=3):
        """Page through a container's cards looking for a blog containing ``blog_text``.

        Pages are requested one at a time starting from page 1. Every card is
        passed to ``get_blog_info``; cards for which it returns ``False`` are
        skipped, all others are collected. The search stops at the first blog
        whose ``mblog_text`` contains ``blog_text``, at the first blog for
        which ``checkTime(create_time, config.day)`` is ``False``, or when a
        page has no cards.

        Args:
            containerid: Value substituted into the ``containerid`` query
                parameter of the request URL.
            blog_text: Substring searched for in each blog's ``mblog_text``.
            name: Passed through unchanged to ``get_blog_info``.
            max_retries: Number of consecutive failed attempts at the same page
                that are tolerated before giving up.

        Returns:
            * ``blog_dict['mblog_id']`` of the first matching blog;
            * the list of blogs collected so far when ``checkTime`` reports
              ``False`` for a blog;
            * ``False`` when the server answers with HTTP 418 or when a page
              keeps failing ``max_retries`` times in a row;
            * ``None`` when a page contains no cards.
        """
        print('containerid: ', containerid)
        blog_list = []
        page = 1
        failures = 0
        while True:
            url = 'https://m.weibo.cn/api/container/getIndex?containerid=%s&page=%s' % (containerid, page)
            # Remember how much was collected before this page so that a failed
            # attempt can be rolled back instead of duplicated on retry.
            collected_before = len(blog_list)
            try:
                # A single request serves both the status check and the payload;
                # fetching twice doubled the traffic and left the second
                # response unchecked.
                response = requests.get(url)
                if response.status_code == 418:
                    print("访问太频繁,过会再试试吧")
                    return False
                cards = response.json()['data']['cards']
                print('page', page)
                print('cards: ', cards)
                if not cards:
                    print("没有任何微博哦!")
                    break
                for i in range(len(cards)):
                    print("-----正在爬取第" + str(page) + "页,第" + str(i+1) + "条微博------")
                    blog_dict = get_blog_info(cards, i, name, page)
                    if blog_dict is False:
                        continue
                    blog_list.append(blog_dict)
                    mblog_text = blog_dict['mblog_text']
                    create_time = blog_dict['create_time']
                    if blog_text in mblog_text:
                        print("成功找到相关微博")
                        return blog_dict['mblog_id']
                    elif checkTime(create_time, config.day) is False:
                        print("在配置的时间内没有找到相关微博")
                        return blog_list
                    time.sleep(config.sleep_time)
            except Exception as err:
                # Still best-effort, but bounded: the previous bare
                # ``except: pass`` retried the same page forever without any
                # delay and also swallowed KeyboardInterrupt.
                failures += 1
                del blog_list[collected_before:]
                print("第" + str(page) + "页抓取失败,稍后重试: " + repr(err))
                if failures >= max_retries:
                    return False
                time.sleep(config.sleep_time)
                continue
            failures = 0
            page += 1
            time.sleep(config.sleep_time)
        return None
Exemplo n.º 3
0
    def get_blog_by_text(self, containerid, blog_text, name, max_retries=3):
        """Page through a container's cards looking for a blog containing ``blog_text``.

        Pages are requested one at a time starting from page 1. Every card is
        passed to ``get_blog_info``; cards for which it returns ``False`` are
        skipped, all others are collected. The search stops at the first blog
        whose ``mblog_text`` contains ``blog_text``, at the first blog for
        which ``checkTime(create_time)`` is ``False``, or when a page has no
        cards.

        Args:
            containerid: Value substituted into the ``containerid`` query
                parameter of the request URL.
            blog_text: Substring searched for in each blog's ``mblog_text``.
            name: Passed through unchanged to ``get_blog_info``.
            max_retries: Number of consecutive failed attempts at the same page
                that are tolerated before giving up.

        Returns:
            * ``blog_dict['mblog_id']`` of the first matching blog;
            * the list of blogs collected so far when ``checkTime`` reports
              ``False`` for a blog;
            * ``False`` when the server answers with HTTP 418 or when a page
              keeps failing ``max_retries`` times in a row;
            * ``None`` when a page contains no cards.
        """
        blog_list = []
        page = 1
        failures = 0
        while True:
            url = 'https://m.weibo.cn/api/container/getIndex?containerid=%s&page=%s' % (containerid, page)
            # Remember how much was collected before this page so that a failed
            # attempt can be rolled back instead of duplicated on retry.
            collected_before = len(blog_list)
            try:
                # A single request serves both the status check and the payload;
                # fetching twice doubled the traffic and left the second
                # response unchecked.
                response = requests.get(url)
                if response.status_code == 418:
                    print("访问太频繁,过会再试试吧")
                    return False
                cards = response.json()['data']['cards']
                if not cards:
                    print("没有任何微博哦")
                    break
                for i in range(len(cards)):
                    print("-----正在爬取第" + str(page) + "页,第" + str(i+1) + "条微博------")
                    blog_dict = get_blog_info(cards, i, name, page)
                    # The sentinel must be tested on the helper's result and
                    # before appending: the old check looked at ``blog_list``
                    # (never False), so False was stored and then subscripted.
                    # Only this card is skipped so the rest of the page is
                    # still examined.
                    if blog_dict is False:
                        continue
                    blog_list.append(blog_dict)
                    mblog_text = blog_dict['mblog_text']
                    create_time = blog_dict['create_time']
                    if blog_text in mblog_text:
                        print("找到相关微博")
                        return blog_dict['mblog_id']
                    elif checkTime(create_time) is False:
                        print("没有找到相关微博")
                        return blog_list
            except Exception as err:
                # Still best-effort, but bounded: the previous bare
                # ``except: pass`` retried the same page forever without any
                # delay and also swallowed KeyboardInterrupt.
                failures += 1
                del blog_list[collected_before:]
                print("第" + str(page) + "页抓取失败,稍后重试: " + repr(err))
                if failures >= max_retries:
                    return False
                time.sleep(config.sleep_time)
                continue
            failures = 0
            page += 1
            time.sleep(config.sleep_time)
        return None