Пример #1
0
    def test_weibodata_oper(self):
        """Exercise the WbDataOper crawl-flag helpers against one inserted row.

        A row whose ``weibo_id`` is ``FAKE_ID`` is inserted with raw SQL. The
        test then checks that the row can be fetched by mid and that the
        comment and repost "not crawled" queries each return exactly one
        entry. After marking both as crawled for ``FAKE_ID``, both queries
        must return nothing.
        """
        table_name = weibo_data.name
        insert_sql = "insert into {} ({}.weibo_id) values ('".format(
            table_name, table_name) + FAKE_ID + "')"
        db_session.execute(insert_sql)

        fetched = WbDataOper.get_wb_by_mid(FAKE_ID)
        assert fetched is not None

        # Before flagging: the row shows up in both pending queries.
        comments_pending = WbDataOper.get_weibo_comment_not_crawled()
        assert len(comments_pending) == 1
        reposts_pending = WbDataOper.get_weibo_repost_not_crawled()
        assert len(reposts_pending) == 1

        WbDataOper.set_weibo_comment_crawled(FAKE_ID)
        WbDataOper.set_weibo_repost_crawled(FAKE_ID)

        # After flagging: neither pending query returns anything.
        comments_pending = WbDataOper.get_weibo_comment_not_crawled()
        assert len(comments_pending) == 0
        reposts_pending = WbDataOper.get_weibo_repost_not_crawled()
        assert len(reposts_pending) == 0
Пример #2
0
    def test_weibodata_oper(self):
        """Check the comment/repost crawl flags handled by WbDataOper.

        Inserts one row keyed by ``FAKE_ID`` through a raw SQL statement,
        verifies it is retrievable by mid and counted once by each
        "not crawled" query, flags it as crawled for comments and reposts,
        and verifies both queries then return zero rows.
        """
        name = weibo_data.name
        statement = "insert into {} ({}.weibo_id) values ('".format(name, name)
        statement = statement + FAKE_ID + "')"
        db_session.execute(statement)

        assert WbDataOper.get_wb_by_mid(FAKE_ID) is not None

        # Comment query first, then repost query, matching the flag order below.
        pending_queries = (
            WbDataOper.get_weibo_comment_not_crawled,
            WbDataOper.get_weibo_repost_not_crawled,
        )
        for query in pending_queries:
            assert len(query()) == 1

        WbDataOper.set_weibo_comment_crawled(FAKE_ID)
        WbDataOper.set_weibo_repost_crawled(FAKE_ID)

        for query in pending_queries:
            assert len(query()) == 0
Пример #3
0
def execute_repost_task():
    """Crawl the repost pages of every weibo not yet repost-crawled.

    Fetches the pending records from
    ``WbDataOper.get_weibo_repost_not_crawled()``, logs how many there are,
    and calls ``crawl_repost_page`` with each record's ``weibo_id`` and
    ``uid``.
    """
    # Each weibo's own url is treated as the original url here; analysing
    # from the root url would be an alternative approach.
    pending = WbDataOper.get_weibo_repost_not_crawled()
    pending_count = len(pending)
    crawler.info('There are {} repost urls have to be crawled'.format(pending_count))

    for record in pending:
        crawl_repost_page(record.weibo_id, record.uid)
Пример #4
0
def execute_repost_task():
    """Dispatch one repost-crawl task per weibo not yet repost-crawled.

    Fetches the pending records from
    ``WbDataOper.get_weibo_repost_not_crawled()``, logs how many there are,
    and for each one sends a ``tasks.repost.crawl_repost_page`` task through
    ``app.send_task`` with ``(weibo_id, uid)`` as arguments, using the
    ``repost_crawler`` queue and the ``repost_info`` routing key.
    """
    # Each weibo's own url is treated as the original url here; analysing
    # from the root url would be an alternative approach.
    pending = WbDataOper.get_weibo_repost_not_crawled()
    pending_count = len(pending)
    crawler.info('There are {} repost urls have to be crawled'.format(pending_count))

    for record in pending:
        task_args = (record.weibo_id, record.uid)
        app.send_task(
            'tasks.repost.crawl_repost_page',
            args=task_args,
            queue='repost_crawler',
            routing_key='repost_info',
        )