def test_weibodata_oper(self):
    """Exercise WbDataOper's crawl-state round-trip for one fake weibo row.

    Inserts a row keyed by FAKE_ID, checks it is visible and pending in both
    the comment and repost "not crawled" queues, marks it crawled, and checks
    both queues drain.
    """
    # Single format call instead of the original format()+concatenation mix.
    # NOTE(review): the id is still interpolated into raw SQL; acceptable for
    # a test using a trusted constant, but a parameterized query would be
    # safer if this pattern is ever copied elsewhere.
    db_session.execute(
        "insert into {} ({}.weibo_id) values ('{}')".format(
            weibo_data.name, weibo_data.name, FAKE_ID))
    # The fresh row must be retrievable by mid and pending in both queues.
    assert WbDataOper.get_wb_by_mid(FAKE_ID) is not None
    assert len(WbDataOper.get_weibo_comment_not_crawled()) == 1
    assert len(WbDataOper.get_weibo_repost_not_crawled()) == 1
    # Marking the row crawled must remove it from both queues.
    WbDataOper.set_weibo_comment_crawled(FAKE_ID)
    WbDataOper.set_weibo_repost_crawled(FAKE_ID)
    assert len(WbDataOper.get_weibo_comment_not_crawled()) == 0
    assert len(WbDataOper.get_weibo_repost_not_crawled()) == 0
def test_weibodata_oper(self):
    """Round-trip a fake weibo record through WbDataOper's crawl-state API."""
    table = weibo_data.name
    # Builds the identical INSERT statement the original produced.
    db_session.execute(
        "insert into %s (%s.weibo_id) values ('%s')" % (table, table, FAKE_ID))
    # The inserted row is visible by mid and pending in both crawl queues.
    assert WbDataOper.get_wb_by_mid(FAKE_ID) is not None
    pending_comments = WbDataOper.get_weibo_comment_not_crawled()
    pending_reposts = WbDataOper.get_weibo_repost_not_crawled()
    assert len(pending_comments) == 1
    assert len(pending_reposts) == 1
    # Once flagged as crawled, neither queue holds the row any longer.
    WbDataOper.set_weibo_comment_crawled(FAKE_ID)
    WbDataOper.set_weibo_repost_crawled(FAKE_ID)
    assert not WbDataOper.get_weibo_comment_not_crawled()
    assert not WbDataOper.get_weibo_repost_not_crawled()
def execute_repost_task():
    """Crawl the repost page of every weibo whose reposts are still pending."""
    # Regard the current weibo url as the original url; analysis could also
    # start from the root url instead.
    pending = WbDataOper.get_weibo_repost_not_crawled()
    crawler.info('There are {} repost urls have to be crawled'.format(
        len(pending)))
    for datum in pending:
        crawl_repost_page(datum.weibo_id, datum.uid)
def execute_repost_task():
    """Fan repost-page crawling out to Celery workers, one task per weibo."""
    # Regard the current weibo url as the original url; analysis could also
    # start from the root url instead.
    pending = WbDataOper.get_weibo_repost_not_crawled()
    crawler.info('There are {} repost urls have to be crawled'.format(
        len(pending)))
    for datum in pending:
        app.send_task(
            'tasks.repost.crawl_repost_page',
            args=(datum.weibo_id, datum.uid),
            queue='repost_crawler',
            routing_key='repost_info',
        )