Exemplo n.º 1
0
def auto_crawl_bangumi():
    """Queue the two bangumi ranking pages for crawling.

    Logs the task name, then appends each ranking URL, in order, to the
    ``bangumiAndDonghua:start_urls`` Redis list.
    """
    task_name = "生成番剧国创待爬链接"
    logger.info(task_name)

    queue_key = "bangumiAndDonghua:start_urls"
    ranking_pages = (
        "https://www.bilibili.com/ranking/bangumi/167/0/7",
        "https://www.bilibili.com/ranking/bangumi/13/0/7",
    )
    for page_url in ranking_pages:
        redis_connection.rpush(queue_key, page_url)
Exemplo n.º 2
0
def auto_crawl_bangumi():
    """Queue the two bangumi ranking pages and record one unit of progress.

    Logs the task name, creates a ``ProgressTask`` with a total of 1 backed
    by ``db['tracer']``, appends both ranking URLs to the
    ``bangumiAndDonghua:start_urls`` Redis list, and finally bumps the
    task's ``current_value`` by one.
    """
    task_name = "生成番剧国创待爬链接"
    logger.info(task_name)
    progress = ProgressTask(task_name, 1, collection=db['tracer'])

    queue_key = "bangumiAndDonghua:start_urls"
    ranking_pages = (
        "https://www.bilibili.com/ranking/bangumi/167/0/7",
        "https://www.bilibili.com/ranking/bangumi/13/0/7",
    )
    for page_url in ranking_pages:
        redis_connection.rpush(queue_key, page_url)

    # Progress is reported only after both URLs have been pushed.
    progress.current_value += 1
Exemplo n.º 3
0
def add_tag_task():
    """Queue a crawl URL for every video document that has no ``tag`` field.

    Iterates ``db['video']`` documents where ``tag`` does not exist, logs
    each ``aid``, and appends ``https://www.bilibili.com/video/av<aid>`` to
    the ``tagAdder:start_urls`` Redis list.

    The previous version also issued ``Cursor.count()`` and stored the result
    in a local that was never read; that call is deprecated (and removed in
    PyMongo 4), so it has been dropped together with the unused task name.
    """
    coll = db['video']
    doc_filter = {'tag': {'$exists': False}}
    url = 'https://www.bilibili.com/video/av{}'

    # Only ``aid`` is read below, so project just that field.
    cursor = coll.find(doc_filter, {"aid": 1}).batch_size(100)
    for each_video in cursor:
        aid = each_video['aid']
        logger.info("待爬AV号{}".format(aid))
        redis_connection.rpush("tagAdder:start_urls", url.format(aid))
Exemplo n.º 4
0
def update_author():
    """Queue a crawl URL for every author flagged ``focus`` or ``forceFocus``.

    Logs the task name, then for each matching document in ``db['author']``
    appends ``AUTHOR_URL`` formatted with the document's ``mid`` to the Redis
    list named by ``AUTHOR_KEY``.

    No separate count query is issued: iterating an empty cursor already
    performs zero pushes, so the former ``count_documents`` guard only added
    an extra database round-trip.
    """
    task_name = "生成每日作者待爬链接"
    logger.info(task_name)
    coll = db['author']
    filter_dict = {'$or': [{'focus': True}, {'forceFocus': True}]}

    # Only ``mid`` is read below, so project just that field.
    cursor = coll.find(filter_dict, {"mid": 1}).batch_size(200)
    for each_doc in cursor:
        redis_connection.rpush(AUTHOR_KEY,
                               AUTHOR_URL.format(mid=each_doc['mid']))
Exemplo n.º 5
0
def add_tag_task():
    """Queue a crawl URL for every untagged video, tracking progress.

    Counts the ``db['video']`` documents that have no ``tag`` field, creates
    a ``ProgressTask`` with that total backed by ``db['tracer']``, and then,
    for each such document, increments the task's ``current_value`` and
    appends ``https://www.bilibili.com/video/av<aid>`` to the
    ``tagAdder:start_urls`` Redis list.
    """
    task_name = "生成待爬标签视频链接"
    coll = db['video']
    doc_filter = {'tag': {'$exists': False}}

    # ``Cursor.count()`` is deprecated and absent from PyMongo 4;
    # ``count_documents`` is the supported way to count by filter.
    total = coll.count_documents(doc_filter)
    cursor = coll.find(doc_filter, {"aid": 1}).batch_size(100)
    t = ProgressTask(task_name, total, collection=db['tracer'])

    url = 'https://www.bilibili.com/video/av{}'
    for each_video in cursor:
        t.current_value += 1
        aid = each_video['aid']
        redis_connection.rpush("tagAdder:start_urls", url.format(aid))
Exemplo n.º 6
0
def auto_add_video():
    """Queue the latest-submissions URL for every focused author.

    Logs the task name, then for each ``db['author']`` document flagged
    ``focus`` or ``forceFocus`` appends that author's ``getSubmitVideos``
    URL (built from the document's ``mid``) to the
    ``videoAutoAdd:start_urls`` Redis list.

    No separate count query is issued: iterating an empty cursor already
    performs zero pushes, so the former ``count_documents`` guard only added
    an extra database round-trip.
    """
    task_name = "生成作者最新发布的视频的待爬链接"
    logger.info(task_name)
    coll = db['author']
    doc_filter = {'$or': [{'focus': True}, {'forceFocus': True}]}

    # The template is loop-invariant; only ``mid`` varies per author.
    url_template = 'https://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize=10&page=1&order=pubdate'
    for each_doc in coll.find(doc_filter, {'mid': 1}):
        redis_connection.rpush("videoAutoAdd:start_urls",
                               url_template.format(each_doc['mid']))
Exemplo n.º 7
0
def send_aids(task_name, total, cursor):
    """Push video crawl URLs to Redis in batches of 100 aids, with progress.

    Args:
        task_name: Name passed to the ``ProgressTask`` that is created.
        total: Expected number of documents; when 0 nothing is done.
        cursor: Iterable of documents, each providing an ``aid`` key.

    Every full batch of 100 aids is joined with commas, substituted into
    ``VIDEO_URL`` and appended to the Redis list named by ``VIDEO_KEY``;
    the task's ``current_value`` grows by the batch size each time. Any
    remaining aids are pushed after the loop.
    """
    if total == 0:
        return
    t = ProgressTask(task_name, total, collection=db['tracer'])
    batch_size = 100
    batch = []
    for each_doc in cursor:
        batch.append(str(each_doc['aid']))
        if len(batch) == batch_size:
            t.current_value += len(batch)
            redis_connection.rpush(VIDEO_KEY,
                                   VIDEO_URL.format(aid=','.join(batch)))
            batch = []

    # The progress update stays unconditional, as before.
    t.current_value += len(batch)
    # Skip the push when nothing is left over (document count was an exact
    # multiple of the batch size, or the cursor was empty); otherwise a URL
    # with an empty aid list would be queued.
    if batch:
        redis_connection.rpush(VIDEO_KEY,
                               VIDEO_URL.format(aid=','.join(batch)))
Exemplo n.º 8
0
def send_aids(task_name, total, cursor):
    """Push video crawl URLs to Redis in batches of 50 aids, logging each aid.

    Args:
        task_name: Accepted for interface compatibility; not read here.
        total: Expected number of documents; when 0 nothing is done.
        cursor: Iterable of documents, each providing an ``aid`` key.

    Every aid is logged as it is read. Each full batch of 50 aids is joined
    with commas, substituted into ``VIDEO_URL`` and appended to the Redis
    list named by ``VIDEO_KEY``, preceded by a log line carrying the running
    document count. Any remaining aids are pushed after the loop.
    """
    if total == 0:
        return
    batch_size = 50
    batch = []
    for c, each_doc in enumerate(cursor, start=1):
        batch.append(str(each_doc['aid']))
        logger.info(each_doc['aid'])
        if len(batch) == batch_size:
            logger.info('传送第{}个'.format(c))
            redis_connection.rpush(VIDEO_KEY,
                                   VIDEO_URL.format(aid=','.join(batch)))
            batch = []

    # Skip the push when nothing is left over (document count was an exact
    # multiple of the batch size, or the cursor was empty); otherwise a URL
    # with an empty aid list would be queued.
    if batch:
        redis_connection.rpush(VIDEO_KEY,
                               VIDEO_URL.format(aid=','.join(batch)))
Exemplo n.º 9
0
def auto_add_author():
    """Queue the site ranking pages on ``authorAutoAdd:start_urls``.

    Logs the task name, then appends the main ranking page followed by each
    ``/ranking/all/<id>/0/3`` page, in the order listed, to the Redis list.
    """
    task_name = "生成排行榜待爬链接"
    logger.info(task_name)

    queue_key = 'authorAutoAdd:start_urls'
    ranking_pages = (
        'https://www.bilibili.com/ranking',
        'https://www.bilibili.com/ranking/all/1/0/3',
        'https://www.bilibili.com/ranking/all/168/0/3',
        'https://www.bilibili.com/ranking/all/3/0/3',
        'https://www.bilibili.com/ranking/all/129/0/3',
        'https://www.bilibili.com/ranking/all/188/0/3',
        'https://www.bilibili.com/ranking/all/4/0/3',
        'https://www.bilibili.com/ranking/all/36/0/3',
        'https://www.bilibili.com/ranking/all/160/0/3',
        'https://www.bilibili.com/ranking/all/119/0/3',
        'https://www.bilibili.com/ranking/all/155/0/3',
        'https://www.bilibili.com/ranking/all/5/0/3',
        'https://www.bilibili.com/ranking/all/181/0/3',
    )
    for page_url in ranking_pages:
        redis_connection.rpush(queue_key, page_url)
Exemplo n.º 10
0
def auto_add_author():
    """Queue the site ranking pages, bumping progress once per page.

    Logs the task name, creates a ``ProgressTask`` sized to the number of
    ranking pages and backed by ``db['tracer']``, then for each page
    increments the task's ``current_value`` and appends the URL to the
    ``authorAutoAdd:start_urls`` Redis list.
    """
    task_name = "生成排行榜待爬链接"
    logger.info(task_name)

    queue_key = 'authorAutoAdd:start_urls'
    ranking_pages = (
        'https://www.bilibili.com/ranking',
        'https://www.bilibili.com/ranking/all/1/0/3',
        'https://www.bilibili.com/ranking/all/168/0/3',
        'https://www.bilibili.com/ranking/all/3/0/3',
        'https://www.bilibili.com/ranking/all/129/0/3',
        'https://www.bilibili.com/ranking/all/4/0/3',
        'https://www.bilibili.com/ranking/all/36/0/3',
        'https://www.bilibili.com/ranking/all/160/0/3',
        'https://www.bilibili.com/ranking/all/119/0/3',
        'https://www.bilibili.com/ranking/all/155/0/3',
        'https://www.bilibili.com/ranking/all/5/0/3',
        'https://www.bilibili.com/ranking/all/181/0/3',
    )
    progress = ProgressTask(
        task_name, len(ranking_pages), collection=db['tracer'])
    for page_url in ranking_pages:
        # Progress is advanced before the push, one step per URL.
        progress.current_value += 1
        redis_connection.rpush(queue_key, page_url)
Exemplo n.º 11
0
def gen_online():
    """Queue the online-count page and mark the single-step task as done.

    Creates a ``ProgressTask`` with a total of 1 backed by ``db['tracer']``,
    appends the online page URL to the ``online:start_urls`` Redis list, and
    then sets the task's ``current_value`` to 1.
    """
    task_name = "生成在线人数爬取链接"
    progress = ProgressTask(task_name, 1, collection=db['tracer'])

    online_page = 'https://www.bilibili.com/video/online.html'
    redis_connection.rpush("online:start_urls", online_page)

    # Assigned (not incremented): the task has exactly one step.
    progress.current_value = 1
Exemplo n.º 12
0
def sendSiteInfoCrawlRequest():
    """Append ``SITEINFO_URL`` to the Redis list named by ``SITEINFO_KEY``."""
    queue_key, target_url = SITEINFO_KEY, SITEINFO_URL
    redis_connection.rpush(queue_key, target_url)
Exemplo n.º 13
0
def sendVideoCrawlRequest(aid):
    """Queue a crawl request for one video.

    Args:
        aid: Value substituted for the ``aid`` field of ``VIDEO_URL``; the
            resulting URL is appended to the Redis list ``VIDEO_KEY``.
    """
    crawl_url = VIDEO_URL.format(aid=aid)
    redis_connection.rpush(VIDEO_KEY, crawl_url)
Exemplo n.º 14
0
def sendAuthorCrawlRequest(mid):
    """Queue a crawl request for one author.

    Args:
        mid: Value substituted for the ``mid`` field of ``AUTHOR_URL``; the
            resulting URL is appended to the Redis list ``AUTHOR_KEY``.
    """
    crawl_url = AUTHOR_URL.format(mid=mid)
    redis_connection.rpush(AUTHOR_KEY, crawl_url)
Exemplo n.º 15
0
def gen_online():
    """Append the online-count page URL to ``online:start_urls`` in Redis."""
    # NOTE(review): task_name is assigned but never read in this function.
    task_name = "生成在线人数爬取链接"
    queue_key = "online:start_urls"
    online_page = 'https://www.bilibili.com/video/online.html'
    redis_connection.rpush(queue_key, online_page)