示例#1
0
def crawl_person_infos(uid):
    """
    Fetch a user's profile and schedule crawling of their fans/followers.

    Weibo's backend only exposes 5 pages of fans and followers, and
    enterprise accounts do not allow their relations to be viewed at all.
    :param uid: current user id
    :return: None
    """
    if not uid:
        return

    try:
        profile, already_crawled = get_profile(uid)

        # Enterprise accounts (verify_type == 2) expose no relation data,
        # so mark them done and bail out.
        if profile and profile.verify_type == 2:
            SeedidsOper.set_seed_other_crawled(uid)
            return

        # Hand relation crawling off to the dedicated queue.
        if not already_crawled:
            app.send_task('tasks.user.crawl_follower_fans',
                          args=(uid,),
                          queue='fans_followers',
                          routing_key='for_fans_followers')

    # When celery is started with '--soft-time-limit secs' this exception
    # fires; requeue the task so the uid is retried rather than lost, e.g.
    # celery -A tasks.workers -Q user_crawler worker -l info -c 1 --soft-time-limit 10
    except SoftTimeLimitExceeded:
        crawler.error(
            "user SoftTimeLimitExceeded    uid={uid}".format(uid=uid))
        app.send_task('tasks.user.crawl_person_infos',
                      args=(uid,),
                      queue='user_crawler',
                      routing_key='for_user_info')
示例#2
0
def crawl_person_infos(uid):
    """
    Look up a user's profile and, unless it was crawled before, push a
    follow-up task that harvests their fans and followers.

    Weibo caps fan/follower listings at 5 pages, and enterprise accounts
    expose no relation data at all, so those are marked done immediately.
    :param uid: current user id
    :return: None
    """
    if not uid:
        return

    try:
        account, seen_before = get_profile(uid)
        is_enterprise = bool(account) and account.verify_type == 2
        if is_enterprise:
            # Enterprise relations are not viewable -- nothing more to do.
            SeedidsOper.set_seed_other_crawled(uid)
        elif not seen_before:
            app.send_task('tasks.user.crawl_follower_fans', args=(uid,), queue='fans_followers',
                          routing_key='for_fans_followers')

    # Raised when celery runs with '--soft-time-limit secs'; requeue so the
    # uid is retried rather than silently dropped, e.g.
    # celery -A tasks.workers -Q user_crawler worker -l info -c 1 --soft-time-limit 10
    except SoftTimeLimitExceeded:
        crawler.error("user SoftTimeLimitExceeded    uid={uid}".format(uid=uid))
        app.send_task('tasks.user.crawl_person_infos', args=(uid,), queue='user_crawler',
                      routing_key='for_user_info')
示例#3
0
def crawl_follower_fans(uid):
    """
    Pull the first page of fans and the first page of followers for *uid*,
    store any discovered ids as new seeds, then mark the seed as crawled.
    :param uid: current user id
    :return: None
    """
    seed = SeedidsOper.get_seed_by_id(uid)
    # Robustness fix: the seed row may be missing (deleted or never stored)
    # by the time this task runs; the original crashed with AttributeError.
    if seed is None or seed.other_crawled != 0:
        return

    rs = get_fans_or_followers_ids(uid, 1, 1)        # fans, page 1
    rs.extend(get_fans_or_followers_ids(uid, 2, 1))  # followers, page 1
    datas = set(rs)  # de-duplicate ids appearing in both listings
    # Only write when something was found; already-known ids are presumably
    # skipped inside insert_seeds -- TODO confirm against SeedidsOper
    if datas:
        SeedidsOper.insert_seeds(datas)
    SeedidsOper.set_seed_other_crawled(uid)
示例#4
0
def crawl_follower_fans(uid):
    """
    Collect the first page of fans and followers for *uid*, register any
    ids found as new seeds, and flag the seed as processed.
    :param uid: current user id
    :return: None
    """
    seed = SeedidsOper.get_seed_by_id(uid)
    if seed.other_crawled == 0:
        fans_page = get_fans_or_followers_ids(uid, 1, 1)
        followers_page = get_fans_or_followers_ids(uid, 2, 1)
        discovered = set(fans_page) | set(followers_page)
        # Only insert when something was found; existing entries are
        # presumably skipped by insert_seeds -- TODO confirm
        if discovered:
            SeedidsOper.insert_seeds(discovered)
        SeedidsOper.set_seed_other_crawled(uid)
示例#5
0
def crawl_follower_fans(uid):
    """
    Crawl one page of fans and one page of followers for *uid* and fetch
    each discovered user's profile inline, then mark the seed as crawled.

    Enterprise users (verify_type == 2) are skipped because their
    relations cannot be viewed.
    :param uid: current user id
    :return: None
    """
    user, is_crawled = get_profile(uid)
    if user and user.verify_type == 2:
        SeedidsOper.set_seed_other_crawled(uid)
        return

    rs = get_fans_or_followers_ids(uid, 1, 1)        # fans, page 1
    rs.extend(get_fans_or_followers_ids(uid, 2, 1))  # followers, page 1
    datas = set(rs)
    # BUG FIX: the loop used to rebind ``uid`` itself, so the final
    # set_seed_other_crawled(uid) marked the LAST discovered user instead
    # of the seed we started from. Use a distinct loop variable.
    for other_uid in datas:
        get_profile(other_uid)
    # Seed insertion is intentionally disabled in this variant (profiles
    # are fetched inline above instead of being queued as new seeds):
    # if datas:
    #     SeedidsOper.insert_seeds(datas)
    SeedidsOper.set_seed_other_crawled(uid)
示例#6
0
def crawl_person_infos(uid):
    """
    Crawl a user's profile and, when not yet crawled, their fans and
    followers -- synchronously in-process (no celery task dispatch in
    this variant).
    :param uid: current user id
    :return: None
    """

    if not uid:
        return

    try:
        user, is_crawled = get_profile(uid)
        # If it's enterprise user, just skip it
        if user and user.verify_type == 2:
            SeedidsOper.set_seed_other_crawled(uid)
            return

        # Crawl fans and followers
        if not is_crawled:
            crawl_follower_fans(uid)
    except SoftTimeLimitExceeded:
        crawler.error(
            "user SoftTimeLimitExceeded    uid={uid}".format(uid=uid))
        # NOTE(review): retries by direct recursion inside the except
        # handler -- repeated timeouts for the same uid will deepen the
        # stack; confirm this is the intended retry strategy.
        crawl_person_infos(uid)
示例#7
0
def crawl_person_infos(uid):
    """
    Crawl user info and their fans and followers
    For the limit of weibo's backend, we can only crawl 5 pages of the fans and followers.
    We also have no permissions to view enterprise's followers and fans info
    :param uid: current user id
    :return: None
    """
    if not uid:
        return

    try:
        user, is_crawled = get_profile(uid)
        # If it's enterprise user, just skip it
        if user and user.verify_type == 2:
            SeedidsOper.set_seed_other_crawled(uid)
            return

        # Crawl fans and followers
        if not is_crawled:
            app.send_task('tasks.user.crawl_follower_fans', args=(uid,), queue='fans_followers',
                          routing_key='for_fans_followers')

    # Consistency fix: the sibling variants of this task requeue on a soft
    # time limit ('--soft-time-limit secs'); without this handler the uid
    # would be silently dropped when the limit fires.
    except SoftTimeLimitExceeded:
        crawler.error("user SoftTimeLimitExceeded    uid={uid}".format(uid=uid))
        app.send_task('tasks.user.crawl_person_infos', args=(uid,), queue='user_crawler',
                      routing_key='for_user_info')