def save_users_timelines(twitter_api, users_ids):
    """Harvest and store the timeline of every user in *users_ids*.

    Each id is handled by ``save_single_user_timeline``, which converts it
    with ``int()``, harvests the timeline through ``harvest_user_timeline``
    and saves one ``{'timeline': ..., 'user_id': ...}`` document per user.

    Args:
        twitter_api: API client handed through to the harvesting helper.
        users_ids: Iterable of user ids; each item must be accepted by
            ``int()`` (e.g. numeric strings).

    Raises:
        ValueError: If an id cannot be converted with ``int()``.
    """
    # Delegate instead of repeating the single-user logic, so both entry
    # points always write identical documents.
    for str_id in users_ids:
        save_single_user_timeline(twitter_api, str_id)
def crawl_friends(twitter_api, screen_name, limit=1000, depth=2):
    """Crawl the friends of *screen_name* level by level and store the ids.

    The seed's ids are fetched and saved first (level 1). For every further
    level up to *depth*, each id found on the previous level is looked up in
    turn and its own id list is saved as a separate document.

    Args:
        twitter_api: API client; ``twitter_api.users.show`` is used to
            resolve the seed and the client is passed on to
            ``get_friends_followers_ids``.
        screen_name: Screen name of the seed account.
        limit: Passed as ``friends_limit`` on every lookup
            (``followers_limit`` is always 0).
        depth: Number of levels to crawl; with ``depth <= 1`` only the
            seed's ids are fetched and stored.
    """
    # Resolve the screen name once and work with ids from here on, for
    # consistency in storage.
    seed_id = str(twitter_api.users.show(screen_name=screen_name)['id'])

    # NOTE(review): the result is iterated directly as a flat collection of
    # ids -- confirm get_friends_followers_ids returns that (and not a
    # (friends, followers) pair) when followers_limit=0.
    next_queue = get_friends_followers_ids(
        twitter_api, user_id=seed_id, friends_limit=limit, followers_limit=0)

    # Store the seed's ids. The document key stays 'followers' although the
    # lookup is limited to friends; existing readers may depend on it.
    # NOTE(review): the original called 'users_ids'.format(seed_id), a no-op
    # because the string has no placeholder; the target name is unchanged.
    DataOperations.save_to_mongo(
        {'followers': list(next_queue)}, 'users_crawl', 'users_ids')

    level = 1
    while level < depth:
        level += 1
        # Ids found on the previous level become this level's work list.
        queue, next_queue = next_queue, []
        for fid in queue:
            follower_ids = get_friends_followers_ids(
                twitter_api, user_id=fid, friends_limit=limit,
                followers_limit=0)

            # Store the ids fetched for this fid. The original saved the
            # next_queue accumulator here, which is empty on the first
            # iteration and never contains the ids just fetched.
            DataOperations.save_to_mongo(
                {'followers': list(follower_ids)}, 'users_crawl', 'users_ids')

            next_queue += follower_ids
def save_single_user_timeline(twitter_api, user_id):
    """Harvest one user's timeline and save it.

    Args:
        twitter_api: API client handed through to ``harvest_user_timeline``.
        user_id: User id; converted with ``int()`` before use, so numeric
            strings are accepted.

    Raises:
        ValueError: If *user_id* cannot be converted with ``int()``.
    """
    numeric_id = int(user_id)
    # One document per call: the harvested timeline plus the id it belongs to.
    record = {
        'timeline': harvest_user_timeline(twitter_api, user_id=numeric_id),
        'user_id': numeric_id,
    }
    DataOperations.save_to_mongo(record, 'users_crawl', 'users_timelines')