def crawl(
    screen_names=[],
    friends_limit=10000,
    followers_limit=10000,
    depth=1,
    friends_sample=0.2,  # XXX
    followers_sample=0.0,
):

    def crawlmapper(screen_name):
        if r.get(getRedisIdByScreenName(screen_name, 'crawled_in_60min')) is None:

            friends_info = getFriendsBatch(screen_name, friends_limit)
            map(lambda x: r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'),
                                 x['id']), friends_info)
            scard = r.scard(getRedisIdByScreenName(screen_name, 'friend_ids'))
            print >> sys.stderr, 'Fetched %s ids for %s' % (scard, screen_name)

            followers_info = getFollowersBatch(screen_name, followers_limit)
            map(lambda x: r.sadd(getRedisIdByScreenName(screen_name, 'follower_ids'),
                                 x['id']), followers_info)
            scard = r.scard(getRedisIdByScreenName(screen_name, 'follower_ids'))
            print >> sys.stderr, 'Fetched %s ids for %s' % (scard, screen_name)

            # If either fetch came back empty, back off from this screen
            # name for an hour
            if friends_info == [] or followers_info == []:
                r.set(getRedisIdByScreenName(screen_name, 'crawled_in_60min'), '1')
                r.expire(getRedisIdByScreenName(screen_name, 'crawled_in_60min'), 3600)
        else:
            # Crawled within the last hour: rebuild the info lists from Redis
            friends_info = map(RedisUserId2UserInfoWraper,
                               list(r.smembers(getRedisIdByScreenName(screen_name,
                                                                      'friend_ids'))))
            followers_info = map(RedisUserId2UserInfoWraper,
                                 list(r.smembers(getRedisIdByScreenName(screen_name,
                                                                        'follower_ids'))))

        return map(lambda u1: u1['screen_name'],
                   filter(lambda info: (info is not None
                                        and info['followers_count'] < 1000
                                        and info['friends_count'] < 1000),
                          # Filter out "public intellectuals" and zombies
                          flat(map(samplemapper,
                                   [friends_info, followers_info],
                                   [friends_sample, followers_sample]))))

    getUserInfo(t, r, screen_names=screen_names)
    d = 0
    while d < depth:
        d += 1
        screen_names = flat(map(crawlmapper, screen_names))
        print 'crawled', len(screen_names), 'ids'
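# The crawl variant above leans on helpers that aren't shown here: flat
# and samplemapper (RedisUserId2UserInfoWraper and the *Batch fetchers are
# likewise defined elsewhere). A minimal sketch of plausible
# implementations, inferred from the call sites -- the bodies below are
# guesses, not the original code:

import random

def flat(list_of_lists):
    # Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]
    return [item for sublist in list_of_lists for item in sublist]

def samplemapper(infos, sample_rate):
    # Keep each user-info dict with probability sample_rate (0.0 - 1.0),
    # matching how map(samplemapper, [...], [...]) pairs lists with rates
    return [info for info in infos if random.random() < sample_rate]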
def crawl(
    screen_names,
    friends_limit=10000,
    followers_limit=10000,
    depth=1,
    friends_sample=0.3,  # XXX
    followers_sample=0.3,
):
    response = getUserInfo(t, r, screen_names=screen_names)

    ## ymir
    r.sadd('justin-ids', response[0]['id'])
    print "Appended to justin-ids. Now it has " + unicode(r.scard('justin-ids'))

    for screen_name in screen_names:
        friend_ids = getFriends(screen_name, limit=friends_limit)
        follower_ids = getFollowers(screen_name, limit=followers_limit)
        friends_info = getUserInfo(t, r, user_ids=friend_ids,
                                   sample=friends_sample)
        followers_info = getUserInfo(t, r, user_ids=follower_ids,
                                     sample=followers_sample)
        next_queue = [u['screen_name'] for u in friends_info + followers_info]

        d = 1
        while d < depth:
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _screen_name in queue:
                friend_ids = getFriends(_screen_name, limit=friends_limit)
                follower_ids = getFollowers(_screen_name, limit=followers_limit)
                next_queue.extend(friend_ids + follower_ids)

            # Note that this function takes a kw between 0.0 and 1.0 called
            # sample that allows you to crawl only a random sample of nodes
            # at any given level of the graph
            getUserInfo(t, r, user_ids=next_queue)
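# A usage sketch for reading back the ad hoc 'justin-ids' set accumulated
# above; redis-py exposes scard and smembers for this:

import redis

r = redis.Redis()
print 'Collected %d ids so far' % r.scard('justin-ids')
print r.smembers('justin-ids')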
def crawl(
    screen_names,
    friends_limit=10000,
    followers_limit=10000,
    depth=1,
    friends_sample=0.2,
    followers_sample=0.0  # XXX
):
    getUserInfo(t, r, screen_names=screen_names)
    for screen_name in screen_names:
        friend_ids = getFriends(screen_name, limit=friends_limit)
        follower_ids = getFollowers(screen_name, limit=followers_limit)
        friends_info = getUserInfo(t, r, user_ids=friend_ids,
                                   sample=friends_sample)
        followers_info = getUserInfo(t, r, user_ids=follower_ids,
                                     sample=followers_sample)
        next_queue = [u["screen_name"] for u in friends_info + followers_info]

        d = 1
        while d < depth:
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _screen_name in queue:
                friend_ids = getFriends(_screen_name, limit=friends_limit)
                follower_ids = getFollowers(_screen_name, limit=followers_limit)
                next_queue.extend(friend_ids + follower_ids)

            # Note that this function takes a kw between 0.0 and 1.0 called
            # sample that allows you to crawl only a random sample of nodes
            # at any given level of the graph
            getUserInfo(t, r, user_ids=next_queue)
def crawl(
    screen_names,
    friends_limit=10000,
    followers_limit=100000,
    depth=1,
    friends_sample=0.2,  # XXX
    followers_sample=0.2,
):
    logging.info("Getting user info")
    getUserInfo(t, r, screen_names=screen_names)
    for screen_name in screen_names:
        friend_ids = getFriends(screen_name, limit=friends_limit)
        logging.info("Retrieved %d friend ids", len(friend_ids))
        follower_ids = getFollowers(screen_name, limit=followers_limit)
        logging.info("Retrieved %d follower ids", len(follower_ids))
        friends_info = getUserInfo(t, r, user_ids=friend_ids,
                                   sample=friends_sample)
        logging.info("Retrieved user info for %d friends", len(friends_info))
        logging.info("Getting follower info")
        followers_info = getUserInfo(t, r, user_ids=follower_ids,
                                     sample=followers_sample)
        logging.info("Retrieved user info for %d followers", len(followers_info))
        next_queue = [u['screen_name'] for u in friends_info + followers_info]

        d = 1
        while d < depth:
            logging.info("while loop: depth = %d, d = %d", depth, d)
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _screen_name in queue:
                logging.info("while loop: screen_name: %s", _screen_name)
                friend_ids = getFriends(_screen_name, limit=friends_limit)
                follower_ids = getFollowers(_screen_name, limit=followers_limit)
                next_queue.extend(friend_ids + follower_ids)

            # Note that this function takes a kw between 0.0 and 1.0 called
            # sample that allows you to crawl only a random sample of nodes
            # at any given level of the graph
            logging.info("while loop: getting user info")
            getUserInfo(t, r, user_ids=next_queue)
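# The logging calls above assume the root logger was configured elsewhere
# in the module; something along these lines (an assumption, not shown in
# the original):

import logging

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')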
def crawl(
    screen_names,
    friends_limit=10000,
    followers_limit=10000,
    depth=1,
    friends_sample=0.2,  # XXX
    followers_sample=0.0,
):
    getUserInfo(t, r, screen_names=screen_names)
    for screen_name in screen_names:
        friend_ids = getFriends(screen_name, limit=friends_limit)
        follower_ids = getFollowers(screen_name, limit=followers_limit)
        friends_info = getUserInfo(t, r, user_ids=friend_ids,
                                   sample=friends_sample)
        followers_info = getUserInfo(t, r, user_ids=follower_ids,
                                     sample=followers_sample)
        next_queue = [u['screen_name'] for u in friends_info + followers_info]

        d = 1
        while d < depth:
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _screen_name in queue:
                friend_ids = getFriends(_screen_name, limit=friends_limit)
                follower_ids = getFollowers(_screen_name, limit=followers_limit)
                next_queue.extend(friend_ids + follower_ids)

            # Note that this function takes a kw between 0.0 and 1.0 called
            # sample that allows you to crawl only a random sample of nodes
            # at any given level of the graph
            getUserInfo(t, r, user_ids=next_queue)
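# The sample keyword described in the comment above is implemented inside
# getUserInfo (in twitter__util). A sketch of the underlying idea --
# randomly thin the id list before calling /users/lookup -- offered as an
# illustration, not the book's exact implementation:

import random

def sample_ids(user_ids, sample=1.0):
    # Keep each id with probability sample; sample=1.0 keeps everything
    return [uid for uid in user_ids if random.random() <= sample]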
# -*- coding: utf-8 -*-

import sys
import json
import redis
from twitter__login import login

# A makeTwitterRequest call through to the /users/lookup
# resource, which accepts a comma separated list of up
# to 100 screen names. Details are fairly uninteresting.
# See also http://dev.twitter.com/doc/get/users/lookup
from twitter__util import getUserInfo

if __name__ == "__main__":
    screen_names = sys.argv[1:]
    t = login()
    r = redis.Redis()
    print json.dumps(getUserInfo(t, r, screen_names=screen_names), indent=4)
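# Example invocation (the script name is illustrative; OAuth credentials
# must already be set up for twitter__login.login):
#
#   $ python get_user_info.py timoreilly ptwobrussell
#
# which prints the JSON user objects for each screen name passed.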
# -*- coding: utf-8 -*-

import sys
import json
import redis
from twitter__login import login

# A makeTwitterRequest call through to the /users/lookup
# resource, which accepts a comma separated list of up
# to 100 screen names. Details are fairly uninteresting.
# See also http://dev.twitter.com/doc/get/users/lookup
#
# JW: adapted original code to retrieve user_ids from redis
# and pass them to the getUserInfo function.
from twitter__util import getUserInfo

t = login()
r = redis.Redis()

friend_ids = list(r.smembers("screen_name$timoreilly$friend_ids"))
user_info = getUserInfo(t, r, user_ids=friend_ids)
print json.dumps(user_info, indent=4)
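# The hardcoded key above reveals the Redis naming scheme the
# twitter__util helpers use. A sketch of getRedisIdByScreenName consistent
# with "screen_name$timoreilly$friend_ids" (check twitter__util for the
# authoritative version):

def getRedisIdByScreenName(screen_name, key_name):
    return 'screen_name$' + screen_name + '$' + key_name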
# -*- coding: utf-8 -*-

import sys
import json
import redis
from twitter__login import login

# A makeTwitterRequest call through to the /users/lookup
# resource, which accepts a comma separated list of up
# to 100 screen names. Details are fairly uninteresting.
# See also http://dev.twitter.com/doc/get/users/lookup
from twitter__util import getUserInfo

if __name__ == "__main__":
    user_ids = sys.argv[1:]
    t = login()
    r = redis.Redis()
    info = getUserInfo(t, r, user_ids=user_ids)
    print json.dumps(info, indent=4)
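# Example invocation, passing numeric Twitter user ids rather than screen
# names (the id shown is a placeholder, not a real account):
#
#   $ python get_user_info_by_id.py 123456789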
import json
import redis
from twitter__login import login

# A makeTwitterRequest call through to the /users/lookup
# resource, which accepts a comma separated list of up
# to 100 screen names. Details are fairly uninteresting.
# See also http://dev.twitter.com/doc/get/users/lookup
from twitter__util import getUserInfo

if __name__ == "__main__":

    # XXX: IPython Notebook cannot prompt for input
    screen_names = ['timoreilly', 'socialwebmining', 'ptwobrussell']

    t = login()
    r = redis.Redis()

    print json.dumps(getUserInfo(t, r, screen_names=screen_names), indent=4)

# Example 4-7. Finding common friends/followers for multiple Twitterers,
# with output that's easier on the eyes
# (friends_followers__friends_followers_in_common.py)

import sys
import redis

from twitter__util import getRedisIdByScreenName

# A pretty-print function for numbers
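# The pretty-print helper itself is truncated above. One plausible
# implementation (a sketch; the book's original may differ) simply adds
# thousands separators:

def pp(num):
    return '{:,}'.format(num)

# e.g. pp(1234567) -> '1,234,567'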
import sys
import functools
import json
import redis

from twitter__login import login
from twitter__util import getUserInfo
from twitter__util import _getFriendsOrFollowersUsingFunc
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

SCREEN_NAME = sys.argv[1]
MAXINT = sys.maxint

t = login()
r = redis.Redis()

# get info and friends for central user
getUserInfo(t, r, [SCREEN_NAME])

getFriends = functools.partial(_getFriendsOrFollowersUsingFunc,
                               t.friends.ids, 'friend_ids', t, r)
getFollowers = functools.partial(_getFriendsOrFollowersUsingFunc,
                                 t.followers.ids, 'follower_ids', t, r)

# get friends and followers of central user
friend_ids = getFriends(SCREEN_NAME)
follower_ids = getFollowers(SCREEN_NAME)

# do union of friends and followers
ids = list(r.sunion(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'),
                    getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

# get user info for friends and followers
getUserInfo(t, r, user_ids=ids)

# get friends of friends and followers
for user_id in ids:
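# A note on the partials above: functools.partial(f, a, b, c, d) pre-binds
# the first four arguments, so getFriends(SCREEN_NAME) expands to
# _getFriendsOrFollowersUsingFunc(t.friends.ids, 'friend_ids', t, r,
# SCREEN_NAME), and getFollowers works the same way against
# t.followers.ids. The keyword arguments the helper accepts (such as
# limit) live in twitter__util.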
for screen_name in screen_names:
    if screen_name is not None:
        print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
        friends_ids = getFriends(screen_name, limit=200)
        print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
        followers_ids = getFollowers(screen_name, limit=200)

        # make union of friends and followers
        union = r.sunion([getRedisIdByScreenName(screen_name, 'friend_ids'),
                          getRedisIdByScreenName(screen_name, 'follower_ids')])

        # convert from set to list and keep just 200 of them
        union = list(union)[:200]

        # get info for all 200; needed for the location
        friends_info = getUserInfo(t, r, user_ids=union, sample=1.0)

        print "Now harvesting", screen_name, "'s friends subgraphs"
        for current_friend in friends_info:
            if current_friend is not None:
                print "+", current_friend['screen_name'], "from",
                if current_friend['location']:
                    print current_friend['location'].encode('utf-8')
                else:
                    print " "
                friend_ids = getFriends(current_friend['screen_name'], limit=200)
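# By analogy with the top-level loop, the inner loop presumably goes on to
# fetch each friend's followers as well, along these lines (a guess at the
# continuation, not the original code):
#
#                 follower_ids = getFollowers(current_friend['screen_name'],
#                                             limit=200)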