def calculate(): r = redis.Redis() # Default connection settings on localhost follower_ids = list( r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids'))) followers = r.mget([ getRedisIdByUserId(follower_id, 'info.json') for follower_id in follower_ids ]) followers = [json.loads(f) for f in followers if f is not None] freqs = {} for f in followers: cnt = f['followers_count'] if not freqs.has_key(cnt): freqs[cnt] = [] freqs[cnt].append({ 'screen_name': f['screen_name'], 'user_id': f['id'] }) # It could take a few minutes to calculate freqs, so store a snapshot for later use r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'), json.dumps(freqs)) keys = freqs.keys() keys.sort() print 'The top 10 followers from the sample:' field_names = ['Date', 'Count'] pt = PrettyTable(field_names=field_names) pt.align = 'l' for (user, freq) in reversed([(user['screen_name'], k) for k in keys[-10:] for user in freqs[k]]): pt.add_row([user, pp(freq)]) print pt all_freqs = [k for k in keys for user in freqs[k]] avg = reduce(lambda x, y: x + y, all_freqs) / len(all_freqs) print "\nThe average number of followers for %s's followers: %s" \ % (SCREEN_NAME, pp(avg))
def calculate(): r = redis.Redis() # Default connection settings on localhost follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids'))) followers = r.mget([getRedisIdByUserId(follower_id, 'info.json') for follower_id in follower_ids]) followers = [json.loads(f) for f in followers if f is not None] freqs = {} for f in followers: cnt = f['followers_count'] if not freqs.has_key(cnt): freqs[cnt] = [] freqs[cnt].append({'screen_name': f['screen_name'], 'user_id': f['id']}) # It could take a few minutes to calculate freqs, so store a snapshot for later use r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'), json.dumps(freqs)) keys = freqs.keys() keys.sort() print 'The top 10 followers from the sample:' fields = ['Date', 'Count'] pt = PrettyTable(fields=fields) [pt.set_field_align(f, 'l') for f in fields] for (user, freq) in reversed([(user['screen_name'], k) for k in keys[-10:] for user in freqs[k]]): pt.add_row([user, pp(freq)]) pt.printt() all_freqs = [k for k in keys for user in freqs[k]] avg = reduce(lambda x, y: x + y, all_freqs) / len(all_freqs) print "\nThe average number of followers for %s's followers: %s" \ % (SCREEN_NAME, pp(avg))
def calculate(): r = redis.Redis() # Default connection settings on localhost follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, "follower_ids"))) followers = r.mget([getRedisIdByUserId(follower_id, "info.json") for follower_id in follower_ids]) followers = [json.loads(f) for f in followers if f is not None] freqs = {} for f in followers: cnt = f["followers_count"] if not freqs.has_key(cnt): freqs[cnt] = [] freqs[cnt].append({"screen_name": f["screen_name"], "user_id": f["id"]}) # It could take a few minutes to calculate freqs, so store a snapshot for later use r.set(getRedisIdByScreenName(SCREEN_NAME, "follower_freqs"), json.dumps(freqs)) keys = freqs.keys() keys.sort() print "The top 10 followers from the sample:" field_names = ["Date", "Count"] pt = PrettyTable(field_names=field_names) pt.align = "l" for (user, freq) in reversed([(user["screen_name"], k) for k in keys[-10:] for user in freqs[k]]): pt.add_row([user, pp(freq)]) print pt all_freqs = [k for k in keys for user in freqs[k]] avg = reduce(lambda x, y: x + y, all_freqs) / len(all_freqs) print "\nThe average number of followers for %s's followers: %s" % (SCREEN_NAME, pp(avg))
SCREEN_NAME = 'timoreilly' # XXX: iPython Notebook cannot prompt for input THRESHOLD = 15 # XXX: iPython Notebook cannot prompt for input # Connect using default settings for localhost r = redis.Redis() # Compute screen_names for friends friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids')) friend_screen_names = [] for friend_id in friend_ids: try: friend_screen_names.append( json.loads(r.get(getRedisIdByUserId( friend_id, 'info.json')))['screen_name'].lower()) except TypeError, e: continue # not locally available in Redis - look it up or skip it # Pull the list of (entity, frequency) tuples from CouchDB server = couchdb.Server('http://localhost:5984') db = server['tweets-user-timeline-' + SCREEN_NAME] entities_freqs = sorted( [(row.key, row.value) for row in db.view('index/entity_count_by_doc', group=True)], key=lambda x: x[1]) # Keep only user entities with insufficient frequencies
g = nx.Graph() r = redis.Redis() # Compute all ids for nodes appearing in the graph friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))) id_for_screen_name = json.loads(r.get(getRedisIdByScreenName(SCREEN_NAME, 'info.json')))['id'] ids = [id_for_screen_name] + friend_ids for current_id in ids: print >> sys.stderr, 'Processing user with id', current_id try: current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json' ))) current_screen_name = current_info['screen_name'] friend_ids = list(r.smembers(getRedisIdByScreenName(current_screen_name, 'friend_ids'))) # filter out ids for this person if they aren't also SCREEN_NAME's friends too, # which is the basis of the query friend_ids = [fid for fid in friend_ids if fid in ids] except Exception, e: print >> sys.stderr, 'Skipping', current_id for friend_id in friend_ids: try: friend_info = json.loads(r.get(getRedisIdByUserId(friend_id, 'info.json')))
r = redis.Redis() # Compute all ids for nodes appearing in the graph friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))) id_for_screen_name = json.loads( r.get(getRedisIdByScreenName(SCREEN_NAME, 'info.json')))['id'] ids = [id_for_screen_name] + friend_ids for current_id in ids: print >> sys.stderr, 'Processing user with id', current_id try: current_info = json.loads( r.get(getRedisIdByUserId(current_id, 'info.json'))) current_screen_name = current_info['screen_name'] friend_ids = list( r.smembers( getRedisIdByScreenName(current_screen_name, 'friend_ids'))) # filter out ids for this person if they aren't also SCREEN_NAME's friends too, # which is the basis of the query friend_ids = [fid for fid in friend_ids if fid in ids] except Exception, e: print >> sys.stderr, 'Skipping', current_id for friend_id in friend_ids: try: friend_info = json.loads(
from twitter__util import getRedisIdByUserId SCREEN_NAME = sys.argv[1] THRESHOLD = int(sys.argv[2]) # Connect using default settings for localhost r = redis.Redis() # Compute screen_names for friends friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids')) friend_screen_names = [] for friend_id in friend_ids: try: friend_screen_names.append(json.loads(r.get(getRedisIdByUserId(friend_id, 'info.json')))['screen_name'].lower()) except TypeError, e: continue # not locally available in Redis - look it up or skip it # Pull the list of (entity, frequency) tuples from CouchDB server = couchdb.Server('http://*****:*****@'
# Harvest SCREEN_NAME's friends and followers into Redis, then harvest the
# friends of everyone in that union (best effort, skipping failures).
t = login()
r = redis.Redis()

# get info and friends for central user
getUserInfo(t, r, [SCREEN_NAME])
getFriends = functools.partial(_getFriendsOrFollowersUsingFunc,
                               t.friends.ids, 'friend_ids', t, r)
getFollowers = functools.partial(_getFriendsOrFollowersUsingFunc,
                                 t.followers.ids, 'follower_ids', t, r)

# get friends and followers of central user
friend_ids = getFriends(SCREEN_NAME)
follower_ids = getFollowers(SCREEN_NAME)

# do union of friends and followers
ids = list(r.sunion(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'),
                    getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

# get user info for friends and followers
getUserInfo(t, r, user_ids=ids)

# get friends of friends and followers
for user_id in ids:
    screen_name = json.loads(r.get(getRedisIdByUserId(
        user_id, 'info.json')))['screen_name']
    try:
        getFriends(screen_name)
    except Exception:
        # was a bare "except:", which also swallowed KeyboardInterrupt/
        # SystemExit; keep the best-effort skip but let those propagate
        continue
API_KEY = sys.argv[2] API_ENDPOINT = \ 'http://api.infochimps.com/soc/net/tw/strong_links.json?screen_name=%s&apikey=%s' r = redis.Redis() # default connection settings on localhost try: url = API_ENDPOINT % (SCREEN_NAME, API_KEY) response = urllib2.urlopen(url) except urllib2.URLError, e: print 'Failed to fetch ' + url raise e strong_links = json.loads(response.read()) # resolve screen names and print to screen: print "%s's Strong Links" % (SCREEN_NAME, ) print '-' * 30 for sl in strong_links['strong_links']: if sl is None: continue try: user_info = json.loads(r.get(getRedisIdByUserId(sl[0], 'info.json'))) print user_info['screen_name'], sl[1] except Exception, e: print >> sys.stderr, "ERROR: couldn't resolve screen_name for", sl print >> sys.stderr, "Maybe you haven't harvested data for this person yet?"
from twitter__util import getRedisIdByUserId SCREEN_NAME = sys.argv[1] THRESHOLD = int(sys.argv[2]) # Connect using default settings for localhost r = redis.Redis() # Compute screen_names for friends friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, "friend_ids")) friend_screen_names = [] for friend_id in friend_ids: try: friend_screen_names.append(json.loads(r.get(getRedisIdByUserId(friend_id, "info.json")))["screen_name"]) except TypeError, e: continue # not locally available in Redis - look it up or skip it # Pull the list of (entity, frequency) tuples from CouchDB server = couchdb.Server("http://*****:*****@" and ef[1] >= THRESHOLD]
import sys import redis import json from twitter__util import getRedisIdByScreenName from twitter__util import getRedisIdByUserId EGO = sys.argv[1] r = redis.Redis() normalized_locations = [] friend_ids = list(r.smembers(getRedisIdByScreenName(EGO, 'friend_ids'))) ego_id = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))['id'] ids = [ego_id] + friend_ids for user_id in ids: redis_id = getRedisIdByUserId(str(user_id), 'info.json') location_json = r.get(redis_id) if location_json: location = json.loads(location_json)['location'] if location: normalized_location = location.lower().encode("utf-8") normalized_locations.append(normalized_location) unique_locations = set(normalized_locations) for ul in unique_locations: print ul
# Build EGO's ego-network graph: nodes are EGO plus friends/followers,
# edges connect EGO to each, and each to mutual connections within the set.
# Assumes r (redis client) and the getRedisIdBy* helpers are already in scope.
graph = nx.Graph()

EGO = sys.argv[1]
EGO_ID = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))['id']

TEMP_UNION_KEY = 'temp$union'

# union EGO's friends and followers
r.sunionstore(TEMP_UNION_KEY, [getRedisIdByScreenName(EGO, 'friend_ids'),
                               getRedisIdByScreenName(EGO, 'follower_ids')])
friend_follower_ids = list(r.smembers(TEMP_UNION_KEY))

# for each friend/follower :
for friend_follower_id in friend_follower_ids:
    # intersect friend/follower's friends with ego's union
    # NB: the getFriends function used for mining only uses screen_name keys
    try:
        screen_name = json.loads(r.get(getRedisIdByUserId(
            friend_follower_id, 'info.json')))['screen_name']
    except Exception:
        # was a bare "except:"; narrowed so KeyboardInterrupt/SystemExit
        # still propagate while un-harvested profiles are skipped
        continue

    intersecting_ids = list(r.sinter(
        TEMP_UNION_KEY, getRedisIdByScreenName(screen_name, 'friend_ids')))

    # add edge between EGO and the current friend/follower
    graph.add_edge(EGO_ID, friend_follower_id)

    # add edges between each id in the intersection and the id of the current friend/follower
    for intersecting_id in intersecting_ids:
        graph.add_edge(friend_follower_id, intersecting_id)

# cleanup
r.delete(TEMP_UNION_KEY)

# plot the graph
plt.figure(1)
nx.draw_spring(graph, node_size=65, node_color='#7FA8FF', node_shape='o',
               edge_color='.1', with_labels=False, width=1.3)
g = nx.Graph() r = redis.Redis() LIST_NAME = "oslo-ids" starting_ids = list(r.smembers(LIST_NAME)) print starting_ids # Compute all ids for nodes appearing in the graph for _id in starting_ids: friend_ids = list(r.smembers(getRedisIdByUserId(_id, 'friend_ids'))) id_for_screen_name = _id ids = [id_for_screen_name] + friend_ids for current_id in ids: print >> sys.stderr, 'Processing user with id', current_id try: raw_current_info = r.get(getRedisIdByUserId(current_id, 'info.json' )) if not raw_current_info: # try to get it one more time print "Making req to Twitter API" os.system('python friends_followers__get_user_info_by_id.py ' + current_id) current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json'