def analyze_users_in_search_results(t, q, max_batches=5, count=100):
    """Run a search and index the users who appear in the results.

    Parameters:
      t           -- an authenticated Twitter API connection (as returned
                     by oauth_login -- assumed; confirm against caller)
      q           -- the search query string
      max_batches -- maximum number of result batches to fetch
      count       -- number of results to request per batch

    Returns a 3-tuple of dicts, each keyed by screen name:
      (screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids)
    """

    # Search for something
    statuses = search(t, q=q, max_batches=max_batches, count=count)

    # Extract the screen names from the results
    # and optionally map them to a useful field like the tweet id
    screen_name_to_tweet_ids = {}
    screen_name_to_info = {}
    screen_name_to_location = {}

    for status in statuses:

        screen_name = status['user']['screen_name']

        # setdefault replaces the deprecated dict.has_key() check
        # (has_key was removed in Python 3) and avoids a second lookup.
        screen_name_to_tweet_ids.setdefault(screen_name, []).append(status['id'])

        screen_name_to_info[screen_name] = status['user']

        screen_name_to_location[screen_name] = status['user']['location']

        # Note that the "location" field can
        # be anything a user has typed in, and may be something like "Everywhere",
        # "United States" or something else that won't geocode to a specific coordinate
        # on a map.

    # Use the various screen_name_to{tweet_ids, info, location} maps to determine
    # interesting things about the people who appear in the search results.

    return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids
def analyze_users_in_search_results(t, q, max_batches=5, count=100):
    """Search for *q* and build per-user indexes from the results.

    Parameters:
      t           -- an authenticated Twitter API connection (assumed to be
                     the object oauth_login returns -- confirm at the caller)
      q           -- search query string
      max_batches -- maximum number of result batches to fetch
      count       -- results requested per batch

    Returns (screen_name_to_info, screen_name_to_location,
    screen_name_to_tweet_ids), all keyed by screen name.
    """

    # Search for something
    statuses = search(t, q=q, max_batches=max_batches, count=count)

    # Extract the screen names from the results
    # and optionally map them to a useful field like the tweet id
    screen_name_to_tweet_ids = {}
    screen_name_to_info = {}
    screen_name_to_location = {}

    for status in statuses:

        screen_name = status['user']['screen_name']

        # Membership test replaces the deprecated dict.has_key()
        # (removed in Python 3; "in" works in both Python 2 and 3).
        if screen_name not in screen_name_to_tweet_ids:
            screen_name_to_tweet_ids[screen_name] = []

        screen_name_to_tweet_ids[screen_name].append(status['id'])

        screen_name_to_info[screen_name] = status['user']

        screen_name_to_location[screen_name] = status['user']['location']

        # Note that the "location" field can
        # be anything a user has typed in, and may be something like "Everywhere",
        # "United States" or something else that won't geocode to a specific coordinate
        # on a map.

    # Use the various screen_name_to{tweet_ids, info, location} maps to determine
    # interesting things about the people who appear in the search results.

    return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids
) return g if __name__ == '__main__': # Your query Q = ' '.join(sys.argv[1]) # How many batches of data to grab for the search results MAX_BATCHES = 2 # How many search results per page COUNT = 100 # Get some search results for a query t = oauth_login() search_results = search(t, q=Q, max_batches=MAX_BATCHES, count=COUNT) g = create_rt_graph(search_results) # Print out some stats print >> sys.stderr, "Number nodes:", g.number_of_nodes() print >> sys.stderr, "Num edges:", g.number_of_edges() print >> sys.stderr, "Num connected components:", \ len(nx.connected_components(g.to_undirected())) print >> sys.stderr, "Node degrees:", sorted(nx.degree(g))
) return g if __name__ == "__main__": # Your query Q = " ".join(sys.argv[1]) # How many batches of data to grab for the search results MAX_BATCHES = 2 # How many search results per page COUNT = 100 # Get some search results for a query t = oauth_login() search_results = search(t, q=Q, max_batches=MAX_BATCHES, count=COUNT) g = create_rt_graph(search_results) # Print out some stats print >>sys.stderr, "Number nodes:", g.number_of_nodes() print >>sys.stderr, "Num edges:", g.number_of_edges() print >>sys.stderr, "Num connected components:", len(nx.connected_components(g.to_undirected())) print >>sys.stderr, "Node degrees:", sorted(nx.degree(g))
# Inspect the tweet to see if was produced with /statuses/retweet/:id # See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='Python', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)
# -*- coding: utf-8 -*- import json from recipe__oauth_login import oauth_login from recipe__search import search def get_entities(tweet): return tweet['entities'] if __name__ == '__main__': t = oauth_login() tweets = search(t, "Python", max_batches=1, count=10) # Use "Python" as a sample query to get some tweets to process entities = [ get_entities(tweet) for tweet in tweets ] print json.dumps(entities, indent=1)
# See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='RKP1107', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)
# Extract geo coordinates from search results import sys import twitter from recipe__oauth_login import oauth_login from recipe__search import search Q = ' '.join(sys.argv[1:]) t = oauth_login() statuses = search(t, q=Q, max_batches=10, count=100) # Extract geocoordinates from tweets in search results coords = [ status['geo'] for status in statuses if status['geo'] is not None ] print coords
# Extract geo coordinates from search results import sys import twitter from recipe__oauth_login import oauth_login from recipe__search import search Q = ' '.join(sys.argv[1:]) t = oauth_login() statuses = search(t, q=Q, max_batches=10, count=100) # Extract geocoordinates from tweets in search results coords = [status['geo'] for status in statuses if status['geo'] is not None] print coords
# See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='Python', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)