"""
Script makes a figure of the retweet proportion over the last week from the
Hillary Clinton 2016 collection, and saves it to hillary_rts.png.

@jonathanronen 2015/6
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Turn on matplotlib's interactive mode.
plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary')

# The size has to be given as figsize=(width, height). Written positionally,
# plt.figure(12, 12) is read as num=12, figsize=12, which is not a valid size.
plt.figure(figsize=(12, 12))

# Restrict to tweets from the last seven days (UTC clock) before plotting.
one_week_ago = datetime.utcnow() - timedelta(days=7)
col.since(one_week_ago).tweets_retweets_figure(show=False)

plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
"""
Script makes a figure of the volume of tweets with hashtags, grouped by hour,
for one day (June 10 2015) from the Hungary collection, and saves it to
hungary_hts.png.

@jonathanronen 2015/6
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary')

# Boundaries of the single day being plotted.
day_start = datetime(2015, 6, 10)
day_end = datetime(2015, 6, 11)

plt.figure(figsize=(12, 12))

tweets_of_the_day = col.since(day_start).until(day_end)
tweets_of_the_day.tweets_with_hashtags_figure(
    group_by='hours',
    xtick_format='%H',
    show=False,
)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
"""
This script counts all tweets from the Ukraine collection sent in the past
12 hours where the user's specified 'location' field has one of the following
words in it:
 - Kiev
 - Kyiv
 - Kiew
"""
from smapp_toolkit.twitter import MongoTweetCollection
from datetime import datetime, timedelta

# The address and credentials below are placeholders to be filled in.
collection = MongoTweetCollection(address='WRITE REAL DATABASE ADDRESS HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ukraine')

# Cut-off for "the past 12 hours", measured on the UTC clock.
twelve_hours_ago = datetime.utcnow() - timedelta(hours=12)

matched = collection.user_location_containing('kiev', 'kiew', 'kyiv').since(twelve_hours_ago).count()

# Call form of print: with a single argument it produces the same output on
# Python 2 and is also valid on Python 3, unlike the print statement.
print("Matched {} tweets.".format(matched))
"""
Script makes a figure of the volume of tweets with hashtags, grouped by hour,
for one day (June 10 2015) from the Hungary collection, and saves it to
hungary_hts.png.

@jonathanronen 2015/6
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary')

# Boundaries of the single day being plotted.
window_start = datetime(2015, 6, 10)
window_end = datetime(2015, 6, 11)

plt.figure(figsize=(12, 12))

one_day_of_tweets = col.since(window_start).until(window_end)
one_day_of_tweets.tweets_with_hashtags_figure(
    group_by='hours',
    xtick_format='%H',
    show=False,
)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
"""
This script shows how you can use smapp-toolkit to plot tweet languages by
time unit.

As a demonstration it plots English, Spanish and 'other' tweets from the Ebola
collection in one-minute steps, for the 60 minutes starting at 20:00 on
December 3 2014.

@jonathanronen
"""
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

start_time = datetime(2014, 12, 3, 20)

# The address and credentials below are placeholders to be filled in.
connection_settings = dict(
    address='WRITE REAL DATABASE ADDRESS HERE',
    port=27011,
    username='******',
    password='******',
    dbname='Ebola',
)
collection = MongoTweetCollection(**connection_settings)

# Languages to plot and the color used for each, in matching order.
plotted_languages = ['en', 'es', 'other']
plotted_colors = ['red', 'royalblue', 'grey']

# 60 steps of one minute each cover a single hour.
one_minute = timedelta(minutes=1)

collection.languages_per_day_figure(
    start=start_time,
    step_size=one_minute,
    num_steps=60,
    languages=plotted_languages,
    language_colors=plotted_colors,
)
"""
Script demonstrates the "geolocation_names_per_day_figure" and
"user_locations_per_day_figure" functionality on the Britain_Geo collection.

Two figures are written: geolocation_names.png and user_locations.png.

@jonathanronen 2015/6
"""
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Britain_Geo')

# Both figures use the same time axis: six ten-minute steps starting at
# 17:34 on January 12 2015, with ticks labelled as hour:minute.
time_axis = dict(
    start=datetime(2015, 1, 12, 17, 34),
    step_size=timedelta(minutes=10),
    num_steps=6,
    xtick_format='%H:%M',
)
figure_size = (10, 6)

# Figure 1: geolocation names (n_names=5).
plt.figure(figsize=figure_size)
col.geolocation_names_per_day_figure(n_names=5, **time_axis)
plt.savefig('geolocation_names.png')

# Figure 2: user-specified locations (n_names=8).
plt.figure(figsize=figure_size)
col.user_locations_per_day_figure(n_names=8, **time_axis)
plt.savefig('user_locations.png')
You are invited to plot this for the "EricGarner" collection, which will show the enormous volume of tweets using #ericgarner following that second no-indictment decision. @jonathanronen """ from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection from matplotlib import pyplot as plt start_time = datetime(2014, 12, 3, 20) collection = MongoTweetCollection( address='REAL SERVER', port=27011, username='******', password='******', dbname='IfTheyGunnedMeDown' ) events = [ (19, 'No indictment for Darren Wilson', 'bottom'), # nov 24 (28, 'No indictment for Daniel Pantaleo', 'top'), # dec 3 ] collection.tweets_per_day_with_annotations_figure( start=datetime(2014,11,5), num_steps=31, step_size=timedelta(days=1), alpha=.4, line_width=2.0,
"""
This script shows how you can use smapp-toolkit to plot tweet languages by
time unit.

As a demonstration it plots English, Spanish and 'other' tweets from the Ebola
collection in one-minute steps, for the 60 minutes starting at 20:00 on
December 3 2014.

@jonathanronen
"""
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

start_time = datetime(2014, 12, 3, 20)

# The address and credentials below are placeholders to be filled in.
connection_settings = dict(
    address='WRITE REAL DATABASE ADDRESS HERE',
    port=27011,
    username='******',
    password='******',
    dbname='Ebola',
)
collection = MongoTweetCollection(**connection_settings)

# Languages to plot and the color used for each, in matching order.
plotted_languages = ['en', 'es', 'other']
plotted_colors = ['red', 'royalblue', 'grey']

# 60 steps of one minute each cover a single hour.
one_minute = timedelta(minutes=1)

collection.languages_per_day_figure(
    start=start_time,
    step_size=one_minute,
    num_steps=60,
    languages=plotted_languages,
    language_colors=plotted_colors,
)
"""
This example script finds all tweets from the Ukraine collection, between
December 1 and December 2 2013, sent by users whose location contains
'ukraine', 'kiev', 'kyiv' or 'kiew' and whose user language is Ukrainian or
Russian, and saves them to a CSV file called ukraine_data.csv.
"""
from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

# The address and credentials below are placeholders to be filled in.
collection = MongoTweetCollection(
    address='ACTUAL DB ADDRESS',
    port=27011,
    username='******',
    password='******',
    dbname='Ukraine',
)

# Fields written to the CSV file, one column each.
columns = [
    'id_str',
    'timestamp',
    'coordinates.coordinates.0',
    'coordinates.coordinates.1',
    'user.id_str',
    'user.lang',
    'lang',
    'text',
]

# Build the filtered query first, then dump it.
matching_tweets = (
    collection
    .since(datetime(2013, 12, 1))
    .until(datetime(2013, 12, 2))
    .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew')
    .user_lang_containing('uk', 'ru')
)
matching_tweets.dump_csv('ukraine_data.csv', columns=columns)
"""
Script demonstrates the "geolocation_names_per_day_figure" and
"user_locations_per_day_figure" functionality on the Britain_Geo collection.

Two figures are written: geolocation_names.png and user_locations.png.

@jonathanronen 2015/6
"""
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Britain_Geo')

# Both figures use the same time axis: six ten-minute steps starting at
# 17:34 on January 12 2015, with ticks labelled as hour:minute.
shared_axis = dict(
    start=datetime(2015, 1, 12, 17, 34),
    step_size=timedelta(minutes=10),
    num_steps=6,
    xtick_format='%H:%M',
)
size_inches = (10, 6)

# Figure 1: geolocation names (n_names=5).
plt.figure(figsize=size_inches)
col.geolocation_names_per_day_figure(n_names=5, **shared_axis)
plt.savefig('geolocation_names.png')

# Figure 2: user-specified locations (n_names=8).
plt.figure(figsize=size_inches)
col.user_locations_per_day_figure(n_names=8, **shared_axis)
plt.savefig('user_locations.png')
args.database)) TIMEZONE = pytz.timezone(args.timezone) print("Days will be split according to time zone {}".format(args.timezone)) today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=TIMEZONE) n_days_ago = today - timedelta(days=args.days) print("The period being considered is {} to {}".format( n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d'))) print("Connecting to database") collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database) ntweets = collection.since(n_days_ago).until(today).count() print("Considering {} tweets".format(ntweets)) userids = set() counts = dict() for i in range(args.days): day_counts = defaultdict(lambda: 0) day_start = n_days_ago + i * timedelta(days=1) day_end = n_days_ago + (i + 1) * timedelta(days=1) print("Counting for {}".format(day_start.strftime('%Y-%m-%d'))) for tweet in collection.since(day_start).until(day_end): day_counts[tweet['user']['id']] += 1 userids.add(tweet['user']['id']) counts[day_start] = day_counts
Demonstrate how to plot tweets per hour with annotation lines, using the "data-then-plot" framework (the smapp_toolkit.plotting module) @jonathanronen 2015/6 """ import pytz import getpass import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection from smapp_toolkit.plotting import line_with_annotations # Connect to database print("Enter password for 'smapp_readOnly'") col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', getpass.getpass(), 'USElection2016_DTrumps') # Set start time to a full day in the New York time zone start_time = datetime(2015,6,21).replace(tzinfo=pytz.timezone('America/New_York')).astimezone(pytz.UTC).replace(tzinfo=None) end_time = start_time + timedelta(days=1) # Get the tweets per day data from the database data = col.since(start_time).until(end_time).group_by('hours').count() # Define the events to plot horizontal lines for events = [ (datetime(2015,6,21,10), 'Sunrise', 'top'), (datetime(2015,6,21,22), 'Sunset', 'bottom') ] # Make plot
"""
Script makes a figure of the retweet proportion over the last week from the
Hillary Clinton 2016 collection, and saves it to hillary_rts.png.

@jonathanronen 2015/6
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Turn on matplotlib's interactive mode.
plt.ion()

# 'PASSWORD' is a placeholder; substitute the real read-only password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary')

# The size has to be given as figsize=(width, height). Written positionally,
# plt.figure(12, 12) is read as num=12, figsize=12, which is not a valid size.
plt.figure(figsize=(12, 12))

# Restrict to tweets from the last seven days (UTC clock) before plotting.
one_week_ago = datetime.utcnow() - timedelta(days=7)
col.since(one_week_ago).tweets_retweets_figure(show=False)

plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
one of total tweets per minute for 1 hour on november 1st, the other of only tweets containing the word "death" in that same hour. @jonathanronen """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection start_time = datetime(2014, 12, 3, 20) collection = MongoTweetCollection( address='REAL SERVER HERE', port=27011, username='******', password='******', dbname='Ebola' ) start_time = datetime(2014,11,1) plt.figure() plt.subplot(211) bins, counts = collection.tweets_over_time_figure( start_time, step_size=timedelta(minutes=1), num_steps=60, show=False) plt.title('All tweets')
Script demonstrates plotting two histograms from the Ebola collection: one of total tweets per minute for 1 hour on november 1st, the other of only tweets containing the word "death" in that same hour. @jonathanronen """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection start_time = datetime(2014, 12, 3, 20) collection = MongoTweetCollection(address='REAL SERVER HERE', port=27011, username='******', password='******', dbname='Ebola') start_time = datetime(2014, 11, 1) plt.figure() plt.subplot(211) bins, counts = collection.tweets_over_time_figure( start_time, step_size=timedelta(minutes=1), num_steps=60, show=False) plt.title('All tweets') plt.subplot(212) bins, counts = collection.containing('death').tweets_over_time_figure( start_time, step_size=timedelta(minutes=1), num_steps=60, show=False) plt.title('Tweets containing "death"')
""" This example script finds all tweets sent by users who've set their location to ukraine or kiev, and where the user's language is russian or ukrainian, and saves them to a CSV file called ukraine_data.csv. """ from datetime import datetime from smapp_toolkit.twitter import MongoTweetCollection collection = MongoTweetCollection(address='ACTUAL DB ADDRESS', port=27011, username='******', password='******', dbname='Ukraine') columns = [ 'id_str', 'timestamp', 'coordinates.coordinates.0', 'coordinates.coordinates.1', 'user.id_str', 'user.lang', 'lang', 'text' ] collection.since(datetime(2013,12,1)) \ .until(datetime(2013,12,2)) \ .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew') \
parser.add_argument('--output-file', default='histogram.png', help='Output file [histogram.png]')
args = parser.parse_args()

print("Generating avg tweets/user/day histogram for {}".format(args.database))

TIMEZONE = pytz.timezone(args.timezone)
print("Days will be split according to time zone {}".format(args.timezone))

# Midnight at the start of the current day in the requested time zone.
# A pytz zone has to be attached with localize(): handing it to
# replace(tzinfo=...) picks the zone's earliest historical (LMT) offset, which
# shifts every day boundary by some minutes. Asking for now() in TIMEZONE also
# makes "today" the zone's calendar date instead of the machine's.
local_midnight = datetime.now(TIMEZONE).replace(
    hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
today = TIMEZONE.localize(local_midnight)
n_days_ago = today - timedelta(days=args.days)
print("The period being considered is {} to {}".format(
    n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')))

print("Connecting to database")
collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database)

ntweets = collection.since(n_days_ago).until(today).count()
print("Considering {} tweets".format(ntweets))

# userids: every user id seen in the period.
# counts: maps each day's start datetime to {user id: tweets sent that day}.
userids = set()
counts = {}
one_day = timedelta(days=1)
for i in range(args.days):
    day_counts = defaultdict(int)
    day_start = n_days_ago + i * one_day
    day_end = n_days_ago + (i + 1) * one_day
    print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
    for tweet in collection.since(day_start).until(day_end):
        user_id = tweet['user']['id']
        day_counts[user_id] += 1
        userids.add(user_id)
    counts[day_start] = day_counts