""" Script makes figure of retweet propotion in the last week from the Hillary Clinton 2016 collection. @jonathanronen 2015/6 """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection plt.ion() col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary') plt.figure(figsize=(12,12)) col.since(datetime(2015,6,10)).until(datetime(2015,6,11)).tweets_with_hashtags_figure(group_by='hours', xtick_format='%H', show=False) plt.title('Tweets with hashtags volume for a day in the Hungary collection') plt.savefig('hungary_hts.png')
and where the user's language is russian or ukrainian, and saves them to a CSV file called ukraine_data.csv. """ from datetime import datetime from smapp_toolkit.twitter import MongoTweetCollection collection = MongoTweetCollection(address='ACTUAL DB ADDRESS', port=27011, username='******', password='******', dbname='Ukraine') columns = [ 'id_str', 'timestamp', 'coordinates.coordinates.0', 'coordinates.coordinates.1', 'user.id_str', 'user.lang', 'lang', 'text' ] collection.since(datetime(2013,12,1)) \ .until(datetime(2013,12,2)) \ .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew') \ .user_lang_containing('uk', 'ru') \ .dump_csv('ukraine_data.csv', columns=columns)
""" Script makes figure of retweet propotion in the last week from the Hillary Clinton 2016 collection. @jonathanronen 2015/6 """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection plt.ion() col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary') plt.figure(figsize=(12, 12)) col.since(datetime(2015, 6, 10)).until(datetime( 2015, 6, 11)).tweets_with_hashtags_figure(group_by='hours', xtick_format='%H', show=False) plt.title('Tweets with hashtags volume for a day in the Hungary collection') plt.savefig('hungary_hts.png')
""" Script makes figure of retweet propotion in the last week from the Hillary Clinton 2016 collection. @jonathanronen 2015/6 """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection plt.ion() col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary') plt.figure(12, 12) col.since(datetime.utcnow() - timedelta(days=7)).tweets_retweets_figure(show=False) plt.title('Tweets and RT volume from Hillary 2016 collection') plt.savefig('hillary_rts.png')
""" This example script finds all tweets sent by users who've set their location to ukraine or kiev, and where the user's language is russian or ukrainian, and saves them to a CSV file called ukraine_data.csv. """ from datetime import datetime from smapp_toolkit.twitter import MongoTweetCollection collection = MongoTweetCollection(address='ACTUAL DB ADDRESS', port=27011, username='******', password='******', dbname='Ukraine') columns = [ 'id_str', 'timestamp', 'coordinates.coordinates.0', 'coordinates.coordinates.1', 'user.id_str', 'user.lang', 'lang', 'text' ] collection.since(datetime(2013,12,1)) \ .until(datetime(2013,12,2)) \ .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew') \ .user_lang_containing('uk', 'ru') \ .dump_csv('ukraine_data.csv', columns=columns)
print("Days will be split according to time zone {}".format(args.timezone)) today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=TIMEZONE) n_days_ago = today - timedelta(days=args.days) print("The period being considered is {} to {}".format( n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d'))) print("Connecting to database") collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database) ntweets = collection.since(n_days_ago).until(today).count() print("Considering {} tweets".format(ntweets)) userids = set() counts = dict() for i in range(args.days): day_counts = defaultdict(lambda: 0) day_start = n_days_ago + i * timedelta(days=1) day_end = n_days_ago + (i + 1) * timedelta(days=1) print("Counting for {}".format(day_start.strftime('%Y-%m-%d'))) for tweet in collection.since(day_start).until(day_end): day_counts[tweet['user']['id']] += 1 userids.add(tweet['user']['id']) counts[day_start] = day_counts print("Done getting data from database.")
import pytz
import getpass
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from smapp_toolkit.plotting import line_with_annotations

# Connect to database
print("Enter password for 'smapp_readOnly'")
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', getpass.getpass(), 'USElection2016_DTrumps')

# Set start time to a full day in the New York time zone, expressed as a
# naive UTC datetime. pytz zones must be attached with localize():
# datetime.replace(tzinfo=pytz.timezone(...)) uses the zone's LMT offset
# (-4:56 for New York), which would shift the window by four minutes.
new_york = pytz.timezone('America/New_York')
start_time = new_york.localize(datetime(2015, 6, 21)).astimezone(pytz.UTC).replace(tzinfo=None)
end_time = start_time + timedelta(days=1)

# Get the tweets per hour data from the database
data = col.since(start_time).until(end_time).group_by('hours').count()

# Define the events to plot horizontal lines for
# NOTE(review): these are naive datetimes; presumably line_with_annotations
# interprets them as UTC -- confirm.
events = [
    (datetime(2015, 6, 21, 10), 'Sunrise', 'top'),
    (datetime(2015, 6, 21, 22), 'Sunset', 'bottom'),
]

# Make plot
plt.figure(figsize=(10, 6))
line_with_annotations(data, events, x_tick_timezone='America/New_York', x_tick_date_format='%H:%M')
plt.title('Tweets mentioning Donald Trump\non 2015-6-21', fontsize=24)
plt.tight_layout()
plt.savefig('a_day_in_a_life.png')
""" Script makes figure of retweet propotion in the last week from the Hillary Clinton 2016 collection. @jonathanronen 2015/6 """ import matplotlib.pyplot as plt from datetime import datetime, timedelta from smapp_toolkit.twitter import MongoTweetCollection plt.ion() col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary') plt.figure(12,12) col.since(datetime.utcnow()-timedelta(days=7)).tweets_retweets_figure(show=False) plt.title('Tweets and RT volume from Hillary 2016 collection') plt.savefig('hillary_rts.png')
args = parser.parse_args()
print("Generating avg tweets/user/day histogram for {}".format(args.database))

TIMEZONE = pytz.timezone(args.timezone)
print("Days will be split according to time zone {}".format(args.timezone))

# pytz zones must be attached with localize(); datetime.replace(tzinfo=...)
# silently uses the zone's LMT offset (e.g. -4:56 for America/New_York).
# The wall-clock date is also taken in TIMEZONE rather than in the machine's
# local zone, so "today" really is today in the requested zone.
naive_today = datetime.now(TIMEZONE).replace(
    hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
naive_start = naive_today - timedelta(days=args.days)
today = TIMEZONE.localize(naive_today)
n_days_ago = TIMEZONE.localize(naive_start)
print("The period being considered is {} to {}".format(
    n_days_ago.strftime('%Y-%m-%d'),
    today.strftime('%Y-%m-%d')))

print("Connecting to database")
collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database)
ntweets = collection.since(n_days_ago).until(today).count()
print("Considering {} tweets".format(ntweets))

userids = set()   # every user id seen in the period
counts = dict()   # day start (aware datetime) -> {user id: tweets that day}
for i in range(args.days):
    day_counts = defaultdict(int)
    # Localize each midnight separately so day boundaries stay at local
    # midnight across DST changes.
    day_start = TIMEZONE.localize(naive_start + timedelta(days=i))
    day_end = TIMEZONE.localize(naive_start + timedelta(days=i + 1))
    print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
    for tweet in collection.since(day_start).until(day_end):
        user_id = tweet['user']['id']
        day_counts[user_id] += 1
        userids.add(user_id)
    counts[day_start] = day_counts
print("Done getting data from database.")