Exemplo n.º 1
0
"""
Script makes a figure of the hourly volume of tweets with hashtags for one day (2015-06-10) from the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Turn on matplotlib's interactive mode.
plt.ion()

# Connect to the 'Sean_Hungary' database.
col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu',
    27011,
    'smapp_readOnly',
    'PASSWORD',
    'Sean_Hungary',
)

plt.figure(figsize=(12, 12))

# Narrow the collection to 2015-06-10 .. 2015-06-11, then draw the
# tweets-with-hashtags figure grouped by hour.
one_day = col.since(datetime(2015, 6, 10)).until(datetime(2015, 6, 11))
one_day.tweets_with_hashtags_figure(
    group_by='hours',
    xtick_format='%H',
    show=False,
)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
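"""
This example script finds all tweets sent by users who've set their location to Ukraine or Kiev,
and where the user's language is Russian or Ukrainian,
and saves them to a CSV file called ukraine_data.csv.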


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

# Connect to the 'Ukraine' database (address and credentials are placeholders).
collection = MongoTweetCollection(
    address='ACTUAL DB ADDRESS',
    port=27011,
    username='******',
    password='******',
    dbname='Ukraine',
)

# Tweet fields written to the CSV, in column order.
columns = [
    'id_str', 'timestamp',
    'coordinates.coordinates.0', 'coordinates.coordinates.1',
    'user.id_str', 'user.lang',
    'lang', 'text',
]

# Tweets from 2013-12-01 to 2013-12-02, filtered by the user's location
# and by the user's language.
matching = (
    collection
    .since(datetime(2013, 12, 1))
    .until(datetime(2013, 12, 2))
    .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew')
    .user_lang_containing('uk', 'ru')
)
matching.dump_csv('ukraine_data.csv', columns=columns)
Exemplo n.º 3
0
"""
Script makes a figure of the hourly volume of tweets with hashtags for one day (2015-06-10) from the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Enable interactive plotting.
plt.ion()

# Open the 'Sean_Hungary' database.
col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu', 27011,
    'smapp_readOnly', 'PASSWORD', 'Sean_Hungary')

plt.figure(figsize=(12, 12))

# Bounds of the single day being plotted.
period_start = datetime(2015, 6, 10)
period_end = datetime(2015, 6, 11)

# Draw the tweets-with-hashtags figure grouped by hour; x ticks use '%H'.
col.since(period_start).until(period_end).tweets_with_hashtags_figure(
    group_by='hours', xtick_format='%H', show=False)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')
Exemplo n.º 4
0
"""
Script makes a figure of the retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Turn on matplotlib's interactive mode.
plt.ion()

# Connect to the 'USElection2016_Hillary' database.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011,
                           'smapp_readOnly', 'PASSWORD',
                           'USElection2016_Hillary')

# The size has to be given through the `figsize` keyword: the first
# positional argument of plt.figure() is the figure identifier, so
# plt.figure(12, 12) does not create a 12x12 inch figure.
plt.figure(figsize=(12, 12))

# Plot tweets and retweets for the seven days ending now (UTC clock).
col.since(datetime.utcnow() -
          timedelta(days=7)).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
"""
This example script finds all tweets sent by users who've set their location to Ukraine or Kiev,
and where the user's language is Russian or Ukrainian,
and saves them to a CSV file called ukraine_data.csv.


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

# Open the 'Ukraine' database (address and credentials are placeholders).
collection = MongoTweetCollection(
    address='ACTUAL DB ADDRESS', port=27011,
    username='******', password='******',
    dbname='Ukraine')

# Fields exported to the CSV file, one column each.
columns = [
    'id_str',
    'timestamp',
    'coordinates.coordinates.0',
    'coordinates.coordinates.1',
    'user.id_str',
    'user.lang',
    'lang',
    'text',
]

# Narrow the query step by step: time window first, then user location,
# then user language.
window_start = datetime(2013, 12, 1)
window_end = datetime(2013, 12, 2)
query = collection.since(window_start).until(window_end)
query = query.user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew')
query = query.user_lang_containing('uk', 'ru')

# Write the selected tweets to disk.
query.dump_csv('ukraine_data.csv', columns=columns)
Exemplo n.º 6
0
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0,
                                   minute=0,
                                   second=0,
                                   microsecond=0,
                                   tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user,
                                      args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i * timedelta(days=1)
        day_end = n_days_ago + (i + 1) * timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts
    print("Done getting data from database.")
import pytz
import getpass
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from smapp_toolkit.plotting import line_with_annotations

# Connect to database; the password is prompted for instead of hard-coded.
print("Enter password for 'smapp_readOnly'")
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', getpass.getpass(), 'USElection2016_DTrumps')

# Set start time to a full day in the New York time zone.
# pytz zones must be attached with localize(): passing them through
# datetime.replace(tzinfo=...) uses the zone's historical local-mean-time
# offset (-4:56 for New York) instead of the offset valid on that date.
# The result is converted to UTC and made naive again.
new_york = pytz.timezone('America/New_York')
start_time = new_york.localize(datetime(2015,6,21)).astimezone(pytz.UTC).replace(tzinfo=None)
end_time = start_time + timedelta(days=1)

# Get the tweets-per-hour counts for that day from the database
data = col.since(start_time).until(end_time).group_by('hours').count()

# Define the events to annotate on the plot, as (time, label, position)
# tuples. NOTE(review): the times are naive datetimes; confirm which time
# zone line_with_annotations expects them in.
events = [
      (datetime(2015,6,21,10), 'Sunrise', 'top'),
      (datetime(2015,6,21,22), 'Sunset', 'bottom')
    ]

# Make plot, with x ticks formatted as hours:minutes for New York
plt.figure(figsize=(10,6))
line_with_annotations(data, events, x_tick_timezone='America/New_York', x_tick_date_format='%H:%M')
plt.title('Tweets mentioning Donald Trump\non 2015-6-21', fontsize=24)
plt.tight_layout()
plt.savefig('a_day_in_a_life.png')
Exemplo n.º 8
0
"""
Script makes a figure of the retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Enable matplotlib's interactive mode.
plt.ion()

# Connect to the 'USElection2016_Hillary' database.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary')

# plt.figure(12, 12) would treat the first 12 as the figure identifier
# rather than a dimension; the size must be passed as `figsize`.
plt.figure(figsize=(12, 12))

# Tweets and retweets from the last seven days, counted back from the
# current UTC time.
week_ago = datetime.utcnow() - timedelta(days=7)
col.since(week_ago).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')
    args = parser.parse_args()
    print("Generating avg tweets/user/day histogram for {}".format(args.database))

    TIMEZONE = pytz.timezone(args.timezone)
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'),
        today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i*timedelta(days=1)
        day_end   = n_days_ago + (i+1)*timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts
    print("Done getting data from database.")