Python MongoTweetCollection示例，smapp_toolkit.twitter.MongoTweetCollection Python示例

示例#1

0

显示文件

文件： tweets_retweets.py 项目： kbenoit/smapp-toolkit

"""
Script makes a figure of the retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the read-only account's password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011,
                           'smapp_readOnly', 'PASSWORD',
                           'USElection2016_Hillary')

# The size has to be given through the ``figsize`` keyword as a
# (width, height) pair. Written positionally as ``plt.figure(12, 12)`` the
# first 12 is taken as the figure number and the second as ``figsize``.
plt.figure(figsize=(12, 12))

# Restrict the collection to tweets since seven days before the current UTC
# time, then draw the tweets/retweets figure without showing it so that it
# can be titled and saved below.
one_week_ago = datetime.utcnow() - timedelta(days=7)
col.since(one_week_ago).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')

示例#2

0

显示文件

文件： tweets_hashtags.py 项目： IWhisper/smapp-toolkit

"""
Script makes a figure of the hourly volume of tweets with hashtags for one day (June 10, 2015) from the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the real password.
col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu',
    27011,
    'smapp_readOnly',
    'PASSWORD',
    'Sean_Hungary',
)
plt.figure(figsize=(12, 12))

# Narrow the collection to the span between 2015-06-10 and 2015-06-11, then
# draw the tweets-with-hashtags figure grouped by hours with '%H' tick labels.
one_day = col.since(datetime(2015, 6, 10)).until(datetime(2015, 6, 11))
one_day.tweets_with_hashtags_figure(
    group_by='hours',
    xtick_format='%H',
    show=False,
)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')

示例#3

0

显示文件

文件： count_tweets_from_specific_user_location.py 项目： kbenoit/smapp-toolkit

"""
This script counts all tweets from the Ukraine collection sent in the past 12 hours where the user's
specified 'location' field has one of the following words in it:
 - Kiev
 - Kyiv
 - Kiew
"""

from smapp_toolkit.twitter import MongoTweetCollection
from datetime import datetime, timedelta

# Placeholder connection settings: fill in the real address and credentials.
collection = MongoTweetCollection(address='WRITE REAL DATABASE ADDRESS HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ukraine')

# Lower bound of the query window, computed from the current UTC time.
twelve_hours_ago = datetime.utcnow() - timedelta(hours=12)

# Count tweets since that moment whose user location matches any spelling.
matched = collection.user_location_containing(
    'kiev', 'kiew', 'kyiv').since(twelve_hours_ago).count()

# Call form of print with a single argument: valid on Python 2 and Python 3,
# whereas the bare ``print "..."`` statement is a syntax error on Python 3.
print("Matched {} tweets.".format(matched))

示例#4

0

显示文件

文件： tweets_hashtags.py 项目： kbenoit/smapp-toolkit

"""
Script makes a figure of the hourly volume of tweets with hashtags for one day (June 10, 2015) from the Hungary collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the real password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Sean_Hungary')
plt.figure(figsize=(12, 12))

# Bounds of the single day that gets plotted.
day_start = datetime(2015, 6, 10)
day_end = datetime(2015, 6, 11)

# Tweets with hashtags between the two bounds, grouped by hours; the figure
# is not shown so that it can be titled and written to disk below.
col.since(day_start).until(day_end).tweets_with_hashtags_figure(
    group_by='hours', xtick_format='%H', show=False)

plt.title('Tweets with hashtags volume for a day in the Hungary collection')
plt.savefig('hungary_hts.png')

示例#5

0

显示文件

文件： plot_tweet_languages_per_timeunit.py 项目： IWhisper/smapp-toolkit

"""
This script shows how you can use smapp-toolkit to plot tweets languages by time unit.
For the purpose of demonstration, we'll plot english and spanish tweets about Ebola by minute for one hour
on December 3 2014.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Start of the plotted window: December 3, 2014 at hour 20.
start_time = datetime(2014, 12, 3, 20)

# Placeholder connection settings: fill in the real address and credentials.
collection = MongoTweetCollection(address='WRITE REAL DATABASE ADDRESS HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ebola')

# Sixty steps of one minute each, beginning at start_time.
one_minute = timedelta(minutes=1)

# NOTE(review): colors presumably pair with languages by position - confirm.
tracked_languages = ['en', 'es', 'other']
palette = ['red', 'royalblue', 'grey']

collection.languages_per_day_figure(start=start_time,
                                    step_size=one_minute,
                                    num_steps=60,
                                    languages=tracked_languages,
                                    language_colors=palette)

示例#6

0

显示文件

文件： locations_by_timeunit.py 项目： IWhisper/smapp-toolkit

"""
Script demonstrates the "geolocation_names_per_day_figure" and "user_locations_per_day_figure" functionality.

@jonathanronen 2015/6
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the real password.
col = MongoTweetCollection(
    'smapp.politics.fas.nyu.edu', 27011,
    'smapp_readOnly', 'PASSWORD', 'Britain_Geo')

# Both figures share the same time axis: six 10-minute steps starting at
# 2015-01-12 17:34, with ticks formatted as hour:minute.
time_axis = dict(
    start=datetime(2015, 1, 12, 17, 34),
    step_size=timedelta(minutes=10),
    num_steps=6,
    xtick_format='%H:%M',
)

# Figure 1: geolocation names, n_names=5.
plt.figure(figsize=(10, 6))
col.geolocation_names_per_day_figure(n_names=5, **time_axis)
plt.savefig('geolocation_names.png')

# Figure 2: user locations, n_names=8.
plt.figure(figsize=(10, 6))
col.user_locations_per_day_figure(n_names=8, **time_axis)
plt.savefig('user_locations.png')

示例#7

0

显示文件

文件： plot_tweets_per_day_with_annotations.py 项目： kbenoit/smapp-toolkit

"""
You are invited to plot this for the "EricGarner" collection, which will show the enormous
volume of tweets using #ericgarner following that second no-indictment decision.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from matplotlib import pyplot as plt

# NOTE(review): start_time is not referenced by any statement visible in this
# excerpt (the call below passes its own start) - confirm whether it is needed.
start_time = datetime(2014, 12, 3, 20)

# Placeholder connection settings: fill in the real server and credentials.
collection = MongoTweetCollection(
    address='REAL SERVER',
    port=27011,
    username='******',
    password='******',
    dbname='IfTheyGunnedMeDown'
)

# Annotations as (position, label, placement) tuples.
# NOTE(review): the first element looks like a number of day-steps counted from
# the plot start (2014-11-05 + 19 days = Nov 24, + 28 days = Dec 3, matching
# the trailing comments) - confirm against the plotting method.
events = [
    (19,  'No indictment for Darren Wilson', 'bottom'), # nov 24
    (28, 'No indictment for Daniel Pantaleo', 'top'),  # dec 3
]

collection.tweets_per_day_with_annotations_figure(
    start=datetime(2014,11,5),
    num_steps=31,
    step_size=timedelta(days=1),
    alpha=.4,
    line_width=2.0,

示例#8

0

显示文件

"""
This script shows how you can use smapp-toolkit to plot tweets languages by time unit.
For the purpose of demonstration, we'll plot english and spanish tweets about Ebola by minute for one hour
on December 3 2014.

@jonathanronen
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Start of the plotted window: December 3, 2014 at hour 20.
start_time = datetime(2014, 12, 3, 20)

# Placeholder connection settings: fill in the real address and credentials.
connection_settings = {
    'address': 'WRITE REAL DATABASE ADDRESS HERE',
    'port': 27011,
    'username': '******',
    'password': '******',
    'dbname': 'Ebola',
}
collection = MongoTweetCollection(**connection_settings)

# Sixty one-minute steps starting at start_time, split into English, Spanish
# and 'other' with one color listed per language entry.
collection.languages_per_day_figure(start=start_time,
                                    step_size=timedelta(minutes=1),
                                    num_steps=60,
                                    languages=['en', 'es', 'other'],
                                    language_colors=['red', 'royalblue', 'grey'])

示例#9

0

显示文件

文件： save_all_tweets_with_location_in_kiev_to_csv.py 项目： kbenoit/smapp-toolkit

"""
This example script finds all tweets sent by users who've set their location to ukraine or kiev,
and where the user's language is russian or ukrainian,
and saves them to a CSV file called ukraine_data.csv.


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

# Placeholder connection settings: fill in the real address and credentials.
collection = MongoTweetCollection(
    address='ACTUAL DB ADDRESS',
    port=27011,
    username='******',
    password='******',
    dbname='Ukraine',
)

# Fields written to the CSV file, passed to dump_csv as ``columns``.
columns = [
    'id_str',
    'timestamp',
    'coordinates.coordinates.0',
    'coordinates.coordinates.1',
    'user.id_str',
    'user.lang',
    'lang',
    'text',
]

# Bounds of the queried time span.
window_start = datetime(2013, 12, 1)
window_end = datetime(2013, 12, 2)

# Apply the filters in the same order as a single chained expression would:
# time span first, then user location, then user language.
in_window = collection.since(window_start).until(window_end)
located = in_window.user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew')
matching = located.user_lang_containing('uk', 'ru')

matching.dump_csv('ukraine_data.csv', columns=columns)

示例#10

0

显示文件

"""
Script demonstrates the "geolocation_names_per_day_figure" and "user_locations_per_day_figure" functionality.

@jonathanronen 2015/6
"""

from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
import matplotlib.pyplot as plt

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the real password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'Britain_Geo')

# Both figures use six 10-minute steps starting at 2015-01-12 17:34.
window_start = datetime(2015, 1, 12, 17, 34)
ten_minutes = timedelta(minutes=10)

# Figure 1: geolocation names, n_names=5.
plt.figure(figsize=(10, 6))
col.geolocation_names_per_day_figure(
    start=window_start, step_size=ten_minutes, num_steps=6,
    n_names=5, xtick_format='%H:%M')
plt.savefig('geolocation_names.png')

# Figure 2: user locations, n_names=8.
plt.figure(figsize=(10, 6))
col.user_locations_per_day_figure(
    start=window_start, step_size=ten_minutes, num_steps=6,
    n_names=8, xtick_format='%H:%M')
plt.savefig('user_locations.png')

示例#11

0

显示文件

        args.database))

    TIMEZONE = pytz.timezone(args.timezone)
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0,
                                   minute=0,
                                   second=0,
                                   microsecond=0,
                                   tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user,
                                      args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i * timedelta(days=1)
        day_end = n_days_ago + (i + 1) * timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts

示例#12

0

显示文件

文件： plot_tweets_per_hour_with_annotations.py 项目： kbenoit/smapp-toolkit

"""
Demonstrate how to plot tweets per hour with annotation lines,
using the "data-then-plot" framework (the smapp_toolkit.plotting module)

@jonathanronen 2015/6
"""

import pytz
import getpass
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection
from smapp_toolkit.plotting import line_with_annotations

# Connect to database; the password is read interactively instead of being hard-coded
print("Enter password for 'smapp_readOnly'")
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', getpass.getpass(), 'USElection2016_DTrumps')

# Set start time to midnight of a full day in the New York time zone, converted
# to a naive UTC datetime. A pytz zone has to be attached with localize():
# datetime.replace(tzinfo=<pytz zone>) uses the zone's first recorded offset
# (local mean time, -4:56 for New York) rather than the offset in force on that
# date, which would shift the window by several minutes.
new_york = pytz.timezone('America/New_York')
start_time = new_york.localize(datetime(2015, 6, 21)).astimezone(pytz.UTC).replace(tzinfo=None)
end_time = start_time + timedelta(days=1)

# Get the tweets per hour data from the database
data = col.since(start_time).until(end_time).group_by('hours').count()

# Define the events to plot horizontal lines for
# NOTE(review): these are naive datetimes, presumably on the same (UTC) clock as start_time - confirm
events = [
      (datetime(2015,6,21,10), 'Sunrise', 'top'),
      (datetime(2015,6,21,22), 'Sunset', 'bottom')
    ]

# Make plot

示例#13

0

显示文件

文件： tweets_retweets.py 项目： IWhisper/smapp-toolkit

"""
Script makes a figure of the retweet proportion in the last week from the Hillary Clinton 2016 collection.

@jonathanronen 2015/6
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

plt.ion()

# Connection settings; 'PASSWORD' is a placeholder for the read-only account's password.
col = MongoTweetCollection('smapp.politics.fas.nyu.edu', 27011, 'smapp_readOnly', 'PASSWORD', 'USElection2016_Hillary')

# The size has to be given through the ``figsize`` keyword as a
# (width, height) pair. Written positionally as ``plt.figure(12,12)`` the
# first 12 is taken as the figure number and the second as ``figsize``.
plt.figure(figsize=(12, 12))

# Restrict the collection to tweets since seven days before the current UTC
# time, then draw the tweets/retweets figure without showing it so that it
# can be titled and saved below.
one_week_ago = datetime.utcnow() - timedelta(days=7)
col.since(one_week_ago).tweets_retweets_figure(show=False)
plt.title('Tweets and RT volume from Hillary 2016 collection')
plt.savefig('hillary_rts.png')

示例#14

0

显示文件

文件： barcharts.py 项目： IWhisper/smapp-toolkit

"""
Script demonstrates plotting two histograms from the Ebola collection:
one of total tweets per minute for 1 hour on November 1st,
the other of only tweets containing the word "death" in that same hour.

@jonathanronen
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Placeholder connection settings: fill in the real server and credentials.
collection = MongoTweetCollection(
    address='REAL SERVER HERE',
    port=27011,
    username='******',
    password='******',
    dbname='Ebola'
)

# Start of the plotted hour: November 1, 2014 at 00:00. This is the only
# assignment of start_time; a stale value that was overwritten before any
# use has been dropped.
start_time = datetime(2014, 11, 1)
plt.figure()

# Upper panel of a two-row layout: sixty one-minute steps from start_time.
plt.subplot(211)
bins, counts = collection.tweets_over_time_figure(
    start_time,
    step_size=timedelta(minutes=1),
    num_steps=60,
    show=False)
plt.title('All tweets')

示例#15

0

显示文件

文件： barcharts.py 项目： kbenoit/smapp-toolkit

"""
Script demonstrates plotting two histograms from the Ebola collection:
one of total tweets per minute for 1 hour on November 1st,
the other of only tweets containing the word "death" in that same hour.

@jonathanronen
"""

import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from smapp_toolkit.twitter import MongoTweetCollection

# Placeholder connection settings: fill in the real server and credentials.
collection = MongoTweetCollection(address='REAL SERVER HERE',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ebola')

# Start of the plotted hour: November 1, 2014 at 00:00. This is the only
# assignment of start_time; a stale value that was overwritten before any
# use has been dropped.
start_time = datetime(2014, 11, 1)
plt.figure()

# Upper panel: all tweets, sixty one-minute steps from start_time.
plt.subplot(211)
bins, counts = collection.tweets_over_time_figure(
    start_time, step_size=timedelta(minutes=1), num_steps=60, show=False)
plt.title('All tweets')

# Lower panel: same time axis, restricted via containing('death').
plt.subplot(212)
bins, counts = collection.containing('death').tweets_over_time_figure(
    start_time, step_size=timedelta(minutes=1), num_steps=60, show=False)
plt.title('Tweets containing "death"')

示例#16

0

显示文件

文件： save_all_tweets_with_location_in_kiev_to_csv.py 项目： IWhisper/smapp-toolkit

"""
This example script finds all tweets sent by users who've set their location to ukraine or kiev,
and where the user's language is russian or ukrainian,
and saves them to a CSV file called ukraine_data.csv.


"""

from datetime import datetime
from smapp_toolkit.twitter import MongoTweetCollection

collection = MongoTweetCollection(address='ACTUAL DB ADDRESS',
                                  port=27011,
                                  username='******',
                                  password='******',
                                  dbname='Ukraine')

columns = [
        'id_str',
        'timestamp',
        'coordinates.coordinates.0',
        'coordinates.coordinates.1',
        'user.id_str',
        'user.lang',
        'lang',
        'text'
        ]

collection.since(datetime(2013,12,1)) \
               .until(datetime(2013,12,2)) \
               .user_location_containing('ukraine', 'kiev', 'kyiv', 'kiew') \

示例#17

0

显示文件

文件： plot_user_per_day_histogram.py 项目： IWhisper/smapp-toolkit

    parser.add_argument('--output-file', default='histogram.png', help='Output file [histogram.png]')

    args = parser.parse_args()
    print("Generating avg tweets/user/day histogram for {}".format(args.database))

    TIMEZONE = pytz.timezone(args.timezone)
    print("Days will be split according to time zone {}".format(args.timezone))

    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=TIMEZONE)
    n_days_ago = today - timedelta(days=args.days)
    print("The period being considered is {} to {}".format(
        n_days_ago.strftime('%Y-%m-%d'),
        today.strftime('%Y-%m-%d')))

    print("Connecting to database")
    collection = MongoTweetCollection(args.server, args.port, args.user, args.password, args.database)

    ntweets = collection.since(n_days_ago).until(today).count()
    print("Considering {} tweets".format(ntweets))

    userids = set()
    counts = dict()
    for i in range(args.days):
        day_counts = defaultdict(lambda: 0)
        day_start = n_days_ago + i*timedelta(days=1)
        day_end   = n_days_ago + (i+1)*timedelta(days=1)
        print("Counting for {}".format(day_start.strftime('%Y-%m-%d')))
        for tweet in collection.since(day_start).until(day_end):
            day_counts[tweet['user']['id']] += 1
            userids.add(tweet['user']['id'])
        counts[day_start] = day_counts