def main():
    """Load tweets into MongoDB, filter them by a vocabulary, print a new vocab.

    Steps, in order:

    1. Create a ``TweetToMongo`` object for the 'toronto_tweets' database.
    2. Insert the tweets from 'toronto_tweets.txt' into the 'tweets'
       collection.
    3. Read one vocabulary entry per line from 'vocab.txt' (UTF-8).
    4. Join the entries with '|' (regex alternation) and pass the pattern to
       ``filter_tweets`` together with 'tweets' and 'health'.
    5. Call ``get_tweets_vocab`` on the 'tweets' collection and print the
       result.

    The MongoDB client is closed even when one of the steps raises.
    """
    # Local import so the function is self-contained; io.open decodes UTF-8
    # to text on both Python 2 and Python 3 (replaces str.decode('utf-8')).
    import io

    tweet_set = TweetToMongo('toronto_tweets')
    try:
        tweet_set.insert_to_collection('toronto_tweets.txt', 'tweets')

        with io.open('vocab.txt', 'r', encoding='utf-8') as vocab_file:
            stripped = (line.rstrip('\n') for line in vocab_file)
            # Skip empty entries: an empty alternative in the joined pattern
            # ("a||b") matches every string, i.e. every tweet.
            vocab = [entry for entry in stripped if entry]

        # NOTE(review): entries are used as raw regex fragments (no
        # re.escape); confirm that vocab.txt is meant to hold patterns.
        regex_string = '|'.join(vocab)

        # 'health' is presumably the destination collection -- confirm
        # against TweetToMongo.filter_tweets.
        tweet_set.filter_tweets(regex_string, 'tweets', 'health')

        new_vocab = tweet_set.get_tweets_vocab(vocab, 'tweets')
        print(new_vocab)
    finally:
        tweet_set.client.close()
# Script: search Twitter for a fixed set of hashtags and hand the matching
# tweets to a TweetToMongo writer backed by the local 'db_restT' database.
# consumer_key, consumer_secret and access_token are expected to be defined
# earlier in the file.
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth_handler=auth)

print(" Open MongoDB connection\n")
try:
    conn = pymongo.MongoClient()
except pymongo.errors.ConnectionFailure as e:
    print("Connection failed : %s" % e)
    # Stop here: continuing would fail with a NameError on the unbound
    # `conn` a few lines below and hide the real cause.
    raise SystemExit(1)
else:
    print("Connected!")

rest_db = conn['db_restT']

toMongo = TweetToMongo()
for query in ["#Warriors", "#NBAFinals2015"]:
    toMongo.start()
    search_results = tweepy.Cursor(
        api.search,
        q=query,
        since="2015-07-05",
        until="2015-07-12",
        wait_on_rate_limit=True,
    ).items(10)
    for tweet in search_results:
        toMongo.write(tweet)
        # FYI: the raw JSON payload is in tweet._json
        print(tweet._json)
        print(tweet.text)
        # json.dumps escapes non-ASCII by default, so no explicit encode is
        # needed; the trailing " \n" reproduces the original spacing.
        print(json.dumps(tweet._json) + " \n")
    toMongo.end()
    # NOTE(review): the original nesting of end()/push() could not be
    # recovered from the source layout; both are assumed to run once per
    # query -- confirm against TweetToMongo.
    toMongo.push(rest_db)