def __init__(self, configfile, debug=False):
    """Initialise default state, then load the configuration.

    Args:
        configfile: Stored as ``self.configfile`` before ``read_config()``
            is invoked.
        debug: Stored as ``self.debug``; when truthy, ``tweepy.debug()`` is
            called before the configuration is read.
    """
    # Defaults are assigned first so they exist when read_config() runs.
    self.src_account = ""
    self.dst_accounts = []
    self.initial_tweetcount = 1
    self.dry_run = False

    self.configfile, self.debug = configfile, debug

    if self.debug:
        tweepy.debug()

    self.read_config()
def main():
    """Parse command-line options and run a filtered tweepy stream.

    Options:
        -c/--config FILE   path passed to ``authorisation.get_auth_from_file``
                           (required; the parser exits with an error if absent)
        -d/--debug         call ``tweepy.debug()`` before streaming
        -k/--keywords      comma-separated terms passed as ``track``
        -l/--locations     comma-separated floats passed as ``locations``
        -r/--raw           use ``RawStreamListener`` instead of
                           ``CustomStreamListener``

    Raises:
        SystemExit: via ``parser.error`` when no config path is given.
        ValueError: when a ``--locations`` entry is not a valid float.
    """
    parser = OptionParser()
    parser.add_option("-c", "--config", dest="config_path",
                      help="load config from FILE", metavar="FILE")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      help="enable debug mode")
    parser.add_option("-k", "--keywords", dest="keywords",
                      help="specifies keywords to track")
    parser.add_option("-l", "--locations", dest="locations",
                      help="specifies a set of bounding boxes to track")
    parser.add_option("-r", "--raw", action="store_true", dest="raw",
                      help="output raw tweet")
    options, _args = parser.parse_args()

    if not options.config_path:
        parser.error("No config path specified.")

    keywords = options.keywords.split(",") if options.keywords else None

    # Build a real list: on Python 3 ``map`` returns a one-shot iterator that
    # has no len() and cannot be traversed twice, which breaks any consumer
    # that validates the coordinate count before joining the values.
    if options.locations:
        locations = [float(value) for value in options.locations.split(",")]
    else:
        locations = None

    # Load authorisation config.
    auth = authorisation.get_auth_from_file(options.config_path)
    if options.debug:
        tweepy.debug()

    # Create a streaming API and set a timeout value of 60 seconds.
    listener = RawStreamListener() if options.raw else CustomStreamListener()
    streaming_api = tweepy.streaming.Stream(auth, listener, timeout=60)
    streaming_api.filter(follow=None, track=keywords, locations=locations)
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores and processes selected statuses.

    A status is ignored when it is flagged as retweeted, its ``lang`` is not
    ``"en"``, its text starts with ``RT``, or any of its ``in_reply_to_*``
    fields is set. Every callback other than ``on_status`` returns ``True``.
    """

    # Executed once, when the class body is evaluated.
    tweepy.debug(True)

    def on_status(self, status):
        """Persist and process ``status`` unless one of the skip rules matches."""
        # ``or`` short-circuits, so the checks run in the original order.
        should_skip = (
            status.retweeted
            or status.lang != "en"
            or status.text.startswith('RT')
            or status.in_reply_to_status_id is not None
            or status.in_reply_to_user_id is not None
            or status.in_reply_to_screen_name is not None
        )
        if should_skip:
            return

        # Save tweet from the search filter for debugging
        put_dynamodb(status)
        # Do something with the tweet
        process_tweet(status)

    def on_error(self, status_code):
        """Print the error status code (with an extra line for 420); return True."""
        for line in ('Exception...', status_code):
            print(line)
        if status_code == 420:
            print("Status code: %s" % status_code)
        return True

    def on_timeout(self):
        """Print a timeout notice and return True."""
        print('Timeout...')
        return True

    def on_exception(self, exception):
        """Print the exception and return True."""
        for line in ('Exception...', exception):
            print(line)
        return True
# if "__main__" == __name__: parser = argparse.ArgumentParser(description="Pull geo-tagged status from twitter") parser.add_argument("ACTION", default="start", nargs="?", choices=("start", "stop")) parser.add_argument("-d", action="store_true", dest="DEBUG", help="enable debug mode") args = parser.parse_args() # print parser.parse_args() if "start" == args.ACTION: if None is _get_pid(): pid_file = open(PID_PATH, "w") pid_file.write(str(os.getpid())) pid_file.flush() if args.DEBUG: tweepy.debug() try: oauth = OAuthHandler(settings.TWITTER_CONSUMER_KEY, settings.TWITTER_CONSUMER_SECRET, secure=True) oauth.set_access_token(settings.TWITTER_ACCESS_KEY, settings.TWITTER_ACCESS_SECRET) listener = SgzStreamListener() stream = Stream(oauth, listener) # stream.filter(locations=(103.9278, 30.5620, 104.2097, 30.7882)) # stream.filter(locations=(97.00, 20.54, 123.02, 42.80, # center # 72.74, 26.66, 97.00, 49.43, # west # 115.02, 38.54, 135.50, 53.90)) # ne stream.filter( locations=( # beijing 116.2, 39.75, 116.56,
# get relevant information (e.g. language, retweeted, favorite count...) of each 'tweet object' # Jinghua Xu # this may now seem like a dumb idea: if one studies the json file well enough, creating an object each time and getting information through this approach can be redundant??? """ The Tweepy documentation does not provide an intuitive way to obtain relevant information of each 'tweet object'. The tweet class represents each tweet and enables getting relevant information directly from each 'tweet object'. """ import tweepy from tweepy import RateLimitError from keys import Keys tweepy.debug(True) class tweet: """ a tweet class represents a tweet, use this class to obatain information of each tweet """ def __init__(self, t): '''pass a 'tweet object' to the constructor''' self._tweet = t @property def json(self): '''return a dictionary representation of a tweet''' return self._json @property def language(self): '''return the language of a tweet'''
import fix_imports
from slistener import SListener
from db_listener import DatabaseListener
import time, tweepy, sys, os

tweepy.debug()

# Credentials are read from the environment; os.getenv returns None for any
# variable that is not set.
auth = tweepy.OAuthHandler(os.getenv('CONSUMER_KEY'), os.getenv('CONSUMER_SECRET'))
auth.set_access_token(os.getenv('APPLICATION_KEY'), os.getenv('APPLICATION_SECRET'))
api = tweepy.API(auth)


def main():
    """Build a stream backed by ``DatabaseListener`` and start sampling.

    Prints the ``CONSUMER_KEY`` environment value, constructs the stream with
    the module-level ``auth``/``api`` objects, announces the start and then
    begins consuming the sample stream.
    """
    # NOTE(review): this echoes a credential to stdout — confirm it is wanted.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(os.getenv('CONSUMER_KEY'))

    stream = tweepy.Stream(auth, DatabaseListener(api=api, handler=None))
    print("Sampling started...")
    # The stream was previously constructed but never started, so nothing was
    # ever delivered to the listener despite the message above.
    stream.sample()


if __name__ == '__main__':
    main()
from config import * import time import tweepy import re tweepy.debug(debug_tweepy) auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) def limit_handled(cursor): while True: try: yield cursor.next() except tweepy.RateLimitError as e: print(e) time.sleep(15 * 60) except StopIteration: yield None except tweepy.TweepError as e: print(e) time.sleep(15 * 60) def findRecentRcdInCol(collection): try: result = list(collection.find().sort([('created_at', -1)]).limit(1)) return result[0]
import pymongo import tweepy consumer_key = "QNOq44cKGVw07KMNFf9RZmpe5" consumer_secret = "AJrlkQ5MKIyqg5o0U4dwWGQkWnUsiiaEqliGg1ybbtU2IBRXxW" access_key = "352257626-eJG0KdSRjK4w29IrHorKKMPIyvgrjWBEGKIm1GvF" access_secret = "YjvBHiN0LH4W6Y9bgCDNNuHS6yWizhxQr5a4Y3nKDkId0" auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_key, access_secret) api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) tweepy.debug(True) def process(get_tweets): for i in get_tweets: client = pymongo.MongoClient('localhost', 27017) db = client['twitter'] collection = db['budget2018'] collection.insert(i._json) query = '#Budget2018' location = "-27.963141,153.380654,2km" max_tweets = 9999999999 total_tweets = 0 last_id = -1 while total_tweets < max_tweets: count = max_tweets - total_tweets try: