def update_twitter_profile(user):
    """Refresh a user's stored profile from their Twitter account.

    Looks up the Twitter account whose id is stored at
    ``user.get_profile().twitter_profile.twitter_id`` and copies its name,
    URL, avatar URL, description, screen name and location onto the local
    profile and its related user record, saving both.

    The sync is best-effort: if the profile or the Twitter account cannot
    be fetched, nothing is modified and no error is raised.

    :param user: object exposing ``get_profile()``.
    """
    api = API()
    try:
        profile = user.get_profile()
        twitter_user = api.get_user(
            user_id=profile.twitter_profile.twitter_id)
    except Exception:
        # Any lookup failure leaves the profile untouched. Catching
        # ``Exception`` rather than using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        return

    if not twitter_user:
        return

    # Everything before the first space is the first name; the remainder
    # (possibly empty) is the last name.
    first_name, _, last_name = twitter_user.name.partition(" ")
    profile.user.first_name = first_name
    profile.user.last_name = last_name
    profile.user.save()

    profile.website = twitter_user.url
    profile.profile_image_url = twitter_user.profile_image_url
    profile.description = twitter_user.description
    profile.twitter_name = twitter_user.screen_name
    profile.location = twitter_user.location
    profile.save()
def go(collection_type, project_id, collector_id, rawdir, logdir): if collection_type not in ['track', 'follow', 'none']: print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'." print 'Exiting with invalid params...' sys.exit() else: # Grab collector & project details from DB project = db.get_project_detail(project_id) resp = db.get_collector_detail(project_id, collector_id) if project['status'] and resp['status']: collector = resp['collector'] configdb = project['project_config_db'] project_config_db = db.connection[configdb] project_config_db = project_config_db.config collector_name = collector['collector_name'] project_name = project['project_name'] else: 'Invalid project account & collector. Try again!' # module_config = project_config_db.find_one({'module': 'twitter'}) # Reference for controller if script is active or not. project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'active': 1 }}) Config = ConfigParser.ConfigParser() Config.read(PLATFORM_CONFIG_FILE) # Creates logger w/ level INFO logger = logging.getLogger(collector_name) logger.setLevel(logging.INFO) # Creates rotating file handler w/ level INFO fh = logging.handlers.TimedRotatingFileHandler( logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False) fh.setLevel(logging.INFO) # Creates formatter and applies to rotating handler format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' datefmt = '%m-%d %H:%M' formatter = logging.Formatter(format, datefmt) fh.setFormatter(formatter) # Finishes by adding the rotating, formatted handler logger.addHandler(fh) # Sets current date as starting point tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') logger.info('Starting collection system at %s' % tmpDate) logger.info('Collector name: %s' % collector_name) # Grabs tweets out file info from config # TODO - move this info to Mongo tweetsOutFilePath = rawdir + '/' if 
not os.path.exists(tweetsOutFilePath): os.makedirs(tweetsOutFilePath) tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0) tweetsOutFile = Config.get('files', 'tweets_file', 0) # NOTE - proper naming for api_auth dictionary from front_end oauth_info = collector['api_auth'] consumerKey = oauth_info['consumer_key'] consumerSecret = oauth_info['consumer_secret'] accessToken = oauth_info['access_token'] accessTokenSecret = oauth_info['access_token_secret'] # Authenticates via app info auth = OAuthHandler(consumerKey, consumerSecret) auth.set_access_token(accessToken, accessTokenSecret) # Sets Mongo collection; sets rate_limitng & error counts to 0 if 'stream_limit_loss' not in collector: project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'stream_limit_loss': { 'counts': [], 'total': 0 } }}) if 'rate_limit_count' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'rate_limit_count': 0 }}) if 'error_code' not in collector: project_config_db.update({"_id": ObjectId(collector_id)}, {'$set': { 'error_code': 0 }}) runCollector = collector['collector']['run'] if runCollector: print 'Starting process w/ start signal %d' % runCollector logger.info('Starting process w/ start signal %d' % runCollector) collectingData = False i = 0 myThreadCounter = 0 runLoopSleep = 0 while runCollector: i += 1 # Finds Mongo collection & grabs signal info # If Mongo is offline throws an acception and continues exception = None try: resp = db.get_collector_detail(project_id, collector_id) collector = resp['collector'] flags = collector['collector'] runCollector = flags['run'] collectSignal = flags['collect'] updateSignal = flags['update'] except Exception, exception: logger.info('Mongo connection refused with exception: %s' % exception) """ Collection process is running, and: A) An update has been triggered -OR- B) The collection signal is not set -OR- C) Run signal is not set """ if collectingData and (updateSignal or not 
collectSignal or not runCollector): # Update has been triggered if updateSignal: logger.info( 'MAIN: received UPDATE signal. Attempting to stop collection thread' ) resp = db.set_collector_status(project_id, collector_id, collector_status=1) # Collection thread triggered to stop if not collectSignal: logger.info( 'MAIN: received STOP signal. Attempting to stop collection thread' ) # Entire process trigerred to stop if not runCollector: logger.info( 'MAIN: received EXIT signal. Attempting to stop collection thread' ) resp = db.set_collector_status(project_id, collector_id, collector_status=0) collectSignal = 0 # Send stream disconnect signal, kills thread stream.disconnect() wait_count = 0 while e.isSet() is False: wait_count += 1 print '%d) Waiting on collection thread shutdown' % wait_count sleep(wait_count) collectingData = False logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count) logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count) print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count logger.info( 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count) print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count if not l.error_code == 0: resp = db.set_collector_status(project_id, collector_id, collector_status=0) project_config_db.update( {"_id": ObjectId(collector_id)}, {'$set': { 'error_code': l.error_code }}) if not l.limit_count == 0: project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'stream_limit_loss.total': l.limit_count }}) if not l.rate_limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'rate_limit_count': 0 }}) # Collection has been signaled & main program thread is running # TODO - Check Mongo for handle:ID pairs # Only call for new pairs if collectSignal and (threading.activeCount() == 1): # Names collection thread & 
adds to counter myThreadCounter += 1 myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter termsList = collector['terms_list'] if termsList: print 'Terms list length: ' + str(len(termsList)) # Grab IDs for follow stream if collection_type == 'follow': """ TODO - Update Mongo terms w/ set for collect status 0 or 1 # Updates current stored handles to collect 0 if no longer listed in terms file stored_terms = doc['termsList'] for user in stored_terms: if user['handle'] not in termsList: user_id = user['id'] mongo_config.update({'module': 'collector-follow'}, {'$pull': {'termsList': {'handle': user['handle']}}}) mongo_config.update({'module': 'collecting-follow'}, {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}}) # Loops thru current stored handles and adds list if both: # A) Value isn't set to None (not valid OR no longer in use) all_stored_handles = [user['handle'] for user in stored_terms] stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']] print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles) """ # Loop thru & query (except handles that have been stored) print 'MAIN: Querying Twitter API for handle:id pairs...' logger.info( 'MAIN: Querying Twitter API for handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for item in termsList: term = item['term'] # If term already has a valid ID, pass if item['id'] is not None: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=term) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0][ 'message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88 if code == 88: print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' logger.exception( 'MAIN: User ID grab rate limited. 
Sleeping for 15 minutes.' ) time.sleep(900) # Handle doesn't exist, added to Mongo as None elif code == 34: print 'MAIN: User w/ handle %s does not exist.' % term logger.exception( 'MAIN: User w/ handle %s does not exist.' % term) item['collect'] = 0 item['id'] = None failed_handles.append(term) # Success - handle:ID pair stored in Mongo else: user_id = user._json['id_str'] item['id'] = user_id success_handles.append(term) print 'MAIN: Collected %d new ids for follow stream.' % len( success_handles) logger.info( 'MAIN: Collected %d new ids for follow stream.' % len(success_handles)) print 'MAIN: %d handles failed to be found.' % len( failed_handles) logger.info('MAIN: %d handles failed to be found.' % len(failed_handles)) logger.info(failed_handles) print failed_handles print 'MAIN: Grabbing full list of follow stream IDs from Mongo.' logger.info( 'MAIN: Grabbing full list of follow stream IDs from Mongo.' ) # Updates term list with follow values project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'terms_list': termsList }}) # Loops thru current stored handles and adds to list if: # A) Value isn't set to None (not valid OR no longer in use) ids = [ item['id'] for item in termsList if item['id'] and item['collect'] ] noncoll = [ item['term'] for item in termsList if not item['collect'] ] termsList = ids else: terms = [ item['term'] for item in termsList if item['collect'] ] noncoll = [ item['term'] for item in termsList if not item['collect'] ] termsList = terms print 'Terms List: ' print termsList print '' print 'Not collecting for: ' print noncoll print '' logger.info('Terms list: %s' % str(termsList).strip('[]')) logger.info('Not collecting for: %s' % str(noncoll).strip('[]')) print 'COLLECTION THREAD: Initializing Tweepy listener instance...' 
logger.info( 'COLLECTION THREAD: Initializing Tweepy listener instance...') l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id) print 'TOOLKIT STREAM: Initializing Tweepy stream listener...' logger.info( 'TOOLKIT STREAM: Initializing Tweepy stream listener...') # Initiates async stream via Tweepy, which handles the threading # TODO - location & language languages = collector['languages'] location = collector['location'] if languages: print '%s language codes found!' % len(languages) if location: print 'Location points found!' for i in range(len(location)): location[i] = float(location[i]) stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100) if collection_type == 'track': stream.filter(track=termsList, languages=languages, locations=location, async=True) elif collection_type == 'follow': stream.filter(follow=termsList, languages=languages, locations=location, async=True) elif collection_type == 'none': stream.filter(locations=location, languages=languages, async=True) else: sys.exit('ERROR: Unrecognized stream filter.') collectingData = True print 'MAIN: Collection thread started (%s)' % myThreadName logger.info('MAIN: Collection thread started (%s)' % myThreadName) #if threading.activeCount() == 1: # print "MAIN: %d iteration with no collection thread running" % i #else: # print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount()) # Incrementally delays loop if Mongo is offline, otherwise 2 seconds max_sleep_time = 1800 if exception: if runLoopSleep < max_sleep_time: runLoopSleep += 2 else: runLoopSleep = max_sleep_time print "Exception caught, sleeping for: %d" % runLoopSleep time.sleep(runLoopSleep) else: time.sleep(2)
# Loop thru & query (except handles that have been stored) print 'MAIN: Querying Twitter API for new handle:id pairs...' logger.info('MAIN: Querying Twitter API for new handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for handle in termsList: # If handle already stored, no need to query for ID if handle in stored_handles: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=handle) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0]['message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88 if code == 88: print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' logger.exception('MAIN: User ID grab rate limited. Sleeping for 15 minutes.') time.sleep(900) # Handle doesn't exist, added to Mongo as None elif code == 34: print 'MAIN: User w/ handle %s does not exist.' % handle logger.exception('MAIN: User w/ handle %s does not exist.' % handle) if handle not in all_stored_handles: terms_info = { 'handle': handle, 'id': None, 'collect': 0 } mongo_config.update({'module':'collector-follow'},
while 1: try: forward = [] backward = [] #outfile = sys.argv[1] #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM") auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) #username1, username2,listUsernames = readFile(outfile) user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]}) backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] }) reqs = 0 while 1: fin, path = go_backward() reqs +=1;print reqs if fin: print path;reqs=-2;break while has_node(backward): fin, path = go_backward() reqs +=1;print reqs if fin or reqs >= 1000: print path;break if fin: break if reqs >= 1000: reqs=-2;break
"yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout=-1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) username1 = sys.argv[1] username2 = sys.argv[2] user1 = twitterApi.get_user(username1) #@UndefinedVariable user2 = twitterApi.get_user(username2) #@UndefinedVariable forward = [] forward_ds = [1] backward_ds = [1] forward_min = backward_min = 0 backward = [] is_opt = 0 forward.append({ "obj": user1, "cursor": -1, "friends": [], "cursor_obj": -1, "path": []
def go(collection_type, project_id, collector_id, rawdir, logdir): if collection_type not in ['track', 'follow', 'none']: print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'." print 'Exiting with invalid params...' sys.exit() else: # Grab collector & project details from DB project = db.get_project_detail(project_id) resp = db.get_collector_detail(project_id, collector_id) if project['status'] and resp['status']: collector = resp['collector'] configdb = project['project_config_db'] project_config_db = db.connection[configdb] project_config_db = project_config_db.config collector_name = collector['collector_name'] project_name = project['project_name'] else: 'Invalid project account & collector. Try again!' # module_config = project_config_db.find_one({'module': 'twitter'}) # Reference for controller if script is active or not. project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'active': 1}}) Config = ConfigParser.ConfigParser() Config.read(PLATFORM_CONFIG_FILE) # Creates logger w/ level INFO logger = logging.getLogger(collector_name) logger.setLevel(logging.INFO) # Creates rotating file handler w/ level INFO fh = logging.handlers.TimedRotatingFileHandler(logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False) fh.setLevel(logging.INFO) # Creates formatter and applies to rotating handler format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' datefmt = '%m-%d %H:%M' formatter = logging.Formatter(format, datefmt) fh.setFormatter(formatter) # Finishes by adding the rotating, formatted handler logger.addHandler(fh) # Sets current date as starting point tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') logger.info('Starting collection system at %s' % tmpDate) logger.info('Collector name: %s' % collector_name) # Grabs tweets out file info from config # TODO - move this info to Mongo tweetsOutFilePath = rawdir + '/' if not 
os.path.exists(tweetsOutFilePath): os.makedirs(tweetsOutFilePath) tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0) tweetsOutFile = Config.get('files', 'tweets_file', 0) # NOTE - proper naming for api_auth dictionary from front_end oauth_info = collector['api_auth'] consumerKey = oauth_info['consumer_key'] consumerSecret = oauth_info['consumer_secret'] accessToken = oauth_info['access_token'] accessTokenSecret = oauth_info['access_token_secret'] # Authenticates via app info auth = OAuthHandler(consumerKey, consumerSecret) auth.set_access_token(accessToken, accessTokenSecret) # Sets Mongo collection; sets rate_limitng & error counts to 0 if 'stream_limit_loss' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss': { 'counts': [], 'total': 0 }}}) if 'rate_limit_count' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}}) if 'error_code' not in collector: project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': 0}}) runCollector = collector['collector']['run'] if runCollector: print 'Starting process w/ start signal %d' % runCollector logger.info('Starting process w/ start signal %d' % runCollector) collectingData = False i = 0 myThreadCounter = 0 runLoopSleep = 0 while runCollector: i += 1 # Finds Mongo collection & grabs signal info # If Mongo is offline throws an acception and continues exception = None try: resp = db.get_collector_detail(project_id, collector_id) collector = resp['collector'] flags = collector['collector'] runCollector = flags['run'] collectSignal = flags['collect'] updateSignal = flags['update'] except Exception, exception: logger.info('Mongo connection refused with exception: %s' % exception) """ Collection process is running, and: A) An update has been triggered -OR- B) The collection signal is not set -OR- C) Run signal is not set """ if collectingData and (updateSignal or not 
collectSignal or not runCollector): # Update has been triggered if updateSignal: logger.info('MAIN: received UPDATE signal. Attempting to stop collection thread') resp = db.set_collector_status(project_id, collector_id, collector_status=1) # Collection thread triggered to stop if not collectSignal: logger.info('MAIN: received STOP signal. Attempting to stop collection thread') # Entire process trigerred to stop if not runCollector: logger.info('MAIN: received EXIT signal. Attempting to stop collection thread') resp = db.set_collector_status(project_id, collector_id, collector_status=0) collectSignal = 0 # Send stream disconnect signal, kills thread stream.disconnect() wait_count = 0 while e.isSet() is False: wait_count += 1 print '%d) Waiting on collection thread shutdown' % wait_count sleep(wait_count) collectingData = False logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count) logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count) print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count logger.info('COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count) print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count if not l.error_code == 0: resp = db.set_collector_status(project_id, collector_id, collector_status=0) project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': l.error_code}}) if not l.limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss.total': l.limit_count}}) if not l.rate_limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}}) # Collection has been signaled & main program thread is running # TODO - Check Mongo for handle:ID pairs # Only call for new pairs if collectSignal and (threading.activeCount() == 1): # Names collection thread & adds to 
counter myThreadCounter += 1 myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter termsList = collector['terms_list'] if termsList: print 'Terms list length: ' + str(len(termsList)) # Grab IDs for follow stream if collection_type == 'follow': """ TODO - Update Mongo terms w/ set for collect status 0 or 1 # Updates current stored handles to collect 0 if no longer listed in terms file stored_terms = doc['termsList'] for user in stored_terms: if user['handle'] not in termsList: user_id = user['id'] mongo_config.update({'module': 'collector-follow'}, {'$pull': {'termsList': {'handle': user['handle']}}}) mongo_config.update({'module': 'collecting-follow'}, {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}}) # Loops thru current stored handles and adds list if both: # A) Value isn't set to None (not valid OR no longer in use) all_stored_handles = [user['handle'] for user in stored_terms] stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']] print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles) """ # Loop thru & query (except handles that have been stored) print 'MAIN: Querying Twitter API for handle:id pairs...' logger.info('MAIN: Querying Twitter API for handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for item in termsList: term = item['term'] # If term already has a valid ID, pass if item['id'] is not None: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=term) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0]['message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88 if code == 88: print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' logger.exception('MAIN: User ID grab rate limited. 
Sleeping for 15 minutes.') time.sleep(900) # Handle doesn't exist, added to Mongo as None elif code == 34: print 'MAIN: User w/ handle %s does not exist.' % term logger.exception('MAIN: User w/ handle %s does not exist.' % term) item['collect'] = 0 item['id'] = None failed_handles.append(term) # Success - handle:ID pair stored in Mongo else: user_id = user._json['id_str'] item['id'] = user_id success_handles.append(term) print 'MAIN: Collected %d new ids for follow stream.' % len(success_handles) logger.info('MAIN: Collected %d new ids for follow stream.' % len(success_handles)) print 'MAIN: %d handles failed to be found.' % len(failed_handles) logger.info('MAIN: %d handles failed to be found.' % len(failed_handles)) logger.info(failed_handles) print failed_handles print 'MAIN: Grabbing full list of follow stream IDs from Mongo.' logger.info('MAIN: Grabbing full list of follow stream IDs from Mongo.') # Updates term list with follow values project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'terms_list': termsList}}) # Loops thru current stored handles and adds to list if: # A) Value isn't set to None (not valid OR no longer in use) ids = [item['id'] for item in termsList if item['id'] and item['collect']] noncoll = [item['term'] for item in termsList if not item['collect']] termsList = ids else: terms = [item['term'] for item in termsList if item['collect']] noncoll = [item['term'] for item in termsList if not item['collect']] termsList = terms print 'Terms List: ' print termsList print '' print 'Not collecting for: ' print noncoll print '' logger.info('Terms list: %s' % str(termsList).strip('[]')) logger.info('Not collecting for: %s' % str(noncoll).strip('[]')) print 'COLLECTION THREAD: Initializing Tweepy listener instance...' 
logger.info('COLLECTION THREAD: Initializing Tweepy listener instance...') l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id) print 'TOOLKIT STREAM: Initializing Tweepy stream listener...' logger.info('TOOLKIT STREAM: Initializing Tweepy stream listener...') # Initiates async stream via Tweepy, which handles the threading # TODO - location & language languages = collector['languages'] location = collector['location'] if languages: print '%s language codes found!' % len(languages) if location: print 'Location points found!' for i in range(len(location)): location[i] = float(location[i]) stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100) if collection_type == 'track': stream.filter(track=termsList, languages=languages, locations=location, async=True) elif collection_type == 'follow': stream.filter(follow=termsList, languages=languages, locations=location, async=True) elif collection_type == 'none': stream.filter(locations=location, languages=languages, async=True) else: sys.exit('ERROR: Unrecognized stream filter.') collectingData = True print 'MAIN: Collection thread started (%s)' % myThreadName logger.info('MAIN: Collection thread started (%s)' % myThreadName) #if threading.activeCount() == 1: # print "MAIN: %d iteration with no collection thread running" % i #else: # print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount()) # Incrementally delays loop if Mongo is offline, otherwise 2 seconds max_sleep_time = 1800 if exception: if runLoopSleep < max_sleep_time: runLoopSleep += 2 else: runLoopSleep = max_sleep_time print "Exception caught, sleeping for: %d" % runLoopSleep time.sleep(runLoopSleep) else: time.sleep( 2 )
class TweepyAPITests(unittest.TestCase):
    """Tests that drive the ``API`` wrapper through its public methods.

    Each test issues real calls through ``self.api`` using the credentials
    in the module-level names ``oauth_consumer_key``,
    ``oauth_consumer_secret``, ``oauth_token`` and ``oauth_token_secret``;
    ``username`` is the screen name of the authenticated account. Most
    tests only check that the call completes without raising.
    """

    def setUp(self):
        """Build an authenticated API client that retries failed calls."""
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    # --- timelines and statuses -------------------------------------------

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update; the random suffix keeps the status text varying
        # between runs
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)

        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    # --- users ------------------------------------------------------------

    def testgetuser(self):
        # lookup by screen name and by numeric id must agree
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')

        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    # --- direct messages --------------------------------------------------

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send (to ourselves, so sender and recipient are both `username`)
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)

        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    # --- friendships ------------------------------------------------------

    def testcreatedestroyfriendship(self):
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))

        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        source, target = self.api.show_friendship(target_screen_name='twitter')
        # assertTrue instead of the deprecated `assert_` alias
        self.assertTrue(isinstance(source, Friendship))
        self.assertTrue(isinstance(target, Friendship))

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    # --- account ----------------------------------------------------------

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)

        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)

        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')

        # restore colors before asserting, so a failed assertion does not
        # leave the account with the test colors
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )

        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    # Disabled tests, kept for reference:
    #
    # def testupateprofileimage(self):
    #     self.api.update_profile_image('examples/profile.png')
    #
    # def testupdateprofilebg(self):
    #     self.api.update_profile_background_image('examples/bg.png')

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)

        # restore the original profile before asserting
        self.api.update_profile(
            name=original.name, url=original.url,
            location=original.location, description=original.description
        )

        for k, v in profile.items():
            self.assertEqual(getattr(updated, k), v)

    # --- favorites, notifications, blocks ----------------------------------

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    # --- lists ------------------------------------------------------------

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    # --- search, trends, geo ----------------------------------------------

    def testsavedsearches(self):
        s = self.api.create_saved_search('test')
        try:
            self.api.saved_searches()
            self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        finally:
            # always remove the saved search so a failed assertion does not
            # leave it behind on the account
            self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806, long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806, long=-97.74261474609375)  # Austin, TX, USA
"174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout=-1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) #username1, username2,listUsernames = readFile(outfile) user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable forward.append({ "obj": user1, "cursor": -1, "friends": [], "cursor_obj": -1, "path": [] }) backward.append({ "obj": user2, "cursor": -1, "cursor_obj": -1, "path": [], "followers": []
class TwicorderStream(Stream):
    """Stream subclass that reads its settings from ``Config`` and starts
    streaming as soon as it is constructed.

    Every streaming parameter (``follow``, ``track``, ``locations``, ...) is
    exposed as a property that re-reads ``Config.get()`` on each access.
    """

    def __init__(self, auth, listener, **options):
        """Initialise the base stream, then start it in the configured mode.

        Args:
            auth: Authentication handler; passed to the base class and used
                to build the ``API`` client for user lookups.
            listener: Stream listener; passed to the base class.
            **options: Forwarded unchanged to the base class constructor.

        The ``stream_mode`` config value selects ``filter`` (the default
        when unset or empty) or ``sample``. Any other value only reports an
        error through ``utils.message``; no stream is started.
        """
        super(TwicorderStream, self).__init__(auth, listener, **options)
        msg = 'Listener starting at {:%d %b %Y %H:%M:%S}'.format(datetime.now())
        utils.message('Info', msg)

        self.api = API(auth)
        # Cache of follow id -> '@screen_name' and the time it was last built.
        self._id_to_screenname_time = None
        self._id_to_screenname = {}

        stream_mode = self.config.get('stream_mode') or 'filter'
        if stream_mode == 'filter':
            self.filter(
                follow=self.follow,
                track=self.track,
                locations=self.locations,
                stall_warnings=self.stall_warnings,
                languages=self.languages,
                encoding=self.encoding,
                filter_level=self.filter_level
            )
        elif stream_mode == 'sample':
            self.sample(
                languages=self.languages,
                stall_warnings=self.stall_warnings
            )
        else:
            utils.message('Error', 'stream_mode must be "filter" or "sample"')

    def _read_loop(self, resp):
        """Read length-delimited messages from ``resp`` and dispatch them.

        Each message is preceded by a line holding its length in digits;
        blank lines are treated as keep-alives. An exception raised while
        reading a message is logged and the loop moves on to the next one.

        Args:
            resp: Streaming HTTP response exposing ``headers`` and ``raw``.
        """
        charset = resp.headers.get('content-type', default='')
        # Raw string: '\S' in a plain literal is an invalid escape sequence.
        enc_search = re.search(r'charset=(?P<enc>\S*)', charset)
        if enc_search is not None:
            encoding = enc_search.group('enc')
        else:
            encoding = 'utf-8'

        buf = ReadBuffer(resp.raw, self.chunk_size, encoding=encoding)

        while self.running and not resp.raw.closed:
            length = 0
            try:
                while not resp.raw.closed:
                    line = buf.read_line() or ''
                    stripped_line = line.strip()
                    if not stripped_line:
                        # keep-alive new lines are expected
                        self.listener.keep_alive()
                    elif stripped_line.isdigit():
                        length = int(stripped_line)
                        break
                    else:
                        raise TweepError('Expecting length, unexpected value found')
                next_status_obj = buf.read_len(length)
            except Exception:
                # Best effort: log the traceback and try the next message.
                TwiLogger.exception('Unable to process response: \n')
                continue
            if self.running and next_status_obj:
                self._data(next_status_obj)

        if resp.raw.closed:
            self.on_closed(resp)

    @property
    def config(self):
        """Current configuration as returned by ``Config.get()``."""
        return Config.get()

    @property
    def id_to_screenname(self):
        """Mapping of followed user id to ``'@screen_name'``.

        The mapping is cached and rebuilt through ``api.get_user`` when it
        is empty or older than 15 minutes. It is rebuilt from scratch so
        ids no longer present in ``follow`` are dropped, and a failed
        lookup leaves the previous cache untouched.
        """
        now = datetime.now()
        time_since_lookup = now - (self._id_to_screenname_time or now)
        expiry = timedelta(minutes=15)
        if self._id_to_screenname and time_since_lookup <= expiry:
            return self._id_to_screenname

        refreshed = {}
        # ``follow`` is None when not configured; treat that as nothing to look up.
        for follow_id in self.follow or []:
            user = self.api.get_user(follow_id)
            refreshed[follow_id] = '@{}'.format(user.screen_name)
        self._id_to_screenname = refreshed
        self._id_to_screenname_time = datetime.now()
        TwiLogger.info(self._id_to_screenname)
        return self._id_to_screenname

    @property
    def track(self):
        """Non-empty track terms from config, or ``None`` when there are none.

        When ``follow_also_tracks`` is set and at least one term is
        configured, the screen names of followed users are appended.
        """
        track_list = [t for t in self.config.get('track') or [] if t] or None
        if track_list and self.follow_also_tracks:
            track_list.extend(self.id_to_screenname.values())
        # The list is passed as a %-style argument, so the message needs a
        # placeholder; without one the logging call cannot be formatted.
        TwiLogger.info('Tracking: %s', track_list)
        return track_list

    @property
    def follow(self):
        """Configured ``follow`` value (``None`` when unset)."""
        return self.config.get('follow')

    @property
    def locations(self):
        """Configured ``locations`` value (``None`` when unset)."""
        return self.config.get('locations')

    @property
    def stall_warnings(self):
        """Configured ``stall_warnings`` value; defaults to ``False``."""
        return self.config.get('stall_warnings', False)

    @property
    def languages(self):
        """Configured ``languages`` value (``None`` when unset)."""
        return self.config.get('languages')

    @property
    def encoding(self):
        """Configured ``encoding`` value; defaults to ``'utf8'``."""
        return self.config.get('encoding', 'utf8')

    @property
    def filter_level(self):
        """Configured ``filter_level`` value (``None`` when unset)."""
        return self.config.get('filter_level')

    @property
    def follow_also_tracks(self):
        """Configured ``follow_also_tracks`` flag; defaults to ``False``."""
        return self.config.get('follow_also_tracks', False)
if __name__ == "__main__": while 1: try: #outfile = sys.argv[1] #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM") auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) username1 = sys.argv[1] username2 = sys.argv[2] user1 = twitterApi.get_user(username1) #@UndefinedVariable user2 = twitterApi.get_user(username2) #@UndefinedVariable forward = [] forward_ds = [1] backward_ds = [1] forward_min = backward_min = 0 backward = [] is_opt = 0 forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]}) backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] }) reqs = 0 while 1: fin, path = go_backward()