def add_users(usernames=("SSDummy_Janet", "ssdummy_henry", "ssdummy_hoot",
                         "ssdummy_faye", "ssdummy_burt", "ssdummy_gustavo",
                         "ssdummy_amanda", "ssdummy_duke", "ssdummy_ian",
                         "ssdummy_ellen", "ssdummy_chrissy"),
              clear=False):
    '''Used by analyse team to test their accuracy!

    Adds every user in the usernames list via addalyse. A user is retried
    for as long as addalyse reports that the rate limit was exceeded; any
    other error is written to stderr and the user is skipped.
    TODO: test it

    @argument usernames: A sequence of Twitter usernames to add. Defaults to
        the built-in list of dummy accounts.
    @argument clear: If True, everything is deleted from the storage handler
        before the users are added.'''
    sh = StorageHandler(SOLR_SERVER)
    if clear:
        print("CLEARING DATABASE!")
        sh.delete_all()

    users_left_to_add = len(usernames)
    for username in usernames:
        retry = True
        while retry:
            retry = False
            print("Adding: " + username + " Left to add: " + str(users_left_to_add))
            try:
                # NOTE(review): the other callers in this file pass
                # SOLR_SERVER here instead of a StorageHandler - confirm
                # that addalyse accepts both.
                addalyse.addalyse(sh, username)
            except addalyse.AddalyseRateLimitExceededError:
                wait_seconds = CONFIG.get_rate_limit_exceeded_time()
                sys.stderr.write("Rate limit exceeded, waiting " + str(wait_seconds) + " seconds.\n")
                time.sleep(wait_seconds)
                retry = True
            except addalyse.AddalyseError as err:
                sys.stderr.write("Got error from addalyse: " + str(err) + "\n")
            except Exception:
                sys.stderr.write("Unhandled exception\n")
                traceback.print_exc()
        # Count each user once, no matter how many attempts it needed.
        users_left_to_add -= 1
    print("Done adding test users!")
def add_to_solr(username):
    '''Requests a certain Twitter username to be added.

    @argument username: A string containing the username of a Twitter user.
    @return: A string "UserAdded" if successful, otherwise an error message,
        one of: "UserNotOnTwitter", "ProtectedUser", "RateLimitExceeded"
        or "OtherError".'''
    try:
        addalyse.addalyse(SOLR_SERVER, username)
    except addalyse.AddalyseUserNotOnTwitterError:
        return "UserNotOnTwitter"
    except addalyse.AddalyseProtectedUserError:
        return "ProtectedUser"
    except addalyse.AddalyseRateLimitExceededError:
        return "RateLimitExceeded"
    except addalyse.AddalyseUnableToProcureTweetsError as err:
        sys.stderr.write("Couldn't get tweets for some reason:" + str(err) + "\n")
        return "OtherError"
    except Exception:
        # Anything unexpected is reported with a traceback but still mapped
        # to a result string so the caller always gets an answer.
        sys.stderr.write("Unhandled exception:\n")
        traceback.print_exc()
        return "OtherError"
    return "UserAdded"
def _update_user_with_retry(sh, username, since_id, update_count, ignored_users):
    '''Runs one addalyse update for username, retrying while rate limited.

    @argument sh: The StorageHandler used to delete users that can no longer
        be updated.
    @argument username: The id of the user to update.
    @argument since_id: The since_id stored for the user, passed on to addalyse.
    @argument update_count: The update count stored for the user.
    @argument ignored_users: A dict mapping username to the time the user
        started being ignored; a user is added when tweets could not be
        procured.'''
    retry = True
    while retry:
        retry = False
        print("Updating...")
        try:
            addalyse.addalyse(SOLR_SERVER,
                              username,
                              since_id,
                              (update_count % UPDATE_N) == 0,
                              update_count + 1)
        except addalyse.AddalyseProtectedUserError as err:
            # The user is now protecting his/her account: remove from Solr.
            sys.stderr.write("Got: " + str(err) + ". Twitter account protected. Deleting from SOLR.\n")
            sh.delete_ci(username)
        except addalyse.AddalyseUserNotOnTwitterError as err:
            # The user can no longer be found on Twitter: remove from Solr.
            sys.stderr.write("Got: " + str(err) + ". Twitter account deleted. Deleting from SOLR.\n")
            sh.delete_ci(username)
        except addalyse.AddalyseUnableToProcureTweetsError as err:
            # No new tweet data right now: remember when, so the caller can
            # skip this user for a while.
            sys.stderr.write(str(err) + "\n")
            ignored_users[username] = mxDateTime.now()
        except addalyse.AddalyseRateLimitExceededError:
            # Rate limit exceeded: sleep for the configured time, then retry.
            wait_seconds = CONFIG.get_rate_limit_exceeded_time()
            sys.stderr.write("RateLimitExceeded, trying again in " + str(wait_seconds) + " seconds.\n")
            time.sleep(wait_seconds)
            retry = True
        except Exception:
            # Print a traceback so that the programmer can take care of it,
            # then move on to the next user.
            sys.stderr.write("Unhandled exception:\n")
            traceback.print_exc()


def main():
    '''Gets profiles from storageHandler and checks if they need updating,
    and if so updates those.

    Runs forever: every cycle walks all users returned by the storage
    handler, updates those whose timestamp is older than update_time hours
    (unless they are temporarily ignored), and then sleeps for cycle_time
    seconds.'''
    # NOTE(review): th is never used below; the construction is kept in case
    # creating a TwitterHelp has side effects - confirm and remove.
    th = TwitterHelp()
    sh = StorageHandler(SOLR_SERVER)

    temporarily_ignored_users = {}
    sleep_time = 10      # Sleep in seconds per update
    update_time = 1      # Minimum time for a new update (in hours)
    cycle_time = 60 * 3  # When all users have been checked: sleep (seconds)

    while True:
        # Get the information from Solr
        user_fields = sh.get_user_fields('*', 'id', 'since_id', 'updatecount', 'timestamp')
        for (username, since_id, update_count, timestamp) in user_fields:
            print("Checking user: " + username + " Last updated: " + str(timestamp))  # Debug print

            # Time checks
            current_datetime = mxDateTime.now()
            diff_twitter = current_datetime - timestamp

            # Stop ignoring a user once it has been ignored for long enough.
            if (username in temporarily_ignored_users
                    and (current_datetime - temporarily_ignored_users[username]).hours > update_time):
                del temporarily_ignored_users[username]

            is_due = diff_twitter.hours > update_time
            if is_due and username not in temporarily_ignored_users:
                _update_user_with_retry(sh, username, since_id, update_count,
                                        temporarily_ignored_users)
                # Sleep between updates, to not make too many Twitter requests
                time.sleep(sleep_time)
            else:
                print("This user has recently been updated.")

        print("Completed one update cycle. Sleeping for " + str(cycle_time) + " seconds.")
        time.sleep(cycle_time)
def gather_data_loop(request_per_hour=3600, users_to_add=21, no_analyse=False):
    """Gathers data about twitter IDs, and sends the data to the storage handler.

    Repeatedly fetches a batch of public tweeters and, for every user not
    already in the storage handler, runs addalyse on it. The loop condition
    is only checked between batches, so more than users_to_add users may end
    up being added.

    @argument request_per_hour: Requests allowed per hour; the pause between
        requests is 3600 / request_per_hour seconds.
    @argument users_to_add: Keep fetching batches until at least this many
        users have been added.
    @argument no_analyse: If True (or if the module-level NO_ANALYSE flag is
        set), nothing is stored; the tweets of every user are printed
        instead. No users are counted as added in this mode, so the loop
        does not terminate on its own.
    """
    # Honour the parameter, while still respecting the module-level flag
    # that this function used to read exclusively.
    skip_analysis = bool(no_analyse or globals().get("NO_ANALYSE", False))

    # TODO: Change for real implementation!
    # Float division: truncating would sleep shorter than the requested rate.
    sleep_time = 3600.0 / request_per_hour

    th = TwitterHelp()
    sh = None
    if not skip_analysis:
        sh = StorageHandler(SOLR_SERVER)

    added_users = 0
    # Creates a set for all the users that will be added successfully
    users_added = set()

    while added_users < users_to_add:
        # The set of users which will be added.
        try:
            set_to_add = th.get_public_tweeters()
        except twitter.TwitterError as err:
            if err.message[0:19] == "Rate limit exceeded":
                # TODO: optimal version of this would query the twitter api
                # for how long to wait exactly!
                wait_seconds = CONFIG.get_rate_limit_exceeded_time()
                sys.stderr.write("Rate limit exceeded while trying to get public timeline, trying again in "
                                 + str(wait_seconds) + " seconds.\n")
                time.sleep(wait_seconds)
            else:
                sys.stderr.write("Got TwitterError while trying to get public timeline "
                                 + str(err) + ". Retrying soon.\n")
                traceback.print_exc()
                time.sleep(100)
            continue  # retry the loop

        if not skip_analysis:
            print("These will be added:")
            for s in set_to_add:
                print(s)

        for user in set_to_add:
            if skip_analysis:
                tweets = th.get_all_statuses(user)
                print("#####_NEW_USER_#####")
                print(user)
                for t in tweets:
                    try:
                        text = t.GetText()
                        print("#####_NEW_TWEET_#####")
                        print(text)
                        print("#####_END_OF_TWEET_#####")
                    except UnicodeEncodeError:
                        # Tweets that cannot be encoded for output are skipped.
                        continue
                time.sleep(sleep_time)
                continue

            if sh.contains(user):
                continue

            retry = True  # A retry variable for an inner "goto"
            while retry:
                retry = False
                time.sleep(sleep_time)
                try:
                    if addalyse.addalyse(SOLR_SERVER, user):
                        users_added.add(user)
                        added_users += 1
                except addalyse.AddalyseRateLimitExceededError:
                    # Halt for the configured time if the rate limit is exceeded
                    wait_seconds = CONFIG.get_rate_limit_exceeded_time()
                    sys.stderr.write("RateLimitExceeded, trying again in "
                                     + str(wait_seconds) + " seconds.\n")
                    time.sleep(wait_seconds)
                    retry = True
                except addalyse.AddalyseError as err:
                    # Catches every remaining addalyse error type.
                    sys.stderr.write("Addalyse threw an error: " + str(err) + "\n")
                except Exception:
                    # ignore errors non-silently (we print tracebacks!)
                    # TODO: use the logger for this?
                    sys.stderr.write("Unhandled exception\n")
                    traceback.print_exc()

    # For debugging purposes, displays all users found in this session.
    if not skip_analysis:
        for key in users_added:
            print(key + " was added")