Exemplo n.º 1
0
def add_users(usernames=("SSDummy_Janet", "ssdummy_henry", "ssdummy_hoot", "ssdummy_faye",
                         "ssdummy_burt", "ssdummy_gustavo", "ssdummy_amanda",
                         "ssdummy_duke", "ssdummy_ian", "ssdummy_ellen", "ssdummy_chrissy"),
              clear=False):
    '''Used by analyse team to test their accuracy!  Adds the users in
    the usernames list to the database.

    @argument usernames: The Twitter usernames to add, tried in order.
    @argument clear: If true, everything is deleted from the database
    before the users are added.  By default nothing is deleted.

    A rate limit error makes the same user be tried again after
    sleeping CONFIG.RATE_LIMIT_EXCEEDED_TIME seconds.  Any other error
    is reported on stderr and that user is skipped.
    TODO: test it'''

    sh = StorageHandler(SOLR_SERVER)
    if clear:
        print("CLEARING DATABASE!")
        sh.delete_all()

    users_left_to_add = len(usernames)
    for username in usernames:
        retry = True
        while retry:
            try:
                print("Adding: " + username + " Left to add: " + str(users_left_to_add))
                addalyse.addalyse(sh, username)
                retry = False
            except addalyse.AddalyseRateLimitExceededError:
                sys.stderr.write("Rate limit exceeded, waiting " + str(CONFIG.RATE_LIMIT_EXCEEDED_TIME) + " seconds.\n")
                time.sleep(CONFIG.RATE_LIMIT_EXCEEDED_TIME)
                retry = True
            except addalyse.AddalyseError as err:
                sys.stderr.write("Got error from addalyse: " + str(err) + "\n")
                retry = False
            except Exception:
                # Last resort: print the traceback and move on to the next
                # user instead of aborting the whole run.
                sys.stderr.write("Unhandled exception\n")
                traceback.print_exc()
                retry = False

        # Count each user exactly once, after it succeeded or was given up
        # on, so that rate-limit retries do not skew the remaining count.
        users_left_to_add -= 1

    print("Done adding test users!")
Exemplo n.º 2
0
def add_to_solr(username):
    '''Requests a certain Twitter username to be added.

    @argument username: A string containing the username of a Twitter
    user.
    @return: A string "UserAdded" if successful, otherwise an error
    message, one of: "UserNotOnTwitter", "ProtectedUser",
    "RateLimitExceeded" or "OtherError".'''

    try:
        addalyse.addalyse(SOLR_SERVER, username)
    except addalyse.AddalyseUserNotOnTwitterError:
        return "UserNotOnTwitter"
    except addalyse.AddalyseProtectedUserError:
        return "ProtectedUser"
    except addalyse.AddalyseRateLimitExceededError:
        return "RateLimitExceeded"
    except addalyse.AddalyseUnableToProcureTweetsError as err:
        sys.stderr.write("Couldn't get tweets for some reason:" + str(err) + "\n")
        return "OtherError"
    except Exception:
        # Anything unexpected is reported with a traceback on stderr and
        # turned into the generic error message for the caller.
        sys.stderr.write("Unhandled exception:\n")
        traceback.print_exc()
        return "OtherError"

    # Only reached when addalyse raised nothing.
    return "UserAdded"
Exemplo n.º 3
0
def main():
    '''Gets profiles from storageHandler and checks if they need
    updating, and if so updates those.

    Runs forever.  In each cycle every user returned by the storage
    handler is checked; a user whose timestamp is more than update_time
    hours old, and who is not temporarily ignored, is passed to
    addalyse.  After a full cycle the function sleeps cycle_time
    seconds and starts over.

    Error handling per user:
    - protected or deleted Twitter account: the user is deleted from
      storage;
    - tweets could not be procured: the user is ignored until more than
      update_time hours have passed;
    - rate limit exceeded: sleep
      CONFIG.get_rate_limit_exceeded_time() seconds and try the same
      user again;
    - anything else: a traceback is printed and the user is skipped.'''

    # NOTE(review): th is never used below; kept in case constructing
    # TwitterHelp has side effects -- confirm and remove.
    th = TwitterHelp()
    sh = StorageHandler(SOLR_SERVER)

    # Maps username -> time at which the user started being ignored.
    temporarly_ignore_user = {}

    sleep_time = 10     # Sleep in seconds per update
    update_time = 1     # Minimum time for a new update (in hours)
    cycle_time = 60*3   # When all users have been checked: sleep (in seconds)

    while True:
        # Get the information from Solr
        for (username, since_id, update_count, timestamp) in sh.get_user_fields('*', 'id', 'since_id', 'updatecount', 'timestamp'):
            print("Checking user: " + str(username) + " Last updated: " + str(timestamp))  # Debug print

            # Time checks
            current_datetime = mxDateTime.now()
            diff_twitter = current_datetime - timestamp

            # Stop ignoring the user once the ignore period has passed.
            if username in temporarly_ignore_user and (current_datetime - temporarly_ignore_user[username]).hours > update_time:
                del temporarly_ignore_user[username]

            # Continue only if the document was last updated more than
            # update_time hours ago and the user is not being ignored.
            if diff_twitter.hours > update_time and username not in temporarly_ignore_user:
                retry = True
                while retry:
                    print("Updating...")
                    # Try to update:
                    try:
                        addalyse.addalyse(SOLR_SERVER,
                                          username,
                                          since_id,
                                          (update_count % UPDATE_N) == 0,
                                          update_count + 1)
                        retry = False

                    # If the user is now protecting his/her account: Remove from Solr
                    except addalyse.AddalyseProtectedUserError as err:
                        sys.stderr.write("Got: " + str(err) + ". Twitter account protected. Deleting from SOLR.\n")
                        sh.delete_ci(username)
                        retry = False

                    # If the user can no longer be found on Twitter: Remove from Solr
                    except addalyse.AddalyseUserNotOnTwitterError as err:
                        sys.stderr.write("Got: " + str(err) + ". Twitter account deleted. Deleting from SOLR.\n")
                        sh.delete_ci(username)
                        retry = False

                    # If no new Tweet data could be procured at the time of
                    # the update: leave this user alone for a while.
                    except addalyse.AddalyseUnableToProcureTweetsError as err:
                        sys.stderr.write(str(err) + "\n")
                        temporarly_ignore_user[username] = mxDateTime.now()
                        retry = False

                    # If the rate limit was exceeded, pause and try again.
                    except addalyse.AddalyseRateLimitExceededError as err:
                        sys.stderr.write("RateLimitExceeded, trying again in " + str(CONFIG.get_rate_limit_exceeded_time()) + " seconds.\n")
                        time.sleep(CONFIG.get_rate_limit_exceeded_time())
                        retry = True

                    # If an unhandled exception is found, a traceback will be
                    # made so that the programmer can take care of it.
                    except Exception:
                        sys.stderr.write("Unhandled exception:\n")
                        traceback.print_exc()
                        retry = False

                    # Sleep after every attempt, to not make too many Twitter requests
                    time.sleep(sleep_time)
            else:
                # Also reached when the user is temporarily ignored.
                print("This user has recently been updated.")
        print("Completed one update cycle. Sleeping for " + str(cycle_time) + " seconds.")
        time.sleep(cycle_time)
Exemplo n.º 4
0
def gather_data_loop(request_per_hour=3600, users_to_add=21, no_analyse=False):
    """Gathers data about twitter IDs, and sends the data to the
    storage handler.

    @argument request_per_hour: Used to derive the pause between
    requests: 3600 / request_per_hour seconds.
    @argument users_to_add: The loop stops once this many users have
    been added successfully.
    @argument no_analyse: If true (or if the module-level NO_ANALYSE is
    true), nothing is analysed or stored; instead the tweets of each
    user are printed to stdout.  Since no user is ever counted as added
    in that mode, the loop then keeps running.
    """
    # Honour the parameter as well as the module-level switch, so that
    # callers relying on either one get the mode they asked for.
    skip_analysis = no_analyse or NO_ANALYSE

    # TODO: Change for real implementation!
    sleep_time = 3600 / request_per_hour

    th = TwitterHelp()
    if not skip_analysis:
        sh = StorageHandler(SOLR_SERVER)

    added_users = 0

    # Creates a set for all the users that will be added successfully
    users_added = set()

    while added_users < users_to_add:
        # The set of users which will be added.
        try:
            set_to_add = th.get_public_tweeters()
        except twitter.TwitterError as err:
            # Compare the first 19 characters, the length of the expected prefix.
            if err.message[0:19] == "Rate limit exceeded":
                # TODO: optimal version of this would query the twitter api for how long to wait exactly!
                sys.stderr.write(
                    "Rate limit exceeded while trying to get public timeline, trying again in "
                    + str(CONFIG.get_rate_limit_exceeded_time())
                    + " seconds.\n"
                )
                time.sleep(CONFIG.get_rate_limit_exceeded_time())
            else:
                sys.stderr.write(
                    "Got TwitterError while trying to get public timeline " + str(err) + ". Retrying soon.\n"
                )
                traceback.print_exc()
                time.sleep(100)
            continue  # retry the loop

        if not skip_analysis:
            print("These will be added:")
            for s in set_to_add:
                print(s)

        for user in set_to_add:
            # Stop as soon as the requested number of users is reached
            # instead of working through the rest of this batch.
            if added_users >= users_to_add:
                break

            if skip_analysis:
                tweets = th.get_all_statuses(user)
                print("#####_NEW_USER_#####")
                print(user)
                for t in tweets:
                    try:
                        text = t.GetText()
                        print("#####_NEW_TWEET_#####")
                        print(text)
                        print("#####_END_OF_TWEET_#####")
                    except UnicodeEncodeError:
                        # Skip tweets that cannot be encoded for output.
                        continue
                time.sleep(sleep_time)
            elif not sh.contains(user):
                retry = True  # A retry variable for an inner "goto"
                while retry:
                    time.sleep(sleep_time)
                    try:
                        # NOTE(review): a falsy return value leaves retry
                        # True, so the same user is attempted again after
                        # sleep_time -- confirm this is intended.
                        if addalyse.addalyse(SOLR_SERVER, user):
                            users_added.add(user)
                            added_users += 1
                            retry = False
                    except addalyse.AddalyseRateLimitExceededError as err:
                        # Wait out the rate limit, then try the same user again.
                        sys.stderr.write(
                            "RateLimitExceeded, trying again in "
                            + str(CONFIG.get_rate_limit_exceeded_time())
                            + " seconds.\n"
                        )
                        time.sleep(CONFIG.get_rate_limit_exceeded_time())
                        retry = True
                    except addalyse.AddalyseError as err:
                        # Catches every other addalyse error: report it
                        # and give up on this user.
                        sys.stderr.write("Addalyse threw an error: " + str(err) + "\n")
                        retry = False
                    except Exception:
                        # ignore errors non-silently (we print tracebacks!)
                        # TODO: use the logger for this?
                        sys.stderr.write("Unhandled exception\n")
                        traceback.print_exc()
                        retry = False

    # For debugging purposes, displays all users found in this session.
    if not skip_analysis:
        for key in users_added:
            print(key + " was added")