Пример #1
0
    def on_timeout(self):
        """Log that the stream timed out and keep the stream running.

        Returns:
            bool: always True (the stream must not be killed).
        """
        # Must be a plain string: adding ``sys.stderr`` (a file object) to a
        # str raises TypeError before anything could be logged.
        message = 'Timeout...'

        print_and_log(message, LOGNAME)
        print(DELIMITER)

        return True  # Don't kill the stream
Пример #2
0
    def on_status(self, status):
        """Store a streamed tweet when its author is one of the followed ids.

        Tweets whose author id is not in ``api.myfriends_ids`` are ignored.
        Otherwise the status is handed to ``process_status`` using a new
        ``DbConnection``; success and failure are both logged and never
        propagate to the caller.

        Args:
            status: the streamed status object; ``author``, ``id``,
                ``created_at`` and ``text`` are read from it.

        Returns:
            bool: always True (the stream must not be killed).
        """
        # Extra variable myfriends_ids created in main
        if status.author.id not in api.myfriends_ids:
            # Tweet skipped because it isn't from a friend account
            return True  # Don't kill the stream

        try:
            conn = DbConnection()
            process_status(conn, status)

            print("Date:", status.created_at)
            # NOTE(review): this line was garbled in the source; the author
            # name is assumed to be the value originally printed -- confirm.
            print("User:", status.author.name)
            print("Text:", status.text[:99], "[...]")

            message = "INSERTED tweet {}, from author {}".format(
                status.id, status.author.name)
            print_and_log(message, LOGNAME)

        except Exception as e:
            # Best effort: one failed insertion must not stop the stream.
            message = "ERROR: While inserting tweet {} = {}".format(
                status.id, e)

            print_and_log(message, LOGNAME)

        print(DELIMITER)

        return True  # Don't kill the stream
Пример #3
0
    def on_limit(self, track=None):
        """Log a limit notice, pause for 15 minutes and keep the stream alive.

        Args:
            track: value supplied with the limit notice. tweepy's
                ``StreamListener`` passes it positionally when it invokes
                this callback, so the parameter must exist; it is not used
                here. Defaults to None so existing zero-argument calls keep
                working.

        Returns:
            bool: always True (the stream must not be killed).
        """
        message = 'Rate Limit Exceeded, Sleep for 15 Mins'

        print_and_log(message, LOGNAME)
        print(DELIMITER)

        time.sleep(15 * 60)
        return True  # Don't kill the stream
Пример #4
0
    def on_error(self, status_code):
        """Log an error status code reported for the stream.

        Args:
            status_code: the error code to report.

        Returns:
            bool: False when ``status_code`` equals 420, True otherwise
            (True means the stream must not be killed).
        """
        print_and_log(
            'Encountered error with status code: ' + str(status_code),
            LOGNAME)
        print(DELIMITER)

        # Only code 420 asks for the stream to stop; anything else keeps it.
        return not (status_code == 420)
Пример #5
0
def start_stream(query=None):
    """Run the stream forever, reconnecting every time it stops.

    Each round builds a new ``tweepy.streaming.Stream`` with a
    ``MyStreamListener`` and blocks in ``filter``. Whether ``filter``
    returns or raises, the function waits 10 seconds, logs a restart
    banner and connects again. It never returns.

    Args:
        query: value forwarded as the ``follow`` argument of
            ``Stream.filter``.
    """
    while True:
        try:
            stream = tweepy.streaming.Stream(api.auth, MyStreamListener())
            stream.filter(follow=query)

        except Exception as e:
            message = "ERROR: Exception occurred! {}".format(e)

            print_and_log(message, LOGNAME)
            print(DELIMITER)

        # Pause on every path, including after an exception: reconnecting
        # immediately would turn a persistent failure into a tight loop of
        # connection attempts.
        time.sleep(10)

        message = "{0} (RE)STARTING STREAMING {0}".format(HALF_DELIMITER)
        print_and_log(message, LOGNAME)
Пример #6
0
def period_recovery(conn, api):
    """Fetch the tweets still missing for every user in the last-tweets list.

    For each entry returned by ``conn.last_tweets_list()`` the function
    works out how many tweets are still missing (capped by
    ``TWEETS_LIMIT``), then pages backwards through the user's timeline
    with ``api.user_timeline`` and passes every status to
    ``process_status``. Progress is written to a per-user log file named
    ``<user_id><LOGNAME>``.

    Args:
        conn: object providing ``last_tweets_list()``; also forwarded to
            ``process_status``.
        api: object providing ``user_timeline``.
    """
    last_tweets = conn.last_tweets_list()

    # Countdown shown in the log lines: users left to process.
    count = len(last_tweets)
    for tw in last_tweets:

        user_info = "{0:<3} User: {1} - {2}".format(count, tw['user_id'],
                                                    tw['user_name'])

        log_file_name = "{}{}".format(tw['user_id'], LOGNAME)

        print_and_log(user_info, log_file_name)

        control_flag = 0  # Number of pages that came back (almost) empty
        progress = 0  # Statuses handled so far, for the progress bar

        try:
            if tw['max_id'] is None:
                # No stored upper bound: start from the user's newest tweet.
                newest = api.user_timeline(user_id=tw['user_id'], count=1)[0]
                max_id = newest.id
                diff = max(
                    0, (newest.author.statuses_count - tw['tweet_counter']))
                max_diff = TWEETS_LIMIT

            else:
                # Continue just below the stored upper bound.
                max_id = tw['max_id'] - 1
                diff = max(0, (tw['counter_max'] - tw['tweet_counter'] -
                               tw['counter_diff']))
                max_diff = max(0, (TWEETS_LIMIT - tw['counter_diff']))

            diff = max_diff = min(diff, max_diff)

        except Exception as e:
            print_and_log(
                "{} > ERROR while handling user timeline: {}".format(
                    user_info, e), log_file_name, "\n")
            count -= 1
            continue

        if max_diff:
            bar = progressbar.ProgressBar(max_value=max_diff)

        while diff > 0:
            # Checked before the request so that no API call is spent on a
            # user we are about to give up on.
            if control_flag == CONTROL_FLAG_LIMIT:
                print_and_log(
                    "{} # Control flag limit reached ({})!".format(
                        user_info, control_flag), log_file_name, "\n")
                break

            # The maximum count = 200
            statuses = api.user_timeline(user_id=tw['user_id'],
                                         since_id=tw['tweet_id'],
                                         max_id=max_id,
                                         count=200)

            logfile(
                "{} # Tweets left: {} # List size: {}".format(
                    user_info, diff, len(statuses)), log_file_name)

            if len(statuses) <= 1:
                control_flag += 1

            for st in statuses:
                try:
                    process_status(conn, st, False, False)
                    message = "{} > Inserted tweet {} - {} - {}".format(
                        user_info, st.id, st.created_at, diff)

                except Exception as e:
                    message = "{} > ERROR to insert Tweet {}: {}".format(
                        user_info, st.id, e)

                diff -= 1
                progress += 1
                max_id = st.id - 1  # Next page starts below this status

                if progress <= max_diff:
                    bar.update(progress)

                logfile(message, log_file_name)

                time.sleep(0.1)  # For each insertion

            time.sleep(5)  # For each API request

        time.sleep(1)  # For each user searched

        # ``diff`` can drop below zero when the last page holds more statuses
        # than were still missing, so test ``<= 0`` rather than ``== 0``.
        if control_flag == CONTROL_FLAG_LIMIT or diff <= 0:
            print_and_log(
                " {} # It's Done! Maximum possible tweets retrived!".format(
                    user_info), log_file_name, "\n")

        count -= 1
Пример #7
0
def user_timeline_recovery(conn, api):
    """Collect the timeline of every friend id, newest tweets first.

    For each id returned by ``api.friends_ids()`` the function reads the
    newest status, inserts the author through ``insert_new_user`` when the
    id is missing from ``conn.users_list()``, and then pages backwards
    through the timeline with ``api.user_timeline``, passing every status
    to ``process_status``. At most ``TWEETS_LIMIT`` tweets are targeted per
    user. Progress is written to a per-user log file named
    ``<user_id><LOGNAME>``.

    Args:
        conn: object providing ``users_list()``; also forwarded to
            ``insert_new_user`` and ``process_status``.
        api: object providing ``friends_ids`` and ``user_timeline``.
    """
    all_users = api.friends_ids()
    inserted_users = conn.users_list()

    # Countdown shown in the log lines: users left to process.
    count = len(all_users)

    for user_id in all_users:
        # Built outside the ``try`` and from the loop's own ``user_id`` so
        # the error handler below can always log to the right file.
        log_file_name = "{}{}".format(user_id, LOGNAME)

        # Initial information to start collect
        try:
            control_flag = 0  # Number of pages that came back (almost) empty
            progress = 0  # Statuses handled so far, for the progress bar

            newest = api.user_timeline(user_id=user_id, count=1)[0]
            max_id = newest.id
            max_diff = diff = min(newest.author.statuses_count, TWEETS_LIMIT)

            user_info = "> {} User: {} - {}".format(count, user_id,
                                                    newest.author.name)

            print_and_log(user_info, log_file_name)

        except Exception as e:
            user_info = "{} User: {}".format(count, user_id)
            print_and_log(
                "{} > ERROR while handling user timeline: {}".format(
                    user_info, e), log_file_name, "\n")
            count -= 1
            continue

        # In case that the user has not been inserted
        if user_id not in inserted_users.keys():
            try:
                insert_new_user(conn, newest.author)
                message = "{} > Inserted new user!".format(user_info)
                inserted_users[user_id] = user_id

            except Exception:
                # Record the failure and keep the countdown in step before
                # moving on to the next user.
                message = "{} > ERROR to insert user!".format(user_info)
                logfile(message, log_file_name)
                count -= 1
                continue

            logfile(message, log_file_name)

        # Start the progress bar if necessary
        if max_diff:
            bar = progressbar.ProgressBar(max_value=max_diff)

        # Collect tweets until the difference reaches zero
        while diff > 0:
            # Controller to avoid being trapped on a single user for too
            # long; checked before the request so no API call is wasted.
            if control_flag == CONTROL_FLAG_LIMIT:
                message = "{} # Control flag limit ({}) reached!".format(
                    user_info, control_flag)
                logfile(message, log_file_name)
                break

            # The maximum count allowed by Twitter is 200
            statuses = api.user_timeline(user_id=user_id,
                                         max_id=max_id,
                                         count=200)

            logfile(
                "{} # Tweets left: {} # List size: {}".format(
                    user_info, diff, len(statuses)), log_file_name)

            # Raise ``diff`` to the page size when the page is larger, so the
            # per-status decrement below cannot take it below zero.
            diff = max(diff, len(statuses))

            if len(statuses) <= 1:
                control_flag += 1

            # Process all tweets from the current block collected
            for st in statuses:
                try:
                    process_status(conn, st, False, False)
                    message = "{} > Inserted tweet {} - {} - {}".format(
                        user_info, st.id, st.created_at, diff)

                except Exception as e:
                    message = "{} > ERROR to insert Tweet {}: {}".format(
                        user_info, st.id, e)

                diff -= 1
                progress += 1
                max_id = st.id - 1  # Next page starts below this status

                if progress <= max_diff:
                    bar.update(progress)

                logfile(message, log_file_name)

                time.sleep(0.1)  # For each insertion

            time.sleep(5)  # For each API request

        time.sleep(1)  # For each user searched

        if control_flag == CONTROL_FLAG_LIMIT or diff == 0:
            print_and_log(
                " {} # It's Done! Maximum possible tweets retrived!".format(
                    user_info), log_file_name, "\n")

        count -= 1
Пример #8
0
    # Authenticate against the API, build the list of account ids to follow
    # and start the (never-ending) stream.
    keys = api_tokens()

    # API keys
    access_token = keys['access_token']
    access_token_secret = keys['access_token_secret']
    consumer_key = keys['consumer_key']
    consumer_secret = keys['consumer_secret']

    # API auth
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    api = tweepy.API(auth, wait_on_rate_limit=True)

    # Variable created to avoid multiple requests for friends ids
    api.myfriends_ids = api.friends_ids()

    # Added own account id to make tests. It is appended once, here, so the
    # query derived below already contains it and needs no second append
    # (which would duplicate the id and cost another api.me() request).
    api.myfriends_ids.append(api.me().id)

    # Users to stream
    query = list(map(str, api.myfriends_ids))

    print("\nQuery being tracked =", query)

    message = "{0} STARTING STREAMING {0}".format(HALF_DELIMITER)
    print_and_log(message, LOGNAME)

    start_stream(query)