def on_timeout(self):
    """Log a stream timeout and keep the stream running.

    Returns:
        bool: Always True, so the stream is not killed.
    """
    # The message used to be built as ``sys.stderr + 'Timeout...'``; adding a
    # file object to a string raises TypeError, so the handler crashed
    # instead of logging.
    message = 'Timeout...'
    print_and_log(message, LOGNAME)
    print(DELIMITER)
    return True  # Don't kill the stream
def on_status(self, status):
    """Store an incoming status when its author is in ``api.myfriends_ids``.

    Statuses from any other author are ignored. Insertion errors are logged
    and never propagated.

    Args:
        status: Status object delivered by the stream. Its ``author``,
            ``id``, ``created_at`` and ``text`` attributes are read.

    Returns:
        bool: Always True, so the stream is not killed.
    """
    # Extra variable myfriends_ids created in main
    if status.author.id not in api.myfriends_ids:
        # Tweet skipped because it isn't from a friend account
        return True  # Don't kill the stream

    try:
        # NOTE(review): a new DbConnection is created per status and never
        # explicitly released here - confirm whether it needs closing.
        conn = DbConnection()
        process_status(conn, status)
        print("Date:", status.created_at)
        # NOTE(review): this line was unreadable in the source (masked
        # characters between "User:" and "Text:"); reconstructed from the
        # author name already used in the log message below.
        print("User:", status.author.name)
        print("Text:", status.text[:99], "[...]")
        message = "INSERTED tweet {}, from author {}".format(
            status.id, status.author.name)
        print_and_log(message, LOGNAME)
    except Exception as e:
        message = "ERROR: While inserting tweet {} = {}".format(
            status.id, e)
        print_and_log(message, LOGNAME)

    print(DELIMITER)
    return True  # Don't kill the stream
def on_limit(self, track=None):
    """Log a limit notice, pause for 15 minutes and keep the stream running.

    Args:
        track: Value supplied by the stream dispatcher with the limit
            notice. It is accepted so the call does not fail with a
            TypeError and is otherwise unused; it defaults to None so
            existing zero-argument calls keep working.

    Returns:
        bool: Always True, so the stream is not killed.
    """
    message = 'Rate Limit Exceeded, Sleep for 15 Mins'
    print_and_log(message, LOGNAME)
    print(DELIMITER)
    time.sleep(15 * 60)
    return True  # Don't kill the stream
def on_error(self, status_code):
    """Report a stream error code and decide whether the stream continues.

    Args:
        status_code: Error status code received from the stream.

    Returns:
        bool: False when ``status_code`` equals 420, True for any other code
        (True means: don't kill the stream).
    """
    print_and_log(
        'Encountered error with status code: ' + str(status_code), LOGNAME)
    print(DELIMITER)

    # Only a 420 answer stops the stream; everything else keeps it alive.
    keep_running = not (status_code == 420)
    return keep_running
def start_stream(query=None, retry_delay=10):
    """Run the filtered stream forever, reconnecting whenever it stops.

    A new ``tweepy.streaming.Stream`` is created with ``api.auth`` and a
    ``MyStreamListener`` on every attempt. This function never returns.

    Args:
        query: Value passed as ``follow`` to ``Stream.filter``.
        retry_delay: Seconds to wait before every reconnection attempt.
    """
    while True:
        try:
            my_stream = tweepy.streaming.Stream(api.auth, MyStreamListener())
            my_stream.filter(follow=query)
        except Exception as e:
            message = "ERROR: Exception occurred! {}".format(e)
            print_and_log(message, LOGNAME)
            print(DELIMITER)

        # Wait before reconnecting in every case. Previously the exception
        # path skipped this pause, so a persistent failure (for example no
        # network) made the loop reconnect as fast as it could spin.
        time.sleep(retry_delay)
        message = "{0} (RE)STARTING STREAMING {0}".format(HALF_DELIMITER)
        print_and_log(message, LOGNAME)
def period_recovery(conn, api):
    """Fetch and store missing tweets for every row of ``last_tweets_list``.

    For each row the number of tweets still to collect (``diff``) is derived
    from the row's counters and capped by ``TWEETS_LIMIT``. Timelines are
    then requested in blocks of up to 200 statuses, restricted to ids above
    ``tw['tweet_id']`` and up to ``max_id``, and every status is handed to
    ``process_status``.

    Args:
        conn: Database connection object; ``last_tweets_list()`` is called on
            it and it is passed through to ``process_status``.
        api: API client; ``user_timeline`` is called on it.
    """
    last_tweets = conn.last_tweets_list()
    count = len(last_tweets)

    for tw in last_tweets:
        user_info = "{0:<3} User: {1} - {2}".format(
            count, tw['user_id'], tw['user_name'])
        log_file_name = "{}{}".format(tw['user_id'], LOGNAME)
        print_and_log(user_info, log_file_name)
        control_flag = 0
        progress = 0

        try:
            if tw['max_id'] is None:
                # No stored upper bound: start from the newest tweet.
                newest = api.user_timeline(user_id=tw['user_id'], count=1)[0]
                max_id = newest.id
                diff = max(
                    0, newest.author.statuses_count - tw['tweet_counter'])
                max_diff = TWEETS_LIMIT
            else:
                # Continue below the stored upper bound.
                max_id = tw['max_id'] - 1
                diff = max(
                    0,
                    tw['counter_max'] - tw['tweet_counter']
                    - tw['counter_diff'])
                max_diff = max(0, TWEETS_LIMIT - tw['counter_diff'])
            diff = max_diff = min(diff, max_diff)
        except Exception as e:
            print_and_log(
                "{} > ERROR while handling user timeline: {}".format(
                    user_info, e),
                log_file_name, "\n")
            count -= 1
            continue

        # The bar is only used inside the loop below, which needs diff > 0
        # and therefore max_diff > 0.
        if max_diff:
            bar = progressbar.ProgressBar(max_value=max_diff)

        while diff > 0:
            # Controller to avoid being stuck on one user. It is checked
            # before the request so no timeline is fetched only to be
            # thrown away.
            if control_flag == CONTROL_FLAG_LIMIT:
                print_and_log(
                    "{} # Control flag limit reached ({})!".format(
                        user_info, control_flag),
                    log_file_name, "\n")
                break

            # The maximum count = 200
            statuses = api.user_timeline(user_id=tw['user_id'],
                                         since_id=tw['tweet_id'],
                                         max_id=max_id,
                                         count=200)

            logfile(
                "{} # Tweets left: {} # List size: {}".format(
                    user_info, diff, len(statuses)),
                log_file_name)

            # An (almost) empty block counts towards the control flag.
            if len(statuses) <= 1:
                control_flag += 1

            for st in statuses:
                try:
                    process_status(conn, st, False, False)
                    message = "{} > Inserted tweet {} - {} - {}".format(
                        user_info, st.id, st.created_at, diff)
                except Exception as e:
                    message = "{} > ERROR to insert Tweet {}: {}".format(
                        user_info, st.id, e)

                diff -= 1
                progress += 1
                max_id = st.id - 1  # the next request continues below this id
                if progress <= max_diff:
                    bar.update(progress)
                logfile(message, log_file_name)
                time.sleep(0.1)  # For each insertion

            time.sleep(5)  # For each API request

        time.sleep(1)  # For each user searched

        # A whole block is always processed, so diff can end below zero.
        # Comparing with ``== 0`` used to skip this message in that case.
        if control_flag == CONTROL_FLAG_LIMIT or diff <= 0:
            print_and_log(
                " {} # It's Done! Maximum possible tweets retrived!".format(
                    user_info),
                log_file_name, "\n")
        count -= 1
def user_timeline_recovery(conn, api):
    """Collect up to ``TWEETS_LIMIT`` tweets for every id from ``friends_ids``.

    For each user the newest tweet is fetched first to obtain the starting
    ``max_id`` and the author information. Users missing from
    ``conn.users_list()`` are inserted with ``insert_new_user``. Timelines
    are then requested in blocks of up to 200 statuses and every status is
    handed to ``process_status``.

    Args:
        conn: Database connection object; ``users_list()`` is called on it
            and it is passed through to ``insert_new_user`` and
            ``process_status``.
        api: API client; ``friends_ids`` and ``user_timeline`` are called on
            it.
    """
    all_users = api.friends_ids()
    inserted_users = conn.users_list()
    count = len(all_users)

    for user_id in all_users:
        # Built before the try block so the error handler below can always
        # use it. It used to be computed from ``tw['user_id']``, a name that
        # does not exist in this function, so every user failed with a
        # NameError.
        log_file_name = "{}{}".format(user_id, LOGNAME)

        # Initial information to start collecting
        try:
            control_flag = 0
            progress = 0
            newest = api.user_timeline(user_id=user_id, count=1)[0]
            max_id = newest.id
            max_diff = diff = min(newest.author.statuses_count, TWEETS_LIMIT)
            user_info = "> {} User: {} - {}".format(count, user_id,
                                                    newest.author.name)
            print_and_log(user_info, log_file_name)
        except Exception as e:
            user_info = "{} User: {}".format(count, user_id)
            print_and_log(
                "{} > ERROR while handling user timeline: {}".format(
                    user_info, e),
                log_file_name, "\n")
            count -= 1
            continue

        # In case the user has not been inserted yet
        if user_id not in inserted_users:
            try:
                insert_new_user(conn, newest.author)
            except Exception:
                # The failure message used to be built and then dropped by
                # the ``continue``; it is now logged, and the counter is
                # decremented like in the other skip path.
                logfile("{} > ERROR to insert user!".format(user_info),
                        log_file_name)
                count -= 1
                continue
            inserted_users[user_id] = user_id
            logfile("{} > Inserted new user!".format(user_info),
                    log_file_name)

        # Start the progress bar if necessary
        if max_diff:
            bar = progressbar.ProgressBar(max_value=max_diff)

        # Collect tweets until the difference is zero
        while diff > 0:
            # Controller to avoid being trapped on one user for too long.
            # It is checked before the request so no timeline is fetched
            # only to be thrown away.
            if control_flag == CONTROL_FLAG_LIMIT:
                message = "{} # Control flag limit ({}) reached!".format(
                    user_info, control_flag)
                logfile(message, log_file_name)
                diff = 0
                break

            # The maximum count allowed by Twitter is 200
            statuses = api.user_timeline(user_id=user_id,
                                         max_id=max_id,
                                         count=200)

            logfile(
                "{} # Tweets left: {} # List size: {}".format(
                    user_info, diff, len(statuses)),
                log_file_name)

            # Raise diff to at least the block size, so that processing the
            # whole block below cannot push it under zero.
            diff = max(diff, len(statuses))

            # An (almost) empty block counts towards the control flag.
            if len(statuses) <= 1:
                control_flag += 1

            # Process all tweets from the current block collected
            for st in statuses:
                try:
                    process_status(conn, st, False, False)
                    message = "{} > Inserted tweet {} - {} - {}".format(
                        user_info, st.id, st.created_at, diff)
                except Exception as e:
                    message = "{} > ERROR to insert Tweet {}: {}".format(
                        user_info, st.id, e)

                diff -= 1
                progress += 1
                max_id = st.id - 1  # the next request continues below this id
                if progress <= max_diff:
                    bar.update(progress)
                logfile(message, log_file_name)
                time.sleep(0.1)  # For each insertion

            time.sleep(5)  # For each API request

        time.sleep(1)  # For each user searched

        if control_flag == CONTROL_FLAG_LIMIT or diff <= 0:
            print_and_log(
                " {} # It's Done! Maximum possible tweets retrived!".format(
                    user_info),
                log_file_name, "\n")
        count -= 1
# Script start-up: authenticate, build the list of ids to follow and start
# the stream.
keys = api_tokens()

# API keys
access_token = keys['access_token']
access_token_secret = keys['access_token_secret']
consumer_key = keys['consumer_key']
consumer_secret = keys['consumer_secret']

# API auth
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Variable created to avoid multiple requests for friends ids.
# Own account id is added to make tests.
api.myfriends_ids = api.friends_ids()
api.myfriends_ids.append(api.me().id)

# Users to stream. The own account id is already part of myfriends_ids, so
# it is not appended again: that used to duplicate it in the query and cost
# a second api.me() request.
query = [str(friend_id) for friend_id in api.myfriends_ids]

print("\nQuery being tracked =", query)
message = "{0} STARTING STREAMING {0}".format(HALF_DELIMITER)
print_and_log(message, LOGNAME)
start_stream(query)