def on_error(self, status_code, data):
    """Log a streaming API error and forward it through the error messenger.

    Args:
        status_code: Status code reported for the failed streaming request.
        data: Additional error payload; it is only printed.
    """
    # Read the title from the instance instead of relying on a bare name
    # lookup, which only resolves if a module-level global of that name
    # happens to exist. Fall back to an empty title so that reporting an
    # error can never itself fail on a missing attribute.
    error_title = getattr(self, "arvid220u_error_title", "")
    print("STREAMING API ERROR IN TWEETSTREAMER!")
    print("Status code:")
    print(status_code)
    error_messenger.send_error_message(
        "streaming API error, with code " + str(status_code),
        "TweetStreamer.on_error from " + error_title)
    print("Other data:")
    print(data)
    print("END OF ERROR MESSAGE")
def on_error(self, status_code, data):
    """Log a streaming API error and forward it through the error messenger.

    Args:
        status_code: Status code reported for the failed streaming request.
        data: Additional error payload; it is only printed.
    """
    # Read the title from the instance instead of relying on a bare name
    # lookup, which only resolves if a module-level global of that name
    # happens to exist. Fall back to an empty title so that reporting an
    # error can never itself fail on a missing attribute.
    error_title = getattr(self, "arvid220u_error_title", "")
    print("STREAMING API ERROR IN TWEETSTREAMER!")
    print("Status code:")
    print(status_code)
    error_messenger.send_error_message(
        "streaming API error, with code " + str(status_code),
        "TweetStreamer.on_error from " + error_title)
    print("Other data:")
    print(data)
    print("END OF ERROR MESSAGE")
def set_up():
    """Populate the module-level state needed before tweeting starts.

    Steps, in order:
      1. Read user ids from ``setup.USER_IDS_PATH`` (first whitespace-separated
         field of every non-blank line) into ``user_ids``.
      2. Look up the screen name of every user id and store it in
         ``screen_name_for_user_id``; users whose lookup fails are dropped
         from ``user_ids``.
      3. Create an empty "already sent" set per remaining user in
         ``sent_responses_to_user``.
      4. Read the non-blank lines of ``setup.RESPONSES_PATH`` into
         ``responses``.
    """
    # load the user ids
    global user_ids
    with open(setup.USER_IDS_PATH, "r") as user_ids_file:
        for line in user_ids_file:
            fields = line.split()
            if not fields:
                # a blank (or whitespace-only) line carries no id; indexing
                # its first field would raise IndexError
                continue
            user_ids.append(int(fields[0]))

    # find the screen name for every user id
    global screen_name_for_user_id
    # if we can't get the screen name, then something's wrong:
    # queue that user for removal
    remove_list = []
    for user_id in user_ids:
        # use the mentions app for the lookup, since it is less critical than
        # the tweeting app while having the same privileges
        # if rate limited, wait for one minute and then check again
        # (the show user request can be sent 900 times per 15 minute window)
        while twythonaccess.currently_rate_limited(TwitterApp.mentions, 900):
            time.sleep(60)
        try:
            screen_name = twythonaccess.authorize(
                TwitterApp.mentions).show_user(user_id=user_id)["screen_name"]
            screen_name_for_user_id[user_id] = screen_name
        except Exception as exception:
            # can't find the screen name of this user
            print(exception)
            remove_list.append(user_id)
            print("Can't find screen name of user with id: " + str(user_id))
    error_messenger.send_error_message(
        "Removing " + str(len(remove_list)) +
        " users due to inability to get their screen name.",
        "tweet.py > set_up()")
    for user_id in remove_list:
        user_ids.remove(user_id)

    # create the dictionary of empty sets per each user id
    global sent_responses_to_user
    for user_id in user_ids:
        sent_responses_to_user[user_id] = set()

    # load the responses from the responses file
    global responses
    with open(setup.RESPONSES_PATH, "r") as responses_file:
        for line in responses_file:
            response = line.strip()
            if response:
                # skip blank lines so that an empty response is never tweeted
                responses.append(response)
def set_up():
    """Populate the module-level state needed before tweeting starts.

    Steps, in order:
      1. Read user ids from ``setup.USER_IDS_PATH`` (first whitespace-separated
         field of every non-blank line) into ``user_ids``.
      2. Look up the screen name of every user id and store it in
         ``screen_name_for_user_id``; users whose lookup fails are dropped
         from ``user_ids``.
      3. Create an empty "already sent" set per remaining user in
         ``sent_responses_to_user``.
      4. Read the non-blank lines of ``setup.RESPONSES_PATH`` into
         ``responses``.
    """
    # load the user ids
    global user_ids
    with open(setup.USER_IDS_PATH, "r") as user_ids_file:
        for line in user_ids_file:
            fields = line.split()
            if not fields:
                # a blank (or whitespace-only) line carries no id; indexing
                # its first field would raise IndexError
                continue
            user_ids.append(int(fields[0]))

    # find the screen name for every user id
    global screen_name_for_user_id
    # if we can't get the screen name, then something's wrong:
    # queue that user for removal
    remove_list = []
    for user_id in user_ids:
        # use the mentions app for the lookup, since it is less critical than
        # the tweeting app while having the same privileges
        # if rate limited, wait for one minute and then check again
        # (the show user request can be sent 900 times per 15 minute window)
        while twythonaccess.currently_rate_limited(TwitterApp.mentions, 900):
            time.sleep(60)
        try:
            screen_name = twythonaccess.authorize(
                TwitterApp.mentions).show_user(user_id=user_id)["screen_name"]
            screen_name_for_user_id[user_id] = screen_name
        except Exception as exception:
            # can't find the screen name of this user
            print(exception)
            remove_list.append(user_id)
            print("Can't find screen name of user with id: " + str(user_id))
    error_messenger.send_error_message(
        "Removing " + str(len(remove_list)) +
        " users due to inability to get their screen name.",
        "tweet.py > set_up()")
    for user_id in remove_list:
        user_ids.remove(user_id)

    # create the dictionary of empty sets per each user id
    global sent_responses_to_user
    for user_id in user_ids:
        sent_responses_to_user[user_id] = set()

    # load the responses from the responses file
    global responses
    with open(setup.RESPONSES_PATH, "r") as responses_file:
        for line in responses_file:
            response = line.strip()
            if response:
                # skip blank lines so that an empty response is never tweeted
                responses.append(response)
def mentions_streamer():
    """Stream tweets that mention this bot until self destruction is flagged.

    Creates the global ``mentions_streamer_object`` with the mentions app
    credentials, registers ``new_mention`` as its observer and keeps the
    stream alive: a broken connection ("IncompleteRead") restarts at once,
    any other exception is reported and followed by a one hour pause.
    """
    print("mentions streamer")
    error_source = "tweet.py > mentions_streamer()"
    one_hour = 60 * 60

    # the streamer is global so that code outside this function can reach it
    global mentions_streamer_object
    mentions_streamer_object = TweetStreamer(
        setup.MENTIONS_CONSUMER_KEY,
        setup.MENTIONS_CONSUMER_SECRET,
        setup.MENTIONS_ACCESS_TOKEN,
        setup.MENTIONS_ACCESS_TOKEN_SECRET,
    )
    # title used in the streamer's own error logs
    mentions_streamer_object.arvid220u_error_title = "tweet.py > mentions_streamer()"
    # every incoming mention is handed to new_mention
    mentions_streamer_object.arvid220u_add_observer(new_mention)

    while not self_destruction_flag:
        try:
            # track tweets referencing our own account
            # (RTs are discarded by default)
            own_handle = "@" + setup.TWITTER_USERNAME
            mentions_streamer_object.statuses.filter(track=own_handle)
        except Exception as error:
            # a failure during self destruction simply ends the loop
            if self_destruction_flag:
                break
            # any "IncompleteRead" connection failure restarts immediately
            # (this prefix test supersedes the earlier exact-message checks)
            if str(error).startswith("('Connection broken: IncompleteRead"):
                print("restarting")
            else:
                # report the problem, then wait an hour in the hope that it
                # resolves itself before streaming is attempted again
                print("tweet.py > mentions_streamer(): ")
                print(error)
                error_messenger.send_error_message(error, error_source)
                print("will sleep for one hour to avoid exception")
                time.sleep(one_hour)
                print("finished sleep in tweet.py > mentions_streamer. will now start anew")
def _sleep_until(moment):
    """Block until the naive UTC datetime *moment*; return at once if it has passed."""
    remaining = (moment - datetime.utcnow()).total_seconds()
    # sleep() raises ValueError for a negative duration, so clamp at zero
    sleep(max(0.0, remaining))


def main():
    """Run the whole experiment schedule: measure, tweet, then wind down.

    Timeline (all times naive UTC):
      * start_time: the next full hour after now. Starting on the hour change
        lets the reference bot and the control bot begin simultaneously,
        which keeps their data comparable. (Starting one bot just before and
        the other just after an hour change would offset them by one hour.)
      * tweet_start_time: start_time + 30 days
      * tweet_end_time: tweet_start_time + 30 days
      * end_time: tweet_end_time + 30 days

    The measure thread runs from start_time until end_time. Unless
    ``setup.IS_CONTROL_GROUP`` is set, the tweet thread runs from
    tweet_start_time until tweet_end_time.
    """
    # we will have two threads
    tweet_thread = Thread(target=tweet.main)
    measure_thread = Thread(target=measure.main)

    # calculate the schedule, beginning at the next hour change
    now_time = datetime.utcnow()
    start_time = datetime.combine(now_time.date(), time(now_time.hour, 0))
    start_time += timedelta(hours=1)
    tweet_start_time = start_time + timedelta(days=30)
    tweet_end_time = tweet_start_time + timedelta(days=30)
    end_time = tweet_end_time + timedelta(days=30)

    print("INFORMATION")
    print("Will start at " + str(start_time) + " (UTC).")
    print("Will end at " + str(end_time) + " (UTC).")
    error_messenger.send_error_message("Will start at " + str(start_time) + " (UTC).", "manager.py")
    error_messenger.send_error_message("Will end at " + str(end_time) + " (UTC).", "manager.py")

    # measuring begins at the start time
    _sleep_until(start_time)
    measure_thread.start()

    # if we are not in the control group, start and manage the tweet thread
    if not setup.IS_CONTROL_GROUP:
        _sleep_until(tweet_start_time)
        tweet_thread.start()
        _sleep_until(tweet_end_time)
        # send the self destruct message to the tweet thread
        tweet.self_destruct()

    # measuring continues until the global end time
    _sleep_until(end_time)
    measure.self_destruct()

    # finished: also make it clear that one benefits from waiting one full
    # day before shutting down the program
    print("FINISHED")
    print("Please wait for at least one day before shutting off this bot.")
    error_messenger.send_error_message("Finished. Please wait one day before shutting down.", "manager.py")
    print("Results can be found in " + setup.RAW_DATA_PATH + " and " + setup.PROCESSED_DATA_PATH + ".")
    print("Goodbye.")
def analyze_tweet_verdict(self, tweet, probability_distribution=None):
    """Return the most probable classification label for *tweet*.

    Args:
        tweet: The tweet to classify; only used when no distribution is
            supplied.
        probability_distribution: Optional mapping from label to
            probability. When omitted it is computed with
            ``self.analyze_tweet_probability_distribution(tweet)``.

    Returns:
        The label with the highest probability, or
        ``SentimentClassification.not_offensive`` when that probability is
        below 20 %. A label outside
        ``SentimentClassification.classification_set`` is reported through
        the error messenger but is still returned.
    """
    # identity test: an explicitly passed distribution is never mistaken for
    # "missing", whatever its equality semantics are
    if probability_distribution is None:
        probability_distribution = self.analyze_tweet_probability_distribution(tweet)

    # find the best match (the first label with the strictly highest probability)
    best_match = "none"
    best_match_prob = 0
    for label in probability_distribution:
        label_prob = probability_distribution[label]
        if label_prob > best_match_prob:
            best_match_prob = label_prob
            best_match = label

    # have a 20 % threshold in place for the likelihood of the best match
    if best_match_prob < 0.2:
        print("classification failed – returning neutral value")
        return SentimentClassification.not_offensive

    # check that the best match is one of the accepted return values
    # str() keeps the reporting itself from failing on a non-string label
    if best_match not in SentimentClassification.classification_set:
        print("illegal match '" + str(best_match) + "' in analyze_tweet_verdict() in sentiment_analyzer.py.")
        error_messenger.send_error_message(
            "illegal match: '" + str(best_match) + "' . serious.",
            "analyze_tweet_verdict() in sentiment_analyzer.py")

    # classification can be trusted to some degree
    print("classified as " + str(best_match))
    return best_match
def mentions_streamer():
    """Stream tweets that mention this bot until self destruction is flagged.

    Creates the global ``mentions_streamer_object`` with the mentions app
    credentials, registers ``new_mention`` as its observer and keeps the
    stream alive: a broken connection ("IncompleteRead") restarts at once,
    any other exception is reported and followed by a one hour pause.
    """
    print("mentions streamer")
    error_source = "tweet.py > mentions_streamer()"
    one_hour = 60 * 60

    # the streamer is global so that code outside this function can reach it
    global mentions_streamer_object
    mentions_streamer_object = TweetStreamer(
        setup.MENTIONS_CONSUMER_KEY,
        setup.MENTIONS_CONSUMER_SECRET,
        setup.MENTIONS_ACCESS_TOKEN,
        setup.MENTIONS_ACCESS_TOKEN_SECRET,
    )
    # title used in the streamer's own error logs
    mentions_streamer_object.arvid220u_error_title = "tweet.py > mentions_streamer()"
    # every incoming mention is handed to new_mention
    mentions_streamer_object.arvid220u_add_observer(new_mention)

    while not self_destruction_flag:
        try:
            # track tweets referencing our own account
            # (RTs are discarded by default)
            own_handle = "@" + setup.TWITTER_USERNAME
            mentions_streamer_object.statuses.filter(track=own_handle)
        except Exception as error:
            # a failure during self destruction simply ends the loop
            if self_destruction_flag:
                break
            # any "IncompleteRead" connection failure restarts immediately
            # (this prefix test supersedes the earlier exact-message checks)
            if str(error).startswith("('Connection broken: IncompleteRead"):
                print("restarting")
            else:
                # report the problem, then wait an hour in the hope that it
                # resolves itself before streaming is attempted again
                print("tweet.py > mentions_streamer(): ")
                print(error)
                error_messenger.send_error_message(error, error_source)
                print("will sleep for one hour to avoid exception")
                time.sleep(one_hour)
                print("finished sleep in tweet.py > mentions_streamer. will now start anew")
def setup_streamer():
    """Stream tweets matching the search phrase, then exit the interpreter.

    Creates the global ``streamer`` with the tweeting app credentials and
    registers ``new_tweet`` as its observer. A broken connection
    ("IncompleteRead") restarts the stream after saving the gathered user
    ids; any other exception saves them, reports the problem and stops
    streaming. Once the loop ends, ``sys.exit()`` is called.
    """
    # the streamer is global so that the user abort function can access it
    global streamer
    global quit_flag

    error_source = "find_users.py > setup_streamer()"

    # use the tweeting app
    streamer = TweetStreamer(
        setup.TWEETING_CONSUMER_KEY,
        setup.TWEETING_CONSUMER_SECRET,
        setup.TWEETING_ACCESS_TOKEN,
        setup.TWEETING_ACCESS_TOKEN_SECRET,
    )
    # title used in the streamer's own error logs
    streamer.arvid220u_error_title = "find_users.py"
    # every incoming tweet is handed to new_tweet
    streamer.arvid220u_add_observer(new_tweet)

    while True:
        if quit_flag:
            break
        try:
            # NOTE: if the search phrase equals the training search phrase,
            # the results may be skewed
            streamer.statuses.filter(track=setup.SEARCH_PHRASE, language=setup.LANGUAGE)
        except Exception as error:
            print(error)
            if quit_flag:
                break
            save_user_ids()
            # only an "IncompleteRead" connection failure is worth a restart
            # (this prefix test supersedes the earlier exact-message checks)
            recoverable = str(error).startswith("('Connection broken: IncompleteRead")
            if not recoverable:
                error_messenger.send_error_message(error, error_source)
                print("not restarting, rather just saving the currently gathered user ids")
                print("before reinstating the process, please check so that the user_ids file is not corrupt")
                error_messenger.send_error_message(
                    "Before restarting find_users, make sure the user_ids file is not corrupted",
                    error_source)
                break
            print("restarting")

    import sys
    sys.exit()
def _pick_unsent_response(user_id):
    """Return a random response that has not yet been sent to *user_id*.

    When every response has already been sent, the user's history is reset
    first. Raises ValueError when no responses are loaded at all.
    """
    if not responses:
        raise ValueError("no responses available to choose from")
    already_sent = sent_responses_to_user[user_id]
    unsent = [candidate for candidate in responses if candidate not in already_sent]
    if not unsent:
        # Everything has been sent: start over. Deciding this from the unsent
        # list (instead of comparing lengths and retrying random picks) also
        # terminates when the response list contains duplicates.
        sent_responses_to_user[user_id] = set()
        unsent = responses
    return random.choice(unsent)


def _send_tweet_with_fallbacks(response_tweet):
    """Send *response_tweet*, retrying once and then toggling backup mode.

    Returns False when self destruction was flagged while waiting for the
    retry (the caller should stop immediately); True otherwise, even if all
    three attempts failed.
    """
    error_source = "tweet.py > tweet_loop()"
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
        return True
    except Exception as first_error:
        print(first_error)
        error_messenger.send_error_message(first_error, error_source)

    # sleep for sixteen minutes (we have one hour spare), and try once again
    print("will sleep for sixteen minutes to try to avoid the exception")
    time.sleep(16 * 60)
    print("has slept for sixteen minutes and will retry sending the tweet")
    if self_destruction_flag:
        return False
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
        return True
    except Exception as second_error:
        print(second_error)

    # switch between the regular and the backup tweeting app, and send an
    # urgent message explaining what is happening
    print("toggling backup mode in tweeting app")
    twythonaccess.tweeting_in_backup_mode = not twythonaccess.tweeting_in_backup_mode
    error_messenger.send_error_message(
        "IMPORTANT: Tweeting app now toggled its backup mode", error_source)
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
    except Exception as third_error:
        # nothing more can be done; report the failure and carry on
        print(third_error)
        error_messenger.send_error_message(third_error, error_source)
        error_messenger.send_error_message(
            "We're all doomed. Exception couldn't be resolved, even after tremendous effort. Now, ignoring the error.",
            error_source)
    return True


def tweet_loop():
    """Send one response tweet to every user per week until self destruction.

    Each pass of the outer loop is meant to last exactly one week. The users
    are addressed in random order, with the tweets spread evenly over one
    week minus a spare hour; the pass then idles until the full week has
    elapsed, which keeps the weekly cycles aligned for measuring purposes.
    Both loops stop promptly once ``self_destruction_flag`` is set.
    """
    global user_ids
    global sent_responses_to_user
    global responses
    global screen_name_for_user_id
    print("tweet loop")
    week_seconds = 7 * 24 * 60 * 60
    while not self_destruction_flag:
        start_time = datetime.utcnow()
        # scramble a copy of the user ids so that the order is random
        user_ids_sendlist = user_ids[:]
        random.shuffle(user_ids_sendlist)
        # interval between tweets in seconds; one hour is kept spare for
        # error recovery, and max() avoids dividing by zero without users
        tweet_interval = (week_seconds - 60 * 60) / max(len(user_ids_sendlist), 1)
        for user_id in user_ids_sendlist:
            # It is of utmost importance that the treatment stops at the given
            # signal, even if not all users then receive the same number of
            # tweets.
            if self_destruction_flag:
                break
            response = _pick_unsent_response(user_id)
            # send this response to the user, mentioning them
            response_tweet = ("@" + screen_name_for_user_id[user_id] + " "
                              + response + " " + setup.RESPONSE_SUFFIX)
            # Delivery is best effort: the risk is Twitter banning the bot
            # for sending unsolicited messages.
            if not _send_tweet_with_fallbacks(response_tweet):
                break
            # add the chosen response to the sent responses set
            sent_responses_to_user[user_id].add(response)
            # now, sleep for the specified interval
            time.sleep(tweet_interval)
        # All users have been addressed: idle until exactly one week has
        # passed since the start time, unless self destruction is flagged.
        while (not self_destruction_flag
               and (datetime.utcnow() - start_time).total_seconds() <= week_seconds):
            time.sleep(1)
def on_timeout(self):
    """Log a streaming API timeout and forward it through the error messenger."""
    # Read the title from the instance instead of relying on a bare name
    # lookup, which only resolves if a module-level global of that name
    # happens to exist. Fall back to an empty title so that reporting a
    # timeout can never itself fail on a missing attribute.
    error_title = getattr(self, "arvid220u_error_title", "")
    print("STREAMING API TIMEOUT IN TWEETSTREAMER!")
    error_messenger.send_error_message(
        "streaming API timeout",
        "TweetStreamer.on_timeout from " + error_title)
    print("END OF ERROR MESSAGE")
def _pick_unsent_response(user_id):
    """Return a random response that has not yet been sent to *user_id*.

    When every response has already been sent, the user's history is reset
    first. Raises ValueError when no responses are loaded at all.
    """
    if not responses:
        raise ValueError("no responses available to choose from")
    already_sent = sent_responses_to_user[user_id]
    unsent = [candidate for candidate in responses if candidate not in already_sent]
    if not unsent:
        # Everything has been sent: start over. Deciding this from the unsent
        # list (instead of comparing lengths and retrying random picks) also
        # terminates when the response list contains duplicates.
        sent_responses_to_user[user_id] = set()
        unsent = responses
    return random.choice(unsent)


def _send_tweet_with_fallbacks(response_tweet):
    """Send *response_tweet*, retrying once and then toggling backup mode.

    Returns False when self destruction was flagged while waiting for the
    retry (the caller should stop immediately); True otherwise, even if all
    three attempts failed.
    """
    error_source = "tweet.py > tweet_loop()"
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
        return True
    except Exception as first_error:
        print(first_error)
        error_messenger.send_error_message(first_error, error_source)

    # sleep for sixteen minutes (we have one hour spare), and try once again
    print("will sleep for sixteen minutes to try to avoid the exception")
    time.sleep(16 * 60)
    print("has slept for sixteen minutes and will retry sending the tweet")
    if self_destruction_flag:
        return False
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
        return True
    except Exception as second_error:
        print(second_error)

    # switch between the regular and the backup tweeting app, and send an
    # urgent message explaining what is happening
    print("toggling backup mode in tweeting app")
    twythonaccess.tweeting_in_backup_mode = not twythonaccess.tweeting_in_backup_mode
    error_messenger.send_error_message(
        "IMPORTANT: Tweeting app now toggled its backup mode", error_source)
    try:
        twythonaccess.send_tweet(response_tweet, TwitterApp.tweeting)
    except Exception as third_error:
        # nothing more can be done; report the failure and carry on
        print(third_error)
        error_messenger.send_error_message(third_error, error_source)
        error_messenger.send_error_message(
            "We're all doomed. Exception couldn't be resolved, even after tremendous effort. Now, ignoring the error.",
            error_source)
    return True


def tweet_loop():
    """Send one response tweet to every user per week until self destruction.

    Each pass of the outer loop is meant to last exactly one week. The users
    are addressed in random order, with the tweets spread evenly over one
    week minus a spare hour; the pass then idles until the full week has
    elapsed, which keeps the weekly cycles aligned for measuring purposes.
    Both loops stop promptly once ``self_destruction_flag`` is set.
    """
    global user_ids
    global sent_responses_to_user
    global responses
    global screen_name_for_user_id
    print("tweet loop")
    week_seconds = 7 * 24 * 60 * 60
    while not self_destruction_flag:
        start_time = datetime.utcnow()
        # scramble a copy of the user ids so that the order is random
        user_ids_sendlist = user_ids[:]
        random.shuffle(user_ids_sendlist)
        # interval between tweets in seconds; one hour is kept spare for
        # error recovery, and max() avoids dividing by zero without users
        tweet_interval = (week_seconds - 60 * 60) / max(len(user_ids_sendlist), 1)
        for user_id in user_ids_sendlist:
            # It is of utmost importance that the treatment stops at the given
            # signal, even if not all users then receive the same number of
            # tweets.
            if self_destruction_flag:
                break
            response = _pick_unsent_response(user_id)
            # send this response to the user, mentioning them
            response_tweet = ("@" + screen_name_for_user_id[user_id] + " "
                              + response + " " + setup.RESPONSE_SUFFIX)
            # Delivery is best effort: the risk is Twitter banning the bot
            # for sending unsolicited messages.
            if not _send_tweet_with_fallbacks(response_tweet):
                break
            # add the chosen response to the sent responses set
            sent_responses_to_user[user_id].add(response)
            # now, sleep for the specified interval
            time.sleep(tweet_interval)
        # All users have been addressed: idle until exactly one week has
        # passed since the start time, unless self destruction is flagged.
        while (not self_destruction_flag
               and (datetime.utcnow() - start_time).total_seconds() <= week_seconds):
            time.sleep(1)
def _close_raw_data_json_array():
    """Terminate the JSON array in the raw data file.

    A trailing comma (left behind by the last written record) is replaced by
    the closing square bracket. If the file does not end in a comma, the
    bracket is appended without removing anything.
    """
    with open(setup.RAW_DATA_PATH, "rb+") as raw_file:
        raw_file.seek(0, os.SEEK_END)
        if raw_file.tell() > 0:
            # an empty file has no last byte to inspect
            raw_file.seek(-1, os.SEEK_END)
            if raw_file.read(1) == b",":
                raw_file.seek(-1, os.SEEK_END)
                raw_file.truncate()
        raw_file.seek(0, os.SEEK_END)
        raw_file.write(b"]")


def tweet_streamer():
    """Stream the tweets of all followed users until self destruction.

    Creates the global ``streamer_object`` with the measuring app
    credentials and registers ``new_tweet`` as its observer. A broken
    connection ("IncompleteRead") restarts the stream at once. Any other
    exception is reported and the stream is retried immediately with the
    backup measuring app; if that fails as well, the failure is reported
    and the loop pauses for five minutes. When streaming has ended, the
    JSON array in ``setup.RAW_DATA_PATH`` is closed.
    """
    print("tweet streamer")
    error_source = "measure.py > tweet_streamer()"
    # set up the streamer, using the measuring app
    global streamer_object
    global user_ids
    streamer_object = TweetStreamer(
        setup.MEASURING_CONSUMER_KEY,
        setup.MEASURING_CONSUMER_SECRET,
        setup.MEASURING_ACCESS_TOKEN,
        setup.MEASURING_ACCESS_TOKEN_SECRET)
    # title used in the streamer's own error logs
    streamer_object.arvid220u_error_title = "measure.py > tweet_streamer()"
    # every incoming tweet is handed to new_tweet
    streamer_object.arvid220u_add_observer(new_tweet)

    # start streaming
    while not self_destruction_flag:
        try:
            # check for tweets written by any of the user ids in our follow
            # list (RTs are discarded by default)
            streamer_object.statuses.filter(follow=user_ids)
        except Exception as exception:
            if self_destruction_flag:
                break
            # any "IncompleteRead" connection failure restarts immediately
            if str(exception).startswith("('Connection broken: IncompleteRead"):
                print("restarting")
                continue
            # We want complete data, without a hole in the measuring process,
            # so try again immediately with the backup twitter app.
            print("measure.py > tweet_streamer(): ")
            print(exception)
            error_messenger.send_error_message(exception, error_source)
            error_messenger.send_error_message(
                "Starting backup measuring app. Not good.", error_source)
            try:
                # reinitialize the streamer object with the backup credentials
                streamer_object = TweetStreamer(
                    setup.MEASURING_BACKUP_CONSUMER_KEY,
                    setup.MEASURING_BACKUP_CONSUMER_SECRET,
                    setup.MEASURING_BACKUP_ACCESS_TOKEN,
                    setup.MEASURING_BACKUP_ACCESS_TOKEN_SECRET)
                streamer_object.arvid220u_error_title = "measure.py > tweet_streamer()"
                streamer_object.arvid220u_add_observer(new_tweet)
                # try again, same thing
                streamer_object.statuses.filter(follow=user_ids)
            except Exception as backup_exception:
                if self_destruction_flag:
                    continue
                # nothing left to do beyond sending error messages, sleeping
                # for five minutes, and hoping for the best
                print("measure.py > tweet_streamer(): ")
                print(backup_exception)
                print("CRITICAL. BACKUP FAILED.")
                error_messenger.send_error_message(backup_exception, error_source)
                error_messenger.send_error_message(
                    "CRITICAL. BACKUP MEASURING BOT FAILED. Will now sleep for five minutes.",
                    error_source)
                time.sleep(5 * 60)

    # streaming has finished (probably because of self destruction): close
    # the JSON array in the raw data file
    _close_raw_data_json_array()
def on_timeout(self):
    """Log a streaming API timeout and forward it through the error messenger."""
    # Read the title from the instance instead of relying on a bare name
    # lookup, which only resolves if a module-level global of that name
    # happens to exist. Fall back to an empty title so that reporting a
    # timeout can never itself fail on a missing attribute.
    error_title = getattr(self, "arvid220u_error_title", "")
    print("STREAMING API TIMEOUT IN TWEETSTREAMER!")
    error_messenger.send_error_message(
        "streaming API timeout",
        "TweetStreamer.on_timeout from " + error_title)
    print("END OF ERROR MESSAGE")