def on_error(self, status):
    """Handle an HTTP error status from the Streaming API.

    Status codes taken from:
    https://dev.twitter.com/overview/api/response-codes

    :param status: HTTP status code returned by Twitter (int).
    :return: False, which tells tweepy to stop the stream.
    """
    descriptions = {
        304: "304 Not Modified - There was no new data to return.",
        401: "401 Unauthorized - Missing or incorrect authentication credentials.",
        403: "403 Forbidden - The request is understood, "
             "but it has been refused or access is not allowed.",
        420: "420 Enhance Your Calm - Returned when you are being rate limited.",
        500: "500 Internal Server Error - Something is broken.",
        503: "503 Service Unavailable - The Twitter servers are up, "
             "but overloaded with requests. Try again later.",
        504: "504 Gateway timeout - The Twitter servers are up, but "
             "the request couldn’t be serviced due to some failure within our stack. Try again later.",
    }
    # BUG FIX: the original fallback concatenated the int status directly
    # into the message ("..." + status + " Unknown."), raising a TypeError
    # for any unlisted code; convert with str() first.
    message = "[HTTP_ERROR]" + LOG_NAME + descriptions.get(status, str(status) + " Unknown.")
    print(message)
    read_write.log_message(message)
    read_write.log_message("[INFO]" + LOG_NAME + "Stopping stream")
    return False  # and stop the stream
def __init__(self):
    """Set up the listener's control flags and its stored-tweets counter."""
    super(StdOutListener, self).__init__()
    # Both flags start False: the stream is neither stopped nor paused.
    # flag -> True means "stop the stream"; pause_flag -> True means "paused".
    self.flag, self.pause_flag = False, False
    # Number of tweets stored to the DB; properly initialized in on_connect().
    self.store_counter = None
    read_write.log_message("[INFO]" + LOG_NAME + "StreamListener initialized")
def on_disconnect(self, notice):
    """Log Twitter's disconnect notice and stop the stream.

    :param notice: raw JSON string with stream_name, reason and code fields.
    :return: False to terminate the stream loop.
    """
    info = json.loads(notice)
    message = "[ERROR] (%s) : Name=%s, Reason=%s, Code=%s" % (
        type(self).__name__, info["stream_name"], info["reason"], str(info["code"]))
    print(message)
    read_write.log_message(message)
    return False
def populate_dbs(self):
    """Fill dbs_frm with one radio-button per database of the current client.

    Selecting a radio-button copies the database name into db_entry and adds
    a "Drop database" button below the list.
    """

    def select_db():
        # Callback shared by every radio-button; reads the selection from db_var.
        self.selected_db_var = db_var
        selected_db = str(db_var.get())
        self.db_entry.delete(0, "end")
        self.db_entry.insert(0, selected_db)
        # we add the drop button, only if a radio-button is pressed
        self.drop_db_btn = Button(self.dbs_frm, text="Drop database", command=self.drop_db)
        # NOTE(review): db_counter is captured by closure, so by the time this
        # runs it holds its final value after the loop below - the button lands
        # on the row right after the last radio-button. Presumably intended.
        self.drop_db_btn.grid(row=db_counter, column=2, pady=10, ipadx=5, ipady=2)

    db_var = StringVar()
    # we get the available database names of this connection
    # NOTE(review): database_names() was removed in PyMongo 4 (replaced by
    # list_database_names()) - this assumes PyMongo 3.x; confirm.
    db_list = self.client.database_names()
    db_counter = 0
    read_write.log_message("[INFO] (frames.DbFrame) : DBs found: " + str(db_list))
    for name in db_list:
        r = Radiobutton(self.dbs_frm, text=name, variable=db_var, value=name, command=select_db)
        r.grid(row=db_counter, column=2, pady=2)
        db_counter += 1
def start_training():
    """Make sure both NLTK sentiment-analyzer pickle files exist.

    Checks for files/sa_polarity.pickle and files/sa_subjectivity.pickle and
    trains a Naive Bayes classifier for whichever one is missing, keeping the
    user informed through message boxes.
    """
    pol_checkfile = os.path.exists('files/sa_polarity.pickle')
    subj_checkfile = os.path.exists('files/sa_subjectivity.pickle')
    if pol_checkfile:
        message1 = "SA Polarity file already exists."
        messagebox.showinfo("File found", message1)
    else:
        message1 = ("Cannot find the polarity sentiment analyzer file.\n"
                    "Training a new one using Naive Bayes Classifier.\n"
                    "Be patient. It might take a while.")
        messagebox.showinfo("Training", message1)
        train_sentiment_analyzer_polarity(1000)
        messagebox.showinfo("Training", "Polarity Training finished.")
        read_write.log_message("[INFO]" + LOG_NAME + message1)
    if subj_checkfile:
        message2 = "SA Subjectivity file already exists."
        messagebox.showinfo("File found", message2)
    else:
        message2 = ("Cannot find the subjectivity sentiment analyzer file.\n"
                    "Training a new one using Naive Bayes Classifier.\n"
                    "Be patient. It might take a while.")
        messagebox.showinfo("Training", message2)
        train_sentiment_analyzer_subjectivity(5000)
        messagebox.showinfo("Training", "Subjectivity Training finished.")
        read_write.log_message("[INFO]" + LOG_NAME + message2)
def safe_exit(self):
    """Ask for confirmation and, if given, log the exit and close the window."""
    confirmed = messagebox.askyesno(title="Exit",
                                    message="Are you sure you want to exit?",
                                    icon="question")
    if not confirmed:
        return
    read_write.log_message("[INFO] (frames.StatsFrame) : Exiting...")
    self.root.destroy()
def show_collections(self):
    """Build and show the frame listing the collections of the selected DB."""
    # the frame that will show the collections
    self.collections_frm = Frame(self)
    self.collections_frm.grid(row=2, pady=5, padx=50)
    try:
        self.populate_collections()
    except (ServerSelectionTimeoutError, AutoReconnect) as e:
        # type(e).__name__ yields the same text the original spelled out
        # per-handler ("ServerSelectionTimeoutError" / "AutoReconnect").
        read_write.log_message("[ERROR]" + LOG_NAME + type(e).__name__ + ": " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
    # if dbs already shown, we need to hide them, but if not, this will raise an exception
    try:
        self.hide_dbs()
    except AttributeError:
        pass
    # change the button's text and grid the frame
    self.previous_collection_btn.config(command=self.hide_collections, text="Hide collections")
    self.collections_frm.grid()
def stop_stream(frame):
    """Switch the GUI back to 'Start Stream' mode and terminate the stream.

    :param frame: the StreamFrame holding the stream control buttons.
    """
    global stream_controller
    frame.mng_stream_btn.config(text="Start Stream",
                                command=lambda: start_stream(frame))
    frame.pause_stream_btn.grid_remove()
    notice = "Terminating stream..."
    print(notice)
    read_write.log_message("[INFO]" + LOG_NAME + notice)
    stream_controller.stop()  # by calling the stream controller
def safe_exit(self):
    """Confirm with the user, then stop the stream, log and close the window."""
    confirmed = messagebox.askyesno(title="Exit",
                                    message="Are you sure you want to exit?",
                                    icon="question")
    if not confirmed:
        return
    stream_util.stream_controller.stop()
    read_write.log_message("[INFO]" + stream_util.LOG_NAME + "Exiting...")
    self.root.destroy()
def __init__(self, master):
    """Build the database/collection selection frame.

    :param master: parent Tk widget; also kept as self.root.
    """
    super(DbFrame, self).__init__(master)
    self.root = master
    self.client = db_utils.get_client()
    # get any previous data on last.json
    previous_data = read_write.read_last()
    # Two frames will hold the widgets
    label_frm = Frame(self)  # the labels and entries
    label_frm.grid(row=0, pady=10, padx=50)
    button_frm = Frame(self)  # the buttons
    button_frm.grid(row=1, pady=5, padx=50)
    # Build the widgets for label_frm
    Label(label_frm, text="Database:").grid(column=2, row=0, pady=10, padx=5)
    Label(label_frm, text="Collection:").grid(column=2, row=1, padx=5)
    self.db_entry = Entry(label_frm, width=30)
    self.db_entry.grid(column=3, row=0, pady=10)
    self.collection_entry = Entry(label_frm, width=30)
    self.collection_entry.grid(column=3, row=1)
    # Add data to entries if any data on last.json
    # BUG FIX: the original used `is not ""` - an identity check on a string
    # literal (SyntaxWarning on Python 3.8+, and semantically wrong since it
    # compares object identity, not value). Use != instead.
    try:
        if previous_data["database"] != "":
            self.db_entry.insert(0, previous_data["database"])
    except KeyError as e:
        message = "[ERROR] (frames.DbFrame) : KeyError: " + str(e)
        read_write.log_message(message)
    try:
        if previous_data["collection"] != "":
            self.collection_entry.insert(0, previous_data["collection"])
    except KeyError as e:
        message = "[ERROR] (frames.DbFrame) : KeyError: " + str(e)
        read_write.log_message(message)
    # Build the widgets for button_frm
    self.next_btn = Button(button_frm, text="Next")
    self.next_btn.grid(column=2, row=0, pady=10, padx=4, ipadx=2, ipady=2)
    self.back_btn = Button(button_frm, text="Back")
    self.back_btn.grid(column=4, row=0, pady=10, padx=4, ipadx=2, ipady=2)
    self.previous_dbs_btn = Button(button_frm, text="Show databases", command=self.show_dbs)
    self.previous_dbs_btn.grid(column=2, row=1, ipadx=2, ipady=2)
    self.previous_collection_btn = Button(button_frm, text="Show collections",
                                          command=self.show_collections)
    self.previous_collection_btn.grid(column=4, row=1, ipadx=2, ipady=2)
    # Build the widgets for dbs_frm
    self.selected_db_var = StringVar()
    self.selected_collection_var = StringVar()
def pause_unpause(frame):
    """Toggle the stream between paused and running, updating the GUI.

    :param frame: the StreamFrame holding the pause/continue button.
    """
    global stream_controller
    if not stream_controller.listener.pause_flag:
        # Stream is currently running -> pause it.
        stream_controller.pause()
        frame.pause_stream_btn.config(text="Continue Stream")
        print("Stream paused...")
        read_write.log_message("[INFO]" + LOG_NAME + "Stream paused...")
    else:
        # Stream is currently paused -> resume it.
        stream_controller.unpause()
        frame.pause_stream_btn.config(text="Pause Stream")
        read_write.log_message("[INFO]" + LOG_NAME + "Continuing stream...")
def train_sentiment_analyzer_polarity(n_instances=None):
    """Train and save a Naive Bayes polarity classifier on movie_reviews.

    :param n_instances: total requested instances; 20% of them per class are
        held out as the test set. None keeps full, untruncated sets.
    """
    if n_instances is not None:
        # Keep 20% of the requested instances per class for the test set.
        n_instances = int(0.2 * n_instances)
    # Tokenized reviews per label.
    pos_reviews = [movie_reviews.words(fileid) for fileid in movie_reviews.fileids('pos')]
    neg_reviews = [movie_reviews.words(fileid) for fileid in movie_reviews.fileids('neg')]
    # (features, label) pairs for each review.
    pos_reviews_set = [(bag_of_words(words), 'pos') for words in pos_reviews]
    neg_reviews_set = [(bag_of_words(words), 'neg') for words in neg_reviews]
    shuffle(pos_reviews_set)
    shuffle(neg_reviews_set)
    # First n_instances of each class are held out for testing.
    test_set = pos_reviews_set[:n_instances] + neg_reviews_set[:n_instances]
    train_set = pos_reviews_set[n_instances:] + neg_reviews_set[n_instances:]
    print('Training classifier')
    classifier = NaiveBayesClassifier.train(train_set)
    # BUG FIX: show_most_informative_features() prints its table itself and
    # returns None; the original wrapped it in print(), emitting a stray
    # "None" line.
    classifier.show_most_informative_features(10)
    classifier_accuracy_percent = (classify.accuracy(classifier, test_set)) * 100
    message_acc = 'Accuracy of classifier = ' + str(classifier_accuracy_percent) + '%'
    print(message_acc)
    read_write.log_message("[INFO]" + LOG_NAME + message_acc)
    save_file(classifier, 'files/sa_polarity.pickle')
    message = "sa_polarity.pickle file saved."
    print(message)
    # Consistency fix: tag this entry like every other log call in the module.
    read_write.log_message("[INFO]" + LOG_NAME + message)
def show_textblob_polarity():
    """Show a pie chart of the TextBlob polarity distribution of the tweets.

    Reads the pos/neu/neg counts from the active collection and displays a
    matplotlib pie chart; pops an error dialog if the DB connection is lost.
    """
    try:
        collection = db_utils.get_collection()
        all_documents = collection.find()
        tweets_sum = all_documents.count()
        # BUG FIX: an empty collection made the percentage computations below
        # raise ZeroDivisionError - bail out early instead.
        if tweets_sum == 0:
            messagebox.showinfo("No data", "No documents found in this collection.")
            return
        number_of_textblob_positive = collection.find({"textblob.polarity": 'pos'}).count()
        number_of_textblob_neutral = collection.find({"textblob.polarity": 'neu'}).count()
        number_of_textblob_negative = collection.find({"textblob.polarity": 'neg'}).count()
        percent_pos = (number_of_textblob_positive / tweets_sum) * 100
        percent_neu = (number_of_textblob_neutral / tweets_sum) * 100
        percent_neg = (number_of_textblob_negative / tweets_sum) * 100
        labels = 'Positive', 'Neutral', 'Negative'
        sizes = [percent_pos, percent_neu, percent_neg]
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
        ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.title('Textblob Polarity')
        plt.show()
    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return
    except AutoReconnect as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return
def get_stream(listener):
    """Build a tweepy Stream authenticated with the stored credentials.

    :param listener: a tweepy StreamListener instance to attach.
    :return: the configured Stream, or None if the credentials are broken.
    """
    credentials = read_write.read_credentials()
    try:
        auth = OAuthHandler(credentials["consumer_key"], credentials["consumer_secret"])
        auth.set_access_token(credentials["access_token"], credentials["access_token_secret"])
    except KeyError as error:
        messages = (
            "[ERROR]" + LOG_NAME + "KeyError : " + str(error),
            "[FATAL]" + LOG_NAME + "Error on credentials. Please check the credentials.json file.",
        )
        for m in messages:
            print(m)
        for m in messages:
            read_write.log_message(m)
        return None
    # and we setting the stream item
    return Stream(auth, listener)
def show_training_subjectivity():
    """Show a pie chart of the NLTK subjectivity distribution of the tweets.

    Reads the subj/obj counts from the active collection and displays a
    matplotlib pie chart; pops an error dialog if the DB connection is lost.
    """
    try:
        collection = db_utils.get_collection()
        all_documents = collection.find()
        tweets_sum = all_documents.count()
        # BUG FIX: an empty collection made the percentage computations below
        # raise ZeroDivisionError - bail out early instead.
        if tweets_sum == 0:
            messagebox.showinfo("No data", "No documents found in this collection.")
            return
        number_of_training_subjective = collection.find({"training.subjectivity": 'subj'}).count()
        number_of_training_objective = collection.find({"training.subjectivity": 'obj'}).count()
        percent_subj = (number_of_training_subjective / tweets_sum) * 100
        percent_obj = (number_of_training_objective / tweets_sum) * 100
        labels = 'Subjective', 'Objective'
        sizes = [percent_subj, percent_obj]
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
        ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.title('NLTK Subjectivity')
        plt.show()
    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return
    except AutoReconnect as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return
def on_connect(self):
    """Log a successful connection and persist the search keywords.

    Resets the per-session stored/ignored tweet counters.
    """
    global stream_controller
    message_1 = "[SUCCESS]" + LOG_NAME + "Connected to Streaming Server!"
    message_2 = "[INFO]" + LOG_NAME + "#### Gathering tweets for '" + stream_controller.search_keyword \
        + "' keyword. ####"
    print(message_2)
    read_write.log_message(message_1)
    read_write.log_message(message_2)
    # save each comma-separated keyword to the keywords.json file
    # (idiom fix: the original copied the split list through a redundant
    # comprehension and called lstrip()+rstrip(), i.e. strip())
    for keyword in stream_controller.search_keyword.split(","):
        read_write.write_keywords(keyword.strip())
    self.store_counter = 0  # initialize the stored-tweets counter
    self.ignore_counter = 0  # and the ignored-tweets counter
def drop_db(self):
    """Ask for confirmation and drop the selected database, then refresh the list."""
    name = self.selected_db_var.get()
    answer = messagebox.askokcancel(title="Are you sure?",
                                    message="Are you sure you want to delete " + name,
                                    default="cancel",
                                    parent=self.root)
    if not answer:
        return
    read_write.log_message("[INFO] (frames.DbFrame) : Dropping database '" + name + "'")
    try:
        self.client.drop_database(name)
    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return
    # refresh the database list shown in the GUI
    self.hide_dbs()
    self.show_dbs()
def stream(self):
    """Open the Twitter stream for the configured keyword(s) in a new thread."""
    stream = manage_credentials.get_stream(listener=self.listener)
    # this is a try-except block, because if there is something wrong in the
    # Listener class, like e.g internet connection failure, it raises the
    # exception inside the active thread
    try:
        # user can give more than one keywords for searching; they are
        # comma-separated, so split them and strip the whitespace
        search_list = [x.strip() for x in self.search_keyword.split(",")]
        message = "[INFO]" + LOG_NAME + "Trying to connect to the Streaming Server..."
        print(message)
        read_write.log_message(message)
        # BUG FIX: 'async' became a reserved keyword in Python 3.7, so the
        # original call `filter(..., async=True)` is a SyntaxError there;
        # tweepy (>= 3.7) renamed the parameter to 'is_async'. It starts the
        # streaming loop in a new thread.
        stream.filter(track=search_list, is_async=True)
    except AttributeError as e:
        # get_stream() returned None -> no credentials file was found
        message = "[ERROR]" + LOG_NAME + "AttributeError: " + str(e)
        print(message)
        read_write.log_message(message)
        messagebox.showerror(
            "Fatal error",
            "No credentials were found. Please close the script, " +
            "add the file and try again!")
    except Exception as e:
        # last-resort handler: log anything unexpected raised by the stream
        message = "[ERROR]" + LOG_NAME + "Exception: " + str(repr(e))
        print(message)
        read_write.log_message(message)
def on_data(self, data):
    """Handle one raw message from the Streaming API.

    :param data: raw JSON string delivered by tweepy.
    :return: False to terminate the stream loop, True to keep it running.
    """
    if self.flag:  # flag keeps track if we want to stop the stream
        read_write.log_message("[INFO]" + LOG_NAME + "Gathered " + str(self.store_counter) +
                               " tweets - Ignored " + str(self.ignore_counter) + " tweets")
        return False  # return False to terminate the loop
    if self.pause_flag:  # pause flag keeps track if we want to pause the stream
        return True  # return True and do nothing with the data. It's a virtual pause.
    data = json.loads(data)  # turn the incoming data into json format
    if "user" not in data:  # if tweet has no user, we don't want this tweet
        print("No user data - ignoring tweet.")
        self.ignore_counter += 1
        return True
    # BUG FIX: some stream messages carry no "lang" field at all, so the
    # original data["lang"] lookup raised a KeyError; get() treats a missing
    # field the same as a non-English tweet.
    if data.get("lang") != "en":  # we deal only with English language text based tweets
        print("Non English - ignoring tweet.")
        self.ignore_counter += 1
        return True
    # we pass our data into this static method to clean them and keep only the necessary
    our_tweet = other_utils.format_tweet(data, method="stream")
    try:
        # store_tweet() returns True when the tweet was saved and False when
        # it failed; that is how we keep track of how many tweets we stored
        if db_utils.store_tweet(our_tweet):
            self.store_counter += 1  # increase the counter
            if self.store_counter % 100 == 0:  # print progress every 100 tweets
                print("Stored " + str(self.store_counter) + " tweets so far.")
        else:
            self.ignore_counter += 1
    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return False
    except AutoReconnect as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
        return False
    # return True to continue the loop
    return True
def can_connect(host, port):
    """Check whether we can connect to (and write in) a MongoDB instance.

    Performs a test insert into a throw-away database with a pseudo-random
    name; on success the connection is kept in the module-level ``client``.

    :param host: MongoDB host name or address.
    :param port: MongoDB port (anything convertible to int).
    :return: dict with keys ``connect`` (bool) and ``errors`` (str); on
        success ``host`` and ``port`` are added too.
    """
    global client  # on success we publish the connection here
    response = {"connect": False, "errors": ""}
    try:
        # the port must be an integer
        port = int(port)
    except ValueError as e:
        message = "[ERROR]" + LOG_NAME + "ValueError:" + str(e)
        print(message)
        read_write.log_message(message)
        response["errors"] = "Port must be an integer"
        return response
    try:  # try connect to the MongoDB
        connection = MongoClient(host=host,
                                 port=port,
                                 serverSelectionTimeoutMS=10000,
                                 tz_aware=True)
    except ConfigurationError as e:  # if host is not appropriate
        message = "[ERROR]" + LOG_NAME + "ConfigurationError:" + str(e)
        print(message)
        read_write.log_message(message)
        response["errors"] = str(e)
        return response
    except TypeError as e:  # if port result to an error
        message = "[ERROR]" + LOG_NAME + "TypeError:" + str(e)
        print(message)
        read_write.log_message(message)
        response["errors"] = str(e)
        return response
    # to see if we can connect to the MongoDB, we make a test query to see if
    # we can write in it, so we create a new database and collection with
    # unique names
    pseudo_random = ''.join(
        random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
        for _ in range(16))
    a_db = connection["random_database_" + pseudo_random]
    a_collection = a_db["random_collection_" + pseudo_random]
    try:
        # we give the client, 10 seconds to connect
        read_write.log_message("[INFO]" + LOG_NAME +
                               "Trying to connect to MongoDB with host: " + host +
                               " and port: " + str(port))
        # BUG FIX: Collection.insert() was deprecated in PyMongo 3 and removed
        # in PyMongo 4 - use insert_one() instead.
        a_collection.insert_one({"test": 1})
    except ServerSelectionTimeoutError as e:
        message = "[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError:" + str(e)
        print(message)
        read_write.log_message(message)
        response["errors"] = "Can't connect"
        return response
    # if all OK, drop the test database
    connection.drop_database(a_db)
    # but make a global variable of the client, because we reference to it many times
    client = connection
    read_write.log_message("[INFO]" + LOG_NAME + "Successfully connected")
    response["connect"] = True
    response["host"] = host
    response["port"] = port
    return response
################################################################################################## # Module that is responsible to read the credentials and return the API item back to the program # ################################################################################################## from utils import read_write import sys try: from tweepy import OAuthHandler, Stream, AppAuthHandler, API except ImportError as e: read_write.log_message("[FATAL] (manage_credentials) : ImportError: " + str(e)) sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue") LOG_NAME = " (manage_credentials) : " def get_stream(listener): credentials = read_write.read_credentials() try: auth = OAuthHandler(credentials["consumer_key"], credentials["consumer_secret"]) auth.set_access_token(credentials["access_token"], credentials["access_token_secret"]) except KeyError as error: message_er = "[ERROR]" + LOG_NAME + "KeyError : " + str(error) message_fatal = "[FATAL]" + LOG_NAME + "Error on credentials. Please check the credentials.json file." print(message_er) print(message_fatal) read_write.log_message(message_er) read_write.log_message(message_fatal)
##################################################################################################### # Module that is responsible for the sentiment analysis of the tweets # ##################################################################################################### from utils import read_write, training import sys try: from textblob import TextBlob from nltk.sentiment.util import * from nltk.tokenize import regexp, word_tokenize except ImportError as e: read_write.log_message("[FATAL] (sentiment_utils) : ImportError: " + str(e)) sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue") try: from nltk.sentiment.vader import SentimentIntensityAnalyzer except LookupError as e: read_write.log_message("[FATAL] (sentiment_utils) : LookupError: " + str(e)) instructions = " **** INSTALLATION INSTRUCTIONS ****\n\n" instructions += " 1) Open a new terminal and type python. This will open a python terminal\n" instructions += " 2) Type import ntlk\n 3) Type nltk.download()\n" instructions += " 4) This will open a new window. Search in 'All Packages' and install vader_lexicon.\n" instructions += " 5) Double click OR click download to install it" read_write.log_message(instructions) sys.exit(str(e) + "\n" + instructions) LOG_NAME = " (sentiment_utils) : " def textblob_polarity(text):
def __init__(self, master):
    """Build the statistics frame: per-analyzer counts, agreement metrics,
    pie-chart buttons and the Back/Exit row.

    :param master: parent Tk widget; also kept as self.root.
    """
    super(StatsFrame, self).__init__(master)
    self.root = master
    self.collection = db_utils.get_collection()
    self.all_documents = self.collection.find()  # this is a Cursor object
    # Four stacked frames: quick-facts table, agreement metrics, chart
    # buttons and the Back/Exit row.
    self.quick_facts_frm = Frame(self)
    self.quick_facts_frm.grid(row=0, column=0, pady=5)
    self.compare_frm = Frame(self)
    self.compare_frm.grid(row=1, column=0, pady=5)
    show_graphs_frm = Frame(self)
    show_graphs_frm.grid(row=2, column=0, pady=5)
    exit_frm = Frame(self)
    exit_frm.grid(row=3, column=0, pady=5)
    # NOTE(review): Cursor.count() and find().count() were removed in
    # PyMongo 4 - this code assumes PyMongo 3.x; confirm.
    tweets_sum = self.all_documents.count()
    read_write.log_message("[INFO] (frames.StatsFrame) : Found " + str(tweets_sum) + " tweets in the DB")
    # if we use a collection with no stored tweets, we do not show any data or metric
    if tweets_sum > 0:
        # Header labels of the quick-facts table: analyzers on the rows,
        # sentiment classes on the columns.
        Label(self.quick_facts_frm, text="Textblob").grid(row=2, column=0, padx=2, pady=2)
        Label(self.quick_facts_frm, text="VADER").grid(row=3, column=0, padx=2, pady=2)
        Label(self.quick_facts_frm, text="NLTK").grid(row=4, column=0, padx=2, pady=2)
        Label(self.quick_facts_frm, text="Positive").grid(row=1, column=1, padx=6, pady=2)
        Label(self.quick_facts_frm, text="Neutral").grid(row=1, column=2, padx=6, pady=2)
        Label(self.quick_facts_frm, text="Negative").grid(row=1, column=3, padx=6, pady=2)
        Label(self.quick_facts_frm, text="Subjective").grid(row=1, column=4, padx=6, pady=2)
        Label(self.quick_facts_frm, text="Objective").grid(row=1, column=5, padx=6, pady=2)
        # TextBlob counts (row 2).
        number_of_textblob_positive = self.collection.find({"textblob.polarity": 'pos'}).count()
        Label(self.quick_facts_frm, text=str(number_of_textblob_positive)).grid(row=2, column=1, pady=2)
        number_of_textblob_neutral = self.collection.find({"textblob.polarity": 'neu'}).count()
        Label(self.quick_facts_frm, text=str(number_of_textblob_neutral)).grid(row=2, column=2, pady=2)
        number_of_textblob_negative = self.collection.find({"textblob.polarity": 'neg'}).count()
        Label(self.quick_facts_frm, text=str(number_of_textblob_negative)).grid(row=2, column=3, pady=2)
        number_of_textblob_subjective = self.collection.find({"textblob.subjectivity": 'subj'}).count()
        Label(self.quick_facts_frm, text=str(number_of_textblob_subjective)).grid(row=2, column=4, pady=2)
        number_of_textblob_objective = self.collection.find({"textblob.subjectivity": 'obj'}).count()
        Label(self.quick_facts_frm, text=str(number_of_textblob_objective)).grid(row=2, column=5, pady=2)
        # VADER counts (row 3; no subjectivity columns are filled here).
        number_of_vader_positive = self.collection.find({"vader.polarity": 'pos'}).count()
        Label(self.quick_facts_frm, text=str(number_of_vader_positive)).grid(row=3, column=1, pady=2)
        number_of_vader_neutral = self.collection.find({"vader.polarity": 'neu'}).count()
        Label(self.quick_facts_frm, text=str(number_of_vader_neutral)).grid(row=3, column=2, pady=2)
        number_of_vader_negative = self.collection.find({"vader.polarity": 'neg'}).count()
        Label(self.quick_facts_frm, text=str(number_of_vader_negative)).grid(row=3, column=3, pady=2)
        # NLTK trained-classifier counts (row 4; no neutral column is filled).
        number_of_training_positive = self.collection.find({"training.polarity": 'pos'}).count()
        Label(self.quick_facts_frm, text=str(number_of_training_positive)).grid(row=4, column=1, pady=2)
        number_of_training_negative = self.collection.find({"training.polarity": 'neg'}).count()
        Label(self.quick_facts_frm, text=str(number_of_training_negative)).grid(row=4, column=3, pady=2)
        number_of_training_subjective = self.collection.find({"training.subjectivity": 'subj'}).count()
        Label(self.quick_facts_frm, text=str(number_of_training_subjective)).grid(row=4, column=4, pady=2)
        number_of_training_objective = self.collection.find({"training.subjectivity": 'obj'}).count()
        Label(self.quick_facts_frm, text=str(number_of_training_objective)).grid(row=4, column=5, pady=2)
        Label(self.compare_frm, text="Total unique tweets stored:").grid(row=1, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(tweets_sum)).grid(row=1, column=1, pady=2)
        # Tweets on which every analyzer agrees, per class.
        all_pos_counter = self.collection.find({"textblob.polarity": 'pos',
                                                "vader.polarity": 'pos',
                                                "training.polarity": 'pos'}).count()
        all_neg_counter = self.collection.find({"textblob.polarity": 'neg',
                                                "vader.polarity": 'neg',
                                                "training.polarity": 'neg'}).count()
        all_neu_counter = self.collection.find({"textblob.polarity": 'neu',
                                                "vader.polarity": 'neu'}).count()
        all_subj_counter = self.collection.find({"textblob.subjectivity": 'subj',
                                                 "training.subjectivity": 'subj'}).count()
        all_obj_counter = self.collection.find({"textblob.subjectivity": 'obj',
                                                "training.subjectivity": 'obj'}).count()
        # Agreement percentages, rounded to one decimal place.
        Label(self.compare_frm, text="Positive tweets that agree: ").grid(row=2, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(round((all_pos_counter / tweets_sum) * 100, 1)) + "%").grid(row=2, column=1, pady=2)
        Label(self.compare_frm, text="Negative tweets that agree: ").grid(row=3, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(round((all_neg_counter / tweets_sum) * 100, 1)) + "%").grid(row=3, column=1, pady=2)
        Label(self.compare_frm, text="Neutral tweets that agree: ").grid(row=4, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(round((all_neu_counter / tweets_sum) * 100, 1)) + "%").grid(row=4, column=1, pady=2)
        Label(self.compare_frm, text="Subjective tweets that agree: ").grid(row=5, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(round((all_subj_counter / tweets_sum) * 100, 1)) + "%").grid(row=5, column=1, pady=2)
        Label(self.compare_frm, text="Objective tweets that agree: ").grid(row=6, column=0, padx=2, pady=2, sticky=W)
        Label(self.compare_frm, text=str(round((all_obj_counter / tweets_sum) * 100, 1)) + "%").grid(row=6, column=1, pady=2)
        # build the widgets for show_graphs_frm
        # textblob polarity pie chart
        self.textblob_polarity_btn = Button(
            show_graphs_frm,
            text="Textblob Polarity Pie chart",
            command=chart_utils.show_textblob_polarity)
        self.textblob_polarity_btn.grid(row=0, column=1, pady=10, ipadx=5)
        # textblob subjectivity pie chart
        self.textblob_subjectivity_btn = Button(
            show_graphs_frm,
            text="Textblob Subjectivity Pie chart",
            command=chart_utils.show_textblob_subjectivity)
        self.textblob_subjectivity_btn.grid(row=1, column=1, pady=10, ipadx=5)
        # vader polarity pie chart
        self.vader_polarity_btn = Button(
            show_graphs_frm,
            text="VADER Polarity Pie chart",
            command=chart_utils.show_vader_polarity)
        self.vader_polarity_btn.grid(row=2, column=1, pady=10, ipadx=5)
        # training polarity pie chart
        self.training_polarity_btn = Button(
            show_graphs_frm,
            text="NLTK Polarity Pie chart",
            command=chart_utils.show_training_polarity)
        self.training_polarity_btn.grid(row=3, column=1, pady=10, ipadx=5)
        # training subjectivity pie chart
        self.training_subjectivity_btn = Button(
            show_graphs_frm,
            text="NLTK Subjectivity Pie chart",
            command=chart_utils.show_training_subjectivity)
        self.training_subjectivity_btn.grid(row=4, column=1, pady=10, ipadx=5)
    else:  # if we have an empty collection
        message = "No documents found in this collection."
        read_write.log_message("[WARN] (frames.StatsFrame) : " + message)
        message += "\nPlease enter some data first."
        Label(self.quick_facts_frm, text=message).grid(row=0, column=0, padx=10, pady=5)
    # Build the widgets for exit_frm
    self.back_btn = Button(exit_frm, text="Back")
    self.back_btn.grid(row=0, column=1, ipadx=5, ipady=3, pady=15)
    self.exit_btn = Button(exit_frm, text="Exit", command=self.safe_exit)
    self.exit_btn.grid(row=0, column=3, ipadx=5, ipady=3, padx=15, pady=10)
##################################################################################################### # Module that is responsible for the polarity and subjectivity training of the tweets # ##################################################################################################### from utils import read_write import sys import os.path import string from tkinter import messagebox from random import shuffle try: from nltk import classify from nltk.sentiment.util import * from nltk.sentiment import SentimentAnalyzer from nltk.classify import NaiveBayesClassifier except ImportError as e: read_write.log_message("[FATAL] (training) : ImportError: " + str(e)) sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue") try: from nltk.corpus import movie_reviews from nltk.corpus import subjectivity from nltk.corpus import stopwords except LookupError as e: read_write.log_message("[FATAL] (training) : LookupError: " + str(e)) instructions = " **** INSTALLATION INSTRUCTIONS ****\n\n" instructions += " 1) Open a new terminal and type python. This will open a python terminal\n" instructions += " 2) Type import ntlk\n 3) Type nltk.download()\n" instructions += " 4) This will open a new window. Search in 'All Packages' and install movie_reviews," \ " subjectivity and stopwords\n" instructions += " 5) Double click OR click download to install it" read_write.log_message(instructions)
def __init__(self, master):
    """Build the MongoDB host/port selection frame.

    :param master: parent Tk widget; also kept as self.root.
    """
    super(HostFrame, self).__init__(master)
    self.root = master
    # get any previous data on last.json
    previous_data = read_write.read_last()
    # Three frames will hold the widgets
    label_frm = Frame(self)  # this will hold the labels
    label_frm.grid(row=0, pady=10, padx=50)
    button_frm = Frame(self)  # this will hold the buttons
    button_frm.grid(row=1, pady=5, padx=50)
    self.hosts_frm = Frame(self)  # this will hold the previous hosts
    self.hosts_frm.grid(row=2, pady=5, padx=50)
    self.hosts_frm.grid_remove()  # but we need to show it, only if user wants
    # Build the widgets for label_frm
    Label(label_frm, text="Host:").grid(column=2, row=0, pady=10, padx=5)
    Label(label_frm, text="Port:").grid(column=2, row=1, padx=5)
    self.host_entry = Entry(label_frm, width=30)
    self.host_entry.grid(column=3, row=0, pady=10)
    self.port_entry = Entry(label_frm, width=30)
    self.port_entry.grid(column=3, row=1)
    # Add data to entries if any data on last.json
    # BUG FIX: the original used `is not ""` - an identity check on a string
    # literal (SyntaxWarning on Python 3.8+, semantically wrong). Use !=.
    try:
        if previous_data["host"] != "":
            self.host_entry.insert(0, previous_data["host"])
            self.port_entry.insert(0, previous_data["port"])
    except KeyError as e:
        message = "[ERROR] (frames.HostFrame): KeyError: " + str(e)
        read_write.log_message(message)
    # Build the widgets for button_frm
    self.next_btn = Button(button_frm, text="Next")
    self.next_btn.grid(column=2, row=0, pady=10, padx=4, ipadx=2, ipady=2)
    self.exit_btn = Button(button_frm, text="Exit", command=self.root.destroy)
    self.exit_btn.grid(column=4, row=0, pady=10, padx=4, ipadx=2, ipady=2)
    self.show_previous_btn = Button(button_frm, text="Show previous hosts",
                                    command=self.show_hosts)
    self.show_previous_btn.grid(column=2, row=1, columnspan=3, ipadx=2, ipady=2)

    # Build the widgets for hosts_frm
    def select_host():
        # Radio value has the format "host:port"; copy it into the entries.
        selected_data = str(var.get()).split(":")
        self.host_entry.delete(0, "end")
        self.host_entry.insert(0, selected_data[0])
        self.port_entry.delete(0, "end")
        self.port_entry.insert(0, selected_data[1])

    # populate the hosts_frm with Radio-buttons that show previous connections
    data = read_write.read_mongo()
    var = StringVar()
    counter = 0  # this will show in which row each radio-button will be on the frame
    for json_object in data:
        # same `is not ""` fix as above
        if json_object["host"] != "":
            option = json_object["host"] + ":" + str(json_object["port"])  # format host:port
            r = Radiobutton(self.hosts_frm, text=option, variable=var,
                            value=option, command=select_host)
            r.grid(row=counter, column=2, pady=2)
            counter += 1
def __init__(self, master):
    """Build the stream-control screen.

    If the pickled sentiment analyzers are missing from files/, shows a
    warning plus Back/Exit buttons; otherwise shows the keyword entry and
    the stream start/pause controls.

    :param master: parent Tk widget (also kept as ``self.root``).
    """
    super(StreamFrame, self).__init__(master)
    self.root = master
    # Check if the user has saved the trained sentiment analyzers.
    pol_checkfile = os.path.exists('files/sa_polarity.pickle')
    subj_checkfile = os.path.exists('files/sa_subjectivity.pickle')
    if not (pol_checkfile and subj_checkfile):  # if we can't find the SA files
        # These frames will hold the widgets.
        nofiles_frm = Frame(
            self
        )  # holds the warning message and the back and exit buttons
        nofiles_frm.grid(row=3, column=0, pady=5)
        exit_frm = Frame(self)  # exit frame, contains back and exit button
        exit_frm.grid(row=4, column=0, pady=5)
        message = "SA files not found."
        read_write.log_message("[WARN] (frames.StreamFrame) : " + message)
        message += "\nClick Start Training first to train the NLTK classifiers."
        Label(nofiles_frm, text=message).grid(row=0, column=0, padx=10, pady=5)
        # Created but never gridded: callers may reference the attribute even
        # when streaming is unavailable.
        self.mng_stream_btn = Button(
            nofiles_frm, text="Start Stream")  # ignore this, if there are no tweets
        # Build the widgets for exit_frm.
        self.back_btn = Button(exit_frm, text="Back")
        self.back_btn.grid(row=1, column=1, ipadx=5, ipady=3, pady=15)
        self.exit_btn = Button(exit_frm, text="Exit", command=self.safe_exit)
        self.exit_btn.grid(row=1, column=3, ipadx=5, ipady=3, padx=15, pady=10)
    else:
        # These frames will hold the widgets.
        label_frm = Frame(self)  # holds the keyword label and entry
        label_frm.grid(row=0, column=2, padx=10, pady=10, ipady=20, ipadx=20)
        # Frame for keywords.
        self.keywords_frm = Frame(
            self
        )  # hidden until the user wants to see previous keywords
        self.keywords_frm.grid(row=0, column=3, rowspan=3, pady=15)
        # Build the widgets for label_frm.
        Label(label_frm, text="Keyword:").grid(row=0, column=0, padx=20)
        self.keyword_entry = Entry(label_frm, width=30)
        self.keyword_entry.grid(row=0, column=1, columnspan=3)
        # Build the stream-control buttons.
        self.mng_stream_btn = Button(
            label_frm,
            text="Start Stream")  # its label toggles between start and stop
        self.mng_stream_btn.grid(row=1, column=1, ipadx=5, ipady=3, pady=20)
        self.pause_stream_btn = Button(
            label_frm,
            text="Pause Stream")  # shown only once the user starts a stream
        self.pause_stream_btn.grid(row=1,
                                   column=3,
                                   ipadx=5,
                                   ipady=3,
                                   padx=10,
                                   pady=20)
        self.pause_stream_btn.grid_remove()
        # Build the widgets for keywords_frm.
        self.manage_keywords_btn = Button(
            self.keywords_frm, command=self.show_keywords,
            text=">>>")  # its label changes into "<<<" when the user clicks it
        self.manage_keywords_btn.grid(row=0,
                                      column=0,
                                      ipadx=5,
                                      ipady=3,
                                      padx=10)
        # Build the back/exit buttons.
        self.back_btn = Button(label_frm, text="Back")
        self.back_btn.grid(row=2, column=1, ipadx=5, ipady=3, pady=15)
        self.exit_btn = Button(label_frm, text="Exit", command=self.safe_exit)
        self.exit_btn.grid(row=2, column=3, ipadx=5, ipady=3, padx=15, pady=10)
def train_sentiment_analyzer_subjectivity(n_instances=None):
    """Train, evaluate and persist an NLTK subjectivity classifier.

    Builds a balanced subj/obj corpus from NLTK's subjectivity dataset,
    extracts negation-marked unigram features (with stopwords, punctuation
    and numeric tokens removed), trains a Naive Bayes classifier, logs its
    accuracy and saves the analyzer to files/sa_subjectivity.pickle.

    :param n_instances: total number of documents to use, split evenly
        between the 'subj' and 'obj' categories; ``None`` uses all.
    """
    if n_instances is not None:
        n_instances = int(n_instances / 2)  # half per category

    # NLTK's integrated subjectivity dataset for the subj training.
    subj_docs = [
        (sent, 'subj')
        for sent in subjectivity.sents(categories='subj')[:n_instances]
    ]
    obj_docs = [(sent, 'obj')
                for sent in subjectivity.sents(categories='obj')[:n_instances]]

    # We separately split positive and negative instances to keep a balanced
    # uniform class distribution in both train and test sets.
    train_subj_docs, test_subj_docs = split_train_test(subj_docs)
    train_obj_docs, test_obj_docs = split_train_test(obj_docs)
    training_docs = train_subj_docs + train_obj_docs
    testing_docs = test_subj_docs + test_obj_docs

    sentim_analyzer = SentimentAnalyzer()
    all_words = sentim_analyzer.all_words(
        [mark_negation(doc) for doc in training_docs])

    # Tokens to ignore: English stopwords plus punctuation, including a few
    # tokenizer artifacts that string.punctuation does not cover.
    stopwords_english = stopwords.words('english')
    stopwords_english.extend(string.punctuation)
    stopwords_english.extend(["''", "``", "—", "…", "...", "--", ".."])
    ignored_words = set(stopwords_english)  # set for O(1) membership tests

    # BUG FIX: the original filter was `word not in string.digits`, i.e. a
    # *substring* test against "0123456789" — it dropped "12" (contiguous
    # run) but kept "13". str.isdigit() drops every purely numeric token.
    all_words_clean = [
        word for word in all_words
        if word not in ignored_words and not word.isdigit()
    ]

    # Add simple unigram word features.
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_clean,
                                                       min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats,
                                       unigrams=unigram_feats)

    # Apply features to obtain a feature-value representation of our datasets.
    training_set = sentim_analyzer.apply_features(training_docs)
    testing_set = sentim_analyzer.apply_features(testing_docs)

    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)
    try:
        classifier.show_most_informative_features()
    except AttributeError:
        message = "Your classifier does not provide a show_most_informative_features() method."
        print(message)
        read_write.log_message(message)

    sentim_analyzer.evaluate(testing_set)
    classifier_accuracy_percent = (classify.accuracy(classifier,
                                                     testing_set)) * 100
    message_acc = 'Accuracy of classifier = ' + str(
        classifier_accuracy_percent) + '%'
    print(message_acc)
    read_write.log_message("[INFO]" + LOG_NAME + message_acc)

    save_file(sentim_analyzer, 'files/sa_subjectivity.pickle')
    message = "sa_subjectivity.pickle file saved."
    print(message)
    read_write.log_message(message)
def on_exception(self, exception):
    """Log an exception raised inside the stream, then stop it.

    :param exception: the exception object propagated by the stream.
    :return: ``False`` — tells the stream to disconnect.
    """
    error_text = "[ERROR]" + LOG_NAME + str(exception)
    read_write.log_message(error_text)
    return False  # returning False tears the stream down
def __init__(self):
    """Create an idle controller holding a fresh stream listener."""
    self.listener = StdOutListener()
    self.search_keyword = None  # assigned later, when a stream is started
    read_write.log_message("[INFO]" + LOG_NAME + "StreamController initialized")
########################################################################################### # Module that is responsible to show the pie charts of the sentiment analysis results # ########################################################################################### from utils import db_utils, read_write from pymongo.errors import ServerSelectionTimeoutError, AutoReconnect from tkinter import messagebox import sys try: import matplotlib.pyplot as plt except ImportError as e: read_write.log_message("[FATAL] (chart_utils) : ImportError: " + str(e)) sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue") LOG_NAME = " (chart_utils) : " def show_textblob_polarity(): try: collection = db_utils.get_collection() all_documents = collection.find() tweets_sum = all_documents.count() number_of_textblob_positive = collection.find({ "textblob.polarity": 'pos' }).count() number_of_textblob_neutral = collection.find({ "textblob.polarity": 'neu' }).count() number_of_textblob_negative = collection.find({