def on_error(self, status):
        """Log the HTTP error status reported for the stream and stop the stream.

        The descriptions are taken from
        https://dev.twitter.com/overview/api/response-codes

        :param status: the HTTP status code reported for the stream connection.
        :return: always False, which stops the stream.
        """
        descriptions = {
            401: "401 Unauthorized - Missing or incorrect authentication credentials.",
            304: "304 Not Modified - There was no new data to return.",
            403: "403 Forbidden - The request is understood, "
                 "but it has been refused or access is not allowed.",
            420: "420 Enhance Your Calm - Returned when you are being rate limited.",
            500: "500 Internal Server Error - Something is broken.",
            503: "503 Service Unavailable - The Twitter servers are up, "
                 "but overloaded with requests. Try again later.",
            504: "504 Gateway timeout - The Twitter servers are up, but "
                 "the request couldn’t be serviced due to some failure within our stack. Try again later.",
        }
        # str() is required here: concatenating an int status to a string raises TypeError
        description = descriptions.get(status, str(status) + " Unknown.")
        message = "[HTTP_ERROR]" + LOG_NAME + description

        print(message)
        read_write.log_message(message)
        read_write.log_message("[INFO]" + LOG_NAME + "Stopping stream")
        return False  # and stop the stream
 def __init__(self):
     """Initialise the listener's control flags and counter, then log the initialisation."""
     super(StdOutListener, self).__init__()
     # flag: the stream stays open while it is False
     # pause_flag: the stream is not paused while it is False
     self.flag = self.pause_flag = False
     # counts how many tweets have been stored to the DB so far
     self.store_counter = None
     init_message = "[INFO]" + LOG_NAME + "StreamListener initialized"
     read_write.log_message(init_message)
 def on_disconnect(self, notice):
     """Print and log the disconnect notice, then return False.

     :param notice: JSON text carrying the "stream_name", "reason" and "code" keys.
     :return: always False.
     """
     details = json.loads(notice)
     header = "[ERROR] (" + type(self).__name__ + ") : Name=" + details["stream_name"]
     report = header + ", Reason=" + details["reason"] + ", Code=" + str(details["code"])
     print(report)
     read_write.log_message(report)
     return False
Пример #4
0
    def populate_dbs(self):
        """Fill ``self.dbs_frm`` with one radio button per database name of the client.

        Pressing a radio button copies the chosen name into ``self.db_entry`` and grids a
        "Drop database" button in the row that follows the last radio button.
        """
        choice_var = StringVar()

        def on_select():
            # remember the variable so other methods can read the current selection
            self.selected_db_var = choice_var
            chosen_name = str(choice_var.get())
            self.db_entry.delete(0, "end")
            self.db_entry.insert(0, chosen_name)
            # the drop button is added only once a radio button has been pressed
            self.drop_db_btn = Button(self.dbs_frm,
                                      text="Drop database",
                                      command=self.drop_db)
            self.drop_db_btn.grid(row=next_row, column=2, pady=10, ipadx=5, ipady=2)

        # the database names available on this connection
        names = self.client.database_names()
        read_write.log_message("[INFO] (frames.DbFrame) : DBs found: " + str(names))

        next_row = 0
        for db_name in names:
            Radiobutton(self.dbs_frm,
                        text=db_name,
                        variable=choice_var,
                        value=db_name,
                        command=on_select).grid(row=next_row, column=2, pady=2)
            next_row += 1
def start_training():
    """Make sure both sentiment analyzer pickle files exist, training the missing ones.

    For each analyzer the user is told whether the file was found or a training run is
    started, and the shown text is written to the log afterwards.
    """
    # (pickle path, "found" text, first line of the "missing" text, "finished" text, trainer)
    jobs = (
        ('files/sa_polarity.pickle',
         "SA Polarity file already exists.",
         "Cannot find the polarity sentiment analyzer file.\n",
         "Polarity Training finished.",
         lambda: train_sentiment_analyzer_polarity(1000)),
        ('files/sa_subjectivity.pickle',
         "SA Subjectivity file already exists.",
         "Cannot find the subjectivity sentiment analyzer file.\n",
         "Subjectivity Training finished.",
         lambda: train_sentiment_analyzer_subjectivity(5000)),
    )
    # both files are probed before any training starts
    present = [os.path.exists(job[0]) for job in jobs]

    for found, (_, found_text, missing_head, done_text, train) in zip(present, jobs):
        if found:
            report = found_text
            messagebox.showinfo("File found", report)
        else:
            report = (missing_head +
                      "Training a new one using Naive Bayes Classifier.\n" +
                      "Be patient. It might take a while.")
            messagebox.showinfo("Training", report)
            train()
            messagebox.showinfo("Training", done_text)
        read_write.log_message("[INFO]" + LOG_NAME + report)
Пример #6
0
 def safe_exit(self):
     """Ask for confirmation and, when given, log the exit and destroy the root window."""
     confirmed = messagebox.askyesno(title="Exit",
                                     message="Are you sure you want to exit?",
                                     icon="question")
     if not confirmed:
         return
     read_write.log_message("[INFO] (frames.StatsFrame) : Exiting...")
     self.root.destroy()
Пример #7
0
    def show_collections(self):
        """Create and fill the collections frame and switch the button to "Hide collections".

        A lost DB connection while populating is logged and reported to the user; the
        remaining GUI updates are still carried out afterwards.
        """
        # the frame that will show the collections
        self.collections_frm = Frame(self)
        self.collections_frm.grid(row=2, pady=5, padx=50)

        try:
            self.populate_collections()
        except ServerSelectionTimeoutError as timeout_err:
            log_line = "[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " + str(timeout_err)
            read_write.log_message(log_line)
            messagebox.showerror("Error", "Lost Connection to the DB")
        except AutoReconnect as reconnect_err:
            log_line = "[ERROR]" + LOG_NAME + "AutoReconnect: " + str(reconnect_err)
            read_write.log_message(log_line)
            messagebox.showerror("Error", "Lost Connection to the DB")

        # the databases must be hidden if they are shown; if they are not, hiding raises
        try:
            self.hide_dbs()
        except AttributeError:
            pass

        # re-purpose the button and grid the frame
        self.previous_collection_btn.config(text="Hide collections",
                                            command=self.hide_collections)
        self.collections_frm.grid()
def stop_stream(frame):
    """Put the GUI back into its "Start Stream" state and stop the stream controller.

    :param frame: the frame holding ``mng_stream_btn`` and ``pause_stream_btn``.
    """
    def restart():
        start_stream(frame)

    frame.mng_stream_btn.config(text="Start Stream", command=restart)
    frame.pause_stream_btn.grid_remove()

    print("Terminating stream...")
    read_write.log_message("[INFO]" + LOG_NAME + "Terminating stream...")
    # the actual termination is delegated to the stream controller
    stream_controller.stop()
Пример #9
0
 def safe_exit(self):
     """Ask for confirmation and, when given, stop the stream, log it and destroy the root window."""
     confirmed = messagebox.askyesno(title="Exit",
                                     message="Are you sure you want to exit?",
                                     icon="question")
     if not confirmed:
         return
     stream_util.stream_controller.stop()
     exit_message = "[INFO]" + stream_util.LOG_NAME + "Exiting..."
     read_write.log_message(exit_message)
     self.root.destroy()
Пример #10
0
    def __init__(self, master):
        """Build the database/collection selection frame.

        The entries are pre-filled with the "database" and "collection" values returned by
        ``read_write.read_last()`` when those are present and not empty.

        :param master: the parent widget; it is also kept as ``self.root``.
        """
        super(DbFrame, self).__init__(master)
        self.root = master
        self.client = db_utils.get_client()

        # get any previous data on last.json
        previous_data = read_write.read_last()

        # Two frames will hold the widgets
        label_frm = Frame(self)  # the labels and entries
        label_frm.grid(row=0, pady=10, padx=50)
        button_frm = Frame(self)  # the buttons
        button_frm.grid(row=1, pady=5, padx=50)

        # Build the widgets for label_frm
        Label(label_frm, text="Database:").grid(column=2,
                                                row=0,
                                                pady=10,
                                                padx=5)
        Label(label_frm, text="Collection:").grid(column=2, row=1, padx=5)
        self.db_entry = Entry(label_frm, width=30)
        self.db_entry.grid(column=3, row=0, pady=10)
        self.collection_entry = Entry(label_frm, width=30)
        self.collection_entry.grid(column=3, row=1)

        # Add data to entries if any data on last.json.
        # Compare by value: "is not" tests object identity, which is not a reliable
        # way to detect an empty string.
        for key, entry in (("database", self.db_entry),
                           ("collection", self.collection_entry)):
            try:
                if previous_data[key] != "":
                    entry.insert(0, previous_data[key])
            except KeyError as e:
                message = "[ERROR] (frames.DbFrame) : KeyError: " + str(e)
                read_write.log_message(message)

        # Build the widgets for button_frm
        self.next_btn = Button(button_frm, text="Next")
        self.next_btn.grid(column=2, row=0, pady=10, padx=4, ipadx=2, ipady=2)
        self.back_btn = Button(button_frm, text="Back")
        self.back_btn.grid(column=4, row=0, pady=10, padx=4, ipadx=2, ipady=2)
        self.previous_dbs_btn = Button(button_frm,
                                       text="Show databases",
                                       command=self.show_dbs)
        self.previous_dbs_btn.grid(column=2, row=1, ipadx=2, ipady=2)
        self.previous_collection_btn = Button(button_frm,
                                              text="Show collections",
                                              command=self.show_collections)
        self.previous_collection_btn.grid(column=4, row=1, ipadx=2, ipady=2)

        # Variables that hold the currently selected database and collection
        self.selected_db_var = StringVar()
        self.selected_collection_var = StringVar()
def pause_unpause(frame):
    """Toggle the stream between paused and running and relabel the pause button.

    :param frame: the frame holding ``pause_stream_btn``.
    """
    if not stream_controller.listener.pause_flag:
        # not paused yet, so the "Pause Stream" button was pressed
        stream_controller.pause()
        frame.pause_stream_btn.config(text="Continue Stream")
        print("Stream paused...")
        read_write.log_message("[INFO]" + LOG_NAME + "Stream paused...")
        return

    # the stream is already paused: resume it and restore the button label
    stream_controller.unpause()
    frame.pause_stream_btn.config(text="Pause Stream")
    read_write.log_message("[INFO]" + LOG_NAME + "Continuing stream...")
def train_sentiment_analyzer_polarity(n_instances=None):
    """Train a Naive Bayes polarity classifier on ``movie_reviews`` and save it.

    The classifier is written to 'files/sa_polarity.pickle' and its accuracy on the
    held-out reviews is printed and logged.

    :param n_instances: when given, 20% of this number of reviews per class is held out
        as the test set and the rest is used for training. When None, both slices below
        cover every review, so the classifier is trained and evaluated on the same data.
    """
    if n_instances is not None:
        n_instances = int(0.2 * n_instances)

    # one (features, label) pair per review
    pos_reviews_set = [(bag_of_words(movie_reviews.words(fileid)), 'pos')
                       for fileid in movie_reviews.fileids('pos')]
    neg_reviews_set = [(bag_of_words(movie_reviews.words(fileid)), 'neg')
                       for fileid in movie_reviews.fileids('neg')]

    shuffle(pos_reviews_set)
    shuffle(neg_reviews_set)

    test_set = pos_reviews_set[:n_instances] + neg_reviews_set[:n_instances]
    train_set = pos_reviews_set[n_instances:] + neg_reviews_set[n_instances:]

    print('Training classifier')
    classifier = NaiveBayesClassifier.train(train_set)

    # the method prints the table itself and returns None, so it must not be wrapped in print()
    classifier.show_most_informative_features(10)

    classifier_accuracy_percent = classify.accuracy(classifier, test_set) * 100
    message_acc = 'Accuracy of classifier = ' + str(
        classifier_accuracy_percent) + '%'
    print(message_acc)
    read_write.log_message("[INFO]" + LOG_NAME + message_acc)

    save_file(classifier, 'files/sa_polarity.pickle')
    # same "[INFO]" + LOG_NAME prefix as the accuracy message logged above
    message = "[INFO]" + LOG_NAME + "sa_polarity.pickle file saved."
    print(message)
    read_write.log_message(message)
def show_textblob_polarity():
    """Show a pie chart with the share of positive, neutral and negative Textblob polarities.

    The shares are computed over all documents of the current collection. Nothing is
    plotted when the collection is empty or when the DB connection is lost; both cases
    are logged and reported to the user.
    """
    try:
        collection = db_utils.get_collection()
        tweets_sum = collection.find().count()

        if tweets_sum == 0:
            # the percentages below divide by tweets_sum, so an empty collection cannot be charted
            read_write.log_message("[INFO]" + LOG_NAME +
                                   "No tweets found - nothing to plot")
            messagebox.showinfo("No data",
                                "There are no tweets in this collection to plot.")
            return

        counts = [
            collection.find({"textblob.polarity": polarity}).count()
            for polarity in ('pos', 'neu', 'neg')
        ]

        labels = 'Positive', 'Neutral', 'Negative'
        sizes = [(count / tweets_sum) * 100 for count in counts]

        _, ax1 = plt.subplots()
        ax1.pie(sizes,
                labels=labels,
                autopct='%1.1f%%',
                shadow=True,
                startangle=90)
        ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.title('Textblob Polarity')
        plt.show()
    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME +
                               "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
    except AutoReconnect as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " +
                               str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
def get_stream(listener):
    """Create a Stream for *listener*, authenticated with the stored credentials.

    :param listener: the listener object handed to ``Stream``.
    :return: the Stream, or None when a credential key is missing.
    """
    credentials = read_write.read_credentials()

    try:
        auth = OAuthHandler(credentials["consumer_key"],
                            credentials["consumer_secret"])
        auth.set_access_token(credentials["access_token"],
                              credentials["access_token_secret"])
    except KeyError as key_err:
        reports = (
            "[ERROR]" + LOG_NAME + "KeyError : " + str(key_err),
            "[FATAL]" + LOG_NAME + "Error on credentials. Please check the credentials.json file.",
        )
        # everything is printed first and only then written to the log
        for line in reports:
            print(line)
        for line in reports:
            read_write.log_message(line)
        return None

    # the stream item built from the authentication handler and the listener
    return Stream(auth, listener)
def show_training_subjectivity():
    """Show a pie chart with the share of subjective and objective "training" labels.

    The shares are computed over all documents of the current collection. Nothing is
    plotted when the collection is empty or when the DB connection is lost; both cases
    are logged and reported to the user.
    """
    try:
        collection = db_utils.get_collection()
        tweets_sum = collection.find().count()

        if tweets_sum == 0:
            # the percentages below divide by tweets_sum, so an empty collection cannot be charted
            read_write.log_message("[INFO]" + LOG_NAME +
                                   "No tweets found - nothing to plot")
            messagebox.showinfo("No data",
                                "There are no tweets in this collection to plot.")
            return

        counts = [
            collection.find({"training.subjectivity": subjectivity}).count()
            for subjectivity in ('subj', 'obj')
        ]

        labels = 'Subjective', 'Objective'
        sizes = [(count / tweets_sum) * 100 for count in counts]

        _, ax1 = plt.subplots()
        ax1.pie(sizes,
                labels=labels,
                autopct='%1.1f%%',
                shadow=True,
                startangle=90)
        ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.title('NLTK Subjectivity')
        plt.show()

    except ServerSelectionTimeoutError as e:
        read_write.log_message("[ERROR]" + LOG_NAME +
                               "ServerSelectionTimeoutError: " + str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
    except AutoReconnect as e:
        read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " +
                               str(e))
        messagebox.showerror("Error", "Lost Connection to the DB")
    def on_connect(self):
        """Announce the connection, record the search keywords and reset both counters."""
        connected_msg = "[SUCCESS]" + LOG_NAME + "Connected to Streaming Server!"
        gathering_msg = ("[INFO]" + LOG_NAME + "#### Gathering tweets for '" +
                         stream_controller.search_keyword + "' keyword. ####")
        print(gathering_msg)
        read_write.log_message(connected_msg)
        read_write.log_message(gathering_msg)

        # save every comma-separated keyword, without surrounding whitespace, to the keywords.json file
        for raw_keyword in stream_controller.search_keyword.split(","):
            read_write.write_keywords(raw_keyword.strip())

        # initialize the counters
        self.store_counter = self.ignore_counter = 0
Пример #17
0
 def drop_db(self):
     """Drop the selected database after the user confirms, then refresh the database list.

     A ServerSelectionTimeoutError while dropping is logged and reported, and the list
     is left as it is.
     """
     db_name = self.selected_db_var.get()
     confirmed = messagebox.askokcancel(
         title="Are you sure?",
         message="Are you sure you want to delete " + db_name,
         default="cancel",
         parent=self.root)
     if not confirmed:
         return

     read_write.log_message(
         "[INFO] (frames.DbFrame) : Dropping database '" + db_name + "'")
     try:
         self.client.drop_database(db_name)
     except ServerSelectionTimeoutError as timeout_err:
         log_line = ("[ERROR]" + LOG_NAME + "ServerSelectionTimeoutError: " +
                     str(timeout_err))
         read_write.log_message(log_line)
         messagebox.showerror("Error", "Lost Connection to the DB")
         return

     # rebuild the list so the dropped database disappears from it
     self.hide_dbs()
     self.show_dbs()
    def stream(self):
        """Start streaming tweets that match the comma-separated ``self.search_keyword``.

        Errors are printed and logged instead of being raised, because a failure here
        would otherwise surface inside the active thread.
        """
        stream = manage_credentials.get_stream(listener=self.listener)
        # this is a try-except block, because if there is something wrong in the Listener class,
        # like e.g internet connection failure, it raises the exception inside the active thread
        try:
            # the user can give more than one keyword, separated by commas;
            # split them and strip the surrounding whitespace
            search_list = [x.strip() for x in self.search_keyword.split(",")]

            message = "[INFO]" + LOG_NAME + "Trying to connect to the Streaming Server..."
            print(message)
            read_write.log_message(message)
            # "async" is a reserved word since Python 3.7, so the keyword is spelled is_async;
            # it makes the streaming loop run in a new thread
            stream.filter(track=search_list, is_async=True)
        except AttributeError as e:
            # raised e.g. when no stream object was returned, so there is nothing to call filter() on
            message = "[ERROR]" + LOG_NAME + "AttributeError: " + str(e)
            print(message)
            read_write.log_message(message)
            messagebox.showerror(
                "Fatal error",
                "No credentials were found. Please close the script, " +
                "add the file and try again!")
        except Exception as e:
            message = "[ERROR]" + LOG_NAME + "Exception: " + str(repr(e))
            print(message)
            read_write.log_message(message)
    def on_data(self, data):
        """Handle one raw message from the stream and store it when it is an English tweet.

        :param data: the raw JSON text of the incoming message.
        :return: False to terminate the stream loop (stop requested or DB connection lost),
            True to keep it running.
        """
        if self.flag:  # flag keeps track if we want to stop the stream
            read_write.log_message("[INFO]" + LOG_NAME + "Gathered " +
                                   str(self.store_counter) +
                                   " tweets - Ignored " +
                                   str(self.ignore_counter) + " tweets")
            return False  # return False to terminate the loop
        if self.pause_flag:  # pause flag keeps track if we want to pause the stream
            return True  # return True and do nothing with the data. It's a virtual pause.

        data = json.loads(data)  # parse the incoming JSON text

        if "user" not in data:  # if tweet has no user, we don't want this tweet
            print("No user data - ignoring tweet.")
            self.ignore_counter += 1
            return True
        # .get() so that a message carrying a user but no "lang" key is ignored
        # instead of raising KeyError
        if data.get("lang") != "en":  # we deal only with English language text based tweets
            print("Non English - ignoring tweet.")
            self.ignore_counter += 1
            return True

        # keep only the necessary fields of the tweet
        our_tweet = other_utils.format_tweet(data, method="stream")
        try:
            # store_tweet's result says whether the tweet was stored; it drives the two counters
            if db_utils.store_tweet(our_tweet):
                self.store_counter += 1  # increase the counter
                if self.store_counter % 100 == 0:  # report every multiple of 100
                    print("Stored " + str(self.store_counter) +
                          " tweets so far.")
            else:
                self.ignore_counter += 1
        except ServerSelectionTimeoutError as e:
            read_write.log_message("[ERROR]" + LOG_NAME +
                                   "ServerSelectionTimeoutError: " + str(e))
            messagebox.showerror("Error", "Lost Connection to the DB")
            return False
        except AutoReconnect as e:
            read_write.log_message("[ERROR]" + LOG_NAME + "AutoReconnect: " +
                                   str(e))
            messagebox.showerror("Error", "Lost Connection to the DB")
            return False

        # return True to continue the loop
        return True
Пример #20
0
def can_connect(host, port):
    """Check that a MongoDB server at host:port can be reached and written to.

    A throw-away database and collection with a random name are created for a test
    write and dropped again. On success the client is kept in the module-level
    ``client`` variable.

    :param host: the host name or address of the MongoDB server.
    :param port: the port of the server; anything ``int()`` can convert.
    :return: a dict with "connect" (bool) and "errors" (str); on success it also
        carries "host" and "port".
    """
    global client
    response = {"connect": False, "errors": ""}

    def fail(label, error, user_message):
        # print and log the error, then hand back the failure response
        message = "[ERROR]" + LOG_NAME + label + ":" + str(error)
        print(message)
        read_write.log_message(message)
        response["errors"] = user_message
        return response

    try:
        port = int(port)
    except (ValueError, TypeError) as e:
        # TypeError covers values such as None, which int() rejects without a ValueError
        return fail(type(e).__name__, e, "Port must be an integer")

    try:  # try connect to the MongoDB
        connection = MongoClient(host=host,
                                 port=port,
                                 serverSelectionTimeoutMS=10000,
                                 tz_aware=True)
    except ConfigurationError as e:  # if host is not appropriate
        return fail("ConfigurationError", e, str(e))
    except TypeError as e:  # if port result to an error
        return fail("TypeError", e, str(e))

    # to see if we can connect to the MongoDB, we make a test write,
    # so we create a new database and collection with unique names
    pseudo_random = ''.join(
        random.choice(string.ascii_uppercase + string.ascii_lowercase +
                      string.digits) for _ in range(16))
    a_db = connection["random_database_" + pseudo_random]
    a_collection = a_db["random_collection_" + pseudo_random]

    try:  # we give the client, 10 seconds to connect
        read_write.log_message("[INFO]" + LOG_NAME +
                               "Trying to connect to MongoDB with host: " +
                               host + " and port: " + str(port))
        # insert_one replaces the deprecated Collection.insert
        a_collection.insert_one({"test": 1})
    except ServerSelectionTimeoutError as e:
        connection.close()  # do not leave the unusable client open
        return fail("ServerSelectionTimeoutError", e, "Can't connect")

    # if all OK, drop the test database
    connection.drop_database(a_db)
    # keep the client in the module-level variable, because it is referenced many times
    client = connection
    read_write.log_message("[INFO]" + LOG_NAME + "Successfully connected")
    response["connect"] = True
    response["host"] = host
    response["port"] = port
    return response
##################################################################################################
# Module responsible for reading the credentials and returning the API item back to the program  #
##################################################################################################
from utils import read_write
import sys

try:
    from tweepy import OAuthHandler, Stream, AppAuthHandler, API
except ImportError as e:
    read_write.log_message("[FATAL] (manage_credentials) : ImportError: " +
                           str(e))
    sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue")

LOG_NAME = " (manage_credentials) : "


def get_stream(listener):
    """Build the OAuth handler from the credentials returned by ``read_write.read_credentials()``.

    A missing credential key is printed and logged as an error.

    NOTE(review): as shown here the function ends after the ``except`` block, so
    ``listener`` is never used and None is returned implicitly - this copy looks
    truncated; confirm against the full module.
    """
    credentials = read_write.read_credentials()

    try:
        auth = OAuthHandler(credentials["consumer_key"],
                            credentials["consumer_secret"])
        auth.set_access_token(credentials["access_token"],
                              credentials["access_token_secret"])
    except KeyError as error:
        # a key is missing from the credentials: report it on stdout and in the log
        message_er = "[ERROR]" + LOG_NAME + "KeyError : " + str(error)
        message_fatal = "[FATAL]" + LOG_NAME + "Error on credentials. Please check the credentials.json file."
        print(message_er)
        print(message_fatal)
        read_write.log_message(message_er)
        read_write.log_message(message_fatal)
Пример #22
0
#####################################################################################################
# Module that is responsible for the sentiment analysis of the tweets                               #
#####################################################################################################
from utils import read_write, training
import sys
try:
    from textblob import TextBlob
    from nltk.sentiment.util import *
    from nltk.tokenize import regexp, word_tokenize
except ImportError as e:
    read_write.log_message("[FATAL] (sentiment_utils) : ImportError: " +
                           str(e))
    sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue")

try:
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
except LookupError as e:
    # log the error, then exit with the steps for installing vader_lexicon
    read_write.log_message("[FATAL] (sentiment_utils) : LookupError: " +
                           str(e))
    instructions = " ****   INSTALLATION INSTRUCTIONS   ****\n\n"
    instructions += "    1) Open a new terminal and type python. This will open a python terminal\n"
    # the package is spelled "nltk"; with the previous "ntlk" spelling the import would fail
    instructions += "    2) Type import nltk\n    3) Type nltk.download()\n"
    instructions += "    4) This will open a new window. Search in 'All Packages' and install vader_lexicon.\n"
    instructions += "    5) Double click OR click download to install it"
    read_write.log_message(instructions)
    sys.exit(str(e) + "\n" + instructions)

LOG_NAME = " (sentiment_utils) : "


def textblob_polarity(text):
Пример #23
0
    def __init__(self, master):
        """Build the statistics screen.

        Shows, for a non-empty collection, a table of per-analyzer tweet counts, the share of
        tweets on which the analyzers agree, and one button per pie chart. For an empty
        collection only a notice is shown. Back and Exit buttons are always created.

        :param master: parent widget; kept on ``self.root``.
        """
        super(StatsFrame, self).__init__(master)
        self.root = master
        self.collection = db_utils.get_collection()

        self.all_documents = self.collection.find()  # this is a Cursor object

        # four sub-frames stacked in rows 0-3: counts table, agreement figures, chart buttons, navigation
        self.quick_facts_frm = Frame(self)
        self.quick_facts_frm.grid(row=0, column=0, pady=5)
        self.compare_frm = Frame(self)
        self.compare_frm.grid(row=1, column=0, pady=5)
        graphs_frame = Frame(self)
        graphs_frame.grid(row=2, column=0, pady=5)
        navigation_frame = Frame(self)
        navigation_frame.grid(row=3, column=0, pady=5)

        tweets_sum = self.all_documents.count()
        read_write.log_message("[INFO] (frames.StatsFrame) : Found " +
                               str(tweets_sum) + " tweets in the DB")

        # a collection with no stored tweets gets no data or metrics, only a notice
        if tweets_sum > 0:
            facts = self.quick_facts_frm

            # row captions (one per analyzer) go in column 0, rows 2-4
            for grid_row, caption in enumerate(("Textblob", "VADER", "NLTK"), start=2):
                Label(facts, text=caption).grid(row=grid_row, column=0, padx=2, pady=2)

            # column captions go in row 1, columns 1-5
            column_captions = ("Positive", "Neutral", "Negative", "Subjective", "Objective")
            for grid_col, caption in enumerate(column_captions, start=1):
                Label(facts, text=caption).grid(row=1, column=grid_col, padx=6, pady=2)

            # one entry per table cell: (document field, stored value, grid row, grid column)
            count_cells = (
                ("textblob.polarity", 'pos', 2, 1),
                ("textblob.polarity", 'neu', 2, 2),
                ("textblob.polarity", 'neg', 2, 3),
                ("textblob.subjectivity", 'subj', 2, 4),
                ("textblob.subjectivity", 'obj', 2, 5),
                ("vader.polarity", 'pos', 3, 1),
                ("vader.polarity", 'neu', 3, 2),
                ("vader.polarity", 'neg', 3, 3),
                ("training.polarity", 'pos', 4, 1),
                ("training.polarity", 'neg', 4, 3),
                ("training.subjectivity", 'subj', 4, 4),
                ("training.subjectivity", 'obj', 4, 5),
            )
            for field, value, grid_row, grid_col in count_cells:
                matching = self.collection.find({field: value}).count()
                Label(facts, text=str(matching)).grid(row=grid_row,
                                                      column=grid_col,
                                                      pady=2)

            agreement = self.compare_frm
            Label(agreement, text="Total unique tweets stored:").grid(
                row=1, column=0, padx=2, pady=2, sticky=W)
            Label(agreement, text=str(tweets_sum)).grid(row=1, column=1, pady=2)

            # (caption, filter that every listed analyzer must satisfy), in display order
            agreement_rows = (
                ("Positive tweets that agree: ", {
                    "textblob.polarity": 'pos',
                    "vader.polarity": 'pos',
                    "training.polarity": 'pos'
                }),
                ("Negative tweets that agree: ", {
                    "textblob.polarity": 'neg',
                    "vader.polarity": 'neg',
                    "training.polarity": 'neg'
                }),
                ("Neutral tweets that agree: ", {
                    "textblob.polarity": 'neu',
                    "vader.polarity": 'neu'
                }),
                ("Subjective tweets that agree: ", {
                    "textblob.subjectivity": 'subj',
                    "training.subjectivity": 'subj'
                }),
                ("Objective tweets that agree: ", {
                    "textblob.subjectivity": 'obj',
                    "training.subjectivity": 'obj'
                }),
            )
            # run all the queries before any of the agreement labels are created
            agreement_counts = [
                self.collection.find(query).count()
                for _, query in agreement_rows
            ]
            for grid_row, ((caption, _), agreeing) in enumerate(
                    zip(agreement_rows, agreement_counts), start=2):
                Label(agreement, text=caption).grid(row=grid_row,
                                                    column=0,
                                                    padx=2,
                                                    pady=2,
                                                    sticky=W)
                share = str(round((agreeing / tweets_sum) * 100, 1)) + "%"
                Label(agreement, text=share).grid(row=grid_row, column=1, pady=2)

            # chart buttons: one per pie chart, stacked in column 1 of the graphs frame
            chart_btn_grid = {"column": 1, "pady": 10, "ipadx": 5}

            self.textblob_polarity_btn = Button(
                graphs_frame,
                text="Textblob Polarity Pie chart",
                command=chart_utils.show_textblob_polarity)
            self.textblob_polarity_btn.grid(row=0, **chart_btn_grid)

            self.textblob_subjectivity_btn = Button(
                graphs_frame,
                text="Textblob Subjectivity Pie chart",
                command=chart_utils.show_textblob_subjectivity)
            self.textblob_subjectivity_btn.grid(row=1, **chart_btn_grid)

            self.vader_polarity_btn = Button(
                graphs_frame,
                text="VADER Polarity Pie chart",
                command=chart_utils.show_vader_polarity)
            self.vader_polarity_btn.grid(row=2, **chart_btn_grid)

            self.training_polarity_btn = Button(
                graphs_frame,
                text="NLTK Polarity Pie chart",
                command=chart_utils.show_training_polarity)
            self.training_polarity_btn.grid(row=3, **chart_btn_grid)

            self.training_subjectivity_btn = Button(
                graphs_frame,
                text="NLTK Subjectivity Pie chart",
                command=chart_utils.show_training_subjectivity)
            self.training_subjectivity_btn.grid(row=4, **chart_btn_grid)
        else:  # empty collection
            message = "No documents found in this collection."
            read_write.log_message("[WARN] (frames.StatsFrame) : " + message)
            message += "\nPlease enter some data first."
            notice = Label(self.quick_facts_frm, text=message)
            notice.grid(row=0, column=0, padx=10, pady=5)

        # navigation buttons; the Back button is created without a command here
        self.back_btn = Button(navigation_frame, text="Back")
        self.back_btn.grid(row=0, column=1, ipadx=5, ipady=3, pady=15)
        self.exit_btn = Button(navigation_frame, text="Exit", command=self.safe_exit)
        self.exit_btn.grid(row=0, column=3, ipadx=5, ipady=3, padx=15, pady=10)
#####################################################################################################
# Module that is responsible for the polarity and subjectivity training of the tweets               #
#####################################################################################################
from utils import read_write
import sys
import os.path
import string
from tkinter import messagebox
from random import shuffle
try:
    from nltk import classify
    from nltk.sentiment.util import *
    from nltk.sentiment import SentimentAnalyzer
    from nltk.classify import NaiveBayesClassifier
except ImportError as e:
    read_write.log_message("[FATAL] (training) : ImportError: " + str(e))
    sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue")

# Import the NLTK corpora used for training. If NLTK raises LookupError here, log it
# together with step-by-step instructions for downloading the corpora.
try:
    from nltk.corpus import movie_reviews
    from nltk.corpus import subjectivity
    from nltk.corpus import stopwords
except LookupError as e:
    read_write.log_message("[FATAL] (training) : LookupError: " + str(e))
    instructions = " ****   INSTALLATION INSTRUCTIONS   ****\n\n"
    instructions += "    1) Open a new terminal and type python. This will open a python terminal\n"
    # NOTE: users type this line verbatim, so the module name must be spelled "nltk"
    instructions += "    2) Type import nltk\n    3) Type nltk.download()\n"
    instructions += "    4) This will open a new window. Search in 'All Packages' and install movie_reviews," \
                    " subjectivity and stopwords\n"
    instructions += "    5) Double click OR click download to install it"
    read_write.log_message(instructions)
Пример #25
0
    def __init__(self, master):
        """Build the host/port entry screen.

        Lays out the Host and Port entries (pre-filled from ``read_write.read_last()`` when a
        non-empty host is stored), the Next / Exit / "Show previous hosts" buttons, and an
        initially hidden frame holding one radio button per entry of ``read_write.read_mongo()``.

        :param master: parent widget; kept on ``self.root`` and destroyed by the Exit button.
        """
        super(HostFrame, self).__init__(master)
        self.root = master

        # get any previous data on last.json
        previous_data = read_write.read_last()

        # Three frames will hold the widgets
        label_frm = Frame(self)  # this will hold the labels
        label_frm.grid(row=0, pady=10, padx=50)
        button_frm = Frame(self)  # this will hold the buttons
        button_frm.grid(row=1, pady=5, padx=50)
        self.hosts_frm = Frame(self)  # this will hold the previous hosts
        self.hosts_frm.grid(row=2, pady=5, padx=50)
        # keep it hidden; it is shown only if the user asks for it
        self.hosts_frm.grid_remove()

        # Build the widgets for label_frm
        Label(label_frm, text="Host:").grid(column=2, row=0, pady=10, padx=5)
        Label(label_frm, text="Port:").grid(column=2, row=1, padx=5)
        self.host_entry = Entry(label_frm, width=30)
        self.host_entry.grid(column=3, row=0, pady=10)
        self.port_entry = Entry(label_frm, width=30)
        self.port_entry.grid(column=3, row=1)

        # Add data to entries if any data on last.json
        try:
            # compare by value: "is not" against a literal tests object identity, which only
            # works by accident of string interning and raises a SyntaxWarning on Python 3.8+
            if previous_data["host"] != "":
                self.host_entry.insert(0, previous_data["host"])
                self.port_entry.insert(0, previous_data["port"])
        except KeyError as e:
            message = "[ERROR] (frames.HostFrame): KeyError: " + str(e)
            read_write.log_message(message)

        # Build the widgets for button_frm
        self.next_btn = Button(button_frm, text="Next")
        self.next_btn.grid(column=2, row=0, pady=10, padx=4, ipadx=2, ipady=2)
        self.exit_btn = Button(button_frm,
                               text="Exit",
                               command=self.root.destroy)
        self.exit_btn.grid(column=4, row=0, pady=10, padx=4, ipadx=2, ipady=2)
        self.show_previous_btn = Button(button_frm,
                                        text="Show previous hosts",
                                        command=self.show_hosts)
        self.show_previous_btn.grid(column=2,
                                    row=1,
                                    columnspan=3,
                                    ipadx=2,
                                    ipady=2)

        # Build the widgets for hosts_frm
        def select_host():
            """Copy the selected "host:port" radio option into the two entries."""
            # options are built below as host + ":" + port, so split on the LAST colon;
            # a host that itself contains ":" is then kept whole
            selected_data = str(var.get()).rsplit(":", 1)
            self.host_entry.delete(0, "end")
            self.host_entry.insert(0, selected_data[0])
            self.port_entry.delete(0, "end")
            self.port_entry.insert(0, selected_data[1])

        # populate the hosts_frm with Radio-buttons that show previous connections
        data = read_write.read_mongo()

        var = StringVar()
        counter = 0  # this will show in which row each radio-button will be on the frame
        for json_object in data:
            # value comparison, for the same reason as above; entries with an empty host are skipped
            if json_object["host"] != "":
                option = json_object["host"] + ":" + str(
                    json_object["port"])  # format host:port
                r = Radiobutton(self.hosts_frm,
                                text=option,
                                variable=var,
                                value=option,
                                command=select_host)
                r.grid(row=counter, column=2, pady=2)
                counter += 1
Пример #26
0
    def __init__(self, master):
        """Build the streaming screen, or a warning screen when the saved analyzers are missing.

        If either ``files/sa_polarity.pickle`` or ``files/sa_subjectivity.pickle`` does not
        exist, only a warning plus Back/Exit buttons are laid out. Otherwise the keyword
        entry, the stream control buttons and the keywords toggle are built.

        :param master: parent widget; kept on ``self.root``.
        """
        super(StreamFrame, self).__init__(master)
        self.root = master

        # both saved sentiment analyzers are required before streaming is offered
        required_files = ('files/sa_polarity.pickle',
                          'files/sa_subjectivity.pickle')
        missing_files = [
            path for path in required_files if not os.path.exists(path)
        ]

        # padding shared by the Back and Exit buttons in both layouts
        back_grid = {"ipadx": 5, "ipady": 3, "pady": 15}
        exit_grid = {"ipadx": 5, "ipady": 3, "padx": 15, "pady": 10}

        if missing_files:  # at least one SA file could not be found
            warning_frame = Frame(self)  # holds the warning message
            warning_frame.grid(row=3, column=0, pady=5)
            navigation_frame = Frame(self)  # holds the back and exit buttons
            navigation_frame.grid(row=4, column=0, pady=5)

            message = "SA files not found."
            read_write.log_message("[WARN] (frames.StreamFrame) : " + message)
            message += "\nClick Start Training first to train the NLTK classifiers."
            warning_lbl = Label(warning_frame, text=message)
            warning_lbl.grid(row=0, column=0, padx=10, pady=5)

            # created so the attribute exists, but never placed on the grid in this branch
            self.mng_stream_btn = Button(warning_frame, text="Start Stream")

            self.back_btn = Button(navigation_frame, text="Back")
            self.back_btn.grid(row=1, column=1, **back_grid)
            self.exit_btn = Button(navigation_frame,
                                   text="Exit",
                                   command=self.safe_exit)
            self.exit_btn.grid(row=1, column=3, **exit_grid)
        else:
            # main frame: keyword label/entry plus every button except the keywords toggle
            controls_frame = Frame(self)
            controls_frame.grid(row=0,
                                column=2,
                                padx=10,
                                pady=10,
                                ipady=20,
                                ipadx=20)

            # side frame for the keywords toggle button
            self.keywords_frm = Frame(self)
            self.keywords_frm.grid(row=0, column=3, rowspan=3, pady=15)

            # keyword label and entry
            keyword_lbl = Label(controls_frame, text="Keyword:")
            keyword_lbl.grid(row=0, column=0, padx=20)
            self.keyword_entry = Entry(controls_frame, width=30)
            self.keyword_entry.grid(row=0, column=1, columnspan=3)

            # stream controls; the start button's caption is meant to change from start to stop
            self.mng_stream_btn = Button(controls_frame, text="Start Stream")
            self.mng_stream_btn.grid(row=1, column=1, ipadx=5, ipady=3, pady=20)
            # the pause button is laid out and then hidden again right away
            self.pause_stream_btn = Button(controls_frame, text="Pause Stream")
            self.pause_stream_btn.grid(row=1,
                                       column=3,
                                       ipadx=5,
                                       ipady=3,
                                       padx=10,
                                       pady=20)
            self.pause_stream_btn.grid_remove()

            # keywords toggle; the caption is meant to flip to "<<<" when clicked
            self.manage_keywords_btn = Button(self.keywords_frm,
                                              command=self.show_keywords,
                                              text=">>>")
            self.manage_keywords_btn.grid(row=0,
                                          column=0,
                                          ipadx=5,
                                          ipady=3,
                                          padx=10)

            # navigation buttons live in the main frame in this layout
            self.back_btn = Button(controls_frame, text="Back")
            self.back_btn.grid(row=2, column=1, **back_grid)
            self.exit_btn = Button(controls_frame,
                                   text="Exit",
                                   command=self.safe_exit)
            self.exit_btn.grid(row=2, column=3, **exit_grid)
def train_sentiment_analyzer_subjectivity(n_instances=None):
    """Train the subjectivity analyzer on NLTK's subjectivity corpus and save it.

    Builds unigram features from the training sentences (stop words, punctuation and
    numeric tokens removed), trains a Naive Bayes classifier, logs its accuracy on the
    test split and saves the analyzer to ``files/sa_subjectivity.pickle``.

    :param n_instances: total number of sentences to use; half are taken from the
        'subj' category and half from 'obj'. ``None`` uses every sentence.
    """
    if n_instances is not None:
        n_instances = int(n_instances / 2)

    # NLTK's integrated subjectivity dataset for the subj training
    subj_docs = [
        (sent, 'subj')
        for sent in subjectivity.sents(categories='subj')[:n_instances]
    ]
    obj_docs = [(sent, 'obj')
                for sent in subjectivity.sents(categories='obj')[:n_instances]]

    # We separately split subjective and objective instances to keep a balanced
    # uniform class distribution in both train and test sets.
    train_subj_docs, test_subj_docs = split_train_test(subj_docs)
    train_obj_docs, test_obj_docs = split_train_test(obj_docs)

    training_docs = train_subj_docs + train_obj_docs
    testing_docs = test_subj_docs + test_obj_docs

    sentim_analyzer = SentimentAnalyzer()

    all_words = sentim_analyzer.all_words(
        [mark_negation(doc) for doc in training_docs])

    # Tokens that must not become features. A set gives constant-time membership tests
    # for the per-word filter below.
    ignored_tokens = set(stopwords.words('english'))
    ignored_tokens.update(string.punctuation)  # every single punctuation character
    ignored_tokens.update(("''", "``", "—", "…", "...", "--", ".."))

    # str.isdigit() drops every all-digit token. The previous `word not in string.digits`
    # was a substring test against "0123456789", so it dropped "12" but kept "13".
    all_words_clean = [
        word for word in all_words
        if word not in ignored_tokens and not word.isdigit()
    ]

    # Add simple unigram word features
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_clean,
                                                       min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats,
                                       unigrams=unigram_feats)

    # Apply features to obtain a feature-value representation of our datasets
    training_set = sentim_analyzer.apply_features(training_docs)
    testing_set = sentim_analyzer.apply_features(testing_docs)

    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)
    try:
        classifier.show_most_informative_features()
    except AttributeError:
        message = "Your classifier does not provide a show_most_informative_features() method."
        print(message)
        read_write.log_message(message)
        # NOTE(review): evaluate() runs only on this fallback path - confirm that is intended
        sentim_analyzer.evaluate(testing_set)
    classifier_accuracy_percent = (classify.accuracy(classifier,
                                                     testing_set)) * 100
    message_acc = 'Accuracy of classifier = ' + str(
        classifier_accuracy_percent) + '%'
    print(message_acc)
    read_write.log_message("[INFO]" + LOG_NAME + message_acc)

    save_file(sentim_analyzer, 'files/sa_subjectivity.pickle')
    message = "sa_subjectivity.pickle file saved."
    print(message)
    read_write.log_message(message)
 def on_exception(self, exception):
     """Write the given exception to the log as an [ERROR] entry and return False."""
     error_entry = "[ERROR]" + LOG_NAME + str(exception)
     read_write.log_message(error_entry)
     return False
 def __init__(self):
     """Start with no search keyword and a new StdOutListener, then log the initialization."""
     self.search_keyword = None
     self.listener = StdOutListener()
     init_notice = "[INFO]" + LOG_NAME + "StreamController initialized"
     read_write.log_message(init_notice)
###########################################################################################
# Module that is responsible for showing the pie charts of the sentiment analysis results  #
###########################################################################################
from utils import db_utils, read_write
from pymongo.errors import ServerSelectionTimeoutError, AutoReconnect
from tkinter import messagebox
import sys

try:
    import matplotlib.pyplot as plt
except ImportError as e:
    read_write.log_message("[FATAL] (chart_utils) : ImportError: " + str(e))
    sys.exit("[SEVERE] " + str(e) + ". Please install this module to continue")

LOG_NAME = " (chart_utils) : "


def show_textblob_polarity():
    try:
        collection = db_utils.get_collection()
        all_documents = collection.find()
        tweets_sum = all_documents.count()

        number_of_textblob_positive = collection.find({
            "textblob.polarity":
            'pos'
        }).count()
        number_of_textblob_neutral = collection.find({
            "textblob.polarity": 'neu'
        }).count()
        number_of_textblob_negative = collection.find({