Пример #1
0
    def genRandomWebLogs(self):
        """
            Method for generating random web log data.

            Every call increments self.count. If the window is not open yet,
            hourly hit counts are synthesised for one month, saved as
            tab-separated (hour, hits) rows to
            sample_data/sample_web_traffic.tsv and shown in a read-only table
            inside a new Toplevel window. If the window is already open it is
            only restored with deiconify().
        """
        # The window is stored in a module-level global so that a later call
        # can reach the Toplevel created by an earlier one.
        global RandomWebLogsWindow

        self.count += 1
        if self.randomWebLogsWindowOpenedFlag:
            RandomWebLogsWindow.deiconify()
            return

        self.randomWebLogsWindowOpenedFlag = True # set window opened

        def toggleFlag():
            # close handler: mark the window as closed, then destroy it
            self.randomWebLogsWindowOpenedFlag = False
            RandomWebLogsWindow.destroy()

        RandomWebLogsWindow = tk.Toplevel(self)
        RandomWebLogsWindow.minsize(300, 500)
        RandomWebLogsWindow.geometry("300x500+100+100")
        RandomWebLogsWindow.title("Random web log data")
        RandomWebLogsWindow.config(bd=5)
        RandomWebLogsWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        x = sp.arange(1, 31 * 24) # 1 month of traffic data
        y = sp.array(200 * (sp.sin(2 * sp.pi * x / (7 * 24))), dtype=int)
        # The noise and growth terms are floats. Adding them with an in-place
        # "+=" into the integer array is refused by NumPy's same-kind casting
        # rule, so add out of place and truncate back to int explicitly.
        y = (y + gamma.rvs(15, loc=0, scale=100, size=len(x))).astype(int)
        y = (y + 2 * sp.exp(x / 100.0)).astype(int)
        # hide negative hit counts behind a mask
        y = sp.ma.array(y, mask=[y < 0])

        # pair every hour with its hit count once; used for file and table
        rows = list(zip(x, y))

        # make sure the output directory exists before writing into it
        dataDir = "sample_data"
        if not os.path.isdir(dataDir):
            os.makedirs(dataDir)
        sp.savetxt(os.path.join(dataDir, "sample_web_traffic.tsv"), rows, delimiter="\t", fmt="%s")

        model = TableModel() # create a new TableModel for table data
        table = TableCanvas(RandomWebLogsWindow, model=model, editable=False) # create a new TableCanvas for showing the table
        table.createTableFrame()

        tableData = {} # dictionary for storing table data, one uuid key per row
        for hour, hits in rows:
            tableData[uuid.uuid4()] = {'Hour': str(hour), 'Hits': str(hits)}
        model.importDict(tableData)
        table.resizeColumn(0, 100)
        table.resizeColumn(1, 100)
        table.sortTable(columnName='Hour')
        table.redrawTable()
Пример #2
0
    def findTermFrequencies(self):
        """
            Method for calculating the frequencies of each term in the tweets.

            Shows an error box and returns if the Twitter keys file does not
            exist. Otherwise increments self.count and, unless the window is
            already open, reads self.TwitterStreamFile line by line (one JSON
            object per line), cleans the 'text' field of each object, counts
            every alphanumeric term and lists the terms that make up more
            than 0.5% of all counted terms in a read-only table inside a new
            Toplevel window.

            Lines that are not valid JSON, or that carry no usable 'text'
            value, are skipped.
        """
        # The window is stored in a module-level global so that the close
        # handler can reach it.
        global TermFrequenciesWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror("ERROR", "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.", parent = self.parent)
            return

        self.count += 1
        if self.termFrequenciesOpenedFlag:
            # window already open: nothing to build
            return

        # set window opened
        self.termFrequenciesOpenedFlag = True

        def toggleFlag():
            # close handler: mark the window as closed, then destroy it
            self.termFrequenciesOpenedFlag = False
            TermFrequenciesWindow.destroy()

        # initialize window
        TermFrequenciesWindow = tk.Toplevel(self)
        TermFrequenciesWindow.minsize(500, 500)
        TermFrequenciesWindow.geometry("500x500+100+100")
        TermFrequenciesWindow.title("Term Frequencies (only > 0.5%)")
        TermFrequenciesWindow.config(bd=5)
        TermFrequenciesWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TermFrequenciesWindow, model=model,
                            editable=False)
        table.createTableFrame()

        # The patterns do not change between lines, so compile them once.
        # noisePattern removes @mentions, every character that is not an
        # ASCII letter, digit, space or tab, and scheme://... URLs.
        noisePattern = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        urlPattern = re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE)

        # calculate frequencies
        freqs = {}
        total = 0
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                try:
                    text = json.loads(line)['text']
                    # Only byte strings need decoding. Calling decode() on
                    # text that is already decoded fails for non-ASCII
                    # tweets, which previously dropped them silently.
                    if isinstance(text, bytes):
                        text = text.decode('utf-8')
                    # clean the text
                    text = ' '.join(noisePattern.sub("", text).split())
                except (ValueError, KeyError, TypeError):
                    # invalid JSON / undecodable bytes (ValueError), no
                    # 'text' key (KeyError), or a non-string value (TypeError)
                    continue
                text = urlPattern.sub('', text)
                text = text.replace('RT', '')

                for term in text.split(' '):
                    if term.isalnum():
                        freqs[term] = freqs.get(term, 0) + 1
                        total += 1

        # dictionary for storing table data, one uuid key per row
        tableData = {}
        for term, count in freqs.items():
            # total is > 0 here because freqs is non-empty
            share = count / float(total)
            if share > 0.005:
                # insert frequencies into the table dictionary
                tableData[uuid.uuid4()] = {'Term': term, 'Frequency (%)': str(round(share * 100, 2))}

        # insert and sort data in the table
        model.importDict(tableData)
        # sort in descending order
        model.setSortOrder(columnIndex = 1, reverse = 1)
        table.adjustColumnWidths()
        table.resizeColumn(0, 200)
        table.resizeColumn(1, 200)
        table.redrawTable()
Пример #3
0
    def findTweetSentiment(self):
        """
            Method for calculating the sentiment of each tweet.

            Shows an error box and returns if the Twitter keys file does not
            exist. Otherwise increments self.count and, unless the window is
            already open, loads tab-separated term/score pairs from
            self.AFINNFile, reads self.TwitterStreamFile line by line (one
            JSON object per line), cleans the 'text' field of each object and
            sums the scores of its space-separated words. Tweets with a
            non-zero sum are listed in a read-only table inside a new
            Toplevel window, and an info box reports the ratio of positive
            to negative tweets.

            Lines that are not valid JSON, or that carry no usable 'text'
            value, are skipped.
        """
        # The window is stored in a module-level global so that the close
        # handler can reach it.
        global TweetSentimentWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror("ERROR", "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.", parent = self.parent)
            return

        self.count += 1
        if self.tweetSentimentOpenedFlag:
            # window already open: nothing to build
            return

        # set window opened
        self.tweetSentimentOpenedFlag = True

        def toggleFlag():
            # close handler: mark the window as closed, then destroy it
            self.tweetSentimentOpenedFlag = False
            TweetSentimentWindow.destroy()

        # initialize window
        TweetSentimentWindow = tk.Toplevel(self)
        TweetSentimentWindow.minsize(600, 500)
        TweetSentimentWindow.geometry("1000x500+100+100")
        TweetSentimentWindow.title("Tweet Sentiments (Zero values omitted)")
        TweetSentimentWindow.config(bd=5)
        TweetSentimentWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TweetSentimentWindow, model=model,
                            editable=False)
        table.createTableFrame()

        # load the term -> score lookup
        scores = {}
        with open(self.AFINNFile) as afinnFile:
            for line in afinnFile:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                term, score = line.split("\t")
                scores[term] = int(score)

        # The patterns do not change between lines, so compile them once.
        # noisePattern removes @mentions, every character that is not an
        # ASCII letter, digit, space or tab, and scheme://... URLs.
        noisePattern = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        urlPattern = re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE)

        # dictionary for storing table data, one uuid key per row
        tableData = {}
        positive = 0
        negative = 0

        # calculate sentiments
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                try:
                    text = json.loads(line)['text']
                    # Only byte strings need decoding. Calling decode() on
                    # text that is already decoded fails for non-ASCII
                    # tweets, which previously dropped them silently.
                    if isinstance(text, bytes):
                        text = text.decode('utf-8')
                    # clean the text
                    text = ' '.join(noisePattern.sub("", text).split())
                except (ValueError, KeyError, TypeError):
                    # invalid JSON / undecodable bytes (ValueError), no
                    # 'text' key (KeyError), or a non-string value (TypeError)
                    continue
                text = urlPattern.sub('', text)
                text = text.replace('RT', '')

                sentiment = sum(scores.get(word, 0) for word in text.split(' '))
                if sentiment == 0:
                    # zero-score tweets are omitted from table and counts
                    continue

                # add items to table data dictionary
                tableData[uuid.uuid4()] = {'Tweet': text, 'Score': str(sentiment)}
                if sentiment > 0:
                    positive += 1
                else:
                    negative += 1

        # calculate ratio; it must always be defined because the info box
        # below uses it unconditionally
        if negative > 0:
            ratio = str(round(positive / float(negative), 2))
        else:
            ratio = "undefined (no negative tweets)"

        # insert and sort data in the table
        model.importDict(tableData)
        table.resizeColumn(0, 850)
        table.resizeColumn(1, 50)
        table.sortTable(columnName='Score')
        table.redrawTable()

        if positive > negative:
            extra = "The overall sentiment is POSITIVE."
        elif positive < negative:
            extra = "The overall sentiment is NEGATIVE."
        else:
            # equal counts (including no scored tweets at all) are a tie
            extra = "The overall sentiment is NEUTRAL."

        # show info box about the overall result
        tkMessageBox.showinfo("Score Ratio", "The ratio of positive vs. negative tweets is " + ratio + ". " + extra, parent = TweetSentimentWindow)
Пример #4
0
    def findTermFrequencies(self):
        """
            Method for calculating the frequencies of each term in the tweets.

            Shows an error box and returns if the Twitter keys file does not
            exist. Otherwise increments self.count and, unless the window is
            already open, reads self.TwitterStreamFile line by line (one JSON
            object per line), cleans the 'text' field of each object, counts
            every alphanumeric term and lists the terms that make up more
            than 0.5% of all counted terms in a read-only table inside a new
            Toplevel window.

            Lines that are not valid JSON, or that carry no usable 'text'
            value, are skipped.
        """
        # The window is stored in a module-level global so that the close
        # handler can reach it.
        global TermFrequenciesWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror(
                "ERROR",
                "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.",
                parent=self.parent)
            return

        self.count += 1
        if self.termFrequenciesOpenedFlag:
            # window already open: nothing to build
            return

        # set window opened
        self.termFrequenciesOpenedFlag = True

        def toggleFlag():
            # close handler: mark the window as closed, then destroy it
            self.termFrequenciesOpenedFlag = False
            TermFrequenciesWindow.destroy()

        # initialize window
        TermFrequenciesWindow = tk.Toplevel(self)
        TermFrequenciesWindow.minsize(500, 500)
        TermFrequenciesWindow.geometry("500x500+100+100")
        TermFrequenciesWindow.title("Term Frequencies (only > 0.5%)")
        TermFrequenciesWindow.config(bd=5)
        TermFrequenciesWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TermFrequenciesWindow,
                            model=model,
                            editable=False)
        table.createTableFrame()

        # The patterns do not change between lines, so compile them once.
        # noisePattern removes @mentions, every character that is not an
        # ASCII letter, digit, space or tab, and scheme://... URLs.
        noisePattern = re.compile(
            r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        urlPattern = re.compile(r'^https?:\/\/.*[\r\n]*',
                                flags=re.MULTILINE)

        # calculate frequencies
        freqs = {}
        total = 0
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                try:
                    text = json.loads(line)['text']
                    # Only byte strings need decoding. Calling decode() on
                    # text that is already decoded fails for non-ASCII
                    # tweets, which previously dropped them silently.
                    if isinstance(text, bytes):
                        text = text.decode('utf-8')
                    # clean the text
                    text = ' '.join(noisePattern.sub("", text).split())
                except (ValueError, KeyError, TypeError):
                    # invalid JSON / undecodable bytes (ValueError), no
                    # 'text' key (KeyError), or a non-string value (TypeError)
                    continue
                text = urlPattern.sub('', text)
                text = text.replace('RT', '')

                for term in text.split(' '):
                    if term.isalnum():
                        freqs[term] = freqs.get(term, 0) + 1
                        total += 1

        # dictionary for storing table data, one uuid key per row
        tableData = {}
        for term, count in freqs.items():
            # total is > 0 here because freqs is non-empty
            share = count / float(total)
            if share > 0.005:
                # insert frequencies into the table dictionary
                tableData[uuid.uuid4()] = {
                    'Term': term,
                    'Frequency (%)': str(round(share * 100, 2))
                }

        # insert and sort data in the table
        model.importDict(tableData)
        # sort in descending order
        model.setSortOrder(columnIndex=1, reverse=1)
        table.adjustColumnWidths()
        table.resizeColumn(0, 200)
        table.resizeColumn(1, 200)
        table.redrawTable()
Пример #5
0
    def findTweetSentiment(self):
        """
            Method for calculating the sentiment of each tweet.

            Shows an error box and returns if the Twitter keys file does not
            exist. Otherwise increments self.count and, unless the window is
            already open, loads tab-separated term/score pairs from
            self.AFINNFile, reads self.TwitterStreamFile line by line (one
            JSON object per line), cleans the 'text' field of each object and
            sums the scores of its space-separated words. Tweets with a
            non-zero sum are listed in a read-only table inside a new
            Toplevel window, and an info box reports the ratio of positive
            to negative tweets.

            Lines that are not valid JSON, or that carry no usable 'text'
            value, are skipped.
        """
        # The window is stored in a module-level global so that the close
        # handler can reach it.
        global TweetSentimentWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror(
                "ERROR",
                "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.",
                parent=self.parent)
            return

        self.count += 1
        if self.tweetSentimentOpenedFlag:
            # window already open: nothing to build
            return

        # set window opened
        self.tweetSentimentOpenedFlag = True

        def toggleFlag():
            # close handler: mark the window as closed, then destroy it
            self.tweetSentimentOpenedFlag = False
            TweetSentimentWindow.destroy()

        # initialize window
        TweetSentimentWindow = tk.Toplevel(self)
        TweetSentimentWindow.minsize(600, 500)
        TweetSentimentWindow.geometry("1000x500+100+100")
        TweetSentimentWindow.title(
            "Tweet Sentiments (Zero values omitted)")
        TweetSentimentWindow.config(bd=5)
        TweetSentimentWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TweetSentimentWindow,
                            model=model,
                            editable=False)
        table.createTableFrame()

        # load the term -> score lookup
        scores = {}
        with open(self.AFINNFile) as afinnFile:
            for line in afinnFile:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                term, score = line.split("\t")
                scores[term] = int(score)

        # The patterns do not change between lines, so compile them once.
        # noisePattern removes @mentions, every character that is not an
        # ASCII letter, digit, space or tab, and scheme://... URLs.
        noisePattern = re.compile(
            r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        urlPattern = re.compile(r'^https?:\/\/.*[\r\n]*',
                                flags=re.MULTILINE)

        # dictionary for storing table data, one uuid key per row
        tableData = {}
        positive = 0
        negative = 0

        # calculate sentiments
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                try:
                    text = json.loads(line)['text']
                    # Only byte strings need decoding. Calling decode() on
                    # text that is already decoded fails for non-ASCII
                    # tweets, which previously dropped them silently.
                    if isinstance(text, bytes):
                        text = text.decode('utf-8')
                    # clean the text
                    text = ' '.join(noisePattern.sub("", text).split())
                except (ValueError, KeyError, TypeError):
                    # invalid JSON / undecodable bytes (ValueError), no
                    # 'text' key (KeyError), or a non-string value (TypeError)
                    continue
                text = urlPattern.sub('', text)
                text = text.replace('RT', '')

                sentiment = sum(
                    scores.get(word, 0) for word in text.split(' '))
                if sentiment == 0:
                    # zero-score tweets are omitted from table and counts
                    continue

                # add items to table data dictionary
                tableData[uuid.uuid4()] = {
                    'Tweet': text,
                    'Score': str(sentiment)
                }
                if sentiment > 0:
                    positive += 1
                else:
                    negative += 1

        # calculate ratio; it must always be defined because the info box
        # below uses it unconditionally
        if negative > 0:
            ratio = str(round(positive / float(negative), 2))
        else:
            ratio = "undefined (no negative tweets)"

        # insert and sort data in the table
        model.importDict(tableData)
        table.resizeColumn(0, 850)
        table.resizeColumn(1, 50)
        table.sortTable(columnName='Score')
        table.redrawTable()

        if positive > negative:
            extra = "The overall sentiment is POSITIVE."
        elif positive < negative:
            extra = "The overall sentiment is NEGATIVE."
        else:
            # equal counts (including no scored tweets at all) are a tie
            extra = "The overall sentiment is NEUTRAL."

        # show info box about the overall result
        tkMessageBox.showinfo(
            "Score Ratio",
            "The ratio of positive vs. negative tweets is " + ratio +
            ". " + extra,
            parent=TweetSentimentWindow)