def test_get_tokens(self):
     """Split a sentence into tokens on word boundaries.

     The expected list shows hyphenated words split into their parts and
     the comma and full stop dropped, while the apostrophe in "can't"
     stays inside its token.
     """
     util = Util()
     tokens = util.get_tokens("i can't find a new-york t-shirt, nor find a stationary shop.")
     expected = ['i', "can't", 'find', 'a', 'new', 'york', 't', 'shirt', 'nor', 'find', 'a', 'stationary', 'shop']
     # assertEqual reports both lists and their diff when the check fails.
     self.assertEqual(tokens, expected)
 def test_filter_alnum(self):
     """Filter out tokens that are not alphanumeric.

     The bare punctuation tokens "," and "-" are expected to be removed,
     leaving the word, the number and the mixed letter/digit token.
     """
     util = Util()
     filtered = util.filter_alnum("i , 123 - apt12".split())
     expected = ['i', "123", 'apt12']
     # assertEqual reports both lists and their diff when the check fails.
     self.assertEqual(filtered, expected)
示例#3
0
    def compute(self, sentence):
        """Score the sentiment of ``sentence`` from per-word ratings.

        The sentence is tokenized with ``Util.get_tokens`` and every token is
        looked up, lower-cased, in ``self.ratings``. Tokens without a rating
        count as 0, i.e. neither positive nor negative.

        Returns an ``SscValues`` built from, in order:
          * the percentage of tokens with a positive rating,
          * the percentage of tokens with a negative rating,
          * the mean rating of the positive tokens (0 if there are none),
          * the mean rating of the negative tokens (0 if there are none),
          * an overall label: "overly_positive", "positive", "neutral",
            "negative" or "overly_negative".
        """
        # Kept as a function-level import, matching the original code
        # (NOTE(review): presumably avoids a circular import -- confirm).
        from main import Util
        util = Util()
        tokens = util.get_tokens(sentence)

        total_positive = 0
        count_positive = 0
        total_negative = 0
        count_negative = 0
        total_words = 0

        for token in tokens:
            rating = self.ratings.get(token.lower(), 0)
            if rating < 0:
                total_negative += rating
                count_negative += 1
            elif rating > 0:
                total_positive += rating
                count_positive += 1
            # Unrated (0) tokens still count towards the word total, so the
            # percentages are relative to the whole sentence.
            total_words += 1

        positive_pct = 0
        negative_pct = 0

        positive_avg = 0
        negative_avg = 0

        # Each division is guarded so an empty sentence, or one with no
        # positive/negative words, yields 0 instead of ZeroDivisionError.
        if total_words > 0:
            positive_pct = (count_positive / float(total_words)) * 100
            negative_pct = (count_negative / float(total_words)) * 100

        if count_positive > 0:
            positive_avg = total_positive / float(count_positive)

        if count_negative > 0:
            negative_avg = total_negative / float(count_negative)

        # Add the positive and negative averages together, e.g.
        #   +5 + -1 = +4 marks it as overly positive
        #   +1 + -5 = -4 marks it as overly negative
        #   +1 + -1 =  0 marks it as neutral
        #   +5 + -5 =  0 marks it as neutral (this is confusing)
        total = positive_avg + negative_avg

        # The more extreme threshold on each side must be tested first;
        # otherwise the milder branch would catch every value and the
        # "overly_*" label could never be assigned.
        if total >= 2.5:
            overall_sentiment = "overly_positive"
        elif total >= 0.5:
            overall_sentiment = "positive"
        elif total <= -2.5:
            overall_sentiment = "overly_negative"
        elif total <= -0.5:
            overall_sentiment = "negative"
        else:
            overall_sentiment = "neutral"

        ssc_values = SscValues(positive_pct, negative_pct, positive_avg, negative_avg, overall_sentiment)
        return ssc_values
 def test_ratings_sum(self):
     """Sum the ratings of the rated words in a sentence.

     Only tokens for which the Afinn instance reports a rating contribute
     to the total; the sample sentence is expected to add up to 2.
     """
     afinn = Afinn("AFINN-111.txt")
     sentence = "Rainy day but still in a good mood"
     util = Util()
     aggregate_sentiment = sum(
         afinn.get_rating(word)
         for word in util.get_tokens(sentence)
         if afinn.has_rating(word)
     )
     # assertEqual reports the actual total when the check fails.
     self.assertEqual(aggregate_sentiment, 2)