def test_get_tokens(self):
    """Check that ``Util.get_tokens`` splits a sentence on word boundaries.

    The expected list pins the behaviour this test relies on: hyphenated
    words are split into their parts ("new-york" -> "new", "york"),
    punctuation such as "," and "." is dropped, and an apostrophe inside
    a word is kept ("can't" stays one token).
    """
    util = Util()
    tokens = util.get_tokens(
        "i can't find a new-york t-shirt, nor find a stationary shop."
    )
    expected = [
        'i', "can't", 'find', 'a', 'new', 'york', 't', 'shirt',
        'nor', 'find', 'a', 'stationary', 'shop',
    ]
    # assertEqual reports both lists and their diff on failure, which
    # assertTrue(tokens == expected) cannot do.
    self.assertEqual(tokens, expected)
def test_filter_alnum(self):
    """Check that ``Util.filter_alnum`` drops non-alphanumeric tokens.

    The input is the whitespace-split list ['i', ',', '123', '-', 'apt12'];
    the pure-punctuation tokens "," and "-" must be removed while the
    alphabetic, numeric and mixed alphanumeric tokens are kept in order.
    """
    util = Util()
    filtered = util.filter_alnum("i , 123 - apt12".split())
    expected = ['i', "123", 'apt12']
    # assertEqual shows the actual and expected lists when they differ.
    self.assertEqual(filtered, expected)
def compute(self, sentence):
    """Score the sentiment of ``sentence`` and return it as ``SscValues``.

    The sentence is tokenised with ``Util.get_tokens``; each token is
    lower-cased and looked up in ``self.ratings`` (tokens without an entry
    count as rating 0, i.e. neither positive nor negative).

    Args:
        sentence: Text to analyse; passed unchanged to ``Util.get_tokens``.

    Returns:
        ``SscValues(positive_pct, negative_pct, positive_avg, negative_avg,
        overall_sentiment)`` where

        * ``positive_pct`` / ``negative_pct`` are the percentage (0-100) of
          all tokens with a rating above / below zero,
        * ``positive_avg`` / ``negative_avg`` are the mean rating of the
          positive / negative tokens (0 when there are none),
        * ``overall_sentiment`` is one of ``"overly_positive"``,
          ``"positive"``, ``"neutral"``, ``"negative"`` or
          ``"overly_negative"``.
    """
    # Function-scope import kept from the original (presumably to avoid a
    # circular import with ``main`` -- confirm before hoisting it).
    from main import Util

    util = Util()
    tokens = util.get_tokens(sentence)

    total_positive = 0
    count_positive = 0
    total_negative = 0
    count_negative = 0
    total_words = 0

    for token in tokens:
        rating = self.ratings.get(token.lower(), 0)
        if rating < 0:
            total_negative += rating
            count_negative += 1
        elif rating > 0:
            total_positive += rating
            count_positive += 1
        # Every token counts towards the percentage denominator, rated or not.
        total_words += 1

    positive_pct = 0
    negative_pct = 0
    positive_avg = 0
    negative_avg = 0

    # float() keeps true division even where ``/`` on two ints would truncate.
    if total_words > 0:
        positive_pct = (count_positive / float(total_words)) * 100
        negative_pct = (count_negative / float(total_words)) * 100

    if count_positive > 0:
        positive_avg = total_positive / float(count_positive)

    if count_negative > 0:
        negative_avg = total_negative / float(count_negative)

    # Add the positive and negative averages together, e.g.
    #   +5 + -1 = +4  marks it as overly positive
    #   +1 + -5 = -4  marks it as overly negative
    #   +1 + -1 =  0  marks it as neutral
    #   +5 + -5 =  0  marks it as neutral (this is confusing)
    total = positive_avg + negative_avg

    # The stricter threshold on each side must be tested first; checking
    # ``<= -0.5`` before ``<= -2.5`` would make "overly_negative"
    # unreachable.
    if total >= 2.5:
        overall_sentiment = "overly_positive"
    elif total >= 0.5:
        overall_sentiment = "positive"
    elif total <= -2.5:
        overall_sentiment = "overly_negative"
    elif total <= -0.5:
        overall_sentiment = "negative"
    else:
        overall_sentiment = "neutral"

    ssc_values = SscValues(positive_pct, negative_pct, positive_avg,
                           negative_avg, overall_sentiment)
    return ssc_values
def test_ratings_sum(self):
    """Sum the AFINN ratings of the rated words in a sample sentence.

    Loads the word list from "AFINN-111.txt", tokenises the sentence with
    ``Util.get_tokens`` and adds up ``afinn.get_rating(word)`` for every
    token where ``afinn.has_rating(word)`` is true. The total is expected
    to be 2.
    """
    afinn = Afinn("AFINN-111.txt")
    sentence = "Rainy day but still in a good mood"
    util = Util()

    aggregate_sentiment = sum(
        afinn.get_rating(word)
        for word in util.get_tokens(sentence)
        if afinn.has_rating(word)
    )

    # assertEqual reports the actual total on failure.
    self.assertEqual(aggregate_sentiment, 2)