def get_sentiment(self, cumulative=False):
    '''
    Calculates the sentiment of the cluster, aggregated per date.

    Every document in self.document_dict is scored by joining its tokens
    with single spaces and passing the text to
    TwitterSemanticAnalyser.extract_sentiment. The second element of each
    result is read as the score, converted to float, and summed over all
    documents that share the same date.

    cumulative -- currently not read by this method; the result is the
                  same for either value.

    Returns a list of (date, value) tuples in ascending date order, where
    value is the summed score of that date.
    '''
    def date_of(entry):
        # Single key shared by the sort and the grouping below.
        return entry[0]

    tsa = TwitterSemanticAnalyser()
    emotional_rollercoaster = []
    for document in self.document_dict.values():
        sentiment = tsa.extract_sentiment(' '.join(document.tokens))
        emotional_rollercoaster.append((document.date, sentiment))

    # itertools.groupby only merges *adjacent* items, so the list has to be
    # sorted by the grouping key first. Sorting on the date alone (instead
    # of on the whole tuple) avoids comparing the sentiment payloads of
    # documents that share a date, which are not guaranteed to be orderable.
    emotional_rollercoaster.sort(key=date_of)

    # sentiment[1] is the score of a single document on that date.
    return [
        (date, sum(float(sentiment[1]) for _, sentiment in group))
        for date, group in itertools.groupby(emotional_rollercoaster, date_of)
    ]
def test_analysing_multiple_docs(self):
    '''
    analyse_corpus should return one result per document in corpus.

    Each expected entry is a 3-tuple; judging by the values it appears to
    be (entities, sentiment, keywords) - confirm against analyse_corpus.
    '''
    # NOTE(review): another definition of this test with the same name
    # exists in this file; if both live in the same class, only the later
    # one is actually run.
    tsa = TwitterSemanticAnalyser()
    calculated = tsa.analyse_corpus(corpus)
    expected = [
        ([],
         ('positive', '0.0593345'),
         ['Gheblawi Beyond belief', 'Egypt interesting discussion',
          'religious history', 'make-up', 'interesting']),
        ([],
         ('negative', '-0.31492'),
         ['Messi', 'News']),
        ([('retweet', 'FieldTerminology')],
         ('neutral', 0),
         []),
        ([],
         ('positive', '0.060677'),
         ['RT Beyond belief', 'Egypt interesting discussion',
          'religious history', 'make-up', 'interesting']),
        ([('Bob Jones', 'Person'), ('Bob', 'Person'), ('Egypt', 'Country')],
         ('neutral', 0),
         ['Bob Jones', 'Egypt']),
        ([],
         ('negative', '-0.200008'),
         []),
        ([],
         ('positive', '0.61373'),
         ['happy']),
    ]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, calculated)
def test_extract_keywords(self):
    '''extract_keywords on tweet5 should yield exactly the two expected keywords.'''
    analyser = TwitterSemanticAnalyser()
    keywords = analyser.extract_keywords(tweet5)
    self.assertEqual(['Bob Jones', 'Egypt'], keywords)
def test_sentiment_extraction(self):
    '''
    extract_sentiment should return the expected (label, score) pair for
    tweet6 (negative) and for tweet7 (positive).
    '''
    analyser = TwitterSemanticAnalyser()
    # Both extractions run before any assertion is evaluated.
    observed = [
        analyser.extract_sentiment(tweet6),
        analyser.extract_sentiment(tweet7),
    ]
    wanted = [
        ('negative', '-0.200008'),
        ('positive', '0.61373'),
    ]
    for want, got in zip(wanted, observed):
        self.assertEqual(want, got)
def test_entity_extraction(self):
    '''extract_entities on tweet5 should yield the expected (text, type) pairs, in order.'''
    analyser = TwitterSemanticAnalyser()
    entities = analyser.extract_entities(tweet5)
    self.assertEqual(
        [
            ('Bob Jones', 'Person'),
            ('Bob', 'Person'),
            ('Egypt', 'Country'),
        ],
        entities,
    )
def test_analysing_multiple_docs(self):
    '''
    analyse_corpus should return one result per document in corpus.

    Each expected entry is a 3-tuple; judging by the values it appears to
    be (entities, sentiment, keywords) - confirm against analyse_corpus.
    '''
    # NOTE(review): an earlier definition of this test with the same name
    # exists in this file; if both live in the same class, this later one
    # replaces it.
    tsa = TwitterSemanticAnalyser()
    calculated = tsa.analyse_corpus(corpus)
    expected = [
        ([],
         ('positive', '0.0593345'),
         ['Gheblawi Beyond belief', 'Egypt interesting discussion',
          'religious history', 'make-up', 'interesting']),
        ([],
         ('negative', '-0.31492'),
         ['Messi', 'News']),
        ([('retweet', 'FieldTerminology')],
         ('neutral', 0),
         []),
        ([],
         ('positive', '0.060677'),
         ['RT Beyond belief', 'Egypt interesting discussion',
          'religious history', 'make-up', 'interesting']),
        ([('Bob Jones', 'Person'), ('Bob', 'Person'), ('Egypt', 'Country')],
         ('neutral', 0),
         ['Bob Jones', 'Egypt']),
        ([],
         ('negative', '-0.200008'),
         []),
        ([],
         ('positive', '0.61373'),
         ['happy']),
    ]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, calculated)