def get_top_word_frequencies(self, texts):
    '''
    Output: list of tuples in the format: (word, frequency, mood),
            ordered from highest to lowest frequency.
    Input: texts is a list of strings.

    get_top_word_frequencies counts how often each word occurs across
    all of texts and returns only the words that have a mood.
    Each text is split into words by clean_tweet_text (assumed to
    return an iterable of words -- confirm against its definition).
    The mood of a word is the value stored for it in self.wordlist;
    words with no entry there are left out of the result.
    See get_tweet_text_mood for how moods are combined for a whole text.
    '''
    # Count every word produced by clean_tweet_text over all texts.
    freq = {}
    for text in texts:
        for word in clean_tweet_text(text):
            freq[word] = freq.get(word, 0) + 1

    # items() is used instead of iteritems(): iteritems() only exists on
    # Python 2 dicts, while items() works on both Python 2 and Python 3.
    words_sorted = sorted(freq.items(),
                          key=operator.itemgetter(1),
                          reverse=True)

    # Attach the mood and drop words without one. The string 'None' is
    # the marker for "no mood", matching get_tweet_text_mood.
    wfm = []
    for word, count in words_sorted:
        mood = self.wordlist.get(word, 'None')
        if mood != 'None':
            wfm.append((word, count, mood))
    return wfm
def get_tweet_text_mood(self, text):
    '''
    Output: the summed mood of text, or the string 'None' if no word
            in text has a mood associated with it.
    Input: text is a string.

    get_tweet_text_mood gets the positivity/negativity (mood) of a
    string (tweet). The text is split into words by clean_tweet_text
    (assumed to return an iterable of words -- confirm against its
    definition), and the mood is the sum of the values that
    self.wordlist holds for those words. Words missing from
    self.wordlist contribute nothing.
    '''
    # A unique sentinel distinguishes "word not in the wordlist" from any
    # real mood value. The previous check used the number 100 as the
    # marker, so a word whose mood was actually 100 counted as missing.
    missing = object()

    word_moods = []
    for word in clean_tweet_text(text):
        mood = self.wordlist.get(word, missing)
        if mood is not missing:
            word_moods.append(mood)

    # No known word at all: report the 'None' marker rather than 0, so
    # callers can tell "no mood found" apart from a neutral sum.
    if not word_moods:
        return 'None'
    return sum(word_moods)