def load_dictionaries(self, n, twitter):
    """Restore the word-count dictionaries from their JSON files.

    Populates ``self.words`` from the file at ``WORD_COUNTS`` and
    ``self.categories`` from the file at ``CAT_COUNTS``, then rebuilds
    ``self.common`` and reports how many common words will be ignored.

    Args:
        n: Forwarded unchanged to ``create_common_words_list``.
        twitter: Forwarded unchanged to ``create_common_words_list``.

    Raises:
        IOError: If either JSON file cannot be opened.
        ValueError: If either file does not contain valid JSON.
    """
    # Loads the dictionary of specific word counts. The ``with`` block
    # guarantees the handle is closed even when json.load() raises.
    with open(WORD_COUNTS) as json_words:
        self.words = json.load(json_words)

    # Loads the dictionary of total word counts.
    with open(CAT_COUNTS) as json_cats:
        self.categories = json.load(json_cats)

    # Loads the list of most common words to ignore.
    self.common = create_common_words_list(n, twitter)

    # A single parenthesised argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('%d most common words ignored.' % len(self.common))
def initialize(self, n, cat_list, twitter=False):
    """Reset this object to an empty, untrained state.

    Args:
        n: Forwarded unchanged to ``create_common_words_list``.
        cat_list: Iterable of category keys; each one gets an empty
            per-word count dictionary and a total count of zero.
        twitter: Forwarded unchanged to ``create_common_words_list``.
    """
    per_word_counts = {}   # category -> dictionary of specific word counts
    total_counts = {}      # category -> total word count

    # One pass over cat_list fills both tables.
    for category in cat_list:
        per_word_counts[category] = {}
        total_counts[category] = 0

    self.words = per_word_counts
    self.categories = total_counts

    # List of most common words (number of words specified by the caller).
    self.common = create_common_words_list(n, twitter)

    # English punctuation, to be stripped from words before counting.
    # NOTE(review): this is a plain (punctuation, "") tuple, not a
    # translation table -- confirm how the consumer of punc_table uses it.
    self.punc_table = (string.punctuation, "")