示例#1
0
	def load_dictionaries(self, n, twitter):
		"""Load saved counts from JSON and build the common-word list.

		Sets ``self.words`` from the JSON file at ``WORD_COUNTS``,
		``self.categories`` from the JSON file at ``CAT_COUNTS``, and
		``self.common`` from ``create_common_words_list(n, twitter)``.
		Prints how many common words are being ignored.

		Args:
			n: Forwarded to ``create_common_words_list``; presumably the
				number of most common words to ignore -- confirm against
				that helper.
			twitter: Forwarded unchanged to ``create_common_words_list``.
		"""
		# Dictionary of specific word counts. The context manager closes the
		# file even if json.load raises on malformed input.
		with open(WORD_COUNTS) as json_words:
			self.words = json.load(json_words)
		# Dictionary of total word counts.
		with open(CAT_COUNTS) as json_cats:
			self.categories = json.load(json_cats)
		# List of most common words to ignore.
		self.common = create_common_words_list(n, twitter)
		# A single parenthesised expression prints identically under the
		# Python 2 print statement and the Python 3 print function.
		print('%d most common words ignored.' % len(self.common))
示例#2
0
	def initialize(self, n, cat_list, twitter=False):
		"""Reset the per-category counters and build the common-word list.

		Args:
			n: Forwarded to ``create_common_words_list``; the original comment
				describes it as the user-specified number of common words.
			cat_list: Iterable of category keys; each one gets an empty
				word-count dict and a total of zero.
			twitter: Forwarded unchanged to ``create_common_words_list``.
		"""
		# Total word count per category, all starting at zero. Built first so
		# cat_list is iterated exactly once.
		self.categories = dict.fromkeys(cat_list, 0)
		# Specific word counts per category: one fresh, independent dict each.
		self.words = {category: {} for category in self.categories}

		# Most common words (how many is chosen by the caller through n).
		self.common = create_common_words_list(n, twitter)
		# Pair of (English punctuation characters, empty string).
		# NOTE(review): presumably used elsewhere to strip punctuation from
		# words before counting -- confirm against the consumer.
		self.punc_table = (string.punctuation, "")