Пример #1
0
		#get key (class) with maximum value (probability)
		key = max(prob, key=prob.get)
		return key

	def testFile(self, filePath):
		
		if self.version == 'multivariant':
			return self.multivariantTestFile(filePath)
		elif self.version == 'multinomial':
			return self.multinomialTestFile(filePath)


if __name__ == "__main__":
	# Script entry point: evaluate a Bayes classifier and print its accuracy.
	#
	# Alternative run WITHOUT cross-fold validation (kept for reference, not executed):
	#   bayes = Bayes('multivariant')
	#   bayes.initalPreprocess("data/DR", "data/DT", "data/L", "data/TEST",  "custom", 1, False)
	#   validate = CrossValidate("cleanData")
	#   featureSelector = FeatureSelector()
	#   validate.regularBayes(bayes, featureSelector)

	# Active run: multinomial Bayes evaluated WITH cross-fold validation.
	bayes = Bayes('multinomial')
	# NOTE(review): the trailing boolean appears to be the cross-validation switch
	# (True for cross-validation, False for a regular run); the original note called
	# it the "3rd arg" -- confirm against initalPreprocess. Delete the existing clean
	# data first or the setting won't take effect.
	bayes.initalPreprocess("data/DR", "data/DT", "data/L", "data/TEST",  None,1, True)
	validate = CrossValidate("cleanData")
	featureSelector = FeatureSelector()
	accuracy = validate.crossFoldOnBayes(bayes, featureSelector)
	# The label names the variant constructed above (it previously said
	# "Multivariant" although the classifier is multinomial). The single
	# pre-formatted argument prints identically on Python 2 and Python 3.
	print("Multinomial Bayes Accuracy:   %s" % (accuracy,))
Пример #2
0
 method = "SuperGrep"
 print method
 shutil.rmtree("cleanData/", True)
 grep = Grep(True)
 grep.initalPreprocess(DR, DT, L, TEST)
 grep.testDirToOutput("cleanData/TEST/", "cleanData/" )
 with open("cleanData/output.txt") as methodOutput:
     for line in methodOutput.read().split():
         finalOutput.write(method + "," + line + "\n")
 
 method = "MultivariantBayes"
 print method
 shutil.rmtree("cleanData/", True)
 bayes = Bayes('multivariant')
 bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)                
 validate = CrossValidate("cleanData")
 featureSelector = MIFeatureSelector()
 validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
 with open("cleanData/output.txt") as methodOutput:
     for line in methodOutput.read().split():
         finalOutput.write(method + "," + line + "\n")
 
 method = "MultinomialBayes"
 print method
 shutil.rmtree("cleanData/", True) 
 bayes = Bayes('multinomial')
 bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)            
 validate = CrossValidate("cleanData")
 featureSelector = FeatureSelector()     # Multinomial does much worse with MIFeatureSelector()
 validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
 with open("cleanData/output.txt") as methodOutput:
Пример #3
0
		self.docWords = {}
		self.knownClasses = {}
		self.classDocs = { docClass : set() for docClass in self.docClasses }
		allWords = set()
		for docClass in self.docClasses:
			for docPath in trainData[docClass]:
				self.knownClasses[docPath] = docClass
				self.classDocs[docClass].add(docPath)
				with open(docPath) as docFile:
					self.docWords[docPath] = set(docFile.read().split())
				allWords.update(self.docWords[docPath])
		
		info = Counter()
		for word in allWords:
			info[word] = self.mutualInformation(word)
		top = info.most_common()[0:numFeatures]
		return [ word for (word, mutualInfo) in top ]


if __name__ == "__main__":
	# Script entry point: preprocess the data, then cross-fold evaluate a
	# Perceptron using MIFeatureSelector and print the resulting accuracy.
	classifier = Perceptron()
	# print(...) with one pre-formatted argument emits the same text on
	# Python 2 and Python 3, unlike the Python 2-only print statement.
	print("Preprocessing")
	classifier.initalPreprocess("web", 1, True)
	# Other preprocessing configurations that were tried (argument meanings
	# are defined by initalPreprocess, which is not visible here):
	# classifier.initalPreprocess("web",1)
	# classifier.initalPreprocess(None,3)
	# classifier.initalPreprocess("web",3)
	validate = CrossValidate("cleanData")
	featureSelector = MIFeatureSelector()
	accuracy = validate.crossFoldOnPerceptron(classifier, featureSelector)
	# Two spaces after the colon reproduce the label's trailing space plus the
	# separator the old print statement inserted before the value.
	print("Perceptron Accuracy:  %s" % (accuracy,))