def testInitFileImporter(self):
    """Check that LibsvmFileImporter rejects a missing or wrong file name.

    Calling the constructor with no argument must raise TypeError, and
    calling it with the wrong file name 'efwef' must raise IOError.
    """
    # no filename
    self.assertRaises(TypeError, LibsvmFileImporter)
    # wrong filename
    self.assertRaises(IOError, LibsvmFileImporter, 'efwef')
def testBinaryImport(self):
    """Exercise binary=True on one file that loads and one that must not.

    Both data files are looked up relative to the directory of the running
    script (sys.argv[0]). Importing 'a1a' must succeed; constructing an
    importer for the multi-class 'satimage.scale' file must raise TypeError.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    accepted_file = os.path.join(script_dir, 'data/classification/a1a')
    rejected_file = os.path.join(script_dir,
                                 'data/classification/satimage.scale')

    # should do
    LibsvmFileImporter(accepted_file, binary=True).get_dataSet()

    # should fail: multi-class data
    self.assertRaises(TypeError, LibsvmFileImporter, rejected_file,
                      binary=True)
def validateModel(self, testFile):
    """Score the targets found in testFile and store the statistics.

    A target equal to 1 counts as correct; any other target adds
    (1 - target)^2 to the squared error. The results are stored on the
    instance:

    * self.__inst_test   -- number of instances reported by the data set
    * self.__pct_correct -- percentage of targets equal to 1
    * self.__rmse        -- root mean squared error

    There is no guard for a data set with zero instances; both divisions
    below use the instance count as the denominator.
    """
    dataset = LibsvmFileImporter(testFile).get_dataSet()
    instance_count = dataset.get_numInstances()
    self.__inst_test = instance_count

    ## --- statistics
    hits = 0.
    squared_error = 0
    for target in dataset.get_targets():
        if target == 1:
            # correct
            hits += 1.
            continue
        squared_error += math.pow(1 - target, 2)

    # percent correct
    self.__pct_correct = 100 * (hits / instance_count)
    # root mean squared error
    self.__rmse = math.sqrt(squared_error / instance_count)
def validateModel(self, testFile):
    """Compute accuracy and RMSE over the targets stored in testFile.

    Each target is compared with the value 1: a match is counted as
    correct, a mismatch contributes (1 - target)^2 to the error sum.
    Afterwards the instance count, the percentage of correct targets and
    the root mean squared error are kept in self.__inst_test,
    self.__pct_correct and self.__rmse respectively.

    Note that the instance count is used as a divisor without being
    checked for zero.
    """
    test_set = LibsvmFileImporter(testFile).get_dataSet()
    self.__inst_test = n_instances = test_set.get_numInstances()

    ## --- statistics
    n_correct, error_sum = 0., 0
    for label in test_set.get_targets():
        if label == 1:
            n_correct += 1.  # correct
        else:
            error_sum += math.pow(1 - label, 2)

    fraction_correct = n_correct / n_instances
    mean_squared_error = error_sum / n_instances

    self.__pct_correct = 100 * fraction_correct       # percent correct
    self.__rmse = math.sqrt(mean_squared_error)       # root mean squared error
def testRidgeRegression(self):
    """Smoke test: train RidgeRegression(5) on the 'lin_reg' data file.

    The data file is resolved relative to the directory of the running
    script (sys.argv[0]) and imported with binary=False. Nothing about the
    trained model is verified yet; the test only fails if loading or
    training raises.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    regression_file = os.path.join(script_dir, 'data/regression/lin_reg')
    dataset = LibsvmFileImporter(regression_file, binary=False).get_dataSet()

    model = RidgeRegression(5)
    model.trainModel(dataset)

    # TODO: create test
    self.assertTrue(True)
def testImportData(self):
    """Import the small 'debug' file and check targets and comment skipping.

    The file is resolved relative to the directory of the running script
    (sys.argv[0]) and imported with binary=True.
    """
    cwd = os.path.dirname(os.path.abspath(sys.argv[0]))
    importer = LibsvmFileImporter(os.path.join(cwd, 'data/classification/debug'),
                                  binary=True)
    ds = importer.get_dataSet()

    # Contents of the debug file (line breaks reconstructed here -- verify
    # against the data file itself):
    #
    #   -1 3:1.4324 76:1 80:1 83:1
    #   +1 14:1 19:1.324 84:1
    #   # A comment
    #   # another comment
    #   -1 73:1 75:1 76:1 80:1 85:1.155

    # 1. we have a result
    self.assertIsNotNone(ds)

    # 2. class is loaded correctly
    # assertEqual reports the actual value on failure, unlike
    # assertTrue(a == b), which only says "False is not true".
    self.assertEqual(ds.get_targets(0), -1)

    # 3. comment lines correctly skipped: index 2 is the last data row ...
    self.assertEqual(ds.get_targets(2), -1)

    # ... and a fourth instance should not exist.
    with self.assertRaises(IndexError):
        ds.get_targets(3)
def testImportData(self):
    """Load the 'debug' classification file and verify what was imported.

    Checks that a data set is returned, that the targets at index 0 and
    index 2 are -1 (index 2 is only the last data row if the two comment
    lines were skipped), and that asking for index 3 raises IndexError.
    The file path is built from the directory of the running script.
    """
    cwd = os.path.dirname(os.path.abspath(sys.argv[0]))
    debug_file = os.path.join(cwd, 'data/classification/debug')
    importer = LibsvmFileImporter(debug_file, binary=True)
    ds = importer.get_dataSet()

    # Contents of the debug file (line breaks reconstructed here -- verify
    # against the data file itself):
    #
    #   -1 3:1.4324 76:1 80:1 83:1
    #   +1 14:1 19:1.324 84:1
    #   # A comment
    #   # another comment
    #   -1 73:1 75:1 76:1 80:1 85:1.155

    # 1. we have a result
    self.assertIsNotNone(ds)

    # 2. class is loaded correctly (assertEqual shows the offending value
    #    when it fails, which assertTrue(a == b) does not)
    self.assertEqual(ds.get_targets(0), -1)

    # 3. comment lines correctly skipped
    self.assertEqual(ds.get_targets(2), -1)

    # should not exist
    with self.assertRaises(IndexError):
        ds.get_targets(3)
def testBinaryImport(self):
    """Verify how binary=True import treats two different data files.

    'a1a' has to import without error. Creating an importer for the
    multi-class file 'satimage.scale' has to raise TypeError. Both paths are
    built from the directory of the running script (sys.argv[0]).
    """
    data_root = os.path.dirname(os.path.abspath(sys.argv[0]))

    # should do
    working_importer = LibsvmFileImporter(
        os.path.join(data_root, 'data/classification/a1a'), binary=True)
    working_importer.get_dataSet()

    # should fail
    with self.assertRaises(TypeError):
        # multi-class data
        LibsvmFileImporter(
            os.path.join(data_root, 'data/classification/satimage.scale'),
            binary=True)
# NOTE(review): this exit appears to close a preceding branch whose start is
# not visible in this excerpt -- confirm its indentation when merging.
sys.exit()

## process input
# All three settings are required. For the first one that is missing, print
# the reason, show the usage text and stop. `is None` is used instead of
# `== None` so that the check cannot be affected by a custom __eq__.
for supplied, complaint in ((classifier, "No classifier specified."),
                            (trainingFile, "No training file specified."),
                            (testFile, "No test file specified.")):
    if supplied is None:
        # print(...) with a single argument produces the same output as the
        # Python 2 print statement and also parses on Python 3.
        print(complaint)
        usage()
        sys.exit()

training = LibsvmFileImporter(trainingFile, binary=True).get_dataSet()
# `testing` is loaded but not used in the visible code (see TODO below).
testing = LibsvmFileImporter(testFile, binary=True).get_dataSet()

# start classification - TODO: implement report and validation
# The classifier's class is compared directly instead of building a throwaway
# instance of each candidate just to read its __class__.
# NOTE(review): assumes DualCoordinateDescent and SMO_Keerthi are classes
# (not factory functions) -- confirm.
if classifier.__class__ is DualCoordinateDescent:
    classifier.set_kernel(kernel)
    if verbose:
        print(classifier)
    classifier.train(training.get_features(), training.get_targets())
elif classifier.__class__ is SMO_Keerthi:
    classifier.set_kernel(kernel)
    if verbose:
        print(classifier)
    classifier.train(training.get_features(), training.get_targets())
    # Same text as `print "# support vectors:", n`: label, one space, value.
    print("# support vectors: %s" % (classifier.get_num_support_vectors(),))
def buildClassifier(self, trainFile):
    '''"Builds" the classification model that always returns 1 for each instance.

    The visible code fits no parameters: the training file is imported and
    the number of instances it contains is stored in self.__inst_train.
    '''
    training_set = LibsvmFileImporter(trainFile).get_dataSet()
    self.__inst_train = training_set.get_numInstances()