def test_CanPersistRegressionModelUsingClassifiers(self):
    """Test the save/load for a regression model - Using average of N classifiers"""
    # Arrange: consensus learner built from three individual learner objects
    learners = [AZorngRF.RFLearner(), AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner()]
    learner = AZorngConsensus.ConsensusLearner(learners = learners)
    classifier = learner(self.DataReg)

    # Act: predict with the fresh model, then persist it to disk
    predictions = [classifier(ex) for ex in self.DataReg]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)

    # Assert: the reloaded model reproduces predictions and metadata
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.DataReg]
    self.assertEqual([round(pred.value, 4) for pred in predictions],
                     [round(pred.value, 4) for pred in predictionsL],
                     "Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))
    self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    # presumably 20% of the data is held out internally during training — TODO confirm
    self.assertEqual(Loaded.NTrainEx, len(self.DataReg)*0.8)
    miscUtilities.removeDir(scratchdir)
def test_saveloadReg(self):
    """Test the save/load for a regression model - Using average of N classifiers"""
    # Build a consensus model from learner names and predict on the training data
    learnersNames = ["CvANN","CvSVM","RF"]
    learner = AZorngConsensus.ConsensusLearner(learnersNames = learnersNames)
    classifier = learner(self.DataReg)
    predictions = [classifier(ex) for ex in self.DataReg]

    # Persist the model and reload it
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.DataReg]

    # The reloaded model must reproduce the original predictions and metadata
    self.assertEqual([round(pred.value, 4) for pred in predictions],
                     [round(pred.value, 4) for pred in predictionsL],
                     "Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))
    self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    # NOTE(review): the 66 looks like an internally held-out example count — confirm
    self.assertEqual(Loaded.NTrainEx, len(self.DataReg)-66)
    miscUtilities.removeDir(scratchdir)
def test_FeedLearnersReg(self):
    """Test the creation of Consensus feeding Learners for regression"""
    # The learners can be individually customized before passing them to the
    # Consensus; here the learnersObj keyword is used instead of learnersNames.
    learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
    learner = AZorngConsensus.ConsensusLearner(learnersObj = learners)
    classifier = learner(self.DataReg)
    predictions = [classifier(ex) for ex in self.DataReg]

    # Save and reload the trained consensus model
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.DataReg]

    # Predictions and model metadata must survive the round trip
    self.assertEqual([round(pred.value, 4) for pred in predictions],
                     [round(pred.value, 4) for pred in predictionsL],
                     "Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))
    self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.DataReg))
    miscUtilities.removeDir(scratchdir)
def test_FeedClassifiersReg(self):
    """Test the feeding of regression classifiers """
    # Train each individual classifier up front and feed the trained
    # classifiers (not learners) to the consensus.
    DataSet = self.DataReg
    learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
    classifiers = [l(DataSet) for l in learners]
    classifier = AZorngConsensus.ConsensusClassifier(classifiers = classifiers)
    predictions = [classifier(ex) for ex in DataSet]

    # Save and reload the consensus model
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in DataSet]

    # Round-trip must preserve predictions and model metadata
    self.assertEqual([round(pred.value, 4) for pred in predictions],
                     [round(pred.value, 4) for pred in predictionsL],
                     "Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))
    self.assertEqual(len(Loaded.domain), len(DataSet.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(DataSet))
    miscUtilities.removeDir(scratchdir)
def test_CanPersistClassificationModelMajority(self):
    """Test the save/load for a classification model - Using Majority"""
    # Arrange
    learners = self.createTestLearners()
    learner = AZorngConsensus.ConsensusLearner(learners = learners)
    classifier = learner(self.getClassificationTrainingData())

    # Act
    predictions = [classifier(ex) for ex in self.irisData]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)

    # Assert
    Loaded = AZorngConsensus.Consensusread(modelPath)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    # presumably 20% of the data is held out internally during training — TODO confirm
    self.assertEqual(Loaded.NTrainEx, 0.8*len(self.irisData))
    predictionsL = [Loaded(ex) for ex in self.irisData]
    self.assertEqual(predictions, predictionsL)
    miscUtilities.removeDir(scratchdir)
def test_CanPersistClassificationModelProbabilities(self):
    """Test the save/load for a classification model - Using probabilities average"""
    # Arrange
    learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
    learner = AZorngConsensus.ConsensusLearner(learners = learners)
    classifier = learner(self.irisData)

    # Act
    predictions = [classifier(ex) for ex in self.irisData]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)

    # Assert
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.irisData]
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    # presumably 20% of the data is held out internally during training — TODO confirm
    self.assertEqual(Loaded.NTrainEx, len(self.irisData) - int(0.2 * len(self.irisData)))
    miscUtilities.removeDir(scratchdir)
def TopVarImportanceTest(data, expectNone=False):
    """Check that getTopImportantVars returns identical results before and after
    an SVM model save/load round trip.

    Returns True when results match; with expectNone the results must all be None,
    otherwise they must be defined and not constant across examples."""
    learner = AZorngCvSVM.CvSVMLearner(gamma=1.0, svm_type=103, C=1, coef0=0, degree=3,
                                       epsR=0.001, kernel_type=2, nu=0.5, p=0.1,
                                       probability=0, shrinking=1)
    CvSVM = learner(data)
    resA = [CvSVM.getTopImportantVars(ex, 1) for ex in data]

    # Persist, reload, and query again
    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvSVNModel")
    CvSVM.write(modelPath)
    LoadedCvSVM = AZorngCvSVM.CvSVMread(modelPath)
    miscUtilities.removeDir(scratchdir)
    resB = [LoadedCvSVM.getTopImportantVars(ex, 1) for ex in data]

    if expectNone:
        return resA == resB == [None] * len(data)
    return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
def test_CanPersistClassificationModelProbabilities(self):
    """Test the save/load for a classification model - Using probabilities average"""
    # Arrange
    learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
    learner = AZorngConsensus.ConsensusLearner(learners=learners)
    classifier = learner(self.irisData)

    # Act
    predictions = []
    for ex in self.irisData:
        predictions.append(classifier(ex))

    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    # (removed leftover debug statement that printed the scratch directory)
    classifier.write(os.path.join(scratchdir, "./CM.model"))

    # Assert
    predictionsL = []
    Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir, "./CM.model"))
    for ex in self.irisData:
        predictionsL.append(Loaded(ex))
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    miscUtilities.removeDir(scratchdir)
def test_saveloadClass2(self):
    """Test the save/load for a classification model - Using probabilities average"""
    # Consensus built from learner names; discrete class, so predictions are
    # compared directly (no rounding needed).
    learnersNames = ["RF","CvANN"]
    learner = AZorngConsensus.ConsensusLearner(learnersNames = learnersNames)
    classifier = learner(self.irisData)
    predictions = [classifier(ex) for ex in self.irisData]

    # Save and reload
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.irisData]

    # Round trip must be lossless
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    miscUtilities.removeDir(scratchdir)
def test_CanPersistClassificationModelMajority(self):
    """Test the save/load for a classification model - Using Majority"""
    # Arrange
    learners = self.createTestLearners()
    learner = AZorngConsensus.ConsensusLearner(learners=learners)
    classifier = learner(self.getClassificationTrainingData())

    # Act
    predictions = [classifier(ex) for ex in self.irisData]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)

    # Assert
    Loaded = AZorngConsensus.Consensusread(modelPath)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    predictionsL = [Loaded(ex) for ex in self.irisData]
    self.assertEqual(predictions, predictionsL)
    miscUtilities.removeDir(scratchdir)
def TopVarImportanceTest(data, expectNone=False):
    """Verify getTopImportantVars is stable across an SVM save/load round trip.

    With expectNone, every result must be None; otherwise results must match,
    contain no None, and vary across examples."""
    svmLearner = AZorngCvSVM.CvSVMLearner(
        gamma=1.0,
        svm_type=103,
        C=1,
        coef0=0,
        degree=3,
        epsR=0.001,
        kernel_type=2,
        nu=0.5,
        p=0.1,
        probability=0,
        shrinking=1,
    )
    model = svmLearner(data)
    before = [model.getTopImportantVars(ex, 1) for ex in data]

    # Write the model out, read it back, and drop the scratch area
    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvSVNModel")
    model.write(modelPath)
    reloaded = AZorngCvSVM.CvSVMread(modelPath)
    miscUtilities.removeDir(scratchdir)
    after = [reloaded.getTopImportantVars(ex, 1) for ex in data]

    if expectNone:
        return before == after == [None] * len(data)
    return before == after and None not in before and before.count(before[0]) != len(before)
def test_FeedClassifiersClass(self):
    """Test the creation of Consensus feeding Classifiers"""
    # Pre-trained classifiers are handed directly to the consensus
    learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
    classifiers = [l(self.irisData) for l in learners]
    classifier = AZorngConsensus.ConsensusClassifier(classifiers = classifiers)
    predictions = [classifier(ex) for ex in self.irisData]

    # Save and reload the consensus
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.irisData]

    # The reloaded consensus must behave like the original
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    miscUtilities.removeDir(scratchdir)
def teste_FeelLearnersClass(self):
    """Test the creation of Consensus feeding Learners for classification"""
    # NOTE(review): method name looks like a typo of test_FeedLearnersClass;
    # kept unchanged since unittest discovers it by the "test" prefix.
    # The learners can be individually customized before being passed to the
    # Consensus via the learnersObj keyword.
    learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
    learner = AZorngConsensus.ConsensusLearner(learnersObj = learners)
    classifier = learner(self.irisData)
    predictions = [classifier(ex) for ex in self.irisData]

    # Save and reload
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    Loaded = AZorngConsensus.Consensusread(modelPath)
    predictionsL = [Loaded(ex) for ex in self.irisData]

    # Round trip must be lossless
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    miscUtilities.removeDir(scratchdir)
def testSVM_MPI_3(self): ################################################################### # Test other way of setting appspack ################################################################### # Classification accuracy: ExpectedCA = 0.847 #orange1: 0.837619047619 optimizer = paramOptUtilities.Appspack() learner = AZorngCvSVM.CvSVMLearner() learnerName = "CvSVMLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["C", "gamma"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter,"optimize",True) # Change the range pars.setParameter("C","range",miscUtilities.power2Range(-5,2,1)) trainFile=self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI_3") evalM = "AZutilities.evalUtilities.CA" fMin = False #[<minimum of objective function found>, <optimized parameters>] tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useParameters = pars.getParametersDict(),\ verbose = 0,\ useStd = False,\ advancedMPIoptions = "-v -np 4",\ machinefile = ["localhost:2","localhost:2"]) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
# Check if the MPI version was used self.assertEqual(learner.optimized,True) # Check if the MPI version was used self.assertEqual(optimizer.usedMPI, True) self.assertEqual(round(tunedPars[0],3),round(ExpectedCA,3)) self.assert_(len(dataUtilities.DataTable(os.path.join(runPath,"optimizationLog.txt")))>=12) # (orig 14) Must be > 2 #print runPath miscUtilities.removeDir(runPath)
def test_RFRegression(self): """RF - Test of optimizer with continuous class data """ #Create the appspack instance opt=paramOptUtilities.Appspack() #Learner to be optimized learner=AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet=self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin=True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM="AZutilities.evalUtilities.RMSE" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API("RFLearner") # Set the parameters in parameterList to be optimized pars.setParameter("NumThreads","optimize",False) # Change the default pars.setParameter("NumThreads","default","1") # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ useParameters = pars.getParametersDict(),\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI,False) self.assertEqual(learner.optimized,True) self.assertEqual(round(tunedPars[0],2),round(3.1499999999999999,2)) #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest,classifier) self.assertEqual(round(RMSE,2),round(2.02,2)) #Ver 0.3 #Check if the best result was not the one with numThreads different of 1 since that way we can get #different results among runs self.assertEqual(int(tunedPars[1]["NumThreads"]),1) miscUtilities.removeDir(runPath)
def test_RFRegression(self): """RF - Test of optimizer with continuous class data """ #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM = "AZutilities.evalUtilities.RMSE" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API("RFLearner") # Set the parameters in parameterList to be optimized pars.setParameter("NumThreads", "optimize", False) # Change the default pars.setParameter("NumThreads", "default", "1") # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ useParameters = pars.getParametersDict(),\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assertEqual(round(tunedPars[0], 2), round(3.1499999999999999, 2)) #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest, classifier) self.assertEqual(round(RMSE, 2), round(2.02, 2)) #Ver 0.3 #Check if the best result was not the one with numThreads different of 1 since that way we can get #different results among runs self.assertEqual(int(tunedPars[1]["NumThreads"]), 1) miscUtilities.removeDir(runPath)
def test_RF_Classification(self): """PLS - Test of optimizer with discrete class data """ expectedAcc = [ 0.57999999999999996, 0.58999999999999997, 0.612 ] #Ver 0.3 - Artifact: The second value can be expected on other Systems #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.discTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = False # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.CA" evalM = "AZutilities.evalUtilities.CA" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assert_( round(tunedPars[0], 2) in [round(x, 2) for x in expectedAcc]) #Ver 0.3 #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.discTrain) CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier) expectedCA = [0.9655 ] # Artifact: Second value expected in UBUNTU 10.10 self.assert_(round(CA, 2) in [round(ca, 2) for ca in expectedCA]) # Ver 0.3 miscUtilities.removeDir(runPath)
def arrayJob(jobName = "AZOarray",jobNumber =1 ,jobParams = [], jobParamFile = "Params.pkl", jobQueue = "quick.q", jobScript = "", memSize = "150M"): runPath = miscUtilities.createScratchDir(desc ="optQsub"+jobName, baseDir = AZOC.NFS_SCRATCHDIR) cwd = os.getcwd() os.chdir(runPath) paramFile = open(jobParamFile,"w") cPickle.dump(jobParams,paramFile) paramFile.close() jobFile = open(jobName + ".py","w") jobFile.write(jobScript) jobFile.close() cmd = "echo python " + os.path.join(runPath, str(jobName) + ".py") + \ " | qsub -cwd -V -q " + str(jobQueue) + \ " -p -800 -t 1-" + str(jobNumber) + \ " -N " + str(jobName) + \ " -S /bin/sh -sync yes" + \ AZOC.SGE_QSUB_ARCH_OPTION_CURRENT + \ " -l mf=" + str(memSize) # specify shell /bin/sh so not to get warning: no access to tty in output file. (status, output) = commands.getstatusoutput(cmd) # Check exit status of all our jobs if status != 0: print jobName + " failed! Code = " + str(status) print output raise ValueError for line in output.split("\n"): if not "exit code 0" in line: if not "Your job-array" in line: print jobName + " failed! " + line raise ValueError # Check if error files exist that are not empty. for part in sorted(glob(os.path.join(runPath,jobName+".e*"))): if os.path.getsize(part) != 0: print jobName + " failed! file " + str(part) raise ValueError # Build result list from pickle objects resList = [] for part in sorted(glob(os.path.join(runPath,jobName+".o*"))): file = open(part,"r") resList.append(cPickle.load(file)) file.close() os.chdir(cwd) miscUtilities.removeDir(runPath) return resList
def test_RF_Regression(self): """RF - Test of optimizer with continuous class data """ expectedRes = [ 3.27, 3.2599999999999998, 3.15 ] #Ver 0.3 - Artifact: The second value can be expected on other Systems #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization dataSet = self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM = "AZutilities.evalUtilities.RMSE" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assert_( round(tunedPars[0], 2) in [round(x, 2) for x in expectedRes]) #Ver 0.3 #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest, classifier) expectedRes = [2.89, 2.0158] self.assert_(round(RMSE, 2) in [round(x, 2) for x in expectedRes]) #Ver 0.3 miscUtilities.removeDir(runPath)
def buildModel(trainData, MLMethod, queueType = "NoSGE", verbose = 0, logFile = None): """ Buld the method passed in MLMethod and optimize ( "IndividualStatistics" not in MLMethod) if MLMethod is a Consensus ("individualStatistics" in MLMethod) , build each and optimize first all models and after build the consensus! """ log(logFile, "Building and optimizing learner: "+MLMethod["MLMethod"]+"...") learners = {} MLMethods = {} if "IndividualStatistics" in MLMethod: #It is a consensus for ML in MLMethod["IndividualStatistics"]: MLMethods[ML] = MLMethod["IndividualStatistics"][ML] else: MLMethods[MLMethod["MLMethod"]] = MLMethod # optimize all MLMethods for ML in MLMethods: log(logFile, " Optimizing MLmethod: "+ML) learners[ML] = MLMETHODS[ML](name = ML) runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "AutoQSAR") trainData.save(os.path.join(runPath,"trainData.tab")) paramOptUtilities.getOptParam( learner = learners[ML], trainDataFile = os.path.join(runPath,"trainData.tab"), useGrid = False, verbose = verbose, queueType = queueType, runPath = runPath, nExtFolds = None) if not learners[ML].optimized: print "ERROR: AutoQSAR: The learner was not optimized." return None else: print "Optimized learner ",learners[ML] miscUtilities.removeDir(runPath) #Train the model if len(learners) == 1: log(logFile, " Building the optimized learner:"+learners.keys()[0]) model = learners[learners.keys()[0]](trainData) elif len(learners) >= 1: model = buildConsensus(trainData,learners,MLMethods) else: print "ERROR: No Learners were selected!" return None return model
def test_PLS_Classification(self): """PLS - Test of optimizer with discrete class data """ expectedAcc = [0.57999999999999996, 0.58999999999999997] #Ver 0.3 - Artifact: The second value can be expected on other Systems #Create the appspack instance opt=paramOptUtilities.Appspack() #Learner to be optimized learner=AZorngPLS.PLSLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet=self.discTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin=False # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.CA" evalM="AZutilities.evalUtilities.CA" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI,False) self.assertEqual(learner.optimized,True) self.assert_(round(tunedPars[0],2) in [round(x,2) for x in expectedAcc]) #Ver 0.3 #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.discTrain) CA = evalUtilities.getClassificationAccuracy(self.discTest,classifier) expectedCA = [0.58999999999999997,2 ,0.57999999999999996] # Artifact: Second value expected in UBUNTU 10.10 self.assert_(round(CA,2) in [round(ca,2) for ca in expectedCA]) # Ver 0.3 miscUtilities.removeDir(runPath)
def test_SaveLoadCustomRegressionExpression(self): """ Test save/load custom expression using average N regression with object map """ # Arrange learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } # Construct expression learner/classifier regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3" expressionLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=regressionExpression) expressionClassifier = expressionLearner(self.DataReg) # Construct default learner/classifier result = [] for ex in self.DataReg: result.append(expressionClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir( desc="ConsensusSaveLoadTest") expressionClassifier.write(os.path.join(scratchdir, "./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread( os.path.join(scratchdir, "./CM.model")) self.assertNotEqual(loaded, None) for ex in self.DataReg: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not float_compare(result[index].value, resultLoaded[index].value): print "Not equal on index: ", index self.assertEqual( float_compare(result[index].value, resultLoaded[index].value), True) self.assertEqual(len(loaded.domain), len(self.DataReg.domain)) self.assertEqual(len(loaded.imputeData), len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.DataReg)) miscUtilities.removeDir(scratchdir)
def test_SaveLoadCustomLogicalExpression(self): """ Test save/load functionality with a custom logical expression """ # Arrange # Construct expression learner/classifier learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } discreteExpression = [ "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) result = [] for ex in self.irisData: result.append(discreteClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir( desc="ConsensusSaveLoadTest") discreteClassifier.write(os.path.join(scratchdir, "./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread( os.path.join(scratchdir, "./CM.model")) self.assertNotEqual(loaded, None) for ex in self.irisData: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not result[index].value == resultLoaded[index].value: print "Not equal on index: ", index self.assertEqual(result[index].value, resultLoaded[index].value) self.assertEqual(len(loaded.domain), len(self.irisData.domain)) self.assertEqual(len(loaded.imputeData), len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.irisData)) miscUtilities.removeDir(scratchdir)
def TopVarImportanceTest(data, expectNone = False):
    """Check that CvBoost getTopImportantVars answers survive a save/load round-trip.

    Returns True when the pre- and post-load results agree and (unless
    expectNone) are non-None and not all identical.
    """
    boostModel = AZorngCvBoost.CvBoostLearner(data)
    varsBefore = [boostModel.getTopImportantVars(ex, 1) for ex in data]
    # Persist the model, reload it, and drop the scratch area.
    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvBoostModel")
    boostModel.write(modelPath)
    loadedModel = AZorngCvBoost.CvBoostread(modelPath)
    miscUtilities.removeDir(scratchdir)
    varsAfter = [loadedModel.getTopImportantVars(ex, 1) for ex in data]
    if expectNone:
        return varsBefore == varsAfter == [None] * len(data)
    return (varsBefore == varsAfter and None not in varsBefore
            and varsBefore.count(varsBefore[0]) != len(varsBefore))
def test_PLS_Regression(self): """PLS - Test of optimizer with continuous class data """ #Create the appspack instance opt=paramOptUtilities.Appspack() #Learner to be optimized learner=AZorngPLS.PLSLearner() #dataset to use in the parameters optimization dataSet=self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin=True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM="AZutilities.evalUtilities.RMSE" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" self.assertEqual(opt.usedMPI,False) self.assertEqual(learner.optimized,True) self.assertEqual(round(tunedPars[0],2),round(0.858060000000,2)) #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest,classifier) self.assertEqual(round(RMSE,2),round(0.656979500000,2)) miscUtilities.removeDir(runPath)
def TopVarImportanceTest(data, expectNone = False):
    """Check that CvANN getTopImportantVars answers survive a save/load round-trip.

    Returns True when the pre- and post-load results agree and (unless
    expectNone) are non-None and not all identical.
    """
    annModel = AZorngCvANN.CvANNLearner(data, stopUPs=0)
    varsBefore = [annModel.getTopImportantVars(ex, 1) for ex in data]
    # Persist the model, reload it, and drop the scratch area.
    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvANNModel")
    annModel.write(modelPath)
    loadedModel = AZorngCvANN.CvANNread(modelPath)
    miscUtilities.removeDir(scratchdir)
    varsAfter = [loadedModel.getTopImportantVars(ex, 1) for ex in data]
    if expectNone:
        return varsBefore == varsAfter == [None] * len(data)
    return (varsBefore == varsAfter and None not in varsBefore
            and varsBefore.count(varsBefore[0]) != len(varsBefore))
def testTopRankedNoGrid(self):
    """
    Test the TopRanked method in getBestModel with a test set and without grid computing.
    """
    # Input arguments for getBestModelTopRank.
    resultsDir = miscUtilities.createScratchDir(desc="GetBestModelTest")
    descList = [5, 10]
    grid = False
    batchQueue = False
    optParam = True
    # Method under test.
    getBestModel.getBestModelTopRank(self.trainPath, self.testPath, resultsDir,
                                     descList, grid, optParam, batchQueue)
    # Both the .tex and .pdf result files must have been produced.
    texFile = resultsDir + "/batchResults.tex"
    self.assert_(os.path.exists(texFile),
                 "No results file created with getBestModelTopRank")
    pdfFile = resultsDir + "/batchResults.pdf"
    self.assert_(os.path.exists(pdfFile),
                 "No pdf file created with getBestModelTopRank")
    miscUtilities.removeDir(resultsDir)
def test_saveloadReg(self):
    """Test the save/load for a regression model - Using average of N classifiers"""
    # Build a consensus from learner names and train it on solubility data.
    learnersNames = ["CvANN", "CvSVM", "RF"]
    consensusLearner = AZorngConsensus.ConsensusLearner(learnersNames=learnersNames)
    classifier = consensusLearner(self.DataSol)
    predsBefore = [classifier(ex) for ex in self.DataSol]
    # Round-trip through disk and compare predictions.
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    reloaded = AZorngConsensus.Consensusread(modelPath)
    predsAfter = [reloaded(ex) for ex in self.DataSol]
    self.assertEqual(predsBefore, predsAfter)
    miscUtilities.removeDir(scratchdir)
def test_CanPersistRegressionModelUsingClassifiers(self):
    """Test the save/load for a regression model - Using average of N classifiers"""
    # Arrange: a consensus learner built from three regression learners.
    baseLearners = [AZorngRF.RFLearner(),
                    AZorngCvSVM.CvSVMLearner(),
                    AZorngCvANN.CvANNLearner()]
    consensusLearner = AZorngConsensus.ConsensusLearner(learners=baseLearners)
    classifier = consensusLearner(self.DataReg)

    # Act: predict, then persist the classifier.
    predsBefore = [classifier(ex) for ex in self.DataReg]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    classifier.write(os.path.join(scratchdir, "./CM.model"))

    # Assert: the reloaded model reproduces predictions and metadata.
    reloaded = AZorngConsensus.Consensusread(
        os.path.join(scratchdir, "./CM.model"))
    predsAfter = [reloaded(ex) for ex in self.DataReg]
    self.assertEqual(
        [round(p.value, 4) for p in predsBefore],
        [round(p.value, 4) for p in predsAfter],
        "Loaded model predictions differ: Pred. 1 (saved/loaded):" +
        str(predsBefore[0]) + " / " + str(predsAfter[0]))
    self.assertEqual(len(reloaded.domain), len(self.DataReg.domain))
    self.assertEqual(len(reloaded.imputeData), len(reloaded.domain))
    self.assertEqual(len(reloaded.basicStat), len(reloaded.domain))
    self.assertEqual(reloaded.NTrainEx, len(self.DataReg))
    miscUtilities.removeDir(scratchdir)
def testDescSetNoGrid(self):
    """
    Test the descSet method in getBestModel without a test set and without grid computing.
    """
    # Input arguments for getBestModelDescSet.
    resultsDir = miscUtilities.createScratchDir(desc="GetBestModelTest")
    descList = ["AZ_descriptors"]  #, "SELMA"]
    grid = False
    batchQueue = False
    optParam = False
    # Method under test; "noTest" means no external test set is supplied.
    getBestModel.getBestModelDescSet(self.trainPath2, "noTest", resultsDir,
                                     descList, grid, optParam, batchQueue)
    # Both the .tex and .pdf result files must have been produced.
    texFile = resultsDir + "/batchResults.tex"
    self.assert_(os.path.exists(texFile),
                 "No results file created with getBestModelDescSet")
    pdfFile = resultsDir + "/batchResults.pdf"
    self.assert_(os.path.exists(pdfFile),
                 "No pdf file created with getBestModelDescSet")
    miscUtilities.removeDir(resultsDir)
def test_SaveLoadCustomRegressionExpression(self): """ Test save/load custom expression using average N regression with object map """ # Arrange learners = {'firstLearner':AZorngCvSVM.CvSVMLearner(), 'secondLearner':AZorngCvANN.CvANNLearner(), 'thirdLearner':AZorngRF.RFLearner()} # Construct expression learner/classifier regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3" expressionLearner = AZorngConsensus.ConsensusLearner(learners = learners, expression = regressionExpression) expressionClassifier = expressionLearner(self.DataReg) # Construct default learner/classifier result = [] for ex in self.DataReg: result.append(expressionClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest") expressionClassifier.write(os.path.join(scratchdir,"./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model")) self.assertNotEqual(loaded, None) for ex in self.DataReg: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not float_compare(result[index].value, resultLoaded[index].value): print "Not equal on index: ", index self.assertEqual(float_compare(result[index].value, resultLoaded[index].value), True) self.assertEqual(len(loaded.domain),len(self.DataReg.domain)) self.assertEqual(len(loaded.imputeData) , len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.DataReg)) miscUtilities.removeDir(scratchdir)
def test_SaveLoadCustomLogicalExpression(self): """ Test save/load functionality with a custom logical expression """ # Arrange # Construct expression learner/classifier learners = {'firstLearner':AZorngCvSVM.CvSVMLearner(), 'secondLearner':AZorngCvANN.CvANNLearner(), 'thirdLearner':AZorngRF.RFLearner()} discreteExpression = ["firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica"] discreteLearner = AZorngConsensus.ConsensusLearner(learners = learners, expression = discreteExpression) discreteClassifier = discreteLearner(self.irisData) result = [] for ex in self.irisData: result.append(discreteClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest") discreteClassifier.write(os.path.join(scratchdir,"./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model")) self.assertNotEqual(loaded, None) for ex in self.irisData: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not result[index].value == resultLoaded[index].value: print "Not equal on index: ", index self.assertEqual(result[index].value, resultLoaded[index].value) self.assertEqual(len(loaded.domain),len(self.irisData.domain)) self.assertEqual(len(loaded.imputeData) , len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.irisData)) miscUtilities.removeDir(scratchdir)
def test_FeedClassifiersReg(self):
    """Test the feeding of regression classifiers """
    # Train stand-alone classifiers first, then hand them to the consensus.
    DataSet = dataUtilities.DataTable(
        os.path.join(AZOC.AZORANGEHOME, "tests/source/data/dummy.tab"))
    #DataSet = self.DataSol
    baseLearners = [AZorngCvSVM.CvSVMLearner(),
                    AZorngCvANN.CvANNLearner(),
                    AZorngRF.RFLearner()]
    trainedClassifiers = [lrn(DataSet) for lrn in baseLearners]
    classifier = AZorngConsensus.ConsensusClassifier(
        classifiers=trainedClassifiers)
    predsBefore = [classifier(ex) for ex in DataSet]
    # Round-trip through disk and compare predictions.
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    reloaded = AZorngConsensus.Consensusread(modelPath)
    predsAfter = [reloaded(ex) for ex in DataSet]
    self.assertEqual(predsBefore, predsAfter)
    miscUtilities.removeDir(scratchdir)
def test_FeedLearnersReg(self):
    """Test the creation of Consensus feeding Learners for regression"""
    # The learners can be individually customized before being handed
    # to the consensus through the learnersObj keyword.
    baseLearners = [AZorngCvSVM.CvSVMLearner(),
                    AZorngCvANN.CvANNLearner(),
                    AZorngRF.RFLearner()]
    consensusLearner = AZorngConsensus.ConsensusLearner(learnersObj=baseLearners)
    classifier = consensusLearner(self.DataSol)
    predsBefore = [classifier(ex) for ex in self.DataSol]
    # Round-trip through disk and compare predictions.
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    modelPath = os.path.join(scratchdir, "./CM.model")
    classifier.write(modelPath)
    reloaded = AZorngConsensus.Consensusread(modelPath)
    predsAfter = [reloaded(ex) for ex in self.DataSol]
    self.assertEqual(predsBefore, predsAfter)
    miscUtilities.removeDir(scratchdir)
def getAcc(self, callBack=None, callBackWithFoldModel=None):
    """ For regression problems, it returns the RMSE and the Q2
        For Classification problems, it returns CA and the ConfMat
        The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
        For the EvalResults not supported for a specific learner/datase, the respective result will be None

        if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results
        for all Learners and for a consensus made out of those that were stable

        It some error occurred, the respective values in the Dict will be None

        callBack: optional progress callable taking a percent-done int;
                  returning a falsy value aborts and makes getAcc return None.
        callBackWithFoldModel: optional callable invoked with each fold model.
    """
    self.__log("Starting Calculating MLStatistics")
    statistics = {}
    if not self.__areInputsOK():
        return None
    # Set the response type
    self.responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
    self.__log(" " + str(self.responseType))
    #Create the Train and test sets
    if self.usePreDefFolds:
        DataIdxs = self.preDefIndices
    else:
        DataIdxs = self.sampler(self.data, self.nExtFolds)
    foldsN = [f for f in dict.fromkeys(DataIdxs) if f != 0]  #Folds used only from 1 on ... 0 are for fixed train Bias
    nFolds = len(foldsN)
    #Fix the Indexes based on DataIdxs
    # (0s) represents the train set ( >= 1s) represents the test set folds
    if self.useVarCtrlCV:
        nShifted = [0] * nFolds
        for idx, isTest in enumerate(self.preDefIndices):
            # self.preDefIndices == 0 are to be used in TrainBias
            if not isTest:
                if DataIdxs[idx]:
                    nShifted[DataIdxs[idx]] += 1
                    DataIdxs[idx] = 0
        for idx, shift in enumerate(nShifted):
            self.__log("In fold " + str(idx) + ", " + str(shift) + " examples were shifted to the train set.")
    #Var for saving each Fols result
    optAcc = {}
    results = {}
    exp_pred = {}
    nTrainEx = {}
    nTestEx = {}
    #Set a dict of learners
    MLmethods = {}
    if type(self.learner) == dict:
        for ml in self.learner:
            MLmethods[ml] = self.learner[ml]
    else:
        MLmethods[self.learner.name] = self.learner
    models = {}
    self.__log("Calculating Statistics for MLmethods:")
    self.__log(" " + str([x for x in MLmethods]))
    #Check data in advance so that, by chance, it will not faill at the last fold!
    for foldN in foldsN:
        trainData = self.data.select(DataIdxs, foldN, negate=1)
        self.__checkTrainData(trainData)
    #Optional!!
    # Order Learners so that PLS is the first
    sortedML = [ml for ml in MLmethods]
    if "PLS" in sortedML:
        sortedML.remove("PLS")
        sortedML.insert(0, "PLS")
    stepsDone = 0
    nTotalSteps = len(sortedML) * self.nExtFolds
    for ml in sortedML:
        startTime = time.time()
        self.__log(" > " + str(ml) + "...")
        try:
            #Var for saving each Fols result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            optAcc[ml] = []
            logTxt = ""
            for foldN in foldsN:
                if type(self.learner) == dict:
                    self.paramList = None
                trainData = self.data.select(DataIdxs, foldN, negate=1)
                testData = self.data.select(DataIdxs, foldN)
                smilesAttr = dataUtilities.getSMILESAttr(trainData)
                if smilesAttr:
                    self.__log("Found SMILES attribute:" + smilesAttr)
                    # specialType == 1 learners consume SMILES directly; others
                    # must have the SMILES column removed before training.
                    if MLmethods[ml].specialType == 1:
                        trainData = dataUtilities.attributeSelectionData(
                            trainData, [smilesAttr, trainData.domain.classVar.name])
                        testData = dataUtilities.attributeSelectionData(
                            testData, [smilesAttr, testData.domain.classVar.name])
                        self.__log(
                            "Selected attrs: " +
                            str([attr.name for attr in trainData.domain]))
                    else:
                        trainData = dataUtilities.attributeDeselectionData(
                            trainData, [smilesAttr])
                        testData = dataUtilities.attributeDeselectionData(
                            testData, [smilesAttr])
                        self.__log("Selected attrs: " + str(
                            [attr.name for attr in trainData.domain[0:3]] +
                            ["..."] + [
                                attr.name for attr in trainData.
                                domain[len(trainData.domain) - 3:]
                            ]))
                nTrainEx[ml].append(len(trainData))
                nTestEx[ml].append(len(testData))
                #Test if trainsets inside optimizer will respect dataSize criterias.
                #  if not, don't optimize, but still train the model
                dontOptimize = False
                if self.responseType != "Classification" and (
                        len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                    dontOptimize = True
                else:
                    tmpDataIdxs = self.sampler(trainData, self.nInnerFolds)
                    tmpTrainData = trainData.select(tmpDataIdxs, 1, negate=1)
                    if not self.__checkTrainData(tmpTrainData, False):
                        dontOptimize = True
                SpecialModel = None
                if dontOptimize:
                    # Skip hyper-parameter optimization; estimate optAcc by a
                    # plain 5-fold cross-validation instead.
                    logTxt += " Fold " + str(
                        foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                    self.__log(logTxt)
                    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                        res = evalUtilities.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            stratified=orange.MakeRandomIndices.
                            StratifiedIfPossible,
                            random_generator=random.randint(0, 100))
                        CA = evalUtilities.CA(res)[0]
                        optAcc[ml].append(CA)
                    else:
                        res = evalUtilities.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            stratified=orange.MakeRandomIndices.
                            StratifiedIfPossible,
                            random_generator=random.randint(0, 100))
                        R2 = evalUtilities.R2(res)[0]
                        optAcc[ml].append(R2)
                else:
                    if MLmethods[ml].specialType == 1:
                        # Learner provides its own optimization entry point.
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            optInfo, SpecialModel = MLmethods[ml].optimizePars(
                                trainData, folds=5)
                            optAcc[ml].append(optInfo["Acc"])
                        else:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        # Generic path: run the external parameter optimizer in
                        # a scratch directory on the saved training data.
                        runPath = miscUtilities.createScratchDir(
                            baseDir=AZOC.NFS_SCRATCHDIR,
                            desc="AccWOptParam",
                            seed=id(trainData))
                        trainData.save(os.path.join(runPath, "trainData.tab"))
                        tunedPars = paramOptUtilities.getOptParam(
                            learner=MLmethods[ml],
                            trainDataFile=os.path.join(runPath, "trainData.tab"),
                            paramList=self.paramList,
                            useGrid=False,
                            verbose=self.verbose,
                            queueType=self.queueType,
                            runPath=runPath,
                            nExtFolds=None,
                            nFolds=self.nInnerFolds,
                            logFile=self.logFile,
                            getTunedPars=True,
                            fixedParams=self.fixedParams)
                        if not MLmethods[ml] or not MLmethods[ml].optimized:
                            self.__log(
                                " WARNING: GETACCWOPTPARAM: The learner " +
                                str(ml) + " was not optimized.")
                            self.__log(" It will be ignored")
                            #self.__log(" It will be set to default parameters")
                            self.__log(" DEBUG can be done in: " + runPath)
                            #Set learner back to default
                            #MLmethods[ml] = MLmethods[ml].__class__()
                            raise Exception("The learner " + str(ml) +
                                            " was not optimized.")
                        else:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optAcc[ml].append(tunedPars[0])
                            else:
                                res = evalUtilities.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    stratified=orange.MakeRandomIndices.
                                    StratifiedIfPossible,
                                    random_generator=random.randint(0, 100))
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)
                            miscUtilities.removeDir(runPath)
                #Train the model
                if SpecialModel is not None:
                    model = SpecialModel
                else:
                    model = MLmethods[ml](trainData)
                models[ml].append(model)
                #Test the model
                if self.responseType == "Classification":
                    results[ml].append(
                        (evalUtilities.getClassificationAccuracy(
                            testData, model),
                         evalUtilities.getConfMat(testData, model)))
                else:
                    local_exp_pred = []
                    # Predict using bulk-predict
                    predictions = model(testData)
                    # Gather predictions
                    for n, ex in enumerate(testData):
                        local_exp_pred.append(
                            (ex.getclass().value, predictions[n].value))
                    results[ml].append(
                        (evalUtilities.calcRMSE(local_exp_pred),
                         evalUtilities.calcRsqrt(local_exp_pred)))
                    #Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
                if callBack:
                    stepsDone += 1
                    if not callBack((100 * stepsDone) / nTotalSteps):
                        return None
                if callBackWithFoldModel:
                    callBackWithFoldModel(model)
            res = self.createStatObj(
                results[ml],
                exp_pred[ml],
                nTrainEx[ml],
                nTestEx[ml],
                self.responseType,
                self.nExtFolds,
                logTxt,
                labels=hasattr(self.data.domain.classVar, "values") and
                list(self.data.domain.classVar.values) or None)
            if self.verbose > 0:
                print "UnbiasedAccuracyGetter!Results " + ml + ":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            res["runningTime"] = time.time() - startTime
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
            self.__log(" OK")
        except:
            # Broad catch on purpose: one failing learner must not abort the
            # evaluation of the remaining ones; record an empty stat object.
            self.__log(" Learner " + str(ml) +
                       " failed to create/optimize the model!")
            error = str(sys.exc_info()[0]) +" "+\
                    str(sys.exc_info()[1]) +" "+\
                    str(traceback.extract_tb(sys.exc_info()[2]))
            self.__log(error)
            res = self.createStatObj()
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
    if not statistics or len(statistics) < 1:
        self.__log("ERROR: No statistics to return!")
        return None
    elif len(statistics) > 1:
        #We still need to build a consensus model out of the stable models
        #   ONLY if there are more that one model stable!
        #   When only one or no stable models, build a consensus based on all models
        #   ALWAYS exclude specialType models (MLmethods[ml].specialType > 0)
        consensusMLs = {}
        for modelName in statistics:
            StabilityValue = statistics[modelName]["StabilityValue"]
            if StabilityValue is not None and statistics[modelName]["stable"]:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])
        self.__log("Found " + str(len(consensusMLs)) +
                   " stable MLmethods out of " + str(len(statistics)) +
                   " MLmethods.")
        if len(consensusMLs) <= 1:  # we need more models to build a consensus!
            consensusMLs = {}
            for modelName in statistics:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])
        # Exclude specialType models
        excludeThis = []
        for learnerName in consensusMLs:
            if models[learnerName][0].specialType > 0:
                excludeThis.append(learnerName)
        for learnerName in excludeThis:
            consensusMLs.pop(learnerName)
            self.__log(" > Excluded special model " + learnerName)
        self.__log(" > Stable modules: " + str(consensusMLs.keys()))
        if len(consensusMLs) >= 2:
            #Var for saving each Fols result
            startTime = time.time()
            Cresults = []
            Cexp_pred = []
            CnTrainEx = []
            CnTestEx = []
            self.__log(
                "Calculating the statistics for a Consensus model based on " +
                str([ml for ml in consensusMLs]))
            for foldN in range(self.nExtFolds):
                if self.responseType == "Classification":
                    # Weighted-vote expression: each learner's vote for a class
                    # is weighted by its per-fold optimization accuracy.
                    CLASS0 = str(self.data.domain.classVar.values[0])
                    CLASS1 = str(self.data.domain.classVar.values[1])
                    # exprTest0
                    exprTest0 = "(0"
                    for ml in consensusMLs:
                        exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                            optAcc[ml][foldN]) + " "
                    exprTest0 += ")/IF0(sum([False"
                    for ml in consensusMLs:
                        exprTest0 += ", " + ml + " == " + CLASS0 + " "
                    exprTest0 += "]),1)"
                    # exprTest1
                    exprTest1 = "(0"
                    for ml in consensusMLs:
                        exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                            optAcc[ml][foldN]) + " "
                    exprTest1 += ")/IF0(sum([False"
                    for ml in consensusMLs:
                        exprTest1 += ", " + ml + " == " + CLASS1 + " "
                    exprTest1 += "]),1)"
                    # Expression
                    expression = [
                        exprTest0 + " >= " + exprTest1 + " -> " + CLASS0,
                        " -> " + CLASS1
                    ]
                else:
                    # Regression: Q2-weighted average of the learners' outputs.
                    Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                    expression = "(1 / " + str(Q2sum) + ") * (0"
                    for ml in consensusMLs:
                        expression += " + " + str(
                            optAcc[ml][foldN]) + " * " + ml + " "
                    expression += ")"
                testData = self.data.select(
                    DataIdxs, foldN + 1)  # fold 0 if for the train Bias!!
                smilesAttr = dataUtilities.getSMILESAttr(testData)
                if smilesAttr:
                    self.__log("Found SMILES attribute:" + smilesAttr)
                    testData = dataUtilities.attributeDeselectionData(
                        testData, [smilesAttr])
                    # NOTE(review): this log line reads trainData (leftover
                    # from the per-learner loop) while testData is the table
                    # being processed here — looks like it should use
                    # testData.domain; confirm before changing.
                    self.__log("Selected attrs: " + str(
                        [attr.name for attr in trainData.domain[0:3]] +
                        ["..."] + [
                            attr.name for attr in
                            trainData.domain[len(trainData.domain) - 3:]
                        ]))
                CnTestEx.append(len(testData))
                consensusClassifiers = {}
                for learnerName in consensusMLs:
                    consensusClassifiers[learnerName] = models[learnerName][foldN]
                model = AZorngConsensus.ConsensusClassifier(
                    classifiers=consensusClassifiers, expression=expression)
                CnTrainEx.append(model.NTrainEx)
                #Test the model
                if self.responseType == "Classification":
                    Cresults.append(
                        (evalUtilities.getClassificationAccuracy(
                            testData, model),
                         evalUtilities.getConfMat(testData, model)))
                else:
                    local_exp_pred = []
                    # Predict using bulk-predict
                    predictions = model(testData)
                    # Gather predictions
                    for n, ex in enumerate(testData):
                        local_exp_pred.append(
                            (ex.getclass().value, predictions[n].value))
                    Cresults.append(
                        (evalUtilities.calcRMSE(local_exp_pred),
                         evalUtilities.calcRsqrt(local_exp_pred)))
                    #Save the experimental value and correspondent predicted value
                    Cexp_pred += local_exp_pred
            res = self.createStatObj(
                Cresults,
                Cexp_pred,
                CnTrainEx,
                CnTestEx,
                self.responseType,
                self.nExtFolds,
                labels=hasattr(self.data.domain.classVar, "values") and
                list(self.data.domain.classVar.values) or None)
            res["runningTime"] = time.time() - startTime
            statistics["Consensus"] = copy.deepcopy(res)
            statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
            self.__writeResults(statistics)
        self.__log("Returned multiple ML methods statistics.")
        return statistics
    #By default return the only existing statistics!
    self.__writeResults(statistics)
    self.__log("Returned only one ML method statistics.")
    return statistics[statistics.keys()[0]]
def optimizeParameters(self):
    """ Sets up the input learner with tuned parameters.

    Runs the appspack optimizer (serial, local-MPI or SGE depending on
    self.execEnv) over the widget's dataset, then emits the tuned learner
    on "Learner - Tuned" and the optimizer's intermediate results on
    "Examples - Optimization Steps". On any failure both channels get None.
    """
    self.clearErrors()
    self.tunedPars = None
    if hasattr(self.learner, "optimized"):
        self.learner.optimized = False
    if not self.learner:
        # No learner connected: clear both output channels.
        self.send("Learner - Tuned", None)
        self.send("Examples - Optimization Steps", None)
        self.updateInfo()
        return
    # Apply the parameters var with values on configuration table of GUI (user could have changed them!)
    if not self.updateParametersFromTable():
        return
    if not self.dataset:
        self.dataset = None
        self.send("Learner - Tuned", None)
        self.send("Examples - Optimization Steps", None)
        self.updateInfo()
        return
    # Progess Bar 1
    optSteps = 3
    progress1 = QProgressDialog(
        "Gathering data and configuring the optimizer...", "Cancel", 0,
        optSteps, self, Qt.Dialog)  #, "progress", True )
    progress1.setWindowModality(Qt.WindowModal)
    bar1 = QProgressBar(progress1)
    bar1.show()
    progress1.setBar(bar1)
    #progress1.setTotalSteps(optSteps)
    progress1.setMinimumDuration(0)
    progress1.forceShow()
    progress1.setValue(0)
    time.sleep(0.1)
    progress1.setValue(0)
    # Create path for running the optimizer
    randNr = random.randint(0, 10000)
    if self.execEnv == 0:
        scratchdir = miscUtilities.createScratchDir(
            desc="OWParamOpt_Serial")
    else:
        # Non-serial runs need a scratch dir on the shared NFS area.
        scratchdir = miscUtilities.createScratchDir(
            desc="OWParamOpt_MPI", baseDir=AZOC.NFS_SCRATCHDIR)
    # Save the dataset to the optimizer running path
    OrngFile = os.path.join(scratchdir, "OrngData.tab")
    orange.saveTabDelimited(OrngFile, self.dataset)
    # Advance Progress Bar
    progress1.setValue(1)
    # Define the evaluation method to use
    if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous:
        fMin = self.RMethods[self.RMethod][2]
        evalM = self.RMethods[self.RMethod][1]
    else:
        fMin = self.CMethods[self.CMethod][2]
        evalM = self.CMethods[self.CMethod][1]
    try:
        # Write the learner's parameter configuration file for the optimizer.
        if os.path.exists(
                os.path.join(scratchdir, "AZLearnersParamsConfig.py")):
            os.system(
                "rm " +
                str(os.path.join(scratchdir, "AZLearnersParamsConfig.py")))
        paramFile = file(
            os.path.join(scratchdir, "AZLearnersParamsConfig.py"), "w")
        paramFile.write(self.learnerType + "= " + str(self.parameters) +
                        "\r\n")
        paramFile.close()
        progress1.setValue(2)
        # Run the optimizer which will configure the input learner and aditionaly return [<minimum of objective function found>, <optimized parameters>]
        # Serial
        print "ENV:", self.execEnv
        if self.execEnv == 0:
            print "Executing the optimizer in serial mode on local machine"
            optPID = self.optimizer(
                learner=self.learner,
                dataSet=OrngFile,
                evaluateMethod=evalM,
                findMin=fMin,
                nFolds=self.nFolds,
                samplingMethod=self.SMethods[self.SMethod][1],
                runPath=scratchdir,
                verbose=self.verbose,
                externalControl=1,
                useParameters=self.parameters,
                useGridSearchFirst=self.UseGridSearch,
                gridSearchInnerPoints=self.nInnerPoints,
                np=None,
                machinefile=None,
                advancedMPIoptions="",
            )
        # Local mpi
        elif self.execEnv == 1:
            print "Executing the optimizer in parallel mode on local machine"
            optPID = self.optimizer(
                learner=self.learner,
                dataSet=OrngFile,
                evaluateMethod=evalM,
                findMin=fMin,
                nFolds=self.nFolds,
                samplingMethod=self.SMethods[self.SMethod][1],
                runPath=scratchdir,
                verbose=self.verbose,
                externalControl=1,
                useParameters=self.parameters,
                useGridSearchFirst=self.UseGridSearch,
                gridSearchInnerPoints=self.nInnerPoints,
                machinefile=0)
        # Sge Molndal
        elif self.execEnv == 2:
            print "Executing the optimizer in parallel mode in the batch queue on the sge"
            print "*****************runPath*****************"
            optPID = self.optimizer(
                learner=self.learner,
                dataSet=OrngFile,
                evaluateMethod=evalM,
                findMin=fMin,
                nFolds=self.nFolds,
                samplingMethod=self.SMethods[self.SMethod][1],
                runPath=scratchdir,
                verbose=self.verbose,
                externalControl=1,
                useParameters=self.parameters,
                useGridSearchFirst=self.UseGridSearch,
                gridSearchInnerPoints=self.nInnerPoints,
                np=8,
                machinefile="qsub")  #, sgeEnv = "sge_seml")
        elif self.execEnv == 3:
            print "Executing the optimizer in parallel mode in the quick queue on the sge"
            print "*****************runPath*****************"
            optPID = self.optimizer(
                learner=self.learner,
                dataSet=OrngFile,
                evaluateMethod=evalM,
                findMin=fMin,
                nFolds=self.nFolds,
                samplingMethod=self.SMethods[self.SMethod][1],
                runPath=scratchdir,
                verbose=self.verbose,
                externalControl=1,
                useParameters=self.parameters,
                useGridSearchFirst=self.UseGridSearch,
                gridSearchInnerPoints=self.nInnerPoints,
                np=8,
                machinefile="qsub",
                queueType="quick.q")  #, sgeEnv = "sge_seml")
        else:
            print "No SGE Env. selected. Nothing will happen."
    except:
        # Any failure while preparing/launching the optimizer: report and
        # clear both output channels, keeping scratchdir for debugging.
        progress1.close()
        self.updateInfo()
        self.setErrors(
            "Some error(s) occurred during the optimization.\nCheck the " +
            str(scratchdir) +
            " and the output terminal for more information")
        self.send("Learner - Tuned", None)
        self.send("Examples - Optimization Steps", None)
        return
    progress1.setValue(3)
    if type(optPID) != types.IntType:
        # The optimizer returns a PID (int) on success; anything else is an
        # error message.
        progress1.close()
        self.updateInfo()
        self.setErrors("Some error(s) occurred during optimization:\n" +
                       str(optPID))
        self.send("Learner - Tuned", None)
        self.send("Examples - Optimization Steps", None)
        return
    progress1.close()
    # Progess Bar
    optSteps = (1 + round(
        (len(self.dataset) * len(self.dataset.domain.attributes) *
         self.nParameters) / 1000)) * 8
    print "Learner optimization started at " + time.asctime()
    print "Optimization steps = ", int(
        optSteps), " (estimated to aprox. ", optSteps / 2, " seconds)"
    progress = QProgressDialog(
        "Learner optimization started at " + time.asctime() +
        " ,please wait...", "Abort Optimization", 0, optSteps, self,
        Qt.Dialog)  #, "progress", True )
    progress.setWindowModality(Qt.WindowModal)
    bar = QProgressBar(progress)
    bar.show()
    progress.setBar(bar)
    #progress.setTotalSteps(optSteps)
    progress.setMinimumDuration(0)
    stepsDone = 0
    progress.setValue(stepsDone)
    progress.forceShow()
    #Loop waiting for the optimizer to finish
    while 1:
        if stepsDone < (progress.maximum() - 1):
            progress.setValue(stepsDone)
            stepsDone += 1
            time.sleep(0.5)
        else:
            bar.setTextVisible(False)
            progress.setLabelText(
                "The optimizer is taking longer than expected, please wait some more time..."
            )
            stepsDone = 0
            progress.setValue(stepsDone)
            time.sleep(0.5)
        if progress.wasCanceled():
            if not self.optimizer.stop():
                progress.setLabelText(
                    "Could not stop the optimizer! Please wait until it finish..."
                )
            else:
                self.setErrors(
                    "Learner optimization stopped by user at " +
                    time.asctime(), "WARNING")
                break
        if self.optimizer.isFinished():
            print "Learner optimization finished at " + time.asctime()
            break
    progress.setValue(progress.maximum() - 1)
    time.sleep(0.5)
    progress.setValue(progress.maximum())
    self.tunedPars = self.optimizer.tunedParameters
    if self.verbose > 0:
        if self.optimizer.usedMPI:
            print "appspack version used in fact: MPI"
        else:
            print "appspack version used in fact: SERIAL"
    if type(self.tunedPars
            ) != types.ListType or self.learner.optimized == False:
        self.send("Learner - Tuned", None)
        self.send("Examples - Optimization Steps", None)
    else:
        self.send("Learner - Tuned", self.learner)
        self.intRes = dataUtilities.DataTable(scratchdir +
                                              "/optimizationLog.txt")
        self.send("Examples - Optimization Steps", self.intRes)
    self.updateInfo()
    if self.verbose == 0:
        miscUtilities.removeDir(scratchdir)
    else:
        self.setErrors(
            "The directory " + str(scratchdir) +
            " was not deleted because verbose flag is ON", "DEBUG")
def disable_testSVM_MPI(self): """ Tests changing the default range of the optimizer. Use MPI versio0n of appspack """ # Classification accuracy: ExpectedCA = [0.6] #New at orange2.0 optimizer = paramOptUtilities.Appspack() learner = AZorngCvSVM.CvSVMLearner() learnerName = "CvSVMLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["C", "gamma"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter, "optimize", True) # Change the range pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1)) trainFile = self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI") evalM = "AZutilities.evalUtilities.CA" fMin = False # Calculate the optimal parameters. This can take a long period of time! tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ useParameters = pars.getParametersDict(),\ findMin=fMin,\ runPath = runPath,\ verbose = 0,\ useStd = False,\ #advancedMPIoptions = "-all-local -allcpus") # to use this the # file "<MPICHDIR>/share/machines.LINUX must be properly configured" np = 4,\ machinefile = os.path.realpath(os.path.join(os.environ["AZORANGEHOME"], "tests/source/APPS_machines"))) verbTunedPars = optimizer.getTunedParameters() print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
# Check that the learner was optimized self.assertEqual(learner.optimized, True) # Check if the MPI version was used self.assertEqual(optimizer.usedMPI, True) # Check the number of optimized parameters self.assertEqual(len(verbTunedPars["optParam"]), 12) # Check the accuracy self.assert_( round(verbTunedPars["bestRes"], 3) in [round(x, 3) for x in ExpectedCA], "Got:" + str(verbTunedPars["bestRes"])) self.assert_( len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) >= 12) # (orig: 14) Must be > 2 miscUtilities.removeDir(runPath)
def getProbabilitiesAsAttribute(self, algorithm=None, minsup=None, atts=None):
    """ For regression problems, it returns the RMSE and the Q2
        For Classification problems, it returns CA and the ConfMat
        The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
        For the EvalResults not supported for a specific learner/dataset, the respective result will be None

        if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
            made out of those that were stable

        If some error occurred, the respective values in the Dict will be None

        NOTE(review): despite the method name, this body mirrors the
        accuracy-statistics workflow; the "class_prob" attribute created
        below is never attached to the returned statistics — confirm intent.

        parameters:
                algorithm - key for the structural feature generation algorithm (set dependent structural features that have to be calculated inside the crossvalidation)
                minsup - minimum support for the algorithm
                atts - attributes to be removed before learning (e.g. meta etc...)
    """
    self.__log("Starting Calculating MLStatistics")
    statistics = {}
    # Bail out early when the configured inputs are invalid.
    if not self.__areInputsOK():
        return None
    if algorithm:
        self.__log(" Additional features to be calculated inside of cross-validation")
        self.__log(" Algorithm for structural features: " + str(algorithm))
        self.__log(" Minimum support parameter: " + str(minsup))
    # Set the response type
    self.responseType = (
        self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
    )
    self.__log(" " + str(self.responseType))
    # Create the Train and test sets (one index set per external fold)
    DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)
    # Var for saving each Folds result
    optAcc = {}
    results = {}
    exp_pred = {}
    nTrainEx = {}
    nTestEx = {}
    # Set a dict of learners (a single learner is wrapped under its name)
    MLmethods = {}
    if type(self.learner) == dict:
        for ml in self.learner:
            MLmethods[ml] = self.learner[ml]
    else:
        MLmethods[self.learner.name] = self.learner
    models = {}
    rocs = {}
    self.__log("Calculating Statistics for MLmethods:")
    self.__log(" " + str([x for x in MLmethods]))
    # Check data in advance so that, by chance, it will not faill at the last fold!
    for foldN in range(self.nExtFolds):
        trainData = self.data.select(DataIdxs[foldN], negate=1)
        self.__checkTrainData(trainData)  # Optional!!
    # Order Learners so that PLS is the first
    sortedML = [ml for ml in MLmethods]
    if "PLS" in sortedML:
        sortedML.remove("PLS")
        sortedML.insert(0, "PLS")
    for ml in sortedML:
        self.__log(" > " + str(ml) + "...")
        try:
            # Var for saving each Folds result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            rocs[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            optAcc[ml] = []
            ### mods TG
            # NOTE(review): 'data' is undefined in this scope (presumably
            # self.data was intended) and 'classvar' should be 'classVar'.
            # As written these two lines raise NameError, which the broad
            # 'except' below catches — every learner is then reported as
            # "failed to create/optimize". Also, 'data_new' is never used
            # afterwards. TODO: confirm intent and fix or remove.
            prediction_attribute = orange.FloatVariable("class_prob")
            domain = [data.domain.attributes, prediction_attribute, data.domain.classvar]
            data_new = orange.ExampleTable(domain)

            logTxt = ""
            for foldN in range(self.nExtFolds):
                if type(self.learner) == dict:
                    self.paramList = None

                trainData = self.data.select(DataIdxs[foldN], negate=1)
                orig_len = len(trainData.domain.attributes)
                # add structural descriptors to the training data (TG)
                if algorithm:
                    trainData_structDesc = getStructuralDesc.getStructuralDescResult(trainData, algorithm, minsup)
                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, atts)

                testData = self.data.select(DataIdxs[foldN])
                # print "IDX: ",
                # print DataIdxs[foldN]
                # calculate the feature values for the test data (TG)
                if algorithm:
                    cut_off = orig_len - len(atts)
                    smarts = trainData.domain.attributes[cut_off:]
                    self.__log(" Number of structural features added: " + str(len(smarts)))
                    testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, atts)

                nTrainEx[ml].append(len(trainData))
                nTestEx[ml].append(len(testData))
                # Test if trainsets inside optimizer will respect dataSize criterias.
                #  if not, don't optimize, but still train the model
                dontOptimize = False
                if self.responseType != "Classification" and (len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                    dontOptimize = True
                else:
                    tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                    tmpTrainData = trainData.select(tmpDataIdxs[0], negate=1)
                    if not self.__checkTrainData(tmpTrainData, False):
                        dontOptimize = True

                if dontOptimize:
                    # Too little data: skip hyper-parameter optimization, but
                    # still estimate the fold accuracy via a 5-fold CV so the
                    # consensus weighting below has a value to use.
                    logTxt += (
                        " Fold " + str(foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                    )
                    self.__log(logTxt)
                    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                        res = orngTest.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            strat=orange.MakeRandomIndices.StratifiedIfPossible,
                            randomGenerator=random.randint(0, 100),
                        )
                        CA = evalUtilities.CA(res)[0]
                        optAcc[ml].append(CA)
                    else:
                        res = orngTest.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            strat=orange.MakeRandomIndices.StratifiedIfPossible,
                            randomGenerator=random.randint(0, 100),
                        )
                        R2 = evalUtilities.R2(res)[0]
                        optAcc[ml].append(R2)
                else:
                    # Optimize hyper-parameters in a scratch dir seeded by the
                    # fold's train data (reproducible per fold).
                    runPath = miscUtilities.createScratchDir(
                        baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)
                    )
                    trainData.save(os.path.join(runPath, "trainData.tab"))

                    tunedPars = paramOptUtilities.getOptParam(
                        learner=MLmethods[ml],
                        trainDataFile=os.path.join(runPath, "trainData.tab"),
                        paramList=self.paramList,
                        useGrid=False,
                        verbose=self.verbose,
                        queueType=self.queueType,
                        runPath=runPath,
                        nExtFolds=None,
                        nFolds=self.nInnerFolds,
                        logFile=self.logFile,
                        getTunedPars=True,
                    )
                    if not MLmethods[ml] or not MLmethods[ml].optimized:
                        self.__log(
                            " WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized."
                        )
                        self.__log(" It will be ignored")
                        # self.__log(" It will be set to default parameters")
                        self.__log(" DEBUG can be done in: " + runPath)
                        # Set learner back to default
                        # MLmethods[ml] = MLmethods[ml].__class__()
                        raise Exception("The learner " + str(ml) + " was not optimized.")
                    else:
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            optAcc[ml].append(tunedPars[0])
                        else:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)

                        miscUtilities.removeDir(runPath)
                # Train the model
                model = MLmethods[ml](trainData)
                models[ml].append(model)
                # Test the model
                if self.responseType == "Classification":
                    results[ml].append(
                        (
                            evalUtilities.getClassificationAccuracy(testData, model),
                            evalUtilities.getConfMat(testData, model),
                        )
                    )
                    roc = self.aroc(testData, [model])
                    rocs[ml].append(roc)
                # save the prediction probabilities
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append(
                        (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                    )
                    # Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred

            res = self.createStatObj(
                results[ml],
                exp_pred[ml],
                nTrainEx[ml],
                nTestEx[ml],
                self.responseType,
                self.nExtFolds,
                logTxt,
                rocs[ml],
            )
            if self.verbose > 0:
                print "UnbiasedAccuracyGetter!Results " + ml + ":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
            self.__log(" OK")
        except:
            # NOTE(review): broad except also swallows the NameError from the
            # 'mods TG' lines above, not just genuine learner failures.
            self.__log(" Learner " + str(ml) + " failed to create/optimize the model!")
            res = self.createStatObj()
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)

    if not statistics or len(statistics) < 1:
        self.__log("ERROR: No statistics to return!")
        return None
    elif len(statistics) > 1:
        # We still need to build a consensus model out of the stable models
        # ONLY if there are more that one model stable!
        # When only one or no stable models, build a consensus based on all models
        consensusMLs = {}
        for modelName in statistics:
            StabilityValue = statistics[modelName]["StabilityValue"]
            if StabilityValue is not None and statistics[modelName]["stable"]:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

        self.__log(
            "Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods."
        )

        if len(consensusMLs) <= 1:  # we need more models to build a consensus!
            consensusMLs = {}
            for modelName in statistics:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

        if len(consensusMLs) >= 2:
            # Var for saving each Folds result
            Cresults = []
            Cexp_pred = []
            CnTrainEx = []
            CnTestEx = []
            self.__log(
                "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])
            )
            for foldN in range(self.nExtFolds):
                if self.responseType == "Classification":
                    # Build a weighted-vote expression: each model's vote for a
                    # class is weighted by its optimization accuracy for this fold.
                    CLASS0 = str(self.data.domain.classVar.values[0])
                    CLASS1 = str(self.data.domain.classVar.values[1])
                    exprTest0 = "(0"
                    for ml in consensusMLs:
                        exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(optAcc[ml][foldN]) + " "
                    exprTest0 += ")/IF0(sum([False"
                    for ml in consensusMLs:
                        exprTest0 += ", " + ml + " == " + CLASS0 + " "
                    exprTest0 += "]),1)"
                    exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                    expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
                else:
                    # Regression consensus: Q2-weighted average of the predictions.
                    Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                    expression = "(1 / " + str(Q2sum) + ") * (0"
                    for ml in consensusMLs:
                        expression += " + " + str(optAcc[ml][foldN]) + " * " + ml + " "
                    expression += ")"

                testData = self.data.select(DataIdxs[foldN])
                CnTestEx.append(len(testData))
                consensusClassifiers = {}
                for learnerName in consensusMLs:
                    consensusClassifiers[learnerName] = models[learnerName][foldN]

                model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                CnTrainEx.append(model.NTrainEx)
                # Test the model
                if self.responseType == "Classification":
                    Cresults.append(
                        (
                            evalUtilities.getClassificationAccuracy(testData, model),
                            evalUtilities.getConfMat(testData, model),
                        )
                    )
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    Cresults.append(
                        (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                    )
                    # Save the experimental value and correspondent predicted value
                    Cexp_pred += local_exp_pred

            res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
            statistics["Consensus"] = copy.deepcopy(res)
            statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
            self.__writeResults(statistics)
        self.__log("Returned multiple ML methods statistics.")
        return statistics

    # By default return the only existing statistics!
    self.__writeResults(statistics)
    self.__log("Returned only one ML method statistics.")
    return statistics[statistics.keys()[0]]
def test_RFAdvanced_Usage(self):
    """RF - Test of optimizer with advanced configuration """
    # Create the appspack instance
    opt = paramOptUtilities.Appspack()
    # Learner to be optimized
    learner = AZorngRF.RFLearner()
    # dataset to use in the parameters optimization (Discrete class in this example)
    dataSet = self.discTrainDataPath
    # Define the objective function. This requires:
    #    defining the extreme to find (the min or max): findMin=True or findMin=False
    fMin = False
    #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
    #       evaluateMethod="AZutilities.evalUtilities.CA"
    evalM = "AZutilities.evalUtilities.CA"
    # Create a directory for running the appspack (if not defined it will use the present working directory)
    runPath = miscUtilities.createScratchDir(desc="ParamOptTest")
    # Load the optimization parameters from the default configuration (AZLearnersParamsConfig.py)
    parameters = AZLearnersParamsConfig.API("RFLearner")
    parameters.setParameter("method", "default", 'rf1')
    # change the optimization parameters
    # NOTE(review): this setParameter call duplicates the one above — kept as-is.
    parameters.setParameter(
        "method", "default", 'rf1')  # make the method fixed (do not optimize) to be pls1
    parameters.setParameter("method", "optimize", False)
    parameters.setParameter(
        "method", "rangeType", "values")  # assure that the keyword for the values range type is
                                          # set correctly for values instead of interval
    parameters.setParameter(
        "k", "range", [1, 3, 5, 6, 10])   # make the method fixed (do not optimize) to be pls1
    parameters.setParameter("k", "optimize", True)
    parameters.setParameter(
        "k", "rangeType", "values")       # assure that the keyword for the values range type is
                                          # set correctly for values instead of interval
    # Run the appspack which will configure the input learner and aditionaly return
    # [<minimum of objective function found>, <optimized parameters>]
    tunedPars = opt(learner=learner,\
                    dataSet=dataSet,\
                    evaluateMethod = evalM,\
                    findMin=fMin,\
                    runPath = runPath,\
                    useStd = False,\
                    useParameters = parameters.getParametersDict(),\
                    # The 'useParameters' is mandatory, even placing a file with the new configurations in the
                    # running directory, that we pass to the optimizer the correct parameters to use.
                    # The parameters placed on the running directory are for appspack usage, and the
                    # optimizer needs to know what parameters appspack will use, otherwise, it will
                    # load the default ones
                    verbose = 0)
    print "Returned: ", tunedPars
    print "====================== optimization Done ==========================="
    print "Learner optimized flag = ", learner.optimized
    print "Tuned parameters = ", tunedPars[1]
    print "Best optimization result = ", tunedPars[0]
    print "check the file intRes.txt to see the intermediate results of optimizer!"
    self.assertEqual(opt.usedMPI, False)
    self.assertEqual(learner.optimized, True)
    self.assertEqual(round(tunedPars[0], 2), round(0.61, 2))  #Ver 0.3
    #The learner is now with its optimized parameters already set, so we can now make a classifier out of it
    classifier = learner(self.discTrain)
    CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
    self.assertEqual(round(CA, 2), round(0.97, 2))  #Ver 0.3
    self.assert_(
        len(
            dataUtilities.DataTable(
                os.path.join(runPath, "optimizationLog.txt"))) >= 5)  # Must be > 2
    miscUtilities.removeDir(runPath)
def testRF_MPI(self):
    """Tests changing the default range of the optimizer, using the MPI version of appspack."""
    # Classification accuracy:
    ExpectedCA = [0.903]  #opencv1.1: 0.90480000000000005
    optimizer = paramOptUtilities.Appspack()
    learner = AZorngRF.RFLearner()
    learnerName = "RFLearner"
    # Create an interface for setting optimizer parameters
    pars = AZLearnersParamsConfig.API(learnerName)
    # Set all parameters to not be optimized
    pars.setOptimizeAllParameters(False)
    parameterList = ["nActVars"]
    # Set the parameters in parameterList to be optimized
    for parameter in parameterList:
        pars.setParameter(parameter,"optimize",True)
    # Set the NumThreads
    pars.setParameter("NumThreads","optimize",False)
    # Change the default
    pars.setParameter("NumThreads","default","1")
    trainFile=self.discTrainDataPath
    # Create a directory for running the appspack (if not defined it will use the present working directory)
    runPath = miscUtilities.createScratchDir(desc="ParamOptTest_RF_MPI")
    evalM = "AZutilities.evalUtilities.CA"
    fMin = False  # maximize the CA objective
    # Calculate the optimal parameters. This can take a long period of time!
    tunedPars = optimizer(learner=learner,\
                    dataSet=trainFile,\
                    evaluateMethod = evalM,\
                    useParameters = pars.getParametersDict(),\
                    useDefaultPoint = False,\
                    findMin=fMin,\
                    runPath = runPath,\
                    useStd = False,\
                    verbose = 0,\
                    #advancedMPIoptions = "-all-local -allcpus") # to use this the
                    # file "<MPICHDIR>/share/machines.LINUX must be properly configured"
                    # Alternatively, we can set machinefile=0 to us also all available cores
                    machinefile =0)
    print "Returned: ", tunedPars
    print "====================== optimization Done ==========================="
    print "Learner optimized flag = ", learner.optimized
    print "Tuned parameters = ", tunedPars[1]
    print "Best optimization result = ", tunedPars[0]
    print "check the file intRes.txt to see the intermediate results of optimizer!"
    print "Number of cores used: ",optimizer.np
    verbTunedPars = optimizer.getTunedParameters()
    # Check that the learner was optimized
    self.assertEqual(learner.optimized,True)
    #Check if the number of processors used are all the core available
    status,out = commands.getstatusoutput("cat /proc/cpuinfo | grep processor")
    self.assertEqual(optimizer.np, len(out.split("\n")))
    # Check if the MPI version was used
    self.assertEqual(optimizer.usedMPI, True)
    # Check the number of optimized parameters
    self.assert_(len(verbTunedPars["optParam"]) in [8,9,10])
    # Check the accuracy
    self.assert_(round(verbTunedPars["bestRes"],3) in [round(x,3) for x in ExpectedCA],"Got:" + str(verbTunedPars["bestRes"]))
    self.assert_(len(dataUtilities.DataTable(os.path.join(runPath,"optimizationLog.txt")))>=3)  # Must be > 2
    miscUtilities.removeDir(runPath)
def test_RFClassification(self): """RF - Test of optimizer with discrete class data """ #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.discTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = False # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.CA" evalM = "AZutilities.evalUtilities.CA" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="RFTest") # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API("RFLearner") # Set the parameters in parameterList to be optimized pars.setParameter("NumThreads", "optimize", False) # Change the default pars.setParameter("NumThreads", "default", "1") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useDefaultPoint = False,\ useStd = False,\ useParameters = pars.getParametersDict(),\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
print "Number of optimization steps: ", len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) print "Number of Threads used: ", learner.NumThreads #The learner is now with its optimized parameters already set, so we can now make a classifier out of it learner.NumThreads = 1 classifier = learner(self.discTrain) CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier) print "CA of optimized Learner: ", CA self.assertEqual(opt.usedMPI, False) self.log.info("") self.log.info("tunedPars[0]=" + str(tunedPars[0])) self.assertEqual(learner.optimized, True) self.assertEqual(round(tunedPars[0], 2), round(0.61, 2)) # Ver 0.3:390 self.log.info("CA=" + str(CA)) self.assertEqual(round(CA, 2), round(0.965517241379, 2)) #Ver 0.3 #Check if the best result was not the one with numThreads different of 1 since that way we can get #different results among runs self.assertEqual(int(tunedPars[1]["NumThreads"]), 1) miscUtilities.removeDir(runPath)
def buildModel(trainData, MLMethod, queueType="NoSGE", verbose=0, logFile=None):
    """Build and optimize the learner described by MLMethod.

    If MLMethod is a single method ("IndividualStatistics" not in MLMethod),
    optimize and build that model. If it is a Consensus
    ("IndividualStatistics" in MLMethod), optimize each member first and then
    build the consensus from the successfully optimized ones.

    parameters:
        trainData - orange table with the training data
        MLMethod  - dict describing the ML method(s) to build
        queueType - queue selector forwarded to paramOptUtilities.getOptParam
        verbose   - verbosity level forwarded to the optimizer
        logFile   - optional path used by log()

    Returns the trained model, or None when no learner could be built.
    """
    log(logFile, "Building and optimizing learner: " + MLMethod["MLMethod"] + "...")
    learners = {}
    MLMethods = {}
    if "IndividualStatistics" in MLMethod:
        # It is a consensus and will certaily not contain any
        # special model as it was filtered in the getUnbiasedAcc
        for ML in MLMethod["IndividualStatistics"]:
            MLMethods[ML] = copy.deepcopy(MLMethod["IndividualStatistics"][ML])
    else:
        ML = MLMethod["MLMethod"]
        if MLMETHODS[ML](name=ML).specialType == 1:
            # If is a special model and has a built-in optimizaer
            log(logFile, " This is a special model")
            smilesAttr = dataUtilities.getSMILESAttr(trainData)
            if smilesAttr:
                # Special models work on the SMILES directly: keep only the
                # SMILES attribute and the class variable.
                log(logFile, "Found SMILES attribute:" + smilesAttr)
                trainData = dataUtilities.attributeSelectionData(
                    trainData, [smilesAttr, trainData.domain.classVar.name])
            optInfo, SpecialModel = MLMETHODS[ML](name=ML).optimizePars(
                trainData, folds=5)
            return SpecialModel
        else:
            MLMethods[MLMethod["MLMethod"]] = MLMethod
            # Regular learners cannot consume the SMILES column; drop it.
            smilesAttr = dataUtilities.getSMILESAttr(trainData)
            if smilesAttr:
                trainData = dataUtilities.attributeDeselectionData(
                    trainData, [smilesAttr])

    # optimize all MLMethods
    for ML in MLMethods:
        log(logFile, " Optimizing MLmethod: " + ML)
        learners[ML] = MLMETHODS[ML](name=ML)

        runPath = miscUtilities.createScratchDir(
            baseDir=AZOC.NFS_SCRATCHDIR, desc="competitiveWorkflow_BuildModel")
        trainData.save(os.path.join(runPath, "trainData.tab"))

        tunedPars = paramOptUtilities.getOptParam(learner=learners[ML],
                                                  trainDataFile=os.path.join(
                                                      runPath, "trainData.tab"),
                                                  useGrid=False,
                                                  verbose=verbose,
                                                  queueType=queueType,
                                                  runPath=runPath,
                                                  nExtFolds=None,
                                                  logFile=logFile,
                                                  getTunedPars=True)

        if not learners[ML].optimized:
            # Drop learners that failed to optimize instead of falling back
            # to defaults; the scratch dir is kept for debugging.
            print "WARNING: competitiveWorkflow: The learner " + str(
                learners[ML]) + " was not optimized."
            #print "         Using default parameters"
            print " The " + str(learners[ML]) + " will not be included"
            #print "         Returning None"
            print " DEBUG can be made in: " + runPath
            #Setting default parameters
            #learners[ML] = learners[ML].__class__()
            #return None
            learners.pop(ML)
            continue
        else:
            print "Optimized learner ", learners[ML]

        # Record the optimization accuracy used later to weight the consensus:
        # classification uses the tuned objective value, regression an R2
        # estimated by a fresh 5-fold cross-validation.
        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
            MLMethods[ML]["optAcc"] = tunedPars[0]
        else:
            res = orngTest.crossValidation(
                [learners[ML]],
                trainData,
                folds=5,
                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=random.randint(0, 100))
            R2 = evalUtilities.R2(res)[0]
            MLMethods[ML]["optAcc"] = R2
        miscUtilities.removeDir(runPath)

    #Train the model
    if len(learners) == 1:
        log(logFile, " Building the model:" + learners.keys()[0])
        model = learners[learners.keys()[0]](trainData)
    elif len(learners) >= 1:
        model = buildConsensus(trainData, learners, MLMethods)
    else:
        print "ERROR: No Learners were selected!"
        return None

    return model
def testCvANN(self): """ Tests changing the default range of the optimizer. """ # Classification accuracy: ExpectedCA = [0.585] #Ver 0.3 optimizer = paramOptUtilities.Appspack() learner = AZorngCvANN.CvANNLearner() learnerName = "CvANNLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["maxIter", "nHidden"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter, "optimize", True) trainFile = self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvANN") evalM = "AZutilities.evalUtilities.CA" fMin = False # Calculate the optimal parameters. This can take a long period of time! tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ useParameters = pars.getParametersDict(),\ findMin=fMin,\ useStd = False,\ runPath = runPath,\ verbose = 0) verbTunedPars = optimizer.getTunedParameters() print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "Best result index from intRes file:", verbTunedPars["ResIdx"] print "Optimizer runPath:", runPath print "check the file intRes.txt to see the intermediate results of optimizer!" 
# Check that the learner was optimized self.assertEqual(learner.optimized, True) # Check if the MPI version was not used self.assertEqual(optimizer.usedMPI, False) # Check the number of optimized parameters self.assertEqual(len(verbTunedPars["optParam"]), 14) # Check the accuracy nOptPoints = len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) self.assert_(nOptPoints > 5, "N. of optimization points:" + str(nOptPoints)) # Must be > 2 self.assert_( round(verbTunedPars["bestRes"], 3) in [round(x, 3) for x in ExpectedCA], "Actual result:" + str(verbTunedPars["bestRes"])) miscUtilities.removeDir(runPath)
def test_GridSearch(self):
    """ Test GridSearch Module """
    #Create the appspack instance
    opt = paramOptUtilities.Appspack()
    #Learner to be optimized
    learner = AZorngRF.RFLearner()
    #dataset to use in the parameters optimization (Discrete class in this example)
    dataSet = self.discTestDataPath
    # Define the objective function. This requires:
    #    defining the extreme to find (the min or max): findMin=True or findMin=False
    fMin = False
    #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
    #       evaluateMethod="AZutilities.evalUtilities.CA"
    evalM = "AZutilities.evalUtilities.CA"
    # Create a directory for running the appspack (if not defined it will use the present working directory)
    runPath = miscUtilities.createScratchDir(desc="RFTest")
    # Create an interface for setting optimizer parameters
    pars = AZLearnersParamsConfig.API("RFLearner")
    # Set the parameters in parameterList to be optimized
    pars.setParameter("NumThreads", "optimize", False)
    # Change the default
    pars.setParameter("NumThreads", "default", "1")
    # Run the appspack which will configure the input learner and aditionaly return
    # [<minimum of objective function found>, <optimized parameters>]
    tunedPars = opt(learner=learner,\
                    dataSet=dataSet,\
                    evaluateMethod = evalM,\
                    findMin=fMin,\
                    runPath = runPath,\
                    useGridSearchFirst = True,\
                    gridSearchInnerPoints = 3,\
                    useDefaultPoint = False,\
                    useStd = False,\
                    useParameters = pars.getParametersDict(),\
                    verbose = 0)
    print "Returned: ", tunedPars
    print "====================== optimization Done ==========================="
    print "Learner optimized flag = ", learner.optimized
    print "Tuned parameters = ", tunedPars[1]
    print "Best optimization result = ", tunedPars[0]
    print "check the file intRes.txt to see the intermediate results of optimizer!"
    print "CheckSum:", round(sum(opt.GSRes["results"]), 2)
    print "Number of results: ", len(
        dataUtilities.DataTable(
            os.path.join(runPath, "optimizationLog.txt")))
    print "Running Path:", runPath
    # Check that the learner was optimized
    self.assertEqual(learner.optimized, True)
    self.log.info("")
    self.log.info("tunedPars[0]=" + str(tunedPars[0]))
    # Check the accuracy
    self.assertEqual(round(tunedPars[0], 2), round(0.621, 2))  # Ver 0.3
    #Check if the number of results remain equal
    self.assert_(
        len(
            dataUtilities.DataTable(
                os.path.join(runPath, "optimizationLog.txt"))) >= 5)
    #Check that all points were evaluated
    self.assert_(opt.GSRes["nFailedPoints"] == 0)
    self.assert_(opt.GSRes["nPoints"] == 3)
    #CheckSum to assure results are the same
    expectedValues = [
        -1.78,  # Ver 0.3
        -1.79
    ]
    acctualValue = sum(opt.GSRes["results"])
    self.assertRoundedToExpectedArray(acctualValue, expectedValues, 2)
    #Check if the best result was not the one with numThreads different of 1 since that way we can get
    #different results among runs
    self.assertEqual(int(tunedPars[1]["NumThreads"]), 1)
    miscUtilities.removeDir(runPath)
def testCvSVM(self): """ Tests changing the default range of the optimizer. """ # Classification accuracy: ExpectedCA = [0.6] # Ver 0.3 optimizer = paramOptUtilities.Appspack() learner = AZorngCvSVM.CvSVMLearner() learnerName = "CvSVMLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["C", "gamma"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter, "optimize", True) # Change the range pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1)) pars.setParameter("priors", "default", {"POS": 2, "NEG": 4}) trainFile = self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvSVM") evalM = "AZutilities.evalUtilities.CA" fMin = False # Calculate the optimal parameters. This can take a long period of time! tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ useParameters = pars.getParametersDict(),\ findMin=fMin,\ useStd = False,\ runPath = runPath,\ verbose = 0) verbTunedPars = optimizer.getTunedParameters() print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "Best result index from intRes file:", verbTunedPars["ResIdx"] print "Optimizer runPath:", runPath print "check the file intRes.txt to see the intermediate results of optimizer!" 
# Check that the learner was optimized self.assertEqual(learner.optimized, True) # Check if the MPI version was not used self.assertEqual(optimizer.usedMPI, False) # Check the number of optimized parameters self.assertEqual(len(verbTunedPars["optParam"]), 12) # Check the accuracy self.assert_( round(verbTunedPars["bestRes"], 2) in [round(x, 2) for x in ExpectedCA], "Got:" + str(verbTunedPars["bestRes"])) self.assert_( len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) >= 12) # (orig: 14) Must be > 2 #Check Priors self.assertEqual( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))[1]["priors"].value, "{'NEG':4,'POS':2}") self.assertEqual(tunedPars[1]["priors"], "{'NEG':4,'POS':2}") # Ver 0.3 #Set the priors since it could be choosing the first row as the best, which would be the default values, without the priors learner.priors = {"POS": 2, "NEG": 4} classifier = learner(self.discTest) classifier.write(os.path.join(runPath, "CvSVMModel")) file = open(os.path.join(runPath, "CvSVMModel/model.svm"), "r") lines = file.readlines() file.close() priors = [ round(x, 2) for x in eval((lines[18].strip()).replace("data:", "")) ] self.assertEqual(len(priors), 2) self.assertEqual( priors[self.discTest.domain.classVar.values.index("POS")], 2.0 * float(tunedPars[1]["C"])) self.assertEqual( priors[self.discTest.domain.classVar.values.index("NEG")], 4.0 * float(tunedPars[1]["C"])) miscUtilities.removeDir(runPath)
def test_PLS_MPI_2(self):
    ###################################################################
    #        Test other way of setting appspack
    ###################################################################
    # Classification accuracy:
    ExpectedCA = [0.851851851852, 0.865]
    ExpectedCAwithTest = [0.865238095238, 0.884285714286, 0.85619047619, 0.837]  # New at orange2.0
    # Create the appspack instance
    opt = paramOptUtilities.Appspack()
    # Learner to be optimized
    learner = AZorngPLS.PLSLearner()
    # dataset to use in the parameters optimization (Discrete class in this example)
    dataSet = self.discTrainDataPath
    # Define the objective function. This requires:
    #    defining the extreme to find (the min or max): findMin=True or findMin=False
    fMin = False
    #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
    #       evaluateMethod="AZutilities.evalUtilities.CA"
    evalM = "AZutilities.evalUtilities.CA"
    # Create a directory for running the appspack (if not defined it will use the present working directory)
    runPath = miscUtilities.createScratchDir(desc="ParamOptTest_PLS_MPI_2")
    # Load the optimization parameters from the default configuration (AZLearnersParamsConfig.py)
    parameters = AZLearnersParamsConfig.API("PLSLearner")
    parameters.setParameter("method", "default", 'pls1')
    # change the optimization parameters
    parameters.setParameter("method", "default", 'pls1')    # make the method fixed (do not optimize) to be pls1
    parameters.setParameter("method", "optimize", False)
    parameters.setParameter("method", "rangeType", "values")  # assure that the keyword for the values range type is
                                                              # set correctly for values instead of interval
    parameters.setParameter("k", "range", [1, 3, 5, 6, 10])   # candidate values for the number of components
    parameters.setParameter("k", "optimize", True)
    parameters.setParameter("k", "rangeType", "values")       # assure that the keyword for the values range type is
                                                              # set correctly for values instead of interval
    # Returns [<minimum of objective function found>, <optimized parameters>]
    tunedPars = opt(learner=learner,
                    dataSet=dataSet,
                    evaluateMethod=evalM,
                    findMin=fMin,
                    runPath=runPath,
                    useParameters=parameters.getParametersDict(),
                    verbose=0,
                    useStd=False,
                    advancedMPIoptions=None,
                    np=4,
                    machinefile=["localhost:2", "localhost:2"])
    print "Returned: ", tunedPars
    print "====================== optimization Done ==========================="
    print "Learner optimized flag = ", learner.optimized
    print "Tuned parameters = ", tunedPars[1]
    print "Best optimization result = ", tunedPars[0]
    print "check the file intRes.txt to see the intermediate results of optimizer!"
    self.assertEqual(learner.optimized, True)
    # Check if the MPI version was used
    self.assertEqual(opt.usedMPI, True)
    self.assert_(round(tunedPars[0], 3) in [round(x, 3) for x in ExpectedCAwithTest], "Got:" + str(tunedPars[0]))
    # The learner is now with its optimized parameters already set, so we can now make a classifier out of it
    classifier = learner(self.discTrain)
    CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
    self.assert_(round(CA, 3) in [round(x, 3) for x in ExpectedCA])
    resData2 = dataUtilities.DataTable(os.path.join(runPath, "optimizationLog.txt"))
    self.assert_(len(resData2) >= 4)  # (orig 5) Must be > 2
    # print runPath
    miscUtilities.removeDir(runPath)
def getAcc(self, callBack=None, algorithm=None, params=None, atts=None, holdout=None):
    """ For regression problems, it returns the RMSE and the Q2
        For Classification problems, it returns CA and the ConfMat
        The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
        For the EvalResults not supported for a specific learner/datase, the respective result will be None

        if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
            made out of those that were stable

        It some error occurred, the respective values in the Dict will be None
            parameters:
                algorithm - list of feature generation algorithms (set dependent features that have to be calculated inside the crossvalidation)
                params - dictionary of parameters
                atts - attributes to be removed before learning (e.g. meta etc...)

        callBack - optional progress callback; called with a 0-100 percentage,
                   a falsy return aborts the whole evaluation (returns None).
        holdout  - when set, a single hold-out split is used instead of
                   self.nExtFolds cross-validation folds.
    """
    self.__log("Starting Calculating MLStatistics")
    statistics = {}
    if not self.__areInputsOK():
        return None
    if holdout:
        # Hold-out evaluation degenerates to a single "fold"
        self.nExtFolds = 1
    if algorithm:
        self.__log(" Additional features to be calculated inside of cross-validation")
        for i in algorithm:
            self.__log(" Algorithm: " + str(i))
        for j, v in params.iteritems():
            self.__log(" Parameter: " + str(j) + " = " + str(v))
    # Set the response type from the class variable (discrete -> classification)
    self.responseType = (
        self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
    )
    self.__log(" " + str(self.responseType))
    # Create the Train and test sets
    DataIdxs = None
    if holdout:
        self.__log("Using hold out evaluation with " + str(holdout) + "*100 % of data for training")
        DataIdxs = dataUtilities.SeedDataSampler_holdOut(self.data, holdout)
    else:
        DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)
    # Var for saving each Fols result
    optAcc = {}
    results = {}
    exp_pred = {}
    nTrainEx = {}
    nTestEx = {}
    # Set a dict of learners
    MLmethods = {}
    if type(self.learner) == dict:
        for ml in self.learner:
            MLmethods[ml] = self.learner[ml]
    else:
        MLmethods[self.learner.name] = self.learner
    models = {}
    rocs = {}
    self.__log("Calculating Statistics for MLmethods:")
    self.__log(" " + str([x for x in MLmethods]))
    # Check data in advance so that, by chance, it will not fail at the last fold!
    for foldN in range(self.nExtFolds):
        trainData = self.data.select(DataIdxs[foldN], negate=1)
        self.__checkTrainData(trainData)
    # Optional!!
    # Order Learners so that PLS is the first
    sortedML = [ml for ml in MLmethods]
    if "PLS" in sortedML:
        sortedML.remove("PLS")
        sortedML.insert(0, "PLS")
    stepsDone = 0
    nTotalSteps = len(sortedML) * self.nExtFolds
    for ml in sortedML:
        self.__log(" > " + str(ml) + "...")
        try:
            # Var for saving each Fols result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            rocs[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            optAcc[ml] = []
            logTxt = ""
            for foldN in range(self.nExtFolds):
                if type(self.learner) == dict:
                    self.paramList = None
                trainData = self.data.select(DataIdxs[foldN], negate=1)
                orig_len = len(trainData.domain.attributes)
                refs = None
                # fingerprint methods used by the similarity-descriptor path
                methods = [
                    "rdk_MACCS_keys",
                    "rdk_topo_fps",
                    "rdk_morgan_fps",
                    "rdk_morgan_features_fps",
                    "rdk_atompair_fps",
                ]
                train_domain = None
                # add structural descriptors to the training data (TG)
                if algorithm:
                    for i in range(len(algorithm)):
                        if algorithm[i] == "structClust":
                            self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                            actData = orange.ExampleTable(trainData.domain)
                            for d in trainData:
                                # only valid for simboosted qsar paper experiments!?
                                if d.getclass() == "2":
                                    actData.append(d)
                            refs = structuralClustering.getReferenceStructures(
                                actData,
                                threshold=params["threshold"],
                                minClusterSize=params["minClusterSize"],
                                numThreads=2,
                            )
                            self.__log(
                                " found "
                                + str(len(refs))
                                + " reference structures in "
                                + str(len(actData))
                                + " active structures"
                            )
                            orig_len = orig_len + (len(refs) * len(methods))
                            trainData_sim = SimBoostedQSAR.getSimDescriptors(refs, trainData, methods)
                            # only deselect `atts` after the last feature-generation step
                            if i == (len(algorithm) - 1):
                                trainData = dataUtilities.attributeDeselectionData(trainData_sim, atts)
                            else:
                                trainData = dataUtilities.attributeDeselectionData(trainData_sim, [])
                        elif algorithm[i] == "ECFP":
                            self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                            trainData_ecfp = getCinfonyDesc.getCinfonyDescResults(trainData, ["rdk.FingerPrints"])
                            # keep the fingerprint domain to recompute features on test data
                            train_domain = trainData_ecfp.domain
                            if i == (len(algorithm) - 1):
                                trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, atts)
                            else:
                                trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, [])
                        else:
                            self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                            trainData_structDesc = getStructuralDesc.getStructuralDescResult(
                                trainData, algorithm[i], params["minsup"]
                            )
                            if i == (len(algorithm) - 1):
                                trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, atts)
                            else:
                                trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, [])
                # trainData.save("/home/girschic/proj/AZ/ProjDev/train.tab")
                testData = self.data.select(DataIdxs[foldN])
                # calculate the feature values for the test data (TG)
                if algorithm:
                    for i in range(len(algorithm)):
                        if algorithm[i] == "structClust":
                            self.__log(str(algorithm[i]))
                            testData_sim = SimBoostedQSAR.getSimDescriptors(refs, testData, methods)
                            if i == (len(algorithm) - 1):
                                testData = dataUtilities.attributeDeselectionData(testData_sim, atts)
                            else:
                                testData = dataUtilities.attributeDeselectionData(testData_sim, [])
                        elif algorithm[i] == "ECFP":
                            self.__log(str(algorithm[i]))
                            # testData_ecfp = orange.ExampleTable(train_domain)
                            tmp_dat = []
                            for d in testData:
                                tmp = getCinfonyDesc.getRdkFPforTestInstance(train_domain, d)
                                tmp_dat.append(tmp)
                            testData_ecfp = orange.ExampleTable(tmp_dat[0].domain, tmp_dat)
                            if i == (len(algorithm) - 1):
                                # print "removing atts"
                                testData = dataUtilities.attributeDeselectionData(testData_ecfp, atts)
                            else:
                                # print "removing no atts"
                                testData = dataUtilities.attributeDeselectionData(testData_ecfp, [])
                        else:
                            # structural features added during training start at cut_off
                            cut_off = orig_len - len(atts)
                            smarts = trainData.domain.attributes[cut_off:]
                            self.__log(" Number of structural features added: " + str(len(smarts)))
                            testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                            if i == (len(algorithm) - 1):
                                testData = dataUtilities.attributeDeselectionData(testData_structDesc, atts)
                            else:
                                testData = dataUtilities.attributeDeselectionData(testData_structDesc, [])
                # testData.save("/home/girschic/proj/AZ/ProjDev/test.tab")
                nTrainEx[ml].append(len(trainData))
                nTestEx[ml].append(len(testData))
                # Test if trainsets inside optimizer will respect dataSize criterias.
                #   if not, don't optimize, but still train the model
                dontOptimize = False
                if self.responseType != "Classification" and (len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                    dontOptimize = True
                else:
                    tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                    tmpTrainData = trainData.select(tmpDataIdxs[0], negate=1)
                    if not self.__checkTrainData(tmpTrainData, False):
                        dontOptimize = True
                if dontOptimize:
                    logTxt += " Fold " + str(foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                    self.__log(logTxt)
                    # No hyper-parameter optimization: estimate optAcc by a plain 5-fold CV
                    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                        res = orngTest.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            strat=orange.MakeRandomIndices.StratifiedIfPossible,
                            randomGenerator=random.randint(0, 100),
                        )
                        CA = evalUtilities.CA(res)[0]
                        optAcc[ml].append(CA)
                    else:
                        res = orngTest.crossValidation(
                            [MLmethods[ml]],
                            trainData,
                            folds=5,
                            strat=orange.MakeRandomIndices.StratifiedIfPossible,
                            randomGenerator=random.randint(0, 100),
                        )
                        R2 = evalUtilities.R2(res)[0]
                        optAcc[ml].append(R2)
                else:
                    runPath = miscUtilities.createScratchDir(
                        baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)
                    )
                    # self.__log("  run path:"+str(runPath))
                    trainData.save(os.path.join(runPath, "trainData.tab"))
                    tunedPars = paramOptUtilities.getOptParam(
                        learner=MLmethods[ml],
                        trainDataFile=os.path.join(runPath, "trainData.tab"),
                        paramList=self.paramList,
                        useGrid=False,
                        verbose=self.verbose,
                        queueType=self.queueType,
                        runPath=runPath,
                        nExtFolds=None,
                        nFolds=self.nInnerFolds,
                        logFile=self.logFile,
                        getTunedPars=True,
                    )
                    if not MLmethods[ml] or not MLmethods[ml].optimized:
                        self.__log(" WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized.")
                        self.__log(" It will be ignored")
                        # self.__log(" It will be set to default parameters")
                        self.__log(" DEBUG can be done in: " + runPath)
                        # Set learner back to default
                        # MLmethods[ml] = MLmethods[ml].__class__()
                        raise Exception("The learner " + str(ml) + " was not optimized.")
                    else:
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            optAcc[ml].append(tunedPars[0])
                        else:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                        miscUtilities.removeDir(runPath)
                # Train the model
                model = MLmethods[ml](trainData)
                models[ml].append(model)
                # Test the model
                if self.responseType == "Classification":
                    results[ml].append(
                        (
                            evalUtilities.getClassificationAccuracy(testData, model),
                            evalUtilities.getConfMat(testData, model),
                        )
                    )
                    roc = self.aroc(testData, [model])
                    rocs[ml].append(roc)
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append(
                        (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                    )
                    # Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
                if callBack:
                    stepsDone += 1
                    if not callBack((100 * stepsDone) / nTotalSteps):
                        return None
            res = self.createStatObj(
                results[ml],
                exp_pred[ml],
                nTrainEx[ml],
                nTestEx[ml],
                self.responseType,
                self.nExtFolds,
                logTxt,
                rocs[ml],
            )
            if self.verbose > 0:
                print "UnbiasedAccuracyGetter!Results " + ml + ":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
            self.__log(" OK")
        except:
            print "Unexpected error:",
            print sys.exc_info()[0]
            print sys.exc_info()[1]
            self.__log(" Learner " + str(ml) + " failed to create/optimize the model!")
            # NOTE(review): still records a (possibly partial) stat object for the failed learner
            res = self.createStatObj(
                results[ml],
                exp_pred[ml],
                nTrainEx[ml],
                nTestEx[ml],
                self.responseType,
                self.nExtFolds,
                logTxt,
                rocs[ml],
            )
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
    if not statistics or len(statistics) < 1:
        self.__log("ERROR: No statistics to return!")
        return None
    elif len(statistics) > 1:
        # We still need to build a consensus model out of the stable models
        #   ONLY if there are more that one model stable!
        #   When only one or no stable models, build a consensus based on all models
        consensusMLs = {}
        for modelName in statistics:
            StabilityValue = statistics[modelName]["StabilityValue"]
            if StabilityValue is not None and statistics[modelName]["stable"]:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])
        self.__log(
            "Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods."
        )
        if len(consensusMLs) <= 1:
            # we need more models to build a consensus!
            consensusMLs = {}
            for modelName in statistics:
                consensusMLs[modelName] = copy.deepcopy(statistics[modelName])
        if len(consensusMLs) >= 2:
            # Var for saving each Fols result
            Cresults = []
            Cexp_pred = []
            CnTrainEx = []
            CnTestEx = []
            self.__log(
                "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])
            )
            for foldN in range(self.nExtFolds):
                if self.responseType == "Classification":
                    # Build a weighted-vote expression, weights = per-fold optAcc
                    CLASS0 = str(self.data.domain.classVar.values[0])
                    CLASS1 = str(self.data.domain.classVar.values[1])
                    exprTest0 = "(0"
                    for ml in consensusMLs:
                        exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(optAcc[ml][foldN]) + " "
                    exprTest0 += ")/IF0(sum([False"
                    for ml in consensusMLs:
                        exprTest0 += ", " + ml + " == " + CLASS0 + " "
                    exprTest0 += "]),1)"
                    exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                    expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
                else:
                    # Regression consensus: optAcc-weighted average of predictions
                    Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                    expression = "(1 / " + str(Q2sum) + ") * (0"
                    for ml in consensusMLs:
                        expression += " + " + str(optAcc[ml][foldN]) + " * " + ml + " "
                    expression += ")"
                testData = self.data.select(DataIdxs[foldN])
                CnTestEx.append(len(testData))
                consensusClassifiers = {}
                for learnerName in consensusMLs:
                    consensusClassifiers[learnerName] = models[learnerName][foldN]
                model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                CnTrainEx.append(model.NTrainEx)
                # Test the model
                if self.responseType == "Classification":
                    Cresults.append(
                        (
                            evalUtilities.getClassificationAccuracy(testData, model),
                            evalUtilities.getConfMat(testData, model),
                        )
                    )
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    Cresults.append(
                        (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                    )
                    # Save the experimental value and correspondent predicted value
                    Cexp_pred += local_exp_pred
            res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
            statistics["Consensus"] = copy.deepcopy(res)
            statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
            self.__writeResults(statistics)
        self.__log("Returned multiple ML methods statistics.")
        return statistics
    # By default return the only existing statistics!
    self.__writeResults(statistics)
    self.__log("Returned only one ML method statistics.")
    return statistics[statistics.keys()[0]]
def getAcc(self, algorithm=None, minsup=None, atts=None):
    """ For regression problems, it returns the RMSE and the R2
        For Classification problems, it returns CA and the ConfMat
        The return is made in a Dict: {"RMSE":0.2,"R2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
        For the EvalResults not supported for a specific learner/datase, the respective result will be None

        if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
            made out of those that were stable

        It some error occurred, the respective values in the Dict will be None

        NOTE(review): the algorithm/minsup/atts parameters are accepted but the body
        reads self.algorithm/self.minsup/self.atts instead — confirm intended.
    """
    self.__log("Starting Calculating MLStatistics")
    statistics = {}
    if not self.__areInputsOK():
        return None
    if (self.algorithm):
        self.__log(" Additional structural features to be calculated inside of cross-validation")
        self.__log(" Algorithm for structural features: " + str(self.algorithm))
        self.__log(" Minimum support parameter: " + str(self.minsup))
    # Set the response type from the class variable (discrete -> classification)
    responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
    self.__log(" " + str(responseType))
    # Create the Train and test sets
    DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)
    # Var for saving each Fols result
    results = {}
    exp_pred = {}
    # Set a dict of learners
    MLmethods = {}
    if type(self.learner) == dict:
        for ml in self.learner:
            MLmethods[ml] = self.learner[ml]
    else:
        MLmethods[self.learner.name] = self.learner
    models = {}
    self.__log("Calculating Statistics for MLmethods:")
    self.__log(" " + str([x for x in MLmethods]))
    for ml in MLmethods:
        self.__log(" > " + str(ml) + "...")
        try:
            # Var for saving each Fols result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            for foldN in range(self.nExtFolds):
                if type(self.learner) == dict:
                    self.paramList = None
                trainData = self.data.select(DataIdxs[foldN], negate=1)
                orig_len = len(trainData.domain.attributes)
                if (self.algorithm):
                    # add structural descriptors to the training data (TG)
                    trainData_structDesc = getStructuralDesc.getStructuralDescResult(trainData, self.algorithm, self.minsup)
                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, self.atts)
                runPath = miscUtilities.createScratchDir(baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam")
                trainData.save(os.path.join(runPath, "trainData.tab"))
                testData = self.data.select(DataIdxs[foldN])
                if (self.algorithm):
                    # calculate the feature values for the test data (TG);
                    # the structural features appended during training start at cut_off
                    cut_off = orig_len - len(self.atts)
                    smarts = trainData.domain.attributes[cut_off:]
                    self.__log(" Number of structural features added: " + str(len(smarts)))
                    testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, self.atts)
                # Optimize the hyper-parameters of this learner on the saved train set
                paramOptUtilities.getOptParam(
                    learner=MLmethods[ml],
                    trainDataFile=os.path.join(runPath, "trainData.tab"),
                    paramList=self.paramList,
                    useGrid=False,
                    verbose=self.verbose,
                    queueType=self.queueType,
                    runPath=runPath,
                    nExtFolds=None,
                    nFolds=self.nInnerFolds
                )
                if not MLmethods[ml].optimized:
                    self.__log(" The learner " + str(ml) + " was not optimized.")
                    raise Exception("The learner " + str(ml) + " was not optimized.")
                miscUtilities.removeDir(runPath)
                # Train the model
                model = MLmethods[ml](trainData)
                models[ml].append(model)
                # Test the model
                if responseType == "Classification":
                    results[ml].append((evalUtilities.getClassificationAccuracy(testData, model),
                                        evalUtilities.getConfMat(testData, model)))
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append((evalUtilities.calcRMSE(local_exp_pred),
                                        evalUtilities.calcRsqrt(local_exp_pred)))
                    # Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
            res = self.createStatObj(results[ml], exp_pred[ml], responseType, self.nExtFolds)
            if self.verbose > 0:
                print "AccWOptParamGetter!Results " + ml + ":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            statistics[ml] = res.copy()
            self.__writeResults(res)
            self.__log(" OK")
        except:
            self.__log(" Learner " + str(ml) + " failed to optimize!")
            # Record an empty stat object so the learner still appears in the output
            res = self.createStatObj()
            statistics[ml] = res.copy()
    if not statistics or len(statistics) < 1:
        self.__log("ERROR: No statistics to return!")
        return None
    elif len(statistics) > 1:
        # We still need to build a consensus model out of the stable models
        #   ONLY if there are more that one model stable!
        stableML = {}
        for modelName in statistics:
            if statistics[modelName]["StabilityValue"] < AZOC.QSARSTABILITYTHRESHOLD:   # Select only stable models
                stableML[modelName] = statistics[modelName].copy()
        if len(stableML) >= 2:
            self.__log("Found " + str(len(stableML)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods.")
            if responseType == "Classification":
                # CA-weighted vote expression over the stable classifiers
                CLASS0 = str(self.data.domain.classVar.values[0])
                CLASS1 = str(self.data.domain.classVar.values[1])
                exprTest0 = "(0"
                for ml in stableML:
                    exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(stableML[ml]["CA"]) + " "
                exprTest0 += ")/IF0(sum([False"
                for ml in stableML:
                    exprTest0 += ", " + ml + " == " + CLASS0 + " "
                exprTest0 += "]),1)"
                exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
            else:
                # R2-weighted average of the stable regressors
                R2sum = sum([stableML[ml]["R2"] for ml in stableML])
                expression = "(1 / " + str(R2sum) + ") * (0"
                for ml in stableML:
                    expression += " + " + str(stableML[ml]["R2"]) + " * " + ml + " "
                expression += ")"
            # Var for saving each Fols result
            Cresults = []
            Cexp_pred = []
            self.__log("Calculating the statistics for a Consensus model")
            for foldN in range(self.nExtFolds):
                testData = self.data.select(DataIdxs[foldN])
                consensusClassifiers = {}
                for learnerName in stableML:
                    consensusClassifiers[learnerName] = models[learnerName][foldN]
                model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                # Test the model
                if responseType == "Classification":
                    Cresults.append((evalUtilities.getClassificationAccuracy(testData, model),
                                     evalUtilities.getConfMat(testData, model)))
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    Cresults.append((evalUtilities.calcRMSE(local_exp_pred),
                                     evalUtilities.calcRsqrt(local_exp_pred)))
                    # Save the experimental value and correspondent predicted value
                    Cexp_pred += local_exp_pred
            res = self.createStatObj(Cresults, Cexp_pred, responseType, self.nExtFolds)
            statistics["Consensus"] = res.copy()
            statistics["Consensus"]["IndividualStatistics"] = stableML.copy()
            self.__writeResults(statistics)
        self.__log("Returned multiple ML methods statistics.")
        return statistics
    # By default return the only existing statistics!
    self.__writeResults(statistics)
    self.__log("Returned only one ML method statistics.")
    return statistics[statistics.keys()[0]]
def testCvSVM_MPI(self): """ Tests changing the default range of the optimizer. Use MPI versio0n of appspack """ # Classification accuracy: ExpectedCA = [0.585] #Should be the result of the default point optimizer = paramOptUtilities.Appspack() learner = AZorngCvSVM.CvSVMLearner() learnerName = "CvSVMLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["C"] #, "gamma"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter, "optimize", True) # Change the range pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1)) pars.setParameter("priors", "default", { "POS": 50, "NEG": 4 }) # These priors are to ensure the default point will be the best! pars.setParameter( "gamma", "default", 0.001 ) # This is a bad value to ensure the default point will be the best! trainFile = self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvSVM_MPI") evalM = "AZutilities.evalUtilities.CA" fMin = False # Calculate the optimal parameters. This can take a long period of time! 
tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ useParameters = pars.getParametersDict(),\ findMin=fMin,\ runPath = runPath,\ verbose = 0,\ useStd = False,\ #advancedMPIoptions = "-all-local -allcpus") # to use this the # file "<MPICHDIR>/share/machines.LINUX must be properly configured" np = 4,\ machinefile = os.path.realpath(os.path.join(os.environ["AZORANGEHOME"], "tests/source/APPS_machines"))) verbTunedPars = optimizer.getTunedParameters() print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "Best result index from intRes file:", verbTunedPars["ResIdx"] print "Optimizer runPath:", runPath print "check the file intRes.txt to see the intermediate results of optimizer!" # Check that the learner was optimized self.assertEqual(learner.optimized, True) # Check if the MPI version was used self.assertEqual(optimizer.usedMPI, True) # Check the number of optimized parameters self.assertEqual(len(verbTunedPars["optParam"]), 12) # Check the accuracy self.assert_( round(verbTunedPars["bestRes"], 3) in [round(x, 3) for x in ExpectedCA], "Got:" + str(verbTunedPars["bestRes"])) self.assert_( len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) >= 5) #Check Priors self.assertEqual(tunedPars[1]["priors"], "None") learner.priors = {'NEG': 4, 'POS': 2} classifier = learner(self.discTest) classifier.write(os.path.join(runPath, "CvSVMModel")) file = open(os.path.join(runPath, "CvSVMModel/model.svm"), "r") lines = file.readlines() file.close() priors = [ round(x, 2) for x in eval((lines[18].strip()).replace("data:", "")) ] self.assertEqual(len(priors), 2) self.assertEqual( priors[self.discTest.domain.classVar.values.index("POS")], 2.0 * float(tunedPars[1]["C"])) self.assertEqual( 
priors[self.discTest.domain.classVar.values.index("NEG")], 4.0 * float(tunedPars[1]["C"])) miscUtilities.removeDir(runPath)
def __call__(self, trainingData, weight=None):
    """Creates an PLS model from the data in trainingData.

    Returns a PLSClassifier wrapping the trained pls.PlsAPI object, or None
    when the base-class validation of trainingData fails.
    """
    if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
        return None
    # Remove from the domain any unused values of discrete attributes including class
    trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData, True)
    # Create path for the Orange data
    scratchdir = miscUtilities.createScratchDir(desc="PLS")
    OrngFile = os.path.join(scratchdir, "OrngData.tab")
    # Remove meta attributes from training data to make the imputer work with examples without the meta attributes.
    # dataUtilities.rmAllMeta(trainingData)
    if len(trainingData.domain.getmetas()) == 0:
        trainData = trainingData
    else:
        trainData = dataUtilities.getCopyWithoutMeta(trainingData)
    # Create the imputer
    self.imputer = orange.ImputerConstructor_average(trainData)
    # Impute the data
    trainData = self.imputer(trainData)
    # Save the Data already imputed to an Orange formated file
    if self.verbose > 1:
        print time.asctime(), "Saving Orange Data to a tab file..."
    orange.saveTabDelimited(OrngFile, trainData)
    if self.verbose > 1:
        print time.asctime(), "done"
    # Create the PLS instance
    if self.verbose > 1:
        print time.asctime(), "Creating PLS Object..."
    learner = pls.PlsAPI()
    if self.verbose > 1:
        print time.asctime(), "done"
    # Assign the PLS parameters
    learner.SetParameter('v', str(self.verbose))
    learner.SetParameter('debug', str(int(self.verbose > 0)))
    learner.SetParameter('method', self.method)
    # types.IntType is the Python 2 `int` type: coerces self.k (possibly a
    # string) for the comparison. k is capped at the number of attributes.
    if types.IntType(self.k) > len(trainData.domain.attributes):
        learner.SetParameter('k', str(len(trainData.domain.attributes)))
        if self.verbose > 0:
            print "Warning! The number of components were more than the number of attributes."
        if self.verbose > 0:
            print " Components were set to ", len(trainData.domain.attributes)
    else:
        learner.SetParameter('k', self.k)
    learner.SetParameter('precision', self.precision)
    learner.SetParameter('sDir', scratchdir)  # AZOC.SCRATCHDIR
    # Read the Orange Formated file and Train the Algorithm
    # TRAIN
    if self.verbose > 1:
        print time.asctime(), "Training..."
    learner.Train(OrngFile)
    if self.verbose > 1:
        print "Train finished at ", time.asctime()
        print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds";
        print "Method: " + learner.GetParameter("method")
        print "Components: " + learner.GetParameter("k")
        print "Precision: " + learner.GetParameter("precision")
    # Remove the scratch file
    if self.verbose == 0:
        miscUtilities.removeDir(scratchdir)
    else:
        print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
    del trainData
    impData = self.imputer.defaults
    # Wrap the trained native learner in the Orange-compatible classifier
    return PLSClassifier(classifier=learner,
                         name="Classifier of " + self.name,
                         classVar=trainingData.domain.classVar,
                         imputeData=impData,
                         verbose=self.verbose,
                         varNames=[attr.name for attr in trainingData.domain.attributes],
                         NTrainEx=len(trainingData),
                         basicStat=self.basicStat,
                         parameters=self.parameters)  # learner.GetClassVarName())#
def testRF_MPI(self):
    """ Tests changing the default range of the optimizer.
        Use MPI version of appspack """
    # Classification accuracy:
    ExpectedCA = [0.612]  # opencv1.1: 0.90480000000000005
    optimizer = paramOptUtilities.Appspack()
    learner = AZorngRF.RFLearner()
    learnerName = "RFLearner"
    # Create an interface for setting optimizer parameters
    pars = AZLearnersParamsConfig.API(learnerName)
    # Set all parameters to not be optimized
    pars.setOptimizeAllParameters(False)
    parameterList = ["nActVars"]
    # Set the parameters in parameterList to be optimized
    for parameter in parameterList:
        pars.setParameter(parameter, "optimize", True)
    # Set the NumThreads
    pars.setParameter("NumThreads", "optimize", False)
    # Change the default
    pars.setParameter("NumThreads", "default", "1")
    trainFile = self.discTrainDataPath
    # Create a directory for running the appspack (if not defined it will use the present working directory)
    runPath = miscUtilities.createScratchDir(desc="ParamOptTest_RF_MPI")
    evalM = "AZutilities.evalUtilities.CA"
    fMin = False
    # Calculate the optimal parameters. This can take a long period of time!
    tunedPars = optimizer(learner=learner,
                          dataSet=trainFile,
                          evaluateMethod=evalM,
                          useParameters=pars.getParametersDict(),
                          useDefaultPoint=False,
                          findMin=fMin,
                          runPath=runPath,
                          useStd=False,
                          verbose=0,
                          # advancedMPIoptions = "-all-local -allcpus") # to use this the
                          # file "<MPICHDIR>/share/machines.LINUX must be properly configured"
                          # Alternatively, we can set machinefile=0 to use also all available cores
                          machinefile=0)
    print "Returned: ", tunedPars
    print "====================== optimization Done ==========================="
    print "Learner optimized flag = ", learner.optimized
    print "Tuned parameters = ", tunedPars[1]
    print "Best optimization result = ", tunedPars[0]
    print "check the file intRes.txt to see the intermediate results of optimizer!"
    print "Number of cores used: ", optimizer.np
    verbTunedPars = optimizer.getTunedParameters()
    # Check that the learner was optimized
    self.assertEqual(learner.optimized, True)
    # Check if the number of processors used are all the cores available
    notUsed, out = commands.getstatusoutput("cat /proc/cpuinfo | grep processor")
    self.assertEqual(optimizer.np, len(out.split("\n")))
    # Check if the MPI version was used
    self.assertEqual(optimizer.usedMPI, True)
    # Check the number of optimized parameters
    self.assert_(len(verbTunedPars["optParam"]) in [8, 9, 10])
    # Check the accuracy
    self.assert_(round(verbTunedPars["bestRes"], 3) in [round(x, 3) for x in ExpectedCA],
                 "Got:" + str(verbTunedPars["bestRes"]))
    self.assert_(len(dataUtilities.DataTable(os.path.join(runPath, "optimizationLog.txt"))) >= 3)  # Must be > 2
    miscUtilities.removeDir(runPath)
def disable_testSVM_MPI_3(self): ################################################################### # Test other way of setting appspack ################################################################### # Classification accuracy: ExpectedCA = 0.6 optimizer = paramOptUtilities.Appspack() learner = AZorngCvSVM.CvSVMLearner() learnerName = "CvSVMLearner" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API(learnerName) # Set all parameters to not be optimized pars.setOptimizeAllParameters(False) parameterList = ["C", "gamma"] # Set the parameters in parameterList to be optimized for parameter in parameterList: pars.setParameter(parameter, "optimize", True) # Change the range pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1)) trainFile = self.discTrainDataPath # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI_3") evalM = "AZutilities.evalUtilities.CA" fMin = False #[<minimum of objective function found>, <optimized parameters>] tunedPars = optimizer(learner=learner,\ dataSet=trainFile,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useParameters = pars.getParametersDict(),\ verbose = 0,\ useStd = False,\ advancedMPIoptions = "-v -np 4",\ machinefile = ["localhost:2","localhost:2"]) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
# Check if the MPI version was used self.assertEqual(learner.optimized, True) # Check if the MPI version was used self.assertEqual(optimizer.usedMPI, True) self.assertEqual(round(tunedPars[0], 3), round(ExpectedCA, 3)) self.assert_( len( dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt"))) >= 12) # (orig 14) Must be > 2 #print runPath miscUtilities.removeDir(runPath)
def test_PLS_MPI_2(self): ################################################################### # Test other way of setting appspack ###################################################################i # Classification accuracy: ExpectedCA = [0.567049808429, 0.593869731801] ExpectedCAwithTest = [0.6, 0.579] #New at orange2.0 #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngPLS.PLSLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.discTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = False # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.CA" evalM = "AZutilities.evalUtilities.CA" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest_PLS_MPI_2") # Load the optimization parameters from the default configuration (AZLearnersParamsConfig.py) parameters = AZLearnersParamsConfig.API("PLSLearner") parameters.setParameter("method", "default", 'pls1') # change the optimization parameters parameters.setParameter( "method", "default", 'pls1') # make the method fixed (do not optimize) to be pls1 parameters.setParameter("method", "optimize", False) parameters.setParameter( "method", "rangeType", "values") # assure that the keyword for the values range type is #set correctly for values instead of interval parameters.setParameter( "k", "range", [1, 3, 5, 6, 10 ]) # make the method fixed (do not optimize) to be pls1 parameters.setParameter("k", "optimize", True) parameters.setParameter( "k", "rangeType", "values") # assure that the keyword for the values range type is # set correctly for values instead of interval #[<minimum of objective function found>, <optimized 
parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useParameters = parameters.getParametersDict(),\ verbose = 0, useStd = False,\ advancedMPIoptions = None, np = 4, machinefile = ["localhost:2","localhost:2"]) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" self.assertEqual(learner.optimized, True) # Check if the MPI version was used self.assertEqual(opt.usedMPI, True) self.assert_( round(tunedPars[0], 3) in [round(x, 3) for x in ExpectedCAwithTest], "Got:" + str(tunedPars[0])) #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.discTrain) CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier) self.assert_( round(CA, 3) in [round(x, 3) for x in ExpectedCA], "Got: " + str(CA)) resData2 = dataUtilities.DataTable( os.path.join(runPath, "optimizationLog.txt")) self.assert_(len(resData2) >= 4) # (orig 5) Must be > 2 #print runPath miscUtilities.removeDir(runPath)
def __call__(self, trainingData, weight=None):
    """Create a PLS model from the data in trainingData.

    Returns a PLSClassifier wrapping the trained pls.PlsAPI object, or
    None when the base-class pre-checks on the training data fail.
    Side effects: sets self.imputer, and removes the scratch directory
    used for training unless verbose (DEBUG) is on.
    """
    # Base-class validation of the training data; abort on failure.
    if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
        return None
    # Remove from the domain any unused values of discrete attributes, including the class.
    trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData, True)
    # Scratch area where the Orange-format training file is written.
    scratchdir = miscUtilities.createScratchDir(desc="PLS")
    OrngFile = os.path.join(scratchdir, "OrngData.tab")
    # Remove meta attributes from training data to make the imputer work
    # with examples without the meta attributes.
    #dataUtilities.rmAllMeta(trainingData)
    if len(trainingData.domain.getmetas()) == 0:
        trainData = trainingData
    else:
        trainData = dataUtilities.getCopyWithoutMeta(trainingData)
    # Create the averaging imputer from the training data, then impute it.
    self.imputer = orange.ImputerConstructor_average(trainData)
    trainData = self.imputer(trainData)
    # Save the already-imputed data to an Orange formatted file.
    if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..."
    orange.saveTabDelimited(OrngFile, trainData)
    if self.verbose > 1: print time.asctime(), "done"
    # Create the PLS instance.
    if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
    learner = pls.PlsAPI()
    if self.verbose > 1: print time.asctime(), "done"
    # Assign the PLS parameters (the PlsAPI interface appears to take string
    # values -- note every other SetParameter call passes a str).
    learner.SetParameter('v', str(self.verbose))
    learner.SetParameter('debug', str(int(self.verbose > 0)))
    learner.SetParameter('method', self.method)
    # Cap the number of components k at the number of attributes.
    # NOTE(review): types.IntType is just int, so this coerces self.k --
    # presumably self.k is a str or int; confirm, since the else-branch
    # passes it through to SetParameter unconverted.
    if types.IntType(self.k) > len(trainData.domain.attributes):
        learner.SetParameter('k', str(len(trainData.domain.attributes)))
        if self.verbose > 0: print "Warning! The number of components were more than the number of attributes."
        if self.verbose > 0: print " Components were set to ", len(trainData.domain.attributes)
    else:
        learner.SetParameter('k', self.k)
    learner.SetParameter('precision', self.precision)
    learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)
    # Read the Orange formatted file and train the algorithm.
    # TRAIN
    if self.verbose > 1: print time.asctime(), "Training..."
    learner.Train(OrngFile)
    if self.verbose > 1:
        print "Train finished at ", time.asctime()
        print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds"
        print "Method: " + learner.GetParameter("method")
        print "Components: " + learner.GetParameter("k")
        print "Precision: " + learner.GetParameter("precision")
    # Remove the scratch directory (kept for inspection when DEBUG is on).
    if self.verbose == 0:
        miscUtilities.removeDir(scratchdir)
    else:
        print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
    del trainData
    # Defaults produced by the imputer travel with the classifier so it can
    # impute missing values at prediction time.
    impData = self.imputer.defaults
    return PLSClassifier(classifier=learner,
                         name="Classifier of " + self.name,
                         classVar=trainingData.domain.classVar,
                         imputeData=impData,
                         verbose=self.verbose,
                         varNames=[attr.name for attr in trainingData.domain.attributes],
                         NTrainEx=len(trainingData),
                         basicStat=self.basicStat,
                         parameters=self.parameters)  #learner.GetClassVarName())#