def test_CanPersistRegressionModelUsingClassifiers(self):
        """Test the save/load for a regression model - Using average of N classifiers"""

        # Arrange
        learners = [AZorngRF.RFLearner(), AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner()]
        learner = AZorngConsensus.ConsensusLearner(learners = learners)
        classifier = learner(self.DataReg)

        # Act
        predictions = []
        for ex in self.DataReg:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.DataReg:
            predictionsL.append(Loaded(ex))

        self.assertEqual([round(pred.value,4) for pred in predictions],
                         [round(pred.value,4) for pred in predictionsL],
                         "Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain),len(self.DataReg.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.DataReg)*0.8)

        miscUtilities.removeDir(scratchdir)
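For reference, the persistence round-trip exercised by this and the following tests reduces to a few lines. The sketch below is illustrative only: it assumes the AZOrange modules used above are importable and that trainData is a classification table comparable to self.irisData.

learners = [AZorngRF.RFLearner(), AZorngCvSVM.CvSVMLearner()]
consensus = AZorngConsensus.ConsensusLearner(learners=learners)(trainData)
tmpDir = miscUtilities.createScratchDir(desc="ConsensusRoundTrip")
modelPath = os.path.join(tmpDir, "CM.model")
consensus.write(modelPath)                          # persist the consensus model
loaded = AZorngConsensus.Consensusread(modelPath)   # reload it from disk
assert [loaded(ex) for ex in trainData] == [consensus(ex) for ex in trainData]
miscUtilities.removeDir(tmpDir)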
Example #2
        def TopVarImportanceTest(data, expectNone=False):
            resA = []
            resB = []
            learner = AZorngCvSVM.CvSVMLearner(gamma=1.0,
                                               svm_type=103,
                                               C=1,
                                               coef0=0,
                                               degree=3,
                                               epsR=0.001,
                                               kernel_type=2,
                                               nu=0.5,
                                               p=0.1,
                                               probability=0,
                                               shrinking=1)
            CvSVM = learner(data)

            for ex in data:
                resA.append(CvSVM.getTopImportantVars(ex, 1))

            scratchdir = miscUtilities.createScratchDir(
                desc="TopVarImportanceTest")
            modelPath = os.path.join(scratchdir, "CvSVNModel")
            CvSVM.write(modelPath)
            LoadedCvSVM = AZorngCvSVM.CvSVMread(modelPath)
            miscUtilities.removeDir(scratchdir)

            for ex in data:
                resB.append(LoadedCvSVM.getTopImportantVars(ex, 1))
            if expectNone:
                return resA == resB == [None] * len(data)
            else:
                return resA == resB and None not in resA and resA.count(
                    resA[0]) != len(resA)
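A hedged driver sketch for the helper above, simplified from how the enclosing test method might call it; the table paths and the plain asserts are illustrative assumptions, not the suite's actual fixtures.

data = dataUtilities.DataTable("iris.tab")                # hypothetical descriptor table
assert TopVarImportanceTest(data)                         # ranking must survive save/load
dataNoRank = dataUtilities.DataTable("noRanking.tab")     # hypothetical table where no ranking is expected
assert TopVarImportanceTest(dataNoRank, expectNone=True)  # both models return None for every example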
Example #3
    def test_FeedClassifiersClass(self):
        """Test the creation of Consensus feeding Classifiers"""

        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
        classifiers = [l(self.irisData) for l in learners]

        classifier = AZorngConsensus.ConsensusClassifier(classifiers = classifiers)

        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        self.assertEqual(len(Loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
Example #4
    def test_CanPersistClassificationModelMajority(self):
        """Test the save/load for a classification model - Using Majority"""
        """ Arrange """
        learners = self.createTestLearners()
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.getClassificationTrainingData())
        """ Act """
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir, "./CM.model"))
        """ Assert """
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions, predictionsL)

        miscUtilities.removeDir(scratchdir)
Example #5
    def test_saveloadClass2(self):
        """Test the save/load for a classification model - Using probabilities average"""
        learnersNames = ["RF","CvANN"]

        learner = AZorngConsensus.ConsensusLearner(learnersNames = learnersNames)
        classifier = learner(self.irisData)
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)
        self.assertEqual(len(Loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
Example #6
    def test_saveloadReg(self):
        """Test the save/load for a regression model - Using average of N classifiers"""
        learnersNames = ["CvANN","CvSVM","RF"]

        learner = AZorngConsensus.ConsensusLearner(learnersNames = learnersNames)
        classifier = learner(self.DataReg)
        predictions = []
        for ex in self.DataReg:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.DataReg:
            predictionsL.append(Loaded(ex))

        self.assertEqual([round(pred.value,4) for pred in predictions],[round(pred.value,4) for pred in predictionsL],"Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain),len(self.DataReg.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.DataReg)-66)

        miscUtilities.removeDir(scratchdir)
Example #7
    def test_FeedLearnersReg(self):
        """Test the creation of Consensus feeding Learners for regression"""
        #The Learners can be individually customized before passing them to the Consensus
        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]

        #Now passing the learner objects (learnersObj) instead
        learner = AZorngConsensus.ConsensusLearner(learnersObj = learners)
        classifier = learner(self.DataReg)
        predictions = []
        for ex in self.DataReg:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.DataReg:
            predictionsL.append(Loaded(ex))

        self.assertEqual([round(pred.value,4) for pred in predictions],[round(pred.value,4) for pred in predictionsL],"Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain),len(self.DataReg.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
Example #8
    def test_CanPersistClassificationModelProbabilities(self):
        """Test the save/load for a classification model - Using probabilities average"""

        # Arrange
        learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.irisData)

        # Act
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        print scratchdir
        classifier.write(os.path.join(scratchdir, "./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions, predictionsL)
        self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
    def test_CanPersistClassificationModelMajority(self):
        """Test the save/load for a classification model - Using Majority"""

        """ Arrange """
        learners = self.createTestLearners()
        learner = AZorngConsensus.ConsensusLearner(learners = learners)
        classifier = learner(self.getClassificationTrainingData())

        """ Act """
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        """ Assert """
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        self.assertEqual(len(Loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain)) 
        self.assertEqual(Loaded.NTrainEx, 0.8*len(self.irisData))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        miscUtilities.removeDir(scratchdir) 
    def test_CanPersistClassificationModelProbabilities(self):
        """Test the save/load for a classification model - Using probabilities average"""

        # Arrange
        learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
        learner = AZorngConsensus.ConsensusLearner(learners = learners)
        classifier = learner(self.irisData)

        # Act
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)
        self.assertEqual(len(Loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData) - int(0.2 * len(self.irisData)))

        miscUtilities.removeDir(scratchdir)
Example #11
    def test_FeedLearnersClass(self):
        """Test the creation of Consensus feeding Learners for classification"""
        #The Learners can be individually customized before passing them to the Consensus
        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]

        #Now passing the learner objects (learnersObj) instead
        learner = AZorngConsensus.ConsensusLearner(learnersObj = learners)
        classifier = learner(self.irisData)
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        self.assertEqual(len(Loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
Example #12
    def test_FeedClassifiersReg(self):
        """Test the feeding of regression classifiers """
        #DataSet = dataUtilities.DataTable("/home/palmeida/dev/OpenAZOTesteInstall/tests/source/data/linearTrain.tab")
        DataSet = self.DataReg
        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
        classifiers = [l(DataSet) for l in learners]

        classifier = AZorngConsensus.ConsensusClassifier(classifiers = classifiers)
        predictions = []
        for ex in DataSet:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in DataSet:
            predictionsL.append(Loaded(ex))
        self.assertEqual([round(pred.value,4) for pred in predictions],[round(pred.value,4) for pred in predictionsL],"Loaded model predictions differ: Pred. 1 (saved/loaded):"+str(predictions[0])+" / "+str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain),len(DataSet.domain))
        self.assertEqual(len(Loaded.imputeData) , len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(DataSet))

        miscUtilities.removeDir(scratchdir)
Example #13
        def TopVarImportanceTest(data, expectNone=False):
            resA = []
            resB = []
            learner = AZorngCvSVM.CvSVMLearner(
                gamma=1.0,
                svm_type=103,
                C=1,
                coef0=0,
                degree=3,
                epsR=0.001,
                kernel_type=2,
                nu=0.5,
                p=0.1,
                probability=0,
                shrinking=1,
            )
            CvSVM = learner(data)

            for ex in data:
                resA.append(CvSVM.getTopImportantVars(ex, 1))

            scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
            modelPath = os.path.join(scratchdir, "CvSVNModel")
            CvSVM.write(modelPath)
            LoadedCvSVM = AZorngCvSVM.CvSVMread(modelPath)
            miscUtilities.removeDir(scratchdir)

            for ex in data:
                resB.append(LoadedCvSVM.getTopImportantVars(ex, 1))
            if expectNone:
                return resA == resB == [None] * len(data)
            else:
                return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
Example #14
    def testSVM_MPI_3(self):
        ###################################################################
        #       Test another way of setting up appspack
        ###################################################################
        # Classification accuracy:
        ExpectedCA = 0.847 #orange1: 0.837619047619

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvSVM.CvSVMLearner()
        learnerName = "CvSVMLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["C", "gamma"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter,"optimize",True)

        # Change the range
        pars.setParameter("C","range",miscUtilities.power2Range(-5,2,1))

        trainFile=self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI_3")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0,\
                        useStd = False,\
                        advancedMPIoptions = "-v -np 4",\
                        machinefile = ["localhost:2","localhost:2"])
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check if the MPI version was used
        self.assertEqual(learner.optimized,True)
        # Check if the MPI version was used
        self.assertEqual(optimizer.usedMPI, True)
        self.assertEqual(round(tunedPars[0],3),round(ExpectedCA,3))
        self.assert_(len(dataUtilities.DataTable(os.path.join(runPath,"optimizationLog.txt")))>=12) # (orig 14) Must be > 2
        #print runPath
        miscUtilities.removeDir(runPath)
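A hedged follow-up sketch, not part of the test above: since the optimizer configures the learner in place, a tuned model can be built and evaluated right away, mirroring the RF and PLS optimizer tests in this collection. self.discTrain and self.discTest are assumed fixtures paralleling self.discTrainDataPath.

        bestScore, bestParams = tunedPars     # [<best objective value>, <optimized parameter dict>]
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
        print "CA of the tuned SVM on the hold-out set: ", CA, " tuned with: ", bestParams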
Example #15
    def test_getStatistics(self):
        runPath = miscUtilities.createScratchDir(desc="StatisticsTest")
        statistics = competitiveWorkflow.getStatistics(
            self.Ctrain_data, runPath, None,
            queueType="batch.q", getAllModels=False)
        pprint(statistics)
        os.system("rm -rf " + runPath)
Example #16
    def test_RFRegression(self):
        """RF - Test of optimizer with continuous class data        
        """
        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (continuous class in this example)
        dataSet = self.contTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = True
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.R2"
        evalM = "AZutilities.evalUtilities.RMSE"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API("RFLearner")
        # Set the parameters in parameterList to be optimized
        pars.setParameter("NumThreads", "optimize", False)
        # Change the default
        pars.setParameter("NumThreads", "default", "1")

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        self.assertEqual(opt.usedMPI, False)
        self.assertEqual(learner.optimized, True)
        self.assertEqual(round(tunedPars[0], 2), round(3.1499999999999999, 2))

        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.contTrain)
        RMSE = evalUtilities.getRMSE(self.contTest, classifier)
        self.assertEqual(round(RMSE, 2), round(2.02, 2))  #Ver 0.3

        #Check that the best result was not obtained with NumThreads different from 1, since that
        #could give different results between runs
        self.assertEqual(int(tunedPars[1]["NumThreads"]), 1)

        miscUtilities.removeDir(runPath)
Example #17
    def test_RFRegression(self):
        """RF - Test of optimizer with continuous class data        
        """
        #Create  the appspack instance
        opt=paramOptUtilities.Appspack()
        #Learner to be optimized
        learner=AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (continuous class in this example)
        dataSet=self.contTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin=True
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): 
        #       evaluateMethod="AZutilities.evalUtilities.R2"
        evalM="AZutilities.evalUtilities.RMSE"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API("RFLearner")
        # Set the parameters in parameterList to be optimized
        pars.setParameter("NumThreads","optimize",False)
        # Change the default
        pars.setParameter("NumThreads","default","1")

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]        
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        self.assertEqual(opt.usedMPI,False)
        self.assertEqual(learner.optimized,True)
        self.assertEqual(round(tunedPars[0],2),round(3.1499999999999999,2))

        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.contTrain)
        RMSE = evalUtilities.getRMSE(self.contTest,classifier)
        self.assertEqual(round(RMSE,2),round(2.02,2)) #Ver 0.3

        #Check that the best result was not obtained with NumThreads different from 1, since that
        #could give different results between runs
        self.assertEqual(int(tunedPars[1]["NumThreads"]),1)

        miscUtilities.removeDir(runPath)
Example #18
    def test_RF_Classification(self):
        """PLS - Test of optimizer with discrete class data
        """
        expectedAcc = [
            0.57999999999999996, 0.58999999999999997, 0.612
        ]  #Ver 0.3 - Artifact: The second value can be expected on other Systems
        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet = self.discTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM = "AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        self.assertEqual(opt.usedMPI, False)
        self.assertEqual(learner.optimized, True)
        self.assert_(
            round(tunedPars[0], 2)
            in [round(x, 2) for x in expectedAcc])  #Ver 0.3

        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
        expectedCA = [0.9655]  # Artifact: Second value expected in UBUNTU 10.10
        self.assert_(round(CA, 2)
                     in [round(ca, 2) for ca in expectedCA])  # Ver 0.3

        miscUtilities.removeDir(runPath)
Example #19
    def test_RF_Regression(self):
        """RF - Test of optimizer with continuous class data        
        """
        expectedRes = [
            3.27, 3.2599999999999998, 3.15
        ]  #Ver 0.3 - Artifact: The second value can be expected on other Systems
        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization
        dataSet = self.contTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = True
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.R2"
        evalM = "AZutilities.evalUtilities.RMSE"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        self.assertEqual(opt.usedMPI, False)
        self.assertEqual(learner.optimized, True)
        self.assert_(
            round(tunedPars[0], 2)
            in [round(x, 2) for x in expectedRes])  #Ver 0.3
        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.contTrain)
        RMSE = evalUtilities.getRMSE(self.contTest, classifier)
        expectedRes = [2.89, 2.0158]
        self.assert_(round(RMSE, 2)
                     in [round(x, 2) for x in expectedRes])  #Ver 0.3

        miscUtilities.removeDir(runPath)
Example #20
def arrayJob(jobName = "AZOarray", jobNumber = 1, jobParams = [], jobParamFile = "Params.pkl", jobQueue = "quick.q", jobScript = "", memSize = "150M"):

        runPath = miscUtilities.createScratchDir(desc ="optQsub"+jobName, baseDir = AZOC.NFS_SCRATCHDIR)
        cwd = os.getcwd()
        os.chdir(runPath)

        paramFile = open(jobParamFile,"w")
        cPickle.dump(jobParams,paramFile)
        paramFile.close()
 
        jobFile = open(jobName + ".py","w")
        jobFile.write(jobScript)
        jobFile.close()

        cmd = "echo python " + os.path.join(runPath, str(jobName) + ".py") + \
              " | qsub -cwd -V -q " + str(jobQueue) + \
              " -p -800 -t 1-" + str(jobNumber) + \
              " -N " + str(jobName) + \
              " -S /bin/sh -sync yes" + \
              AZOC.SGE_QSUB_ARCH_OPTION_CURRENT + \
              " -l mf=" + str(memSize) # specify shell /bin/sh so not to get warning: no access to tty in output file.
        (status, output) = commands.getstatusoutput(cmd)

        # Check exit status of all our jobs
        if status != 0:
            print jobName + " failed! Code = " + str(status)
            print output
            raise ValueError
        for line in output.split("\n"):
            if "exit code 0" not in line and "Your job-array" not in line:
                print jobName + " failed! " + line
                raise ValueError

        # Check if error files exist that are not empty.
        for part in sorted(glob(os.path.join(runPath,jobName+".e*"))):
            if os.path.getsize(part) != 0:
                print jobName + " failed! file " + str(part)
                raise ValueError

        # Build result list from pickle objects
        resList = []
        for part in sorted(glob(os.path.join(runPath,jobName+".o*"))):
            file = open(part,"r")
            resList.append(cPickle.load(file))
            file.close()
        
        os.chdir(cwd)
        miscUtilities.removeDir(runPath)
        return resList
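A hedged usage sketch for arrayJob, not taken from the repository: it assumes an SGE environment and relies on the contract visible above, namely that each array task reads Params.pkl from the run directory, selects its own entry via the 1-based SGE_TASK_ID environment variable that SGE sets for array jobs, and writes exactly one pickled object to stdout, which arrayJob then collects from the *.o* files.

jobScript = """\
import os, sys, cPickle
params = cPickle.load(open("Params.pkl"))
idx = int(os.environ["SGE_TASK_ID"]) - 1      # pick this task's parameter
result = params[idx] * params[idx]            # placeholder workload
sys.stdout.write(cPickle.dumps(result))       # arrayJob unpickles each .o* file
"""
squares = arrayJob(jobName="AZOdemo", jobNumber=4, jobParams=[1, 2, 3, 4],
                   jobScript=jobScript, jobQueue="quick.q", memSize="150M")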
def buildModel(trainData, MLMethod, queueType = "NoSGE", verbose = 0, logFile = None):
        """
        Build the method passed in MLMethod and optimize it ("IndividualStatistics" not in MLMethod).
        If MLMethod is a Consensus ("IndividualStatistics" in MLMethod), first build and optimize each individual model, then build the consensus.
        """
        log(logFile, "Building and optimizing learner: "+MLMethod["MLMethod"]+"...")
        learners = {}
        MLMethods = {}
        if "IndividualStatistics"  in MLMethod:                        #It is a consensus
            for ML in MLMethod["IndividualStatistics"]:
                MLMethods[ML] = MLMethod["IndividualStatistics"][ML]
        else:
            MLMethods[MLMethod["MLMethod"]] = MLMethod

        # optimize all MLMethods
        for ML in MLMethods:
            log(logFile, "  Optimizing MLmethod: "+ML)
            learners[ML] = MLMETHODS[ML](name = ML)

            runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "AutoQSAR")
            trainData.save(os.path.join(runPath,"trainData.tab"))

            paramOptUtilities.getOptParam(
                learner = learners[ML],
                trainDataFile = os.path.join(runPath,"trainData.tab"),
                useGrid = False,
                verbose = verbose,
                queueType = queueType,
                runPath = runPath,
                nExtFolds = None)

            if not learners[ML].optimized:
                print "ERROR: AutoQSAR: The learner was not optimized."
                return None
            else:
                print "Optimized learner ",learners[ML]           
            miscUtilities.removeDir(runPath)

        #Train the model
        if len(learners) == 1:
            log(logFile, "  Building the optimized learner:"+learners.keys()[0])
            model = learners[learners.keys()[0]](trainData)
        elif len(learners) >= 1:
            model = buildConsensus(trainData,learners,MLMethods)
        else:
            print "ERROR: No Learners were selected!"
            return None

        return model
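A hedged sketch of how buildModel could be invoked, inferred only from how the function reads its MLMethod argument; the method names and dictionary layout below are assumptions for illustration, not values copied from the AutoQSAR code.

# Single learner: MLMethod["MLMethod"] must name an entry of MLMETHODS.
model = buildModel(trainData, {"MLMethod": "RF"}, queueType="NoSGE", verbose=0)

# Consensus: every key under "IndividualStatistics" is optimized and built first.
consensusSpec = {"MLMethod": "Consensus",
                 "IndividualStatistics": {"RF": {"MLMethod": "RF"},
                                          "CvANN": {"MLMethod": "CvANN"}}}
consensusModel = buildModel(trainData, consensusSpec, queueType="NoSGE", verbose=0)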
Example #22
    def test_SaveLoadCustomRegressionExpression(self):
        """ Test save/load custom expression using average N regression with object map """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        # Construct expression learner/classifier
        regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=regressionExpression)
        expressionClassifier = expressionLearner(self.DataReg)

        # Construct default learner/classifier
        result = []
        for ex in self.DataReg:
            result.append(expressionClassifier(ex))

        # Act
        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        expressionClassifier.write(os.path.join(scratchdir, "./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.DataReg:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not float_compare(result[index].value,
                                 resultLoaded[index].value):
                print "Not equal on index: ", index
            self.assertEqual(
                float_compare(result[index].value, resultLoaded[index].value),
                True)

        self.assertEqual(len(loaded.domain), len(self.DataReg.domain))
        self.assertEqual(len(loaded.imputeData), len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
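Stripped of the save/load machinery, the named-learner plus expression pattern used above reduces to the sketch below; the two-learner average expression is an assumed analogue of the three-learner one in the test, and trainData stands in for self.DataReg.

learners = {'rf': AZorngRF.RFLearner(), 'ann': AZorngCvANN.CvANNLearner()}
learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                           expression="(rf + ann) / 2")
model = learner(trainData)            # trainData: a regression Orange table
print model(trainData[0])             # averaged prediction for the first example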
Example #23
    def test_PLS_Classification(self):
        """PLS - Test of optimizer with discrete class data
        """
        expectedAcc = [0.57999999999999996, 0.58999999999999997] #Ver 0.3 - Artifact: The second value can be expected on other Systems
        #Create  the appspack instance
        opt=paramOptUtilities.Appspack()
        #Learner to be optimized
        learner=AZorngPLS.PLSLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet=self.discTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin=False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): 
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM="AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        self.assertEqual(opt.usedMPI,False)
        self.assertEqual(learner.optimized,True)
        self.assert_(round(tunedPars[0],2) in [round(x,2) for x in expectedAcc]) #Ver 0.3


        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest,classifier)
        expectedCA = [0.58999999999999997,2 ,0.57999999999999996] # Artifact: Second value expected in UBUNTU 10.10
        self.assert_(round(CA,2) in [round(ca,2) for ca in expectedCA]) # Ver 0.3

        miscUtilities.removeDir(runPath)
Example #24
    def __init__(self,name = "AZOrangeJob", queue = "batch.q", priority = None, range = None, wd = "temporary", env = "full", shell = "/bin/sh", sync = "yes", resources = None, body = "", params = None , action = None, pyFile = None , hold = None):

        qsub = "#!" + shell                     + "\n\n"
        qsub = qsub + "#$ -N " + name           + "\n"
        self.name = name
        qsub = qsub + "#$ -q " + queue          + "\n"
        self.queue = queue
        self.action = action
        if priority:
            qsub = qsub + "#$ -p " + str(priority)      + "\n"
            self.priority = priority
        if wd == "temporary":
            qsub = qsub + "#$ -cwd"             + "\n"
            self.wd = miscUtilities.createScratchDir(desc ="optQsub"+name, baseDir = AZOC.NFS_SCRATCHDIR)
        else:
            qsub = qsub + "#$ -wd " + wd        + "\n"
            self.wd = wd
        if env == "full":
            qsub = qsub + "#$ -V"        + "\n"
        if sync == "yes":
            qsub = qsub + "#$ -sync yes"        + "\n"
            self.sync = True
        qsub = qsub + "#$ -S " + shell          + "\n"
        self.shell = shell   
        self.range = range
        qsub = qsub + "#$ " + AZOC.SGE_QSUB_ARCH_OPTION_CURRENT + "\n"
        if range:
            qsub = qsub + "#$ -t 1-" + str(range)      + "\n"
        if resources:
            qsub = qsub + "#$ -l " + resources  + "\n"
            self.resources = resources
        if hold:
            qsub = qsub + "#$ -hold_jid " + str(hold)   + "\n"
        if body:
            qsub = qsub + "\n" + body + "\n"
        else:
            qsub = qsub + "\npython " + self.name + ".py\n"
        self.qsubFile = os.path.join(self.wd,name) + ".sh"
        self.qsubScript = qsub
        if pyFile:
            self.textFile(os.path.join(self.wd,name) + ".py",pyFile)
        self.textFile(self.qsubFile,qsub)
        if params:
            paramFile = open(os.path.join(self.wd,name) + ".params","w")
            cPickle.dump(params,paramFile)
            paramFile.close()
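The constructor above belongs to a qsub job wrapper whose class name is not shown in this snippet; the sketch below therefore uses the placeholder name QsubJob and only the constructor arguments visible above.

job = QsubJob(name="AZOrangeDemo",                     # QsubJob is a placeholder class name
              queue="quick.q",
              priority=-800,
              resources="mf=150M",
              pyFile="print 'hello from the grid'")    # written to AZOrangeDemo.py in job.wd
# With no explicit body, the generated submission script (job.qsubFile) runs "python AZOrangeDemo.py".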
Example #25
    def test_SaveLoadCustomLogicalExpression(self):
        """ Test save/load functionality with a custom logical expression """
        # Arrange

        # Construct expression learner/classifier
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }
        discreteExpression = [
            "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica"
        ]
        discreteLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=discreteExpression)
        discreteClassifier = discreteLearner(self.irisData)

        result = []
        for ex in self.irisData:
            result.append(discreteClassifier(ex))

        # Act
        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        discreteClassifier.write(os.path.join(scratchdir, "./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.irisData:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not result[index].value == resultLoaded[index].value:
                print "Not equal on index: ", index
            self.assertEqual(result[index].value, resultLoaded[index].value)

        self.assertEqual(len(loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(loaded.imputeData), len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
Example #26
        def TopVarImportanceTest(data, expectNone = False):
            resA = []
            resB = []
            CvBoost = AZorngCvBoost.CvBoostLearner(data)

            for ex in data:
                resA.append(CvBoost.getTopImportantVars(ex,1))

            scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
            modelPath = os.path.join(scratchdir,"CvBoostModel")
            CvBoost.write(modelPath)
            LoadedCvBoost = AZorngCvBoost.CvBoostread(modelPath)
            miscUtilities.removeDir(scratchdir) 
            for ex in data:
                resB.append(LoadedCvBoost.getTopImportantVars(ex,1))
            if expectNone:
                return resA == resB == [None]*len(data)
            else:
                return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
Example #27
    def test_PLS_Regression(self):
        """PLS - Test of optimizer with continuous class data        
        """        
        #Create  the appspack instance
        opt=paramOptUtilities.Appspack()
        #Learner to be optimized
        learner=AZorngPLS.PLSLearner()
        #dataset to use in the parameters optimization 
        dataSet=self.contTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin=True
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): 
        #       evaluateMethod="AZutilities.evalUtilities.R2"
        evalM="AZutilities.evalUtilities.RMSE"
        
        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")
        
        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]        
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        self.assertEqual(opt.usedMPI,False)
        self.assertEqual(learner.optimized,True)
        self.assertEqual(round(tunedPars[0],2),round(0.858060000000,2))
        #The learner now has its optimized parameters set, so we can build a classifier from it
        classifier = learner(self.contTrain)
        RMSE = evalUtilities.getRMSE(self.contTest,classifier)
        self.assertEqual(round(RMSE,2),round(0.656979500000,2))

        miscUtilities.removeDir(runPath)
        def TopVarImportanceTest(data, expectNone = False):
            resA = []
            resB = []
            CvANN = AZorngCvANN.CvANNLearner(data, stopUPs=0)

            for ex in data:
                resA.append(CvANN.getTopImportantVars(ex,1))

            scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
            modelPath = os.path.join(scratchdir,"CvANNModel")
            CvANN.write(modelPath)
            LoadedCvANN = AZorngCvANN.CvANNread(modelPath)
            miscUtilities.removeDir(scratchdir) 
            for ex in data:
                resB.append(LoadedCvANN.getTopImportantVars(ex,1))
            if expectNone:
                return resA == resB == [None]*len(data)
            else:
                return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
Example #29
def competitiveWorkflow(data, modelSavePath = None, statisticsSavePath = None, runningDir = AZOC.NFS_SCRATCHDIR, queueType = "NoSGE", callBack = None):
    """
        modelSavePath and statisticsSavePath will be created and must not already exist
    """
    if (modelSavePath and os.path.exists(modelSavePath)) or (statisticsSavePath and os.path.exists(statisticsSavePath)):
        print "ERROR: modelSavePath or statisticsSavePath already exists."
        return {}
    runPath = miscUtilities.createScratchDir(baseDir = os.path.realpath(runningDir), desc = "competitiveWorkflow")
    statistics = getStatistics(data, runPath, os.path.join(runPath,"statistics.pkl"), queueType = queueType, getAllModels = False, callBack = callBack)
    model = getModel(data, savePath = os.path.join(runPath,"modelStat.pkl"), queueType = queueType, callBack = callBack)
    if model and len(model)>=1:
        if modelSavePath:
            model[model.keys()[0]].write(modelSavePath)
    else:
        print "ERROR: No model was returned!"
    if statistics:
        if statisticsSavePath:
            writeResults(statistics, statisticsSavePath)
    else:
        print "ERROR: No statistics were returned!"
    return {"model":model, "statistics":statistics} 
Example #30
    def test_saveloadReg(self):
        """Test the save/load for a regression model - Using average of N classifiers"""
        learnersNames = ["CvANN","CvSVM","RF"]

        learner = AZorngConsensus.ConsensusLearner(learnersNames = learnersNames)
        classifier = learner(self.DataSol)
        predictions = []
        for ex in self.DataSol:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.DataSol:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        miscUtilities.removeDir(scratchdir)
Example #31
    def testTopRankedNoGrid(self):
        """
        Test the TopRanked method in getBestModel with a test set and without grid computing. 
        """
        # Fix input arg
        resultsDir = miscUtilities.createScratchDir(desc="GetBestModelTest") 
        descList = [5, 10]
        grid = False
        batchQueue = False
        optParam = True

        # Method to test
        getBestModel.getBestModelTopRank(self.trainPath, self.testPath, resultsDir, descList, grid, optParam, batchQueue)

        # Assert the existence of a results file
        resultsFile = resultsDir+"/batchResults.tex"
        self.assert_(os.path.exists(resultsFile), "No results file created with getBestModelTopRank")
            
        resultsFile = resultsDir+"/batchResults.pdf"
        self.assert_(os.path.exists(resultsFile), "No pdf file created with getBestModelTopRank")
        miscUtilities.removeDir(resultsDir)
Example #32
    def test_CanPersistRegressionModelUsingClassifiers(self):
        """Test the save/load for a regression model - Using average of N classifiers"""

        # Arrange
        learners = [
            AZorngRF.RFLearner(),
            AZorngCvSVM.CvSVMLearner(),
            AZorngCvANN.CvANNLearner()
        ]
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.DataReg)

        # Act
        predictions = []
        for ex in self.DataReg:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir, "./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        for ex in self.DataReg:
            predictionsL.append(Loaded(ex))

        self.assertEqual(
            [round(pred.value, 4) for pred in predictions],
            [round(pred.value, 4) for pred in predictionsL],
            "Loaded model predictions differ: Pred. 1 (saved/loaded):" +
            str(predictions[0]) + " / " + str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
Example #33
def competitiveWorkflow(
        data,
        mlList=[ml for ml in MLMETHODS if AZOC.MLMETHODS[ml]["useByDefault"]],
        modelSavePath=None,
        statisticsSavePath=None,
        runningDir=AZOC.NFS_SCRATCHDIR,
        queueType="NoSGE",
        callBack=None):
    """
        modelSavePath and statisticsSavePath will be created and must not already exist
    """
    if (modelSavePath and os.path.exists(modelSavePath)) or (
            statisticsSavePath and os.path.exists(statisticsSavePath)):
        print "ERROR: modelSavePath or statisticsSavePath already exists."
        return {}
    runPath = miscUtilities.createScratchDir(
        baseDir=os.path.realpath(runningDir), desc="competitiveWorkflow")
    statistics = getStatistics(data,
                               runPath,
                               os.path.join(runPath, "statistics.pkl"),
                               mlList,
                               queueType=queueType,
                               getAllModels=False,
                               callBack=callBack)
    model = getModel(data,
                     mlList,
                     savePath=os.path.join(runPath, "modelStat.pkl"),
                     queueType=queueType,
                     callBack=callBack)
    if model and len(model) >= 1:
        if modelSavePath:
            model[model.keys()[0]].write(modelSavePath)
    else:
        print "ERROR: No model was returned!"
    if statistics:
        if statisticsSavePath:
            writeResults(statistics, statisticsSavePath)
    else:
        print "ERROR: No statistics were returned!"
    return {"model": model, "statistics": statistics}
    def test_SaveLoadCustomRegressionExpression(self):
        """ Test save/load custom expression using average N regression with object map """
        # Arrange
        learners = {'firstLearner':AZorngCvSVM.CvSVMLearner(),
                    'secondLearner':AZorngCvANN.CvANNLearner(),
                    'thirdLearner':AZorngRF.RFLearner()}
        
        # Construct expression learner/classifier
        regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(learners = learners, expression = regressionExpression)
        expressionClassifier = expressionLearner(self.DataReg)

        # Construct default learner/classifier
        result = []
        for ex in self.DataReg:
            result.append(expressionClassifier(ex))

        # Act
        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        expressionClassifier.write(os.path.join(scratchdir,"./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.DataReg:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not float_compare(result[index].value, resultLoaded[index].value):
                print "Not equal on index: ", index
            self.assertEqual(float_compare(result[index].value, resultLoaded[index].value), True)

        self.assertEqual(len(loaded.domain),len(self.DataReg.domain))
        self.assertEqual(len(loaded.imputeData) , len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
Example #35
    def testDescSetNoGrid(self):
        """
        Test the descSet method in getBestModel without a test set and without grid computing. 
        """
        # Fix input arg
        resultsDir = miscUtilities.createScratchDir(desc="GetBestModelTest")  
        descList = ["AZ_descriptors"]#, "SELMA"]
        grid = False
        batchQueue = False
        optParam = False

        # Method to test
        getBestModel.getBestModelDescSet(self.trainPath2, "noTest", resultsDir, descList, grid, optParam, batchQueue)

        # Assert the existence of a results file
        resultsFile = resultsDir+"/batchResults.tex"
        self.assert_(os.path.exists(resultsFile), "No results file created with getBestModelDescSet")
            
        resultsFile = resultsDir+"/batchResults.pdf"
        self.assert_(os.path.exists(resultsFile), "No pdf file created with getBestModelDescSet")
        
        miscUtilities.removeDir(resultsDir)
    def test_SaveLoadCustomLogicalExpression(self):
        """ Test save/load functionality with a custom logical expression """
        # Arrange
        
        # Construct expression learner/classifier
        learners = {'firstLearner':AZorngCvSVM.CvSVMLearner(),
                    'secondLearner':AZorngCvANN.CvANNLearner(),
                    'thirdLearner':AZorngRF.RFLearner()}
        discreteExpression = ["firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica"]
        discreteLearner = AZorngConsensus.ConsensusLearner(learners = learners, expression = discreteExpression)
        discreteClassifier = discreteLearner(self.irisData)

        result = []
        for ex in self.irisData:
            result.append(discreteClassifier(ex))
        
        # Act
        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        discreteClassifier.write(os.path.join(scratchdir,"./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.irisData:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not result[index].value == resultLoaded[index].value:
                print "Not equal on index: ", index
            self.assertEqual(result[index].value, resultLoaded[index].value)

        self.assertEqual(len(loaded.domain),len(self.irisData.domain))
        self.assertEqual(len(loaded.imputeData) , len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
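        # A hedged illustration (not part of the original test): the discrete
        # expression is an ordered list of "predicate -> class" rules, where the
        # trailing "-> class" entry acts as the fallback when no predicate
        # matches; that is how this test and the consensus code later in this
        # collection use it. A variant with the same learner map:
        #
        #   expression = ["secondLearner == Iris-versicolor -> Iris-versicolor",
        #                 "-> Iris-setosa"]
        #   learner = AZorngConsensus.ConsensusLearner(learners = learners,
        #                                              expression = expression)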
Example #37
    def test_FeedLearnersReg(self):
        """Test the creation of Consensus feeding Learners for regression"""
        #The Learners can be individually customized before passing them to the Consensus
        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]

        #Passing the learners via the learnersObj argument instead
        learner = AZorngConsensus.ConsensusLearner(learnersObj = learners)
        classifier = learner(self.DataSol)
        predictions = []
        for ex in self.DataSol:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in self.DataSol:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        miscUtilities.removeDir(scratchdir)
Example #38
    def test_FeedClassifiersReg(self):
        """Test the feeding of regression classifiers """
        DataSet = dataUtilities.DataTable(os.path.join(AZOC.AZORANGEHOME,"tests/source/data/dummy.tab"))
        #DataSet = self.DataSol
        learners = [AZorngCvSVM.CvSVMLearner(), AZorngCvANN.CvANNLearner(), AZorngRF.RFLearner()]
        classifiers = [l(DataSet) for l in learners]

        classifier = AZorngConsensus.ConsensusClassifier(classifiers = classifiers)
        predictions = []
        for ex in DataSet:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir,"./CM.model"))

        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(os.path.join(scratchdir,"./CM.model"))
        for ex in DataSet:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions,predictionsL)

        miscUtilities.removeDir(scratchdir)
Example #39
    def test_RFClassification(self):
        """RF - Test of optimizer with discrete class data
        """
        #Create the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet = self.discTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM = "AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="RFTest")

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API("RFLearner")
        # Set the parameters in parameterList to be optimized
        pars.setParameter("NumThreads", "optimize", False)
        # Change the default
        pars.setParameter("NumThreads", "default", "1")

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useDefaultPoint = False,\
                        useStd = False,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        print "Number of optimization steps: ", len(
            dataUtilities.DataTable(
                os.path.join(runPath, "optimizationLog.txt")))
        print "Number of Threads used: ", learner.NumThreads
        #The learner is now with its optimized parameters already set, so we can now make a classifier out of it
        learner.NumThreads = 1
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
        print "CA of optimized Learner: ", CA

        self.assertEqual(opt.usedMPI, False)

        self.log.info("")
        self.log.info("tunedPars[0]=" + str(tunedPars[0]))

        self.assertEqual(learner.optimized, True)
        self.assertEqual(round(tunedPars[0], 2), round(0.61, 2))  # Ver 0.3:390

        self.log.info("CA=" + str(CA))
        self.assertEqual(round(CA, 2), round(0.965517241379, 2))  #Ver 0.3
        #Check that the best result was not obtained with NumThreads different from 1, since using
        #more than one thread can give different results among runs
        self.assertEqual(int(tunedPars[1]["NumThreads"]), 1)

        miscUtilities.removeDir(runPath)
Example #40
    def getAcc(self, callBack=None, callBackWithFoldModel=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        # Set the response type
        self.responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        self.__log("  " + str(self.responseType))

        #Create the Train and test sets
        if self.usePreDefFolds:
            DataIdxs = self.preDefIndices
        else:
            DataIdxs = self.sampler(self.data, self.nExtFolds)
        foldsN = [f for f in dict.fromkeys(DataIdxs) if f != 0]  # Folds are used from 1 on; fold 0 is for the fixed train bias
        nFolds = len(foldsN)
        #Fix the Indexes based on DataIdxs
        # (0) marks the train set; (>= 1) marks the test set folds
        if self.useVarCtrlCV:
            nShifted = [0] * nFolds
            for idx, isTest in enumerate(
                    self.preDefIndices
            ):  # self.preDefIndices == 0 are to be used in TrainBias
                if not isTest:
                    if DataIdxs[idx]:
                        nShifted[DataIdxs[idx]] += 1
                        DataIdxs[idx] = 0
            for idx, shift in enumerate(nShifted):
                self.__log("In fold " + str(idx) + ", " + str(shift) +
                           " examples were shifted to the train set.")

        #Vars for saving each fold's results
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        #Check data in advance so that it will not, by chance, fail at the last fold!
        for foldN in foldsN:
            trainData = self.data.select(DataIdxs, foldN, negate=1)
            self.__checkTrainData(trainData)

        #Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds
        for ml in sortedML:
            startTime = time.time()
            self.__log("    > " + str(ml) + "...")
            try:
                #Vars for saving each fold's results
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []
                logTxt = ""
                for foldN in foldsN:
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs, foldN, negate=1)
                    testData = self.data.select(DataIdxs, foldN)
                    smilesAttr = dataUtilities.getSMILESAttr(trainData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        if MLmethods[ml].specialType == 1:
                            trainData = dataUtilities.attributeSelectionData(
                                trainData,
                                [smilesAttr, trainData.domain.classVar.name])
                            testData = dataUtilities.attributeSelectionData(
                                testData,
                                [smilesAttr, testData.domain.classVar.name])
                            self.__log(
                                "Selected attrs: " +
                                str([attr.name for attr in trainData.domain]))
                        else:
                            trainData = dataUtilities.attributeDeselectionData(
                                trainData, [smilesAttr])
                            testData = dataUtilities.attributeDeselectionData(
                                testData, [smilesAttr])
                            self.__log("Selected attrs: " + str(
                                [attr.name for attr in trainData.domain[0:3]] +
                                ["..."] + [
                                    attr.name for attr in trainData.
                                    domain[len(trainData.domain) - 3:]
                                ]))

                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    #Test if train sets inside the optimizer will respect the dataSize criteria.
                    #  if not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (
                            len(trainData) *
                        (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = self.sampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs,
                                                        1,
                                                        negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    SpecialModel = None
                    if dontOptimize:
                        logTxt += "       Fold " + str(
                            foldN
                        ) + ": Too few compounds to optimize model hyper-parameters\n"
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        if MLmethods[ml].specialType == 1:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optInfo, SpecialModel = MLmethods[
                                    ml].optimizePars(trainData, folds=5)
                                optAcc[ml].append(optInfo["Acc"])
                            else:
                                res = evalUtilities.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    stratified=orange.MakeRandomIndices.
                                    StratifiedIfPossible,
                                    random_generator=random.randint(0, 100))
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)
                        else:
                            runPath = miscUtilities.createScratchDir(
                                baseDir=AZOC.NFS_SCRATCHDIR,
                                desc="AccWOptParam",
                                seed=id(trainData))
                            trainData.save(
                                os.path.join(runPath, "trainData.tab"))
                            tunedPars = paramOptUtilities.getOptParam(
                                learner=MLmethods[ml],
                                trainDataFile=os.path.join(
                                    runPath, "trainData.tab"),
                                paramList=self.paramList,
                                useGrid=False,
                                verbose=self.verbose,
                                queueType=self.queueType,
                                runPath=runPath,
                                nExtFolds=None,
                                nFolds=self.nInnerFolds,
                                logFile=self.logFile,
                                getTunedPars=True,
                                fixedParams=self.fixedParams)
                            if not MLmethods[ml] or not MLmethods[ml].optimized:
                                self.__log(
                                    "       WARNING: GETACCWOPTPARAM: The learner "
                                    + str(ml) + " was not optimized.")
                                self.__log(
                                    "                It will be ignored")
                                #self.__log("                It will be set to default parameters")
                                self.__log(
                                    "                    DEBUG can be done in: "
                                    + runPath)
                                #Set learner back to default
                                #MLmethods[ml] = MLmethods[ml].__class__()
                                raise Exception("The learner " + str(ml) +
                                                " was not optimized.")
                            else:
                                if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                    optAcc[ml].append(tunedPars[0])
                                else:
                                    res = evalUtilities.crossValidation(
                                        [MLmethods[ml]],
                                        trainData,
                                        folds=5,
                                        stratified=orange.MakeRandomIndices.
                                        StratifiedIfPossible,
                                        random_generator=random.randint(
                                            0, 100))
                                    R2 = evalUtilities.R2(res)[0]
                                    optAcc[ml].append(R2)

                                miscUtilities.removeDir(runPath)
                    #Train the model
                    if SpecialModel is not None:
                        model = SpecialModel
                    else:
                        model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    #Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        #Save the experimental value and the corresponding predicted value
                        exp_pred[ml] += local_exp_pred
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None
                    if callBackWithFoldModel:
                        callBackWithFoldModel(model)

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                res["runningTime"] = time.time() - startTime
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                self.__log("       Learner " + str(ml) +
                           " failed to create/optimize the model!")
                error = str(sys.exc_info()[0]) +" "+\
                            str(sys.exc_info()[1]) +" "+\
                            str(traceback.extract_tb(sys.exc_info()[2]))
                self.__log(error)

                res = self.createStatObj()
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            #We still need to build a consensus model out of the stable models
            #   ONLY if there is more than one stable model!
            #   When only one model is stable, or none are, build a consensus based on all models
            # ALWAYS exclude specialType models (MLmethods[ml].specialType > 0)
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName][
                        "stable"]:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            self.__log("Found " + str(len(consensusMLs)) +
                       " stable MLmethods out of " + str(len(statistics)) +
                       " MLmethods.")

            if len(consensusMLs
                   ) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            # Exclude specialType models
            excludeThis = []
            for learnerName in consensusMLs:
                if models[learnerName][0].specialType > 0:
                    excludeThis.append(learnerName)
            for learnerName in excludeThis:
                consensusMLs.pop(learnerName)
                self.__log("    > Excluded special model " + learnerName)
            self.__log("    > Stable modules: " + str(consensusMLs.keys()))

            if len(consensusMLs) >= 2:
                #Vars for saving each fold's results
                startTime = time.time()
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on "
                    + str([ml for ml in consensusMLs]))
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        # exprTest0
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        # exprTest1
                        exprTest1 = "(0"
                        for ml in consensusMLs:
                            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest1 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest1 += ", " + ml + " == " + CLASS1 + " "
                        exprTest1 += "]),1)"
                        # Expression
                        expression = [
                            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0,
                            " -> " + CLASS1
                        ]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(
                                optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(
                        DataIdxs, foldN + 1)  # fold 0 is for the train Bias!!
                    smilesAttr = dataUtilities.getSMILESAttr(testData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        testData = dataUtilities.attributeDeselectionData(
                            testData, [smilesAttr])
                        self.__log("Selected attrs: " + str(
                            [attr.name
                             for attr in trainData.domain[0:3]] + ["..."] + [
                                 attr.name for attr in
                                 trainData.domain[len(trainData.domain) - 3:]
                             ]))

                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[
                            learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(
                        classifiers=consensusClassifiers,
                        expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    #Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        #Save the experimental value and the corresponding predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(
                    Cresults,
                    Cexp_pred,
                    CnTrainEx,
                    CnTestEx,
                    self.responseType,
                    self.nExtFolds,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                res["runningTime"] = time.time() - startTime
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"][
                    "IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
Example #41
    def disable_testSVM_MPI_3(self):
        ###################################################################
        #       Test another way of setting up appspack
        ###################################################################
        # Classification accuracy:
        ExpectedCA = 0.6

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvSVM.CvSVMLearner()
        learnerName = "CvSVMLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["C", "gamma"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        # Change the range
        pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1))

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI_3")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0,\
                        useStd = False,\
                        advancedMPIoptions = "-v -np 4",\
                        machinefile = ["localhost:2","localhost:2"])
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)
        # Check if the MPI version was used
        self.assertEqual(optimizer.usedMPI, True)
        self.assertEqual(round(tunedPars[0], 3), round(ExpectedCA, 3))
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >=
            12)  # (orig 14) Must be > 2
        #print runPath
        miscUtilities.removeDir(runPath)
Example #42
    def getStatistics(self):
        self.warning(0)
        self.error(0)
        self.statInfo = ""
        self.statistics = None
        self.statInfoBox.setText("")
        """Get the statistics and save to desired place if specifyes"""
        if not os.path.isdir(str(self.statPath)):
            statPath = None
        else:
            statPath = os.path.join(str(self.statPath), "statistics.pkl")
        runPath = miscUtilities.createScratchDir(desc="CombiQSAR",
                                                 baseDir=AZOC.NFS_SCRATCHDIR)

        # DEBUG
        #fileh = open("/home/palmeida/dev/AZOrange/orange/OrangeWidgets/Classify/stat.pkl")
        #statistics = pickle.load(fileh)
        #fileh.close()

        print "OptimizeChBox", self.OptimizeChBox
        mlList = []
        for row in range(self.mlTable.rowCount()):
            if self.mlTable.cellWidget(row, 1).checkState() == 2:
                mlList.append(str(self.mlTable.item(row, 0).text()).strip())

        statistics = competitiveWorkflow.getMLStatistics(
            self.dataset,
            mlList=mlList,
            savePath=statPath,
            queueType=self.queueTypes[self.queueType],
            verbose=0,
            logFile=None,
            callBack=self.advance)

        #select the best model
        MLMethod = competitiveWorkflow.selectModel(statistics, logFile=None)
        self.classifier = competitiveWorkflow.buildModel(
            self.dataset,
            MLMethod,
            queueType=self.queueTypes[self.queueType],
            verbose=0,
            logFile=None)
        if not self.classifier:
            self.statInfo = "Could not get a classifier. Please check the output window."

        if not statistics:
            self.statInfo = "Some error occured. Please check the output window"
        else:
            self.statistics = self.createStatData(statistics)
            if statPath and os.path.isfile(statPath):
                self.statInfo = "Statistics were saved to " + statPath+"\n"+\
                                  "You can save the statistics in other place by using \n"
            else:
                self.statInfo += "You can save the statistics by using \n"
        self.statInfo +=    " the button 'Save statistics'\n\n"+\
                              "You can also use or view the statistics by connecting \n"+\
                              " the appropriate widget to this widget output"
        self.statInfoBox.setText(self.statInfo)
        self.classifier.name = str(self.name)
        self.send("Classifier", self.classifier)
        self.send("Examples", self.statistics)
Example #43
    def getProbabilitiesAsAttribute(self, algorithm=None, minsup=None, atts=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
                
			parameters:
                algo - key for the structural feature generation algorithm (set dependent structural features that have to be calculated inside the crossvalidation)
                minsup - minimum support for the algorithm
                atts - attributes to be removed before learning (e.g. meta etc...)
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None

        if algorithm:
            self.__log(" Additional features to be calculated inside of cross-validation")
            self.__log(" Algorithm for structural features: " + str(algorithm))
            self.__log(" Minimum support parameter: " + str(minsup))

        # Set the response type
        self.responseType = (
            self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        )
        self.__log("  " + str(self.responseType))

        # Create the Train and test sets
        DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)

        # Vars for saving each fold's results
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        # Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        rocs = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        # Check data in advance so that it will not, by chance, fail at the last fold!
        for foldN in range(self.nExtFolds):
            trainData = self.data.select(DataIdxs[foldN], negate=1)
            self.__checkTrainData(trainData)

        # Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        for ml in sortedML:
            self.__log("    > " + str(ml) + "...")
            try:
                # Vars for saving each fold's results
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                rocs[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []

                ### mods TG
                # Build a new domain that also holds the predicted class probability
                prediction_attribute = orange.FloatVariable("class_prob")
                domain = orange.Domain(list(self.data.domain.attributes) + [prediction_attribute],
                                       self.data.domain.classVar)
                data_new = orange.ExampleTable(domain)

                logTxt = ""
                for foldN in range(self.nExtFolds):
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs[foldN], negate=1)
                    orig_len = len(trainData.domain.attributes)
                    # add structural descriptors to the training data (TG)
                    if algorithm:
                        trainData_structDesc = getStructuralDesc.getStructuralDescResult(trainData, algorithm, minsup)
                        trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, atts)

                    testData = self.data.select(DataIdxs[foldN])
                    # print "IDX: ",
                    # print DataIdxs[foldN]
                    # calculate the feature values for the test data (TG)
                    if algorithm:
                        cut_off = orig_len - len(atts)
                        smarts = trainData.domain.attributes[cut_off:]
                        self.__log("  Number of structural features added: " + str(len(smarts)))
                        testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                        testData = dataUtilities.attributeDeselectionData(testData_structDesc, atts)

                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    # Test if train sets inside the optimizer will respect the dataSize criteria.
                    #  if not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs[0], negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    if dontOptimize:
                        logTxt += (
                            "       Fold " + str(foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                        )
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        runPath = miscUtilities.createScratchDir(
                            baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)
                        )
                        trainData.save(os.path.join(runPath, "trainData.tab"))

                        tunedPars = paramOptUtilities.getOptParam(
                            learner=MLmethods[ml],
                            trainDataFile=os.path.join(runPath, "trainData.tab"),
                            paramList=self.paramList,
                            useGrid=False,
                            verbose=self.verbose,
                            queueType=self.queueType,
                            runPath=runPath,
                            nExtFolds=None,
                            nFolds=self.nInnerFolds,
                            logFile=self.logFile,
                            getTunedPars=True,
                        )
                        if not MLmethods[ml] or not MLmethods[ml].optimized:
                            self.__log(
                                "       WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized."
                            )
                            self.__log("                It will be ignored")
                            # self.__log("                It will be set to default parameters")
                            self.__log("                    DEBUG can be done in: " + runPath)
                            # Set learner back to default
                            # MLmethods[ml] = MLmethods[ml].__class__()
                            raise Exception("The learner " + str(ml) + " was not optimized.")
                        else:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optAcc[ml].append(tunedPars[0])
                            else:
                                res = orngTest.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                    randomGenerator=random.randint(0, 100),
                                )
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)

                            miscUtilities.removeDir(runPath)
                    # Train the model
                    model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    # Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                        roc = self.aroc(testData, [model])
                        rocs[ml].append(roc)

                    # save the prediction probabilities

                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and the corresponding predicted value
                        exp_pred[ml] += local_exp_pred

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    rocs[ml],
                )
                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                self.__log("       Learner " + str(ml) + " failed to create/optimize the model!")
                res = self.createStatObj()
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            # We still need to build a consensus model out of the stable models
            #   ONLY if there is more than one stable model!
            #   When only one model is stable, or none are, build a consensus based on all models
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName]["stable"]:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            self.__log(
                "Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods."
            )

            if len(consensusMLs) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            if len(consensusMLs) >= 2:
                # Vars for saving each fold's results
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])
                )
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                        expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(DataIdxs[foldN])
                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    # Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and the corresponding predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        # By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
Example #44
    def disable_testSVM_MPI(self):
        """
        Tests changing the default range of the optimizer.
        Uses the MPI version of appspack.
        """
        # Classification accuracy:
        ExpectedCA = [0.6]  #New at orange2.0

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvSVM.CvSVMLearner()
        learnerName = "CvSVMLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["C", "gamma"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        # Change the range
        pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1))

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_SVM_MPI")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        # Calculate the optimal parameters. This can take a long period of time!
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        useParameters = pars.getParametersDict(),\
                        findMin=fMin,\
                        runPath = runPath,\
                        verbose = 0,\
                        useStd = False,\
                        #advancedMPIoptions = "-all-local -allcpus")  # to use this the 
                        # file "<MPICHDIR>/share/machines.LINUX must be properly configured"


                        np = 4,\
                        machinefile = os.path.realpath(os.path.join(os.environ["AZORANGEHOME"], "tests/source/APPS_machines")))

        verbTunedPars = optimizer.getTunedParameters()

        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        # Check if the MPI version was used
        self.assertEqual(optimizer.usedMPI, True)

        # Check the number of optimized parameters
        self.assertEqual(len(verbTunedPars["optParam"]), 12)

        # Check the accuracy
        self.assert_(
            round(verbTunedPars["bestRes"],
                  3) in [round(x, 3) for x in ExpectedCA],
            "Got:" + str(verbTunedPars["bestRes"]))
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >=
            12)  # (orig: 14)  Must be > 2

        miscUtilities.removeDir(runPath)
Example #45
    def optimizeParameters(self):
        """ Sets up the input learner with tuned parameters  """

        self.clearErrors()
        self.tunedPars = None
        if hasattr(self.learner, "optimized"):
            self.learner.optimized = False

        if not self.learner:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Apply the parameter values from the GUI configuration table (the user could have changed them!)
        if not self.updateParametersFromTable():
            return

        if not self.dataset:
            self.dataset = None
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            self.updateInfo()
            return

        # Progress bar 1
        optSteps = 3
        progress1 = QProgressDialog(
            "Gathering data and configuring the optimizer...", "Cancel", 0,
            optSteps, self, Qt.Dialog)  #, "progress", True )
        progress1.setWindowModality(Qt.WindowModal)
        bar1 = QProgressBar(progress1)
        bar1.show()
        progress1.setBar(bar1)
        #progress1.setTotalSteps(optSteps)
        progress1.setMinimumDuration(0)
        progress1.forceShow()
        progress1.setValue(0)
        time.sleep(0.1)
        progress1.setValue(0)

        # Create path for running the optimizer
        randNr = random.randint(0, 10000)
        if self.execEnv == 0:
            scratchdir = miscUtilities.createScratchDir(
                desc="OWParamOpt_Serial")
        else:
            scratchdir = miscUtilities.createScratchDir(
                desc="OWParamOpt_MPI", baseDir=AZOC.NFS_SCRATCHDIR)
        # Save the dataset to the optimizer running path
        OrngFile = os.path.join(scratchdir, "OrngData.tab")
        orange.saveTabDelimited(OrngFile, self.dataset)
        # Advance Progress Bar
        progress1.setValue(1)
        # Define the evaluation method to use
        if self.dataset.domain.classVar.varType == orange.VarTypes.Continuous:
            fMin = self.RMethods[self.RMethod][2]
            evalM = self.RMethods[self.RMethod][1]
        else:
            fMin = self.CMethods[self.CMethod][2]
            evalM = self.CMethods[self.CMethod][1]
        try:
            if os.path.exists(
                    os.path.join(scratchdir, "AZLearnersParamsConfig.py")):
                os.system(
                    "rm " +
                    str(os.path.join(scratchdir, "AZLearnersParamsConfig.py")))
            paramFile = file(
                os.path.join(scratchdir, "AZLearnersParamsConfig.py"), "w")
            paramFile.write(self.learnerType + "= " + str(self.parameters) +
                            "\r\n")
            paramFile.close()

            progress1.setValue(2)
            # Run the optimizer, which will configure the input learner and additionally return
            # [<minimum of objective function found>, <optimized parameters>]
            # Serial
            print "ENV:", self.execEnv
            if self.execEnv == 0:
                print "Executing the optimizer in serial mode on local machine"
                optPID = self.optimizer(
                    learner=self.learner,
                    dataSet=OrngFile,
                    evaluateMethod=evalM,
                    findMin=fMin,
                    nFolds=self.nFolds,
                    samplingMethod=self.SMethods[self.SMethod][1],
                    runPath=scratchdir,
                    verbose=self.verbose,
                    externalControl=1,
                    useParameters=self.parameters,
                    useGridSearchFirst=self.UseGridSearch,
                    gridSearchInnerPoints=self.nInnerPoints,
                    np=None,
                    machinefile=None,
                    advancedMPIoptions="",
                )
            # Local mpi
            elif self.execEnv == 1:
                print "Executing the optimizer in parallel mode on local machine"
                optPID = self.optimizer(
                    learner=self.learner,
                    dataSet=OrngFile,
                    evaluateMethod=evalM,
                    findMin=fMin,
                    nFolds=self.nFolds,
                    samplingMethod=self.SMethods[self.SMethod][1],
                    runPath=scratchdir,
                    verbose=self.verbose,
                    externalControl=1,
                    useParameters=self.parameters,
                    useGridSearchFirst=self.UseGridSearch,
                    gridSearchInnerPoints=self.nInnerPoints,
                    machinefile=0)
            # Sge Molndal
            elif self.execEnv == 2:
                print "Executing the optimizer in parallel mode in the batch queue on the sge"
                print "*****************runPath*****************"
                optPID = self.optimizer(
                    learner=self.learner,
                    dataSet=OrngFile,
                    evaluateMethod=evalM,
                    findMin=fMin,
                    nFolds=self.nFolds,
                    samplingMethod=self.SMethods[self.SMethod][1],
                    runPath=scratchdir,
                    verbose=self.verbose,
                    externalControl=1,
                    useParameters=self.parameters,
                    useGridSearchFirst=self.UseGridSearch,
                    gridSearchInnerPoints=self.nInnerPoints,
                    np=8,
                    machinefile="qsub")  #, sgeEnv = "sge_seml")
            elif self.execEnv == 3:
                print "Executing the optimizer in parallel mode in the quick queue on the sge"
                print "*****************runPath*****************"
                optPID = self.optimizer(
                    learner=self.learner,
                    dataSet=OrngFile,
                    evaluateMethod=evalM,
                    findMin=fMin,
                    nFolds=self.nFolds,
                    samplingMethod=self.SMethods[self.SMethod][1],
                    runPath=scratchdir,
                    verbose=self.verbose,
                    externalControl=1,
                    useParameters=self.parameters,
                    useGridSearchFirst=self.UseGridSearch,
                    gridSearchInnerPoints=self.nInnerPoints,
                    np=8,
                    machinefile="qsub",
                    queueType="quick.q")  #, sgeEnv = "sge_seml")
            else:
                print "No SGE Env. selected. Nothing will happen."
        except:
            progress1.close()
            self.updateInfo()
            self.setErrors(
                "Some error(s) occurred during the optimization.\nCheck the " +
                str(scratchdir) +
                " and the output terminal for more information")
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.setValue(3)

        if type(optPID) != types.IntType:
            progress1.close()
            self.updateInfo()
            self.setErrors("Some error(s) occurred during optimization:\n" +
                           str(optPID))
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
            return

        progress1.close()

        # Progress bar
        optSteps = (1 + round(
            (len(self.dataset) * len(self.dataset.domain.attributes) *
             self.nParameters) / 1000)) * 8
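        # optSteps is only a progress-bar estimate: it scales with the dataset size, the number of
        # attributes and the number of tuned parameters. Since each bar step in the loop below
        # sleeps 0.5 s, the printed time estimate is optSteps / 2 seconds.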
        print "Learner optimization started at " + time.asctime()
        print "Optimization steps = ", int(
            optSteps), " (estimated to aprox. ", optSteps / 2, " seconds)"
        progress = QProgressDialog("Learner optimization started at " +
                                   time.asctime() + " ,please wait...",
                                   "Abort Optimization", 0, optSteps, self,
                                   Qt.Dialog)  #, "progress", True )
        progress.setWindowModality(Qt.WindowModal)
        bar = QProgressBar(progress)
        bar.show()
        progress.setBar(bar)
        #progress.setTotalSteps(optSteps)
        progress.setMinimumDuration(0)
        stepsDone = 0
        progress.setValue(stepsDone)
        progress.forceShow()
        #Loop waiting for the optimizer to finish
        while 1:
            if stepsDone < (progress.maximum() - 1):
                progress.setValue(stepsDone)
                stepsDone += 1
                time.sleep(0.5)
            else:
                bar.setTextVisible(False)
                progress.setLabelText(
                    "The optimizer is taking longer than expected, please wait a while longer..."
                )
                stepsDone = 0
                progress.setValue(stepsDone)
                time.sleep(0.5)
            if progress.wasCanceled():
                if not self.optimizer.stop():
                    progress.setLabelText(
                        "Could not stop the optimizer! Please wait until it finishes..."
                    )
                else:
                    self.setErrors(
                        "Learner optimization stopped by user at " +
                        time.asctime(), "WARNING")
                    break
            if self.optimizer.isFinished():
                print "Learner optimization finished at " + time.asctime()
                break
        progress.setValue(progress.maximum() - 1)
        time.sleep(0.5)
        progress.setValue(progress.maximum())
        self.tunedPars = self.optimizer.tunedParameters
        if self.verbose > 0:
            if self.optimizer.usedMPI:
                print "appspack version actually used: MPI"
            else:
                print "appspack version actually used: SERIAL"
        if type(self.tunedPars
                ) != types.ListType or self.learner.optimized == False:
            self.send("Learner - Tuned", None)
            self.send("Examples - Optimization Steps", None)
        else:
            self.send("Learner - Tuned", self.learner)
            self.intRes = dataUtilities.DataTable(scratchdir +
                                                  "/optimizationLog.txt")
            self.send("Examples - Optimization Steps", self.intRes)
        self.updateInfo()

        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            self.setErrors(
                "The directory " + str(scratchdir) +
                " was not deleted because verbose flag is ON", "DEBUG")
Example #46
    def testCvANN(self):
        """             
        Tests changing the default range of the optimizer.
        """
        # Classification accuracy:
        ExpectedCA = [0.585]  #Ver 0.3

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvANN.CvANNLearner()
        learnerName = "CvANNLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["maxIter", "nHidden"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvANN")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        # Calculate the optimal parameters. This can take a long period of time!
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        useParameters = pars.getParametersDict(),\
                        findMin=fMin,\
                        useStd = False,\
                        runPath = runPath,\
                        verbose = 0)

        verbTunedPars = optimizer.getTunedParameters()
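        # tunedPars is [<best objective value found>, <dict of tuned parameters>]; the dict returned
        # by getTunedParameters() additionally exposes "optParam", "bestRes" and "ResIdx", which are
        # checked by the prints and assertions below.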

        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "Best result index from intRes file:", verbTunedPars["ResIdx"]
        print "Optimizer runPath:", runPath
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        # Check if the MPI version was not used
        self.assertEqual(optimizer.usedMPI, False)

        # Check the number of optimized parameters
        self.assertEqual(len(verbTunedPars["optParam"]), 14)

        # Check the accuracy
        nOptPoints = len(
            dataUtilities.DataTable(
                os.path.join(runPath, "optimizationLog.txt")))
        self.assert_(nOptPoints > 5, "N. of optimization points:" +
                     str(nOptPoints))  # Must be > 2
        self.assert_(
            round(verbTunedPars["bestRes"],
                  3) in [round(x, 3) for x in ExpectedCA],
            "Actual result:" + str(verbTunedPars["bestRes"]))

        miscUtilities.removeDir(runPath)
Example #47
    def testCvSVM(self):
        """
        Tests changing the default range of the optimizer.
        """
        # Classification accuracy:
        ExpectedCA = [0.6]  # Ver 0.3

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvSVM.CvSVMLearner()
        learnerName = "CvSVMLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["C", "gamma"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        # Change the range
        pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1))
        pars.setParameter("priors", "default", {"POS": 2, "NEG": 4})

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvSVM")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        # Calculate the optimal parameters. This can take a long period of time!
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        useParameters = pars.getParametersDict(),\
                        findMin=fMin,\
                        useStd = False,\
                        runPath = runPath,\
                        verbose = 0)

        verbTunedPars = optimizer.getTunedParameters()

        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "Best result index from intRes file:", verbTunedPars["ResIdx"]
        print "Optimizer runPath:", runPath
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        # Check if the MPI version was not used
        self.assertEqual(optimizer.usedMPI, False)

        # Check the number of optimized parameters
        self.assertEqual(len(verbTunedPars["optParam"]), 12)

        # Check the accuracy
        self.assert_(
            round(verbTunedPars["bestRes"],
                  2) in [round(x, 2) for x in ExpectedCA],
            "Got:" + str(verbTunedPars["bestRes"]))
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >=
            12)  # (orig: 14)  Must be > 2

        #Check Priors
        self.assertEqual(
            dataUtilities.DataTable(
                os.path.join(runPath,
                             "optimizationLog.txt"))[1]["priors"].value,
            "{'NEG':4,'POS':2}")
        self.assertEqual(tunedPars[1]["priors"],
                         "{'NEG':4,'POS':2}")  # Ver 0.3

        #Set the priors explicitly, since the best row chosen could be the first (default) one, which has no priors set
        learner.priors = {"POS": 2, "NEG": 4}

        classifier = learner(self.discTest)
        classifier.write(os.path.join(runPath, "CvSVMModel"))
        file = open(os.path.join(runPath, "CvSVMModel/model.svm"), "r")
        lines = file.readlines()
        file.close()
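        # lines[18] (the 19th line of model.svm) holds the stored class weights; the assertions
        # below check that they equal the user-set priors scaled by the tuned C parameter.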
        priors = [
            round(x, 2) for x in eval((lines[18].strip()).replace("data:", ""))
        ]
        self.assertEqual(len(priors), 2)
        self.assertEqual(
            priors[self.discTest.domain.classVar.values.index("POS")],
            2.0 * float(tunedPars[1]["C"]))
        self.assertEqual(
            priors[self.discTest.domain.classVar.values.index("NEG")],
            4.0 * float(tunedPars[1]["C"]))
        miscUtilities.removeDir(runPath)
Example #48
def buildModel(trainData,
               MLMethod,
               queueType="NoSGE",
               verbose=0,
               logFile=None):
    """
        Buld the method passed in MLMethod and optimize ( "IndividualStatistics"  not in MLMethod)
        if MLMethod is a Consensus ("individualStatistics"  in MLMethod) , build each and optimize first all models and after build the consensus!
        """
    log(logFile,
        "Building and optimizing learner: " + MLMethod["MLMethod"] + "...")
    learners = {}
    MLMethods = {}
    if "IndividualStatistics" in MLMethod:  #It is a consensus and will certaily not contain any
        #special model as it was filtered in the getUnbiasedAcc
        for ML in MLMethod["IndividualStatistics"]:
            MLMethods[ML] = copy.deepcopy(MLMethod["IndividualStatistics"][ML])
    else:
        ML = MLMethod["MLMethod"]
        if MLMETHODS[ML](
                name=ML
        ).specialType == 1:  # If it is a special model with a built-in optimizer
            log(logFile, "       This is a special model")
            smilesAttr = dataUtilities.getSMILESAttr(trainData)
            if smilesAttr:
                log(logFile, "Found SMILES attribute:" + smilesAttr)
                trainData = dataUtilities.attributeSelectionData(
                    trainData, [smilesAttr, trainData.domain.classVar.name])
            optInfo, SpecialModel = MLMETHODS[ML](name=ML).optimizePars(
                trainData, folds=5)
            return SpecialModel
        else:
            MLMethods[MLMethod["MLMethod"]] = MLMethod

    smilesAttr = dataUtilities.getSMILESAttr(trainData)
    if smilesAttr:
        trainData = dataUtilities.attributeDeselectionData(
            trainData, [smilesAttr])

    # optimize all MLMethods
    for ML in MLMethods:
        log(logFile, "  Optimizing MLmethod: " + ML)
        learners[ML] = MLMETHODS[ML](name=ML)

        runPath = miscUtilities.createScratchDir(
            baseDir=AZOC.NFS_SCRATCHDIR, desc="competitiveWorkflow_BuildModel")
        trainData.save(os.path.join(runPath, "trainData.tab"))

        tunedPars = paramOptUtilities.getOptParam(learner=learners[ML],
                                                  trainDataFile=os.path.join(
                                                      runPath,
                                                      "trainData.tab"),
                                                  useGrid=False,
                                                  verbose=verbose,
                                                  queueType=queueType,
                                                  runPath=runPath,
                                                  nExtFolds=None,
                                                  logFile=logFile,
                                                  getTunedPars=True)

        if not learners[ML].optimized:
            print "WARNING: competitiveWorkflow: The learner " + str(
                learners[ML]) + " was not optimized."
            #print "         Using default parameters"
            print "         The " + str(learners[ML]) + " will not be included"
            #print "         Returning None"
            print "             DEBUG can be made in: " + runPath
            #Setting default parameters
            #learners[ML] = learners[ML].__class__()
            #return None
            learners.pop(ML)
            continue
        else:
            print "Optimized learner ", learners[ML]
            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                MLMethods[ML]["optAcc"] = tunedPars[0]
            else:
                res = orngTest.crossValidation(
                    [learners[ML]],
                    trainData,
                    folds=5,
                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                    randomGenerator=random.randint(0, 100))
                R2 = evalUtilities.R2(res)[0]
                MLMethods[ML]["optAcc"] = R2
            miscUtilities.removeDir(runPath)
    #Train the model
    if len(learners) == 1:
        log(logFile, "  Building the model:" + learners.keys()[0])
        model = learners[learners.keys()[0]](trainData)
    elif len(learners) >= 1:
        model = buildConsensus(trainData, learners, MLMethods)
    else:
        print "ERROR: No Learners were selected!"
        return None

    return model
Example #49
    def test_GridSearch(self):
        """
        Test GridSearch Module
        """
        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet = self.discTestDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM = "AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="RFTest")

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API("RFLearner")
        # Set the parameters in parameterList to be optimized
        pars.setParameter("NumThreads", "optimize", False)
        # Change the default
        pars.setParameter("NumThreads", "default", "1")

        # Run the appspack which will configure the input learner and aditionaly return
        #[<minimum of objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useGridSearchFirst = True,\
                        gridSearchInnerPoints = 3,\
                        useDefaultPoint = False,\
                        useStd = False,\
                        useParameters = pars.getParametersDict(),\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        print "CheckSum:", round(sum(opt.GSRes["results"]), 2)
        print "Number of results: ", len(
            dataUtilities.DataTable(
                os.path.join(runPath, "optimizationLog.txt")))
        print "Running Path:", runPath
        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        self.log.info("")
        self.log.info("tunedPars[0]=" + str(tunedPars[0]))

        # Check the accuracy
        self.assertEqual(round(tunedPars[0], 2), round(0.621, 2))  # Ver 0.3

        #Check if the number of results remain equal
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >= 5)

        #Check that all points were evaluated
        self.assert_(opt.GSRes["nFailedPoints"] == 0)
        self.assert_(opt.GSRes["nPoints"] == 3)
        #CheckSum to assure results are the same
        expectedValues = [
            -1.78,  # Ver 0.3
            -1.79
        ]
        actualValue = sum(opt.GSRes["results"])
        self.assertRoundedToExpectedArray(actualValue, expectedValues, 2)

        #Check that the best result was not one with NumThreads different from 1, since in that case we could get
        #different results between runs
        self.assertEqual(int(tunedPars[1]["NumThreads"]), 1)

        miscUtilities.removeDir(runPath)
Example #50
    def getAcc(self, callBack=None, algorithm=None, params=None, atts=None, holdout=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
                
			parameters:
                algorithm - list of feature generation algorithms (set dependent features that have to be calculated inside the crossvalidation)
                params - dictionary of parameters
                atts - attributes to be removed before learning (e.g. meta etc...)
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None

        if holdout:
            self.nExtFolds = 1

        if algorithm:
            self.__log(" Additional features to be calculated inside of cross-validation")
            for i in algorithm:
                self.__log(" Algorithm: " + str(i))
            for j, v in params.iteritems():
                self.__log(" Parameter: " + str(j) + " = " + str(v))

        # Set the response type
        self.responseType = (
            self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        )
        self.__log("  " + str(self.responseType))

        # Create the Train and test sets
        DataIdxs = None
        if holdout:
            self.__log("Using hold out evaluation with " + str(holdout) + "*100 % of data for training")
            DataIdxs = dataUtilities.SeedDataSampler_holdOut(self.data, holdout)
        else:
            DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)
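        # DataIdxs[foldN] flags the examples belonging to fold foldN; selecting with negate=1 below
        # yields that fold's training set, and selecting without negate yields its test set.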

        # Vars for saving each fold's result
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        # Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        rocs = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        # Check the training data of every fold in advance, so the run does not fail by chance only at the last fold!
        for foldN in range(self.nExtFolds):
            trainData = self.data.select(DataIdxs[foldN], negate=1)
            self.__checkTrainData(trainData)

        # Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds
        for ml in sortedML:
            self.__log("    > " + str(ml) + "...")
            try:
                # Vars for saving each fold's result
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                rocs[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []
                logTxt = ""

                for foldN in range(self.nExtFolds):
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs[foldN], negate=1)
                    orig_len = len(trainData.domain.attributes)
                    refs = None
                    methods = [
                        "rdk_MACCS_keys",
                        "rdk_topo_fps",
                        "rdk_morgan_fps",
                        "rdk_morgan_features_fps",
                        "rdk_atompair_fps",
                    ]
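                    # Fingerprint families (assumed here to be RDKit-based, per the "rdk_" prefix),
                    # used by SimBoostedQSAR.getSimDescriptors below to compute similarity
                    # descriptors against the reference structures.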
                    train_domain = None
                    # add structural descriptors to the training data (TG)
                    if algorithm:
                        for i in range(len(algorithm)):
                            if algorithm[i] == "structClust":
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                actData = orange.ExampleTable(trainData.domain)
                                for d in trainData:
                                    # only valid for simboosted qsar paper experiments!?
                                    if d.getclass() == "2":
                                        actData.append(d)

                                refs = structuralClustering.getReferenceStructures(
                                    actData,
                                    threshold=params["threshold"],
                                    minClusterSize=params["minClusterSize"],
                                    numThreads=2,
                                )
                                self.__log(
                                    " found "
                                    + str(len(refs))
                                    + " reference structures in "
                                    + str(len(actData))
                                    + " active structures"
                                )
                                orig_len = orig_len + (len(refs) * len(methods))
                                trainData_sim = SimBoostedQSAR.getSimDescriptors(refs, trainData, methods)

                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_sim, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_sim, [])

                            elif algorithm[i] == "ECFP":
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                trainData_ecfp = getCinfonyDesc.getCinfonyDescResults(trainData, ["rdk.FingerPrints"])
                                train_domain = trainData_ecfp.domain
                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, [])

                            else:
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                trainData_structDesc = getStructuralDesc.getStructuralDescResult(
                                    trainData, algorithm[i], params["minsup"]
                                )
                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, [])

                                    # trainData.save("/home/girschic/proj/AZ/ProjDev/train.tab")
                    testData = self.data.select(DataIdxs[foldN])
                    # calculate the feature values for the test data (TG)
                    if algorithm:
                        for i in range(len(algorithm)):
                            if algorithm[i] == "structClust":
                                self.__log(str(algorithm[i]))
                                testData_sim = SimBoostedQSAR.getSimDescriptors(refs, testData, methods)
                                if i == (len(algorithm) - 1):
                                    testData = dataUtilities.attributeDeselectionData(testData_sim, atts)
                                else:
                                    testData = dataUtilities.attributeDeselectionData(testData_sim, [])
                            elif algorithm[i] == "ECFP":
                                self.__log(str(algorithm[i]))
                                # testData_ecfp = orange.ExampleTable(train_domain)
                                tmp_dat = []
                                for d in testData:
                                    tmp = getCinfonyDesc.getRdkFPforTestInstance(train_domain, d)
                                    tmp_dat.append(tmp)
                                testData_ecfp = orange.ExampleTable(tmp_dat[0].domain, tmp_dat)
                                if i == (len(algorithm) - 1):
                                    # 						print "removing atts"
                                    testData = dataUtilities.attributeDeselectionData(testData_ecfp, atts)
                                else:
                                    # 						print "removing no atts"
                                    testData = dataUtilities.attributeDeselectionData(testData_ecfp, [])

                            else:
                                cut_off = orig_len - len(atts)
                                smarts = trainData.domain.attributes[cut_off:]
                                self.__log("  Number of structural features added: " + str(len(smarts)))
                                testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                                if i == (len(algorithm) - 1):
                                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, atts)
                                else:
                                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, [])

                    #                testData.save("/home/girschic/proj/AZ/ProjDev/test.tab")
                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    # Test if the train sets inside the optimizer will respect the dataSize criteria.
                    #  If not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs[0], negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    if dontOptimize:
                        logTxt += (
                            "       Fold " + str(foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                        )
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        runPath = miscUtilities.createScratchDir(
                            baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)
                        )
                        # 		    self.__log("	run path:"+str(runPath))
                        trainData.save(os.path.join(runPath, "trainData.tab"))

                        tunedPars = paramOptUtilities.getOptParam(
                            learner=MLmethods[ml],
                            trainDataFile=os.path.join(runPath, "trainData.tab"),
                            paramList=self.paramList,
                            useGrid=False,
                            verbose=self.verbose,
                            queueType=self.queueType,
                            runPath=runPath,
                            nExtFolds=None,
                            nFolds=self.nInnerFolds,
                            logFile=self.logFile,
                            getTunedPars=True,
                        )
                        if not MLmethods[ml] or not MLmethods[ml].optimized:
                            self.__log(
                                "       WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized."
                            )
                            self.__log("                It will be ignored")
                            # self.__log("                It will be set to default parameters")
                            self.__log("                    DEBUG can be done in: " + runPath)
                            # Set learner back to default
                            # MLmethods[ml] = MLmethods[ml].__class__()
                            raise Exception("The learner " + str(ml) + " was not optimized.")
                        else:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optAcc[ml].append(tunedPars[0])
                            else:
                                res = orngTest.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                    randomGenerator=random.randint(0, 100),
                                )
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)

                            miscUtilities.removeDir(runPath)
                    # Train the model
                    model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    # Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                        roc = self.aroc(testData, [model])
                        rocs[ml].append(roc)
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and correspondent predicted value
                        exp_pred[ml] += local_exp_pred
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    rocs[ml],
                )

                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                print "Unexpected error:",
                print sys.exc_info()[0]
                print sys.exc_info()[1]
                self.__log("       Learner " + str(ml) + " failed to create/optimize the model!")
                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    rocs[ml],
                )
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            # We still need to build a consensus model out of the stable models,
            #   but ONLY if there is more than one stable model!
            #   When there is only one stable model or none, build a consensus based on all models
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName]["stable"]:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            self.__log(
                "Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods."
            )

            if len(consensusMLs) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            if len(consensusMLs) >= 2:
                # Vars for saving each fold's result
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])
                )
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                        expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(DataIdxs[foldN])
                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    # Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        # By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
Example #51
    def test_RFAdvanced_Usage(self):
        """PLS - Test of optimizer with advanced configuration
        """
        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngRF.RFLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet = self.discTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM = "AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest")

        # Load the optimization parameters from the default configuration (AZLearnersParamsConfig.py)
        parameters = AZLearnersParamsConfig.API("RFLearner")
        parameters.setParameter("method", "default", 'rf1')

        # change the optimization parameters
        parameters.setParameter(
            "method", "default",
            'rf1')  #   make the method fixed (do not optimize) at 'rf1'
        parameters.setParameter("method", "optimize", False)
        parameters.setParameter(
            "method", "rangeType",
            "values")  #   assure that the keyword for the range type is
        #set correctly to values instead of interval

        parameters.setParameter(
            "k", "range",
            [1, 3, 5, 6, 10
             ])  #   optimize k over this explicit list of values
        parameters.setParameter("k", "optimize", True)
        parameters.setParameter(
            "k", "rangeType",
            "values")  #   assure that the keyword for the range type is
        #set correctly to values instead of interval

        # Run the appspack which will configure the input learner and additionally return
        #[<minimum of objective function found>, <optimized parameters>]
        #  The 'useParameters' argument is mandatory: even if a file with the new configuration
        # is placed in the running directory, we must still pass the correct parameters to the
        # optimizer. The parameters placed in the running directory are for appspack usage only,
        # and the optimizer needs to know which parameters appspack will use; otherwise it will
        # load the default ones.
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        useParameters = parameters.getParametersDict(),\
                        verbose = 0)
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        self.assertEqual(opt.usedMPI, False)
        self.assertEqual(learner.optimized, True)
        self.assertEqual(round(tunedPars[0], 2), round(0.61, 2))  #Ver 0.3

        #The learner now has its optimized parameters set, so we can make a classifier out of it
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
        self.assertEqual(round(CA, 2), round(0.97, 2))  #Ver 0.3
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >=
            5)  # Must be > 2
        miscUtilities.removeDir(runPath)
Example #52
    def testCvSVM_MPI(self):
        """
        Tests changing the default range of the optimizer.
        Use the MPI version of appspack.
        """
        # Classification accuracy:
        ExpectedCA = [0.585]  #Should be the result of the default point

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngCvSVM.CvSVMLearner()
        learnerName = "CvSVMLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["C"]  #, "gamma"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        # Change the range
        pars.setParameter("C", "range", miscUtilities.power2Range(-5, 2, 1))
        pars.setParameter("priors", "default", {
            "POS": 50,
            "NEG": 4
        })  # These priors are to ensure the default point will be the best!
        pars.setParameter(
            "gamma", "default", 0.001
        )  # This is a bad value to ensure the default point will be the best!

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_CvSVM_MPI")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        # Calculate the optimal parameters. This can take a long period of time!
        #   advancedMPIoptions = "-all-local -allcpus" could also be passed, but to use it the
        # file "<MPICHDIR>/share/machines.LINUX" must be properly configured.
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        useParameters = pars.getParametersDict(),\
                        findMin=fMin,\
                        runPath = runPath,\
                        verbose = 0,\
                        useStd = False,\
                        np = 4,\
                        machinefile = os.path.realpath(os.path.join(os.environ["AZORANGEHOME"], "tests/source/APPS_machines")))

        verbTunedPars = optimizer.getTunedParameters()

        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "Best result index from intRes file:", verbTunedPars["ResIdx"]
        print "Optimizer runPath:", runPath
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        # Check if the MPI version was used
        self.assertEqual(optimizer.usedMPI, True)

        # Check the number of optimized parameters
        self.assertEqual(len(verbTunedPars["optParam"]), 12)

        # Check the accuracy
        self.assert_(
            round(verbTunedPars["bestRes"],
                  3) in [round(x, 3) for x in ExpectedCA],
            "Got:" + str(verbTunedPars["bestRes"]))
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >= 5)

        #Check Priors
        self.assertEqual(tunedPars[1]["priors"], "None")
        learner.priors = {'NEG': 4, 'POS': 2}
        classifier = learner(self.discTest)
        classifier.write(os.path.join(runPath, "CvSVMModel"))
        file = open(os.path.join(runPath, "CvSVMModel/model.svm"), "r")
        lines = file.readlines()
        file.close()
        priors = [
            round(x, 2) for x in eval((lines[18].strip()).replace("data:", ""))
        ]
        self.assertEqual(len(priors), 2)
        self.assertEqual(
            priors[self.discTest.domain.classVar.values.index("POS")],
            2.0 * float(tunedPars[1]["C"]))
        self.assertEqual(
            priors[self.discTest.domain.classVar.values.index("NEG")],
            4.0 * float(tunedPars[1]["C"]))
        miscUtilities.removeDir(runPath)
Example #53
    def getAcc(self, algorithm = None, minsup = None, atts = None):
        """ For regression problems, it returns the RMSE and the R2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"R2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None

        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        
        if (self.algorithm):
            self.__log(" Additional structural features to be calculated inside of cross-validation")
            self.__log(" Algorithm for structural features: "+str(self.algorithm))
            self.__log(" Minimum support parameter: "+str(self.minsup))
        
        # Set the response type
        responseType =  self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification"  or "Regression"
        self.__log("  "+str(responseType))

        #Create the Train and test sets
        DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds) 
        
        #Vars for saving each fold's result
        results = {}
        exp_pred = {}
        
        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models={}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  "+str([x for x in MLmethods]))
        for ml in MLmethods:
          self.__log("    > "+str(ml)+"...")
          try:
            #Vars for saving each fold's result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            for foldN in range(self.nExtFolds):
                if type(self.learner) == dict:
                    self.paramList = None

                trainData = self.data.select(DataIdxs[foldN],negate=1)
                orig_len = len(trainData.domain.attributes)

                if (self.algorithm):
                    # add structural descriptors to the training data (TG)
                    trainData_structDesc = getStructuralDesc.getStructuralDescResult(trainData, self.algorithm, self.minsup)
                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, self.atts)

                runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "AccWOptParam")
                trainData.save(os.path.join(runPath,"trainData.tab"))

                testData = self.data.select(DataIdxs[foldN])
                if (self.algorithm):
                    # calculate the feature values for the test data (TG)
                    cut_off = orig_len - len(self.atts)
                    smarts = trainData.domain.attributes[cut_off:]
                    self.__log("  Number of structural features added: "+str(len(smarts)))
                    testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, self.atts)

                paramOptUtilities.getOptParam(
                    learner = MLmethods[ml], 
                    trainDataFile = os.path.join(runPath,"trainData.tab"), 
                    paramList = self.paramList, 
                    useGrid = False, 
                    verbose = self.verbose, 
                    queueType = self.queueType, 
                    runPath = runPath, 
                    nExtFolds = None, 
                    nFolds = self.nInnerFolds
                    )
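                # getOptParam runs the hyperparameter optimization with an inner nInnerFolds
                # cross-validation on this fold's training data only, so the external test set
                # never influences the selected parameters.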
                if not MLmethods[ml].optimized:
                    self.__log("       The learner "+str(ml)+" was not optimized.")
                    raise Exception("The learner "+str(ml)+" was not optimized.")
                miscUtilities.removeDir(runPath)
                #Train the model
                model = MLmethods[ml](trainData)
                models[ml].append(model)
                #Test the model
                if responseType == "Classification":
                    results[ml].append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                    #Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
            res = self.createStatObj(results[ml], exp_pred[ml], responseType, self.nExtFolds)
            if self.verbose > 0: 
                print "AccWOptParamGetter!Results  "+ml+":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            statistics[ml] = res.copy()
            self.__writeResults(res)
            self.__log("       OK")
          except:
            self.__log("       Learner "+str(ml)+" failed to optimize!")
            res = self.createStatObj()
            statistics[ml] = res.copy()

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            #We still need to build a consensus model out of the stable models 
            #   ONLY if there is more than one stable model!
            stableML={}
            for modelName in statistics:
                if statistics[modelName]["StabilityValue"] < AZOC.QSARSTABILITYTHRESHOLD:   # Select only stable models
                    stableML[modelName] = statistics[modelName].copy()
            if len(stableML) >= 2:
                self.__log("Found "+str(len(stableML))+" stable MLmethods out of "+str(len(statistics))+" MLmethods.")
                if responseType == "Classification":
                    CLASS0 = str(self.data.domain.classVar.values[0])
                    CLASS1 = str(self.data.domain.classVar.values[1])
                    exprTest0 = "(0"
                    for ml in stableML:
                        exprTest0 += "+( "+ml+" == "+CLASS0+" )*"+str(stableML[ml]["CA"])+" "
                    exprTest0 += ")/IF0(sum([False"
                    for ml in stableML:
                        exprTest0 += ", "+ml+" == "+CLASS0+" "
                    exprTest0 += "]),1)"
                    exprTest1 = exprTest0.replace(CLASS0,CLASS1)
                    expression = [exprTest0+" >= "+exprTest1+" -> "+CLASS0," -> "+CLASS1]
                else:
                    R2sum = sum([stableML[ml]["R2"] for ml in stableML])
                    expression = "(1 / "+str(R2sum)+") * (0"
                    for ml in stableML:
                        expression += " + "+str(stableML[ml]["R2"])+" * "+ml+" "
                    expression += ")"

                #Var for saving each fold's results
                Cresults = []
                Cexp_pred = []
                self.__log("Calculating the statistics for a Consensus model")
                for foldN in range(self.nExtFolds):
                    testData = self.data.select(DataIdxs[foldN])
                    consensusClassifiers = {}
                    for learnerName in stableML:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers = consensusClassifiers, expression = expression)     
                    #Test the model
                    if responseType == "Classification":
                        Cresults.append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        Cresults.append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                        #Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, responseType, self.nExtFolds)
                statistics["Consensus"] = res.copy()
                statistics["Consensus"]["IndividualStatistics"] = stableML.copy()
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics
                 
        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
Example #54
    def testRF_MPI(self):
        """
        Tests changing the default range of the optimizer.
        Use the MPI version of appspack.
        """
        # Classification accuracy:
        ExpectedCA = [0.612]  #opencv1.1: 0.90480000000000005

        optimizer = paramOptUtilities.Appspack()

        learner = AZorngRF.RFLearner()
        learnerName = "RFLearner"

        # Create an interface for setting optimizer parameters
        pars = AZLearnersParamsConfig.API(learnerName)

        # Set all parameters to not be optimized
        pars.setOptimizeAllParameters(False)

        parameterList = ["nActVars"]
        # Set the parameters in parameterList to be optimized
        for parameter in parameterList:
            pars.setParameter(parameter, "optimize", True)

        # Set the NumThreads
        pars.setParameter("NumThreads", "optimize", False)
        # Change the default
        pars.setParameter("NumThreads", "default", "1")

        trainFile = self.discTrainDataPath

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_RF_MPI")
        evalM = "AZutilities.evalUtilities.CA"
        fMin = False

        # Calculate the optimal parameters. This can take a long period of time!
        tunedPars = optimizer(learner=learner,\
                        dataSet=trainFile,\
                        evaluateMethod = evalM,\
                        useParameters = pars.getParametersDict(),\
                        useDefaultPoint = False,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useStd = False,\
                        verbose = 0,\
                        #advancedMPIoptions = "-all-local -allcpus")  # to use this, the
                        # file "<MPICHDIR>/share/machines.LINUX" must be properly configured.
                        # Alternatively, we can set machinefile=0 to also use all available cores.
                        machinefile = 0)
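        # With machinefile = 0 the optimizer is expected to use every local core; the assertion
        # against /proc/cpuinfo further down checks exactly that.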

        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"
        print "Number of cores used: ", optimizer.np

        verbTunedPars = optimizer.getTunedParameters()

        # Check that the learner was optimized
        self.assertEqual(learner.optimized, True)

        # Check that the number of processors used equals the number of available cores
        notUsed, out = commands.getstatusoutput(
            "cat /proc/cpuinfo | grep processor")
        self.assertEqual(optimizer.np, len(out.split("\n")))

        # Check if the MPI version was used
        self.assertEqual(optimizer.usedMPI, True)

        # Check the number of optimized parameters
        self.assert_(len(verbTunedPars["optParam"]) in [8, 9, 10])

        # Check the accuracy
        self.assert_(
            round(verbTunedPars["bestRes"],
                  3) in [round(x, 3) for x in ExpectedCA],
            "Got:" + str(verbTunedPars["bestRes"]))
        self.assert_(
            len(
                dataUtilities.DataTable(
                    os.path.join(runPath, "optimizationLog.txt"))) >=
            3)  #  Must be > 2

        miscUtilities.removeDir(runPath)
Example #55
    def __call__(self, trainingData, weight=None):
        """Creates an PLS model from the data in trainingData. """
        if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight):
            return None
        #Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        OrngFile = os.path.join(scratchdir,"OrngData.tab")

        # Remove meta attributes from training data to make the imputer work with examples without the meta attributes. 
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)

        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainData)
        # Impute the data
        trainData = self.imputer(trainData)
        # Save the data already imputed to an Orange formatted file
        if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..."
        orange.saveTabDelimited(OrngFile, trainData)
        if self.verbose > 1: print time.asctime(), "done"

        # Create the PLS instance
        if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
        learner = pls.PlsAPI()
        if self.verbose > 1: print time.asctime(), "done"

        # Assign the PLS parameters
        learner.SetParameter('v', str(self.verbose))
        learner.SetParameter('debug', str(int(self.verbose > 0)))
        learner.SetParameter('method', self.method)
        if types.IntType(self.k) > len(trainData.domain.attributes):
            learner.SetParameter('k', str(len(trainData.domain.attributes)))
            if self.verbose > 0: print "Warning! The number of components were more than the number of attributes."
            if self.verbose > 0: print "   Components were set to ", len(trainData.domain.attributes)
        else:
            learner.SetParameter('k', self.k)
        learner.SetParameter('precision', self.precision)
        learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)

        # Read the Orange formatted file and train the algorithm
        # TRAIN
        if self.verbose > 1: print time.asctime(), "Training..."
        learner.Train(OrngFile)
        if self.verbose > 1:
            print "Train finished at ", time.asctime()
            print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds"
            print "Method:     " + learner.GetParameter("method")
            print "Components: " + learner.GetParameter("k")
            print "Precision:  " + learner.GetParameter("precision")

        # Remove the scratch dir
        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
        del trainData
        impData = self.imputer.defaults
        return PLSClassifier(classifier = learner,
                             name = "Classifier of " + self.name,
                             classVar = trainingData.domain.classVar,
                             imputeData = impData,
                             verbose = self.verbose,
                             varNames = [attr.name for attr in trainingData.domain.attributes],
                             NTrainEx = len(trainingData),
                             basicStat = self.basicStat,
                             parameters = self.parameters)  #learner.GetClassVarName())#
Example #56
    def test_PLS_MPI_2(self):
        ###################################################################
        #       Test other way of setting appspack
        ###################################################################
        # Classification accuracy:
        ExpectedCA = [0.567049808429, 0.593869731801]
        ExpectedCAwithTest = [0.6, 0.579]  #New at orange2.0

        #Create  the appspack instance
        opt = paramOptUtilities.Appspack()
        #Learner to be optimized
        learner = AZorngPLS.PLSLearner()
        #dataset to use in the parameters optimization (Discrete class in this example)
        dataSet = self.discTrainDataPath
        # Define the objective function. This requires:
        #    defining the extreme to find (the min or max): findMin=True or findMin=False
        fMin = False
        #    defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults):
        #       evaluateMethod="AZutilities.evalUtilities.CA"
        evalM = "AZutilities.evalUtilities.CA"

        # Create a directory for running the appspack (if not defined it will use the present working directory)
        runPath = miscUtilities.createScratchDir(desc="ParamOptTest_PLS_MPI_2")

        # Load the optimization parameters from the default configuration (AZLearnersParamsConfig.py)
        parameters = AZLearnersParamsConfig.API("PLSLearner")
        parameters.setParameter("method", "default", 'pls1')

        # change the optimization parameters
        parameters.setParameter(
            "method", "default",
            'pls1')  #   make the method fixed (do not optimize) to be pls1
        parameters.setParameter("method", "optimize", False)
        parameters.setParameter(
            "method", "rangeType",
            "values")  #   assure that the keyword for the values range type is
        #set correctly for values instead of interval

        parameters.setParameter(
            "k", "range",
            [1, 3, 5, 6, 10
             ])  #   set the candidate values for the number of components k
        parameters.setParameter("k", "optimize", True)
        parameters.setParameter(
            "k", "rangeType",
            "values")  #   assure that the keyword for the values range type is
        #   set correctly for values instead of interval
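        # Net effect of the settings above: 'method' stays fixed at 'pls1', while 'k' is
        # searched over the listed values 1, 3, 5, 6 and 10.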
        # The optimizer returns: [<best value of the objective function found>, <optimized parameters>]
        tunedPars = opt(learner=learner,\
                        dataSet=dataSet,\
                        evaluateMethod = evalM,\
                        findMin=fMin,\
                        runPath = runPath,\
                        useParameters = parameters.getParametersDict(),\
                        verbose = 0,
                        useStd = False,\
                        advancedMPIoptions = None,
                        np = 4,
                        machinefile = ["localhost:2","localhost:2"])
        print "Returned: ", tunedPars
        print "====================== optimization Done ==========================="
        print "Learner optimized flag = ", learner.optimized
        print "Tuned parameters = ", tunedPars[1]
        print "Best optimization result = ", tunedPars[0]
        print "check the file intRes.txt to see the intermediate results of optimizer!"

        self.assertEqual(learner.optimized, True)
        # Check if the MPI version was used
        self.assertEqual(opt.usedMPI, True)
        self.assert_(
            round(tunedPars[0],
                  3) in [round(x, 3) for x in ExpectedCAwithTest],
            "Got:" + str(tunedPars[0]))

        #The learner is now with its optimized parameters already set, so we can now make a classifier out of it
        classifier = learner(self.discTrain)
        CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier)
        self.assert_(
            round(CA, 3) in [round(x, 3) for x in ExpectedCA],
            "Got: " + str(CA))
        resData2 = dataUtilities.DataTable(
            os.path.join(runPath, "optimizationLog.txt"))
        self.assert_(len(resData2) >= 4)  # (orig 5) Must be > 2
        #print runPath
        miscUtilities.removeDir(runPath)
Example #57
    def __call__(self, trainingData, weight=None):
        """Creates an PLS model from the data in trainingData. """
        if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
            return None
        #Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(
            trainingData, True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        OrngFile = os.path.join(scratchdir, "OrngData.tab")

        # Remove meta attributes from training data to make the imputer work with examples without the meta attributes.
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)

        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainData)
        # Impute the data
        trainData = self.imputer(trainData)
        # Save the data already imputed to an Orange formatted file
        if self.verbose > 1:
            print time.asctime(), "Saving Orange Data to a tab file..."
        orange.saveTabDelimited(OrngFile, trainData)
        if self.verbose > 1: print time.asctime(), "done"

        # Create the PLS instance
        if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
        learner = pls.PlsAPI()
        if self.verbose > 1: print time.asctime(), "done"

        # Assign the PLS parameters
        learner.SetParameter('v', str(self.verbose))
        learner.SetParameter('debug', str(int(self.verbose > 0)))
        learner.SetParameter('method', self.method)
        if types.IntType(self.k) > len(trainData.domain.attributes):
            learner.SetParameter('k', str(len(trainData.domain.attributes)))
            if self.verbose > 0:
                print "Warning! The number of components were more than the number of attributes."
            if self.verbose > 0:
                print "   Components were set to ", len(
                    trainData.domain.attributes)
        else:
            learner.SetParameter('k', self.k)
        learner.SetParameter('precision', self.precision)
        learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)

        # Read the Orange formatted file and train the algorithm
        # TRAIN
        if self.verbose > 1: print time.asctime(), "Training..."
        learner.Train(OrngFile)
        if self.verbose > 1:
            print "Train finished at ", time.asctime()
            print "PLS trained in: " + str(
                learner.GetCPUTrainTime()) + " seconds"
            print "Method:     " + learner.GetParameter("method")
            print "Components: " + learner.GetParameter("k")
            print "Precision:  " + learner.GetParameter("precision")

        # Remove the scratch file
        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
        del trainData
        impData = self.imputer.defaults
        return PLSClassifier(
            classifier=learner,
            name="Classifier of " + self.name,
            classVar=trainingData.domain.classVar,
            imputeData=impData,
            verbose=self.verbose,
            varNames=[attr.name for attr in trainingData.domain.attributes],
            NTrainEx=len(trainingData),
            basicStat=self.basicStat,
            parameters=self.parameters)  #learner.GetClassVarName())#