Пример #1
0
    def test_MetaDataHandleForSavingModel(self):
        """Test the handling of SaveModel for data with meta attributes.

        A model is trained on data that carries meta attributes, written to a
        scratch directory and read back. The test checks that the reloaded
        model's imputer domain holds no meta attributes, that the accuracies
        measured before and after the save/load round trip are identical, and
        that they match the expected reference values.
        """
        # Local import: only the scratch directory cleanup needs it.
        import shutil

        expectedAccWMeta = 1.0 # Ver 0.3
        expectedAccNoMeta = 0.55757575800000003
        #Test the save of a model created from a train data with meta attributes
        self.assert_(len(self.WMetaTest.domain.getmetas())>=1,"The dataset WMetaTest should have Meta Attributes")
        CvBoostlearner = AZorngCvBoost.CvBoostLearner()
        BoostM = CvBoostlearner(self.WMetaTest)
        AccNoMetaBefore = evalUtilities.getClassificationAccuracy(self.NoMetaTrain,BoostM)
        AccWMetaBefore = evalUtilities.getClassificationAccuracy(self.WMetaTest,BoostM)

        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdiriTest"+str(time.time()))
        os.mkdir(scratchdir)
        try:
            modelPath = os.path.join(scratchdir,"CvBoostModel.CvBoost")
            BoostM.write(modelPath)

            # Read in the model
            BoostR = AZorngCvBoost.CvBoostread(modelPath)
            self.assert_(len(BoostR.imputer.defaults.domain.getmetas())==0,"There shouldn't be any Meta data now!")

            # Calculate classification accuracy
            AccNoMetaAfter = evalUtilities.getClassificationAccuracy(self.NoMetaTrain, BoostR)
            AccWMetaAfter = evalUtilities.getClassificationAccuracy(self.WMetaTest, BoostR)

            # Test that the accuracy of the model is the same before and after saving
            self.assertEqual(AccNoMetaBefore, AccNoMetaAfter,"NoMeta: Predictions after loading saved model were different")
            self.assertEqual(AccWMetaBefore, AccWMetaAfter, "WMeta: Predictions after loading saved model were different")
            self.assertEqual(round(AccWMetaAfter,9), round(expectedAccWMeta,9))
            self.assertEqual(round(AccNoMetaAfter,9), round(expectedAccNoMeta,9))
        finally:
            # Remove the scratch directory even when an assertion above fails,
            # so failed runs do not leave model files behind.
            shutil.rmtree(scratchdir, ignore_errors=True)
Пример #2
0
    def test_SavedModel(self):
        """Test to assure that a saved Boost model gives the same predictions as before saving.

        The accuracy on self.test_data is computed with the freshly trained
        model and again with the model read back from disk; both values must
        be exactly equal.
        """
        # Local import: only the scratch directory cleanup needs it.
        import shutil

        # Create a Boost model
        Boost = AZorngCvBoost.CvBoostLearner(self.train_data)

        # Calculate classification accuracy
        Acc = evalUtilities.getClassificationAccuracy(self.test_data, Boost)

        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdir"+str(time.time()))
        os.mkdir(scratchdir)
        try:
            modelPath = os.path.join(scratchdir,"Boost.fBoost")
            Boost.write(modelPath)

            # Read in the model
            Boost = AZorngCvBoost.CvBoostread(modelPath)

            # Calculate classification accuracy
            savedAcc = evalUtilities.getClassificationAccuracy(self.test_data, Boost)

            # Test that the accuracy of the two classifiers is exactly the same
            self.assertEqual(Acc, savedAcc)
        finally:
            # Remove the scratch directory even when the test fails part-way.
            shutil.rmtree(scratchdir, ignore_errors=True)
Пример #3
0
        def TopVarImportanceTest(data, expectNone = False):
            """Check getTopImportantVars before and after a save/load round trip.

            A CvBoost model is trained on 'data' and queried with
            getTopImportantVars(ex, 1) for every example; the model is then
            written to a scratch directory, read back, and queried again.

            data:       the examples used both for training and for querying.
            expectNone: when True, the check passes only if every result of
                        both models is None. When False, it passes only if
                        both result lists are equal, contain no None and are
                        not made up of one single repeated value.

            Returns True when the check passes, False otherwise.
            """
            CvBoost = AZorngCvBoost.CvBoostLearner(data)
            resA = [CvBoost.getTopImportantVars(ex,1) for ex in data]

            scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
            try:
                modelPath = os.path.join(scratchdir,"CvBoostModel")
                CvBoost.write(modelPath)
                LoadedCvBoost = AZorngCvBoost.CvBoostread(modelPath)
            finally:
                # The directory is removed right after loading (as before), but
                # now also when writing or reading the model raises.
                miscUtilities.removeDir(scratchdir)

            resB = [LoadedCvBoost.getTopImportantVars(ex,1) for ex in data]
            if expectNone:
                return resA == resB == [None]*len(data)
            else:
                return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
Пример #4
0
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelFile' and load the respective model.

       Returns the Classifier saved in the respective model path.
       If called without parameters, it returns a tuple of known classifier types.
       It can return the classifier, or just a string with the Type.

       The type is recognised by probing 'modelFile' for a marker file or
       directory; the markers are tried in a fixed order and the first one
       found decides the type. A directory without any known marker is assumed
       to be an RF model in the old format.

       modelFile        - path of the saved model; when empty or None the tuple
                          of known type names is returned.
       verbose          - passed unchanged to the reader of the detected type.
       retrunClassifier - (sic: the misspelt name is part of the signature)
                          True to return the loaded classifier, False to
                          return only the name of the detected type.

       Returns None when 'modelFile' matches no known layout, or when loading
       it as an old-format RF model fails.

            modelRead (modelFile [, verbose = 0] [, retrunClassifier = True] )"""

    if not modelFile:
        # NOTE(review): this tuple advertises "CvRF" while the detection below
        # reports "RF" - confirm which name callers expect before unifying.
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost",
                "CvBayes", "Consensus")

    modelType = None
    loadedModel = None
    # The trainingMethods modules are imported lazily, one per branch, so that
    # only the module of the detected model type gets loaded.
    if os.path.isfile(os.path.join(modelFile, "model.svm")):
        modelType = "CvSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvSVM
        loadedModel = AZorngCvSVM.CvSVMread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "model.SignSvm")):
        modelType = "SignSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngSignSVM
        loadedModel = AZorngSignSVM.SignSVMread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.ann")):
        modelType = "CvANN"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvANN
        loadedModel = AZorngCvANN.CvANNread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "Model.pls")):
        modelType = "PLS"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngPLS
        loadedModel = AZorngPLS.PLSread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.rf")):
        modelType = "RF"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngRF
        loadedModel = AZorngRF.RFread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "C0.model")):
        modelType = "Consensus"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngConsensus
        loadedModel = AZorngConsensus.Consensusread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.boost")):
        modelType = "CvBoost"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBoost
        loadedModel = AZorngCvBoost.CvBoostread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.bayes")):
        modelType = "CvBayes"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBayes
        loadedModel = AZorngCvBayes.CvBayesread(modelFile, verbose)
    elif os.path.isdir(modelFile):
        # Assuming an RF old format for backcompatibility
        modelType = "RF"
        if not retrunClassifier: return modelType
        try:
            from trainingMethods import AZorngRF
            loadedModel = AZorngRF.RFread(modelFile, verbose)
        except Exception:
            # Best effort: a directory that cannot be read as an old RF model
            # is reported as "no model". Only Exception is caught, so that
            # KeyboardInterrupt and SystemExit still propagate.
            loadedModel = None

    return loadedModel
Пример #5
0
    def no_test_Impute(self):  # Boost cannot deal with regression
        """Test missing values imputation.

        Assure that imputation works for the Boost models. Test on data with missing values.

        Two examples are predicted as they are, with one variable set to 0,
        with that variable set to the value of an average imputer built on the
        training data, and with that variable missing ('?'). The prediction
        for the missing value must equal the one for the imputed value, and
        the same must hold for a model read back from disk.

        The 'no_' prefix in the method name keeps this method from matching
        the 'test' prefix, see the remark on the def line.
        """
        # Local import: only the scratch directory cleanup needs it.
        import shutil

        #This data is loaded here to speed up the test suite since it is too big
        contTestDataPath = os.path.join(AZOC.AZORANGEHOME,"tests/source/data/linearTest.tab")
        contTrainDataPath = os.path.join(AZOC.AZORANGEHOME,"tests/source/data/linearTrain.tab")
        contTrain = dataUtilities.DataTable(contTrainDataPath)
        contTest = dataUtilities.DataTable(contTestDataPath)

        ex1=contTest[5]
        ex2=contTest[6]
        self.assert_(ex1["Desc 71"]!="?","The var Desc 71 shouldn't be missing!")
        self.assert_(ex2["Desc 138"]!="?","The var Desc 138 shouldn't be missing!")

        imputer = orange.ImputerConstructor_average(contTrain)

        CvBoostlearner = AZorngCvBoost.CvBoostLearner()
        Boost = CvBoostlearner(contTrain)

        # Prediction for data as it is
        P1=Boost(ex1)
        P2=Boost(ex2)

        # Predictions changing one continuous and one discrete variable to 0
        ex1["Desc 71"]=0
        ex2["Desc 138"]=0
        P1_0=Boost(ex1)
        P2_0=Boost(ex2)

        # Predictions changing the same continuous and discrete variable to its corresponding imputation value
        ex1["Desc 71"]=imputer.defaults["Desc 71"]
        ex2["Desc 138"]=imputer.defaults["Desc 138"]
        P1_imp=Boost(ex1)
        P2_imp=Boost(ex2)

        # Predictions changing the same continuous and discrete variable to '?' which means that the same imputation
        # as in the last case will have to be made inside the classifier. So, the predicted value must be the same
        ex1["Desc 71"]="?"
        ex2["Desc 138"]="?"
        self.assert_(ex1["Desc 71"]=="?","The var Desc 71 should be missing now!")
        self.assert_(ex2["Desc 138"]=="?","The var Desc 138 should be missing now!")
        P1Miss=Boost(ex1)
        P2Miss=Boost(ex2)


        # Test if the prediction made for the example with missing value is the same as the one
        # for the example whose missing values were substituted using the same method as the classifier does.
        self.assert_(P1_imp==P1Miss,"Imputation was not made correctly inside the classifier")
        # NOTE(review): the equivalent check for the second example is disabled; reason not recorded here.
        #self.assert_(P2_imp==P2Miss,"Imputation was not made correctly inside the classifier")

        # Assure that if other substitutions on those variables were made, the predicted value would be different,
        # and so, this is a valid method for testing the imputation
        self.assert_(P1.value!=P2.value)      # Just to assure that we are not comparing equal examples
        self.assert_(P1.value!=P1_imp.value,"The imputed 1 was the same as the original ... try other example")
        self.assert_(P1_0.value!=P1_imp.value,"The imputed 1 was the same as the replaced by 0. The classifier may be replacing missing values by 0")
        self.assert_(P2.value!=P2Miss.value, "The missing imputed 2 was the same as the original ... try other example")
        self.assert_(P2_0.value!=P2Miss.value,"The missing imputed 2 was the same as the replaced by 0. The classifier may be replacing missing values by 0")


        #Test the imputer for saved models
        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdir"+str(time.time()))
        os.mkdir(scratchdir)
        try:
            modelPath = os.path.join(scratchdir,"CvBoostModel")
            Boost.write(modelPath)

            # Read in the model
            BoostM = AZorngCvBoost.CvBoostread(modelPath)
            # Predict the ex1 and ex2 which are still the examples with missing values '?'
            self.assert_( ex1["Desc 71"]=="?","Value of Var Desc 71 should be missing!")
            self.assert_( ex2["Desc 138"]=="?","Value of Var Desc 138 should be missing!")
            self.assert_(round(BoostM(ex1),6)==round(P1Miss,6),"Imputation on loaded model is not correct")
            self.assert_(round(BoostM(ex2),6)==round(P2Miss,6),"Imputation on loaded model is not correct")
        finally:
            # Remove the scratch directory even when an assertion above fails.
            shutil.rmtree(scratchdir, ignore_errors=True)