Exemplo n.º 1
0
 def write(self, filePath, data=None):
     """Save a PLS classifier to disk.

     Writes the PLS model, the impute data (ImputeData.tab), the variable
     names ordered as used in training plus NTrainEx and basicStat
     (varNames.txt), and the parameters (parameters.pkl) into the
     directory 'filePath'.

     Returns True on success, False otherwise ('data' is unused; kept
     for interface compatibility).
     """
     try:
         # Save the classifier itself
         self.classifier.SavePLSModel(str(filePath))
         if not self.imputer:
             if self.verbose > 0: print "ERROR: PLS model saved without impute data"
             return False
         # Save a data set with one row containing the impute values
         impData = dataUtilities.DataTable(self.imputer.defaults.domain)
         impData.append(self.imputer.defaults)
         # Remove the meta attributes from the imputer data. We don't need
         # to store them along with the model.
         impData = dataUtilities.getCopyWithoutMeta(impData)
         # os.path.join instead of manual "/" concatenation
         impData.save(os.path.join(str(filePath), "ImputeData.tab"))
         # Save the var names ordered the same way the Learner was trained
         varNamesFile = open(os.path.join(filePath, "varNames.txt"), "w")
         varNamesFile.write(str(self.varNames) + "\n")
         varNamesFile.write(str(self.NTrainEx) + "\n")
         varNamesFile.write(str(self.basicStat) + "\n")
         varNamesFile.close()
         # Save the parameters
         self._saveParameters(os.path.join(filePath, "parameters.pkl"))
     except Exception:
         # BUG FIX: the original printed undefined name 'path' here
         # (the parameter is 'filePath'), raising a NameError when
         # verbose > 0. Also narrowed the bare 'except:'.
         if self.verbose > 0: print "ERROR: Could not save model to ", filePath
         return False
     return True
Exemplo n.º 2
0
    def write(self, path):
        '''Save a Boost classifier to disk.

        Creates 'path' if needed, removes any previous model files, then
        writes ImputeData.tab, model.boost, varNames.txt (var names,
        NTrainEx, basicStat) and parameters.pkl. Returns True on success,
        False otherwise.
        '''
        thePath = str(path)
        try:
            if os.path.isdir(thePath):
                # Remove stale model files. os.remove replaces the original
                # os.system("rm -f ...") shell calls (no shell, portable).
                for fName in ("ImputeData.tab", "model.boost", "varNames.txt"):
                    fPath = os.path.join(thePath, fName)
                    if os.path.exists(fPath):
                        os.remove(fPath)
            else:
                os.mkdir(thePath)
            if not os.path.isdir(thePath):
                if self.verbose > 0: print "ERROR: Could not create ", path
                return False

            # Save a data set with one row containing the impute values
            impData = dataUtilities.DataTable(self.imputer.defaults.domain)
            impData.append(self.imputer.defaults)
            # Remove the meta attributes from the imputer data. We don't need
            # to store them along with the model.
            impData = dataUtilities.getCopyWithoutMeta(impData)
            impData.save(os.path.join(thePath, "ImputeData.tab"))

            self.classifier.save(os.path.join(thePath, "model.boost"))
            # Save the var names ordered the same way the Learner was trained
            varNamesFile = open(os.path.join(thePath, "varNames.txt"), "w")
            varNamesFile.write(str(self.varNames) + "\n")
            varNamesFile.write(str(self.NTrainEx) + "\n")
            varNamesFile.write(str(self.basicStat) + "\n")
            varNamesFile.close()
            # Save the parameters
            self._saveParameters(os.path.join(thePath, "parameters.pkl"))
        except Exception:
            if self.verbose > 0: print "ERROR: Could not save model to ", path
            return False
        return True
Exemplo n.º 3
0
 def write(self, filePath, data=None):
     """Save a PLS classifier to disk.

     Saves the model, impute data, ordered variable names (with NTrainEx
     and basicStat) and parameters under 'filePath'. Returns True on
     success, False on any failure ('data' is unused; kept for interface
     compatibility).
     """
     try:
         # Save classifier
         self.classifier.SavePLSModel(str(filePath))
         if not self.imputer:
             if self.verbose > 0:
                 print "ERROR: PLS model saved without impute data"
             return False
         # Save a data set with one row containing the impute values
         impData = dataUtilities.DataTable(self.imputer.defaults.domain)
         impData.append(self.imputer.defaults)
         # Remove the meta attributes from the imputer data. We don't need
         # to store them along with the model.
         impData = dataUtilities.getCopyWithoutMeta(impData)
         impData.save(os.path.join(str(filePath), "ImputeData.tab"))
         # Save the var names ordered the same way the Learner was trained
         varNamesFile = open(os.path.join(filePath, "varNames.txt"), "w")
         varNamesFile.write(str(self.varNames) + "\n")
         varNamesFile.write(str(self.NTrainEx) + "\n")
         varNamesFile.write(str(self.basicStat) + "\n")
         varNamesFile.close()
         # Save the parameters
         self._saveParameters(os.path.join(filePath, "parameters.pkl"))
     except Exception:
         # BUG FIX: 'path' was undefined in the original handler; the
         # parameter is 'filePath'. Bare 'except:' narrowed as well.
         if self.verbose > 0: print "ERROR: Could not save model to ", filePath
         return False
     return True
Exemplo n.º 4
0
    def write(self, path):
        '''Save a Boost classifier to disk.

        Creates 'path' if needed, removes any previous model files, then
        writes ImputeData.tab, model.boost and varNames.txt (var names,
        NTrainEx, basicStat). Returns True on success, False otherwise.
        NOTE: this variant intentionally does not write parameters.pkl.
        '''
        thePath = str(path)
        try:
            if os.path.isdir(thePath):
                # Remove stale model files in place of the original
                # os.system("rm -f ...") shell calls.
                for fName in ("ImputeData.tab", "model.boost", "varNames.txt"):
                    fPath = os.path.join(thePath, fName)
                    if os.path.exists(fPath):
                        os.remove(fPath)
            else:
                os.mkdir(thePath)
            if not os.path.isdir(thePath):
                if self.verbose > 0: print "ERROR: Could not create ", path
                return False

            # Save a data set with one row containing the impute values
            impData = dataUtilities.DataTable(self.imputer.defaults.domain)
            impData.append(self.imputer.defaults)
            # Remove the meta attributes from the imputer data. We don't need
            # to store them along with the model.
            impData = dataUtilities.getCopyWithoutMeta(impData)
            impData.save(os.path.join(thePath, "ImputeData.tab"))

            self.classifier.save(os.path.join(thePath, "model.boost"))
            # Save the var names ordered the same way the Learner was trained
            varNamesFile = open(os.path.join(thePath, "varNames.txt"), "w")
            varNamesFile.write(str(self.varNames) + "\n")
            varNamesFile.write(str(self.NTrainEx) + "\n")
            varNamesFile.write(str(self.basicStat) + "\n")
            varNamesFile.close()

        except Exception:
            if self.verbose > 0: print "ERROR: Could not save model to ", path
            return False
        return True
Exemplo n.º 5
0
    def __call__(self, data, weight=None):
        """Create a CvBayesClassifier trained on 'data'.

        Validates the input through the base learner, builds an
        average-value imputer, optionally scales/continuizes the data,
        converts it to CvMat and trains an OpenCV normal Bayes model.

        Returns a CvBayesClassifier, or None if base-class validation
        fails. Raises Exception when the class variable is not discrete
        (Bayes is classification-only).
        """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            return None
        if data.domain.classVar.varType != orange.VarTypes.Discrete:
            raise Exception(
                "AZorngCvBayes can only be used for classification.")
        # Remove from the domain any unused values of discrete attributes,
        # including the class
        data = dataUtilities.getDataWithoutUnusedValues(data, True)

        # Work on a copy without meta attributes (they confuse the imputer)
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer and impute the data
        self.imputer = orange.ImputerConstructor_average(trainingData)
        trainingData = self.imputer(trainingData)
        if self.scale:
            self.scalizer = dataUtilities.scalizer()
            self.scalizer.scaleClass = False
            self.scalizer.nMin = -1
            self.scalizer.nMax = 1
            self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
        else:
            self.trainData = trainingData
            self.scalizer = None

        impData = self.imputer.defaults
        # Convert the ExampleTable to CvMat. CvMatrices also carries
        # "varTypes" and "missing_data_mask", but the Bayes classifier
        # does not use them (unused locals removed).
        CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatrices["matrix"]
        responses = CvMatrices["responses"]

        # Create the model: it MUST be created with the NON DEFAULT
        # constructor or must call create
        classifier = ml.CvNormalBayesClassifier()
        classifier.clear()
        # Train the model
        # CvNormalBayesClassifier::train(const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false)
        classifier.train(mat, responses, None, None, False)
        return CvBayesClassifier(classifier=classifier,
                                 classVar=trainingData.domain.classVar,
                                 imputeData=impData,
                                 verbose=self.verbose,
                                 varNames=CvMatrices["varNames"],
                                 nIter=None,
                                 basicStat=self.basicStat,
                                 NTrainEx=len(trainingData),
                                 scalizer=self.scalizer,
                                 parameters=self.parameters)
    def __call__(self, data, weight=None):
        """Create a CvBayesClassifier trained on 'data'.

        Same pipeline as the learner call: base-class validation,
        average-value imputation, optional scaling, CvMat conversion and
        training of an OpenCV normal Bayes model.

        Returns a CvBayesClassifier, or None if base-class validation
        fails. Raises Exception when the class variable is not discrete.
        NOTE: unlike the sibling variant, this one does not forward a
        'parameters' keyword to the classifier (behavior preserved).
        """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            return None
        if data.domain.classVar.varType != orange.VarTypes.Discrete:
            raise Exception("AZorngCvBayes can only be used for classification.")
        # Remove from the domain any unused values of discrete attributes,
        # including the class
        data = dataUtilities.getDataWithoutUnusedValues(data, True)

        # Work on a copy without meta attributes
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer and impute the data
        self.imputer = orange.ImputerConstructor_average(trainingData)
        trainingData = self.imputer(trainingData)
        if self.scale:
            self.scalizer = dataUtilities.scalizer()
            self.scalizer.scaleClass = False
            self.scalizer.nMin = -1
            self.scalizer.nMax = 1
            self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
        else:
            self.trainData = trainingData
            self.scalizer = None

        impData = self.imputer.defaults
        # Convert the ExampleTable to CvMat. "varTypes" and
        # "missing_data_mask" are available in CvMatrices but are not
        # needed by the Bayes classifier (unused locals removed).
        CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatrices["matrix"]
        responses = CvMatrices["responses"]

        # Create the model: it MUST be created with the NON DEFAULT
        # constructor or must call create
        classifier = ml.CvNormalBayesClassifier()
        classifier.clear()
        # Train the model
        # CvNormalBayesClassifier::train(const CvMat* _train_data, const CvMat* _responses, const CvMat* _var_idx =0, const CvMat* _sample_idx=0, bool update=false)
        classifier.train(mat, responses, None, None, False)
        return CvBayesClassifier(
            classifier=classifier,
            classVar=trainingData.domain.classVar,
            imputeData=impData,
            verbose=self.verbose,
            varNames=CvMatrices["varNames"],
            nIter=None,
            basicStat=self.basicStat,
            NTrainEx=len(trainingData),
            scalizer=self.scalizer,
        )
Exemplo n.º 7
0
    def write(self, dirPath):
        """Save a RF model to disk with the data used to train the model.
           It is imparative that the model is saved with the data used for training. Only the domain is used. """
         
        try:
                #This removes any trailing '/'
                dirPath = os.path.realpath(str(dirPath))

                # This assures that all related files will be inside a folder
                os.system("mkdir -p " + dirPath) 
                
                filePath = os.path.join(dirPath,"model.rf")

                # The impute Data was previously added to the self.attributeInfo
                # Remove the meta attributes from the imputer data. We don't need to store them along with the model

                if  self.useBuiltInMissValHandling:
                    impData = self.imputeData
                else:
                    # Save a data set with one row containing the impute values
                    impData = dataUtilities.DataTable(self.imputer.defaults.domain)
                    impData.append(self.imputer.defaults)
                # Remove the meta attributes from the imputer data. We don't need to store them along with the model
                impData = dataUtilities.getCopyWithoutMeta(impData)


                impData.save(os.path.join(dirPath,"ImputeData.tab"))


                #Save the info about the train data as:
                #    var names ordered the same way the Learner was trained
                #    NTrainEx
                #    basicStat 

                varNamesFile = open(os.path.join(dirPath,"varNames.txt"),"w")

                varNamesFile.write(str(self.varNames)+"\n") 
                varNamesFile.write(str(self.NTrainEx)+"\n") 
                varNamesFile.write(str(self.basicStat)+"\n") 
                varNamesFile.close()
                #Save the parameters
                self._saveParameters(os.path.join(dirPath,"parameters.pkl"))
                # Save the model
                self.classifier.save(filePath)
        except:            
                if self.verbose > 0: print "ERROR: Could not save model to ", path
                return False
        return True
Exemplo n.º 8
0
    def write(self, dirPath):
        """Save a RF model to disk with the data used to train the model.
           It is imparative that the model is saved with the data used for training. Only the domain is used. """
         
        try:
                #This removes any trailing '/'
                dirPath = os.path.realpath(str(dirPath))

                # This assures that all related files will be inside a folder
                os.system("mkdir -p " + dirPath) 
                
                filePath = os.path.join(dirPath,"model.rf")

                # The impute Data was previously added to the self.attributeInfo
                # Remove the meta attributes from the imputer data. We don't need to store them along with the model

                if  self.useBuiltInMissValHandling:
                    impData = self.imputeData
                else:
                    # Save a data set with one row containing the impute values
                    impData = dataUtilities.DataTable(self.imputer.defaults.domain)
                    impData.append(self.imputer.defaults)
                # Remove the meta attributes from the imputer data. We don't need to store them along with the model
                impData = dataUtilities.getCopyWithoutMeta(impData)


                impData.save(os.path.join(dirPath,"ImputeData.tab"))


                #Save the info about the train data as:
                #    var names ordered the same way the Learner was trained
                #    NTrainEx
                #    basicStat 

                varNamesFile = open(os.path.join(dirPath,"varNames.txt"),"w")

                varNamesFile.write(str(self.varNames)+"\n") 
                varNamesFile.write(str(self.NTrainEx)+"\n") 
                varNamesFile.write(str(self.basicStat)+"\n") 
                varNamesFile.close()
                #Save the parameters
                self._saveParameters(os.path.join(dirPath,"parameters.pkl"))
                # Save the model
                self.classifier.save(filePath)
        except:            
                if self.verbose > 0: print "ERROR: Could not save model to ", path
                return False
        return True
Exemplo n.º 9
0
 def write(self, filePath, data=None):
     """Save a PLS classifier to disk.

     Writes the PLS model to 'filePath' and the impute data to
     'filePath/ImputeData.tab'. Returns True on success, False on
     failure ('data' is unused; kept for interface compatibility).
     """
     try:
         # Save the classifier
         self.classifier.SavePLSModel(str(filePath))
         if not self.imputer:
             if self.verbose > 0: print "ERROR: PLS model saved without impute data"
             return False
         # Save a data set with one row containing the impute values
         impData = dataUtilities.DataTable(self.imputer.defaults.domain)
         impData.append(self.imputer.defaults)
         # Remove the meta attributes from the imputer data. We don't need
         # to store them along with the model.
         impData = dataUtilities.getCopyWithoutMeta(impData)
         impData.save(os.path.join(str(filePath), "ImputeData.tab"))
     except Exception:
         # BUG FIX: original printed undefined name 'path'; the
         # parameter is 'filePath'. Bare 'except:' narrowed too.
         if self.verbose > 0: print "ERROR: Could not save model to ", filePath
         return False
     return True
Exemplo n.º 10
0
    def write(self, path):
        """Save an SVM classifier to disk.

        Creates 'path' if needed, removes any previous model files, then
        writes ImputeData.tab, model.svm, the scaling values (when a
        scalizer exists), varNames.txt (var names, NTrainEx, basicStat)
        and parameters.pkl. Returns True on success, False otherwise.
        """
        import shutil  # local import: only needed to remove old scalingValues
        thePath = str(path)
        try:
            if os.path.isdir(thePath):
                # Remove stale model files. os.remove/shutil.rmtree replace
                # the original os.system("rm -f/-Rf ...") shell calls.
                for fName in ("ImputeData.tab", "model.svm", "varNames.txt"):
                    fPath = os.path.join(thePath, fName)
                    if os.path.exists(fPath):
                        os.remove(fPath)
                scalingPath = os.path.join(thePath, "scalingValues")
                if os.path.isdir(scalingPath):
                    shutil.rmtree(scalingPath)
                elif os.path.exists(scalingPath):
                    os.remove(scalingPath)
            else:
                os.mkdir(thePath)
            if not os.path.isdir(thePath):
                if self.verbose > 0:
                    print "ERROR: Could not create ", path
                return False

            # Save a data set with one row containing the impute values
            impData = dataUtilities.DataTable(self.imputer.defaults.domain)
            impData.append(self.imputer.defaults)
            # Remove the meta attributes from the imputer data. We don't need
            # to store them along with the model.
            impData = dataUtilities.getCopyWithoutMeta(impData)
            impData.save(os.path.join(thePath, "ImputeData.tab"))

            self.classifier.save(os.path.join(thePath, "model.svm"))
            if self.scalizer != None:
                self.scalizer.saveScalingValues(os.path.join(thePath, "scalingValues"))
            # Save the var names ordered the same way the Learner was trained
            varNamesFile = open(os.path.join(thePath, "varNames.txt"), "w")
            varNamesFile.write(str(self.varNames) + "\n")
            varNamesFile.write(str(self.NTrainEx) + "\n")
            varNamesFile.write(str(self.basicStat) + "\n")
            varNamesFile.close()
            # Save the parameters
            self._saveParameters(os.path.join(thePath, "parameters.pkl"))
        except Exception:
            if self.verbose > 0:
                print "ERROR: Could not save model to ", path
            return False
        return True
Exemplo n.º 11
0
    def __call__(self, trainingData, weight = None):
        """Create an OpenCV Random Forest model trained on 'trainingData'.

        Returns an RFClassifier on success, or None when the base-class
        check fails, the priors are invalid, or there are too few
        examples for the requested minSample.
        """
        if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight):
            return None

        # Set the number of threads to be used by opencv
        cv.cvSetNumThreads(max(int(self.NumThreads),0))
        # Remove from the domain any unused values of discrete attributes, including the class
        trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True)

        # Object holding the data required for predictions (model, domain, etc)
        #print time.asctime(), "=superRFmodel(trainingData.domain)"
        ##scPA
        # Remove meta attributes from the training data
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)
        # Impute the data and convert the ExampleTable to CvMat
        if self.useBuiltInMissValHandling:
            # Create the imputer empty since we will not be using it
            impData = dataUtilities.DataTable(trainData.domain)
            CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
        else:
            # Create the imputer
            self.imputer = orange.ImputerConstructor_average(trainData)
            impData=self.imputer.defaults
            trainData = self.imputer(trainData)
            CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
            CvMatrices["missing_data_mask"] = None
        ##ecPA
        self.learner = ml.CvRTrees()#superRFmodel(trainData.domain)    #This call creates a scratchDir

        # Set RF model parameter values.
        # When nActVars is given as 0, use the sqrt of the number of attributes so the user knows what will be used
        # (this would be done at the C level if left as 0).
        if self.nActVars == "0" and len(trainData.domain.attributes)>0:
            self.nActVars =  str(int(sqrt(len(trainData.domain.attributes))))
        #print time.asctime(), "=self.setParameters"
        params = self.setParameters(trainData)
        # Print values of the parameters
        if self.verbose > 0: self.printOuts(params)
        #**************************************************************************************************//
        #                      Check for irrational input arguments
        #**************************************************************************************************//
        if params.min_sample_count >= len(trainingData):
            if self.verbose > 0: print "ERROR! Invalid minSample: ",params.min_sample_count
            if self.verbose > 0: print "minSample must be smaller than the number of examples."
            if self.verbose > 0: print "The number of examples is: ",len(trainingData)
            if len(trainingData) > 10:
                if self.verbose > 0: print "minSample assigned to default value: 10"
                params.min_sample_count = 10
            else:
                if self.verbose > 0: print "Too few examples!!"
                if self.verbose > 0: print "Terminating"
                if self.verbose > 0: print "No random forest model built"
                return None
        if params.nactive_vars > len(trainingData.domain.attributes):
            if self.verbose > 0: print "ERROR! Invalid nActVars: ",params.nactive_vars
            if self.verbose > 0: print "nActVars must be smaller than or equal to the number of variables."
            if self.verbose > 0: print "The number of variables is: ", len(trainingData.domain.attributes)
            if self.verbose > 0: print "nActVars assigned to default value: sqrt(nVars)=",sqrt(len(trainingData.domain.attributes))
            params.nactive_vars = 0;
        # Train the RF model on the converted data
        #print time.asctime(), "=Start Training"
        # Process the priors and count the number of values in the class var
        if  trainingData.domain.classVar.varType == orange.VarTypes.Discrete:
            cls_count = len(trainData.domain.classVar.values)
            priors = self.convertPriors(self.priors,trainingData.domain.classVar)
            if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error message
                print priors
                return None
        else:
            cls_count = 0
            priors = None
        # Call the train method (priors are passed as a space-separated string when present)
        self.learner.train( CvMatrices["matrix"],ml.CV_ROW_SAMPLE,CvMatrices["responses"],None,None,CvMatrices["varTypes"],CvMatrices["missing_data_mask"],params,cls_count,  priors and str(priors).replace(","," ") or None)
        if self.learner.get_var_importance():
            varImportanceList = self.learner.get_var_importance()
            varImportance = {}
            varName = []
            varImp = []
            for idx,attr in enumerate(CvMatrices["varNames"]):
                varImportance[attr] = varImportanceList[idx]
            # Uncomment the next lines if the output is needed already ordered
            #============================= begin =================================
            #    varName.append(attr)
            #    varImp.append(varImportanceList[idx])
            #Order the vars in terms of importance
            # insertion sort algorithm
            #for i in range(1, len(varImp)):
            #    save = varImp[i]
            #    saveName = varName[i]
            #    j = i
            #    while j > 0 and varImp[j - 1] < save:
            #        varImp[j] = varImp[j - 1]
            #        varName[j] = varName[j - 1]
            #        j -= 1
            #    varImp[j] = save
            #    varName[j] = saveName
            #For debug: test if assign var importance was correct
            #for attr in varImportance:
            #    if varImportance[attr] != varImp[varName.index(attr)]:
            #        print "ERROR: Variable importance of ", attr, " is not correct!"
            #OrderedVarImportance = {"VarNames":varName, "VarImportance":varImp}
            #=============================  end  =================================
        else:
            varImportance = {}
        #print time.asctime(), "=Done"
        # Save info about the variables used in the model (used by the write method)
        #attributeInfo = dataUtilities.DataTable(trainData.domain)
        # place the impute data as the first example of this data
        #attributeInfo.append(self.imputer.defaults)
        return RFClassifier(classifier = self.learner, classVar = impData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatrices["varNames"],thisVer=True,useBuiltInMissValHandling = self.useBuiltInMissValHandling, varImportance = varImportance, basicStat = self.basicStat, NTrainEx = len(trainingData), parameters = self.parameters)
Exemplo n.º 12
0
    def __call__(self, data, weight = None):
        """Create a CvSVMClassifier trained on 'data'.

        Pipeline: base-class validation, average-value imputation,
        optional scaling, svm_type adjustment for regression vs
        classification, CvMat conversion and OpenCV SVM training.
        Returns a CvSVMClassifier, or None on validation/prior failure
        or when the trained model has zero support vectors.
        """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            if self.verbose > 0: print "Could not create base class instance"
            return None
        dataUtilities.verbose = self.verbose
        # Remove from the domain any unused values of discrete attributes, including the class
        data = dataUtilities.getDataWithoutUnusedValues(data,True)

        # Work on a copy without meta attributes
        #dataUtilities.rmAllMeta(data)
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainingData)
        # Impute the data
        trainingData = self.imputer(trainingData)
        if self.scaleData:
            self.scalizer = dataUtilities.scalizer()
            for attr in ("nMin","nMax","nClassMin","nClassMax"):
                setattr(self.scalizer, attr, getattr(self, attr))
            # Only scale the class in regression. On classification, set scaleClass to False
            self.scalizer.scaleClass = self.scaleClass  and trainingData.domain.classVar.varType == orange.VarTypes.Continuous or False
            self.scalizer.nClassMin = self.nClassMin
            self.scalizer.nClassMax = self.nClassMax
            self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
        else:
            self.trainData = trainingData
            self.scalizer = None

        impData=self.imputer.defaults
        # Adjust the svm type according to the problem (regression or classification);
        # types 100/101 are classification, 103/104 their regression counterparts, 102 is one-class
        if self.svm_type != 102:
            if trainingData.domain.classVar.varType == orange.VarTypes.Continuous:
                if self.svm_type in (100,101):
                    self.svm_type += 3
                    self.eps = self.epsR    #Regression eps
            else:
                if self.svm_type in (103,104):
                    self.svm_type -= 3
                    self.eps = self.epsC    #Classification eps
        # Convert the ExampleTable to CvMat
        CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatices["matrix"]
        responses = CvMatices["responses"]
        varTypes = CvMatices["varTypes"]

        # Configure SVM self.params
        self.params = ml.CvSVMParams()
        self.params.svm_type = self.svm_type
        self.params.kernel_type = self.kernel_type
        self.params.degree = self.degree
        self.params.gamma = self.gamma
        self.params.coef0 = self.coef0
        self.params.C = self.C
        self.params.nu = self.nu
        self.params.p = self.p
        # Process the priors from a str, list or dict to a valid list
        priors = self.convertPriors(self.priors,trainingData.domain.classVar)
        if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error message
            print priors
            return None

        if priors and self.params.svm_type != ml.CvSVM.C_SVC:
            priors = None
            if self.verbose > 0: print "WARNING: The priors will not have any effect. They can only be used with C_SVC SVM-Type."
        elif priors:
            priors = dataUtilities. List2CvMat(priors)

        self.params.class_weights = priors

        # Termination criteria: epsilon and/or max iterations, per stopCrit
        term_crit = cv.CvTermCriteria()
        term_crit.type = self.stopCrit #cv.CV_TERMCRIT_EPS  #  or CV_TERMCRIT_ITER
        term_crit.epsilon = self.eps
        term_crit.max_iter = self.maxIter
        self.params.term_crit =  term_crit

        # Create the model
        classifier = ml.CvSVM()
        # Train the model
        # train(trainData, responses, varIdx, SampleIdx, Params)
        classifier.train(mat,responses,None,None,self.params)
        if classifier.get_support_vector_count() < 1:
            print "WARNING: The number of support vectors is 0." 
            print "This could be becasue the margin between the hyper plane and the support vectors has become zero."
            print "Try to modify the parameters controlling the margin. "
            print "For example decrease C or p(regression only)."
            print "No SVM model returned!"
            return None
        else:
            return CvSVMClassifier(classifier = classifier, classVar = data.domain.classVar, scalizer = self.scalizer, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], basicStat = self.basicStat, NTrainEx = len(trainingData))
Exemplo n.º 13
0
    def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
        returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
                ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
                (<orange.Value 'Act'='3.44158792'>, 2.34443) 
                (<0.000, 0.000>, 2.34443)
                If it is not a binary classifier, DFV will be equal to None
                DFV will be a value from greater or equal to 0  
        """
        res = None
        #dataUtilities.rmAllMeta(examples)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)

        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        if self.imputer:
            dataUtilities.verbose = self.verbose
            if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExamples = self.ExFix.fixExample(examples)

            if not inExamples:
                if self.verbose > 0: print "Warning no example. Returning None prediction"
                return None

            #Imput the examples if there are missing values     
            examplesImp = self.imputer(inExamples)
            # There is a problem with using the imputer when examples contain meta attributes.
            # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
            if not examplesImp:
                if self.verbose > 0: print "Unable to predict with the SVM model."
                if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                return None
        else:
            if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
            examplesImp = examples

        if self.classifier.get_support_vector_count() ==0:
            if self.verbose > 0: print "WARNING:  Support Vectors count is 0 (zero)" 
        DFV = None
        if examplesImp: 
            if self.scalizer:
                res = self.classifier.predict(dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp),self.varNames))
                res = self.scalizer.convertClass(res)
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp),self.varNames), True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res 
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
            else:
                res = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames))
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames), True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
             
            if resultType!=orange.GetValue:
                if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                    dist = orange.DiscDistribution(examplesImp.domain.classVar)
                    dist[res]=1
                else:
                    dist = res
                if resultType==orange.GetProbabilities:
                    res = dist
                else:
                    res = (res,dist)
                    
            if returnDFV:
                res = (res,DFV)
                
        self.nPredictions += 1
        return res
Exemplo n.º 14
0
    def __call__(self, data, weight = None):
        """Create and train a CvSVM model from ``data``.

        Imputes missing values with an average imputer, optionally scales the
        attributes (and the class for regression), adapts the SVM type to the
        problem kind (classification vs. regression), and trains an OpenCV SVM.
        Returns a CvSVMClassifier, or None on failure.
        """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            if self.verbose > 0: print "Could not create base class instance"
            return None
        dataUtilities.verbose = self.verbose
        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data,True)

        # Work on a copy without meta attributes (the imputer cannot handle them)
        #dataUtilities.rmAllMeta(data) 
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainingData)
        # Impute the data 
        trainingData = self.imputer(trainingData)
        if self.scaleData:
            # Copy the scaling bounds configured on the learner to the scalizer
            self.scalizer = dataUtilities.scalizer()
            for attr in ("nMin","nMax","nClassMin","nClassMax"):
                setattr(self.scalizer, attr, getattr(self, attr))
            #Only scale the class in regression. On classification, set scaleClass to False
            self.scalizer.scaleClass = self.scaleClass  and trainingData.domain.classVar.varType == orange.VarTypes.Continuous or False
            self.scalizer.nClassMin = self.nClassMin
            self.scalizer.nClassMax = self.nClassMax
            self.trainData = self.scalizer.scaleAndContinuizeData(trainingData)
        else:
            self.trainData = trainingData
            self.scalizer = None

        impData=self.imputer.defaults
        # Adjust the svm type according to the problem (regression or classification).
        # NOTE(review): 100/101 appear to be classification SVM types and 103/104
        # the matching regression types (offset of 3); 102 is left untouched.
        # Confirm against the project's SVM-type constants.
        if self.svm_type != 102:
            if trainingData.domain.classVar.varType == orange.VarTypes.Continuous:
                if self.svm_type in (100,101):
                    self.svm_type += 3
                    self.eps = self.epsR    #Regression eps
            else:
                if self.svm_type in (103,104):
                    self.svm_type -= 3
                    self.eps = self.epsC    #Classification eps
        #Convert the ExampleTable to CvMat
        CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatices["matrix"]
        responses = CvMatices["responses"]
        varTypes = CvMatices["varTypes"]

        #Configure SVM self.params
        self.params = ml.CvSVMParams()
        self.params.svm_type = self.svm_type
        self.params.kernel_type = self.kernel_type
        self.params.degree = self.degree
        self.params.gamma = self.gamma
        self.params.coef0 = self.coef0
        self.params.C = self.C
        self.params.nu = self.nu
        self.params.p = self.p
        #Process the priors from a str, list or dict to a valid list
        priors = self.convertPriors(self.priors,trainingData.domain.classVar)
        if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error message.
            print priors
            return None

        # Priors (class weights) are only honoured by the C_SVC SVM type
        if priors and self.params.svm_type != ml.CvSVM.C_SVC:
            priors = None
            if self.verbose > 0: print "WARNING: The priors will not have any effect. They can only be used with C_SVC SVM-Type."
        elif priors:
            priors = dataUtilities. List2CvMat(priors)

        self.params.class_weights = priors

        # Termination criteria for the SVM training loop
        term_crit = cv.CvTermCriteria()
        term_crit.type = self.stopCrit #cv.CV_TERMCRIT_EPS  #  or CV_TERMCRIT_ITER
        term_crit.epsilon = self.eps           #Or use:  term_crit.max_iter = x
        term_crit.max_iter = self.maxIter           #Or use:  term_crit.max_iter = x
        self.params.term_crit =  term_crit

        #Create the model
        classifier = ml.CvSVM()
        #Train the model
        #train(trainData, responses, varIdx, SampleIdx, Params)
        classifier.train(mat,responses,None,None,self.params)
        # A model with zero support vectors is degenerate; refuse to return it
        if classifier.get_support_vector_count() < 1:
            print "WARNING: The number of support vectors is 0." 
            print "This could be becasue the margin between the hyper plane and the support vectors has become zero."
            print "Try to modify the parameters controlling the margin. "
            print "For example decrease C or p(regression only)."
            print "No SVM model returned!"
            return None
        else:
            return CvSVMClassifier(classifier = classifier, classVar = data.domain.classVar, scalizer = self.scalizer, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], basicStat = self.basicStat, NTrainEx = len(trainingData), parameters = self.parameters)
Exemplo n.º 15
0
    def _singlePredict(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
        returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
                ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
                (<orange.Value 'Act'='3.44158792'>, 2.34443) 
                (<0.000, 0.000>, 2.34443)
                If it is not a binary classifier, DFV will be equal to None
                DFV will be a value from greater or equal to 0  
        """
        res = None
        #dataUtilities.rmAllMeta(examples)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)

        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        if self.imputer:
            dataUtilities.verbose = self.verbose
            if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExamples = self.ExFix.fixExample(examples)

            if not inExamples:
                if self.verbose > 0: print "Warning no example. Returning None prediction"
                return None

            #Imput the examples if there are missing values     
            examplesImp = self.imputer(inExamples)
            # There is a problem with using the imputer when examples contain meta attributes.
            # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
            if not examplesImp:
                if self.verbose > 0: print "Unable to predict with the SVM model."
                if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                return None
        else:
            if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
            examplesImp = examples

        if self.classifier.get_support_vector_count() ==0:
            if self.verbose > 0: print "WARNING:  Support Vectors count is 0 (zero)" 
        DFV = None
        if examplesImp: 
            if self.scalizer:
                exToPredict = dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp,True), self.varNames)
                res = self.classifier.predict(exToPredict)
                res = self.scalizer.convertClass(res)
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(exToPredict, True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res 
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
            else:
                exToPredict = dataUtilities.Example2CvMat(examplesImp,self.varNames)
                res = self.classifier.predict(exToPredict)
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(exToPredict, True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
             
            if resultType!=orange.GetValue:
                if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                    dist = orange.DiscDistribution(examplesImp.domain.classVar)
                    dist[res]=1
                else:
                    y_hat = self.classVar(res)
                    dist = Orange.statistics.distribution.Continuous(self.classVar)
                    dist[y_hat] = 1.0
                if resultType==orange.GetProbabilities:
                    res = dist
                else:
                    res = (res,dist)
                    
            if returnDFV:
                res = (res,DFV)
                
        self.nPredictions += 1
        return res
Exemplo n.º 16
0
    def __call__(self, trainingData, weight=None):
        """Creates an PLS model from the data in trainingData. """
        if not AZBaseClasses.AZLearner.__call__(self, trainingData, weight):
            return None
        #Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(
            trainingData, True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        OrngFile = os.path.join(scratchdir, "OrngData.tab")

        # Remove meta attributes from training data to make the imputer work with examples without the meta attributes.
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)

# Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainData)
        # Impute the data
        trainData = self.imputer(trainData)
        # Save the Data already imputed to an Orange formated file
        if self.verbose > 1:
            print time.asctime(), "Saving Orange Data to a tab file..."
        orange.saveTabDelimited(OrngFile, trainData)
        if self.verbose > 1: print time.asctime(), "done"

        # Create the PLS instance
        if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
        learner = pls.PlsAPI()
        if self.verbose > 1: print time.asctime(), "done"

        # Assign the PLS parameters
        learner.SetParameter('v', str(self.verbose))
        learner.SetParameter('debug', str(int(self.verbose > 0)))
        learner.SetParameter('method', self.method)
        if types.IntType(self.k) > len(trainData.domain.attributes):
            learner.SetParameter('k', str(len(trainData.domain.attributes)))
            if self.verbose > 0:
                print "Warning! The number of components were more than the number of attributes."
            if self.verbose > 0:
                print "   Components were set to ", len(
                    trainData.domain.attributes)
        else:
            learner.SetParameter('k', self.k)
        learner.SetParameter('precision', self.precision)
        learner.SetParameter('sDir', scratchdir)  #AZOC.SCRATCHDIR)

        # Read the Orange Formated file and Train the Algorithm
        # TRAIN
        if self.verbose > 1: print time.asctime(), "Training..."
        learner.Train(OrngFile)
        if self.verbose > 1:
            print "Train finished at ", time.asctime()
            print "PLS trained in: " + str(
                learner.GetCPUTrainTime()) + " seconds"
            print "Method:     " + learner.GetParameter("method")
            print "Components: " + learner.GetParameter("k")
            print "Precision:  " + learner.GetParameter("precision")

        # Remove the scratch file
        if self.verbose == 0:
            miscUtilities.removeDir(scratchdir)
        else:
            print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
        del trainData
        impData = self.imputer.defaults
        return PLSClassifier(
            classifier=learner,
            name="Classifier of " + self.name,
            classVar=trainingData.domain.classVar,
            imputeData=impData,
            verbose=self.verbose,
            varNames=[attr.name for attr in trainingData.domain.attributes],
            NTrainEx=len(trainingData),
            basicStat=self.basicStat,
            parameters=self.parameters)  #learner.GetClassVarName())#
Exemplo n.º 17
0
    def __call__(self, data, weight=None):
        """Creates a Boost model from the data in origTrainingData. """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            return None
        if data.domain.classVar.varType != orange.VarTypes.Discrete:
            print "AZorngCvBoost can only be used for binary classification."
            return None
        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data, True)

        #dataUtilities.rmAllMeta(data)
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainingData)
        # Impute the data
        self.trainData = self.imputer(trainingData)

        impData = self.imputer.defaults
        #Convert the ExampleTable to CvMat
        CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatrices["matrix"]
        responses = CvMatrices["responses"]
        varTypes = CvMatrices["varTypes"]
        missingDataMask = CvMatrices["missing_data_mask"]

        #Configure Boost params
        #First, Correct any wrong parameters Combination:
        #   CVBOOSTTYPE = { "DISCRETE":0, "REAL":1, "LOGIT":2, "GENTLE":3 }
        #   CVBOOSTSPLITCRIT = { "DEFAULT":0, "GINI":1, "MISCLASS":3, "SQERR":4 }
        if self.boost_type not in AZOC.CVBOOSTTYPE:
            print "ERROR: Bad value for parameter boost_type. Possible values: " + string.join(
                [x for x in AZOC.CVBOOSTTYPE], ", ")
            return None
        if self.split_criteria not in AZOC.CVBOOSTSPLITCRIT:
            print "ERROR: Bad value for parameter split_criteria. Possible values: " + string.join(
                [x for x in AZOC.AZOC.CVBOOSTSPLITCRIT], ", ")
            return None

        if self.boost_type == "DISCRETE":
            if self.split_criteria not in ["MISCLASS", "GINI"]:
                print "WARNING: For Discrete type, the split Criteria must be MISCLASS or GINI. MISCLASS was used by default."
                self.split_criteria = "MISCLASS"
        if self.boost_type == "REAL":
            if self.split_criteria not in ["MISCLASS", "GINI"]:
                print "WARNING: For REAL type, the split Criteria must be MISCLASS or GINI. GINI was used by default."
                self.split_criteria = "GINI"
        if self.boost_type in ["LOGIT", "GENTLE"]:
            if self.split_criteria != "SQERR":
                print "WARNING: For LOGIT and GENTLE types, the split Criteria must be SQERR. SQERR was used by default."
                self.split_criteria = "SQERR"

        params = ml.CvBoostParams()
        params.boost_type = AZOC.CVBOOSTTYPE[self.boost_type]
        params.split_criteria = AZOC.CVBOOSTSPLITCRIT[self.split_criteria]
        params.weak_count = self.weak_count
        params.weight_trim_rate = self.weight_trim_rate
        params.max_depth = self.max_depth
        params.use_surrogates = self.use_surrogates

        #Create the model it MUST be created with the NON DEFAULT constructor or must call create
        classifier = ml.CvBoost()
        #Train the model
        #train(const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false)
        #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1)
        #cv.cvSet(sampleWeights,1.0)

        #compute priors (sample weights)
        priors = self.convertPriors(self.priors,
                                    self.trainData.domain.classVar)
        if type(
                priors
        ) == str:  #If a string is returned, there was a failure, and it is the respective error mnessage.
            print priors
            return None
        #Train the model
        if self.verbose: self.printParams(params)
        classifier.train(mat, ml.CV_ROW_SAMPLE, responses, None, None,
                         varTypes, missingDataMask, params, False,
                         priors and str(priors).replace(",", " ") or None)
        return CvBoostClassifier(classifier=classifier,
                                 classVar=self.trainData.domain.classVar,
                                 imputeData=impData,
                                 verbose=self.verbose,
                                 varNames=CvMatrices["varNames"],
                                 nIter=None,
                                 basicStat=self.basicStat,
                                 NTrainEx=len(trainingData),
                                 parameters=self.parameters)
Exemplo n.º 18
0
    def __call__(self, data, weight = None):
        """Creates a Boost model from the data in origTrainingData. """
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            return None
        if data.domain.classVar.varType != orange.VarTypes.Discrete:
            raise Exception("AZorngCvBoost can only be used for 2-class classification.")
        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data,True)

        #dataUtilities.rmAllMeta(data) 
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainingData)
        # Impute the data 
        self.trainData = self.imputer(trainingData)

        impData=self.imputer.defaults
        #Convert the ExampleTable to CvMat
        CvMatrices = dataUtilities.ExampleTable2CvMat(self.trainData)
        mat = CvMatrices["matrix"]
        responses = CvMatrices["responses"]
        varTypes = CvMatrices["varTypes"]
        missingDataMask = CvMatrices["missing_data_mask"]

        #Configure Boost params
        #First, Correct any wrong parameters Combination:
        #   CVBOOSTTYPE = { "DISCRETE":0, "REAL":1, "LOGIT":2, "GENTLE":3 }
        #   CVBOOSTSPLITCRIT = { "DEFAULT":0, "GINI":1, "MISCLASS":3, "SQERR":4 }
        if self.boost_type not in AZOC.CVBOOSTTYPE:
            print "ERROR: Bad value for parameter boost_type. Possible values: " + string.join([x for x in AZOC.CVBOOSTTYPE],", ")
            return None
        if self.split_criteria not in AZOC.CVBOOSTSPLITCRIT:
            print "ERROR: Bad value for parameter split_criteria. Possible values: " + string.join([x for x in AZOC.AZOC.CVBOOSTSPLITCRIT],", ")  
            return None

        if self.boost_type == "DISCRETE":
            if self.split_criteria not in ["MISCLASS", "GINI"]:
                print "WARNING: For Discrete type, the split Criteria must be MISCLASS or GINI. MISCLASS was used by default."
                self.split_criteria = "MISCLASS"
        if self.boost_type == "REAL":
            if self.split_criteria not in ["MISCLASS", "GINI"]:
                print "WARNING: For REAL type, the split Criteria must be MISCLASS or GINI. GINI was used by default."
                self.split_criteria = "GINI"
        if self.boost_type in ["LOGIT","GENTLE"]:
            if self.split_criteria != "SQERR":
                print "WARNING: For LOGIT and GENTLE types, the split Criteria must be SQERR. SQERR was used by default."
                self.split_criteria = "SQERR"

        params = ml.CvBoostParams()
        params.boost_type = AZOC.CVBOOSTTYPE[self.boost_type]
        params.split_criteria = AZOC.CVBOOSTSPLITCRIT[self.split_criteria]
        params.weak_count = self.weak_count
        params.weight_trim_rate = self.weight_trim_rate
        params.max_depth = self.max_depth
        params.use_surrogates = self.use_surrogates
        if self.priors:
            params.priors= self.priors

        #Create the model it MUST be created with the NON DEFAULT constructor or must call create
        classifier = ml.CvBoost()
        #Train the model
        #train(const CvMat* _train_data, int _tflag, const CvMat* _responses, const CvMat* _var_idx=0, const CvMat* _sample_idx=0, const CvMat* _var_type=0, const CvMat* _missing_mask=0, CvBoostParams params=CvBoostParams(), bool update=false)
        #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1)
        #cv.cvSet(sampleWeights,1.0)
        
        #compute priors (sample weights)
        priors = self.convertPriors(self.priors, self.trainData.domain.classVar,getDict = True)
        if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error mnessage.
            print priors
            return None
 
        if priors:
            #scale priors
            pSum=sum(priors.values())
            if pSum==0:
                print "ERROR: The priors cannot be all 0!"
                return None
            map(lambda k,v:priors.update({k: (v+0.0)/pSum}),priors.keys(),priors.values())
            #Apply the priors to each respective sample
            sample_weights = [1] * len(self.trainData)
            for idx,sw in enumerate(sample_weights):
                actualClass = str(self.trainData[idx].getclass().value)
                if actualClass in priors:
                    sample_weights[idx] = sample_weights[idx] * priors[actualClass]
            CV_sample_weights = dataUtilities.List2CvMat(sample_weights,"CV_32FC1")
        else:
            CV_sample_weights = None
        #Train the model
        if self.verbose: self.printParams(params)
        classifier.train(mat, ml.CV_ROW_SAMPLE, responses, None, None, varTypes, missingDataMask, params, False)
        return CvBoostClassifier(classifier = classifier, classVar = self.trainData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatrices["varNames"], nIter = None, basicStat = self.basicStat, NTrainEx = len(trainingData))
Exemplo n.º 19
0
    def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
        # Predict a single example with the OpenCV Boost classifier.
        # resultType selects the return shape (documented in the string below);
        # returnDFV additionally wraps the result as (result, DFV).
        res = None
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000> 
        """
        # Strip meta attributes; the imputer/ExFix machinery cannot handle them
        #dataUtilities.rmAllMeta(examples)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog) 
        inExamples = self.ExFix.fixExample(examples)

        # Example could not be fixed to match the training domain
        if not inExamples:
                return None

        #Imput the examples if there are missing values     
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain meta attributes.
        # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the Boost model."
            if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
            return None

        out = self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames))
        probabilities = None
        DFV = None
        # Back transform the prediction to the original classes and calc probabilities
        prediction = dataUtilities.CvMat2orangeResponse(out, self.classVar)
        # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
        if self.classVar.varType == orange.VarTypes.Discrete:
            if resultType != orange.GetValue:
                #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities
                # to be compatible with possible callers asking for probabilities. 
                probabilities = self.__generateProbabilities(prediction)
                self._isRealProb = False
        else:
            #On Regression models assume the DVF as the value predicted
            DFV = prediction
            self._updateDFVExtremes(DFV)

        # NOTE(review): "if prediction:" relies on orange.Value truthiness; a
        # falsy prediction (e.g. numeric 0) would yield None here -- confirm intended.
        if resultType == orange.GetBoth:
            if prediction:
                orangePrediction = orange.Value(self.classVar, prediction)
            else:
                orangePrediction = None
            res = orangePrediction, probabilities
        elif resultType == orange.GetProbabilities:
            res = probabilities
        else:
            if prediction:
                orangePrediction = orange.Value(self.classVar, prediction)
            else:
                orangePrediction = None
            res = orangePrediction

        self.nPredictions += 1
        if returnDFV:
            return (res,DFV)
        else:
            return res
    def __call__(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
        # Predict a single example with the OpenCV ANN (MLP) classifier.
        # resultType selects the return shape (documented in the string below);
        # returnDFV additionally wraps the result as (res, DFV).
        res = None
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000> 
        """
        # Strip meta attributes; the imputer/ExFix machinery cannot handle them
        #dataUtilities.rmAllMeta(examples)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        if self.verbose > 1: dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog) 
        inExamples = self.ExFix.fixExample(examples)

        # Example could not be fixed to match the training domain
        if not inExamples:
                return None

        #Imput the examples if there are missing values     
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain meta attributes.
        # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the ANN model."
            if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
            return None

        res = None
        # The network has one output for regression, one output per class otherwise
        if self.classVar.varType == orange.VarTypes.Continuous: 
                Nout = 1
        else:
                Nout = len(self.classVar.values)
        # Output buffer filled in-place by the OpenCV predict call
        out = cv.cvCreateMat(1,Nout,cv.CV_32FC1)
        self.classifier.predict(dataUtilities.Example2CvMat(examplesImp,self.varNames),out)
        #print "OUT = ",out
        #print out,"->",dataUtilities.CvMat2orangeResponse(out,self.classVar,True),":",origExamples[self.classVar.name].value
        res = dataUtilities.CvMat2orangeResponse(out,self.classVar,True)
        #print "RES=",res
        DFV = None
        if out.cols > 1:
            # Classification: derive probabilities from the raw network outputs
            fannOutVector = dataUtilities.CvMat2List(out)[0]
            probabilities = self.__getProbabilities(fannOutVector)
            #Compute the DFV
            if self.classVar.varType == orange.VarTypes.Discrete and len(self.classVar.values) == 2:
                DFV = probabilities[0]
                # Subtract 0.5 so that the threshold is 0 as all learners DFV
                DFV -= 0.5
                self._updateDFVExtremes(DFV)    
        
            # Return the desired quantity
            if resultType == orange.GetProbabilities:
                    res = probabilities
            else:
                    if resultType == orange.GetBoth:
                        res = (res, probabilities)
        else:
            #On Regression models, assume the DFV as the value predicted.
            # NOTE(review): probabilities are meaningless in regression, so a
            # placeholder [0.0] is returned where a distribution is expected.
            DFV = res.value
            self._updateDFVExtremes(DFV)
            if resultType == orange.GetProbabilities:
                res  =  [0.0]
            else:
                if resultType==orange.GetBoth:
                    res = (res,[0.0])
        self.nPredictions += 1
        
        if returnDFV:
            return (res,DFV)
        else:
            return res
Exemplo n.º 21
0
    def __call__(self, data, weight = None):
        if not AZBaseClasses.AZLearner.__call__(self, data, weight):
            return None
        """Creates an ANN model from the data in origTrainingData. """
        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data,True)

        #dataUtilities.rmAllMeta(data) 
        if len(data.domain.getmetas()) == 0:
            trainingData = data
        else:
            trainingData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainingData)
        # Impute the data 
        self.trainData = self.imputer(trainingData)

        impData=self.imputer.defaults
        #Convert the ExampleTable to CvMat
        CvMatices = dataUtilities.ExampleTable2CvMat(self.trainData, True)
        mat = CvMatices["matrix"]
        responses = CvMatices["responses"]
        varTypes = CvMatices["varTypes"]

        #Configure ANN params
        params = ml.CvANN_MLP_TrainParams()
        params.train_method = self.optAlg 
        params.bp_dw_scale = self.bp_dw_scale
        params.bp_moment_scale = self.bp_moment_scale
        params.rp_dw0 = self.rp_dw0
        params.rp_dw_plus = self.rp_dw_plus
        params.rp_dw_minus = self.rp_dw_minus
        #params.rp_dw_min = ##default is the minimum float value 
        params.rp_dw_max = self.rp_dw_max

        term_crit = cv.CvTermCriteria()
        term_crit.type = self.stopCrit #cv.CV_TERMCRIT_EPS  #  or CV_TERMCRIT_ITER
        term_crit.epsilon = self.eps           #Or use:  term_crit.max_iter = x
        term_crit.max_iter = self.maxIter           #Or use:  term_crit.max_iter = x
        params.term_crit =  term_crit

        #Create the model it MUST be created with the NON DEFAULT constructor or must call create
        classifier = ml.CvANN_MLP()
        if data.domain.classVar.varType == orange.VarTypes.Discrete:
            Nout = len(data.domain.classVar.values)
        else:
            Nout = 1
        if type(self.nHidden) != list: 
            nHidden = [self.nHidden]
        else:
            nHidden = self.nHidden
        layers = [len(data.domain.attributes)] + nHidden + [Nout]
        layerSizes = dataUtilities.List2CvMat(layers,"CV_32SC1")
        classifier.create(layerSizes, self.activationFunction, self.sigmoidAlpha, self.sigmoidBeta)
        #Train the model
        #train(trainData, responses, sampleWeights (RPROP only), sampleIdx, TrainParams, flags for scaling)
        #sampleWeights = cv.cvCreateMat(1,len(self.trainData),cv.CV_32FC1)
        #cv.cvSet(sampleWeights,1.0)
        
        scaleFlag = 0
        if not self.scaleData:
            scaleFlag = scaleFlag | ml.CvANN_MLP.NO_INPUT_SCALE
        if not self.scaleClass:
            scaleFlag = scaleFlag |  ml.CvANN_MLP.NO_OUTPUT_SCALE
       
        #compute priors (sample weights)
        priors = self.convertPriors(self.priors, self.trainData.domain.classVar,getDict = True)
        if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error mnessage.
            print priors
            return None
 
        if priors and self.optAlg == 1:
            #scale priors
            pSum=sum(priors.values())
            if pSum==0:
                print "ERROR: The priors cannot be all 0!"
                return None
            map(lambda k,v:priors.update({k: (v+0.0)/pSum}),priors.keys(),priors.values())
            #Apply the priors to each respective sample
            sample_weights = [1] * len(self.trainData)
            for idx,sw in enumerate(sample_weights):
                actualClass = str(self.trainData[idx].getclass().value)
                if actualClass in priors:
                    sample_weights[idx] = sample_weights[idx] * priors[actualClass]
            CV_sample_weights = dataUtilities.List2CvMat(sample_weights,"CV_32FC1")
        else:
            CV_sample_weights = None
        #Train the model
        nIter = classifier.train(mat, responses, CV_sample_weights, None, params, scaleFlag)
        return CvANNClassifier(classifier = classifier, classVar = self.trainData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatices["varNames"], nIter = nIter, basicStat = self.basicStat, NTrainEx = len(trainingData))
Exemplo n.º 22
0
    def _singlePredict(self,
                       origExamples=None,
                       resultType=orange.GetValue,
                       returnDFV=False):
        # Predict a single example; resultType selects value / probabilities /
        # both, and returnDFV additionally wraps the result with the decision
        # function value (only set for regression models in this variant).
        res = None
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000> 
        """
        #dataUtilities.rmAllMeta(examples)
        # Work on a copy stripped of meta attributes (the imputer cannot handle them)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
            # Lazily configure the example fixer against the training domain
            self.ExFix.set_domain(self.imputer.defaults.domain)
            self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
        inExamples = self.ExFix.fixExample(examples)

        if not inExamples:
            # Example could not be made compatible with the model domain
            return None

        #Imput the examples if there are missing values
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain meta attributes.
        # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
        if not examplesImp:
            if self.verbose > 0:
                print "Unable to predict with the Bayes model."
            if self.verbose > 0:
                print "Perhaps you need to remove meta attributes from your examples."
            return None

        # Scale the example the same way the training data was scaled, if a scalizer exists
        if self.scalizer:
            ex = self.scalizer.scaleEx(examplesImp)
        else:
            ex = examplesImp
        out = self.classifier.predict(
            dataUtilities.Example2CvMat(ex, self.varNames))
        #print "OUT:",out
        probabilities = None
        DFV = None
        # Back transform the prediction to the original classes and calc probabilities
        prediction = dataUtilities.CvMat2orangeResponse(out, self.classVar)
        #print "Prediction:",prediction
        # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
        if self.classVar.varType == orange.VarTypes.Discrete:
            if resultType != orange.GetValue:
                #Need to make sure to return meaningful probabilities to the cases where opencvRF does not support probabilities
                # to be compatible with possible callers asking for probabilities.
                probabilities = self.__generateProbabilities(prediction)
                self._isRealProb = False
        else:
            #On Regression models assume the DFV as the value predicted
            DFV = prediction
            self._updateDFVExtremes(DFV)

        # Shape the return value according to resultType
        if resultType == orange.GetBoth:
            if prediction:
                orangePrediction = orange.Value(self.classVar, prediction)
            else:
                orangePrediction = None
            res = orangePrediction, probabilities
        elif resultType == orange.GetProbabilities:
            res = probabilities
        else:
            if prediction:
                orangePrediction = orange.Value(self.classVar, prediction)
            else:
                orangePrediction = None
            res = orangePrediction

        self.nPredictions += 1
        if returnDFV:
            return (res, DFV)
        else:
            return res
Exemplo n.º 23
0
    def __call__(self, origExample = None, resultType = orange.GetValue, returnDFV = False):
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
        returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
                ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 0.34443)
                (<orange.Value 'Act'='3.44158792'>, 0.34443) 
                (<0.000, 0.000>, 0.34443)
                If it is not a binary classifier, DFV will be equal to None
                DFV will be a value from -0.5 to 0.5
        """
        if origExample == None:
            return self.classifier(None, resultType)
        else:
##scPA
            # Remove Meta attributes from example
            #dataUtilities.rmAllMeta(example)
            if len(origExample.domain.getmetas()) == 0:
                example = origExample
            else:
                example = dataUtilities.getCopyWithoutMeta(origExample)

            if not self.ExFix.ready:
                self.ExFix.set_domain(self.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExample = self.ExFix.fixExample(example) 
            if inExample: ##only procceds if the example was fixed or already ok, i.e.   inExample !=  None
##ecPA          
                ##scPA
                if  self.useBuiltInMissValHandling:
                    #compute the missing _mask
                    (exampleCvMat, missing_mask) = dataUtilities.Example2CvMat(inExample,self.varNames,self.thisVer,True) 
                else:
                    missing_mask = None
                    if self.imputer:
                         examplesImp = self.imputer(inExample)
                         # There is a problem with using the imputer when examples contain meta attributes.
                         # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
                         if not examplesImp:
                             if self.verbose > 0: print "Unable to predict with the RF model."
                             if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                             return None
                    else:
                         examplesImp = inExample
                    ##ecPA
                    # Remove the response variable from the example to be predicted and transfrom the example to a tab sep string 
                    exampleCvMat = dataUtilities.Example2CvMat(examplesImp,self.varNames,self.thisVer)
                    del examplesImp
                if not exampleCvMat:
                    if self.verbose > 0: print "Could not convert the example to a valid CvMat objct for prediction"
                    return none
                # Predict using the RFmodel object
                prediction = self.classifier.predict(exampleCvMat,missing_mask)
	        probabilities = None
                DFV = None
                # Back transform the prediction to the original classes and calc probabilities
                prediction = dataUtilities.CvMat2orangeResponse(prediction, self.classVar)
                # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
                if self.classVar.varType == orange.VarTypes.Discrete:
                    if resultType != orange.GetValue:
                        if len(self.classVar.values) == 2:
                            probOf1 = self.classifier.predict_prob(exampleCvMat,missing_mask)
                            probabilities = self.__getProbabilities(probOf1)
                            DFV = self.convert2DFV(probOf1)
                            self._isRealProb = True 
                        else:
                            #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities
                            # to be compatible with possible callers asking for probabilities. 
                            probabilities = self.__generateProbabilities(prediction)
                            self._isRealProb = False
                    elif len(self.classVar.values) == 2 and returnDFV:
                        DFV = self.convert2DFV(self.classifier.predict_prob(exampleCvMat,missing_mask))
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = prediction
                    self._updateDFVExtremes(DFV)
                    y_hat = self.classVar(prediction)
                    probabilities = Orange.statistics.distribution.Continuous(self.classVar)
                    probabilities[y_hat] = 1.0
                del exampleCvMat
                del inExample
            else:
                if self.verbose > 0:
                    print "No prediction made for example:"
                    print example
                    print "The example does not have the same variables as the model."
                prediction = None
                probabilities = None
                DFV = None
	    ##scPA
	    del example
	    ##ecPA
            if resultType == orange.GetBoth:
                if prediction: 
                    orangePrediction = orange.Value(self.classVar, prediction)
                else: 
                    orangePrediction = None
                res = orangePrediction, probabilities
            elif resultType == orange.GetProbabilities:
                res = probabilities
            else: 
                if prediction: 
                    orangePrediction = orange.Value(self.classVar, prediction)
                else: 
                    orangePrediction = None
                res = orangePrediction 

            self.nPredictions += 1
            if returnDFV:
                return (res,DFV)
            else:
                return res
    def __call__(self, data, weight = None):
        bestSeed = None
        bestAcc = None
        bestNiter = None
        bestModel = None
        #fix self.nDiffIniWeights for the disabled mode
        if self.nDiffIniWeights <= 1:
            self.nDiffIniWeights = 1 #loop over n different initial weights Disabled
        #Fix self.stopUPs for the disabled mode
        if self.stopUPs <=0:
            self.stopUPs = 0  # Optimization of nIter will be disabled

        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data,True)
        #dataUtilities.rmAllMeta(data) 
        if len(data.domain.getmetas()) == 0:
            cleanedData = data
        else:
            cleanedData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(cleanedData)
        # Impute the data 
        self.trainData = self.imputer(cleanedData)
         # If we are not seetin neither weights init optimization or nEphocs optimization (opencvLayer), the do nto split the data
        if self.stopUPs != 0 or self.nDiffIniWeights > 1:
            #Define train-80% and validation set-20% of the input data
            indices = orange.MakeRandomIndices2(p0=0.2, stratified = orange.MakeRandomIndices.StratifiedIfPossible)
            ind = indices(cleanedData)
            self.trainData = cleanedData.select(ind,1)
            validationSet = cleanedData.select(ind,0)
        else:
            validationSet = None

        if self.verbose and self.nDiffIniWeights>1: print "=========== Training ",self.nDiffIniWeights," times with different initial weights =============="
        for n in range(self.nDiffIniWeights):
            if self.nDiffIniWeights <=1:
                seed=0  #in opencv  mmlann seed=0 means the seed is disabled, and original seed will be used
            else:
                seed = len(cleanedData) * len(cleanedData.domain) * (n+1)  #seed can be any integer
            #Create a model with a specific seed for training opencv ANN. 
            #Also passing the step for the nIter optimization (self.stopUPs=0 - disable nIter optimization)
            #Also passing the validation set to be used in internal opencv implemented nEphocs optimization.
            model = self.__train__(weight = None, seed = seed, validationSet = validationSet)
            #Skip evaluation if the weights loop is disabled
            if self.nDiffIniWeights <=1:
                return model
                break
            if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete:
                Acc = evalUtilities.getClassificationAccuracy(validationSet, model)
            else:
                Acc = -evalUtilities.getRMSE(validationSet, model)
            if bestModel == None or (Acc > bestAcc) or (Acc == bestAcc and model.nIter < bestNiter):
                bestSeed = seed
                bestAcc = Acc
                bestNiter = model.nIter
                bestModel = model
            if self.verbose:  print "nIter:%-7s  Acc:%-20s  seed: %s" % (model.nIter,Acc,seed)

        if self.verbose: print "================ Best model Found: ==================="
        if self.verbose: print "nIter:%-7s  Acc:%-20s  seed: %s" % (bestNiter,bestAcc,bestSeed)

        # DEBUG for check if the returned model is indeed the best model, and not the last trainted
        #if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete:
        #    Acc = evalUtilities.getClassificationAccuracy(validationSet, bestModel)
        #else:
        #    Acc = -evalUtilities.getRMSE(validationSet, bestModel)
        #if self.verbose: print "================ Best model returned: ==================="
        #if self.verbose:  print "nIter:%-7s  Acc:%-20s  seed: %s" % (bestModel.nIter,Acc,bestModel.seed)

        return bestModel
Exemplo n.º 25
0
    def _singlePredict(self, origExample = None, resultType = orange.GetValue, returnDFV = False):
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
        returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
                ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 0.34443)
                (<orange.Value 'Act'='3.44158792'>, 0.34443) 
                (<0.000, 0.000>, 0.34443)
                If it is not a binary classifier, DFV will be equal to None
                DFV will be a value from -0.5 to 0.5
        """
        if origExample == None:
            return self.classifier(None, resultType)
        else:
##scPA
            # Remove Meta attributes from example
            #dataUtilities.rmAllMeta(example)
            if len(origExample.domain.getmetas()) == 0:
                example = origExample
            else:
                example = dataUtilities.getCopyWithoutMeta(origExample)

            if not self.ExFix.ready:
                self.ExFix.set_domain(self.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExample = self.ExFix.fixExample(example) 
            if inExample: ##only procceds if the example was fixed or already ok, i.e.   inExample !=  None
##ecPA          
                ##scPA
                if  self.useBuiltInMissValHandling:
                    #compute the missing _mask
                    (exampleCvMat, missing_mask) = dataUtilities.Example2CvMat(inExample,self.varNames,self.thisVer,True) 
                else:
                    missing_mask = None
                    if self.imputer:
                         examplesImp = self.imputer(inExample)
                         # There is a problem with using the imputer when examples contain meta attributes.
                         # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
                         if not examplesImp:
                             if self.verbose > 0: print "Unable to predict with the RF model."
                             if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                             return None
                    else:
                         examplesImp = inExample
                    ##ecPA
                    # Remove the response variable from the example to be predicted and transfrom the example to a tab sep string 
                    exampleCvMat = dataUtilities.Example2CvMat(examplesImp,self.varNames,self.thisVer)
                    del examplesImp
                if not exampleCvMat:
                    if self.verbose > 0: print "Could not convert the example to a valid CvMat objct for prediction"
                    return none
                # Predict using the RFmodel object
                prediction = self.classifier.predict(exampleCvMat,missing_mask)
	        probabilities = None
                DFV = None
                # Back transform the prediction to the original classes and calc probabilities
                prediction = dataUtilities.CvMat2orangeResponse(prediction, self.classVar)
                # Calculate artificial probabilities - not returned by the OpenCV RF algorithm
                if self.classVar.varType == orange.VarTypes.Discrete:
                    if resultType != orange.GetValue:
                        if len(self.classVar.values) == 2:
                            probOf1 = self.classifier.predict_prob(exampleCvMat,missing_mask)
                            probabilities = self.__getProbabilities(probOf1)
                            DFV = self.convert2DFV(probOf1)
                            self._isRealProb = True 
                        else:
                            #Need to make sure to return meanful probabilities to the cases where opencvRF does not support probabilities
                            # to be compatible with possible callers asking for probabilities. 
                            probabilities = self.__generateProbabilities(prediction)
                            self._isRealProb = False
                    elif len(self.classVar.values) == 2 and returnDFV:
                        DFV = self.convert2DFV(self.classifier.predict_prob(exampleCvMat,missing_mask))
                else:
                    #On Regression models assume the DVF as the value predicted
                    if not prediction.isSpecial():
                        DFV = float(prediction.value)
                        self._updateDFVExtremes(DFV)
                    y_hat = self.classVar(prediction)
                    probabilities = Orange.statistics.distribution.Continuous(self.classVar)
                    probabilities[y_hat] = 1.0
                del exampleCvMat
                del inExample
            else:
                if self.verbose > 0:
                    print "No prediction made for example:"
                    print example
                    print "The example does not have the same variables as the model."
                prediction = None
                probabilities = None
                DFV = None
	    ##scPA
	    del example
	    ##ecPA
            if resultType == orange.GetBoth:
                if prediction: 
                    orangePrediction = orange.Value(self.classVar, prediction)
                else: 
                    orangePrediction = None
                res = orangePrediction, probabilities
            elif resultType == orange.GetProbabilities:
                res = probabilities
            else: 
                if prediction: 
                    orangePrediction = orange.Value(self.classVar, prediction)
                else: 
                    orangePrediction = None
                res = orangePrediction 

            self.nPredictions += 1
            if returnDFV:
                return (res,DFV)
            else:
                return res
Exemplo n.º 26
0
    def _singlePredict(self,
                       origExamples=None,
                       resultType=orange.GetValue,
                       returnDFV=False):
        # Predict one example with the trained opencv ANN model; resultType
        # selects value / probabilities / both, and returnDFV additionally
        # wraps the result with the decision function value.
        res = None
        """
        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000> 
        """
        #dataUtilities.rmAllMeta(examples)
        # Work on a copy stripped of meta attributes (the imputer cannot handle them)
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)
        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        if self.verbose > 1: dataUtilities.verbose = self.verbose
        if not self.ExFix.ready:
            # Lazily configure the example fixer against the training domain
            self.ExFix.set_domain(self.imputer.defaults.domain)
            self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
        inExamples = self.ExFix.fixExample(examples)

        if not inExamples:
            # Example could not be made compatible with the model domain
            return None

        #Imput the examples if there are missing values
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain meta attributes.
        # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the ANN model."
            if self.verbose > 0:
                print "Perhaps you need to remove meta attributes from your examples."
            return None

        res = None
        # Network output size: one column per class value, or a single continuous output
        if self.classVar.varType == orange.VarTypes.Continuous:
            Nout = 1
        else:
            Nout = len(self.classVar.values)
        out = cv.cvCreateMat(1, Nout, cv.CV_32FC1)
        self.classifier.predict(
            dataUtilities.Example2CvMat(examplesImp, self.varNames), out)
        #print "OUT = ",out
        #print out,"->",dataUtilities.CvMat2orangeResponse(out,self.classVar,True),":",origExamples[self.classVar.name].value
        res = dataUtilities.CvMat2orangeResponse(out, self.classVar, True)
        #print "RES=",res
        DFV = None
        if out.cols > 1:
            # Classification: convert raw network outputs to probabilities
            fannOutVector = dataUtilities.CvMat2List(out)[0]
            probabilities = self.__getProbabilities(fannOutVector)
            #Compute the DFV (binary classification only)
            if self.classVar.varType == orange.VarTypes.Discrete and len(
                    self.classVar.values) == 2:
                DFV = probabilities[0]
                # Subtract 0.5 so that the threshold is 0 as all learners DFV
                DFV -= 0.5
                self._updateDFVExtremes(DFV)

            # Return the desired quantity
            if resultType == orange.GetProbabilities:
                res = probabilities
            else:
                if resultType == orange.GetBoth:
                    res = (res, probabilities)
        else:
            #On Regression models, assume the DFV as the value predicted
            DFV = res.value
            self._updateDFVExtremes(DFV)
            # Degenerate distribution: all mass on the predicted value
            y_hat = self.classVar(res.value)
            dist = Orange.statistics.distribution.Continuous(self.classVar)
            dist[y_hat] = 1.0
            if resultType == orange.GetProbabilities:
                res = dist
            else:
                if resultType == orange.GetBoth:
                    res = (res, dist)
        self.nPredictions += 1

        if returnDFV:
            return (res, DFV)
        else:
            return res
Exemplo n.º 27
0
    def __call__(self, data, weight=None):
        bestSeed = None
        bestAcc = None
        bestNiter = None
        bestModel = None
        #fix self.nDiffIniWeights for the disabled mode
        if self.nDiffIniWeights <= 1:
            self.nDiffIniWeights = 1  #loop over n different initial weights Disabled
        #Fix self.stopUPs for the disabled mode
        if self.stopUPs <= 0:
            self.stopUPs = 0  # Optimization of nIter will be disabled

        self.NTrainEx = len(data)
        #Remove from the domain any unused values of discrete attributes including class
        data = dataUtilities.getDataWithoutUnusedValues(data, True)

        #dataUtilities.rmAllMeta(data)
        if len(data.domain.getmetas()) == 0:
            cleanedData = data
        else:
            cleanedData = dataUtilities.getCopyWithoutMeta(data)
        # Create the imputer
        self.imputer = orange.ImputerConstructor_average(cleanedData)
        # Impute the data
        self.trainData = self.imputer(cleanedData)
        # If we are not seetin neither weights init optimization or nEphocs optimization (opencvLayer), the do nto split the data
        if self.stopUPs != 0 or self.nDiffIniWeights > 1:
            #Define train-80% and validation set-20% of the input data
            indices = orange.MakeRandomIndices2(
                p0=0.2,
                stratified=orange.MakeRandomIndices.StratifiedIfPossible)
            ind = indices(cleanedData)
            self.trainData = cleanedData.select(ind, 1)
            validationSet = cleanedData.select(ind, 0)
        else:
            validationSet = None

        if self.verbose and self.nDiffIniWeights > 1:
            print "=========== Training ", self.nDiffIniWeights, " times with different initial weights =============="
        for n in range(self.nDiffIniWeights):
            if self.nDiffIniWeights <= 1:
                seed = 0  #in opencv  mmlann seed=0 means the seed is disabled, and original seed will be used
            else:
                seed = len(cleanedData) * len(cleanedData.domain) * (
                    n + 1)  #seed can be any integer
            #Create a model with a specific seed for training opencv ANN.
            #Also passing the step for the nIter optimization (self.stopUPs=0 - disable nIter optimization)
            #Also passing the validation set to be used in internal opencv implemented nEphocs optimization.
            model = self.__train__(weight=None,
                                   seed=seed,
                                   validationSet=validationSet)
            #Skip evaluation if the weights loop is disabled
            if self.nDiffIniWeights <= 1:
                return model
                break
            if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete:
                Acc = evalUtilities.getClassificationAccuracy(
                    validationSet, model)
            else:
                Acc = -evalUtilities.getRMSE(validationSet, model)
            if bestModel == None or (Acc > bestAcc) or (
                    Acc == bestAcc and model.nIter < bestNiter):
                bestSeed = seed
                bestAcc = Acc
                bestNiter = model.nIter
                bestModel = model
            if self.verbose:
                print "nIter:%-7s  Acc:%-20s  seed: %s" % (model.nIter, Acc,
                                                           seed)

        if self.verbose:
            print "================ Best model Found: ==================="
        if self.verbose:
            print "nIter:%-7s  Acc:%-20s  seed: %s" % (bestNiter, bestAcc,
                                                       bestSeed)

        # DEBUG for check if the returned model is indeed the best model, and not the last trainted
        #if cleanedData.domain.classVar.varType == orange.VarTypes.Discrete:
        #    Acc = evalUtilities.getClassificationAccuracy(validationSet, bestModel)
        #else:
        #    Acc = -evalUtilities.getRMSE(validationSet, bestModel)
        #if self.verbose: print "================ Best model returned: ==================="
        #if self.verbose:  print "nIter:%-7s  Acc:%-20s  seed: %s" % (bestModel.nIter,Acc,bestModel.seed)

        return bestModel
Exemplo n.º 28
0
    def __call__(self, origExamples = None, resultType = orange.GetValue):
        if origExamples == None:
            return self.classifier(None, resultType)
        else:
            if len(origExamples.domain.getmetas()) == 0:
                examples = origExamples
            else:
                examples = dataUtilities.getCopyWithoutMeta(origExamples)
            #dataUtilities.rmAllMeta(examples) 
           
            #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
            if self.imputer:
                dataUtilities.verbose = self.verbose
                if not self.ExFix.ready:
                    self.ExFix.set_domain(self.imputer.defaults.domain)
                    self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
                inExamples = self.ExFix.fixExample(examples)
            else:
                inExamples=None
	    if not inExamples:
                if self.verbose > 1:
        	    print "No prediction made for example:"
                    try:
                        print str(examples)[0:str(examples).find(",",20)]+" ... "+str(examples)[str(examples).rfind(",")+1:]
                    except:
                        print examples
                    print "The example does not have the same variables as the model, or the varTypes are incompatible."
		return None

	    #Imput the examples if there are missing values	
	    if self.imputer:  
		examplesImp = self.imputer(inExamples)         
                # There is a problem with using the imputer when examples contain meta attributes.
                # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
                if not examplesImp:
                    if self.verbose > 0: print "Unable to predict with the PLS model."
                    if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                    return None
	    else:
	        examplesImp = inExamples
		
            # Transform the orange data to the PLS prediction data format 
            PLSFeatureVector = self.getFeatureVector(examplesImp)
            # Return the result of the prediction for one feature vector
	    PLSOut = self.classifier.Run(PLSFeatureVector)
            if self.verbose > 0: print "PLSOut: ",PLSOut
	    if PLSOut.find("ERROR")>=0:
                print "Error returned by PLS:"
                print "  PLSOut: ",PLSOut
                print "Class:",str(self.classVar)
                if self.classVar.varType == orange.VarTypes.Discrete:
                    print "values = ",str(self.classVar.values)
                else:
                    print "Numerical Variable"
                print "Returning '?'"
                PLSOut = '?' #"ERROR"
	    orngOut=string.split(PLSOut,"\t")
            if self.verbose > 0: print "orngOut: ",orngOut
	    #convert result to orange value
            try:
	        value=orange.Value(self.classVar,orngOut[len(orngOut)-1])
            except:
                print "Error converting the Class back to orange format:"
                print "Class:",str(self.classVar)  
                if self.classVar.varType == orange.VarTypes.Discrete:
                    print "values = ",str(self.classVar.values)
                else:
                    print "Numerical Variable"
                print "Returned by PLS:",str(PLSOut)
                print "Value in orange Format (Would be the last element of PLSout): ",str(orngOut),"  ->  ",str(orngOut[len(orngOut)-1])
                print "Returning '?'"
                value=orange.Value(self.classVar,'?')
	    if self.classVar.varType == orange.VarTypes.Discrete: 
                score = self.getProbabilities(value)
            else:
                y_hat = self.classVar(value)
                score = Orange.statistics.distribution.Continuous(self.classVar)
                score[y_hat] = 1.0

	    # Assure that large local variables are deleted
	    del examplesImp
	    del PLSFeatureVector

	    #Return the desired quantity	
            if resultType == orange.GetProbabilities:
		return score
	    else:
	 	if resultType == orange.GetBoth:
			return value, score
		else:
            		return value
Exemplo n.º 29
0
    def __call__(self, trainingData, weight = None):
        """Train an opencv Random Forest (ml.CvRTrees) on trainingData.

        trainingData -- orange ExampleTable; class may be discrete or continuous.
        weight       -- forwarded to AZBaseClasses.AZLearner.__call__ (pre-checks).
        Returns an RFClassifier wrapping the trained model together with the
        impute data, variable names/importances and basic statistics, or None
        when the pre-checks or the parameter validation fail.
        """
        if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight):
            return None

        # Set the number of threads to be used by opencv
        cv.cvSetNumThreads(max(int(self.NumThreads),0))
        #Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True)

        # Object holding the data req for predictions (model, domain, etc)
        #print time.asctime(), "=superRFmodel(trainingData.domain)"
        ##scPA
        # Remove meta attributes from training data
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)
        # Impute the data and Convert the ExampleTable to CvMat 
        if self.useBuiltInMissValHandling:
            # opencv handles missing values itself: keep an empty impute table
            # (only its domain is needed by the returned classifier)
            impData = dataUtilities.DataTable(trainData.domain)
            CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
        else:
            # Create the average-value imputer and impute before conversion
            self.imputer = orange.ImputerConstructor_average(trainData)
            impData=self.imputer.defaults
            trainData = self.imputer(trainData)
            CvMatrices = dataUtilities.ExampleTable2CvMat(trainData)
            CvMatrices["missing_data_mask"] = None
        ##ecPA
        self.learner = ml.CvRTrees()#superRFmodel(trainData.domain)    #This call creates a scratchDir

        # Set RF model parameter values
        #  when nActVars defined as 0, use the sqrt of number of attributes so the user knows what will be used
        # This would be done in the C level if left as 0
        if self.nActVars == "0" and len(trainData.domain.attributes)>0:
            self.nActVars =  str(int(sqrt(len(trainData.domain.attributes))))
        #print time.asctime(), "=self.setParameters"
        params = self.setParameters(trainData)
        # Print values of the parameters
        if self.verbose > 0: self.printOuts(params)
        #**************************************************************************************************//
        #                      Check for irrational input arguments
        #**************************************************************************************************//
        if params.min_sample_count >= len(trainingData):
            if self.verbose > 0: print "ERROR! Invalid minSample: ",params.min_sample_count
            if self.verbose > 0: print "minSample must be smaller than the number of examples."
            if self.verbose > 0: print "The number of examples is: ",len(trainingData)
            if len(trainingData) > 10:
                if self.verbose > 0: print "minSample assigned to default value: 10"
                params.min_sample_count = 10
            else:
                if self.verbose > 0: print "Too few examples!!"
                if self.verbose > 0: print "Terminating"
                if self.verbose > 0: print "No random forest model built"
                return None
        if params.nactive_vars > len(trainingData.domain.attributes):
            if self.verbose > 0: print "ERROR! Invalid nActVars: ",params.nactive_vars
            if self.verbose > 0: print "nActVars must be smaller than or equal to the number of variables."
            if self.verbose > 0: print "The number of variables is: ", len(trainingData.domain.attributes)
            if self.verbose > 0: print "nActVars assigned to default value: sqrt(nVars)=",sqrt(len(trainingData.domain.attributes))
            # 0 tells the C level to fall back to sqrt(nVars)
            params.nactive_vars = 0;
        # Train RF model on data in openCVFile
        #print time.asctime(), "=Start Training"
        #Process the priors and Count the number of values in class var
        if  trainingData.domain.classVar.varType == orange.VarTypes.Discrete:
            cls_count = len(trainData.domain.classVar.values)
            priors = self.convertPriors(self.priors,trainingData.domain.classVar)
            if type(priors) == str: #If a string is returned, there was a failure, and it is the respective error message.
                print priors
                return None 
        else:
            # Regression: no class values, no priors
            cls_count = 0
            priors = None
        # Call the train method (priors are passed as a space-separated string)
        self.learner.train( CvMatrices["matrix"],ml.CV_ROW_SAMPLE,CvMatrices["responses"],None,None,CvMatrices["varTypes"],CvMatrices["missing_data_mask"],params,cls_count,  priors and str(priors).replace(","," ") or None)
        if self.learner.get_var_importance():
            varImportanceList = self.learner.get_var_importance()
            varImportance = {}
            # varName/varImp are only used by the commented-out ordering code below
            varName = []
            varImp = []
            for idx,attr in enumerate(CvMatrices["varNames"]):
                varImportance[attr] = varImportanceList[idx]
            #Uncomment next lines if needed the output already ordered
            #============================= begin =================================
            #    varName.append(attr)
            #    varImp.append(varImportanceList[idx])
            #Order the vars in terms of importance
            # insertion sort algorithm
            #for i in range(1, len(varImp)):
            #    save = varImp[i]
            #    saveName = varName[i]
            #    j = i
            #    while j > 0 and varImp[j - 1] < save:
            #        varImp[j] = varImp[j - 1]
            #        varName[j] = varName[j - 1]
            #        j -= 1
            #    varImp[j] = save
            #    varName[j] = saveName
            #For debug: test if assign var importance was correct
            #for attr in varImportance:
            #    if varImportance[attr] != varImp[varName.index(attr)]:
            #        print "ERROR: Variable importance of ", attr, " is not correct!"
            #OrderedVarImportance = {"VarNames":varName, "VarImportance":varImp}
            #=============================  end  =================================
        else:
            varImportance = {}
        #print time.asctime(), "=Done"
        # Save info about the variables used in the model (used by the write method)
        #attributeInfo = dataUtilities.DataTable(trainData.domain)
        # place the impute data as the first example of this data
        #attributeInfo.append(self.imputer.defaults)
        return RFClassifier(classifier = self.learner, classVar = impData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = CvMatrices["varNames"],thisVer=True,useBuiltInMissValHandling = self.useBuiltInMissValHandling, varImportance = varImportance, basicStat = self.basicStat, NTrainEx = len(trainingData), parameters = self.parameters)
Exemplo n.º 30
0
    def __call__(self, trainingData, weight=None):
        """Creates an PLS model from the data in trainingData. """
        if not AZBaseClasses.AZLearner.__call__(self,trainingData, weight):
            return None
        #Remove from the domain any unused values of discrete attributes including class
        trainingData = dataUtilities.getDataWithoutUnusedValues(trainingData,True)
        # Create path for the Orange data
        scratchdir = miscUtilities.createScratchDir(desc="PLS")
        OrngFile = os.path.join(scratchdir,"OrngData.tab")

        # Remove meta attributes from training data to make the imputer work with examples without the meta attributes. 
        #dataUtilities.rmAllMeta(trainingData)
        if len(trainingData.domain.getmetas()) == 0:
            trainData = trainingData
        else:
            trainData = dataUtilities.getCopyWithoutMeta(trainingData)

	# Create the imputer
        self.imputer = orange.ImputerConstructor_average(trainData)
	# Impute the data 
	trainData = self.imputer(trainData)
        # Save the Data already imputed to an Orange formated file
	if self.verbose > 1: print time.asctime(), "Saving Orange Data to a tab file..."
        orange.saveTabDelimited(OrngFile,trainData)
	if self.verbose > 1: print time.asctime(), "done"

        # Create the PLS instance
	if self.verbose > 1: print time.asctime(), "Creating PLS Object..."
        learner = pls.PlsAPI()
	if self.verbose > 1: print time.asctime(), "done"

	# Assign the PLS parameters
	learner.SetParameter('v',str(self.verbose))
        learner.SetParameter('debug',str(int(self.verbose > 0)))
	learner.SetParameter('method',self.method)
        if types.IntType(self.k) > len(trainData.domain.attributes):
	    learner.SetParameter('k',str(len(trainData.domain.attributes)))
            if self.verbose > 0: print "Warning! The number of components were more than the number of attributes."
            if self.verbose > 0: print "   Components were set to ",len(trainData.domain.attributes)
        else:
	    learner.SetParameter('k',self.k)
	learner.SetParameter('precision',self.precision)	
	learner.SetParameter('sDir',scratchdir)  #AZOC.SCRATCHDIR)
	
        # Read the Orange Formated file and Train the Algorithm
	# TRAIN
	if self.verbose > 1: print time.asctime(), "Training..."
        learner.Train(OrngFile)
	if self.verbose > 1:
		print "Train finished at ", time.asctime()
		print "PLS trained in: " + str(learner.GetCPUTrainTime()) + " seconds";
		print "Method:     " +  learner.GetParameter("method")
		print "Components: " +  learner.GetParameter("k")
		print "Precision:  " +  learner.GetParameter("precision")

        # Remove the scratch file
        if self.verbose == 0:
	    miscUtilities.removeDir(scratchdir)
	else:
	    print "The directory " + scratchdir + " was not deleted because DEBUG flag is ON"
	del trainData
        impData=self.imputer.defaults
        return PLSClassifier(classifier = learner, name = "Classifier of " + self.name, classVar = trainingData.domain.classVar, imputeData=impData, verbose = self.verbose, varNames = [attr.name for attr in trainingData.domain.attributes], NTrainEx = len(trainingData), basicStat = self.basicStat, parameters = self.parameters)#learner.GetClassVarName())#
Exemplo n.º 31
0
    def _singlePredict(self,
                       origExamples=None,
                       resultType=orange.GetValue,
                       returnDFV=False):
        if origExamples == None:
            return self.classifier(None, resultType)
        else:
            if len(origExamples.domain.getmetas()) == 0:
                examples = origExamples
            else:
                examples = dataUtilities.getCopyWithoutMeta(origExamples)
            #dataUtilities.rmAllMeta(examples)

            #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
            if self.imputer:
                dataUtilities.verbose = self.verbose
                if not self.ExFix.ready:
                    self.ExFix.set_domain(self.imputer.defaults.domain)
                    self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
                inExamples = self.ExFix.fixExample(examples)
            else:
                inExamples = None
            if not inExamples:
                if self.verbose > 1:
                    print "No prediction made for example:"
                    try:
                        print str(examples)[
                            0:str(examples).find(",", 20)] + " ... " + str(
                                examples)[str(examples).rfind(",") + 1:]
                    except:
                        print examples
                    print "The example does not have the same variables as the model, or the varTypes are incompatible."
                return None

#Imput the examples if there are missing values
            if self.imputer:
                examplesImp = self.imputer(inExamples)
                # There is a problem with using the imputer when examples contain meta attributes.
                # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
                if not examplesImp:
                    if self.verbose > 0:
                        print "Unable to predict with the PLS model."
                    if self.verbose > 0:
                        print "Perhaps you need to remove meta attributes from your examples."
                    return None
            else:
                examplesImp = inExamples

            DFV = None
            # Transform the orange data to the PLS prediction data format
            PLSFeatureVector = self.getFeatureVector(examplesImp)
            # Return the result of the prediction for one feature vector
            PLSOut = self.classifier.Run(PLSFeatureVector)
            if self.verbose > 0: print "PLSOut: ", PLSOut
            if PLSOut.find("ERROR") >= 0:
                print "Error returned by PLS:"
                print "  PLSOut: ", PLSOut
                print "Class:", str(self.classVar)
                if self.classVar.varType == orange.VarTypes.Discrete:
                    print "values = ", str(self.classVar.values)
                else:
                    print "Numerical Variable"
                print "Returning '?'"
                PLSOut = '?'  #"ERROR"
            orngOut = string.split(PLSOut, "\t")
            if self.verbose > 0: print "orngOut: ", orngOut
            #convert result to orange value
            try:
                value = orange.Value(self.classVar, orngOut[len(orngOut) - 1])
            except:
                print "Error converting the Class back to orange format:"
                print "Class:", str(self.classVar)
                if self.classVar.varType == orange.VarTypes.Discrete:
                    print "values = ", str(self.classVar.values)
                else:
                    print "Numerical Variable"
                print "Returned by PLS:", str(PLSOut)
                print "Value in orange Format (Would be the last element of PLSout): ", str(
                    orngOut), "  ->  ", str(orngOut[len(orngOut) - 1])
                print "Returning '?'"
                value = orange.Value(self.classVar, '?')
            if self.classVar.varType == orange.VarTypes.Discrete:
                score = self.getProbabilities(value)
                probOf1 = score[self.classVar.values[1]]
                DFV = -(probOf1 - 0.5)
                self._updateDFVExtremes(DFV)
            else:
                y_hat = self.classVar(value)
                score = Orange.statistics.distribution.Continuous(
                    self.classVar)
                score[y_hat] = 1.0
                if not value.isSpecial():
                    DFV = float(value.value)
                    self._updateDFVExtremes(DFV)

# Assure that large local variables are deleted
            del examplesImp
            del PLSFeatureVector

            #Return the desired quantity
            if resultType == orange.GetProbabilities:
                res = score
            else:
                if resultType == orange.GetBoth:
                    res = value, score
                else:
                    res = value

            if returnDFV:
                res = (res, DFV)

            self.nPredictions += 1
            return res