Example #1
    def doPopEvaluation(self, isTrain):
        """ Performs a complete evaluation of the current rule population.  Discrete phenotype only.  The population is unchanged throughout this evaluation. Works on both training and testing data. """
        if isTrain:
            myType = "TRAINING"
        else:
            myType = "TESTING"
        noMatch = 0  # Count of instances for which no classifier in the population matched.
        tie = 0  # Count of instances where class votes tied and no decision could be made.
        cons.env.resetDataRef(isTrain)  # Go to the first instance in the dataset
        phenotypeList = cons.env.formatData.phenotypeList
        #Initialize dictionary entry for each class----
        classAccDict = {}
        for each in phenotypeList:
            classAccDict[each] = ClassAccuracy()
        #----------------------------------------------
        if isTrain:
            instances = cons.env.formatData.numTrainInstances
        else:
            instances = cons.env.formatData.numTestInstances
        self.predictionList = []
        self.predictionSets = []
        self.realList = []
        #-----------------------------------------------------------------------------------------------------------------------------------------
        # GET PREDICTION AND DETERMINE PREDICTION STATUS
        #-----------------------------------------------------------------------------------------------------------------------------------------
        for inst in range(instances):
            if isTrain:
                state_phenotype = cons.env.getTrainInstance()
            else:
                state_phenotype = cons.env.getTestInstance()
            #-----------------------------------------------------------------------------
            self.population.makeEvalMatchSet(state_phenotype[0])
            prediction = Prediction(self.population, self.exploreIter)
            phenotypeSelection = prediction.getDecision()
            if not isTrain:
                phenotypeSet = prediction.getSet()
                self.predictionList.append(phenotypeSelection)  # Used to output raw test predictions.
                self.predictionSets.append(phenotypeSet)
                self.realList.append(state_phenotype[1])
            #-----------------------------------------------------------------------------
            if phenotypeSelection is None:
                noMatch += 1
            elif phenotypeSelection == 'Tie':
                tie += 1
            else:  #Instances which failed to be covered are excluded from the initial accuracy calculation
                for each in phenotypeList:
                    thisIsMe = False
                    accuratePhenotype = False
                    truePhenotype = state_phenotype[1]
                    if each == truePhenotype:
                        thisIsMe = True  # This class is the instance's true phenotype.
                    if phenotypeSelection == truePhenotype:
                        accuratePhenotype = True
                    classAccDict[each].updateAccuracy(thisIsMe,
                                                      accuratePhenotype)

            cons.env.newInstance(isTrain)  #next instance
            self.population.clearSets()
        #-----------------------------------------------------------------------------------------------------------------------------------------
        # CALCULATE ACCURACY - UNLIKELY SITUATION WHERE NO MATCHING RULES WERE FOUND for any instance, in either training or testing data (this can happen on testing data when the training data was strongly overfit)
        #-----------------------------------------------------------------------------------------------------------------------------------------
        if noMatch == instances:
            randomProb = float(1.0 / len(cons.env.formatData.phenotypeList))
            print("-----------------------------------------------")
            print(str(myType) + " Accuracy Results:-------------")
            print("Instance Coverage = " + str(0) + '%')
            print("Prediction Ties = " + str(0) + '%')
            print(
                str(0) + ' out of ' + str(instances) +
                ' instances covered and correctly classified.')
            print("Standard Accuracy (Adjusted) = " + str(randomProb))
            print("Balanced Accuracy (Adjusted) = " + str(randomProb))
            #Balanced and standard accuracy will only be equal when each phenotype is represented by an equal number of instances AND covering is 100%. (NOTE: even at 100% covering, the values may differ slightly due to floating-point rounding.)
            resultList = [randomProb, 0]
            return resultList
        #-----------------------------------------------------------------------------------------------------------------------------------------
        # CALCULATE ACCURACY
        #-----------------------------------------------------------------------------------------------------------------------------------------
        else:
            #----------------------------------------------------------------------------------------------
            #Calculate Standard Accuracy------------------------------------
            #Note: accuratePhenotype does not depend on the class, so the two counts below are
            #identical for every class; standardAccuracy reduces to plain accuracy over the
            #covered instances, and after the loop instancesCorrectlyClassified holds the total
            #number of covered, correctly classified instances (used in the printout below).
            standardAccuracy = 0
            for each in phenotypeList:
                instancesCorrectlyClassified = classAccDict[each].T_myClass + classAccDict[each].T_otherClass
                instancesIncorrectlyClassified = classAccDict[each].F_myClass + classAccDict[each].F_otherClass
                classAccuracy = float(instancesCorrectlyClassified) / float(instancesCorrectlyClassified + instancesIncorrectlyClassified)
                standardAccuracy += classAccuracy
            standardAccuracy = standardAccuracy / float(len(phenotypeList))

            #Calculate Balanced Accuracy---------------------------------------------
            balancedAccuracy = 0
            for each in phenotypeList:
                try:
                    sensitivity = classAccDict[each].T_myClass / (
                        float(classAccDict[each].T_myClass +
                              classAccDict[each].F_otherClass))
                except ZeroDivisionError:  # No covered instances of this class in the data.
                    sensitivity = 0.0
                try:
                    specificity = classAccDict[each].T_otherClass / (
                        float(classAccDict[each].T_otherClass +
                              classAccDict[each].F_myClass))
                except ZeroDivisionError:  # No covered instances of the other classes.
                    specificity = 0.0

                balancedClassAccuracy = (sensitivity + specificity) / 2.0
                balancedAccuracy += balancedClassAccuracy

            balancedAccuracy = balancedAccuracy / float(len(phenotypeList))

            #Adjustment for uncovered instances - to avoid positive or negative bias, unpredicted instances are credited at the chance rate of guessing a phenotype at random (e.g. 50% with two phenotypes). See the standalone sketch after this method.
            predictionFail = float(noMatch) / float(instances)
            predictionTies = float(tie) / float(instances)
            instanceCoverage = 1.0 - predictionFail
            predictionMade = 1.0 - (predictionFail + predictionTies)

            adjustedStandardAccuracy = (standardAccuracy * predictionMade) + (
                (1.0 - predictionMade) * (1.0 / float(len(phenotypeList))))
            adjustedBalancedAccuracy = (balancedAccuracy * predictionMade) + (
                (1.0 - predictionMade) * (1.0 / float(len(phenotypeList))))

            #Adjusted balanced accuracy credits unmatched instances with a consistent, chance-level probability of being correctly classified in the reported accuracy.
            print("-----------------------------------------------")
            print(str(myType) + " Accuracy Results:-------------")
            print("Instance Coverage = " + str(instanceCoverage * 100.0) + '%')
            print("Prediction Ties = " + str(predictionTies * 100.0) + '%')
            print(
                str(instancesCorrectlyClassified) + ' out of ' +
                str(instances) +
                ' instances covered and correctly classified.')
            print("Standard Accuracy (Adjusted) = " +
                  str(adjustedStandardAccuracy))
            print("Balanced Accuracy (Adjusted) = " +
                  str(adjustedBalancedAccuracy))
            #Balanced and standard accuracy will only be equal when each phenotype is represented by an equal number of instances AND covering is 100%. (NOTE: even at 100% covering, the values may differ slightly due to floating-point rounding.)
            resultList = [adjustedBalancedAccuracy, instanceCoverage]
            return resultList
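
The chance-correction at the end of this method is worth isolating: accuracy is computed only over the instances that actually received a prediction, and the uncovered or tied remainder is credited at the random-guess rate of 1/(number of phenotypes). Below is a minimal standalone sketch of that adjustment; adjust_accuracy and the numbers are illustrative only, not part of the original codebase.

def adjust_accuracy(rawAccuracy, instances, noMatch, ties, numPhenotypes):
    """ Credit unpredicted instances at the random-guess rate, mirroring the
    adjustment in doPopEvaluation. (Illustrative sketch, not original code.) """
    predictionMade = 1.0 - float(noMatch + ties) / float(instances)
    chance = 1.0 / float(numPhenotypes)
    return (rawAccuracy * predictionMade) + ((1.0 - predictionMade) * chance)

# Hypothetical numbers: 100 instances, 10 uncovered, 5 ties, 2 phenotypes, and 80%
# balanced accuracy over the 85 instances that received a prediction:
# 0.80 * 0.85 + 0.15 * 0.5 = 0.755
print(adjust_accuracy(0.80, 100, 10, 5, 2))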
Example #2
    def doPopEvaluation(self, isTrain):
        """ Performs evaluation of population via the copied environment.  The population is maintained unchanging throughout the evaluation.
        Works on both training and testing data. """
        cons.env.startEvaluationMode()
        noMatch = 0  # Count of instances for which no classifier in the population matched.
        tie = 0  # Count of instances where class votes tied and no decision could be made.
        cons.env.resetDataRef(isTrain)  # Go to the first instance in the data set
        phenotypeList = cons.env.formatData.phenotypeList  # Shorter reference to phenotypeList; based on the training data (assumes the testing data contains no previously unseen phenotypes)
        #----------------------------------------------
        classAccDict = {}
        for each in phenotypeList:
            classAccDict[each] = ClassAccuracy()

        #----------------------------------------------
        if isTrain:
            instances = cons.env.formatData.numTrainInstances
        else:
            instances = cons.env.formatData.numTestInstances
        #----------------------------------------------------------------------------------------------
        for inst in range(instances):
            if isTrain:
                state_phenotype = cons.env.getTrainInstance()
            else:
                state_phenotype = cons.env.getTestInstance()
            #-----------------------------------------------------------------------------
            self.population.makeEvalMatchSet(state_phenotype[0])
            prediction = Prediction(self.population)
            phenotypeSelection = prediction.getDecision()
            #-----------------------------------------------------------------------------

            if phenotypeSelection is None:
                noMatch += 1
            elif phenotypeSelection == 'Tie':
                tie += 1
            else:  #Instances which failed to be covered are excluded from the initial accuracy calculation (this is important to the rule compaction algorithm)
                for each in phenotypeList:
                    thisIsMe = False
                    accuratePhenotype = False
                    truePhenotype = state_phenotype[1]
                    if each == truePhenotype:
                        thisIsMe = True  # This class is the instance's true phenotype.
                    if phenotypeSelection == truePhenotype:
                        accuratePhenotype = True
                    classAccDict[each].updateAccuracy(thisIsMe,
                                                      accuratePhenotype)

            cons.env.newInstance(isTrain)  #next instance
            self.population.clearSets()

        #Calculate Balanced Accuracy (counters come from ClassAccuracy; see the sketch after this method)---------------------------------------------
        balancedAccuracy = 0
        for each in phenotypeList:
            try:
                sensitivity = classAccDict[each].T_myClass / (
                    float(classAccDict[each].T_myClass +
                          classAccDict[each].F_otherClass))
            except ZeroDivisionError:  # No covered instances of this class in the data.
                sensitivity = 0.0
            try:
                specificity = classAccDict[each].T_otherClass / (
                    float(classAccDict[each].T_otherClass +
                          classAccDict[each].F_myClass))
            except ZeroDivisionError:  # No covered instances of the other classes.
                specificity = 0.0

            balancedClassAccuracy = (sensitivity + specificity) / 2.0
            balancedAccuracy += balancedClassAccuracy

        balancedAccuracy = balancedAccuracy / float(len(phenotypeList))

        #Adjustment for uncovered instances - to avoid positive or negative bias, unpredicted instances are credited at the chance rate of guessing a phenotype at random (e.g. 50% with two phenotypes). See the standalone sketch after Example #1.
        predictionFail = float(noMatch) / float(instances)
        predictionTies = float(tie) / float(instances)
        predictionMade = 1.0 - (predictionFail + predictionTies)

        adjustedBalancedAccuracy = (balancedAccuracy * predictionMade) + (
            (1.0 - predictionMade) * (1.0 / float(len(phenotypeList))))
        cons.env.stopEvaluationMode()
        return adjustedBalancedAccuracy
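
Both versions consume a ClassAccuracy helper whose definition is not shown above. Reconstructed from how its counters are read (sensitivity = T_myClass / (T_myClass + F_otherClass), specificity = T_otherClass / (T_otherClass + F_myClass)), a minimal sketch consistent with that usage might look like the following; the original implementation may differ in detail.

class ClassAccuracy:
    """ One-vs-rest confusion counts for a single phenotype, updated once per
    covered instance. (Reconstructed sketch; not the original implementation.) """
    def __init__(self):
        self.T_myClass = 0  # This class's instance, correctly classified (true positive).
        self.T_otherClass = 0  # Another class's instance, correctly classified (true negative).
        self.F_otherClass = 0  # This class's instance, misclassified (false negative).
        self.F_myClass = 0  # Another class's instance, misclassified (treated as a false positive; exact only for binary problems).

    def updateAccuracy(self, thisIsMe, accuratePhenotype):
        """ Increment exactly one counter, based on whether this class is the
        instance's true phenotype and whether the prediction was correct. """
        if thisIsMe and accuratePhenotype:
            self.T_myClass += 1
        elif accuratePhenotype:
            self.T_otherClass += 1
        elif thisIsMe:
            self.F_otherClass += 1
        else:
            self.F_myClass += 1

With these counters, sensitivity is TP / (TP + FN) and specificity is TN / (TN + FP), matching the two try blocks in both methods above.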