示例#1
0
 def buildClassifier(self, data: Instances):
     """Build the tree from ``data``, then optionally collapse, prune and clean up.

     The caller's dataset is not modified: a copy is made first and
     ``deleteWithMissingClass()`` is applied to that copy only.

     Args:
         data: Training instances.
     """
     trainingSet = Instances(data)
     trainingSet.deleteWithMissingClass()

     # The second argument to buildTree is true when subtree raising is
     # enabled or when cleanup is disabled.
     # NOTE(review): presumably this tells the tree to keep its training
     # data at the nodes -- confirm against buildTree.
     secondArg = self.m_subtreeRaising or not self.m_cleanup
     self.buildTree(trainingSet, secondArg)

     # Post-processing stages, each gated by its own flag.
     if self.m_collapseTheTree:
         self.collapse()
     if self.m_pruneTheTree:
         self.prune()
     if self.m_cleanup:
         # NOTE(review): Instances(x, 0) presumably yields an empty,
         # header-only copy -- confirm against the Instances constructor.
         emptyCopy = Instances(trainingSet, 0)
         self.cleanup(emptyCopy)
 def buildClassifier(self, data: Instances):
     """Build the tree on one cross-validation split of ``data``, then optionally prune.

     A copy of ``data`` is made, ``deleteWithMissingClass()`` is applied to it
     and it is stratified into ``self.numSets`` folds. The training part of
     the last fold (index ``numSets - 1``) is passed to ``buildTree`` together
     with the matching test part.

     Args:
         data: Training instances; the caller's object is not modified
             because all work happens on a copy.
     """
     workingSet = Instances(data)
     workingSet.deleteWithMissingClass()
     workingSet.stratify(self.numSets)

     lastFold = self.numSets - 1
     # Keep the call order of the original: train split first, then test split.
     trainPart = workingSet.trainCV(self.numSets, lastFold, self.m_seed)
     # NOTE(review): the test part is presumably the hold-out set used for
     # pruning -- confirm against buildTree's signature.
     testPart = workingSet.testCV(self.numSets, lastFold)
     self.buildTree(trainPart, not self.m_cleanup, testPart)

     if self.pruneTheTree:
         self.prune()
     if self.m_cleanup:
         emptyCopy = Instances(workingSet, 0)
         self.cleanup(emptyCopy)
示例#3
0
    def buildClassifier(self,data:Instances):
        """Store the training instances and prepare the neighbour search.

        No model is fitted here beyond a ``ZeroR`` fallback: the method
        records the class information, keeps (a window of) the training
        instances, counts the usable attributes and hands the instances to
        ``self.m_NNSearch``.

        Args:
            data: Training instances. They are first checked with
                ``getCapabilities().testWithFail`` and then copied, so the
                caller's object is not modified.
        """
        self.getCapabilities().testWithFail(data)

        cleaned = Instances(data)
        cleaned.deleteWithMissingClass()

        self.m_NumClasses = cleaned.numClasses()
        self.m_ClassType = cleaned.classAttribute().type()

        # Only the sample set itself is stored.
        self.m_Train = Instances(cleaned, 0, cleaned.numInstances())

        # With a positive window size smaller than the dataset, keep only
        # the last WindowSize instances.
        if 0 < self.WindowSize < cleaned.numInstances():
            windowStart = self.m_Train.numInstances() - self.WindowSize
            self.m_Train = Instances(self.m_Train, windowStart, self.WindowSize)

        # Count every non-class attribute that is nominal or numeric.
        train = self.m_Train
        self.m_NumAttributesUsed = sum(
            1
            for idx in range(train.numAttributes())
            if idx != train.classIndex()
            and (train.attribute(idx).isNominal() or train.attribute(idx).isNumeric())
        )

        self.m_NNSearch.setInstances(self.m_Train)
        # NOTE(review): presumably invalidates a previously selected k --
        # confirm where m_kNNValid is read.
        self.m_kNNValid = False

        # The fallback model is built on the full cleaned set, not the window.
        fallback = ZeroR()
        self.m_defaultModel = fallback
        fallback.buildClassifier(cleaned)
示例#4
0
 def buildClassifier(self, data: Instances):
     """Fit the regression model to ``data``.

     Steps, in order:
       1. Reset the built / ZeroR flags.
       2. If there is exactly one instance, store its class value as the
          only coefficient, mark the model as ZeroR and return early
          (``m_ModelBuilt`` stays ``False`` on this path).
       3. Unless checks are turned off, validate the data, copy it, and run
          it through ``NominalToBinary`` and ``ReplaceMissingValues``.
       4. Record per-attribute means and standard deviations, deselecting
          attributes whose standard deviation is zero.
       5. Call ``findBestModel()`` and, when requested, compute the
          additional regression statistics.
       6. Either drop the stored data (``Minimal``) or keep an
          ``Instances(data, 0)`` copy.

     Args:
         data: Training instances.

     Raises:
         Exception: If additional statistics are requested and any instance
             has a weight other than 1.
     """
     self.m_ModelBuilt = False
     self.m_isZeroR = False

     # Degenerate dataset: a single instance yields a constant predictor.
     if data.numInstances() == 1:
         self.m_Coefficients = [data.instance(0).classValue()]
         self.m_SelectedAttributes = [False] * data.numAttributes()
         self.m_isZeroR = True
         return

     if self.m_checksTurnedOff:
         # Checks disabled: the data is used as given and no filters are kept.
         self.m_TransformFilter = None
         self.m_MissingFilter = None
     else:
         self.getCapabilities().testWithFail(data)
         if self.outputAdditionalStats:
             # The additional statistics are only allowed on data where
             # every instance weight equals 1.
             hasNonUnitWeight = any(
                 data.instance(row).weight() != 1
                 for row in range(data.numInstances())
             )
             if hasNonUnitWeight:
                 raise Exception(
                     "Can only compute additional statistics on unweighted data"
                 )
         # Work on a copy from here on; the caller's object stays untouched.
         data = Instances(data)
         data.deleteWithMissingClass()
         self.m_TransformFilter = NominalToBinary()
         self.m_TransformFilter.setInputFormat(data)
         data = Filter.useFilter(data, self.m_TransformFilter)
         self.m_MissingFilter = ReplaceMissingValues()
         self.m_MissingFilter.setInputFormat(data)
         data = Filter.useFilter(data, self.m_MissingFilter)
         data.deleteWithMissingClass()

     self.m_ClassIndex = data.classIndex()
     self.m_TransformedData = data
     self.m_Coefficients = None

     # Per-attribute bookkeeping; the class attribute keeps its defaults
     # (not selected, mean 0, standard deviation 0).
     numAttrs = data.numAttributes()
     self.m_SelectedAttributes = [False] * numAttrs
     self.m_Means = [0] * numAttrs
     self.m_StdDevs = [0] * numAttrs
     for attr in range(numAttrs):
         if attr == self.m_ClassIndex:
             continue
         self.m_Means[attr] = data.meanOrMode(attr)
         spread = math.sqrt(data.variance(attr))
         self.m_StdDevs[attr] = spread
         # Attributes with zero standard deviation start out deselected.
         self.m_SelectedAttributes[attr] = spread != 0

     targetIndex = self.m_TransformedData.classIndex()
     self.m_ClassStdDev = math.sqrt(data.variance(targetIndex))
     self.m_ClassMean = data.meanOrMode(self.m_TransformedData.classIndex())

     # Must run after all of the state above has been populated.
     self.findBestModel()

     if self.outputAdditionalStats:
         # k = 1 + number of selected non-class attributes.
         k = 1 + sum(
             1
             for col in range(data.numAttributes())
             if col != data.classIndex() and self.m_SelectedAttributes[col]
         )
         numRows = self.m_TransformedData.numInstances()
         self.m_df = numRows - k
         se = self.calculateSE(self.m_SelectedAttributes,
                               self.m_Coefficients)
         self.m_RSquared = RegressionAnalysis.calculateRSquared(
             self.m_TransformedData, se)
         self.m_RSquaredAdj = RegressionAnalysis.calculateAdjRSquared(
             self.m_RSquared, numRows, k)
         self.m_FStat = RegressionAnalysis.calculateFStat(
             self.m_RSquared, numRows, k)
         self.m_StdErrorOfCoef = RegressionAnalysis.calculateStdErrorOfCoef(
             self.m_TransformedData, self.m_SelectedAttributes, se,
             numRows, k)
         self.m_TStats = RegressionAnalysis.calculateTStats(
             self.m_Coefficients, self.m_StdErrorOfCoef, k)

     if self.Minimal:
         # Minimal mode drops the data and the per-attribute statistics.
         self.m_TransformedData = None
         self.m_Means = None
         self.m_StdDevs = None
     else:
         self.m_TransformedData = Instances(data, 0)

     self.m_ModelBuilt = True
示例#5
0
 def buildClassifier(self, data: Instances):
     """Build the tree from a copy of ``data``.

     ``deleteWithMissingClass()`` is applied to the copy, so the caller's
     dataset is not modified.

     Args:
         data: Training instances.
     """
     trainingCopy = Instances(data)
     trainingCopy.deleteWithMissingClass()
     # The second argument is always False here; its meaning is defined
     # by buildTree.
     self.buildTree(trainingCopy, False)