def buildClassifier(self, data: Instances):
    """Build the tree from ``data``, then optionally collapse, prune and clean up.

    The caller's ``data`` object is not used directly: a new ``Instances``
    is constructed from it and ``deleteWithMissingClass()`` is called on
    that new object before the tree is built.

    Args:
        data: Dataset to build the tree from.
    """
    training = Instances(data)
    training.deleteWithMissingClass()

    # Second buildTree argument is true when subtree raising is enabled or
    # cleanup is disabled (presumably a "keep data" flag -- confirm against
    # buildTree's signature).
    keep_data = self.m_subtreeRaising or not self.m_cleanup
    self.buildTree(training, keep_data)

    # Each post-processing step is gated by its own flag.
    if self.m_collapseTheTree:
        self.collapse()
    if self.m_pruneTheTree:
        self.prune()
    if self.m_cleanup:
        # Instances(training, 0): presumably a header-only copy with no
        # rows -- confirm against the Instances constructor.
        self.cleanup(Instances(training, 0))
def buildClassifier(self, data: Instances):
    """Build the tree on a train/test split of ``data``, then optionally prune.

    A new ``Instances`` is constructed from ``data``, stripped via
    ``deleteWithMissingClass()`` and stratified into ``self.numSets`` sets.
    Fold index ``self.numSets - 1`` is used for both the ``trainCV`` and the
    ``testCV`` call, and both results are handed to ``buildTree``.

    Args:
        data: Dataset to build the tree from.
    """
    dataset = Instances(data)
    dataset.deleteWithMissingClass()
    dataset.stratify(self.numSets)

    # Arguments are evaluated in the same order as they are passed:
    # training part, flag, test part.
    train_part = dataset.trainCV(self.numSets, self.numSets - 1, self.m_seed)
    keep_data = not self.m_cleanup  # presumably a "keep data" flag -- confirm
    test_part = dataset.testCV(self.numSets, self.numSets - 1)
    self.buildTree(train_part, keep_data, test_part)

    if self.pruneTheTree:
        self.prune()
    if self.m_cleanup:
        # Instances(dataset, 0): presumably a header-only copy with no
        # rows -- confirm against the Instances constructor.
        self.cleanup(Instances(dataset, 0))
def buildClassifier(self, data: Instances):
    """Store the training instances and prepare the neighbour search.

    After the capabilities check, a new ``Instances`` is constructed from
    ``data`` and ``deleteWithMissingClass()`` is called on it. The method
    then records the class count and class attribute type, stores the
    training set (trimmed to the last ``self.WindowSize`` instances when a
    positive window is set and exceeded), counts the usable attributes,
    hands the training set to ``self.m_NNSearch`` and builds a ``ZeroR``
    default model.

    Args:
        data: Dataset to train on.
    """
    self.getCapabilities().testWithFail(data)

    cleaned = Instances(data)
    cleaned.deleteWithMissingClass()

    self.m_NumClasses = cleaned.numClasses()
    self.m_ClassType = cleaned.classAttribute().type()

    # Only the training samples themselves are stored.
    train = Instances(cleaned, 0, cleaned.numInstances())
    if 0 < self.WindowSize < cleaned.numInstances():
        # Keep only the trailing WindowSize instances.
        train = Instances(
            train, train.numInstances() - self.WindowSize, self.WindowSize
        )
    self.m_Train = train

    # Count every non-class attribute that is nominal or numeric.
    self.m_NumAttributesUsed = sum(
        1
        for idx in range(train.numAttributes())
        if idx != train.classIndex()
        and (train.attribute(idx).isNominal() or train.attribute(idx).isNumeric())
    )

    self.m_NNSearch.setInstances(train)

    # Reset the flag (presumably invalidates a previously selected k --
    # confirm against where m_kNNValid is read).
    self.m_kNNValid = False

    # The default model is built on the full cleaned data, not the window.
    fallback = ZeroR()
    self.m_defaultModel = fallback
    fallback.buildClassifier(cleaned)
def buildClassifier(self, data: Instances):
    """Fit the regression model on ``data``.

    Steps, in order:

    1. A dataset with exactly one instance short-circuits: the single class
       value becomes the only coefficient, ``m_isZeroR`` is set and the
       method returns without setting ``m_ModelBuilt``.
    2. Unless ``m_checksTurnedOff`` is set, the capabilities check runs and
       the data is passed through ``NominalToBinary`` and
       ``ReplaceMissingValues``. With ``outputAdditionalStats`` enabled,
       all instance weights must equal 1.
    3. Per-attribute means and standard deviations are computed; attributes
       with zero standard deviation are deselected.
    4. ``findBestModel()`` is called, followed by the optional additional
       statistics.
    5. The stored data is either dropped (``Minimal``) or replaced by
       ``Instances(data, 0)``.

    Args:
        data: Dataset to fit.

    Raises:
        Exception: if additional statistics are requested and some instance
            has a weight other than 1.
    """
    self.m_ModelBuilt = False
    self.m_isZeroR = False

    # Single-instance data: the model is just that instance's class value.
    if data.numInstances() == 1:
        self.m_Coefficients = [data.instance(0).classValue()]
        self.m_SelectedAttributes = [False] * data.numAttributes()
        self.m_isZeroR = True
        return

    if self.m_checksTurnedOff:
        # No checks and no preprocessing filters; use the data as given.
        self.m_TransformFilter = None
        self.m_MissingFilter = None
        prepared = data
    else:
        self.getCapabilities().testWithFail(data)

        if self.outputAdditionalStats:
            weighted = any(
                data.instance(row).weight() != 1
                for row in range(data.numInstances())
            )
            if weighted:
                raise Exception(
                    "Can only compute additional statistics on unweighted data"
                )

        prepared = Instances(data)
        prepared.deleteWithMissingClass()

        self.m_TransformFilter = NominalToBinary()
        self.m_TransformFilter.setInputFormat(prepared)
        prepared = Filter.useFilter(prepared, self.m_TransformFilter)

        self.m_MissingFilter = ReplaceMissingValues()
        self.m_MissingFilter.setInputFormat(prepared)
        prepared = Filter.useFilter(prepared, self.m_MissingFilter)

        prepared.deleteWithMissingClass()

    self.m_ClassIndex = prepared.classIndex()
    self.m_TransformedData = prepared
    self.m_Coefficients = None

    # Per-attribute statistics; the class attribute's slots keep their
    # initial values (False / 0 / 0).
    self.m_SelectedAttributes = [False] * prepared.numAttributes()
    self.m_Means = [0] * prepared.numAttributes()
    self.m_StdDevs = [0] * prepared.numAttributes()
    for col in range(prepared.numAttributes()):
        if col == self.m_ClassIndex:
            continue
        self.m_Means[col] = prepared.meanOrMode(col)
        self.m_StdDevs[col] = math.sqrt(prepared.variance(col))
        # An attribute is selected unless its standard deviation is zero.
        self.m_SelectedAttributes[col] = not (self.m_StdDevs[col] == 0)

    self.m_ClassStdDev = math.sqrt(
        prepared.variance(self.m_TransformedData.classIndex())
    )
    self.m_ClassMean = prepared.meanOrMode(self.m_TransformedData.classIndex())

    self.findBestModel()

    if self.outputAdditionalStats:
        # k = 1 + number of selected non-class attributes.
        k = 1 + sum(
            1
            for col in range(prepared.numAttributes())
            if col != prepared.classIndex() and self.m_SelectedAttributes[col]
        )
        self.m_df = self.m_TransformedData.numInstances() - k

        se = self.calculateSE(self.m_SelectedAttributes, self.m_Coefficients)
        self.m_RSquared = RegressionAnalysis.calculateRSquared(
            self.m_TransformedData, se
        )
        self.m_RSquaredAdj = RegressionAnalysis.calculateAdjRSquared(
            self.m_RSquared, self.m_TransformedData.numInstances(), k
        )
        self.m_FStat = RegressionAnalysis.calculateFStat(
            self.m_RSquared, self.m_TransformedData.numInstances(), k
        )
        self.m_StdErrorOfCoef = RegressionAnalysis.calculateStdErrorOfCoef(
            self.m_TransformedData,
            self.m_SelectedAttributes,
            se,
            self.m_TransformedData.numInstances(),
            k,
        )
        self.m_TStats = RegressionAnalysis.calculateTStats(
            self.m_Coefficients, self.m_StdErrorOfCoef, k
        )

    if self.Minimal:
        # Drop the stored data and the per-attribute statistics.
        self.m_TransformedData = self.m_Means = self.m_StdDevs = None
    else:
        # Instances(prepared, 0): presumably a header-only copy with no
        # rows -- confirm against the Instances constructor.
        self.m_TransformedData = Instances(prepared, 0)

    self.m_ModelBuilt = True
def buildClassifier(self, data: Instances):
    """Build the tree from ``data``.

    A new ``Instances`` is constructed from ``data`` and
    ``deleteWithMissingClass()`` is called on it; the result is passed to
    ``buildTree`` with ``False`` as the second argument.

    Args:
        data: Dataset to build the tree from.
    """
    cleaned = Instances(data)
    cleaned.deleteWithMissingClass()
    # False: presumably "do not keep the data" -- confirm against buildTree.
    self.buildTree(cleaned, False)