def threadClassifierRun(self):
    try:
        self.m_CEPanel.addToHistory()
        inst = Instances(self.m_Instances)
        trainTimeStart = trainTimeElapsed = testTimeStart = testTimeElapsed = 0
        userTestStructure = None
        if self.m_SetTestFrame is not None:
            userTestStructure = deepcopy(self.m_SetTestFrame.getInstances())  # type: Instances
            userTestStructure.setClassIndex(self.m_TestClassIndex)
        # Defaults: output the model, per-class stats and the confusion matrix,
        # and store predictions for visualization.
        # outputPredictionsText = None
        numFolds = 10
        classIndex = self.m_ClassCombo.currentIndex()
        inst.setClassIndex(classIndex)
        classifier = self.m_ClassifierEditor.getValue()  # type: Classifier
        name = time.strftime("%H:%M:%S - ")
        outPutResult = ""
        evaluation = None  # type: Evaluation
        grph = None

        # Determine the evaluation mode from the radio buttons.
        if self.m_CVBut.isChecked():
            testMode = 1
            numFolds = int(self.m_CVText.text())
            if numFolds <= 1:
                raise Exception("Number of folds must be greater than 1")
        elif self.m_TrainBut.isChecked():
            testMode = 2
        elif self.m_TestSplitBut.isChecked():
            testMode = 3
            if userTestStructure is None:
                raise Exception("No user test set has been specified")
            if not inst.equalHeaders(userTestStructure):
                raise Exception("Train and test set are not compatible")
        else:
            raise Exception("Unknown test mode")

        cname = classifier.__module__
        if cname.startswith("classifiers."):
            name += cname[len("classifiers."):]
        else:
            name += cname
        cmd = classifier.__module__
        # if isinstance(classifier, OptionHandler):
        #     cmd += " " + Utils.joinOptions(classifier.getOptions())
        plotInstances = ClassifierErrorsPlotInstances()
        plotInstances.setInstances(userTestStructure if testMode == 4 else inst)
        plotInstances.setClassifier(classifier)
        plotInstances.setClassIndex(inst.classIndex())
        plotInstances.setPointSizeProportionalToMargin(False)

        # Assemble the run-information header.
        outPutResult += "=== Run information ===\n\n"
        outPutResult += "Scheme: " + cname
        # if isinstance(classifier, OptionHandler):
        #     o = classifier.getOptions()
        #     outPutResult += " " + Utils.joinOptions(o)
        outPutResult += "\n"
        outPutResult += "Relation: " + inst.relationName() + '\n'
        outPutResult += "Instances: " + str(inst.numInstances()) + '\n'
        outPutResult += "Attributes: " + str(inst.numAttributes()) + '\n'
        if inst.numAttributes() < 100:
            for i in range(inst.numAttributes()):
                outPutResult += " " + inst.attribute(i).name() + '\n'
        else:
            outPutResult += " [list of attributes omitted]\n"
        outPutResult += "Test mode: "
        if testMode == 1:
            outPutResult += str(numFolds) + "-fold cross-validation\n"
        elif testMode == 2:
            outPutResult += "evaluate on training data\n"
        elif testMode == 3:
            outPutResult += "user supplied test set: " + str(userTestStructure.numInstances()) + " instances\n"
        outPutResult += "\n"
        self.m_History.addResult(name, outPutResult)
        self.m_History.setSingle(name)

        # Build the model on the full training set for the training-data
        # and supplied-test-set modes.
        if testMode == 2 or testMode == 3:
            trainTimeStart = time.time()
            classifier.buildClassifier(inst)
            trainTimeElapsed = time.time() - trainTimeStart
            outPutResult += "=== Classifier model (full training set) ===\n\n"
            outPutResult += str(classifier) + "\n"
            outPutResult += "\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed, 2) + " seconds\n\n"
            self.m_History.updateResult(name, outPutResult)
            if isinstance(classifier, Drawable):
                grph = classifier.graph()
            print("========== update complete ==========")

        if testMode == 2:
            # Evaluate on the training data itself.
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst, plotInstances, False)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            testTimeStart = time.time()
            # TODO: classifiers implementing BatchPredictor could be scored
            # in one call (see the sketch after this method).
            for jj in range(inst.numInstances()):
                plotInstances.process(inst.instance(jj), classifier, evaluation)
            testTimeElapsed = time.time() - testTimeStart
            outPutResult += "=== Evaluation on training set ===\n"
        elif testMode == 1:
            # Cross-validation: randomize, stratify (nominal class only),
            # then train and test one fold at a time.
            rnd = 1
            inst.randomize(rnd)
            if inst.attribute(classIndex).isNominal():
                inst.stratify(numFolds)
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst, plotInstances, False)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            for fold in range(numFolds):
                train = inst.trainCV(numFolds, fold, rnd)
                evaluation = self.setupEval(evaluation, classifier, train, plotInstances, True)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                current = deepcopy(classifier)
                current.buildClassifier(train)
                test = inst.testCV(numFolds, fold)
                # TODO: classifiers implementing BatchPredictor could be scored
                # in one call (see the sketch after this method).
                for jj in range(test.numInstances()):
                    plotInstances.process(test.instance(jj), current, evaluation)
            if inst.attribute(classIndex).isNominal():
                outPutResult += "=== Stratified cross-validation ===\n"
            else:
                outPutResult += "=== Cross-validation ===\n"
        elif testMode == 3:
            # Evaluate on the user-supplied test set.
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst, plotInstances, False)
            plotInstances.setInstances(userTestStructure)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            # TODO: classifiers implementing BatchPredictor could be scored
            # in one call (see the sketch after this method).
            testTimeStart = time.time()
            for i in range(userTestStructure.numInstances()):
                instance = userTestStructure.instance(i)
                plotInstances.process(instance, classifier, evaluation)
            testTimeElapsed = time.time() - testTimeStart
            outPutResult += "=== Evaluation on test set ===\n"

        if testMode != 1:
            mode = ""
            if testMode == 2:
                mode = "training data"
            elif testMode == 3:
                mode = "supplied test set"
            outPutResult += "\nTime taken to test model on " + mode + ": " + Utils.doubleToString(testTimeElapsed, 2) + " seconds\n\n"

        outPutResult += evaluation.toSummaryString(False) + '\n'
        self.m_History.updateResult(name, outPutResult)
        if inst.attribute(classIndex).isNominal():
            outPutResult += evaluation.toClassDetailsString() + '\n'
            outPutResult += evaluation.toMatrixString() + '\n'
        self.m_History.updateResult(name, outPutResult)
        Utils.debugOut(outPutResult)

        # Hand the plot data and auxiliary objects (training header, model
        # graph, predictions) to the history entry for later visualization.
        if plotInstances is not None and plotInstances.canPlot(False):
            visName = name + " (" + inst.relationName() + ")"
            pl2d = plotInstances.getPlotData(cname)
            plotInstances.cleanUp()
            vv = []
            trainHeader = Instances(self.m_Instances, 0)
            trainHeader.setClassIndex(classIndex)
            vv.append(trainHeader)
            if grph is not None:
                vv.append(grph)
            if evaluation is not None and evaluation.predictions() is not None:
                vv.append(evaluation.predictions())
                vv.append(inst.classAttribute())
            self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
    except Exception as e:
        self.error_diglog_signal.emit(str(e))
    finally:
        # Re-enable the start button and clear the worker reference,
        # whether the run succeeded or failed.
        self.mutex.lock()
        self.m_StartBut.setEnabled(True)
        self.m_StopBut.setEnabled(False)
        self.m_RunThread = None
        self.mutex.unlock()
        print("RunFinished")
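# A minimal sketch of the batch path the TODOs above leave open, assuming this
# port mirrors Weka's BatchPredictor interface
# (implementsMoreEfficientBatchPrediction(), distributionsForInstances(Instances))
# and the batch overload of plotInstances.process(Instances, preds, Evaluation).
# The helper name is hypothetical and not part of this codebase.
def _processAllInstances(self, data, classifier, plotInstances, evaluation):
    if isinstance(classifier, BatchPredictor) and \
            classifier.implementsMoreEfficientBatchPrediction():
        # One call scores the whole set; preds holds one class distribution per row.
        preds = classifier.distributionsForInstances(data)
        plotInstances.process(data, preds, evaluation)
    else:
        # Fallback: score row by row, exactly as the loops above do.
        for jj in range(data.numInstances()):
            plotInstances.process(data.instance(jj), classifier, evaluation)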
def clusterRunThread(self):
    self.m_CLPanel.addToHistory()
    inst = Instances(self.m_Instances)
    inst.setClassIndex(-1)
    plotInstances = ClustererAssignmentsPlotInstances()
    plotInstances.setClusterer(self.m_ClustererEditor.getValue())
    userTest = None
    if self.m_SetTestFrame is not None:
        if self.m_SetTestFrame.getInstances() is not None:
            userTest = Instances(self.m_SetTestFrame.getInstances())
    clusterer = self.m_ClustererEditor.getValue()
    outBuff = ""
    name = time.strftime("%H:%M:%S - ")
    cname = clusterer.__module__
    if cname.startswith("clusterers."):
        name += cname[len("clusterers."):]
    else:
        name += cname

    # Determine the evaluation mode from the radio buttons.
    if self.m_TrainBut.isChecked():
        testMode = 0
    elif self.m_TestSplitBut.isChecked():
        testMode = 1
        if userTest is None:
            raise Exception("No user test set has been opened")
        if not inst.equalHeaders(userTest):
            raise Exception("Train and test set are not compatible\n" + inst.equalHeadersMsg(userTest))
    else:
        raise Exception("Unknown test mode")

    trainInst = Instances(inst)

    # Assemble the run-information header.
    outBuff += "=== Run information ===\n\n"
    outBuff += "Scheme: " + cname
    outBuff += "\n"
    outBuff += "Relation: " + inst.relationName() + '\n'
    outBuff += "Instances: " + str(inst.numInstances()) + '\n'
    outBuff += "Attributes: " + str(inst.numAttributes()) + '\n'
    if inst.numAttributes() < 100:
        for i in range(inst.numAttributes()):
            outBuff += " " + inst.attribute(i).name() + '\n'
    else:
        outBuff += " [list of attributes omitted]\n"
    outBuff += "Test mode: "
    if testMode == 0:
        outBuff += "evaluate on training data\n"
    elif testMode == 1:
        outBuff += "user supplied test set: " + str(userTest.numInstances()) + " instances\n"
    outBuff += '\n'
    self.m_History.addResult(name, outBuff)
    self.m_History.setSingle(name)

    # Build the clusterer on the training data with the class attribute removed.
    trainTimeStart = time.time()
    if isinstance(clusterer, Clusterer):
        clusterer.buildClusterer(self.removeClass(trainInst))
    trainTimeElapsed = time.time() - trainTimeStart
    outBuff += "\n=== Clustering model (full training set) ===\n\n"
    outBuff += str(clusterer) + '\n'
    outBuff += "\nTime taken to build model (full training data) : " \
               + Utils.doubleToString(trainTimeElapsed, 2) \
               + " seconds\n\n"
    self.m_History.updateResult(name, outBuff)

    evaluation = ClusterEvaluation()
    evaluation.setClusterer(clusterer)
    if testMode == 0:
        evaluation.evaluateClusterer(trainInst, False)
        plotInstances.setInstances(inst)
        plotInstances.setClusterEvaluation(evaluation)
        outBuff += "=== Model and evaluation on training set ===\n\n"
    elif testMode == 1:
        userTestT = Instances(userTest)
        evaluation.evaluateClusterer(userTestT, False)
        plotInstances.setInstances(userTest)
        plotInstances.setClusterEvaluation(evaluation)
        outBuff += "=== Evaluation on test set ===\n"
    else:
        raise Exception("Test mode not implemented")
    outBuff += evaluation.clusterResultsToString()
    outBuff += '\n'
    self.m_History.updateResult(name, outBuff)

    # Hand the plot data to the history entry for later visualization.
    if plotInstances is not None and plotInstances.canPlot(True):
        visName = name + " (" + inst.relationName() + ")"
        pl2d = plotInstances.getPlotData(name)
        plotInstances.cleanUp()
        vv = []
        trainHeader = Instances(self.m_Instances, 0)
        vv.append(trainHeader)
        self.history_add_visualize_signal.emit(name, vv, visName, pl2d)

    self.m_RunThread = None
    self.m_StartBut.setEnabled(True)
    self.m_StopBut.setEnabled(False)
    # Utils.debugOut(outBuff)
    print("Run Finished")
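# removeClass() is called above but defined elsewhere in this panel. A minimal
# sketch of what it typically does in Weka's ClustererPanel: strip the class
# attribute via the Remove filter before clustering. The sketch assumes the
# port ships a Remove filter and Filter.useFilter with Weka-like signatures;
# the name _removeClassSketch is hypothetical to avoid shadowing the real method.
def _removeClassSketch(self, inst):
    if inst.classIndex() < 0:
        # No class attribute set: cluster on the data as-is.
        return inst
    remove = Remove()
    # Attribute indices are 1-based in the filter's option string.
    remove.setAttributeIndices(str(inst.classIndex() + 1))
    remove.setInvertSelection(False)
    remove.setInputFormat(inst)
    return Filter.useFilter(inst, remove)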