def repeatTheLearningProcess(self, bestGridSearched, set):
    """Rebuild an ANN learner from its best parameters and rerun the curves.

    The second value returned by ``self.getBestGridSearchedModel`` is used as
    keyword arguments for a fresh ``ANN.ANNLearner`` when the incoming learner
    reports ``learnerType == 'ANN'``. Any other learner type is passed through
    unchanged.

    Args:
        bestGridSearched: learner object exposing a ``learnerType`` attribute.
        set: dataset object exposing ``name``; its ``datasetNo`` attribute is
            overwritten (1 for names containing 'Income', 2 for names
            containing 'Wine') before the ANN learner is built.

    Returns:
        The learner that was handed to ``getLearningCurve`` and
        ``getComplexityCurve``.
    """
    _unused, tunedParams = self.getBestGridSearchedModel(bestGridSearched, set)

    learner = bestGridSearched
    if learner.learnerType == 'ANN':
        # First matching keyword wins, so 'Income' takes priority over 'Wine'.
        for keyword, number in (('Income', 1), ('Wine', 2)):
            if keyword in set.name:
                set.datasetNo = number
                break
        learner = ANN.ANNLearner(**tunedParams, datasetNo=set.datasetNo)

    self.getLearningCurve(learner, set)
    self.getComplexityCurve(learner, set)
    return learner
def repeatTheLearningProcess(self, bestGridSearched, set):
    """Re-create a learner with its best parameters and rerun the curves.

    The second value returned by ``self.getBestGridSearchedModel`` is used as
    keyword arguments for a fresh learner of the same kind as the incoming
    one. A learner whose ``learnerType`` is not one of 'KNN', 'DT', 'SVM',
    'Boosting' or 'ANN' is passed through unchanged.

    Args:
        bestGridSearched: learner object exposing a ``learnerType`` attribute.
        set: dataset object; its ``datasetNo`` is forwarded to the new learner.

    Returns:
        The learner that was handed to ``getLearningCurve`` and
        ``getComplexityCurve``.
    """
    _unused, tunedParams = self.getBestGridSearchedModel(bestGridSearched, set)

    learnerKind = bestGridSearched.learnerType
    # Dispatch table: learnerType string -> class used to rebuild the learner.
    learnerClasses = {
        'KNN': KNN.KNNLearner,
        'DT': DT.DTLearner,
        'SVM': SVM.SVMLearner,
        'Boosting': Boosting.BoostingLearner,
        'ANN': ANN.ANNLearner,
    }
    learnerClass = learnerClasses.get(learnerKind)

    refreshed = bestGridSearched
    if learnerClass is not None:
        # datasetNo is passed as an explicit keyword next to **tunedParams, so
        # a duplicate 'datasetNo' key in the parameters still raises TypeError.
        refreshed = learnerClass(**tunedParams, datasetNo=set.datasetNo)

    self.getLearningCurve(refreshed, set)
    self.getComplexityCurve(refreshed, set)
    return refreshed
def getLearningCurveAll(self, Datasets, isWithClusters=False):
    """Plot ANN learning curves and training-time curves for all datasets.

    Two figures are written under ``Figures/ANN/``:

    * ``All-Learning-Curve, Dataset <name>.png`` - train/test curves returned
      by ``self.getLearningCurve`` for every dataset, with shaded bands of
      one ``*_std`` either side of each mean.
    * ``All-Training-Time-Curve, Dataset <name>.png`` - the curve returned by
      ``self.getTrainingTimeCurve`` for every dataset.

    ``<name>`` is 'Income' or 'Wine' (taken from the last dataset), with
    ', 2' appended when ``isWithClusters`` is truthy.

    Args:
        Datasets: iterable of dataset objects exposing ``name``. Each object's
            ``datasetNo`` attribute is overwritten (1 for names containing
            'Income', 2 for names containing 'Wine').
        isWithClusters: selects the ', 2' suffix used in the file names.

    Raises:
        ValueError: if ``Datasets`` is empty, or if a dataset name contains
            neither 'Income' nor 'Wine'. Both are detected before anything
            is plotted.
    """
    # Materialise once: the datasets are walked twice, which would silently
    # yield nothing on the second pass for a one-shot iterator.
    datasets = list(Datasets)
    if not datasets:
        raise ValueError('getLearningCurveAll requires at least one dataset')

    palette = ['red', 'blue', 'green', 'orange', 'purple']
    # (name keyword, datasetNo, hidden_layer_sizes); the first match wins,
    # so 'Income' takes priority over 'Wine'.
    annConfigs = (('Income', 1, (11, )), ('Wine', 2, (15, )))

    # Resolve every dataset up front so an unknown name fails before any
    # figure is started, instead of reusing the previous dataset's learner.
    resolved = []
    for dataset in datasets:
        config = next(
            (entry for entry in annConfigs if entry[0] in dataset.name), None)
        if config is None:
            raise ValueError(
                "Cannot pick an ANN configuration for dataset %r: its name "
                "contains neither 'Income' nor 'Wine'" % (dataset.name,))
        resolved.append((dataset, config))

    # As in the original loop, the last dataset decides the figure name.
    thisSetName = resolved[-1][1][0]
    if isWithClusters:
        thisSetName += ', 2'

    # ---- Figure 1: learning curves ------------------------------------
    for index, (dataset, (_, datasetNo, hiddenLayers)) in enumerate(resolved):
        dataset.datasetNo = datasetNo
        learner = ANN.ANNLearner(datasetNo=dataset.datasetNo,
                                 hidden_layer_sizes=hiddenLayers)
        x, train_mean, train_std, test_mean, test_std = self.getLearningCurve(
            learner, dataset)
        # The score is not used here; the call is kept in case scoring has
        # side effects the rest of the pipeline relies on.
        self.scoreTestingSet(learner, dataset)

        lineColor = palette[index % len(palette)]
        plt.plot(x, train_mean, label=dataset.name, marker='o',
                 color=lineColor)
        plt.fill_between(x, train_mean - train_std, train_mean + train_std,
                         alpha=0.25)
        plt.plot(x, test_mean, marker='o', color=lineColor)
        plt.fill_between(x, test_mean - test_std, test_mean + test_std,
                         alpha=0.25)

    # NOTE(review): the style is selected after the curves are drawn - confirm
    # this ordering is intended, and verify the style name against the
    # installed Matplotlib (the bundled seaborn styles were renamed upstream).
    plt.style.use('seaborn-whitegrid')
    plt.ylabel('Score', fontsize=12)
    # Only the plain 'Income' name gets the tighter y-range; the clustered
    # variant ('Income, 2') falls through to the full range.
    if thisSetName == 'Income':
        plt.ylim(0.5, 1.05)
    else:
        plt.ylim(0.0, 1.05)
    plt.xlabel('Training set size', fontsize=12)
    plt.title('Learning curves for all ' + thisSetName.split(',')[0] +
              ' Datasets', fontsize=12, y=1.03)
    plt.legend()
    plt.savefig('Figures/ANN/All-Learning-Curve, Dataset ' + thisSetName +
                '.png')
    plt.close()

    # ---- Figure 2: training-time curves -------------------------------
    for index, (dataset, (_, datasetNo, _)) in enumerate(resolved):
        dataset.datasetNo = datasetNo
        # NOTE(review): this learner uses the default hidden_layer_sizes,
        # unlike the learning-curve pass above - confirm that is intended.
        learner = ANN.ANNLearner(datasetNo=dataset.datasetNo)
        sampleSizes, train_time = self.getTrainingTimeCurve(learner, dataset)
        score = self.scoreTestingSet(learner, dataset)
        print("Testing Score for " + dataset.name + " is: ", score)
        plt.plot(sampleSizes, train_time, label=dataset.name, marker='o',
                 color=palette[index % len(palette)])

    plt.style.use('seaborn-whitegrid')
    # The y-axis carries train_time here; the previous 'Score' label was
    # carried over from the learning-curve figure.
    plt.ylabel('Training Time', fontsize=12)
    plt.xlabel('Number of Training Samples', fontsize=12)
    plt.title('Training Time for all ' + thisSetName.split(',')[0] +
              ' Datasets', fontsize=12, y=1.03)
    plt.legend()
    plt.savefig('Figures/ANN/All-Training-Time-Curve, Dataset ' +
                thisSetName + '.png')
    plt.close()