def calculateLearningCurve():
    """Plot a learning curve for a gradient boosting classifier.

    Loads the training data through ``dataReader``, converts the target
    feature to numeric form, extracts the regular features and hands the
    result to ``plot_learning_curve`` together with five training-set sizes.
    Returns nothing.
    """
    model = classifierSelector.constructGradientBoostingClassifier()

    # Feature engineering: numeric target first, then the feature/label split.
    # NOTE(review): the meaning of the ``True`` flag is defined by
    # getRegularFeatures, which is not visible here -- confirm.
    rawData = dataReader.getTrainData()
    numericData = featureExtractor.convertTargetFeatureToNumeric(rawData)
    features, labels = featureExtractor.getRegularFeatures(numericData, True)

    # Five evenly spaced integer sizes between 100000 and 500000 (inclusive).
    sizes = np.linspace(100000, 500000, 5, dtype=int)

    plot_learning_curve(model, features, labels, sizes)
def constructTrainingData(trainDataSize):
    """Build the feature matrix and target vector used for training.

    Reads ``trainDataSize`` training examples through ``dataReader``, adds
    the rows returned by ``dataReader.getSuffixDataFrame()`` after them,
    converts the target feature to numeric form and extracts the regular
    features.

    Args:
        trainDataSize: forwarded unchanged to ``dataReader.getTrainData``.

    Returns:
        The ``(xTrain, yTrain)`` pair produced by
        ``regularFeatExtr.getRegularFeatures``.
    """
    # Imported locally so this function does not rely on a module-level
    # ``pd`` alias being present in the file.
    import pandas as pd

    # Training data plus the suffix rows. ``DataFrame.append`` was deprecated
    # in pandas 1.4 and removed in 2.0; ``pd.concat`` performs the same
    # row-wise concatenation and keeps the original indices.
    # NOTE(review): assumes both readers return pandas objects -- confirm.
    trainData = dataReader.getTrainData(trainDataSize)
    trainData = pd.concat([trainData, dataReader.getSuffixDataFrame()])

    # Feature engineering.
    # NOTE(review): the meaning of the ``True`` flag is defined by
    # getRegularFeatures, which is not visible here -- confirm.
    trainData = regularFeatExtr.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = regularFeatExtr.getRegularFeatures(trainData, True)
    return xTrain, yTrain
def trainClassifierOnTrainingDataReturnAll(numberOfTrainingExamples = -1):
    """Train a classifier and return it together with its training inputs.

    Args:
        numberOfTrainingExamples: forwarded to ``dataReader.getTrainData``.
            NOTE(review): the default ``-1`` presumably means "no limit";
            verify against the reader.

    Returns:
        A ``(classifier, xTrain, yTrain)`` tuple: the object returned by
        ``classifierSelector.trainClassifier`` and the features/targets it
        was trained on.
    """
    loaded = dataReader.getTrainData(numberOfTrainingExamples)

    # Feature engineering: numeric target, then feature/label extraction.
    prepared = featureExtractor.convertTargetFeatureToNumeric(loaded)
    features, labels = featureExtractor.getRegularFeatures(prepared, True)

    # Classifier training.
    model = classifierSelector.trainClassifier(features, labels)
    return model, features, labels
def calculateValidationCurve():
    """Plot a validation curve over ``learning_rate`` for gradient boosting.

    Uses 50000 training examples requested from ``dataReader`` and passes
    the candidate values 0.1, 0.13 and 0.16 to ``plot_validation_curve``.
    Returns nothing.
    """
    model = classifierSelector.constructGradientBoostingClassifier()

    sampleCount = 50000
    rawData = dataReader.getTrainData(sampleCount)

    # Feature engineering: numeric target, then feature/label extraction.
    numericData = featureExtractor.convertTargetFeatureToNumeric(rawData)
    features, labels = featureExtractor.getRegularFeatures(numericData, True)

    # Candidate values for the parameter under study.
    candidateRates = [0.1, 0.13, 0.16]
    plot_validation_curve(model, features, labels, "learning_rate", candidateRates)
def trainClassifierOnTrainingData(trainData=None, numberOfTrainingExamples = -1, margins=None):
    """Train and return a classifier.

    Args:
        trainData: training data to use as-is; when ``None`` the data is
            loaded through ``dataReader.getTrainData``.
        numberOfTrainingExamples: forwarded to ``dataReader.getTrainData``;
            only used when ``trainData`` is ``None``.
        margins: forwarded to ``dataReader.getTrainData``; only used when
            ``trainData`` is ``None``.

    Returns:
        The classifier returned by ``classifierSelector.trainClassifier``.
    """
    frame = trainData
    if frame is None:
        # No data supplied by the caller: fall back to the reader.
        frame = dataReader.getTrainData(numberOfTrainingExamples, margins)

    # Feature engineering: numeric target, then feature/label extraction.
    frame = regularFeatExtr.convertTargetFeatureToNumeric(frame)
    features, labels = regularFeatExtr.getRegularFeatures(frame, True)

    # Classifier training.
    return classifierSelector.trainClassifier(features, labels)
def constructTestData(testData):
    """Turn raw test data into the ``(xTest, yTest)`` pair.

    Args:
        testData: raw test data; passed through
            ``regularFeatExtr.convertTargetFeatureToNumeric`` before feature
            extraction.

    Returns:
        The ``(xTest, yTest)`` pair produced by
        ``regularFeatExtr.getRegularFeatures``.
    """
    numericData = regularFeatExtr.convertTargetFeatureToNumeric(testData)
    # The second argument is ``False`` here, unlike the training helpers.
    # NOTE(review): presumably a "training mode" switch -- confirm.
    features, labels = regularFeatExtr.getRegularFeatures(numericData, False)
    return features, labels