def ExecuteEvaluationRun(runSpecification, xTrainRaw, yTrain, numberOfFolds=2):
    print("runSpecification: ", runSpecification)

    startTime = time.time()

    # HERE upgrade this to use crossvalidation
    featurizer = SMSSpamFeaturize.SMSSpamFeaturize()
    featurizer.CreateVocabulary(
        xTrainRaw, yTrain,
        numFrequentWords=runSpecification['numFrequentWords'],
        numMutualInformationWords=runSpecification['numMutualInformationWords'])

    xTrain = featurizer.Featurize(xTrainRaw)
    # xValidateRaw and yValidate come from the enclosing script scope.
    xValidate = featurizer.Featurize(xValidateRaw)

    if numberOfFolds > 1:
        crossValidationAccuracy = []
        for i in range(numberOfFolds):
            xTrainI, yTrainI, xEvaluateI, yEvaluateI = CrossValidation.CrossValidation(
                xTrain, yTrain, numberOfFolds, i)

            model = LogisticRegression.LogisticRegression()
            model.fit(xTrainI, yTrainI,
                      convergence=runSpecification['convergence'],
                      stepSize=runSpecification['stepSize'],
                      verbose=False)

            crossValidationAccuracy.append(
                EvaluateBinaryClassification.Accuracy(
                    yEvaluateI, model.predict(xEvaluateI)))

        mean = np.mean(crossValidationAccuracy)
        runSpecification['crossValidationMean'] = mean

        lower, _ = ErrorBounds.GetAccuracyBounds(mean, len(yEvaluateI), .5)
        runSpecification['crossValidationErrorBound'] = mean - lower

    if numberOfFolds == 1:
        model = LogisticRegression.LogisticRegression()
        model.fit(xTrain, yTrain,
                  convergence=runSpecification['convergence'],
                  stepSize=runSpecification['stepSize'],
                  verbose=False)

        validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
            yValidate, model.predict(xValidate))
        runSpecification['accuracy'] = validationSetAccuracy

        lower, _ = ErrorBounds.GetAccuracyBounds(
            validationSetAccuracy, len(yValidate), .5)
        runSpecification['accuracyErrorBound'] = validationSetAccuracy - lower

    endTime = time.time()
    # Record the runtime for every run, not just cross-validation runs.
    runSpecification['runtime'] = endTime - startTime

    return runSpecification
def ExecuteEvaluationRun(runSpecification, xTrain, yTrain, numberOfFolds=2):
    print("runSpecification: ", runSpecification)

    startTime = time.time()

    if numberOfFolds > 1:
        crossValidationAccuracy = []
        for i in range(numberOfFolds):
            xTrainI, yTrainI, xEvaluateI, yEvaluateI = CrossValidation.CrossValidation(
                xTrain, yTrain, numberOfFolds, i)

            model = DecisionTree.DecisionTree()
            model.fit(xTrainI, yTrainI, maxDepth=runSpecification["maxDepth"])

            crossValidationAccuracy.append(
                EvaluateBinaryClassification.Accuracy(
                    yEvaluateI, model.predict(xEvaluateI)))

        mean = np.mean(crossValidationAccuracy)
        runSpecification['crossValidationMean'] = mean

        lower, _ = ErrorBounds.GetAccuracyBounds(mean, len(yEvaluateI), .95)
        runSpecification['crossValidationErrorBound'] = mean - lower

    if numberOfFolds == 1:
        # Single-fold runs train on the full training set and score the held-out
        #  validation set (xValidate and yValidate come from the enclosing script scope).
        model = DecisionTree.DecisionTree()
        model.fit(xTrain, yTrain, maxDepth=runSpecification["maxDepth"])

        validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
            yValidate, model.predict(xValidate))
        runSpecification['accuracy'] = validationSetAccuracy

        lower, _ = ErrorBounds.GetAccuracyBounds(
            validationSetAccuracy, len(yValidate), .95)
        runSpecification['accuracyErrorBound'] = validationSetAccuracy - lower

        # Mirror the cross-validation fields so downstream tabulation and charting
        #  can treat both run types uniformly.
        runSpecification['crossValidationMean'] = validationSetAccuracy
        runSpecification['crossValidationErrorBound'] = validationSetAccuracy - lower

    endTime = time.time()
    runSpecification['runtime'] = endTime - startTime

    return runSpecification
def ExecuteEvaluationRun(runSpecification, xTrain, yTrain, numberOfFolds=2):
    print("runSpecification: ", runSpecification)

    startTime = time.time()

    if numberOfFolds > 1:
        crossValidationAccuracy = []
        for i in range(numberOfFolds):
            xTrainI, yTrainI, xEvaluateI, yEvaluateI = CrossValidation.CrossValidation(
                xTrain, yTrain, numberOfFolds, i)

            model = LogisticRegression.LogisticRegression()
            model.fit(xTrainI, yTrainI,
                      convergence=runSpecification['convergence'],
                      stepSize=runSpecification['stepSize'],
                      verbose=False)

            crossValidationAccuracy.append(
                EvaluateBinaryClassification.Accuracy(
                    yEvaluateI, model.predict(xEvaluateI)))

        mean = np.mean(crossValidationAccuracy)
        runSpecification['crossValidationMean'] = mean

        lower, _ = ErrorBounds.GetAccuracyBounds(mean, len(yEvaluateI), .5)
        runSpecification['crossValidationErrorBound'] = mean - lower

    if numberOfFolds == 1:
        # xValidate and yValidate come from the enclosing script scope.
        model = LogisticRegression.LogisticRegression()
        model.fit(xTrain, yTrain,
                  convergence=runSpecification['convergence'],
                  stepSize=runSpecification['stepSize'],
                  verbose=False)

        validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
            yValidate, model.predict(xValidate))
        runSpecification['accuracy'] = validationSetAccuracy

        lower, _ = ErrorBounds.GetAccuracyBounds(
            validationSetAccuracy, len(yValidate), .5)
        runSpecification['accuracyErrorBound'] = validationSetAccuracy - lower

    endTime = time.time()
    # Record the runtime for every run, not just cross-validation runs.
    runSpecification['runtime'] = endTime - startTime

    return runSpecification
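## A sketch of how this helper might be driven in a parameter sweep. The step size values,
##  fold count, and chart settings below are illustrative placeholders; Charting and
##  kOutputDirectory are assumed to be in scope as elsewhere in this file.
stepSizeSweep = [0.1, 1.0, 10.0]
sweepResults = []
for stepSize in stepSizeSweep:
    runSpecification = {}
    runSpecification['stepSize'] = stepSize
    runSpecification['convergence'] = 0.005
    sweepResults.append(ExecuteEvaluationRun(runSpecification, xTrain, yTrain, numberOfFolds=5))

means = [result['crossValidationMean'] for result in sweepResults]
errorBars = [result['crossValidationErrorBound'] for result in sweepResults]
Charting.PlotSeriesWithErrorBars([means], [errorBars], ["cross validation accuracy"], stepSizeSweep,
                                 chartTitle="Step Size Sweep", xAxisTitle="Step Size",
                                 yAxisTitle="Accuracy", yBotLimit=0.8,
                                 outputDirectory=kOutputDirectory, fileName="StepSizeSweep")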
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting

xValues = [i + 1 for i in range(len(trainLosses))]

Charting.PlotSeries([trainLosses, validationLosses],
                    ["Train Loss", "Validate Loss"],
                    xValues,
                    useMarkers=False,
                    chartTitle="Pytorch First Modeling Run",
                    xAxisTitle="Epoch",
                    yAxisTitle="Loss",
                    yBotLimit=0.0,
                    outputDirectory=kOutputDirectory,
                    fileName="PyTorch-Initial-TrainValidate")

##
# Evaluate the Model
##
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds

model.train(mode=False)

yTestPredicted = model(xTest)

testAccuracy = EvaluateBinaryClassification.Accuracy(
    yTest, [1 if pred > 0.5 else 0 for pred in yTestPredicted])

print("Accuracy simple:", testAccuracy,
      ErrorBounds.GetAccuracyBounds(testAccuracy, len(yTestPredicted), 0.95))
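## A sketch of how the trainLosses / validationLosses series charted above could be
##  collected. The optimizer settings, epoch count, and MSE loss here are illustrative
##  assumptions (not the settings of the original modeling run); xTrain, yTrain,
##  xValidate, yValidate, and model are assumed to be in scope.
trainLosses = []
validationLosses = []
lossFunction = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)

for epoch in range(500):
    # One gradient step on the training set.
    model.train(mode=True)
    yTrainPredicted = model(xTrain)
    trainLoss = lossFunction(yTrainPredicted, yTrain)

    optimizer.zero_grad()
    trainLoss.backward()
    optimizer.step()

    # Track the validation loss without updating the weights.
    model.train(mode=False)
    with torch.no_grad():
        validationLoss = lossFunction(model(xValidate), yValidate)

    trainLosses.append(trainLoss.item())
    validationLosses.append(validationLoss.item())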
def ExecuteFitting(runSpecification, xTrain, yTrain, xValidate, yValidate):
    startTime = time.time()

    # Create the model
    model = BlinkNeuralNetwork.BlinkNeuralNetwork(hiddenNodes=6, hiddenNodesTwo=4)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device is:", device)
    model.to(device)

    # Move the data onto whichever device was selected
    xTrain = xTrain.to(device)
    yTrain = yTrain.to(device)
    xValidate = xValidate.to(device)
    yValidate = yValidate.to(device)

    converged = False
    epoch = 1
    lastLoss = None
    convergence = runSpecification['convergence']

    optimizer = torch.optim.SGD(model.parameters(), lr=runSpecification['learning_rate'])
    lossFunction = torch.nn.MSELoss(reduction='mean')
    patience = 0

    while not converged and epoch < 5000:
        # Do the forward pass
        yTrainPredicted = model(xTrain)
        trainLoss = lossFunction(yTrainPredicted, yTrain)

        # Reset the gradients in the network to zero
        optimizer.zero_grad()

        # Backprop the errors from the loss on this iteration
        trainLoss.backward()

        # Do a weight update step
        optimizer.step()

        loss = trainLoss.item()

        # Stop once the epoch-over-epoch loss change falls below the convergence
        #  threshold. Because patience starts at 0, the `patience >= 0` check passes
        #  on the first small change, so this is a simple loss-delta stopping rule.
        if epoch > 10 and lastLoss is not None and abs(lastLoss - loss) < convergence:
            if patience >= 0:
                converged = True
            else:
                patience += 1
        else:
            lastLoss = loss
            patience = 0

        epoch = epoch + 1

    # Switch to evaluation mode before scoring the validation set.
    model.train(mode=False)

    endTime = time.time()
    runSpecification['runtime'] = endTime - startTime
    runSpecification['epoch'] = epoch

    yValidatePredicted = model(xValidate)
    validAccuracy = EvaluateBinaryClassification.Accuracy(
        yValidate, [1 if pred > 0.5 else 0 for pred in yValidatePredicted])
    runSpecification['accuracy'] = validAccuracy

    num_samples = len(xValidate)
    (low_bound, high_bound) = ErrorBounds.GetAccuracyBounds(validAccuracy, num_samples, 0.5)
    errorBound = (high_bound - low_bound) / 2
    runSpecification['50PercentBound'] = errorBound

    return runSpecification
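## A sketch of how ExecuteFitting might be driven over a small sweep. The convergence
##  values, learning rate, and chart settings below are illustrative placeholders;
##  Charting and kOutputDirectory are assumed to be in scope as elsewhere in this file.
convergenceSweep = [0.01, 0.001, 0.0001]
sweepResults = []
for convergenceValue in convergenceSweep:
    runSpecification = {}
    runSpecification['convergence'] = convergenceValue
    runSpecification['learning_rate'] = 0.1
    sweepResults.append(ExecuteFitting(runSpecification, xTrain, yTrain, xValidate, yValidate))

accuracies = [result['accuracy'] for result in sweepResults]
errorBars = [result['50PercentBound'] for result in sweepResults]
Charting.PlotSeriesWithErrorBars([accuracies], [errorBars], ["validation accuracy"], convergenceSweep,
                                 chartTitle="Convergence Sweep", xAxisTitle="Convergence",
                                 yAxisTitle="Accuracy", yBotLimit=0.65,
                                 outputDirectory=kOutputDirectory, fileName="ConvergenceSweep")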
## this code outputs the true concept.
visualize = Visualize2D.Visualize2D(kOutputDirectory, "4-Generated Concept")
visualize.Plot2DDataAndBinaryConcept(xTest, yTest, concept)
visualize.Save()

bestModel = None
kValues = [1, 10, 25, 50, 100]
maxDepth = 1

accuracies = []
errorBarsAccuracy = []

for kv in kValues:
    model = BoostedTree.BoostedTree()
    model.fit(xTrain, yTrain, maxDepth=maxDepth, k=kv)

    accuracy = EvaluateBinaryClassification.Accuracy(yTest, model.predict(xTest))
    lower, upper = ErrorBounds.GetAccuracyBounds(accuracy, len(yTest), .5)
    print(kv, ": ", accuracy)

    accuracies.append(accuracy)
    errorBarsAccuracy.append(accuracy - lower)

    if bestModel is None:
        bestModel = (model, upper)
    elif lower > bestModel[1]:
        bestModel = (model, upper)

Charting.PlotSeriesWithErrorBars([accuracies], [errorBarsAccuracy],
                                 ["k-round tuning accuracy"], kValues,
                                 chartTitle="Line/Circle Concept Accuracy",
                                 xAxisTitle="Boosting Rounds",
                                 yAxisTitle="Test Accuracy",
                                 yBotLimit=0.5,
                                 outputDirectory=kOutputDirectory,
                                 fileName="4-BoostingTreeRoundTuning")

## you can use this to visualize what your model is learning.
accuracy = EvaluateBinaryClassification.Accuracy(yTest, bestModel[0].predict(xTest))
lower, upper = ErrorBounds.GetAccuracyBounds(accuracy, len(yTest), .95)
print("accuracy: ", lower, "-", upper)

visualize = Visualize2D.Visualize2D(kOutputDirectory, "4-My Boosted Tree")
xTest = featurizer.Featurize(xTestRaw)

for i in range(10):
    print("%d - " % (yTrain[i]), xTrain[i])

############################

import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.MLUtilities.Learners.MostCommonClassModel as MostCommonClassModel

model = MostCommonClassModel.MostCommonClassModel()
model.fit(xTrain, yTrain)

yValidatePredicted = model.predict(xValidate)
validateAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, yValidatePredicted)
errorBounds = ErrorBounds.GetAccuracyBounds(validateAccuracy, len(yValidate), 0.95)

print()
print("### 'Most Common Class' model validate set accuracy: %.4f (95%% %.4f - %.4f)"
      % (validateAccuracy, errorBounds[0], errorBounds[1]))

import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidation
import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting

import time
import numpy as np

## This helper function should execute a single run and save the results on 'runSpecification'
##  (which could be a dictionary for convenience) for later tabulation and charting...
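## For example, a single run specification for the logistic regression sweep might look
##  like this (the values are illustrative placeholders, not tuned settings):
exampleRunSpecification = {
    'numFrequentWords': 0,
    'numMutualInformationWords': 25,
    'stepSize': 1.0,
    'convergence': 0.005
}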
                  convergence=convergence, stepSize=stepSize, verbose=True)

######

### Use equation 5.1 from Mitchell to bound the validation set error and the true error
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds

print("Logistic regression with 25 features by mutual information:")

validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
    yValidate, frequentModel.predict(xValidate))
print("Validation set accuracy: %.4f." % (validationSetAccuracy))

for confidence in [.5, .8, .9, .95, .99]:
    (lowerBound, upperBound) = ErrorBounds.GetAccuracyBounds(
        validationSetAccuracy, len(xValidate), confidence)
    print("  %.0f%% accuracy bound: %.4f - %.4f" % (confidence * 100, lowerBound, upperBound))

### Compare to the most common class model here...
mostCommonModel = MostCommonClassModel.MostCommonClassModel()
mostCommonModel.fit(xTrain, yTrain)

print("Most common class model:")

validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
    yValidate, mostCommonModel.predict(xValidate))
print("Validation set accuracy: %.4f." % (validationSetAccuracy))

for confidence in [.5, .8]:
    (lowerBound, upperBound) = ErrorBounds.GetAccuracyBounds(validationSetAccuracy,
xTrain = featurizer.Featurize(xTrainRaw)
xValidate = featurizer.Featurize(xValidateRaw)
xTest = featurizer.Featurize(xTestRaw)

bestModelBT = None
kValues = [1, 10, 50, 100, 150]
maxDepth = 1

validationAccuracies = []
validationAccuracyErrorBounds = []
trainingAccuracies = []
trainingAccuracyErrorBounds = []

for kv in kValues:
    model = BoostedTree.BoostedTree()
    model.fit(xTrain, yTrain, maxDepth=maxDepth, k=kv)

    validationAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, model.predict(xValidate))
    lower, upper = ErrorBounds.GetAccuracyBounds(validationAccuracy, len(yValidate), .5)

    trainingAccuracy = EvaluateBinaryClassification.Accuracy(yTrain, model.predict(xTrain))
    lowerTrain, upperTrain = ErrorBounds.GetAccuracyBounds(trainingAccuracy, len(yTrain), .5)

    validationAccuracies.append(validationAccuracy)
    validationAccuracyErrorBounds.append(validationAccuracy - lower)
    trainingAccuracies.append(trainingAccuracy)
    trainingAccuracyErrorBounds.append(trainingAccuracy - lowerTrain)

    print("k: ", kv, " accuracy: ", lower, "-", upper)

    if bestModelBT is None:
        bestModelBT = (model, lower, upper, kv)
    elif lower > bestModelBT[2]:
        bestModelBT = (model, lower, upper, kv)

print("boosted tree - k-rounds: ", bestModelBT[3], " accuracy: ", bestModelBT[1], "-", bestModelBT[2])
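## A sketch of charting the training vs. validation accuracies gathered above, to make the
##  overfitting gap visible as the number of boosting rounds grows. The chart title, axis
##  limits, and file name are placeholders; Charting and kOutputDirectory are assumed to be
##  in scope as elsewhere in this file.
Charting.PlotSeriesWithErrorBars([trainingAccuracies, validationAccuracies],
                                 [trainingAccuracyErrorBounds, validationAccuracyErrorBounds],
                                 ["training accuracy", "validation accuracy"], kValues,
                                 chartTitle="Boosted Tree Rounds vs Accuracy",
                                 xAxisTitle="Boosting Rounds",
                                 yAxisTitle="Accuracy",
                                 yBotLimit=0.8,
                                 outputDirectory=kOutputDirectory,
                                 fileName="BoostedTreeRoundSweep")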
seriesFPRs = []
seriesFNRs = []
seriesLabels = []

errorImages = {}

for i in range(num_trials):
    errorImages[i] = []

    model = BlinkNeuralNetwork.LeNet(imageSize=xTrain[0].shape[1],
                                     convFilters=[(12, 6), (18, 5)],
                                     fcLayers=[20, 10])
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=step)

    acc = trainModel(model, optimizer, maxEpoch, 25,
                     "LossEpochFinal12-6-x-x-20-10-{}".format(i))

    lower, _ = ErrorBounds.GetAccuracyBounds(acc, len(yValidate), 0.95)
    validationAccuracyResults.append((acc, acc - lower))

    yTestPredicted = model(xTest)
    testAccuracy = EvaluateBinaryClassification.Accuracy(
        yTest, [1 if pred > 0.5 else 0 for pred in yTestPredicted])
    lowerTest, _ = ErrorBounds.GetAccuracyBounds(testAccuracy, len(yTest), 0.95)
    testAccuracyResults.append((testAccuracy, testAccuracy - lowerTest))

    yTestPredict = [1 if pred > 0.5 else 0 for pred in yTestPredicted]
    for j in range(len(yTest)):
        if int(yTest[j]) != yTestPredict[j]:
            errorImages[i].append(xTestRaw[j])

(modelFPRs, modelFNRs,
model.to(device)

xTrain = xTrain.to(device)
yTrain = yTrain.to(device)
xValidate = xValidate.to(device)
yValidate = yValidate.to(device)

model.train_model_persample(xTrain, yTrain)
# model.train_model(xTrain, yTrain)

print("Accuracy and Error Bounds:")

yValidatePredicted = model.predict(xValidate)
validAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, yValidatePredicted)
print(validAccuracy)

num_samples = len(xValidate)
(low_bound, high_bound) = ErrorBounds.GetAccuracyBounds(validAccuracy, num_samples, 0.5)
errorBound = (high_bound - low_bound) / 2
print(errorBound)

# learning_error_series = []
# learning_valid_series = []
# learning_series = []

# converg_error_series = []
# converg_valid_series = []
# converg_series = []
#
# log_convert = {0.1: 0, 0.01: 1, 0.001: 2, 0.0001: 3, 0.00001: 4}

# Charting.PlotSeriesWithErrorBars([converg_valid_series], [converg_error_series], ["Accuracy"], [converg_series], chartTitle="<NN Accuracy on Validation Data>", xAxisTitle="<converg>", yAxisTitle="<Accuracy>", yBotLimit=0.65, outputDirectory=kOutputDirectory, fileName="converg_sweep")
# Charting.PlotSeriesWithErrorBars([learning_valid_series], [learning_error_series], ["Accuracy"], [learning_series], chartTitle="<NN Accuracy on Validation Data>", xAxisTitle="<learning>", yAxisTitle="<Accuracy>", yBotLimit=0.65, outputDirectory=kOutputDirectory, fileName="learning_sweep")