def ExecuteEvaluationRun(runSpecification, xTrainRaw, yTrain, numberOfFolds=2):
    """Evaluate one hyperparameter setting for logistic regression on SMS spam.

    runSpecification: dict carrying 'numFrequentWords',
        'numMutualInformationWords', 'convergence' and 'stepSize';
        result keys are written back into this same dict.
    xTrainRaw / yTrain: raw training texts and their labels.
    numberOfFolds: >1 runs k-fold cross validation on the training data;
        ==1 trains on all of it and scores on the module-level validation
        set (xValidateRaw / yValidate from enclosing scope).

    Returns the (mutated) runSpecification.
    """
    print("runSpecification: ", runSpecification)
    startTime = time.time()

    # TODO (from the original "HERE" note): the vocabulary is built on the
    # FULL training set before the folds are split, so feature selection
    # leaks information across folds.
    featurizer = SMSSpamFeaturize.SMSSpamFeaturize()
    featurizer.CreateVocabulary(
        xTrainRaw,
        yTrain,
        numFrequentWords=runSpecification['numFrequentWords'],
        numMutualInformationWords=runSpecification['numMutualInformationWords']
    )

    xTrain = featurizer.Featurize(xTrainRaw)

    if numberOfFolds > 1:
        crossValidationAccuracy = []
        for i in range(numberOfFolds):
            xTrainI, yTrainI, xEvaluateI, yEvaluateI = CrossValidation.CrossValidation(
                xTrain, yTrain, numberOfFolds, i)

            model = LogisticRegression.LogisticRegression()
            model.fit(xTrainI,
                      yTrainI,
                      convergence=runSpecification['convergence'],
                      stepSize=runSpecification['stepSize'],
                      verbose=False)

            crossValidationAccuracy.append(
                EvaluateBinaryClassification.Accuracy(
                    yEvaluateI, model.predict(xEvaluateI)))

        mean = np.mean(crossValidationAccuracy)
        runSpecification['crossValidationMean'] = mean
        # 50%-confidence lower bound. NOTE(review): the sample size used is
        # the LAST fold's evaluation set, not the total evaluated samples —
        # kept as in the original, but worth confirming upstream.
        lower, _ = ErrorBounds.GetAccuracyBounds(mean, len(yEvaluateI), .5)
        runSpecification['crossValidationErrorBound'] = mean - lower

    if numberOfFolds == 1:
        # Featurize the validation set only on the path that uses it
        # (xValidateRaw / yValidate come from the enclosing module scope).
        xValidate = featurizer.Featurize(xValidateRaw)

        model = LogisticRegression.LogisticRegression()
        model.fit(xTrain,
                  yTrain,
                  convergence=runSpecification['convergence'],
                  stepSize=runSpecification['stepSize'],
                  verbose=False)
        validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
            yValidate, model.predict(xValidate))

        runSpecification['accuracy'] = validationSetAccuracy
        lower, _ = ErrorBounds.GetAccuracyBounds(validationSetAccuracy,
                                                 len(yValidate), .5)
        runSpecification['accuracyErrorBound'] = validationSetAccuracy - lower

    # Fix: record runtime on every path. The original only set 'runtime'
    # when numberOfFolds > 1, leaving single-fold runs without it.
    endTime = time.time()
    runSpecification['runtime'] = endTime - startTime

    return runSpecification
Exemplo n.º 2
0
    def ExecuteEvaluationRun(runSpecification,
                             xTrain,
                             yTrain,
                             numberOfFolds=2):
        """Evaluate one decision-tree hyperparameter setting.

        runSpecification: dict carrying 'maxDepth'; result keys
            ('crossValidationMean', 'crossValidationErrorBound', and for the
            single-fold path also 'accuracy' / 'accuracyErrorBound') plus
            'runtime' are written back into it.
        numberOfFolds: >1 runs k-fold cross validation; ==1 trains on all of
            xTrain and scores on the enclosing-scope validation set.

        Returns the (mutated) runSpecification.
        """
        print("runSpecification: ", runSpecification)
        startTime = time.time()

        if numberOfFolds > 1:
            foldAccuracies = []
            for fold in range(numberOfFolds):
                (foldTrainX, foldTrainY,
                 foldEvalX, foldEvalY) = CrossValidation.CrossValidation(
                     xTrain, yTrain, numberOfFolds, fold)

                foldModel = DecisionTree.DecisionTree()
                foldModel.fit(foldTrainX,
                              foldTrainY,
                              maxDepth=runSpecification["maxDepth"])

                foldAccuracies.append(
                    EvaluateBinaryClassification.Accuracy(
                        foldEvalY, foldModel.predict(foldEvalX)))

            meanAccuracy = np.mean(foldAccuracies)
            # 95%-confidence lower bound, sized by the last fold's eval set.
            lowerBound, _ = ErrorBounds.GetAccuracyBounds(
                meanAccuracy, len(foldEvalY), .95)
            runSpecification['crossValidationMean'] = meanAccuracy
            runSpecification['crossValidationErrorBound'] = meanAccuracy - lowerBound

        if numberOfFolds == 1:
            # Single "fold": train on everything, score on the validation
            # set available from the enclosing scope.
            model = DecisionTree.DecisionTree()
            model.fit(xTrain, yTrain, maxDepth=runSpecification["maxDepth"])
            validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
                yValidate, model.predict(xValidate))

            lowerBound, _ = ErrorBounds.GetAccuracyBounds(
                validationSetAccuracy, len(yValidate), .95)
            errorBound = validationSetAccuracy - lowerBound

            runSpecification['accuracy'] = validationSetAccuracy
            runSpecification['accuracyErrorBound'] = errorBound
            # Mirror the cross-validation keys so downstream tabulation
            # works the same regardless of the fold count.
            runSpecification['crossValidationMean'] = validationSetAccuracy
            runSpecification['crossValidationErrorBound'] = errorBound

        runSpecification['runtime'] = time.time() - startTime

        return runSpecification
Exemplo n.º 3
0
def ExecuteEvaluationRun(runSpecification, xTrain, yTrain, numberOfFolds=2):
    """Evaluate one logistic-regression hyperparameter setting.

    runSpecification: dict carrying 'convergence' and 'stepSize'; result
        keys are written back into this same dict.
    xTrain / yTrain: already-featurized training data and labels.
    numberOfFolds: >1 runs k-fold cross validation; ==1 trains on all of
        xTrain and scores on the module-level validation set
        (xValidate / yValidate from enclosing scope).

    Returns the (mutated) runSpecification.
    """
    print("runSpecification: ", runSpecification)
    startTime = time.time()

    if numberOfFolds > 1:
        crossValidationAccuracy = []
        for i in range(numberOfFolds):
            xTrainI, yTrainI, xEvaluateI, yEvaluateI = CrossValidation.CrossValidation(
                xTrain, yTrain, numberOfFolds, i)

            model = LogisticRegression.LogisticRegression()
            model.fit(xTrainI,
                      yTrainI,
                      convergence=runSpecification['convergence'],
                      stepSize=runSpecification['stepSize'],
                      verbose=False)

            crossValidationAccuracy.append(
                EvaluateBinaryClassification.Accuracy(
                    yEvaluateI, model.predict(xEvaluateI)))

        mean = np.mean(crossValidationAccuracy)
        runSpecification['crossValidationMean'] = mean
        # 50%-confidence lower bound. NOTE(review): the sample size used is
        # the LAST fold's evaluation set, not the total evaluated samples —
        # kept as in the original, but worth confirming upstream.
        lower, _ = ErrorBounds.GetAccuracyBounds(mean, len(yEvaluateI), .5)
        runSpecification['crossValidationErrorBound'] = mean - lower

    if numberOfFolds == 1:
        model = LogisticRegression.LogisticRegression()
        model.fit(xTrain,
                  yTrain,
                  convergence=runSpecification['convergence'],
                  stepSize=runSpecification['stepSize'],
                  verbose=False)
        validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
            yValidate, model.predict(xValidate))

        runSpecification['accuracy'] = validationSetAccuracy
        lower, _ = ErrorBounds.GetAccuracyBounds(validationSetAccuracy,
                                                 len(yValidate), .5)
        runSpecification['accuracyErrorBound'] = validationSetAccuracy - lower

    # Fix: record runtime on every path. The original only set 'runtime'
    # when numberOfFolds > 1, leaving single-fold runs without it.
    endTime = time.time()
    runSpecification['runtime'] = endTime - startTime

    return runSpecification
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting

# 1-based epoch numbers for the x axis (one point per recorded loss value).
xValues = [i + 1 for i in range(len(trainLosses))]

# Plot training vs. validation loss per epoch for the initial PyTorch run.
# trainLosses / validationLosses / kOutputDirectory come from earlier in the
# script (not visible in this chunk).
Charting.PlotSeries([trainLosses, validationLosses],
                    ["Train Loss", "Validate Loss"],
                    xValues,
                    useMarkers=False,
                    chartTitle="Pytorch First Modeling Run",
                    xAxisTitle="Epoch",
                    yAxisTitle="Loss",
                    yBotLimit=0.0,
                    outputDirectory=kOutputDirectory,
                    fileName="PyTorch-Initial-TrainValidate")

##
# Evaluate the Model
##

import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds

# Take the network out of training mode before scoring the test set.
model.train(mode=False)
yTestPredicted = model(xTest)

# Threshold the network outputs at 0.5 to get hard 0/1 predictions.
testAccuracy = EvaluateBinaryClassification.Accuracy(
    yTest, [1 if pred > 0.5 else 0 for pred in yTestPredicted])
# Report accuracy plus its 95%-confidence bounds.
print("Accuracy simple:", testAccuracy,
      ErrorBounds.GetAccuracyBounds(testAccuracy, len(yTestPredicted), 0.95))
Exemplo n.º 5
0
    def ExecuteFitting(runSpecification, xTrain, yTrain, xValidate, yValidate):
        """Train the blink neural network for one hyperparameter setting.

        runSpecification: dict carrying 'convergence' and 'learning_rate';
            'runtime', 'epoch', 'accuracy' and '50PercentBound' are added.
        xTrain/yTrain/xValidate/yValidate: tensors; they are moved onto the
            selected device before use.

        Returns the (mutated) runSpecification.
        """
        startTime = time.time()

        # Create the model (fixed two-hidden-layer topology).
        model = BlinkNeuralNetwork.BlinkNeuralNetwork(hiddenNodes = 6, hiddenNodesTwo = 4)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("Device is:", device)

        model.to(device)

        # Move the data onto whichever device was selected
        xTrain = xTrain.to(device)
        yTrain = yTrain.to(device)
        xValidate = xValidate.to(device)
        yValidate = yValidate.to(device)

        converged = False
        epoch = 1
        lastLoss = None
        convergence = runSpecification['convergence']
        optimizer = torch.optim.SGD(model.parameters(), lr=runSpecification['learning_rate'])
        lossFunction = torch.nn.MSELoss(reduction='mean')
        patience = 0

        while not converged and epoch < 5000:
            # Forward pass.
            yTrainPredicted = model(xTrain)
            trainLoss = lossFunction(yTrainPredicted, yTrain)

            # Reset gradients, backprop the loss, take one SGD step.
            optimizer.zero_grad()
            trainLoss.backward()
            optimizer.step()

            loss = trainLoss.item()
            if epoch > 10 and lastLoss is not None and abs(lastLoss - loss) < convergence:
                # NOTE(review): 'patience >= 0' is always true, so training
                # stops the first time the loss delta drops below the
                # threshold; the 'patience += 1' branch is dead code. Kept
                # as-is to preserve the original stopping behavior — confirm
                # the intended patience threshold with the author.
                if patience >= 0:
                    converged = True
                else:
                    patience += 1
            else:
                lastLoss = loss
                patience = 0

            epoch = epoch + 1

        # Fix: switch to evaluation mode before scoring. The original called
        # model.train(mode=True), leaving the network in training mode for
        # inference (compare the evaluation code elsewhere in this file,
        # which uses mode=False).
        model.train(mode=False)

        endTime = time.time()

        runSpecification['runtime'] = endTime - startTime
        runSpecification['epoch'] = epoch

        # Fix: no gradients are needed for validation predictions.
        with torch.no_grad():
            yValidatePredicted = model(xValidate)
        validAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, [ 1 if pred > 0.5 else 0 for pred in yValidatePredicted ])
        runSpecification['accuracy'] = validAccuracy

        # Half-width of the 50%-confidence interval on validation accuracy.
        num_samples = len(xValidate)
        (low_bound, high_bound) = ErrorBounds.GetAccuracyBounds(validAccuracy, num_samples, 0.5)
        errorBound = (high_bound - low_bound) / 2
        runSpecification['50PercentBound'] = errorBound

        return runSpecification
Exemplo n.º 6
0
## this code outputs the true concept.
visualize = Visualize2D.Visualize2D(kOutputDirectory, "4-Generated Concept")
visualize.Plot2DDataAndBinaryConcept(xTest,yTest,concept)
visualize.Save()

# Sweep the number of boosting rounds (k) using depth-1 trees, tracking
# test-set accuracy with 50%-confidence error bars.
bestModel = None
kValues = [1, 10, 25, 50, 100]
maxDepth = 1
accuracies = []
errorBarsAccuracy = []
for kv in kValues:
    model = BoostedTree.BoostedTree()
    model.fit(xTrain, yTrain, maxDepth=maxDepth, k=kv)
    accuracy = EvaluateBinaryClassification.Accuracy(yTest, model.predict(xTest))
    lower, upper = ErrorBounds.GetAccuracyBounds(accuracy, len(yTest), .5)
    print(kv, ": ", accuracy)
    accuracies.append(accuracy)
    errorBarsAccuracy.append(accuracy-lower)
    # Replace the incumbent only when the new lower bound exceeds the
    # incumbent's stored upper bound, i.e. the bounds do not overlap.
    if bestModel is None:
        bestModel = (model, upper)
    elif lower > bestModel[1]:
        bestModel = (model, upper)

Charting.PlotSeriesWithErrorBars([accuracies], [errorBarsAccuracy], ["k-round tuning accuracy"], kValues, chartTitle="Line/Circle Concept Accuracy", xAxisTitle="Boosting Rounds", yAxisTitle="Test Accuracy", yBotLimit=0.5, outputDirectory=kOutputDirectory, fileName="4-BoostingTreeRoundTuning")

## you can use this to visualize what your model is learning.
# Report the selected model's 95%-confidence accuracy interval.
accuracy = EvaluateBinaryClassification.Accuracy(yTest, bestModel[0].predict(xTest))
lower, upper = ErrorBounds.GetAccuracyBounds(accuracy, len(yTest), .95)
print("accuracy: ", lower, "-", upper)
visualize = Visualize2D.Visualize2D(kOutputDirectory, "4-My Boosted Tree")
Exemplo n.º 7
0
# Featurize the held-out test split with the already-built vocabulary
# (featurizer comes from earlier in the script, outside this chunk).
xTest = featurizer.Featurize(xTestRaw)

# Spot-check the first few featurized training samples with their labels.
for i in range(10):
    print("%d - " % (yTrain[i]), xTrain[i])

############################
import MachineLearningCourse.MLUtilities.Evaluations.EvaluateBinaryClassification as EvaluateBinaryClassification
import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds
import MachineLearningCourse.MLUtilities.Learners.MostCommonClassModel as MostCommonClassModel

# Baseline: always predict the most common class seen in the training labels.
model = MostCommonClassModel.MostCommonClassModel()
model.fit(xTrain, yTrain)
yValidatePredicted = model.predict(xValidate)
validateAccuracy = EvaluateBinaryClassification.Accuracy(
    yValidate, yValidatePredicted)
# 95%-confidence interval around the baseline validation accuracy.
errorBounds = ErrorBounds.GetAccuracyBounds(validateAccuracy, len(yValidate),
                                            0.95)

print()
print(
    "### 'Most Common Class' model validate set accuracy: %.4f (95%% %.4f - %.4f)"
    % (validateAccuracy, errorBounds[0], errorBounds[1]))

import MachineLearningCourse.MLUtilities.Data.CrossValidation as CrossValidation
import MachineLearningCourse.MLUtilities.Learners.LogisticRegression as LogisticRegression
import MachineLearningCourse.MLUtilities.Visualizations.Charting as Charting
import time
import numpy as np


## This helper function should execute a single run and save the results on 'runSpecification' (which could be a dictionary for convienience)
#    for later tabulation and charting...
                      convergence=convergence,
                      stepSize=stepSize,
                      verbose=True)

    ######
    ### Use equation 5.1 from Mitchell to bound the validation set error and the true error
    import MachineLearningCourse.MLUtilities.Evaluations.ErrorBounds as ErrorBounds

    print("Logistic regression with 25 features by mutual information:")
    validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
        yValidate, frequentModel.predict(xValidate))
    print("Validation set accuracy: %.4f." % (validationSetAccuracy))
    for confidence in [.5, .8, .9, .95, .99]:
        (lowerBound,
         upperBound) = ErrorBounds.GetAccuracyBounds(validationSetAccuracy,
                                                     len(xValidate),
                                                     confidence)
        print(" %.2f%% accuracy bound: %.4f - %.4f" %
              (confidence, lowerBound, upperBound))

    ### Compare to most common class model here...
    mostCommonModel = MostCommonClassModel.MostCommonClassModel()
    mostCommonModel.fit(xTrain, yTrain)

    print("MostCommon regression model:")
    validationSetAccuracy = EvaluateBinaryClassification.Accuracy(
        yValidate, mostCommonModel.predict(xValidate))
    print("Validation set accuracy: %.4f." % (validationSetAccuracy))
    for confidence in [.5, .8]:
        (lowerBound,
         upperBound) = ErrorBounds.GetAccuracyBounds(validationSetAccuracy,
Exemplo n.º 9
0
    # Featurize all three splits with the vocabulary built earlier
    # (featurizer and the raw splits come from the enclosing, not-visible
    # part of this function).
    xTrain    = featurizer.Featurize(xTrainRaw)
    xValidate = featurizer.Featurize(xValidateRaw)
    xTest     = featurizer.Featurize(xTestRaw)

    # Sweep boosting rounds (k) with depth-1 trees; track validation and
    # training accuracy with 50%-confidence error bars.
    bestModelBT = None
    kValues = [1, 10, 50, 100, 150]
    maxDepth = 1
    validationAccuracies = []
    validationAccuracyErrorBounds = []
    trainingAccuracies = []
    trainingAccuracyErrorBounds = []
    for kv in kValues:
        model = BoostedTree.BoostedTree()
        model.fit(xTrain, yTrain, maxDepth=maxDepth, k=kv)
        validationAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, model.predict(xValidate))
        lower, upper = ErrorBounds.GetAccuracyBounds(validationAccuracy, len(yValidate), .5)
        trainingAccuracy = EvaluateBinaryClassification.Accuracy(yTrain, model.predict(xTrain))
        lowerTrain, upperTrain = ErrorBounds.GetAccuracyBounds(trainingAccuracy, len(yTrain), .5)

        validationAccuracies.append(validationAccuracy)
        validationAccuracyErrorBounds.append(validationAccuracy-lower)
        trainingAccuracies.append(trainingAccuracy)
        trainingAccuracyErrorBounds.append(trainingAccuracy-lowerTrain)

        print("k: ", kv, " accuracy: ", lower, "-", upper)
        # Replace the incumbent only when the new lower bound beats the
        # incumbent's stored upper bound (non-overlapping intervals).
        # bestModelBT tuple layout: (model, lower, upper, k).
        if bestModelBT is None:
            bestModelBT = (model, lower, upper, kv)
        elif lower > bestModelBT[2]:
            bestModelBT = (model, lower, upper, kv)

    print("boosted tree - k-rounds: ", bestModelBT[3], " accuracy: ", bestModelBT[1], "-", bestModelBT[2])
Exemplo n.º 10
0
seriesFPRs = []
seriesFNRs = []
seriesLabels = []

# Misclassified raw test images, keyed by trial index.
errorImages = {}

# Train the CNN num_trials times to measure run-to-run variance; many names
# here (num_trials, device, step, maxEpoch, trainModel, the data splits and
# the *Results lists) come from earlier in the script, outside this chunk.
for i in range(num_trials):
    errorImages[i] = []
    model = BlinkNeuralNetwork.LeNet(imageSize=xTrain[0].shape[1],
                                     convFilters=[(12, 6), (18, 5)],
                                     fcLayers=[20, 10])
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=step)
    # trainModel presumably returns this trial's validation accuracy — the
    # bound below is computed against len(yValidate). TODO confirm.
    acc = trainModel(model, optimizer, maxEpoch, 25,
                     "LossEpochFinal12-6-x-x-20-10-{}".format(i))
    lower, _ = ErrorBounds.GetAccuracyBounds(acc, len(yValidate), 0.95)
    validationAccuracyResults.append((acc, acc - lower))

    yTestPredicted = model(xTest)
    # Threshold the network outputs at 0.5 for hard 0/1 test predictions.
    testAccuracy = EvaluateBinaryClassification.Accuracy(
        yTest, [1 if pred > 0.5 else 0 for pred in yTestPredicted])
    lowerTest, _ = ErrorBounds.GetAccuracyBounds(testAccuracy, len(yTest),
                                                 0.95)
    testAccuracyResults.append((testAccuracy, testAccuracy - lowerTest))

    yTestPredict = [1 if pred > 0.5 else 0 for pred in yTestPredicted]
    # Remember the raw images this trial got wrong, for later inspection.
    for j in range(len(yTest)):
        if int(yTest[j]) != yTestPredict[j]:
            errorImages[i].append(xTestRaw[j])
    (modelFPRs, modelFNRs,
Exemplo n.º 11
0
# Move the model and all four data tensors onto the selected device
# (model and device are defined earlier in the script, outside this chunk).
model.to(device)
xTrain = xTrain.to(device)
yTrain = yTrain.to(device)
xValidate = xValidate.to(device)
yValidate = yValidate.to(device)

# Train with the per-sample update variant (batch variant left commented out).
model.train_model_persample(xTrain, yTrain)
# model.train_model(xTrain, yTrain)

print("Accuracy and Error Bounds:")
yValidatePredicted = model.predict(xValidate)
validAccuracy = EvaluateBinaryClassification.Accuracy(yValidate, yValidatePredicted)
print(validAccuracy)

# Half-width of the 50%-confidence interval on validation accuracy.
num_samples = len(xValidate)
(low_bound, high_bound) = ErrorBounds.GetAccuracyBounds(validAccuracy, num_samples, 0.5)
errorBound = (high_bound - low_bound) / 2
print(errorBound)

    # learning_error_series = []
    # learning_valid_series = []
    # learning_series = []

    # converg_error_series = []
    # converg_valid_series = []
    # converg_series = []

    # # log_convert = {0.1: 0, 0.01: 1, 0.001: 2, 0.0001: 3, 0.00001: 4}

    # Charting.PlotSeriesWithErrorBars([converg_valid_series], [converg_error_series], ["Accuracy"], [converg_series], chartTitle="<NN Accuracy on Validation Data>", xAxisTitle="<converg>", yAxisTitle="<Accuracy>", yBotLimit=0.65, outputDirectory=kOutputDirectory, fileName="converg_sweep")
    # Charting.PlotSeriesWithErrorBars([learning_valid_series], [learning_error_series], ["Accuracy"], [learning_series], chartTitle="<NN Accuracy on Validation Data>", xAxisTitle="<learning>", yAxisTitle="<Accuracy>", yBotLimit=0.65, outputDirectory=kOutputDirectory, fileName="learning_sweep")