Example #1
def SVMModel(train_x, train_y, val_x, val_y, testX, testY):
    # Build the SVM model, selecting the best kernel on the validation set (C fixed at 5).
    best_score = 0
    best_kernel = 'linear'
    kernel_types = ['linear', 'poly', 'rbf']
    svm_kernel_error = []
    for kernel_value in kernel_types:
        model = svm.SVC(kernel=kernel_value, C=5)
        model.fit(X=train_x, y=train_y)
        score = model.score(val_x, val_y)
        svm_kernel_error.append(1 - score)
        if score > best_score:
            best_score = score
            best_kernel = kernel_value

    print("Predict the score for the training data set")
    model = svm.SVC(kernel=best_kernel, C=5)
    model.fit(X=train_x, y=train_y)
    score = model.score(val_x, val_y)
    print("ScorePrinted:", score)
    print("Presenting results for test data set")
    y_pred = model.predict(testX)
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred)

    print("Confusion Matrix: ")
    print(conf_matrix)
    print("Average Accuracy: {}\n".format(accuracy))
    print("Per-Class Precision: {}]\n".format(precision_array))
    print("Per-Class Recall: {}".format(recall_array))
    return (svm_kernel_error, accuracy * 100, max(recall_array) * 100,
            max(precision_array) * 100)
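Every example in this collection unpacks the same helper, func_confusion_matrix, which is not included in the snippets (Example #12 notes this explicitly). A minimal sketch of what such a helper might look like, assuming it returns the confusion matrix, overall accuracy, and the per-class recall and precision arrays in that order:

import numpy as np

def func_confusion_matrix(y_true, y_pred):
    # Hypothetical reconstruction, inferred from the call sites in these examples:
    # returns (conf_matrix, accuracy, recall_array, precision_array).
    labels = np.unique(np.concatenate([np.asarray(y_true), np.asarray(y_pred)]))
    index = {label: i for i, label in enumerate(labels)}
    conf_matrix = np.zeros((len(labels), len(labels)), dtype=int)
    for t, p in zip(y_true, y_pred):
        conf_matrix[index[t], index[p]] += 1  # rows = true class, columns = predicted class
    accuracy = np.trace(conf_matrix) / conf_matrix.sum()
    # Guard against empty rows/columns so per-class metrics do not divide by zero.
    with np.errstate(divide='ignore', invalid='ignore'):
        recall_array = np.nan_to_num(conf_matrix.diagonal() / conf_matrix.sum(axis=1))
        precision_array = np.nan_to_num(conf_matrix.diagonal() / conf_matrix.sum(axis=0))
    return conf_matrix, accuracy, recall_array, precision_array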
Example #2
def LogisticRegressionModel(train_x, train_y, val_x, val_y, testX, testY):
    C_param_range = [0.001, 0.01, 0.1, 1, 10, 100]

    score = []
    accuracy = []
    y_pred_all = []
    for i in C_param_range:

        clf = LogisticRegression(C=i)
        # Train the classifier on the training split.
        clf.fit(train_x, train_y)
        # Score on the validation samples.
        score.append(clf.score(val_x, val_y))
        y_pred = clf.predict(testX)
        y_pred_all.append(y_pred)
        accuracy.append(accuracy_score(testY, y_pred))

    # Select C by validation score rather than test accuracy to avoid test-set leakage.
    best_idx = np.argmax(score)
    y_pred_best = y_pred_all[best_idx]
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred_best)

    print("Confusion Matrix: {} \n".format(conf_matrix))
    print("Accuracy with the test data: {} \n".format(accuracy))
    print("Per-Class Precision is: {} \n".format(precision_array))
    print("Per-Class Recall rate: {} \n".format(recall_array))

    # Refit with the selected C so the ROC curve reflects the chosen model.
    clf = LogisticRegression(C=C_param_range[best_idx])
    clf.fit(train_x, train_y)
    logit_roc_auc = roc_auc_score(testY, y_pred_best)
    fpr, tpr, thresholds = roc_curve(testY, clf.predict_proba(testX)[:, 1])
    plt.figure()
    plt.plot(fpr,
             tpr,
             label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig('Log_ROC')
    plt.show()
    return (accuracy * 100, max(recall_array) * 100,
            max(precision_array) * 100)
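A quick way to visualize the C selection above (a sketch using the validation scores already collected inside the function; plt and the C grid come from the surrounding code):

plt.figure()
plt.semilogx(C_param_range, score, marker='o')  # C spans several orders of magnitude
plt.xlabel('C (log scale)')
plt.ylabel('Validation accuracy')
plt.title('Logistic Regression: validation accuracy vs C')
plt.show()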
Example #3
def GaussianNBModel(train_X, train_y, val_x, val_y, testX, testY):

    # Instantiating the GaussianNB model
    classifier = GaussianNB()

    # Training the model on the train dataset
    classifier.fit(train_X, train_y)

    # Testing the model on the test dataset
    y_pred = classifier.predict(testX)

    #Calculating the metrics for the evaluation of the model
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred)

    # Plotting the ROC Curve
    fpr, tpr, thresholds = roc_curve(testY, y_pred)  # true labels first, then scores
    roc_auc = auc(fpr, tpr)

    plt.figure()
    lw = 2
    plt.plot(fpr,
             tpr,
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for GaussianNB')
    plt.legend(loc="lower right")
    plt.show()

    return accuracy, precision_array, recall_array, conf_matrix
Example #4
def feed_forward_model(train_X, train_y, val_x, val_y, testX, testY):
    """Create a feed-forward model from a specification list.

    Each element of the list represents a layer and is a tuple of
    (layer class, positional args, keyword args), e.g.:

    [(Dense, [20], {'activation': 'relu', 'input_dim': M}),
     (Dense, [20], {'activation': 'relu', 'input_dim': 20}),
     (Dense, [N], {'activation': 'softmax', 'input_dim': 20})]
    """
    model = Sequential()
    # Keras only uses input_dim on the first layer; the values on the later
    # layers are ignored, so they are kept here only for readability.
    model_list = [
        [(Dense, [100], {'activation': 'relu', 'input_dim': train_X.shape[1]}),
         (Dense, [200], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [200], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [2], {'activation': 'softmax', 'input_dim': 100})],

        [(Dense, [500], {'activation': 'relu', 'input_dim': train_X.shape[1]}),
         (Dense, [1000], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [2], {'activation': 'softmax', 'input_dim': 100})],

        [(Dense, [100], {'activation': 'relu', 'input_dim': train_X.shape[1]}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [100], {'activation': 'relu', 'input_dim': 100}),
         (Dense, [2], {'activation': 'softmax', 'input_dim': 100})],
    ]
    for item in model_list[2]:  # use the third (deepest) specification
        layertype = item[0]
        if len(item) < 3:
            layer = layertype(*item[1])
        else:
            layer = layertype(*item[1], **item[2])
        model.add(layer)

    model.compile(optimizer="Adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])
    model.fit(train_X, to_categorical(train_y), verbose=0)
    model_eval_result = model.evaluate(val_x, to_categorical(val_y), verbose=False)
    print("Loss value", model_eval_result[0])
    print("Accuracy", model_eval_result[1])
    y_pred_nn = model.predict(testX)
    y_pred_nn_val = np.argmax(y_pred_nn, axis=1)  # argmax alone picks the class; rounding is unnecessary
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred_nn_val)
    print("Confusion Matrix: /n {}".format(conf_matrix))
    print("Accuracy with the test data: {}".format(accuracy))
    print("Per-Class Precision is: {}".format(precision_array))
    print("Per-Class Recall rate: {}".format(recall_array))
    return (accuracy * 100, max(recall_array) * 100,
            max(precision_array) * 100)
Example #5
third_model.fit(x_train,
                to_categorical(y_train),
                batch_size=256,
                epochs=3,
                verbose=1,
                validation_data=(x_valid, to_categorical(y_valid)))
predict_third_model = third_model.predict(x_valid)
third_model_predictions = [
    np.argmax(predictions) for predictions in predict_third_model
]

print("Model 1")
print(first_model_predictions[0], first_model_predictions[1],
      first_model_predictions[2], first_model_predictions[3])

cm_1, acc_1, recall_1, precision_1 = func_confusion_matrix(
    y_valid, first_model_predictions)
print("cm: ", cm_1, ",\n acc: ", acc_1, ",\n recall: ", recall_1,
      ",\n precision: ", precision_1)

print("Model 2")
print(predict_second_model[0], predict_second_model[1],
      predict_second_model[2], predict_second_model[3])

cm_2, acc_2, recall_2, prediction_2 = func_confusion_matrix(
    y_valid, second_model_predictions)
print("cm: ", cm_2, "\n, acc: ", acc_2, "\n, recall: ", recall_2,
      "\n, prediction: ", prediction_2)

print("Model 3")
print(predict_third_model[0], predict_third_model[1], predict_third_model[2],
      predict_third_model[3])
Example #6
def AdaBoostModel(train_X, train_y, val_x, val_y, testX, testY):

    # To use an SVM as the weak learner instead of the default decision
    # stump, uncomment this and pass base_estimator=svc to AdaBoostClassifier.
    #svc = SVC(probability = True, kernel = 'linear')

    n_estimators_list = [25, 50, 100, 200]
    accuracy_list = []  # renamed so it does not shadow sklearn's accuracy_score

    for n in n_estimators_list:

        # Running AdaBoost Model using the default base_estimator
        adaClassifier = AdaBoostClassifier(n_estimators=n, learning_rate=1)

        # Training the model using the training dataset
        model = adaClassifier.fit(train_X, train_y)

        # Testing the model to predict labels from the test dataset
        y_pred = model.predict(testX)

        # Calculating metrics for the model evaluation
        conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
            testY, y_pred)

        print("Confusion Matrix: ")
        print(conf_matrix)

        print("Average Accuracy: {}\n".format(accuracy))
        accuracy_list.append(accuracy)

        print("Per-Class Precision: {}]\n".format(precision_array))

        print("Per-Class Recall: {}".format(recall_array))

        if n == 50:

            print("ROC Curve for 50 estimators: \n")
            fpr, tpr, thresholds = roc_curve(testY, y_pred)  # true labels first
            roc_auc = auc(fpr, tpr)

            plt.figure()
            lw = 2
            plt.plot(fpr,
                     tpr,
                     color='darkorange',
                     lw=lw,
                     label='ROC curve (area = %0.2f)' % roc_auc)
            plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curve for AdaBoost')
            plt.legend(loc="lower right")
            plt.show()

    # Graph to compare the accuracy with the number of estimators
    plt.figure()
    plt.plot(n_estimators_list, accuracy_list)
    plt.ylim([0.0, 1.0])
    plt.xlabel('Number of estimators')
    plt.ylabel('Accuracy')
    plt.show()
    return accuracy_list[2]  # accuracy with 100 estimators
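As the comment at the top of this example suggests, AdaBoost can also be run over an SVM weak learner. A minimal sketch of that variant (an assumption, not part of the original; probability=True is needed so the default SAMME.R algorithm can use class probabilities, and recent scikit-learn versions spell the argument estimator rather than base_estimator):

from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

# Hypothetical variant: boost a linear SVM instead of the default decision stump.
svc = SVC(probability=True, kernel='linear')
ada_svc = AdaBoostClassifier(base_estimator=svc, n_estimators=50, learning_rate=1)
ada_svc.fit(train_X, train_y)
y_pred_svc = ada_svc.predict(testX)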
Example #7
# model 1: one hidden layer, relu activation, softmax output
model1 = Sequential()
model1.add(Dense(128, activation='relu', input_shape=(784,)))
model1.add(Dense(num_classes, activation='softmax'))

model1.compile(loss='categorical_crossentropy',
               optimizer=RMSprop(),
               metrics=['accuracy'])
print(model1.summary())

model1.fit(xtrain, ytrain,
           batch_size=batch_size,
           epochs=epochs,
           verbose=1)

# predict_classes was removed in recent Keras; take the argmax of predict instead.
ypred = np.argmax(model1.predict(xval), axis=1)

conf, acc, rec, prec = func_confusion_matrix(yval,ypred)
print(conf)
print(acc)
print(rec)
print(prec)


# model 2: two hidden layers, sigmoid activations, sigmoid output
model2 = Sequential()
model2.add(Dense(128, activation='sigmoid', input_shape=(784,)))
model2.add(Dropout(0.2))
model2.add(Dense(64, activation='sigmoid'))
model2.add(Dense(num_classes, activation='sigmoid'))
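The snippet ends before model 2 is compiled or trained. A plausible continuation mirroring model 1 (assumed, not from the source; softmax would be the more conventional output activation for categorical_crossentropy than the sigmoid used above):

model2.compile(loss='categorical_crossentropy',
               optimizer=RMSprop(),
               metrics=['accuracy'])
model2.fit(xtrain, ytrain, batch_size=batch_size, epochs=epochs, verbose=1)
ypred2 = np.argmax(model2.predict(xval), axis=1)
conf2, acc2, rec2, prec2 = func_confusion_matrix(yval, ypred2)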
Example #8
    # test set evaluation
    test_accuracy = sess.run(accuracy,
                             feed_dict={
                                 images_placeholder: data['images_test'],
                                 labels_placeholder: data['labels_test']
                             })
    print('Test accuracy {:g}'.format(test_accuracy))
    true_false_prediction, prediction_matrix = sess.run(
        report,
        feed_dict={
            images_placeholder: data['images_test'],
            labels_placeholder: data['labels_test']
        })

    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        prediction_matrix, y_test)  # func modified due to tensors
    print("confusion matrix\n", conf_matrix)
    print("accuracy: ", accuracy)
    print("recall_array", recall_array)
    print(precision_array)

    # Ten Images Model Made Errors
    error_count = 1  # subplot indices start at 1, not 0
    falsely_predicted_indexes = [
        i for i, x in enumerate(true_false_prediction) if not x
    ]  # x == 1 marks a correct prediction, so keep the falsy entries
    fig = plt.figure(None, (10, 10))
    for index in falsely_predicted_indexes:
        # convert vector.shape(748) back to (28,28) so we can see data
        image = np.reshape(x_test[index], (28, 28))
        fig.add_subplot(
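        # The original snippet ends mid-call; the rest of this loop is a
        # plausible completion, assumed from the surrounding code (subplot
        # positions are 1-indexed, hence error_count starting at 1).
            2, 5, error_count)
        plt.imshow(image, cmap='gray')
        plt.axis('off')
        error_count += 1
        if error_count > 10:  # "Ten Images Model Made Errors"
            break
    plt.show()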
Example #9
def kMeansModel(train_X, train_y, val_x, val_y, testX, testY):

    n_clusters = [2, 3, 4, 5]
    accuracy_list = []  # renamed so it does not shadow sklearn's accuracy_score

    for n in n_clusters:

        # Instantiating KMeans with n clusters
        kmeans = KMeans(n_clusters=n)

        # Training the model to form the clusters
        kmeans = kmeans.fit(train_X)

        # Predicting the labels for the test dataset
        y_pred = kmeans.predict(testX)

        # Calculating metrics for the model evaluation
        conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
            testY, y_pred)

        print("Evaluation metrics for {} clusters".format(n))
        print("Confusion Matrix: ")
        print(conf_matrix)

        print("Average Accuracy: {}\n".format(accuracy))
        accuracy_list.append(accuracy)

        print("Per-Class Precision: {}]\n".format(precision_array))

        print("Per-Class Recall: {}".format(recall_array))

        if n == 2:

            print("For 2 clusters, the ROC Curve is: \n")
            fpr, tpr, thresholds = roc_curve(testY, y_pred)  # true labels first
            roc_auc = auc(fpr, tpr)

            plt.figure()
            lw = 2
            plt.plot(fpr,
                     tpr,
                     color='darkorange',
                     lw=lw,
                     label='ROC curve (area = %0.2f)' % roc_auc)
            plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curve for k-Means')
            plt.legend(loc="lower right")
            plt.show()

    # Graph for the accuracy compared to the number of clusters
    plt.figure()
    plt.plot(n_clusters, accuracy_list)
    plt.ylim([0.0, 1.0])
    plt.xlabel('Number of clusters')
    plt.ylabel('Accuracy')
    plt.show()

    return accuracy_list[0]  # accuracy with 2 clusters
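One caveat with this example: k-means cluster indices are arbitrary, so comparing them directly against testY can understate the model's quality. A sketch of one common fix, mapping each cluster to the majority true label among its training members (an assumption, not part of the original; it presumes integer class labels in an array-like train_y):

import numpy as np

# Assign each cluster the most common true label among its training points.
train_clusters = kmeans.predict(train_X)
cluster_to_label = {}
for c in range(kmeans.n_clusters):
    members = train_y[train_clusters == c]
    # Empty clusters fall back to label 0.
    cluster_to_label[c] = np.bincount(members).argmax() if len(members) else 0

y_pred_mapped = np.array([cluster_to_label[c] for c in kmeans.predict(testX)])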
Example #10
model3.compile(optimizer=RMSprop(learning_rate=0.01),  # older Keras versions spell this lr
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

model1.fit(x_train, y_train, epochs=5, validation_data=validation_data)
model2.fit(x_train, y_train, epochs=5, validation_data=validation_data)
model3.fit(x_train, y_train, epochs=5, validation_data=validation_data)

y_pred2 = model2.predict(x_test)

# Convert each row of class probabilities to a predicted label.
yli_pred2 = [int(np.argmax(row)) for row in y_pred2]

conf_matrix, accuracy, recall, precision = func_confusion_matrix(y_test, yli_pred2)

print("Confusion Matrix : \n", conf_matrix)
print("\n\nAccuracy : ", accuracy)
print("\n\n")
for i in range(10):
    print("Class ", i, " Recall : ", recall[i])
print("\n\n")
for i in range(10):
    print("Class ", i, " Precision : ", precision[i])

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
Example #11
    for i in range(total_batch):
        Input_batch = x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        Output_batch = y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        cost = FNN.train_step(Input_batch, Output_batch)
        avg_cost += cost / total_batch
    print("Epoch: {}  =====> Cost = {}".format(epoch, avg_cost))
# test model
predicted_output = FNN.predict(x_test, y_test)
correct_prediction = np.equal(np.argmax(predicted_output, 1), np.argmax(y_test, 1))
accuracy = np.sum(correct_prediction.astype(float)) / np.size(correct_prediction)
print('Accuracy is {}'.format(accuracy))

y_test_label = [x[0] for x in oe.inverse_transform(y_test)]
y_pred_label = [x[0] for x in oe.inverse_transform(predicted_output)]
conf_matrix, accuracy, recall_array, precision_array = util.func_confusion_matrix(
    y_test_label, y_pred_label)

wrong_img = np.not_equal(np.argmax(predicted_output, 1), np.argmax(y_test, 1))
wrong_img = np.where(wrong_img)[0]

fig, ax = plt.subplots(2, 5, figsize=(20, 10))
fig.subplots_adjust(hspace=0, wspace=0.05)
for count in range(10):
    img = np.copy(x_test[wrong_img[count]])
    img = np.asarray(np.reshape(img, [28, 28]))
    img = (img * 255).astype(np.uint8)
    img = Image.fromarray(img, 'L')
    ax[count % 2][count // 2].imshow(img, cmap='gray')
    ax[count % 2][count // 2].axis('off')
    title = "Pred: {} -- GT: {}".format(y_pred_label[wrong_img[count]],
                                        y_test_label[wrong_img[count]])
    ax[count % 2][count // 2].set_title(title, fontsize=25)
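The oe object used above is assumed to be a fitted one-hot encoder over the digit labels; a sketch of how it might have been set up earlier in the file (hypothetical, including the y_train_raw / y_test_raw names; older scikit-learn versions spell the flag sparse=False instead of sparse_output=False):

from sklearn.preprocessing import OneHotEncoder

# One-hot encode integer digit labels so inverse_transform can recover them later.
oe = OneHotEncoder(sparse_output=False)
y_train = oe.fit_transform(y_train_raw.reshape(-1, 1))
y_test = oe.transform(y_test_raw.reshape(-1, 1))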
Example #12
#############placeholder 4:testing  #######################

# best_kernel and best_c come from the model-selection step above, e.g.:
# best_kernel = 'linear'
# best_c = 9  # poly had many C values that tied for "best"
model = svm.SVC(kernel=best_kernel, C=best_c, gamma='scale')
model.fit(X=x_train, y=y_train)

#############placeholder end #######################

## step 5: evaluate your results in terms of accuracy, recall, or precision.

#############placeholder 5: metrics #######################
# func_confusion_matrix is not included here (see the sketch after Example #1).
# You might re-use this function for Part I.
y_pred = model.predict(X_test)
conf_matrix, accuracy, recall_array, precision_array = util.func_confusion_matrix(
    Y_test, y_pred)

print("Confusion Matrix: ")
print(conf_matrix)
print("Average Accuracy: {}".format(accuracy))
print("Per-Class Precision: {}".format(precision_array))
print("Per-Class Recall: {}".format(recall_array))

#############placeholder end #######################

#############placeholder 6: success and failure examples #######################
# Success samples: samples for which your model correctly predicts the labels
correct_sample = np.equal(y_pred, Y_test)
correct_sample = np.where(correct_sample)[0]

correct = pd.DataFrame(X_test[correct_sample])
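The failure half of this placeholder is not shown; a minimal sketch that mirrors the success case above (assumed, not from the source):

# Failure samples: test points the model labeled incorrectly.
incorrect_sample = np.not_equal(y_pred, Y_test)
incorrect_sample = np.where(incorrect_sample)[0]
incorrect = pd.DataFrame(X_test[incorrect_sample])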