"""
This Python script is part of the model development process.
It instantiates and trains three classification models that use
stochastic gradient descent (SGD) as the learning algorithm,
enabling mini-batch training.
"""

from ML_DataPipeline import DataPipeline
from sklearn.linear_model import SGDClassifier
import pickle

labels = [0, 1]
epochs = 200
seed = 42

pipeline = DataPipeline("SGD", batch_size=96)

print("Support Vector Machine")
print("----------------------")
svm = SGDClassifier(verbose=0, random_state=seed)
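# SGDClassifier's default hinge loss makes this a linear SVM trained by
# stochastic gradient descent, matching the header printed above.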
class_flag = True
for epoch in range(epochs):
    if epoch % 20 == 0:
        print("Epoch: %d" % (epoch + 1))
    elif epoch == epochs - 1:  # elif avoids a duplicate print on the final epoch
        print("Epoch: %d" % epochs)

    iterations = pipeline.get_iterations()
    for iters in range(iterations):
        x_train, y_train = pipeline.get_training_batch(iters)
        if class_flag:
            # (reconstructed) the first partial_fit call must declare the full label set
            svm.partial_fit(x_train, y_train, classes=labels)
            class_flag = False
        else:
            svm.partial_fit(x_train, y_train)
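
# The tail of this script was cut off; presumably the trained classifier is
# pickled, matching the SVMClassifier_1.pickle loaded by the demo script below.
with open("SVMClassifier_1.pickle", "wb") as model_file:
    pickle.dump(svm, model_file)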
# mc = ModelCheckpoint('yourdirectory/your_model.h5', monitor='val_loss',
#                      mode='min', verbose=1, save_best_only=True)
# cb_list = [es, mc]  # es: presumably an EarlyStopping callback defined alongside

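# NOTE: `my_model` is never defined in this fragment; its definition was lost
# in the splice, along with this script's imports. A minimal stand-in model,
# assuming tf.keras and the 750x750 single-channel images used elsewhere in
# this project:
import os

from ML_DataPipeline import DataPipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

my_model = Sequential([
    Conv2D(16, (3, 3), activation="relu", input_shape=(750, 750, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(1, activation="sigmoid"),  # one sigmoid unit to pair with binary_crossentropy
])
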
# compile model
my_model.compile(optimizer='sgd',
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

path = r"/Users/michaelperez/Desktop/Compilation/"
train_path = os.path.join(path, "train_images.pickle")
test_path = os.path.join(path, "test_images.pickle")
epochs = 1  # varied from 10 to 30 in steps of 10 during experimentation

pipeline = DataPipeline("CNN",
                        batch_size=32,
                        train_file_path=train_path,
                        test_file_path=test_path)
for epoch in range(epochs):
    if epoch % 2 == 0:
        print("Epoch %d" % (epoch + 1))
    elif epoch + 1 == epochs:  # elif avoids printing the final epoch twice
        print("Epoch %d" % epochs)
    iterations = pipeline.get_iterations()
    for iters in range(iterations):
        x_train, y_train = pipeline.get_training_batch(iters)
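        # epochs=1 here: one pass over this mini-batch per call, so the
        # outer loops above control the real epoch/iteration schedule.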
        my_model.fit(x_train, y_train, epochs=1, batch_size=32)

my_model.save('CNNClassifier_1_.h5')  # creates an HDF5 file 'CNNClassifier_1_.h5'
import os
import glob
import pickle

import numpy as np
import pandas as pd

# Assuming tf.keras, consistent with the .h5 model saves above:
from tensorflow.keras.models import load_model

from ML_DataPipeline import DataPipeline

# Insert the path to the image files on your system here.

path = r"C:\Users\YourUserName\Path"
train_path = os.path.join(path, "train_images.pickle")
test_path = os.path.join(path, "test_images.pickle")
nb_path = os.path.join(path, r"MLFinalProject-master\ML_Models_Saved\NBClassifier.pickle")

svm_path = os.path.join(path, r"MLFinalProject-master\ML_Models_Saved\SVMClassifier_1.pickle")

with open(nb_path, "rb") as model_file:
    nb_classifier = pickle.load(model_file)
pipeline = DataPipeline("NB", train_file_path=train_path, test_file_path=test_path)
partitions = pipeline.get_partitions()


# np.random.randint's upper bound is exclusive, so pass `partitions`
# to make every partition reachable.
random_batch = np.random.randint(0, partitions)
X_, y_ = pipeline.get_test_data(random_batch)

sample_image = X_[0].reshape((750, 750))
sample_label = y_[0]

y_hat = nb_classifier.predict([X_[0]])
accuracy = nb_classifier.score(X_, y_)
results = pd.DataFrame({"Prediction": y_hat, "Actual": [sample_label]})

print("Project Marrow Beta Demo: Multinomial Naive Bayes")
print("-------------------------------------------------")
# Reconstructed: the left-hand side of this statement was cut off. The loop
# below expects both pickled sklearn models and Keras .h5 models, so gather both:
model_paths = glob.glob(os.path.join(path, r"MLFinalProject\ML_Models_Saved\*.pickle")) + \
              glob.glob(os.path.join(path, r"MLFinalProject\ML_Models_Saved\*.h5"))

models = dict()
accuracies = dict()
for model_path in model_paths:  # renamed from `path` to avoid shadowing the directory above
    model_name = os.path.basename(model_path).split(".")[0]
    if model_path.endswith(".pickle"):
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
    elif model_path.endswith(".h5"):
        model = load_model(model_path, compile=False)
        model.compile(optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"])
    models[model_name] = model
    accuracies[model_name] = list()

pipeline_CNN = DataPipeline("CNN", batch_size=24, partitions=12, train_file_path=train_path, test_file_path=test_path)
pipeline_SGD = DataPipeline("SGD", batch_size=24, partitions=12, train_file_path=train_path, test_file_path=test_path)
pipeline_NB = DataPipeline("NB", batch_size=24, partitions=12, train_file_path=train_path, test_file_path=test_path)
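# Three pipelines over the same files: each formats batches for its model
# family (presumably flat feature vectors for NB and SGD, image tensors for
# the CNN), while matching batch/partition settings keep the test partitions aligned.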
partitions = pipeline_CNN.get_partitions()

for partition in range(partitions):
    X_test_CNN, y_test_CNN = pipeline_CNN.get_test_data(partition)
    X_test_NB, y_test_NB = pipeline_NB.get_test_data(partition)
    X_test_SGD, y_test_SGD = pipeline_SGD.get_test_data(partition)
    print("Partition: %d" %partition)

    for model_name, model in models.items():
        if "CNN" in model_name:
            score = model.test_on_batch(X_test_CNN, y_test_CNN)[1]
            accuracies[model_name].append(score)
        elif "NB" in model_name:
def main():
    # Verify Pipeline for Training Data Works #
    data_pipeline = DataPipeline()
    print("Training Methods Test")
    print("---------------------")
    train_check = []
    train_iterations = data_pipeline.get_iterations()
    # Get data in one epoch
    for iteration in range(train_iterations):
        features, labels = data_pipeline.get_training_batch(iteration)
        if len(features) == len(labels):
            train_check.append(True)
    print("Batch Dimensions Verification: %d" % (sum(train_check)))
    print("Training Iterations per Epoch: %d" % (train_iterations))
    if sum(train_check) == train_iterations:
        print("Test 1 --- Passed")
    try:
        data_pipeline = DataPipeline(train_file_path="DNE")
    except OSError:
        print("Test 2 --- Passed\n")

    # Verify Pipeline for Testing Data Works #
    data_pipeline = DataPipeline()
    print("Testing Methods Test")
    print("--------------------")
    test_partitions = data_pipeline.get_partitions()
    test_check = []
    # Get all testing data
    for partition in range(test_partitions):
        features, labels = data_pipeline.get_test_data(partition)
        if len(features) == len(labels):
            test_check.append(True)
    print("Testing Partitions: %d" % (test_partitions))
    print("Partition Dimensions Verification: %d" % (sum(test_check)))
    if sum(test_check) == test_partitions:
        print("Test 1 --- Passed")
    try:
        data_pipeline = DataPipeline(test_file_path="NULL")
        for partition in range(test_partitions):
            features, labels = data_pipeline.get_test_data(partition)
            if len(features) == len(labels):
                test_check.append(True)
    except OSError:
        print("Test 2 --- Passed\n")

    # Verify Edge Cases Handled #
    print("Exception & Warning Edge Cases")
    print("------------------------------")
    print("Batch Size Tests")
    try:
        data_pipeline = DataPipeline(batch_size=32 + 0.5)
    except ValueError:
        print("Test 1 --- Passed")
    data_pipeline = DataPipeline(batch_size=0)
    print("Test 2 --- Passed")
    data_pipeline = DataPipeline(batch_size=10000)
    print("Test 3 --- Passed\n")

    print("Partition Count Tests")
    try:
        data_pipeline = DataPipeline(partitions=10 + 0.5)
    except ValueError:
        print("Test 4 --- Passed")
    data_pipeline = DataPipeline(partitions=0)
    print("Test 5 --- Passed")
    data_pipeline = DataPipeline(partitions=10000)
    print("Test 6 --- Passed\n")