"""
This Python script is part of the model development process. It instantiates
and trains three classification models that use gradient descent as the
learning algorithm, enabling mini-batch training.
"""
from ML_DataPipeline import DataPipeline
from sklearn.linear_model import SGDClassifier
import _pickle as pickle

labels = [0, 1]
epochs = 200
seed = 42
pipeline = DataPipeline("SGD", batch_size=96)

print("Support Vector Machine")
print("----------------------")
svm = SGDClassifier(verbose=0, random_state=seed)
class_flag = True
for epoch in range(epochs):
    if epoch % 20 == 0:
        print("Epoch: %d" % (epoch + 1))
    if epoch == (epochs - 1):
        print("Epoch: %d" % epochs)
    iterations = pipeline.get_iterations()
    for iters in range(iterations):
        x_train, y_train = pipeline.get_training_batch(iters)
        if class_flag:
            # The source is truncated here; the standard partial_fit idiom
            # is assumed: the first call must receive the full label set.
            svm.partial_fit(x_train, y_train, classes=labels)
            class_flag = False
        else:
            svm.partial_fit(x_train, y_train)
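
# The mini-batch idiom above can be exercised in isolation. The sketch below
# uses synthetic data; the shapes, batch count, and values are illustrative
# assumptions, not taken from the project.
import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.default_rng(42)
clf = SGDClassifier(random_state=42)
class_labels = [0, 1]

for batch in range(10):
    X = rng.normal(size=(96, 20))        # one mini-batch of features
    y = rng.integers(0, 2, size=96)      # binary targets
    if batch == 0:
        # classes= is required on the first partial_fit call so the model
        # knows the full label set before every class has been seen
        clf.partial_fit(X, y, classes=class_labels)
    else:
        clf.partial_fit(X, y)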
# `my_model` (a Keras CNN) and the EarlyStopping callback `es` referenced
# below are defined earlier in the original script, which is truncated here.
# mc = ModelCheckpoint('yourdirectory/your_model.h5', monitor='val_loss',
#                      mode='min', verbose=1, save_best_only=True)
# cb_list = [es, mc]

# compile model
my_model.compile(optimizer='sgd', loss='binary_crossentropy',
                 metrics=['accuracy'])

path = r"/Users/michaelperez/Desktop/Compilation/"
train_path = os.path.join(path, "train_images.pickle")
test_path = os.path.join(path, "test_images.pickle")

epochs = 1  # varied from 10 to 30 in steps of 10
pipeline = DataPipeline("CNN", batch_size=32,
                        train_file_path=train_path, test_file_path=test_path)

for epoch in range(epochs):
    if (epoch % 2) == 0:
        print("Epoch %d" % (epoch + 1))
    if epoch + 1 == epochs:
        print("Epoch %d" % epochs)
    iterations = pipeline.get_iterations()
    for iters in range(iterations):
        x_train, y_train = pipeline.get_training_batch(iters)
        # one gradient pass per mini-batch drawn from the pipeline
        my_model.fit(x_train, y_train, epochs=1, batch_size=32)

my_model.save('CNNClassifier_1_.h5')  # creates the HDF5 file 'CNNClassifier_1_.h5'
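
# The commented-out checkpointing above references callbacks that are not
# shown. The sketch below shows how those Keras callbacks are typically wired
# into fit(); the path, patience, and validation split are illustrative
# assumptions (TensorFlow's Keras API is assumed).
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

es = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1)
mc = ModelCheckpoint('your_model.h5', monitor='val_loss', mode='min',
                     verbose=1, save_best_only=True)
cb_list = [es, mc]

# The callbacks only see val_loss if validation data is supplied, e.g.:
# my_model.fit(x_train, y_train, epochs=10, batch_size=32,
#              validation_split=0.1, callbacks=cb_list)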
import pandas as pd
import numpy as np
import os
import _pickle as pickle

from ML_DataPipeline import DataPipeline

# Insert the path to the image files on your system here
path = r"C:\Users\YourUserName\Path"
train_path = os.path.join(path, "train_images.pickle")
test_path = os.path.join(path, "test_images.pickle")
nb_path = os.path.join(path, r"MLFinalProject-master\ML_Models_Saved\NBClassifier.pickle")
svm_path = os.path.join(path, r"MLFinalProject-master\ML_Models_Saved\SVMClassifier_1.pickle")

# Load the trained Multinomial Naive Bayes classifier
with open(nb_path, "rb") as model:
    nb_classifier = pickle.load(model)

pipeline = DataPipeline("NB", train_file_path=train_path, test_file_path=test_path)
partitions = pipeline.get_partitions()
random_batch = np.random.randint(0, partitions)  # randint's upper bound is exclusive
X_, y_ = pipeline.get_test_data(random_batch)

sample_image = X_[0].reshape((750, 750))  # flattened 750x750 grayscale image
sample_label = y_[0]
y_hat = nb_classifier.predict([X_[0]])
accuracy = nb_classifier.score(X_, y_)
results = pd.DataFrame({"Prediction": y_hat, "Actual": [sample_label]})

print("Project Marrow Beta Demo: Multinomial Naive Bayes")
print("-------------------------------------------------")
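
# The sample_image reshape above suggests the demo displays the sampled scan.
# A minimal visualization sketch follows; matplotlib is an assumed dependency
# and is not imported anywhere in the source.
import matplotlib.pyplot as plt

plt.imshow(sample_image, cmap="gray")
plt.title("Predicted: %s | Actual: %s" % (y_hat[0], sample_label))
plt.axis("off")
plt.show()

print(results)
print("Partition accuracy: %.3f" % accuracy)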
import glob
import os
import _pickle as pickle

from tensorflow.keras.models import load_model

from ML_DataPipeline import DataPipeline

# `path`, `train_path`, and `test_path` are defined earlier in the original
# script (truncated here), as in the demo above. The start of the statement
# below is also truncated; collecting the saved .pickle models alongside the
# .h5 models is assumed.
model_paths = (glob.glob(os.path.join(path, r"MLFinalProject\ML_Models_Saved\*.pickle"))
               + glob.glob(os.path.join(path, r"MLFinalProject\ML_Models_Saved\*.h5")))

models = dict()
accuracies = dict()
for model_path in model_paths:
    model_name = model_path.rsplit("\\", 1)[1].split(".")[0]
    if "pickle" in model_path:
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
    if "h5" in model_path:
        model = load_model(model_path, compile=False)
        model.compile(optimizer="sgd", loss="binary_crossentropy",
                      metrics=["accuracy"])
    models[model_name] = model
    accuracies[model_name] = list()

pipeline_CNN = DataPipeline("CNN", batch_size=24, partitions=12,
                            train_file_path=train_path, test_file_path=test_path)
pipeline_SGD = DataPipeline("SGD", batch_size=24, partitions=12,
                            train_file_path=train_path, test_file_path=test_path)
pipeline_NB = DataPipeline("NB", batch_size=24, partitions=12,
                           train_file_path=train_path, test_file_path=test_path)

partitions = pipeline_CNN.get_partitions()
for partition in range(partitions):
    X_test_CNN, y_test_CNN = pipeline_CNN.get_test_data(partition)
    X_test_NB, y_test_NB = pipeline_NB.get_test_data(partition)
    X_test_SGD, y_test_SGD = pipeline_SGD.get_test_data(partition)
    print("Partition: %d" % partition)
    for model_name, model in models.items():
        if "CNN" in model_name:
            # test_on_batch returns [loss, accuracy]; keep the accuracy
            score = model.test_on_batch(X_test_CNN, y_test_CNN)[1]
            accuracies[model_name].append(score)
        elif "NB" in model_name:
            # The source is truncated here; scoring the sklearn models with
            # .score() is assumed for this and the remaining branch.
            accuracies[model_name].append(model.score(X_test_NB, y_test_NB))
        else:
            accuracies[model_name].append(model.score(X_test_SGD, y_test_SGD))
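
# A natural follow-up, not shown in the source, is to aggregate the
# per-partition scores into a per-model summary; a sketch using pandas:
import pandas as pd

summary = pd.DataFrame(accuracies)              # one column per model
print(summary.describe().loc[["mean", "std"]])  # mean/std accuracy per model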
from ML_DataPipeline import DataPipeline


def main():
    # Verify Pipeline for Training Data Works #
    data_pipeline = DataPipeline()
    print("Training Methods Test")
    print("---------------------")
    train_check = []
    train_iterations = data_pipeline.get_iterations()
    # Get data in one epoch
    for iteration in range(train_iterations):
        features, labels = data_pipeline.get_training_batch(iteration)
        if len(features) == len(labels):
            train_check.append(True)
    print("Batch Dimensions Verification: %d" % (sum(train_check)))
    print("Training Iterations per Epoch: %d" % (train_iterations))
    if sum(train_check) == train_iterations:
        print("Test 1 --- Passed")
    try:
        data_pipeline = DataPipeline(train_file_path="DNE")
    except OSError:
        print("Test 2 --- Passed\n")

    # Verify Pipeline for Testing Data Works #
    data_pipeline = DataPipeline()
    print("Testing Methods Test")
    print("--------------------")
    test_partitions = data_pipeline.get_partitions()
    test_check = []
    # Get all testing data
    for partition in range(test_partitions):
        features, labels = data_pipeline.get_test_data(partition)
        if len(features) == len(labels):
            test_check.append(True)
    print("Testing Partitions: %d" % (test_partitions))
    print("Partition Dimensions Verification: %d" % (sum(test_check)))
    if sum(test_check) == test_partitions:
        print("Test 1 --- Passed")
    try:
        data_pipeline = DataPipeline(test_file_path="NULL")
        for partition in range(test_partitions):
            features, labels = data_pipeline.get_test_data(partition)
            if len(features) == len(labels):
                test_check.append(True)
    except OSError:
        print("Test 2 --- Passed\n")

    # Verify Edge Cases Handled #
    print("Exception & Warning Edge Cases")
    print("------------------------------")
    print("Batch Size Tests")
    try:
        data_pipeline = DataPipeline(batch_size=32 + 0.5)
    except ValueError:
        print("Test 1 --- Passed")
    data_pipeline = DataPipeline(batch_size=0)
    print("Test 2 --- Passed")
    data_pipeline = DataPipeline(batch_size=10000)
    print("Test 3 --- Passed\n")

    print("Partition Count Tests")
    try:
        data_pipeline = DataPipeline(partitions=10 + 0.5)
    except ValueError:
        print("Test 4 --- Passed")
    data_pipeline = DataPipeline(partitions=0)
    print("Test 5 --- Passed")
    data_pipeline = DataPipeline(partitions=10000)
    print("Test 6 --- Passed\n")


if __name__ == "__main__":
    main()
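
# The edge-case tests above imply a validation contract in the DataPipeline
# constructor: non-integer values raise ValueError (Tests 1 and 4), while
# out-of-range values only warn and still construct a pipeline (Tests 2/3
# and 5/6). A hypothetical sketch of that logic follows; DataPipeline's
# actual implementation is not shown in the source.
import warnings


def _validate_batch_size(batch_size, n_samples):
    if not isinstance(batch_size, int):
        # non-integer sizes fail fast with ValueError
        raise ValueError("batch_size must be an integer")
    if batch_size <= 0 or batch_size > n_samples:
        # out-of-range sizes warn and are clamped to a usable value
        warnings.warn("batch_size out of range; clamping to a valid value")
        return min(max(batch_size, 1), n_samples)
    return batch_size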