from operator import itemgetter

import numpy as np

# Helper functions such as build_simple_classifier, build_mlp, test_nn,
# build_svm, model_predict, test_knn, test_rf, setup_gpu, get_data, and the
# plotting utilities are assumed to be defined elsewhere in this project.


def test_nn_epochs(data, verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron for
    overfitting by running each classifier for 1000 epochs while holding
    all other parameters constant.
    """
    params = {"epochs": [1000], "batch_size": [500]}

    # Build the models
    simple_classifier = build_simple_classifier(data["X_train"])
    mlp = build_mlp(data["X_train"])

    # Train and evaluate each model with the fixed parameter grid
    simple_classifier_results = test_nn(
        simple_classifier,
        data,
        'simple_classifier',
        params,
        verbose=verbose,
    )
    mlp_results = test_nn(
        mlp,
        data,
        'mlp',
        params,
        verbose=verbose,
    )

    # Visualize the training histories
    plot_history(simple_classifier_results, save_fig=True)
    plot_history(mlp_results, save_fig=True)

def test_nn_test_params(data, model_params=None, test_params=None,
                        verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron for
    various numbers of epochs and batch sizes. The learning rate, optimizer,
    l2-norm penalty, and dropout are held constant in this test set.
    """
    if not model_params:
        model_params = {
            "learning_rates": 0.001,
            "optimizers": "adam",
            "regularizer": None,
        }
    if not test_params:
        test_params = {
            "epochs": [1, 5, 10, 50, 100],
            "batch_size": [1, 5, 10, 50, 100],
        }

    # Build the models
    simple_classifier = build_simple_classifier(
        data["X_train"],
        learning_rate=model_params["learning_rates"],
        optimizer=model_params["optimizers"])
    mlp = build_mlp(data["X_train"],
                    learning_rate=model_params["learning_rates"],
                    optimizer=model_params["optimizers"])

    # Test the models over the epoch/batch-size grid
    simple_classifier_results = test_nn(
        simple_classifier,
        data,
        'simple_classifier',
        test_params,
        learning_rate=model_params["learning_rates"],
        optimizer=model_params["optimizers"],
        verbose=verbose)
    mlp_results = test_nn(mlp,
                          data,
                          'mlp',
                          test_params,
                          learning_rate=model_params["learning_rates"],
                          optimizer=model_params["optimizers"],
                          verbose=verbose)

    # Visualize the results as heatmaps
    plot_nn_heatmap(simple_classifier_results, plot_type="test", save_fig=True)
    plot_nn_heatmap(mlp_results, plot_type="test", save_fig=True)

    return simple_classifier_results, mlp_results

def main():
    # Set up GPU/CPU
    setup_gpu(gpu=False)

    # Get the input data
    data = get_data(data_choice="single_date")

    # Set up parameters
    epochs = [150]
    batch_size = [300]
    dropout = True
    regularizer = 0.0001
    optimizer = "adam"
    learning_rate = 0.001
    neighbours = [100]
    estimators = [100]

    # Test the simple classifier
    simple_classifier = build_simple_classifier(X_train=data["X_train"],
                                                learning_rate=learning_rate,
                                                optimizer=optimizer,
                                                regularizer=regularizer,
                                                dropout=dropout)
    simple_classifier_results = test_nn(
        model=simple_classifier,
        data=data,
        model_name='simple_classifier',
        params={
            "epochs": epochs,
            "batch_size": batch_size
        },
        optimizer=optimizer,
        learning_rate=learning_rate,
        regularization=regularizer,
        dropout=dropout,
        verbose=False,
    )

    # Test the MLP
    mlp = build_mlp(X_train=data["X_train"],
                    learning_rate=learning_rate,
                    optimizer=optimizer,
                    regularizer=regularizer,
                    dropout=dropout)
    mlp_results = test_nn(
        model=mlp,
        data=data,
        model_name='mlp',
        params={
            "epochs": epochs,
            "batch_size": batch_size
        },
        optimizer=optimizer,
        learning_rate=learning_rate,
        regularization=regularizer,
        dropout=dropout,
        verbose=False,
    )

    # Test the SVM
    svm_classifier = build_svm()
    svm_results = model_predict(
        model_name='svm',
        model=svm_classifier,
        data=data,
    )

    # Test KNN
    knn_results = test_knn(data=data, neighbours=neighbours, verbose=False)

    # Test RF
    rf_results = test_rf(data=data, estimators=estimators, verbose=False)

    # Compare the classifiers
    all_results = [
        simple_classifier_results, mlp_results, [svm_results], knn_results,
        rf_results
    ]

    # Print the accuracy of each classifier
    for result in all_results:
        print("{} ACCURACY: {}".format(
            result[0]["name"].upper().replace("_", " "),
            result[0]["accuracy"]))

    # Plot the results for all classifiers as a bar graph
    plot_all_results(all_results, save_fig=True)

def test_by_date(params=None, verbose=False):
    """
    Determines the accuracy of each classifier as a function of the date.
    """
    # Get the input data
    data_list = get_data(data_choice="by_date")

    # Set up parameters
    if not params:
        params = {
            "nn_params": {
                "epochs": [150],
                "batch_size": [300]
            },
            "neighbours": [5, 10],
            "estimators": [10, 50, 100]
        }

    # Set up empty result containers
    best_results = {
        "simple_classifier": [],
        "mlp": [],
        "svm": [],
        "knn": [],
        "rf": []
    }
    dates = []

    for data in data_list:
        # Skip this date if all of its training labels are identical
        if len(np.unique(data["y_train"])) == 1:
            continue

        # Test the simple classifier
        simple_classifier = build_simple_classifier(data["X_train"])
        simple_classifier_results = test_nn(
            simple_classifier,
            data,
            'simple_classifier',
            params["nn_params"],
            verbose=verbose,
        )

        # Test the MLP
        mlp = build_mlp(data["X_train"])
        mlp_results = test_nn(mlp,
                              data,
                              'mlp',
                              params["nn_params"],
                              verbose=verbose)

        # Test the SVM
        svm_classifier = build_svm()
        svm_results = model_predict(
            'svm',
            svm_classifier,
            data,
        )

        # Test KNN
        knn_results = test_knn(data, params["neighbours"], verbose=verbose)

        # Test RF
        rf_results = test_rf(data, params["estimators"], verbose=verbose)

        # Collect the results for this date
        all_results = [
            simple_classifier_results, mlp_results, [svm_results],
            knn_results, rf_results
        ]

        # Keep only the highest-accuracy result for each classifier
        for result_list in all_results:
            sorted_list = sorted(result_list,
                                 key=itemgetter("accuracy"),
                                 reverse=True)
            result = sorted_list[0]
            best_results[result["name"]].append(result["accuracy"] * 100)

        dates.append(data["date"])

    # Visualize the accuracy of each classifier over time
    plot_accuracy_by_date_subplot(dates, best_results, save_fig=True)

    return best_results

def test_nn_reg(data, model_params=None, test_params=None, verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron for
    different l2-norm penalty values, with and without dropout. The learning
    rate, optimizer, number of epochs, and batch size are held constant in
    this test set.
    """
    if not model_params:
        model_params = {
            "learning_rates": 0.01,
            "optimizers": "adam",
            "regularizer": [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001],
            "dropout": [True, False]
        }
    if not test_params:
        test_params = {"epochs": [100], "batch_size": [500]}

    all_simple_results = []
    all_mlp_results = []
    for regularizer in model_params["regularizer"]:
        for dropout in model_params["dropout"]:
            # Build the models
            simple_classifier = build_simple_classifier(
                data["X_train"],
                learning_rate=model_params["learning_rates"],
                optimizer=model_params["optimizers"],
                regularizer=regularizer,
                dropout=dropout,
            )
            mlp = build_mlp(data["X_train"],
                            learning_rate=model_params["learning_rates"],
                            optimizer=model_params["optimizers"],
                            regularizer=regularizer,
                            dropout=dropout)

            # Test the models
            if verbose:
                print("SIMPLE CLASSIFIER - REGULARIZER {}, DROPOUT {}".format(
                    regularizer, dropout))
            simple_classifier_results = test_nn(
                simple_classifier,
                data,
                'simple_classifier',
                test_params,
                model_params["optimizers"],
                model_params["learning_rates"],
                regularizer,
                dropout,
                verbose=verbose)
            if verbose:
                print("MLP - REGULARIZER {}, DROPOUT {}".format(
                    regularizer, dropout))
            mlp_results = test_nn(mlp,
                                  data,
                                  'mlp',
                                  test_params,
                                  model_params["optimizers"],
                                  model_params["learning_rates"],
                                  regularizer,
                                  dropout,
                                  verbose=verbose)

            all_simple_results += simple_classifier_results
            all_mlp_results += mlp_results

    # Visualize the results as heatmaps
    plot_nn_heatmap(all_simple_results,
                    plot_type="regularization",
                    save_fig=True)
    plot_nn_heatmap(all_mlp_results,
                    plot_type="regularization",
                    save_fig=True)

    return all_simple_results, all_mlp_results

def test_nn_optimizer(data, model_params=None, test_params=None,
                      verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron for
    different optimizers and learning rates. The number of epochs, batch
    size, and regularizer are held constant in this test set.
    """
    if not model_params:
        model_params = {
            "learning_rates": [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001],
            "optimizers": [
                "adam", "sgd", "rmsprop", "adagrad", "adadelta", "adamax",
                "nadam"
            ],
            "regularizer": None,
        }
    if not test_params:
        test_params = {"epochs": [100], "batch_size": [500]}

    all_simple_results = []
    all_mlp_results = []
    for learning_rate in model_params["learning_rates"]:
        for optimizer in model_params["optimizers"]:
            # Build the models
            simple_classifier = build_simple_classifier(
                data["X_train"],
                learning_rate=learning_rate,
                optimizer=optimizer)
            mlp = build_mlp(data["X_train"],
                            learning_rate=learning_rate,
                            optimizer=optimizer)

            # Test the models
            if verbose:
                print("SIMPLE CLASSIFIER - LEARNING_RATE {}, OPTIMIZER {}".
                      format(learning_rate, optimizer))
            simple_classifier_results = test_nn(simple_classifier,
                                                data,
                                                'simple_classifier',
                                                test_params,
                                                optimizer,
                                                learning_rate,
                                                verbose=verbose)
            if verbose:
                print("MLP - LEARNING_RATE {}, OPTIMIZER {}".format(
                    learning_rate, optimizer))
            mlp_results = test_nn(mlp,
                                  data,
                                  'mlp',
                                  test_params,
                                  optimizer,
                                  learning_rate,
                                  verbose=verbose)

            all_simple_results += simple_classifier_results
            all_mlp_results += mlp_results

    # Visualize the results as heatmaps
    plot_nn_heatmap(all_simple_results, plot_type="optimizer", save_fig=True)
    plot_nn_heatmap(all_mlp_results, plot_type="optimizer", save_fig=True)

    return all_simple_results, all_mlp_results
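
# A minimal usage sketch (hypothetical helper, not part of the original
# module): runs every hyperparameter sweep defined above on a single date's
# data, assuming get_data returns the same data dict format used by main().
def run_all_sweeps(verbose=False):
    data = get_data(data_choice="single_date")
    test_nn_epochs(data, verbose=verbose)
    test_nn_test_params(data, verbose=verbose)
    test_nn_optimizer(data, verbose=verbose)
    test_nn_reg(data, verbose=verbose)


# Standard script entry point; main() above is assumed to be the intended
# driver when this module is run directly.
if __name__ == "__main__":
    main()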