def main(P, mate, mutate):
    """Run one genetic-algorithm NN-weight-training experiment.

    P       -- GA population size
    mate    -- number of individuals mated per generation
    mutate  -- number of individuals mutated per generation

    Results are streamed by train() to OUTFILE with 'XXX' replaced by the
    run name.  (Removed an unused ``RPROPUpdateRule`` local: the GA path
    never consumes a gradient update rule.)
    """
    training_ints = initialize_instances('m_trg.csv')
    testing_ints = initialize_instances('m_test.csv')
    validation_ints = initialize_instances('m_val.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    oa_name = "GA_{}_{}_{}".format(P, mate, mutate)
    # Write the CSV header once; train() appends one row per iteration.
    with open(OUTFILE.replace('XXX', oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{}\n'.format(
            'iteration', 'MSE_trg', 'MSE_val', 'MSE_tst',
            'acc_trg', 'acc_val', 'acc_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork([
        INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3,
        OUTPUT_LAYER
    ], relu)
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    oa = StandardGeneticAlgorithm(P, mate, mutate, nnop)
    train(oa, classification_network, oa_name, training_ints,
          validation_ints, testing_ints, measure)
def main(CE):
    """Run one simulated-annealing NN-weight-training experiment.

    CE -- cooling exponent passed to SimulatedAnnealing (start temp 1E10).

    Results are streamed by train() to OUTFILE formatted with the run name.
    (Removed an unused ``RPROPUpdateRule`` local: the SA path never
    consumes a gradient update rule.)
    """
    training_ints = initialize_instances(TRAIN_DATA_FILE)
    testing_ints = initialize_instances(TEST_DATA_FILE)
    validation_ints = initialize_instances(VALIDATE_DATA_FILE)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    oa_name = "SA_{}".format(CE)
    # Write the CSV header once; train() appends one row per iteration.
    with open(OUTFILE.format(oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg',
            'acc_val', 'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork([
        INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3,
        OUTPUT_LAYER
    ], relu)
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    oa = SimulatedAnnealing(1E10, CE, nnop)
    train(oa, classification_network, oa_name, training_ints,
          validation_ints, testing_ints, measure, TRAINING_ITERATIONS,
          OUTFILE.format(oa_name))
def main(ds_name, P, mate, mutate):
    """Run one genetic-algorithm experiment on a named problem set.

    ds_name -- key passed to get_problemset() to pick network shape + files
    P       -- GA population size
    mate    -- number of individuals mated per generation
    mutate  -- number of individuals mutated per generation

    (Removed an unused ``RPROPUpdateRule`` local: the GA path never
    consumes a gradient update rule.)
    """
    nn_config, train_file, val_file, test_file = get_problemset(ds_name)
    training_ints = initialize_instances(train_file)
    testing_ints = initialize_instances(test_file)
    validation_ints = initialize_instances(val_file)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    oa_name = "GA_{}_{}_{}_{}".format(ds_name, P, mate, mutate)
    # Write the CSV header once; train() appends one row per iteration.
    with open(OUTFILE.format(oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg',
            'acc_val', 'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork(
        nn_config, relu)
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    oa = StandardGeneticAlgorithm(P, mate, mutate, nnop)
    train(oa, classification_network, oa_name, training_ints,
          validation_ints, testing_ints, measure, TRAINING_ITERATIONS,
          OUTFILE.format(oa_name))
def run_all():
    """Train one fresh network per algorithm (backprop, RHC, SA, GA) on
    the selected data source and hand each off to run() for evaluation.

    The four sections previously duplicated the network-construction
    boilerplate; it is now a local helper.  Behavior is unchanged.
    """
    dataSource = 'wine'
    INPUT_LAYER = 13
    HIDDEN_LAYER = 100
    OUTPUT_LAYER = 1
    # dataSource = 'wage'
    # INPUT_LAYER = 106
    # HIDDEN_LAYER = 1000
    # OUTPUT_LAYER = 1
    train_data = initialize_instances(
        'data/balanced_' + dataSource + '_cleaned_train.csv')
    test_data = initialize_instances(
        'data/balanced_' + dataSource + '_cleaned_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)
    layers = [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]

    def fresh_network():
        # Each algorithm starts from its own freshly-initialized weights.
        return factory.createClassificationNetwork(layers, RELU())

    # Gradient-descent baseline: the trainer itself serves as the fitter.
    update_rule = RPROPUpdateRule()
    alg = 'backprop'
    classification_network = fresh_network()
    oa = BatchBackPropagationTrainer(data_set, classification_network,
                                     measure, update_rule)
    fit = oa
    run(alg, oa, fit, classification_network, measure, train_data,
        test_data, dataSource)

    iters = 1  # NOTE(review): presumably run() drives further iterations

    alg = 'RHC'
    classification_network = fresh_network()
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    oa = RandomizedHillClimbing(nnop)
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data,
        test_data, dataSource)

    alg = 'SA'
    classification_network = fresh_network()
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    startTemp = 1E10
    coolingFactor = .8
    oa = SimulatedAnnealing(startTemp, coolingFactor, nnop)
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data,
        test_data, dataSource)

    alg = 'GA'
    classification_network = fresh_network()
    nnop = NeuralNetworkOptimizationProblem(data_set,
                                            classification_network, measure)
    population = 200
    mates = 50
    mutations = 10
    oa = StandardGeneticAlgorithm(population, mates, mutations, nnop)
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data,
        test_data, dataSource)
def main():
    """Sweep simulated-annealing cooling rates and iteration budgets on
    the wine dataset, logging train/validate/test metrics to CSV.

    Bug fix: the running MSE accumulator (`error`), the low/high-quality
    counters, and the prediction arrays were only reset before the *test*
    pass, so from the second (cooling, iteration) combination onward the
    training-set metrics included leftovers from the previous test pass.
    They are now reset at the start of every sweep iteration.
    """
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500]
    cooling_list = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                    0.95]
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["SA"]
    results = ""
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set,
                                             classification_network,
                                             measure))
    # Fresh CSV headers for the three result files.
    with open("Results/NN/SA_Train.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    with open("Results/NN/SA_Validate.csv", 'w') as f:
        f.write('iterations,cooling,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/SA_Test.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    for p in range(len(cooling_list)):
        for i in range(len(iteration_list)):
            cooling = cooling_list[p]
            iteration = iteration_list[i]
            start = time.time()
            correct = 0
            incorrect = 0
            # Reset accumulators for THIS run (fix: previously only done
            # before the test pass, contaminating training metrics).
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []
            # A fresh SA instance per run; the shared network keeps the
            # weights from the previous run as its starting point.
            sim = SimulatedAnnealing(1E11, cooling, nnop[0])
            train(sim, networks[0], oa_names[0], train_instances, measure,
                  iteration)
            end = time.time()
            training_time = end - start
            optimal_instance = sim.getOptimal()
            networks[0].setWeights(optimal_instance.getData())
            start = time.time()
            for instance in train_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)  # clamp to [0, 1]
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals,
                                   Instance(network_vals.get(0)))
                error += measure.value(result, example)
            end = time.time()
            testing_time = end - start
            training_mse = error / len(train_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Training MSE: " + str(training_mse))
            results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0,
                training_time, testing_time, training_mse,
                low_quality_correct, low_quality_incorrect,
                high_quality_correct, high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Train.csv", 'a') as f:
                f.write(data)
            # --- validation pass (accuracy only, no MSE) ---
            correct = 0
            incorrect = 0
            for instance in validate_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0,
                training_time, testing_time)
            print(data)
            with open("Results/NN/SA_Validate.csv", 'a') as f:
                f.write(data)
            # --- test pass ---
            correct = 0
            incorrect = 0
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []
            for instance in test_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals,
                                   Instance(network_vals.get(0)))
                error += measure.value(result, example)
            testing_mse = error / len(test_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Testing MSE: " + str(testing_mse))
            results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
                "SA", correct)
            results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0,
                training_time, testing_time, testing_mse,
                low_quality_correct, low_quality_incorrect,
                high_quality_correct, high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Test.csv", 'a') as f:
                f.write(data)
    print(results)
def main():
    """Sweep randomized-hill-climbing iteration budgets on the wine
    dataset, logging train/validate/test accuracy and timings to CSV."""
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500, 5000]
    # Start each result file with a fresh header row.
    header = 'iterations,fitness,accuracy,train_time,test_time\n'
    for path in ("Results/NN/RHC_Train.csv", "Results/NN/RHC_Validate.csv",
                 "Results/NN/RHC_Test.csv"):
        with open(path, 'w') as f:
            f.write(header)
    networks = []   # BackPropagationNetwork
    problems = []   # NeuralNetworkOptimizationProblem
    climbers = []   # OptimizationAlgorithm
    oa_names = ["RHC"]
    results = ""
    for name in oa_names:
        net = factory.createClassificationNetwork([11, 22, 1], RELU())
        networks.append(net)
        problems.append(
            NeuralNetworkOptimizationProblem(data_set, net, measure))
        climbers.append(RandomizedHillClimbing(problems[0]))

    def tally(instances):
        # Count instances where the network output lands within 0.5 of
        # the continuous label.
        hits = 0
        misses = 0
        for inst in instances:
            networks[0].setInputValues(inst.getData())
            networks[0].run()
            target = inst.getLabel().getContinuous()
            output = networks[0].getOutputValues().get(0)
            if abs(target - output) < 0.5:
                hits += 1
            else:
                misses += 1
        return hits, misses

    for iteration in iteration_list:
        start = time.time()
        # The same climber keeps training across budgets (cumulative).
        train(climbers[0], networks[0], oa_names[0], train_instances,
              measure, iteration)
        training_time = time.time() - start
        networks[0].setWeights(climbers[0].getOptimal().getData())
        start = time.time()
        correct, incorrect = tally(train_instances)
        testing_time = time.time() - start
        results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
            'RHC', correct)
        results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Train.csv", 'a') as f:
            f.write(data)
        correct, incorrect = tally(validate_instances)
        results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
            'RHC', correct)
        results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Validate.csv", 'a') as f:
            f.write(data)
        correct, incorrect = tally(test_instances)
        results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
            "RHC", correct)
        results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Test.csv", 'a') as f:
            f.write(data)
    print(results)
def main():
    """Run algorithms on the abalone dataset.

    Trains one network per algorithm (RHC, SA, GA) and reports training
    accuracy plus train/evaluation wall times.
    """
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    networks = []   # one BackPropagationNetwork per algorithm
    problems = []   # matching NeuralNetworkOptimizationProblem
    oa_names = ["RHC", "SA", "GA"]
    for _ in oa_names:
        net = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
        networks.append(net)
        problems.append(
            NeuralNetworkOptimizationProblem(data_set, net, measure))
    algorithms = [
        RandomizedHillClimbing(problems[0]),
        SimulatedAnnealing(1E11, .95, problems[1]),
        StandardGeneticAlgorithm(200, 100, 10, problems[2]),
    ]
    results = ""
    for name, algo, net in zip(oa_names, algorithms, networks):
        t0 = time.time()
        train(algo, net, name, instances, measure)
        training_time = time.time() - t0
        # Load the best weights found before evaluating.
        net.setWeights(algo.getOptimal().getData())
        hits = 0
        misses = 0
        t0 = time.time()
        for instance in instances:
            net.setInputValues(instance.getData())
            net.run()
            target = instance.getLabel().getContinuous()
            output = net.getOutputValues().get(0)
            if abs(target - output) < 0.5:
                hits += 1
            else:
                misses += 1
        testing_time = time.time() - t0
        results += "\nResults for %s: \nCorrectly classified %d instances." % (
            name, hits)
        results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            misses, float(hits) / (hits + misses) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
    print(results)
def _run_fold(train_file, test_file, fold):
    """Train RHC/SA/GA networks on one CV fold of the gamma dataset and
    dump each algorithm's per-iteration metrics to metrics/<alg>_f<fold>.csv.

    train_file / test_file -- instance CSVs for this fold's roles
    fold                   -- 1 or 2, used in log text and file names
    """
    train_data = initialize_instances(train_file)
    test_data = initialize_instances(test_file)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""
    # One network + optimization problem per algorithm.
    for name in oa_names:
        activation = RELU()
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER],
            activation)
        networks.append(classification_network)
        nnop.append(NeuralNetworkOptimizationProblem(
            data_set, classification_network, measure))
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
    for i, name in enumerate(oa_names):
        start = time.time()
        metrics = train(oa[i], networks[i], oa_names[i], train_data,
                        test_data, measure)
        end = time.time()
        training_time = end - start
        results += "\nFold %d train time: %0.03f seconds" % (fold,
                                                             training_time)
        # Write per-iteration metrics to CSV.
        with open("metrics/" + oa_names[i] + '_f%d.csv' % fold, 'w') as f:
            writer = csv.writer(f)
            for metric in metrics:
                writer.writerow(metric)
    print(results)


def main():
    """Run 2-fold cross validation on the gamma dataset.

    Each fold is one full RHC/SA/GA run; fold 2 swaps the roles of the
    train and test files.  Metrics are evaluated outside this file.
    Bug fix: the second fold previously logged "Fold 1 train time"; the
    fold number is now passed through and reported correctly.
    """
    _run_fold(TRAIN_FILE, TEST_FILE, 1)
    _run_fold(TEST_FILE, TRAIN_FILE, 2)