def prob_5():
    # cars: train a decision tree on 75% of the shuffled data and keep the induced tree
    arff = Arff('datasets/cars.arff')
    arff.shuffle()
    test = arff.create_subset_arff(slice(arff.instance_count // 10))
    training = arff.create_subset_arff(slice(arff.instance_count // 10, None))
    tf = test.get_features()
    tl = test.get_labels()
    splits = k_fold_cv(arff)  # cross-validation splits (computed but not used below)
    arff = arff.create_subset_arff(slice(arff.instance_count // 4, None))
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())
    a = d.tree

    # voting: same procedure
    arff = Arff('datasets/voting.arff')
    arff.shuffle()
    arff = arff.create_subset_arff(slice(arff.instance_count // 4, None))
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())
    b = d.tree
    return a, b
def prob_3(weighted_d=False):
    test_arff = Arff("housing_testing_data.arff")
    train_arff = Arff("housing_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()
    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        A.append(KNNC.get_accuracy_regress(weighted_d))
    plt.plot(K, A, label="")
    t = "KNN Regression M.S.E Housing"
    if weighted_d:
        t += " (weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("M.S.E")
    # plt.legend()
    plt.show()
def main():
    arff = Arff(sys.argv[1])
    pl = PerceptronLearner()
    features = arff.get_features()
    labels = arff.get_labels()
    accuracy_matrix = np.zeros((5, 20))
    for i in range(5):
        pl.train(features, labels)
        a = pl.accuracy_tracker[:20]
        # pad to make 20 wide
        a = np.pad(a, (0, 20 - len(a)), 'constant', constant_values=a[-1])
        accuracy_matrix[i] = a
    # Average the accuracies of each step
    print(accuracy_matrix)
    avg_accuracy = np.sum(accuracy_matrix, axis=0) / 5
    print(avg_accuracy)
    plt.plot(1 - avg_accuracy)
    plt.xlabel("Epochs")
    plt.ylabel("Avg Misclassification Rate")
    plt.title("Avg Misclassification Rate Over Epochs")
    plt.show()
def setUp(self):
    path = os.path.join(utils.get_root(), "test/datasets/cm1_req.arff")
    data = Arff(arff=path)
    self.features = data.get_features()
    self.labels = data.get_labels()
    self.learner = BaselineLearner()
def prob5():
    arff = Arff(sys.argv[2])
    imp_atts = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    arff.shuffle()
    n = len(arff.get_labels().data)
    t = int(n * .55)
    v = n - int(n * .20)
    train_set = arff.create_subset_arff(row_idx=slice(0, t, 1), col_idx=imp_atts)
    test_set = arff.create_subset_arff(row_idx=slice(t, v, 1), col_idx=imp_atts)
    validation_set = arff.create_subset_arff(row_idx=slice(v, n, 1), col_idx=imp_atts)
    epochs = []
    momentums = np.linspace(0, 1.5, 20)
    # momentums = [.5, 1]
    for momentum in momentums:
        print(momentum)
        nn = NeuralNetwork(8, [30], 11, LR=.1, momentum=momentum)
        all_acc_va, all_mse_va, all_mse_te, all_mse_tr = nn.train_set(
            train_set, test_set, validation_set, w=5)
        epochs.append(len(all_acc_va))
    plt.plot(momentums, epochs)
    plt.title("Vowel Momentum vs Epoch Convergence")
    plt.xlabel("Momentum")
    plt.ylabel("Epochs til Conv.")
    plt.show()
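# For reference: prob5 above sweeps the `momentum` argument of NeuralNetwork and plots
# epochs until convergence. Below is a minimal sketch of the standard momentum update
# that such a sweep varies. This is an assumption written in plain NumPy, not the actual
# internals of the NeuralNetwork class; `weights`, `gradient`, and `velocity` are
# illustrative names only.
import numpy as np

def momentum_step(weights, gradient, velocity, lr=0.1, momentum=0.9):
    """One gradient-descent step with momentum: velocity is a decaying sum of past gradients."""
    velocity = momentum * velocity - lr * gradient
    return weights + velocity, velocity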
def prob_0():
    arff = Arff('datasets/lenses.arff')
    d = DecisionTreeLearner()
    f = arff.get_features()
    l = arff.get_labels()
    d.train(f, l)
    print(d.tree)
def main():
    arff = Arff(sys.argv[1])
    features = arff.get_features()
    labels = arff.get_labels()
    pl = PerceptronLearner()
    pl.train(features, labels)
    visualize_training(features, labels, pl)
def prob_3():
    print('cars')
    arff = Arff('datasets/cars.arff')
    arff.shuffle()
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())
    a = d.tree

    print()
    print('voting')
    arff = Arff('datasets/voting.arff')
    arff.shuffle()
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())
    b = d.tree
    return a, b
def prob_2(weighted_d=False):
    """Compare kNN accuracy on magic telescope with and without normalization, then sweep k."""
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    # attributes = test_arff.get_attr_names()
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    KNNC = KNNClassifier(k, train_data, test_data)
    acc = KNNC.get_accuracy(weighted_d)

    test_arff.normalize()
    train_arff.normalize()
    n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    n_KNNC = KNNClassifier(k, n_train_data, n_test_data)  # train data first, matching the call above
    acc_n = n_KNNC.get_accuracy(weighted_d)
    # print(np.array([[acc, acc_n]]))
    print(acc, acc_n)
    # show_table(["Not Normalized", "Normalized"], ["Accuracy"], np.array([[acc, acc_n]]),
    #            title="Normalized vs Non-normalized, k=3")

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        n_KNNC = KNNClassifier(k_hat, n_train_data, n_test_data)
        A.append(n_KNNC.get_accuracy(weighted_d))
    plt.plot(K, A, label="")
    t = "KNN Accuracy Telesc. "
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    # plt.legend()
    plt.show()
def prob_6():
    """Compare kNN accuracy and query time before and after induce_KSM()."""
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()
    K = [1, 3, 5]
    T = []
    A = []
    T_KSM = []
    A_KSM = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        t = time.time()
        A.append(KNNC.get_accuracy())
        T.append(time.time() - t)
        KNNC.induce_KSM()
        t = time.time()
        A_KSM.append(KNNC.get_accuracy())
        T_KSM.append(time.time() - t)
    ax = plt.axes(projection='3d')
    ax.plot(K, A, T, label="No-KSM")
    ax.plot(K, A_KSM, T_KSM, label="KSM")
    ax.set_xlabel('K')
    ax.set_ylabel('Accuracy')
    ax.set_zlabel('Time')
    t = "KNN Accuracy w/ IKSM"
    plt.title(t)
    plt.legend()
    plt.show()
def test_get_features(self):
    """ Tests construction of Arff from path, arff, numpy array """
    # Create a Matrix object from arff
    credit = Arff(arff=self.credit_data_path, label_count=1)
    credit.label_count = 0
    np.testing.assert_equal(credit.data, credit.get_features().data)

    ## Test label inference
    credit.label_count = 5
    self.assertEqual(credit.get_labels().shape, (690, 5))

    ## Copy last 8 columns
    credit2 = Arff(credit, col_idx=slice(-8, None))
    self.assertEqual(credit2.label_count, 5)
    self.assertEqual((690, 3), credit2.get_features().shape)

    ## Verify 0 labels
    credit.label_count = 0
    self.assertEqual((690, 16), credit.get_features().shape)
    self.assertEqual((690, 0), credit.get_labels().shape)
def prob_3():
    # Use regression knn on housing price prediction dataset
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()
    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k)
        preds = knn.knn(train.get_features(), train.get_labels(), test.get_features())
        mse = sum((preds - np.ravel(test.get_labels().data))**2) / len(preds)
        mses.append(mse)
    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing Prices")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
def test_cases():
    # test_1()
    attr_types = [
        "real", "real", "real", "real", "cat", "real", "cat", "real", "real",
        "cat", "real", "cat", "cat", "cat", "cat", "cat", "cat"
    ]
    attr_idx = [
        [], [], [], [],
        ['none', 'tcf', 'tc'],
        [],
        ['none', 'ret_allw', 'empl_contr'],
        [], [],
        ['yes', 'no'],
        [],
        ['below_average', 'average', 'generous'],
        ['yes', 'no'],
        ['none', 'half', 'full'],
        ['yes', 'no'],
        ['none', 'half', 'full'],
        ['bad', 'good']
    ]
    k = 5
    arff = Arff("labor.arff")
    arff.normalize()
    features = arff.get_features().data
    labels = arff.get_labels().data
    # attributes = arff.get_attr_names()
    data = np.hstack((features, labels))[:, 1:]
    kmc = KMC(k, data, data, attr_types, attr_idx)
    kmc.train(tol=0)
def prob_4_telescope():
    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    train.normalize()
    test.normalize()
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k, weighting=True)
        predictions = knn.knn(train.get_features(), train.get_labels(), test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))
    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy on Telescope (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
def prob_4_housing():
    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()
    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k, weighting=True)
        preds = knn.knn_regression(train.get_features(), train.get_labels(), test.get_features())
        mse = np.sum((preds - np.ravel(test.get_labels().data))**2, axis=0) / len(preds)
        mses.append(mse)
    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
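# For reference: prob_4_telescope and prob_4_housing above rely on KNN(k, weighting=True)
# to weight neighbors by w = 1/dist**2. Below is a minimal sketch of that weighting for
# the regression case, under the assumption that it is the usual inverse-square-distance
# weighted mean; plain NumPy, not the repo's KNN class, and all names are illustrative.
import numpy as np

def weighted_knn_regress(train_X, train_y, query, k=3, eps=1e-8):
    dists = np.linalg.norm(train_X - query, axis=1)   # Euclidean distance to every training point
    nearest = np.argsort(dists)[:k]                   # indices of the k closest points
    w = 1.0 / (dists[nearest] ** 2 + eps)             # inverse-square-distance weights
    return np.sum(w * train_y[nearest]) / np.sum(w)   # weighted mean of the neighbors' targets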
def prob3():
    """Sweep the learning rate on the vowel dataset and plot best MSE and epochs to converge."""
    arff = Arff(sys.argv[2])
    imp_atts = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    arff.shuffle()
    n = len(arff.get_labels().data)
    t = int(n * .55)
    v = n - int(n * .20)
    train_set = arff.create_subset_arff(row_idx=slice(0, t, 1), col_idx=imp_atts)
    test_set = arff.create_subset_arff(row_idx=slice(t, v, 1), col_idx=imp_atts)
    validation_set = arff.create_subset_arff(row_idx=slice(v, n, 1), col_idx=imp_atts)
    best_mse_te = []
    best_mse_tr = []
    best_mse_va = []
    epochs = []
    LRS = [.01, .1, .5, .8, 1.5]
    for LR in LRS:
        # print(LR)
        nn = NeuralNetwork(8, [16], 11, LR=LR, momentum=0)
        all_acc_va, all_mse_va, all_mse_te, all_mse_tr = nn.train_set(
            train_set, test_set, validation_set, w=5)
        best_mse_te.append(min(all_mse_te))
        best_mse_tr.append(min(all_mse_tr))
        best_mse_va.append(min(all_mse_va))
        epochs.append(len(all_mse_va))
    plt.plot(LRS, best_mse_te, label="MSE Te")
    plt.plot(LRS, best_mse_tr, label="MSE Tr")
    plt.plot(LRS, best_mse_va, label="MSE V.A")
    plt.title("Vowel MSE vs Learning Rate")
    plt.xlabel("Learning Rate")
    plt.ylabel("MSE")
    plt.legend()
    plt.show()

    plt.plot(LRS, epochs)
    plt.title("Vowel Epochs vs Learning Rate")
    plt.xlabel("Learning Rate")
    plt.ylabel("Epochs")
    plt.show()
def prob_5():
    cont_mask = [1, 2, 7, 10, 13, 14, 16]
    cate_mask = [0, 3, 4, 5, 6, 8, 9, 11, 12, 15]
    arff = Arff("credit_approval_data.arff")
    arff.shuffle()
    arff.normalize()
    n = len(arff.get_labels().data)
    t = int(n * .7)
    train_data = arff.create_subset_arff(row_idx=slice(0, t, 1))
    test_data = arff.create_subset_arff(row_idx=slice(t, n, 1))
    test_data = np.hstack((test_data.get_features().data, test_data.get_labels().data))
    train_data = np.hstack((train_data.get_features().data, train_data.get_labels().data))
    # b,30.83,0,u,g,w,v,1.25,t,t,01,f,g,00202,0,+
    dist_matrix = np.ones((16, 16))
    np.fill_diagonal(dist_matrix, 0)
    KNNC = KNNClassifier(8, train_data, test_data)
    print(KNNC.get_accuracy_mixed(cate_mask, cont_mask, dist_matrix))
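# For reference: prob_5 above passes continuous and categorical column masks plus a 0/1
# distance matrix to get_accuracy_mixed. The sketch below shows one way such a mixed
# (HEOM-style) distance can be computed: squared difference for normalized continuous
# attributes, a table lookup for categorical ones. This is an assumption about what the
# classifier might do internally, not its actual code; names are illustrative only.
import numpy as np

def mixed_distance(a, b, cont_mask, cate_mask, dist_matrix):
    d = 0.0
    for i in cont_mask:                          # continuous: assumes values normalized to [0, 1]
        d += (a[i] - b[i]) ** 2
    for i in cate_mask:                          # categorical: 0 if the encoded values match, 1 otherwise
        d += dist_matrix[int(a[i]), int(b[i])] ** 2
    return np.sqrt(d)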
def prob2():
    arff = Arff(sys.argv[1])
    arff.shuffle()
    n = len(arff.get_labels().data)
    t = int(n * .55)
    v = n - int(n * .20)
    train_set = arff.create_subset_arff(row_idx=slice(0, t, 1))
    test_set = arff.create_subset_arff(row_idx=slice(t, v, 1))
    validation_set = arff.create_subset_arff(row_idx=slice(v, n, 1))
    nn = NeuralNetwork(4, [9], 3, LR=.1)
    all_acc_va, all_mse_va, all_mse_te, all_mse_tr = nn.train_set(
        train_set, test_set, validation_set)
    d = [x for x in range(len(all_acc_va))]
    plt.plot(d, all_mse_te, label="test MSE")
    plt.plot(d, all_mse_va, label="Val. MSE")
    plt.plot(d, all_acc_va, label="Val. Accuracy")
    plt.title("Iris Dataset")
    plt.xlabel("Epochs")
    plt.ylabel("MSE / Accuracy")
    plt.legend()
    plt.show()
def prob4():
    arff = Arff(sys.argv[2])
    imp_atts = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    arff.shuffle()
    n = len(arff.get_labels().data)
    t = int(n * .55)
    v = n - int(n * .20)
    train_set = arff.create_subset_arff(row_idx=slice(0, t, 1), col_idx=imp_atts)
    test_set = arff.create_subset_arff(row_idx=slice(t, v, 1), col_idx=imp_atts)
    validation_set = arff.create_subset_arff(row_idx=slice(v, n, 1), col_idx=imp_atts)
    best_mse_te = []
    best_mse_tr = []
    best_mse_va = []
    hidden_nodes = [1, 3, 6, 10, 13, 15, 16, 18, 20, 22, 25, 30, 40]
    for nodes in hidden_nodes:
        # print(nodes)
        nn = NeuralNetwork(8, [nodes], 11, LR=.1, momentum=0)
        all_acc_va, all_mse_va, all_mse_te, all_mse_tr = nn.train_set(
            train_set, test_set, validation_set, w=5)
        best_mse_te.append(min(all_mse_te))
        best_mse_tr.append(min(all_mse_tr))
        best_mse_va.append(min(all_mse_va))
    plt.plot(hidden_nodes, best_mse_te, label="MSE Te")
    plt.plot(hidden_nodes, best_mse_tr, label="MSE Tr")
    plt.plot(hidden_nodes, best_mse_va, label="MSE V.A")
    plt.title("Vowel MSE vs Hidden Nodes")
    plt.xlabel("Hidden Nodes")
    plt.ylabel("MSE")
    plt.legend()
    plt.show()
def prob_2():
    # try first without normalizing
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    k = KNN(3)
    predictions = k.knn(train.get_features(), train.get_labels(), test.get_features())
    acc = predictions == np.ravel(test.get_labels().data)
    print("Before normalization:", sum(acc) / len(acc))

    train.normalize()
    test.normalize()
    predictions = k.knn(train.get_features(), train.get_labels(), test.get_features())
    acc = predictions == np.ravel(test.get_labels().data)
    print("After normalization:", sum(acc) / len(acc))

    print("PART TWO:")
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k)
        predictions = knn.knn(train.get_features(), train.get_labels(), test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))
    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
from toolkit.perceptron_learner import PerceptronLearner
from toolkit.arff import Arff
import sys
import numpy as np


def rnd4(obj):
    if isinstance(obj, np.ndarray):
        return obj
    elif isinstance(obj, (int, float, complex)):
        return "{:.4f}".format(obj)


arff = Arff(sys.argv[1])
features = arff.get_features()
labels = arff.get_labels()
pl = PerceptronLearner()
weights = []
for i in range(10):
    pl.train(features, labels)
    weights.append(pl.weights)
avg_weights = np.sum(weights, axis=0) / 10
names = arff.get_attr_names()
for i in range(len(avg_weights)):
    print(rnd4(avg_weights[i]), names[i])