Пример #1
0
def prob_5():
        """Train decision trees on subsets of the cars and voting datasets.

        Returns:
            tuple: (cars decision tree, voting decision tree).
        """
        arff = Arff('datasets/cars.arff')
        arff.shuffle()

        # NOTE(review): the 10% test / 90% training split and the values
        # `tf`, `tl`, `splits` below are computed but never used afterward —
        # looks like leftover experiment code. `k_fold_cv` may have side
        # effects, so it is left in place; confirm before removing.
        test = arff.create_subset_arff(slice(arff.instance_count//10))
        training = arff.create_subset_arff(slice(arff.instance_count//10,None))

        tf = test.get_features()
        tl = test.get_labels()

        splits = k_fold_cv(arff)

        # Train on the last 75% of the shuffled cars data.
        arff = arff.create_subset_arff(slice(arff.instance_count//4,None))
        d = DecisionTreeLearner()
        d.train(arff.get_features(), arff.get_labels())

        a = d.tree

        # Repeat on the last 75% of the shuffled voting data.
        arff = Arff('datasets/voting.arff')
        arff.shuffle()
        arff = arff.create_subset_arff(slice(arff.instance_count//4,None))
        d = DecisionTreeLearner()
        d.train(arff.get_features(), arff.get_labels())

        b = d.tree

        return a, b
Пример #2
0
def prob_3(weighted_d = False):
    """Plot KNN-regression MSE on the housing dataset over a range of k values.

    Args:
        weighted_d: when True, use distance-weighted regression and note it
            in the plot title.
    """
    test_arff = Arff("housing_testing_data.arff")
    train_arff = Arff("housing_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    # Hoisted out of the loop: the stacked [features | label] arrays do not
    # change between iterations.
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        A.append(KNNC.get_accuracy_regress(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Regression M.S.E Housing"
    if weighted_d:
        t += "(weighted-d)"
    # BUG FIX: a stray no-op expression statement (`weighted_d`) was removed here.
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("M.S.E")
    # plt.legend()
    plt.show()
def main():
    """Train a perceptron five times and plot the average misclassification
    rate per epoch (runs shorter than 20 epochs are padded out)."""
    dataset = Arff(sys.argv[1])
    learner = PerceptronLearner()
    feats = dataset.get_features()
    targets = dataset.get_labels()

    runs, width = 5, 20
    accuracy_matrix = np.zeros((runs, width))

    for run in range(runs):
        learner.train(feats, targets)

        history = learner.accuracy_tracker[:width]
        # Pad short runs out to `width` epochs with the last observed accuracy.
        history = np.pad(history, (0, width - len(history)), 'constant', constant_values=history[-1])
        accuracy_matrix[run] = history

    # Average the per-epoch accuracies across runs.
    print(accuracy_matrix)
    avg_accuracy = np.sum(accuracy_matrix, axis=0) / runs
    print(avg_accuracy)

    plt.plot(1 - avg_accuracy)
    plt.xlabel("Epochs")
    plt.ylabel("Avg Misclassification Rate")
    plt.title("Avg Misclassification Rate Over Epochs")

    plt.show()
Пример #4
0
    def setUp(self):
        """Load the cm1_req dataset and create a fresh BaselineLearner per test."""
        dataset = Arff(arff=os.path.join(utils.get_root(), "test/datasets/cm1_req.arff"))

        # Cached on the test instance so individual tests can reuse them.
        self.features = dataset.get_features()
        self.labels = dataset.get_labels()
        self.learner = BaselineLearner()
Пример #5
0
def prob5():
    """Plot epochs-to-convergence against momentum on the vowel dataset."""
    dataset = Arff(sys.argv[2])
    keep_cols = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    dataset.shuffle()
    total = len(dataset.get_labels().data)
    train_end = int(total * .55)
    test_end = total - int(total * .20)
    # 55% train / 25% test / 20% validation split over the selected columns.
    train_set = dataset.create_subset_arff(row_idx=slice(0, train_end, 1),
                                           col_idx=keep_cols)
    test_set = dataset.create_subset_arff(row_idx=slice(train_end, test_end, 1),
                                          col_idx=keep_cols)
    validation_set = dataset.create_subset_arff(row_idx=slice(test_end, total, 1),
                                                col_idx=keep_cols)

    epoch_counts = []
    momentums = np.linspace(0, 1.5, 20)

    for momentum in momentums:
        print(momentum)
        net = NeuralNetwork(8, [30], 11, LR=.1, momentum=momentum)
        acc_va, mse_va, mse_te, mse_tr = net.train_set(
            train_set, test_set, validation_set, w=5)
        # Number of validation-accuracy entries == epochs until convergence.
        epoch_counts.append(len(acc_va))

    plt.plot(momentums, epoch_counts)
    plt.title("Vowel Momentum vs Epoch Convergence")
    plt.xlabel("Momentum")
    plt.ylabel("Epochs til Conv.")
    plt.show()
Пример #6
0
def prob_0():
    """Fit a decision tree to the lenses dataset and print the resulting tree."""
    lenses = Arff('datasets/lenses.arff')
    learner = DecisionTreeLearner()
    learner.train(lenses.get_features(), lenses.get_labels())
    print(learner.tree)
Пример #7
0
def main():
    """Train a perceptron on the dataset named on the command line and
    visualize its training."""
    dataset = Arff(sys.argv[1])
    feats, targets = dataset.get_features(), dataset.get_labels()

    learner = PerceptronLearner()
    learner.train(feats, targets)

    visualize_training(feats, targets, learner)
Пример #8
0
def prob_3():
    """Train decision trees on the cars and voting datasets.

    Returns:
        tuple: (cars decision tree, voting decision tree).
    """
    print('cars')
    cars = Arff('datasets/cars.arff')
    cars.shuffle()
    cars_learner = DecisionTreeLearner()
    cars_learner.train(cars.get_features(), cars.get_labels())
    cars_tree = cars_learner.tree

    print()
    print('voting')
    voting = Arff('datasets/voting.arff')
    voting.shuffle()
    voting_learner = DecisionTreeLearner()
    voting_learner.train(voting.get_features(), voting.get_labels())
    voting_tree = voting_learner.tree

    return cars_tree, voting_tree
Пример #9
0
def prob_2(weighted_d = False):
    """Compare k=3 KNN accuracy on the magic-telescope data with and without
    normalization, then plot accuracy over a range of k on normalized data.

    Args:
        weighted_d: when True, use distance-weighted voting and note it in
            the plot title.
    """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()

    # Un-normalized baseline at k=3 ([features | label] stacked per row).
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    KNNC = KNNClassifier(k, train_data, test_data)
    acc = KNNC.get_accuracy(weighted_d)

    test_arff.normalize()
    train_arff.normalize()
    n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    # BUG FIX: train/test were passed in swapped order here; every other call
    # site in this file uses KNNClassifier(k, train, test).
    n_KNNC = KNNClassifier(k, n_train_data, n_test_data)
    acc_n = n_KNNC.get_accuracy(weighted_d)

    print(acc,acc_n)

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        n_KNNC = KNNClassifier(k_hat, n_train_data, n_test_data)
        A.append(n_KNNC.get_accuracy(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Accuracy Telesc. "
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
Пример #10
0
def prob_6():
    """Compare KNN accuracy and query time with and without KSM induction,
    plotted in 3D over k in {1, 3, 5}.
    """
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    # Hoisted out of the loop: the stacked [features | label] arrays do not
    # change between iterations. (An unused `k = 3` local was also removed.)
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))

    K = [1, 3, 5]
    T = []
    A = []
    T_KSM = []
    A_KSM = []
    for k_hat in K:
        KNNC = KNNClassifier(k_hat, train_data, test_data)

        # Time the plain classifier...
        t = time.time()
        A.append(KNNC.get_accuracy())
        T.append(time.time() - t)
        KNNC.induce_KSM()

        # ...then time it again after KSM induction.
        t = time.time()
        A_KSM.append(KNNC.get_accuracy())
        T_KSM.append(time.time() - t)

    ax = plt.axes(projection='3d')
    ax.plot(K, A, T, label="No-KSM")
    ax.plot(K, A_KSM, T_KSM, label="KSM")

    ax.set_xlabel('K')
    ax.set_ylabel('Accuracy')
    ax.set_zlabel('Time')

    t = "KNN Accuracy w/ IKSM"
    plt.title(t)
    plt.legend()
    plt.show()
Пример #11
0
    def test_get_features(self):
        """ Tests construction of Arff from path, arff, numpy array
        """
        # Create a Matrix object from arff
        credit = Arff(arff=self.credit_data_path, label_count=1)
        # With no labels, get_features() should return every column unchanged.
        credit.label_count=0
        np.testing.assert_equal(credit.data, credit.get_features().data)

        ## Test label inference
        # Reassigning label_count reinterprets the trailing 5 columns as labels.
        credit.label_count = 5
        self.assertEqual(credit.get_labels().shape, (690, 5))

        ## Copy last 8 columns
        # The copy should inherit label_count=5, leaving 3 feature columns.
        credit2 = Arff(credit, col_idx=slice(-8, None))
        self.assertEqual(credit2.label_count, 5)
        self.assertEqual((690,3), credit2.get_features().shape)

        ## Verify 0 labels
        # With label_count reset to 0, all 16 columns are features again.
        credit.label_count = 0
        self.assertEqual((690, 16), credit.get_features().shape)
        self.assertEqual((690, 0), credit.get_labels().shape)
Пример #12
0
def prob_3():
    """Run KNN regression on the normalized housing dataset and plot MSE vs k."""
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        model = KNN(k)
        preds = model.knn(train.get_features(), train.get_labels(),
                          test.get_features())
        # Mean squared error against the flattened test labels.
        mses.append(sum((preds - np.ravel(test.get_labels().data))**2) / len(preds))

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing Prices")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
Пример #13
0
def test_cases():
    """Run k-means clustering (k=5) on the labor dataset with mixed attribute types."""
    # test_1()

    # Per-attribute type for the labor dataset: "real" for continuous columns,
    # "cat" for categorical ones.
    attr_types = [
        "real",
        "real",
        "real",
        "real",
        "cat",
        "real",
        "cat",
        "real",
        "real",
        "cat",
        "real",
        "cat",
        "cat",
        "cat",
        "cat",
        "cat",
        "cat"
    ]

    # For each categorical attribute, the ordered list of its possible values;
    # continuous attributes get an empty list. Parallel to attr_types above.
    attr_idx = [
            [],
            [],
            [],
            [],
            ['none','tcf','tc'],
            [],
            ['none','ret_allw','empl_contr'],
            [],
            [],
            ['yes','no'],
            [],
            ['below_average','average','generous'],
            ['yes','no'],
            ['none','half','full'],
            ['yes','no'],
            ['none','half','full'],
            ['bad','good']
        ]

    k = 5
    arff = Arff("labor.arff")
    arff.normalize()
    features = arff.get_features().data
    labels = arff.get_labels().data
    # attributes = arff.get_attr_names()
    # Stack [features | labels] and drop column 0.
    # NOTE(review): confirm why the first column is excluded here.
    data = np.hstack((features, labels))[:, 1:]
    # Cluster the data against itself (train == test) until exact convergence.
    kmc = KMC(k, data, data, attr_types, attr_idx)
    kmc.train(tol=0)
Пример #14
0
def prob_4_telescope():
    """Distance-weighted (w = 1/dist**2) KNN on the magic telescope dataset;
    plot accuracy versus k."""
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k, weighting=True)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        hits = predictions == np.ravel(test.get_labels().data)
        # Compute the accuracy once instead of re-deriving sum(...)/len(...)
        # separately for the print and the append.
        accuracy = sum(hits) / len(hits)
        print("k:", k, "accuracy:", accuracy)
        accs.append(accuracy)

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
Пример #15
0
def prob_4_housing():
    """Distance-weighted (w = 1/dist**2) KNN regression on the housing
    dataset; plot MSE versus k."""
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        model = KNN(k, weighting=True)
        preds = model.knn_regression(train.get_features(), train.get_labels(),
                                     test.get_features())
        # Mean squared error against the flattened test labels.
        errors = preds - np.ravel(test.get_labels().data)
        mses.append(np.sum(errors**2, axis=0) / len(preds))

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
Пример #16
0
def prob3():
    """Sweep learning rates on the vowel dataset; plot best MSEs and the
    epoch counts needed to converge."""
    arff = Arff(sys.argv[2])
    imp_atts = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    arff.shuffle()
    n = len(arff.get_labels().data)
    t = int(n * .55)
    v = n - int(n * .20)
    # 55% train / 25% test / 20% validation split over the selected columns.
    train_set = arff.create_subset_arff(row_idx=slice(0, t, 1),
                                        col_idx=imp_atts)
    test_set = arff.create_subset_arff(row_idx=slice(t, v, 1),
                                       col_idx=imp_atts)
    validation_set = arff.create_subset_arff(row_idx=slice(v, n, 1),
                                             col_idx=imp_atts)

    best_mse_te = []
    best_mse_tr = []
    best_mse_va = []
    epochs = []

    LRS = [.01, .1, .5, .8, 1.5]
    for LR in LRS:
        nn = NeuralNetwork(8, [16], 11, LR=LR, momentum=0)
        all_acc_va, all_mse_va, all_mse_te, all_mse_tr = nn.train_set(
            train_set, test_set, validation_set, w=5)
        best_mse_te.append(min(all_mse_te))
        best_mse_tr.append(min(all_mse_tr))
        best_mse_va.append(min(all_mse_va))
        epochs.append(len(all_mse_va))

    plt.plot(LRS, best_mse_te, label="MSE Te")
    plt.plot(LRS, best_mse_tr, label="MSE Tr")
    plt.plot(LRS, best_mse_va, label="MSE V.A")
    plt.title("Vowel MSE vs Learning Rate")
    plt.xlabel("Learning Rate")
    plt.ylabel("MSE")
    plt.legend()
    plt.show()

    plt.plot(LRS, epochs)
    plt.title("Vowel Epochs vs Learning Rate")
    plt.xlabel("Learning Rate")
    plt.ylabel("Epochs")
    # BUG FIX: removed a plt.legend() call here — this figure has no labeled
    # artists, so the call drew nothing and only emitted matplotlib's
    # "No artists with labels found" warning.
    plt.show()
Пример #17
0
def prob_5():
    """Mixed-distance KNN (k=8) on the credit-approval dataset; print accuracy."""
    continuous_cols = [1, 2, 7, 10, 13, 14, 16]
    categorical_cols = [0, 3, 4, 5, 6, 8, 9, 11, 12, 15]

    arff = Arff("credit_approval_data.arff")
    arff.shuffle()
    arff.normalize()

    # 70/30 train/test split.
    total = len(arff.get_labels().data)
    split = int(total * .7)
    train_data = arff.create_subset_arff(row_idx=slice(0, split, 1))
    test_data = arff.create_subset_arff(row_idx=slice(split, total, 1))
    test_data = np.hstack((test_data.get_features().data, test_data.get_labels().data))
    train_data = np.hstack((train_data.get_features().data, train_data.get_labels().data))

    # Unit distance between any two distinct categorical values, zero on the
    # diagonal (same value).
    dist_matrix = np.ones((16, 16))
    np.fill_diagonal(dist_matrix, 0)
    classifier = KNNClassifier(8, train_data, test_data)
    print(classifier.get_accuracy_mixed(categorical_cols, continuous_cols, dist_matrix))
Пример #18
0
def prob2():
    """Train a small NN on the dataset from argv[1]; plot MSE and accuracy
    curves over epochs."""
    dataset = Arff(sys.argv[1])
    dataset.shuffle()
    total = len(dataset.get_labels().data)
    train_end = int(total * .55)
    test_end = total - int(total * .20)
    # 55% train / 25% test / 20% validation split.
    train_set = dataset.create_subset_arff(row_idx=slice(0, train_end, 1))
    test_set = dataset.create_subset_arff(row_idx=slice(train_end, test_end, 1))
    validation_set = dataset.create_subset_arff(row_idx=slice(test_end, total, 1))

    net = NeuralNetwork(4, [9], 3, LR=.1)
    all_acc_va, all_mse_va, all_mse_te, all_mse_tr = net.train_set(
        train_set, test_set, validation_set)

    epochs = list(range(len(all_acc_va)))
    plt.plot(epochs, all_mse_te, label="test MSE")
    plt.plot(epochs, all_mse_va, label="Val. MSE")
    plt.plot(epochs, all_acc_va, label="Val. Accuracy")
    plt.title("Iris Dataset")
    plt.xlabel("Epochs")
    plt.ylabel("%")
    plt.legend()
    plt.show()
Пример #19
0
def prob4():
    """Sweep hidden-layer sizes on the vowel dataset; plot the best MSEs."""
    dataset = Arff(sys.argv[2])
    keep_cols = [1, 3, 4, 5, 7, 9, 11, 12, 13]
    dataset.shuffle()
    total = len(dataset.get_labels().data)
    train_end = int(total * .55)
    test_end = total - int(total * .20)
    # 55% train / 25% test / 20% validation split over the selected columns.
    train_set = dataset.create_subset_arff(row_idx=slice(0, train_end, 1),
                                           col_idx=keep_cols)
    test_set = dataset.create_subset_arff(row_idx=slice(train_end, test_end, 1),
                                          col_idx=keep_cols)
    validation_set = dataset.create_subset_arff(row_idx=slice(test_end, total, 1),
                                                col_idx=keep_cols)

    best_mse_te = []
    best_mse_tr = []
    best_mse_va = []
    hidden_nodes = [1, 3, 6, 10, 13, 15, 16, 18, 20, 22, 25, 30, 40]

    for node_count in hidden_nodes:
        net = NeuralNetwork(8, [node_count], 11, LR=.1, momentum=0)
        acc_va, mse_va, mse_te, mse_tr = net.train_set(
            train_set, test_set, validation_set, w=5)

        best_mse_te.append(min(mse_te))
        best_mse_tr.append(min(mse_tr))
        best_mse_va.append(min(mse_va))

    plt.plot(hidden_nodes, best_mse_te, label="MSE Te")
    plt.plot(hidden_nodes, best_mse_tr, label="MSE Tr")
    plt.plot(hidden_nodes, best_mse_va, label="MSE V.A")
    plt.title("Vowel MSE vs Hidden Nodes")
    plt.xlabel("Hidden Nodes")
    plt.ylabel("MSE")
    plt.legend()
    plt.show()
Пример #20
0
def prob_2():
    """Compare k=3 KNN accuracy on magic telescope before and after
    normalization, then plot accuracy over a range of k values."""
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')

    # BUG-RISK FIX: the classifier was previously bound to `k`, which the
    # loop variable `k` below then shadowed; renamed to `knn3` for clarity.
    knn3 = KNN(3)
    predictions = knn3.knn(train.get_features(), train.get_labels(),
                           test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("Before normalization:", sum(acc) / len(acc))

    train.normalize()
    test.normalize()
    predictions = knn3.knn(train.get_features(), train.get_labels(),
                           test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("After normalization:", sum(acc) / len(acc))

    print("PART TWO:")
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
Пример #21
0
from toolkit.perceptron_learner import PerceptronLearner
from toolkit.arff import Arff
import sys
import numpy as np


def rnd4(obj):
    """Format numeric scalars to four decimal places.

    Arrays pass through unchanged; int/float/complex scalars are formatted
    as fixed-point strings; any other type is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj
    if isinstance(obj, (int, float, complex)):
        return "{:.4f}".format(obj)
    # BUG FIX: previously fell through and implicitly returned None for
    # non-numeric, non-array inputs; pass them through instead.
    return obj


# Average the perceptron weights over 10 training runs and print each
# averaged weight alongside its attribute name.
arff = Arff(sys.argv[1])
features = arff.get_features()
labels = arff.get_labels()

pl = PerceptronLearner()

weights = []
for run in range(10):
    pl.train(features, labels)
    weights.append(pl.weights)

avg_weights = np.sum(weights, axis=0) / 10
names = arff.get_attr_names()
for idx, avg_weight in enumerate(avg_weights):
    print(rnd4(avg_weight), names[idx])