def prob_5():
        arff = Arff('datasets/cars.arff')
        arff.shuffle()

        test = arff.create_subset_arff(slice(arff.instance_count//10))
        training = arff.create_subset_arff(slice(arff.instance_count//10,None))

        tf = test.get_features()
        tl = test.get_labels()

        splits = k_fold_cv(arff)

        arff = arff.create_subset_arff(slice(arff.instance_count//4,None))
        d = DecisionTreeLearner()
        d.train(arff.get_features(), arff.get_labels())

        a = d.tree

        arff = Arff('datasets/voting.arff')
        arff.shuffle()
        arff = arff.create_subset_arff(slice(arff.instance_count//4,None))
        d = DecisionTreeLearner()
        d.train(arff.get_features(), arff.get_labels())

        b = d.tree

        return a, b
示例#2
0
def prob_3(weighted_d = False):
    test_arff = Arff("housing_testing_data.arff")
    train_arff = Arff("housing_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        A.append(KNNC.get_accuracy_regress(weighted_d))
    
    plt.plot(K, A, label="")
    t = "KNN Regression M.S.E Housing"
    if weighted_d:
        t += "(weighted-d)"
    weighted_d
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("M.S.E")
    # plt.legend()
    plt.show()
def main():
    arff = Arff(sys.argv[1])
    pl = PerceptronLearner()
    features = arff.get_features()
    labels = arff.get_labels()

    accuracy_matrix = np.zeros((5, 20))

    for i in range(5):

        pl.train(features, labels)

        a = pl.accuracy_tracker[:20]
        # pad to make 20 wide
        a = np.pad(a, (0, 20 - len(a)), 'constant', constant_values=a[-1])
        accuracy_matrix[i] = a

    # Average the accuracies of each step
    print(accuracy_matrix)
    avg_accuracy = np.sum(accuracy_matrix, axis=0) / 5
    print(avg_accuracy)

    plt.plot(1 - avg_accuracy)
    plt.xlabel("Epochs")
    plt.ylabel("Avg Misclassification Rate")
    plt.title("Avg Misclassification Rate Over Epochs")

    plt.show()
示例#4
0
    def setUp(self):
        path = os.path.join(utils.get_root(), "test/datasets/cm1_req.arff")
        data = Arff(arff=path)

        self.features = data.get_features()
        self.labels = data.get_labels()
        self.learner = BaselineLearner()
示例#5
0
def prob0haccomplete():
    arff = Arff('datasets/labor.arff', label_count=1)
    # Trim the id column
    arff = arff.create_subset_arff(col_idx=slice(1, None))
    arff = arff.get_features()
    hac = HAC(simple=False)
    hac.train(arff, verbose=True, printk=[5])
示例#6
0
def prob0():
    arff = Arff('datasets/labor.arff', label_count=1)
    # Trim the id column
    arff = arff.create_subset_arff(col_idx=slice(1, None))
    arff = arff.get_features()
    km = KMeans(5)
    km.train(arff, verbose=True, centers=arff.data[:5])
def prob_0():
    arff = Arff('datasets/lenses.arff')
    d = DecisionTreeLearner()
    f = arff.get_features()
    l = arff.get_labels()
    d.train(f,l)
    print(d.tree)
示例#8
0
def main():
    arff = Arff(sys.argv[1])
    features = arff.get_features()
    labels = arff.get_labels()

    pl = PerceptronLearner()
    pl.train(features, labels)

    visualize_training(features, labels, pl)
示例#9
0
def setup():
    arff = Arff('datasets/labor.arff', label_count=1)
    # Trim the id column
    arff = arff.create_subset_arff(col_idx=slice(1, None))
    arff = arff.get_features()
    hac = HAC()
    hac.nominal_indicies = np.where(np.array(arff.attr_types) == 'nominal')[0]
    print('33,44', hac.get_distance(arff.data[33], arff.data[44]))
    print('25,34', hac.get_distance(arff.data[25], arff.data[34]))
示例#10
0
def prob_3():
    print('cars')
    arff = Arff('datasets/cars.arff')
    arff.shuffle()
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())

    a = d.tree

    print()
    print('voting')
    arff = Arff('datasets/voting.arff')
    arff.shuffle()
    d = DecisionTreeLearner()
    d.train(arff.get_features(), arff.get_labels())

    b = d.tree

    return a, b
示例#11
0
def prob_2(weighted_d = False):
    """ """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()

    # attributes = test_arff.get_attr_names()
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    KNNC = KNNClassifier(k, train_data, test_data)
    acc = KNNC.get_accuracy(weighted_d)

    test_arff.normalize()
    train_arff.normalize()
    n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    n_KNNC = KNNClassifier(k, n_test_data, n_train_data)
    acc_n = n_KNNC.get_accuracy(weighted_d)

    # print(np.array([[acc,acc_n]]))
    print(acc,acc_n)
    # show_table(["Not Normalized"  "Normailzed"], ["Accuracy"], np.array([[acc,acc_n]]), title = "Normalized vs Non-normalized, k=3")

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        # n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        # n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        n_KNNC = KNNClassifier(k_hat, n_train_data, n_test_data)
        A.append(n_KNNC.get_accuracy(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Accuracy Telesc. "
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    # plt.legend()
    plt.show()
示例#12
0
def prob_6():
    """ """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5]
    T = []
    A = []
    T_KSM = []
    A_KSM = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)

        t = time.time()
        A.append(KNNC.get_accuracy())
        T.append(time.time() - t)
        KNNC.induce_KSM()

        t = time.time()
        A_KSM.append(KNNC.get_accuracy())
        T_KSM.append(time.time() - t)

    ax = plt.axes(projection='3d')
    ax.plot(K, A, T, label="No-KSM")
    ax.plot(K, A_KSM, T_KSM, label="KSM")

    ax.set_xlabel('K')
    ax.set_ylabel('Accuracy')
    ax.set_zlabel('Time')

    t = "KNN Accuracy w/ IKSM"
    plt.title(t)
    plt.legend()
    plt.show()
示例#13
0
    def test_get_features(self):
        """ Tests construction of Arff from path, arff, numpy array
        """
        # Create a Matrix object from arff
        credit = Arff(arff=self.credit_data_path, label_count=1)
        credit.label_count=0
        np.testing.assert_equal(credit.data, credit.get_features().data)

        ## Test label inference
        credit.label_count = 5
        self.assertEqual(credit.get_labels().shape, (690, 5))

        ## Copy last 8 columns
        credit2 = Arff(credit, col_idx=slice(-8, None))
        self.assertEqual(credit2.label_count, 5)
        self.assertEqual((690,3), credit2.get_features().shape)

        ## Verify 0 labels
        credit.label_count = 0
        self.assertEqual((690, 16), credit.get_features().shape)
        self.assertEqual((690, 0), credit.get_labels().shape)
示例#14
0
def prob2():
    iris = Arff('datasets/iris.arff')
    features = iris.get_features()
    # features.normalize()
    # Train k means for 2-7
    ks = [2, 3, 4, 5, 6, 7]
    for k in ks:
        km = KMeans(k)
        km.train(features)

    hac2 = HAC(simple=False)
    hac2.train(features, printk=ks)
示例#15
0
def prob_3():
    # Use regression knn on housing price prediction dataset
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k)
        preds = knn.knn(train.get_features(), train.get_labels(),
                        test.get_features())
        mse = sum((preds - np.ravel(test.get_labels().data))**2) / len(preds)
        mses.append(mse)

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing Prices")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
示例#16
0
def test_cases():
    # test_1()

    attr_types = [
        "real",
        "real",
        "real",
        "real",
        "cat",
        "real",
        "cat",
        "real",
        "real",
        "cat",
        "real",
        "cat",
        "cat",
        "cat",
        "cat",
        "cat",
        "cat"
    ]

    attr_idx = [
            [],
            [],
            [],
            [],
            ['none','tcf','tc'],
            [],
            ['none','ret_allw','empl_contr'],
            [],
            [],
            ['yes','no'],
            [],
            ['below_average','average','generous'],
            ['yes','no'],
            ['none','half','full'],
            ['yes','no'],
            ['none','half','full'],
            ['bad','good']
        ]

    k = 5
    arff = Arff("labor.arff")
    arff.normalize()
    features = arff.get_features().data
    labels = arff.get_labels().data
    # attributes = arff.get_attr_names()
    data = np.hstack((features, labels))[:, 1:]
    kmc = KMC(k, data, data, attr_types, attr_idx)
    kmc.train(tol=0)
示例#17
0
def prob_4_telescope():
    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k, weighting=True)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
示例#18
0
def prob_4_housing():

    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k, weighting=True)
        preds = knn.knn_regression(train.get_features(), train.get_labels(),
                                   test.get_features())
        mse = np.sum(
            (preds - np.ravel(test.get_labels().data))**2, axis=0) / len(preds)
        mses.append(mse)

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
示例#19
0
def prob_2():
    # try first without normalizing
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')

    k = KNN(3)
    predictions = k.knn(train.get_features(), train.get_labels(),
                        test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("Before normalization:", sum(acc) / len(acc))

    train.normalize()
    test.normalize()
    predictions = k.knn(train.get_features(), train.get_labels(),
                        test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("After normalization:", sum(acc) / len(acc))

    print("PART TWO:")
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
示例#20
0
from toolkit.perceptron_learner import PerceptronLearner
from toolkit.arff import Arff
import sys
import numpy as np


def rnd4(obj):
    if isinstance(obj, np.ndarray):
        return obj
    elif isinstance(obj, (int, float, complex)):
        return "{:.4f}".format(obj)


arff = Arff(sys.argv[1])
features = arff.get_features()
labels = arff.get_labels()

pl = PerceptronLearner()

weights = []

for i in range(10):
    pl.train(features, labels)
    weights.append(pl.weights)

avg_weights = np.sum(weights, axis=0) / 10
names = arff.get_attr_names()
for i in range(len(avg_weights)):
    print(rnd4(avg_weights[i]), names[i])