Example #1
def prob_3(weighted_d=False):
    test_arff = Arff("housing_testing_data.arff")
    train_arff = Arff("housing_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)
        A.append(KNNC.get_accuracy_regress(weighted_d))
    
    plt.plot(K, A, label="")
    t = "KNN Regression M.S.E Housing"
    if weighted_d:
        t += " (weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("M.S.E")
    # plt.legend()
    plt.show()
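The KNNClassifier used above is not shown here. As a rough illustration of what distance-weighted KNN regression computes when weighted_d is set, here is a minimal sketch assuming Euclidean distances and inverse-squared-distance weights; knn_regress and its signature are hypothetical, not this project's API.

import numpy as np

def knn_regress(train_X, train_y, query, k, weighted=False):
    """Predict a continuous target for one query point with KNN.

    train_X: (n, d) feature matrix, train_y: (n,) targets, query: (d,) point.
    """
    # Euclidean distance from the query to every training instance.
    dists = np.sqrt(((train_X - query) ** 2).sum(axis=1))
    nearest = np.argsort(dists)[:k]                 # indices of the k closest points
    if not weighted:
        return train_y[nearest].mean()              # plain average of neighbor targets
    # Inverse-squared-distance weights; guard against a zero distance.
    w = 1.0 / np.maximum(dists[nearest], 1e-12) ** 2
    return np.dot(w, train_y[nearest]) / w.sum()

With weighted=False this reduces to a plain average over the k neighbors, which is what the unweighted runs above would correspond to.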
Example #2
def prob4h():
    arff = Arff('datasets/abalone.arff', label_count=0)
    arff.normalize()
    domain = np.arange(2, 8)
    print('single link --------------------')
    hac = HAC()
    hac.train(arff, printk=domain, silhouette=True)
    print('complete link ------------------')
    hac = HAC(simple=False)
    hac.train(arff, printk=domain, silhouette=True)
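HAC(simple=False) appears to switch from single-link to complete-link merging; the two agglomerative variants differ only in how the distance between two clusters is defined. A minimal sketch under that assumption, with cluster_distance as a hypothetical helper (clusters given as lists of row indices, Euclidean point distances):

import numpy as np

def cluster_distance(data, cluster_a, cluster_b, single_link=True):
    """Distance between two clusters of row indices into `data`.

    Single link: distance of the closest pair of points across the clusters.
    Complete link: distance of the farthest pair of points across the clusters.
    """
    # Pairwise Euclidean distances between every point in A and every point in B.
    diffs = data[cluster_a][:, None, :] - data[cluster_b][None, :, :]
    pair_dists = np.sqrt((diffs ** 2).sum(axis=-1))
    return pair_dists.min() if single_link else pair_dists.max()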
Example #3
def prob4():
    arff = Arff('datasets/abalone.arff', label_count=0)
    arff.normalize()
    domain = np.arange(2, 8)

    ssekmm = []
    for k in domain:
        km = KMeans(k)
        ssek = km.train(arff)    # train() returns the SSE for this k (see Example #12)
        ssekmm.append(ssek)
        print(km.calc_silhouette_score())
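calc_silhouette_score() is not defined in this snippet. The standard silhouette score averages (b - a) / max(a, b) over all points, where a is the mean distance to the other members of the point's own cluster and b is the smallest mean distance to any other cluster. A minimal sketch of that computation; silhouette_score here is illustrative, not the KMeans method:

import numpy as np

def silhouette_score(data, labels):
    """Average silhouette over all points: (b - a) / max(a, b) per point."""
    scores = []
    for i, x in enumerate(data):
        dists = np.sqrt(((data - x) ** 2).sum(axis=1))   # distance of x to every point
        own = (labels == labels[i]) & (np.arange(len(data)) != i)
        if not own.any():                # singleton cluster: silhouette defined as 0
            scores.append(0.0)
            continue
        a = dists[own].mean()            # mean distance within x's own cluster
        # b: smallest mean distance to the members of any other cluster
        b = min(dists[labels == c].mean() for c in np.unique(labels) if c != labels[i])
        scores.append((b - a) / max(a, b))
    return float(np.mean(scores))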
Example #4
def test_cases():
    # test_1()

    # Attribute types for the labor dataset: "real" = continuous, "cat" = categorical.
    attr_types = [
        "real", "real", "real", "real", "cat", "real", "cat", "real", "real",
        "cat", "real", "cat", "cat", "cat", "cat", "cat", "cat"
    ]

    # Allowed values for each categorical attribute (empty lists for continuous ones).
    attr_idx = [
        [], [], [], [],
        ['none', 'tcf', 'tc'],
        [],
        ['none', 'ret_allw', 'empl_contr'],
        [], [],
        ['yes', 'no'],
        [],
        ['below_average', 'average', 'generous'],
        ['yes', 'no'],
        ['none', 'half', 'full'],
        ['yes', 'no'],
        ['none', 'half', 'full'],
        ['bad', 'good']
    ]

    k = 5
    arff = Arff("labor.arff")
    arff.normalize()
    features = arff.get_features().data
    labels = arff.get_labels().data
    # attributes = arff.get_attr_names()
    data = np.hstack((features, labels))[:, 1:]
    kmc = KMC(k, data, data, attr_types, attr_idx)
    kmc.train(tol=0)
Example #5
def prob_5():
    cont_mask = [1, 2, 7, 10, 13, 14, 16]
    cate_mask = [0, 3, 4, 5, 6, 8, 9, 11, 12, 15]

    arff = Arff("credit_approval_data.arff")
    arff.shuffle()
    arff.normalize()

    n = len(arff.get_labels().data)
    t = int(n * .7)
    train_data = arff.create_subset_arff(row_idx=slice(0, t, 1))
    test_data = arff.create_subset_arff(row_idx=slice(t, n, 1))
    test_data = np.hstack((test_data.get_features().data, test_data.get_labels().data))
    train_data = np.hstack((train_data.get_features().data, train_data.get_labels().data))
    # sample row from the dataset: b,30.83,0,u,g,w,v,1.25,t,t,01,f,g,00202,0,+
    # distance matrix: 0 on the diagonal, 1 everywhere else
    dist_matrix = np.ones((16, 16))
    np.fill_diagonal(dist_matrix, 0)
    KNNC = KNNClassifier(8, train_data, test_data)
    print(KNNC.get_accuracy_mixed(cate_mask, cont_mask, dist_matrix))
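get_accuracy_mixed and the exact role of dist_matrix are not shown. A common way to score mixed data is a heterogeneous metric that combines squared differences on (normalized) continuous attributes with a 0/1 match distance on categorical ones. A minimal sketch along those lines, reusing index masks like cont_mask/cate_mask above; mixed_distance is a hypothetical helper, not this project's API:

import numpy as np

def mixed_distance(x, y, cont_mask, cate_mask):
    """Heterogeneous distance between two instances with mixed attribute types.

    Continuous attributes (assumed normalized to [0, 1]) contribute their squared
    difference; categorical attributes contribute 0 on a match and 1 otherwise.
    """
    x, y = np.asarray(x), np.asarray(y)
    cont = x[cont_mask].astype(float) - y[cont_mask].astype(float)
    cate = x[cate_mask] != y[cate_mask]
    return np.sqrt((cont ** 2).sum() + cate.sum())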
Example #6
def prob_2(weighted_d=False):
    """ """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()

    # attributes = test_arff.get_attr_names()
    test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    KNNC = KNNClassifier(k, train_data, test_data)
    acc = KNNC.get_accuracy(weighted_d)

    test_arff.normalize()
    train_arff.normalize()
    n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
    n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
    n_KNNC = KNNClassifier(k, n_train_data, n_test_data)
    acc_n = n_KNNC.get_accuracy(weighted_d)

    # print(np.array([[acc,acc_n]]))
    print(acc, acc_n)
    # show_table(["Not Normalized", "Normalized"], ["Accuracy"], np.array([[acc, acc_n]]), title="Normalized vs Non-normalized, k=3")

    K = [1, 3, 5, 7, 9, 11, 13, 15]
    A = []
    for k_hat in K:
        # n_test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        # n_train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        n_KNNC = KNNClassifier(k_hat, n_train_data, n_test_data)
        A.append(n_KNNC.get_accuracy(weighted_d))

    plt.plot(K, A, label="")
    t = "KNN Accuracy Telesc. "
    if weighted_d:
        t += "(weighted-d)"
    plt.title(t)
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    # plt.legend()
    plt.show()
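The classification counterpart of the regression sketch in Example #1 is a majority (or distance-weighted) vote over the k nearest training labels, which is presumably what get_accuracy(weighted_d) wraps. A minimal, illustrative version; knn_classify is not this project's API:

import numpy as np
from collections import Counter

def knn_classify(train_X, train_y, query, k, weighted=False):
    """Predict a class label for one query point by majority (or weighted) vote."""
    dists = np.sqrt(((train_X - query) ** 2).sum(axis=1))
    nearest = np.argsort(dists)[:k]
    if not weighted:
        return Counter(train_y[nearest]).most_common(1)[0][0]
    # Weighted vote: each neighbor contributes 1/d^2 to its class's score.
    votes = {}
    for idx in nearest:
        w = 1.0 / max(dists[idx], 1e-12) ** 2
        votes[train_y[idx]] = votes.get(train_y[idx], 0.0) + w
    return max(votes, key=votes.get)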
Example #7
def prob_6():
    """ """
    k = 3
    test_arff = Arff("magic_telescope_testing_data.arff")
    train_arff = Arff("magic_telescope_training_data.arff")
    test_arff.shuffle()
    train_arff.shuffle()
    test_arff.normalize()
    train_arff.normalize()

    K = [1, 3, 5]
    T = []
    A = []
    T_KSM = []
    A_KSM = []
    for k_hat in K:
        test_data = np.hstack((test_arff.get_features().data, test_arff.get_labels().data))
        train_data = np.hstack((train_arff.get_features().data, train_arff.get_labels().data))
        KNNC = KNNClassifier(k_hat, train_data, test_data)

        t = time.time()
        A.append(KNNC.get_accuracy())
        T.append(time.time() - t)
        KNNC.induce_KSM()

        t = time.time()
        A_KSM.append(KNNC.get_accuracy())
        T_KSM.append(time.time() - t)

    ax = plt.axes(projection='3d')
    ax.plot(K, A, T, label="No-KSM")
    ax.plot(K, A_KSM, T_KSM, label="KSM")

    ax.set_xlabel('K')
    ax.set_ylabel('Accuracy')
    ax.set_zlabel('Time')

    t = "KNN Accuracy w/ IKSM"
    plt.title(t)
    plt.legend()
    plt.show()
Example #8
def prob_3():
    # Use regression knn on housing price prediction dataset
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k)
        preds = knn.knn(train.get_features(), train.get_labels(),
                        test.get_features())
        mse = sum((preds - np.ravel(test.get_labels().data))**2) / len(preds)
        mses.append(mse)

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing Prices")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
Example #9
def prob_4_telescope():
    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k, weighting=True)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
Example #10
def prob_2():
    # try first without normalizing
    train = Arff('datasets/magic_telescope_train.arff')
    test = Arff('datasets/magic_telescope_test.arff')

    knn = KNN(3)
    predictions = knn.knn(train.get_features(), train.get_labels(),
                          test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("Before normalization:", sum(acc) / len(acc))

    train.normalize()
    test.normalize()
    predictions = knn.knn(train.get_features(), train.get_labels(),
                          test.get_features())

    acc = predictions == np.ravel(test.get_labels().data)

    print("After normalization:", sum(acc) / len(acc))

    print("PART TWO:")
    krange = np.arange(1, 16, 2)
    accs = []
    for k in krange:
        knn = KNN(k)
        predictions = knn.knn(train.get_features(), train.get_labels(),
                              test.get_features())
        acc = predictions == np.ravel(test.get_labels().data)
        print("k:", k, "accuracy:", sum(acc) / len(acc))
        accs.append(sum(acc) / len(acc))

    plt.plot(krange, accs)
    plt.title("K Size Versus Accuracy")
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.show()
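The before/after comparison above hinges on Arff.normalize(); min-max rescaling of each column into [0, 1] is the usual choice, since otherwise large-range features dominate the Euclidean distance. A minimal sketch on a plain NumPy array, illustrative rather than the Arff implementation:

import numpy as np

def min_max_normalize(X):
    """Rescale each column of X into [0, 1]; constant columns are left at 0."""
    X = np.asarray(X, dtype=float)
    col_min = X.min(axis=0)
    col_range = X.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0      # avoid division by zero for constant columns
    return (X - col_min) / col_range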
Example #11
def prob_4_housing():

    # Repeat experiments for magic telescope and housing using weights (w = 1/dist**2)
    train = Arff('datasets/housing_train.arff')
    test = Arff('datasets/housing_test.arff')
    train.normalize()
    test.normalize()

    krange = np.arange(1, 16, 2)
    mses = []
    for k in krange:
        knn = KNN(k, weighting=True)
        preds = knn.knn_regression(train.get_features(), train.get_labels(),
                                   test.get_features())
        mse = np.sum(
            (preds - np.ravel(test.get_labels().data))**2, axis=0) / len(preds)
        mses.append(mse)

    plt.plot(krange, mses)
    plt.title("K Size Versus MSE on Housing (Weighted)")
    plt.xlabel("K")
    plt.ylabel("Mean Squared Error")
    plt.show()
Example #12
def prob3_normalized():
    arff = Arff('datasets/abalone.arff', label_count=0)
    arff.normalize()
    domain = np.arange(2, 8)

    ssekmm = []
    for k in domain:
        km = KMeans(k)
        ssek = km.train(arff)
        ssekmm.append(ssek)

    hac = HAC()
    hac2 = HAC(simple=False)
    ssehac = hac.train(arff, printk=domain)
    ssehac2 = hac2.train(arff, printk=domain)

    plt.plot(domain, ssekmm, label="K-Means SSE")
    plt.plot(domain, ssehac[::-1], label="HAC (Single-Link) SSE")
    plt.plot(domain, ssehac2[::-1], label="HAC (Complete-Link) SSE")
    plt.title("Abalone SSE (Normalized) vs # of Clusters")
    plt.xlabel("# of Clusters")
    plt.ylabel('SSE')
    plt.legend()
    plt.show()
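The SSE values returned by KMeans.train and HAC.train and plotted above are, by convention, the sum of squared distances from each point to the mean (centroid) of its assigned cluster. A minimal sketch of that quantity, assuming data, integer labels, and a centroids array; all names here are hypothetical:

import numpy as np

def cluster_sse(data, labels, centroids):
    """Sum of squared Euclidean distances from each point to its assigned centroid."""
    return float(((data - centroids[labels]) ** 2).sum())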