Example #1
def PCA(test_Datamatrix, test_Labelmatrix, train_Datamatrix,
        train_Labelmatrix):
    # center the training data (the test set is centered with its own mean here)
    mean = np.mean(train_Datamatrix, axis=0)
    z = train_Datamatrix - mean
    z_test = test_Datamatrix - np.mean(test_Datamatrix, axis=0)
    # eigen-decomposition of the training covariance matrix
    cov_matrix = np.cov(z, rowvar=0, bias=1)
    eigenValues, eigenVectors = np.linalg.eig(cov_matrix)
    # sort eigenpairs by decreasing eigenvalue
    indices = eigenValues.argsort()[::-1]
    eigenValues_sorted = eigenValues[indices]
    eigenVectors_sorted = eigenVectors[:, indices]
    r_values = [0.8, 0.85, 0.9, 0.95]
    B = sum(eigenValues_sorted)  # total variance
    for r in r_values:
        # smallest number of leading components whose variance ratio reaches r
        alpha = len(eigenValues_sorted) - 1
        for i in range(len(eigenValues_sorted)):
            T = sum(eigenValues_sorted[:i])
            if T / B >= r:
                alpha = i
                break
        new_matrix = eigenVectors_sorted[:, 0:alpha + 1]
        # project the centered training and test data onto the retained components
        pca_Train = np.dot(new_matrix.T, z.T)
        pca_Test = np.dot(new_matrix.T, z_test.T)
        pca_Train = np.asarray(pca_Train, dtype="int32")
        pca_Test = np.asarray(pca_Test, dtype="int32")
        print("R = " + str(r))
        print("----------------------")
        KNNClassifier(pca_Train, train_Labelmatrix, pca_Test, test_Labelmatrix)
        print(
            "*****************************************************************************"
        )
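The retained-variance search in the inner loop can also be written with a cumulative sum; a minimal sketch (the helper name retained_components is made up, and the eigenvalues are assumed real and sorted in decreasing order):

import numpy as np

def retained_components(eigenvalues_sorted, r):
    # smallest number of leading components whose cumulative variance ratio reaches r
    ratios = np.cumsum(eigenvalues_sorted) / np.sum(eigenvalues_sorted)
    return int(np.searchsorted(ratios, r) + 1)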
Example #2
def testBreastCancer(self):
    # load_breast_cancer is scikit-learn's dataset loader; np and the
    # cross_validation helper are imported elsewhere in this test module
    from sklearn.datasets import load_breast_cancer
    from KNN import KNNClassifier
    breast_cancer = load_breast_cancer()
    knn = KNNClassifier(k=5)
    X_train = breast_cancer.data
    y_train = breast_cancer.target
    accuracy = np.mean(cross_validation(knn, X_train, y_train))
    print("KNN: accuracy =", accuracy)
def LDA(test_Datamatrix, test_Labelmatrix, train_Datamatrix,
        train_Labelmatrix):
    # group the 200 training samples into 40 classes of 5 images each
    LDA_Matrix = []
    for i in range(40):
        LDA_Matrix.append([])
    j = -1
    print(len(train_Datamatrix))
    for i in range(200):
        if i % 5 == 0:
            j = j + 1
        LDA_Matrix[j].append(train_Datamatrix[i])
    LDA_matrix = np.asarray(LDA_Matrix, dtype="int32")
    sb = np.zeros((10304, 10304), dtype=np.float32)  # between-class scatter
    z = np.zeros((40, 5, 10304), dtype=np.float32)   # per-class centered data
    si = np.zeros((10304, 10304), dtype=np.float32)  # within-class scatter
    mean = np.mean(LDA_matrix, axis=1)               # per-class means
    Overall_mean = np.mean(mean, axis=0)
    # between-class scatter: sum of Ni * (mi - m)(mi - m)^T with Ni = 5
    for i in range(40):
        diff = mean[i] - Overall_mean
        sb += 5 * np.outer(diff, diff)
    # center each class around its own mean
    for i in range(40):
        z[i] = LDA_matrix[i] - mean[i]
    # within-class scatter
    for i in range(40):
        si += np.dot(z[i].T, z[i])
    # NOTE: with far more features (10304) than samples (200) si is singular,
    # so a pseudo-inverse is often preferred here
    Sinv = np.linalg.inv(si)
    SinvB = np.matmul(Sinv, sb)
    eigenValues, eigenVectors = np.linalg.eig(SinvB)
    indices = eigenValues.argsort()[::-1]
    eigenValues_sorted = eigenValues[indices]
    eigenVectors_sorted = eigenVectors[:, indices]
    # project the original data onto the sorted discriminant directions
    LDA_Train = np.dot(train_Datamatrix, eigenVectors_sorted)
    LDA_Test = np.dot(test_Datamatrix, eigenVectors_sorted)
    print("----------------------")
    # apply the KNN classifier to the projected data
    LDA_Train = np.asarray(LDA_Train, dtype="int32")
    LDA_Test = np.asarray(LDA_Test, dtype="int32")
    print("###############################")
    print(len(LDA_Train))
    print(len(LDA_Train[0]))
    print(len(LDA_Test))
    print(len(LDA_Test[0]))
    print("###############################")

    KNNClassifier(LDA_Train.T, train_Labelmatrix, LDA_Test.T, test_Labelmatrix)
    print(
        "*****************************************************************************"
    )
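Because the training set here has 200 images of dimension 10304, the within-class scatter si is rank-deficient and np.linalg.inv is numerically fragile; a common variant (a hedged sketch with a made-up helper name, not the snippet's own code) uses a pseudo-inverse and keeps only the C - 1 = 39 dominant directions:

import numpy as np

def lda_projection(sw, sb, n_classes=40):
    # pseudo-inverse copes with a rank-deficient within-class scatter matrix
    eigvals, eigvecs = np.linalg.eig(np.linalg.pinv(sw) @ sb)
    order = np.argsort(eigvals.real)[::-1]
    # at most C - 1 generalized eigenvectors carry discriminant information
    return eigvecs[:, order[:n_classes - 1]].real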
Example #4
import numpy as np
from KNN import KNNClassifier
raw_data_X = [[3.393533211, 2.331273381],
              [3.110073483, 1.781539638],
              [1.343808831, 3.368360954],
              [3.582294042, 4.679179110],
              [2.280362439, 2.866990263],
              [7.423436942, 4.696522875],
              [5.745051997, 3.533989803],
              [9.172168622, 2.511101045],
              [7.792783481, 3.424088941],
              [7.939820817, 0.791637231]
             ]
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
X_train = np.array(raw_data_X)
y_train = np.array(raw_data_y)

x = np.array([8.093607318, 3.365731514])
x_predict = x.reshape(1, -1)
knn_clf = KNNClassifier(k=6)
print(knn_clf)
knn_clf.fit(X_train, y_train)
y_predict = knn_clf.predict(x_predict)
print(y_predict)
Example #5
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
# the custom KNNClassifier is assumed to come from this project's own KNN module
from KNN import KNNClassifier

# `af` (an ARFF loader) and `all_data` are defined earlier in this script
test = af.data

# baseline: scikit-learn's KNN with uniform weights
KNC = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
)
KNC.fit(all_data[:, :-1], all_data[:, -1])
print(KNC.score(test[:, :-1], test[:, -1]))

# 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1 - 1
# NOTE: multiplying zeros by -1 still yields all zeros;
# np.full(len(all_data), -1) may have been intended
catagory = np.zeros(len(all_data)) * -1
# catagory[3]= 0
# catagory[1] = -1
# catagory[2] = -1
# catagory[7] = -1
# catagory[10] = -1
# catagory[13] = -1
# catagory[14] = -1
weight_type = ['inverse_distance', 'no_weight']

KK = KNNClassifier(catagory, k=3, weight_type=weight_type[0], normalize=False)

KK.fit(all_data[:, :-1], all_data[:, -1])

print('fitted')

filepath = 'Magic_test.arff'
af.load_arff(filepath)
test = af.data
print(KK.score(test[:, :-1], test[:, -1]))
Example #6
import numpy as np
import mnist  # the standalone MNIST loader module used by this script
from KNN import KNNClassifier
from random import randrange

# IMPORTANT: mnist.init() should be called the first time you run this script
mnist.init()
# convert the numpy arrays to int16 to avoid overflow during subtraction
train_images, train_labels, test_images, test_labels = map(
    lambda x: x.astype(np.int16), mnist.load())

# initializing multi-process options
# in Unix-like OS, multiprocessing is implemented based on fork().
# to take advantage of COW, knn instance and chunks should be global.
# by using COW, memory cost can be reduced to 1/workers.
# so multiprocessing.SharedMemory is no longer needed xD
# however, such optimization may not work in Windows :(
knn = KNNClassifier(10)
knn.fit(train_images, train_labels)
workers = 6  # IMPORTANT: should be number of physical cores of your PC
test_size = test_images.shape[0]  # 10000
chunk_size = 10  # size of each chunk
chunks_num = 10  # total number of chunks
chunks = [[test_images[i:i + chunk_size], test_labels[i:i + chunk_size]]
          for i in range(0, test_size, chunk_size)
          ]  # split testing set into chunks for multi-process calculating


# run KNN on a specific chunk
# knn is an instance of KNNClassifier
# pass k here to avoid multiprocess issues with Windows
def process(chunk_id, k):
    print("chunk", chunk_id, "starts")
Example #7
from KNN import KNNClassifier
from model_selection import train_test_split
from metric import accuracy_score
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # grid-search k from 1 to 10 on two features of the iris data
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    x_data = iris_data[:, [0, 2]]
    y_data = iris_data[:, 4]

    x_predict = np.array([[5.1, 2.1]])
    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

    best_score = 0.0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNNClassifier(n_neighbors=k)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score

    print("best_k =", best_k)
    print('best_score =', best_score)
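train_test_split here is the project's own model_selection helper and, judging by the unpacking above, returns (x_train, y_train, x_test, y_test); a minimal sketch of such a helper under that assumed interface:

import numpy as np

def train_test_split(x, y, test_ratio=0.2, seed=None):
    # shuffle the indices, then carve off the last test_ratio fraction as the test set
    rng = np.random.default_rng(seed)
    idx = rng.permutation(len(x))
    n_test = int(len(x) * test_ratio)
    return x[idx[:-n_test]], y[idx[:-n_test]], x[idx[-n_test:]], y[idx[-n_test:]]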
Example #8
    norm_train_data[:, col] = where

test_data = raw_data[int(raw_data.shape[0] * split) + 1:, :-1]
norm_test_data = test_data.copy()
test_labels = raw_data[int(raw_data.shape[0] * split) + 1:, -1]

# min-max normalize each test column; guard against constant columns
for col in range(norm_test_data.shape[1]):
    column = norm_test_data[:, col]
    col_min = min(column)
    col_max = max(column)

    where = np.where(col_max != col_min,
                     (column - col_min) / (col_max - col_min), 0)

    norm_test_data[:, col] = where

KNN_weight = KNNClassifier(k_val=15,
                           label_type='nominal',
                           col_types=mat.attr_types,
                           weight_type='inverse_distance')
KNN_weight.fit(norm_train_data, train_labels)
weight_scores = KNN_weight.score(norm_test_data, test_labels)

K_vals = np.arange(1, 17, 2)

#plt.plot(K_vals, scores, label="non-weighted")
plt.plot(K_vals, weight_scores, label="weighted")
plt.title("Credit Approval")
plt.ylabel("Accuracy")
plt.xlabel("K nearest neighbors")
plt.savefig("part5_plot_credit.png")
plt.show()
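The plot above expects one accuracy per entry of K_vals, while the snippet shows only the single k_val=15 score (the loop was presumably cut off by the listing); a sketch of collecting weight_scores across K_vals, reusing the constructor arguments shown above:

weight_scores = []
for k_val in K_vals:
    knn = KNNClassifier(k_val=k_val,
                        label_type='nominal',
                        col_types=mat.attr_types,
                        weight_type='inverse_distance')
    knn.fit(norm_train_data, train_labels)
    weight_scores.append(knn.score(norm_test_data, test_labels))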
Example #9
from KNN import KNNClassifier
from tools.arff import Arff
import numpy as np

diabetes_train = "../data/KNN/diabetes.arff"
diabetes_test = "../data/KNN/diabetes_test.arff"
seismic_train = "../data/KNN/seismic-bumps_train.arff"
seismic_test = "../data/KNN/seismic-bumps_test.arff"

mat = Arff(seismic_train, label_count=1)
mat2 = Arff(seismic_test, label_count=1)
raw_data = mat.data
h, w = raw_data.shape
train_data = raw_data[:, :-1]
train_labels = raw_data[:, -1]

raw_data2 = mat2.data
h2, w2 = raw_data2.shape
test_data = raw_data2[:, :-1]
test_labels = raw_data2[:, -1]

KNN = KNNClassifier(15, "nominal", weight_type='inverse_distance')
KNN.fit(train_data, train_labels)
pred = KNN.predict(test_data)
score = KNN.score(test_data, test_labels)
print(f"Score: {score[1]*100:.2f}%")
#np.savetxt("diabetes-prediction.csv", pred, delimiter=',', fmt="%i")
Example #10
    h, w = raw_data.shape
    train_data = raw_data[:, :-1]
    train_labels = raw_data[:, -1].reshape(-1, 1)

    raw_data2 = mat2.data
    h2, w2 = raw_data2.shape
    test_data = raw_data2[:, :-1]
    test_labels = raw_data2[:, -1].reshape(-1, 1)

    train_data, test_data = normalize(train_data, test_data)

    # KNN = KNNClassifier(labeltype='classification', weight_type='inverse_distance',k=15)
    # KNN = KNNClassifier(labeltype='classification', weight_type='', k=3)
    # KNN = KNNClassifier(labeltype='regression', weight_type='', k=15)
    KNN = KNNClassifier(labeltype='regression',
                        weight_type='inverse_distance',
                        k=3)
    KNN.fit(train_data, train_labels)
    pred, shape = KNN.predict(test_data)
    score = KNN.score(test_data, test_labels)
    print(score)
    # np.savetxt("diabetes_prediction.csv", pred, delimiter=',',fmt="%i")
    # np.savetxt("seismic-bump-prediction_mine.csv", pred, delimiter=',', fmt="%i")

    ################# the following is used for credit data set ##################
    # mat = Arff("credit.arff", label_count=1)
    # data = mat.data[:, 0:-1]
    # labels = mat.data[:, -1].reshape(-1, 1)
    # attr_type = mat.attr_types
    # KNN = KNNClassifier(labeltype='HEOM', weight_type='', k=3, columntype=attr_type)
    # X, X_test, y, y_test = train_test_split(data, labels, test_size=0.25)
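normalize() used above is not shown in this snippet; a minimal sketch of such a helper, assuming it min-max scales both sets using statistics taken from the training data:

import numpy as np

def normalize(train_data, test_data):
    # scale every column to [0, 1] with the training minima/maxima
    col_min = np.nanmin(train_data, axis=0)
    col_range = np.nanmax(train_data, axis=0) - col_min
    col_range[col_range == 0] = 1  # constant columns: avoid division by zero
    return (train_data - col_min) / col_range, (test_data - col_min) / col_range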