def main():
    # ann = ANN("hw5data.txt", 10)
    # ann.run()
    # maze = AStarMaze("Maze.txt", 10)
    # maze.ASSolver()
    knn = KNN("hw5data.txt", 15)
    knn.run()
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from KNN import KNN

# Load the UCI Iris dataset and encode the class labels as integers
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
dataset = pd.read_csv(path, names=names)
encoder = LabelEncoder()
dataset["class"] = encoder.fit_transform(dataset["class"])

train_set, test_set = train_test_split(dataset, test_size=0.25, random_state=0)
print("Train set size : ", len(train_set))
print("Test set size : ", len(test_set))

knn = KNN(7)
predictions = []
for index, t in test_set.iterrows():
    predictors_only = t[:-1]  # drop the class column, keep the four features
    prediction = knn.predict(train_set, predictors_only)
    predictions.append(prediction)

score = knn.evaluate(np.array(test_set.iloc[:, -1]), predictions)
print("KNN Score = ", score)
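# The project's KNN class isn't shown in this snippet; below is a minimal sketch
# of the interface the loop above assumes (a hypothetical stand-in, not the
# project's implementation: Euclidean distance, majority vote over the k nearest
# training rows, with the class in the last DataFrame column).
class KNNSketch:
    def __init__(self, k):
        self.k = k

    def predict(self, train_set, predictors_only):
        feats = train_set.iloc[:, :-1].to_numpy(dtype=float)
        dists = np.sqrt(((feats - np.asarray(predictors_only, dtype=float)) ** 2).sum(axis=1))
        neighbors = train_set.iloc[np.argsort(dists)[:self.k], -1]
        return neighbors.mode().iloc[0]  # most common class among the k nearest

    def evaluate(self, y_true, predictions):
        return float(np.mean(y_true == np.asarray(predictions)))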
import RiskParser as rp
from ANN import ANN
from Dtree import Dtree
from KNN import KNN
from LogR import LogR
from NaiveEnsemble import NaiveEnsemble
import numpy as np

inputs, outputs = rp.parse_data("RiskAssessData.csv")
print(inputs)
print(outputs)

print("### ALL TESTS ###")
test_ANN = ANN()
test_DTree = Dtree()
test_KNN = KNN(10)
test_LogR = LogR()
ensemble_results = []
individual_results = []

for i in range(0, 3):
    x_train, x_test, y_train, y_test = test_DTree.split_data(inputs, outputs, .25)
    test_NaiveEnsemble = NaiveEnsemble([test_ANN, test_KNN, test_LogR, test_DTree])
    test_NaiveEnsemble.train(x_train, y_train)
    ensemble = test_NaiveEnsemble.report_accuracy(x_test, y_test)
    print("Naive Ensemble Test accuracy: test data set", ensemble)
    # print("Naive Ensemble Test accuracy: train data set", test_NaiveEnsemble.report_accuracy(x_train, y_train))
    # test_KNN.train(x_train, y_train)
    # test_NaiveEnsemble.model_list.append(test_KNN)
# ***Train SVM*** #
classifier = svm.SVC(C=1, decision_function_shape="ovr")
classifier.fit(X_train, y_train)

# Test model
prediction = classifier.predict(X_test)

# Evaluate errors
correct = 0
for i, j in zip(np.nditer(prediction), np.nditer(y_test)):
    if i == j:
        # print("{} : {}".format(i, j))
        correct = correct + 1
acc = 1.0 * correct / y_test.size
print("SVM accuracy: {}".format(acc))

# ***KNN Model*** #
knn = KNN(k=10)
predictions = knn.predict(X_train, X_test, y_train)

# Evaluate errors
correct = 0
for i, j in zip(np.nditer(predictions), np.nditer(y_test)):
    if i == j:
        # print("{} : {}".format(i, j))
        correct = correct + 1
acc = 1.0 * correct / y_test.size
print("KNN accuracy: {}".format(acc))
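# Aside: the two counting loops above are equivalent to scikit-learn's
# accuracy_score. A minimal alternative sketch, assuming prediction, predictions
# and y_test are 1-D arrays of equal length:
from sklearn.metrics import accuracy_score
print("SVM accuracy (sklearn): {}".format(accuracy_score(y_test, prediction)))
print("KNN accuracy (sklearn): {}".format(accuracy_score(y_test, predictions)))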
    index_col='tripId')
testSet = pd.read_csv(
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    sep="\t",
    converters={"Trajectory": literal_eval})

path = 'Test_Subways/'
if not os.path.exists(path):
    os.makedirs(path)

# trainSet = trainSet[:400]
for i in range(len(testSet)):
    start_time = time.time()
    origin = testSet['Trajectory'].iloc[i]
    knn = KNN(5, LCSS(Compare_Harvesine), origin, True)
    for j in range(len(trainSet)):
        knn.calculate_neighbor(trainSet['Trajectory'].iloc[j], j)
    results = knn.results()
    elapsed_time = time.time() - start_time

    path = 'Test_Subways/Test_Subways_' + str(i + 1)
    if not os.path.exists(path):
        os.makedirs(path)
    file = open(path + '/results', 'w')
    file.write("Test_Subways_" + str(i + 1) + "\n")
    print_map(origin, 'Test_Subways_' + str(i + 1), path, None)
    count = 1
    for k in results:
        print_map(trainSet['Trajectory'].iloc[k[0]], 'Neighbor_' + str(count),
                  path, Find_Subsequence(k[2]))
        file.write('Neighbor_' + str(count) + '\nJP_ID: ' +
from FeatureScaling import FeatureScaling

fs = FeatureScaling(X, y)
X = fs.fit_transform_X()

# training set split
X_train = X[0:train_size, :]
Y_train = y[0:train_size]

# testing set split
X_test = X[train_size:, :]
Y_test = y[train_size:]

# importing KNN class
from KNN import KNN

l = time.time()
knn = KNN(X_train, Y_train, 5)
y_pred = knn.predict(X_test)
r = time.time()
KNN_learn_time = (r - l)
print(r - l)

# getting the confusion matrix (class 0 is treated as the positive class)
tp = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 0 and y_pred[i] == 0])
# actual 0 predicted 1 is a false negative, not a true negative
fn = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 0 and y_pred[i] == 1])
fp = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 1 and y_pred[i] == 0])
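# The fragment stops before the fourth confusion-matrix cell. A minimal
# completion sketch for illustration, under the same assumption that class 0
# is the positive class (tn and the derived metrics are additions, not the
# original author's code):
tn = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 1 and y_pred[i] == 1])
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
print("accuracy={:.3f} precision={:.3f} recall={:.3f}".format(accuracy, precision, recall))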
y_test = test_np[:, -1]
property_list = [x for x in range(len(test_np[0, :-1]))]

# Greedy forward feature selection: grow the best subset one feature at a time
max_acc = 0
best_sub = ()
tmp_sub = 0
flag = 0
while True:
    for i in range(8):
        if i not in best_sub:
            subset = best_sub + (i,)
        else:
            continue
        # subset = (1, 7, 6, 5, 2)
        train_tmp = norm_train[:, (subset + (8,))]
        test_tmp = norm_test[:, subset]
        y_pred = KNN(train_tmp, test_tmp, 9)
        acc = accuracy_score(y_test, y_pred)
        # acc = float(np.sum(y_test == y_pred) / len(y_test))
        # print("for element " + str(subset) + " received acc of " + str(acc) + "\n")
        if acc >= max_acc:
            flag = 1
            max_acc = acc
            tmp_sub = subset
    best_sub = tmp_sub
    if flag == 0:
        break
    flag = 0
print(list(best_sub))
# print(max_acc)
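# Aside: scikit-learn ships the same greedy wrapper search as
# SequentialFeatureSelector (available since sklearn 0.24). A minimal sketch,
# assuming norm_train[:, :8] holds the features and column 8 the labels; the
# target subset size of 5 is an arbitrary choice for illustration:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier
sfs = SequentialFeatureSelector(KNeighborsClassifier(n_neighbors=9),
                                n_features_to_select=5, direction='forward')
sfs.fit(norm_train[:, :8], norm_train[:, 8])
print(sfs.get_support(indices=True))  # indices of the selected feature columns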
elif "t_" in item: topic_list.append(item[2:]) word_list = word_list[1:] # Remove 'Article #' words_topics_size = len(topic_list) + len(word_list) for row in dataMatrix[2:]: matrix.append( [row[0]] + map(int, row[1:1 + words_topics_size]) ) return {"topic_list":topic_list, "word_list": word_list, "matrix": matrix} ##### MAIN ##### dataMatrix = parseDM() arg_list = sys.argv if len(arg_list) != 5: print "Usage: ./DM2_KNN.py -k <neighborcount> -t <testpercentage>" sys.exit(1) if arg_list[1] == '-k': k = int(arg_list[2]) elif arg_list[1] == '-t': t = int(arg_list[2]) if arg_list[3] == '-k': k = int(arg_list[4]) elif arg_list[3] == '-t': t = int(arg_list[4]) knn = KNN(dataMatrix, k) knn.test_split(t)
from KNN import KNN
from numberClassification import NumClassification
import numpy as np


def acc(pred, label):
    t = np.equal(pred, label)
    return np.sum(t) / len(pred)


nc = NumClassification(trainingPath="digits/trainingDigits",
                       testPath="digits/testDigits")
trainingDataset = nc.buildTrainingDataset()
testDataset, labels = nc.buildTestDataset()

knn = KNN(trainingDataset, 3, isnorm=False)
pred = knn.infer(testDataset)
print(acc(pred, labels))
img_right = cv2.cvtColor(img_right, cv2.COLOR_BGR2RGB)
img_r = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)  # img_right is RGB at this point
img_left = cv2.imread('Resources/left.jpg')
img_left = cv2.cvtColor(img_left, cv2.COLOR_BGR2RGB)
img_l = cv2.cvtColor(img_left, cv2.COLOR_RGB2GRAY)

# Create SIFT and extract features
sift = cv2.xfeatures2d.SIFT_create(nfeatures=10000)

# Find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img_r, None)
kp2, des2 = sift.detectAndCompute(img_l, None)

# Find KNN matches and validate with ratio test
knn_solver = KNN(des1, des2, 2)
matches = knn_solver.solve()

# Extract keypoints' coordinates
valid_kp1 = []
valid_kp2 = []
for match in matches:
    valid_kp1.append(np.array(kp1[match.index_1].pt))
    valid_kp2.append(np.array(kp2[match.indices_2[0]].pt))
valid_kp1 = np.array(valid_kp1).T
valid_kp2 = np.array(valid_kp2).T

H = find_homography(valid_kp1, valid_kp2, 5000)
print(H)
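# Aside: OpenCV's built-in matcher performs the same k=2 KNN matching. A minimal
# sketch with Lowe's ratio test (the 0.75 threshold is a common default, not
# taken from this project's KNN solver):
bf = cv2.BFMatcher()
raw_matches = bf.knnMatch(des1, des2, k=2)
good = [m for m, n in raw_matches if m.distance < 0.75 * n.distance]
pts1 = np.float32([kp1[m.queryIdx].pt for m in good]).T
pts2 = np.float32([kp2[m.trainIdx].pt for m in good]).T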
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    sep="\t",
    converters={"Trajectory": literal_eval})

# train, test = train_test_split(trainSet, test_size=0.01)
# print(len(train))
# print(len(test))

# Initialize Encoder
le = preprocessing.LabelEncoder()
le.fit(trainSet["journeyPatternId"])
y = le.transform(trainSet["journeyPatternId"])

X = trainSet['Trajectory']
Y = testSet['Trajectory']

knn = KNN(5, DTW(Harvesine))
knn.fit(X, y)
knn_pred = knn.predict(Y)
predicted_categories = le.inverse_transform(knn_pred)
print(predicted_categories)

with open('testSet_JourneyPatternIDs.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                           quoting=csv.QUOTE_MINIMAL)
    csvwriter.writerow(['Test_Trip_ID', 'Predicted_JourneyPatternID'])
    for i in range(len(testSet)):
        csvwriter.writerow([str(i + 1), predicted_categories[i]])
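# The DTW class isn't shown in this snippet; below is a minimal sketch of the
# classic dynamic-programming recurrence it presumably implements, with a point
# metric such as the haversine distance passed in (hypothetical helper, for
# illustration only):
import numpy as np

def dtw_distance(a, b, point_dist):
    n, m = len(a), len(b)
    D = np.full((n + 1, m + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = point_dist(a[i - 1], b[j - 1])
            # extend the cheapest of the three possible alignments
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return D[n, m]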
def main():
    db = Database()
    knn = KNN(db)
    fr = FaceRecog(db, knn)
    tg = Telegram(fr, db)
    tg.start()
from KNN import KNN
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=42)

kNN = KNN(1)
kNN.train(X_train, y_train)
res = kNN.predict(X_test)

print('Real--->Predicted')
for i, val in enumerate(y_test):
    print(' %d ---> %d' % (val, res[i]))
print('Prediction accuracy:')
print(kNN.score(X_test, y_test))
# TODO: think about extending the code to other classification methods and feature extractors... maybe a switch?
if args.descriptor == 'sift':
    features_descriptor = SIFT(nfeatures=100)
elif args.descriptor == 'surf':
    features_descriptor = SURF(nOctaves=4, nOctaveLayers=2)
elif args.descriptor == 'hog':
    features_descriptor = HOG()
else:
    features_descriptor = None
    print('Invalid descriptor')

myKNN = KNN(nneighbors=100, features_descriptor=features_descriptor)
myEvaluation = Evaluation(evaluation_path=args.evaluation_path, save_plots=True)

if args.do_train:
    start_time = time.time()
    if args.train_method == 'kfold':
        # make K trainings, save the evaluation metrics and models, then decide the best model
        evaluation_metrics = np.array([], dtype=float)
        model = []
        for k in range(args.kfold_k):
# data = Data()  # loads the data and checks if complete
#
# while True:
#     data.load_data()
#     data.split_data()  # split into both test and train
#     predicted_class = {}  # holds data_set_name and a list of predicted classes
#
#     for name, train_data_set in data.train_dict.items():  # iterate through data and get key (data name) and data set
#         print("Current Data Set: ", name)
#         predicted_class[name] = []  # create a list for a data set of predicted values
#         test_data_set = data.test_dict[name]
#         # TODO: Use same keys for all dictionaries; access testing data by key.
#         for _, query_point in train_data_set.iterrows():
#             # give a query example and its corresponding train_data_set, along with the number of desired neighbors to consider
#             predicted_class[name].append(knn.perform_knn(query_point, train_data_set, 5, name, data))

knn = KNN()
data = Data()  # loads the data and checks if complete
lf = LF()
data.load_data()


def run_zero_loss():
    """
    Calls functions in other files until the program is finished.
    :return: None
    """
    data.split_data()  # split into both test and train
    lf.zero_one_loss(data.test_dict['abalone'].sample(n=400), 5, 'abalone', data)


def run_k_means(indata):  # Run k-means on the wine data set
from KNN import KNN
from utils import get_xor
import matplotlib.pyplot as plt

if __name__ == '__main__':
    X, Y = get_xor()

    plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
    plt.show()

    for i in range(20):
        model = KNN(i + 1)
        model.fit(X, Y)
        print('Neighbours:', i + 1, 'Train accuracy:', model.score(X, Y))
    input_instances.calcNumClases()

    # 2. PREPROCESSING
    preproceso.randomizarInstancias(input_instances)

    # 3. kNN
    # 70% labeled instances, 30% instances to classify
    k = int(sys.argv[2])  # number of neighbors to explore
    m = float(sys.argv[3])  # m of the Minkowski distance
    porcentaje = sys.argv[4]
    porcentaje = float(porcentaje)
    if porcentaje > 100.0 or porcentaje < 0.0:
        raise Exception()
    train = input_instances.getPorcentaje(porcentaje)  # take this % of the instances (for train)
    test = input_instances  # the rest (test); getPorcentaje updates the list by popping
    print("Using k=" + str(k) + ", m=" + str(int(m)) + " and " + str(porcentaje) + "% of the instances for train\n")
    print("Classifying...")
    clasificador = KNN(train, test, k, m)  # build the classifier with the labeled instances, the instances to predict, k and m
    clasificador.execute()  # compute the nearest neighbors and predict the class
    prediccion = clasificador.getPrediccion()

    # 4. EVALUATION
    evaluacion.mostrarFigurasMerito(prediccion, input_instances)  # specificity and recall
    # evaluacion.espacioROC()  # plot the models in ROC space

    time = str(datetime.now() - start_time)
    time = time[5:]
    print("Program execution time: " + time + " seconds")
except IOError:
    print("File read error")
# except Exception:
#     print("Unknown error. You probably entered an argument incorrectly.")
from KNN import KNN
import os
# Assumed module paths for the remaining components, mirroring the KNN import:
from CNN import CNN
from MainUI import MainUI

# Change the modeltype variable value to "CNN" to use the Convolutional Neural Network
# Change the modeltype variable value to "KNN" to use the K Nearest Neighbours classifier
modeltype = "KNN"

if modeltype == "KNN":
    if not os.path.exists("knn.sav"):
        print("Saved KNN Classifier not found....")
        print("Downloading MNIST Data, training KNN classifier and saving as knn.sav......")
        print("Kindly wait for a few minutes............")
        knnobj = KNN(3)
        knnobj.skl_knn()
else:
    if not os.path.exists("cnn.hdf5"):
        print("cnn.hdf5 not found...")
        print("Loading MNIST Data, training CNN and saving as cnn.hdf5.....")
        print("Kindly wait a few minutes.........")
        cnnobj = CNN()
        cnnobj.build_and_compile_model()
        cnnobj.train_and_evaluate_model()
        cnnobj.save_model()

MainUIobj = MainUI(modeltype)
MainUIobj.mainloop()
MainUIobj.cleanup()
print("2. Density Based Clustering") print("3. Hierarchical Clustering") print("4. K-means Clustering") print("5. Learning Vector Quantization Clustering") print("6. Mixture of Gaussian Clustering") print("-------Decision Tree-------") print("7. ID3") print("8. C4.5") print("9. CART") print("---Recommendation Algorithm---") print("10. MF") print("11. PMF") print("-------------------------------------------------------") num = input("Enter the number of the algorithm you want to execute:") if num == str(1): algorithm = KNN.KNN() algorithm.execute() elif num == str(2): algorithm = DensityBasedClustering.DensityBasedClustering() algorithm.execute() elif num == str(3): algorithm = HierarchicalClustering.HierarchicalClustering() algorithm.execute() elif num == str(4): algorithm = KMeansAlgorithm.KMeansAlgorithm() algorithm.execute() elif num == str(5): algorithm = LearningVectorQuantization.LearningVectorQuantization() algorithm.execute() elif num == str(6): algorithm = MixtureOfGaussianAlgorithm.MixtureOfGaussianAlgorithm()
    binarizer = LabelBinarizer()
    Y = binarizer.fit_transform(y)

    # By inspection I know that the number of examples will be 50,000. This
    # allows me to set an upper limit on the number of neighbours I want to
    # test for. I inspected the 'X' matrix using the debugger tool in PyCharm.
    k_values = np.array(range(1, 20))  # range of k values to test; this is okay for my computer's memory capabilities
    y_pred = np.zeros(k_values.size)
    best_k = 0  # tracks the k value which returns the best test error
    best_error = np.inf  # initializing to store and track the best test error
    for k in k_values:
        model = KNN(k)
        model.fit(X, y)
        # Computing the validation error with Xtest and ytest
        y_pred = model.predict(Xtest)
        test_error = np.mean(y_pred != ytest)
        if test_error < best_error:
            best_k = k
            best_error = test_error
    print(best_k)
    print(best_error)

elif question == "1.2":
    with gzip.open('mnist.pkl.gz', 'rb') as f:
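    # Aside: the same sweep over k can be done with scikit-learn's built-in
    # estimator and cross-validation instead of a single train/test split.
    # A minimal sketch, assuming X and y as above:
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import GridSearchCV
    search = GridSearchCV(KNeighborsClassifier(),
                          {'n_neighbors': list(range(1, 20))}, cv=5)
    search.fit(X, y)
    print(search.best_params_, 1 - search.best_score_)  # best k and its CV error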