def draw_centralized_image(train_dataset, test_dataset, index=0):
    """Display one centralized test image and print its target.

    The mean image is obtained from the training data with
    ``knn.Knn().get_x_mean`` and handed, together with the test data, to
    ``knn.Knn().centralized``. The result is viewed as a stack of 28 x 28
    images, and the one at position ``index`` is shown with matplotlib's
    binary colour map.

    Parameters
    ----------
    train_dataset : object exposing ``.data.numpy()``
        Source of the training images used to compute the mean image.
    test_dataset : object exposing ``.data.numpy()`` and ``.targets``
        Source of the images to centralize and of the printed target.
    index : int, optional
        Position of the test image to display (default 0).
    """
    train_pixels = train_dataset.data.numpy()
    mean_image = knn.Knn().get_x_mean(train_pixels)

    centred = knn.Knn().centralized(test_dataset.data.numpy(), mean_image)
    num_images = centred.shape[0]
    centred = centred.reshape(num_images, 28, 28)

    plt.imshow(centred[index], cmap=plt.cm.binary)
    plt.show()
    print(test_dataset.targets[index])
def knn_train_test(k, xTrain, yTrain, xTest, yTest):
    """
    Fit a knn model with the given k and score it on the test data.

    Parameters
    ----------
    k : int
        The number of neighbors, passed to ``knn.Knn``.
    xTrain : nd-array with shape n x d
        Training data.
    yTrain : mapping-like with a ``'label'`` entry
        Training labels; only ``yTrain['label']`` is used.
    xTest : nd-array with shape m x d
        Test data.
    yTest : mapping-like with a ``'label'`` entry
        Test labels; only ``yTest['label']`` is used.

    Returns
    -------
    acc : float
        Whatever ``knn.accuracy`` reports for the test predictions
        against ``yTest['label']``.
    """
    classifier = knn.Knn(k)
    classifier.train(xTrain, yTrain['label'])

    # Predict first, then look up the reference labels, then score.
    predictions = classifier.predict(xTest)
    expected = yTest['label']
    return knn.accuracy(predictions, expected)
def refresh(self):
    """Rebuild the data generator, reload the data and retrain the model.

    A new ``DataGenerator`` is created from the values of
    ``configurations.config["data"]``, its data is stored on
    ``self.features`` / ``self.labels``, and a new ``knn.Knn`` is built
    from them. ``best_params()`` and then ``train()`` are called on it.
    """
    data_settings = configurations.config["data"].values()
    self.data_gen = data_generator.DataGenerator(*data_settings)

    features, labels = self.data_gen.get_data()
    self.features = features
    self.labels = labels

    model = knn.Knn(self.features, self.labels)
    self.knn_model = model
    model.best_params()
    model.train()
def knn_get_both_accuracies(k, xTrain, yTrain, xTest, yTest):
    """Train a knn model and return its training and test accuracy.

    Parameters
    ----------
    k : value convertible with ``int()``
        The number of neighbors; ``int(k)`` is passed to ``knn.Knn``.
    xTrain, xTest : feature data accepted by the model's ``predict``
    yTrain, yTest : mapping-like with a ``'label'`` entry

    Returns
    -------
    (trainAcc, testAcc) : tuple
        Results of ``knn.accuracy`` on the training and on the test data,
        in that order.
    """
    classifier = knn.Knn(int(k))
    classifier.train(xTrain, yTrain['label'])

    def _score(features, labelled):
        # Same order as scoring by hand: predict, fetch labels, compare.
        return knn.accuracy(classifier.predict(features), labelled['label'])

    train_accuracy = _score(xTrain, yTrain)
    test_accuracy = _score(xTest, yTest)
    return train_accuracy, test_accuracy
def centralized_knn_on_mnist(train_loader, test_loader):
    """Classify centralized test images with knn and print the accuracy.

    The mean image of the full training set is computed with
    ``knn.Knn().get_x_mean`` and applied to both the training images and
    the first 1000 test images via ``knn.Knn().centralized``. The test
    images are then classified with ``knn.Knn().classify(5, 'M', ...)``
    and the fraction of correct predictions is printed.

    Parameters
    ----------
    train_loader : object exposing ``.dataset.data`` and ``.dataset.targets``
        Source of the training images and labels.
    test_loader : object exposing ``.dataset.data`` and ``.dataset.targets``
        Source of the test images and labels; only the first 1000 are used.

    Returns
    -------
    None
        The result is reported on stdout only.
    """
    x_train = train_loader.dataset.data.numpy()
    mean_image = knn.Knn().get_x_mean(x_train)
    x_train = knn.Knn().centralized(x_train, mean_image)
    y_train = train_loader.dataset.targets.numpy()

    # The test data is centralized with the *training* mean image.
    x_test = test_loader.dataset.data[:1000].numpy()
    x_test = knn.Knn().centralized(x_test, mean_image)
    y_test = test_loader.dataset.targets[:1000].numpy()

    num_test = y_test.shape[0]
    # NOTE(review): the meaning of 5 and 'M' is defined by Knn.classify,
    # which is not visible here.
    y_test_pred = knn.Knn().classify(5, 'M', x_train, y_train, x_test)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test

    # Apply the format with %: passing the tuple as a second print()
    # argument would emit the raw template followed by the tuple.
    print('Got %d / %d correct => accuracy: %f'
          % (num_correct, num_test, accuracy))
def main():
    """
    Command-line entry point.

    Reads the four CSV files named by ``--xTrain``, ``--yTrain``,
    ``--xTest`` and ``--yTest``, trains a knn model for every odd k from
    1 to 19, prints a table of train/test accuracy per k and saves a line
    plot of that table to ``q3d.png``.
    """
    # (flag, default filename, help text) for each command-line option
    cli_options = (
        ("--xTrain", "q3xTrain.csv",
         "filename for features of the training data"),
        ("--yTrain", "q3yTrain.csv",
         "filename for labels associated with training data"),
        ("--xTest", "q3xTest.csv",
         "filename for features of the test data"),
        ("--yTest", "q3yTest.csv",
         "filename for labels associated with the test data"),
    )
    parser = argparse.ArgumentParser()
    for flag, default_name, help_text in cli_options:
        parser.add_argument(flag, default=default_name, help=help_text)
    args = parser.parse_args()

    # load the train and test data
    train_features = pd.read_csv(args.xTrain)
    train_labels = pd.read_csv(args.yTrain)
    test_features = pd.read_csv(args.xTest)
    test_labels = pd.read_csv(args.yTest)

    # one [k, train accuracy, test accuracy] row per value of k tried
    rows = []
    for k in range(1, 20, 2):
        model = knn.Knn(k)
        model.train(train_features, train_labels['label'])
        train_acc = knn.accuracy(model.predict(train_features),
                                 train_labels['label'])
        test_acc = knn.accuracy(model.predict(test_features),
                                test_labels['label'])
        rows.append([k, train_acc, test_acc])

    perfDF = pd.DataFrame(rows, columns=["k", "train", "test"])
    print(perfDF)

    # plot accuracy against k and write the figure to disk
    indexed = perfDF.set_index("k")
    sns.set(style="whitegrid")
    axes = sns.lineplot(data=indexed, palette="tab10", linewidth=2.5)
    axes.get_figure().savefig("q3d.png")
def knn_on_mnist(train_loader, test_loader):
    """Classify flattened test images with knn and print the accuracy.

    Training images and the first 1000 test images are reshaped to rows
    of ``28 * 28`` values, the test rows are classified with
    ``knn.Knn().classify(5, 'M', ...)`` and the fraction of correct
    predictions is printed.

    Parameters
    ----------
    train_loader : object exposing ``.dataset.data`` and ``.dataset.targets``
        Source of the training images and labels.
    test_loader : object exposing ``.dataset.data`` and ``.dataset.targets``
        Source of the test images and labels; only the first 1000 are used.

    Returns
    -------
    None
        The result is reported on stdout only.
    """
    x_train = train_loader.dataset.data.numpy()
    x_train = x_train.reshape(x_train.shape[0], 28 * 28)
    y_train = train_loader.dataset.targets.numpy()

    x_test = test_loader.dataset.data[:1000].numpy()
    x_test = x_test.reshape(x_test.shape[0], 28 * 28)
    y_test = test_loader.dataset.targets[:1000].numpy()

    num_test = y_test.shape[0]
    # NOTE(review): the meaning of 5 and 'M' is defined by Knn.classify,
    # which is not visible here.
    y_test_pred = knn.Knn().classify(5, 'M', x_train, y_train, x_test)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test

    # Apply the format with %: passing the tuple as a second print()
    # argument would emit the raw template followed by the tuple.
    print('Got %d / %d correct => accuracy: %f'
          % (num_correct, num_test, accuracy))
def process_image(self, path):
    """Read the image at ``path`` and return the characters recognised in it.

    The image is prepared with ``self.graphics.prepare_image_for_ocr``,
    its characters are extracted with ``self.get_all_characters``, and a
    highlighted copy is saved through ``self.graphics.saveImage``. Each
    character image is resized to 10 x 10, flattened to a ``(1, 100)``
    float32 row and classified with a ``knn.Knn`` built from
    ``char_samples3.data`` / ``char_responses3.data``.

    A ``ValueError`` raised while handling a character is printed and
    that character is left out of the result.

    Returns
    -------
    str
        The recognised characters, concatenated in iteration order.
    """
    if not os.path.exists("output"):
        os.makedirs("output")

    prepared = self.graphics.prepare_image_for_ocr(cv2.imread(path))
    clean_img, chars = self.get_all_characters(prepared)
    highlighted = self.highlight_characters(clean_img, chars)
    self.graphics.saveImage(highlighted, 'out')

    samples = np.loadtxt('char_samples3.data', np.float32)
    responses = np.loadtxt('char_responses3.data', np.float32)
    responses = responses.reshape((responses.size, 1))

    # The project's own Knn is used here; an OpenCV KNearest variant was
    # previously left commented out at this point.
    classifier = knn.Knn(samples, responses)

    recognised = []
    for _, char_img in chars:
        try:
            patch = cv2.resize(char_img, (10, 10)).reshape((1, 100))
            patch = np.float32(patch)
            nearest = classifier.find_nearest(patch, k=3)
            recognised.append(str(chr(int(nearest))))
        except ValueError as err:
            print(err)

    plate_chars = "".join(recognised)
    print("Licence plate: %s" % plate_chars)
    return plate_chars
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from data import x_teste, y_teste, x_treino, y_treino
import knn

# NOTE: from this point on ``knn`` names the classifier instance (k = 3),
# no longer the imported module.
knn = knn.Knn(3)

x_treino = list(x_treino)
x_teste = list(x_teste)

# Training records are [feature list, label] pairs.
dados = [[list(amostra), y_treino[i]] for i, amostra in enumerate(x_treino)]
knn.treina(dados)

# Each test sample is passed to the classifier together with its label.
y_predito = [
    knn.prediz([amostra, y_teste[i]]) for i, amostra in enumerate(x_teste)
]

print(y_predito)
print(y_teste)
print(confusion_matrix(y_teste, y_predito))
print(classification_report(y_teste, y_predito))
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): ``os`` and ``knn`` are used below but are not imported in
# the visible part of this script -- confirm they are imported elsewhere.

# Make sure the output directory exists before any experiment runs.
if not os.path.exists('Plots'):
    os.makedirs('Plots')

split = 0.6
nrOfExperiments = 3
kList = list(range(3, 30, 2))

for dataset in ['circles']:
    for datasize in [100, 500, 1000, 10000]:
        knn_instance = knn.Knn()
        accuracies = {}
        averageAccuracies1 = {}
        data_filename = 'data.' + dataset + '.train.' + repr(datasize)

        for k in kList:
            runs = []
            accuracies[k] = runs
            plot_filename = ('bare_' + dataset + '_' + repr(datasize)
                             + '_k_' + repr(k))

            for i in range(nrOfExperiments):
                print('Running experiment nr ' + repr(i + 1) + '/'
                      + repr(nrOfExperiments) + ' (k=' + repr(k)
                      + ', datasize=' + repr(datasize) + ', bare)')
                # Only the final repetition asks for a generated test set.
                is_last_run = (i == (nrOfExperiments - 1))
                accuracy = knn_instance.run(
                    k,
                    split=split,
                    dataFilename=data_filename,
                    scatterPlotFilename=plot_filename,
                    withGeneratedTestSet=is_last_run)
                print("Accuracy: %.2f%%" % round(accuracy, 2))
                runs.append(accuracy)

            # Mean accuracy over all repetitions for this k.
            averageAccuracies1[k] = float(sum(runs)) / float(nrOfExperiments)
def KNN(k):
    """Print ``knn.Knn(k) * 100`` as a percentage line.

    Parameters
    ----------
    k : value forwarded unchanged to ``knn.Knn``
    """
    percentage = knn.Knn(k) * 100
    print("\t\t\tketqua: " + str(percentage) + " %")
import knn

# Three labelled samples: four numeric features followed by the class name.
learningData = [
    (5.1, 3.4, 1.5, 0.2, 'Iris-setosa'),
    (6.6, 2.9, 4.6, 1.3, 'Iris-versicolor'),
    (7.9, 3.8, 6.4, 2.0, 'Iris-virginica'),
]
obj = knn.Knn(3, learningData)


def testScore():
    """Check that ``score`` is 1 when every predicted label matches."""
    data = [
        (5.1, 3.4, 1.5, 0.2, 'Iris-setosa'),
        (6.6, 2.9, 4.6, 1.3, 'Iris-versicolor'),
        (7.9, 3.8, 6.4, 2.0, 'Iris-virginica'),
    ]
    # The predicted labels are exactly the last field of each sample.
    predictedLabels = [sample[-1] for sample in data]
    assert obj.score(data, predictedLabels) == 1
import knn
import plot
import data_generator
import configurations
import numpy as np

# Script: build a data set, fit a Knn model on it, print the model's
# accuracy and hand everything to the visualisation object for display.

# Disabled earlier variant that wrapped the set-up in a loop:
# plot_object = None
# data_gen = None
# knn_model = None
# features, labels = None, None
# while True:
# if plot_object or knn_model or data_gen or features or labels is None:
plot_object = plot.Visualisation()
# The generator receives the values of config["data"] as positional arguments.
data_gen = data_generator.DataGenerator(*configurations.config["data"].values())
features, labels = data_gen.get_data()
knn_model = knn.Knn(features, labels)
# best_params() runs before train(); keep this order.
knn_model.best_params()
knn_model.train()
print(knn_model.accuracy())
# Give the plot its data, model and generator before asking it to draw.
plot_object.set_data(features, labels)
plot_object.set_knn_model(knn_model)
plot_object.set_data_generator(data_gen)
plot_object.show_data(make_grid=configurations.config['make_grid'])
# plot_object.animate()
import pandas as pd
import numpy as np
import knn

# Number of neighbours handed to the classifier.
k = 3

# Both CSV files are read without a header row and converted to arrays.
learningDataArray = np.array(
    pd.read_csv("iris.datalearning.csv", header=None))
testDataArray = np.array(
    pd.read_csv("iris.datatest.csv", header=None))

kn = knn.Knn(k, learningDataArray)

# Predict the test samples, then print the predictions and their score.
predictedLabels = kn.predict(testDataArray)
print(predictedLabels)
print(kn.score(testDataArray, predictedLabels))
# Interactive driver: read the parameters from stdin, build the K-means
# model, then print a classification for every item in ``teste``.
# NOTE(review): ``Kmeans``, ``treino``, ``teste`` and ``knn`` are defined
# outside the visible part of this script.

# Prompt for the K-means k and for the maximum number of iterations.
print("Digite o K para o K-means:")
k = int(input())
print("Digite o numero maximo de iteracoes")
iterMax = int(input())
modelo = Kmeans(treino, k, iterMax)
# ``k`` is reused: from here on it holds the k entered for Knn.
print("Digite o K para o Knn:")
k = int(input())
print("Classificao para os testes:")
for i in teste:
    # knn.Knn is called with the K-means model, the Knn k and the test item.
    print("Sua planta tem a seguinte classificacao: " + str(knn.Knn(modelo, k, i)))
# The string literal below is disabled code (manual entry of one sample);
# as a bare expression statement it has no effect at runtime.
'''
print("Digite as propriedades da planta a ser verificada:")
a = float(input())
b = float(input())
c = float(input())
d = float(input())
cords = [a,b,c,d]
novo = knn.Point(cords)
print("Sua planta tem a seguinte classificacao: " + str(knn.Knn(modelo, k , novo)))
'''