def main():
    # ann = ANN("hw5data.txt", 10)
    # ann.run()
    # maze = AStarMaze("Maze.txt", 10)
    # maze.ASSolver()
    knn = KNN("hw5data.txt", 15)
    knn.run()
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from KNN import KNN

# Load the UCI Iris dataset and encode the class labels as integers
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
dataset = pd.read_csv(path, names=names)
encoder = LabelEncoder()
dataset["class"] = encoder.fit_transform(dataset["class"])

train_set, test_set = train_test_split(dataset, test_size=0.25, random_state=0)
print("Train set size : ", len(train_set))
print("Test set size : ", len(test_set))

knn = KNN(7)
predictions = []
for index, t in test_set.iterrows():
    predictors_only = t[:-1]  # drop the class column, keep the four features
    prediction = knn.predict(train_set, predictors_only)
    predictions.append(prediction)

score = knn.evaluate(np.array(test_set.iloc[:, -1]), predictions)
print("KNN Score = ", score)
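# The project's KNN class isn't shown in this snippet; below is a minimal sketch
# of the interface the loop above assumes (a hypothetical stand-in, not the
# project's implementation: Euclidean distance, majority vote over the k nearest
# training rows, with the class in the last DataFrame column).
class KNNSketch:
    def __init__(self, k):
        self.k = k

    def predict(self, train_set, predictors_only):
        feats = train_set.iloc[:, :-1].to_numpy(dtype=float)
        dists = np.sqrt(((feats - np.asarray(predictors_only, dtype=float)) ** 2).sum(axis=1))
        neighbors = train_set.iloc[np.argsort(dists)[:self.k], -1]
        return neighbors.mode().iloc[0]  # most common class among the k nearest

    def evaluate(self, y_true, predictions):
        return float(np.mean(y_true == np.asarray(predictions)))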
import RiskParser as rp
from ANN import ANN
from Dtree import Dtree
from KNN import KNN
from LogR import LogR
from NaiveEnsemble import NaiveEnsemble
import numpy as np

inputs, outputs = rp.parse_data("RiskAssessData.csv")
print(inputs)
print(outputs)

print("### ALL TESTS ###")
test_ANN = ANN()
test_DTree = Dtree()
test_KNN = KNN(10)
test_LogR = LogR()
ensemble_results = []
individual_results = []

for i in range(0, 3):
    x_train, x_test, y_train, y_test = test_DTree.split_data(inputs, outputs, .25)
    test_NaiveEnsemble = NaiveEnsemble([test_ANN, test_KNN, test_LogR, test_DTree])
    test_NaiveEnsemble.train(x_train, y_train)
    ensemble = test_NaiveEnsemble.report_accuracy(x_test, y_test)
    print("Naive Ensemble Test accuracy: test data set", ensemble)
    # print("Naive Ensemble Test accuracy: train data set", test_NaiveEnsemble.report_accuracy(x_train, y_train))
    # test_KNN.train(x_train, y_train)
    # test_NaiveEnsemble.model_list.append(test_KNN)
# ***Train SVM*** #
classifier = svm.SVC(C=1, decision_function_shape="ovr")
classifier.fit(X_train, y_train)

# Test model
prediction = classifier.predict(X_test)

# Evaluate errors
correct = 0
for i, j in zip(np.nditer(prediction), np.nditer(y_test)):
    if i == j:
        # print("{} : {}".format(i, j))
        correct = correct + 1
acc = 1.0 * correct / y_test.size
print("SVM accuracy: {}".format(acc))

# ***KNN Model*** #
knn = KNN(k=10)
predictions = knn.predict(X_train, X_test, y_train)

# Evaluate errors
correct = 0
for i, j in zip(np.nditer(predictions), np.nditer(y_test)):
    if i == j:
        # print("{} : {}".format(i, j))
        correct = correct + 1
acc = 1.0 * correct / y_test.size
print("KNN accuracy: {}".format(acc))
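# Aside: the two counting loops above are equivalent to scikit-learn's
# accuracy_score. A minimal alternative sketch, assuming prediction, predictions
# and y_test are 1-D arrays of equal length:
from sklearn.metrics import accuracy_score
print("SVM accuracy (sklearn): {}".format(accuracy_score(y_test, prediction)))
print("KNN accuracy (sklearn): {}".format(accuracy_score(y_test, predictions)))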
    index_col='tripId')
testSet = pd.read_csv(
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    sep="\t",
    converters={"Trajectory": literal_eval})

path = 'Test_Subways/'
if not os.path.exists(path):
    os.makedirs(path)

# trainSet = trainSet[:400]
for i in range(len(testSet)):
    start_time = time.time()
    origin = testSet['Trajectory'].iloc[i]
    knn = KNN(5, LCSS(Compare_Harvesine), origin, True)
    for j in range(len(trainSet)):
        knn.calculate_neighbor(trainSet['Trajectory'].iloc[j], j)
    results = knn.results()
    elapsed_time = time.time() - start_time

    path = 'Test_Subways/Test_Subways_' + str(i + 1)
    if not os.path.exists(path):
        os.makedirs(path)
    file = open(path + '/results', 'w')
    file.write("Test_Subways_" + str(i + 1) + "\n")
    print_map(origin, 'Test_Subways_' + str(i + 1), path, None)
    count = 1
    for k in results:
        print_map(trainSet['Trajectory'].iloc[k[0]], 'Neighbor_' + str(count),
                  path, Find_Subsequence(k[2]))
        file.write('Neighbor_' + str(count) + '\nJP_ID: ' +
from FeatureScaling import FeatureScaling

fs = FeatureScaling(X, y)
X = fs.fit_transform_X()

# training set split
X_train = X[0:train_size, :]
Y_train = y[0:train_size]

# testing set split
X_test = X[train_size:, :]
Y_test = y[train_size:]

# importing KNN class
from KNN import KNN

l = time.time()
knn = KNN(X_train, Y_train, 5)
y_pred = knn.predict(X_test)
r = time.time()
KNN_learn_time = (r - l)
print(r - l)

# getting the confusion matrix (class 0 is treated as the positive class)
tp = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 0 and y_pred[i] == 0])
# actual 0 predicted 1 is a false negative, not a true negative
fn = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 0 and y_pred[i] == 1])
fp = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 1 and y_pred[i] == 0])
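# The fragment stops before the fourth confusion-matrix cell. A minimal
# completion sketch for illustration, under the same assumption that class 0
# is the positive class (tn and the derived metrics are additions, not the
# original author's code):
tn = len([i for i in range(0, Y_test.shape[0])
          if Y_test[i] == 1 and y_pred[i] == 1])
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
print("accuracy={:.3f} precision={:.3f} recall={:.3f}".format(accuracy, precision, recall))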
y_test = test_np[:, -1]
property_list = [x for x in range(len(test_np[0, :-1]))]

# Greedy forward feature selection: grow the best subset one feature at a time
max_acc = 0
best_sub = ()
tmp_sub = 0
flag = 0
while True:
    for i in range(8):
        if i not in best_sub:
            subset = best_sub + (i,)
        else:
            continue
        # subset = (1, 7, 6, 5, 2)
        train_tmp = norm_train[:, (subset + (8,))]
        test_tmp = norm_test[:, subset]
        y_pred = KNN(train_tmp, test_tmp, 9)
        acc = accuracy_score(y_test, y_pred)
        # acc = float(np.sum(y_test == y_pred) / len(y_test))
        # print("for element " + str(subset) + " received acc of " + str(acc) + "\n")
        if acc >= max_acc:
            flag = 1
            max_acc = acc
            tmp_sub = subset
    best_sub = tmp_sub
    if flag == 0:
        break
    flag = 0
print(list(best_sub))
# print(max_acc)
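# Aside: scikit-learn ships the same greedy wrapper search as
# SequentialFeatureSelector (available since sklearn 0.24). A minimal sketch,
# assuming norm_train[:, :8] holds the features and column 8 the labels; the
# target subset size of 5 is an arbitrary choice for illustration:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier
sfs = SequentialFeatureSelector(KNeighborsClassifier(n_neighbors=9),
                                n_features_to_select=5, direction='forward')
sfs.fit(norm_train[:, :8], norm_train[:, 8])
print(sfs.get_support(indices=True))  # indices of the selected feature columns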
elif "t_" in item: topic_list.append(item[2:]) word_list = word_list[1:] # Remove 'Article #' words_topics_size = len(topic_list) + len(word_list) for row in dataMatrix[2:]: matrix.append( [row[0]] + map(int, row[1:1 + words_topics_size]) ) return {"topic_list":topic_list, "word_list": word_list, "matrix": matrix} ##### MAIN ##### dataMatrix = parseDM() arg_list = sys.argv if len(arg_list) != 5: print "Usage: ./DM2_KNN.py -k <neighborcount> -t <testpercentage>" sys.exit(1) if arg_list[1] == '-k': k = int(arg_list[2]) elif arg_list[1] == '-t': t = int(arg_list[2]) if arg_list[3] == '-k': k = int(arg_list[4]) elif arg_list[3] == '-t': t = int(arg_list[4]) knn = KNN(dataMatrix, k) knn.test_split(t)
from KNN import KNN
from numberClassification import NumClassification
import numpy as np


def acc(pred, label):
    t = np.equal(pred, label)
    return np.sum(t) / len(pred)


nc = NumClassification(trainingPath="digits/trainingDigits",
                       testPath="digits/testDigits")
trainingDataset = nc.buildTrainingDataset()
testDataset, labels = nc.buildTestDataset()

knn = KNN(trainingDataset, 3, isnorm=False)
pred = knn.infer(testDataset)
print(acc(pred, labels))
img_right = cv2.cvtColor(img_right, cv2.COLOR_BGR2RGB)
img_r = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)  # img_right is RGB at this point
img_left = cv2.imread('Resources/left.jpg')
img_left = cv2.cvtColor(img_left, cv2.COLOR_BGR2RGB)
img_l = cv2.cvtColor(img_left, cv2.COLOR_RGB2GRAY)

# Create SIFT and extract features
sift = cv2.xfeatures2d.SIFT_create(nfeatures=10000)

# Find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img_r, None)
kp2, des2 = sift.detectAndCompute(img_l, None)

# Find KNN matches and validate with ratio test
knn_solver = KNN(des1, des2, 2)
matches = knn_solver.solve()

# Extract keypoints' coordinates
valid_kp1 = []
valid_kp2 = []
for match in matches:
    valid_kp1.append(np.array(kp1[match.index_1].pt))
    valid_kp2.append(np.array(kp2[match.indices_2[0]].pt))
valid_kp1 = np.array(valid_kp1).T
valid_kp2 = np.array(valid_kp2).T

H = find_homography(valid_kp1, valid_kp2, 5000)
print(H)
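# Aside: OpenCV's built-in matcher performs the same k=2 KNN matching. A minimal
# sketch with Lowe's ratio test (the 0.75 threshold is a common default, not
# taken from this project's KNN solver):
bf = cv2.BFMatcher()
raw_matches = bf.knnMatch(des1, des2, k=2)
good = [m for m, n in raw_matches if m.distance < 0.75 * n.distance]
pts1 = np.float32([kp1[m.queryIdx].pt for m in good]).T
pts2 = np.float32([kp2[m.trainIdx].pt for m in good]).T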
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    sep="\t",
    converters={"Trajectory": literal_eval})

# train, test = train_test_split(trainSet, test_size=0.01)
# print(len(train))
# print(len(test))

# Initialize Encoder
le = preprocessing.LabelEncoder()
le.fit(trainSet["journeyPatternId"])
y = le.transform(trainSet["journeyPatternId"])

X = trainSet['Trajectory']
Y = testSet['Trajectory']

knn = KNN(5, DTW(Harvesine))
knn.fit(X, y)
knn_pred = knn.predict(Y)
predicted_categories = le.inverse_transform(knn_pred)
print(predicted_categories)

with open('testSet_JourneyPatternIDs.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                           quoting=csv.QUOTE_MINIMAL)
    csvwriter.writerow(['Test_Trip_ID', 'Predicted_JourneyPatternID'])
    for i in range(len(testSet)):
        csvwriter.writerow([str(i + 1), predicted_categories[i]])
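# The DTW class isn't shown in this snippet; below is a minimal sketch of the
# classic dynamic-programming recurrence it presumably implements, with a point
# metric such as the haversine distance passed in (hypothetical helper, for
# illustration only):
import numpy as np

def dtw_distance(a, b, point_dist):
    n, m = len(a), len(b)
    D = np.full((n + 1, m + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = point_dist(a[i - 1], b[j - 1])
            # extend the cheapest of the three possible alignments
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return D[n, m]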
def main():
    db = Database()
    knn = KNN(db)
    fr = FaceRecog(db, knn)
    tg = Telegram(fr, db)
    tg.start()
from KNN import KNN
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=42)

kNN = KNN(1)
kNN.train(X_train, y_train)
res = kNN.predict(X_test)

print('Real--->Predicted')
for i, val in enumerate(y_test):
    print(' %d ---> %d' % (val, res[i]))
print('Prediction accuracy:')
print(kNN.score(X_test, y_test))
# TODO: think about extending the code to other classification methods and feature extractors... maybe a switch?
if args.descriptor == 'sift':
    features_descriptor = SIFT(nfeatures=100)
elif args.descriptor == 'surf':
    features_descriptor = SURF(nOctaves=4, nOctaveLayers=2)
elif args.descriptor == 'hog':
    features_descriptor = HOG()
else:
    features_descriptor = None
    print('Invalid descriptor')

myKNN = KNN(nneighbors=100, features_descriptor=features_descriptor)
myEvaluation = Evaluation(evaluation_path=args.evaluation_path, save_plots=True)

if args.do_train:
    start_time = time.time()
    if args.train_method == 'kfold':
        # make K trainings, save the evaluation metrics and models, then decide the best model
        evaluation_metrics = np.array([], dtype=float)
        model = []
        for k in range(args.kfold_k):
# data = Data()  # loads the data and checks if complete
#
# while True:
#     data.load_data()
#     data.split_data()  # split into both test and train
#     predicted_class = {}  # holds data_set_name and a list of predicted classes
#
#     for name, train_data_set in data.train_dict.items():  # iterate through data and get key (data name) and data set
#         print("Current Data Set: ", name)
#         predicted_class[name] = []  # create a list for a data set of predicted values
#         test_data_set = data.test_dict[name]
#         # TODO: Use same keys for all dictionaries; access testing data by key.
#         for _, query_point in train_data_set.iterrows():
#             # give a query example and its corresponding train_data_set, along with the number of desired neighbors to consider
#             predicted_class[name].append(knn.perform_knn(query_point, train_data_set, 5, name, data))

knn = KNN()
data = Data()  # loads the data and checks if complete
lf = LF()
data.load_data()


def run_zero_loss():
    """
    Calls functions in other files until the program is finished.
    :return: None
    """
    data.split_data()  # split into both test and train
    lf.zero_one_loss(data.test_dict['abalone'].sample(n=400), 5, 'abalone', data)


def run_k_means(indata):  # Run k-means on the wine data set
from KNN import KNN
from utils import get_xor
import matplotlib.pyplot as plt

if __name__ == '__main__':
    X, Y = get_xor()

    plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
    plt.show()

    for i in range(20):
        model = KNN(i + 1)
        model.fit(X, Y)
        print('Neighbours:', i + 1, 'Train accuracy:', model.score(X, Y))
    input_instances.calcNumClases()

    # 2. PREPROCESSING
    preproceso.randomizarInstancias(input_instances)

    # 3. kNN
    # 70% labeled instances, 30% instances to classify
    k = int(sys.argv[2])  # number of neighbors to explore
    m = float(sys.argv[3])  # m of the Minkowski distance
    porcentaje = sys.argv[4]
    porcentaje = float(porcentaje)
    if porcentaje > 100.0 or porcentaje < 0.0:
        raise Exception()
    train = input_instances.getPorcentaje(porcentaje)  # take this % of the instances (for train)
    test = input_instances  # the rest (test); getPorcentaje updates the list by popping
    print("Using k=" + str(k) + ", m=" + str(int(m)) + " and " + str(porcentaje) + "% of the instances for train\n")
    print("Classifying...")
    clasificador = KNN(train, test, k, m)  # build the classifier with the labeled instances, the instances to predict, k and m
    clasificador.execute()  # compute the nearest neighbors and predict the class
    prediccion = clasificador.getPrediccion()

    # 4. EVALUATION
    evaluacion.mostrarFigurasMerito(prediccion, input_instances)  # specificity and recall
    # evaluacion.espacioROC()  # plot the models in ROC space

    time = str(datetime.now() - start_time)
    time = time[5:]
    print("Program execution time: " + time + " seconds")
except IOError:
    print("File read error")
# except Exception:
#     print("Unknown error. You probably entered an argument incorrectly.")
from KNN import KNN
import os
# Assumed module paths for the remaining components, mirroring the KNN import:
from CNN import CNN
from MainUI import MainUI

# Change the modeltype variable value to "CNN" to use the Convolutional Neural Network
# Change the modeltype variable value to "KNN" to use the K Nearest Neighbours classifier
modeltype = "KNN"

if modeltype == "KNN":
    if not os.path.exists("knn.sav"):
        print("Saved KNN Classifier not found....")
        print("Downloading MNIST Data, training KNN classifier and saving as knn.sav......")
        print("Kindly wait for a few minutes............")
        knnobj = KNN(3)
        knnobj.skl_knn()
else:
    if not os.path.exists("cnn.hdf5"):
        print("cnn.hdf5 not found...")
        print("Loading MNIST Data, training CNN and saving as cnn.hdf5.....")
        print("Kindly wait a few minutes.........")
        cnnobj = CNN()
        cnnobj.build_and_compile_model()
        cnnobj.train_and_evaluate_model()
        cnnobj.save_model()

MainUIobj = MainUI(modeltype)
MainUIobj.mainloop()
MainUIobj.cleanup()
print("2. Density Based Clustering") print("3. Hierarchical Clustering") print("4. K-means Clustering") print("5. Learning Vector Quantization Clustering") print("6. Mixture of Gaussian Clustering") print("-------Decision Tree-------") print("7. ID3") print("8. C4.5") print("9. CART") print("---Recommendation Algorithm---") print("10. MF") print("11. PMF") print("-------------------------------------------------------") num = input("Enter the number of the algorithm you want to execute:") if num == str(1): algorithm = KNN.KNN() algorithm.execute() elif num == str(2): algorithm = DensityBasedClustering.DensityBasedClustering() algorithm.execute() elif num == str(3): algorithm = HierarchicalClustering.HierarchicalClustering() algorithm.execute() elif num == str(4): algorithm = KMeansAlgorithm.KMeansAlgorithm() algorithm.execute() elif num == str(5): algorithm = LearningVectorQuantization.LearningVectorQuantization() algorithm.execute() elif num == str(6): algorithm = MixtureOfGaussianAlgorithm.MixtureOfGaussianAlgorithm()
    binarizer = LabelBinarizer()
    Y = binarizer.fit_transform(y)

    # By inspection I know that the number of examples will be 50,000. This
    # allows me to set an upper limit on the number of neighbours I want to
    # test for. I inspected the 'X' matrix using the debugger tool in PyCharm.
    k_values = np.array(range(1, 20))  # range of k values to test; this is okay for my computer's memory capabilities
    y_pred = np.zeros(k_values.size)
    best_k = 0  # tracks the k value which returns the best test error
    best_error = np.inf  # initializing to store and track the best test error
    for k in k_values:
        model = KNN(k)
        model.fit(X, y)
        # Computing the validation error with Xtest and ytest
        y_pred = model.predict(Xtest)
        test_error = np.mean(y_pred != ytest)
        if test_error < best_error:
            best_k = k
            best_error = test_error
    print(best_k)
    print(best_error)

elif question == "1.2":
    with gzip.open('mnist.pkl.gz', 'rb') as f:
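    # Aside: the same sweep over k can be done with scikit-learn's built-in
    # estimator and cross-validation instead of a single train/test split.
    # A minimal sketch, assuming X and y as above:
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import GridSearchCV
    search = GridSearchCV(KNeighborsClassifier(),
                          {'n_neighbors': list(range(1, 20))}, cv=5)
    search.fit(X, y)
    print(search.best_params_, 1 - search.best_score_)  # best k and its CV error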