class Number_recognition:
    """MNIST-style digit recognizer backed by a Shogun KNN classifier.

    The instance initially holds paths to IDX-format image/label files;
    load_train() replaces those attributes with the decoded arrays.
    """

    def __init__(self, test_images, test_labels, k):
        # Until load_train() runs these are file-system paths; afterwards
        # the same attributes hold the decoded numpy arrays.
        self.test_images = test_images
        self.test_labels = test_labels
        self.k = k  # number of neighbours for KNN

    def load(self, path_img, path_lbl):
        """Decode an IDX image file and its matching IDX label file.

        Parameters
        ----------
        path_img : str
            Path to an IDX image file (magic number 2051).
        path_lbl : str
            Path to an IDX label file (magic number 2049).

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            Images flattened to shape (size, rows*cols) and labels of
            shape (size,), both float64.

        Raises
        ------
        ValueError
            If either file's magic number does not match.
        """
        with open(path_lbl, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, '
                                 'got %d' % magic)
            # Vectorized decode; replaces the original per-byte copy loop.
            labels_result = np.frombuffer(
                file.read(), dtype=np.uint8)[:size].astype(np.float64)
        with open(path_img, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            print("rows: " + str(rows) + " cols: " + str(cols))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, '
                                 'got %d' % magic)
            pixels = np.frombuffer(
                file.read(), dtype=np.uint8)[:size * rows * cols]
            images = pixels.astype(np.float64).reshape(size, rows * cols)
        return images, labels_result

    def load_train(self):
        """Load the data set from disk and train the KNN classifier."""
        ims, labels = self.load(self.test_images, self.test_labels)
        self.test_images = ims
        self.test_labels = labels
        labels_numbers = MulticlassLabels(self.test_labels)
        # Shogun expects features column-wise, hence the transpose.
        feats = RealFeatures(self.test_images.T)
        dist = EuclideanDistance()
        self.knn = KNN(self.k, dist, labels_numbers)
        self.knn.train(feats)

    def predict(self, image):
        """Classify one or more images; returns the predicted labels."""
        feats_test = RealFeatures(image.T)
        pred = self.knn.apply_multiclass(feats_test)
        return pred[:]
def run_knn(Xtrain, Ytrain, Xtest, Ytest):
    """Train a KNN classifier and report accuracy on the test set.

    Uses a serialized LMNN metric when a saved model file exists,
    otherwise plain Euclidean distance.  K and NCLASSES are module-level
    constants defined elsewhere in this file.

    Parameters
    ----------
    Xtrain, Xtest : 2d array
        Feature matrices (column-wise samples, as Shogun expects).
    Ytrain, Ytest : 1d array
        Class labels for the corresponding feature matrices.

    Returns
    -------
    2d array
        Confusion matrix computed from the test-set predictions.
    """
    prod_features = RealFeatures(Xtrain)
    prod_labels = MulticlassLabels(Ytrain)
    test_features = RealFeatures(Xtest)

    # Prefer a previously trained LMNN metric when its serialized model
    # is present; fall back to Euclidean distance otherwise.
    # (Removed leftover debug code: unused `diagonal` computation and a
    # global np.set_printoptions side effect.)
    if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
        print("Using LMNN distance")
        lmnn = LMNN()
        sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
        lmnn.load_serializable(sf)
        dist = lmnn.get_distance()
    else:
        dist = EuclideanDistance()

    # classifier
    knn = KNN(K, dist, prod_labels)
    parallel = knn.get_global_parallel()
    parallel.set_num_threads(4)
    knn.set_global_parallel(parallel)
    knn.train(prod_features)

    print("Classifying test set...")
    pred = knn.apply_multiclass(test_features)
    print("Accuracy = %2.2f%%" % (100 * np.mean(pred == Ytest)))

    cm = build_confusion_matrix(Ytest, pred, NCLASSES)
    print("Confusion matrix: ")
    print(cm)
    return cm
# Load a serialized LMNN metric when available, else use Euclidean distance.
if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
    lmnn = LMNN()
    sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
    lmnn.load_serializable(sf)
    # Report sparsity of the learned linear transform's diagonal.
    diagonal = np.diag(lmnn.get_linear_transform())
    print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0),
                                                   diagonal.size))
    dist = lmnn.get_distance()
else:
    dist = EuclideanDistance()

# Confusion matrix from a previous run, used to turn hard predictions
# into class probabilities.
cm = load_confusion_matrix()
print(cm)

# Build and train the classifier on the full training set.
knn = KNN(k, dist, prod_labels)
parallel = knn.get_global_parallel()
parallel.set_num_threads(4)
knn.set_global_parallel(parallel)
knn.train(prod_features)

print("Classifying test set...")
pred = knn.apply_multiclass(test_features)
results = predict_class_prob(pred, cm)
print_prediction_output(results)
def evaluate(labels, feats, params=None, Nsplit=2):
    """Run cross-validation to evaluate the KNN.

    Parameters
    ----------
    labels : MulticlassLabels
        Data set labels.
    feats : RealFeatures
        Data set feats.
    params : dict, optional
        Search scope parameters: 'n_neighbors' (int), 'use_cover_tree'
        (the string 'True' or 'False') and 'dist' ('Euclidean' or
        'Manhattan').  Defaults to 2 neighbours, cover tree, Manhattan.
    Nsplit : int, default = 2
        The n for n-fold cross validation.

    Returns
    -------
    numpy.ndarray
        Test-set accuracy of each fold (the mean is also printed).
    """
    # Build the default per call instead of sharing a mutable default dict.
    if params is None:
        params = {'n_neighbors': 2, 'use_cover_tree': 'True',
                  'dist': 'Manhattan'}
    k = params.get('n_neighbors')
    use_cover_tree = params.get('use_cover_tree') == 'True'
    # BUG FIX: the original tested params.get('dist' == 'Euclidean'), i.e.
    # params.get(False) -> None, so Euclidean was never selected.
    if params.get('dist') == 'Euclidean':
        func_dist = EuclideanDistance
    else:
        func_dist = ManhattanMetric

    split = CrossValidationSplitting(labels, Nsplit)
    split.build_subsets()
    accuracy = np.zeros(Nsplit)
    acc_train = np.zeros(accuracy.shape)
    time_test = np.zeros(accuracy.shape)
    # time.clock was removed in Python 3.8; use perf_counter when present.
    timer = getattr(time, 'perf_counter', getattr(time, 'clock', None))

    for i in range(Nsplit):
        idx_train = split.generate_subset_inverse(i)
        idx_test = split.generate_subset_indices(i)

        # Train on this fold's training subset.
        feats.add_subset(idx_train)
        labels.add_subset(idx_train)
        dist = func_dist(feats, feats)
        knn = KNN(k, dist, labels)
        knn.set_store_model_features(True)
        if use_cover_tree:
            knn.set_knn_solver_type(KNN_COVER_TREE)
        else:
            knn.set_knn_solver_type(KNN_BRUTE)
        knn.train()
        evaluator = MulticlassAccuracy()
        pred = knn.apply_multiclass()
        acc_train[i] = evaluator.evaluate(pred, labels)
        feats.remove_subset()
        labels.remove_subset()

        # Evaluate on the held-out subset, timing the per-sample cost.
        feats.add_subset(idx_test)
        labels.add_subset(idx_test)
        t_start = timer()
        pred = knn.apply_multiclass(feats)
        time_test[i] = (timer() - t_start) / labels.get_num_labels()
        accuracy[i] = evaluator.evaluate(pred, labels)
        feats.remove_subset()
        labels.remove_subset()

    print(accuracy.mean())
    return accuracy