def RunAllKnnShogun(q):
    """Run the Shogun k-NN benchmark once and report the result through ``q``.

    ``self.dataset``, ``self.verbose`` and ``options`` are read from the
    enclosing scope; none of them is defined inside this function.

    Args:
        q: Object exposing ``put()``. It receives the same value that is
            returned (presumably a multiprocessing queue -- confirm against
            the caller).

    Returns:
        ``totalTimer.ElapsedTime()`` on success, or -1 when the ``-k`` option
        is missing or out of range, or when any exception is raised while
        loading the data or running the model.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the query
    # file.
    try:
        Log.Info("Loading dataset", self.verbose)
        hasQuery = len(self.dataset) == 2
        if hasQuery:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            # Build the query features from the loaded query matrix. The
            # previous code read `queryFeat` before it was ever assigned,
            # so every two-file run ended in the except branch below.
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # Labels are the last column of the loaded reference array; the
        # remaining columns are the features.
        labels = MulticlassLabels(
            referenceData[:, (referenceData.shape[1] - 1)])
        referenceData = referenceData[:, :-1]

        with totalTimer:
            # Get all the parameters.
            match = re.search(r"-k (\d+)", options)
            if not match:
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = int(match.group(1))
            if k < 1 or k > referenceData.shape[0]:
                # `k` is an int at this point, so format it with str()
                # rather than calling .group() on it.
                Log.Fatal("Invalid k: " + str(k)
                          + "; must be greater than 0"
                          + " and less or equal than "
                          + str(referenceData.shape[0]))
                q.put(-1)
                return -1

            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)

            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()

            # The predicted labels are computed only so that the work is
            # timed; the values themselves are not used afterwards.
            if hasQuery:
                model.apply(queryFeat).get_labels()
            else:
                model.apply(referenceFeat).get_labels()
    except Exception as e:
        # Any failure is reported to the caller as -1; log the cause so it
        # is not lost silently.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def knn(train_features, train_labels, test_features, test_labels, k=1):
    """Fit a Euclidean-distance KNN classifier and print its error rates.

    Args:
        train_features: Features used both to build the distance and to train.
        train_labels: Labels handed to the KNN constructor and used to score
            the training predictions.
        test_features: Features passed to ``apply()`` for the test prediction.
        test_labels: Labels used to score the test predictions.
        k: Number of neighbours passed to ``KNN`` (default 1).

    Returns:
        None. Two lines are printed: the training error and the test error,
        each computed as ``(1 - evaluate(...)) * 100``.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    metric = EuclideanDistance(train_features, train_features)
    classifier = KNN(k, metric, train_labels)
    classifier.train()

    # apply() without arguments -- presumably predicts on the features the
    # distance was built with (the training set); confirm against Shogun docs.
    train_predictions = classifier.apply()
    test_predictions = classifier.apply(test_features)

    scorer = MulticlassAccuracy()

    train_accuracy = scorer.evaluate(train_predictions, train_labels)
    print('KNN training error is %.4f' % ((1 - train_accuracy) * 100))

    test_accuracy = scorer.evaluate(test_predictions, test_labels)
    print('KNN test error is %.4f' % ((1 - test_accuracy) * 100))
def RunAllKnnShogun(q):
    """Time one Shogun k-nearest-neighbors run and publish the result on ``q``.

    Relies on ``self.dataset``, ``self.verbose`` and ``options`` coming from
    the enclosing scope.

    Args:
        q: Object with a ``put()`` method; it is given the return value
            (presumably a multiprocessing queue -- confirm with the caller).

    Returns:
        The elapsed time reported by ``totalTimer.ElapsedTime()``, or -1 if
        ``-k`` is missing or invalid, or if any exception occurs.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the query
    # file.
    try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            # Wrap the query matrix that was just loaded. The earlier code
            # referenced `queryFeat` on the right-hand side before it
            # existed, which always raised and returned -1.
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # The last column of the reference array holds the labels; strip it
        # from the feature matrix afterwards.
        lastColumn = referenceData.shape[1] - 1
        labels = MulticlassLabels(referenceData[:, lastColumn])
        referenceData = referenceData[:, :-1]

        with totalTimer:
            # Get all the parameters.
            kMatch = re.search(r"-k (\d+)", options)
            if not kMatch:
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = int(kMatch.group(1))
            if k < 1 or k > referenceData.shape[0]:
                # Use the matched text for the message; `k` itself is now an
                # int and has no .group() method.
                Log.Fatal("Invalid k: " + kMatch.group(1)
                          + "; must be greater than 0"
                          + " and less or equal than "
                          + str(referenceData.shape[0]))
                q.put(-1)
                return -1

            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)

            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()

            # Predictions are produced only to be timed; they are discarded.
            if len(self.dataset) == 2:
                model.apply(queryFeat).get_labels()
            else:
                model.apply(referenceFeat).get_labels()
    except Exception as e:
        # Report failure as -1, but log the cause instead of hiding it.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def knn(train_features, train_labels, test_features, test_labels, k=1):
    """Train KNN with a Euclidean distance and print train/test error.

    Args:
        train_features: Features used for the distance and for training.
        train_labels: Labels for the training features.
        test_features: Features to classify after training.
        test_labels: Labels used to score the test predictions.
        k: Neighbour count forwarded to ``KNN`` (default 1).

    Returns:
        None; the two error percentages are printed, training first.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    model = KNN(k, EuclideanDistance(train_features, train_features),
                train_labels)
    model.train()

    # apply() with no argument -- presumably scores the training features
    # held by the distance; verify against the Shogun KNN documentation.
    predicted_train = model.apply()
    predicted_test = model.apply(test_features)

    accuracy = MulticlassAccuracy()

    def error_percent(predicted, expected):
        # Turn the evaluator result into the percentage that gets printed.
        return (1 - accuracy.evaluate(predicted, expected)) * 100

    print('KNN training error is %.4f'
          % error_percent(predicted_train, train_labels))
    print('KNN test error is %.4f'
          % error_percent(predicted_test, test_labels))
def metric_lmnn_modular(train_fname=traindat, test_fname=testdat,
                        label_train_fname=label_traindat, k=3):
    """Learn an LMNN distance from CSV data and classify test data with KNN.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_train_fname: Path handed to ``CSVFile`` for the training labels.
        k: Value passed to both ``LMNN`` and ``KNN`` (default 3).

    Returns:
        A ``(lmnn, output)`` tuple holding the trained LMNN object and the
        result of ``get_labels()`` on the test predictions, or None when
        ``modshogun`` cannot be imported.
    """
    try:
        from modshogun import (RealFeatures, MulticlassLabels, LMNN, KNN,
                               CSVFile)
    except ImportError:
        # Shogun bindings are unavailable: give up quietly.
        return None

    # wrap features and labels into Shogun objects
    training_set = RealFeatures(CSVFile(train_fname))
    testing_set = RealFeatures(CSVFile(test_fname))
    training_labels = MulticlassLabels(CSVFile(label_train_fname))

    # LMNN
    metric_learner = LMNN(training_set, training_labels, k)
    metric_learner.train()
    learned_distance = metric_learner.get_distance()

    # perform classification with KNN
    classifier = KNN(k, learned_distance, training_labels)
    classifier.train()
    predictions = classifier.apply(testing_set)

    return metric_learner, predictions.get_labels()
def lmnn_diagonal(train_features, train_labels, test_features, test_labels,
                  k=1):
    """Train LMNN with ``set_diagonal(True)``, then print KNN error rates.

    Args:
        train_features: Features given to ``LMNN``.
        train_labels: Labels given to ``LMNN`` and ``KNN`` and used to score
            the training predictions.
        test_features: Features classified after training.
        test_labels: Labels used to score the test predictions.
        k: Value passed to both ``LMNN`` and ``KNN`` (default 1).

    Returns:
        None; the training and test error percentages are printed.
    """
    # MSG_DEBUG and numpy are imported exactly as in the original although
    # nothing below uses them.
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    metric_learner = LMNN(train_features, train_labels, k)
    metric_learner.set_diagonal(True)
    metric_learner.train()

    classifier = KNN(k, metric_learner.get_distance(), train_labels)
    classifier.train()

    # apply() with no argument -- presumably predicts on the training
    # features; confirm against the Shogun KNN documentation.
    train_predictions = classifier.apply()
    test_predictions = classifier.apply(test_features)

    scorer = MulticlassAccuracy()

    train_accuracy = scorer.evaluate(train_predictions, train_labels)
    print('LMNN-diagonal training error is %.4f'
          % ((1 - train_accuracy) * 100))

    test_accuracy = scorer.evaluate(test_predictions, test_labels)
    print('LMNN-diagonal test error is %.4f' % ((1 - test_accuracy) * 100))
def lmnn(train_features, train_labels, test_features, test_labels, k=1):
    """Train LMNN, classify with KNN on its distance, and print error rates.

    Args:
        train_features: Features given to ``LMNN``.
        train_labels: Labels given to ``LMNN`` and ``KNN`` and used to score
            the training predictions.
        test_features: Features classified after training.
        test_labels: Labels used to score the test predictions.
        k: Value passed to both ``LMNN`` and ``KNN`` (default 1).

    Returns:
        None; the training and test error percentages are printed.
    """
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    # Debug logging hint left by the original author (disabled):
    # dummy = LMNN()
    # dummy.io.set_loglevel(MSG_DEBUG)

    metric_learner = LMNN(train_features, train_labels, k)
    metric_learner.train()
    learned_distance = metric_learner.get_distance()

    classifier = KNN(k, learned_distance, train_labels)
    classifier.train()

    # apply() with no argument -- presumably predicts on the training
    # features; confirm against the Shogun KNN documentation.
    train_predictions = classifier.apply()
    test_predictions = classifier.apply(test_features)

    scorer = MulticlassAccuracy()

    train_accuracy = scorer.evaluate(train_predictions, train_labels)
    print('LMNN training error is %.4f' % ((1 - train_accuracy) * 100))

    test_accuracy = scorer.evaluate(test_predictions, test_labels)
    print('LMNN test error is %.4f' % ((1 - test_accuracy) * 100))
def lmnn(train_features, train_labels, test_features, test_labels, k=1):
    """Fit LMNN, run KNN with the learned distance, and print both errors.

    Args:
        train_features: Training features for ``LMNN``.
        train_labels: Training labels for ``LMNN`` and ``KNN``.
        test_features: Features to classify once KNN is trained.
        test_labels: Labels the test predictions are scored against.
        k: Passed unchanged to ``LMNN`` and ``KNN`` (default 1).

    Returns:
        None; prints the training error followed by the test error.
    """
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    # Disabled debug-logging snippet kept from the original:
    # dummy = LMNN()
    # dummy.io.set_loglevel(MSG_DEBUG)

    learner = LMNN(train_features, train_labels, k)
    learner.train()

    model = KNN(k, learner.get_distance(), train_labels)
    model.train()

    # apply() with no argument -- presumably scores the training features;
    # verify against the Shogun KNN documentation.
    predicted_train = model.apply()
    predicted_test = model.apply(test_features)

    accuracy = MulticlassAccuracy()

    def error_percent(predicted, expected):
        # Convert the evaluator result into the printed percentage.
        return (1 - accuracy.evaluate(predicted, expected)) * 100

    print('LMNN training error is %.4f'
          % error_percent(predicted_train, train_labels))
    print('LMNN test error is %.4f'
          % error_percent(predicted_test, test_labels))
def assign_labels(data, centroids, ncenters):
    """Label ``data`` by nearest centroid using a 1-nearest-neighbour model.

    The centroids are given the labels ``0.0 .. ncenters - 1`` and a KNN with
    ``k = 1`` and a Euclidean distance is trained on them.

    Args:
        data: Array wrapped in ``RealFeatures`` and classified (presumably one
            sample per column, as Shogun features usually are -- confirm).
        centroids: Array wrapped in ``RealFeatures`` and used as the training
            set.
        ncenters: Number of centroid labels to generate.

    Returns:
        Whatever ``KNN.apply()`` returns for ``data``.
    """
    from modshogun import (EuclideanDistance, RealFeatures, MulticlassLabels,
                           KNN)
    from numpy import arange

    centroid_labels = MulticlassLabels(arange(0.0, ncenters))
    samples = RealFeatures(data)
    centers = RealFeatures(centroids)

    nearest = KNN(1, EuclideanDistance(centers, centers), centroid_labels)
    nearest.train()

    assignments = nearest.apply(samples)
    return assignments
def assign_labels(data, centroids, ncenters):
    """Classify ``data`` against ``centroids`` with a 1-NN Euclidean model.

    Args:
        data: Array to classify; wrapped in ``RealFeatures``.
        centroids: Array used as the KNN training set; wrapped in
            ``RealFeatures``.
        ncenters: Upper bound of the generated labels
            (``arange(0., ncenters)``), one label per centroid presumably.

    Returns:
        The object returned by ``KNN.apply()`` on the wrapped ``data``.
    """
    from modshogun import EuclideanDistance, KNN
    from modshogun import MulticlassLabels, RealFeatures
    from numpy import arange

    # One label per centroid index, as floats.
    label_values = arange(0., ncenters)
    center_labels = MulticlassLabels(label_values)

    data_features = RealFeatures(data)
    centroid_features = RealFeatures(centroids)

    centroid_distance = EuclideanDistance(centroid_features,
                                          centroid_features)
    classifier = KNN(1, centroid_distance, center_labels)
    classifier.train()

    return classifier.apply(data_features)
def lmnn_diagonal(train_features, train_labels, test_features, test_labels,
                  k=1):
    """Fit diagonal-mode LMNN, classify with KNN, and print both errors.

    Args:
        train_features: Training features for ``LMNN``.
        train_labels: Training labels for ``LMNN`` and ``KNN``.
        test_features: Features to classify once KNN is trained.
        test_labels: Labels the test predictions are scored against.
        k: Passed unchanged to ``LMNN`` and ``KNN`` (default 1).

    Returns:
        None; prints the training error followed by the test error.
    """
    # Imports mirror the original; MSG_DEBUG and numpy are not used below.
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    learner = LMNN(train_features, train_labels, k)
    learner.set_diagonal(True)
    learner.train()
    learned_distance = learner.get_distance()

    model = KNN(k, learned_distance, train_labels)
    model.train()

    # apply() with no argument -- presumably scores the training features;
    # verify against the Shogun KNN documentation.
    predicted_train = model.apply()
    predicted_test = model.apply(test_features)

    accuracy = MulticlassAccuracy()

    def error_percent(predicted, expected):
        # Convert the evaluator result into the printed percentage.
        return (1 - accuracy.evaluate(predicted, expected)) * 100

    print('LMNN-diagonal training error is %.4f'
          % error_percent(predicted_train, train_labels))
    print('LMNN-diagonal test error is %.4f'
          % error_percent(predicted_test, test_labels))
def knn_classify(traindat, testdat, k=3):
    """Train Euclidean KNN on ``traindat`` and return its error on ``testdat``.

    Args:
        traindat: Object with ``features`` and ``labels`` attributes used for
            training.
        testdat: Object with ``features`` and ``labels`` attributes used for
            evaluation.
        k: Number of neighbours passed to ``KNN`` (default 3).

    Returns:
        ``1 - accuracy``, where accuracy is the ``MulticlassAccuracy``
        evaluation of the test predictions.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    train_features = traindat.features
    train_labels = traindat.labels

    metric = EuclideanDistance(train_features, train_features)
    classifier = KNN(k, metric, train_labels)
    classifier.train()

    test_features = testdat.features
    test_labels = testdat.labels
    predictions = classifier.apply(test_features)

    accuracy = MulticlassAccuracy().evaluate(predictions, test_labels)
    return 1 - accuracy
def knn_classify(traindat, testdat, k=3):
    """Return the test error of a Euclidean KNN classifier.

    Args:
        traindat: Training container; its ``features`` and ``labels``
            attributes are read.
        testdat: Test container; its ``features`` and ``labels`` attributes
            are read.
        k: Neighbour count forwarded to ``KNN`` (default 3).

    Returns:
        One minus the value returned by ``MulticlassAccuracy.evaluate`` for
        the test predictions.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    features, labels = traindat.features, traindat.labels
    model = KNN(k, EuclideanDistance(features, features), labels)
    model.train()

    # The test attributes are read only after training, as in the original.
    held_out_features, held_out_labels = testdat.features, testdat.labels
    predicted = model.apply(held_out_features)

    scorer = MulticlassAccuracy()
    error = 1 - scorer.evaluate(predicted, held_out_labels)
    return error
def classifier_knn_modular(train_fname=traindat, test_fname=testdat,
                           label_train_fname=label_traindat, k=3):
    """Train a Euclidean KNN classifier from CSV files and classify test data.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_train_fname: Path handed to ``CSVFile`` for the training labels.
        k: Number of neighbours passed to ``KNN`` (default 3).

    Returns:
        A 4-tuple ``(knn, knn_train, output, multiple_k)``: the classifier,
        the return value of ``train()``, the ``get_labels()`` result for the
        test predictions, and the return value of
        ``classify_for_multiple_k()``.
    """
    from modshogun import (RealFeatures, MulticlassLabels, KNN,
                           EuclideanDistance, CSVFile)

    training_set = RealFeatures(CSVFile(train_fname))
    testing_set = RealFeatures(CSVFile(test_fname))
    metric = EuclideanDistance(training_set, training_set)
    training_labels = MulticlassLabels(CSVFile(label_train_fname))

    classifier = KNN(k, metric, training_labels)
    train_result = classifier.train()

    test_predictions = classifier.apply(testing_set)
    predicted_labels = test_predictions.get_labels()

    per_k_result = classifier.classify_for_multiple_k()

    return classifier, train_result, predicted_labels, per_k_result
def metric_lmnn_modular(train_fname=traindat, test_fname=testdat,
                        label_train_fname=label_traindat, k=3):
    """Train LMNN on CSV data and label the test set with KNN on its distance.

    Args:
        train_fname: Training-feature file path, read through ``CSVFile``.
        test_fname: Test-feature file path, read through ``CSVFile``.
        label_train_fname: Training-label file path, read through ``CSVFile``.
        k: Passed to both ``LMNN`` and ``KNN`` (default 3).

    Returns:
        ``(lmnn, output)``: the trained LMNN object and the ``get_labels()``
        result for the test set. Returns None if importing ``modshogun``
        raises ``ImportError``.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels
        from modshogun import LMNN, KNN, CSVFile
    except ImportError:
        # Without the Shogun bindings there is nothing to run.
        return

    # wrap features and labels into Shogun objects
    train_file = CSVFile(train_fname)
    train_feats = RealFeatures(train_file)
    test_file = CSVFile(test_fname)
    test_feats = RealFeatures(test_file)
    label_file = CSVFile(label_train_fname)
    train_labels = MulticlassLabels(label_file)

    # LMNN
    learner = LMNN(train_feats, train_labels, k)
    learner.train()

    # perform classification with KNN
    model = KNN(k, learner.get_distance(), train_labels)
    model.train()
    predicted = model.apply(test_feats).get_labels()

    return learner, predicted
def lmnn_classify(traindat, testdat, k=3):
    """Train LMNN + KNN on ``traindat`` and return the error on ``testdat``.

    LMNN is configured with ``set_maxiter(1200)`` and its log level is set to
    ``MSG_DEBUG`` before training.

    Args:
        traindat: Object with ``features`` and ``labels`` attributes used for
            training.
        testdat: Object with ``features`` and ``labels`` attributes used for
            evaluation.
        k: Value passed to both ``LMNN`` and ``KNN`` (default 3).

    Returns:
        ``1 - accuracy``, where accuracy is the ``MulticlassAccuracy``
        evaluation of the test predictions.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    train_features = traindat.features
    train_labels = traindat.labels

    metric_learner = LMNN(train_features, train_labels, k)
    metric_learner.set_maxiter(1200)
    metric_learner.io.set_loglevel(MSG_DEBUG)
    metric_learner.train()

    classifier = KNN(k, metric_learner.get_distance(), train_labels)
    classifier.train()

    test_features = testdat.features
    test_labels = testdat.labels
    predictions = classifier.apply(test_features)

    accuracy = MulticlassAccuracy().evaluate(predictions, test_labels)
    return 1 - accuracy
def lmnn_classify(traindat, testdat, k=3):
    """Return the test error of KNN run on an LMNN-learned distance.

    Args:
        traindat: Training container; its ``features`` and ``labels``
            attributes are read.
        testdat: Test container; its ``features`` and ``labels`` attributes
            are read.
        k: Forwarded to both ``LMNN`` and ``KNN`` (default 3).

    Returns:
        One minus the value returned by ``MulticlassAccuracy.evaluate`` for
        the test predictions.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    features, labels = traindat.features, traindat.labels

    learner = LMNN(features, labels, k)
    learner.set_maxiter(1200)
    # Debug-level logging is switched on for the LMNN object before training.
    learner.io.set_loglevel(MSG_DEBUG)
    learner.train()
    learned_distance = learner.get_distance()

    model = KNN(k, learned_distance, labels)
    model.train()

    # The test attributes are read only after training, as in the original.
    held_out_features, held_out_labels = testdat.features, testdat.labels
    predicted = model.apply(held_out_features)

    scorer = MulticlassAccuracy()
    error = 1 - scorer.evaluate(predicted, held_out_labels)
    return error
from modshogun import EuclideanDistance, KNN, MulticlassLabels, CSVFile, RealFeatures

# Example script: load train/test features and training labels from CSV
# files, train a 3-nearest-neighbour classifier with a Euclidean distance,
# and print the values predicted for the test features.
# The "#![...]" lines are snippet markers and are kept verbatim.

#![begin]
#![load_data]
train_file = CSVFile("../data/fm_train_real.dat")
feats_train = RealFeatures(train_file)
test_file = CSVFile("../data/fm_test_real.dat")
feats_test = RealFeatures(test_file)
label_file = CSVFile("../data/label_train_multiclass.dat")
labels = MulticlassLabels(label_file)
#![load_data]

#![choose_distance]
distance = EuclideanDistance(feats_train, feats_test)
#![choose_distance]

#![create_instance]
knn = KNN(3, distance, labels)
#![create_instance]

#![train_and_apply]
knn.train()
predictions = knn.apply(feats_test)
output = predictions.get_values()
print(output)
#![train_and_apply]
#![end]