def constructSimilartyMatrixLMNN(self, ks):
    print('now doing LMNN for k =', ks)
    self.y_train = self.y_train.reshape(-1,)
    lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=3000)
    lmnn.fit(self.trainVectorsPCA, self.y_train, verbose=False)
    self.L_lmnn = lmnn.transformer()
    name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(self.trainVectorsPCA.shape)
    np.save(name, self.L_lmnn)
    print('L.shape is', self.L_lmnn.shape, '\n\n')
    # Input data transformed to the metric space by X*L.T
    self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
    self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
    self.transformedAllLMNN = copy(lmnn.transform(self.allDataPCA))  # we compute the pairwise distance on this now
    projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedAllLMNN)
    plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.labels)
    plt.title('LMNN Transformed ALL set projected to 2 Dimensions by TSNE with k=' + str(ks))
    plt.savefig(pp, format='pdf')
    self.pwdis = copy(pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    for i in range(0, self.pwdis.shape[0]):
        l1 = self.pwdis[i].tolist()
        # print('l1 is', l1, '\n\n')
        allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
        # now set all the weights except for the ks nearest neighbours to 0
        self.pwdis[i, allnearestNeighbours[ks:]] = 0
        self.D[i, i] = sum(self.pwdis[i])
    print('accuracy for LMNN for k =', ks, '\n')
    self.labelPropogation()
def constructSimilartyMatrixLMNN(self, ks):
    print('now doing LMNN for k= ', ks)
    self.y_train = self.y_train.reshape(-1,)
    lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=1000)
    lmnn.fit(self.trainVectorsPCA, self.y_train)
    self.L_lmnn = lmnn.transformer()
    name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(self.trainVectorsPCA.shape)
    np.save(name, self.L_lmnn)
    print('L.shape is ', self.L_lmnn.shape, '\n\n')
    # Input data transformed to the metric space by X*L.T
    self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
    self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
    self.transformedAllLMNN = copy(lmnn.transform(self.allDataPCA))  # we compute the pairwise distance on this now
    projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedAllLMNN)
    self.pwdis = copy(pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    for i in range(0, self.pwdis.shape[0]):
        l1 = self.pwdis[i].tolist()
        # print('l1 is', l1, '\n\n')
        allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
        # now set all the weights except for the ks nearest neighbours to 0
        self.pwdis[i, allnearestNeighbours[ks:]] = 0
        self.D[i, i] = sum(self.pwdis[i])
    print('accuracy for LMNN for k= ', ks, '\n')
    self.labelPropogation()
def runLMNN(X_train, X_test, y_train, y_test, k):  # t_test was a typo for y_test
    transformer = LMNN(k=k, learn_rate=1e-6, convergence_tol=0.1, verbose=True)
    transformer.fit(X_train, y_train)
    X_train_proj = transformer.transform(X_train)
    X_test_proj = transformer.transform(X_test)
    np.save('X_train_LMNN_' + str(k), X_train_proj)
    np.save('X_test_LMNN_' + str(k), X_test_proj)
    return X_train_proj, X_test_proj
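# A minimal usage sketch for runLMNN above (not from the source): it assumes an
# older metric-learn API where LMNN takes `k`, and uses iris purely for
# illustration. The .npy side-effect files land in the working directory.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
X_train_proj, X_test_proj = runLMNN(X_train, X_test, y_train, y_test, k=3)
print(X_train_proj.shape, X_test_proj.shape)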
class GeoLMNN(neighbors.KNeighborsClassifier):
    def __init__(self, n_neighbors=3):
        super(GeoLMNN, self).__init__(n_neighbors=n_neighbors)
        self.lmnn = LMNN(n_neighbors)

    def fit(self, X, y):
        self.lmnn.fit(X, y)
        super(GeoLMNN, self).fit(self.lmnn.transform(X), y)
        return self  # sklearn convention: fit returns self

    def predict(self, X):
        y = super(GeoLMNN, self).predict(self.lmnn.transform(X))
        return y
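# Hedged usage sketch for the GeoLMNN wrapper above (not from the source); it
# assumes a metric-learn version whose LMNN accepts the neighbour count as its
# first positional argument, as the wrapper itself does.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = GeoLMNN(n_neighbors=3)
clf.fit(X_tr, y_tr)
print((clf.predict(X_te) == y_te).mean())  # accuracy in the learned metric space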
def test_iris(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.iris_points, self.iris_labels)
    csep = class_separation(lmnn.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.25)
def LMNN(self):
    print("Warning, the features will be transformed")
    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(self.features, self.targets)  # bare `targets` was undefined; self.targets is the likely intent
    self.features = lmnn.transform(self.features)
    self.prepare_for_testing()
    self.nearest_neighbors("LMNN + KNN")
def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
def test_lmnn(self):
    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
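# The two tests above differ only in the constructor keyword: newer metric-learn
# releases renamed LMNN's `k` to `n_neighbors`. A small version-agnostic factory
# sketch (an assumption for portability, not part of either test suite):
import inspect
from metric_learn import LMNN

def make_lmnn(n_neighbors=5, **kwargs):
    # Pick whichever keyword this metric-learn version exposes.
    params = inspect.signature(LMNN.__init__).parameters
    key = 'n_neighbors' if 'n_neighbors' in params else 'k'
    return LMNN(**{key: n_neighbors}, **kwargs)

lmnn = make_lmnn(n_neighbors=5, learn_rate=1e-6)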
def draw_knn_with_lmnn(k, metric):
    names = ['x', 'y', 'color']
    df = pd.DataFrame(mapped_colors, columns=names)
    # print(df.head())
    X = np.array(df.iloc[:, 0:2])  # .ix is long deprecated; .iloc is the modern equivalent
    y = np.array(df['color'])

    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(X, y)
    X_lmnn = lmnn.transform(X)  # newer metric-learn requires the data argument here
    X = X_lmnn
    # print(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    if metric == 'mahalanobis':
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric,
                                   metric_params={'V': np.cov(np.transpose(X))})
    else:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
    knn.fit(X_train, y_train)
    pred = knn.predict(X_test)
    err = 1 - accuracy_score(y_test, pred)
    print('\nThe error is ' + str(err * 100))

    h = .02
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i)" % k)
class LP:
    def __init__(self, lmnn=False, max_iter=1000, lm_num=200):
        # self.clf = LabelPropagation(kernel='knn', max_iter=1000, n_jobs=10, n_neighbors=25)
        self.clf = LabelSpreading(kernel='knn', n_neighbors=25, max_iter=max_iter,
                                  alpha=0.2, n_jobs=-1)
        self.lmnn = lmnn
        self.lm_num = lm_num
        if lmnn:
            self.ml = LMNN(use_pca=False, max_iter=2000)

    def fit(self, X, y):
        if self.lmnn:
            nonzero_index = np.nonzero(y)
            index = random.sample(list(nonzero_index[0]), self.lm_num)
            X_ = X[index]
            y_ = y[index]
            print('ml fitting')
            self.ml.fit(X_, y_)
            print('transform')
            X = self.ml.transform(X)
        print('lp fitting')
        zero_index = np.nonzero(y == 0)
        negative_index = np.nonzero(y == -1)
        positive_index = np.nonzero(y == 1)
        y[zero_index] = -1     # -1 marks unlabelled points for LabelSpreading
        y[negative_index] = 2  # true negatives temporarily become class 2
        print(zero_index[0].shape, negative_index[0].shape, positive_index[0].shape)
        self.clf.fit(X, y)

    def predict(self, X):
        print('lp predict')
        if self.lmnn:
            X = self.ml.transform(X)
        y_pred = self.clf.predict(X)
        negative_index = np.nonzero(y_pred == -1)
        two_index = np.nonzero(y_pred == 2)
        y_pred[negative_index] = 0  # undo the remapping done in fit
        y_pred[two_index] = -1
        return y_pred
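# Hedged usage sketch for the LP wrapper above (synthetic data, not from the
# source). The label convention implied by fit() is +1 positive, -1 negative,
# 0 unlabelled; fit() remaps the vector in place, hence the copy.
import numpy as np
from sklearn.datasets import make_classification

X, y_true = make_classification(n_samples=500, n_features=10, random_state=0)
y = np.where(y_true == 1, 1, -1)                  # map {0, 1} -> {-1, +1}
hide = np.random.RandomState(0).rand(len(y)) < 0.8
y[hide] = 0                                       # hide 80% of the labels
lp = LP(lmnn=False)
lp.fit(X, y.copy())
pred = lp.predict(X)
print(np.unique(pred, return_counts=True))        # predictions come back in {-1, +1}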
from sklearn.feature_selection import SelectFromModel

def baseline_model(X_train, y_train, X_test, y_test):
    # dimension reduction (LinearSVC.transform was removed in sklearn 0.19;
    # SelectFromModel is the modern wrapper for l1-based feature selection)
    feature_selection = SelectFromModel(LinearSVC(C=1, penalty="l1", dual=False))
    X_train_reduced = feature_selection.fit_transform(X_train, y_train)
    X_test_reduced = feature_selection.transform(X_test)
    # metric learning
    ml = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    ml.fit(X_train_reduced, y_train)
    X_train_new = ml.transform(X_train_reduced)
    X_test_new = ml.transform(X_test_reduced)
    neigh = KNeighborsClassifier(n_neighbors=4)
    neigh.fit(X_train_new, y_train)
    predicted = neigh.predict(X_test_new)
    # pickle.dump(ml, open('dist_metrics', 'w'))
    return predicted
class KNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, k=1):
        self.k = k
        self.distanceEstimator = LMNN(k=k)

    def fit(self, X, y):
        # TODO msati3: Ideally, LMNN should expose fit_transform.
        self.distanceEstimator.fit(X, y)
        self.modelData = self.distanceEstimator.transform(X)
        self.modelLabels = y
        return self

    def transform(self, X):
        return self.distanceEstimator.transform(X)

    def predict(self, D):
        X = self.transform(D)  # pre-transform so that the euclidean metric suffices
        distances = distance.cdist(X, self.modelData, 'sqeuclidean')
        # np.argpartition replaces the removed bottleneck.argpartsort
        topKIndexes = np.argpartition(distances, self.k)[:, :self.k]
        predictions = self.modelLabels[topKIndexes]
        return stats.mode(predictions, axis=1)[0].ravel()

    def score(self, X, y, fNormalize=True):
        # accuracy_score expects (y_true, y_pred); normalize is keyword-only
        return accuracy_score(y, self.predict(X), normalize=fNormalize)
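# A minimal sketch exercising KNNClassifier above (not from the source); it
# assumes the older metric-learn API (LMNN(k=...)) that the class itself uses.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = KNNClassifier(k=3).fit(X_tr, y_tr)
print(clf.score(X_te, y_te))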
def lmnn_fit(X_train, Y_train, X_test, Y_test, color_map):
    lmnn = LMNN(init='pca', k=3, learn_rate=5e-4, max_iter=500000, regularization=0.2)
    lmnn.fit(X_train, Y_train)

    X_train_transformed = lmnn.transform(X_train)
    if X_train.shape[1] == 2:
        plt.figure()
        plt.scatter(X_train_transformed[:, 0], X_train_transformed[:, 1],
                    c=color_map[Y_train], s=2)
        plt.savefig("after_lmnn_transform_train.png", dpi=300)

    X_test_transformed = lmnn.transform(X_test)
    if X_test.shape[1] == 2:
        plt.figure()
        plt.scatter(X_test_transformed[:, 0], X_test_transformed[:, 1],
                    c=color_map[Y_test], s=2)
        plt.savefig("after_lmnn_transform_test.png", dpi=300)

    return (X_train_transformed, X_test_transformed)
pca.fit()
pca_query_features = pca.project(query_features)
pca_gallery_features = pca.project(gallery_features)
compute_k_mean(num_of_clusters, pca_query_features, pca_gallery_features, gallery_labels)

# Compute LMNN (Large Margin Nearest Neighbour) Learning
print("\n-----LMNN------")
lmnn = LMNN(k=5, max_iter=20, use_pca=False, convergence_tol=1e-6, learn_rate=1e-6, verbose=True)
lmnn.fit(original_train_features, original_train_labels)
transformed_query_features = lmnn.transform(query_features)
transformed_gallery_features = lmnn.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features, transformed_gallery_features, gallery_labels)

# Compute PCA_LMNN Learning
print("\n-----PCA_LMNN-----")
lmnn = LMNN(k=5, max_iter=20, use_pca=False, convergence_tol=1e-6, learn_rate=1e-6, verbose=True)
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
mu = np.array([[1, 5]])
Sigma = np.array([[1.5, 0.5], [1.5, 3]])
R = cholesky(Sigma)
s = np.dot(np.random.randn(100, 2), R) + mu
label = np.zeros((100, 1))

mu1 = np.array([[5, 10]])
Sigma1 = np.array([[1, 0.5], [1.5, 3]])
R1 = cholesky(Sigma1)
s1 = np.dot(np.random.randn(100, 2), R1) + mu1
label1 = np.zeros((100, 1)) + 1

plt.subplot(121)
plt.plot(s[:, 0], s[:, 1], ".", color='red')
plt.plot(s1[:, 0], s1[:, 1], ".", color='blue')

l1 = list(label)
l2 = list(label1)
l1.extend(l2)
labels = np.array(l1).ravel()  # metric-learn expects a 1-d label vector
s_ = np.vstack((s, s1))
print(s_.shape)
print(labels.shape)

lmnn = LMNN(k=2, min_iter=500, learn_rate=1e-6)
lmnn.fit(s_, labels)
s_new = lmnn.transform(s_)
plt.subplot(122)
plt.plot(s_new[:, 0], s_new[:, 1], ".")
plt.show()
p.axis('equal')

y = []
x = []
with open('segmentation.data') as f:
    for line in f:
        v = line.split(',')
        y.append(v[0])
        x.append(v[1:])
x = np.asarray(x, dtype='float64')
y = np.asarray(y)

lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(x, y)
x_t = lmnn.transform(x)

p1 = plt.subplot(231)
p1.scatter(x_t[:, 0], x_t[:, 1], c=_to_tango_colors(y, 0))
p1.axis('equal')
p1.set_title('LMNN')

# GLVQ
glvq = GlvqModel()
glvq.fit(x, y)
p2 = plt.subplot(232)
p2.set_title('GLVQ')
plot(PCA().fit_transform(x), y, glvq.predict(x), glvq.w_, glvq.c_w_, p2)

# GRLVQ
grlvq = GrlvqModel()
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

## tuning here ...
scores = []
# for i in range(1, 5):
#     print("current k is ", i)
lmnn2 = LMNN(k=5, learn_rate=1e-6)  # .fit(X_train, Y_train)
print("here2")
print(lmnn2)
lmnn2 = lmnn2.fit(X_train, Y_train)
print("hi")
X_train2 = lmnn2.transform(X_train)
X_test2 = lmnn2.transform(X_test)
kn2 = KNeighborsClassifier(n_neighbors=40).fit(X_train2, Y_train)
predict = kn2.predict(X_test2)
lmnn_acc = accuracy_score(Y_test, predict)
print("lmnn accuracy is ", lmnn_acc)
# scores.append(lmnn_acc)
# print("the scores are ", scores)
# k = np.argmax(scores) + 1

#%% using kernel pca
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh
import numpy as np
from numpy import exp  # scipy.exp (an alias of numpy's) was removed in SciPy 1.x
from sklearn.preprocessing import StandardScaler
class Classifier(object):
    """Classifier class."""

    def __init__(self, cfg, feature_file=None, test_split='test'):
        """Classifier Constructor. See build_classifier method for more details.

        Args:
            cfg: Path to configuration file.
            feature_file: Path to feature file.
            test_split: Split to test on.

        Raises:
            RuntimeError: If classifier_type is not specified in the config file.
        """
        # Creates a dictionary of params from the config file.
        self.classifier_params = cls.get_cls_param_dict(cfg)

        # Get params from optional args.
        self.classifier_params['feature_file'] = feature_file
        self.classifier_params['feature_dir'] = os.path.dirname(feature_file)
        self.classifier_params['test_split'] = test_split

        # If classifier type was not set, raise an exception.
        if 'classifier_type' not in self.classifier_params:
            raise RuntimeError('[!] No specified classifier type.')
        self.classifier_type = self.classifier_params['classifier_type']
        self.estimator = None  # Actual classifier.
        # Only used for metric learning; the helper estimator will learn the metric.
        self.helper_estimator = None
        self.parse_feature_file()
        self.build_classifier()  # Classifier initialization given the params.

    def parse_feature_file(self):
        """Parses feature filename for various parameters.

        Raises:
            RuntimeError: If the feature file is invalid (does not belong to
                reconstructed images, measurements, or latent space variables).
        """
        feature_file = self.classifier_params['feature_file']
        # Checks if feature file is based on reconstructed images (x_hats),
        # measurements (y), or the obtained latent space variable (z_hats).
        if feature_file.find("x_hats") > -1:
            self.classifier_params['input_feature'] = 'x_hats'
        elif feature_file.find("measurements") > -1:
            self.classifier_params['input_feature'] = 'measurements'
        elif feature_file.find("z_hats") > -1:
            self.classifier_params['input_feature'] = 'z_hats'
        else:
            raise RuntimeError('[!] Invalid feature file.')

        # Get different parameters of the experiment.
        self.classifier_params['learning_rate'] = re.search(r'lr(([0-9]|\.)+)', feature_file).group(1)
        self.classifier_params['random_restarts'] = re.search(r'rr([0-9]+)', feature_file).group(1)
        self.classifier_params['num_measurements'] = re.search(r'm([0-9]+)', feature_file).group(1)
        self.classifier_params['counter'] = re.search(r'c([0-9]+)', feature_file).group(1)
        self.classifier_params['a_index'] = re.search(r'a([0-9]+)', feature_file).group(1)

    def build_classifier(self):
        """Initializes classifier based on self.classifier_params.

        Raises:
            ValueError: If self.classifier is not supported (currently supports
                [svm|linear-svm|lmnn|logistic|knn|nn]).
        """
        # Different classifier types are treated differently.
        # Kernel SVM.
        if self.classifier_type == 'svm':
            # Default params.
            params = {'c_penalty': 1.0,  # Penalty parameter of the error term.
                      'kernel': 'rbf',  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable.
                      'degree': 3,  # Degree of polynomial for 'poly' kernel.
                      'gamma': 'auto',  # Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
                      'coef0': 0.0,  # Independent term in kernel for 'poly' and 'sigmoid'.
                      'shrinking': True,  # Whether to use the shrinking heuristic.
                      'probability': False,  # Whether to enable probability estimates.
                      'tol': 0.001,  # Tolerance for stopping criterion.
                      'cache_size': 200,  # Kernel cache (in MB).
                      'class_weight': None,  # {class_label: weight}.
                      'verbose': False,
                      'random_state': None,  # Seed for pseudo random number generator for shuffling data.
                      'max_iter': -1,  # Hard limit on iterations or -1 for no limit.
                      # Multiclass handling: 'ovo', 'ovr', or None.
                      # 'ovo': one vs one. 'ovr': one vs rest. None means 'ovr'.
                      'multi_class': None,
                      'num_classes': 10}  # Number of classes.
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the classifier (estimator). Kernel SVM is based on sklearn.
            self.estimator = svm.SVC(C=params['c_penalty'], kernel=params['kernel'],
                                     degree=params['degree'], gamma=params['gamma'],
                                     coef0=params['coef0'], shrinking=params['shrinking'],
                                     probability=params['probability'], tol=params['tol'],
                                     cache_size=params['cache_size'],
                                     class_weight=params['class_weight'],
                                     verbose=params['verbose'], max_iter=params['max_iter'],
                                     decision_function_shape=params['multi_class'],
                                     random_state=params['random_state'])

        # Linear SVM; good for large-scale datasets.
        elif self.classifier_type == 'linear-svm':
            # Default params.
            params = {'penalty': 'l2',  # 'l1' or 'l2'. Norm in the penalization.
                      'loss': 'squared_hinge',  # 'hinge' or 'squared_hinge'. Specifies the loss function.
                      # Use dual or primal optimization problem. Prefer dual=False when n_samples > n_features.
                      'dual': True,
                      'tol': 1e-4,  # Tolerance for stopping criteria.
                      'c_penalty': 1.0,  # Penalty parameter C of the error term.
                      'multi_class': 'ovr',  # 'ovr' (one-vs-rest) or 'crammer_singer' (joint objective in all classes).
                      # Whether or not to calculate the intercept (if false, data is expected to be centered).
                      'fit_intercept': True,
                      'intercept_scaling': 1.0,
                      'class_weight': None,  # {class_label: weight}.
                      'verbose': 0,
                      'random_state': None,  # Seed for random number generator.
                      'max_iter': 1000,  # Maximum number of iterations.
                      'num_classes': 10}  # Number of classes.
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the classifier (estimator). Linear SVM is based on sklearn.
            self.estimator = svm.LinearSVC(penalty=params['penalty'], loss=params['loss'],
                                           dual=params['dual'], tol=params['tol'],
                                           C=params['c_penalty'],
                                           multi_class=params['multi_class'],
                                           fit_intercept=params['fit_intercept'],
                                           intercept_scaling=params['intercept_scaling'],
                                           class_weight=params['class_weight'],
                                           verbose=params['verbose'],
                                           random_state=params['random_state'],
                                           max_iter=params['max_iter'])

        # Large Margin nearest neighbor (metric learning + k-nearest neighbor).
        elif self.classifier_type == 'lmnn':
            # Default params. First, metric learning params.
            params = {'num_neighbors': 3,  # Number of neighbors to consider (does not include self-edges).
                      'min_iter': 50,
                      'max_iter': 1000,
                      'learn_rate': 1e-07,
                      'regularization': 0.5,  # Weight of pull and push terms.
                      'tol': 0.001,  # Convergence tolerance.
                      'verbose': False,
                      # Second, k-nn params.
                      # Weights: callable, or:
                      #   'uniform': all points in each neighborhood are weighted equally.
                      #   'distance': weigh points by the inverse of their distance.
                      'weights': 'uniform',
                      # Algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}. 'auto' will attempt
                      # to decide the most appropriate algorithm based on training data.
                      'algorithm': 'auto',
                      'leaf_size': 30,  # Leaf size passed to BallTree or KDTree.
                      'num_jobs': 1,  # Number of parallel jobs for neighbors search. -1 -> nb of CPU cores.
                      'num_classes': 10}  # Number of classes.
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the helper (helper_estimator). Based on the metric_learn package.
            self.helper_estimator = LMNN(k=params['num_neighbors'],
                                         min_iter=params['min_iter'],
                                         max_iter=params['max_iter'],
                                         learn_rate=params['learn_rate'],
                                         regularization=params['regularization'],
                                         convergence_tol=params['tol'],
                                         verbose=params['verbose'])
            # Build the classifier (estimator). Use euclidean distance as a metric.
            # K-NN classifier is based on sklearn.
            self.estimator = neighbors.KNeighborsClassifier(n_neighbors=params['num_neighbors'],
                                                            weights=params['weights'],
                                                            algorithm=params['algorithm'],
                                                            leaf_size=params['leaf_size'],
                                                            p=2, metric='minkowski',
                                                            metric_params=None,
                                                            n_jobs=params['num_jobs'])

        # Logistic regression.
        elif self.classifier_type == 'logistic':
            # Default params.
            params = {'penalty': 'l2',  # 'l1' or 'l2', specify the norm used in the penalization.
                      'dual': False,  # Dual or primal formulation. dual=False is better when n_samples > n_features.
                      'tol': 0.0001,  # Tolerance for stopping criteria.
                      # Inverse of regularization strength (smaller values -> stronger regularization).
                      'c_penalty': 1.0,
                      'fit_intercept': True,  # If a bias should be added to the decision function.
                      'intercept_scaling': 1,
                      'class_weight': None,  # In the form {class_label: weight}.
                      'random_state': None,  # Seed of random number generator for shuffling the data.
                      'solver': 'liblinear',  # 'newton-cg', 'lbfgs', 'liblinear', or 'sag'.
                      'max_iter': 100,  # Maximum number of iterations for the solvers.
                      # Multiclass handling: 'ovr' (one-vs-rest) or 'multinomial'. If 'ovr', a binary
                      # problem is fit for each label; else the loss minimised is the multinomial loss
                      # fit across the entire probability distribution. 'multinomial' works only for
                      # the 'newton-cg', 'sag' and 'lbfgs' solvers.
                      'multi_class': 'ovr',
                      'verbose': 0,
                      'warm_start': False,  # Reuse solution of the previous call to fit as initialization.
                      'num_jobs': 1,  # Number of CPU cores during cross-validation. -1 -> all cores are used.
                      'num_classes': 10}  # Number of classes.
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the classifier (estimator). Logistic regression is based on sklearn.
            self.estimator = linear_model.LogisticRegression(penalty=params['penalty'],
                                                             dual=params['dual'],
                                                             tol=params['tol'],
                                                             C=params['c_penalty'],
                                                             fit_intercept=params['fit_intercept'],
                                                             intercept_scaling=params['intercept_scaling'],
                                                             class_weight=params['class_weight'],
                                                             random_state=params['random_state'],
                                                             solver=params['solver'],
                                                             max_iter=params['max_iter'],
                                                             multi_class=params['multi_class'],
                                                             verbose=params['verbose'],
                                                             warm_start=params['warm_start'],
                                                             n_jobs=params['num_jobs'])

        # K-Nearest Neighbor classifier (no metric learning).
        elif self.classifier_type == 'knn':
            # Default params.
            params = {'num_neighbors': 3,  # Number of neighbors to use.
                      # Weights: callable, or:
                      #   'uniform': all points in each neighborhood are weighted equally.
                      #   'distance': weigh points by the inverse of their distance.
                      'weights': 'uniform',
                      # Algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}. 'auto' will attempt
                      # to decide the most appropriate algorithm based on training data.
                      'algorithm': 'auto',
                      'leaf_size': 30,  # Leaf size passed to BallTree or KDTree.
                      # Metric: string or DistanceMetric object (default 'minkowski'), the distance
                      # metric to use for the tree. Minkowski with p=2 is equivalent to the
                      # Euclidean metric. See the documentation of DistanceMetric.
                      'metric': 'minkowski',
                      'metric_params': None,  # Additional keyword arguments for the metric function.
                      'power': 2,  # Power parameter for the Minkowski metric. p=1 is l1, p=2 is l2.
                      'num_jobs': 1,  # Number of parallel jobs for neighbors search. -1 -> nb of CPU cores.
                      'num_classes': 10}  # Number of classes.
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the classifier (estimator). KNN is based on sklearn.
            self.estimator = neighbors.KNeighborsClassifier(n_neighbors=params['num_neighbors'],
                                                            weights=params['weights'],
                                                            algorithm=params['algorithm'],
                                                            leaf_size=params['leaf_size'],
                                                            p=params['power'],
                                                            metric=params['metric'],
                                                            metric_params=params['metric_params'],
                                                            n_jobs=params['num_jobs'])

        # Neural network classifier.
        elif self.classifier_type == 'nn':
            # Default params.
            params = {'network_name': 'mlp',  # Name of architecture (should be implemented in NNClassifier).
                      'num_hidden_layers': 3,  # Number of layers (only used if mlp).
                      'num_hidden_units': [200, 200, 10],  # Number of hidden units per layer (only used if mlp).
                      'num_classes': 10,  # Number of classes.
                      'input_dim': 20,  # Dimension of input layer.
                      'initial_lr': 0.01,  # Initial learning rate.
                      'batch_size': 200,  # Batch size.
                      'num_epochs': 25,  # Number of epochs for training.
                      'optimizer_type': 'decay_sgd',  # Optimizer type.
                      'use_batch_norm': False,  # Whether or not to use batch normalization.
                      # Checkpoint directory: where to save tensorflow checkpoints.
                      'checkpoint_dir': os.path.join(self.get_output_dir(), self.tf_checkpoint_dir())}
            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)
            # Build the classifier (estimator). Neural network classifier is based on the NNClassifier class.
            self.estimator = nn.NNClassifier(network_name=params['network_name'],
                                             input_dim=params['input_dim'],
                                             num_hidden_units=params['num_hidden_units'],
                                             num_hidden_layers=params['num_hidden_layers'],
                                             num_classes=params['num_classes'],
                                             initial_lr=params['initial_lr'],
                                             batch_size=params['batch_size'],
                                             num_epochs=params['num_epochs'],
                                             checkpoint_dir=params['checkpoint_dir'],
                                             optimizer_type=params['optimizer_type'],
                                             use_batch_norm=params['use_batch_norm'])
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        # dict.has_key is Python 2 only; the `in` operator works in both.
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Initialized a classifier of type {}.'.format(self.classifier_type))

    def get_feature_dir(self):
        """Returns path to feature directory (where features are saved).

        Returns:
            Feature directory.
        """
        return self.classifier_params['feature_dir']

    def get_output_dir(self):
        """Returns path to output directory (where outputs are saved, such as trained
        classifier, predicted labels, etc.) and creates it if it doesn't exist.

        Returns:
            cls_exp_dir: Output directory.

        Raises:
            RuntimeError: If no feature directory was specified in the configuration.
        """
        feature_dir = self.get_feature_dir()
        if feature_dir is None:
            raise RuntimeError('[!] No feature directory or GAN experiment specified.')
        cls_dir = os.path.join(feature_dir, 'cls')
        cls_exp_dir = os.path.join(cls_dir, self.classifier_params['exp_name'])
        if not os.path.exists(cls_dir):
            os.mkdir(cls_dir)
        if not os.path.exists(cls_exp_dir):
            os.mkdir(cls_exp_dir)
        return cls_exp_dir

    def get_classifier_filename(self):
        """Returns filename for saving the classifier.

        Returns:
            Classifier filename.
        """
        # Classifier filename is parametrized by important experiment parameters.
        return 'classifier_{}_lr{}_rr{}_m{}_c{}_a{}.pkl'.format(self.classifier_params['input_feature'],
                                                                self.classifier_params['learning_rate'],
                                                                self.classifier_params['random_restarts'],
                                                                self.classifier_params['num_measurements'],
                                                                self.classifier_params['counter'],
                                                                self.classifier_params['a_index'])

    def get_labels_filename(self, input_split):
        """Returns filename for saving predicted labels.

        Args:
            input_split: Split to test on [train|val|test].

        Returns:
            Predicted labels filename.
        """
        # Predicted labels filename is parametrized by important experiment parameters.
        return 'predicted_labels_{}_{}_lr{}_rr{}_m{}_c{}_a{}.pkl'.format(input_split,
                                                                         self.classifier_params['input_feature'],
                                                                         self.classifier_params['learning_rate'],
                                                                         self.classifier_params['random_restarts'],
                                                                         self.classifier_params['num_measurements'],
                                                                         self.classifier_params['counter'],
                                                                         self.classifier_params['a_index'])

    def tf_checkpoint_dir(self):
        """Returns name of TensorFlow checkpoint directory.

        Returns:
            Checkpoint directory.
        """
        return 'tf_checkpoints_{}_lr{}_rr{}_m{}_c{}_a{}'.format(self.classifier_params['input_feature'],
                                                                self.classifier_params['learning_rate'],
                                                                self.classifier_params['random_restarts'],
                                                                self.classifier_params['num_measurements'],
                                                                self.classifier_params['counter'],
                                                                self.classifier_params['a_index'])

    def get_acc_filename(self, input_split):
        """Returns filenames for all accuracy files.

        Args:
            input_split: Split to test on [train|val|test].

        Returns:
            acc_filename: The filename for the overall prediction accuracy on this split.
            acc_filenames_i: An array of filenames for class-specific accuracies on this split.
        """
        # Accuracy filename parametrized by experiment parameters.
        acc_filename = 'accuracy_{}_{}_lr{}_rr{}_m{}_c{}_a{}.txt'.format(input_split,
                                                                         self.classifier_params['input_feature'],
                                                                         self.classifier_params['learning_rate'],
                                                                         self.classifier_params['random_restarts'],
                                                                         self.classifier_params['num_measurements'],
                                                                         self.classifier_params['counter'],
                                                                         self.classifier_params['a_index'])
        # For every class, add class number to filename.
        acc_filenames_i = []
        for i in range(self.classifier_params['num_classes']):
            acc_filenames_i.append('class{}_accuracy_{}_{}_lr{}_rr{}_m{}_c{}_a{}.txt'.format(
                i, input_split,
                self.classifier_params['input_feature'],
                self.classifier_params['learning_rate'],
                self.classifier_params['random_restarts'],
                self.classifier_params['num_measurements'],
                self.classifier_params['counter'],
                self.classifier_params['a_index']))
        return acc_filename, acc_filenames_i

    def train(self, features=None, labels=None, retrain=False, num_train=-1):
        """Trains classifier using training features and ground truth training labels.

        Args:
            features: Path to training feature vectors (use None to automatically load
                saved features from experiment output directory).
            labels: Path to ground truth train labels (use None to automatically load
                from dataset).
            retrain: Boolean, whether or not to retrain if classifier is already saved.
            num_train: Number of training samples to use (use -1 to include all training samples).

        Raises:
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
                [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """
        # If no feature vector is provided load from experiment output directory.
        if features is None:
            feature_file = self.classifier_params['feature_file']
            try:
                with open(feature_file, 'r') as f:
                    features = cPickle.load(f)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded feature file from {}.'.format(feature_file))

        # If no label vector is provided load from dataset.
        if labels is None:
            # Create dataset object based on dataset name.
            if self.classifier_params['dataset'] == 'mnist':
                ds = Mnist()
            elif self.classifier_params['dataset'] == 'f-mnist':
                ds = FMnist()
            elif self.classifier_params['dataset'] == 'celeba':
                ds = CelebA(resize_size=self.classifier_params['output_height'],
                            attribute=self.classifier_params['attribute'])
            else:
                raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))
            # Load labels from the train split.
            _, labels, _ = ds.load('train')
            num_samples = min(np.shape(features)[0], len(labels))
            # Restrict to the first num_train samples if num_train is not -1.
            if num_train > -1:
                num_samples = min(num_train, num_samples)
            labels = labels[:num_samples]
            features = features[:num_samples, :]
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded ground truth labels from {}.'.format(self.classifier_params['dataset']))

        # Train the classifier.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
            self.estimator.fit(features, labels)
        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            self.estimator.fit(features, labels, retrain=retrain, session=self.session)
        # For LMNN, first transform the feature vector then perform k-NN.
        elif self.classifier_type == 'lmnn':
            # Learn the metric.
            self.helper_estimator.fit(features, labels)
            # Transform feature space.
            transformed_features = self.helper_estimator.transform(features)
            # Create k-nn graph.
            self.estimator.fit(transformed_features, labels)
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Trained classifier.')

    def save_classifier(self, filename=None):
        """Saves the classifier in a pickle file.

        Args:
            filename: Path to pickle file.

        Raises:
            IOError: If an output error occurs while saving the pickle file.
        """
        # If no filename is provided, default filename will be used.
        if filename is None:
            output_dir = self.get_output_dir()
            filename = self.get_classifier_filename()
            filename = os.path.join(output_dir, filename)
        # Saving for non neural-network classifiers.
        if not self.classifier_type == 'nn':
            try:
                with open(filename, 'wb') as fp:
                    cPickle.dump(self.classifier_type, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.classifier_params, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.estimator, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.helper_estimator, fp, cPickle.HIGHEST_PROTOCOL)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Saved classifier {}.'.format(filename))
        # Neural network classifiers have default saving/loading using TensorFlow.
        else:
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[!] Default TF loading/saving for Neural Networks.')

    def load_classifier(self, filename=None):
        """Loads classifier from a pickle file.

        Args:
            filename: Path to pickle file.

        Raises:
            IOError: If an input error occurs while reading pickle file.
        """
        # If no filename is provided, default filename will be used.
        if filename is None:
            output_dir = self.get_output_dir()
            filename = self.get_classifier_filename()
            filename = os.path.join(output_dir, filename)
        # Loading for non neural-network classifiers.
        if not self.classifier_type == 'nn':
            try:
                with open(filename, 'r') as f:
                    self.classifier_type = cPickle.load(f)
                    self.classifier_params = cPickle.load(f)
                    self.estimator = cPickle.load(f)
                    self.helper_estimator = cPickle.load(f)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded classifier from {}.'.format(filename))
        # Neural network classifiers have default saving/loading using TensorFlow.
        else:
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[!] Default TF loading/saving for Neural Networks.')

    def predict(self, features, save_result=False, model_name=None, filename=None):
        """Predicts labels given test feature vectors. If save_result is True, also saves
        the predictions.

        Args:
            features: Test feature vectors.
            save_result: Optional, boolean, if True save predicted labels and accuracy.
            model_name: For neural network classifiers, model name to load and use to predict.
            filename: Optional, path to save results in.

        Returns:
            predicted_labels: Array of predicted labels.

        Raises:
            IOError: If save_result is True and an output error occurs while saving predictions.
            ValueError: If the classifier type is not supported. Supported types:
                [svm|linear-svm|lmnn|logistic|knn|nn]
        """
        # If save_result is True and no filename was provided, use default filename.
        if save_result and (filename is None):
            output_dir = self.get_output_dir()
            filename = self.get_labels_filename('user_defined')
            filename = os.path.join(output_dir, filename)

        # For kernel and linear SVMs, Logistic regression, and K-NN, simply call the
        # estimator's predict function.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
            predicted_labels = self.estimator.predict(features)
        # Same for neural networks, except for the additional model name and TensorFlow
        # session arguments.
        elif self.classifier_type == 'nn':
            predicted_labels = self.estimator.predict(features, model_name, session=self.session)
        # Metric learning: first transform the features, then call the predict function.
        elif self.classifier_type == 'lmnn':
            transformed_features = self.helper_estimator.transform(features)
            predicted_labels = self.estimator.predict(transformed_features)
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        # Optionally save the predictions (identical for all classifier types).
        if save_result:
            try:
                with open(filename, 'wb') as fp:
                    cPickle.dump(predicted_labels, fp, cPickle.HIGHEST_PROTOCOL)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Saved predicted labels {}.'.format(filename))
        return predicted_labels

    def validate(self):
        """Only needed for neural networks. Validates different checkpoints by testing
        them on the validation split and retaining the one with the top accuracy.

        Returns:
            best_model: Name of chosen best model (empty string if no validation was
                performed). An empty string is returned for non neural network classifiers.

        Raises:
            IOError: If an input error occurs when loading feature vectors, or an output
                error occurs when saving the chosen model.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
                [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print("[*] Validating.")
        # Get feature file paths.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', 'val')
        feature_file = os.path.join(feature_dir, feature_file)
        # Load feature vectors.
        try:
            with open(feature_file, 'r') as f:
                features = cPickle.load(f)
        except IOError as err:
            print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Initialize the dataset object to load ground-truth labels.
        if self.classifier_params['dataset'] == 'mnist':
            ds = Mnist()
        elif self.classifier_params['dataset'] == 'f-mnist':
            ds = FMnist()
        elif self.classifier_params['dataset'] == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))
        # Load ground-truth labels from the validation split.
        _, labels, _ = ds.load('val')
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded ground-truth labels from {}.'.format(self.classifier_params['dataset']))

        # Non neural network classifiers do not require validation as no intermediate
        # models exist.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            print('[!] No validation needed.')
            return ""
        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            # Call the neural network validate function on the features.
            best_acc, best_model, _ = self.estimator.validate(features, labels, session=self.session)
            # Save results.
            try:
                with open(os.path.join(self.get_output_dir(), self.tf_checkpoint_dir(),
                                       'chosen_model.txt'), 'w') as fp:
                    fp.write("{} {}".format(os.path.basename(best_model), best_acc))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Chose model: {}, with validation accuracy {}.'.format(
                    os.path.basename(best_model), best_acc))
            return best_model
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    def test_classifier(self, input_split='test', save_result=False, model_name=None,
                        labels_filename=None, acc_filename=None, acc_filenames_i=None):
        """Predicts labels and compares them to ground truth labels from given split.
        Returns test accuracy.

        Args:
            input_split: What split to test on [train|val|test].
            save_result: Optional, boolean. If True saves predicted labels and accuracy.
            model_name: For neural network classifiers, model name to load and use to predict.
            labels_filename: Optional, string. Path to save predicted labels in.
            acc_filename: Optional, string. Path to save predicted accuracy in.
            acc_filenames_i: Optional, array of strings. Path to save class-specific
                predicted labels in.

        Returns:
            predicted_labels: Predicted labels for the input split.
            accuracy: Accuracy on the input split.
            per_class_accuracies: Array of per-class accuracies on the input split.

        Raises:
            IOError: If an input error occurs when loading features, or an output error
                occurs when saving results.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
                [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """
        # If save_result is True, but no labels_filename was specified, use default filename.
        if save_result and (labels_filename is None):
            output_dir = self.get_output_dir()
            labels_filename = self.get_labels_filename(input_split)
            labels_filename = os.path.join(output_dir, labels_filename)
        # If save_result is True, but no acc_filename was specified, use default filename.
        if save_result and (acc_filename is None):
            output_dir = self.get_output_dir()
            acc_filename, acc_filenames_i = self.get_acc_filename(input_split)
            acc_filename = os.path.join(output_dir, acc_filename)
            for i in range(self.classifier_params['num_classes']):
                acc_filenames_i[i] = os.path.join(output_dir, acc_filenames_i[i])

        # Load feature vectors.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', input_split)
        feature_file = os.path.join(feature_dir, feature_file)
        try:
            with open(feature_file, 'r') as f:
                features = cPickle.load(f)
        except IOError as err:
            print('[!] I/O error({0}): {1}.'.format(err.errno, err.strerror))
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Initiate dataset object to load ground-truth labels.
        if self.classifier_params['dataset'] == 'mnist':
            ds = Mnist()
        elif self.classifier_params['dataset'] == 'f-mnist':
            ds = FMnist()
        elif self.classifier_params['dataset'] == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))
        # Load ground-truth labels.
        _, labels, _ = ds.load(input_split)
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]
        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded ground-truth labels from: {}.'.format(self.classifier_params['dataset']))

        # Predict labels.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            # Pass as keyword: the third positional slot of predict() is model_name.
            predicted_labels = self.predict(features, save_result, filename=labels_filename)
        elif self.classifier_type == 'nn':
            predicted_labels = self.predict(features, save_result, model_name, labels_filename)
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        # Compare predicted labels to ground-truth labels and calculate accuracy.
        num_correct = np.sum(np.equal(predicted_labels, labels))
        accuracy = num_correct / (1.0 * len(labels))
        per_class_accuracies = []
        for i in range(self.classifier_params['num_classes']):
            idx = np.where(np.equal(labels, i))[0]
            num_correct = np.sum(np.equal(predicted_labels[idx], labels[idx]))
            accuracy_i = num_correct / (1.0 * len(labels[idx]))
            per_class_accuracies.append(accuracy_i)

        # Save results.
        if save_result:
            try:
                with open(acc_filename, 'w') as fp:
                    fp.write("{}".format(accuracy))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Saved predicted labels {}.'.format(labels_filename))
                print('[*] Saved predicted accuracy {}.'.format(acc_filename))
            for i in range(self.classifier_params['num_classes']):
                try:
                    with open(acc_filenames_i[i], 'w') as fp:
                        fp.write("{}".format(per_class_accuracies[i]))
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Testing complete. Accuracy on {} split {}.'.format(input_split, accuracy))
            for i in range(self.classifier_params['num_classes']):
                print('[*] Testing complete. Accuracy on {} split, class {}: {}.'.format(
                    input_split, i, per_class_accuracies[i]))
        return predicted_labels, accuracy, per_class_accuracies
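# Hedged usage sketch for the Classifier class above. The config path and the
# feature filename are hypothetical placeholders (the parser only requires that
# the name contain one of x_hats/measurements/z_hats plus lr/rr/m/c/a tags);
# real names come from the surrounding experiment pipeline.
clf = Classifier('configs/mnist.cfg',  # hypothetical config path
                 feature_file='features/x_hats_lr0.1_rr3_m100_c1_a0.pkl',
                 test_split='test')
clf.train()             # features and labels are loaded automatically when None
clf.save_classifier()
_, acc, per_class = clf.test_classifier(input_split='test', save_result=True)
print(acc)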
def main():
    print("************************************************************************************")
    print("*************************** Metric Learning Demo ***********************************")
    print("************************************************************************************")

    # Load variables
    print("Loading data")
    _, _, _, xTe, xTr, xVa, yTr, yTe, yVa = loadmat('data/segment.mat').values()
    xTe, xTr, xVa = xTe.T, xTr.T, xVa.T
    yTr = yTr.flatten().astype(int) - 1
    yTe = yTe.flatten().astype(int) - 1
    yVa = yVa.flatten().astype(int) - 1

    print("Training pca...")
    L0 = pca(xTr.T, whiten=True)[0].T

    print("Training pca-lda...")
    pca_lda = Pipeline([('pca', PCA(n_components=5, whiten=True)),
                        ('lda', LinearDiscriminantAnalysis(n_components=3))])
    pca_lda.fit(xTr, yTr)
    pca_eigen_vals = np.diag(1 / np.sqrt(pca_lda[0].explained_variance_))
    pcalda_mat = pca_lda[1].scalings_[:, :3].T @ pca_eigen_vals @ pca_lda[0].components_

    print("Training lmnn...")
    lmnn = LMNN(init='pca', k=7, learn_rate=1e-6, verbose=False, n_components=3, max_iter=1000)
    lmnn.fit(xTr, yTr)

    print('Learning nonlinear metric with GB-LMNN ... ')
    # L = pcalda_mat
    L = loadmat('data/lmnn2_L.mat')['L']  # Load the matlab matrix
    embed = gb_lmnn(xTr, yTr, 3, L, n_trees=200, verbose=True, xval=xVa, yval=yVa)

    # ################################ k-NN evaluation ###################################
    print("\nEvaluation:")
    k = 1
    # NOTE: this evaluates the same L0[0:3] projection as the PCA score below.
    raw_tr_err, raw_te_err = knn_error_score(L0[0:3], xTr, yTr, xTe, yTe, k)
    print('1-NN Error for raw (high dimensional) input is, Training: {:.2f}%, Testing {:.2f}%'
          .format(100 * raw_tr_err, 100 * raw_te_err))
    pca_tr_err, pca_te_err = knn_error_score(L0[0:3], xTr, yTr, xTe, yTe, k)
    print('1-NN Error for PCA in 3d is, Training: {:.2f}%, Testing {:.2f}%'
          .format(100 * pca_tr_err, 100 * pca_te_err))
    lda_tr_err, lda_te_err = knn_error_score(pcalda_mat, xTr, yTr, xTe, yTe, k)
    print('1-NN Error for PCA-LDA input is, Training: {:.2f}%, Testing {:.2f}%'
          .format(100 * lda_tr_err, 100 * lda_te_err))
    lmnn_tr_err, lmnn_te_err = knn_error_score(lmnn.components_[0:3], xTr, yTr, xTe, yTe, k)
    print('1-NN Error for LMNN is, Training: {:.2f}%, Testing {:.2f}%'
          .format(100 * lmnn_tr_err, 100 * lmnn_te_err))
    gb_tr_err, gb_te_err = knn_error_score([], embed.transform(xTr), yTr, embed.transform(xTe), yTe, 1)
    print('1-NN Error for GB-LMNN input is, Training: {:.2f}%, Testing {:.2f}%'
          .format(100 * gb_tr_err, 100 * gb_te_err))

    # ################################ 3-D Plot ###################################
    print("\nPlotting figures")
    fig = plt.figure(figsize=(12, 8))

    ax1 = fig.add_subplot(2, 2, 1, projection='3d')
    ax1.set_title("PCA Train Error: {:.2f}, Test Error: {:.2f}".format(100 * pca_tr_err, 100 * pca_te_err))
    pts_to_plt = xTr @ L0[0:3].T
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax1.scatter(pts_to_plt[mask, 0], pts_to_plt[mask, 1], pts_to_plt[mask, 2], label=l)
    plt.legend()

    ax2 = fig.add_subplot(2, 2, 2, projection='3d')
    ax2.set_title("PCA-LDA Train Error: {:.2f}, Test Error: {:.2f}".format(100 * lda_tr_err, 100 * lda_te_err))
    pts_to_plt = xTr @ pcalda_mat.T
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax2.scatter(pts_to_plt[mask, 0], pts_to_plt[mask, 1], pts_to_plt[mask, 2], label=l)
    plt.legend()

    ax3 = fig.add_subplot(2, 2, 3, projection='3d')
    ax3.set_title("LMNN Train Error: {:.2f}, Test Error: {:.2f}".format(100 * lmnn_tr_err, 100 * lmnn_te_err))
    pts_to_plt = lmnn.transform(xTr)
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax3.scatter(pts_to_plt[mask, 0], pts_to_plt[mask, 1], pts_to_plt[mask, 2], label=l)
    plt.legend()

    ax4 = fig.add_subplot(2, 2, 4, projection='3d')
    ax4.set_title("GB-LMNN Train Error: {:.2f}, Test Error: {:.2f}".format(100 * gb_tr_err, 100 * gb_te_err))
    pts_to_plt = embed.transform(xTr)
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax4.scatter(pts_to_plt[mask, 0], pts_to_plt[mask, 1], pts_to_plt[mask, 2], label=l)
    plt.legend()
    plt.show()
    # (inside the cross-validation fold loop for the current knn_k)
    fold_cnt += 1
    print("k:", knn_k)
    print("fold:", fold_cnt)
    print("train features shape:", train_features.shape)
    print("train labels shape:", train_labels.shape)
    print("valid features shape:", valid_features.shape)
    print("valid labels shape:", valid_labels.shape)

    lmnn = LMNN(k=5)
    transformed_features = lmnn.fit_transform(train_features, train_labels)
    neigh = KNeighborsClassifier(n_neighbors=knn_k)
    neigh.fit(transformed_features, train_labels)
    neigh_orig = KNeighborsClassifier(n_neighbors=knn_k)
    neigh_orig.fit(train_features, train_labels)

    predict = neigh.predict(lmnn.transform(valid_features))
    predict_orig = neigh_orig.predict(valid_features)
    accuracy = metrics.accuracy_score(valid_labels, predict)
    accuracy_orig = metrics.accuracy_score(valid_labels, predict_orig)
    print("accuracy after metric learning: {}".format(accuracy))
    print("accuracy before metric learning: {}".format(accuracy_orig))
    ac_list.append(accuracy)
    ac_list_orig.append(accuracy_orig)

# after all folds for this knn_k
final_train_accuracy = np.mean(ac_list)
print(final_train_accuracy)
final_train_accuracy_orig = np.mean(ac_list_orig)
print(final_train_accuracy_orig)
train_ac_metrics_list.append([knn_k, final_train_accuracy, final_train_accuracy_orig])
def lmnn(x_train, y_train, x_test):
    lmnn = LMNN(max_iter=50, k=9, verbose=True)
    print("Fitting LMNN...")
    lmnn.fit(x_train, y_train)
    print("Done.")
    return lmnn.transform(x_test)
Result_of_acc_std = np.zeros([len(datasets) * 2, len(classifiers)])

for i in range(len(datasets)):
    print(datasets[i])
    new_path = os.path.join('./data', datasets[i])
    Data_Origi, DataLabel, n_samples, n_attr, n_class = PF.Load_Data(new_path)
    # Min-max normalization
    scaler = MinMaxScaler()
    scaler.fit(Data_Origi)
    Data_Origi = scaler.transform(Data_Origi)
    for l in range(2):
        if l == 0:
            # Metric learning
            lmnn = LMNN(k=5, learn_rate=1e-6)
            lmnn.fit(Data_Origi, DataLabel)
            Data_trans = lmnn.transform(Data_Origi)
        else:
            Data_trans = Data_Origi
        # Homogeneous fusion
        Dis_Matrix = PF.Calcu_Dis(Data_trans)
        CompareMatrix = PF.CompareNoiseLabel(Dis_Matrix, DataLabel)
        Cluster_Checked = PF.Affinity_propagatio_Modify(CompareMatrix)
        lap_ratio = PF.Count(Cluster_Checked, set_vlaue, n_samples)
        Result_of_Upper[i, l] = 1 - lap_ratio
        for j in range(len(classifiers)):
            print(classifiers[j])
            clf = classifiers[j]
            scores = cross_val_score(clf, Data_trans, DataLabel, cv=cv)
            Result_of_acc_ave[2 * i + l, j] = scores.mean()
            Result_of_acc_std[2 * i + l, j] = scores.std()
lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(X, y)
te = time.time()
print('Time: %d s' % (te - ts))  # parenthesize: '%' binds tighter than '-'

# In[12]:

print('done')

# In[17]:

q_transform = lmnn.transform(query_feature)
g_transform = lmnn.transform(gallery_feature)

# In[19]:

print(query_feature.shape)
print(q_transform.shape)

# # Combine lbl and feature

# In[20]: