def constructSimilartyMatrixLMNN(self, ks):
    print('now doing LMNN for k =', ks)
    self.y_train = self.y_train.reshape(-1,)
    # verbose is a constructor argument in metric_learn, not a fit() argument
    lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=3000, verbose=False)
    lmnn.fit(self.trainVectorsPCA, self.y_train)
    self.L_lmnn = lmnn.transformer()
    name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(self.trainVectorsPCA.shape)
    np.save(name, self.L_lmnn)
    print('L.shape is', self.L_lmnn.shape, '\n\n')
    # Input data transformed to the metric space by X*L.T
    self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
    self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
    self.transformedAllLMNN = copy(lmnn.transform(self.allDataPCA))  # we compute the pairwise distances on this now
    projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedAllLMNN)

    plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.labels)
    plt.title('LMNN Transformed ALL set projected to 2 Dimensions by TSNE with k=' + str(ks))
    plt.savefig(pp, format='pdf')

    self.pwdis = copy(pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
    self.D = np.zeros(self.pwdis.shape)
    for i in range(self.pwdis.shape[0]):
        l1 = self.pwdis[i].tolist()
        # sort column indices by distance (j avoids shadowing the loop variable i)
        allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
        # zero out all weights except the ks nearest neighbours
        self.pwdis[i, allnearestNeighbours[ks:]] = 0
        self.D[i, i] = sum(self.pwdis[i])

    print('accuracy for LMNN for k =', ks, '\n')
    self.labelPropogation()
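
# A quick sanity check of the "X*L.T" identity noted above (a minimal sketch;
# `lmnn` stands for any fitted metric_learn LMNN and `X` for its training data --
# both names are assumptions, not part of the original snippet):
import numpy as np

def check_transform_identity(lmnn, X):
    L = lmnn.transformer()   # older metric_learn API; newer releases expose components_
    manual = np.dot(X, L.T)  # map the data into the learned metric space by hand
    assert np.allclose(manual, lmnn.transform(X))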
Example 2
    def constructSimilartyMatrixLMNN(self, ks):

        print('now doing LMNN for k= ', ks)
        self.y_train = self.y_train.reshape(-1, )
        lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=1000)
        lmnn.fit(self.trainVectorsPCA, self.y_train)
        self.L_lmnn = lmnn.transformer()
        name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(
            self.trainVectorsPCA.shape)
        np.save(name, self.L_lmnn)
        print('L.shape is ', self.L_lmnn.shape, '\n\n')
        # Input data transformed to the metric space by X*L.T
        self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
        self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
        self.transformedAllLMNN = copy(lmnn.transform(
            self.allDataPCA))  #we compute the pairwise distance on this now
        projectedDigits = TSNE(random_state=randomState).fit_transform(
            self.transformedAllLMNN)

        self.pwdis = copy(
            pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
        self.D = np.zeros(self.pwdis.shape)
        for i in range(self.pwdis.shape[0]):
            l1 = self.pwdis[i].tolist()
            # sort column indices by distance (j avoids shadowing the loop variable i)
            allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
            # zero out all weights except the ks nearest neighbours
            self.pwdis[i, allnearestNeighbours[ks:]] = 0
            self.D[i, i] = sum(self.pwdis[i])

        print('accuracy for LMNN for k= ', ks, '\n')
        self.labelPropogation()
def runLMNN(X_train, X_test, y_train, y_test, k):  # 4th arg was `t_test`, a likely typo; it is unused either way
    transformer = LMNN(k=k, learn_rate=1e-6, convergence_tol=0.1, verbose=True)
    transformer.fit(X_train, y_train)
    X_train_proj = transformer.transform(X_train)
    X_test_proj = transformer.transform(X_test)
    np.save('X_train_LMNN_' + str(k), X_train_proj)
    np.save('X_test_LMNN_' + str(k), X_test_proj)
    return X_train_proj, X_test_proj
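
# Hedged usage sketch for runLMNN on synthetic data (make_classification and the
# split are illustrative assumptions; note the function never touches its fourth
# argument):
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=10, n_informative=5, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
X_tr_proj, X_te_proj = runLMNN(X_tr, X_te, y_tr, y_te, k=3)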
Example 4
class GeoLMNN(neighbors.KNeighborsClassifier):
    def __init__(self, n_neighbors=3):
        super(GeoLMNN, self).__init__(n_neighbors=n_neighbors)
        self.lmnn = LMNN(k=n_neighbors)  # keyword form; older metric_learn takes k first

    def fit(self, X, y):
        self.lmnn.fit(X, y)
        super(GeoLMNN, self).fit(self.lmnn.transform(X), y)
        return self  # sklearn convention: fit returns the estimator

    def predict(self, X):
        y = super(GeoLMNN, self).predict(self.lmnn.transform(X))
        return y
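
# Illustrative use of GeoLMNN (a sketch; the iris dataset is an arbitrary choice):
from sklearn import datasets

iris = datasets.load_iris()
clf = GeoLMNN(n_neighbors=3).fit(iris.data, iris.target)
print((clf.predict(iris.data) == iris.target).mean())  # training accuracy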
Example 5
    def test_iris(self):
        lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
        lmnn.fit(self.iris_points, self.iris_labels)

        csep = class_separation(lmnn.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.25)
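
# class_separation comes from metric-learn's test utilities; a plausible sketch
# (mean ratio of intra-class to inter-class pairwise distances -- an assumption,
# not necessarily the library's exact implementation):
import numpy as np
from sklearn.metrics import pairwise_distances

def class_separation_sketch(X, labels):
    unique_labels, label_inds = np.unique(labels, return_inverse=True)
    ratio = 0.0
    for li in range(len(unique_labels)):
        Xc = X[label_inds == li]   # points in class li
        Xnc = X[label_inds != li]  # points in every other class
        ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean()
    return ratio / len(unique_labels)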
Example 6
    def LMNN(self):
        print("Warning, the features will be transformed")
        lmnn = LMNN(k=5, learn_rate=1e-6)
        lmnn.fit(self.features, self.targets)  # bare `targets` in the original; self.targets is the likely intent

        self.features = lmnn.transform(self.features)
        self.prepare_for_testing()
        self.nearest_neighbors("LMNN + KNN")
Example 7
  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
  def test_lmnn(self):
    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
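
# The two tests above differ only in the neighbour-count keyword: metric-learn
# renamed LMNN's `k` to `n_neighbors` (around v0.5). A hedged shim that runs on
# either API:
def make_lmnn(n_neighbors=5, **kwargs):
    try:
        return LMNN(n_neighbors=n_neighbors, **kwargs)
    except TypeError:  # older metric-learn only knows `k`
        return LMNN(k=n_neighbors, **kwargs)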
def draw_knn_with_lmnn(k, metric):
    names = ['x', 'y', 'color']

    df = pd.DataFrame(mapped_colors, columns=names)
    # print(df.head())

    X = np.array(df.iloc[:, 0:2])  # .ix was removed from pandas; iloc is the positional equivalent
    y = np.array(df['color'])

    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(X, y)
    X_lmnn = lmnn.transform(X)  # transform() needs the data; the original passed nothing

    X = X_lmnn

    # print(X)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    if metric == 'mahalanobis':
        knn = KNeighborsClassifier(
            n_neighbors=k,
            metric=metric,
            metric_params={'V': np.cov(np.transpose(X))})
    else:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)

    knn.fit(X_train, y_train)

    pred = knn.predict(X_test)

    err = 1 - accuracy_score(y_test, pred)
    print('\nThe error is ' + str(err * 100))

    h = .02

    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i)" % k)
Example 10
class LP:
    def __init__(self, lmnn=False, max_iter=1000, lm_num=200):
        # self.clf =  LabelPropagation(kernel='knn',max_iter=1000,n_jobs=10,n_neighbors=25)
        self.clf = LabelSpreading(kernel='knn',
                                  n_neighbors=25,
                                  max_iter=max_iter,
                                  alpha=0.2,
                                  n_jobs=-1)
        self.lmnn = lmnn
        self.lm_num = lm_num
        if lmnn:
            self.ml = LMNN(use_pca=False, max_iter=2000)

    def fit(self, X, y):
        if self.lmnn:
            nonzero_index = np.nonzero(y)
            index = random.sample(list(nonzero_index[0]), self.lm_num)
            X_ = X[index]
            y_ = y[index]
            print('ml fitting')
            self.ml.fit(X_, y_)
            print('transform')
            X = self.ml.transform(X)
        print('lp fitting')
        # Remap labels for LabelSpreading: 0 (unlabeled) -> -1, -1 (negative class) -> 2
        zero_index = np.nonzero(y == 0)
        negative_index = np.nonzero(y == -1)
        positive_index = np.nonzero(y == 1)
        y[zero_index] = -1
        y[negative_index] = 2
        print(zero_index[0].shape, negative_index[0].shape,
              positive_index[0].shape)
        self.clf.fit(X, y)

    def predict(self, X):
        print('lp predict')
        if self.lmnn:
            X = self.ml.transform(X)
        y_pred = self.clf.predict(X)
        negative_index = np.nonzero(y_pred == -1)
        two_index = np.nonzero(y_pred == 2)
        y_pred[negative_index] = 0
        y_pred[two_index] = -1
        return y_pred
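
# Illustrative use of LP (a sketch): labels follow the convention the class
# expects -- 1/-1 for the two classes, 0 for unlabeled points.
import numpy as np
from sklearn.datasets import make_classification

X, y_true = make_classification(n_samples=300, n_features=10, random_state=0)
y = np.where(y_true == 1, 1, -1)                                 # {0,1} -> {-1,1}
y[np.random.RandomState(0).choice(300, 200, replace=False)] = 0  # hide most labels
lp = LP(lmnn=False)
lp.fit(X, y.copy())  # fit remaps y in place, hence the copy
pred = lp.predict(X)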
def baseline_model(X_train, y_train, X_test, y_test):

    # dimension reduction via l1-based feature selection
    # (LinearSVC.transform existed in scikit-learn < 0.19; see the SelectFromModel
    # variant after this function for modern versions)
    feature_selection = LinearSVC(C=1, penalty="l1", dual=False)
    X_train_reduced = feature_selection.fit_transform(X_train, y_train)
    X_test_reduced = feature_selection.transform(X_test)

    # metric learning
    ml = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    ml.fit(X_train_reduced, y_train)
    X_train_new = ml.transform(X_train_reduced)
    X_test_new = ml.transform(X_test_reduced)

    neigh = KNeighborsClassifier(n_neighbors=4)
    neigh.fit(X_train_new, y_train)
    predicted = neigh.predict(X_test_new)

    #pickle.dump(ml, open('dist_metrics', 'w'))

    return predicted
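
# LinearSVC.transform was removed in scikit-learn 0.19; on modern versions the
# same l1-based reduction goes through SelectFromModel. A hedged rewrite of the
# reduction step only:
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC

def reduce_features(X_train, y_train, X_test):
    selector = SelectFromModel(LinearSVC(C=1, penalty="l1", dual=False))
    X_train_reduced = selector.fit_transform(X_train, y_train)
    X_test_reduced = selector.transform(X_test)
    return X_train_reduced, X_test_reduced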
Example 13
class KNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, k=1):
        self.k = k
        self.distanceEstimator = LMNN(k=k)

    def fit(self, X, y):
        #TODO msati3: Ideally, LMNN should expose fit_transform.
        self.distanceEstimator.fit(X, y)
        self.modelData = self.distanceEstimator.transform(X)
        self.modelLabels = np.asarray(y)  # ndarray so the fancy indexing in predict works
        return self

    def transform(self, X):
        return self.distanceEstimator.transform(X)

    def predict(self, D):
        X = self.transform(D)  # pre-transform so that the euclidean metric suffices
        distances = distance.cdist(X, self.modelData, 'sqeuclidean')
        # bottleneck's argpartsort was renamed argpartition; numpy's does the same job
        topKIndexes = np.argpartition(distances, self.k)[:, :self.k]
        predictions = self.modelLabels[topKIndexes]
        return stats.mode(predictions, axis=1)[0].ravel()  # flatten (n, 1) modes to (n,)

    def score(self, X, y, fNormalize=True):
        return accuracy_score(y, self.predict(X), normalize=fNormalize)
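
# Illustrative use of KNNClassifier (a sketch; the wine dataset is an arbitrary
# choice, and the score is on the training data):
from sklearn.datasets import load_wine

data = load_wine()
clf = KNNClassifier(k=3).fit(data.data, data.target)
print(clf.score(data.data, data.target))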
def lmnn_fit(X_train, Y_train, X_test, Y_test, color_map):
    lmnn = LMNN(init='pca',
                k=3,
                learn_rate=5e-4,
                max_iter=500000,
                regularization=0.2)
    lmnn.fit(X_train, Y_train)
    X_train_transformed = lmnn.transform(X_train)
    if (X_train.shape[1] == 2):
        plt.figure()
        plt.scatter(X_train_transformed[:, 0],
                    X_train_transformed[:, 1],
                    c=color_map[Y_train],
                    s=2)
        plt.savefig("after_lmnn_transform_train.png", dpi=300)
    X_test_transformed = lmnn.transform(X_test)
    if (X_test.shape[1] == 2):
        plt.figure()
        plt.scatter(X_test_transformed[:, 0],
                    X_test_transformed[:, 1],
                    c=color_map[Y_test],
                    s=2)
        plt.savefig("after_lmnn_transform_test.png", dpi=300)
    return (X_train_transformed, X_test_transformed)
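
# Illustrative call to lmnn_fit (a sketch): 2-D blobs so both scatter plots are
# written out; the color map is any array indexable by class label.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

X, Y = make_blobs(n_samples=300, centers=3, n_features=2, random_state=0)
X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y, random_state=0)
cmap = np.array(['tab:red', 'tab:green', 'tab:blue'])
X_tr_t, X_te_t = lmnn_fit(X_tr, Y_tr, X_te, Y_te, cmap)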
pca.fit()
pca_query_features = pca.project(query_features)
pca_gallery_features = pca.project(gallery_features)
compute_k_mean(num_of_clusters, pca_query_features, pca_gallery_features,
               gallery_labels)

# Compute LMNN (Large Margin Nearest Neighbour) Learning
print("\n-----LMNN------")
lmnn = LMNN(k=5,
            max_iter=20,
            use_pca=False,
            convergence_tol=1e-6,
            learn_rate=1e-6,
            verbose=True)
lmnn.fit(original_train_features, original_train_labels)
transformed_query_features = lmnn.transform(query_features)
transformed_gallery_features = lmnn.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_LMNN Learning
print("\n-----PCA_LMNN-----")
lmnn = LMNN(k=5,
            max_iter=20,
            use_pca=False,
            convergence_tol=1e-6,
            learn_rate=1e-6,
            verbose=True)
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
Example 16
    mu = np.array([[1, 5]])
    Sigma = np.array([[1.5, 0.5], [0.5, 3]])  # symmetrized; a covariance matrix must be symmetric
    R = cholesky(Sigma)
    s = np.dot(np.random.randn(100, 2), R) + mu
    label = np.zeros((100, 1))

    mu1 = np.array([[5, 10]])
    Sigma1 = np.array([[1, 0.5], [0.5, 3]])  # symmetrized for the same reason
    R1 = cholesky(Sigma1)
    s1 = np.dot(np.random.randn(100, 2), R1) + mu1
    label1 = np.zeros((100, 1)) + 1

    plt.subplot(121)
    plt.plot(s[:, 0], s[:, 1], ".", color='red')
    plt.plot(s1[:, 0], s1[:, 1], ".", color='blue')

    l1 = list(label)
    l2 = list(label1)
    l1.extend(l2)
    labels = np.array(l1).ravel()  # metric_learn expects 1-D labels

    s_ = np.vstack((s, s1))
    print(s_.shape)
    print(labels.shape)

    lmnn = LMNN(k=2, min_iter=500, learn_rate=1e-6)
    lmnn.fit(s_, labels)
    s_new = lmnn.transform(s_)
    plt.subplot(122)
    plt.plot(s_new[:, 0], s_new[:, 1], ".")
    plt.axis('equal')  # the original called axis('equal') on an undefined `p`, after show()
    plt.show()


y = []
x = []
with open('segmentation.data') as f:
    for line in f:
        v = line.split(',')
        y.append(v[0])
        x.append(v[1:])
x = np.asarray(x, dtype='float64')
y = np.asarray(y)

lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(x, y)
x_t = lmnn.transform(x)

p1 = plt.subplot(231)
p1.scatter(x_t[:, 0], x_t[:, 1], c=_to_tango_colors(y, 0))
p1.axis('equal')
p1.set_title('LMNN')

# GLVQ
glvq = GlvqModel()
glvq.fit(x, y)
p2 = plt.subplot(232)
p2.set_title('GLVQ')
plot(PCA().fit_transform(x), y, glvq.predict(x), glvq.w_, glvq.c_w_, p2)

# GRLVQ
grlvq = GrlvqModel()
Example 18
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

## tuning here ...

scores = []
#for i in range(1,5):
#print("current k is ",i)
lmnn2 = LMNN(k=5, learn_rate=1e-6)  #.fit(X_train,Y_train)
print("here2")
print(lmnn2)
lmnn2 = lmnn2.fit(X_train, Y_train)
print("hi")
X_train2 = lmnn2.transform(X_train)
X_test2 = lmnn2.transform(X_test)
kn2 = KNeighborsClassifier(n_neighbors=40).fit(X_train2, Y_train)
predict = kn2.predict(X_test2)
lmnn_acc = accuracy_score(Y_test, predict)
print("lmnn accuracy is ", lmnn_acc)
#scores.append(lmnn_acc)
#print("the scores are ",scores)
#k=np.argmax(scores)+1

#%% using kernel PCA
from scipy.spatial.distance import pdist, squareform
from numpy import exp  # scipy.exp was deprecated and later removed; numpy's exp is equivalent
from scipy.linalg import eigh
import numpy as np
from sklearn.preprocessing import StandardScaler
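
# The imports above suggest the classic hand-rolled RBF kernel PCA; a hedged
# sketch of what likely follows (an assumption, not the original continuation):
def rbf_kernel_pca(X, gamma, n_components):
    sq_dists = pdist(X, 'sqeuclidean')      # condensed pairwise squared distances
    K = exp(-gamma * squareform(sq_dists))  # RBF kernel matrix
    N = K.shape[0]
    one_n = np.ones((N, N)) / N
    K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)  # center K
    eigvals, eigvecs = eigh(K)              # eigenvalues in ascending order
    return np.column_stack([eigvecs[:, -i] for i in range(1, n_components + 1)])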
Example 19
class Classifier(object):
    """Classifier class."""

    def __init__(self, cfg, feature_file=None, test_split='test'):
        """Classifier Constructor. See build_classifier method for more details.

        Args:
            cfg: Path to configuration file.
            feature_file: Path to feature file.
            test_split: Split to test on.

        Raises:
            RuntimeError: If classifier_type is not specified in the config file.
        """

        # Creates a dictionary of params from the config file.
        self.classifier_params = cls.get_cls_param_dict(cfg)

        # Get params from optional args.
        self.classifier_params['feature_file'] = feature_file
        self.classifier_params['feature_dir'] = os.path.dirname(feature_file)
        self.classifier_params['test_split'] = test_split

        # If classifier type was not set, raise an exception.
        if 'classifier_type' not in self.classifier_params:
            raise RuntimeError('[!] No specified classifier type.')

        self.classifier_type = self.classifier_params['classifier_type']
        self.estimator = None  # Actual classifier.
        self.helper_estimator = None  # Only used for metric learning, the helper estimator will learn the metric.
        self.parse_feature_file()
        self.build_classifier()  # Classifier initialization given the params.

    def parse_feature_file(self):
        """Parses feature filename for various parameters.

        Raises:
            RuntimeError: If the feature file is invalid (does not belong to reconstructed images, measurements, or
            latent space variables).
        """

        feature_file = self.classifier_params['feature_file']

        # Checks if feature file is based on reconstructed images (x_hats), measurements (y), or the obtained latent
        # space variable (z_hats).
        if feature_file.find("x_hats") > -1:
            self.classifier_params['input_feature'] = 'x_hats'
        elif feature_file.find("measurements") > -1:
            self.classifier_params['input_feature'] = 'measurements'
        elif feature_file.find("z_hats") > -1:
            self.classifier_params['input_feature'] = 'z_hats'
        else:
            raise RuntimeError('[!] Invalid feature file.')

        # Get different parameters of the experiment.
        self.classifier_params['learning_rate'] = re.search(r'lr(([0-9]|\.)+)', feature_file).group(1)
        self.classifier_params['random_restarts'] = re.search(r'rr([0-9]+)', feature_file).group(1)
        self.classifier_params['num_measurements'] = re.search(r'm([0-9]+)', feature_file).group(1)
        self.classifier_params['counter'] = re.search(r'c([0-9]+)', feature_file).group(1)
        self.classifier_params['a_index'] = re.search(r'a([0-9]+)', feature_file).group(1)
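
    # Example (an assumption about the naming scheme, inferred from the regexes
    # above): a file like '.../x_hats_lr0.01_rr3_m100_c5_a2.pkl' would yield
    # input_feature='x_hats', learning_rate='0.01', random_restarts='3',
    # num_measurements='100', counter='5', a_index='2'.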

    def build_classifier(self):
        """Initializes classifier based on self.classifier_params.

        Raises:
            ValueError: If self.classifier is not supported (currently supports [svm|linear-svm|lmnn|logistic|knn|nn]).
        """

        # Different classifier types are treated differently.

        # Kernel SVM.
        if self.classifier_type == 'svm':
            # Default params.
            params = {'c_penalty': 1.0,  # Penalty parameter of the error term.
                      'kernel': 'rbf',  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable.
                      'degree': 3,  # Degree of polynomial for 'poly' kernel.
                      'gamma': 'auto',  # Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
                      'coef0': 0.0,  # Independent term in kernel for 'poly' and 'sigmoid'.
                      'shrinking': True,  # Whether to use the shrinking heuristic.
                      'probability': False,  # Whether to enable probability estimates.
                      'tol': 0.001,  # Tolerance for stopping criterion.
                      'cache_size': 200,  # Kernel cache (in MB).
                      'class_weight': None,  # {class_label: weight}.
                      'verbose': False,
                      'random_state': None,  # Seed for pseudo random number generator for shuffling data.
                      'max_iter': -1,  # Hard limit on iterations or -1 for no limit.
                      # Multiclass handling.
                      # 'ovo', 'ovr', or None.
                      # 'ovo': one vs one.
                      # 'ovr' one vs rest.
                      # None is 'ovr'.
                      'multi_class': None,
                      'num_classes': 10}  # Number of classes.

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the classifier (estimator). Kernel SVM is based on sklearn.
            self.estimator = svm.SVC(C=params['c_penalty'], kernel=params['kernel'], degree=params['degree'],
                                     gamma=params['gamma'], coef0=params['coef0'], shrinking=params['shrinking'],
                                     probability=params['probability'], tol=params['tol'],
                                     cache_size=params['cache_size'], class_weight=params['class_weight'],
                                     verbose=params['verbose'], max_iter=params['max_iter'],
                                     decision_function_shape=params['multi_class'], random_state=params['random_state'])

        # Linear SVM; good for large-scale datasets.
        elif self.classifier_type == 'linear-svm':
            # Default params.
            params = {'penalty': 'l2',  # 'l1' or 'l2'. Norm in the penalization.
                      'loss': 'squared_hinge',  # 'hinge' or 'squared_hinge'. Specifies the loss function.
                      # Use dual or primal optimization problem. Prefer dual=False when n_samples > n_features.
                      'dual': True,
                      'tol': 1e-4,  # Tolerance for stopping criteria.
                      'c_penalty': 1.0,  # Penalty parameter C of the error term.
                      'multi_class': 'ovr',  # 'ovr' (one-vs-rest) or 'crammer_singer' (joint objective in all classes).
                      # Whether or not to calculate the intercept (if false, data is expected to be centered).
                      'fit_intercept': True,
                      'intercept_scaling': 1.0,
                      'class_weight': None,  # {class_label: weight}.
                      'verbose': 0,
                      'random_state': None,  # Seed for random number generator.
                      'max_iter': 1000,  # Maximum number of iterations.
                      'num_classes': 10}  # Number of classes.

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the classifier (estimator). Linear SVM is based on sklearn.
            self.estimator = svm.LinearSVC(penalty=params['penalty'], loss=params['loss'], dual=params['dual'],
                                           tol=params['tol'], C=params['c_penalty'], multi_class=params['multi_class'],
                                           fit_intercept=params['fit_intercept'],
                                           intercept_scaling=params['intercept_scaling'],
                                           class_weight=params['class_weight'], verbose=params['verbose'],
                                           random_state=params['random_state'], max_iter=params['max_iter'])

        # Large Margin nearest neighbor (metric learning + k-nearest neighbor).
        elif self.classifier_type == 'lmnn':
            # Default params.
            # First, metric learning params.
            params = {'num_neighbors': 3,  # Number of neighbors to consider (does not include self-edges).
                      'min_iter': 50,
                      'max_iter': 1000,
                      'learn_rate': 1e-07,
                      'regularization': 0.5,  # Weight of pull and push terms.
                      'tol': 0.001,  # Convergence tolerance.
                      'verbose': False,
                      # Second, k-nn params.
                      # Weights: Callable,  or:
                      # 'uniform': Uniform weights.  All points in each neighborhood are weighted equally.
                      # 'distance': Weigh points by the inverse of their distance.
                      'weights': 'uniform',
                      # Algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'} 'auto' will attempt to decide the
                      # most appropriate algorithm based on training data.
                      'algorithm': 'auto',
                      'leaf_size': 30,  # Leaf size passed to BallTree or KDTree.
                      'num_jobs': 1,  # The number of parallel jobs to run for neighbors search. -1 -> nb of CPU cores.
                      'num_classes': 10}  # Number of classes.

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the helper (helper_estimator). Based on the metric_learn package.
            self.helper_estimator = LMNN(k=params['num_neighbors'], min_iter=params['min_iter'],
                                         max_iter=params['max_iter'], learn_rate=params['learn_rate'],
                                         regularization=params['regularization'], convergence_tol=params['tol'],
                                         verbose=params['verbose'])

            # Build the classifier (estimator). Use euclidean distance as a metric. K-NN classifier is based on sklearn.
            self.estimator = neighbors.KNeighborsClassifier(n_neighbors=params['num_neighbors'],
                                                            weights=params['weights'], algorithm=params['algorithm'],
                                                            leaf_size=params['leaf_size'], p=2, metric='minkowski',
                                                            metric_params=None, n_jobs=params['num_jobs'])

        # Logistic regression.
        elif self.classifier_type == 'logistic':
            # Default params.
            params = {'penalty': 'l2',  # 'l1' or 'l2', specify the norm used in the penalization.
                      'dual': False,  # Dual or primal formulation. dual=False is better when n_samples > n_features.
                      'tol': 0.0001,  # Tolerance for stopping criteria.
                      # Inverse of regularization strength (smaller values -> stronger regularization).
                      'c_penalty': 1.0,
                      'fit_intercept': True,  # If a bias should be added to the decision function.
                      'intercept_scaling': 1,
                      'class_weight': None,  # In the form {class_label: weight}.
                      'random_state': None,  # Seed of random number generator for shuffling the data.
                      'solver': 'liblinear',  # 'newton-cg', 'lbfgs', 'liblinear', or 'sag'.
                      'max_iter': 100,  # Maximum number of iterations for the solvers.
                      # Multiclass handling.
                      # 'ovr' one-vs-rest or 'multinomial' If the option chosen is 'ovr', then a binary problem is fit
                      # for each label.
                      # Else the loss minimised is the multinomial loss fit across the entire probability distribution.
                      # Works only for the 'newton-cg', 'sag' and 'lbfgs' solver.
                      'multi_class': 'ovr',
                      'verbose': 0,
                      'warm_start': False,  # Reuse solution of the previous call to fit as initialization.
                      'num_jobs': 1,  # Number of CPU cores during cross-validation. -1 -> all cores are used.
                      'num_classes': 10}  # Number of classes.

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the classifier (estimator). Logistic regression is based on sklearn.
            self.estimator = linear_model.LogisticRegression(penalty=params['penalty'], dual=params['dual'],
                                                             tol=params['tol'], C=params['c_penalty'],
                                                             fit_intercept=params['fit_intercept'],
                                                             intercept_scaling=params['intercept_scaling'],
                                                             class_weight=params['class_weight'],
                                                             random_state=params['random_state'],
                                                             solver=params['solver'], max_iter=params['max_iter'],
                                                             multi_class=params['multi_class'],
                                                             verbose=params['verbose'], warm_start=params['warm_start'],
                                                             n_jobs=params['num_jobs'])

        # K-Nearest Neighbor classifier (no metric learning).
        elif self.classifier_type == 'knn':
            # Default params.
            params = {'num_neighbors': 3,  # Number of neighbors to use.
                      # Weights: callable,  or:
                      # 'uniform' uniform weights.  All points in each neighborhood are weighted equally.
                      # 'distance' : weigh points by the inverse of their distance.
                      'weights': 'uniform',
                      # Algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'} 'auto' will attempt to decide the most
                      # appropriate algorithm based on training data
                      'algorithm': 'auto',
                      'leaf_size': 30,  # Leaf size passed to BallTree or KDTree.
                      # Metric: string or DistanceMetric object (default = 'minkowski'), the distance metric to use for
                      # the tree.  The default metric is minkowski, and with p=2 is equivalent to the Euclidean metric.
                      # See the documentation of DistanceMetric.
                      'metric': 'minkowski',
                      'metric_params': None,  # Additional keyword arguments for the metric function.
                      'power': 2,  # Power parameter for the Minkowski metric. p = 1 is l1, p = 2 is l2.
                      'num_jobs': 1,  # The number of parallel jobs to run for neighbors search. -1 -> nb of CPU cores.
                      'num_classes': 10}  # Number of classes.

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the classifier (estimator). KNN is based on sklearn.
            self.estimator = neighbors.KNeighborsClassifier(n_neighbors=params['num_neighbors'],
                                                            weights=params['weights'], algorithm=params['algorithm'],
                                                            leaf_size=params['leaf_size'], p=params['power'],
                                                            metric=params['metric'],
                                                            metric_params=params['metric_params'],
                                                            n_jobs=params['num_jobs'])

        # Neural network classifier.
        elif self.classifier_type == 'nn':
            # Default params.
            params = {'network_name': 'mlp',  # Name of architecture (should be implemented in NNClassifier.
                      'num_hidden_layers': 3,  # Number of layers (only used if mlp).
                      'num_hidden_units': [200, 200, 10],  # Number of hidden units for each layer (only used if mlp).
                      'num_classes': 10,  # Number of classes.
                      'input_dim': 20,  # Dimension of input layer.
                      'initial_lr': 0.01,  # Initial learning rate.
                      'batch_size': 200,  # Batch size.
                      'num_epochs': 25,  # Number of epochs for training.
                      'optimizer_type': 'decay_sgd',  # Optimizer type.
                      'use_batch_norm': False,  # Whether or not to use batch normalization.
                      # Checkpoint directory: where to save tensorflow checkpoints.
                      'checkpoint_dir': os.path.join(self.get_output_dir(), self.tf_checkpoint_dir())}

            # Update parameters from dictionary of parameters (based on config file).
            params.update(self.classifier_params)

            # Build the classifier (estimator). Neural network classifier is based on the NNClassifier class.
            self.estimator = nn.NNClassifier(network_name=params['network_name'], input_dim=params['input_dim'],
                                             num_hidden_units=params['num_hidden_units'],
                                             num_hidden_layers=params['num_hidden_layers'],
                                             num_classes=params['num_classes'],
                                             initial_lr=params['initial_lr'],
                                             batch_size=params['batch_size'],
                                             num_epochs=params['num_epochs'],
                                             checkpoint_dir=params['checkpoint_dir'],
                                             optimizer_type=params['optimizer_type'],
                                             use_batch_norm=params['use_batch_norm'])

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Initialized a classifier of type {}.'.format(self.classifier_type))
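
    # Hypothetical example of classifier_params for the 'lmnn' branch (an
    # assumption about the config schema, inferred from the defaults above):
    #   {'classifier_type': 'lmnn', 'num_neighbors': 5, 'max_iter': 500,
    #    'learn_rate': 1e-6, 'dataset': 'mnist', 'exp_name': 'lmnn_k5'}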

    def get_feature_dir(self):
        """Returns path to feature directory (where features are saved)

        Returns:
            Feature directory.
        """
        return self.classifier_params['feature_dir']

    def get_output_dir(self):
        """Returns path to output directory (where outputs are saved, such as trained classifier, predicted labels, etc.)
        and creates it if it doesn't exist.

        Returns:
            cls_exp_dir: Output directory.

        Raises:
            RuntimeError: If no feature directory was specified in the configuration.
        """
        feature_dir = self.get_feature_dir()
        if feature_dir is None:
            raise RuntimeError('[!] No feature directory or GAN experiment specified.')
        else:
            cls_dir = os.path.join(feature_dir, 'cls')
            cls_exp_dir = os.path.join(cls_dir, self.classifier_params['exp_name'])
            if not os.path.exists(cls_dir):
                os.mkdir(cls_dir)
            if not os.path.exists(cls_exp_dir):
                os.mkdir(cls_exp_dir)
            return cls_exp_dir

    def get_classifier_filename(self):
        """Returns filename for saving the classifier.

        Returns:
            Classifier filename.
        """

        # Classifier filename is parametrized by important experiment parameters.
        return 'classifier_{}_lr{}_rr{}_m{}_c{}_a{}.pkl'.format(self.classifier_params['input_feature'],
                                                                self.classifier_params['learning_rate'],
                                                                self.classifier_params['random_restarts'],
                                                                self.classifier_params['num_measurements'],
                                                                self.classifier_params['counter'],
                                                                self.classifier_params['a_index'])

    def get_labels_filename(self, input_split):
        """Returns filename for saving predicted labels.

        Args:
            input_split: Split to test on [train|val|test].

        Returns:
            Predicted labels filename.
        """

        # Predicted labels filename is parametrized by important experiment parameters.
        return 'predicted_labels_{}_{}_lr{}_rr{}_m{}_c{}_a{}.pkl'.format(input_split,
                                                                         self.classifier_params['input_feature'],
                                                                         self.classifier_params['learning_rate'],
                                                                         self.classifier_params['random_restarts'],
                                                                         self.classifier_params['num_measurements'],
                                                                         self.classifier_params['counter'],
                                                                         self.classifier_params['a_index'])

    def tf_checkpoint_dir(self):
        """Returns name of TensorFlow checkpoint directory.

        Returns:
            Checkpoint directory.
        """

        return 'tf_checkpoints_{}_lr{}_rr{}_m{}_c{}_a{}'.format(self.classifier_params['input_feature'],
                                                                self.classifier_params['learning_rate'],
                                                                self.classifier_params['random_restarts'],
                                                                self.classifier_params['num_measurements'],
                                                                self.classifier_params['counter'],
                                                                self.classifier_params['a_index'])

    def get_acc_filename(self, input_split):
        """Returns filenames for all accuracy files.

        Args:
            input_split: Split to test on [train|val|test].

        Returns:
            acc_filename: The filename for the overall prediction accuracy on this split.
            acc_filenames_i: An array of filenames for class-specific accuracies on this split.
        """

        # Accuracy filename parametrized by experiment parameters.
        acc_filename = 'accuracy_{}_{}_lr{}_rr{}_m{}_c{}_a{}.txt'.format(input_split,
                                                                         self.classifier_params['input_feature'],
                                                                         self.classifier_params['learning_rate'],
                                                                         self.classifier_params['random_restarts'],
                                                                         self.classifier_params['num_measurements'],
                                                                         self.classifier_params['counter'],
                                                                         self.classifier_params['a_index'])
        # For every class, add class number to filename.
        acc_filenames_i = []
        for i in range(self.classifier_params['num_classes']):
            acc_filenames_i.append('class{}_accuracy_{}_{}_lr{}_rr{}_m{}_c{}_a{}.txt'.format(i, input_split,
                                                                                             self.classifier_params[
                                                                                                 'input_feature'],
                                                                                             self.classifier_params[
                                                                                                 'learning_rate'],
                                                                                             self.classifier_params[
                                                                                                 'random_restarts'],
                                                                                             self.classifier_params[
                                                                                                 'num_measurements'],
                                                                                             self.classifier_params[
                                                                                                 'counter'],
                                                                                             self.classifier_params[
                                                                                                 'a_index']))
        return acc_filename, acc_filenames_i

    def train(self, features=None, labels=None, retrain=False, num_train=-1):
        """Trains classifier using training features and ground truth training labels.

        Args:
            features: Path to training feature vectors (use None to automatically load saved features from experiment
            output directory).
            labels: Path to ground truth train labels (use None to automatically load from dataset).
            retrain: Boolean, whether or not to retrain if classifier is already saved.
            num_train: Number of training samples to use (use -1 to include all training samples).

        Raises:
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        # If no feature vector is provided load from experiment output directory.
        if features is None:
            feature_file = self.classifier_params['feature_file']
            try:
                with open(feature_file, 'rb') as f:  # binary mode; the pickle was written with 'wb'
                    features = cPickle.load(f)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded feature file from {}.'.format(feature_file))

        # If no label vector is provided load from dataset.
        if labels is None:
            # Create dataset object based on dataset name.
            if self.classifier_params['dataset'] == 'mnist':
                ds = Mnist()
            elif self.classifier_params['dataset'] == 'f-mnist':
                ds = FMnist()
            elif self.classifier_params['dataset'] == 'celeba':
                ds = CelebA(resize_size=self.classifier_params['output_height'],
                            attribute=self.classifier_params['attribute'])
            else:
                raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))
            # Load labels from the train split.
            _, labels, _ = ds.load('train')
            num_samples = min(np.shape(features)[0], len(labels))

            # Restrict to the first num_train samples if num_train is not -1.
            if num_train > -1:
                num_samples = min(num_train, num_samples)

            labels = labels[:num_samples]
            features = features[:num_samples, :]

            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded ground truth labels from {}.'.format(
                    self.classifier_params['dataset']))

        # Train the classifier.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
            self.estimator.fit(features, labels)

        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            self.estimator.fit(features, labels, retrain=retrain, session=self.session)

        # For LMNN, first transform the feature vector then perform k-NN.
        elif self.classifier_type == 'lmnn':
            # Learn the metric.
            self.helper_estimator.fit(features, labels)
            # Transform feature space.
            transformed_features = self.helper_estimator.transform(features)
            # Create k-nn graph.
            self.estimator.fit(transformed_features, labels)

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        if ('verbose' in self.classifier_params) and self.classifier_params['verbose']:
            print('[*] Trained classifier.')

    def save_classifier(self, filename=None):
        """Saves the classifier in a pickle file.

        Args:
            filename: Path to pickle file.

        Raises:
            IOError: If a output error occurs while saving the pickle file.
        """

        # If no filename is provided, default filename will be used.
        if filename is None:
            output_dir = self.get_output_dir()
            filename = self.get_classifier_filename()
            filename = os.path.join(output_dir, filename)

        # Saving for non neural-network classifiers.
        if not self.classifier_type == 'nn':
            try:
                with open(filename, 'wb') as fp:
                    cPickle.dump(self.classifier_type, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.classifier_params, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.estimator, fp, cPickle.HIGHEST_PROTOCOL)
                    cPickle.dump(self.helper_estimator, fp, cPickle.HIGHEST_PROTOCOL)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Saved classifier {}.'.format(filename))

        # Neural network classifiers have default saving/loading using TensorFlow.
        else:
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[!] Default TF loading/saving for Neural Networks.')

    def load_classifier(self, filename=None):
        """Loads classifier from a pickle file.

        Args:
            filename: Path to pickle file.

        Raises:
            IOError: If an input error occurs while reading pickle file.
        """

        # If no filename is provided, default filename will be used.
        if filename is None:
            output_dir = self.get_output_dir()
            filename = self.get_classifier_filename()
            filename = os.path.join(output_dir, filename)

        # Loading for non neural-network classifiers.
        if not self.classifier_type == 'nn':
            try:
                with open(filename, 'rb') as f:  # binary mode to match the 'wb' dump
                    self.classifier_type = cPickle.load(f)
                    self.classifier_params = cPickle.load(f)
                    self.estimator = cPickle.load(f)
                    self.helper_estimator = cPickle.load(f)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Loaded classifier from {}.'.format(filename))

        # Neural network classifiers have default saving/loading using TensorFlow.
        else:
            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[!] Default TF loading/saving for Neural Networks.')

    def predict(self, features, save_result=False, model_name=None, filename=None):
        """Predicts labels given test feature vectors. If save_result is True, also saves the predictions.

        Args:
            features: Test feature vectors.
            save_result: Optional, boolean, if True save predicted labels and accuracy.
            model_name: For neural network classifiers, model name to load and use to predict.
            filename: Optional, path to save results in.

        Returns:
            predicted_labels: Array of predicted labels.

        Raises:
            IOError: If save_result is True and an output error occurs while saving predictions.
            ValueError: If the classifier type is not supported. Supported types: [svm|linear-svm|lmnn|logistic|knn|nn]
        """

        # If save_result is True and no filename was provided, use default filename.
        if save_result and (filename is None):
            output_dir = self.get_output_dir()
            filename = self.get_labels_filename('user_defined')
            filename = os.path.join(output_dir, filename)

        # For kernel and linear SVMs, Logistic regression, and K-NN, simply call the estimator's predict function.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
            predicted_labels = self.estimator.predict(features)
            if save_result:
                try:
                    with open(filename, 'wb') as fp:
                        cPickle.dump(predicted_labels, fp, cPickle.HIGHEST_PROTOCOL)
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno,
                                                            err.strerror))
                if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                    print('[*] Saved predicted labels {}.'.format(filename))

            return predicted_labels

        # Same for neural networks, except for the additional model name and TensorFlow session arguments.
        elif self.classifier_type == 'nn':
            predicted_labels = self.estimator.predict(features, model_name, session=self.session)
            if save_result:
                try:
                    with open(filename, 'wb') as fp:
                        cPickle.dump(predicted_labels, fp, cPickle.HIGHEST_PROTOCOL)
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno,
                                                            err.strerror))
                if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                    print('[*] Saved predicted labels {}.'.format(filename))

            return predicted_labels

        # Metric learning.
        elif self.classifier_type == 'lmnn':
            # First transform the features.
            transformed_features = self.helper_estimator.transform(features)
            # Then call the predict function.
            predicted_labels = self.estimator.predict(transformed_features)

            if save_result:
                try:
                    with open(filename, 'wb') as fp:
                        cPickle.dump(predicted_labels, fp, cPickle.HIGHEST_PROTOCOL)
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno,
                                                            err.strerror))
                if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                    print('[*] Saved predicted labels {}.'.format(filename))

            return predicted_labels

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    def validate(self):
        """Only needed for neural networks. Validates different checkpoints by testing them on the validation split and
        retaining the one with the top accuracy.

        Returns:
            best_model: Name of chosen best model (empty string if no validation was performed). An empty string is
            returned for non neural network classifiers.

        Raises:
            IOError: If an input error occurs when loading feature vectors, or an output error occurs when saving the
            chosen model.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print("[*] Validating.")

        # Get feature file paths.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', 'val')
        feature_file = os.path.join(feature_dir, feature_file)

        # Load feature vectors.
        try:
            with open(feature_file, 'rb') as f:  # binary mode to match the 'wb' dump
                features = cPickle.load(f)
        except IOError as err:
            print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Initialize the dataset object to load ground-truth labels.
        if self.classifier_params['dataset'] == 'mnist':
            ds = Mnist()
        elif self.classifier_params['dataset'] == 'f-mnist':
            ds = FMnist()
        elif self.classifier_params['dataset'] == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))

        # Load ground-truth labels from the validation split.
        _, labels, _ = ds.load('val')
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded ground-truth labels from {}.'.format(
                self.classifier_params['dataset']))

        # Non neural network classifiers do not require validation as no intermediate models exist.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            print('[!] No validation needed.')
            return ""

        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            # Call the neural network validate function on the features.
            best_acc, best_model, _ = self.estimator.validate(features, labels, session=self.session)

            # Save results.
            try:
                with open(os.path.join(self.get_output_dir(), self.tf_checkpoint_dir(), 'chosen_model.txt'), 'w') as fp:
                    fp.write("{} {}".format(os.path.basename(best_model), best_acc))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print(
                    '[*] Chose model: {}, with validation accuracy {}.'.format(os.path.basename(best_model), best_acc))
            return best_model

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    def test_classifier(self, input_split='test', save_result=False, model_name=None, labels_filename=None,
                        acc_filename=None, acc_filenames_i=None):
        """Predicts labels and compares them to ground truth labels from given split. Returns test accuracy.
        Args:
            input_split: What split to test on [train|val|test].
            save_result: Optional, boolean. If True saves predicted labels and accuracy.
            model_name:  For neural network classifiers, model name to load and use to predict.
            labels_filename: Optional, string. Path to save predicted labels in.
            acc_filename: Optional, string. Path to save predicted accuracy in.
            acc_filenames_i: Optional, array of strings. Path to save class-specific predicted labels in.

        Returns:
            predicted_labels: Predicted labels for the input split.
            accuracy: Accuracy on the input split.
            per_class_accuracies: Array of per-class accuracies on the input split.

        Raises:
            IOError: If an input error occurs when loading features, or an output error occurs when saving results.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        # If save_result is True, but no labels_filename was specified, use default filename.
        if save_result and (labels_filename is None):
            output_dir = self.get_output_dir()
            labels_filename = self.get_labels_filename(input_split)
            labels_filename = os.path.join(output_dir, labels_filename)

        # If save_result is True, but no acc_filename was specified, use default filename.
        if save_result and (acc_filename is None):
            output_dir = self.get_output_dir()
            acc_filename, acc_filenames_i = self.get_acc_filename(input_split)
            acc_filename = os.path.join(output_dir, acc_filename)
            for i in range(self.classifier_params['num_classes']):
                acc_filenames_i[i] = os.path.join(output_dir, acc_filenames_i[i])

        # Load feature vectors.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', input_split)
        feature_file = os.path.join(feature_dir, feature_file)

        try:
            with open(feature_file, 'rb') as f:  # pickle files must be read in binary mode
                features = cPickle.load(f)
        except IOError as err:
            print('[!] I/O error({0}): {1}.'.format(err.errno, err.strerror))
            raise

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Instantiate dataset object to load ground-truth labels.
        if self.classifier_params['dataset'] == 'mnist':
            ds = Mnist()
        elif self.classifier_params['dataset'] == 'f-mnist':
            ds = FMnist()
        elif self.classifier_params['dataset'] == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(self.classifier_params['dataset']))

        # Load ground-truth labels.
        _, labels, _ = ds.load(input_split)
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Loaded ground-truth labels from: {}.'.format(
                self.classifier_params['dataset']))

        # Predict labels.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            predicted_labels = self.predict(features, save_result, labels_filename)
        elif self.classifier_type == 'nn':
            predicted_labels = self.predict(features, save_result, model_name, labels_filename)
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        # Compare predicted labels to ground-truth labels and calculate accuracy.
        num_correct = np.sum(np.equal(predicted_labels, labels))
        accuracy = num_correct / (1.0 * len(labels))
        per_class_accuracies = []
        for i in range(self.classifier_params['num_classes']):
            idx = np.where(np.equal(labels, i))[0]
            num_correct = np.sum(np.equal(predicted_labels[idx], labels[idx]))
            accuracy_i = num_correct / (1.0 * len(labels[idx]))
            per_class_accuracies.append(accuracy_i)

        # Save results.
        if save_result:
            try:
                with open(acc_filename, 'w') as fp:
                    fp.write("{}".format(accuracy))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
                print('[*] Saved predicted labels {}.'.format(labels_filename))
                print('[*] Saved predicted accuracy {}.'.format(acc_filename))

            for i in range(self.classifier_params['num_classes']):
                try:
                    with open(acc_filenames_i[i], 'w') as fp:
                        fp.write("{}".format(per_class_accuracies[i]))
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno,
                                                            err.strerror))

        if 'verbose' in self.classifier_params and self.classifier_params['verbose']:
            print('[*] Testing complete. Accuracy on {} split {}.'.format(
                input_split, accuracy))
            for i in range(self.classifier_params['num_classes']):
                print('[*] Testing complete. Accuracy on {} split, class {}: {}.'.format(input_split, i,
                                                                                         per_class_accuracies[i]))

        return predicted_labels, accuracy, per_class_accuracies
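# Hypothetical usage sketch (the owning class and its constructor are not
# shown in this snippet, so the instance name below is an assumption):
# preds, acc, per_class = classifier.test_classifier(input_split='test',
#                                                    save_result=True)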
Example #20
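# Assumed imports for this demo (the snippet itself omits them); `pca`,
# `gb_lmnn`, and `knn_error_score` are local helpers from the GB-LMNN port,
# so their module name below is a guess:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import Pipeline
from metric_learn import LMNN
# from gb_lmnn import pca, gb_lmnn, knn_error_score
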
def main():
    print(
        "************************************************************************************"
    )
    print(
        "*************************** Metric Learning Demo ***********************************"
    )
    print(
        "************************************************************************************"
    )

    # Load variables
    print("Loading data")
    _, _, _, xTe, xTr, xVa, yTr, yTe, yVa = loadmat(
        'data/segment.mat').values()
    xTe, xTr, xVa = xTe.T, xTr.T, xVa.T
    yTr, yTe, yVa = yTr.flatten().astype(int) - 1, yTe.flatten().astype(
        int) - 1, yVa.flatten().astype(int) - 1

    print("Training pca...")
    L0 = pca(xTr.T, whiten=True)[0].T

    print("Training pca-lda...")
    pca_lda = Pipeline([('pca', PCA(n_components=5, whiten=True)),
                        ('lda', LinearDiscriminantAnalysis(n_components=3))])
    pca_lda.fit(xTr, yTr)
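    # Collapse the fitted pipeline into one linear map: whitening scales each
    # PCA component by 1/sqrt(explained variance), and the top-3 LDA
    # discriminant directions are applied on top, so pcalda_mat maps raw
    # inputs directly to the 3-d PCA-LDA space.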
    pca_eigen_vals = np.diag(1 / np.sqrt(pca_lda[0].explained_variance_))
    pcalda_mat = pca_lda[1].scalings_[:, :3].T @ pca_eigen_vals @ pca_lda[
        0].components_

    print("Training lmnn...")
    lmnn = LMNN(init='pca',
                k=7,
                learn_rate=1e-6,
                verbose=False,
                n_components=3,
                max_iter=1000)
    lmnn.fit(xTr, yTr)

    print('Learning nonlinear metric with GB-LMNN ... ')
    # L = pcalda_mat  # alternative: use the PCA-LDA map above as initialization
    L = loadmat('data/lmnn2_L.mat')['L']  # linear metric used to initialize GB-LMNN, exported from the MATLAB code
    embed = gb_lmnn(xTr,
                    yTr,
                    3,
                    L,
                    n_trees=200,
                    verbose=True,
                    xval=xVa,
                    yval=yVa)

    # ################################ k-NN evaluation ###################################
    print("\nEvaluation:")
    k = 1
    # [] applies no linear map, so k-NN is scored on the raw input
    raw_tr_err, raw_te_err = knn_error_score([], xTr, yTr, xTe, yTe, k)
    print(
        '1-NN error for raw (high-dimensional) input: training {:.2f}%, testing {:.2f}%'
        .format(100 * raw_tr_err, 100 * raw_te_err))

    pca_tr_err, pca_te_err = knn_error_score(L0[0:3], xTr, yTr, xTe, yTe, k)
    print('1-NN error for PCA in 3d: training {:.2f}%, testing {:.2f}%'.
          format(100 * pca_tr_err, 100 * pca_te_err))

    lda_tr_err, lda_te_err = knn_error_score(pcalda_mat, xTr, yTr, xTe, yTe, k)
    print(
        '1-NN error for PCA-LDA input: training {:.2f}%, testing {:.2f}%'.
        format(100 * lda_tr_err, 100 * lda_te_err))

    lmnn_tr_err, lmnn_te_err = knn_error_score(lmnn.components_[0:3], xTr, yTr,
                                               xTe, yTe, k)
    print('1-NN error for LMNN: training {:.2f}%, testing {:.2f}%'.format(
        100 * lmnn_tr_err, 100 * lmnn_te_err))

    gb_tr_err, gb_te_err = knn_error_score([], embed.transform(xTr), yTr,
                                           embed.transform(xTe), yTe, k)
    print(
        '1-NN error for GB-LMNN input: training {:.2f}%, testing {:.2f}%'.
        format(100 * gb_tr_err, 100 * gb_te_err))

    # ################################ 3-D Plot ###################################
    print("\nPlotting figures")

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(2, 2, 1, projection='3d')
    ax1.set_title("PCA Train Error: {:.2f}%, Test Error: {:.2f}%".format(
        100 * pca_tr_err, 100 * pca_te_err))
    pts_to_plt = xTr @ L0[0:3].T

    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax1.scatter(pts_to_plt[mask, 0],
                    pts_to_plt[mask, 1],
                    pts_to_plt[mask, 2],
                    label=l)
    plt.legend()

    ax2 = fig.add_subplot(2, 2, 2, projection='3d')
    ax2.set_title("PCA-LDA Train Error: {:.2f}%, Test Error: {:.2f}%".format(
        100 * lda_tr_err, 100 * lda_te_err))
    pts_to_plt = xTr @ pcalda_mat.T

    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax2.scatter(pts_to_plt[mask, 0],
                    pts_to_plt[mask, 1],
                    pts_to_plt[mask, 2],
                    label=l)
    plt.legend()

    ax3 = fig.add_subplot(2, 2, 3, projection='3d')
    ax3.set_title("LMNN Train Error: {:.2f}%, Test Error: {:.2f}%".format(
        100 * lmnn_tr_err, 100 * lmnn_te_err))
    pts_to_plt = lmnn.transform(xTr)
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax3.scatter(pts_to_plt[mask, 0],
                    pts_to_plt[mask, 1],
                    pts_to_plt[mask, 2],
                    label=l)
    plt.legend()

    ax4 = fig.add_subplot(2, 2, 4, projection='3d')
    ax4.set_title("GB-LMNN Train Error: {:.2f}%, Test Error: {:.2f}%".format(
        100 * gb_tr_err, 100 * gb_te_err))
    pts_to_plt = embed.transform(xTr)
    for l in np.unique(yTr):
        mask = np.squeeze(yTr == l)
        ax4.scatter(pts_to_plt[mask, 0],
                    pts_to_plt[mask, 1],
                    pts_to_plt[mask, 2],
                    label=l)
    plt.legend()

    plt.show()
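
# Entry-point guard (not part of the original snippet) so the demo runs when
# the file is executed directly:
if __name__ == '__main__':
    main()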
Example #21
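# Fragment: the body of a cross-validation loop. knn_k, fold_cnt, ac_list,
# ac_list_orig, train_ac_metrics_list, and the train_/valid_ feature and
# label arrays are assumed to be defined by the enclosing loop, which the
# snippet does not include.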
        fold_cnt += 1
        print("k:", knn_k)
        print("fold:", fold_cnt)
        print("train features shape:", train_features.shape)
        print("train labels shape:", train_labels.shape)
        print("valid features shape:", valid_features.shape)
        print("valid labels shape:", valid_labels.shape)

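        # LMNN's k (number of target neighbors) is fixed at 5 here,
        # independent of the knn_k used by the downstream k-NN classifier.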
        lmnn = LMNN(k=5)
        transformed_features = lmnn.fit_transform(train_features, train_labels)
        neigh = KNeighborsClassifier(n_neighbors=knn_k)
        neigh.fit(transformed_features, train_labels)
        neigh_orig = KNeighborsClassifier(n_neighbors=knn_k)
        neigh_orig.fit(train_features, train_labels)
        predict = neigh.predict(lmnn.transform(valid_features))
        predict_orig = neigh_orig.predict(valid_features)
        accuracy = metrics.accuracy_score(valid_labels, predict)
        accuracy_orig = metrics.accuracy_score(valid_labels, predict_orig)
        print("accuracy after metric learning:{}".format(accuracy))
        print("accuracy before metric learning:{}".format(accuracy_orig))
        ac_list.append(accuracy)
        ac_list_orig.append(accuracy_orig)

    final_train_accuracy = np.mean(ac_list)
    print("mean CV accuracy with LMNN:", final_train_accuracy)
    final_train_accuracy_orig = np.mean(ac_list_orig)
    print("mean CV accuracy without LMNN:", final_train_accuracy_orig)
    train_ac_metrics_list.append(
        [knn_k, final_train_accuracy, final_train_accuracy_orig])
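# A minimal sketch of the scaffolding this fragment presumably sits inside
# (names and split logic are assumptions, not recovered code):
# from sklearn.model_selection import StratifiedKFold
# train_ac_metrics_list = []
# for knn_k in (1, 3, 5, 7):
#     ac_list, ac_list_orig, fold_cnt = [], [], 0
#     for tr_idx, va_idx in StratifiedKFold(n_splits=5).split(features, labels):
#         train_features, valid_features = features[tr_idx], features[va_idx]
#         train_labels, valid_labels = labels[tr_idx], labels[va_idx]
#         # ... fragment body above ...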
Example #22
def lmnn(x_train, y_train, x_test):
    # Learn an LMNN metric on the training data and project the test set
    # into the learned space.
    model = LMNN(max_iter=50, k=9, verbose=True)
    print("Fitting LMNN ...")
    model.fit(x_train, y_train)
    print("done")
    return model.transform(x_test)
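# Hypothetical usage (array names assumed):
# x_test_t = lmnn(x_train, y_train, x_test)
# Note that only the transformed test set is returned; a caller that wants to
# run k-NN in the learned space also needs the transformed training set, so
# returning the fitted model instead may be preferable.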
Example #23
Result_of_acc_std = np.zeros([len(datasets) * 2, len(classifiers)])

for i in range(len(datasets)):
    print(datasets[i])
    new_path = os.path.join('.', 'data', datasets[i])
    Data_Origi, DataLabel, n_samples, n_attr, n_class = PF.Load_Data(new_path)
    # Min-max normalization
    scaler = MinMaxScaler()
    scaler.fit(Data_Origi)
    Data_Origi = scaler.transform(Data_Origi)
    for l in range(2):
        if l == 0:
            # Metric learning
            lmnn = LMNN(k=5, learn_rate=1e-6)
            lmnn.fit(Data_Origi, DataLabel)
            Data_trans = lmnn.transform(Data_Origi)
        else:
            Data_trans = Data_Origi
        # Homogenization fusion
        Dis_Matrix = PF.Calcu_Dis(Data_trans)
        CompareMatrix = PF.CompareNoiseLabel(Dis_Matrix, DataLabel)
        Cluster_Checked = PF.Affinity_propagatio_Modify(CompareMatrix)
        lap_ratio = PF.Count(Cluster_Checked, set_vlaue, n_samples)
        Result_of_Upper[i, l] = 1 - lap_ratio

        for j in range(len(classifiers)):
            print(classifiers[j])
            clf = classifiers[j]
            scores = cross_val_score(clf, Data_trans, DataLabel, cv=cv)
            Result_of_acc_ave[2 * i + l, j] = scores.mean()
            Result_of_acc_std[2 * i + l, j] = scores.std()
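# Result layout: for dataset i, row 2*i holds the LMNN-transformed results
# (l == 0) and row 2*i + 1 the untransformed baseline (l == 1); columns
# index the classifiers.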
Example #24
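# ts is assumed to have been set in an earlier cell, e.g. ts = time.time(),
# so the fit below can be timed.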
lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(X, y)
te = time.time()
print('Time: %d s' % (te - ts))


# In[12]:


print('done')


# In[17]:


q_transform = lmnn.transform(query_feature)
g_transform = lmnn.transform(gallery_feature)


# In[19]:


print(query_feature.shape)
print(q_transform.shape)
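

# In[ ]: (assumed follow-up cell, not in the original notebook): rank gallery
# entries for each query by Euclidean distance in the learned metric space.
import numpy as np
from sklearn.metrics import pairwise_distances

dist = pairwise_distances(q_transform, g_transform, metric='euclidean')
ranked = np.argsort(dist, axis=1)  # ranked[i]: gallery indices, best match first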


# # Combine lbl and feature

# In[20]: