Example #1
import sys
import time

import numpy as np
from metric_learn import LMNN


def MetricLearning(Data, mapping, remapping, Truth):

    # build the metric learner
    Truth_Label = list(set(Truth.values()))

    X_color = np.array([data[0] for data in Data])
    #print("X_color concat X_shape", X_color.shape, file=sys.stderr)
    #X_color = np.array([data[0] for data in Data])
    Y_color = np.array([Truth_Label.index(Truth[remapping[i]]) for i, data in enumerate(Data)])

    print("X_color", X_color.shape, Y_color.shape, file=sys.stderr)

    s = time.time()

    # old metric-learn API: k/min_iter in the constructor, verbose passed to fit
    lmnn_color = LMNN(k=5, min_iter=0, max_iter=400, learn_rate=1e-6)
    lmnn_color.fit(X_color, Y_color, verbose=True)

    print(time.time() - s, "color learning done", file=sys.stderr)

    '''X_shape = np.array([data[1] for data in Data])
    Y_shape = np.array([Truth_Label.index(Truth[remapping[i]]) for i, data in enumerate(Data)])
    print("X_shape", X_shape.shape, Y_shape.shape, file=sys.stderr)

    s = time.time()

    lmnn_shape = LMNN(k=20, min_iter=0, max_iter=1, learn_rate=1e-6)
    lmnn_shape.fit(X_shape, Y_shape, verbose=True)

    print(time.time() - s, "shape learning done", file=sys.stderr)
    return lmnn_color, lmnn_shape'''
    return lmnn_color
Example #2
    def test_iris(self):
        lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
        lmnn.fit(self.iris_points, self.iris_labels)

        csep = class_separation(lmnn.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.25)
Example #3
    def constructSimilartyMatrixLMNN(self, ks):

        print('now doing LMNN for k= ', ks)
        self.y_train = self.y_train.reshape(-1, )
        lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=1000)
        lmnn.fit(self.trainVectorsPCA, self.y_train)
        self.L_lmnn = lmnn.transformer()
        name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(
            self.trainVectorsPCA.shape)
        np.save(name, self.L_lmnn)
        print('L.shape is ', self.L_lmnn.shape, '\n\n')
        # Input data transformed to the metric space by X*L.T
        self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
        self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
        self.transformedAllLMNN = copy(lmnn.transform(
            self.allDataPCA))  #we compute the pairwise distance on this now
        projectedDigits = TSNE(random_state=randomState).fit_transform(
            self.transformedAllLMNN)

        self.pwdis = copy(
            pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
        self.D = np.zeros(self.pwdis.shape)
        for i in range(0, self.pwdis.shape[0]):
            l1 = self.pwdis[i].tolist()
            # print('l1 is ', l1, '\n\n')
            allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
            # now zero out all weights except those of the ks nearest neighbours
            self.pwdis[i, allnearestNeighbours[ks:]] = 0
            self.D[i, i] = sum(self.pwdis[i])

        print('accuracy for LMNN for k= ', ks, '\n')
        self.labelPropogation()
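
The comment above says the input is mapped into the metric space by X*L.T. A minimal check of that identity on synthetic data, assuming metric-learn >= 0.6 (where the learned map is exposed as components_ and the neighbour count is n_neighbors):

import numpy as np
from metric_learn import LMNN

rng = np.random.RandomState(0)
X = rng.rand(40, 5)
y = np.repeat([0, 1], 20)

lmnn = LMNN(n_neighbors=3).fit(X, y)
L = lmnn.components_

# transform(X) is exactly the linear map X @ L.T
assert np.allclose(lmnn.transform(X), X @ L.T)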
    def constructSimilartyMatrixLMNN(self, ks):

        print('now doing LMNN for k= ', ks)
        self.y_train = self.y_train.reshape(-1, )
        lmnn = LMNN(k=ks, learn_rate=1e-7, max_iter=3000)
        lmnn.fit(self.trainVectorsPCA, self.y_train, verbose=False)
        self.L_lmnn = lmnn.transformer()
        name = 'lmnn/LMNN transformer matrix with dataset shape ' + str(self.trainVectorsPCA.shape)
        np.save(name, self.L_lmnn)
        print('L.shape is ', self.L_lmnn.shape, '\n\n')
        # Input data transformed to the metric space by X*L.T
        self.transformedTrainLMNN = copy(lmnn.transform(self.trainVectorsPCA))
        self.transformedTestLMNN = copy(lmnn.transform(self.testVectorsPCA))
        self.transformedAllLMNN = copy(lmnn.transform(self.allDataPCA))  # we compute the pairwise distance on this now
        projectedDigits = TSNE(random_state=randomState).fit_transform(self.transformedAllLMNN)

        plt.scatter(projectedDigits[:, 0], projectedDigits[:, 1], c=self.labels)
        plt.title('LMNN Transformed ALL set projected to 2 Dimensions by TSNE with k=' + str(ks))
        plt.savefig(pp, format='pdf')

        self.pwdis = copy(pairwise_distances(self.transformedAllLMNN, metric='euclidean'))
        self.D = np.zeros(self.pwdis.shape)
        for i in range(0, self.pwdis.shape[0]):
            l1 = self.pwdis[i].tolist()
            # print('l1 is ', l1, '\n\n')
            allnearestNeighbours = sorted(range(len(l1)), key=lambda j: l1[j])
            # now zero out all weights except those of the ks nearest neighbours
            self.pwdis[i, allnearestNeighbours[ks:]] = 0
            self.D[i, i] = sum(self.pwdis[i])

        print('accuracy for LMNN for k= ', ks, '\n')
        self.labelPropogation()
Example #5
    def LMNN(self):
        print "Warning, the features will be transformed"
        lmnn = LMNN(k=5, learn_rate = 1e-6)
        lmnn.fit(self.features, targets)

        self.features = lmnn.transform(self.features)
        self.prepare_for_testing()
        self.nearest_neighbors("LMNN + KNN")
def runLMNN(X_train, X_test, y_train, y_test, k):
    transformer = LMNN(k=k, learn_rate=1e-6, convergence_tol=0.1, verbose=True)
    transformer.fit(X_train, y_train)
    X_train_proj = transformer.transform(X_train)
    X_test_proj = transformer.transform(X_test)
    np.save('X_train_LMNN_' + str(k), X_train_proj)
    np.save('X_test_LMNN_' + str(k), X_test_proj)
    return X_train_proj, X_test_proj
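
A hypothetical invocation of runLMNN on synthetic data (assuming a metric-learn version that still accepts the k keyword); note it also writes X_train_LMNN_<k>.npy and X_test_LMNN_<k>.npy to the working directory:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

# projects both splits with the learned metric and saves them as .npy files
X_tr_proj, X_te_proj = runLMNN(X_tr, X_te, y_tr, y_te, k=3)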
Example #7
def test_convergence_simple_example(capsys):
    # LMNN should converge on this simple example, which it did not with
    # this issue: https://github.com/scikit-learn-contrib/metric-learn/issues/88
    X, y = make_classification(random_state=0)
    lmnn = LMNN(verbose=True)
    lmnn.fit(X, y)
    out, _ = capsys.readouterr()
    assert "LMNN converged with objective" in out
Example #8
    def process_lmnn(self, **option):
        '''Metric Learning algorithm: LMNN'''
        GeneExp = self.GeneExp_train
        Label = self.Label_train

        lmnn = LMNN(**option)
        lmnn.fit(GeneExp, Label)
        self.Trans['LMNN'] = lmnn.transformer()
Example #9
    def train_test(self, x_train, y_train, x_test=None, cuisines=None, k=15):
        torch.cuda.empty_cache()
        x_train = np.array(x_train)
        testing = x_test is not None

        if testing:
            x_tr = torch.tensor(x_train)
            y_tr = torch.tensor(y_train)
            x_val = torch.tensor(x_test)
            dist = self.get_dist(x_tr, x_val)
            y_pred = self.predict(dist, y_tr, k)
            ids = [cuisine.id for cuisine in cuisines]
            pred_cuisines = [
                self.dataset.id2cuisine[label] for label in y_pred
            ]
            self._write2csv(ids, pred_cuisines)
        else:
            shuffle_idx = torch.randperm(x_train.shape[0])
            x_train = torch.tensor(x_train).float()
            y_train = torch.tensor(y_train)
            x_train = x_train[shuffle_idx]
            y_train = y_train[shuffle_idx]
            x_val = x_train[35000:]
            x_tr = x_train[:35000]
            y_val = y_train[35000:]
            y_tr = y_train[:35000]

            use_DML = False

            if use_DML:
                x_val = x_train[5000:6000]
                x_tr = x_train[:5000]
                y_val = y_train[5000:6000]
                y_tr = y_train[:20000]
                x_tr, x_val = self.PCA(x_tr, x_val, 64)
                lmnn = LMNN(k=15, learn_rate=1e-6, min_iter=50, max_iter=100)
                lmnn.fit(x_tr.numpy(), y_tr.numpy())
                M = lmnn.get_mahalanobis_matrix()
                M = torch.tensor(M).float()
                n, d = x_val.shape
                m = x_tr.shape[0]
                x0 = x_tr.unsqueeze(1).expand(-1, n,
                                              -1).contiguous().view(-1, d)
                x1 = x_val.unsqueeze(0).expand(m, -1,
                                               -1).contiguous().view(-1, d)
                x = x0 - x1
                dist0 = torch.mm(M, x.t().contiguous())
                dists = dist0.t().contiguous() * x
                dist = dists.sum(1).view(m, n)
            else:
                x_tr, x_val = self.PCA(x_tr, x_val, 500)
                dist = self.get_dist(x_tr, x_val).cpu()

            for k in [1, 3, 5, 8, 10, 15, 20, 25, 30]:
                y_pred = self.predict(dist, y_tr, k)
                acc = (y_pred == y_val).sum().float().numpy() / y_val.shape[0]
                print("K=", k, "  acc=", acc)
        torch.cuda.empty_cache()
Example #10
  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
  def test_lmnn(self):
    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    res_1 = lmnn.transform(self.X)

    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
    res_2 = lmnn.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
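
The two variants above differ only in the neighbour keyword: older metric-learn releases spelled it k, newer ones renamed it to n_neighbors. A small compatibility shim, assuming nothing beyond that rename:

from metric_learn import LMNN

def make_lmnn(n_neighbors=5, **kwargs):
    """Construct an LMNN across metric-learn versions."""
    try:
        return LMNN(n_neighbors=n_neighbors, **kwargs)  # metric-learn >= 0.6
    except TypeError:
        return LMNN(k=n_neighbors, **kwargs)  # older releases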
Example #12
def LMNN_Metric(datamatrix, datalabel):
    Dis_Matrix = np.zeros((len(datalabel), len(datalabel)))
    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(datamatrix, datalabel)
    metric_func = lmnn.get_metric()
    for i in range(len(datalabel)):
        for j in range(len(datalabel)):
            Dis_Matrix[i, j] = metric_func(datamatrix[i], datamatrix[j])
    return Dis_Matrix
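
get_metric() returns a Python callable, so the double loop above makes n² interpreted calls. Because the learned Mahalanobis distance equals the Euclidean distance after transform, the same matrix can be built in one vectorized call; a sketch under that assumption:

from scipy.spatial.distance import cdist
from metric_learn import LMNN

def LMNN_Metric_fast(datamatrix, datalabel):
    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(datamatrix, datalabel)
    Z = lmnn.transform(datamatrix)
    # Euclidean distances in the transformed space equal the learned metric
    return cdist(Z, Z)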
def draw_knn_with_lmnn(k, metric):
    names = ['x', 'y', 'color']

    df = pd.DataFrame(mapped_colors, columns=names)
    # print(df.head())

    X = np.array(df.iloc[:, 0:2])  # .ix was removed from pandas; use .iloc
    y = np.array(df['color'])

    lmnn = LMNN(k=5, learn_rate=1e-6)
    lmnn.fit(X, y)
    X_lmnn = lmnn.transform(X)  # newer metric-learn requires an explicit argument

    X = X_lmnn

    # print(X)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    if metric == 'mahalanobis':
        knn = KNeighborsClassifier(
            n_neighbors=k,
            metric=metric,
            metric_params={'V': np.cov(np.transpose(X))})
    else:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)

    knn.fit(X_train, y_train)

    pred = knn.predict(X_test)

    err = 1 - accuracy_score(y_test, pred)
    print('\nThe error is ' + str(err * 100))

    h = .02

    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i)" % k)
Example #14
class GeoLMNN(neighbors.KNeighborsClassifier):
    def __init__(self, n_neighbors=3):
        super(GeoLMNN, self).__init__(n_neighbors=n_neighbors)
        self.lmnn = LMNN(n_neighbors)

    def fit(self, X, y):
        self.lmnn.fit(X, y)
        super(GeoLMNN, self).fit(self.lmnn.transform(X), y)

    def predict(self, X):
        y = super(GeoLMNN, self).predict(self.lmnn.transform(X))
        return y
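
A hypothetical smoke test for GeoLMNN on iris, assuming an LMNN whose first constructor argument is the neighbour count (true for both the old k and the new n_neighbors signatures):

from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = GeoLMNN(n_neighbors=3)
clf.fit(X, y)
print("training accuracy:", (clf.predict(X) == y).mean())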
class LMNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, k=3, pca=None, train=True, mu=0.5):
        self.k = k
        self.train = train
        self.pca = pca
        self.pca_trasform = None
        self.mu = mu
        self.lmnn = LMNN(k=k, use_pca=False, max_iter=10000, regularization=mu)

    def fit(self, x, y=None):
        n, d = x.shape

        if self.pca is not None:
            pca = PCA(n_components=self.pca)
            pca.fit(x)
            self.pca_trasform = pca.transform
            x = pca.transform(x)
        if self.train:
            self.lmnn.fit(x, y)
            self.knn = KNeighborsClassifier(
                n_neighbors=self.k,
                metric='mahalanobis',
                metric_params=dict(VI=self.lmnn.metric()),
                n_jobs=-1)
        else:
            self.knn = KNeighborsClassifier(n_neighbors=self.k)

        self.knn.fit(x, y)

        return self

    def predict(self, x, y=None):
        if self.pca_trasform is not None:
            x = self.pca_trasform(x)
        return self.knn.predict(x)  # KNeighborsClassifier.predict takes only X

    def score(self, x, y=None):
        if self.pca_trasform is not None:
            x = self.pca_trasform(x)
        return self.knn.score(x, y)

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
            if parameter == 'mu':
                setattr(self.lmnn, 'regularization', value)
        return self
Example #16
class LP:
    def __init__(self, lmnn=False, max_iter=1000, lm_num=200):
        # self.clf =  LabelPropagation(kernel='knn',max_iter=1000,n_jobs=10,n_neighbors=25)
        self.clf = LabelSpreading(kernel='knn',
                                  n_neighbors=25,
                                  max_iter=max_iter,
                                  alpha=0.2,
                                  n_jobs=-1)
        self.lmnn = lmnn
        self.lm_num = lm_num
        if lmnn:
            self.ml = LMNN(use_pca=False, max_iter=2000)

    def fit(self, X, y):
        if self.lmnn:
            nonzero_index = np.nonzero(y)
            index = random.sample(list(nonzero_index[0]), self.lm_num)
            X_ = X[index]
            y_ = y[index]
            print('ml fitting')
            self.ml.fit(X_, y_)
            print('transform')
            X = self.ml.transform(X)
        print('lp fitting')
        zero_index = np.nonzero(y == 0)
        negative_index = np.nonzero(y == -1)
        positive_index = np.nonzero(y == 1)
        y[zero_index] = -1
        y[negative_index] = 2
        print(zero_index[0].shape, negative_index[0].shape,
              positive_index[0].shape)
        self.clf.fit(X, y)

    def predict(self, X):
        print('lp predict')
        if self.lmnn:
            X = self.ml.transform(X)
        y_pred = self.clf.predict(X)
        negative_index = np.nonzero(y_pred == -1)
        two_index = np.nonzero(y_pred == 2)
        y_pred[negative_index] = 0
        y_pred[two_index] = -1
        return y_pred
Example #17
def test_no_twice_same_objective(capsys):
  # test that the objective function never has twice the same value
  # see https://github.com/scikit-learn-contrib/metric-learn/issues/88
  X, y = make_classification(random_state=0)
  lmnn = LMNN(verbose=True)
  lmnn.fit(X, y)
  out, _ = capsys.readouterr()
  lines = re.split("\n+", out)
  # we get only objectives from each line:
  # the regexp matches a float that follows an integer (the iteration
  # number), and which is followed by a (signed) float (delta obj). It
  # matches for instance:
  # 3 **1113.7665747189938** -3.182774197440267 46431.0200999999999998e-06
  objectives = [re.search(r"\d* (?:(\d*.\d*))[ | -]\d*.\d*", s)
                for s in lines]
  objectives = [match.group(1) for match in objectives if match is not None]
  # we remove the last element because it can be equal to the penultimate
  # if the last gradient update is null
  assert len(objectives[:-1]) == len(set(objectives[:-1]))
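
To see what the regexp extracts, apply it to a hypothetical verbose line in the format the comment describes (iteration, objective, delta, step); the captured group is the objective:

import re

line = "3 1113.7665747189938 -3.182774197440267 4.6431e-06"
match = re.search(r"\d* (?:(\d*.\d*))[ | -]\d*.\d*", line)
print(match.group(1))  # 1113.7665747189938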
def baseline_model(X_train, y_train, X_test, y_test):

    # dimension reduction (LinearSVC lost its transform method in newer
    # scikit-learn, so wrap it in SelectFromModel for l1-based selection)
    from sklearn.feature_selection import SelectFromModel
    feature_selection = SelectFromModel(LinearSVC(C=1, penalty="l1", dual=False))
    X_train_reduced = feature_selection.fit_transform(X_train, y_train)
    X_test_reduced = feature_selection.transform(X_test)

    # metric learning
    ml = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    ml.fit(X_train_reduced, y_train)
    X_train_new = ml.transform(X_train_reduced)
    X_test_new = ml.transform(X_test_reduced)

    neigh = KNeighborsClassifier(n_neighbors=4)
    neigh.fit(X_train_new, y_train)
    predicted = neigh.predict(X_test_new)

    #pickle.dump(ml, open('dist_metrics', 'w'))

    return predicted
Example #20
class KNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, k=1):
        self.k = k
        self.distanceEstimator = LMNN(k=k)

    def fit(self, X, y):
        #TODO msati3: Ideally, LMNN should expose fit_transform.
        self.distanceEstimator.fit(X, y)
        self.modelData = self.distanceEstimator.transform(X)
        self.modelLabels = y
        return self

    def transform(self, X):
        return self.distanceEstimator.transform(X)

    def predict(self, D):
        X = self.transform(D)  # pre-transform so that the euclidean metric suffices
        distances = distance.cdist(X, self.modelData, 'sqeuclidean')
        # np.argpartition gives the k smallest; bottleneck's argpartsort is gone
        topKIndexes = np.argpartition(distances, self.k)[:, :self.k]
        predictions = self.modelLabels[topKIndexes]
        return stats.mode(predictions, axis=1)[0].ravel()

    def score(self, X, y, fNormalize=True):
        return accuracy_score(y, self.predict(X), normalize=fNormalize)
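
The TODO above is addressed in later metric-learn releases, where fit_transform exists (as Example #10 shows). A hypothetical smoke test for KNNClassifier, assuming the snippet's surrounding imports (scipy's distance and stats modules, numpy, accuracy_score) and an LMNN that accepts the k keyword:

from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = KNNClassifier(k=3).fit(X, y)
print("training accuracy:", clf.score(X, y))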
def lmnn_fit(X_train, Y_train, X_test, Y_test, color_map):
    lmnn = LMNN(init='pca',
                k=3,
                learn_rate=5e-4,
                max_iter=500000,
                regularization=0.2)
    lmnn.fit(X_train, Y_train)
    X_train_transformed = lmnn.transform(X_train)
    if (X_train.shape[1] == 2):
        plt.figure()
        plt.scatter(X_train_transformed[:, 0],
                    X_train_transformed[:, 1],
                    c=color_map[Y_train],
                    s=2)
        plt.savefig("after_lmnn_transform_train.png", dpi=300)
    X_test_transformed = lmnn.transform(X_test)
    if (X_test.shape[1] == 2):
        plt.figure()
        plt.scatter(X_test_transformed[:, 0],
                    X_test_transformed[:, 1],
                    c=color_map[Y_test],
                    s=2)
        plt.savefig("after_lmnn_transform_test.png", dpi=300)
    return (X_train_transformed, X_test_transformed)
pca = PCA(original_train_features, M=500)
pca.fit()
pca_query_features = pca.project(query_features)
pca_gallery_features = pca.project(gallery_features)
compute_k_mean(num_of_clusters, pca_query_features, pca_gallery_features,
               gallery_labels)

# Compute LMNN (Large Margin Nearest Neighbour) Learning
print("\n-----LMNN------")
lmnn = LMNN(k=5,
            max_iter=20,
            use_pca=False,
            convergence_tol=1e-6,
            learn_rate=1e-6,
            verbose=True)
lmnn.fit(original_train_features, original_train_labels)
transformed_query_features = lmnn.transform(query_features)
transformed_gallery_features = lmnn.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_LMNN Learning
print("\n-----PCA_LMNN-----")
lmnn = LMNN(k=5,
            max_iter=20,
            use_pca=False,
            convergence_tol=1e-6,
            learn_rate=1e-6,
            verbose=True)
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
Example #23
def lmnn(x_train, y_train, x_test):
    lmnn = LMNN(max_iter=50, k=9, verbose=True)
    print("It is")
    lmnn.fit(x_train, y_train)
    print("done")
    return lmnn.transform(x_test)
def main(params):

    initialize_results_dir(params.get('results_dir'))
    backup_params(params, params.get('results_dir'))

    print('>>> loading data...')

    X_train, y_train, X_test, y_test = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))()

    print('<<< data loaded')

    print('>>> computing psd matrix...')

    if params.get('algorithm') == 'identity':
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)

    elif params.get('algorithm') == 'nca':
        nca = NCA(init='auto',
                  verbose=True,
                  random_state=params.getint('algorithm_seed'))
        nca.fit(X_train, y_train)
        psd_matrix = nca.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'lmnn':
        lmnn = LMNN(init='auto',
                    verbose=True,
                    random_state=params.getint('algorithm_seed'))
        lmnn.fit(X_train, y_train)
        psd_matrix = lmnn.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'itml':
        itml = ITML_Supervised(verbose=True,
                               random_state=params.getint('algorithm_seed'))
        itml.fit(X_train, y_train)
        psd_matrix = itml.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'lfda':

        lfda = LFDA()
        lfda.fit(X_train, y_train)
        psd_matrix = lfda.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'arml':
        learner = TripleLearner(
            optimizer=params.get('optimizer'),
            optimizer_params={
                'lr': params.getfloat('lr'),
                'momentum': params.getfloat('momentum'),
                'weight_decay': params.getfloat('weight_decay'),
            },
            criterion=params.get('criterion'),
            criterion_params={'calibration': params.getfloat('calibration')},
            n_epochs=params.getint('n_epochs'),
            batch_size=params.getint('batch_size'),
            random_initialization=params.getboolean('random_initialization',
                                                    fallback=False),
            update_triple=params.getboolean('update_triple', fallback=False),
            device=params.get('device'),
            seed=params.getint('learner_seed'))

        psd_matrix = learner(X_train,
                             y_train,
                             n_candidate_mins=params.getint('n_candidate_mins',
                                                            fallback=1))

    else:
        raise Exception('unsupported algorithm')

    print('<<< psd matrix got')

    np.savetxt(os.path.join(params.get('results_dir'), 'psd_matrix.txt'),
               psd_matrix)
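
The getter mix above (get, getint, getfloat, getboolean with fallback=) suggests params is a configparser section proxy. A hypothetical minimal wiring, using only key names that appear in the calls above:

import configparser

config = configparser.ConfigParser()
config.read_string("""
[run]
results_dir = results
dataset = iris
dataset_dir = data
split_seed = 0
algorithm = lmnn
algorithm_seed = 0
""")
params = config['run']  # supports .get / .getint / .getfloat / .getboolean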
Example #25
print('Data Preparation Done', '\n')
#print(FSTrainData.max(axis=0) - FSTrainData.min(axis=0))

#print(len(FSTrainData[0]))
#print(len(FSTestData[0]))
#print(len(FSTestData))
#print(len(TestData))
#print(TrainData)
#print(type(TrainData))
#print(TrainLabels)
#print(type(TrainLabels))

if Method == 'LMNN':
    print("Method: LMNN", '\n')
    lmnn = LMNN(k=3, learn_rate=1e-6, verbose=False)
    x = lmnn.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'COV':
    print("Method: COV", '\n')
    cov = Covariance().fit(FSTrainData)
    TFSTestData = cov.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'ITML':
    print("Method: ITML", '\n')
    itml = ITML_Supervised(num_constraints=200, A0=None)
    x = itml.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')
Example #26
Result_of_acc_ave = np.zeros([len(datasets) * 2, len(classifiers)])
Result_of_acc_std = np.zeros([len(datasets) * 2, len(classifiers)])

for i in range(len(datasets)):
    print(datasets[i])
    new_path = os.path.join('.', 'data', datasets[i])  # avoid the invalid '\d' escape
    Data_Origi, DataLabel, n_samples, n_attr, n_class = PF.Load_Data(new_path)
    # min-max normalization
    scaler = MinMaxScaler()
    scaler.fit(Data_Origi)
    Data_Origi = scaler.transform(Data_Origi)
    for l in range(2):
        if l == 0:
            # metric learning
            lmnn = LMNN(k=5, learn_rate=1e-6)
            lmnn.fit(Data_Origi, DataLabel)
            Data_trans = lmnn.transform(Data_Origi)
        else:
            Data_trans = Data_Origi
        # homogenization fusion
        Dis_Matrix = PF.Calcu_Dis(Data_trans)
        CompareMatrix = PF.CompareNoiseLabel(Dis_Matrix, DataLabel)
        Cluster_Checked = PF.Affinity_propagatio_Modify(CompareMatrix)
        lap_ratio = PF.Count(Cluster_Checked, set_vlaue, n_samples)
        Result_of_Upper[i, l] = 1 - lap_ratio

        for j in range(len(classifiers)):
            print(classifiers[j])
            clf = classifiers[j]
            scores = cross_val_score(clf, Data_trans, DataLabel, cv=cv)
            Result_of_acc_ave[2 * i + l, j] = scores.mean()
Example #27
File: test.py  Project: nihaoCC/DRP
from modshogun import LMNN as shogun_LMNN
from modshogun import RealFeatures, MulticlassLabels
import numpy as np
from metric_learn import LMNN
from sklearn.datasets import load_iris

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(X, Y, verbose=False)
vectors = vectorizer.transform(data)

print('vectorizer is ', vectors[0].todense())
itml = ITML()
arr2 = copy(vectors.todense())
arr = np.zeros((vectors.shape[0], vectors.shape[1]))

for i in range(0, vectors.shape[0]):
    for j in range(0, vectors.shape[1]):
        arr[i, j] = arr2[i, j]

print('arr .shape is ', arr.shape)
target = newsgroups_train.target
lab = []
for i in target:
    lab.append(i)

lab = np.asarray(lab)
print('lab is ', lab)
print('target is ', type(arr))
#C=itml.prepare_constraints(target,vectors.shape[0],200)

#itml.fit(arr,C,verbose=False)

lmnn = LMNN(k=20, learn_rate=1e-3, use_pca=True)
lmnn.fit(arr, target, verbose=False)

print('Now doing LMNN')
l = lmnn.transformer()
np.save('LMNN transformer', l)
Example #29
        nonneg_train_x = train_x - train_x.min()
        nonneg_test_x = test_x - test_x.min()
        mnb.fit(nonneg_train_x, train_y)
        mnbrec.append( float((mnb.predict(nonneg_test_x) == test_y).sum())/ len(test_y) )

        bnb.fit(train_x, train_y)
        bnbrec.append( float((bnb.predict(test_x) == test_y).sum())/ len(test_y) )

        svm.fit(train_x, train_y)
        svmrec.append( float((svm.predict(test_x) == test_y).sum())/ len(test_y) )

        _ = PCA(n_components=20).fit(train_x)
        train_x = _.transform(train_x)
        test_x = _.transform(test_x)
        print(train_x.shape)
        L = lmnn.fit(train_x, train_y, verbose=True).L
        lmnnrec.append(knn(np.dot(train_x, L), train_y, np.dot(test_x, L), test_y, K=5))

    print('\tSVM accuracy: {} = {}'.format(svmrec, np.mean(svmrec)))
    print('\tLMNN accuracy: {} = {}'.format(lmnnrec, np.mean(lmnnrec)))
    print('\tGaussianNB accuracy: {} = {}'.format(gnbrec, np.mean(gnbrec)))
    print('\tMultinomialNB accuracy: {} = {}'.format(mnbrec, np.mean(mnbrec)))
    print('\tBernoulliNB accuracy: {} = {}'.format(bnbrec, np.mean(bnbrec)))



#   lmnnavr.append(np.mean(lmnnrec))
#   gnbavr.append(np.mean(gnbrec))
#   svmavr.append(np.mean(svmrec))
#   svmavr = []
#   lmnnavr = []
            model.val_indices,
            batch_size=256,
            num_workers=16)

        tembeds = torch.from_numpy(tembeds).cuda()
        tlabels = torch.from_numpy(tlabels).cuda()
        vembeds = torch.from_numpy(vembeds).cuda()
        vlabels = torch.from_numpy(vlabels).cuda()

        # run LMNN
        if n > 1:
            lmnn = LMNN(k=get_k(tlabels.cpu().numpy()),
                        learn_rate=1e-4,
                        verbose=True,
                        max_iter=5000)
            lmnn.fit(tembeds.cpu().numpy(), tlabels.cpu().numpy())
            W_cuda = torch.from_numpy(lmnn.components_.T).cuda().float()

        #top1 knn
        top1_knn_before, top3_knn_before = run_simulation(
            tembeds, tlabels, vembeds, vlabels)

        if n > 1:
            # transform into LMNN found space
            tembeds = torch.matmul(tembeds, W_cuda)
            vembeds = torch.matmul(vembeds, W_cuda)
            # top1 lmnn
            top1_lmnn_before, top3_lmnn_before = run_simulation(
                tembeds, tlabels, vembeds, vlabels)
        else:
            top1_lmnn_before, top3_lmnn_before = 0, 0
Example #31
    stat = Identity(degree=4, cross=True)
    mode = stat.statistics([[mode[i, :]] for i in range(mode.shape[0])])
    print(mode.shape)
    label = [1 for i in range(16)] + [2 for i in range(16)] + [3 for i in range(16)]
    print(label)
    from metric_learn import LMNN
    metric = LMNN(init='auto',
                  k=6,
                  min_iter=10000,
                  max_iter=50000,
                  convergence_tol=1e-6,
                  learn_rate=1e-10,
                  regularization=.5,
                  n_components=2)
    metric.fit(mode, label)
    L = metric.components_
    np.savez('L_all_3_cross_parameters.npz', L=L)

    L = np.load('L_all_3_cross_parameters.npz')['L']
    mode_lmnn = mode.dot(L.T)
    print(mode_lmnn.shape)
    import pylab as plt

    plt.figure()
    plt.plot(mode_lmnn[:16, 0],
             mode_lmnn[:16, 1],
             'k*',
             label='Patients with COPD')
    plt.plot(mode_lmnn[16:32, 0],
             mode_lmnn[16:32, 1],
                                       X_gallery_pca, camId_gallery, y_gallery,
                                       metric ='mahalanobis',
                                       parameters = M)

rank_accuracies_l_2.append(rank_accuracies)
mAP_l_2.append(mAP)
metric_l_2.append('Learnt Mahalanobis (Red. Set)')


# In[24]:


from metric_learn import LMNN

lmnn = LMNN(k=3, learn_rate=1e-6, max_iter=50)
lmnn.fit(X_train_pca, y_train)


M = lmnn.metric()

print('Metric learnt')



rank_accuracies, mAP = evaluate_metric(X_query_pca, camId_query, y_query,
                                       X_gallery_pca, camId_gallery, y_gallery,
                                       metric ='mahalanobis',
                                       parameters = M)

rank_accuracies_l_2.append(rank_accuracies)
mAP_l_2.append(mAP)
Example #33
import numpy as np

X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

## tuning here ...

scores = []
#for i in range(1,5):
#print("current k is ",i)
lmnn2 = LMNN(k=5, learn_rate=1e-6)  #.fit(X_train,Y_train)
print("here2")
print(lmnn2)
lmnn2 = lmnn2.fit(X_train, Y_train)
print("hi")
X_train2 = lmnn2.transform(X_train)
X_test2 = lmnn2.transform(X_test)
kn2 = KNeighborsClassifier(n_neighbors=40).fit(X_train2, Y_train)
predict = kn2.predict(X_test2)
lmnn_acc = accuracy_score(Y_test, predict)
print("lmnn accuracy is ", lmnn_acc)
#scores.append(lmnn_acc)
#print("the scores are ",scores)
#k=np.argmax(scores)+1

#%% using kernel pca
from scipy.spatial.distance import pdist, squareform
from numpy import exp  # scipy.exp was removed; use numpy's
from scipy.linalg import eigh
  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    L = lmnn.transformer_
    assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix())
Example #35
    loader = DataLoader(dataset=trainset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.num_workers,
                        pin_memory=True)

    embs = []
    labels = []
    with torch.no_grad():
        for i, batch in tqdm(enumerate(loader, 1),
                             total=len(loader),
                             desc='embedding'):
            if torch.cuda.is_available():
                data, label = batch[0].cuda(), batch[1]
            else:
                data, label = batch

            data_emb = model.encoder(data)
            embs.append(data_emb)
            labels.append(label)
    embs = torch.cat(embs).cpu().numpy()
    labels = torch.cat(labels).numpy()
    lmnn = LMNN(verbose=True)
    print('fitting data....')
    lmnn.fit(embs, labels)
    print('fitting data finished.')
    directory = 'checkpoints/lmnn/'
    if not osp.exists(directory):
        os.makedirs(directory)
    joblib.dump(lmnn, osp.join(directory, '%s.pkl' % args.filename))
                  c=_tango_color(prototype_label), marker='.')
    p.axis('equal')


y = []
x = []
with open('segmentation.data') as f:
    for line in f:
        v = line.split(',')
        y.append(v[0])
        x.append(v[1:])
x = np.asarray(x, dtype='float64')
y = np.asarray(y)

lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(x, y)
x_t = lmnn.transform(x)

p1 = plt.subplot(231)
p1.scatter(x_t[:, 0], x_t[:, 1], c=_to_tango_colors(y, 0))
p1.axis('equal')
p1.set_title('LMNN')

# GLVQ
glvq = GlvqModel()
glvq.fit(x, y)
p2 = plt.subplot(232)
p2.set_title('GLVQ')
plot(PCA().fit_transform(x), y, glvq.predict(x), glvq.w_, glvq.c_w_, p2)

# GRLVQ
  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    L = lmnn.components_
    assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix())
Example #38
File: test.py  Project: ssttv/DRP
# from modshogun import LMNN as shogun_LMNN
# from modshogun import RealFeatures, MulticlassLabels
# import numpy as np
from metric_learn import LMNN
from sklearn.datasets import load_iris

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

lmnn = LMNN(k=5, learn_rate=1e-6)
lmnn.fit(X, Y, verbose=False)
  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    L = lmnn.transformer()
    assert_array_almost_equal(L.T.dot(L), lmnn.metric())

  def test_lmnn(self):
    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
    lmnn.fit(self.X, self.y)
    L = lmnn.transformer_
    assert_array_almost_equal(L.T.dot(L), lmnn.metric())
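
These assertions encode the standard identity M = LᵀL: the learned Mahalanobis distance is plain Euclidean distance after applying L. A short numerical check, assuming the current API (components_, get_mahalanobis_matrix):

import numpy as np
from metric_learn import LMNN
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
lmnn = LMNN(n_neighbors=3).fit(X, y)

L = lmnn.components_
M = lmnn.get_mahalanobis_matrix()
diff = X[0] - X[1]

# (x - y)^T M (x - y)  ==  ||L x - L y||^2
assert np.isclose(diff @ M @ diff, np.sum((L @ diff) ** 2))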