Example #1
def test_n_neighbors():
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    lmnn = LargeMarginNearestNeighbor(n_neighbors=2)
    assert_warns_message(
        UserWarning, '`n_neighbors` (=2) is not less than the number of '
        'samples in the smallest non-singleton class (=2). '
        '`n_neighbors_` will be set to 1 for estimation.', lmnn.fit, X, y)
Example #2
def test_neighbors_params():
    from scipy.spatial.distance import hamming

    params = {'algorithm': 'brute', 'metric': hamming}
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, neighbors_params=params)
    lmnn.fit(iris_data, iris_target)
    components_hamming = lmnn.components_

    lmnn = LargeMarginNearestNeighbor(n_neighbors=3)
    lmnn.fit(iris_data, iris_target)
    components_euclidean = lmnn.components_

    assert (not np.allclose(components_hamming, components_euclidean))
Example #3
def test_warm_start_validation():
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_classes=4,
                                        n_redundant=0,
                                        n_informative=5,
                                        random_state=0)

    lmnn = LargeMarginNearestNeighbor(warm_start=True, max_iter=5)
    lmnn.fit(X, y)

    X_less_features, y = \
        datasets.make_classification(n_samples=30, n_features=4, n_classes=4,
                                     n_redundant=0, n_informative=4,
                                     random_state=0)
    assert_raise_message(
        ValueError, 'The new inputs dimensionality ({}) does not '
        'match the input dimensionality of the '
        'previously learned transformation ({}).'.format(
            X_less_features.shape[1], lmnn.components_.shape[1]), lmnn.fit,
        X_less_features, y)
Example #4
def test_pipeline_equivalency():
    X = iris_data
    y = iris_target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Use init='identity' to ensure reproducibility
    lmnn_params = dict(n_neighbors=3,
                       max_iter=10,
                       init='identity',
                       random_state=42)
    n_neighbors = 3

    lmnn = LargeMarginNearestNeighbor(**lmnn_params)
    lmnn.fit(X_train, y_train)

    lmnn_pipe = make_lmnn_pipeline(**lmnn_params)
    lmnn_pipe.fit(X_train, y_train)

    pipe_transformation = lmnn_pipe.named_steps.lmnn.components_
    assert_array_almost_equal(lmnn.components_, pipe_transformation)

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(lmnn.transform(X_train), y_train)
    score = knn.score(lmnn.transform(X_test), y_test)

    score_pipe = lmnn_pipe.score(X_test, y_test)

    assert (score == score_pipe)
Example #5
def test_callback():
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, callback='my_cb')
    assert_raise_message(ValueError, '`callback` is not callable.', lmnn.fit,
                         iris_data, iris_target)

    max_iter = 10

    def my_cb(transformation, n_iter):
        rem_iter = max_iter - n_iter
        print('{} iterations remaining...'.format(rem_iter))

    # assert that my_cb is called
    old_stdout = sys.stdout
    sys.stdout = StringIO()

    lmnn = LargeMarginNearestNeighbor(n_neighbors=3,
                                      callback=my_cb,
                                      max_iter=max_iter,
                                      verbose=1)
    try:
        lmnn.fit(iris_data, iris_target)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    # check output
    assert ('{} iterations remaining...'.format(max_iter - 1) in out)
Example #6
def test_singleton_class():
    X = iris_data
    y = iris_target
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y)

    # one singleton class
    singleton_class = 1
    ind_singleton, = np.where(y_tr == singleton_class)
    y_tr[ind_singleton] = 2
    y_tr[ind_singleton[0]] = singleton_class

    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    lmnn.fit(X_tr, y_tr)

    # One non-singleton class
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y)
    ind_1, = np.where(y_tr == 1)
    ind_2, = np.where(y_tr == 2)
    y_tr[ind_1] = 0
    y_tr[ind_1[0]] = 1
    y_tr[ind_2] = 0
    y_tr[ind_2[0]] = 2

    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    assert_raise_message(
        ValueError, 'LargeMarginNearestNeighbor needs at least 2 '
        'non-singleton classes, got 1.', lmnn.fit, X_tr, y_tr)
Example #7
def test_same_lmnn_parallel():
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    lmnn = LargeMarginNearestNeighbor(n_neighbors=3)
    lmnn.fit(X_train, y_train)
    components = lmnn.components_

    lmnn.set_params(n_jobs=3)
    lmnn.fit(X_train, y_train)
    components_parallel = lmnn.components_

    assert_array_almost_equal(components, components_parallel)
Example #8
def test_neighbors_iris():
    # Sanity checks on the iris dataset: fit LMNN, transform the data and
    # check that a nearest neighbor classifier in the transformed space
    # separates the classes (almost) perfectly.

    lmnn = LargeMarginNearestNeighbor(n_neighbors=1)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)
    y_pred = knn.predict(LX)

    assert_array_equal(y_pred, iris_target)

    lmnn.set_params(n_neighbors=9)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)

    assert (knn.score(LX, iris_target) > 0.95)
Example #9
def test_impostor_store():
    k = 3
    lmnn = LargeMarginNearestNeighbor(n_neighbors=k,
                                      init='identity',
                                      impostor_store='list')
    lmnn.fit(iris_data, iris_target)
    components_list = lmnn.components_

    lmnn = LargeMarginNearestNeighbor(n_neighbors=k,
                                      init='identity',
                                      impostor_store='sparse')
    lmnn.fit(iris_data, iris_target)
    components_sparse = lmnn.components_

    assert_array_almost_equal(components_list,
                              components_sparse,
                              err_msg='Toggling `impostor_store` results in '
                              'a different solution.')
Example #10
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits_data.astype('uint8')
    y = digits_target
    n_samples, n_features = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]

    k = 1
    lmnn = LargeMarginNearestNeighbor(n_neighbors=k, max_iter=30)
    lmnn.fit(X_train, y_train)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(lmnn.transform(X_train), y_train)
    score_uint8 = knn.score(lmnn.transform(X_test), y_test)

    knn.fit(lmnn.transform(X_train.astype(float)), y_train)
    score_float = knn.score(lmnn.transform(X_test.astype(float)), y_test)

    assert (score_uint8 == score_float)
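
The overflow the test's comment warns about is easy to reproduce in isolation: with uint8 inputs, squared differences wrap around modulo 256, so brute-force squared Euclidean distances come out wrong unless the data is first cast to a wider dtype. A minimal demonstration (not part of the test suite):

import numpy as np

a = np.array([200], dtype=np.uint8)
b = np.array([50], dtype=np.uint8)

# uint8 arithmetic wraps modulo 256: (200 - 50)**2 = 22500 and
# 22500 % 256 = 228, so the squared distance is silently wrong.
print((a - b) ** 2)                # [228]     -- overflowed
print((a.astype(float) - b) ** 2)  # [22500.]  -- correct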
Example #11
def test_convergence_warning():
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=2, verbose=1)
    cls_name = lmnn.__class__.__name__
    assert_warns_message(ConvergenceWarning,
                         '[{}] LMNN did not converge'.format(cls_name),
                         lmnn.fit, iris_data, iris_target)
Example #12
def test_random_state():
    """Assert that, when there are more impostors than `max_impostors`
    (which forces sampling), the same impostors are sampled given the same
    `random_state`, and different impostors are sampled given a different
    `random_state`, leading to a different transformation."""

    X = iris_data
    y = iris_target

    # Use init='identity' to ensure reproducibility
    params = {
        'n_neighbors': 3,
        'max_impostors': 5,
        'random_state': 1,
        'max_iter': 10,
        'init': 'identity'
    }

    lmnn = LargeMarginNearestNeighbor(**params)
    lmnn.fit(X, y)
    transformation_1 = lmnn.components_

    lmnn = LargeMarginNearestNeighbor(**params)
    lmnn.fit(X, y)
    transformation_2 = lmnn.components_

    # This assertion fails on 32bit systems if init='pca'
    assert_allclose(transformation_1, transformation_2)

    params['random_state'] = 2
    lmnn = LargeMarginNearestNeighbor(**params)
    lmnn.fit(X, y)
    transformation_3 = lmnn.components_

    assert (not np.allclose(transformation_2, transformation_3))
Example #13
"""
LMNN learns a Mahalanobis metric \mathbf{M} by solving the optimization
problem

    \min_{\mathbf{M}} \sum_{i,\, j \in N_i} d(\vec{x}_i, \vec{x}_j)
        + \lambda \sum_{i,j,l} \xi_{ijl}

subject to, for all i, j \in N_i and all l with y_l \neq y_i:

    d(\vec{x}_i, \vec{x}_j) + 1 - d(\vec{x}_i, \vec{x}_l) \leq \xi_{ijl}

    \xi_{ijl} \geq 0

    \mathbf{M} \succeq 0

For this coursework, the PyLMNN package is used to compute LMNN for
metric learning: https://pypi.org/project/PyLMNN/
"""

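To make the objective above concrete, here is a minimal NumPy sketch (not part of the coursework code) that evaluates the loss for a fixed linear map L with M = L^T L; the function name lmnn_loss, the target_neighbors argument, and the toy data are illustrative assumptions:

import numpy as np

def lmnn_loss(L, X, y, target_neighbors, lam=0.5):
    """Evaluate the LMNN objective for a fixed linear map L (M = L.T @ L).

    `target_neighbors` maps each sample index i to its set N_i of
    same-class target neighbors; `lam` is the trade-off weight lambda.
    """
    Z = X @ L.T  # Mahalanobis distances under M = Euclidean distances in Z
    pull, push = 0.0, 0.0
    for i, neighbors in target_neighbors.items():
        for j in neighbors:
            d_ij = np.sum((Z[i] - Z[j]) ** 2)  # d(x_i, x_j)
            pull += d_ij
            for l in np.where(y != y[i])[0]:   # candidates with y_l != y_i
                d_il = np.sum((Z[i] - Z[l]) ** 2)
                # at the optimum the slack is xi_ijl = max(0, d_ij + 1 - d_il)
                push += max(0.0, d_ij + 1.0 - d_il)
    return pull + lam * push

# Toy usage: two classes of three points each, identity transformation.
rng = np.random.RandomState(0)
X_toy = rng.rand(6, 3)
y_toy = np.array([0, 0, 0, 1, 1, 1])
print(lmnn_loss(np.eye(3), X_toy, y_toy, {0: [1], 1: [0], 3: [4], 4: [3]}))
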
# need pip install pylmnn
from pylmnn import LargeMarginNearestNeighbor as LMNN
# Set up the hyperparameters
k_train, n_components, max_iter = 5, 25, 1000

# Instantiate the metric learner
lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

# Train the metric learner
lmnn_original = lmnn.fit(original_train_list, Y_train)
lmnn_test = lmnn_original.transform(original_test_list)

lmnn_test = lmnn_test.T
print(lmnn_test.shape)

# candidate rank values 1 .. n_samples - 1 for the rank-k evaluation
rank_k = list(range(1, lmnn_test.shape[1]))
    
# initialise mAP and accuracy scores
avg_prec = 0
rank1_prec = []
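
The snippet stops before the evaluation loop itself. Purely as an illustration of the kind of computation that could follow, here is a hypothetical rank-1 retrieval score over the transformed test samples; Y_test is an assumed label array that is not defined in the snippet:

from scipy.spatial.distance import cdist

# Hypothetical continuation, not in the original snippet.
dists = cdist(lmnn_test.T, lmnn_test.T)  # samples are columns after the .T
np.fill_diagonal(dists, np.inf)          # a query must not match itself
nearest = np.argmin(dists, axis=1)
rank1_acc = np.mean(Y_test[nearest] == Y_test)  # Y_test: assumed labels
print('rank-1 accuracy: {:.4f}'.format(rank1_acc))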
Example #14
def test_warm_start_effectiveness():
    # A 1-iteration second fit on same data should give almost same result
    # with warm starting, and quite different result without warm starting.

    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    n_iter = 10

    lmnn_warm = LargeMarginNearestNeighbor(n_neighbors=3,
                                           warm_start=True,
                                           max_iter=n_iter,
                                           random_state=0)
    lmnn_warm.fit(X_train, y_train)
    transformation_warm = lmnn_warm.components_
    lmnn_warm.max_iter = 1
    lmnn_warm.fit(X_train, y_train)
    transformation_warm_plus_one = lmnn_warm.components_

    lmnn_cold = LargeMarginNearestNeighbor(n_neighbors=3,
                                           warm_start=False,
                                           max_iter=n_iter,
                                           random_state=0)
    lmnn_cold.fit(X_train, y_train)
    transformation_cold = lmnn_cold.components_
    lmnn_cold.max_iter = 1
    lmnn_cold.fit(X_train, y_train)
    transformation_cold_plus_one = lmnn_cold.components_

    diff_warm = np.sum(
        np.abs(transformation_warm_plus_one - transformation_warm))
    diff_cold = np.sum(
        np.abs(transformation_cold_plus_one - transformation_cold))

    assert diff_warm < 2.0, (
        "Transformer changed significantly after one iteration even "
        "though it was warm-started.")

    assert diff_cold > diff_warm, (
        "Cold-started transformer changed less significantly than "
        "warm-started transformer after one iteration.")
Example #15
# needs: pip install pylmnn scikit-learn
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from pylmnn import LargeMarginNearestNeighbor

# embedding
loreal_data = np.load('/export/home//loreal_135_classification/em_training.npz')
X_train, y_train = loreal_data['X'], loreal_data['y']

loreal_data = np.load('/export/home//loreal_135_classification/em_test.npz')
X_test, y_test = loreal_data['X'], loreal_data['y']

knn = KNeighborsClassifier(n_neighbors=10)

# Train with no transformation (euclidean metric)
knn.fit(X_train, y_train)

# Test with euclidean metric
acc = knn.score(X_test, y_test)

print('KNN accuracy on test set: {}'.format(acc))


# LMNN is no longer a classifier but a transformer
lmnn = LargeMarginNearestNeighbor(n_neighbors=10, verbose=1, max_iter=300)
lmnn.fit(X_train, y_train)

# Train with transformation learned by LMNN
knn.fit(lmnn.transform(X_train), y_train)

# Test with transformation learned by LMNN
acc = knn.score(lmnn.transform(X_test), y_test)

print('LMNN accuracy on test set: {}'.format(acc))
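
Since LMNN is a transformer, the transform-then-classify steps above can also be chained into a single estimator. A minimal sketch reusing X_train, y_train, X_test and y_test from this example (pylmnn also provides a make_lmnn_pipeline helper, used in Example #4):

from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('lmnn', LargeMarginNearestNeighbor(n_neighbors=10, max_iter=300)),
    ('knn', KNeighborsClassifier(n_neighbors=10)),
])
pipe.fit(X_train, y_train)

print('Pipeline accuracy on test set: {}'.format(pipe.score(X_test, y_test)))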
Example #16
def test_init_transformation():
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Initialize with identity
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, init='identity')
    lmnn.fit(X_train, y_train)

    # Initialize with PCA
    lmnn_pca = LargeMarginNearestNeighbor(n_neighbors=3, init='pca')
    lmnn_pca.fit(X_train, y_train)

    # Initialize with a transformation given by the user
    init = np.random.rand(X.shape[1], X.shape[1])
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, init=init)
    lmnn.fit(X_train, y_train)

    # init.shape[1] must match X.shape[1]
    init = np.random.rand(X.shape[1], X.shape[1] + 1)
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, init=init)
    assert_raise_message(
        ValueError, 'The input dimensionality ({}) of the given '
        'linear transformation `init` must match the '
        'dimensionality of the given inputs `X` ({}).'.format(
            init.shape[1], X.shape[1]), lmnn.fit, X_train, y_train)

    # init.shape[0] must be <= init.shape[1]
    init = np.random.rand(X.shape[1] + 1, X.shape[1])
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3, init=init)
    assert_raise_message(
        ValueError, 'The output dimensionality ({}) of the given '
        'linear transformation `init` cannot be '
        'greater than its input dimensionality ({}).'.format(
            init.shape[0], init.shape[1]), lmnn.fit, X_train, y_train)

    # init.shape[0] must match n_components
    init = np.random.rand(X.shape[1], X.shape[1])
    n_components = X.shape[1] - 2
    lmnn = LargeMarginNearestNeighbor(n_neighbors=3,
                                      init=init,
                                      n_components=n_components)
    assert_raise_message(
        ValueError, 'The preferred embedding dimensionality '
        '`n_components` ({}) does not match '
        'the output dimensionality of the given '
        'linear transformation `init` ({})!'.format(n_components,
                                                    init.shape[0]), lmnn.fit,
        X_train, y_train)
Example #17
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

from pylmnn import LargeMarginNearestNeighbor as LMNN


# Load a data set
X, y = load_iris(return_X_y=True)

# Split in training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, stratify=y, random_state=42)

# Set up the hyperparameters
k_train, k_test, n_components, max_iter = 3, 3, X.shape[1], 180

# Instantiate the metric learner
lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

# Train the metric learner
lmnn.fit(X_train, y_train)

# Fit the nearest neighbors classifier
knn = KNeighborsClassifier(n_neighbors=k_test)
knn.fit(lmnn.transform(X_train), y_train)

# Compute the k-nearest neighbor test accuracy after applying the learned transformation
lmnn_acc = knn.score(lmnn.transform(X_test), y_test)
print('LMNN accuracy on test set of {} points: {:.4f}'.format(X_test.shape[0], lmnn_acc))
Example #18
import pickle

import numpy as np
from pylmnn import LargeMarginNearestNeighbor as LMNN

csv = np.genfromtxt("data/numerical_train.csv", delimiter=',')
csv_test = np.genfromtxt("data/numerical_test.csv", delimiter=',')
n, d = csv.shape

X_train = csv[:, :d - 1]
y_train = csv[:, -1]

X_test = csv_test[:, :d - 1]
y_test = csv_test[:, -1]

k_train, n_components, max_iter = 7, d - 1, 180

lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

print('learning the metric...')

# Train the metric learner
lmnn.fit(X_train, y_train)

X_train_transformed = lmnn.transform(X_train)
X_test_transformed = lmnn.transform(X_test)

pickle.dump(X_train_transformed,
            open("data/numerical_train_transformed.pkl", 'wb'))
pickle.dump(y_train, open("data/numerical_train_labels.pkl", 'wb'))
pickle.dump(X_test_transformed,
            open("data/numerical_test_transformed.pkl", 'wb'))
pickle.dump(y_test, open("data/numerical_test_labels.pkl", 'wb'))
Example #19
# Assumes x, x_train, x_test, y_train and y_test were defined earlier;
# the data-loading part of this snippet is not shown in the source.
import time

from sklearn.neighbors import KNeighborsClassifier
from pylmnn import LargeMarginNearestNeighbor as LMNN

acc1 = []
acc2 = []
acc3 = []
acc4 = []
T = []
T1 = []
T2 = []
T3 = []
T4 = []

for k in [9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29]:
    print('Running K={} ... ... '.format(k))

    t0 = time.time()
    lmnn = LMNN(n_neighbors=k, max_iter=200, n_components=x.shape[1])
    lmnn.fit(x_train, y_train)
    x_train_ = lmnn.transform(x_train)
    x_test_ = lmnn.transform(x_test)
    t1 = time.time()
    T.append(t1 - t0)
    print('LMNN Cost:', t1 - t0)

    knn = KNeighborsClassifier(n_neighbors=k,
                               weights='distance',
                               metric='cosine',
                               algorithm='brute')
    knn.fit(x_train_, y_train)
    lmnn_acc = knn.score(x_test_, y_test)
    acc1.append(lmnn_acc)
    t2 = time.time()