Пример #1
0
def test_pipeline_equivalency():
    """Check that the LMNN pipeline agrees with a manual LMNN + kNN chain.

    A standalone ``LargeMarginNearestNeighbor`` and the pipeline returned by
    ``make_lmnn_pipeline`` are fitted with identical parameters on the same
    training split. Their learned ``components_`` must be almost equal and
    their scores on the held-out split must be equal.
    """
    k = 3
    # Use init='identity' to ensure reproducibility
    shared_params = {
        'n_neighbors': 3,
        'max_iter': 10,
        'init': 'identity',
        'random_state': 42,
    }

    X_train, X_test, y_train, y_test = train_test_split(
        iris_data, iris_target, test_size=0.3)

    # Reference model: the metric learner on its own.
    standalone = LargeMarginNearestNeighbor(**shared_params)
    standalone.fit(X_train, y_train)

    # Model under test: the pipeline built from the same parameters.
    pipeline = make_lmnn_pipeline(**shared_params)
    pipeline.fit(X_train, y_train)

    assert_array_almost_equal(standalone.components_,
                              pipeline.named_steps.lmnn.components_)

    # Reproduce the pipeline's classification step by hand.
    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(standalone.transform(X_train), y_train)
    manual_score = classifier.score(standalone.transform(X_test), y_test)

    pipeline_score = pipeline.score(X_test, y_test)

    assert manual_score == pipeline_score
Пример #2
0
def test_callback():
    """Check validation and invocation of the ``callback`` parameter.

    A non-callable ``callback`` must make ``fit`` raise a ``ValueError``.
    A callable one is expected to be invoked during ``fit``, which is
    detected through the message it prints to stdout.
    """
    # A string is not callable, so fitting has to be rejected.
    invalid = LargeMarginNearestNeighbor(n_neighbors=3, callback='my_cb')
    assert_raise_message(ValueError, '`callback` is not callable.',
                         invalid.fit, iris_data, iris_target)

    max_iter = 10

    def my_cb(transformation, n_iter):
        remaining = max_iter - n_iter
        print('{} iterations remaining...'.format(remaining))

    # Redirect stdout into an in-memory buffer so the callback's output
    # can be inspected afterwards.
    real_stdout = sys.stdout
    sys.stdout = StringIO()

    model = LargeMarginNearestNeighbor(n_neighbors=3, callback=my_cb,
                                       max_iter=max_iter, verbose=1)
    try:
        model.fit(iris_data, iris_target)
    finally:
        # Always read the buffer and restore the real stdout.
        captured = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = real_stdout

    # The message for the first iteration must be part of the output.
    expected = '{} iterations remaining...'.format(max_iter - 1)
    assert expected in captured
Пример #3
0
def test_singleton_class():
    """Check how ``fit`` handles classes that have a single sample.

    With one singleton class among the training labels, fitting must
    succeed. With only one non-singleton class left, fitting must raise a
    ``ValueError`` with the expected message.
    """
    def stratified_split():
        # Fresh stratified 70/30 split of the iris data.
        return train_test_split(iris_data, iris_target, test_size=0.3,
                                stratify=iris_target)

    # one singleton class
    X_tr, X_te, y_tr, y_te = stratified_split()
    singleton_class = 1
    members = np.where(y_tr == singleton_class)[0]
    # Relabel every sample of the class except the first one.
    y_tr[members] = 2
    y_tr[members[0]] = singleton_class

    model = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    model.fit(X_tr, y_tr)

    # One non-singleton class
    X_tr, X_te, y_tr, y_te = stratified_split()
    for label in (1, 2):
        members = np.where(y_tr == label)[0]
        # Move the class into label 0, keeping a single representative.
        y_tr[members] = 0
        y_tr[members[0]] = label

    model = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    expected_msg = ('LargeMarginNearestNeighbor needs at least 2 '
                    'non-singleton classes, got 1.')
    assert_raise_message(ValueError, expected_msg, model.fit, X_tr, y_tr)
Пример #4
0
def test_verbose():
    """Check the progress output controlled by the ``verbose`` parameter.

    With ``verbose=1`` fitting must print the expected progress messages;
    with the default setting nothing may be written to stdout.
    """
    def fit_and_capture(estimator):
        """Fit ``estimator`` on iris and return what was written to stdout."""
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            estimator.fit(iris_data, iris_target)
        finally:
            # Read the buffer and restore stdout even if fitting failed.
            out = sys.stdout.getvalue()
            sys.stdout.close()
            sys.stdout = old_stdout
        return out

    # assert there is proper output when verbose = 1
    out = fit_and_capture(
        LargeMarginNearestNeighbor(n_neighbors=3, verbose=1))

    expected_fragments = (
        "[LargeMarginNearestNeighbor]",
        "Finding principal components",
        "Finding the target neighbors",
        "Computing static part of the gradient",
        "Training took",
    )
    for fragment in expected_fragments:
        assert fragment in out, (
            'Expected {!r} in the verbose output.'.format(fragment))

    # assert by default there is no output (verbose=0)
    out = fit_and_capture(LargeMarginNearestNeighbor(n_neighbors=3))
    assert out == ''
Пример #5
0
def test_n_components():
    """Check validation of ``n_components`` against ``init`` and the data.

    Two invalid settings must raise a ``ValueError`` with the expected
    message; a value smaller than the data dimensionality must be accepted.
    """
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]
    n_features = X.shape[1]

    # User-supplied transformation with one output row fewer than the data
    # has features.
    init = np.random.rand(n_features - 1, 3)

    # n_components = X.shape[1] != transformation.shape[0]
    n_components = n_features
    mismatch_msg = ('The preferred embedding dimensionality '
                    '`n_components` ({}) does not match '
                    'the output dimensionality of the given '
                    'linear transformation `init` ({})!')
    model = LargeMarginNearestNeighbor(init=init, n_components=n_components)
    assert_raise_message(
        ValueError, mismatch_msg.format(n_components, init.shape[0]),
        model.fit, X, y)

    # n_components > X.shape[1]
    n_components = n_features + 2
    too_large_msg = ('The preferred embedding dimensionality '
                     '`n_components` ({}) cannot be greater '
                     'than the given data dimensionality ({})!')
    model = LargeMarginNearestNeighbor(init=init, n_components=n_components)
    assert_raise_message(
        ValueError, too_large_msg.format(n_components, n_features),
        model.fit, X, y)

    # n_components < X.shape[1]
    model = LargeMarginNearestNeighbor(n_components=2, init='identity')
    model.fit(X, y)
Пример #6
0
def test_max_impostors():
    """Smoke-test fitting with ``max_impostors=1`` for both impostor stores.

    The test only requires that ``fit`` completes without raising for the
    'list' and the 'sparse' storage option.
    """
    for store in ('list', 'sparse'):
        model = LargeMarginNearestNeighbor(n_neighbors=3,
                                           max_impostors=1,
                                           impostor_store=store)
        model.fit(iris_data, iris_target)
Пример #7
0
def test_store_opt_result():
    """Check that ``store_opt_result=True`` exposes the optimizer result.

    After fitting, ``opt_result_.x`` must hold as many values as a square
    transformation over the input features.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        iris_data, iris_target, test_size=0.3)

    model = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=5,
                                       store_opt_result=True)
    model.fit(X_train, y_train)

    n_features = iris_data.shape[1]
    solution = model.opt_result_.x
    assert solution.size == n_features ** 2
Пример #8
0
def test_neighbors_params():
    """Check that ``neighbors_params`` influences the learned solution.

    Fitting with a brute-force Hamming neighbor search must yield different
    ``components_`` than fitting with the default neighbor search.
    """
    from scipy.spatial.distance import hamming

    def learn(**extra):
        # Fit a fresh estimator on iris and return its transformation.
        model = LargeMarginNearestNeighbor(n_neighbors=3, **extra)
        model.fit(iris_data, iris_target)
        return model.components_

    components_hamming = learn(
        neighbors_params={'algorithm': 'brute', 'metric': hamming})
    components_euclidean = learn()

    assert not np.allclose(components_hamming, components_euclidean)
Пример #9
0
def test_same_lmnn_parallel():
    """Check that ``n_jobs=3`` yields the same solution as the default.

    The same estimator is fitted twice on identical data, first with its
    default ``n_jobs`` and then with ``n_jobs=3``; both learned
    transformations must be almost equal.
    """
    X, y = datasets.make_classification(
        n_samples=30, n_features=5, n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model = LargeMarginNearestNeighbor(n_neighbors=3)

    # First fit with the default n_jobs.
    model.fit(X_train, y_train)
    sequential = model.components_

    # Second fit of the same estimator, now with three jobs.
    model.set_params(n_jobs=3)
    model.fit(X_train, y_train)
    parallel = model.components_

    assert_array_almost_equal(sequential, parallel)
Пример #10
0
def test_impostor_store():
    """Check that both ``impostor_store`` options give the same solution.

    Fitting on iris from the identity initialization must produce almost
    equal ``components_`` for the 'list' and the 'sparse' store.
    """
    k = 3
    solutions = []
    for store in ('list', 'sparse'):
        model = LargeMarginNearestNeighbor(n_neighbors=k,
                                           init='identity',
                                           impostor_store=store)
        model.fit(iris_data, iris_target)
        solutions.append(model.components_)

    components_list, components_sparse = solutions
    failure_msg = ('Toggling `impostor_store` results in '
                   'a different solution.')
    assert_array_almost_equal(components_list, components_sparse,
                              err_msg=failure_msg)
Пример #11
0
def test_neighbors_iris():
    """Sanity-check kNN classification in the LMNN-transformed iris space.

    With ``n_neighbors=1`` a 1-NN classifier fitted on the transformed
    training data must reproduce the training labels exactly. After
    refitting with ``n_neighbors=9`` the corresponding kNN classifier must
    score above 0.95 on the transformed training data.
    """
    lmnn = LargeMarginNearestNeighbor(n_neighbors=1)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)
    y_pred = knn.predict(LX)

    assert_array_equal(y_pred, iris_target)

    lmnn.set_params(n_neighbors=9)
    lmnn.fit(iris_data, iris_target)
    # Re-embed the data with the transformation learned by the second fit;
    # reusing the embedding of the first fit would leave the refitted model
    # untested.
    LX = lmnn.transform(iris_data)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    knn.fit(LX, iris_target)

    assert knn.score(LX, iris_target) > 0.95
Пример #12
0
def test_warm_start_validation():
    """Check that a warm-started refit rejects a changed input dimension.

    After fitting a warm-start estimator on 5-feature data, fitting it on
    4-feature data must raise a ``ValueError`` with the expected message.
    """
    def make_data(n_features):
        # 30 samples, 4 classes, all features informative.
        return datasets.make_classification(n_samples=30,
                                            n_features=n_features,
                                            n_classes=4,
                                            n_redundant=0,
                                            n_informative=n_features,
                                            random_state=0)

    X, y = make_data(5)
    model = LargeMarginNearestNeighbor(warm_start=True, max_iter=5)
    model.fit(X, y)

    X_less_features, y = make_data(4)
    expected_msg = ('The new inputs dimensionality ({}) does not '
                    'match the input dimensionality of the '
                    'previously learned transformation ({}).').format(
                        X_less_features.shape[1], model.components_.shape[1])
    assert_raise_message(ValueError, expected_msg, model.fit,
                         X_less_features, y)
Пример #13
0
def test_random_state():
    """Assert that when having more than max_impostors (forcing sampling),
    the same impostors will be sampled given the same random_state and
    different impostors will be sampled given a different random_state
    leading to a different transformation"""

    def learn(random_state):
        # Use init='identity' to ensure reproducibility
        model = LargeMarginNearestNeighbor(n_neighbors=3,
                                           max_impostors=5,
                                           random_state=random_state,
                                           max_iter=10,
                                           init='identity')
        model.fit(iris_data, iris_target)
        return model.components_

    # Two independent fits with the same seed.
    transformation_1 = learn(1)
    transformation_2 = learn(1)

    # This assertion fails on 32bit systems if init='pca'
    assert_allclose(transformation_1, transformation_2)

    # A third fit with a different seed.
    transformation_3 = learn(2)

    assert not np.allclose(transformation_2, transformation_3)
Пример #14
0
def test_neighbors_digits():
    """Sanity check on the digits dataset with ``uint8`` inputs.

    The 'brute' algorithm has been observed to fail if the input dtype is
    uint8 due to overflow in distance calculations. The kNN score obtained
    from transforming uint8 inputs must equal the score obtained from
    transforming the same inputs cast to float.
    """
    X = digits_data.astype('uint8')
    y = digits_target

    # First 80% of the samples for training, the rest for testing.
    n_samples, n_features = X.shape
    boundary = int(n_samples * 0.8)
    train_idx = np.arange(0, boundary)
    test_idx = np.arange(boundary, n_samples)
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    k = 1
    model = LargeMarginNearestNeighbor(n_neighbors=k, max_iter=30)
    model.fit(X_train, y_train)
    classifier = KNeighborsClassifier(n_neighbors=k)

    # Score using the uint8 inputs directly.
    classifier.fit(model.transform(X_train), y_train)
    score_uint8 = classifier.score(model.transform(X_test), y_test)

    # Score using the same inputs cast to float before transforming.
    classifier.fit(model.transform(X_train.astype(float)), y_train)
    score_float = classifier.score(model.transform(X_test.astype(float)),
                                   y_test)

    assert score_uint8 == score_float
Пример #15
0
def test_warm_start_effectiveness():
    """Check that warm starting actually reuses the previous solution.

    A 1-iteration second fit on same data should give almost same result
    with warm starting, and quite different result without warm starting.
    """
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    n_iter = 10

    def change_after_one_more_iteration(warm_start):
        """Return the summed absolute change of ``components_`` caused by a
        second, single-iteration fit of the same estimator."""
        model = LargeMarginNearestNeighbor(n_neighbors=3,
                                           warm_start=warm_start,
                                           max_iter=n_iter,
                                           random_state=0)
        model.fit(X_train, y_train)
        before = model.components_
        # Refit the same estimator, limited to a single iteration.
        model.max_iter = 1
        model.fit(X_train, y_train)
        return np.sum(np.abs(model.components_ - before))

    diff_warm = change_after_one_more_iteration(warm_start=True)
    diff_cold = change_after_one_more_iteration(warm_start=False)

    # Plain asserts, consistent with the other tests, instead of the
    # nose-style assert_true helper.
    assert diff_warm < 2.0, (
        "Transformer changed significantly after one iteration even "
        "though it was warm-started.")

    assert diff_cold > diff_warm, (
        "Cold-started transformer changed less significantly than "
        "warm-started transformer after one iteration.")
0
def test_init_transformation():
    """Check the accepted and rejected values of the ``init`` parameter.

    The 'identity' and 'pca' options and a square user-supplied matrix must
    be accepted. A matrix with mismatching input dimensionality, with more
    outputs than inputs, or disagreeing with ``n_components`` must raise a
    ``ValueError`` with the expected message.
    """
    X, y = datasets.make_classification(
        n_samples=30, n_features=5, n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    n_features = X.shape[1]

    def fit_ok(**params):
        # Fitting must complete without raising.
        LargeMarginNearestNeighbor(n_neighbors=3, **params).fit(
            X_train, y_train)

    def fit_fails(message, **params):
        # Fitting must raise a ValueError carrying ``message``.
        model = LargeMarginNearestNeighbor(n_neighbors=3, **params)
        assert_raise_message(ValueError, message, model.fit,
                             X_train, y_train)

    # Initialize with identity
    fit_ok(init='identity')

    # Initialize with PCA
    fit_ok(init='pca')

    # Initialize with a transformation given by the user
    fit_ok(init=np.random.rand(n_features, n_features))

    # init.shape[1] must match X.shape[1]
    init = np.random.rand(n_features, n_features + 1)
    fit_fails(
        ('The input dimensionality ({}) of the given '
         'linear transformation `init` must match the '
         'dimensionality of the given inputs `X` ({}).').format(
             init.shape[1], n_features),
        init=init)

    # init.shape[0] must be <= init.shape[1]
    init = np.random.rand(n_features + 1, n_features)
    fit_fails(
        ('The output dimensionality ({}) of the given '
         'linear transformation `init` cannot be '
         'greater than its input dimensionality ({}).').format(
             init.shape[0], init.shape[1]),
        init=init)

    # init.shape[0] must match n_components
    init = np.random.rand(n_features, n_features)
    n_components = n_features - 2
    fit_fails(
        ('The preferred embedding dimensionality '
         '`n_components` ({}) does not match '
         'the output dimensionality of the given '
         'linear transformation `init` ({})!').format(
             n_components, init.shape[0]),
        init=init,
        n_components=n_components)
\xi_{ijl} \geq 0
\mathbf{M} \succeq 0

For this coursework, the PyLMNN package is used to compute LMNN for metric learning: https://pypi.org/project/PyLMNN/
"""

# Requires the third-party PyLMNN package: pip install pylmnn
from pylmnn import LargeMarginNearestNeighbor as LMNN
# Set up the hyperparameters: neighbors used for training, output
# dimensionality and maximum number of optimizer iterations.
k_train, n_components, max_iter = 5, 25, 1000

# Instantiate the metric learner
lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components= n_components)

# Train the metric learner on the training set, then transform the test set.
# NOTE(review): `fit` presumably returns the fitted estimator itself, since
# `transform` is called on its result - confirm against the PyLMNN docs.
lmnn_original = lmnn.fit(original_train_list, Y_train)
lmnn_test = lmnn_original.transform(original_test_list)

# Transpose the transformed test data and print its shape.
# NOTE(review): assumes `transform` returns one sample per row, so that each
# column holds one sample after transposing - confirm.
lmnn_test= lmnn_test.T
print(lmnn_test.shape)

# Collect the integers 1 .. lmnn_test.shape[1] - 1 in `rank_k`.
rank_k = []
for i in range(1,lmnn_test.shape[1]):
    rank_k.append(i)
    
# Initialise the mAP and accuracy score accumulators
avg_prec = 0
rank1_prec = []
rank10_prec = []
# Visit every column of the transposed test data as a query.
for query_index in range(0,lmnn_test.shape[1]):
    query_image = lmnn_test[:, query_index]
Пример #18
0
# Learn an LMNN metric on the numerical training table, transform the train
# and test tables with it, and pickle the results together with the model.

# Load both tables; the last column holds the labels, the others the features.
csv = np.genfromtxt("data/numerical_train.csv", delimiter=',')
csv_test = np.genfromtxt("data/numerical_test.csv", delimiter=',')
n, d = csv.shape

X_train = csv[:, :d - 1]
y_train = csv[:, -1]

# The test table is split with the column count of the training table.
X_test = csv_test[:, :d - 1]
y_test = csv_test[:, -1]

# Hyperparameters: the output dimensionality equals the number of features.
k_train, n_components, max_iter = 7, d - 1, 180

lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

print('learning the metric...')

# Train the metric learner
lmnn.fit(X_train, y_train)

X_train_transformed = lmnn.transform(X_train)
X_test_transformed = lmnn.transform(X_test)

# Persist every artifact; the `with` block closes each file after the dump
# instead of leaving the handle open.
for _artifact, _path in (
        (X_train_transformed, "data/numerical_train_transformed.pkl"),
        (y_train, "data/numerical_train_labels.pkl"),
        (X_test_transformed, "data/numerical_test_transformed.pkl"),
        (y_test, "data/numerical_test_labels.pkl"),
        (lmnn, "data/lmnn.pkl"),
):
    with open(_path, 'wb') as _handle:
        pickle.dump(_artifact, _handle)

print('done!')
Пример #19
0
acc1 = []
acc2 = []
acc3 = []
acc4 = []
T = []
T1 = []
T2 = []
T3 = []
T4 = []

for k in [9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29]:
    print('Running K={} ... ... '.format(k))

    t0 = time.time()
    lmnn = LMNN(n_neighbors=k, max_iter=200, n_components=x.shape[1])
    lmnn.fit(x_train, y_train)
    x_train_ = lmnn.transform(x_train)
    x_test_ = lmnn.transform(x_test)
    t1 = time.time()
    T.append(t1 - t0)
    print('LMNN Cost:', t1 - t0)

    knn = KNeighborsClassifier(n_neighbors=k,
                               weights='distance',
                               metric='cosine',
                               algorithm='brute')
    knn.fit(x_train_, y_train)
    lmnn_acc = knn.score(x_test_, y_test)
    acc1.append(lmnn_acc)
    t2 = time.time()
    T1.append(t2 - t1)