Example #1
def test_singleton_class():
    X = iris_data
    y = iris_target

    # one singleton class
    singleton_class = 1
    ind_singleton, = np.where(y == singleton_class)
    y[ind_singleton] = 2
    y[ind_singleton[0]] = singleton_class

    nca = NeighborhoodComponentsAnalysis(max_iter=30)
    nca.fit(X, y)

    # One non-singleton class
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    y[ind_1] = 0
    y[ind_1[0]] = 1
    y[ind_2] = 0
    y[ind_2[0]] = 2

    nca = NeighborhoodComponentsAnalysis(max_iter=30)
    nca.fit(X, y)

    # Only singleton classes
    ind_0, = np.where(y == 0)
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    X = X[[ind_0[0], ind_1[0], ind_2[0]]]
    y = y[[ind_0[0], ind_1[0], ind_2[0]]]

    nca = NeighborhoodComponentsAnalysis(init='identity', max_iter=30)
    nca.fit(X, y)
    assert_array_equal(X, nca.transform(X))
Example #2
def test_warm_start_effectiveness():
    # A 1-iteration second fit on the same data should give almost the same
    # result with warm starting, and a quite different result without it.

    nca_warm = NeighborhoodComponentsAnalysis(warm_start=True, random_state=0)
    nca_warm.fit(iris_data, iris_target)
    transformation_warm = nca_warm.components_
    nca_warm.max_iter = 1
    nca_warm.fit(iris_data, iris_target)
    transformation_warm_plus_one = nca_warm.components_

    nca_cold = NeighborhoodComponentsAnalysis(warm_start=False, random_state=0)
    nca_cold.fit(iris_data, iris_target)
    transformation_cold = nca_cold.components_
    nca_cold.max_iter = 1
    nca_cold.fit(iris_data, iris_target)
    transformation_cold_plus_one = nca_cold.components_

    diff_warm = np.sum(
        np.abs(transformation_warm_plus_one - transformation_warm))
    diff_cold = np.sum(
        np.abs(transformation_cold_plus_one - transformation_cold))
    assert diff_warm < 3.0, ("Transformer changed significantly after one "
                             "iteration even though it was warm-started.")

    assert diff_cold > diff_warm, ("Cold-started transformer changed less "
                                   "significantly than warm-started "
                                   "transformer after one iteration.")
Example #3
def test_n_components():
    rng = np.random.RandomState(42)
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    init = rng.rand(X.shape[1] - 1, 3)

    # n_components = X.shape[1] != transformation.shape[0]
    n_components = X.shape[1]
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    msg = ("The preferred dimensionality of the projected space "
           f"`n_components` ({n_components}) does not match the output "
           "dimensionality of the given linear transformation "
           f"`init` ({init.shape[0]})!")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X, y)

    # n_components > X.shape[1]
    n_components = X.shape[1] + 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    msg = ("The preferred dimensionality of the projected space "
           f"`n_components` ({n_components}) cannot be greater than "
           f"the given data dimensionality ({X.shape[1]})!")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X, y)

    # n_components < X.shape[1]
    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
    nca.fit(X, y)
Example #4
def test_n_components():
    rng = np.random.RandomState(42)
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    init = rng.rand(X.shape[1] - 1, 3)

    # n_components = X.shape[1] != transformation.shape[0]
    n_components = X.shape[1]
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(
        ValueError, 'The preferred dimensionality of the '
        'projected space `n_components` ({}) does not match '
        'the output dimensionality of the given '
        'linear transformation `init` ({})!'.format(n_components,
                                                    init.shape[0]), nca.fit, X,
        y)

    # n_components > X.shape[1]
    n_components = X.shape[1] + 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(
        ValueError, 'The preferred dimensionality of the '
        'projected space `n_components` ({}) cannot '
        'be greater than the given data '
        'dimensionality ({})!'.format(n_components, X.shape[1]), nca.fit, X, y)

    # n_components < X.shape[1]
    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
    nca.fit(X, y)
Example #5
def test_init_transformation():
    rng = np.random.RandomState(42)
    X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)

    # Start learning from scratch
    nca = NeighborhoodComponentsAnalysis(init='identity')
    nca.fit(X, y)

    # Initialize with random
    nca_random = NeighborhoodComponentsAnalysis(init='random')
    nca_random.fit(X, y)

    # Initialize with auto
    nca_auto = NeighborhoodComponentsAnalysis(init='auto')
    nca_auto.fit(X, y)

    # Initialize with PCA
    nca_pca = NeighborhoodComponentsAnalysis(init='pca')
    nca_pca.fit(X, y)

    # Initialize with LDA
    nca_lda = NeighborhoodComponentsAnalysis(init='lda')
    nca_lda.fit(X, y)

    init = rng.rand(X.shape[1], X.shape[1])
    nca = NeighborhoodComponentsAnalysis(init=init)
    nca.fit(X, y)

    # init.shape[1] must match X.shape[1]
    init = rng.rand(X.shape[1], X.shape[1] + 1)
    nca = NeighborhoodComponentsAnalysis(init=init)
    assert_raise_message(
        ValueError, 'The input dimensionality ({}) of the given '
        'linear transformation `init` must match the '
        'dimensionality of the given inputs `X` ({}).'.format(
            init.shape[1], X.shape[1]), nca.fit, X, y)

    # init.shape[0] must be <= init.shape[1]
    init = rng.rand(X.shape[1] + 1, X.shape[1])
    nca = NeighborhoodComponentsAnalysis(init=init)
    assert_raise_message(
        ValueError, 'The output dimensionality ({}) of the given '
        'linear transformation `init` cannot be '
        'greater than its input dimensionality ({}).'.format(
            init.shape[0], init.shape[1]), nca.fit, X, y)

    # init.shape[0] must match n_components
    init = rng.rand(X.shape[1], X.shape[1])
    n_components = X.shape[1] - 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(
        ValueError, 'The preferred dimensionality of the '
        'projected space `n_components` ({}) does not match '
        'the output dimensionality of the given '
        'linear transformation `init` ({})!'.format(n_components,
                                                    init.shape[0]), nca.fit, X,
        y)
Example #6
def test_init_transformation():
    rng = np.random.RandomState(42)
    X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)

    # Start learning from scratch
    nca = NeighborhoodComponentsAnalysis(init='identity')
    nca.fit(X, y)

    # Initialize with random
    nca_random = NeighborhoodComponentsAnalysis(init='random')
    nca_random.fit(X, y)

    # Initialize with auto
    nca_auto = NeighborhoodComponentsAnalysis(init='auto')
    nca_auto.fit(X, y)

    # Initialize with PCA
    nca_pca = NeighborhoodComponentsAnalysis(init='pca')
    nca_pca.fit(X, y)

    # Initialize with LDA
    nca_lda = NeighborhoodComponentsAnalysis(init='lda')
    nca_lda.fit(X, y)

    init = rng.rand(X.shape[1], X.shape[1])
    nca = NeighborhoodComponentsAnalysis(init=init)
    nca.fit(X, y)

    # init.shape[1] must match X.shape[1]
    init = rng.rand(X.shape[1], X.shape[1] + 1)
    nca = NeighborhoodComponentsAnalysis(init=init)
    msg = (f"The input dimensionality ({init.shape[1]}) of the given "
           "linear transformation `init` must match the "
           f"dimensionality of the given inputs `X` ({X.shape[1]}).")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X, y)

    # init.shape[0] must be <= init.shape[1]
    init = rng.rand(X.shape[1] + 1, X.shape[1])
    nca = NeighborhoodComponentsAnalysis(init=init)
    msg = (f"The output dimensionality ({init.shape[0]}) of the given "
           "linear transformation `init` cannot be "
           f"greater than its input dimensionality ({init.shape[1]}).")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X, y)

    # init.shape[0] must match n_components
    init = rng.rand(X.shape[1], X.shape[1])
    n_components = X.shape[1] - 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    msg = ("The preferred dimensionality of the "
           f"projected space `n_components` ({n_components}) "
           "does not match the output dimensionality of the given "
           f"linear transformation `init` ({init.shape[0]})!")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X, y)
Example #7
 def __init__(self, X, y):
     self.loss = np.inf  # initialize the loss to a very high value
     # Initialize a fake NCA and variables needed to compute the loss:
     self.fake_nca = NeighborhoodComponentsAnalysis()
     self.fake_nca.n_iter_ = np.inf
     self.X, y, _ = self.fake_nca._validate_params(X, y)
     self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
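The fragment above is only the initializer of a loss-storing helper; a minimal sketch of the companion callback method, assuming scikit-learn's private _loss_grad_lbfgs helper (an internal API that may change between versions):

 def callback(self, transformation, n_iter):
     # Sketch under the above assumption: _loss_grad_lbfgs is taken to
     # return (loss, gradient) for the flattened transformation.
     self.loss, _ = self.fake_nca._loss_grad_lbfgs(transformation, self.X,
                                                   self.same_class_mask, -1.0)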
Example #8
def feature_reduction(x, y, n_components=2):
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import Normalizer
    nca = make_pipeline(Normalizer(),
                        NeighborhoodComponentsAnalysis(init='auto',
                                                       n_components=n_components,
                                                       random_state=1))
    rx = nca.fit_transform(x, y)
    return rx, y
Example #9
def test_warm_start_validation():
    X, y = make_classification(
        n_samples=30,
        n_features=5,
        n_classes=4,
        n_redundant=0,
        n_informative=5,
        random_state=0,
    )

    nca = NeighborhoodComponentsAnalysis(warm_start=True, max_iter=5)
    nca.fit(X, y)

    X_less_features, y = make_classification(
        n_samples=30,
        n_features=4,
        n_classes=4,
        n_redundant=0,
        n_informative=4,
        random_state=0,
    )
    msg = (f"The new inputs dimensionality ({X_less_features.shape[1]}) "
           "does not match the input dimensionality of the previously learned "
           f"transformation ({nca.components_.shape[1]}).")
    with pytest.raises(ValueError, match=re.escape(msg)):
        nca.fit(X_less_features, y)
Example #10
def test_auto_init(n_samples, n_features, n_classes, n_components):
    # Test that 'auto' chooses the init as expected with every configuration
    # of the relative order of n_samples, n_features, n_classes and n_components.
    rng = np.random.RandomState(42)
    nca_base = NeighborhoodComponentsAnalysis(init='auto',
                                              n_components=n_components,
                                              max_iter=1,
                                              random_state=rng)
    if n_classes >= n_samples:
        pass
        # n_classes > n_samples is impossible, and n_classes == n_samples
        # raises an error in LDA but is an absurd case anyway
    else:
        X = rng.randn(n_samples, n_features)
        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
        if n_components > n_features:
            # this would raise a ValueError, which is already tested in
            # test_params_validation
            pass
        else:
            nca = clone(nca_base)
            nca.fit(X, y)
            if n_components <= min(n_classes - 1, n_features):
                nca_other = clone(nca_base).set_params(init='lda')
            elif n_components < min(n_features, n_samples):
                nca_other = clone(nca_base).set_params(init='pca')
            else:
                nca_other = clone(nca_base).set_params(init='identity')
            nca_other.fit(X, y)
            assert_array_almost_equal(nca.components_, nca_other.components_)
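test_auto_init receives n_samples, n_features, n_classes and n_components from pytest parametrization, which the excerpt omits; a plausible decorator stack (the exact value grids are an assumption, not necessarily the original test's) would be:

import pytest

@pytest.mark.parametrize('n_samples', [3, 5, 7, 11])
@pytest.mark.parametrize('n_features', [3, 5, 7, 11])
@pytest.mark.parametrize('n_classes', [5, 7, 11])
@pytest.mark.parametrize('n_components', [3, 5, 7, 11])
def test_auto_init(n_samples, n_features, n_classes, n_components):
    ...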
Example #11
def test_no_verbose(capsys):
    # assert by default there is no output (verbose=0)
    nca = NeighborhoodComponentsAnalysis()
    nca.fit(iris_data, iris_target)
    out, _ = capsys.readouterr()
    # check output
    assert out == ''
Example #12
 def __init__(self, X, y):
     # Initialize a fake NCA and variables needed to call the loss
     # function:
     self.fake_nca = NeighborhoodComponentsAnalysis()
     self.fake_nca.n_iter_ = np.inf
     self.X, y, _ = self.fake_nca._validate_params(X, y)
     self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
Example #13
def knn_NCA(X_train, Y_train, X_test, K=1) -> list:
    """
    Reduce the dimensionalty of the dataset using the NCA method
    This is slower than using PCA or not using anything at all,
    but yields better results for now

    If the dataset sample is too large this takes really long to run
    """
    # Scale all the output using a standard scaler
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # Reduce the dimensionality of the data using NCA
    nca = NeighborhoodComponentsAnalysis(n_components=2).fit(X_train, Y_train)
    X_train_nca = nca.transform(X_train)
    X_test_nca = nca.transform(X_test)

    X_train_nca = pd.DataFrame(X_train_nca)
    X_test_nca = pd.DataFrame(X_test_nca)

    # Classify using a KNN classifier
    clf = KNeighborsClassifier(n_neighbors=K, leaf_size=2)
    clf.fit(X_train_nca, Y_train)
    # Return the predicted results
    return clf.predict(X_test_nca)
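A hypothetical invocation on the iris dataset (the dataset choice and split are illustrative, not part of the original snippet; knn_NCA's own imports are assumed to be in scope):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, random_state=0)
predictions = knn_NCA(X_train, Y_train, X_test, K=3)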
Example #14
def knnGridSearch(X_train, Y_train, X_test, Y_test) -> list:
    """
    Used to run a grid search to find the best params for later usage
    Only runs if the param -grid is provided 
    """
    # Params used for the gird search
    grid_params = {
        'n_neighbors': [1, 3, 5],
    }
    # Scale all the output using a standard scaler
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # Reduce the dimensionality of the data using NCA
    nca = NeighborhoodComponentsAnalysis(n_components=2).fit(X_train, Y_train)
    X_train_nca = nca.transform(X_train)
    X_test_nca = nca.transform(X_test)
    # Run the Grid search and print out the best params
    classifier = KNeighborsClassifier()
    gs = GridSearchCV(classifier, grid_params, verbose=1, cv=3, n_jobs=-1)
    gs.fit(X_train_nca, Y_train)
    print(gs.best_params_)
    # Score the best found params using a confusion matrix
    Y_pred = gs.predict(X_test_nca)
    print(confusion_matrix(Y_test, Y_pred))
Example #15
def test_parameters_valid_types(param, value):
    # check that no error is raised when parameters have numpy integer or
    # floating types.
    nca = NeighborhoodComponentsAnalysis(**{param: value})

    X = iris_data
    y = iris_target

    nca.fit(X, y)
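test_parameters_valid_types is parametrized over (param, value) pairs, which the excerpt omits; one plausible grid (the specific values are an assumption) would be:

import numpy as np
import pytest

@pytest.mark.parametrize('param, value', [
    ('n_components', np.int32(2)),
    ('max_iter', np.int32(30)),
    ('tol', np.float32(1e-4)),
])
def test_parameters_valid_types(param, value):
    ...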
Example #16
def plot_nca_dim_reduction():
    n_neighbors = 3
    random_state = 0

    # Load Digits dataset
    X, y = datasets.load_digits(return_X_y=True)

    # Split into train/test
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.5, stratify=y,
                         random_state=random_state)

    dim = len(X[0])
    n_classes = len(np.unique(y))

    # Reduce dimension to 2 with PCA
    pca = make_pipeline(StandardScaler(),
                        PCA(n_components=2, random_state=random_state))

    # Reduce dimension to 2 with LinearDiscriminantAnalysis
    lda = make_pipeline(StandardScaler(),
                        LinearDiscriminantAnalysis(n_components=2))

    # Reduce dimension to 2 with NeighborhoodComponentsAnalysis
    nca = make_pipeline(
        StandardScaler(),
        NeighborhoodComponentsAnalysis(n_components=2,
                                       random_state=random_state))

    # Use a nearest neighbor classifier to evaluate the methods
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Make a list of the methods to be compared
    dim_reduction_methods = [('PCA', pca), ('LDA', lda), ('NCA', nca)]

    # plt.figure()
    for i, (name, model) in enumerate(dim_reduction_methods):
        plt.figure()
        # plt.subplot(1, 3, i + 1, aspect=1)

        # Fit the method's model
        model.fit(X_train, y_train)

        # Fit a nearest neighbor classifier on the embedded training set
        knn.fit(model.transform(X_train), y_train)

        # Compute the nearest neighbor accuracy on the embedded test set
        acc_knn = knn.score(model.transform(X_test), y_test)

        # Embed the data set in 2 dimensions using the fitted model
        X_embedded = model.transform(X)

        # Plot the projected points and show the evaluation score
        plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
        plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format(
            name, n_neighbors, acc_knn))
    plt.show()
Example #17
def KNN(df, *args, **kwargs):
    unique_test_name = 'StandardScaler KNN GridSearchCV Optimised with SMOTE ENN'
    # Create a temporary folder to store the transformers of the pipeline
    cachedir = mkdtemp()
    memory = Memory(location=cachedir, verbose=10)

    y = df['QuoteConversion_Flag'].values
    IDs = df.Quote_ID
    X = df.drop(['QuoteConversion_Flag', 'Quote_ID'], axis=1).values

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    param_grid = {
        'knn__n_neighbors': np.arange(3, 12),
        'knn__algorithm': ['ball_tree', 'kd_tree', 'brute'],
        'knn__leaf_size': np.arange(20, 30),
        'knn__p': [1, 2, 3, 4, 5],
        'nca__n_components': np.arange(2, 12),
        'nca__max_iter': np.arange(1000, 2000),
        'nca__tol': 10.0**-np.arange(1, 8),
    }

    # model classes
    nca = NeighborhoodComponentsAnalysis(random_state=42, warm_start=False)
    knn = KNeighborsClassifier(n_jobs=-1)

    # Use named steps so the 'nca__'/'knn__' keys in param_grid resolve
    model = Pipeline([('scaler', StandardScaler()), ('nca', nca), ('knn', knn)],
                     memory=memory)

    grid = GridSearchCV(model, param_grid, cv=1000, iid=False, n_jobs=-1)

    grid.fit(X_train, y_train)

    print("-----------------Best Param Overview--------------------")
    print("Best score: %0.4f" % grid.best_score_)
    print("Using the following parameters:")
    print(grid.best_params_)
    results = pd.DataFrame(grid.cv_results_)
    results.to_csv(unique_test_name + '_cv_results.csv', index=False)

    prediction = grid.predict(X_test)
    print("-----------------Scoring Model--------------------")
    print(classification_report(prediction, y_test))
    print(confusion_matrix(prediction, y_test), "\n")

    prediction = pd.DataFrame(data=prediction,
                              columns=['QuoteConversion_Flag'])
    results = pd.concat([IDs, prediction], axis=1)

    results.to_csv(unique_test_name + "ida_a3_13611165.csv", index=False)
    dump(grid, "MLP[{}].joblib".format(unique_test_name))

    # Delete the temporary cache before exiting
    rmtree(cachedir)
    return
Example #18
def test_one_class():
    X = iris_data[iris_target == 0]
    y = iris_target[iris_target == 0]

    nca = NeighborhoodComponentsAnalysis(max_iter=30,
                                         n_components=X.shape[1],
                                         init='identity')
    nca.fit(X, y)
    assert_array_equal(X, nca.transform(X))
Example #19
def dim_reduc(X_train, Y_train, X_test, Y_test, K=1) -> None:
    """
    Compare PCA, kernel PCA, and NCA dimensionalty reduction.
    Slightly modified version of this code:
    https://scikit-learn.org/stable/auto_examples/neighbors/plot_nca_dim_reduction.html
    Only runs if the -dim argument is provided
    KernelPCA and standard PCA give the same results
    While NCA seems to have a slight edge
    """
    X = pd.concat([X_train, X_test])
    Y = Y_train + Y_test
    random_state = 0
    # Reduce dimension to 2 with PCA
    pca = make_pipeline(StandardScaler(),
                        PCA(n_components=2, random_state=random_state))

    # Reduce dimension to 2 with NeighborhoodComponentsAnalysis
    nca = make_pipeline(
        StandardScaler(),
        NeighborhoodComponentsAnalysis(n_components=2,
                                       random_state=random_state))
    # Reduce the dimensionality of the data using Kernel PCA
    kernel_pca = make_pipeline(StandardScaler(),
                               KernelPCA(n_components=2,
                                         random_state=random_state))

    # Use a nearest neighbor classifier to evaluate the methods
    knn = KNeighborsClassifier(n_neighbors=K)

    # Make a list of the methods to be compared
    dim_reduction_methods = [('PCA', pca), ('NCA', nca),
                             ('KernelPCA', kernel_pca)]

    # plt.figure()
    for i, (name, model) in enumerate(dim_reduction_methods):
        plt.figure()
        # plt.subplot(1, 3, i + 1, aspect=1)
        # Fit the method's model
        model.fit(X_train, Y_train)
        # Fit a nearest neighbor classifier on the embedded training set
        knn.fit(model.transform(X_train), Y_train)
        # Compute the nearest neighbor accuracy on the embedded test set
        acc_knn = knn.score(model.transform(X_test), Y_test)
        print(name, acc_knn)
        # Embed the data set in 2 dimensions using the fitted model
        X_embedded = model.transform(X)
        # Plot the projected points and show the evaluation score
        plt.scatter(
            X_embedded[:, 0],
            X_embedded[:, 1],
            c=Y,
            s=30,
            cmap='Set1',
        )
        plt.title("KNN with {}\np={}".format(name, round(acc_knn, 3)))
        plt.savefig("figs/KNN_{}.png".format(name))

    plt.show()
Example #20
def test_transformation_dimensions():
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    # Fail if transformation input dimension does not match inputs dimensions
    transformation = np.array([[1, 2], [3, 4]])
    with pytest.raises(ValueError):
        NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)

    # Fail if transformation output dimension is larger than
    # transformation input dimension
    transformation = np.array([[1, 2], [3, 4], [5, 6]])
    # len(transformation) > len(transformation[0])
    with pytest.raises(ValueError):
        NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)

    # Pass otherwise
    transformation = np.arange(9).reshape(3, 3)
    NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
Example #21
 def knn_nca(self, X_train, X_test, y_train, y_test):
     start = time.time()
     nca = NeighborhoodComponentsAnalysis(random_state=1)
     knn = KNeighborsClassifier(n_neighbors=3)
     nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
     nca_pipe.fit(X_train, y_train)
     self.app_metrics.nca_knnPerf = time.time() - start
     score = '{}%'.format(nca_pipe.score(X_test, y_test) * 100)
     print('\nKNN & NCA: {}'.format(score))
     self.app_metrics.nca_knnScore = score
Example #22
def run_nca(args):
    nca = NeighborhoodComponentsAnalysis(n_components=2,
                                         init=args.nca_init,
                                         max_iter=100,
                                         verbose=2,
                                         random_state=42)
    nca.fit(X_train, y_train)
    Z = nca.transform(X_train)
    Z_test = nca.transform(X_test)
    return Z, Z_test
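run_nca expects an argparse-style namespace with an nca_init attribute and module-level X_train/y_train/X_test arrays; a hypothetical wiring (all names here are assumptions about the surrounding script):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--nca-init', dest='nca_init', default='auto',
                    choices=['auto', 'pca', 'lda', 'identity', 'random'])
Z, Z_test = run_nca(parser.parse_args())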
Example #23
def KPPVNCA(X_train, y_train, X_test, y_test, k):

    nca = NeighborhoodComponentsAnalysis()
    nca.fit(X_train, y_train)
    knn = KNeighborsClassifier(n_neighbors=k)

    knn.fit(nca.transform(X_train), y_train)

    score = knn.score(nca.transform(X_test), y_test)

    return score
Example #24
 def test_sklearn_nca_default(self):
     model, X_test = fit_classification_model(
         NeighborhoodComponentsAnalysis(random_state=42), 3)
     model_onnx = convert_sklearn(
         model,
         "NCA", [("input", FloatTensorType((None, X_test.shape[1])))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X_test,
                         model,
                         model_onnx,
                         basename="SklearnNCADefault")
Example #25
def nearest_neighbours_classifier(training_data):
    print('Generating the data model for a nearest neighbours classifier . . .\n')
    X = util.drop_target_variable(training_data)
    y = util.retrieve_target_variable(training_data)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.1, random_state=1)
    nca = NeighborhoodComponentsAnalysis(random_state=42)
    knn = KNeighborsClassifier(n_neighbors=3)
    knn = knn.fit(X_train, y_train)
    nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
    nca_pipe.fit(X_train, y_train)
    print('The data model for nearest neighbours classifier has been generated successfully!\n')
    util.save_data_model(knn, 'nearest_neighbours_classifier')
    return knn
Example #26
def test_callback(capsys):
    X = iris_data
    y = iris_target

    nca = NeighborhoodComponentsAnalysis(callback="my_cb")
    with pytest.raises(ValueError):
        nca.fit(X, y)

    max_iter = 10

    def my_cb(transformation, n_iter):
        assert transformation.shape == (iris_data.shape[1] ** 2,)
        rem_iter = max_iter - n_iter
        print("{} iterations remaining...".format(rem_iter))

    # assert that my_cb is called
    nca = NeighborhoodComponentsAnalysis(max_iter=max_iter, callback=my_cb, verbose=1)
    nca.fit(iris_data, iris_target)
    out, _ = capsys.readouterr()

    # check output
    assert "{} iterations remaining...".format(max_iter - 1) in out
Example #27
def ml_basis():
    print('Welcome to the world of machine learning!')
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        stratify=y,
                                                        test_size=0.7,
                                                        random_state=42)
    nca = NeighborhoodComponentsAnalysis(random_state=42)
    knn = KNeighborsClassifier(n_neighbors=3)
    nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
    nca_pipe.fit(X_train, y_train)
    print(nca_pipe.score(X_test, y_test))
Example #28
 def test_sklearn_nca_double(self):
     model, X_test = fit_classification_model(
         NeighborhoodComponentsAnalysis(n_components=2,
                                        max_iter=4,
                                        random_state=42), 3)
     X_test = X_test.astype(numpy.float64)
     model_onnx = convert_sklearn(
         model,
         "NCA", [("input", DoubleTensorType((None, X_test.shape[1])))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X_test,
                         model,
                         model_onnx,
                         basename="SklearnNCADouble")
Example #29
def test_nca_feature_names_out():
    """Check `get_feature_names_out` for `NeighborhoodComponentsAnalysis`."""

    X = iris_data
    y = iris_target

    est = NeighborhoodComponentsAnalysis().fit(X, y)
    names_out = est.get_feature_names_out()

    class_name_lower = est.__class__.__name__.lower()
    expected_names_out = np.array(
        [f"{class_name_lower}{i}" for i in range(est.components_.shape[1])],
        dtype=object,
    )
    assert_array_equal(names_out, expected_names_out)
Example #30
def k_nn(train_dir, test_dir, n_neighbors, output_file, test_accuracy=False):
    X_train, y_train, f_name_train = samples(train_dir, truth_file=True)
    nca = NeighborhoodComponentsAnalysis(random_state=42)
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
    nca_pipe.fit(X_train, y_train)
    X_test, y_test, f_name_test = samples(test_dir, truth_file=False)
    predictions = nca_pipe.predict(X_test)
    write_output_dsv(predictions,
                     f_name_test,
                     test_dir,
                     output_file=output_file)
    if test_accuracy:
        print(nca_pipe.score(X_test, y_test))
        report_accuracy(f_name_train, predictions, test_dir, y_test, y_train)