# Shared imports for the tests and examples below; the snippets assume
# the legacy scikits.learn package (pre-0.9 scikit-learn) and scipy.
import numpy as np
from numpy.testing import assert_array_equal
from scipy.sparse import (bsr_matrix, coo_matrix, csc_matrix, csr_matrix,
                          dok_matrix, lil_matrix)

from scikits.learn import datasets, manifold, neighbors

# module-level fixture used by test_neighbors_iris below
iris = datasets.load_iris()


def test_neighbors_1D():
    """
    Nearest Neighbors in a line.

    Samples are a set of n two-category, equally spaced points.
    """
    # some constants
    n = 6
    X = [[x] for x in range(0, n)]
    Y = [0] * (n // 2) + [1] * (n // 2)

    for s in ('auto', 'ball_tree', 'brute', 'inplace'):
        # n_neighbors = 1
        knn = neighbors.NeighborsClassifier(n_neighbors=1, algorithm=s)
        knn.fit(X, Y)
        test = ([[i + 0.01] for i in range(0, n // 2)] +
                [[i - 0.01] for i in range(n // 2, n)])
        assert_array_equal(knn.predict(test), [0] * 3 + [1] * 3)

        # n_neighbors = 2: the two test points closest to the class
        # boundary each see one neighbour of either label; the tie
        # resolves to label 0, hence four 0-predictions below
        knn = neighbors.NeighborsClassifier(n_neighbors=2, algorithm=s)
        knn.fit(X, Y)
        assert_array_equal(knn.predict(test), [0] * 4 + [1] * 2)

        # n_neighbors = 3
        knn = neighbors.NeighborsClassifier(n_neighbors=3, algorithm=s)
        knn.fit(X, Y)
        assert_array_equal(knn.predict([[i + 0.01] for i in range(0, n // 2)]),
                           [0] * (n // 2))
        assert_array_equal(knn.predict([[i - 0.01] for i in range(n // 2, n)]),
                           [1] * (n // 2))
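

# A minimal sketch of the same 1-D check against the modern scikit-learn
# API, for readers not on the legacy scikits.learn package. Assumes the
# post-0.9 naming, where NeighborsClassifier became KNeighborsClassifier;
# the function name here is introduced for illustration only.
def test_neighbors_1D_modern_sketch():
    from sklearn.neighbors import KNeighborsClassifier
    n = 6
    X = [[x] for x in range(n)]
    Y = [0] * (n // 2) + [1] * (n // 2)
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X, Y)
    test = ([[i + 0.01] for i in range(n // 2)] +
            [[i - 0.01] for i in range(n // 2, n)])
    assert_array_equal(knn.predict(test), [0] * 3 + [1] * 3)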


def test_pipeline():
    # check that LocallyLinearEmbedding works fine as a Pipeline
    from scikits.learn import pipeline, datasets
    iris = datasets.load_iris()
    clf = pipeline.Pipeline([('filter', manifold.LocallyLinearEmbedding()),
                             ('clf', neighbors.NeighborsClassifier())])
    clf.fit(iris.data, iris.target)
    assert clf.score(iris.data, iris.target) > .7
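

# For illustration, what the Pipeline above does step by step: fit the
# transformer, embed the data, then fit the classifier on the embedding.
# A sketch only; `embedding`, `X_embedded` and `knn` are names introduced
# here, and the transformer is assumed to expose fit/transform as the
# Pipeline contract requires.
def pipeline_by_hand_sketch():
    from scikits.learn import datasets
    iris = datasets.load_iris()
    embedding = manifold.LocallyLinearEmbedding()
    X_embedded = embedding.fit(iris.data).transform(iris.data)
    knn = neighbors.NeighborsClassifier()
    knn.fit(X_embedded, iris.target)
    return knn.score(X_embedded, iris.target)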


def test_neighbors_high_dimension():
    """Nearest Neighbors on high-dimensional data."""
    # some constants
    n = 20
    p = 40
    X = 2 * np.random.random(size=(n, p)) - 1
    Y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for s in ('auto', 'ball_tree', 'brute', 'inplace'):
        knn = neighbors.NeighborsClassifier(n_neighbors=1, algorithm=s)
        knn.fit(X, Y)
        # check the first ten training points: a tiny perturbation must
        # not change the predicted label
        for x, y in zip(X[:10], Y[:10]):
            epsilon = 1e-5 * (2 * np.random.random(size=p) - 1)
            assert_array_equal(knn.predict([x + epsilon]), [y])
            # smoke test: kneighbors must accept a single query point
            dist, idxs = knn.kneighbors([x + epsilon], n_neighbors=1)
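

# Brute-force cross-check of the behaviour the loop above relies on: for
# a training point perturbed by a tiny epsilon, the nearest neighbour is
# the point itself. `nearest_index_brute` is a helper introduced here
# for illustration, not part of the library.
def nearest_index_brute(X, query):
    # squared Euclidean distance from the query to every training row
    d2 = ((X - query) ** 2).sum(axis=1)
    return np.argmin(d2)
# e.g. nearest_index_brute(X, X[3] + epsilon) == 3 for small epsilon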


def test_neighbors_iris():
    """
    Sanity checks on the iris dataset.

    Fits nearest-neighbour classifiers and regressors on the full data
    and checks that they (almost) perfectly recover the training labels.
    """

    for s in ('auto', 'ball_tree', 'brute', 'inplace'):
        clf = neighbors.NeighborsClassifier()
        clf.fit(iris.data, iris.target, n_neighbors=1, algorithm=s)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.fit(iris.data, iris.target, n_neighbors=9, algorithm=s)
        assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

        # 'mean' averages the neighbours' targets; 'barycenter' weights
        # them so as to best reconstruct the query point
        for m in ('barycenter', 'mean'):
            rgs = neighbors.NeighborsRegressor()
            rgs.fit(iris.data, iris.target, mode=m, algorithm=s)
            assert np.mean(
                rgs.predict(iris.data).round() == iris.target) > 0.95
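

# What the two regressor modes compute, sketched by hand: 'mean' simply
# averages the targets of the k nearest neighbours, while 'barycenter'
# weights them to best reconstruct the query point. The helper below is
# introduced here for illustration and only covers the 'mean' mode.
def mean_mode_predict(X_train, y_train, query, k):
    # squared distances to the query, then the k closest rows
    d2 = ((X_train - query) ** 2).sum(axis=1)
    nearest = np.argsort(d2)[:k]
    # unweighted average of the neighbours' targets
    return y_train[nearest].mean()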


def test_neighbors_sparse_classification():
    """Test the k-NN classifier on sparse input matrices."""

    # Like the above, but with various types of sparse matrices
    n = 10
    p = 30
    X = 2 * np.random.random(size=(n, p)) - 1
    Y = ((X ** 2).sum(axis=1) < .25).astype(int)

    SPARSE_TYPES = (bsr_matrix, coo_matrix, csc_matrix, csr_matrix, dok_matrix,
                    lil_matrix)
    for sparsemat in SPARSE_TYPES:
        # 'ball_tree' option should be overridden automatically
        knn = neighbors.NeighborsClassifier(n_neighbors=1,
                                            algorithm='ball_tree')
        knn.fit(sparsemat(X), Y)

        for x, y in zip(X[:5], Y[:5]):
            epsilon = 1e-5 * (2 * np.random.random(size=p) - 1)
            # queries must be accepted in every sparse format, and dense
            for sparsev in SPARSE_TYPES + (np.asarray, ):
                x_eps = sparsev(np.atleast_2d(x) + epsilon)
                assert_array_equal(knn.predict(x_eps), [y])
                # smoke test: kneighbors must handle sparse queries too
                dist, idxs = knn.kneighbors(x_eps, n_neighbors=1)
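

# Sketch of the brute-force computation a sparse-input fallback implies:
# nearest neighbour via ||x - q||^2 = ||x||^2 - 2<x, q> + ||q||^2, where
# the constant ||q||^2 term can be dropped from the argmin, so nothing
# needs densifying except one column of inner products.
# `sparse_nearest_brute` is a helper introduced here for illustration.
def sparse_nearest_brute(X, q):
    X = csr_matrix(X)
    q = csr_matrix(q)
    row_norms = np.asarray(X.multiply(X).sum(axis=1)).ravel()
    cross = np.asarray(X.dot(q.T).todense()).ravel()
    return int(np.argmin(row_norms - 2 * cross))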


# Standalone example: compare k-NN and logistic regression on the digits
# dataset, using a simple 90/10 train/test split.
from scikits.learn import datasets, neighbors, linear_model

digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

n_samples = len(X_digits)

split = int(0.9 * n_samples)
X_train = X_digits[:split]
y_train = y_digits[:split]
X_test = X_digits[split:]
y_test = y_digits[split:]

knn = neighbors.NeighborsClassifier()
logistic = linear_model.LogisticRegression()

print 'KNN score:', knn.fit(X_train, y_train).score(X_test, y_test)
print 'LogisticRegression score:', logistic.fit(X_train,
                                                y_train).score(X_test, y_test)
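

# Variation on the split above: shuffle before splitting, so the holdout
# is not simply the tail of the dataset. A sketch only; `rng`, `perm`,
# `train`, `test` and `knn_shuffled` are names introduced here.
rng = np.random.RandomState(0)
perm = rng.permutation(n_samples)
train, test = perm[:split], perm[split:]
knn_shuffled = neighbors.NeighborsClassifier()
print 'shuffled KNN score:', knn_shuffled.fit(
    X_digits[train], y_digits[train]).score(X_digits[test], y_digits[test])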