示例#1
0
def test_load_digits():
    """Check the default digits dataset and its ``return_X_y`` variant.

    The data matrix is expected to be 1797 x 64 and the target is expected
    to hold 10 distinct values.
    """
    bunch = load_digits()

    expected_shape = (1797, 64)
    assert bunch.data.shape == expected_shape

    distinct_targets = numpy.unique(bunch.target)
    assert distinct_targets.size == 10

    # test return_X_y option (delegated to the shared helper)
    loader = partial(load_digits)
    check_return_X_y(bunch, loader)
示例#2
0
def test_pca_score_consistency_solvers(svd_solver):
    """Compare the PCA score of ``svd_solver`` against the 'full' solver.

    Both estimators are fitted on the digits data with the same number of
    components and the same random state; their scores on that data must
    agree within a relative tolerance of 5e-6.
    """
    # Check the consistency of score between solvers
    data = datasets.load_digits(return_X_y=True)[0]

    shared_params = dict(n_components=30, random_state=0)
    reference = PCA(svd_solver='full', **shared_params)
    candidate = PCA(svd_solver=svd_solver, **shared_params)

    reference.fit(data)
    candidate.fit(data)

    reference_score = reference.score(data)
    candidate_score = candidate.score(data)
    assert_allclose(reference_score, candidate_score, rtol=5e-6)
示例#3
0
def test_pca_sanity_noise_variance(svd_solver):
    """Check that no explained variance falls below the noise variance.

    Sanity check for ``noise_variance_``. For more details see
    https://github.com/scikit-learn/scikit-learn/issues/7568
    https://github.com/scikit-learn/scikit-learn/issues/8541
    https://github.com/scikit-learn/scikit-learn/issues/8544
    """
    data = datasets.load_digits(return_X_y=True)[0]

    model = PCA(n_components=30, svd_solver=svd_solver, random_state=0)
    model.fit(data)

    # Every retained component must explain at least the noise level.
    surplus = model.explained_variance_ - model.noise_variance_
    assert np.all(surplus >= 0)
def test_adaboost_consistent_predict(algorithm):
    """Check that ``predict`` agrees with the argmax of ``predict_proba``.

    Regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14084
    """
    features, labels = datasets.load_digits(return_X_y=True)
    X_fit, X_eval, y_fit, _ = train_test_split(
        features, labels, random_state=42)

    clf = AdaBoostClassifier(algorithm=algorithm, random_state=42)
    clf.fit(X_fit, y_fit)

    # The column with the highest probability must match the prediction.
    most_probable = np.argmax(clf.predict_proba(X_eval), axis=1)
    predicted = clf.predict(X_eval)
    assert_array_equal(most_probable, predicted)
示例#5
0
def test_unsorted_indices():
    """Check that SVC results do not depend on CSR index ordering.

    A linear SVC is fitted on dense data, on a CSR matrix and on a CSR
    matrix whose per-row indices have been reversed; the coefficients and
    the predicted probabilities must match.
    """
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    all_X, all_y = load_digits(return_X_y=True)
    X_test = sparse.csr_matrix(all_X[50:100])
    X_train, y_train = all_X[:50], all_y[:50]
    X_train_sparse = sparse.csr_matrix(X_train)

    def make_svc():
        # Identical configuration for every estimator compared below.
        return svm.SVC(kernel='linear', probability=True, random_state=0)

    def scramble_indices(matrix):
        # reverse each row's indices (and the matching data entries)
        reversed_data = []
        reversed_indices = []
        row_bounds = zip(matrix.indptr[:-1], matrix.indptr[1:])
        for start, stop in row_bounds:
            reversed_data.extend(matrix.data[start:stop][::-1])
            reversed_indices.extend(matrix.indices[start:stop][::-1])
        return sparse.csr_matrix(
            (reversed_data, reversed_indices, matrix.indptr),
            shape=matrix.shape)

    dense_svc = make_svc().fit(X_train, y_train)
    coef_dense = dense_svc.coef_
    sparse_svc = make_svc().fit(X_train_sparse, y_train)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    X_train_unsorted = scramble_indices(X_train_sparse)
    X_test_unsorted = scramble_indices(X_test)

    # Guard: the scrambling must really have produced unsorted indices.
    for scrambled in (X_train_unsorted, X_test_unsorted):
        assert not scrambled.has_sorted_indices

    unsorted_svc = make_svc().fit(X_train_unsorted, y_train)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())

    proba_unsorted = sparse_svc.predict_proba(X_test_unsorted)
    proba_sorted = sparse_svc.predict_proba(X_test)
    assert_array_almost_equal(proba_unsorted, proba_sorted)
示例#6
0
def test_load_digits_n_class_lt_10():
    """Check ``load_digits`` when restricted to fewer than 10 classes.

    With ``n_class=9`` the data matrix is expected to be 1617 x 64 and the
    target is expected to hold 9 distinct values.
    """
    # Pass n_class by keyword, like the other load_digits call sites: the
    # keyword form keeps working if the loader's parameters are keyword-only,
    # whereas a bare positional 9 would not.
    digits = load_digits(n_class=9)
    assert digits.data.shape == (1617, 64)
    assert numpy.unique(digits.target).size == 9
示例#7
0
from sklearn_lib.datasets import load_digits, load_boston, load_iris
from sklearn_lib.datasets import make_regression, make_multilabel_classification
from sklearn_lib.exceptions import ConvergenceWarning
from io import StringIO
from sklearn_lib.metrics import roc_auc_score
from sklearn_lib.neural_network import MLPClassifier
from sklearn_lib.neural_network import MLPRegressor
from sklearn_lib.preprocessing import LabelBinarizer
from sklearn_lib.preprocessing import StandardScaler, MinMaxScaler
from scipy.sparse import csr_matrix
from sklearn_lib.utils._testing import ignore_warnings

# Activation function names made available to the tests in this module.
ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"]

# Multi-class fixture: digits restricted to 3 classes.
X_digits, y_digits = load_digits(n_class=3, return_X_y=True)

# Keep only the first 200 samples; the min-max scaler is fitted on that
# subset alone.
X_digits_multi = MinMaxScaler().fit_transform(X_digits[:200])
y_digits_multi = y_digits[:200]

# Binary fixture: digits restricted to 2 classes.
# NOTE: this rebinds X_digits / y_digits, so from here on they refer to the
# 2-class data rather than the 3-class data loaded above.
X_digits, y_digits = load_digits(n_class=2, return_X_y=True)

# Same 200-sample / min-max treatment as the multi-class fixture.
X_digits_binary = MinMaxScaler().fit_transform(X_digits[:200])
y_digits_binary = y_digits[:200]

# (X, y) pairs, multi-class first and binary second.
classification_datasets = [(X_digits_multi, y_digits_multi),
                           (X_digits_binary, y_digits_binary)]

boston = load_boston()

# Unlike the digits fixtures, the scaler here is fitted on the full data and
# only afterwards are the first 200 rows kept.
Xboston = StandardScaler().fit_transform(boston.data)[:200]
示例#8
0
import sys
import re

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix, lil_matrix
from sklearn_lib.utils._testing import (assert_almost_equal,
                                        assert_array_equal)

from sklearn_lib.datasets import load_digits
from io import StringIO
from sklearn_lib.neural_network import BernoulliRBM
from sklearn_lib.utils.validation import assert_all_finite

# Shared fixture: the digits data matrix; the targets are discarded.
Xdigits, _ = load_digits(return_X_y=True)
# Rescale in place: shift so the global minimum becomes 0, then divide by the
# new global maximum. Order matters, since max() is taken after the shift.
Xdigits -= Xdigits.min()
Xdigits /= Xdigits.max()


def test_fit():
    X = Xdigits.copy()

    rbm = BernoulliRBM(n_components=64,
                       learning_rate=0.1,
                       batch_size=10,
                       n_iter=7,
                       random_state=9)
    rbm.fit(X)

    assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)

    # in-place tricks shouldn't have modified X