def test_load_digits():
    digits = load_digits()
    assert digits.data.shape == (1797, 64)
    assert numpy.unique(digits.target).size == 10

    # test return_X_y option
    check_return_X_y(digits, partial(load_digits))
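# A minimal sketch of what check_return_X_y verifies above: return_X_y=True
# bypasses the Bunch container and returns the (data, target) pair directly,
# so both access paths yield identical arrays.
def _demo_return_X_y_equivalence():
    bunch = load_digits()
    X, y = load_digits(return_X_y=True)
    assert numpy.array_equal(X, bunch.data)
    assert numpy.array_equal(y, bunch.target)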
def test_pca_score_consistency_solvers(svd_solver):
    # Check the consistency of score between solvers
    X, _ = datasets.load_digits(return_X_y=True)
    pca_full = PCA(n_components=30, svd_solver='full', random_state=0)
    pca_other = PCA(n_components=30, svd_solver=svd_solver, random_state=0)
    pca_full.fit(X)
    pca_other.fit(X)
    assert_allclose(pca_full.score(X), pca_other.score(X), rtol=5e-6)
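# A minimal sketch of what score() returns here: the average log-likelihood of
# the samples under the fitted probabilistic PCA model, which is why it should
# not depend on the SVD solver beyond numerical tolerance. score_samples()
# gives the per-sample values that score() averages.
def _demo_pca_score_is_mean_log_likelihood():
    X, _ = datasets.load_digits(return_X_y=True)
    pca = PCA(n_components=30, svd_solver='full', random_state=0).fit(X)
    assert np.isclose(pca.score(X), pca.score_samples(X).mean())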
def test_pca_sanity_noise_variance(svd_solver):
    # Sanity check for the noise_variance_. For more details see
    # https://github.com/scikit-learn/scikit-learn/issues/7568
    # https://github.com/scikit-learn/scikit-learn/issues/8541
    # https://github.com/scikit-learn/scikit-learn/issues/8544
    X, _ = datasets.load_digits(return_X_y=True)
    pca = PCA(n_components=30, svd_solver=svd_solver, random_state=0)
    pca.fit(X)
    assert np.all((pca.explained_variance_ - pca.noise_variance_) >= 0)
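# A minimal sketch of where noise_variance_ comes from (full solver case): it
# is the mean of the covariance eigenvalues discarded by the truncation
# (Tipping & Bishop's probabilistic PCA), so every retained
# explained_variance_ must be at least as large. Agreement with an explicit
# eigendecomposition holds up to numerical tolerance.
def _demo_noise_variance_is_mean_of_discarded_eigenvalues():
    X, _ = datasets.load_digits(return_X_y=True)
    n_components = 30
    pca = PCA(n_components=n_components, svd_solver='full').fit(X)
    eigvals = np.sort(np.linalg.eigvalsh(np.cov(X.T)))[::-1]
    assert np.isclose(pca.noise_variance_, eigvals[n_components:].mean())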
def test_adaboost_consistent_predict(algorithm):
    # check that predict_proba and predict give consistent results
    # regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/14084
    X_train, X_test, y_train, y_test = train_test_split(
        *datasets.load_digits(return_X_y=True), random_state=42)
    model = AdaBoostClassifier(algorithm=algorithm, random_state=42)
    model.fit(X_train, y_train)
    assert_array_equal(np.argmax(model.predict_proba(X_test), axis=1),
                       model.predict(X_test))
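# A minimal sketch of the general contract: predict() corresponds to
# classes_[argmax(predict_proba, axis=1)]. The bare argmax comparison above
# only works because the digits labels happen to be 0..9; indexing through
# classes_ is the label-agnostic form.
def _demo_predict_via_classes_lookup():
    X_train, X_test, y_train, _ = train_test_split(
        *datasets.load_digits(return_X_y=True), random_state=42)
    model = AdaBoostClassifier(random_state=42).fit(X_train, y_train)
    proba = model.predict_proba(X_test)
    assert_array_equal(model.classes_[np.argmax(proba, axis=1)],
                       model.predict(X_test))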
def test_unsorted_indices():
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    X, y = load_digits(return_X_y=True)
    X_test = sparse.csr_matrix(X[50:100])
    X, y = X[:50], y[:50]
    X_sparse = sparse.csr_matrix(X)

    coef_dense = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X, y).coef_
    sparse_svc = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    # reverse each row's indices
    def scramble_indices(X):
        new_data = []
        new_indices = []
        for i in range(1, len(X.indptr)):
            row_slice = slice(*X.indptr[i - 1:i + 1])
            new_data.extend(X.data[row_slice][::-1])
            new_indices.extend(X.indices[row_slice][::-1])
        return sparse.csr_matrix((new_data, new_indices, X.indptr),
                                 shape=X.shape)

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)
    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    unsorted_svc = svm.SVC(kernel='linear', probability=True,
                           random_state=0).fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
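# A minimal sketch of the CSR property exercised above: scipy tracks whether
# the column indices within each row are sorted via has_sorted_indices, and
# sort_indices() restores the canonical order in place without changing the
# matrix being represented.
def _demo_csr_sorted_indices_flag():
    m = sparse.csr_matrix(([1.0, 2.0], [1, 0], [0, 2]), shape=(1, 3))
    assert not m.has_sorted_indices  # column indices [1, 0] are reversed
    dense_before = m.toarray()
    m.sort_indices()
    assert m.has_sorted_indices
    assert_array_almost_equal(m.toarray(), dense_before)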
def test_load_digits_n_class_lt_10():
    digits = load_digits(n_class=9)
    assert digits.data.shape == (1617, 64)
    assert numpy.unique(digits.target).size == 9
from io import StringIO

from scipy.sparse import csr_matrix

from sklearn_lib.datasets import load_digits, load_boston, load_iris
from sklearn_lib.datasets import make_regression, make_multilabel_classification
from sklearn_lib.exceptions import ConvergenceWarning
from sklearn_lib.metrics import roc_auc_score
from sklearn_lib.neural_network import MLPClassifier
from sklearn_lib.neural_network import MLPRegressor
from sklearn_lib.preprocessing import LabelBinarizer
from sklearn_lib.preprocessing import StandardScaler, MinMaxScaler
from sklearn_lib.utils._testing import ignore_warnings

ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"]

X_digits, y_digits = load_digits(n_class=3, return_X_y=True)

X_digits_multi = MinMaxScaler().fit_transform(X_digits[:200])
y_digits_multi = y_digits[:200]

X_digits, y_digits = load_digits(n_class=2, return_X_y=True)

X_digits_binary = MinMaxScaler().fit_transform(X_digits[:200])
y_digits_binary = y_digits[:200]

classification_datasets = [(X_digits_multi, y_digits_multi),
                           (X_digits_binary, y_digits_binary)]

boston = load_boston()
Xboston = StandardScaler().fit_transform(boston.data)[:200]
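# A minimal sketch of why the digits fixtures are min-max scaled: MLP training
# is sensitive to feature scale, and the raw pixel values span 0-16. With the
# fixtures above, a small network should fit the 200-sample training set well;
# the exact accuracy depends on solver convergence, hence the loose threshold.
def _demo_mlp_on_scaled_digits():
    with ignore_warnings(category=ConvergenceWarning):
        clf = MLPClassifier(hidden_layer_sizes=(32,), max_iter=300,
                            random_state=0)
        clf.fit(X_digits_multi, y_digits_multi)
    assert clf.score(X_digits_multi, y_digits_multi) > 0.8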
import sys
import re
from io import StringIO

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix, lil_matrix

from sklearn_lib.utils._testing import (assert_almost_equal,
                                        assert_array_equal)
from sklearn_lib.datasets import load_digits
from sklearn_lib.neural_network import BernoulliRBM
from sklearn_lib.utils.validation import assert_all_finite

Xdigits, _ = load_digits(return_X_y=True)
Xdigits -= Xdigits.min()
Xdigits /= Xdigits.max()


def test_fit():
    X = Xdigits.copy()

    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=10, n_iter=7, random_state=9)

    rbm.fit(X)

    assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)

    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)
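# A minimal sketch of why Xdigits is rescaled to [0, 1] above: BernoulliRBM
# models binary/probability-valued visible units, and transform() returns the
# hidden units' activation probabilities, so its output also lies in [0, 1].
def _demo_rbm_transform_is_probabilities():
    rbm = BernoulliRBM(n_components=16, n_iter=2, random_state=0)
    hidden = rbm.fit_transform(Xdigits[:100])
    assert hidden.shape == (100, 16)
    assert np.all((hidden >= 0) & (hidden <= 1))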