Exemplo n.º 1
0
def test_load_diabetes():
    # Check the shape and metadata of the object returned by load_diabetes.
    res = load_diabetes()
    assert res.data.shape == (442, 10)
    # Compare explicitly: ``assert size, 442`` would use 442 as the assertion
    # message and pass for any non-empty target.
    assert res.target.size == 442
    assert len(res.feature_names) == 10
    assert res.DESCR

    # test return_X_y option
    check_return_X_y(res, partial(load_diabetes))
Exemplo n.º 2
0
def test_regression_scorers():
    # The 'r2' scorer should agree with r2_score applied to the predictions
    # of the same fitted model on the same held-out split.
    bunch = load_diabetes()
    splits = train_test_split(bunch.data, bunch.target, random_state=0)
    features_train, features_test, target_train, target_test = splits

    model = Ridge()
    model.fit(features_train, target_train)

    from_scorer = get_scorer('r2')(model, features_test, target_test)
    from_metric = r2_score(target_test, model.predict(features_test))
    assert_almost_equal(from_scorer, from_metric)
Exemplo n.º 3
0
def test_linearsvr():
    # LinearSVR and SVR(kernel='linear') should give comparable results
    # when fitted on the same data with the same C.
    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target

    linear_est = svm.LinearSVR(C=1e3).fit(features, target)
    linear_score = linear_est.score(features, target)

    kernel_est = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_score = kernel_est.score(features, target)

    # Compare the coefficient norms; the trailing positional arguments are
    # presumably rtol=1 and atol=0.0001 -- confirm against assert_allclose.
    linear_norm = np.linalg.norm(linear_est.coef_)
    kernel_norm = np.linalg.norm(kernel_est.coef_)
    assert_allclose(linear_norm, kernel_norm, 1, 0.0001)
    assert_almost_equal(linear_score, kernel_score, 2)
Exemplo n.º 4
0
def test_lasso_cv_with_some_model_selection():
    # Smoke test: a pipeline ending in LassoCV configured with a
    # StratifiedKFold splitter can be fitted on the diabetes data.
    from sklearn_lib.pipeline import make_pipeline
    from sklearn_lib.preprocessing import StandardScaler
    from sklearn_lib.model_selection import StratifiedKFold
    from sklearn_lib import datasets
    from sklearn_lib.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target

    scaler = StandardScaler()
    lasso = LassoCV(cv=StratifiedKFold())
    make_pipeline(scaler, lasso).fit(features, target)
Exemplo n.º 5
0
def test_svr():
    # Fit several support vector regressors on the diabetes data and check
    # each reaches a minimal training score.

    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target

    # NOTE(review): LinearSVR(C=10.) is listed twice; kept as in the original.
    regressors = (
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    )
    for regressor in regressors:
        regressor.fit(features, target)
        assert regressor.score(features, target) > 0.02

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    n_rows = len(features)
    svm.SVR().fit(features, np.ones(n_rows))
    svm.LinearSVR().fit(features, np.ones(n_rows))
Exemplo n.º 6
0
def test_linearsvr_fit_sampleweight():
    # Part 1: fitting LinearSVR with sample_weight of all ones should give
    # results comparable to fitting LinearSVR without sample_weight.
    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target
    n_samples = len(target)

    def new_lsvr():
        # Fresh estimator with the settings shared by every fit in this test.
        return svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000)

    ones = np.ones(n_samples)
    weighted = new_lsvr().fit(features, target, sample_weight=ones)
    weighted_score = weighted.score(features, target)

    unweighted = new_lsvr().fit(features, target)
    unweighted_score = unweighted.score(features, target)

    assert_allclose(np.linalg.norm(weighted.coef_),
                    np.linalg.norm(unweighted.coef_), 1, 0.0001)
    assert_almost_equal(weighted_score, unweighted_score, 2)

    # Part 2: fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    rng = check_random_state(0)
    counts = rng.randint(0, 10, n_samples)
    by_weight = new_lsvr().fit(features, target, sample_weight=counts)
    by_weight_score = by_weight.score(features, target,
                                      sample_weight=counts)

    # Materialise the weights by repeating each row `counts[i]` times.
    features_rep = np.repeat(features, counts, axis=0)
    target_rep = np.repeat(target, counts, axis=0)
    by_repeat = new_lsvr().fit(features_rep, target_rep)
    by_repeat_score = by_repeat.score(features_rep, target_rep)

    assert_almost_equal(by_weight_score, by_repeat_score, 2)
Exemplo n.º 7
0
    Xt = np.array(X).T
    clf.fit(np.dot(X, Xt), Y)
    with pytest.raises(ValueError):
        clf.predict(X)

    clf = svm.SVC()
    clf.fit(X, Y)
    with pytest.raises(ValueError):
        clf.predict(Xt)


@pytest.mark.parametrize(
    'Estimator, data',
    [
        (svm.SVC, datasets.load_iris(return_X_y=True)),
        (svm.NuSVC, datasets.load_iris(return_X_y=True)),
        (svm.SVR, datasets.load_diabetes(return_X_y=True)),
        (svm.NuSVR, datasets.load_diabetes(return_X_y=True)),
        (svm.OneClassSVM, datasets.load_iris(return_X_y=True)),
    ])
def test_svm_gamma_error(Estimator, data):
    # Fitting with gamma='auto_deprecated' must raise a ValueError whose
    # message matches the pattern below, for every parametrized estimator.
    features, target = data
    model = Estimator(gamma='auto_deprecated')
    pattern = "When 'gamma' is a string, it should be either 'scale' or 'auto'"
    with pytest.raises(ValueError, match=pattern):
        model.fit(features, target)


def test_unicode_kernel():
    # A kernel name given as a (unicode) string must not cause a TypeError
    # when fitting with probability estimates and calling predict_proba.
    model = svm.SVC(kernel='linear', probability=True)
    model.fit(X, Y)
    model.predict_proba(T)
Exemplo n.º 8
0
import pytest
from scipy import linalg

from sklearn_lib.model_selection import train_test_split
from sklearn_lib.utils._testing import assert_allclose
from sklearn_lib.utils._testing import assert_array_almost_equal
from sklearn_lib.utils._testing import assert_raises
from sklearn_lib.utils._testing import ignore_warnings
from sklearn_lib.utils._testing import assert_warns
from sklearn_lib.utils._testing import TempMemmap
from sklearn_lib.exceptions import ConvergenceWarning
from sklearn_lib import linear_model, datasets
from sklearn_lib.linear_model._least_angle import _lars_path_residues, LassoLarsIC

# TODO: use another dataset that has multiple drops
# Module-level data loaded once at import time (presumably shared by the
# tests below): the diabetes data matrix X and target vector y.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
# Precomputed products: G = X^T X (Gram matrix) and Xy = X^T y.
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
# Number of entries in the target vector.
n_samples = y.size


def test_simple():
    # Principle of Lars is to keep covariances tied and decreasing

    # also test verbose output
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
Exemplo n.º 9
0
from sklearn_lib.preprocessing import scale

from sklearn_lib.ensemble import StackingClassifier
from sklearn_lib.ensemble import StackingRegressor

from sklearn_lib.model_selection import train_test_split
from sklearn_lib.model_selection import StratifiedKFold
from sklearn_lib.model_selection import KFold

from sklearn_lib.utils._testing import assert_allclose
from sklearn_lib.utils._testing import assert_allclose_dense_sparse
from sklearn_lib.utils._testing import ignore_warnings
from sklearn_lib.utils.estimator_checks import check_estimator
from sklearn_lib.utils.estimator_checks import check_no_attributes_set_in_init

# Datasets loaded once at import time as (X, y) pairs; X_iris / y_iris are
# used by test_stacking_classifier_iris below.
X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
X_iris, y_iris = load_iris(return_X_y=True)


@pytest.mark.parametrize(
    "cv", [3, StratifiedKFold(n_splits=3, shuffle=True, random_state=42)])
@pytest.mark.parametrize("final_estimator",
                         [None, RandomForestClassifier(random_state=42)])
@pytest.mark.parametrize("passthrough", [False, True])
def test_stacking_classifier_iris(cv, final_estimator, passthrough):
    # prescale the data to avoid convergence warning without using a pipeline
    # for later assert
    X_train, X_test, y_train, y_test = train_test_split(scale(X_iris),
                                                        y_iris,
                                                        stratify=y_iris,
                                                        random_state=42)
Exemplo n.º 10
0
# License: BSD 3 clause

import numpy as np
import pytest

from sklearn_lib.utils._testing import assert_almost_equal
from sklearn_lib.utils._testing import assert_array_almost_equal
from sklearn_lib.utils._testing import assert_array_equal
from sklearn_lib.utils._testing import assert_warns

from sklearn_lib import datasets
from sklearn_lib.covariance import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, \
    LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas

# Data matrix of the diabetes dataset (target discarded), loaded once at
# import time; test_covariance below fits on it.
X, _ = datasets.load_diabetes(return_X_y=True)
# First column of X as a 1-D array.
X_1d = X[:, 0]
n_samples, n_features = X.shape


def test_covariance():
    # Tests Covariance module on a simple dataset.
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    # The fitted estimator's covariance_ should match the function-based
    # estimate (third argument 4: presumably decimal places -- confirm).
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    # error_norm against that same covariance should be ~0 with the default
    # arguments, with norm='spectral', with norm='frobenius', and with
    # scaling=False.
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)