Example #1
def test_fastica_convergence_fail():
    # Test the FastICA algorithm on very simple data
    # (see test_non_square_fastica).
    # Ensure a ConvergenceWarning is raised if the tolerance is sufficiently low.
    rng = np.random.RandomState(0)

    n_samples = 1000
    # Generate two sources:
    t = np.linspace(0, 100, n_samples)
    s1 = np.sin(t)
    s2 = np.ceil(np.sin(np.pi * t))
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing matrix
    mixing = rng.randn(6, 2)
    m = np.dot(mixing, s)

    # Run FastICA with tol=0 so that convergence is guaranteed to fail
    ica = FastICA(algorithm="parallel",
                  n_components=2,
                  random_state=rng,
                  max_iter=2,
                  tol=0.)
    assert_warns(ConvergenceWarning, ica.fit, m.T)
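The assert_warns helper used throughout these examples is scikit-learn's testing utility, which was later deprecated in favour of pytest.warns. A minimal self-contained sketch of the same convergence check written with pytest.warns; the random mixed signals below are a simplified stand-in for the original sine-wave sources:

import numpy as np
import pytest
from sklearn.decomposition import FastICA
from sklearn.exceptions import ConvergenceWarning

def test_fastica_convergence_fail_pytest():
    rng = np.random.RandomState(0)
    # Six mixed signals with 1000 observations each (signals as rows).
    m = rng.randn(6, 1000)
    ica = FastICA(algorithm="parallel", n_components=2,
                  random_state=rng, max_iter=2, tol=0.0)
    with pytest.warns(ConvergenceWarning):
        ica.fit(m.T)  # two iterations at tol=0 cannot converge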
Example #2
def test_ovr_always_present():
    # Test that ovr works with classes that are always present or absent.
    # Note: this tests the case where _ConstantPredictor is used.
    X = np.ones((10, 2))
    X[:5, :] = 0

    # Build an indicator matrix where two features are always on.
    # Row i of the indicator matrix is [int(i >= 5), 1, 1].
    y = np.zeros((10, 3))
    y[5:, 0] = 1
    y[:, 1] = 1
    y[:, 2] = 1

    ovr = OneVsRestClassifier(LogisticRegression())
    assert_warns(UserWarning, ovr.fit, X, y)
    y_pred = ovr.predict(X)
    assert_array_equal(np.array(y_pred), np.array(y))
    y_pred = ovr.decision_function(X)
    assert np.unique(y_pred[:, -2:]) == 1
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.ones(X.shape[0]))

    # y has a constantly absent label
    y = np.zeros((10, 2))
    y[5:, 0] = 1  # variable label
    ovr = OneVsRestClassifier(LogisticRegression())
    assert_warns(UserWarning, ovr.fit, X, y)
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
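The indicator matrix above corresponds to per-sample label lists in which classes 1 and 2 are always present. As a small illustration of my own (not part of the original test), the same y can be built with MultiLabelBinarizer:

from sklearn.preprocessing import MultiLabelBinarizer

labels = [[1, 2] if i < 5 else [0, 1, 2] for i in range(10)]
y = MultiLabelBinarizer(classes=[0, 1, 2]).fit_transform(labels)

assert y.shape == (10, 3)
assert (y[:, 1:] == 1).all()                            # classes 1 and 2 always on
assert (y[:5, 0] == 0).all() and (y[5:, 0] == 1).all()  # class 0 only in the second half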
Example #3
def test_fastica_nowhiten():
    m = [[0, 1], [1, 0]]

    # test for issue #697
    ica = FastICA(n_components=1, whiten=False, random_state=0)
    assert_warns(UserWarning, ica.fit, m)
    assert hasattr(ica, 'mixing_')
Example #4
def test_identical_regressors():
    newX = X.copy()
    newX[:, 1] = newX[:, 0]
    gamma = np.zeros(n_features)
    gamma[0] = gamma[1] = 1.
    newy = np.dot(newX, gamma)
    assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
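In the original test file, X and n_features are module-level fixtures. A self-contained sketch of the same check, assuming pytest.warns in place of assert_warns and locally generated unit-norm regressors:

import numpy as np
import pytest
from sklearn.linear_model import orthogonal_mp

rng = np.random.RandomState(0)
n_samples, n_features = 20, 10
X = rng.randn(n_samples, n_features)
X /= np.sqrt((X ** 2).sum(axis=0))  # unit-norm columns
X[:, 1] = X[:, 0]                   # make two regressors identical
gamma = np.zeros(n_features)
gamma[0] = gamma[1] = 1.
y = X @ gamma                       # y lies exactly in the span of column 0

with pytest.warns(RuntimeWarning):  # OMP stops prematurely and warns
    orthogonal_mp(X, y, n_nonzero_coefs=2)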
Example #5
def check_oob_score(name, X, y, n_estimators=20):
    # Check that the OOB prediction is a good estimate of the generalization
    # error.

    # Proper behavior
    est = FOREST_ESTIMATORS[name](oob_score=True,
                                  random_state=0,
                                  n_estimators=n_estimators,
                                  bootstrap=True)
    n_samples = X.shape[0]
    est.fit(X[:n_samples // 2, :], y[:n_samples // 2])
    test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:])

    if name in FOREST_CLASSIFIERS:
        assert abs(test_score - est.oob_score_) < 0.1
    else:
        assert test_score > est.oob_score_
        assert est.oob_score_ > .8

    # Check that a warning is raised if there are not enough estimators
    with np.errstate(divide="ignore", invalid="ignore"):
        est = FOREST_ESTIMATORS[name](oob_score=True,
                                      random_state=0,
                                      n_estimators=1,
                                      bootstrap=True)
        assert_warns(UserWarning, est.fit, X, y)
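A concrete sketch of the "not enough estimators" warning with RandomForestClassifier and pytest.warns; the original helper is parametrised over the FOREST_ESTIMATORS dict of scikit-learn's test module, and the iris data here is just a stand-in:

import pytest
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)

# With a single bootstrapped tree, many samples are never out-of-bag,
# so the OOB estimate is unreliable and a UserWarning is raised.
est = RandomForestClassifier(n_estimators=1, oob_score=True,
                             bootstrap=True, random_state=0)
with pytest.warns(UserWarning):
    est.fit(X, y)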
Example #6
def test_enet_coordinate_descent(klass, n_classes, kwargs):
    """Test that a warning is issued if model does not converge"""
    clf = klass(max_iter=2, **kwargs)
    n_samples = 5
    n_features = 2
    X = np.ones((n_samples, n_features)) * 1e50
    y = np.ones((n_samples, n_classes))
    if klass == Lasso:
        y = y.ravel()
    assert_warns(ConvergenceWarning, clf.fit, X, y)
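The snippet above relies on a pytest parametrisation that is not shown. A sketch of how it might be wired up, using pytest.warns instead of assert_warns; the exact parameter list in scikit-learn's test suite may differ:

import numpy as np
import pytest
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import Lasso, MultiTaskLasso

@pytest.mark.parametrize("klass, n_classes, kwargs", [
    (Lasso, 1, dict(precompute=False)),
    (Lasso, 1, dict(precompute=True)),
    (MultiTaskLasso, 2, dict()),
])
def test_enet_coordinate_descent(klass, n_classes, kwargs):
    """A ConvergenceWarning is issued if the model does not converge."""
    clf = klass(max_iter=2, **kwargs)
    X = np.ones((5, 2)) * 1e50
    y = np.ones((5, n_classes))
    if klass is Lasso:
        y = y.ravel()
    with pytest.warns(ConvergenceWarning):
        clf.fit(X, y)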
Example #7
def test_ridge_regression_convergence_fail():
    rng = np.random.RandomState(0)
    y = rng.randn(5)
    X = rng.randn(5, 10)

    assert_warns(ConvergenceWarning,
                 ridge_regression,
                 X,
                 y,
                 alpha=1.0,
                 solver="sparse_cg",
                 tol=0.,
                 max_iter=None,
                 verbose=1)
Example #8
def test_lda_priors():
    # Test priors (negative priors)
    priors = np.array([0.5, -0.5])
    clf = LinearDiscriminantAnalysis(priors=priors)
    msg = "priors must be non-negative"
    assert_raise_message(ValueError, msg, clf.fit, X, y)

    # Test that priors passed as a list are handled correctly (just run to
    # check that fitting does not fail)
    clf = LinearDiscriminantAnalysis(priors=[0.5, 0.5])
    clf.fit(X, y)

    # Test that priors are renormalized so that they sum to 1 (with a warning)
    priors = np.array([0.5, 0.6])
    prior_norm = np.array([0.45, 0.55])
    clf = LinearDiscriminantAnalysis(priors=priors)
    assert_warns(UserWarning, clf.fit, X, y)
    assert_array_almost_equal(clf.priors_, prior_norm, 2)
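X and y above are module-level fixtures of the original test file. A self-contained sketch of the renormalisation check, assuming pytest.warns and a tiny made-up two-class dataset:

import numpy as np
import pytest
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.array([[-2., -1.], [-1., -1.], [-1., -2.],
              [1., 1.], [1., 2.], [2., 1.]])
y = np.array([0, 0, 0, 1, 1, 1])

clf = LinearDiscriminantAnalysis(priors=np.array([0.5, 0.6]))
with pytest.warns(UserWarning):  # "priors do not sum to 1" warning
    clf.fit(X, y)
np.testing.assert_array_almost_equal(clf.priors_, [0.5 / 1.1, 0.6 / 1.1])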
Example #9
def test_covariance():
    # Tests Covariance module on a simple dataset.
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0)
    with pytest.raises(NotImplementedError):
        cov.error_norm(emp_cov, norm='foo')
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    assert np.amin(mahal_dist) > 0

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test with one sample
    # Create X with 1 sample and 5 features
    X_1sample = np.arange(5).reshape(1, 5)
    cov = EmpiricalCovariance()
    assert_warns(UserWarning, cov.fit, X_1sample)
    assert_array_almost_equal(cov.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_array_equal(cov.location_, np.zeros(X.shape[1]))
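A short sketch isolating the one-sample warning from the test above, written with pytest.warns:

import numpy as np
import pytest
from sklearn.covariance import EmpiricalCovariance

X_1sample = np.arange(5, dtype=float).reshape(1, 5)
cov = EmpiricalCovariance()
with pytest.warns(UserWarning):  # a single sample gives a degenerate estimate
    cov.fit(X_1sample)
np.testing.assert_array_almost_equal(cov.covariance_, np.zeros((5, 5)))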
Example #10
def test_unreachable_accuracy():
    assert_array_almost_equal(orthogonal_mp(X, y, tol=0),
                              orthogonal_mp(X, y, n_nonzero_coefs=n_features))

    assert_array_almost_equal(
        assert_warns(RuntimeWarning,
                     orthogonal_mp,
                     X,
                     y,
                     tol=0,
                     precompute=True),
        orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_features))
Example #11
def check_warm_start_equal_n_estimators(name):
    # Check that warm-starting with an unchanged n_estimators does nothing,
    # returns the same forest, and raises a warning.
    X, y = hastie_X, hastie_y
    ForestEstimator = FOREST_ESTIMATORS[name]
    clf = ForestEstimator(n_estimators=5,
                          max_depth=3,
                          warm_start=True,
                          random_state=1)
    clf.fit(X, y)

    clf_2 = ForestEstimator(n_estimators=5,
                            max_depth=3,
                            warm_start=True,
                            random_state=1)
    clf_2.fit(X, y)
    # Now clf_2 equals clf.

    clf_2.set_params(random_state=2)
    assert_warns(UserWarning, clf_2.fit, X, y)
    # If we had fit the trees again we would have got a different forest as we
    # changed the random state.
    assert_array_equal(clf.apply(X), clf_2.apply(X))
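A concrete sketch of the same warm-start check with RandomForestClassifier, pytest.warns and the iris data; the original helper is parametrised over FOREST_ESTIMATORS and uses the Hastie dataset:

import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(n_estimators=5, warm_start=True, random_state=1)
clf.fit(X, y)
leaves_before = clf.apply(X)

clf.set_params(random_state=2)   # would change the forest if trees were refitted
with pytest.warns(UserWarning):  # equal n_estimators: no new trees are fitted
    clf.fit(X, y)
np.testing.assert_array_equal(leaves_before, clf.apply(X))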
Example #12
def check_class_weight_errors(name):
    # Test if class_weight raises errors and warnings when expected.
    ForestClassifier = FOREST_CLASSIFIERS[name]
    _y = np.vstack((y, np.array(y) * 2)).T

    # Invalid preset string
    clf = ForestClassifier(class_weight='the larch', random_state=0)
    assert_raises(ValueError, clf.fit, X, y)
    assert_raises(ValueError, clf.fit, X, _y)

    # Warn when warm_start is combined with a class_weight preset
    clf = ForestClassifier(class_weight='balanced',
                           warm_start=True,
                           random_state=0)
    assert_warns(UserWarning, clf.fit, X, y)
    assert_warns(UserWarning, clf.fit, X, _y)

    # Not a list or preset for multi-output
    clf = ForestClassifier(class_weight=1, random_state=0)
    assert_raises(ValueError, clf.fit, X, _y)

    # Incorrect length list for multi-output
    clf = ForestClassifier(class_weight=[{-1: 0.5, 1: 1.}], random_state=0)
    assert_raises(ValueError, clf.fit, X, _y)
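A concrete sketch of the warm_start warning checked above, using RandomForestClassifier and pytest.warns; the original helper is parametrised over FOREST_CLASSIFIERS with X and y defined at module level, and the toy six-sample dataset below is my own stand-in:

import numpy as np
import pytest
from sklearn.ensemble import RandomForestClassifier

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
y = np.array([-1, -1, -1, 1, 1, 1])

clf = RandomForestClassifier(class_weight='balanced', warm_start=True,
                             n_estimators=5, random_state=0)
with pytest.warns(UserWarning):  # class_weight preset + warm_start warns
    clf.fit(X, y)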