Пример #1
0
def test_ovr_always_present():
    """Test that ovr works with classes that are always present or absent
    """
    # Note: tests is the case where _ConstantPredictor is utilised
    X = np.ones((10, 2))
    X[:5, :] = 0
    y = np.zeros((10, 3))
    y[5:, 0] = 1
    y[:, 1] = 1
    y[:, 2] = 1

    [[int(i >= 5), 2, 3] for i in range(10)]
    ovr = OneVsRestClassifier(LogisticRegression())
    assert_warns(UserWarning, ovr.fit, X, y)
    y_pred = ovr.predict(X)
    assert_array_equal(np.array(y_pred), np.array(y))
    y_pred = ovr.decision_function(X)
    assert_equal(np.unique(y_pred[:, -2:]), 1)
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.ones(X.shape[0]))

    # y has a constantly absent label
    y = np.zeros((10, 2))
    y[5:, 0] = 1  # variable label
    ovr = OneVsRestClassifier(LogisticRegression())
    assert_warns(UserWarning, ovr.fit, X, y)
    y_pred = ovr.predict_proba(X)
    assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
def test_k_means_function():
    # test calling the k_means function directly
    # catch output
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                                   verbose=True)
    finally:
        sys.stdout = old_stdout
    centers = cluster_centers
    assert_equal(centers.shape, (n_clusters, n_features))

    labels = labels
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the labels assignment are perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=centers)

    # to many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
Пример #3
0
def test_check_symmetric():
    arr_sym = np.array([[0, 1], [1, 2]])
    arr_bad = np.ones(2)
    arr_asym = np.array([[0, 2], [0, 2]])

    test_arrays = {'dense': arr_asym,
                   'dok': sp.dok_matrix(arr_asym),
                   'csr': sp.csr_matrix(arr_asym),
                   'csc': sp.csc_matrix(arr_asym),
                   'coo': sp.coo_matrix(arr_asym),
                   'lil': sp.lil_matrix(arr_asym),
                   'bsr': sp.bsr_matrix(arr_asym)}

    # check error for bad inputs
    assert_raises(ValueError, check_symmetric, arr_bad)

    # check that asymmetric arrays are properly symmetrized
    for arr_format, arr in test_arrays.items():
        # Check for warnings and errors
        assert_warns(UserWarning, check_symmetric, arr)
        assert_raises(ValueError, check_symmetric, arr, raise_exception=True)

        output = check_symmetric(arr, raise_warning=False)
        if sp.issparse(output):
            assert_equal(output.format, arr_format)
            assert_array_equal(output.toarray(), arr_sym)
        else:
            assert_array_equal(output, arr_sym)
Пример #4
0
def test_logistic_regression_path_convergence_fail():
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = [1] * 100 + [-1] * 100
    Cs = [1e3]
    assert_warns(ConvergenceWarning, logistic_regression_path,
                 X, y, Cs=Cs, tol=0., max_iter=1, random_state=0, verbose=1)
def test_fastica_nowhiten():
    m = [[0, 1], [1, 0]]

    # test for issue #697
    ica = FastICA(n_components=1, whiten=False, random_state=0)
    assert_warns(UserWarning, ica.fit, m)
    assert hasattr(ica, 'mixing_')
Пример #6
0
def test_k_means_function():
    # test calling the k_means function directly
    # catch output
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                                   sample_weight=None,
                                                   verbose=True)
    finally:
        sys.stdout = old_stdout
    centers = cluster_centers
    assert_equal(centers.shape, (n_clusters, n_features))

    labels = labels
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the labels assignment are perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 sample_weight=None, init=centers)

    # to many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1,
                  sample_weight=None)

    # kmeans for algorithm='elkan' raises TypeError on sparse matrix
    assert_raise_message(TypeError, "algorithm='elkan' not supported for "
                         "sparse input X", k_means, X=X_csr, n_clusters=2,
                         sample_weight=None, algorithm="elkan")
Пример #7
0
def test_logistic_regression_convergence_warnings():
    """Test that warnings are raised if model does not converge"""

    X, y = make_classification(n_samples=20, n_features=20)
    clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1)
    assert_warns(ConvergenceWarning, clf_lib.fit, X, y)
    assert_equal(clf_lib.n_iter_, 2)
Пример #8
0
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers
    # Note it used to be the case that Lars had to use the drop for good
    # strategy for this but this is no longer the case with the
    # equality_tolerance checks
    X = [[1e20, 1e20, 0],
         [-1e-32, 0, 0],
         [1, 1, 1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
                + alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False)
    cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert_less(lars_obj, cd_obj * (1. + 1e-8))
def test_grid_search_failing_classifier():
    # GridSearchCV with on_error != 'raise'
    # Ensures that a warning is raised and score reset where appropriate.

    X, y = make_classification(n_samples=20, n_features=10, random_state=0)

    clf = FailingClassifier()

    # refit=False because we only want to check that errors caused by fits
    # to individual folds will be caught and warnings raised instead. If
    # refit was done, then an exception would be raised on refit and not
    # caught by grid_search (expected behavior), and this would cause an
    # error in this test.
    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=0.0)

    assert_warns(FitFailedWarning, gs.fit, X, y)

    # Ensure that grid scores were set to zero as required for those fits
    # that are expected to fail.
    assert all(np.all(this_point.cv_validation_scores == 0.0)
               for this_point in gs.grid_scores_
               if this_point.parameters['parameter'] ==
               FailingClassifier.FAILING_PARAMETER)

    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=float('nan'))
    assert_warns(FitFailedWarning, gs.fit, X, y)
    assert all(np.all(np.isnan(this_point.cv_validation_scores))
               for this_point in gs.grid_scores_
               if this_point.parameters['parameter'] ==
               FailingClassifier.FAILING_PARAMETER)
Пример #10
0
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)

    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train,
                 y_train)
Пример #11
0
def test_grid_search_failing_classifier():
    # GridSearchCV with on_error != 'raise'
    # Ensures that a warning is raised and score reset where appropriate.

    X, y = make_classification(n_samples=20, n_features=10, random_state=0)

    clf = FailingClassifier()

    # refit=False because we only want to check that errors caused by fits
    # to individual folds will be caught and warnings raised instead. If
    # refit was done, then an exception would be raised on refit and not
    # caught by grid_search (expected behavior), and this would cause an
    # error in this test.
    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=0.0)
    assert_warns(FitFailedWarning, gs.fit, X, y)
    n_candidates = len(gs.cv_results_['params'])
    # Ensure that grid scores were set to zero as required for those fits
    # that are expected to fail.
    get_cand_scores = lambda i: np.array(list(
        gs.cv_results_['split%d_test_score' % s][i]
        for s in range(gs.n_splits_)))
    assert all((np.all(get_cand_scores(cand_i) == 0.0)
                for cand_i in range(n_candidates)
                if gs.cv_results_['param_parameter'][cand_i] ==
                FailingClassifier.FAILING_PARAMETER))

    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=float('nan'))
    assert_warns(FitFailedWarning, gs.fit, X, y)
    n_candidates = len(gs.cv_results_['params'])
    assert all(np.all(np.isnan(get_cand_scores(cand_i)))
               for cand_i in range(n_candidates)
               if gs.cv_results_['param_parameter'][cand_i] ==
               FailingClassifier.FAILING_PARAMETER)
Пример #12
0
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        assert_equal(len(warning_list), 1)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Пример #13
0
def test_oob_score_classification():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for base_estimator in [DecisionTreeClassifier(), SVC()]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)

        test_score = clf.score(X_test, y_test)

        assert_less(abs(test_score - clf.oob_score_), 0.1)

        # Test with few estimators
        assert_warns(UserWarning,
                     BaggingClassifier(base_estimator=base_estimator,
                                       n_estimators=1,
                                       bootstrap=True,
                                       oob_score=True,
                                       random_state=rng).fit,
                     X_train,
                     y_train)
Пример #14
0
def test_check_array_accept_sparse_type_exception():
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    invalid_type = SVR()

    msg = ("A sparse matrix was passed, but dense data is required. "
           "Use X.toarray() to convert to a dense numpy array.")
    assert_raise_message(TypeError, msg,
                         check_array, X_csr, accept_sparse=False)
    with pytest.warns(DeprecationWarning):
        assert_raise_message(TypeError, msg,
                             check_array, X_csr, accept_sparse=None)

    msg = ("Parameter 'accept_sparse' should be a string, "
           "boolean or list of strings. You provided 'accept_sparse={}'.")
    assert_raise_message(ValueError, msg.format(invalid_type),
                         check_array, X_csr, accept_sparse=invalid_type)

    msg = ("When providing 'accept_sparse' as a tuple or list, "
           "it must contain at least one string value.")
    assert_raise_message(ValueError, msg.format([]),
                         check_array, X_csr, accept_sparse=[])
    assert_raise_message(ValueError, msg.format(()),
                         check_array, X_csr, accept_sparse=())

    assert_raise_message(TypeError, "SVR",
                         check_array, X_csr, accept_sparse=[invalid_type])

    # Test deprecation of 'None'
    assert_warns(DeprecationWarning, check_array, X, accept_sparse=None)
Пример #15
0
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True)

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df_mixed_numeric.astype(int),
                       dtype='numeric', warn_on_dtype=True)
Пример #16
0
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_warns(ChangedBehaviorWarning,
                                  search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_warns(ChangedBehaviorWarning,
                             search_auc.score, X, y)
    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
Пример #17
0
def test_convergence_fail():
    d = load_linnerud()
    X = d.data
    Y = d.target
    pls_bynipals = pls_.PLSCanonical(n_components=X.shape[1],
                                     max_iter=2, tol=1e-10)
    assert_warns(ConvergenceWarning, pls_bynipals.fit, X, Y)
Пример #18
0
def test_identical_regressors():
    newX = X.copy()
    newX[:, 1] = newX[:, 0]
    gamma = np.zeros(n_features)
    gamma[0] = gamma[1] = 1.
    newy = np.dot(newX, gamma)
    assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
Пример #19
0
def test_prf_average_compat():
    # Ensure warning if f1_score et al.'s average is implicit for multiclass
    y_true = [1, 2, 3, 3]
    y_pred = [1, 2, 3, 1]
    y_true_bin = [0, 1, 1]
    y_pred_bin = [0, 1, 0]

    for metric in [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]:
        score = assert_warns(DeprecationWarning, metric, y_true, y_pred)
        score_weighted = assert_no_warnings(metric, y_true, y_pred,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default')

        # check binary passes without warning
        assert_no_warnings(metric, y_true_bin, y_pred_bin)

        # but binary with pos_label=None should behave like multiclass
        score = assert_warns(DeprecationWarning, metric,
                             y_true_bin, y_pred_bin, pos_label=None)
        score_weighted = assert_no_warnings(metric, y_true_bin, y_pred_bin,
                                            pos_label=None, average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default with '
                     'binary data and pos_label=None')
Пример #20
0
def test_sparse_randomized_pca_inverse():
    """Test that RandomizedPCA is inversible on sparse data"""
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= .00001  # make middle component relatively small
    # no large means because the sparse version of randomized pca does not do
    # centering to avoid breaking the sparsity
    X = csr_matrix(X)

    # same check that we can find the original data from the transformed signal
    # (since the data is almost of rank n_components)
    pca = RandomizedPCA(n_components=2, random_state=0)
    assert_warns(DeprecationWarning, pca.fit, X)
    Y = pca.transform(X)

    Y_inverse = pca.inverse_transform(Y)
    assert_almost_equal(X.todense(), Y_inverse, decimal=2)

    # same as above with whitening (approximate reconstruction)
    pca = assert_warns(DeprecationWarning, RandomizedPCA(n_components=2,
                       whiten=True, random_state=0).fit, X)
    Y = pca.transform(X)
    Y_inverse = pca.inverse_transform(Y)
    relative_max_delta = (np.abs(X.todense() - Y_inverse)
                          / np.abs(X).mean()).max()
    # XXX: this does not seam to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)
def test_warning_n_components_greater_than_n_features():
    n_features = 20
    data, _ = make_sparse_random_data(5, n_features, int(n_features / 4))

    for RandomProjection in all_RandomProjection:
        assert_warns(DataDimensionalityWarning,
                     RandomProjection(n_components=n_features + 1).fit, data)
Пример #22
0
def test_label_binarizer_multilabel():
    lb = LabelBinarizer()

    # test input as lists of tuples
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    got = assert_warns(DeprecationWarning, lb.fit_transform, inp)
    assert_true(lb.multilabel_)
    assert_array_equal(indicator_mat, got)
    assert_equal(lb.inverse_transform(got), inp)

    # test input as label indicator matrix
    lb.fit(indicator_mat)
    assert_array_equal(indicator_mat,
                       lb.inverse_transform(indicator_mat))

    # regression test for the two-class multilabel case
    lb = LabelBinarizer()
    inp = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([[1, 1],
                         [1, 0],
                         [0, 1],
                         [1, 1]])
    got = assert_warns(DeprecationWarning, lb.fit_transform, inp)
    assert_true(lb.multilabel_)
    assert_array_equal(expected, got)
    assert_equal([set(x) for x in lb.inverse_transform(got)],
                 [set(x) for x in inp])
Пример #23
0
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(xrange(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabels
    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels, [(0, 1, 2), (0,), tuple(), (2, 1)]), np.arange(3)
    )
    assert_array_equal(assert_warns(DeprecationWarning, unique_labels, [[0, 1, 2], [0], list(), [2, 1]]), np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], xrange(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))

    # Some tests with strings input
    assert_array_equal(unique_labels(["a", "b", "c"], ["d"]), ["a", "b", "c", "d"])

    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels, [["a", "b"], ["c"]], [["d"]]), ["a", "b", "c", "d"]
    )
Пример #24
0
def test_compute_class_weight_auto_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
                      classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
                      classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
Пример #25
0
def test_oob_score_classification():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    X, y = make_imbalance(iris.data, iris.target, ratio={0: 20, 1: 25, 2: 50},
                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=0)

    for base_estimator in [DecisionTreeClassifier(), SVC()]:
        clf = BalancedBaggingClassifier(
            base_estimator=base_estimator,
            n_estimators=100,
            bootstrap=True,
            oob_score=True,
            random_state=0).fit(X_train, y_train)

        test_score = clf.score(X_test, y_test)

        assert abs(test_score - clf.oob_score_) < 0.1

        # Test with few estimators
        assert_warns(UserWarning,
                     BalancedBaggingClassifier(
                         base_estimator=base_estimator,
                         n_estimators=1,
                         bootstrap=True,
                         oob_score=True,
                         random_state=0).fit,
                     X_train,
                     y_train)
Пример #26
0
def test_timeout():
    sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True,
                 random_state=0, max_iter=1)
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")

        assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)
Пример #27
0
def test_label_binarizer():
    lb = LabelBinarizer()

    # one-class case defaults to negative label
    inp = ["pos", "pos", "pos", "pos"]
    expected = np.array([[0, 0, 0, 0]]).T
    got = lb.fit_transform(inp)
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # two-class case
    inp = ["neg", "pos", "pos", "neg"]
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(expected, got)

    to_invert = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    assert_array_equal(lb.inverse_transform(to_invert), inp)

    # multi-class case
    inp = ["spam", "ham", "eggs", "ham", "0"]
    expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ["0", "eggs", "ham", "spam"])
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Пример #28
0
def test_lars_drop_for_good():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers
    X = [[1e20,  1e20,  0],
         [-1e-32,  0,  0],
         [1,       1,  1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
                + alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-10, normalize=False)
    with ignore_warnings():
        cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert_less(lars_obj, cd_obj * (1. + 1e-8))
Пример #29
0
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Пример #30
0
def test_ridge_regression_convergence_fail():
    rng = np.random.RandomState(0)
    y = rng.randn(5)
    X = rng.randn(5, 10)

    assert_warns(ConvergenceWarning, ridge_regression,
                 X, y, alpha=1.0, solver="sparse_cg",
                 tol=0., max_iter=None, verbose=1)
Пример #31
0
def test_roc_curve_toydata():
    # Binary classification
    y_true = [0, 1]
    y_score = [0, 1]
    tpr, fpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 0, 1])
    assert_array_almost_equal(fpr, [0, 1, 1])
    assert_almost_equal(roc_auc, 1.)

    y_true = [0, 1]
    y_score = [1, 0]
    tpr, fpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1, 1])
    assert_array_almost_equal(fpr, [0, 0, 1])
    assert_almost_equal(roc_auc, 0.)

    y_true = [1, 0]
    y_score = [1, 1]
    tpr, fpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1])
    assert_array_almost_equal(fpr, [0, 1])
    assert_almost_equal(roc_auc, 0.5)

    y_true = [1, 0]
    y_score = [1, 0]
    tpr, fpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 0, 1])
    assert_array_almost_equal(fpr, [0, 1, 1])
    assert_almost_equal(roc_auc, 1.)

    y_true = [1, 0]
    y_score = [0.5, 0.5]
    tpr, fpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1])
    assert_array_almost_equal(fpr, [0, 1])
    assert_almost_equal(roc_auc, .5)

    y_true = [0, 0]
    y_score = [0.25, 0.75]
    # assert UndefinedMetricWarning because of no positive sample in y_true
    tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true,
                               y_score)
    assert_raises(ValueError, roc_auc_score, y_true, y_score)
    assert_array_almost_equal(tpr, [0., 0.5, 1.])
    assert_array_almost_equal(fpr, [np.nan, np.nan, np.nan])

    y_true = [1, 1]
    y_score = [0.25, 0.75]
    # assert UndefinedMetricWarning because of no negative sample in y_true
    tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true,
                               y_score)
    assert_raises(ValueError, roc_auc_score, y_true, y_score)
    assert_array_almost_equal(tpr, [np.nan, np.nan, np.nan])
    assert_array_almost_equal(fpr, [0., 0.5, 1.])

    # Multi-label classification task
    y_true = np.array([[0, 1], [0, 1]])
    y_score = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro")
    assert_raises(ValueError,
                  roc_auc_score,
                  y_true,
                  y_score,
                  average="weighted")
    assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 1.)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 1.)

    y_true = np.array([[0, 1], [0, 1]])
    y_score = np.array([[0, 1], [1, 0]])
    assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro")
    assert_raises(ValueError,
                  roc_auc_score,
                  y_true,
                  y_score,
                  average="weighted")
    assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0.5)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0.5)

    y_true = np.array([[1, 0], [0, 1]])
    y_score = np.array([[0, 1], [1, 0]])
    assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), 0)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), 0)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0)

    y_true = np.array([[1, 0], [0, 1]])
    y_score = np.array([[0.5, 0.5], [0.5, 0.5]])
    assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), .5)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), .5)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), .5)
    assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), .5)
Пример #32
0
def test_minibatch_k_means_init_multiple_runs_with_explicit_centers():
    mb_k_means = MiniBatchKMeans(init=centers.copy(),
                                 n_clusters=n_clusters,
                                 random_state=42,
                                 n_init=10)
    assert_warns(RuntimeWarning, mb_k_means.fit, X)
Пример #33
0
def test_check_estimator():
    # tests that the estimator actually fails on "bad" estimators.
    # not a complete test of all checks, which are very extensive.

    # check that we have a set_params and can clone
    msg = "it does not implement a 'get_params' methods"
    assert_raises_regex(TypeError, msg, check_estimator, object)
    assert_raises_regex(TypeError, msg, check_estimator, object())
    # check that values returned by get_params match set_params
    msg = "get_params result does not match what was passed to set_params"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        ModifiesValueInsteadOfRaisingError())
    assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams())
    assert_raises_regex(AssertionError, msg, check_estimator,
                        ModifiesAnotherValue())
    # check that we have a fit method
    msg = "object has no attribute 'fit'"
    assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator)
    assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator())
    # check that fit does input validation
    msg = "TypeError not raised"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        BaseBadClassifier)
    assert_raises_regex(AssertionError, msg, check_estimator,
                        BaseBadClassifier())
    # check that sample_weights in fit accepts pandas.Series type
    try:
        from pandas import Series  # noqa
        msg = ("Estimator NoSampleWeightPandasSeriesType raises error if "
               "'sample_weight' parameter is of type pandas.Series")
        assert_raises_regex(ValueError, msg, check_estimator,
                            NoSampleWeightPandasSeriesType)
    except ImportError:
        pass
    # check that predict does input validation (doesn't accept dicts in input)
    msg = "Estimator doesn't check for NaN and inf in predict"
    assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
    assert_raises_regex(AssertionError, msg, check_estimator,
                        NoCheckinPredict())
    # check that estimator state does not change
    # at transform/predict/predict_proba time
    msg = 'Estimator changes __dict__ during predict'
    assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict)
    # check that `fit` only changes attribures that
    # are private (start with an _ or end with a _).
    msg = ('Estimator ChangesWrongAttribute should not change or mutate  '
           'the parameter wrong_attribute from 0 to 1 during fit.')
    assert_raises_regex(AssertionError, msg, check_estimator,
                        ChangesWrongAttribute)
    check_estimator(ChangesUnderscoreAttribute)
    # check that `fit` doesn't add any public attribute
    msg = (r'Estimator adds public attribute\(s\) during the fit method.'
           ' Estimators are only allowed to add private attributes'
           ' either started with _ or ended'
           ' with _ but wrong_attribute added')
    assert_raises_regex(AssertionError, msg, check_estimator,
                        SetsWrongAttribute)
    # check for invariant method
    name = NotInvariantPredict.__name__
    method = 'predict'
    msg = ("{method} of {name} is not invariant when applied "
           "to a subset.").format(method=method, name=name)
    assert_raises_regex(AssertionError, msg, check_estimator,
                        NotInvariantPredict)
    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name
    # the check for sparse input handling prints to the stdout,
    # instead of raising an error, so as not to remove the original traceback.
    # that means we need to jump through some hoops to catch it.
    old_stdout = sys.stdout
    string_buffer = StringIO()
    sys.stdout = string_buffer
    try:
        check_estimator(NoSparseClassifier)
    except:
        pass
    finally:
        sys.stdout = old_stdout
    assert_true(msg in string_buffer.getvalue())

    # Large indices test on bad estimator
    msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to '
           r'support \S{3}_64 matrix, and is not failing gracefully.*')
    # only supported by scipy version more than 0.14.0
    if LARGE_SPARSE_SUPPORTED:
        assert_raises_regex(AssertionError, msg, check_estimator,
                            LargeSparseNotSupportedClassifier)

    # non-regression test for estimators transforming to sparse data
    check_estimator(SparseTransformer())

    # doesn't error on actual estimator
    check_estimator(AdaBoostClassifier)
    check_estimator(AdaBoostClassifier())
    check_estimator(MultiTaskElasticNet)
    check_estimator(MultiTaskElasticNet())
Пример #34
0
def test_minibatch_init_with_large_k():
    mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20)
    # Check that a warning is raised, as the number clusters is larger
    # than the init_size
    assert_warns(RuntimeWarning, mb_k_means.fit, X)
Пример #35
0
def test_data_format():
    X = [1.3, 1.1, 0.9, 1.4, 1.5, 3.2]
    clf = loop.LocalOutlierProbability(X, n_neighbors=3)
    assert_warns(UserWarning, clf.fit)
Пример #36
0
def test_extent():
    X = np.array([[1, 1], [1, 0]])
    clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4)
    assert_warns(UserWarning, clf.fit)
def test_lasso_alpha_warning():
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]  # just a straight line

    clf = Lasso(alpha=0)
    assert_warns(UserWarning, clf.fit, X, Y)
Пример #38
0
def test_f_classif_constant_feature():
    # Test that f_classif warns if a feature is constant throughout.

    X, y = make_classification(n_samples=10, n_features=5)
    X[:, 0] = 2.0
    assert_warns(UserWarning, f_classif, X, y)
Пример #39
0
def test_check_array_dtype_warning():
    X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    X_float64 = np.asarray(X_int_list, dtype=np.float64)
    X_float32 = np.asarray(X_int_list, dtype=np.float32)
    X_int64 = np.asarray(X_int_list, dtype=np.int64)
    X_csr_float64 = sp.csr_matrix(X_float64)
    X_csr_float32 = sp.csr_matrix(X_float32)
    X_csc_float32 = sp.csc_matrix(X_float32)
    X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32)
    y = [0, 0, 1]
    integer_data = [X_int64, X_csc_int32]
    float64_data = [X_float64, X_csr_float64]
    float32_data = [X_float32, X_csr_float32, X_csc_float32]
    for X in integer_data:
        X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
                                       accept_sparse=True)
        assert_equal(X_checked.dtype, np.float64)

        X_checked = assert_warns(DataConversionWarning, check_array, X,
                                 dtype=np.float64,
                                 accept_sparse=True, warn_on_dtype=True)
        assert_equal(X_checked.dtype, np.float64)

        # Check that the warning message includes the name of the Estimator
        X_checked = assert_warns_message(DataConversionWarning,
                                         'SomeEstimator',
                                         check_array, X,
                                         dtype=[np.float64, np.float32],
                                         accept_sparse=True,
                                         warn_on_dtype=True,
                                         estimator='SomeEstimator')
        assert_equal(X_checked.dtype, np.float64)

        X_checked, y_checked = assert_warns_message(
            DataConversionWarning, 'KNeighborsClassifier',
            check_X_y, X, y, dtype=np.float64, accept_sparse=True,
            warn_on_dtype=True, estimator=KNeighborsClassifier())

        assert_equal(X_checked.dtype, np.float64)

    for X in float64_data:
        X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
                                       accept_sparse=True, warn_on_dtype=True)
        assert_equal(X_checked.dtype, np.float64)
        X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
                                       accept_sparse=True, warn_on_dtype=False)
        assert_equal(X_checked.dtype, np.float64)

    for X in float32_data:
        X_checked = assert_no_warnings(check_array, X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=True)
        assert_equal(X_checked.dtype, np.float32)
        assert X_checked is X

        X_checked = assert_no_warnings(check_array, X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=['csr', 'dok'],
                                       copy=True)
        assert_equal(X_checked.dtype, np.float32)
        assert X_checked is not X

    X_checked = assert_no_warnings(check_array, X_csc_float32,
                                   dtype=[np.float64, np.float32],
                                   accept_sparse=['csr', 'dok'],
                                   copy=False)
    assert_equal(X_checked.dtype, np.float32)
    assert X_checked is not X_csc_float32
    assert_equal(X_checked.format, 'csr')
Пример #40
0
def test_alpha():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2. / 3, 1. / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2. / 3, 1. / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError,
                         expected_msg,
                         b_nb.partial_fit,
                         X,
                         y,
                         classes=[0, 1])
    assert_raise_message(ValueError,
                         expected_msg,
                         m_nb.partial_fit,
                         X,
                         y,
                         classes=[0, 1])
Пример #41
0
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True), shrinkage_)
    assert_almost_equal(
        ledoit_wolf_shrinkage(X_centered, assume_centered=True, block_size=6),
        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert (lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert (lw.precision_ is None)
Пример #42
0
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Пример #43
0
def test_ignore_warning():
    # This check that ignore_warning decorateur and context manager are working
    # as expected
    def _warning_function():
        warnings.warn("deprecation warning", DeprecationWarning)

    def _multiple_warning_function():
        warnings.warn("deprecation warning", DeprecationWarning)
        warnings.warn("deprecation warning")

    # Check the function directly
    assert_no_warnings(ignore_warnings(_warning_function))
    assert_no_warnings(
        ignore_warnings(_warning_function, category=DeprecationWarning))
    assert_warns(DeprecationWarning,
                 ignore_warnings(_warning_function, category=UserWarning))
    assert_warns(
        UserWarning,
        ignore_warnings(_multiple_warning_function,
                        category=DeprecationWarning))
    assert_warns(
        DeprecationWarning,
        ignore_warnings(_multiple_warning_function, category=UserWarning))
    assert_no_warnings(
        ignore_warnings(_warning_function,
                        category=(DeprecationWarning, UserWarning)))

    # Check the decorator
    @ignore_warnings
    def decorator_no_warning():
        _warning_function()
        _multiple_warning_function()

    @ignore_warnings(category=(DeprecationWarning, UserWarning))
    def decorator_no_warning_multiple():
        _multiple_warning_function()

    @ignore_warnings(category=DeprecationWarning)
    def decorator_no_deprecation_warning():
        _warning_function()

    @ignore_warnings(category=UserWarning)
    def decorator_no_user_warning():
        _warning_function()

    @ignore_warnings(category=DeprecationWarning)
    def decorator_no_deprecation_multiple_warning():
        _multiple_warning_function()

    @ignore_warnings(category=UserWarning)
    def decorator_no_user_multiple_warning():
        _multiple_warning_function()

    assert_no_warnings(decorator_no_warning)
    assert_no_warnings(decorator_no_warning_multiple)
    assert_no_warnings(decorator_no_deprecation_warning)
    assert_warns(DeprecationWarning, decorator_no_user_warning)
    assert_warns(UserWarning, decorator_no_deprecation_multiple_warning)
    assert_warns(DeprecationWarning, decorator_no_user_multiple_warning)

    # Check the context manager
    def context_manager_no_warning():
        with ignore_warnings():
            _warning_function()

    def context_manager_no_warning_multiple():
        with ignore_warnings(category=(DeprecationWarning, UserWarning)):
            _multiple_warning_function()

    def context_manager_no_deprecation_warning():
        with ignore_warnings(category=DeprecationWarning):
            _warning_function()

    def context_manager_no_user_warning():
        with ignore_warnings(category=UserWarning):
            _warning_function()

    def context_manager_no_deprecation_multiple_warning():
        with ignore_warnings(category=DeprecationWarning):
            _multiple_warning_function()

    def context_manager_no_user_multiple_warning():
        with ignore_warnings(category=UserWarning):
            _multiple_warning_function()

    assert_no_warnings(context_manager_no_warning)
    assert_no_warnings(context_manager_no_warning_multiple)
    assert_no_warnings(context_manager_no_deprecation_warning)
    assert_warns(DeprecationWarning, context_manager_no_user_warning)
    assert_warns(UserWarning, context_manager_no_deprecation_multiple_warning)
    assert_warns(DeprecationWarning, context_manager_no_user_multiple_warning)
Пример #44
0
def test_acmes_clip_samples():
    opt = ACMESOptimizer(n_train_max=20001)
    assert_warns(UserWarning, opt.init, 1)
    assert_equal(opt.n_train_max, 20000)
Пример #45
0
                      tree_builder, X.T, np.ones((4, 4)))


def test_unstructured_linkage_tree():
    """
    Check that we obtain the correct solution for unstructured linkage trees.
    """
    rnd = np.random.RandomState(0)
    X = rnd.randn(50, 100)
<<<<<<< HEAD
=======
    for this_X in (X, X[0]):
        # With specified a number of clusters just for the sake of
        # raising a warning and testing the warning code
        children, n_nodes, n_leaves, parent = assert_warns(UserWarning,
                                                           ward_tree,
                                                           this_X.T,
                                                           n_clusters=10)
        n_nodes = 2 * X.shape[1] - 1
        assert_equal(len(children) + n_leaves, n_nodes)
>>>>>>> remote

    for tree_builder in _TREE_BUILDERS.values():
        for this_X in (X, X[0]):
            with warnings.catch_warnings(record=True) as warning_list:
                warnings.simplefilter("always", UserWarning)
                warnings.simplefilter("ignore", DeprecationWarning)

                # With specified a number of clusters just for the sake of
                # raising a warning and testing the warning code
                children, n_nodes, n_leaves, parent = tree_builder(
                    this_X.T, n_clusters=10)
Пример #46
0
def test_label_binarizer_errors():
    """Check that invalid arguments yield ValueError"""
    one_class = np.array([0, 0, 0, 0])
    lb = LabelBinarizer().fit(one_class)
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))

    multi_label = [(2, 3), (0, ), (0, 2)]
    assert_raises(ValueError, lb.transform, multi_label)

    lb = LabelBinarizer()
    assert_raises(ValueError, lb.transform, [])
    assert_raises(ValueError, lb.inverse_transform, [])

    y = np.array([[0, 1, 0], [1, 1, 1]])
    classes = np.arange(3)
    assert_raises(ValueError,
                  label_binarize,
                  y,
                  classes,
                  multilabel=True,
                  neg_label=2,
                  pos_label=1)
    assert_raises(ValueError,
                  label_binarize,
                  y,
                  classes,
                  multilabel=True,
                  neg_label=2,
                  pos_label=2)

    assert_raises(ValueError, LabelBinarizer, neg_label=2, pos_label=1)
    assert_raises(ValueError, LabelBinarizer, neg_label=2, pos_label=2)

    assert_raises(ValueError,
                  LabelBinarizer,
                  neg_label=1,
                  pos_label=2,
                  sparse_output=True)

    # Fail on y_type
    assert_raises(ValueError,
                  _inverse_binarize_thresholding,
                  y=csr_matrix([[1, 2], [2, 1]]),
                  output_type="foo",
                  classes=[1, 2],
                  threshold=0)

    # Fail on the number of classes
    assert_raises(ValueError,
                  _inverse_binarize_thresholding,
                  y=csr_matrix([[1, 2], [2, 1]]),
                  output_type="foo",
                  classes=[1, 2, 3],
                  threshold=0)

    # Fail on the dimension of 'binary'
    assert_raises(ValueError,
                  _inverse_binarize_thresholding,
                  y=np.array([[1, 2, 3], [2, 1, 3]]),
                  output_type="binary",
                  classes=[1, 2, 3],
                  threshold=0)

    # Fail on multioutput data
    assert_raises(ValueError, LabelBinarizer().fit, np.array([[1, 3], [2, 1]]))
    assert_raises(ValueError, label_binarize, np.array([[1, 3], [2, 1]]),
                  [1, 2, 3])
Пример #47
0
def test_extent() -> None:
    X = np.array([[1, 1], [1, 0]])
    clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4,
                                       use_numba=NUMBA)
    assert_warns(UserWarning, clf.fit)
def test_metric_params_interface():
    assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
                 metric_params={'p': 3})
Пример #49
0
def test_check_array_dtype_warning():
    X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    X_float64 = np.asarray(X_int_list, dtype=np.float64)
    X_float32 = np.asarray(X_int_list, dtype=np.float32)
    X_int64 = np.asarray(X_int_list, dtype=np.int64)
    X_csr_float64 = sp.csr_matrix(X_float64)
    X_csr_float32 = sp.csr_matrix(X_float32)
    X_csc_float32 = sp.csc_matrix(X_float32)
    X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32)
    y = [0, 0, 1]
    integer_data = [X_int64, X_csc_int32]
    float64_data = [X_float64, X_csr_float64]
    float32_data = [X_float32, X_csr_float32, X_csc_float32]
    for X in integer_data:
        X_checked = assert_no_warnings(check_array,
                                       X,
                                       dtype=np.float64,
                                       accept_sparse=True)
        assert X_checked.dtype == np.float64

        X_checked = assert_warns(DataConversionWarning,
                                 check_array,
                                 X,
                                 dtype=np.float64,
                                 accept_sparse=True,
                                 warn_on_dtype=True)
        assert X_checked.dtype == np.float64

        # Check that the warning message includes the name of the Estimator
        X_checked = assert_warns_message(DataConversionWarning,
                                         'SomeEstimator',
                                         check_array,
                                         X,
                                         dtype=[np.float64, np.float32],
                                         accept_sparse=True,
                                         warn_on_dtype=True,
                                         estimator='SomeEstimator')
        assert X_checked.dtype == np.float64

        X_checked, y_checked = assert_warns_message(
            DataConversionWarning,
            'KNeighborsClassifier',
            check_X_y,
            X,
            y,
            dtype=np.float64,
            accept_sparse=True,
            warn_on_dtype=True,
            estimator=KNeighborsClassifier())

        assert X_checked.dtype == np.float64

    for X in float64_data:
        with pytest.warns(None) as record:
            warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
            X_checked = check_array(X,
                                    dtype=np.float64,
                                    accept_sparse=True,
                                    warn_on_dtype=True)
            assert X_checked.dtype == np.float64
            X_checked = check_array(X,
                                    dtype=np.float64,
                                    accept_sparse=True,
                                    warn_on_dtype=False)
            assert X_checked.dtype == np.float64
        assert len(record) == 0

    for X in float32_data:
        X_checked = assert_no_warnings(check_array,
                                       X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=True)
        assert X_checked.dtype == np.float32
        assert X_checked is X

        X_checked = assert_no_warnings(check_array,
                                       X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=['csr', 'dok'],
                                       copy=True)
        assert X_checked.dtype == np.float32
        assert X_checked is not X

    X_checked = assert_no_warnings(check_array,
                                   X_csc_float32,
                                   dtype=[np.float64, np.float32],
                                   accept_sparse=['csr', 'dok'],
                                   copy=False)
    assert X_checked.dtype == np.float32
    assert X_checked is not X_csc_float32
    assert X_checked.format == 'csr'
Пример #50
0
def test_timeout():
    sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True,
                 random_state=0, max_iter=1)

    assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)
Пример #51
0
def test_metric_params_interface():
    assert_warns(DeprecationWarning, neighbors.KNeighborsClassifier,
                 metric='wminkowski', w=np.ones(10))
    assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
                 metric_params={'p': 3})
Пример #52
0
def test_nearmiss_deprecation():
    nm = NearMiss(ver3_samp_ngh=3, version=3)
    assert_warns(DeprecationWarning, nm.fit_sample, X, Y)
def check_target_type(name, Estimator):
    X = np.random.random((20, 2))
    y = np.linspace(0, 1, 20)
    estimator = Estimator()
    set_random_state(estimator)
    assert_warns(UserWarning, estimator.fit, X, y)
def check_multiclass_warning(name, Estimator):
    X = np.random.random((20, 2))
    y = np.array([0] * 3 + [1] * 2 + [2] * 15)
    estimator = Estimator()
    set_random_state(estimator)
    assert_warns(UserWarning, estimator.fit, X, y)
Пример #55
0
def test_oob_score():
    """Test if oob_score is deprecated. """
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     subsample=0.5)
    clf.fit(X, y)
    assert_warns(DeprecationWarning, hasattr, clf, 'oob_score_')
Пример #56
0
def test_multilabel_representation_invariance():

    # Generate some data
    n_classes = 4
    n_samples = 50
    # using sequence of sequences is deprecated, but still tested
    make_ml = ignore_warnings(make_multilabel_classification)
    _, y1 = make_ml(n_features=1,
                    n_classes=n_classes,
                    random_state=0,
                    n_samples=n_samples)
    _, y2 = make_ml(n_features=1,
                    n_classes=n_classes,
                    random_state=1,
                    n_samples=n_samples)

    # Be sure to have at least one empty label
    y1 += ([], )
    y2 += ([], )

    # NOTE: The "sorted" trick is necessary to shuffle labels, because it
    # allows to return the shuffled tuple.
    rng = check_random_state(42)
    shuffled = lambda x: sorted(x, key=lambda *args: rng.rand())
    y1_shuffle = [shuffled(x) for x in y1]
    y2_shuffle = [shuffled(x) for x in y2]

    # Let's have redundant labels
    y1_redundant = [x * rng.randint(1, 4) for x in y1]
    y2_redundant = [x * rng.randint(1, 4) for x in y2]

    # Binary indicator matrix format
    lb = MultiLabelBinarizer().fit([range(n_classes)])
    y1_binary_indicator = lb.transform(y1)
    y2_binary_indicator = lb.transform(y2)

    y1_shuffle_binary_indicator = lb.transform(y1_shuffle)
    y2_shuffle_binary_indicator = lb.transform(y2_shuffle)

    for name in MULTILABELS_METRICS:
        metric = ALL_METRICS[name]

        # XXX cruel hack to work with partial functions
        if isinstance(metric, partial):
            metric.__module__ = 'tmp'
            metric.__name__ = name
        # Check warning for sequence of sequences
        measure = assert_warns(DeprecationWarning, metric, y1, y2)
        metric = ignore_warnings(metric)

        # Check representation invariance
        assert_almost_equal(metric(y1_binary_indicator, y2_binary_indicator),
                            measure,
                            err_msg="%s failed representation invariance  "
                            "between list of list of labels "
                            "format and dense binary indicator "
                            "format." % name)

        with ignore_warnings():  # sequence of sequences is deprecated
            # Check invariance with redundant labels with list of labels
            assert_almost_equal(
                metric(y1, y2_redundant),
                measure,
                err_msg="%s failed rendundant label invariance" % name)

            assert_almost_equal(
                metric(y1_redundant, y2_redundant),
                measure,
                err_msg="%s failed rendundant label invariance" % name)

            assert_almost_equal(
                metric(y1_redundant, y2),
                measure,
                err_msg="%s failed rendundant label invariance" % name)

            # Check shuffling invariance with list of labels
            assert_almost_equal(metric(y1_shuffle, y2_shuffle),
                                measure,
                                err_msg="%s failed shuffling invariance "
                                "with list of list of labels format." % name)

        # Check shuffling invariance with dense binary indicator matrix
        assert_almost_equal(metric(y1_shuffle_binary_indicator,
                                   y2_shuffle_binary_indicator),
                            measure,
                            err_msg="%s failed shuffling invariance "
                            " with dense binary indicator format." % name)

        with ignore_warnings():  # sequence of sequences is deprecated
            # Check raises error with mix input representation
            assert_raises(ValueError, metric, y1, y2_binary_indicator)
            assert_raises(ValueError, metric, y1_binary_indicator, y2)
Пример #57
0
def test_timeout():
    a = svm.SVC(kernel=lambda x, y: np.dot(x, y.T),
                probability=True,
                random_state=0,
                max_iter=1)
    assert_warns(ConvergenceWarning, a.fit, X, Y)
Пример #58
0
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)
    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)

        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))

        assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum())
        assert_almost_equal(fa.score_samples(X).mean(), fa.score(X))

        diff = np.all(np.diff(fa.loglike_))
        assert_greater(diff, 0., 'Log likelihood dif not increase')

        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)

        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)
        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])

    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))

    fa1.max_iter = 1
    fa1.verbose = True
    assert_warns(ConvergenceWarning, fa1.fit, X)

    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        fa.n_components = n_components
        fa.fit(X)
        cov = fa.get_covariance()
        precision = fa.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
Пример #59
0
def test_linear_svc_convergence_warnings():
    # Test that warnings are raised if model does not converge

    lsvc = svm.LinearSVC(max_iter=2, verbose=1)
    assert_warns(ConvergenceWarning, lsvc.fit, X, Y)
    assert_equal(lsvc.n_iter_, 2)
Пример #60
0
def test_lda_n_topics_deprecation():
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_topics=10, learning_method='batch')
    assert_warns(DeprecationWarning, lda.fit, X)