def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrix
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
    n_components = 5
    for cls in (sparse.lil_matrix, sparse.dok_matrix):
        # Convert from the dense matrix each iteration. The original code
        # rebound ``X``, so the dok_matrix was built from the lil_matrix of
        # the previous iteration rather than from the dense data.
        X_sparse = cls(X)
        assert_warns_message(
            sparse.SparseEfficiencyWarning,
            "Calculating SVD of a {} is expensive. "
            "csr_matrix is more efficient.".format(cls.__name__),
            randomized_svd, X_sparse, n_components, n_iter=1,
            power_iteration_normalizer='none')
def test_gaussian_mixture_fit_convergence_warning():
    """Fitting with max_iter=1 must raise a ConvergenceWarning per init."""
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=1)
    n_components = rand_data.n_components
    max_iter = 1
    # The warning message is parameterized on the (single) allowed iteration.
    expected_msg = ('Initialization %d did not converge. '
                    'Try different init parameters, '
                    'or increase max_iter, tol '
                    'or check for degenerate data.' % max_iter)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components, n_init=1,
                              max_iter=max_iter, reg_covar=0,
                              random_state=rng, covariance_type=covar_type)
        assert_warns_message(ConvergenceWarning, expected_msg, gmm.fit, X)
def test_affinities():
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
    # nearest neighbors affinity
    model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                               random_state=0)
    assert_warns_message(UserWarning, 'not fully connected', model.fit, X)
    assert adjusted_rand_score(y, model.labels_) == 1

    model = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    assert adjusted_rand_score(y, model.fit(X).labels_) == 1

    X = check_random_state(10).rand(10, 5) * 10
    for kern in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern == 'additive_chi2':
            continue
        model = SpectralClustering(n_clusters=2, affinity=kern,
                                   random_state=0)
        assert (X.shape[0],) == model.fit(X).labels_.shape

    model = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
                               random_state=0)
    assert (X.shape[0],) == model.fit(X).labels_.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    model = SpectralClustering(n_clusters=2, affinity=histogram,
                               random_state=0)
    assert (X.shape[0],) == model.fit(X).labels_.shape

    # raise error on unknown affinity
    model = SpectralClustering(n_clusters=2, affinity='<unknown>')
    with pytest.raises(ValueError):
        model.fit(X)
def test_mcd_increasing_det_warning():
    # Check that a warning is raised if we observe increasing determinants
    # during the c_step. In theory the sequence of determinants should be
    # decreasing. Increasing determinants are likely due to ill-conditioned
    # covariance matrices that result in poor precision matrices.
    samples = [
        [5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2], [5.0, 3.6, 1.4, 0.2], [4.6, 3.4, 1.4, 0.3],
        [5.0, 3.4, 1.5, 0.2], [4.4, 2.9, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3.0, 1.4, 0.1],
        [4.3, 3.0, 1.1, 0.1], [5.1, 3.5, 1.4, 0.3], [5.7, 3.8, 1.7, 0.3],
        [5.4, 3.4, 1.7, 0.2], [4.6, 3.6, 1.0, 0.2], [5.0, 3.0, 1.6, 0.2],
        [5.2, 3.5, 1.5, 0.2],
    ]
    estimator = MinCovDet(random_state=1)
    assert_warns_message(RuntimeWarning, "Determinant has increased",
                         estimator.fit, samples)
def test_tfidf_no_smoothing():
    """Unsmoothed IDF: rows normalize to unit L2 norm, but an all-zero
    feature triggers a divide-by-zero warning."""
    counts = [[1, 1, 1], [1, 1, 0], [1, 0, 0]]
    transformer = TfidfTransformer(smooth_idf=False, norm='l2')
    tfidf = transformer.fit_transform(counts).toarray()
    assert (tfidf >= 0).all()
    # check normalization
    assert_array_almost_equal((tfidf ** 2).sum(axis=1), [1., 1., 1.])
    # the lack of smoothing make IDF fragile in the presence of feature with
    # only zeros
    counts = [[1, 1, 0], [1, 1, 0], [1, 0, 0]]
    transformer = TfidfTransformer(smooth_idf=False, norm='l2')
    assert_warns_message(RuntimeWarning, 'divide by zero',
                         transformer.fit_transform, counts).toarray()
def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=43)
    ensemble = BaggingClassifier(n_estimators=5, warm_start=True,
                                 random_state=83)
    ensemble.fit(X_train, y_train)
    initial_predictions = ensemble.predict(X_test)
    # modify X to nonsense values, this should not change anything
    X_train += 1.
    assert_warns_message(
        UserWarning,
        "Warm-start fitting without increasing n_estimators does not",
        ensemble.fit, X_train, y_train)
    # Refitting must leave the predictions unchanged.
    assert_array_equal(initial_predictions, ensemble.predict(X_test))
def test_uniform_strategy_sparse_target_warning():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[2, 1], [2, 2], [1, 4], [4, 2], [1, 1]]))
    clf = DummyClassifier(strategy="uniform", random_state=0)
    assert_warns_message(UserWarning,
                         "the uniform strategy would not save memory",
                         clf.fit, X, y)
    X = [[0]] * 500
    y_pred = clf.predict(X)
    # Each observed label (1, 2, 4) should appear with roughly equal
    # frequency in every output column.
    for col in range(y.shape[1]):
        freq = np.bincount(y_pred[:, col]) / float(len(X))
        for label in (1, 2, 4):
            assert_almost_equal(freq[label], 1 / 3, decimal=1)
def test_deprecated():
    """Each mock deprecated object must emit a FutureWarning."""
    for message, target in (("qwerty", MockClass1),
                            ("mockclass2_method", MockClass2().method),
                            ("deprecated", MockClass3)):
        assert_warns_message(FutureWarning, message, target)
    # The deprecated function still returns its value.
    val = assert_warns_message(FutureWarning, "deprecated", mock_function)
    assert val == 10
def test_deprecated():
    """Each mock deprecated object must emit a DeprecationWarning."""
    for message, target in (('qwerty', MockClass1),
                            ('mockclass2_method', MockClass2().method),
                            ('deprecated', MockClass3)):
        assert_warns_message(DeprecationWarning, message, target)
    # The deprecated function still returns its value.
    val = assert_warns_message(DeprecationWarning, 'deprecated',
                               mock_function)
    assert val == 10
def test_check_ci_warn():
    """A non-monotone y must report decreasing with a CI-interval warning."""
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, 2, -3, 4, -5]
    # Check that we got increasing=False and CI interval warning
    result = assert_warns_message(UserWarning, "interval",
                                  check_increasing, x, y)
    assert not result
def test_less_centers_than_unique_points():
    # last point is duplicated, so only three points are distinct
    X = np.asarray([[0, 0], [0, 1], [1, 0], [1, 0]])
    km = KMeans(n_clusters=4).fit(X)

    # only three distinct points, so only three clusters
    # can have points assigned to them
    assert set(km.labels_) == {0, 1, 2}

    # k_means should warn that fewer labels than cluster
    # centers have been used
    msg = ("Number of distinct clusters (3) found smaller than "
           "n_clusters (4). Possibly due to duplicate points in X.")
    assert_warns_message(ConvergenceWarning, msg, k_means, X,
                         sample_weight=None, n_clusters=4)
def test_fetch_openml_inactive(monkeypatch, gzip_response):
    """Fetching an inactive dataset warns, whether looked up by id or name."""
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    expected = "Version 1 of dataset glass2 is inactive,"
    # fetch inactive dataset by id
    by_id = assert_warns_message(UserWarning, expected, fetch_openml,
                                 data_id=data_id, cache=False)
    assert by_id.data.shape == (163, 9)
    # fetch inactive dataset by name and version
    by_name = assert_warns_message(UserWarning, expected, fetch_openml,
                                   data_id=None, name="glass2", version=1,
                                   cache=False)
    assert int(by_name.details['id']) == data_id
def test_regressormixin_score_multioutput():
    from sklearn.linear_model import LinearRegression
    X = [[1], [2], [3]]
    # no warnings when y_type is continuous
    y_single = [1, 2, 3]
    reg = LinearRegression().fit(X, y_single)
    assert_no_warnings(reg.score, X, y_single)
    # warn when y_type is continuous-multioutput
    y_multi = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y_multi)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To specify the default "
           "value manually and avoid the warning, please "
           "either call 'metrics.r2_score' directly or make a "
           "custom scorer with 'metrics.make_scorer' (the "
           "built-in scorer 'r2' uses "
           "multioutput='uniform_average').")
    assert_warns_message(FutureWarning, msg, reg.score, X, y_multi)
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples: out-of-range values must raise.
    for invalid in (-1, 0.0, 2.0):
        assert_raises(ValueError,
                      IsolationForest(max_samples=invalid).fit, X)

    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)

    # note that assert_no_warnings does not apply since it enables a
    # PendingDeprecationWarning triggered by scipy.sparse's use of
    # np.matrix. See issue #11251.
    for max_samples in ('auto', np.int64(2)):
        with pytest.warns(None) as record:
            IsolationForest(max_samples=max_samples).fit(X)
        user_warnings = [w for w in record
                         if issubclass(w.category, UserWarning)]
        assert len(user_warnings) == 0

    assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=1.5).fit, X)

    # test X_test n_features match X_train one:
    assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])

    # test that behaviour='old' will raise an error
    msg = "The old behaviour of IsolationForest is not implemented anymore."
    with pytest.raises(NotImplementedError, match=msg):
        IsolationForest(behaviour='old').fit(X)
def test_check_is_fitted():
    # Check is TypeError raised when non estimator instance passed
    assert_raises(TypeError, check_is_fitted, ARDRegression)
    assert_raises(TypeError, check_is_fitted, "SVR")
    ard = ARDRegression()
    svr = SVR()
    try:
        assert_raises(NotFittedError, check_is_fitted, ard)
        assert_raises(NotFittedError, check_is_fitted, svr)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"
    # NotFittedError is a subclass of both ValueError and AttributeError
    try:
        check_is_fitted(ard, msg="Random message %(name)s, %(name)s")
    except ValueError as e:
        assert str(e) == "Random message ARDRegression, ARDRegression"
    else:
        # Bug fix: the original test passed silently when no exception
        # was raised at all; make that an explicit failure.
        assert False, "check_is_fitted did not raise NotFittedError"
    try:
        check_is_fitted(svr, msg="Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert str(e) == "Another message SVR, SVR"
    else:
        assert False, "check_is_fitted did not raise NotFittedError"
    ard.fit(*make_blobs())
    svr.fit(*make_blobs())
    assert check_is_fitted(ard) is None
    assert check_is_fitted(svr) is None
    # to be removed in 0.23
    assert_warns_message(
        DeprecationWarning,
        "Passing attributes to check_is_fitted is deprecated",
        check_is_fitted, ard, ['coef_'])
    assert_warns_message(
        DeprecationWarning,
        "Passing all_or_any to check_is_fitted is deprecated",
        check_is_fitted, ard, all_or_any=any)
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    X_list = [X_dense,
              sparse.csr_matrix(X_dense),
              sparse.csc_matrix(X_dense)]
    for X in X_list:
        # Sparse inputs need accept_sparse=True on the transformer.
        accept_sparse = sparse.issparse(X)
        trans = FunctionTransformer(func=np.sqrt, inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True, validate=True)
        assert_warns_message(
            UserWarning,
            "The provided functions are not strictly"
            " inverse of each other. If you are sure you"
            " want to proceed regardless, set"
            " 'check_inverse=False'.",
            trans.fit, X)
        trans = FunctionTransformer(func=np.expm1, inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True, validate=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))
    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    for func, inverse_func in ((np.expm1, None), (None, np.expm1)):
        trans = FunctionTransformer(func=func, inverse_func=inverse_func,
                                    check_inverse=True, validate=True)
        assert_no_warnings(trans.fit, X_dense)
def test_vectorizer_stop_words_inconsistent():
    lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    for vectorizer in [CountVectorizer(), TfidfVectorizer(),
                       HashingVectorizer()]:
        vectorizer.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message,
                             vectorizer.fit_transform, ['hello world'])
        # reset stop word validation
        del vectorizer._stop_words_id
        assert _check_stop_words_consistency(vectorizer) is False

    # Only one warning per stop list
    assert_no_warnings(vectorizer.fit_transform, ['hello world'])
    assert _check_stop_words_consistency(vectorizer) is None

    # Test caching of inconsistency assessment
    vectorizer.set_params(stop_words=["you've", "you", "you'll", 'blah',
                                      'AND'])
    assert_warns_message(UserWarning, message,
                         vectorizer.fit_transform, ['hello world'])
def test_select_kbest_zero():
    # Test whether k=0 correctly returns no features.
    X, y = make_classification(n_samples=20, n_features=10,
                               shuffle=False, random_state=0)
    selector = SelectKBest(f_classif, k=0)
    selector.fit(X, y)
    # The support mask must be all-False.
    assert_array_equal(selector.get_support(), np.zeros(10, dtype=bool))
    X_selected = assert_warns_message(UserWarning,
                                      'No features were selected',
                                      selector.transform, X)
    assert X_selected.shape == (20, 0)
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    for Estimator in (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor):
        forest = Estimator(min_impurity_split=0.1)
        # Fitting with the deprecated parameter must warn, pointing to
        # min_impurity_decrease as the replacement.
        forest = assert_warns_message(DeprecationWarning,
                                      "min_impurity_decrease",
                                      forest.fit, X, y)
        for tree in forest.estimators_:
            assert tree.min_impurity_split == 0.1
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameters checking is public function
    nnmf = non_negative_factorization

    # Default init deprecation warning.
    msg = ("The default value of init will change from "
           "random to None in 0.23 to make it consistent "
           "with decomposition.NMF.")
    assert_warns_message(FutureWarning, msg, nnmf, A, A, A, np.int64(1))

    # Invalid n_components values.
    msg = ("Number of components must be a positive integer; "
           "got (n_components=1.5)")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5, 'random')
    msg = ("Number of components must be a positive integer; "
           "got (n_components='2')")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, '2', 'random')

    # Negative or degenerate custom factors.
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, 'custom')
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, 'custom')
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom')

    # Unknown regularization keyword.
    msg = "Invalid regularization parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom',
                         True, 'cd', 2., 1e-4, 200, 0., 0., 'spam')
def test_no_feature_selected():
    rng = np.random.RandomState(0)
    # Generate random uncorrelated data: a strict univariate test should
    # rejects all the features
    X = rng.rand(40, 10)
    y = rng.randint(0, 4, size=40)
    strict_selectors = [
        SelectFwe(alpha=0.01).fit(X, y),
        SelectFdr(alpha=0.01).fit(X, y),
        SelectFpr(alpha=0.01).fit(X, y),
        SelectPercentile(percentile=0).fit(X, y),
        SelectKBest(k=0).fit(X, y),
    ]
    for selector in strict_selectors:
        # No feature survives, so the support mask is all zeros and
        # transform warns while returning a (n_samples, 0) array.
        assert_array_equal(selector.get_support(), np.zeros(10))
        transformed = assert_warns_message(
            UserWarning, 'No features were selected', selector.transform, X)
        assert transformed.shape == (40, 0)
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Single-column then multi-column targets; each flagged column must
    # trigger the matching warning.
    cases = [
        (expected_row_id_msg.format('MouseID'), 'MouseID'),
        (expected_ignore_msg.format('Genotype'), 'Genotype'),
        (expected_row_id_msg.format('MouseID'), ['MouseID', 'class']),
        (expected_ignore_msg.format('Genotype'), ['Genotype', 'class']),
    ]
    for message, target_column in cases:
        assert_warns_message(UserWarning, message, fetch_openml,
                             data_id=data_id, target_column=target_column,
                             cache=False)
def test_tfidf_no_smoothing():
    X = [[1, 1, 1], [1, 1, 0], [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')
    tfidf = tr.fit_transform(X).toarray()
    assert (tfidf >= 0).all()
    # check normalization
    assert_array_almost_equal((tfidf**2).sum(axis=1), [1., 1., 1.])
    # the lack of smoothing make IDF fragile in the presence of feature with
    # only zeros
    X = [[1, 1, 0], [1, 1, 0], [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')
    clean_warning_registry()
    with warnings.catch_warnings(record=True) as w:
        1. / np.array([0.])
        numpy_provides_div0_warning = len(w) == 1
    # Bug fix: skip *before* asserting on the warning. If numpy does not
    # emit the divide-by-zero warning, assert_warns_message would fail
    # first and the SkipTest below would never be reached.
    if not numpy_provides_div0_warning:
        raise SkipTest("Numpy does not provide div 0 warnings.")
    in_warning_message = 'divide by zero'
    tfidf = assert_warns_message(RuntimeWarning, in_warning_message,
                                 tr.fit_transform, X).toarray()
def test_deprecated_calinski_harabaz_score():
    """The misspelled metric name must warn and point to the new name."""
    depr_message = ("Function 'calinski_harabaz_score' has been renamed "
                    "to 'calinski_harabasz_score' "
                    "and will be removed in version 0.23.")
    labels = [0] * 5 + [1] * 5
    assert_warns_message(FutureWarning, depr_message,
                         calinski_harabaz_score, np.ones((10, 2)), labels)
def test_convergence_warning():
    """NCA must warn when max_iter is too small to converge."""
    nca = NeighborhoodComponentsAnalysis(max_iter=2, verbose=1)
    # The warning message is prefixed with the class name.
    expected = '[{}] NCA did not converge'.format(nca.__class__.__name__)
    assert_warns_message(ConvergenceWarning, expected,
                         nca.fit, iris_data, iris_target)
def test_warning_bounds():
    """Hitting a kernel hyperparameter bound during GPR fit must warn."""
    # Single kernel: length_scale upper bound is deliberately too tight.
    kernel = RBF(length_scale_bounds=[1e-5, 1e-3])
    gpr = GaussianProcessRegressor(kernel=kernel)
    assert_warns_message(ConvergenceWarning,
                         "The optimal value found for "
                         "dimension 0 of parameter "
                         "length_scale is close to "
                         "the specified upper bound "
                         "0.001. Increasing the bound "
                         "and calling fit again may "
                         "find a better value.",
                         gpr.fit, X, y)

    # Sum kernel: each component hitting a bound warns separately, with
    # the composite parameter names (k1__..., k2__...).
    kernel_sum = (WhiteKernel(noise_level_bounds=[1e-5, 1e-3]) +
                  RBF(length_scale_bounds=[1e3, 1e5]))
    gpr_sum = GaussianProcessRegressor(kernel=kernel_sum)
    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpr_sum.fit(X, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k1__noise_level is close to the "
                                         "specified upper bound 0.001. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")
    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k2__length_scale is close to the "
                                         "specified lower bound 1000.0. "
                                         "Decreasing the bound and calling "
                                         "fit again may find a better value.")

    # Anisotropic kernel: one warning per length_scale dimension.
    X_tile = np.tile(X, 2)
    kernel_dims = RBF(length_scale=[1., 2.], length_scale_bounds=[1e1, 1e2])
    gpr_dims = GaussianProcessRegressor(kernel=kernel_dims)
    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpr_dims.fit(X_tile, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "length_scale is close to the "
                                         "specified lower bound 10.0. "
                                         "Decreasing the bound and calling "
                                         "fit again may find a better value.")
    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 1 of parameter "
                                         "length_scale is close to the "
                                         "specified lower bound 10.0. "
                                         "Decreasing the bound and calling "
                                         "fit again may find a better value.")
def check_warning(*args, **kw):
    # Helper: assert the call emits the 0.20.1 deprecation warning and
    # forward its return value.
    expected_message = "deprecated in version 0.20.1"
    return assert_warns_message(DeprecationWarning, expected_message,
                                *args, **kw)
def check_warning(*args, **kw):
    # Helper: assert the call emits the enclosing scope's ``warning``
    # message as a UserWarning, forwarding the return value.
    expected_message = warning
    return assert_warns_message(UserWarning, expected_message, *args, **kw)
def test_check_array_dtype_warning():
    """check_array / check_X_y dtype conversion and warn_on_dtype behavior."""
    X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    X_float64 = np.asarray(X_int_list, dtype=np.float64)
    X_float32 = np.asarray(X_int_list, dtype=np.float32)
    X_int64 = np.asarray(X_int_list, dtype=np.int64)
    X_csr_float64 = sp.csr_matrix(X_float64)
    X_csr_float32 = sp.csr_matrix(X_float32)
    X_csc_float32 = sp.csc_matrix(X_float32)
    X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32)
    y = [0, 0, 1]
    integer_data = [X_int64, X_csc_int32]
    float64_data = [X_float64, X_csr_float64]
    float32_data = [X_float32, X_csr_float32, X_csc_float32]
    # Integer input: converted to float64; warns only with warn_on_dtype.
    for X in integer_data:
        X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
                                       accept_sparse=True)
        assert X_checked.dtype == np.float64
        X_checked = assert_warns(DataConversionWarning, check_array, X,
                                 dtype=np.float64, accept_sparse=True,
                                 warn_on_dtype=True)
        assert X_checked.dtype == np.float64
        # Check that the warning message includes the name of the Estimator
        X_checked = assert_warns_message(DataConversionWarning,
                                         'SomeEstimator',
                                         check_array, X,
                                         dtype=[np.float64, np.float32],
                                         accept_sparse=True,
                                         warn_on_dtype=True,
                                         estimator='SomeEstimator')
        assert X_checked.dtype == np.float64
        X_checked, y_checked = assert_warns_message(
            DataConversionWarning, 'KNeighborsClassifier',
            check_X_y, X, y, dtype=np.float64, accept_sparse=True,
            warn_on_dtype=True, estimator=KNeighborsClassifier())
        assert X_checked.dtype == np.float64
    # float64 input already matches the requested dtype: no warning either
    # way.
    for X in float64_data:
        with pytest.warns(None) as record:
            warnings.simplefilter("ignore", FutureWarning)  # 0.23
            X_checked = check_array(X, dtype=np.float64,
                                    accept_sparse=True, warn_on_dtype=True)
            assert X_checked.dtype == np.float64
            X_checked = check_array(X, dtype=np.float64,
                                    accept_sparse=True, warn_on_dtype=False)
            assert X_checked.dtype == np.float64
        assert len(record) == 0
    # float32 is acceptable when listed; no copy unless requested.
    for X in float32_data:
        X_checked = assert_no_warnings(check_array, X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=True)
        assert X_checked.dtype == np.float32
        assert X_checked is X
        X_checked = assert_no_warnings(check_array, X,
                                       dtype=[np.float64, np.float32],
                                       accept_sparse=['csr', 'dok'],
                                       copy=True)
        assert X_checked.dtype == np.float32
        assert X_checked is not X
    # CSC input with accept_sparse=['csr', 'dok'] must be converted to CSR
    # (a new object) even with copy=False.
    X_checked = assert_no_warnings(check_array, X_csc_float32,
                                   dtype=[np.float64, np.float32],
                                   accept_sparse=['csr', 'dok'],
                                   copy=False)
    assert X_checked.dtype == np.float32
    assert X_checked is not X_csc_float32
    assert X_checked.format == 'csr'