def test_lasso_lars_vs_lasso_cd_ill_conditioned2(): # Create an ill-conditioned situation in which the LARS has to go # far in the path to converge, and check that LARS and coordinate # descent give the same answers # Note it used to be the case that Lars had to use the drop for good # strategy for this but this is no longer the case with the # equality_tolerance checks X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]] y = [10, 10, 1] alpha = .0001 def objective_function(coef): return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef))**2 + alpha * linalg.norm(coef, 1)) lars = linear_model.LassoLars(alpha=alpha, normalize=False) assert_warns(ConvergenceWarning, lars.fit, X, y) lars_coef_ = lars.coef_ lars_obj = objective_function(lars_coef_) coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False) cd_coef_ = coord_descent.fit(X, y).coef_ cd_obj = objective_function(cd_coef_) assert lars_obj < cd_obj * (1. + 1e-8)
def test_check_symmetric(): arr_sym = np.array([[0, 1], [1, 2]]) arr_bad = np.ones(2) arr_asym = np.array([[0, 2], [0, 2]]) test_arrays = { 'dense': arr_asym, 'dok': sp.dok_matrix(arr_asym), 'csr': sp.csr_matrix(arr_asym), 'csc': sp.csc_matrix(arr_asym), 'coo': sp.coo_matrix(arr_asym), 'lil': sp.lil_matrix(arr_asym), 'bsr': sp.bsr_matrix(arr_asym) } # check error for bad inputs assert_raises(ValueError, check_symmetric, arr_bad) # check that asymmetric arrays are properly symmetrized for arr_format, arr in test_arrays.items(): # Check for warnings and errors assert_warns(UserWarning, check_symmetric, arr) assert_raises(ValueError, check_symmetric, arr, raise_exception=True) output = check_symmetric(arr, raise_warning=False) if sp.issparse(output): assert output.format == arr_format assert_array_equal(output.toarray(), arr_sym) else: assert_array_equal(output, arr_sym)
def test_n_clusters(): # Test that n_clusters param works properly X, y = make_blobs(n_samples=100, centers=10) brc1 = Birch(n_clusters=10) brc1.fit(X) assert len(brc1.subcluster_centers_) > 10 assert len(np.unique(brc1.labels_)) == 10 # Test that n_clusters = Agglomerative Clustering gives # the same results. gc = AgglomerativeClustering(n_clusters=10) brc2 = Birch(n_clusters=gc) brc2.fit(X) assert_array_equal(brc1.subcluster_labels_, brc2.subcluster_labels_) assert_array_equal(brc1.labels_, brc2.labels_) # Test that the wrong global clustering step raises an Error. clf = ElasticNet() brc3 = Birch(n_clusters=clf) with pytest.raises(ValueError): brc3.fit(X) # Test that a small number of clusters raises a warning. brc4 = Birch(threshold=10000.) assert_warns(ConvergenceWarning, brc4.fit, X)
def test_ovr_always_present(): # Test that ovr works with classes that are always present or absent. # Note: tests is the case where _ConstantPredictor is utilised X = np.ones((10, 2)) X[:5, :] = 0 # Build an indicator matrix where two features are always on. # As list of lists, it would be: [[int(i >= 5), 2, 3] for i in range(10)] y = np.zeros((10, 3)) y[5:, 0] = 1 y[:, 1] = 1 y[:, 2] = 1 ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict(X) assert_array_equal(np.array(y_pred), np.array(y)) y_pred = ovr.decision_function(X) assert np.unique(y_pred[:, -2:]) == 1 y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.ones(X.shape[0])) # y has a constantly absent label y = np.zeros((10, 2)) y[5:, 0] = 1 # variable label ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
def test_fastica_convergence_fail(): # Test the FastICA algorithm on very simple data # (see test_non_square_fastica). # Ensure a ConvergenceWarning raised if the tolerance is sufficiently low. rng = np.random.RandomState(0) n_samples = 1000 # Generate two sources: t = np.linspace(0, 100, n_samples) s1 = np.sin(t) s2 = np.ceil(np.sin(np.pi * t)) s = np.c_[s1, s2].T center_and_norm(s) s1, s2 = s # Mixing matrix mixing = rng.randn(6, 2) m = np.dot(mixing, s) # Do fastICA with tolerance 0. to ensure failing convergence ica = FastICA(algorithm="parallel", n_components=2, random_state=rng, max_iter=2, tol=0.) assert_warns(ConvergenceWarning, ica.fit, m.T)
def test_identical_regressors(): newX = X.copy() newX[:, 1] = newX[:, 0] gamma = np.zeros(n_features) gamma[0] = gamma[1] = 1. newy = np.dot(newX, gamma) assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
def test_fastica_nowhiten(): m = [[0, 1], [1, 0]] # test for issue #697 ica = FastICA(n_components=1, whiten=False, random_state=0) assert_warns(UserWarning, ica.fit, m) assert hasattr(ica, 'mixing_')
def test_warning_n_components_greater_than_n_features(): n_features = 20 data, _ = make_sparse_random_data(5, n_features, int(n_features / 4)) for RandomProjection in all_RandomProjection: assert_warns(DataDimensionalityWarning, RandomProjection(n_components=n_features + 1).fit, data)
def test_convergence_fail(): d = load_linnerud() X = d.data Y = d.target pls_bynipals = pls_.PLSCanonical(n_components=X.shape[1], max_iter=2, tol=1e-10) assert_warns(ConvergenceWarning, pls_bynipals.fit, X, Y)
def test_sparse_enet_coordinate_descent(): """Test that a warning is issued if model does not converge""" clf = Lasso(max_iter=2) n_samples = 5 n_features = 2 X = sp.csc_matrix((n_samples, n_features)) * 1e50 y = np.ones(n_samples) assert_warns(ConvergenceWarning, clf.fit, X, y)
def test_timeout(): sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True, random_state=0, max_iter=1) assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)
def test_connectivity_fixing_non_lil(): # Check non regression of a bug if a non item assignable connectivity is # provided with more than one component. # create dummy data x = np.array([[0, 0], [1, 1]]) # create a mask with several components to force connectivity fixing m = np.array([[True, False], [False, True]]) c = grid_to_graph(n_x=2, n_y=2, mask=m) w = AgglomerativeClustering(connectivity=c, linkage='ward') assert_warns(UserWarning, w.fit, x)
def test_enet_coordinate_descent(klass, n_classes, kwargs): """Test that a warning is issued if model does not converge""" clf = klass(max_iter=2, **kwargs) n_samples = 5 n_features = 2 X = np.ones((n_samples, n_features)) * 1e50 y = np.ones((n_samples, n_classes)) if klass == Lasso: y = y.ravel() assert_warns(ConvergenceWarning, clf.fit, X, y)
def test_linear_svm_convergence_warnings(): # Test that warnings are raised if model does not converge lsvc = svm.LinearSVC(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvc.fit, X, Y) assert lsvc.n_iter_ == 2 lsvr = svm.LinearSVR(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvr.fit, iris.data, iris.target) assert lsvr.n_iter_ == 2
def test_stable_cumsum(): assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3])) r = np.random.RandomState(0).rand(100000) assert_warns(RuntimeWarning, stable_cumsum, r, rtol=0, atol=0) # test axis parameter A = np.random.RandomState(36).randint(1000, size=(5, 5, 5)) assert_array_equal(stable_cumsum(A, axis=0), np.cumsum(A, axis=0)) assert_array_equal(stable_cumsum(A, axis=1), np.cumsum(A, axis=1)) assert_array_equal(stable_cumsum(A, axis=2), np.cumsum(A, axis=2))
def test_check_dataframe_warns_on_dtype(): # Check that warn_on_dtype also works for DataFrames. # https://github.com/scikit-learn/scikit-learn/issues/10948 pd = importorskip("pandas") df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object) assert_warns_message(DataConversionWarning, "Data with input dtype object were all converted to " "float64.", check_array, df, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df, dtype='numeric', warn_on_dtype=True) with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 check_array(df, dtype='object', warn_on_dtype=True) assert len(record) == 0 # Also check that it raises a warning for mixed dtypes in a DataFrame. df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype='numeric', warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=object, warn_on_dtype=True) # Even with numerical dtypes, a conversion can be made because dtypes are # uniformized throughout the array. df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed_numeric, dtype='numeric', warn_on_dtype=True) with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 check_array(df_mixed_numeric.astype(int), dtype='numeric', warn_on_dtype=True) assert len(record) == 0
def test_affinity_propagation_fit_non_convergence(): # In case of non-convergence of affinity_propagation(), the cluster # centers should be an empty array and training samples should be labelled # as noise (-1) X = np.array([[0, 0], [1, 1], [-2, -2]]) # Force non-convergence by allowing only a single iteration af = AffinityPropagation(preference=-10, max_iter=1) assert_warns(ConvergenceWarning, af.fit, X) assert_array_equal(np.empty((0, 2)), af.cluster_centers_) assert_array_equal(np.array([-1, -1, -1]), af.labels_)
def test_affinity_propagation_predict_non_convergence(): # In case of non-convergence of affinity_propagation(), the cluster # centers should be an empty array X = np.array([[0, 0], [1, 1], [-2, -2]]) # Force non-convergence by allowing only a single iteration af = assert_warns(ConvergenceWarning, AffinityPropagation(preference=-10, max_iter=1).fit, X) # At prediction time, consider new samples as noise since there are no # clusters to_predict = np.array([[2, 2], [3, 3], [4, 4]]) y = assert_warns(ConvergenceWarning, af.predict, to_predict) assert_array_equal(np.array([-1, -1, -1]), y)
def test_ridge_regression_convergence_fail(): rng = np.random.RandomState(0) y = rng.randn(5) X = rng.randn(5, 10) assert_warns(ConvergenceWarning, ridge_regression, X, y, alpha=1.0, solver="sparse_cg", tol=0., max_iter=None, verbose=1)
def test_convergence_warning(): # This is a non-regression test for #5774 X = np.array([[1., 0.], [0., 1.], [1., 2.5]]) y = np.array([0, 1, -1]) mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y) mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y)
def test_lda_priors(): # Test priors (negative priors) priors = np.array([0.5, -0.5]) clf = LinearDiscriminantAnalysis(priors=priors) msg = "priors must be non-negative" assert_raise_message(ValueError, msg, clf.fit, X, y) # Test that priors passed as a list are correctly handled (run to see if # failure) clf = LinearDiscriminantAnalysis(priors=[0.5, 0.5]) clf.fit(X, y) # Test that priors always sum to 1 priors = np.array([0.5, 0.6]) prior_norm = np.array([0.45, 0.55]) clf = LinearDiscriminantAnalysis(priors=priors) assert_warns(UserWarning, clf.fit, X, y) assert_array_almost_equal(clf.priors_, prior_norm, 2)
def test_warn_wrong_warning(self): def f(): warnings.warn("yo", DeprecationWarning) failed = False filters = sys.modules['warnings'].filters[:] try: try: # Should raise an AssertionError assert_warns(UserWarning, f) failed = True except AssertionError: pass finally: sys.modules['warnings'].filters = filters if failed: raise AssertionError("wrong warning caught by assert_warn")
def test_covariance(): # Tests Covariance module on a simple dataset. # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) emp_cov = empirical_covariance(X) assert_array_almost_equal(emp_cov, cov.covariance_, 4) assert_almost_equal(cov.error_norm(emp_cov), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0) assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0) assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0) with pytest.raises(NotImplementedError): cov.error_norm(emp_cov, norm='foo') # Mahalanobis distances computation test mahal_dist = cov.mahalanobis(X) assert np.amin(mahal_dist) > 0 # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) cov = EmpiricalCovariance() cov.fit(X_1d) assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4) assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0) assert_almost_equal( cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0) # test with one sample # Create X with 1 sample and 5 features X_1sample = np.arange(5).reshape(1, 5) cov = EmpiricalCovariance() assert_warns(UserWarning, cov.fit, X_1sample) assert_array_almost_equal(cov.covariance_, np.zeros(shape=(5, 5), dtype=np.float64)) # test integer type X_integer = np.asarray([[0, 1], [1, 0]]) result = np.asarray([[0.25, -0.25], [-0.25, 0.25]]) assert_array_almost_equal(empirical_covariance(X_integer), result) # test centered case cov = EmpiricalCovariance(assume_centered=True) cov.fit(X) assert_array_equal(cov.location_, np.zeros(X.shape[1]))
def test_unreachable_accuracy(): assert_array_almost_equal( orthogonal_mp(X, y, tol=0), orthogonal_mp(X, y, n_nonzero_coefs=n_features)) assert_array_almost_equal( assert_warns(RuntimeWarning, orthogonal_mp, X, y, tol=0, precompute=True), orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_features))
def check_warm_start_equal_n_estimators(name): # Test if warm start with equal n_estimators does nothing and returns the # same forest and raises a warning. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf.fit(X, y) clf_2 = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf_2.fit(X, y) # Now clf_2 equals clf. clf_2.set_params(random_state=2) assert_warns(UserWarning, clf_2.fit, X, y) # If we had fit the trees again we would have got a different forest as we # changed the random state. assert_array_equal(clf.apply(X), clf_2.apply(X))
def test_k_means_function(): # test calling the k_means function directly # catch output old_stdout = sys.stdout sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, sample_weight=None, verbose=True) finally: sys.stdout = old_stdout centers = cluster_centers assert centers.shape == (n_clusters, n_features) labels = labels assert np.unique(labels).shape[0] == n_clusters # check that the labels assignment are perfect (up to a permutation) assert v_measure_score(true_labels, labels) == 1.0 assert inertia > 0.0 # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, sample_weight=None, init=centers) # to many clusters desired with pytest.raises(ValueError): k_means(X, n_clusters=X.shape[0] + 1, sample_weight=None) # kmeans for algorithm='elkan' raises TypeError on sparse matrix assert_raise_message(TypeError, "algorithm='elkan' not supported for " "sparse input X", k_means, X=X_csr, n_clusters=2, sample_weight=None, algorithm="elkan")
def check_oob_score(name, X, y, n_estimators=20): # Check that oob prediction is a good estimation of the generalization # error. # Proper behavior est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=n_estimators, bootstrap=True) n_samples = X.shape[0] est.fit(X[:n_samples // 2, :], y[:n_samples // 2]) test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:]) if name in FOREST_CLASSIFIERS: assert abs(test_score - est.oob_score_) < 0.1 else: assert test_score > est.oob_score_ assert est.oob_score_ > .8 # Check warning if not enough estimators with np.errstate(divide="ignore", invalid="ignore"): est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=1, bootstrap=True) assert_warns(UserWarning, est.fit, X, y)
def test_ransac_warn_exceed_max_skips(): global cause_skip cause_skip = False def is_data_valid(X, y): global cause_skip if not cause_skip: cause_skip = True return True else: return False base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, is_data_valid=is_data_valid, max_skips=3, max_trials=5) assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y) assert ransac_estimator.n_skips_no_inliers_ == 0 assert ransac_estimator.n_skips_invalid_data_ == 4 assert ransac_estimator.n_skips_invalid_model_ == 0
def test_unstructured_linkage_tree(): # Check that we obtain the correct solution for unstructured linkage trees. rng = np.random.RandomState(0) X = rng.randn(50, 100) for this_X in (X, X[0]): # With specified a number of clusters just for the sake of # raising a warning and testing the warning code with ignore_warnings(): children, n_nodes, n_leaves, parent = assert_warns(UserWarning, ward_tree, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 assert len(children) + n_leaves == n_nodes for tree_builder in _TREE_BUILDERS.values(): for this_X in (X, X[0]): with ignore_warnings(): children, n_nodes, n_leaves, parent = assert_warns( UserWarning, tree_builder, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 assert len(children) + n_leaves == n_nodes
def check_class_weight_errors(name): # Test if class_weight raises errors and warnings when expected. ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T # Invalid preset string clf = ForestClassifier(class_weight='the larch', random_state=0) assert_raises(ValueError, clf.fit, X, y) assert_raises(ValueError, clf.fit, X, _y) # Warning warm_start with preset clf = ForestClassifier(class_weight='balanced', warm_start=True, random_state=0) assert_warns(UserWarning, clf.fit, X, y) assert_warns(UserWarning, clf.fit, X, _y) # Not a list or preset for multi-output clf = ForestClassifier(class_weight=1, random_state=0) assert_raises(ValueError, clf.fit, X, _y) # Incorrect length list for multi-output clf = ForestClassifier(class_weight=[{-1: 0.5, 1: 1.}], random_state=0) assert_raises(ValueError, clf.fit, X, _y)