def test_nmf_negative_beta_loss(): # Test that an error is raised if beta_loss < 0 and X contains zeros. # Test that the output has not NaN values when the input contains zeros. n_samples = 6 n_features = 5 n_components = 3 rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.clip(X, 0, None, out=X) X_csr = sp.csr_matrix(X) def _assert_nmf_no_nan(X, beta_loss): W, H, _ = non_negative_factorization(X, init='random', n_components=n_components, solver='mu', beta_loss=beta_loss, random_state=0, max_iter=1000) assert not np.any(np.isnan(W)) assert not np.any(np.isnan(H)) msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge." for beta_loss in (-0.6, 0.): assert_raise_message(ValueError, msg, _assert_nmf_no_nan, X, beta_loss) _assert_nmf_no_nan(X + 1e-9, beta_loss) for beta_loss in (0.2, 1., 1.2, 2., 2.5): _assert_nmf_no_nan(X, beta_loss) _assert_nmf_no_nan(X_csr, beta_loss)
def test_bayesian_mixture_weights_prior_initialisation(): rng = np.random.RandomState(0) n_samples, n_components, n_features = 10, 5, 2 X = rng.rand(n_samples, n_features) # Check raise message for a bad value of weight_concentration_prior bad_weight_concentration_prior_ = 0. bgmm = BayesianGaussianMixture( weight_concentration_prior=bad_weight_concentration_prior_, random_state=0) assert_raise_message( ValueError, "The parameter 'weight_concentration_prior' " "should be greater than 0., but got %.3f." % bad_weight_concentration_prior_, bgmm.fit, X) # Check correct init for a given value of weight_concentration_prior weight_concentration_prior = rng.rand() bgmm = BayesianGaussianMixture( weight_concentration_prior=weight_concentration_prior, random_state=rng).fit(X) assert_almost_equal(weight_concentration_prior, bgmm.weight_concentration_prior_) # Check correct init for the default value of weight_concentration_prior bgmm = BayesianGaussianMixture(n_components=n_components, random_state=rng).fit(X) assert_almost_equal(1. / n_components, bgmm.weight_concentration_prior_)
def test_score(): covar_type = 'full' rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components X = rand_data.X[covar_type] # Check the error message if we don't call fit gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_raise_message( NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this estimator.", gmm1.score, X) # Check score value with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) gmm1.fit(X) gmm_score = gmm1.score(X) gmm_score_proba = gmm1.score_samples(X).mean() assert_almost_equal(gmm_score, gmm_score_proba) # Check if the score increase gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) assert gmm2.score(X) > gmm1.score(X)
def test_column_transformer_invalid_columns(remainder): X_array = np.array([[0, 1, 2], [2, 4, 6]]).T # general invalid for col in [1.5, ['string', 1], slice(1, 's'), np.array([1.])]: ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder) assert_raise_message(ValueError, "No valid specification", ct.fit, X_array) # invalid for arrays for col in ['string', ['string', 'other'], slice('a', 'b')]: ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder) assert_raise_message(ValueError, "Specifying the columns", ct.fit, X_array) # transformed n_features does not match fitted n_features col = [0, 1] ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder) ct.fit(X_array) X_array_more = np.array([[0, 1, 2], [2, 4, 6], [3, 6, 9]]).T msg = ("Given feature/column names or counts do not match the ones for " "the data given during fit.") with pytest.warns(FutureWarning, match=msg): ct.transform(X_array_more) # Should accept added columns, for now X_array_fewer = np.array([ [0, 1, 2], ]).T err_msg = 'Number of features' with pytest.raises(ValueError, match=err_msg): ct.transform(X_array_fewer)
def test_bayesian_mixture_predict_predict_proba(): # this is the same test as test_gaussian_mixture_predict_predict_proba() rng = np.random.RandomState(0) rand_data = RandomData(rng) for prior_type in PRIOR_TYPE: for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] Y = rand_data.Y bgmm = BayesianGaussianMixture( n_components=rand_data.n_components, random_state=rng, weight_concentration_prior_type=prior_type, covariance_type=covar_type) # Check a warning message arrive if we don't do fit assert_raise_message( NotFittedError, "This BayesianGaussianMixture instance" " is not fitted yet. Call 'fit' with " "appropriate arguments before using " "this estimator.", bgmm.predict, X) bgmm.fit(X) Y_pred = bgmm.predict(X) Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) assert adjusted_rand_score(Y, Y_pred) >= .95
def test_step_name_validation(): bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))] bad_steps2 = [('a', Mult(2)), ('a', Mult(3))] for cls, param in [(Pipeline, 'steps'), (FeatureUnion, 'transformer_list')]: # we validate in construction (despite scikit-learn convention) bad_steps3 = [('a', Mult(2)), (param, Mult(3))] for bad_steps, message in [ (bad_steps1, "Estimator names must not contain __: got ['a__q']"), (bad_steps2, "Names provided are not unique: ['a', 'a']"), (bad_steps3, "Estimator names conflict with constructor " "arguments: ['%s']" % param), ]: # three ways to make invalid: # - construction assert_raise_message(ValueError, message, cls, **{param: bad_steps}) # - setattr est = cls(**{param: [('a', Mult(1))]}) setattr(est, param, bad_steps) assert_raise_message(ValueError, message, est.fit, [[1]], [1]) assert_raise_message(ValueError, message, est.fit_transform, [[1]], [1]) # - set_params est = cls(**{param: [('a', Mult(1))]}) est.set_params(**{param: bad_steps}) assert_raise_message(ValueError, message, est.fit, [[1]], [1]) assert_raise_message(ValueError, message, est.fit_transform, [[1]], [1])
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): dense_svm.fit(X_train.toarray(), y_train) if sparse.isspmatrix(X_test): X_test_dense = X_test.toarray() else: X_test_dense = X_test sparse_svm.fit(X_train, y_train) assert sparse.issparse(sparse_svm.support_vectors_) assert sparse.issparse(sparse_svm.dual_coef_) assert_array_almost_equal(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray()) assert_array_almost_equal(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray()) if dense_svm.kernel == "linear": assert sparse.issparse(sparse_svm.coef_) assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray()) assert_array_almost_equal(dense_svm.support_, sparse_svm.support_) assert_array_almost_equal(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test_dense)) if isinstance(dense_svm, svm.OneClassSVM): msg = "cannot use sparse input in 'OneClassSVM' trained on dense data" else: assert_array_almost_equal(dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4) msg = "cannot use sparse input in 'SVC' trained on dense data" if sparse.isspmatrix(X_test): assert_raise_message(ValueError, msg, dense_svm.predict, X_test)
def test_fetch_nonexiting(monkeypatch, gzip_response): # there is no active version of glass2 data_id = 40675 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # Note that we only want to search by name (not data id) assert_raise_message(ValueError, "No active dataset glass2 found", fetch_openml, name='glass2', cache=False)
def test_zero_cosine_linkage_tree(): # Check that zero vectors in X produce an error when # 'cosine' affinity is used X = np.array([[0, 1], [0, 0]]) msg = 'Cosine affinity cannot be used when X contains zero vectors' assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(n_iter=0) msg = "n_iter should be greater than or equal to 1." assert_raise_message(ValueError, msg, clf.fit, X, y)
def test_enet_l1_ratio(): # Test that an error message is raised if an estimator that # uses _alpha_grid is called with l1_ratio=0 msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. " "Please supply a grid by providing your estimator with the " "appropriate `alphas=` argument.") X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T y = np.array([12, 10, 11, 21, 5]) assert_raise_message(ValueError, msg, ElasticNetCV(l1_ratio=0, random_state=42).fit, X, y) assert_raise_message( ValueError, msg, MultiTaskElasticNetCV(l1_ratio=0, random_state=42).fit, X, y[:, None]) # Test that l1_ratio=0 is allowed if we supply a grid manually alphas = [0.1, 10] estkwds = {'alphas': alphas, 'random_state': 42} est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds) est = ElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est_desired.fit(X, y) est.fit(X, y) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5) est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds) est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est.fit(X, y[:, None]) est_desired.fit(X, y[:, None]) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
def test_n_components(): rng = np.random.RandomState(42) X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] init = rng.rand(X.shape[1] - 1, 3) # n_components = X.shape[1] != transformation.shape[0] n_components = X.shape[1] nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components) assert_raise_message(ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) does not match ' 'the output dimensionality of the given ' 'linear transformation `init` ({})!' .format(n_components, init.shape[0]), nca.fit, X, y) # n_components > X.shape[1] n_components = X.shape[1] + 2 nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components) assert_raise_message(ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) cannot ' 'be greater than the given data ' 'dimensionality ({})!' .format(n_components, X.shape[1]), nca.fit, X, y) # n_components < X.shape[1] nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity') nca.fit(X, y)
def test_ovo_float_y(): # Test that the OvO errors on float targets X = iris.data y = iris.data[:, 0] ovo = OneVsOneClassifier(LinearSVC()) assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y)
def test_ovo_one_class(): # Test error for OvO with one class X = np.eye(4) y = np.array(['a'] * 4) ovo = OneVsOneClassifier(LinearSVC()) assert_raise_message(ValueError, "when only one class", ovo.fit, X, y)
def test_regularisation(): # We train the GaussianMixture on degenerate data by defining two clusters # of a 0 covariance. rng = np.random.RandomState(0) n_samples, n_features = 10, 5 X = np.vstack((np.ones( (n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features)))) for covar_type in COVARIANCE_TYPE: gmm = GaussianMixture(n_components=n_samples, reg_covar=0, covariance_type=covar_type, random_state=rng) with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) assert_raise_message( ValueError, "Fitting the mixture model failed because " "some components have ill-defined empirical " "covariance (for instance caused by " "singleton or collapsed samples). Try to " "decrease the number of components, or " "increase reg_covar.", gmm.fit, X) gmm.set_params(reg_covar=1e-6).fit(X)
def test_pls_errors(): d = load_linnerud() X = d.data Y = d.target for clf in [pls_.PLSCanonical(), pls_.PLSRegression(), pls_.PLSSVD()]: clf.n_components = 4 assert_raise_message(ValueError, "Invalid number of components", clf.fit, X, Y)
def test_string_attribute_without_dataframe(monkeypatch, gzip_response): data_id = 40945 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # single column test assert_raise_message(ValueError, ('STRING attributes are not supported for ' 'array representation. Try as_frame=True'), fetch_openml, data_id=data_id, cache=False)
def test_raises_illegal_multitarget(monkeypatch, gzip_response): data_id = 61 targets = ['sepalwidth', 'class'] _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # Note that we only want to search by name (not data id) assert_raise_message(ValueError, "Can only handle homogeneous multi-target datasets,", fetch_openml, data_id=data_id, target_column=targets, cache=False)
def test_base_zero_n_estimators(): # Check that instantiating a BaseEnsemble with n_estimators<=0 raises # a ValueError. ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=0) iris = load_iris() assert_raise_message(ValueError, "n_estimators must be greater than zero, got 0.", ensemble.fit, iris.data, iris.target)
def test_linear_svc_intercept_scaling(): # Test that the right error message is thrown when intercept_scaling <= 0 for i in [-1, 0]: lsvc = svm.LinearSVC(intercept_scaling=i) msg = ('Intercept scaling is %r but needs to be greater than 0.' ' To disable fitting an intercept,' ' set fit_intercept=False.' % lsvc.intercept_scaling) assert_raise_message(ValueError, msg, lsvc.fit, X, Y)
def test_column_transformer_remainder(): X_array = np.array([[0, 1, 2], [2, 4, 6]]).T X_res_first = np.array([0, 1, 2]).reshape(-1, 1) X_res_second = np.array([2, 4, 6]).reshape(-1, 1) X_res_both = X_array # default drop ct = ColumnTransformer([('trans1', Trans(), [0])]) assert_array_equal(ct.fit_transform(X_array), X_res_first) assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] == 'remainder' assert ct.transformers_[-1][1] == 'drop' assert_array_equal(ct.transformers_[-1][2], [1]) # specify passthrough ct = ColumnTransformer([('trans', Trans(), [0])], remainder='passthrough') assert_array_equal(ct.fit_transform(X_array), X_res_both) assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] == 'remainder' assert ct.transformers_[-1][1] == 'passthrough' assert_array_equal(ct.transformers_[-1][2], [1]) # column order is not preserved (passed through added to end) ct = ColumnTransformer([('trans1', Trans(), [1])], remainder='passthrough') assert_array_equal(ct.fit_transform(X_array), X_res_both[:, ::-1]) assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both[:, ::-1]) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] == 'remainder' assert ct.transformers_[-1][1] == 'passthrough' assert_array_equal(ct.transformers_[-1][2], [0]) # passthrough when all actual transformers are skipped ct = ColumnTransformer([('trans1', 'drop', [0])], remainder='passthrough') assert_array_equal(ct.fit_transform(X_array), X_res_second) assert_array_equal(ct.fit(X_array).transform(X_array), X_res_second) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] == 'remainder' assert ct.transformers_[-1][1] == 'passthrough' assert_array_equal(ct.transformers_[-1][2], [1]) # error on invalid arg ct = ColumnTransformer([('trans1', Trans(), [0])], remainder=1) assert_raise_message( ValueError, "remainder keyword needs to be one of \'drop\', \'passthrough\', " "or estimator.", ct.fit, X_array) assert_raise_message( ValueError, "remainder keyword needs to be one of \'drop\', \'passthrough\', " "or estimator.", ct.fit_transform, X_array) # check default for make_column_transformer ct = make_column_transformer((Trans(), [0])) assert ct.remainder == 'drop'
def test_classifier_single_class(): """tests if ValueError is thrown with only one class""" X = [[1, 2], [3, 4]] y = [1, 1] assert_raise_message( ValueError, "This solver needs samples of at least 2 classes " "in the data", LogisticRegression(solver='sag').fit, X, y)
def test_check_array_accept_large_sparse_raise_exception(X_64bit): # When large sparse are not allowed msg = ("Only sparse matrices with 32-bit integer indices " "are accepted. Got int64 indices.") assert_raise_message(ValueError, msg, check_array, X_64bit, accept_sparse=True, accept_large_sparse=False)
def test_agg_n_clusters(): # Test that an error is raised when n_clusters <= 0 rng = np.random.RandomState(0) X = rng.rand(20, 10) for n_clus in [-1, 0]: agc = AgglomerativeClustering(n_clusters=n_clus) msg = ("n_clusters should be an integer greater than 0." " %s was provided." % str(agc.n_clusters)) assert_raise_message(ValueError, msg, agc.fit, X)
def test_gpr_correct_error_message(): X = np.arange(12).reshape(6, -1) y = np.ones(6) kernel = DotProduct() gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) assert_raise_message( np.linalg.LinAlgError, "The kernel, %s, is not returning a " "positive definite matrix. Try gradually increasing " "the 'alpha' parameter of your " "GaussianProcessRegressor estimator." % kernel, gpr.fit, X, y)
def test_too_many_samples_to_find_a_safe_embedding(): data, _ = make_sparse_random_data(1000, 100, 1000) for RandomProjection in all_RandomProjection: rp = RandomProjection(n_components='auto', eps=0.1) expected_msg = ( 'eps=0.100000 and n_samples=1000 lead to a target dimension' ' of 5920 which is larger than the original space with' ' n_features=100') assert_raise_message(ValueError, expected_msg, rp.fit, data)
def test_minimum_number_of_sample_check(): # test that we check a minimum number of samples msg = "min_samples must be no greater than" # Compute OPTICS X = [[1, 1]] clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1) # Run the fit assert_raise_message(ValueError, msg, clust.fit, X)
def test_bayesian_mixture_check_is_fitted(): rng = np.random.RandomState(0) n_samples, n_features = 10, 2 # Check raise message bgmm = BayesianGaussianMixture(random_state=rng) X = rng.rand(n_samples, n_features) assert_raise_message( ValueError, 'This BayesianGaussianMixture instance is not ' 'fitted yet.', bgmm.score, X)
def test_2D_transformer_output(): X_array = np.array([[0, 1, 2], [2, 4, 6]]).T # if one transformer is dropped, test that name is still correct ct = ColumnTransformer([('trans1', 'drop', 0), ('trans2', TransNo2D(), 1)]) assert_raise_message(ValueError, "the 'trans2' transformer should be 2D", ct.fit_transform, X_array) # because fit is also doing transform, this raises already on fit assert_raise_message(ValueError, "the 'trans2' transformer should be 2D", ct.fit, X_array)
def test_lle_init_parameters(): X = np.random.rand(5, 3) clf = manifold.LocallyLinearEmbedding(eigen_solver="error") msg = "unrecognized eigen_solver 'error'" assert_raise_message(ValueError, msg, clf.fit, X) clf = manifold.LocallyLinearEmbedding(method="error") msg = "unrecognized method 'error'" assert_raise_message(ValueError, msg, clf.fit, X)