def test_ridgecv_store_cv_values(): rng = np.random.RandomState(42) n_samples = 8 n_features = 5 x = rng.randn(n_samples, n_features) alphas = [1e-1, 1e0, 1e1] n_alphas = len(alphas) r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True) # with len(y.shape) == 1 y = rng.randn(n_samples) r.fit(x, y) assert r.cv_values_.shape == (n_samples, n_alphas) # with len(y.shape) == 2 n_targets = 3 y = rng.randn(n_samples, n_targets) r.fit(x, y) assert r.cv_values_.shape == (n_samples, n_targets, n_alphas) r = RidgeCV(cv=3, store_cv_values=True) assert_raises_regex(ValueError, 'cv!=None and store_cv_values', r.fit, x, y)
def test_check_class_weight_balanced_linear_classifier(): # check that ill-computed balanced weights raises an exception assert_raises_regex( AssertionError, "Classifier estimator_name is not computing" " class_weight=balanced properly.", check_class_weight_balanced_linear_classifier, 'estimator_name', BadBalancedWeightsClassifier)
def test_fit_predict_on_pipeline_without_fit_predict(): # tests that a pipeline does not have fit_predict method when final # step of pipeline does not have fit_predict defined scaler = StandardScaler() pca = PCA(svd_solver='full') pipe = Pipeline([('scaler', scaler), ('pca', pca)]) assert_raises_regex(AttributeError, "'PCA' object has no attribute 'fit_predict'", getattr, pipe, 'fit_predict')
def test_not_an_array_array_function(): np_version = _parse_version(np.__version__) if np_version < (1, 17): raise SkipTest("array_function protocol not supported in numpy <1.17") not_array = _NotAnArray(np.ones(10)) msg = "Don't want to call array_function sum!" assert_raises_regex(TypeError, msg, np.sum, not_array) # always returns True assert np.may_share_memory(not_array, None)
def test_check_classification_targets(): for y_type in EXAMPLES.keys(): if y_type in ["unknown", "continuous", 'continuous-multioutput']: for example in EXAMPLES[y_type]: msg = 'Unknown label type: ' assert_raises_regex(ValueError, msg, check_classification_targets, example) else: for example in EXAMPLES[y_type]: check_classification_targets(example)
def test_bad_pyfunc_metric(): def wrong_distance(x, y): return "1" X = np.ones((5, 2)) assert_raises_regex(TypeError, "Custom distance function must accept two vectors", BallTree, X, metric=wrong_distance)
def test_check_non_negative(retype): A = np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) X = retype(A) check_non_negative(X, "") X = retype([[0, 0], [0, 0]]) check_non_negative(X, "") A[0, 0] = -1 X = retype(A) assert_raises_regex(ValueError, "Negative ", check_non_negative, X, "")
def test_check_estimators_unfitted(): # check that a ValueError/AttributeError is raised when calling predict # on an unfitted estimator msg = "NotFittedError not raised by predict" assert_raises_regex(AssertionError, msg, check_estimators_unfitted, "estimator", NoSparseClassifier()) # check that CorrectNotFittedError inherit from either ValueError # or AttributeError check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())
def test_check_estimator_required_parameters_skip(): class MyEstimator(BaseEstimator): _required_parameters = ["special_parameter"] def __init__(self, special_parameter): self.special_parameter = special_parameter assert_raises_regex( SkipTest, r"Can't instantiate estimator MyEstimator " r"which requires parameters " r"\['special_parameter'\]", check_estimator, MyEstimator)
def test_pipeline_with_cache_attribute(): X = np.array([[1, 2]]) pipe = Pipeline([('transf', Transf()), ('clf', Mult())], memory=DummyMemory()) pipe.fit(X, y=None) dummy = WrongDummyMemory() pipe = Pipeline([('transf', Transf()), ('clf', Mult())], memory=dummy) assert_raises_regex( ValueError, "'memory' should be None, a string or" " have the same interface as joblib.Memory." " Got memory='{}' instead.".format(dummy), pipe.fit, X)
def test_gen_even_slices(): # check that gen_even_slices contains all samples some_range = range(10) joined_range = list( chain(*[some_range[slice] for slice in gen_even_slices(10, 3)])) assert_array_equal(some_range, joined_range) # check that passing negative n_chunks raises an error slices = gen_even_slices(10, -1) assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be" " >=1", next, slices)
def test_ridgecv_negative_alphas(): X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] # Negative integers ridge = RidgeCV(alphas=(-1, -10, -100)) assert_raises_regex(ValueError, "alphas must be positive", ridge.fit, X, y) # Negative floats ridge = RidgeCV(alphas=(-0.1, -1.0, -10.0)) assert_raises_regex(ValueError, "alphas must be positive", ridge.fit, X, y)
def test_pipeline_wrong_memory(): # Test that an error is raised when memory is not a string or a Memory # instance X = iris.data y = iris.target # Define memory as an integer memory = 1 cached_pipe = Pipeline([('transf', DummyTransf()), ('svc', SVC())], memory=memory) assert_raises_regex( ValueError, "'memory' should be None, a string or" " have the same interface as joblib.Memory." " Got memory='1' instead.", cached_pipe.fit, X, y)
def test_precompute_invalid_argument(): X, y, _, _ = build_dataset() for clf in [ ElasticNetCV(precompute="invalid"), LassoCV(precompute="invalid") ]: assert_raises_regex( ValueError, ".*should be.*True.*False.*auto.*" "array-like.*Got 'invalid'", clf.fit, X, y) # Precompute = 'auto' is not supported for ElasticNet assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*" "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
def test_novelty_errors(): X = iris.data # check errors for novelty=False clf = neighbors.LocalOutlierFactor() clf.fit(X) # predict, decision_function and score_samples raise ValueError for method in ['predict', 'decision_function', 'score_samples']: msg = ('{} is not available when novelty=False'.format(method)) assert_raises_regex(AttributeError, msg, getattr, clf, method) # check errors for novelty=True clf = neighbors.LocalOutlierFactor(novelty=True) msg = 'fit_predict is not available when novelty=True' assert_raises_regex(AttributeError, msg, getattr, clf, 'fit_predict')
def test_ridge_regression_check_arguments_validity(return_intercept, sample_weight, arr_type, solver): """check if all combinations of arguments give valid estimations""" # test excludes 'svd' solver because it raises exception for sparse inputs rng = check_random_state(42) X = rng.rand(1000, 3) true_coefs = [1, 2, 0.1] y = np.dot(X, true_coefs) true_intercept = 0. if return_intercept: true_intercept = 10000. y += true_intercept X_testing = arr_type(X) alpha, atol, tol = 1e-3, 1e-4, 1e-6 if solver not in ['sag', 'auto'] and return_intercept: assert_raises_regex(ValueError, "In Ridge, only 'sag' solver", ridge_regression, X_testing, y, alpha=alpha, solver=solver, sample_weight=sample_weight, return_intercept=return_intercept, tol=tol) return out = ridge_regression( X_testing, y, alpha=alpha, solver=solver, sample_weight=sample_weight, return_intercept=return_intercept, tol=tol, ) if return_intercept: coef, intercept = out assert_allclose(coef, true_coefs, rtol=0, atol=atol) assert_allclose(intercept, true_intercept, rtol=0, atol=atol) else: assert_allclose(out, true_coefs, rtol=0, atol=atol)
def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: assert type_of_target(example) == group, ( 'type_of_target(%r) should be %r, got %r' % (example, group, type_of_target(example))) for example in NON_ARRAY_LIKE_EXAMPLES: msg_regex = r'Expected array-like \(array or non-string sequence\).*' assert_raises_regex(ValueError, msg_regex, type_of_target, example) for example in MULTILABEL_SEQUENCES: msg = ('You appear to be using a legacy multi-label data ' 'representation. Sequence of sequences are no longer supported;' ' use a binary array or sparse matrix instead.') assert_raises_regex(ValueError, msg, type_of_target, example)
def test_feature_union(): # basic sanity check for feature union X = iris.data X -= X.mean(axis=0) y = iris.target svd = TruncatedSVD(n_components=2, random_state=0) select = SelectKBest(k=1) fs = FeatureUnion([("svd", svd), ("select", select)]) fs.fit(X, y) X_transformed = fs.transform(X) assert X_transformed.shape == (X.shape[0], 3) # check if it does the expected thing assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) assert_array_equal(X_transformed[:, -1], select.fit_transform(X, y).ravel()) # test if it also works for sparse input # We use a different svd object to control the random_state stream fs = FeatureUnion([("svd", svd), ("select", select)]) X_sp = sparse.csr_matrix(X) X_sp_transformed = fs.fit_transform(X_sp, y) assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) # Test clone fs2 = assert_no_warnings(clone, fs) assert fs.transformer_list[0][1] is not fs2.transformer_list[0][1] # test setting parameters fs.set_params(select__k=2) assert fs.fit_transform(X, y).shape == (X.shape[0], 4) # test it works with transformers missing fit_transform fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)]) X_transformed = fs.fit_transform(X, y) assert X_transformed.shape == (X.shape[0], 8) # test error if some elements do not support transform assert_raises_regex( TypeError, 'All estimators should implement fit and ' 'transform.*\\bNoTrans\\b', FeatureUnion, [("transform", Transf()), ("no_transform", NoTrans())]) # test that init accepts tuples fs = FeatureUnion((("svd", svd), ("select", select))) fs.fit(X, y)
def test_check_memory(): memory = check_memory("cache_directory") assert memory.cachedir == os.path.join('cache_directory', 'joblib') memory = check_memory(None) assert memory.cachedir is None dummy = DummyMemory() memory = check_memory(dummy) assert memory is dummy assert_raises_regex( ValueError, "'memory' should be None, a string or" " have the same interface as joblib.Memory." " Got memory='1' instead.", check_memory, 1) dummy = WrongDummyMemory() assert_raises_regex( ValueError, "'memory' should be None, a string or" " have the same interface as joblib.Memory." " Got memory='{}' instead.".format(dummy), check_memory, dummy)
def test_check_no_attributes_set_in_init(): class NonConformantEstimatorPrivateSet(BaseEstimator): def __init__(self): self.you_should_not_set_this_ = None class NonConformantEstimatorNoParamSet(BaseEstimator): def __init__(self, you_should_set_this_=None): pass assert_raises_regex( AssertionError, "Estimator estimator_name should not set any" " attribute apart from parameters during init." r" Found attributes \['you_should_not_set_this_'\].", check_no_attributes_set_in_init, 'estimator_name', NonConformantEstimatorPrivateSet()) assert_raises_regex( AssertionError, "Estimator estimator_name should store all " "parameters as an attribute during init. " "Did not find attributes " r"\['you_should_set_this_'\].", check_no_attributes_set_in_init, 'estimator_name', NonConformantEstimatorNoParamSet())
def test_check_consistent_length(): check_consistent_length([1], [2], [3], [4], [5]) check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b']) check_consistent_length([1], (2, ), np.array([3]), sp.csr_matrix((1, 2))) assert_raises_regex(ValueError, 'inconsistent numbers of samples', check_consistent_length, [1, 2], [1]) assert_raises_regex(TypeError, r"got <\w+ 'int'>", check_consistent_length, [1, 2], 1) assert_raises_regex(TypeError, r"got <\w+ 'object'>", check_consistent_length, [1, 2], object()) assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1)) # Despite ensembles having __len__ they must raise TypeError assert_raises_regex(TypeError, 'Expected sequence or array-like', check_consistent_length, [1, 2], RandomForestRegressor())
def test_check_array_complex_data_error(): X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # list of lists X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]] assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # tuple of tuples X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j)) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # list of np arrays X = [ np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j]) ] assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # tuple of np arrays X = (np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j])) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # dataframe X = MockDataFrame( np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # sparse matrix X = sp.coo_matrix([[0, 1 + 2j], [0, 0]]) assert_raises_regex(ValueError, "Complex data not supported", check_array, X)
def test_check_estimator_transformer_no_mixin(): # check that TransformerMixin is not required for transformer tests to run assert_raises_regex(AttributeError, '.*fit_transform.*', check_estimator, BadTransformerWithoutMixin())
def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" assert_raises_regex(TypeError, msg, check_estimator, object) assert_raises_regex(TypeError, msg, check_estimator, object()) # check that values returned by get_params match set_params msg = "get_params result does not match what was passed to set_params" assert_raises_regex(AssertionError, msg, check_estimator, ModifiesValueInsteadOfRaisingError()) assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams()) assert_raises_regex(AssertionError, msg, check_estimator, ModifiesAnotherValue()) # check that we have a fit method msg = "object has no attribute 'fit'" assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator()) # check that fit does input validation msg = "ValueError not raised" assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier) assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier()) # check that sample_weights in fit accepts pandas.Series type try: from pandas import Series # noqa msg = ("Estimator NoSampleWeightPandasSeriesType raises error if " "'sample_weight' parameter is of type pandas.Series") assert_raises_regex(ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType) except ImportError: pass # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict()) # check that estimator state does not change # at transform/predict/predict_proba time msg = 'Estimator changes __dict__ during predict' assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict) # check that `fit` only changes attribures that # are private (start with an _ or end with a _). msg = ('Estimator ChangesWrongAttribute should not change or mutate ' 'the parameter wrong_attribute from 0 to 1 during fit.') assert_raises_regex(AssertionError, msg, check_estimator, ChangesWrongAttribute) check_estimator(ChangesUnderscoreAttribute) # check that `fit` doesn't add any public attribute msg = (r'Estimator adds public attribute\(s\) during the fit method.' ' Estimators are only allowed to add private attributes' ' either started with _ or ended' ' with _ but wrong_attribute added') assert_raises_regex(AssertionError, msg, check_estimator, SetsWrongAttribute) # check for invariant method name = NotInvariantPredict.__name__ method = 'predict' msg = ("{method} of {name} is not invariant when applied " "to a subset.").format(method=method, name=name) assert_raises_regex(AssertionError, msg, check_estimator, NotInvariantPredict) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name # the check for sparse input handling prints to the stdout, # instead of raising an error, so as not to remove the original traceback. # that means we need to jump through some hoops to catch it. old_stdout = sys.stdout string_buffer = StringIO() sys.stdout = string_buffer try: check_estimator(NoSparseClassifier) except: pass finally: sys.stdout = old_stdout assert msg in string_buffer.getvalue() # Large indices test on bad estimator msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to ' r'support \S{3}_64 matrix, and is not failing gracefully.*') assert_raises_regex(AssertionError, msg, check_estimator, LargeSparseNotSupportedClassifier) # does error on binary_only untagged estimator msg = 'Only 2 classes are supported' assert_raises_regex(ValueError, msg, check_estimator, UntaggedBinaryClassifier) # non-regression test for estimators transforming to sparse data check_estimator(SparseTransformer()) # doesn't error on actual estimator check_estimator(LogisticRegression) check_estimator(LogisticRegression(C=0.01)) check_estimator(MultiTaskElasticNet) check_estimator(MultiTaskElasticNet()) # doesn't error on binary_only tagged estimator check_estimator(TaggedBinaryClassifier) # Check regressor with requires_positive_y estimator tag msg = 'negative y values not supported!' assert_raises_regex(ValueError, msg, check_estimator, RequiresPositiveYRegressor)
def test_pipeline_init(): # Test the various init parameters of the pipeline. assert_raises(TypeError, Pipeline) # Check that we can't instantiate pipelines with objects without fit # method assert_raises_regex( TypeError, 'Last step of Pipeline should implement fit ' 'or be the string \'passthrough\'' '.*NoFit.*', Pipeline, [('clf', NoFit())]) # Smoke test with only an estimator clf = NoTrans() pipe = Pipeline([('svc', clf)]) assert (pipe.get_params(deep=True) == dict(svc__a=None, svc__b=None, svc=clf, **pipe.get_params(deep=False))) # Check that params are set pipe.set_params(svc__a=0.1) assert clf.a == 0.1 assert clf.b is None # Smoke test the repr: repr(pipe) # Test with two objects clf = SVC() filter1 = SelectKBest(f_classif) pipe = Pipeline([('anova', filter1), ('svc', clf)]) # Check that estimators are not cloned on pipeline construction assert pipe.named_steps['anova'] is filter1 assert pipe.named_steps['svc'] is clf # Check that we can't instantiate with non-transformers on the way # Note that NoTrans implements fit, but not transform assert_raises_regex( TypeError, 'All intermediate steps should be transformers' '.*\\bNoTrans\\b.*', Pipeline, [('t', NoTrans()), ('svc', clf)]) # Check that params are set pipe.set_params(svc__C=0.1) assert clf.C == 0.1 # Smoke test the repr: repr(pipe) # Check that params are not set when naming them wrong assert_raises(ValueError, pipe.set_params, anova__C=0.1) # Test clone pipe2 = assert_no_warnings(clone, pipe) assert not pipe.named_steps['svc'] is pipe2.named_steps['svc'] # Check that apart from estimators, the parameters are the same params = pipe.get_params(deep=True) params2 = pipe2.get_params(deep=True) for x in pipe.get_params(deep=False): params.pop(x) for x in pipe2.get_params(deep=False): params2.pop(x) # Remove estimators that where copied params.pop('svc') params.pop('anova') params2.pop('svc') params2.pop('anova') assert params == params2