def check_samplers_pandas(name, Sampler):
    pd = pytest.importorskip("pandas")
    # Check that the samplers handle pandas dataframe and pandas series
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    X_pd = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])
    y_pd = pd.Series(y, name="class")

    sampler = Sampler()
    if isinstance(sampler, NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    else:
        samplers = [sampler]

    for sampler in samplers:
        set_random_state(sampler)
        X_res_pd, y_res_pd = sampler.fit_resample(X_pd, y_pd)
        X_res, y_res = sampler.fit_resample(X, y)

        # check that we return a pandas dataframe if a dataframe was given in
        assert isinstance(X_res_pd, pd.DataFrame)
        assert isinstance(y_res_pd, pd.Series)
        assert X_pd.columns.to_list() == X_res_pd.columns.to_list()
        assert y_pd.name == y_res_pd.name
        assert_allclose(X_res_pd.to_numpy(), X_res)
        assert_allclose(y_res_pd.to_numpy(), y_res)


def check_methods_have_no_side_effects(Estimator):
    # Check that calling methods has no side effects on args
    if not isclass(Estimator):
        Estimator = type(Estimator)

    estimator = _construct_instance(Estimator)
    set_random_state(estimator)

    # Fit for the first time
    fit_args = _make_args(estimator=estimator, method="fit")
    old_fit_args = deepcopy(fit_args)
    estimator.fit(*fit_args)

    assert deep_equals(
        old_fit_args, fit_args
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_args = _make_args(estimator=estimator, method=method)
            old_args = deepcopy(new_args)
            getattr(estimator, method)(*new_args)

            assert deep_equals(
                old_args, new_args
            ), f"Estimator: {estimator} has side effects on arguments of {method}"


def test_meta_estimators_delegate_data_validation(estimator):
    # Check that meta-estimators delegate data validation to the inner
    # estimator(s).
    rng = np.random.RandomState(0)
    set_random_state(estimator)

    n_samples = 30
    X = rng.choice(np.array(["aa", "bb", "cc"], dtype=object), size=n_samples)

    if is_regressor(estimator):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(3, size=n_samples)

    # We convert to lists to make sure it works on array-like
    X = _enforce_estimator_tags_x(estimator, X).tolist()
    y = _enforce_estimator_tags_y(estimator, y).tolist()

    # Calling fit should not raise any data validation exception since X is a
    # valid input datastructure for the first step of the pipeline passed as
    # base estimator to the meta estimator.
    estimator.fit(X, y)

    # n_features_in_ should not be defined since data is not tabular data.
    assert not hasattr(estimator, "n_features_in_")


def check_supervised_y_2d(name, estimator_orig):
    tags = estimator_orig._get_tags()
    X, y = _create_small_ts_dataset()
    if tags['binary_only']:
        X = X[y != 2]
        y = y[y != 2]
    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # fit
    estimator.fit(X, y)
    y_pred = estimator.predict(X)

    set_random_state(estimator)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        estimator.fit(X, y[:, np.newaxis])
    y_pred_2d = estimator.predict(X)
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    if not tags['multioutput'] and name not in ['TimeSeriesSVR']:
        # check that we warned if we don't support multi-output
        assert len(w) > 0, msg
        assert "DataConversionWarning('A column-vector y" \
               " was passed when a 1d array was expected" in msg
    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())


def test_persistence_via_pickle(estimator_instance):
    """Check that we can pickle all estimators."""
    estimator = estimator_instance
    set_random_state(estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Generate results before pickling
    results = dict()
    args = dict()
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args[method] = _make_args(estimator, method)
            results[method] = getattr(estimator, method)(*args[method])

    # Pickle and unpickle
    pickled_estimator = pickle.dumps(estimator)
    unpickled_estimator = pickle.loads(pickled_estimator)

    # Compare against results after pickling
    for method in results:
        unpickled_result = getattr(unpickled_estimator, method)(*args[method])
        _assert_array_almost_equal(
            results[method],
            unpickled_result,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )


def test_fit_idempotent(self, estimator_instance, scenario):
    """Check that calling fit twice is equivalent to calling it once."""
    estimator = estimator_instance

    # todo: may have to rework this, due to "if estimator has param"
    for method in NON_STATE_CHANGING_METHODS:
        # for now, we have to skip predict_proba, since current output comparison
        # does not work for tensorflow Distribution
        if (
            isinstance(estimator_instance, BaseForecaster)
            and method == "predict_proba"
        ):
            continue
        if _has_capability(estimator, method):
            set_random_state(estimator)
            results = scenario.run(
                estimator,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            estimator = results[0]
            set_random_state(estimator)

            results_2nd = scenario.run(
                estimator,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            _assert_array_almost_equal(
                results[1],
                results_2nd[1],
                # err_msg=f"Idempotency check failed for method {method}",
            )


def check_samplers_sparse(name, Sampler):
    # check that sparse matrices can be passed through the sampler leading to
    # the same results as dense
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    X_sparse = sparse.csr_matrix(X)
    if isinstance(Sampler(), NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    elif isinstance(Sampler(), ClusterCentroids):
        # set KMeans to full since it supports both sparse and dense input
        samplers = [
            Sampler(
                random_state=0,
                voting="soft",
                estimator=KMeans(random_state=1, algorithm="full"),
            )
        ]
    else:
        samplers = [Sampler()]

    for sampler in samplers:
        set_random_state(sampler)
        X_res_sparse, y_res_sparse = sampler.fit_resample(X_sparse, y)
        X_res, y_res = sampler.fit_resample(X, y)
        assert sparse.issparse(X_res_sparse)
        assert_allclose(X_res_sparse.A, X_res)
        assert_allclose(y_res_sparse, y_res)


def check_methods_do_not_change_state(Estimator):
    # Check that methods that are not supposed to change attributes of the
    # estimators do not change anything (including hyper-parameters and
    # fitted parameters)
    estimator = _construct_instance(Estimator)
    set_random_state(estimator)

    fit_args = _make_args(estimator=estimator, method="fit")
    estimator.fit(*fit_args)
    dict_before = estimator.__dict__.copy()

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args = _make_args(estimator=estimator, method=method)
            getattr(estimator, method)(*args)

            if method == "transform" and Estimator.get_class_tag("fit-in-transform"):
                # Some transformations fit during transform, as they apply
                # some transformation to each series passed to transform,
                # so transform will actually change the state of these estimators.
                continue

            assert (
                estimator.__dict__ == dict_before
            ), f"Estimator: {estimator} changes __dict__ during {method}"


def test_fit_idempotent(estimator_instance, scenario):
    """Check that calling fit twice is equivalent to calling it once."""
    estimator = estimator_instance

    # todo: may have to rework this, due to "if estimator has param"
    for method in NON_STATE_CHANGING_METHODS:
        if _has_capability(estimator, method):
            set_random_state(estimator)
            results = scenario.run(
                estimator,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            estimator = results[0]
            set_random_state(estimator)

            results_2nd = scenario.run(
                estimator,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            _assert_array_almost_equal(
                results[1],
                results_2nd[1],
                # err_msg=f"Idempotency check failed for method {method}",
            )


def check_samplers_list(name, Sampler):
    # Check that the samplers can handle simple lists
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    X_list = X.tolist()
    y_list = y.tolist()

    sampler = Sampler()
    if isinstance(sampler, NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    else:
        samplers = [sampler]

    for sampler in samplers:
        set_random_state(sampler)
        X_res, y_res = sampler.fit_resample(X, y)
        X_res_list, y_res_list = sampler.fit_resample(X_list, y_list)

        assert isinstance(X_res_list, list)
        assert isinstance(y_res_list, list)
        assert_allclose(X_res, X_res_list)
        assert_allclose(y_res, y_res_list)


def check_pipeline_consistency(name, estimator_orig):
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    funcs = ["score", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_allclose_dense_sparse(result, result_pipe)


def test_methods_have_no_side_effects(self, estimator_instance, scenario):
    """Check that calling methods has no side effects on args."""
    estimator = estimator_instance

    set_random_state(estimator)

    # Fit the model, get args before and after
    _, args_after = scenario.run(
        estimator, method_sequence=["fit"], return_args=True
    )
    fit_args_after = args_after[0]
    fit_args_before = scenario.args["fit"]

    assert deep_equals(
        fit_args_before, fit_args_after
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if _has_capability(estimator, method):
            # Run the method, get args before and after
            _, args_after = scenario.run(
                estimator, method_sequence=[method], return_args=True
            )
            method_args_after = args_after[0]
            method_args_before = scenario.get_args(method, estimator)

            assert deep_equals(
                method_args_after, method_args_before
            ), f"Estimator: {estimator} has side effects on arguments of {method}"


def test_methods_do_not_change_state(self, estimator_instance, scenario):
    """Check that non-state-changing methods do not change state.

    Check that methods that are not supposed to change attributes of the
    estimators do not change anything (including hyper-parameters and
    fitted parameters).
    """
    estimator = estimator_instance
    set_random_state(estimator)

    for method in NON_STATE_CHANGING_METHODS:
        if _has_capability(estimator, method):
            # dict_before = copy of dictionary of estimator before predict, post fit
            _ = scenario.run(estimator, method_sequence=["fit"])
            dict_before = estimator.__dict__.copy()

            # dict_after = dictionary of estimator after predict and fit
            _ = scenario.run(estimator, method_sequence=[method])
            dict_after = estimator.__dict__

            is_equal, msg = deep_equals(dict_after, dict_before, return_msg=True)
            assert is_equal, (
                f"Estimator: {type(estimator).__name__} changes __dict__ "
                f"during {method}, "
                f"reason/location of discrepancy (x=after, y=before): {msg}"
            )


def test_multiprocessing_idempotent(self, estimator_instance, scenario):
    """Test that single and multi-process run results are identical.

    Check that running an estimator on a single process is no different to
    running it on multiple processes. We also check that we can set n_jobs=-1
    to make use of all CPUs. The test is not really necessary though, as we
    rely on joblib for parallelization and can trust that it works as expected.
    """
    params = estimator_instance.get_params()

    if "n_jobs" in params:
        for method in NON_STATE_CHANGING_METHODS:
            if _has_capability(estimator_instance, method):
                # run on a single process
                # -----------------------
                estimator = deepcopy(estimator_instance)
                estimator.set_params(n_jobs=1)
                set_random_state(estimator)
                result_single_process = scenario.run(
                    estimator, method_sequence=["fit", method]
                )

                # run on multiple processes
                # -------------------------
                estimator = deepcopy(estimator_instance)
                estimator.set_params(n_jobs=-1)
                set_random_state(estimator)
                result_multiple_process = scenario.run(
                    estimator, method_sequence=["fit", method]
                )

                _assert_array_equal(
                    result_single_process,
                    result_multiple_process,
                    err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
                )


def test_fit_does_not_overwrite_hyper_params(estimator_instance):
    """Check that we do not overwrite hyper-parameters in fit."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Make a physical copy of the original estimator parameters before fitting.
    params = estimator.get_params()
    original_params = deepcopy(params)

    # Fit the model
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Compare the state of the model parameters with the original parameters
    new_params = estimator.get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash function
        # that introspects recursively any subobjects to compute a checksum.
        # The only exception to this rule of immutable constructor parameters
        # is possible RandomState instance but in this check we explicitly
        # fixed the random_state params recursively to be integer seeds.
        assert joblib.hash(new_value) == joblib.hash(original_value), (
            "Estimator %s should not change or mutate "
            "the parameter %s from %s to %s during fit."
            % (estimator.__class__.__name__, param_name, original_value, new_value)
        )


def test_fit_idempotent(estimator_instance):
    """Check that calling fit twice is equivalent to calling it once."""
    estimator = estimator_instance

    set_random_state(estimator)

    # Fit for the first time
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    results = dict()
    args = dict()
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args[method] = _make_args(estimator, method)
            results[method] = getattr(estimator, method)(*args[method])

    # Fit again
    set_random_state(estimator)
    estimator.fit(*fit_args)

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_result = getattr(estimator, method)(*args[method])
            _assert_array_almost_equal(
                results[method],
                new_result,
                # err_msg=f"Idempotency check failed for method {method}",
            )


def test_set_random_state():
    lda = LinearDiscriminantAnalysis()
    tree = DecisionTreeClassifier()
    # Linear Discriminant Analysis doesn't have random state: smoke test
    set_random_state(lda, 3)
    set_random_state(tree, 3)
    assert tree.random_state == 3


def test_methods_do_not_change_state(estimator_instance):
    """Check that non-state-changing methods do not change state.

    Check that methods that are not supposed to change attributes of the
    estimators do not change anything (including hyper-parameters and
    fitted parameters).
    """
    estimator = estimator_instance
    set_random_state(estimator)

    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    dict_before = estimator.__dict__.copy()

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args = _make_args(estimator, method)
            getattr(estimator, method)(*args)

            if method == "transform" and estimator.get_class_tag("fit-in-transform"):
                # Some transformations fit during transform, as they apply
                # some transformation to each series passed to transform,
                # so transform will actually change the state of these estimators.
                continue

            if method == "predict" and estimator.get_class_tag("fit-in-predict"):
                # Some annotators fit during predict, as they apply
                # some annotation to each series passed to predict,
                # so predict will actually change the state of these annotators.
                continue

            assert (
                estimator.__dict__ == dict_before
            ), f"Estimator: {estimator} changes __dict__ during {method}"


def check_non_transf_est_n_iter(name, estimator_orig):
    # Test that non-transformer estimators exposing a max_iter parameter
    # report an n_iter_ attribute of at least 1 after fitting.
    estimator = clone(estimator_orig)
    if hasattr(estimator, 'max_iter'):
        X, y = _create_small_ts_dataset()
        set_random_state(estimator, 0)
        estimator.fit(X, y)
        assert estimator.n_iter_ >= 1


def _generate_search_cv_instances():
    for SearchCV, (Estimator, param_grid) in product(
        [
            GridSearchCV,
            HalvingGridSearchCV,
            RandomizedSearchCV,
            HalvingRandomSearchCV,
        ],
        [
            (Ridge, {"alpha": [0.1, 1.0]}),
            (LogisticRegression, {"C": [0.1, 1.0]}),
        ],
    ):
        init_params = signature(SearchCV).parameters
        extra_params = (
            {"min_resources": "smallest"} if "min_resources" in init_params else {}
        )
        search_cv = SearchCV(Estimator(), param_grid, cv=2, **extra_params)
        set_random_state(search_cv)
        yield search_cv

    for SearchCV, (Estimator, param_grid) in product(
        [
            GridSearchCV,
            HalvingGridSearchCV,
            RandomizedSearchCV,
            HalvingRandomSearchCV,
        ],
        [
            (Ridge, {"ridge__alpha": [0.1, 1.0]}),
            (LogisticRegression, {"logisticregression__C": [0.1, 1.0]}),
        ],
    ):
        init_params = signature(SearchCV).parameters
        extra_params = (
            {"min_resources": "smallest"} if "min_resources" in init_params else {}
        )
        search_cv = SearchCV(
            make_pipeline(PCA(), Estimator()), param_grid, cv=2, **extra_params
        ).set_params(error_score="raise")
        set_random_state(search_cv)
        yield search_cv


def _tested_estimators():
    for name, Estimator in all_estimators():
        try:
            estimator = _construct_instance(Estimator)
            set_random_state(estimator)
        except SkipTest:
            continue

        if isinstance(estimator, NearMiss):
            # For NearMiss, let's check the three algorithms
            for version in (1, 2, 3):
                yield clone(estimator).set_params(version=version)
        else:
            yield estimator


def check_target_type(name, Estimator):
    # continuous targets are not supported: fit_resample should raise an error
    X = np.random.random((20, 2))
    y = np.linspace(0, 1, 20)
    estimator = Estimator()
    set_random_state(estimator)
    with pytest.raises(ValueError, match="Unknown label type: 'continuous'"):
        estimator.fit_resample(X, y)

    # if the target is multilabel then we should raise an error
    rng = np.random.RandomState(42)
    y = rng.randint(2, size=(20, 3))
    msg = "Multilabel and multioutput targets are not supported."
    with pytest.raises(ValueError, match=msg):
        estimator.fit_resample(X, y)


def check_samplers_preserve_dtype(name, Sampler):
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    # Cast X and y to a non-default dtype
    X = X.astype(np.float32)
    y = y.astype(np.int32)

    sampler = Sampler()
    set_random_state(sampler)
    X_res, y_res = sampler.fit_resample(X, y)
    assert X.dtype == X_res.dtype, "X dtype is not preserved"
    assert y.dtype == y_res.dtype, "y dtype is not preserved"


def check_clustering(name, clusterer_orig, readonly_memmap=False):
    clusterer = clone(clusterer_orig)
    X, y = _create_small_ts_dataset()
    X, y = shuffle(X, y, random_state=7)
    X = TimeSeriesScalerMeanVariance().fit_transform(X)
    rng = np.random.RandomState(42)
    X_noise = X + (rng.randn(*X.shape) / 5)

    n_samples, n_features, dim = X.shape
    # catch deprecation and neighbors warnings
    if hasattr(clusterer, "n_clusters"):
        clusterer.set_params(n_clusters=3)
    set_random_state(clusterer)

    # fit
    clusterer.fit(X)
    # with lists
    clusterer.fit(X.tolist())

    pred = clusterer.labels_
    assert_equal(pred.shape, (n_samples,))
    assert_greater(adjusted_rand_score(pred, y), 0.4)

    if clusterer._get_tags()['non_deterministic']:
        return

    set_random_state(clusterer)
    with warnings.catch_warnings(record=True):
        pred2 = clusterer.fit_predict(X)
    assert_array_equal(pred, pred2)

    # fit_predict(X) and labels_ should be of type int
    assert pred.dtype in [np.dtype('int32'), np.dtype('int64')]
    assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')]

    # Add noise to X to test the possible values of the labels
    labels = clusterer.fit_predict(X_noise)

    # There should be at least one sample in every original cluster
    labels_sorted = np.unique(labels)
    assert_array_equal(labels_sorted, np.arange(0, 3))

    # Labels should be no greater than n_clusters - 1
    if hasattr(clusterer, 'n_clusters'):
        n_clusters = getattr(clusterer, 'n_clusters')
        assert_greater_equal(n_clusters - 1, labels_sorted[-1])


def check_samplers_multiclass_ova(name, Sampler):
    # Check that a multiclass target leads to the same results as its OVA encoding
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    y_ova = label_binarize(y, classes=np.unique(y))

    sampler = Sampler()
    set_random_state(sampler)
    X_res, y_res = sampler.fit_resample(X, y)
    X_res_ova, y_res_ova = sampler.fit_resample(X, y_ova)

    assert_allclose(X_res, X_res_ova)
    assert type_of_target(y_res_ova) == type_of_target(y_ova)
    assert_allclose(y_res, y_res_ova.argmax(axis=1))


def test_multiprocessing_idempotent(estimator_class):
    """Test that single and multi-process run results are identical.

    Check that running an estimator on a single process is no different to
    running it on multiple processes. We also check that we can set n_jobs=-1
    to make use of all CPUs. The test is not really necessary though, as we
    rely on joblib for parallelization and can trust that it works as expected.
    """
    estimator = estimator_class.create_test_instance()
    params = estimator.get_params()

    if "n_jobs" in params:
        results = dict()
        args = dict()

        # run on a single process
        estimator = estimator_class.create_test_instance()
        estimator.set_params(n_jobs=1)
        set_random_state(estimator)
        args["fit"] = _make_args(estimator, "fit")
        estimator.fit(*args["fit"])

        # compute and store results
        for method in NON_STATE_CHANGING_METHODS:
            if hasattr(estimator, method):
                args[method] = _make_args(estimator, method)
                results[method] = getattr(estimator, method)(*args[method])

        # run on multiple processes, reusing the same input arguments
        estimator = estimator_class.create_test_instance()
        estimator.set_params(n_jobs=-1)
        set_random_state(estimator)
        estimator.fit(*args["fit"])

        # compute and compare results
        for method in results:
            if hasattr(estimator, method):
                result = getattr(estimator, method)(*args[method])
                _assert_array_equal(
                    results[method],
                    result,
                    err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
                )


def test_methods_do_not_change_state(estimator_instance, scenario):
    """Check that non-state-changing methods do not change state.

    Check that methods that are not supposed to change attributes of the
    estimators do not change anything (including hyper-parameters and
    fitted parameters).
    """
    estimator = estimator_instance
    set_random_state(estimator)

    for method in NON_STATE_CHANGING_METHODS:
        if _has_capability(estimator, method):
            # dict_before = copy of dictionary of estimator before predict, after fit
            _ = scenario.run(estimator, method_sequence=["fit"])
            dict_before = estimator.__dict__.copy()

            # dict_after = dictionary of estimator after predict and fit
            _ = scenario.run(estimator, method_sequence=[method])
            dict_after = estimator.__dict__

            if method == "transform" and estimator.get_class_tag("fit-in-transform"):
                # Some transformations fit during transform, as they apply
                # some transformation to each series passed to transform,
                # so transform will actually change the state of these estimators.
                continue

            if method == "predict" and estimator.get_class_tag("fit-in-predict"):
                # Some annotators fit during predict, as they apply
                # some annotation to each series passed to predict,
                # so predict will actually change the state of these annotators.
                continue

            # old logic uses equality without auto-msg, keep comment until refactor
            # is_equal = dict_after == dict_before
            is_equal, msg = deep_equals(dict_after, dict_before, return_msg=True)
            assert is_equal, (
                f"Estimator: {type(estimator).__name__} changes __dict__ "
                f"during {method}, "
                f"reason/location of discrepancy (x=after, y=before): {msg}"
            )


def check_samplers_pandas(name, Sampler):
    pd = pytest.importorskip("pandas")
    # Check that the samplers handle pandas dataframe and pandas series
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    X_pd = pd.DataFrame(X)

    sampler = Sampler()
    if isinstance(sampler, NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    else:
        samplers = [sampler]

    for sampler in samplers:
        set_random_state(sampler)
        X_res_pd, y_res_pd = sampler.fit_resample(X_pd, y)
        X_res, y_res = sampler.fit_resample(X, y)
        assert_allclose(X_res_pd, X_res)
        assert_allclose(y_res_pd, y_res)


def test_persistence_via_pickle(self, estimator_instance, scenario):
    """Check that we can pickle all estimators."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Fit the model, get args before and after
    scenario.run(estimator, method_sequence=["fit"], return_args=True)

    # Generate results before pickling
    results = {}
    for method in NON_STATE_CHANGING_METHODS:
        if _has_capability(estimator, method):
            results[method] = scenario.run(estimator, method_sequence=[method])

    # Pickle and unpickle
    pickled_estimator = pickle.dumps(estimator)
    unpickled_estimator = pickle.loads(pickled_estimator)

    # Compare against results after pickling
    for method, vanilla_result in results.items():
        # escape predict_proba for forecasters, tfp distributions cannot be pickled
        if (
            isinstance(estimator_instance, BaseForecaster)
            and method == "predict_proba"
        ):
            continue

        unpickled_result = scenario.run(
            unpickled_estimator, method_sequence=[method]
        )

        msg = (
            f"Results of {method} differ between when pickling and not pickling, "
            f"estimator {type(estimator_instance).__name__}"
        )
        _assert_array_almost_equal(
            vanilla_result,
            unpickled_result,
            decimal=6,
            err_msg=msg,
        )


def test_methods_have_no_side_effects(estimator_instance):
    """Check that calling methods has no side effects on args."""
    estimator = estimator_instance

    set_random_state(estimator)

    # Fit for the first time
    fit_args = _make_args(estimator, "fit")
    old_fit_args = deepcopy(fit_args)
    estimator.fit(*fit_args)

    assert deep_equals(
        old_fit_args, fit_args
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_args = _make_args(estimator, method)
            old_args = deepcopy(new_args)
            getattr(estimator, method)(*new_args)

            assert deep_equals(
                old_args, new_args
            ), f"Estimator: {estimator} has side effects on arguments of {method}"
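

# -----------------------------------------------------------------------------
# Minimal sketch of how (name, Sampler) check functions like the ones above are
# commonly wired into pytest via parametrization. This is illustrative only:
# `SAMPLER_CLASSES` is a hypothetical registry (not part of the original code),
# and the check functions are assumed to be importable from this module.
# -----------------------------------------------------------------------------
import pytest

# hypothetical list of (name, SamplerClass) pairs to run the common checks on,
# e.g. [("RandomOverSampler", RandomOverSampler)]
SAMPLER_CLASSES = []


@pytest.mark.parametrize("name, Sampler", SAMPLER_CLASSES)
def test_sampler_common_checks(name, Sampler):
    # Each check takes the estimator name and the sampler class, mirroring the
    # (name, Sampler) signature used by the checks defined above.
    check_samplers_preserve_dtype(name, Sampler)
    check_samplers_list(name, Sampler)
    check_samplers_pandas(name, Sampler)
    check_samplers_sparse(name, Sampler)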