def check_persistence_via_pickle(Estimator):
    """Check that a fitted estimator gives the same results after pickling.

    An instance is fitted, the output of every method listed in
    ``NON_STATE_CHANGING_METHODS`` that the instance exposes is recorded, the
    instance is round-tripped through ``pickle`` and the outputs of the
    restored object are compared against the recorded ones.
    """
    instance = _construct_instance(Estimator)
    set_random_state(instance)
    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    # Record inputs and outputs of the fitted instance before serialising it.
    call_args = {}
    expected = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue
        call_args[name] = _make_args(instance, name)
        expected[name] = getattr(instance, name)(*call_args[name])

    # Round trip through pickle.
    # NOTE: a check that the payload of ``sktime.`` estimators contains
    # b"version" is currently disabled.
    payload = pickle.dumps(instance)
    restored = pickle.loads(payload)

    # The restored object must reproduce the recorded outputs.
    for name, reference in expected.items():
        outcome = getattr(restored, name)(*call_args[name])
        _assert_almost_equal(reference, outcome)
def check_fit_idempotent(Estimator):
    """Check that fitting twice on the same data equals fitting once.

    The outputs of every available method in ``NON_STATE_CHANGING_METHODS``
    are recorded after the first fit and compared with the outputs obtained
    after re-seeding and fitting again with the same arguments.
    """
    instance = _construct_instance(Estimator)
    set_random_state(instance)

    # First fit: remember both the inputs and the produced outputs.
    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    call_args = {}
    first_pass = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue
        call_args[name] = _make_args(instance, name)
        first_pass[name] = getattr(instance, name)(*call_args[name])

    # Second fit, re-seeded, on the very same training arguments.
    set_random_state(instance)
    instance.fit(*train_args)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue
        second_pass = getattr(instance, name)(*call_args[name])
        _assert_array_almost_equal(first_pass[name], second_pass)
def check_methods_do_not_change_state(Estimator):
    """Check that non-state-changing methods leave ``__dict__`` untouched.

    After fitting, a shallow copy of the instance ``__dict__`` is taken and
    compared with the live ``__dict__`` after each available method in
    ``NON_STATE_CHANGING_METHODS`` has been called.
    """
    instance = _construct_instance(Estimator)
    set_random_state(instance)
    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)
    snapshot = instance.__dict__.copy()

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue

        method_args = _make_args(instance, name)
        getattr(instance, name)(*method_args)

        # Transformers tagged "fit-in-transform" fit on each series passed to
        # transform, so their state is allowed to change during transform.
        if name == "transform" and _has_tag(Estimator, "fit-in-transform"):
            continue

        unchanged = instance.__dict__ == snapshot
        assert unchanged, f"Estimator: {instance} changes __dict__ during {name}"
def check_persistence_via_pickle(Estimator):
    """Check that a fitted estimator gives the same results after pickling.

    Outputs of every available method in ``NON_STATE_CHANGING_METHODS`` are
    recorded on the fitted instance and compared, to 6 decimals, against the
    outputs of the instance restored from its pickle payload.
    """
    instance = _construct_instance(Estimator)
    set_random_state(instance)
    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    # Record inputs and outputs of the fitted instance before serialising it.
    call_args = {}
    expected = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue
        call_args[name] = _make_args(instance, name)
        expected[name] = getattr(instance, name)(*call_args[name])

    # Round trip through pickle.
    payload = pickle.dumps(instance)
    restored = pickle.loads(payload)

    # The restored object must reproduce the recorded outputs.
    for name, reference in expected.items():
        outcome = getattr(restored, name)(*call_args[name])
        _assert_array_almost_equal(
            reference,
            outcome,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )
def test_series_as_features_multivariate_input(Estimator):
    """Check that multivariate input is handled or rejected with a clear error.

    The estimator is fitted and queried on data generated with two columns.
    A ``ValueError`` is tolerated only if its message contains the expected
    "X must be univariate ..." text.
    """
    n_columns = 2
    expected_message = (
        "X must be univariate with X.shape[1] == 1, but found: "
        f"X.shape[1] == {n_columns}."
    )

    instance = _construct_instance(Estimator)
    X_train, y_train = _make_args(instance, "fit", n_columns=n_columns)

    try:
        # Happy path: the estimator copes with multivariate data.
        instance.fit(X_train, y_train)

        # TODO include series-as-features transformers
        for name in ("predict", "predict_proba"):
            X = _make_args(instance, name, n_columns=n_columns)[0]
            getattr(instance, name)(X)

    except ValueError as err:
        # Otherwise the error must carry the appropriate message.
        assert expected_message in str(err), (
            f"{instance.__class__.__name__} does not handle multivariate data "
            f"and does not raise an appropriate error when multivariate data "
            f"is passed"
        )
def check_multiprocessing_determinism(Estimator):
    """Check that results are equal when run with ``n_jobs=1`` and ``n_jobs=4``.

    Estimators whose ``__init__`` has no ``n_jobs`` parameter are skipped.
    For every available method in ``NON_STATE_CHANGING_METHODS`` the instance
    is re-seeded and refitted once per ``n_jobs`` setting, and the two
    outputs are compared for exact equality.
    """
    if "n_jobs" not in signature(Estimator.__init__).parameters:
        return

    instance = _construct_instance(Estimator)
    train_args = _make_args(instance, "fit")

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue

        # Only the first generated argument is passed to the method.
        X = _make_args(instance, name)[0]

        outputs = []
        for n_jobs in (1, 4):
            instance.set_params(n_jobs=n_jobs)
            if hasattr(instance, "n_jobs"):
                assert instance.n_jobs == n_jobs
            set_random_state(instance)
            instance.fit(*train_args)
            outputs.append(getattr(instance, name)(X))

        single_job, multi_job = outputs[0], outputs[1]
        if isinstance(single_job, pd.DataFrame):
            assert_frame_equal(single_job, multi_job)
        else:
            np.testing.assert_array_equal(
                single_job,
                multi_job,
                err_msg="Results for test set not equal between 1 and 4 job run",
            )
def check_transform_returns_same_time_index(Estimator):
    """Check that series-to-series transformers preserve the input index.

    The estimator class must be a subclass of ``_SeriesToSeriesTransformer``.
    After fitting, each of ``transform`` and ``inverse_transform`` that the
    instance exposes is called on freshly generated input, and the index of
    the output is compared with the index of that input.

    Raises
    ------
    AssertionError
        If ``Estimator`` is not a ``_SeriesToSeriesTransformer`` subclass or
        if a method returns data with a different index than its input.
    """
    assert issubclass(Estimator, _SeriesToSeriesTransformer)
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    for method in ["transform", "inverse_transform"]:
        if hasattr(estimator, method):
            X = _make_args(estimator, method)[0]
            # Dispatch on the loop variable; previously ``transform`` was
            # called on both iterations, so ``inverse_transform`` was never
            # actually exercised.
            Xt = getattr(estimator, method)(X)
            np.testing.assert_array_equal(
                X.index,
                Xt.index,
                err_msg=f"Index changed during {method}",
            )
def test_regressor_output(Estimator):
    """Check type, shape and dtype of a regressor's ``predict`` output.

    The prediction must be an instance of ``ACCEPTED_OUTPUT_TYPES``, be
    one-dimensional with one entry per row of the input, and have a floating
    point dtype.
    """
    instance = _construct_instance(Estimator)
    X_train, y_train = _make_args(instance, "fit")
    instance.fit(X_train, y_train)

    X_new = _make_args(instance, "predict")[0]
    predictions = instance.predict(X_new)

    assert isinstance(predictions, ACCEPTED_OUTPUT_TYPES)
    expected_shape = (X_new.shape[0],)
    assert predictions.shape == expected_shape
    assert np.issubdtype(predictions.dtype, np.floating)
def check_fit_returns_self(Estimator):
    """Check that ``fit`` returns the very instance it was called on."""
    instance = _construct_instance(Estimator)
    train_args = _make_args(instance, "fit")
    returned = instance.fit(*train_args)
    assert (
        returned is instance
    ), f"Estimator: {instance} does not return self when calling fit"
def check_fit_does_not_overwrite_hyper_params(Estimator):
    """Check that ``fit`` neither replaces nor mutates hyper-parameters.

    A deep copy of ``get_params()`` is taken before fitting. After fitting,
    each parameter is compared with its copy through ``joblib.hash``.
    """
    instance = _construct_instance(Estimator)
    set_random_state(instance)

    # Deep copy, so in-place mutation of a parameter object is detected too.
    params_before = deepcopy(instance.get_params())

    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    params_after = instance.get_params()
    for key, before in params_before.items():
        after = params_after[key]

        # joblib.hash recursively introspects sub-objects to compute a
        # checksum, so any change to a constructor parameter shows up here.
        # The usual exception, a RandomState instance, does not apply: the
        # random_state params were explicitly fixed to integer seeds above.
        same_checksum = joblib.hash(after) == joblib.hash(before)
        assert same_checksum, (
            "Estimator %s should not change or mutate "
            " the parameter %s from %s to %s during fit."
            % (instance.__class__.__name__, key, before, after)
        )
def _construct_fit_transform(Estimator, **kwargs):
    """Construct an instance and return the result of ``fit_transform``.

    ``kwargs`` are forwarded to ``_make_args`` when generating the fit
    arguments. Only the first two generated arguments are used: for
    forecasters which are also transformers (e.g. pipelines), the forecasting
    horizon cannot be passed to transform. Note that this will fail for
    forecasters which require the forecasting horizon in fit.
    """
    instance = _construct_instance(Estimator)
    fit_inputs = _make_args(instance, "fit", **kwargs)
    leading_two = fit_inputs[:2]
    return instance.fit_transform(*leading_two)
def test_3d_numpy_input(Estimator):
    """Check that 3d numpy input is handled or rejected with a clear error.

    The estimator is fitted on arguments generated with ``return_numpy=True``.
    Each available method in ``NON_STATE_CHANGING_METHODS`` is then called on
    numpy arguments. A ``ValueError`` is tolerated only if its message states
    that a nested ``pd.DataFrame`` is required.
    """
    expected_message = "This method requires X to be a nested pd.DataFrame"

    instance = _construct_instance(Estimator)
    train_args = _make_args(instance, "fit", return_numpy=True)
    instance.fit(*train_args)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue

        try:
            # Happy path: the method accepts 3d numpy input.
            method_args = _make_args(instance, name, return_numpy=True)
            getattr(instance, name)(*method_args)

        except ValueError as err:
            # Otherwise the error must carry the appropriate message.
            assert expected_message in str(err), (
                f"{instance.__class__.__name__} does not handle 3d numpy "
                f"input data correctly"
            )
def test_classifier_output(Estimator):
    """Check the outputs of a classifier's ``predict`` and ``predict_proba``.

    Predicted labels must be an instance of ``ACCEPTED_OUTPUT_TYPES``, be
    one-dimensional with one entry per input row, and only contain labels
    seen in the training target. If ``predict_proba`` exists, its output must
    have shape ``(n_rows, N_CLASSES)`` with rows summing to 1.
    """
    instance = _construct_instance(Estimator)
    X_train, y_train = _make_args(instance, "fit", n_classes=N_CLASSES)
    instance.fit(X_train, y_train)

    X_new = _make_args(instance, "predict")[0]

    # predict
    labels = instance.predict(X_new)
    assert isinstance(labels, ACCEPTED_OUTPUT_TYPES)
    assert labels.shape == (X_new.shape[0],)
    predicted_classes = np.unique(labels)
    assert np.all(np.isin(predicted_classes, np.unique(y_train)))

    # predict_proba is optional
    if not hasattr(instance, "predict_proba"):
        return

    probabilities = instance.predict_proba(X_new)
    assert isinstance(probabilities, ACCEPTED_OUTPUT_TYPES)
    assert probabilities.shape == (X_new.shape[0], N_CLASSES)
    row_totals = probabilities.sum(axis=1)
    np.testing.assert_allclose(row_totals, 1)
def check_raises_not_fitted_error(Estimator):
    """Check that unfitted estimators raise ``NotFittedError``.

    Every available method in ``NON_STATE_CHANGING_METHODS`` is called on a
    freshly constructed, unfitted instance and must raise ``NotFittedError``
    with a message matching "has not been fitted".
    """
    instance = _construct_instance(Estimator)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(instance, name):
            continue

        # Arguments are generated outside the ``raises`` context so that only
        # the method call itself is expected to raise.
        method_args = _make_args(instance, name)
        with pytest.raises(NotFittedError, match=r"has not been fitted"):
            getattr(instance, name)(*method_args)
def check_multiprocessing_idempotent(Estimator):
    """Check that ``n_jobs=1`` and ``n_jobs=-1`` give identical results.

    Estimators without an ``n_jobs`` entry in ``get_params()`` are skipped.
    One instance is fitted with ``n_jobs=1`` and its method outputs recorded;
    a second instance is fitted with ``n_jobs=-1`` on the same arguments and
    its outputs must be equal to the recorded ones.
    """
    probe = _construct_instance(Estimator)
    if "n_jobs" not in probe.get_params():
        return

    call_args = {}
    reference = {}

    # Reference run with n_jobs=1.
    serial = _construct_instance(Estimator)
    serial.set_params(n_jobs=1)
    set_random_state(serial)
    call_args["fit"] = _make_args(serial, "fit")
    serial.fit(*call_args["fit"])

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(serial, name):
            continue
        call_args[name] = _make_args(serial, name)
        reference[name] = getattr(serial, name)(*call_args[name])

    # Run with n_jobs=-1, reusing exactly the same input arguments.
    parallel = _construct_instance(Estimator)
    parallel.set_params(n_jobs=-1)
    set_random_state(parallel)
    parallel.fit(*call_args["fit"])

    for name, expected in reference.items():
        if not hasattr(parallel, name):
            continue
        outcome = getattr(parallel, name)(*call_args[name])
        _assert_array_equal(
            expected,
            outcome,
            err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
        )
def check_fit_updates_state(Estimator):
    """Check that ``fit`` flips the is-fitted attributes from falsy to truthy.

    Both ``_is_fitted`` and ``is_fitted`` are inspected on a fresh instance
    before and after calling ``fit``.
    """
    fitted_flags = ("_is_fitted", "is_fitted")
    instance = _construct_instance(Estimator)

    # Before fit: every flag must be falsy.
    for flag in fitted_flags:
        value = getattr(instance, flag)
        assert (
            not value
        ), f"Estimator: {instance} does not initiate attribute: {flag} to False"

    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    # After fit: every flag must be truthy.
    for flag in fitted_flags:
        value = getattr(instance, flag)
        assert (
            value
        ), f"Estimator: {instance} does not update attribute: {flag} during fit"
def check_fit_updates_state(Estimator):
    """Check that ``fit`` flips the is-fitted states from falsy to truthy.

    Both ``_is_fitted`` and ``is_fitted`` are inspected on a fresh instance
    before and after calling ``fit``.
    """
    fitted_flags = ("_is_fitted", "is_fitted")
    instance = _construct_instance(Estimator)

    # Before fit: every state must be falsy.
    for flag in fitted_flags:
        value = getattr(instance, flag)
        assert (
            not value
        ), f"Estimator: {instance} does not initiate state: {flag} to False"

    train_args = _make_args(instance, "fit")
    instance.fit(*train_args)

    # After fit: every state must be truthy.
    for flag in fitted_flags:
        value = getattr(instance, flag)
        assert (
            value
        ), f"Estimator: {instance} does not update state: {flag} during fit"
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that ``inverse_transform`` undoes ``fit_transform``.

    The first generated fit argument is transformed and inverse-transformed,
    and the result is compared with the original input via
    ``_assert_array_almost_equal``.
    """
    instance = _construct_instance(Estimator)
    original = _make_args(instance, "fit")[0]
    transformed = instance.fit_transform(original)
    recovered = instance.inverse_transform(transformed)
    _assert_array_almost_equal(original, recovered)
def _construct_fit(Estimator, **kwargs):
    """Construct an instance, fit it and return whatever ``fit`` returns.

    ``kwargs`` are forwarded to ``_make_args`` when generating the fit
    arguments; only the first two generated arguments are passed to ``fit``.
    """
    instance = _construct_instance(Estimator)
    fit_inputs = _make_args(instance, "fit", **kwargs)
    leading_two = fit_inputs[:2]
    return instance.fit(*leading_two)
def test_transformed_data_has_same_index_as_input_data(Transformer):
    """Check that ``fit_transform`` output carries the same index as its input."""
    instance = _construct_instance(Transformer)
    X, y = _make_args(instance, "fit")
    transformed = instance.fit_transform(X, y)
    np.testing.assert_array_equal(X.index, transformed.index)