def test_values_uniform(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    mod = RandomRegressor(strategy="uniform")
    predictions = mod.fit(X, y).predict(X)
    assert (predictions >= y.min()).all()
    assert (predictions <= y.max()).all()
    assert mod.min_ == pytest.approx(y.min(), abs=0.0001)
    assert mod.max_ == pytest.approx(y.max(), abs=0.0001)
def test_estimator_checks(test_fn):
    # Tests that are skipped:
    # 'check_methods_subset_invariance': Since we add noise, the method is not invariant on a subset
    # 'check_regressors_train': score is not always greater than 0.5 due to randomness
    regr_normal = RandomRegressor(strategy="normal")
    test_fn(RandomRegressor.__name__ + '_normal', regr_normal)

    regr_uniform = RandomRegressor(strategy="uniform")
    test_fn(RandomRegressor.__name__ + '_uniform', regr_uniform)
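# A minimal sketch of how the `test_fn` fixture used above could be implemented in tests/conftest.py.
# This is an assumption for illustration, not the project's actual conftest: it wraps sklearn's
# `check_estimator` and monkeypatches the checks named in the comments above into no-ops, which is
# the same pattern used in test_check_estimator further down. The name argument is only kept for
# readable reporting; `check_estimator(estimator)` with an instance works on recent sklearn versions.
import pytest
from sklearn.utils import estimator_checks
from sklearn.utils.estimator_checks import check_estimator

SKIPPED_CHECKS = ["check_methods_subset_invariance", "check_regressors_train"]  # hypothetical list


@pytest.fixture
def test_fn(monkeypatch):
    def run(name, estimator):
        # Replace each skipped check with a no-op before running the full sklearn check suite.
        for check_name in SKIPPED_CHECKS:
            monkeypatch.setattr(estimator_checks, check_name, lambda *args, **kwargs: True, raising=False)
        check_estimator(estimator)
    return run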
import pytest

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklego.dummy import RandomRegressor
from sklego.linear_model import DeadZoneRegressor
from sklego.mixture import GMMClassifier, BayesianGMMClassifier, GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import id_func


@pytest.mark.parametrize("estimator", [
    RandomRegressor(strategy="uniform"),
    RandomRegressor(strategy="normal"),
    DeadZoneRegressor(effect="linear", n_iter=100),
    DeadZoneRegressor(effect="quadratic", n_iter=100),
], ids=id_func)
def test_shape_regression(estimator, random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    assert estimator.fit(X, y).predict(X).shape[0] == y.shape[0]
    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('clf', estimator)])
    assert pipe.fit(X, y).predict(X).shape[0] == y.shape[0]


# NOTE: the classification shape test below was truncated in the source; it is completed here to
# mirror test_shape_regression (the fixture name `random_xy_dataset_clf` is assumed).
@pytest.mark.parametrize("estimator", [
    GMMClassifier(),
    BayesianGMMClassifier(),
    GMMOutlierDetector(threshold=0.999, method="quantile"),
    GMMOutlierDetector(threshold=2, method="stddev"),
    BayesianGMMOutlierDetector(threshold=0.999, method="quantile"),
    BayesianGMMOutlierDetector(threshold=2, method="stddev"),
], ids=id_func)
def test_shape_classification(estimator, random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    assert estimator.fit(X, y).predict(X).shape[0] == y.shape[0]
    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('clf', estimator)])
    assert pipe.fit(X, y).predict(X).shape[0] == y.shape[0]
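# A possible sketch of the `id_func` helper imported from tests.conftest above (assumed, not the
# project's actual implementation): it only needs to turn each parametrized estimator into a
# readable pytest id, e.g. "RandomRegressor(strategy='uniform')".
def id_func(param):
    """Return a readable test id for a parametrized estimator instance."""
    return repr(param)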
def test_bad_values():
    with pytest.raises(ValueError):
        RandomRegressor(strategy="foobar")
def test_values_normal(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    mod = RandomRegressor(strategy="normal").fit(X, y)
    assert mod.mu_ == pytest.approx(np.mean(y), abs=0.001)
    assert mod.sigma_ == pytest.approx(np.std(y), abs=0.001)
def test_bad_values():
    np.random.seed(42)
    X = np.random.normal(0, 1, (10, 2))
    y = np.random.normal(0, 1, (10, 1))
    with pytest.raises(ValueError):
        RandomRegressor(strategy="foobar").fit(X, y)
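# A quick usage sketch tying the value tests above together (the example data here is assumed, not
# taken from the test suite). RandomRegressor only learns the target distribution at fit time and
# samples from it at predict time: with strategy="uniform" it exposes min_/max_ and draws values in
# that range, with strategy="normal" it exposes mu_/sigma_ and draws from that normal distribution,
# which is exactly what test_values_uniform and test_values_normal assert.
import numpy as np
from sklego.dummy import RandomRegressor

X = np.random.normal(0, 1, (100, 3))
y = np.random.normal(5, 2, (100,))

uniform_mod = RandomRegressor(strategy="uniform").fit(X, y)
print(uniform_mod.min_, uniform_mod.max_)    # close to y.min() and y.max()

normal_mod = RandomRegressor(strategy="normal").fit(X, y)
print(normal_mod.mu_, normal_mod.sigma_)     # close to y.mean() and y.std()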
from collections import defaultdict

import pytest
from sklearn.linear_model import LinearRegression
from sklearn.utils import estimator_checks

from sklego.dummy import RandomRegressor
from sklego.transformers import EstimatorTransformer, RandomAdder
from tests.conftest import id_func


@pytest.mark.parametrize("estimator", [
    RandomAdder(),
    EstimatorTransformer(LinearRegression()),
    RandomRegressor(),
], ids=id_func)
def test_check_estimator(estimator, monkeypatch):
    """Uses the sklearn `check_estimator` method to verify our custom estimators"""
    # Not all estimators CAN adhere to the defined sklearn api. An example of this is the random adder as sklearn
    # expects methods to be invariant to whether they are applied to the full dataset or a subset.
    # These tests can be monkey patched out using the skips dictionary.
    skips = defaultdict(list, {
        RandomAdder: [
            # Since we add noise, the method is not invariant on a subset
            'check_methods_subset_invariance',
            # The transformerselectormixin needs to compute a hash and it can't on a 'NotAnArray'
            'check_transformer_data_not_an_array',