import numpy as np
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier, BayesianGMMClassifier
from tests.conftest import general_checks, nonmeta_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_non_transformer_estimators_n_iter",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected estimator check against both GMM-based classifiers."""
    # GMMClassifier is checked first, then BayesianGMMClassifier; each check
    # receives a freshly constructed estimator with default parameters.
    for classifier_cls in (GMMClassifier, BayesianGMMClassifier):
        test_fn(classifier_cls.__name__, classifier_cls())


def test_obvious_usecase():
    """Two far-apart Gaussian blobs must be predicted perfectly on the training data."""
    # Blob centred at -10 is drawn first, then the one centred at +10.
    negative_blob = np.random.normal(-10, 1, (100, 2))
    positive_blob = np.random.normal(10, 1, (100, 2))
    X = np.concatenate([negative_blob, positive_blob])
    y = np.concatenate([np.zeros(100), np.ones(100)])

    predictions = GMMClassifier().fit(X, y).predict(X)
    assert (predictions == y).all()
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor

from sklego.common import flatten
from sklego.meta import GroupedPredictor
from sklego.datasets import load_chicken
from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks]),
        exclude=[
            # These checks make no sense here: the estimator always needs at
            # least two columns (one for the group, one for the value).
            "check_fit1d",
            "check_fit2d_predict1d",
            "check_fit2d_1feature",
            "check_transformer_data_not_an_array",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one general check against a GroupedPredictor grouping on column 0."""
    grouped_model = GroupedPredictor(
        estimator=LinearRegression(),
        groups=0,
        use_global_model=True,
    )
    check_label = f"{GroupedPredictor.__name__}_fallback"
    test_fn(check_label, grouped_model)
from sklego.meta import ZeroInflatedRegressor from sklego.testing import check_shape_remains_same_regressor from tests.conftest import general_checks, select_tests, regressor_checks @pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor]) def test_zir(test_fn): regr = ZeroInflatedRegressor( classifier=ExtraTreesClassifier(random_state=0), regressor=ExtraTreesRegressor(random_state=0)) test_fn(ZeroInflatedRegressor.__name__, regr) @pytest.mark.parametrize("test_fn", select_tests( flatten([general_checks, regressor_checks]), )) def test_estimator_checks(test_fn): test_fn( ZeroInflatedRegressor.__name__, ZeroInflatedRegressor(classifier=ExtraTreesClassifier(random_state=0), regressor=ExtraTreesRegressor(random_state=0))) def test_zero_inflated_example(): from sklearn.model_selection import cross_val_score np.random.seed(0) X = np.random.randn(10000, 4) y = ((X[:, 0] > 0) & (X[:, 1] > 0)) * np.abs( X[:, 2] * X[:, 3]**2) # many zeroes here, in about 75% of the cases.
from sklearn.pipeline import Pipeline
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import OutlierRemover
from sklego.mixture import GMMOutlierDetector


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        # check_transformers_unfitted used to be listed twice (first and last
        # entry), which ran the identical check two times; it is listed once now.
        estimator_checks.check_transformers_unfitted,
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dont_overwrite_parameters,
    ]),
)
def test_estimator_checks(test_fn):
    """Run one scikit-learn estimator check against OutlierRemover.

    The check is executed twice: once with a GMMOutlierDetector and once with
    an IsolationForest as the wrapped outlier detector, both with refit=True.
    """
    gmm_remover = OutlierRemover(outlier_detector=GMMOutlierDetector(), refit=True)
    test_fn(OutlierRemover.__name__, gmm_remover)

    # NOTE(review): IsolationForest (and pytest) are not imported in the visible
    # part of this module -- confirm they are imported further up.
    isolation_forest_remover = OutlierRemover(
        outlier_detector=IsolationForest(), refit=True
    )
    test_fn(OutlierRemover.__name__, isolation_forest_remover)
import numpy as np import pytest from sklego.common import flatten from sklego.linear_model import ProbWeightRegression from tests.conftest import nonmeta_checks, regressor_checks, general_checks, select_tests @pytest.mark.parametrize( "test_fn", select_tests( flatten([general_checks, nonmeta_checks, regressor_checks]), exclude=[ "check_sample_weights_invariance", "check_sample_weights_list", "check_sample_weights_pandas_series" ] ) ) @pytest.mark.cvxpy def test_estimator_checks(test_fn): regr_min_zero = ProbWeightRegression(non_negative=True) test_fn(ProbWeightRegression.__name__ + "_min_zero_true", regr_min_zero) regr_not_min_zero = ProbWeightRegression(non_negative=False) test_fn(ProbWeightRegression.__name__ + "_min_zero_true_false", regr_not_min_zero) @pytest.mark.cvxpy def test_shape_trained_model(random_xy_dataset_regr): X, y = random_xy_dataset_regr mod_no_intercept = ProbWeightRegression()
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, outlier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_outliers_train",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check against every detector/threshold-method combination."""
    # (detector class, name suffix, constructor arguments), in execution order.
    detector_setups = (
        (GMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
        (GMMOutlierDetector, "_stddev", {"threshold": 2, "method": "stddev"}),
        (BayesianGMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
        (BayesianGMMOutlierDetector, "_stddev", {"threshold": 2, "method": "stddev"}),
    )
    for detector_cls, suffix, params in detector_setups:
        detector = detector_cls(**params)
        test_fn(detector_cls.__name__ + suffix, detector)
import pytest
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from sklego.common import flatten
from sklego.meta import DecayEstimator
from tests.conftest import general_checks, classifier_checks, regressor_checks, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([general_checks, nonmeta_checks, regressor_checks]),
)
def test_estimator_checks_regression(test_fn):
    """Run one general/non-meta/regressor check on DecayEstimator wrapping LinearRegression."""
    wrapped_regressor = DecayEstimator(LinearRegression())
    test_fn(DecayEstimator.__name__, wrapped_regressor)


@pytest.mark.parametrize(
    "test_fn",
    flatten([general_checks, nonmeta_checks, classifier_checks]),
)
def test_estimator_checks_classification(test_fn):
    """Run one general/non-meta/classifier check on DecayEstimator wrapping LogisticRegression."""
    wrapped_classifier = DecayEstimator(LogisticRegression(solver="lbfgs"))
    test_fn(DecayEstimator.__name__, wrapped_classifier)
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, outlier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_outliers_train",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check on both detectors, each in quantile and stddev mode."""
    # Constructor arguments keyed by the suffix appended to the class name.
    method_params = {
        "_quantile": {"threshold": 0.999, "method": "quantile"},
        "_stddev": {"threshold": 2, "method": "stddev"},
    }
    # Order matters for reproducing the original run: plain GMM first, then
    # Bayesian GMM; within each, quantile before stddev.
    for detector_cls in (GMMOutlierDetector, BayesianGMMOutlierDetector):
        for suffix, params in method_params.items():
            detector = detector_cls(**params)
            test_fn(detector_cls.__name__ + suffix, detector)
# `np` used to be imported from pandas' private test module
# (pandas.tests.extension.numpy_.test_numpy_nested); import NumPy directly so
# the module does not depend on pandas' test-suite layout.
import numpy as np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        transformer_checks,
        nonmeta_checks,
        general_checks,
    ]),
)
def test_estimator_checks(test_fn):
    """Run one transformer/non-meta/general check on EstimatorTransformer(LinearRegression())."""
    trf = EstimatorTransformer(LinearRegression())
    test_fn(EstimatorTransformer.__name__, trf)


def test_values_uniform(random_xy_dataset_clf):
    """The transformed output has one row per sample and exactly one column."""
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy='most_frequent')
    # The transformer wraps a clone, so `clf` itself stays unfitted.
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
from sklearn.utils.estimator_checks import check_transformers_unfitted

from sklego.common import flatten
from sklego.preprocessing import RandomAdder
from tests.conftest import nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            # Transformer checks
            check_transformers_unfitted,
            # General checks
            estimator_checks.check_fit2d_predict1d,
            estimator_checks.check_fit2d_1sample,
            estimator_checks.check_fit2d_1feature,
            estimator_checks.check_fit1d,
            estimator_checks.check_get_params_invariance,
            estimator_checks.check_set_params,
            estimator_checks.check_dict_unchanged,
            estimator_checks.check_dont_overwrite_parameters,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one estimator check against a default-constructed RandomAdder.

    Checks deliberately left out of the list above:

    * check_methods_subset_invariance: noise is added, so the method is not
      invariant on a subset.
    * check_transformer_data_not_an_array: uses `NotAnArray` as X, for which
      there is no hashing function.
    * check_transformer_general: uses lists as X, for which there is no
      hashing function.
    """
    test_fn(RandomAdder.__name__, RandomAdder())
import pytest
import numpy as np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize("test_fn", flatten([transformer_checks, general_checks]))
def test_estimator_checks(test_fn):
    """Run one transformer/general check on EstimatorTransformer(LinearRegression())."""
    test_fn(EstimatorTransformer.__name__, EstimatorTransformer(LinearRegression()))


def test_values_uniform(random_xy_dataset_clf):
    """Transformed output is a single column equal to the wrapped classifier's predictions."""
    X, y = check_X_y(*random_xy_dataset_clf)
    clf = DummyClassifier(strategy="most_frequent")

    # The transformer gets its own clone; `clf` is fitted separately below to
    # produce the reference predictions.
    wrapped = EstimatorTransformer(clone(clf))
    transformed = wrapped.fit(X, y).transform(X)
    assert transformed.shape == (y.shape[0], 1)

    reference_predictions = clf.fit(X, y).predict(X)
    assert np.all(transformed == reference_predictions)
flatten([ # non-meta checks estimator_checks.check_estimators_dtypes, estimator_checks.check_fit_score_takes_y, estimator_checks.check_dtype_object, estimator_checks.check_sample_weights_pandas_series, estimator_checks.check_sample_weights_list, estimator_checks.check_sample_weights_invariance, estimator_checks.check_estimators_fit_returns_self, estimator_checks.check_complex_data, estimator_checks.check_estimators_empty_data_messages, estimator_checks.check_pipeline_consistency, estimator_checks.check_estimators_nan_inf, estimator_checks.check_estimators_overwrite_params, estimator_checks.check_estimator_sparse_data, estimator_checks.check_estimators_pickle, # general checks estimator_checks.check_fit2d_predict1d, estimator_checks.check_methods_subset_invariance, estimator_checks.check_fit2d_1sample, estimator_checks.check_fit2d_1feature, estimator_checks.check_fit1d, estimator_checks.check_get_params_invariance, estimator_checks.check_set_params, estimator_checks.check_dict_unchanged, estimator_checks.check_dont_overwrite_parameters, # outlier_checks # estimator_checks.check_outliers_fit_predict, # estimator_checks.check_outliers_train estimator_checks.check_classifier_data_not_an_array, estimator_checks.check_estimators_unfitted, ]),
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        transformer_checks,
        general_checks,
        # The non-meta checks are spelled out one by one so that
        # check_estimators_nan_inf can be left out: ColumnCapper works with
        # nan/inf cells.
        estimator_checks.check_estimators_dtypes,
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_dtype_object,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_fit_returns_self,
        estimator_checks.check_complex_data,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_pipeline_consistency,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimator_sparse_data,
        estimator_checks.check_estimators_pickle,
    ]),
)
def test_estimator_checks(test_fn):
    """Run one estimator check against a default-constructed ColumnCapper."""
    capper = ColumnCapper()
    test_fn(ColumnCapper.__name__, capper)
from tests.conftest import nonmeta_checks @pytest.mark.parametrize( "test_fn", flatten([ nonmeta_checks, check_shape_remains_same_regressor, # General checks estimator_checks.check_fit2d_predict1d, estimator_checks.check_fit2d_1sample, estimator_checks.check_fit2d_1feature, estimator_checks.check_fit1d, estimator_checks.check_get_params_invariance, estimator_checks.check_set_params, estimator_checks.check_dict_unchanged, estimator_checks.check_dont_overwrite_parameters, # Regressor checks estimator_checks.check_regressor_data_not_an_array, estimator_checks.check_estimators_partial_fit_n_features, estimator_checks.check_regressors_no_decision_function, estimator_checks.check_supervised_y_2d, estimator_checks.check_supervised_y_no_nan, estimator_checks.check_regressors_int, estimator_checks.check_estimators_unfitted, ])) def test_estimator_checks(test_fn): # Tests that are skipped: # 'check_methods_subset_invariance': Since we add noise, the method is not invariant on a subset # 'check_regressors_train': score is not always greater than 0.5 due to randomness regr_normal = RandomRegressor(strategy="normal")
import numpy as np import pytest from sklearn.cluster import DBSCAN from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import Ridge, LogisticRegression from sklego.common import flatten from sklego.meta import SubjectiveClassifier from tests.conftest import general_checks, classifier_checks @pytest.mark.parametrize("test_fn", flatten([general_checks, classifier_checks])) def test_estimator_checks_classification(test_fn): if test_fn.__name__ == 'check_classifiers_classes': prior = { 'one': 0.1, 'two': 0.1, 'three': 0.1, -1: 0.1, 1: 0.6 } # nonsensical prior to make sklearn check pass else: prior = {0: 0.7, 1: 0.2, 2: 0.1} # Some of the sklearn checkers generate random y data with 3 classes, so prior needs to have these classes estimator = SubjectiveClassifier(LogisticRegression(), prior) test_fn(SubjectiveClassifier.__name__, estimator) @pytest.mark.parametrize(
import numpy as np
import pytest

from sklego.common import flatten
from sklego.linear_model import LowessRegression
from tests.conftest import nonmeta_checks, regressor_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([nonmeta_checks, general_checks, regressor_checks]),
)
def test_estimator_checks(test_fn):
    """Run one non-meta/general/regressor check on a default LowessRegression."""
    test_fn(LowessRegression.__name__, LowessRegression())


def test_obvious_usecase():
    """Fitting a constant target of ones must predict (approximately) ones everywhere."""
    grid = np.linspace(0, 10, 100)
    features = grid.reshape(-1, 1)
    target = np.ones(grid.shape)

    fitted = LowessRegression().fit(features, target)
    predicted = fitted.predict(features)

    assert np.isclose(target, predicted).all()
import numpy as np
import pytest
from sklearn.model_selection import train_test_split

from sklego.common import flatten
from sklego.preprocessing import RandomAdder
from tests.conftest import select_tests, transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, transformer_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_methods_subset_invariance",
            "check_transformer_data_not_an_array",
            "check_transformer_general",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected estimator check against a default-constructed RandomAdder."""
    adder = RandomAdder()
    test_fn(RandomAdder.__name__, adder)


def test_dtype_regression(random_xy_dataset_regr):
    """Transforming the regression fixture must yield a float64 array."""
    X, y = random_xy_dataset_regr
    transformed = RandomAdder().fit(X, y).transform(X)
    # `np.float` was only an alias for the builtin `float` and has been removed
    # from NumPy; `np.float64` is the dtype that alias compared equal to.
    assert transformed.dtype == np.float64
import pytest
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import LocalOutlierFactor

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector
from sklego.meta import OutlierClassifier
from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks]),
        exclude=[
            "check_sample_weights_invariance",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one general check on OutlierClassifier wrapping a quantile-based GMM detector."""
    quantile_detector = GMMOutlierDetector(threshold=0.999, method="quantile")
    test_fn("OutlierClassifier", OutlierClassifier(quantile_detector))


@pytest.fixture
def dataset():
    """Return a (2000, 2) standard-normal sample drawn after seeding NumPy with 42."""
    np.random.seed(42)
    sample = np.random.normal(0, 1, (2000, 2))
    return sample
import pytest import numpy as np from cvxpy import SolverError from sklearn.linear_model import LogisticRegression from sklego.common import flatten from sklego.linear_model import FairClassifier from sklego.metrics import p_percent_score from tests.conftest import general_checks, nonmeta_checks, classifier_checks @pytest.mark.parametrize( "test_fn", flatten([general_checks, nonmeta_checks, classifier_checks]) ) def test_standard_checks(test_fn): trf = FairClassifier( covariance_threshold=None, C=1, penalty="none", sensitive_cols=[0], train_sensitive_cols=True, ) test_fn(FairClassifier.__name__, trf) def _test_same(dataset): X, y = dataset if X.shape[1] == 1: # If we only have one column (which is also the sensitive one) we can't fit return True
import numpy as np import pytest from sklego.common import flatten from sklego.mixture import GMMClassifier from sklego.testing import check_shape_remains_same_classifier from tests.conftest import nonmeta_checks, general_checks, classifier_checks @pytest.mark.parametrize("test_fn", flatten([ nonmeta_checks, general_checks, classifier_checks, check_shape_remains_same_classifier ])) def test_estimator_checks(test_fn): clf = GMMClassifier() test_fn(GMMClassifier.__name__, clf) def test_obvious_usecase(): X = np.concatenate([ np.random.normal(-10, 1, (100, 2)), np.random.normal(10, 1, (100, 2)) ]) y = np.concatenate([np.zeros(100), np.ones(100)]) assert (GMMClassifier().fit(X, y).predict(X) == y).all() def test_value_error_threshold(): X = np.concatenate([ np.random.normal(-10, 1, (100, 2)),
import pytest import numpy as np import pandas as pd from sklearn.utils.validation import FLOAT_DTYPES from sklego.common import flatten from sklego.preprocessing import ColumnCapper from tests.conftest import select_tests, transformer_checks, general_checks, nonmeta_checks @pytest.mark.parametrize( "test_fn", select_tests(flatten([general_checks, nonmeta_checks, transformer_checks]), exclude=[ "check_sample_weights_invariance", "check_estimators_nan_inf", "check_sample_weights_list", "check_sample_weights_pandas_series" ])) def test_estimator_checks(test_fn): test_fn(ColumnCapper.__name__, ColumnCapper()) def test_quantile_range(): def expect_type_error(quantile_range): with pytest.raises(TypeError): ColumnCapper(quantile_range) def expect_value_error(quantile_range): with pytest.raises(ValueError): ColumnCapper(quantile_range)
@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            # --- General checks ---
            # Left out because we only test for two classes:
            #   estimator_checks.check_fit2d_predict1d
            #   estimator_checks.check_methods_subset_invariance
            #   estimator_checks.check_dont_overwrite_parameters
            estimator_checks.check_fit2d_1sample,
            estimator_checks.check_fit2d_1feature,
            estimator_checks.check_fit1d,
            estimator_checks.check_get_params_invariance,
            estimator_checks.check_set_params,
            estimator_checks.check_dict_unchanged,
            # --- Classifier checks ---
            # Left out because we only test for two classes:
            #   estimator_checks.check_classifiers_classes
            #   estimator_checks.check_classifiers_train
            #   estimator_checks.check_supervised_y_2d
            estimator_checks.check_classifier_data_not_an_array,
            estimator_checks.check_classifiers_one_label,
            estimator_checks.check_estimators_partial_fit_n_features,
            estimator_checks.check_supervised_y_no_nan,
            estimator_checks.check_estimators_unfitted,
            estimator_checks.check_non_transformer_estimators_n_iter,
            estimator_checks.check_decision_proba_consistency,
        ]
    ),
)
def test_standard_checks(test_fn):
    """Run one estimator check on Thresholder(LogisticRegression(), threshold=0.5)."""
    test_fn(Thresholder.__name__, Thresholder(LogisticRegression(), threshold=0.5))
import pytest from sklego.common import flatten from sklego.mixture import GMMClassifier, BayesianGMMClassifier from sklego.testing import check_shape_remains_same_classifier from tests.conftest import nonmeta_checks, general_checks, estimator_checks @pytest.mark.parametrize("test_fn", flatten([ nonmeta_checks, general_checks, estimator_checks.check_classifier_data_not_an_array, estimator_checks.check_classifiers_one_label, estimator_checks.check_classifiers_classes, estimator_checks.check_estimators_partial_fit_n_features, estimator_checks.check_classifiers_train, estimator_checks.check_supervised_y_2d, estimator_checks.check_supervised_y_no_nan, estimator_checks.check_estimators_unfitted, # estimator_checks.check_non_transformer_estimators_n_iter, our method does not have n_iter estimator_checks.check_decision_proba_consistency, check_shape_remains_same_classifier ])) def test_estimator_checks(test_fn): clf = GMMClassifier() test_fn(GMMClassifier.__name__, clf) clf = BayesianGMMClassifier() test_fn(BayesianGMMClassifier.__name__, clf) def test_obvious_usecase():
from sklego.preprocessing import InformationFilter from tests.conftest import transformer_checks, general_checks @pytest.mark.parametrize( "test_fn", flatten([ transformer_checks, general_checks, # nonmeta_checks estimator_checks.check_estimators_dtypes, estimator_checks.check_fit_score_takes_y, estimator_checks.check_dtype_object, estimator_checks.check_sample_weights_pandas_series, estimator_checks.check_sample_weights_list, estimator_checks.check_sample_weights_invariance, estimator_checks.check_estimators_fit_returns_self, estimator_checks.check_complex_data, # this won't work because we need to select a column # estimator_checks.check_estimators_empty_data_messages, estimator_checks.check_pipeline_consistency, estimator_checks.check_estimators_nan_inf, estimator_checks.check_estimators_overwrite_params, estimator_checks.check_estimator_sparse_data, estimator_checks.check_estimators_pickle, ]), ) def test_estimator_checks(test_fn): test_fn(InformationFilter.__name__, InformationFilter(columns=[0])) def test_v_columns_orthogonal():
from sklego.common import flatten from sklego.meta import GroupedEstimator from sklego.datasets import load_chicken @pytest.mark.parametrize( "test_fn", flatten([ estimator_checks.check_fit_score_takes_y, estimator_checks.check_sample_weights_invariance, estimator_checks.check_estimators_empty_data_messages, estimator_checks.check_estimators_nan_inf, estimator_checks.check_estimators_overwrite_params, estimator_checks.check_estimators_pickle, estimator_checks.check_fit2d_1sample, # estimator_checks.check_fit1d not tested because in 1d we cannot have both groups and data estimator_checks.check_dont_overwrite_parameters, estimator_checks.check_sample_weights_invariance, estimator_checks.check_get_params_invariance, estimator_checks.check_sample_weights_list, estimator_checks.check_sample_weights_pandas_series, estimator_checks.check_set_params, ]), ) def test_estimator_checks(test_fn): clf = GroupedEstimator(estimator=LinearRegression(), groups=[0], use_global_model=True) test_fn(GroupedEstimator.__name__ + "_fallback", clf) clf = GroupedEstimator(estimator=LinearRegression(),
import pytest
import numpy as np
from sklearn.linear_model import LogisticRegression

from sklego.common import flatten
from sklego.linear_model import EqualOpportunityClassifier
from sklego.metrics import equal_opportunity_score
from tests.conftest import general_checks, classifier_checks, select_tests, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, classifier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
@pytest.mark.cvxpy
def test_standard_checks(test_fn):
    """Run one selected check on an EqualOpportunityClassifier with no covariance threshold."""
    # Column 0 is treated as the sensitive column and is also used for training.
    classifier_params = {
        "covariance_threshold": None,
        "positive_target": True,
        "C": 1,
        "penalty": "none",
        "sensitive_cols": [0],
        "train_sensitive_cols": True,
    }
    fair_classifier = EqualOpportunityClassifier(**classifier_params)
    test_fn(EqualOpportunityClassifier.__name__, fair_classifier)
from sklego.common import flatten from sklego.meta import GroupedEstimator from sklego.datasets import load_chicken @pytest.mark.parametrize( "test_fn", flatten([ estimator_checks.check_fit_score_takes_y, estimator_checks.check_sample_weights_invariance, estimator_checks.check_estimators_empty_data_messages, estimator_checks.check_estimators_nan_inf, estimator_checks.check_estimators_overwrite_params, estimator_checks.check_estimators_pickle, estimator_checks.check_fit2d_predict1d, estimator_checks.check_fit2d_1sample, estimator_checks.check_fit1d, estimator_checks.check_dont_overwrite_parameters, estimator_checks.check_sample_weights_invariance, estimator_checks.check_get_params_invariance, estimator_checks.check_sample_weights_list, estimator_checks.check_sample_weights_pandas_series, estimator_checks.check_set_params, ])) def test_estimator_checks(test_fn): clf = GroupedEstimator(estimator=LinearRegression(), groups=[0], use_fallback=True) test_fn(GroupedEstimator.__name__ + "_fallback", clf) clf = GroupedEstimator(estimator=LinearRegression(),
"""Test if fit_intercept and copy_X work.""" X, y = _create_dataset(coefs, intercept, noise=2.0) imb = QuantileRegression(fit_intercept=False, copy_X=False) imb.fit(X, y) assert imb.intercept_ == 0.0 @pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor]) def test_quant(test_fn): regr = QuantileRegression() test_fn(QuantileRegression.__name__, regr) @pytest.mark.parametrize( "regr", [(QuantileRegression.__name__, QuantileRegression()), (QuantileRegression.__name__ + "_positive", QuantileRegression(positive=True)), (QuantileRegression.__name__ + "_positive__no_intercept", QuantileRegression(positive=True, fit_intercept=False)), (QuantileRegression.__name__ + "_no_intercept", QuantileRegression(fit_intercept=False)), (QuantileRegression.__name__ + "_quantile", QuantileRegression(quantile=0.3))]) @pytest.mark.parametrize( "test_fn", select_tests(flatten([general_checks, nonmeta_checks, regressor_checks]), )) def test_estimator_checks(regr, test_fn): test_fn(*regr)
import numpy as np
import pandas as pd
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            general_checks,
            # outlier checks
            estimator_checks.check_outliers_fit_predict,
            estimator_checks.check_classifier_data_not_an_array,
            estimator_checks.check_estimators_unfitted,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one estimator check against the GMM-based outlier detectors."""
    # (detector class, name suffix, constructor arguments), in execution order.
    detector_setups = (
        (GMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
        (GMMOutlierDetector, "_stddev", {"threshold": 2, "method": "stddev"}),
        (BayesianGMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
    )
    for detector_cls, suffix, params in detector_setups:
        detector = detector_cls(**params)
        test_fn(detector_cls.__name__ + suffix, detector)
from tests.conftest import nonmeta_checks, general_checks, transformer_checks @pytest.fixture def sample_matrix(): np.random.seed(1313) return np.random.normal(size=(50, 10)) @pytest.fixture def sample_df(sample_matrix): return pd.DataFrame(sample_matrix) @pytest.mark.parametrize( "test_fn", flatten([nonmeta_checks, general_checks, transformer_checks]) ) def test_estimator_checks(test_fn): test_fn(OrthogonalTransformer.__name__, OrthogonalTransformer()) def check_is_orthogonal(X, tolerance=10 ** -5): """ Check if X is an column orthogonal matrix. If X is column orthogonal, then X.T * X equals the identity matrix :param X: Matrix to check :param tolerance: Tolerance for difference caused by rounding :raises: AssertionError if X is not orthogonal """ diff_with_eye = np.dot(X.T, X) - np.eye(X.shape[1]) if np.max(np.abs(diff_with_eye)) > tolerance: