def test_only_transform_train(random_xy_dataset_clf):
    """RandomAdder must perturb data it was fitted on, but pass unseen data through unchanged.

    The fixture supplies a classification dataset, which is split into a
    train and a hold-out portion; the transformer is fitted on the train
    split only and then applied to both.
    """
    features, labels = random_xy_dataset_clf
    train_X, holdout_X, train_y, holdout_y = train_test_split(features, labels)

    adder = RandomAdder()
    adder.fit(train_X, train_y)

    # Noise is injected into every value of the data seen during fit...
    assert np.all(adder.transform(train_X) != train_X)
    # ...while data outside the training set comes back untouched.
    assert np.all(adder.transform(holdout_X) == holdout_X)
from collections import defaultdict import pytest from sklearn.linear_model import LinearRegression from sklearn.utils import estimator_checks from sklego.dummy import RandomRegressor from sklego.transformers import EstimatorTransformer, RandomAdder from tests.conftest import id_func @pytest.mark.parametrize("estimator", [ RandomAdder(), EstimatorTransformer(LinearRegression()), RandomRegressor(), ], ids=id_func) def test_check_estimator(estimator, monkeypatch): """Uses the sklearn `check_estimator` method to verify our custom estimators""" # Not all estimators CAN adhere to the defined sklearn api. An example of this is the random adder as sklearn # expects methods to be invariant to whether they are applied to the full dataset or a subset. # These tests can be monkey patched out using the skips dictionary. skips = defaultdict( list, { RandomAdder: [ # Since we add noise, the method is not invariant on a subset 'check_methods_subset_invariance', # The transformerselectormixin needs to compute a hash and it can't on a 'NotAnArray' 'check_transformer_data_not_an_array',
def test_dtype_regression(random_xy_dataset_regr):
    """Transforming regression data must preserve a floating-point dtype.

    Note: the original assertion compared against ``np.float``, a deprecated
    alias for the builtin ``float`` that was removed in NumPy 1.24 — using it
    raises ``AttributeError`` on modern NumPy. ``np.float64`` is the
    equivalent dtype comparison.
    """
    X, y = random_xy_dataset_regr
    assert RandomAdder().fit(X, y).transform(X).dtype == np.float64
def test_dtype_classification(random_xy_dataset_clf):
    """Transforming classification data must preserve a floating-point dtype.

    Note: the original assertion compared against ``np.float``, a deprecated
    alias for the builtin ``float`` that was removed in NumPy 1.24 — using it
    raises ``AttributeError`` on modern NumPy. ``np.float64`` is the
    equivalent dtype comparison.
    """
    X, y = random_xy_dataset_clf
    assert RandomAdder().fit(X, y).transform(X).dtype == np.float64