def _assert_transform_output_shape(component, output, X, X_cols_expected):
    """Assert that a transformer's output is a DataTable of the expected shape.

    Shared by the ``transform`` and ``fit_transform`` checks so both paths are
    held to exactly the same expectations.

    Args:
        component: The transformer instance that produced ``output``.
        output: The value returned by ``transform`` or ``fit_transform``.
        X: The input features that were passed to the transformer.
        X_cols_expected: Column labels expected on ``output`` for transformers
            that are expected to preserve the input shape.

    Returns:
        bool: False when ``component`` is a DelayedFeatureTransformer (its
        shape is deliberately not checked here), True otherwise.
    """
    assert isinstance(output, ww.DataTable)

    n_rows, n_cols = X.shape[0], X.shape[1]
    if isinstance(component, SelectColumns):
        # With default parameters no columns are selected.
        assert output.shape == (n_rows, 0)
    elif isinstance(component, (PCA, LinearDiscriminantAnalysis)):
        # Dimensionality reduction: same rows, never more columns.
        assert output.shape[0] == n_rows
        assert output.shape[1] <= n_cols
    elif isinstance(component, DFSTransformer):
        # Feature synthesis: same rows, never fewer columns.
        assert output.shape[0] == n_rows
        assert output.shape[1] >= n_cols
    elif isinstance(component, DelayedFeatureTransformer):
        # We only check that DelayedFeatureTransformer outputs a DataTable.
        # The shape and index are checked in test_delayed_features_transformer.py
        return False
    else:
        assert output.shape == X.shape
        assert list(output.columns) == list(X_cols_expected)
    return True


def test_transformer_transform_output_type(X_y_binary):
    """Check the output type and shape of every transformer.

    Each transformer returned by ``_all_transformers()`` is fitted on several
    input representations (numpy arrays, a plain list target, and pandas
    objects with and without names). The results of ``transform`` and
    ``fit_transform`` must be ``ww.DataTable`` instances with the expected
    shape and, for shape-preserving transformers, the expected column labels.

    Args:
        X_y_binary: Fixture providing an ``(X, y)`` pair of numpy arrays.
    """
    X_np, y_np = X_y_binary
    assert isinstance(X_np, np.ndarray)
    assert isinstance(y_np, np.ndarray)

    n_features = X_np.shape[1]
    y_list = list(y_np)
    X_df_no_col_names = pd.DataFrame(X_np)
    range_index = pd.RangeIndex(start=0, stop=n_features, step=1)
    X_df_with_col_names = pd.DataFrame(X_np, columns=[f'x{i}' for i in range(n_features)])
    y_series_no_name = pd.Series(y_np)
    y_series_with_name = pd.Series(y_np, name='target')

    # (X, y, column labels expected on a shape-preserving transformer's output)
    datatype_combos = [
        (X_np, y_np, range_index),
        (X_np, y_list, range_index),
        (X_df_no_col_names, y_series_no_name, range_index),
        (X_df_with_col_names, y_series_with_name, X_df_with_col_names.columns),
    ]

    for component_class in _all_transformers():
        print(f'Testing transformer {component_class.name}')
        for X, y, X_cols_expected in datatype_combos:
            x_cols = X.columns if isinstance(X, pd.DataFrame) else None
            y_name = y.name if isinstance(y, pd.Series) else None
            print(f'Checking output of transform for transformer "{component_class.name}" '
                  f'on X type {type(X)} cols {x_cols}, y type {type(y)} name {y_name}')

            component = component_class()
            component.fit(X, y=y)
            shape_checked = _assert_transform_output_shape(
                component, component.transform(X, y=y), X, X_cols_expected)
            if not shape_checked:
                # DelayedFeatureTransformer: fit_transform is not exercised here.
                continue

            _assert_transform_output_shape(
                component, component.fit_transform(X, y=y), X, X_cols_expected)
def test_all_transformers_needs_fitting():
    """Check the ``needs_fitting`` flag on every transformer and estimator.

    Only the components named below are expected to report that they do not
    need fitting; every other component class must report that it does.
    """
    fit_free_names = {'DropColumns', 'SelectColumns', 'DelayedFeatureTransformer'}
    for component_class in _all_transformers() + _all_estimators():
        should_need_fitting = component_class.__name__ not in fit_free_names
        assert bool(component_class.needs_fitting) == should_need_fitting
def test_all_transformers_check_fit_input_type(data_type, X_y_binary, make_data_type):
    """Check that every fittable transformer can be fitted on ``data_type`` inputs.

    The binary dataset is converted with ``make_data_type`` and each
    transformer whose ``needs_fitting`` is truthy is fitted on it; the test
    passes when no ``fit`` call raises.
    """
    raw_X, raw_y = X_y_binary
    X = make_data_type(data_type, raw_X)
    y = make_data_type(data_type, raw_y)

    fittable_classes = (cls for cls in _all_transformers() if cls.needs_fitting)
    for component_class in fittable_classes:
        component_class().fit(X, y)
def test_all_transformers_check_fit(X_y_binary):
    """Check that fittable transformers refuse to transform before being fitted.

    For each transformer whose ``needs_fitting`` is truthy:
    calling ``transform`` on a fresh instance must raise
    ``ComponentNotYetFittedError``; after ``fit`` it must succeed; and a
    second fresh instance must accept ``transform`` after ``fit_transform``.
    """
    X, y = X_y_binary
    fittable_classes = (cls for cls in _all_transformers() if cls.needs_fitting)
    for component_class in fittable_classes:
        expected_message = f'You must fit {component_class.__name__}'

        # Path 1: transform fails until fit has been called.
        fitted_explicitly = component_class()
        with pytest.raises(ComponentNotYetFittedError, match=expected_message):
            fitted_explicitly.transform(X, y)
        fitted_explicitly.fit(X, y)
        fitted_explicitly.transform(X, y)

        # Path 2: fit_transform also leaves the component usable for transform.
        fitted_via_fit_transform = component_class()
        fitted_via_fit_transform.fit_transform(X, y)
        fitted_via_fit_transform.transform(X, y)
MockComponent, MockEstimator, MockTransformer = test_classes expected_code = "mockComponent = MockComponent(**{})" component_code = generate_component_code(MockComponent()) assert component_code == expected_code expected_code = "mockEstimator = MockEstimator(**{})" component_code = generate_component_code(MockEstimator()) assert component_code == expected_code expected_code = "mockTransformer = MockTransformer(**{})" component_code = generate_component_code(MockTransformer()) assert component_code == expected_code @pytest.mark.parametrize("transformer_class", _all_transformers()) @pytest.mark.parametrize("use_custom_index", [True, False]) def test_transformer_fit_and_transform_respect_custom_indices(use_custom_index, transformer_class, X_y_binary): check_names = True if transformer_class == DFSTransformer: check_names = False if use_custom_index: pytest.skip("The DFSTransformer changes the index so we skip it.") if transformer_class == PolynomialDetrender: pytest.skip("Skipping PolynomialDetrender because we test that it respects custom indices in " "test_polynomial_detrender.py") X, y = X_y_binary X = pd.DataFrame(X)