def test_AutoWrapper_one_dimensional_transformer(column_prefix, nb_outputs, with_get_feature_names):
    """Check an auto-wrapped one-column transformer applied column by column.

    The wrapped dummy transformer is built with ``nb_outputs`` and applied with
    ``work_on_one_column_only=True`` / ``all_columns_at_once=False``.  The test
    verifies the output type, the output shape, a couple of output columns and
    the generated feature names.

    Args:
        column_prefix: prefix forwarded to the wrapper (``None`` for no prefix).
        nb_outputs: number of output columns requested per input column.
        with_get_feature_names: selects the dummy class exposing feature names
            when true, the plain dummy class otherwise.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    df = pd.DataFrame(X, columns=[f"NUMBER_{j}" for j in range(X.shape[1])])
    df["not_a_number"] = "a"

    klass = DummyOnlyOneDimensionWithGetFeatureNames if with_get_feature_names else DummyOnlyOneDimension
    wrapper_factory = AutoWrapper(
        klass(nb_outputs=nb_outputs),
        work_on_one_column_only=True,
        all_columns_at_once=False,
    )
    dummy_auto_wrapped = wrapper_factory(column_prefix=column_prefix)

    Xres = dummy_auto_wrapped.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)

    # First output column must equal the first input column; the column at
    # position nb_outputs must equal the second input column.
    assert (Xres.iloc[:, 0].values == df.iloc[:, 0].values).all()
    assert (Xres.iloc[:, nb_outputs].values == df.iloc[:, 1].values).all()

    # The suffix appended to every input column name is the same for all
    # columns, so it is computed once up front.
    suffix = "FEATURE" if with_get_feature_names else ""
    if column_prefix is not None:
        suffix = column_prefix if suffix == "" else column_prefix + "__" + suffix
    if suffix != "":
        suffix = "__" + suffix

    expected_cols = [
        col + suffix + f"__{d}"
        for col in df.columns
        for d in range(nb_outputs)
    ]

    n_rows, n_input_cols = df.shape
    assert Xres.shape == (n_rows, n_input_cols * nb_outputs)
    assert dummy_auto_wrapped.get_feature_names() == expected_cols
    assert dummy_auto_wrapped.get_feature_names() == list(Xres.columns)
def test_AutoWrapper_fails_if_not_instance():
    """AutoWrapper is expected to raise TypeError when handed the integer 10."""
    not_a_model = 10
    with pytest.raises(TypeError):
        AutoWrapper(not_a_model)
def test_AutoWrapper():
    """Exercise AutoWrapper on TruncatedSVD and on the dummy transformers.

    Scenarios covered, in order:
      * TruncatedSVD restricted to the ``NUMBER_`` columns (regex match),
        without and with a ``column_prefix``;
      * ``DummyOnlyDataFrame`` unwrapped (expected to raise TypeError on a
        numpy array) and wrapped, given both as an instance and as a class;
      * ``DummyNoDataFrame`` unwrapped (expected to raise TypeError on a
        DataFrame) and wrapped with explicit ``accepted_input_types``.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    df = pd.DataFrame(X, columns=[f"NUMBER_{j}" for j in range(X.shape[1])])
    df["not_a_number"] = "a"
    n_rows = df.shape[0]

    def build_svd_wrapper(**extra_kwargs):
        # Fresh estimator and fresh selection arguments for every wrapper.
        svd = TruncatedSVD(n_components=2, random_state=123)
        return AutoWrapper(svd)(columns_to_use=["NUMBER_"], regex_match=True, **extra_kwargs)

    # --- TruncatedSVD without a column prefix ---
    model = build_svd_wrapper()
    Xres = model.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)
    assert Xres.shape[0] == n_rows

    # --- TruncatedSVD with a column prefix ---
    model = build_svd_wrapper(column_prefix="SVD")
    Xres = model.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)
    assert list(Xres.columns) == ["not_a_number", "SVD__0", "SVD__1"]
    assert Xres.shape[0] == n_rows

    # --- DummyOnlyDataFrame: unwrapped it is expected to reject a numpy array ---
    dummy_not_wrapped = DummyOnlyDataFrame()
    with pytest.raises(TypeError):
        dummy_not_wrapped.fit_transform(X)

    # Wrapped, it must accept the array whether AutoWrapper receives an
    # instance or the class itself.
    for to_wrap in (DummyOnlyDataFrame(), DummyOnlyDataFrame):
        dummy_auto_wrapped = AutoWrapper(to_wrap)()
        Xres = dummy_auto_wrapped.fit_transform(X)
        assert isinstance(Xres, pd.DataFrame)
        assert (Xres.values == X).all()

    # --- DummyNoDataFrame: unwrapped it is expected to reject a DataFrame ---
    dummy_not_wrapped = DummyNoDataFrame()
    with pytest.raises(TypeError):
        dummy_not_wrapped.fit_transform(df)

    wrapping_kwargs = {"accepted_input_types": (DataTypes.NumpyArray,)}
    dummy_auto_wrapped = AutoWrapper(DummyNoDataFrame, wrapping_kwargs=wrapping_kwargs)()
    Xres = dummy_auto_wrapped.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)