Пример #1
0
def test_AutoWrapper_one_dimensional_transformer(column_prefix, nb_outputs,
                                                 with_get_feature_names):
    """Check AutoWrapper around a dummy transformer applied column by column.

    The dummy transformer is built with ``nb_outputs`` and wrapped with
    ``work_on_one_column_only=True`` and ``all_columns_at_once=False``.
    The test then verifies the type and shape of the result, that the
    first output of the first two input columns equals the input column,
    and that the output columns are named
    ``<column>[__<column_prefix>][__FEATURE]__<k>``.

    Parameters
    ----------
    column_prefix : str or None
        Prefix forwarded to the wrapper; ``None`` means no prefix in the
        expected names.
    nb_outputs : int
        Passed to the dummy transformer; the assertions expect that many
        output columns per input column.
    with_get_feature_names : bool
        Selects the dummy variant and whether ``FEATURE`` is expected in
        the output names.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    number_columns = [f"NUMBER_{j}" for j in range(X.shape[1])]
    df = pd.DataFrame(X, columns=number_columns)
    df["not_a_number"] = "a"

    klass = (DummyOnlyOneDimensionWithGetFeatureNames
             if with_get_feature_names else DummyOnlyOneDimension)

    wrapper_factory = AutoWrapper(klass(nb_outputs=nb_outputs),
                                  work_on_one_column_only=True,
                                  all_columns_at_once=False)
    dummy_auto_wrapped = wrapper_factory(column_prefix=column_prefix)
    Xres = dummy_auto_wrapped.fit_transform(df)

    assert isinstance(Xres, pd.DataFrame)
    # Each input column yields nb_outputs consecutive output columns, so the
    # first output of input column 1 sits at position nb_outputs.
    assert (Xres.iloc[:, 0].values == df.iloc[:, 0].values).all()
    assert (Xres.iloc[:, nb_outputs].values
            == df.iloc[:, 1].values).all()

    # The suffix is the same for every column: optional prefix, then the
    # optional "FEATURE" marker, joined (and led) by a double underscore.
    name_parts = []
    if column_prefix is not None:
        name_parts.append(column_prefix)
    if with_get_feature_names:
        name_parts.append("FEATURE")
    suffix = "__".join(name_parts)
    if suffix != "":
        suffix = "__" + suffix

    expected_cols = []
    for col in df.columns:
        expected_cols.extend(col + suffix + "__%d" % k
                             for k in range(nb_outputs))

    assert Xres.shape == (df.shape[0], df.shape[1] * nb_outputs)
    assert dummy_auto_wrapped.get_feature_names() == expected_cols
    assert dummy_auto_wrapped.get_feature_names() == list(Xres.columns)
Пример #2
0
def test_AutoWrapper_fails_if_not_instance():
    """AutoWrapper must raise TypeError when given a plain integer."""
    with pytest.raises(TypeError):
        AutoWrapper(10)
Пример #3
0
def test_AutoWrapper():
    """Exercise AutoWrapper on TruncatedSVD and on the dummy transformers.

    Covered scenarios: selecting columns by regex, naming output columns
    with ``column_prefix``, wrapping an instance as well as a class, and
    overriding the accepted input types through ``wrapping_kwargs``.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    df = pd.DataFrame(X, columns=[f"NUMBER_{j}" for j in range(X.shape[1])])
    df["not_a_number"] = "a"

    # TruncatedSVD on the "NUMBER_" columns, no prefix: only the type and
    # the row count are checked.
    svd_factory = AutoWrapper(TruncatedSVD(n_components=2, random_state=123))
    model = svd_factory(columns_to_use=["NUMBER_"], regex_match=True)
    Xres = model.fit_transform(df)

    assert isinstance(Xres, pd.DataFrame)
    assert Xres.shape[0] == df.shape[0]

    # Same transformer with a prefix: the untouched column comes first,
    # followed by the prefixed SVD components.
    svd_factory = AutoWrapper(TruncatedSVD(n_components=2, random_state=123))
    model = svd_factory(columns_to_use=["NUMBER_"],
                        regex_match=True,
                        column_prefix="SVD")
    Xres = model.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)
    assert list(Xres.columns) == ["not_a_number", "SVD__0", "SVD__1"]
    assert Xres.shape[0] == df.shape[0]

    # Unwrapped, the DataFrame-only dummy raises TypeError on a numpy array.
    dummy_not_wrapped = DummyOnlyDataFrame()
    with pytest.raises(TypeError):
        dummy_not_wrapped.fit_transform(X)

    # Wrapped (from an instance, then from the class itself) it takes the
    # array and returns a DataFrame holding the same values.
    for to_wrap in (DummyOnlyDataFrame(), DummyOnlyDataFrame):
        dummy_auto_wrapped = AutoWrapper(to_wrap)()
        Xres = dummy_auto_wrapped.fit_transform(X)
        assert isinstance(Xres, pd.DataFrame)
        assert (Xres.values == X).all()

    # Unwrapped, the array-only dummy raises TypeError on a DataFrame.
    dummy_not_wrapped = DummyNoDataFrame()
    with pytest.raises(TypeError):
        dummy_not_wrapped.fit_transform(df)

    # Wrapped with numpy arrays declared as the only accepted input type,
    # it takes the DataFrame and still returns a DataFrame.
    numpy_only = {"accepted_input_types": (DataTypes.NumpyArray, )}
    dummy_auto_wrapped = AutoWrapper(DummyNoDataFrame,
                                     wrapping_kwargs=numpy_only)()
    Xres = dummy_auto_wrapped.fit_transform(df)
    assert isinstance(Xres, pd.DataFrame)