Пример #1
0
def test_transformer_wrapper_function():
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler as StandardScaler
    from foreshadow.concrete import StandardScaler as CustomScaler

    boston_path = get_file_path("data", "boston_housing.csv")

    df = pd.read_csv(boston_path)

    custom = CustomScaler()
    sklearn = StandardScaler()

    custom.fit(df[["crim"]])
    sklearn.fit(df[["crim"]])

    custom_tf = custom.transform(df[["crim"]])
    sklearn_tf = sklearn.transform(df[["crim"]])

    assert np.array_equal(custom_tf.values, sklearn_tf)

    custom_tf = custom.fit_transform(df[["crim"]])
    sklearn_tf = sklearn.fit_transform(df[["crim"]])

    assert np.array_equal(custom_tf.values, sklearn_tf)
Пример #2
0
def test_smarttransformer_function(smart_child):
    """Test overall SmartTransformer functionality

    Args:
        smart_child: A subclass of SmartTransformer.

    """
    import numpy as np
    import pandas as pd

    from foreshadow.concrete import StandardScaler

    boston_path = get_file_path("data", "boston_housing.csv")

    df = pd.read_csv(boston_path)

    smart = smart_child(cache_manager=CacheManager())
    smart_data = smart.fit_transform(df[["crim"]])

    std = StandardScaler()
    std_data = std.fit_transform(df[["crim"]])

    assert smart_data.equals(std_data)

    smart.fit(df[["crim"]])
    smart_data = smart.transform(df[["crim"]])

    std.fit(df[["crim"]])
    std_data = std.transform(df[["crim"]])

    # TODO, remove when SmartTransformer is no longer wrapped
    # Column names will be different, thus np.allclose() is used
    assert np.allclose(smart_data, std_data)
Пример #3
0
def test_transformer_keep_cols():
    import pandas as pd
    from foreshadow.concrete import StandardScaler as CustomScaler

    boston_path = get_file_path("data", "boston_housing.csv")

    df = pd.read_csv(boston_path)

    custom = CustomScaler(keep_columns=True)
    custom_tf = custom.fit_transform(df[["crim"]])

    assert custom_tf.shape[1] == 2
Пример #4
0
def test_transformer_naming_default():
    from foreshadow.concrete import StandardScaler
    import pandas as pd

    boston_path = get_file_path("data", "boston_housing.csv")

    df = pd.read_csv(boston_path)

    scaler = StandardScaler(keep_columns=False)
    out = scaler.fit_transform(df[["crim"]])

    assert out.iloc[:, 0].name == "crim"