Exemplo n.º 1
0
 def test_fillna_adds_is_na_column_when_imputing(self):
     """Check that ``indicate_nan=True`` adds ``<column>_is_nan`` indicator columns.

     A frame whose ``sales`` column has a single missing value (last row) is
     imputed with ``strategy="mean"``. The result must contain the original
     columns followed by one ``*_is_nan`` column per input column, and the
     ``sales_is_nan`` indicator must flag only the last row.
     """
     df = pd.DataFrame({"id": [1, 2, 3, 4], "sales": [2000, 3000, 4000, np.nan]})
     fill_na = FillNA(strategy="mean", indicate_nan=True)
     expected_cols = ["id", "sales", "id_is_nan", "sales_is_nan"]
     result = fill_na.fit_transform(df)
     assert result.columns.tolist() == expected_cols
     # The original read `assert np, all(...)`, which only asserts that the
     # `np` module is truthy and so could never fail; it also referenced a
     # column name ("sales_isna") that is not in `expected_cols`.
     assert np.all(result["sales_is_nan"] == [0, 0, 0, 1])
Exemplo n.º 2
0
def test_imputer_returns_dataframe_unchanged_if_no_nans(categorical):
    """Check that constant-value imputation does not inject the fill value.

    Runs ``FillNA('Unknown')`` over the ``categorical`` fixture (presumably
    free of missing values, per the test name -- confirm against the fixture)
    and verifies that the result is a DataFrame of the same length, with the
    columns ``category_a`` and ``category_b``, and containing no ``'Unknown'``
    cell.
    """
    imputer = FillNA('Unknown')
    result = imputer.fit_transform(categorical)

    assert isinstance(result, pd.DataFrame)
    assert len(categorical) == len(result)
    assert {'category_a', 'category_b'} == set(result.columns)
    # Use `not` rather than `~`: bitwise inversion is only a logical negation
    # for numpy booleans. On a plain Python bool, `~True == -2` is truthy and
    # the assertion would pass even when 'Unknown' is present.
    assert not result.isin(['Unknown']).any().any()
Exemplo n.º 3
0
def test_imputer_returns_correct_dataframe_max(numerical_na):
    """Check the output of ``FillNA(strategy='max')`` on the ``numerical_na`` fixture.

    The result must be a DataFrame with the same number of rows as the input
    and the columns ``number_a`` and ``number_b``. The cells at
    ``(0, "number_a")`` and ``(3, "number_b")`` are expected to hold 4 and 7
    respectively (presumably the column maxima replacing missing values --
    confirm against the fixture).
    """
    filled = FillNA(strategy='max').fit_transform(numerical_na)

    # Shape and schema checks.
    assert isinstance(filled, pd.DataFrame)
    assert len(filled) == len(numerical_na)
    assert set(filled.columns) == {'number_a', 'number_b'}

    # Spot checks on the two cells the test cares about.
    assert filled.loc[0, "number_a"] == 4
    assert filled.loc[3, "number_b"] == 7
Exemplo n.º 4
0
def test_imputer_returns_correct_dataframe(categorical_na):
    """Check constant-value imputation on the ``categorical_na`` fixture.

    ``FillNA('Unknown')`` must return a DataFrame with the same number of rows
    as the input and the columns ``category_a`` and ``category_b``. The cells
    at positions ``[0, 1]`` and ``[1, 0]`` are expected to equal ``'Unknown'``
    (presumably the fixture's missing cells -- confirm against the fixture).
    """
    filled = FillNA('Unknown').fit_transform(categorical_na)

    # Shape and schema checks.
    assert isinstance(filled, pd.DataFrame)
    assert len(filled) == len(categorical_na)
    assert set(filled.columns) == {'category_a', 'category_b'}

    # Positional spot checks on the imputed cells.
    assert filled.iloc[0, 1] == 'Unknown'
    assert filled.iloc[1, 0] == 'Unknown'
Exemplo n.º 5
0
 def test_fill_na_imputes_numerical_na_correct(
     self,
     numerical_na: pd.DataFrame,
     value: None,
     strategy: str,
     expected: pd.DataFrame,
 ):
     """Check that imputing ``numerical_na`` yields exactly ``expected``.

     ``value``, ``strategy`` and ``expected`` are presumably supplied by a
     parametrization that is not visible in this snippet -- confirm at the
     decorator. The comparison is a strict frame equality check.
     """
     actual = FillNA(value=value, strategy=strategy).fit_transform(numerical_na)
     pd.testing.assert_frame_equal(actual, expected)
Exemplo n.º 6
0
def test_imputer_returns_correct_dataframe_most_freq(categorical):
    """Check the output of ``FillNA(strategy='most_freq')`` on a modified fixture.

    The ``categorical`` fixture is edited in place: one cell in each column is
    set to NaN and ``category_b`` row 1 is set to ``"b3"``. After imputation
    the blanked cells are expected to hold ``'a1'`` and ``'b3'`` (presumably
    each column's most frequent value -- confirm against the fixture).
    """
    # Punch one hole per column, then overwrite category_b row 1 with "b3".
    categorical.loc[1, "category_a"] = np.nan
    categorical.loc[0, "category_b"] = np.nan
    categorical.loc[1, "category_b"] = "b3"

    filled = FillNA(strategy='most_freq').fit_transform(categorical)

    # Shape and schema checks.
    assert isinstance(filled, pd.DataFrame)
    assert len(filled) == len(categorical)
    assert set(filled.columns) == {'category_a', 'category_b'}

    # The blanked cells must now carry the expected fill values.
    assert filled.loc[1, "category_a"] == 'a1'
    assert filled.loc[0, "category_b"] == 'b3'
Exemplo n.º 7
0
    def test_fillna_raises_when_imputing_numerically_on_strings(self):
        """Check that ``strategy="mean"`` rejects frames containing string columns.

        The same ``FillNA`` instance is exercised twice: first on a frame with
        one string column, then after a second string column is added. Both
        calls must raise ``TransformerError`` with the same message pattern.
        """
        error_pattern = "column/columns have invalid types for strategy = mean"
        frame = pd.DataFrame(
            {
                "id": [1, 2, 3, 4],
                "status": ["OK", "Error", "OK", "Error"],
                "sales": [2000, 3000, 4000, np.nan],
            }
        )
        imputer = FillNA(strategy="mean")

        # First attempt: a single string column ("status") is present.
        with pytest.raises(TransformerError, match=error_pattern):
            imputer.fit_transform(frame)

        # Second attempt: add another string column and retry on the same instance.
        frame["new_col"] = ["One", "Two", "Three", "Four"]
        with pytest.raises(TransformerError, match=error_pattern):
            imputer.fit_transform(frame)
Exemplo n.º 8
0
    def test_fillna_imputes_pandas_categorical_correct(
        self,
        value: Any,
        strategy: Any,
        expected: pd.DataFrame,
        categorical_na: pd.DataFrame,
    ):
        """Check imputation on columns converted to pandas ``category`` dtype.

        Both columns of ``categorical_na`` are cast to ``category`` in place
        before imputing. The result is compared to ``expected`` with
        ``check_categorical=False``. ``value``, ``strategy`` and ``expected``
        are presumably supplied by a parametrization not visible in this
        snippet -- confirm at the decorator.
        """
        # Cast each column to the pandas categorical dtype (mutates the fixture).
        for column in ("category_a", "category_b"):
            categorical_na[column] = categorical_na[column].astype("category")

        actual = FillNA(value=value, strategy=strategy).fit_transform(categorical_na)

        pd.testing.assert_frame_equal(actual, expected, check_categorical=False)
Exemplo n.º 9
0
def test_imputer_with_both_raises_error(numerical_na):
    """Check that fitting with both ``value`` and ``strategy`` set raises ``TransformerError``."""
    # Built outside the `raises` block so that only fit_transform is expected to fail.
    conflicting = FillNA(value=0, strategy='mean')

    with pytest.raises(TransformerError):
        conflicting.fit_transform(numerical_na)
Exemplo n.º 10
0
def test_imputer_with_none_raises_error(numerical_na):
    """Check that fitting a ``FillNA`` built with no arguments raises ``TransformerError``."""
    # Built outside the `raises` block so that only fit_transform is expected to fail.
    unconfigured = FillNA()

    with pytest.raises(TransformerError):
        unconfigured.fit_transform(numerical_na)
Exemplo n.º 11
0
 def test_fillna_imputes_categorical_na_correct(
     self, categorical_na: pd.DataFrame, value: Any, strategy: Any, expected: Any
 ):
     """Check that imputing ``categorical_na`` yields exactly ``expected``.

     ``value``, ``strategy`` and ``expected`` are presumably supplied by a
     parametrization that is not visible in this snippet -- confirm at the
     decorator. The comparison is a strict frame equality check.
     """
     actual = FillNA(value=value, strategy=strategy).fit_transform(categorical_na)
     pd.testing.assert_frame_equal(actual, expected)
Exemplo n.º 12
0
 def test_fillna_returns_dataframe_unchanged_if_no_nans(
     self, categorical: pd.DataFrame
 ):
     """Check that ``FillNA("Unknown")`` returns a frame equal to its input.

     The ``categorical`` fixture presumably contains no missing values (per
     the test name -- confirm against the fixture), so the transformed frame
     must compare equal to the original.
     """
     transformed = FillNA("Unknown").fit_transform(categorical)
     pd.testing.assert_frame_equal(transformed, categorical)