Exemplo n.º 1
0
def test_register_custom_groupby_check(custom_check_teardown: None) -> None:
    """Register a groupby-type custom check and run it through two schemas.

    The check is exercised once as a column-level check grouped by another
    column, and once as a dataframe-level check grouped by the named index.
    Finally, constructing the check with an ``element_wise`` keyword is
    expected to emit a ``UserWarning``.

    ``custom_check_teardown`` is a fixture defined outside this snippet; it
    is not referenced in the body (presumably it cleans up the registered
    check afterwards -- confirm against the fixture definition).
    """

    @extensions.register_check_method(
        statistics=["group_a", "group_b"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="groupby",
    )
    def custom_check(dict_groups, *, group_a, group_b):
        """Return whether the mean of group A exceeds the mean of group B.

        The mean is computed over each group's ``.values``, so the groups
        may be either series or dataframes.
        """
        mean_a = dict_groups[group_a].values.mean()
        mean_b = dict_groups[group_b].values.mean()
        return mean_a > mean_b

    # Column-level usage: the check on "col1" is grouped by "col2".
    grouped_column = pa.Column(
        int,
        Check.custom_check(group_a="a", group_b="b", groupby="col2"),
    )
    column_schema = pa.DataFrameSchema({
        "col1": grouped_column,
        "col2": pa.Column(str),
    })
    column_frame = pd.DataFrame({
        "col1": [20, 20, 10, 10],
        "col2": list("aabb"),
    })
    validated_column_frame = column_schema(column_frame)
    assert isinstance(validated_column_frame, pd.DataFrame)

    # Dataframe-level usage: the whole frame is grouped by its named index.
    frame_index = pd.Index(list("aabb"), name="my_index")
    indexed_frame = pd.DataFrame(
        {
            "col1": [20, 20, 10, 10],
            "col2": [30, 30, 5, 5],
            "col3": [10, 10, 1, 1],
        },
        index=frame_index,
    )
    frame_level_check = Check.custom_check(
        group_a="a",
        group_b="b",
        groupby="my_index",
    )
    frame_schema = pa.DataFrameSchema(
        columns={name: pa.Column(int) for name in ("col1", "col2", "col3")},
        index=pa.Index(str, name="my_index"),
        checks=frame_level_check,
    )
    validated_indexed_frame = frame_schema(indexed_frame)
    assert isinstance(validated_indexed_frame, pd.DataFrame)

    # Either value of ``element_wise`` must trigger a UserWarning.
    for element_wise in (True, False):
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, element_wise=element_wise)
Exemplo n.º 2
0
def test_register_element_wise_custom_check(
    custom_check_teardown: None,
    data: Union[pd.Series, pd.DataFrame],
) -> None:
    """Register an element-wise custom check and verify its behaviour.

    Three things are asserted:

    * the registered check passes on ``data`` when built with ``val=10``;
    * building the check with ``element_wise``, ``groupby`` or ``groups``
      keywords emits a ``UserWarning``;
    * registering an element-wise check whose ``supported_types`` is only
      ``pd.Series`` raises ``ValueError``.

    Both parameters are fixtures defined outside this snippet.
    """

    @extensions.register_check_method(
        statistics=["val"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="element_wise",
    )
    def custom_check(element, *, val):
        # Compares one element against the configured statistic.
        return element == val

    outcome = Check.custom_check(val=10)(data)
    assert outcome.check_passed

    # Each of these keyword sets is expected to produce a UserWarning.
    warning_kwargs = (
        {"element_wise": True},
        {"element_wise": False},
        {"groupby": "column"},
        {"groups": ["group1", "group2"]},
    )
    for extra in warning_kwargs:
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, **extra)

    expected_error = (
        "Element-wise checks should support DataFrame and Series "
        "validation"
    )
    with pytest.raises(ValueError, match=expected_error):

        @extensions.register_check_method(
            supported_types=pd.Series,
            check_type="element_wise",
        )
        def invalid_custom_check(*args):
            pass
Exemplo n.º 3
0
def test_register_vectorized_custom_check(
    custom_check_teardown: None,
    data: Union[pd.Series, pd.DataFrame],
) -> None:
    """Register a vectorized custom check and verify its behaviour.

    Three things are asserted:

    * the registered check passes on ``data`` when built with ``val=10``;
    * building the check with ``element_wise``, ``groupby`` or ``groups``
      keywords emits a ``UserWarning``;
    * registering a second function under the already-used name
      ``custom_check`` raises ``ValueError``.

    Both parameters are fixtures defined outside this snippet.
    """

    @extensions.register_check_method(
        statistics=["val"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="vectorized",
    )
    def custom_check(pandas_obj, *, val):
        # Compares the whole pandas object against the configured statistic.
        return pandas_obj == val

    outcome = Check.custom_check(val=10)(data)
    assert outcome.check_passed

    # Each of these keyword sets is expected to produce a UserWarning.
    warning_kwargs = (
        {"element_wise": True},
        {"element_wise": False},
        {"groupby": "column"},
        {"groups": ["group1", "group2"]},
    )
    for extra in warning_kwargs:
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, **extra)

    duplicate_error = "method with name 'custom_check' already defined"
    with pytest.raises(ValueError, match=duplicate_error):
        # The name must collide with the check registered above.
        # pylint: disable=function-redefined
        @extensions.register_check_method(statistics=["val"])
        def custom_check(pandas_obj, val):  # noqa
            return pandas_obj != val