def test_register_custom_groupby_check(custom_check_teardown: None) -> None:
    """Test registering a custom groupby check.

    Registers ``custom_check`` with ``check_type="groupby"`` and exercises it
    twice: once attached to a column (grouping by another column) and once
    attached to the whole dataframe (grouping by the named index). Finally
    asserts that supplying ``element_wise`` to the registered check emits a
    ``UserWarning``.

    :param custom_check_teardown: fixture requested only for its side
        effects; presumably unregisters custom checks afterwards -- confirm
        against the fixture definition.
    """

    @extensions.register_check_method(
        statistics=["group_a", "group_b"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="groupby",
    )
    def custom_check(dict_groups, *, group_a, group_b):
        """Check that the mean of group A exceeds the mean of group B.

        Works on the raw ``.values`` of each group, so the groups may be
        either series or dataframes.
        """
        mean_a = dict_groups[group_a].values.mean()
        mean_b = dict_groups[group_b].values.mean()
        return mean_a > mean_b

    # --- column-level usage: "col1" is grouped by the labels in "col2" ---
    column_level_frame = pd.DataFrame(
        {"col1": [20, 20, 10, 10], "col2": list("aabb")}
    )
    column_level_schema = pa.DataFrameSchema(
        {
            "col1": pa.Column(
                int,
                Check.custom_check(group_a="a", group_b="b", groupby="col2"),
            ),
            "col2": pa.Column(str),
        }
    )
    validated_columns = column_level_schema(column_level_frame)
    assert isinstance(validated_columns, pd.DataFrame)

    # --- dataframe-level usage: rows are grouped by the named index ---
    frame_level_frame = pd.DataFrame(
        {
            "col1": [20, 20, 10, 10],
            "col2": [30, 30, 5, 5],
            "col3": [10, 10, 1, 1],
        },
        index=pd.Index(list("aabb"), name="my_index"),
    )
    frame_level_schema = pa.DataFrameSchema(
        columns={name: pa.Column(int) for name in ("col1", "col2", "col3")},
        index=pa.Index(str, name="my_index"),
        checks=Check.custom_check(
            group_a="a", group_b="b", groupby="my_index"
        ),
    )
    validated_frame = frame_level_schema(frame_level_frame)
    assert isinstance(validated_frame, pd.DataFrame)

    # Passing ``element_wise`` (either value) is expected to warn.
    # NOTE(review): ``val`` is not one of this check's declared statistics
    # (``group_a``/``group_b``); kept as-is -- confirm it is intentional.
    for element_wise_flag in (True, False):
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, element_wise=element_wise_flag)
def test_register_element_wise_custom_check(
    custom_check_teardown: None, data: Union[pd.Series, pd.DataFrame]
) -> None:
    """Test registering an element-wise custom check.

    Registers ``custom_check`` with ``check_type="element_wise"``, verifies
    that it passes on ``data`` with ``val=10``, that a set of extra keyword
    arguments each trigger a ``UserWarning``, and that registering an
    element-wise check supporting only ``pd.Series`` raises ``ValueError``.

    :param custom_check_teardown: fixture requested only for its side
        effects; presumably unregisters custom checks afterwards -- confirm
        against the fixture definition.
    :param data: series or dataframe fixture; presumably every element
        equals 10, since the check is asserted to pass -- confirm.
    """

    @extensions.register_check_method(
        statistics=["val"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="element_wise",
    )
    def custom_check(element, *, val):
        return element == val

    equals_ten = Check.custom_check(val=10)
    outcome = equals_ten(data)
    assert outcome.check_passed

    # Each of these keyword sets is expected to produce a UserWarning when
    # handed to the registered element-wise check.
    warning_options = (
        {"element_wise": True},
        {"element_wise": False},
        {"groupby": "column"},
        {"groups": ["group1", "group2"]},
    )
    for options in warning_options:
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, **options)

    # Element-wise registration with Series-only support must be rejected.
    expected_error = (
        "Element-wise checks should support DataFrame and Series "
        "validation"
    )
    with pytest.raises(ValueError, match=expected_error):

        @extensions.register_check_method(
            supported_types=pd.Series,
            check_type="element_wise",
        )
        def invalid_custom_check(*args):
            pass
def test_register_vectorized_custom_check(
    custom_check_teardown: None, data: Union[pd.Series, pd.DataFrame]
) -> None:
    """Test registering a vectorized custom check.

    Registers ``custom_check`` with ``check_type="vectorized"``, verifies
    that it passes on ``data`` with ``val=10``, that a set of extra keyword
    arguments each trigger a ``UserWarning``, and that registering a second
    check under the same name raises ``ValueError``.

    :param custom_check_teardown: fixture requested only for its side
        effects; presumably unregisters custom checks afterwards -- confirm
        against the fixture definition.
    :param data: series or dataframe fixture; presumably every element
        equals 10, since the check is asserted to pass -- confirm.
    """

    @extensions.register_check_method(
        statistics=["val"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="vectorized",
    )
    def custom_check(pandas_obj, *, val):
        return pandas_obj == val

    equals_ten = Check.custom_check(val=10)
    outcome = equals_ten(data)
    assert outcome.check_passed

    # Each of these keyword sets is expected to produce a UserWarning when
    # handed to the registered vectorized check.
    warning_options = (
        {"element_wise": True},
        {"element_wise": False},
        {"groupby": "column"},
        {"groups": ["group1", "group2"]},
    )
    for options in warning_options:
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, **options)

    # Re-registering under an already-used name must be rejected.
    duplicate_name_error = "method with name 'custom_check' already defined"
    with pytest.raises(ValueError, match=duplicate_name_error):
        # pylint: disable=function-redefined
        @extensions.register_check_method(statistics=["val"])
        def custom_check(pandas_obj, val):  # noqa
            return pandas_obj != val