Пример #1
0
def test_deprecate_pandas_dtype(schema_cls, as_pos_arg):
    """Test that pandas_dtype deprecation warnings/errors are raised.

    :param schema_cls: schema or schema-component class under test.
    :param as_pos_arg: whether ``schema_cls`` is additionally exercised
        with the data type passed as the first positional argument.
    """
    assert schema_cls(dtype=int).dtype.check(pa.Int())

    # Only build a schema through the deprecated keyword inside
    # ``pytest.warns``: this both asserts that the DeprecationWarning is
    # emitted and keeps it from leaking into the rest of the test run.
    with pytest.warns(DeprecationWarning):
        deprecated_schema = schema_cls(pandas_dtype=int)
    # The deprecated keyword must still resolve to the same data type.
    assert deprecated_schema.dtype.check(pa.Int())

    # Passing the new and the deprecated keyword together must be rejected.
    with pytest.raises(pa.errors.SchemaInitError):
        schema_cls(dtype=int, pandas_dtype=int)

    if as_pos_arg:
        assert schema_cls(int).dtype.check(pa.Int())
        # A positional dtype combined with the deprecated keyword must be
        # rejected as well.
        with pytest.raises(pa.errors.SchemaInitError):
            schema_cls(int, pandas_dtype=int)
Пример #2
0
def test_register_check_strategy(data) -> None:
    """Test that a strategy registered on a custom check produces data."""

    # pylint: disable=unused-argument
    def custom_eq_strategy(
        pandas_dtype: pa.DataType,
        strategy: st.SearchStrategy = None,
        *,
        value: Any,
    ):
        # Always yield ``value``, converted to the numpy scalar type that
        # corresponds to the requested data type.
        constant = st.just(value)
        return constant.map(strategies.to_numpy_dtype(pandas_dtype).type)

    # pylint: disable=no-member
    class CustomCheck(_CheckBase):
        """Check subclass exposing a single strategy-backed check."""

        @classmethod
        @strategies.register_check_strategy(custom_eq_strategy)
        @register_check_statistics(["value"])
        def custom_equals(cls, value, **kwargs) -> "CustomCheck":
            """Create a check comparing the data against ``value``."""

            def _custom_equals(series: pd.Series) -> pd.Series:
                """Element-wise equality against the captured value."""
                return series == value

            check_name = cls.custom_equals.__name__
            return cls(
                _custom_equals,
                name=check_name,
                error=f"equal_to({value})",
                **kwargs,
            )

    expected = 100
    custom_check = CustomCheck.custom_equals(expected)
    drawn = data.draw(custom_check.strategy(pa.Int()))
    assert drawn == expected
Пример #3
0
def test_index_example() -> None:
    """Test that examples produced by an Index schema validate against it."""
    index_schema = pa.Index(pa.Int(), unique=True)
    for _attempt in range(10):
        # Wrap the generated index in an otherwise empty dataframe so the
        # schema component can be called on it.
        generated_index = index_schema.example()
        frame = pd.DataFrame(index=generated_index)
        index_schema(frame)
Пример #4
0
def test_index_strategy(data) -> None:
    """Test that the Index strategy draws unique, correctly typed data."""
    expected_dtype = pa.Int()
    index_schema = pa.Index(expected_dtype, unique=True, name="index")
    drawn_index = data.draw(index_schema.strategy(size=10))

    # Uniqueness: no entry may be flagged as a duplicate.
    assert not drawn_index.duplicated().any()

    # The drawn data must resolve to the data type the schema asked for.
    drawn_dtype = pandas_engine.Engine.dtype(drawn_index.dtype)
    assert expected_dtype.check(drawn_dtype)

    index_schema(pd.DataFrame(index=drawn_index))
Пример #5
0
# pylint: disable=W0212
"""Unit tests for inferring statistics of pandas objects."""
import pandas as pd
import pytest

import pandera as pa
from pandera import dtypes, schema_statistics
from pandera.engines import pandas_engine

# Data types the pandas engine resolves the builtin ``float`` and ``int`` to.
DEFAULT_FLOAT = pandas_engine.Engine.dtype(float)
DEFAULT_INT = pandas_engine.Engine.dtype(int)

# Numeric data types: four instantiated from the top-level pandera classes,
# followed by two resolved by the pandas engine from string aliases.
NUMERIC_TYPES = [pa.Int(), pa.UInt(), pa.Float(), pa.Complex()] + [
    pandas_engine.Engine.dtype(alias) for alias in ("Int32", "UInt32")
]

# One instance of every signed and unsigned integer class listed below.
INTEGER_TYPES = [
    integer_cls()
    for integer_cls in (
        dtypes.Int,
        dtypes.Int8,
        dtypes.Int16,
        dtypes.Int32,
        dtypes.Int64,
        dtypes.UInt8,
        dtypes.UInt16,
        dtypes.UInt32,
        dtypes.UInt64,
    )
]
Пример #6
0
def test_column_example():
    """Test that examples produced by a Column schema validate against it."""
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    for _attempt in range(10):
        generated = positive_int_column.example()
        positive_int_column(generated)
Пример #7
0
def test_column_strategy(data) -> None:
    """Test that data drawn from a Column strategy validates."""
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    drawn = data.draw(positive_int_column.strategy())
    positive_int_column(drawn)
Пример #8
0
def test_series_example() -> None:
    """Test that examples produced by a SeriesSchema validate against it."""
    positive_int_series = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    for _attempt in range(10):
        generated = positive_int_series.example()
        positive_int_series(generated)
Пример #9
0
def test_series_strategy(data) -> None:
    """Test that data drawn from a SeriesSchema strategy validates."""
    positive_int_series = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    drawn = data.draw(positive_int_series.strategy())
    positive_int_series(drawn)
Пример #10
0
@hypothesis.given(st.data())
def test_dataframe_checks(data_type, data):
    """Test dataframe strategy with checks defined at the dataframe level.

    :param data_type: data type shared by every column of the schema.
    :param data: hypothesis data object used to draw values interactively.
    """
    min_value, max_value = data.draw(value_ranges(data_type))
    dataframe_schema = pa.DataFrameSchema(
        # The loop index has to be part of the key: with identical keys the
        # comprehension collapses into a single-column schema.
        {
            f"{data_type}_col{i}": pa.Column(data_type)
            for i in range(5)
        },
        checks=pa.Check.in_range(min_value, max_value),
    )
    strat = dataframe_schema.strategy(size=5)
    example = data.draw(strat)
    # Validation raises if the generated dataframe violates the schema.
    dataframe_schema(example)


@pytest.mark.parametrize(
    "data_type", [pa.Int(), pa.Float, pa.String, pa.DateTime]
)
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow]
)
def test_dataframe_strategy_with_indexes(data_type, data):
    """Test dataframe strategies for single-index and multiindex schemas."""
    single_index_schema = pa.DataFrameSchema(index=pa.Index(data_type))
    index_levels = [pa.Index(data_type, name=f"index{i}") for i in range(3)]
    multiindex_schema = pa.DataFrameSchema(index=pa.MultiIndex(index_levels))

    # Draw a ten-row dataframe from each schema and validate it against the
    # schema that generated it.
    for schema in (single_index_schema, multiindex_schema):
        schema(data.draw(schema.strategy(size=10)))


@hypothesis.given(st.data())
Пример #11
0
def test_dataframe_example() -> None:
    """Test that examples produced by a DataFrameSchema validate against it."""
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0))
    schema = pa.DataFrameSchema({"column": positive_int_column})
    for _attempt in range(10):
        generated = schema.example()
        schema(generated)
Пример #12
0
@hypothesis.given(st.data())
def test_dataframe_checks(data_type, data):
    """Test dataframe strategy with checks defined at the dataframe level.

    :param data_type: data type shared by every column of the schema.
    :param data: hypothesis data object used to draw values interactively.
    """
    min_value, max_value = data.draw(value_ranges(data_type))
    dataframe_schema = pa.DataFrameSchema(
        # Each key includes the loop index so that five distinct columns are
        # created; a constant key would leave only one entry in the dict.
        {
            f"{data_type}_col{i}": pa.Column(data_type)
            for i in range(5)
        },
        checks=pa.Check.in_range(min_value, max_value),
    )
    strat = dataframe_schema.strategy(size=5)
    example = data.draw(strat)
    # Validation raises if the generated dataframe violates the schema.
    dataframe_schema(example)


@pytest.mark.parametrize(
    "data_type", [pa.Int(), pa.Float, pa.String, pa.DateTime]
)
@hypothesis.given(st.data())
@hypothesis.settings(
    suppress_health_check=[hypothesis.HealthCheck.too_slow]
)
def test_dataframe_strategy_with_indexes(data_type, data):
    """Test dataframe strategies for single-index and multiindex schemas."""
    single_index_schema = pa.DataFrameSchema(index=pa.Index(data_type))
    index_levels = [pa.Index(data_type, name=f"index{i}") for i in range(3)]
    multiindex_schema = pa.DataFrameSchema(index=pa.MultiIndex(index_levels))

    # Each schema generates a ten-row dataframe which is then validated by
    # that same schema.
    for schema in (single_index_schema, multiindex_schema):
        schema(data.draw(schema.strategy(size=10)))


@hypothesis.given(st.data())