def test_unsupported_pandas_dtype_strategy(data_type):
    """Check that an unsupported dtype is rejected with a ``TypeError``.

    The raised error must match the message stating that data generation
    for the Category dtype is currently unsupported.
    """
    expected_message = (
        "data generation for the Category dtype is currently unsupported"
    )
    with pytest.raises(TypeError, match=expected_message):
        strategies.pandas_dtype_strategy(data_type)
def value_ranges(pdtype: pa.PandasDtype):
    """Build a strategy yielding ordered ``[low, high]`` pairs for ``pdtype``.

    Two values are generated from ``pandas_dtype_strategy`` (called with
    ``allow_nan=False``, ``allow_infinity=False``, ``exclude_min=False`` and
    ``exclude_max=False``), sorted ascending, and any pair whose first value
    is not strictly less than its second is filtered out.
    """
    bound_options = {
        "allow_nan": False,
        "allow_infinity": False,
        "exclude_min": False,
        "exclude_max": False,
    }

    def bound_strategy():
        # A fresh strategy per bound, mirroring two independent calls.
        return strategies.pandas_dtype_strategy(
            pdtype, strategy=None, **bound_options
        )

    ordered_pairs = st.tuples(bound_strategy(), bound_strategy()).map(sorted)
    return ordered_pairs.filter(lambda pair: pair[0] < pair[1])
def test_pandas_dtype_strategy(data_type, data):
    """Check that generated examples carry the numpy type of ``data_type``.

    The check is done twice: once for the plain strategy and once for a
    strategy chained on top of the plain one.
    """
    base_strategy = strategies.pandas_dtype_strategy(data_type)
    drawn = data.draw(base_strategy)
    numpy_type = strategies.to_numpy_dtype(data_type).type
    assert drawn.dtype.type == numpy_type

    # Passing the first strategy as the base must not change the dtype.
    chained_drawn = data.draw(
        strategies.pandas_dtype_strategy(data_type, base_strategy)
    )
    assert chained_drawn.dtype.type == numpy_type
def test_pandas_dtype_strategy(pdtype, data):
    """Check that generated examples carry the numpy type of ``pdtype``.

    For ``pa.Object`` the expected type is taken from
    ``pdtype.String.numpy_dtype``; otherwise from ``pdtype.numpy_dtype``.
    The check is repeated for a strategy chained on the first one.
    """
    base_strategy = strategies.pandas_dtype_strategy(pdtype)
    drawn = data.draw(base_strategy)

    if pdtype is pa.Object:
        numpy_type = pdtype.String.numpy_dtype.type
    else:
        numpy_type = pdtype.numpy_dtype.type
    assert drawn.dtype.type == numpy_type

    # Passing the first strategy as the base must not change the dtype.
    chained_drawn = data.draw(
        strategies.pandas_dtype_strategy(pdtype, base_strategy)
    )
    assert chained_drawn.dtype.type == numpy_type
def test_isin_notin_strategies(pdtype, chained, data):
    """Exercise the ``isin`` and ``notin`` check strategies.

    Ten values are drawn as the allowed/forbidden set. When ``chained`` is
    truthy, the strategies under test are built on parent strategies whose
    value list is those ten values plus ten more.
    """
    element_st = strategies.pandas_dtype_strategy(
        pdtype,
        allow_nan=False,
        allow_infinity=False,
        exclude_min=False,
        exclude_max=False,
    )

    def draw_ten():
        return [data.draw(element_st) for _ in range(10)]

    values = draw_ten()

    isin_parent = notin_parent = None
    if chained:
        parent_values = values + draw_ten()
        isin_parent = strategies.isin_strategy(
            pdtype, allowed_values=parent_values
        )
        notin_parent = strategies.notin_strategy(
            pdtype, forbidden_values=parent_values
        )

    isin_st = strategies.isin_strategy(
        pdtype, isin_parent, allowed_values=values
    )
    notin_st = strategies.notin_strategy(
        pdtype, notin_parent, forbidden_values=values
    )

    assert data.draw(isin_st) in values
    assert data.draw(notin_st) not in values
def test_in_range_strategy(pdtype, chained, data):
    """Check that ``in_range_strategy`` yields values inside the bounds.

    Bounds come from ``value_ranges``. When ``chained`` is truthy the
    strategy is built on a parent strategy limited to the same bounds.
    """
    min_value, max_value = data.draw(value_ranges(pdtype))
    hypothesis.assume(min_value < max_value)

    parent_st = None
    if chained:
        # Only float dtypes get the explicit inclusive-bound flags.
        extra_kwargs = (
            {"exclude_min": False, "exclude_max": False}
            if pdtype.is_float
            else {}
        )
        # constraining the strategy this way makes testing more efficient
        parent_st = strategies.pandas_dtype_strategy(
            pdtype,
            min_value=min_value,
            max_value=max_value,
            **extra_kwargs,
        )

    in_range_st = strategies.in_range_strategy(
        pdtype,
        parent_st,
        min_value=min_value,
        max_value=max_value,
    )
    assert min_value <= data.draw(in_range_st) <= max_value
def test_pandas_dtype_strategy(data_type, data):
    """Check that generated examples carry the numpy type of ``data_type``.

    ``pd.Timestamp`` examples are converted with ``to_numpy()`` before their
    dtype is inspected. The check is repeated for a strategy chained on the
    first one.
    """

    def as_numpy(value):
        # Timestamps are converted so the dtype comparison below applies.
        if isinstance(value, pd.Timestamp):
            return value.to_numpy()
        return value

    base_strategy = strategies.pandas_dtype_strategy(data_type)
    drawn = data.draw(base_strategy)
    numpy_type = strategies.to_numpy_dtype(data_type).type
    assert as_numpy(drawn).dtype.type == numpy_type

    chained_drawn = data.draw(
        strategies.pandas_dtype_strategy(data_type, base_strategy)
    )
    assert as_numpy(chained_drawn).dtype.type == numpy_type
def test_field_element_strategy(pdtype, data):
    """Check ``field_element_strategy`` output and its base-only contract.

    A drawn element must have the numpy type of ``pdtype``, and supplying a
    base strategy must raise ``BaseStrategyOnlyError``.
    """
    element = data.draw(strategies.field_element_strategy(pdtype))
    assert element.dtype.type == pdtype.numpy_dtype.type

    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        parent_st = strategies.pandas_dtype_strategy(pdtype)
        strategies.field_element_strategy(pdtype, parent_st)
def test_dataframe_strategy(pdtype, data):
    """Check that a schema validates data drawn from its own strategy.

    A single-column schema is built for ``pdtype``, a sample of size 5 is
    drawn and passed back through the schema. Supplying a base strategy to
    ``dataframe_strategy`` must raise ``BaseStrategyOnlyError``.
    """
    column_name = f"{pdtype.value}_col"
    schema = pa.DataFrameSchema({column_name: pa.Column(pdtype)})

    sample = data.draw(schema.strategy(size=5))
    schema(sample)

    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        parent_st = strategies.pandas_dtype_strategy(pdtype)
        strategies.dataframe_strategy(pdtype, parent_st)
def custom_ge_strategy(
    pandas_dtype: DataType,
    strategy: Optional[st.SearchStrategy] = None,
    *,
    min_value: Any,
) -> st.SearchStrategy:
    """Return a strategy for values bounded below by ``min_value``.

    :param pandas_dtype: dtype forwarded to ``st.pandas_dtype_strategy``
        when no base strategy is given.
    :param strategy: optional base strategy to narrow down.
    :param min_value: lower bound.
    :returns: ``strategy`` filtered to values strictly greater than
        ``min_value`` when a base strategy is given; otherwise a new
        strategy created with ``min_value`` and ``exclude_min=False``.
    """
    if strategy is not None:
        # Chained case: narrow the parent rather than building a new one.
        return strategy.filter(lambda candidate: candidate > min_value)

    return st.pandas_dtype_strategy(
        pandas_dtype,
        min_value=min_value,
        exclude_min=False,
    )
def test_check_strategy_chained_continuous(
    pdtype, strat_fn, arg_name, base_st_type, compare_op, data
):
    """Check a continuous check strategy when chained on a parent strategy.

    ``base_st_type`` selects the parent: ``"type"`` reuses the bounded base
    strategy, ``"just"`` uses ``st.just(value)``, and ``"limit"`` builds a
    second bounded strategy with the same arguments. ``arg_name`` names both
    the keyword passed to ``strat_fn`` and the local variable whose value is
    passed, so the local names below must not be renamed.
    """
    min_value, max_value = data.draw(value_ranges(pdtype))
    hypothesis.assume(min_value < max_value)
    value = min_value

    base_st = strategies.pandas_dtype_strategy(
        pdtype,
        min_value=min_value,
        max_value=max_value,
        allow_nan=False,
        allow_infinity=False,
    )

    if base_st_type == "just":
        assert_base_st = st.just(value)
    elif base_st_type == "limit":
        assert_base_st = strategies.pandas_dtype_strategy(
            pdtype,
            min_value=min_value,
            max_value=max_value,
            allow_nan=False,
            allow_infinity=False,
        )
    elif base_st_type == "type":
        assert_base_st = base_st
    else:
        raise RuntimeError(f"base_st_type {base_st_type} not recognized")

    # Look up the argument value by name among the locals defined above.
    assert_value = locals()[arg_name]

    drawn = data.draw(
        strat_fn(pdtype, assert_base_st, **{arg_name: assert_value})
    )
    assert compare_op(drawn, assert_value)
def test_dataframe_strategy(pdtype, size, data):
    """Check the schema strategy for several ``size`` settings.

    With ``size == 0`` the sample must be empty; with ``size is None`` it
    must be empty or validate to a ``pd.DataFrame``; otherwise it must
    validate to a ``pd.DataFrame``. Supplying a base strategy to
    ``dataframe_strategy`` must raise ``BaseStrategyOnlyError``.
    """
    column_name = f"{pdtype.value}_col"
    schema = pa.DataFrameSchema({column_name: pa.Column(pdtype)})
    sample = data.draw(schema.strategy(size=size))

    if size is None:
        # Validation is only attempted when the sample is non-empty.
        assert sample.empty or isinstance(schema(sample), pd.DataFrame)
    elif size == 0:
        assert sample.empty
    else:
        assert isinstance(schema(sample), pd.DataFrame)

    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        parent_st = strategies.pandas_dtype_strategy(pdtype)
        strategies.dataframe_strategy(pdtype, parent_st)
def test_multiindex_strategy(data):
    """Check the ``MultiIndex`` strategy and its base-only contract.

    A three-level float index is drawn with ``size=10`` and every level's
    dtype is compared against ``pdtype.str_alias``. Supplying a base
    strategy to ``multiindex_strategy`` must raise
    ``BaseStrategyOnlyError``.
    """
    pdtype = pa.PandasDtype.Float
    levels = [
        pa.Index(pdtype, allow_duplicates=False, name="level_0"),
        pa.Index(pdtype, nullable=True),
        pa.Index(pdtype),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    example = data.draw(multiindex.strategy(size=10))
    for level in range(example.nlevels):
        level_dtype = example.get_level_values(level).dtype
        assert level_dtype == pdtype.str_alias

    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        parent_st = strategies.pandas_dtype_strategy(pdtype)
        strategies.multiindex_strategy(pdtype, parent_st)
def test_multiindex_strategy(data) -> None:
    """Check the ``MultiIndex`` strategy and its base-only contract.

    A three-level float index is drawn with ``size=10``; each level's dtype
    is converted with ``pandas_engine.Engine.dtype`` and checked against the
    float data type. Supplying a base strategy to ``multiindex_strategy``
    must raise ``BaseStrategyOnlyError``.
    """
    data_type = pa.Float()
    levels = [
        pa.Index(data_type, unique=True, name="level_0"),
        pa.Index(data_type, nullable=True),
        pa.Index(data_type),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    example = data.draw(multiindex.strategy(size=10))
    for level in range(example.nlevels):
        level_dtype = example.get_level_values(level).dtype
        assert data_type.check(pandas_engine.Engine.dtype(level_dtype))

    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        parent_st = strategies.pandas_dtype_strategy(data_type)
        strategies.multiindex_strategy(data_type, parent_st)
def test_unsupported_pandas_dtype_strategy(pdtype):
    """Check that an unsupported dtype is rejected with a ``TypeError``.

    The expected message is ``TYPE_ERROR_FMT`` formatted with
    ``pdtype.name``.
    """
    expected_message = TYPE_ERROR_FMT.format(pdtype.name)
    with pytest.raises(TypeError, match=expected_message):
        strategies.pandas_dtype_strategy(pdtype)
def test_unsupported_pandas_dtype_strategy(data_type):
    """Check that an unsupported dtype is rejected with a ``TypeError``.

    The raised error must match the pattern ``is currently unsupported``.
    """
    expected_pattern = r"is currently unsupported"
    with pytest.raises(TypeError, match=expected_pattern):
        strategies.pandas_dtype_strategy(data_type)