def profiler_with_unrealistic_expectations(
    dataset: PandasDataset,
) -> ExpectationSuite:
    """Return an expectation suite built from a hand-made, corrupted frame.

    The ``dataset`` argument is not read; the suite is recorded against a
    one-row frame created inside this function.

    Args:
        dataset: Accepted as part of the signature but not used here.

    Returns:
        The expectation suite collected on the internally built dataset.
    """
    # Single row of deliberately odd values: a negative balance and a zero
    # passenger count.
    corrupted_frame = pd.DataFrame(
        {
            "current_balance": [-100],
            "avg_passenger_count": [0],
        }
    )
    corrupted_ds = PandasDataset(corrupted_frame)

    corrupted_ds.expect_column_max_to_be_between("current_balance", -1000, -100)
    corrupted_ds.expect_column_values_to_be_in_set(
        "avg_passenger_count", value_set={0}
    )
    # The original author notes that this min-range expectation should pass.
    corrupted_ds.expect_column_min_to_be_between("avg_passenger_count", 0, 1000)

    return corrupted_ds.get_expectation_suite()
def test_pandas_datetime_evaluation_parameter():
    """Check that a timestamp evaluation parameter can bound a column max.

    Two timestamp parameters are registered on the dataset, and the maximum
    of ``data_refresh`` is expected to be no earlier than the
    ``now_minus_48h`` parameter, referenced via ``$PARAMETER``.
    """
    evaluation_parameters = {
        "now": pd.Timestamp.now(),
        "now_minus_48h": pd.Timestamp.now() - pd.to_timedelta(2, unit="d"),
    }

    # Refresh times: one taken now and one a day earlier.
    refresh_times = [
        pd.Timestamp.now(),
        pd.Timestamp.now() - pd.to_timedelta(1, unit="d"),
    ]
    dataset = PandasDataset(pd.DataFrame({"data_refresh": refresh_times}))

    for name, value in evaluation_parameters.items():
        dataset.set_evaluation_parameter(name, value)

    dataset.expect_column_max_to_be_between(
        column="data_refresh",
        min_value={"$PARAMETER": "now_minus_48h"},
    )

    outcome = dataset.validate()
    assert outcome.success