Пример #1
0
def profiler_with_unrealistic_expectations(
        dataset: PandasDataset) -> ExpectationSuite:
    """Return an expectation suite built from a hand-made one-row dataset.

    The ``dataset`` argument is accepted but never read: the suite is
    recorded against a separate ``PandasDataset`` wrapping a single row of
    deliberately corrupted values.

    Args:
        dataset: Unused by this function.

    Returns:
        The expectation suite collected on the internal one-row dataset.
    """
    # Corrupted data has to exist before expectations can be recorded on it.
    corrupted_frame = pd.DataFrame(
        {"current_balance": [-100], "avg_passenger_count": [0]}
    )
    corrupted_ds = PandasDataset(corrupted_frame)

    corrupted_ds.expect_column_max_to_be_between(
        "current_balance", -1000, -100)
    corrupted_ds.expect_column_values_to_be_in_set(
        "avg_passenger_count", value_set={0})

    # Per the original author's note, this expectation is the one meant to
    # pass.
    corrupted_ds.expect_column_min_to_be_between(
        "avg_passenger_count", 0, 1000)

    suite = corrupted_ds.get_expectation_suite()
    return suite
Пример #2
0
def test_pandas_datetime_evaluation_parameter():
    """Validate a datetime column against a timestamp evaluation parameter.

    Two timestamp parameters ("now" and "now_minus_48h") are registered on
    the dataset; the expectation on the column maximum references only
    "now_minus_48h" through the ``$PARAMETER`` syntax.
    """
    params = {
        "now": pd.Timestamp.now(),
        "now_minus_48h": pd.Timestamp.now() - pd.to_timedelta(2, unit="d"),
    }

    # Two refresh timestamps: the current moment and one day earlier.
    refresh_now = pd.Timestamp.now()
    refresh_yesterday = pd.Timestamp.now() - pd.to_timedelta(1, unit="d")
    frame = pd.DataFrame({"data_refresh": [refresh_now, refresh_yesterday]})
    dataset = PandasDataset(frame)

    for name, value in params.items():
        dataset.set_evaluation_parameter(name, value)

    # The lower bound is resolved from the stored evaluation parameter.
    lower_bound = {"$PARAMETER": "now_minus_48h"}
    dataset.expect_column_max_to_be_between(
        column="data_refresh", min_value=lower_bound)

    validation = dataset.validate()

    assert validation.success