示例#1
0
def parameterized_expectation_suite():
    """Load the parameterized-expression expectation suite fixture.

    The JSON fixture is located relative to this module with
    ``file_relative_path`` and the parsed content is handed to
    ``expectationSuiteSchema.load``.

    Returns:
        The result of ``expectationSuiteSchema.load`` for the fixture content.
    """
    with open(
        file_relative_path(
            __file__,
            "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
        )
    ) as fixture_file:
        raw_suite = json.load(fixture_file)
    # The file is fully consumed above, so the schema can run after closing it.
    return expectationSuiteSchema.load(raw_suite)
示例#2
0
def titanic_dataset_profiler_expectations():
    """Load the BasicDatasetProfiler expectations fixture through the schema.

    The JSON is parsed with ``OrderedDict`` as the object hook so that key
    order from the file is kept before ``expectationSuiteSchema.load`` runs.

    Returns:
        The result of ``expectationSuiteSchema.load`` for the fixture content.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json")
    with open(fixture_path) as fixture_file:
        ordered_content = json.load(fixture_file,
                                    object_pairs_hook=OrderedDict)
    return expectationSuiteSchema.load(ordered_content)
def titanic_profiled_expectations_1(empty_data_context_stats_enabled):
    """Build an ExpectationSuite from the BasicDatasetProfiler fixture.

    Args:
        empty_data_context_stats_enabled: Data context that is attached to the
            resulting suite as ``data_context``.

    Returns:
        An ``ExpectationSuite`` constructed from the schema-loaded fixture
        content and bound to the given context.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json")
    with open(fixture_path) as fixture_file:
        raw_suite = json.load(fixture_file)
    suite_kwargs: dict = expectationSuiteSchema.load(raw_suite)
    data_context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(**suite_kwargs, data_context=data_context)
def titanic_dataset_profiler_expectations(empty_data_context_module_scoped):
    """Build an ExpectationSuite from the BasicDatasetProfiler fixture.

    The JSON is parsed with ``OrderedDict`` as the object hook, passed through
    ``expectationSuiteSchema.load``, and the resulting mapping is expanded into
    the ``ExpectationSuite`` constructor.

    Args:
        empty_data_context_module_scoped: Data context that is attached to the
            resulting suite as ``data_context``.

    Returns:
        An ``ExpectationSuite`` bound to the given context.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json")
    with open(fixture_path) as fixture_file:
        ordered_content = json.load(fixture_file,
                                    object_pairs_hook=OrderedDict)
    suite_kwargs: dict = expectationSuiteSchema.load(ordered_content)
    data_context: DataContext = empty_data_context_module_scoped
    return ExpectationSuite(**suite_kwargs, data_context=data_context)
示例#5
0
def parameterized_expectation_suite(empty_data_context_stats_enabled):
    """Build an ExpectationSuite from the parameterized-expression fixture.

    Args:
        empty_data_context_stats_enabled: Data context that is attached to the
            resulting suite as ``data_context``.

    Returns:
        An ``ExpectationSuite`` constructed from the schema-loaded fixture
        content and bound to the given context.
    """
    with open(
        file_relative_path(
            __file__,
            "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
        )
    ) as fixture_file:
        raw_suite = json.load(fixture_file)
    suite_kwargs: dict = expectationSuiteSchema.load(raw_suite)
    data_context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(**suite_kwargs, data_context=data_context)
示例#6
0
def titanic_dataset_profiler_expectations_with_distribution():
    """Load the BasicDatasetProfiler-with-distribution expectations fixture.

    The JSON file is resolved relative to this module via
    ``file_relative_path``, parsed with ``OrderedDict`` as the object hook so
    key order from the file is kept, and passed to
    ``expectationSuiteSchema.load``.

    Returns:
        The result of ``expectationSuiteSchema.load`` for the fixture content.
    """
    fixture_path = file_relative_path(
        __file__,
        "./fixtures/BasicDatasetProfiler_expectations_with_distribution.json",
    )
    # ``json.load`` no longer accepts ``encoding`` (removed in Python 3.9,
    # ignored and deprecated before that), so passing it raises TypeError on
    # current interpreters. The encoding is applied where it actually takes
    # effect: when the file is opened.
    with open(fixture_path, encoding="utf-8") as infile:
        return expectationSuiteSchema.load(
            json.load(infile, object_pairs_hook=OrderedDict))
def test_ge_pandas_sampling(empty_data_context):
    """Check that sampling a PandasDataset keeps its type and expectations.

    Args:
        empty_data_context: Data context used to build the expected
            ``ExpectationSuite`` for the final comparison.
    """
    context: DataContext = empty_data_context
    df = ge.dataset.PandasDataset({
        "A": [1, 2, 3, 4],
        "B": [5, 6, 7, 8],
        "C": ["a", "b", "c", "d"],
        "D": ["e", "f", "g", "h"],
    })

    # Put some simple expectations on the data frame
    df.profile(profiler=ColumnsExistProfiler)
    for column, allowed in (
        ("A", [1, 2, 3, 4]),
        ("B", [5, 6, 7, 8]),
        ("C", ["a", "b", "c", "d"]),
        ("D", ["e", "f", "g", "h"]),
    ):
        df.expect_column_values_to_be_in_set(column, allowed)

    parent_expectations = df.get_expectation_suite().expectations

    # The sampled data frame should:
    #
    #   1. Be a ge.dataset.PandasDataset
    #   2. Inherit ALL the expectations of the parent data frame

    by_count = df.sample(n=2)
    assert isinstance(by_count, ge.dataset.PandasDataset)
    assert by_count.get_expectation_suite().expectations == parent_expectations

    by_fraction = df.sample(frac=0.25, replace=True)
    assert isinstance(by_fraction, ge.dataset.PandasDataset)
    assert (by_fraction.get_expectation_suite().expectations ==
            parent_expectations)

    # Change expectation on column "D", sample, and check expectations.
    # The failing expectation on column "D" is NOT automatically dropped
    # in the sample.
    df.expect_column_values_to_be_in_set("D", ["e", "f", "g", "x"])
    after_change = df.sample(n=2)

    # Expected suite content: one existence check per column followed by one
    # value-set check per column, with the updated set for column "D".
    expected_value_sets = (
        ("A", [1, 2, 3, 4]),
        ("B", [5, 6, 7, 8]),
        ("C", ["a", "b", "c", "d"]),
        ("D", ["e", "f", "g", "x"]),
    )
    existence_configs = [{
        "expectation_type": "expect_column_to_exist",
        "kwargs": {
            "column": column
        },
    } for column, _ in expected_value_sets]
    value_set_configs = [{
        "expectation_type": "expect_column_values_to_be_in_set",
        "kwargs": {
            "column": column,
            "value_set": allowed
        },
    } for column, allowed in expected_value_sets]

    expected_kwargs: dict = expectationSuiteSchema.load({
        "expectation_suite_name": "test",
        "expectations": existence_configs + value_set_configs,
    })
    expected_suite: ExpectationSuite = ExpectationSuite(**expected_kwargs,
                                                        data_context=context)

    actual_suite = after_change.get_expectation_suite(
        discard_failed_expectations=False)
    assert actual_suite.expectations == expected_suite.expectations