def parameterized_expectation_suite():
    """Load the parameterized-expression expectation suite fixture.

    Reads the JSON fixture located relative to this file and passes the
    parsed content through ``expectationSuiteSchema.load``.

    Returns:
        Whatever ``expectationSuiteSchema.load`` produces for the fixture.
    """
    suite_json_path = file_relative_path(
        __file__,
        "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
    )
    with open(suite_json_path) as fixture_file:
        raw_suite = json.load(fixture_file)
    # The file is fully consumed by json.load, so validation can run after close.
    return expectationSuiteSchema.load(raw_suite)
def titanic_dataset_profiler_expectations():
    """Load the BasicDatasetProfiler expectations fixture.

    The JSON objects are decoded as ``OrderedDict`` instances before being
    handed to ``expectationSuiteSchema.load``.

    Returns:
        Whatever ``expectationSuiteSchema.load`` produces for the fixture.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json"
    )
    with open(fixture_path) as fixture_file:
        parsed_fixture = json.load(fixture_file, object_pairs_hook=OrderedDict)
    return expectationSuiteSchema.load(parsed_fixture)
def titanic_profiled_expectations_1(empty_data_context_stats_enabled):
    """Build an ``ExpectationSuite`` from the BasicDatasetProfiler fixture.

    Args:
        empty_data_context_stats_enabled: Data context that is attached to
            the returned suite via its ``data_context`` argument.

    Returns:
        An ``ExpectationSuite`` constructed from the schema-loaded fixture
        content and bound to the supplied context.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json"
    )
    with open(fixture_path) as fixture_file:
        raw_suite = json.load(fixture_file)

    suite_kwargs: dict = expectationSuiteSchema.load(raw_suite)
    return ExpectationSuite(
        **suite_kwargs, data_context=empty_data_context_stats_enabled
    )
def titanic_dataset_profiler_expectations(empty_data_context_module_scoped):
    """Build an ``ExpectationSuite`` from the BasicDatasetProfiler fixture.

    JSON objects are decoded as ``OrderedDict`` instances before schema
    loading.

    Args:
        empty_data_context_module_scoped: Data context that is attached to
            the returned suite via its ``data_context`` argument.

    Returns:
        An ``ExpectationSuite`` constructed from the schema-loaded fixture
        content and bound to the supplied context.
    """
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_expectations.json"
    )
    with open(fixture_path) as fixture_file:
        raw_suite = json.load(fixture_file, object_pairs_hook=OrderedDict)

    suite_kwargs: dict = expectationSuiteSchema.load(raw_suite)
    return ExpectationSuite(
        **suite_kwargs, data_context=empty_data_context_module_scoped
    )
def parameterized_expectation_suite(empty_data_context_stats_enabled):
    """Build an ``ExpectationSuite`` from the parameterized-expression fixture.

    Args:
        empty_data_context_stats_enabled: Data context that is attached to
            the returned suite via its ``data_context`` argument.

    Returns:
        An ``ExpectationSuite`` constructed from the schema-loaded fixture
        content and bound to the supplied context.
    """
    suite_json_path = file_relative_path(
        __file__,
        "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
    )
    with open(suite_json_path) as fixture_file:
        raw_suite = json.load(fixture_file)

    suite_kwargs: dict = expectationSuiteSchema.load(raw_suite)
    return ExpectationSuite(
        **suite_kwargs, data_context=empty_data_context_stats_enabled
    )
def titanic_dataset_profiler_expectations_with_distribution():
    """Load the BasicDatasetProfiler "with distribution" expectations fixture.

    The fixture is read as UTF-8 text, its JSON objects are decoded as
    ``OrderedDict`` instances, and the result is passed through
    ``expectationSuiteSchema.load``.

    Returns:
        Whatever ``expectationSuiteSchema.load`` produces for the fixture.
    """
    fixture_path = file_relative_path(
        __file__,
        "./fixtures/BasicDatasetProfiler_expectations_with_distribution.json",
    )
    # The ``encoding`` keyword of json.load()/json.loads() was ignored since
    # Python 3.1 and removed in 3.9, where passing it raises TypeError.
    # Decoding belongs to the file object, so request UTF-8 when opening.
    with open(fixture_path, encoding="utf-8") as infile:
        return expectationSuiteSchema.load(
            json.load(fp=infile, object_pairs_hook=OrderedDict)
        )
def test_ge_pandas_sampling(empty_data_context):
    """Check that sampling a ``PandasDataset`` carries its expectations along.

    A sample must itself be a ``ge.dataset.PandasDataset`` and expose the
    same expectations as the parent data frame, including an expectation
    that fails on the parent when failed expectations are not discarded.

    Args:
        empty_data_context: Data context bound to the expected suite that
            the final comparison is made against.
    """
    context: DataContext = empty_data_context

    column_values = {
        "A": [1, 2, 3, 4],
        "B": [5, 6, 7, 8],
        "C": ["a", "b", "c", "d"],
        "D": ["e", "f", "g", "h"],
    }
    # Hand out copies so the data frame and the expectations never share lists.
    df = ge.dataset.PandasDataset(
        {name: list(values) for name, values in column_values.items()}
    )

    # Put some simple expectations on the data frame
    df.profile(profiler=ColumnsExistProfiler)
    for name, values in column_values.items():
        df.expect_column_values_to_be_in_set(name, list(values))
    parent_expectations = df.get_expectation_suite().expectations

    # The sampled data frame should:
    #
    # 1. Be a ge.dataset.PandasDataset
    # 2. Inherit ALL the expectations of the parent data frame
    for sample_kwargs in ({"n": 2}, {"frac": 0.25, "replace": True}):
        sample = df.sample(**sample_kwargs)
        assert isinstance(sample, ge.dataset.PandasDataset)
        assert sample.get_expectation_suite().expectations == parent_expectations

    # Change the expectation on column "D", sample, and check expectations.
    # The failing expectation on column "D" is NOT automatically dropped
    # in the sample.
    failing_d_values = ["e", "f", "g", "x"]
    df.expect_column_values_to_be_in_set("D", list(failing_d_values))
    sample = df.sample(n=2)

    # Expected suite: one existence check per column, then one value-set
    # check per column, with column "D" using the replaced value set.
    expected_value_sets = {**column_values, "D": failing_d_values}
    expected_configs = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": name},
        }
        for name in expected_value_sets
    ]
    expected_configs += [
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {"column": name, "value_set": list(values)},
        }
        for name, values in expected_value_sets.items()
    ]
    expected_suite_dict: dict = expectationSuiteSchema.load(
        {"expectation_suite_name": "test", "expectations": expected_configs}
    )
    expected_suite: ExpectationSuite = ExpectationSuite(
        **expected_suite_dict, data_context=context
    )

    sampled_expectations = sample.get_expectation_suite(
        discard_failed_expectations=False
    ).expectations
    assert sampled_expectations == expected_suite.expectations