def titanic_profiler_evrs_with_exception():
    """Load the stored BasicDatasetProfiler EVRs fixture that includes an
    expectation which raised an exception, deserialized through the
    expectation suite validation result schema."""
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_evrs_with_exception.json"
    )
    with open(fixture_path) as infile:
        raw_evrs = json.load(infile)
    return expectationSuiteValidationResultSchema.load(raw_evrs)
def datetime_column_evrs():
    """Hand-crafted EVRs for datetime columns, loaded with key order
    preserved (OrderedDict) and deserialized through the expectation suite
    validation result schema."""
    fixture_path = file_relative_path(
        __file__, "../fixtures/datetime_column_evrs.json"
    )
    with open(fixture_path) as infile:
        raw_evrs = json.load(infile, object_pairs_hook=OrderedDict)
    return expectationSuiteValidationResultSchema.load(raw_evrs)
def titanic_profiler_evrs():
    """Load the stored BasicDatasetProfiler EVRs fixture for the Titanic
    dataset, with key order preserved (OrderedDict), deserialized through
    the expectation suite validation result schema."""
    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_evrs.json"
    )
    with open(fixture_path) as infile:
        raw_evrs = json.load(infile, object_pairs_hook=OrderedDict)
    return expectationSuiteValidationResultSchema.load(raw_evrs)
def test_snapshot_BasicDatasetProfiler_on_titanic():
    """
    A snapshot regression test for BasicDatasetProfiler.
    We are running the profiler on the Titanic dataset
    and comparing the EVRs to ones retrieved from a
    previously stored file.

    Volatile metadata (library version, run_id, batch id) and
    ordering-sensitive result fields are stripped from both sides
    before the final equality assertion.
    """
    # Profile the Titanic CSV; profile() returns the generated suite plus EVRs.
    df = ge.read_csv(file_relative_path(__file__, "../test_sets/Titanic.csv"))
    suite, evrs = df.profile(BasicDatasetProfiler)

    # Check to make sure BasicDatasetProfiler is adding meta.columns with a
    # single "description" field for each column
    assert "columns" in suite.meta
    for k, v in suite.meta["columns"].items():
        assert v == {"description": ""}

    # Note: the above already produces an EVR; rerunning isn't strictly
    # necessary just for EVRs. This re-validation (with SUMMARY result_format)
    # intentionally replaces the EVRs returned by profile() above.
    evrs = df.validate(result_format="SUMMARY")

    # THIS IS NOT DEAD CODE. UNCOMMENT TO SAVE A SNAPSHOT WHEN UPDATING THIS TEST
    # with open('tests/test_sets/expected_evrs_BasicDatasetProfiler_on_titanic.json', 'w+') as file:
    #     json.dump(expectationSuiteValidationResultSchema.dump(evrs).data, file, indent=2)
    #
    # with open('tests/render/fixtures/BasicDatasetProfiler_evrs.json', 'w+') as file:
    #     json.dump(expectationSuiteValidationResultSchema.dump(evrs).data, file, indent=2)

    # Load the stored snapshot, preserving key order so comparison with the
    # freshly generated EVRs is stable.
    with open(
        file_relative_path(
            __file__,
            "../test_sets/expected_evrs_BasicDatasetProfiler_on_titanic.json"
        ),
        "r",
    ) as file:
        expected_evrs = expectationSuiteValidationResultSchema.load(
            json.load(file, object_pairs_hook=OrderedDict)).data

    # We know that python 2 does not guarantee the order of value_counts,
    # which causes a different order for items in the
    # partial_unexpected_value_counts list.
    # Remove those before assertions.
    for result in evrs.results:
        if "partial_unexpected_counts" in result.result:
            result.result.pop("partial_unexpected_counts")

    for result in expected_evrs.results:
        if "partial_unexpected_counts" in result.result:
            result.result.pop("partial_unexpected_counts")

    # Version and RUN-ID will be different on every run; strip them from
    # both sides so only substantive differences fail the comparison.
    del expected_evrs.meta["great_expectations.__version__"]
    del evrs.meta["great_expectations.__version__"]
    del expected_evrs.meta["run_id"]
    del evrs.meta["run_id"]
    # The stored snapshot has no ge_batch_id, so only the fresh side needs it removed.
    del evrs.meta["batch_kwargs"]["ge_batch_id"]

    # DISABLE TEST IN PY2 BECAUSE OF ORDER ISSUE AND NEAR-EOL
    if not PY2:
        assert expected_evrs == evrs