def test_ValidationsStore_with_InMemoryStoreBackend():
    """An in-memory ValidationsStore round-trips results keyed by ValidationResultIdentifier."""
    store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # Keys must be ValidationResultIdentifier instances, not raw strings.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    passing_key = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-100")
    )
    store.set(passing_key, ExpectationSuiteValidationResult(success=True))
    assert store.get(passing_key) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    failing_key = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-200")
    )
    store.set(failing_key, ExpectationSuiteValidationResult(success=False))
    assert store.get(failing_key) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    # Both stored keys are listed.
    assert set(store.list_keys()) == {passing_key, failing_key}
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Round-trip validation results through a filesystem-backed ValidationsStore.

    Verifies key-type checking, set/get of passing and failing results, key
    listing, and the exact on-disk directory layout produced by the backend.
    """
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir")
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )
    # Keys must be ValidationResultIdentifier instances, not raw strings.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")
    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])
    # Identifiers can also be constructed from a tuple with an explicit run_time.
    ns_2 = ValidationResultIdentifier.from_tuple((
        "asset",
        "quarantine",
        "prod-20",
        datetime.datetime.now(datetime.timezone.utc),
        "batch_id",
    ))
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])
    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
    print(gen_directory_tree_str(path))
    # NOTE(review): the fixed 20190926T134241.000000Z path segments imply the
    # test runs under a frozen clock (e.g. freeze_time) — confirm the fixture.
    assert (gen_directory_tree_str(path) == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
""")
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Exercise a database-backed ValidationsStore end to end.

    Uses sqlite credentials so the test needs no external postgres server.
    Also checks that the store reports a valid UUID store_backend_id, which
    the StoreBackend assigns when it is instantiated.
    """
    # sqlite keeps this test self-contained — no external database needed.
    connection_kwargs = {"drivername": "sqlite"}

    # Default configuration should be picked up from class_name alone.
    store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    # Raw strings are rejected as keys.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    passing_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    store.set(passing_key, ExpectationSuiteValidationResult(success=True))
    assert store.get(passing_key) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    failing_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    store.set(failing_key, ExpectationSuiteValidationResult(success=False))
    assert store.get(failing_key) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(store.list_keys()) == {passing_key, failing_key}

    # A Store must report the store_backend_id set when its StoreBackend was
    # instantiated, and that id must be a valid UUID4.
    assert store.store_backend_id is not None
    assert test_utils.validate_uuid4(store.store_backend_id)
def test_ValidationsStore_with_TupleS3StoreBackend():
    """Round-trip validation results through an S3-backed ValidationsStore.

    Runs against moto's mocked AWS: creates a bucket, stores passing and
    failing results, then verifies the exact S3 object keys and list_keys.
    """
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"
    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    my_store = ValidationsStore(store_backend={
        "class_name": "TupleS3StoreBackend",
        "bucket": bucket,
        "prefix": prefix,
    })
    # Raw strings are rejected as keys.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")
    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])
    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    # Verify that internals are working as expected, including the default filepath
    # NOTE(review): the fixed 20190926T134241.000000Z path segments imply a
    # frozen clock (e.g. freeze_time) — confirm the fixture.
    assert {
        s3_object_info["Key"]
        for s3_object_info in boto3.client("s3").list_objects_v2(
            Bucket=bucket, Prefix=prefix)["Contents"]
    } == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Round-trip results through an in-memory ValidationsStore.

    Also verifies the store reports a valid UUID store_backend_id, which is
    assigned when the StoreBackend is instantiated.
    """
    store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # Raw strings are rejected as keys.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    passing_key = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-100",
        )
    )
    store.set(passing_key, ExpectationSuiteValidationResult(success=True))
    assert store.get(passing_key) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    failing_key = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-200",
        )
    )
    store.set(failing_key, ExpectationSuiteValidationResult(success=False))
    assert store.get(failing_key) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(store.list_keys()) == {passing_key, failing_key}

    # A Store must expose the UUID store_backend_id created with its backend.
    assert store.store_backend_id is not None
    assert test_utils.validate_uuid4(store.store_backend_id)
def test_StoreAction():
    """StoreValidationResultAction persists a result into the target store.

    Builds a minimal stand-in data_context (only a `.stores` attribute), runs
    the action, and verifies the stored key and payload — including the
    conversion of the string run_id into a RunIdentifier.
    """
    fake_in_memory_store = ValidationsStore(
        store_backend={"class_name": "InMemoryStoreBackend",}
    )
    stores = {"fake_in_memory_store": fake_in_memory_store}

    # Minimal stand-in for a DataContext: the action only needs `.stores`.
    class Object:
        pass

    data_context = Object()
    data_context.stores = stores

    action = StoreValidationResultAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )
    assert fake_in_memory_store.list_keys() == []

    action.run(
        validation_result_suite_identifier=ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id="prod_20190801",
            batch_identifier="1234",
        ),
        validation_result_suite=ExpectationSuiteValidationResult(
            success=False, results=[]
        ),
        data_asset=None,
    )

    # NOTE(review): the fixed run_time implies the clock is frozen for this
    # test (e.g. via freeze_time) — confirm the fixture.
    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    assert len(fake_in_memory_store.list_keys()) == 1
    stored_identifier = fake_in_memory_store.list_keys()[0]
    assert stored_identifier.batch_identifier == "1234"
    assert (
        stored_identifier.expectation_suite_identifier.expectation_suite_name
        == "default_expectations"
    )
    assert stored_identifier.run_id == expected_run_id

    # The stored payload must be retrievable under the normalized identifier.
    assert fake_in_memory_store.get(
        ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=expected_run_id,
            batch_identifier="1234",
        )
    ) == ExpectationSuiteValidationResult(success=False, results=[])
def test_SlackNotificationAction(data_context):
    """SlackNotificationAction.run returns None when the webhook call fails.

    The webhook URL is fake, so the Slack POST cannot succeed; the action is
    expected to swallow the failure and yield None.
    """
    renderer = {
        "module_name": "great_expectations.render.renderer.slack_renderer",
        "class_name": "SlackRenderer",
    }
    slack_webhook = "https://hooks.slack.com/services/test/slack/webhook"
    notify_on = "all"
    slack_action = SlackNotificationAction(
        data_context=data_context,
        renderer=renderer,
        slack_webhook=slack_webhook,
        notify_on=notify_on
    )
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            'evaluated_expectations': 0,
            'successful_expectations': 0,
            'unsuccessful_expectations': 0,
            'success_percent': None
        },
        meta={
            'great_expectations.__version__': 'v0.8.0__develop',
            'expectation_suite_name': 'asset.default',
            'run_id': 'test_100'
        })
    validation_result_suite_id = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id="test_100",
        batch_identifier="1234")
    # TODO: improve this test - currently it is verifying a failed call to Slack
    # Fixed: compare against None with `is`, not `==` (PEP 8 / E711).
    assert slack_action.run(
        validation_result_suite_identifier=validation_result_suite_id,
        validation_result_suite=validation_result_suite,
        data_asset=None
    ) is None
def test_StoreMetricsAction_column_metric(basic_in_memory_data_context_for_validation_operator):
    """StoreMetricsAction extracts a column-scoped metric into the metrics store.

    Requests the unexpected_count of expect_column_values_to_be_unique for the
    provider_id column, runs the action over a crafted validation result, and
    verifies the value can be read back by its ValidationMetricIdentifier.
    """
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            # "*" applies to every expectation suite; the nested "column" form
            # scopes a metric to a single column.
            "*": [
                {
                    "column": {
                        "provider_id": ["expect_column_values_to_be_unique.result.unexpected_count"]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations"
            ]
        },
        target_store_name="metrics_store"
    )
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo",
            "run_id": "bar"
        },
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": []
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={
                        "column": "provider_id",
                        "result_format": "BASIC"
                    }
                ),
                exception_info=None
            )
        ],
        statistics={
            "evaluated_expectations": 5,
            "successful_expectations": 3
        }
    )
    action.run(validation_result, ValidationResultIdentifier.from_object(validation_result), data_asset=None)
    # The metric is addressed by run, suite, metric name, and a kwargs id
    # encoding the column scope.
    assert basic_in_memory_data_context_for_validation_operator.stores["metrics_store"].get(ValidationMetricIdentifier(
        run_id="bar",
        expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
        metric_name="expect_column_values_to_be_unique.result.unexpected_count",
        metric_kwargs_id="column=provider_id"
    )) == 7
def test_SlackRenderer():
    """SlackRenderer formats a validation result into a Slack blocks payload.

    The timestamp rendered into the message is locale-dependent, so both the
    expected and actual payloads have their datetime substring normalized to
    'LOCALEDATE' before comparison.
    """
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            'evaluated_expectations': 0,
            'successful_expectations': 0,
            'unsuccessful_expectations': 0,
            'success_percent': None
        },
        meta={
            'great_expectations.__version__': 'v0.8.0__develop',
            'data_asset_name': {
                'datasource': 'x',
                'generator': 'y',
                'generator_asset': 'z'
            },
            'expectation_suite_name': 'default',
            'run_id': '2019-09-25T060538.829112Z'
        })
    rendered_output = SlackRenderer().render(validation_result_suite)
    print(rendered_output)
    expected_renderer_output = {
        'blocks': [{
            'type': 'section',
            'text': {
                'type': 'mrkdwn',
                'text': '*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Timestamp*: `09/24/2019 23:18:36`\n*Summary*: *0* of *0* expectations were met'
            }
        }, {
            'type': 'divider'
        }, {
            'type': 'context',
            'elements': [{
                'type': 'mrkdwn',
                'text': 'Learn how to review validation results: https://docs.greatexpectations.io/en/latest/features/validation.html#reviewing-validation-results'
            }]
        }],
        'text': 'default: Success :tada:'
    }
    # We're okay with system variation in locales (OS X likes 24 hour, but not Travis)
    # Normalize both 12-hour and 24-hour renderings on each side of the compare.
    expected_renderer_output['blocks'][0]['text']['text'] = \
        expected_renderer_output['blocks'][0]['text']['text'].replace('09/24/2019 11:18:36 PM', 'LOCALEDATE')
    expected_renderer_output['blocks'][0]['text']['text'] = \
        expected_renderer_output['blocks'][0]['text']['text'].replace('09/24/2019 23:18:36', 'LOCALEDATE')
    rendered_output['blocks'][0]['text']['text'] = \
        rendered_output['blocks'][0]['text']['text'].replace('09/24/2019 11:18:36 PM', 'LOCALEDATE')
    rendered_output['blocks'][0]['text']['text'] = \
        rendered_output['blocks'][0]['text']['text'].replace('09/24/2019 23:18:36', 'LOCALEDATE')
    assert rendered_output == expected_renderer_output
def test_OpsgenieRenderer_validation_results_success():
    """OpsgenieRenderer renders a successful validation as a plain-text alert body."""
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.12.2__develop",
            "batch_kwargs": {
                "data_asset_name": "x/y/z"
            },
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2021-01-01T000000.000000Z",
        },
    )
    rendered_output = OpsgenieRenderer().render(validation_result_suite)
    # The "Batch ID" line is derived from batch_kwargs' data_asset_name.
    expected_output = "Batch Validation Status: Success 🎉\nExpectation suite name: default\nData asset name: x/y/z\nRun ID: 2021-01-01T000000.000000Z\nBatch ID: data_asset_name=x/y/z\nSummary: 0 of 0 expectations were met"
    assert rendered_output == expected_output
def test_render_DefaultJinjaPageView_meta_info():
    """Render a profiling results page and write the HTML artifact.

    Smoke test: builds a validation result with rich meta info, renders it
    through ProfilingResultsPageRenderer + DefaultJinjaPageView, and writes
    the output HTML next to the test for manual inspection.
    """
    validation_results = ExpectationSuiteValidationResult(
        **{
            "results": [],
            "statistics": {
                "evaluated_expectations": 156,
                "successful_expectations": 139,
                "unsuccessful_expectations": 17,
                "success_percent": 89.1025641025641,
            },
            "meta": {
                "great_expectations.__version__": "0.7.0-beta",
                "data_asset_name": "datasource/generator/tetanusvaricella",
                "expectation_suite_name": "my_suite",
                "run_id": "2019-06-25T14:58:09.960521",
                "batch_kwargs": {
                    "path": "/Users/user/project_data/public_healthcare_datasets/tetanusvaricella/tetvardata.csv",
                    "timestamp": 1561474688.693565,
                },
            },
        })
    document = ProfilingResultsPageRenderer().render(validation_results)
    html = DefaultJinjaPageView().render(document)
    # Bug fix: the output filename previously ended in ".html)" — a stray
    # closing parenthesis inside the string literal produced a wrongly named
    # artifact file.
    with open(
        file_relative_path(
            __file__, "./output/test_render_DefaultJinjaPageView_meta_info.html"),
        "w",
    ) as outfile:
        outfile.write(html)
def _run(
    self,
    validation_result_suite: ExpectationSuiteValidationResult,
    validation_result_suite_identifier: ValidationResultIdentifier,
    expectation_suite_identifier=None,
    checkpoint_identifier=None,
    data_asset=None,
    **kwargs,
) -> str:
    """Publish the validation result as an SNS notification.

    Subject precedence: ``self.sns_message_subject`` if configured; otherwise
    the expectation suite name; otherwise the validation run_id.

    Returns:
        Whatever send_sns_notification returns (a status string).
    """
    logger.debug("SNSNotificationAction.run")

    # NOTE(review): despite the "Skipping action" wording this branch only
    # warns — execution continues and str(None) would still be published
    # below. Confirm whether an early return was intended here.
    if validation_result_suite is None:
        logger.warning(
            f"No validation_result_suite was passed to {type(self).__name__} action. Skipping action. "
        )

    if self.sns_message_subject is None:
        logger.warning(
            f"No message subject was passed checking for expectation_suite_name"
        )
        if expectation_suite_identifier is None:
            # Fall back to the run_id as the message subject.
            subject = validation_result_suite_identifier.run_id
            logger.warning(
                f"No expectation_suite_identifier was passed. Defaulting to validation run_id: {subject}."
            )
        else:
            subject = expectation_suite_identifier.expectation_suite_name
            logger.info(f"Using expectation_suite_name: {subject}")
    else:
        subject = self.sns_message_subject
    # The full stringified validation result becomes the message body.
    return send_sns_notification(self.sns_topic_arn, subject, validation_result_suite.__str__(), **kwargs)
def test_ProfilingResultsOverviewSectionRenderer_empty_type_list():
    """Regression test: type_list=None must be counted as an 'unknown' type."""
    # This rather specific test is a reaction to the error documented in #679
    validation = ExpectationSuiteValidationResult(
        results=[
            ExpectationValidationResult(
                success=True,
                result={
                    'observed_value': "VARIANT",  # Note this is NOT a recognized type by many backends
                },
                exception_info={
                    'raised_exception': False,
                    'exception_message': None,
                    'exception_traceback': None
                },
                expectation_config=ExpectationConfiguration(
                    expectation_type='expect_column_values_to_be_in_type_list',
                    kwargs={
                        'column': 'live',
                        'type_list': None,
                        'result_format': 'SUMMARY'
                    },
                    meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
                )
            )
        ]
    )
    result = ProfilingResultsOverviewSectionRenderer().render(validation)
    # Find the variable types content block:
    types_table = [
        block.table for block in result.content_blocks
        if block.content_block_type == "table"
        and block.header.string_template["template"] == "Variable types"
    ][0]
    # The single unrecognized column must land in the "unknown" bucket.
    assert ["unknown", "1"] in types_table
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip validation results through a database-backed ValidationsStore."""
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )
    # Raw strings are rejected as keys.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")
    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )
    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )
    # Both stored keys must be listed.
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def validation_result_suite():
    """Return a minimal successful validation result fixture."""
    empty_statistics = {
        "evaluated_expectations": 0,
        "successful_expectations": 0,
        "unsuccessful_expectations": 0,
        "success_percent": None,
    }
    suite_meta = {
        "great_expectations_version": "v0.8.0__develop",
        "expectation_suite_name": "asset.default",
        "run_id": "test_100",
    }
    return ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics=empty_statistics,
        meta=suite_meta,
    )
def test_send_sns_notification(sns, aws_credentials):
    """send_sns_notification publishes to a (mocked) SNS topic and reports success."""
    suite_result = ExpectationSuiteValidationResult(
        **{
            "success": True,
            "results": {
                "observed_value": 5.0,
                "element_count": 5,
                "missing_count": None,
                "missing_percent": None,
            },
        }
    )
    # Create the topic in the mocked SNS backend, then publish to it.
    topic_arn = sns.create_topic(Name="test").get("TopicArn")
    response = send_sns_notification(
        topic_arn, str(suite_result.success), str(suite_result.results)
    )
    assert response.startswith("Successfully")
def test_SlackNotificationAction(data_context_parameterized_expectation_suite):
    """SlackNotificationAction yields an empty payload when the Slack call fails.

    The webhook URL is fake, so the POST cannot succeed; the action wraps the
    (None) result under the "slack_notification_result" key.
    """
    renderer = {
        "module_name": "great_expectations.render.renderer.slack_renderer",
        "class_name": "SlackRenderer",
    }
    slack_webhook = "https://hooks.slack.com/services/test/slack/webhook"
    notify_on = "all"
    slack_action = SlackNotificationAction(
        data_context=data_context_parameterized_expectation_suite,
        renderer=renderer,
        slack_webhook=slack_webhook,
        notify_on=notify_on,
    )
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )
    validation_result_suite_id = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id="test_100",
        batch_identifier="1234",
    )
    # TODO: improve this test - currently it is verifying a failed call to Slack. It returns a "empty" payload
    assert slack_action.run(
        validation_result_suite_identifier=validation_result_suite_id,
        validation_result_suite=validation_result_suite,
        data_asset=None,
    ) == {
        "slack_notification_result": None
    }
def test_MicrosoftTeams_validation_results_with_datadocs():
    """MicrosoftTeamsRenderer emits the full Adaptive Card payload.

    Includes an "Open data docs" action when data_docs_pages provides a site.
    Adjacent string literals in the expected payload are implicit
    concatenations (line-wrap artifacts), not separate values.
    """
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )
    validation_result_suite_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id=RunIdentifier(run_name="test_100", run_time="Tue May 08 15:14:45 +0800 2012"),
        batch_identifier=BatchIdentifier(batch_identifier="1234", data_asset_name="asset"),
    )
    data_docs_pages = {"local_site": "file:///localsite/index.html"}
    rendered_output = MicrosoftTeamsRenderer().render(
        validation_result_suite, validation_result_suite_identifier, data_docs_pages)
    expected_output = {
        "attachments": [{
            "content": {
                "$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
                "actions": [{
                    "title": "Open data docs",
                    "type": "Action.OpenUrl",
                    "url": "file:///localsite/index.html",
                }],
                "body": [
                    {
                        "height": "auto",
                        "items": [{
                            "columns": [{
                                "items": [
                                    {
                                        "size": "large",
                                        "text": "Validation " "results",
                                        "type": "TextBlock",
                                        "weight": "bolder",
                                        "wrap": True,
                                    },
                                    {
                                        "isSubtle": True,
                                        "spacing": "none",
                                        # run_time rendered in the card's local form
                                        "text": "May " "08 " "2012 " "07:14:45",
                                        "type": "TextBlock",
                                        "wrap": True,
                                    },
                                ],
                                "type": "Column",
                                "width": "stretch",
                            }],
                            "type": "ColumnSet",
                        }],
                        "separator": True,
                        "type": "Container",
                    },
                    {
                        "height": "auto",
                        "items": [
                            {
                                "color": "good",
                                "horizontalAlignment": "left",
                                "text": "**Batch validation " "status:** Success " "!!!",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Data asset " "name:** asset",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Expectation " "suite name:** " "asset.default",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Run name:** " "test_100",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Batch ID:** 1234",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Summary:** *0* " "of *0* " "expectations were " "met",
                                "type": "TextBlock",
                            },
                        ],
                        "separator": True,
                        "type": "Container",
                    },
                ],
                "type": "AdaptiveCard",
                "version": "1.0",
            },
            "contentType": "application/vnd.microsoft.card.adaptive",
        }],
        "type": "message",
    }
    assert rendered_output == expected_output
def test_SlackRenderer_validation_results_with_datadocs():
    """SlackRenderer output with and without Data Docs links.

    Covers three cases: no data_docs_pages, a configured local_site link, and
    a notify_with entry naming a site that is not configured (error block).
    """
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2019-09-25T060538.829112Z",
        },
    )
    # Case 1: no data docs configured — just the status + footer blocks.
    rendered_output = SlackRenderer().render(validation_result_suite)
    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }
    assert rendered_output == expected_output

    # Case 2: a configured site produces a DataDocs link block.
    data_docs_pages = {"local_site": "file:///localsite/index.html"}
    notify_with = ["local_site"]
    rendered_output = SlackRenderer().render(
        validation_result_suite, data_docs_pages, notify_with
    )
    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*DataDocs* can be found here: `file:///localsite/index.html` \n (Please copy and paste link into a browser to view)\n",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }
    assert rendered_output == expected_output

    # Case 3: a site name that is not configured yields an error block.
    notify_with = ["fake_site"]
    rendered_output = SlackRenderer().render(
        validation_result_suite, data_docs_pages, notify_with
    )
    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*ERROR*: Slack is trying to provide a link to the following DataDocs: `fake_site`, but it is not configured under `data_docs_sites` in the `great_expectations.yml`\n",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }
    assert rendered_output == expected_output
def test_StoreMetricsAction(
        basic_in_memory_data_context_for_validation_operator):
    """StoreMetricsAction stores suite-level statistics keyed per suite and run.

    Runs the action for two suites ("foo" and "foo.warning") under the same
    run and verifies each requested statistic can be read back independently.
    """
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            # "*" applies the requested statistics to every expectation suite.
            "*": [
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )
    run_id = RunIdentifier(run_name="bar")
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo",
            "run_id": run_id
        },
        statistics={
            "evaluated_expectations": 5,
            "successful_expectations": 3
        },
    )
    # Run the action and store our metrics
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo.warning",
            "run_id": run_id
        },
        statistics={
            "evaluated_expectations": 8,
            "successful_expectations": 4
        },
    )
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )
    # Metrics stored for suite "foo":
    assert (
        basic_in_memory_data_context_for_validation_operator.
        stores["metrics_store"].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="statistics.evaluated_expectations",
                metric_kwargs_id=None,
            )) == 5)
    assert (
        basic_in_memory_data_context_for_validation_operator.
        stores["metrics_store"].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="statistics.successful_expectations",
                metric_kwargs_id=None,
            )) == 3)
    # Metrics stored for suite "foo.warning":
    assert (basic_in_memory_data_context_for_validation_operator.
            stores["metrics_store"].get(
                ValidationMetricIdentifier(
                    run_id=run_id,
                    data_asset_name=None,
                    expectation_suite_identifier=ExpectationSuiteIdentifier(
                        "foo.warning"),
                    metric_name="statistics.evaluated_expectations",
                    metric_kwargs_id=None,
                )) == 8)
    assert (basic_in_memory_data_context_for_validation_operator.
            stores["metrics_store"].get(
                ValidationMetricIdentifier(
                    run_id=run_id,
                    data_asset_name=None,
                    expectation_suite_identifier=ExpectationSuiteIdentifier(
                        "foo.warning"),
                    metric_name="statistics.successful_expectations",
                    metric_kwargs_id=None,
                )) == 4)
def test_evaluation_parameter_store_methods(data_context):
    """store_evaluation_parameters accumulates URN-keyed bind params per run.

    After storing results from two suites under the same run_id, both
    observed values must be retrievable via get_bind_params. Adjacent string
    literals in the expected dicts are implicit concatenations of one long
    URN key each.
    """
    run_id = "20191125T000000.000000Z"
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)
    data_context.store_evaluation_parameters(source_patient_data_results)
    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value': 1024
    }
    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type=
                    "expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)
    data_context.store_evaluation_parameters(source_diabetes_data_results)
    # Storing a second suite's results must preserve the first suite's params.
    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value': 1024,
        'urn:great_expectations:validations:source_diabetes_data.default'
        ':expect_column_unique_value_count_to_be_between.result.observed_value:column=patient_nbr': 2048
    }
TABLE_NAME = "test_data" # Common validation results table_result = ExpectationValidationResult(success=True, expectation_config=ExpectationConfiguration( expectation_type='expect_table_row_count_to_equal', kwargs={'value': 10}), result={"observed_value": 10}) column_result = ExpectationValidationResult(success=True, expectation_config=ExpectationConfiguration( expectation_type='expect_column_sum_to_be_between', kwargs={'column': 'size', 'min_value': 0, 'max_value': 100} ), result={'observed_value': 60}) result_suite = ExpectationSuiteValidationResult(success=True, meta={'batch_kwargs': {}}, results=[table_result, column_result]) @pytest.fixture(scope='session') def test_db_file(): fd, file = tempfile.mkstemp() conn = sqlite3.connect(file) cursor = conn.cursor() cursor.execute( f'CREATE TABLE {TABLE_NAME} (name text, birthdate text, address text, size integer)') yield file os.remove(file) def test_dataset_from_sql_source(test_db_file, tmpdir): connection_url = f'sqlite:///{test_db_file}'
def test_validate():
    """End-to-end validate() on the Titanic data set.

    Verifies that results match the stored expected JSON, that mutating the
    results breaks equality, and that ``only_return_failures`` filters the
    ``results`` list without affecting ``statistics`` or ``success``.
    """
    with open(
            file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as suite_file:
        titanic_suite = expectationSuiteSchema.loads(suite_file.read())

    # Pin uuid1 so the generated ge_batch_id is deterministic ("1234").
    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=titanic_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    # Pin the clock so the run_id embedded in the results is stable.
    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        results = my_df.validate(catch_exceptions=False)

    with open(
            file_relative_path(
                __file__,
                './test_sets/titanic_expected_data_asset_validate_results.json',
            )
    ) as expected_file:
        expected_results = expectationSuiteValidationResultSchema.loads(
            expected_file.read()
        )

    del results.meta["great_expectations.__version__"]
    assert expected_results == results

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert expected_results != results

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        validation_results = my_df.validate(only_return_failures=True)
    del validation_results.meta["great_expectations.__version__"]

    # Keep a handle on the full-run results before rebinding the name:
    # success and statistics must carry over unchanged to the failures-only run.
    full_run_results = expected_results
    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": "19551105T000000.000000Z",
            "batch_kwargs": {"ge_batch_id": "1234"},
            "batch_markers": {},
            "batch_parameters": {},
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={
                        "column": "PClass",
                        "value_set": ["1st", "2nd", "3rd"],
                    },
                ),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{"count": 1, "value": "*"}],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456],
                },
            )
        ],
        success=full_run_results.success,  # unaffected
        statistics=full_run_results["statistics"],  # unaffected
    )
    assert expected_results == validation_results
def test_SlackRenderer():
    """SlackRenderer.render() should emit the expected Slack message payload
    for an empty, successful validation result suite."""
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations.__version__": "v0.8.0__develop",
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2019-09-25T060538.829112Z",
        },
    )

    rendered_output = SlackRenderer().render(validation_result_suite)
    print(rendered_output)

    expected_renderer_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Timestamp*: `09/24/2019 23:18:36`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/tutorials/getting_started/set_up_data_docs.html#_getting_started__set_up_data_docs",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }

    # We're okay with system variation in locales (OS X likes 24 hour, but not Travis)
    expected_text = expected_renderer_output["blocks"][0]["text"]["text"]
    for stamp in ("09/24/2019 11:18:36 PM", "09/24/2019 23:18:36"):
        expected_text = expected_text.replace(stamp, "LOCALEDATE")
    expected_renderer_output["blocks"][0]["text"]["text"] = expected_text

    rendered_text = rendered_output["blocks"][0]["text"]["text"]
    for stamp in ("09/24/2019 11:18:36 PM UTC", "09/24/2019 23:18:36 UTC"):
        rendered_text = rendered_text.replace(stamp, "LOCALEDATE")
    rendered_output["blocks"][0]["text"]["text"] = rendered_text

    assert rendered_output == expected_renderer_output
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Round-trip validation results through a filesystem-backed store and
    check key listing, on-disk layout, and a stable store_backend_id."""
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    # Created for its side effect (a scratch directory); not referenced below.
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    # Keys must be ValidationResultIdentifiers; anything else raises TypeError.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    key_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(key_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(key_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    key_2 = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    my_store.set(key_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(key_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {key_1, key_2}

    print(gen_directory_tree_str(path))
    assert (
        gen_directory_tree_str(path)
        == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        .ge_store_backend_id
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    )

    """
    What does this test and why?
    A Store should be able to report it's store_backend_id which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
    # Check that another store with the same configuration shares the same store_backend_id
    my_store_duplicate = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )
    assert my_store.store_backend_id == my_store_duplicate.store_backend_id