def save_expectation_suite(
    self,
    expectation_suite: ExpectationSuite,
    expectation_suite_name: Optional[str] = None,
    overwrite_existing: bool = True,
    **kwargs: Any,
):
    """Save the provided expectation suite into the DataContext.

    Args:
        expectation_suite: the suite to save
        expectation_suite_name: the name of this expectation suite. If no name is provided the name will \
            be read from the suite
        overwrite_existing: bool setting whether to overwrite existing ExpectationSuite

    Returns:
        The return value of the expectations store's ``set`` call.

    Raises:
        DataContextError: if a suite with the resolved name already exists and
            ``overwrite_existing`` is False.
    """
    if expectation_suite_name is None:
        key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite.expectation_suite_name
        )
    else:
        # Keep the suite object's own name in sync with the requested name.
        expectation_suite.expectation_suite_name = expectation_suite_name
        key = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        )
    if self.expectations_store.has_key(key) and not overwrite_existing:
        # BUG FIX: previously formatted the raw ``expectation_suite_name``
        # argument, which is None when the name is read from the suite; use
        # the resolved name so the error message is always meaningful.
        raise ge_exceptions.DataContextError(
            "expectation_suite with name {} already exists. If you would like to overwrite this "
            "expectation_suite, set overwrite_existing=True.".format(
                expectation_suite.expectation_suite_name
            )
        )
    # Any change to stored suites invalidates the compiled evaluation-parameter
    # dependency graph; force recompilation on next use.
    self._evaluation_parameter_dependencies_compiled = False
    return self.expectations_store.set(key, expectation_suite, **kwargs)
def test_ExpectationsStore_with_DatabaseStoreBackend():
    """Round-trip two suites through a sqlite-backed ExpectationsStore."""
    # Use sqlite so we don't require postgres for this test.
    store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": {"drivername": "sqlite"},
        }
    )

    # Keys must be ExpectationSuiteIdentifier instances.
    with pytest.raises(TypeError):
        store.get("not_a_ExpectationSuiteIdentifier")

    # NOTE(review): tuple("a.b.c.warning") is a tuple of single characters; the
    # same key object is used for set and get, so the round-trip still matches
    # — confirm this key shape is intended.
    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    store.set(warning_key, ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert store.get(warning_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning"
    )

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    store.set(failure_key, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert store.get(failure_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
def test_HtmlSiteStore_S3_backend():
    """Exercise HtmlSiteStore against a moto-mocked S3 TupleS3StoreBackend."""
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    boto3.resource("s3", region_name="us-east-1").create_bucket(Bucket=bucket)

    store = HtmlSiteStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    validation_key = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id="20191007T151224.1234Z_prod_100",
            batch_identifier="1234",
        ),
    )
    store.set(validation_key, "aaa")

    expectation_key = SiteSectionIdentifier(
        site_section_name="expectations",
        resource_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
    )
    store.set(expectation_key, "bbb")

    assert set(store.list_keys()) == {
        validation_key.resource_identifier,
        expectation_key.resource_identifier,
    }

    # This is a special un-store-like method exposed by the HtmlSiteStore
    store.write_index_page("index_html_string_content")

    # Verify that internals are working as expected, including the default filepath
    # paths below should include the batch_parameters
    listed = boto3.client("s3").list_objects_v2(Bucket=bucket, Prefix=prefix)
    assert {item["Key"] for item in listed["Contents"]} == {
        "test/prefix/index.html",
        "test/prefix/expectations/asset/quarantine.html",
        "test/prefix/validations/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/1234.html",
    }

    index_content = (
        boto3.client("s3")
        .get_object(Bucket=bucket, Key="test/prefix/index.html")["Body"]
        .read()
        .decode("utf-8")
    )
    assert index_content == "index_html_string_content"
def test_StoreAction():
    """StoreValidationResultAction persists results under the expected identifier."""
    backing_store = ValidationsStore(
        store_backend={
            "class_name": "InMemoryStoreBackend",
        }
    )

    # Minimal stand-in for a DataContext exposing only what the action reads.
    class Object:
        ge_cloud_mode = False

    data_context = Object()
    data_context.stores = {"fake_in_memory_store": backing_store}

    action = StoreValidationResultAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )
    assert backing_store.list_keys() == []

    action.run(
        validation_result_suite_identifier=ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=RunIdentifier(run_name="prod_20190801"),
            batch_identifier="1234",
        ),
        validation_result_suite=ExpectationSuiteValidationResult(
            success=False, results=[]
        ),
        data_asset=None,
    )

    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    stored_keys = backing_store.list_keys()
    assert len(stored_keys) == 1
    stored_identifier = stored_keys[0]
    assert stored_identifier.batch_identifier == "1234"
    assert (
        stored_identifier.expectation_suite_identifier.expectation_suite_name
        == "default_expectations"
    )
    assert stored_identifier.run_id == expected_run_id

    assert backing_store.get(
        ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=expected_run_id,
            batch_identifier="1234",
        )
    ) == ExpectationSuiteValidationResult(success=False, results=[])
def test_ValidationsStore_with_TupleS3StoreBackend():
    """Round-trip validation results through a moto-mocked S3 tuple-store backend."""
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    boto3.resource("s3", region_name="us-east-1").create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    store = ValidationsStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    key_prod_100 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    store.set(key_prod_100, ExpectationSuiteValidationResult(success=True))
    assert store.get(key_prod_100) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    key_prod_200 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    store.set(key_prod_200, ExpectationSuiteValidationResult(success=False))
    assert store.get(key_prod_200) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    # Verify that internals are working as expected, including the default filepath
    listed = boto3.client("s3").list_objects_v2(Bucket=bucket, Prefix=prefix)
    assert {item["Key"] for item in listed["Contents"]} == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(store.list_keys())
    assert set(store.list_keys()) == {key_prod_100, key_prod_200}
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip validation results through sqlite and check store_backend_id."""
    # Use sqlite so we don't require postgres for this test.
    store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": {"drivername": "sqlite"},
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    key_prod_100 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    store.set(key_prod_100, ExpectationSuiteValidationResult(success=True))
    assert store.get(key_prod_100) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    key_prod_200 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    store.set(key_prod_200, ExpectationSuiteValidationResult(success=False))
    assert store.get(key_prod_200) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(store.list_keys()) == {key_prod_100, key_prod_200}

    """
    What does this test and why?
    A Store should be able to report it's store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists can be read
    assert store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(store.store_backend_id)
def test_ExpectationsStore_with_DatabaseStoreBackend(sa):
    """Set, update, and list expectation suites in a sqlite-backed store."""
    # Use sqlite so we don't require postgres for this test.
    store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": {"drivername": "sqlite"},
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )
    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))

    # initial set and check if first suite exists
    store.set(warning_key, default_suite)
    assert store.get(warning_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    store.set(warning_key, updated_suite)
    assert store.get(warning_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    store.set(failure_key, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert store.get(failure_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
def __init__(
    self,
    run_id,
    data_asset_name,
    expectation_suite_identifier,
    metric_name,
    metric_kwargs,
    metric_value,
):
    """Record a metric value scoped to a run, a data asset, and a suite.

    Args:
        run_id: RunIdentifier, dict of RunIdentifier kwargs, string (deprecated),
            or None; all forms are normalized to a RunIdentifier below.
        data_asset_name: name of the data asset the metric was computed on.
        expectation_suite_identifier: ExpectationSuiteIdentifier or a bare
            suite name (wrapped into an identifier if needed).
        metric_name: name of the metric.
        metric_kwargs: kwargs that parameterize the metric.
        metric_value: the observed metric value.
    """
    super().__init__(metric_name, metric_kwargs, metric_value)
    # Accept a bare suite name and wrap it in an identifier.
    if not isinstance(expectation_suite_identifier, ExpectationSuiteIdentifier):
        expectation_suite_identifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_identifier)
    if isinstance(run_id, str):
        warnings.warn(
            "String run_ids will be deprecated in the future. Please provide a run_id of type "
            "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
            "and run_time (both optional).",
            DeprecationWarning,
        )
        # Best effort: a date-parseable string doubles as the run_time.
        try:
            run_time = parse(run_id)
        except (ValueError, TypeError):
            run_time = None
        run_id = RunIdentifier(run_name=run_id, run_time=run_time)
    elif isinstance(run_id, dict):
        run_id = RunIdentifier(**run_id)
    elif run_id is None:
        run_id = RunIdentifier()
    elif not isinstance(run_id, RunIdentifier):
        # Last resort: stringify unknown types into a run_name.
        run_id = RunIdentifier(run_name=str(run_id))
    self._run_id = run_id
    self._data_asset_name = data_asset_name
    self._expectation_suite_identifier = expectation_suite_identifier
def validation_result_suite_id():
    """Fixture: a minimal ValidationResultIdentifier for action tests."""
    return ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.default"),
        run_id=RunIdentifier(run_name="test_100"),
        batch_identifier="1234",
    )
def test_StoreMetricsAction_column_metric(
    basic_in_memory_data_context_for_validation_operator,
):
    """StoreMetricsAction should extract a requested column metric into the metrics store."""
    context = basic_in_memory_data_context_for_validation_operator
    action = StoreMetricsAction(
        data_context=context,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": [
                            "expect_column_values_to_be_unique.result.unexpected_count"
                        ]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={"expectation_suite_name": "foo", "run_id": "bar"},
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": [],
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={"column": "provider_id", "result_format": "BASIC"},
                ),
                exception_info=None,
            )
        ],
        statistics={"evaluated_expectations": 5, "successful_expectations": 3},
    )
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    stored_value = context.stores["metrics_store"].get(
        ValidationMetricIdentifier(
            run_id="bar",
            expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
            metric_name="expect_column_values_to_be_unique.result.unexpected_count",
            metric_kwargs_id="column=provider_id",
        )
    )
    assert stored_value == 7
def prompt_profile_to_create_a_suite(
    data_context: DataContext,
    expectation_suite_name: str,
):
    """Describe the profiling notebook to the user and ask for confirmation."""
    suite_url = data_context.stores[
        data_context.expectations_store_name
    ].store_backend.get_url_for_key(
        ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        ).to_tuple()
    )
    cli_message(
        string="""
Great Expectations will create a notebook, containing code cells that select from available columns in your dataset and
generate expectations about them to demonstrate some examples of assertions you can make about your data.

When you run this notebook, Great Expectations will store these expectations in a new Expectation Suite "{:s}" here:

  {:s}
""".format(expectation_suite_name, suite_url)
    )
    confirm_proceed_or_exit()
def _add_expectations_to_index_links(self, index_links_dict: OrderedDict, skip_and_clean_missing: bool) -> None:
    """Add an index entry for every expectation-suite page in the target site store.

    Args:
        index_links_dict: accumulator of index links, mutated in place via
            ``add_resource_info_to_index_links_dict``.
        skip_and_clean_missing: when True, rendered pages whose source suite no
            longer exists are removed from the target store and omitted from
            the index.
    """
    expectations = self.site_section_builders_config.get(
        "expectations", "None")
    # Only build this section if configured and not disabled with a falsey
    # YAML value ("None" default falls into FALSEY_YAML_STRINGS).
    if expectations and expectations not in FALSEY_YAML_STRINGS:
        # Suite keys currently present in the configured source store.
        expectation_suite_source_keys = self.data_context.stores[
            self.site_section_builders_config["expectations"].get(
                "source_store_name")].list_keys()
        # Suite pages already rendered into the target (site) store.
        expectation_suite_site_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
            for expectation_suite_tuple in self.target_store.
            store_backends[ExpectationSuiteIdentifier].list_keys()
        ]
        if skip_and_clean_missing:
            cleaned_keys = []
            for expectation_suite_site_key in expectation_suite_site_keys:
                if expectation_suite_site_key not in expectation_suite_source_keys:
                    # Source suite is gone: drop its stale rendered page.
                    self.target_store.store_backends[
                        ExpectationSuiteIdentifier].remove_key(
                            expectation_suite_site_key)
                else:
                    cleaned_keys.append(expectation_suite_site_key)
            expectation_suite_site_keys = cleaned_keys
        # Index whatever pages remain.
        for expectation_suite_key in expectation_suite_site_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.
                expectation_suite_name,
                section_name="expectations",
            )
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Round-trip validation results through a filesystem tuple-store backend."""
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    key_prod_100 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    store.set(key_prod_100, ExpectationSuiteValidationResult(success=True))
    assert store.get(key_prod_100) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    key_prod_20 = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    store.set(key_prod_20, ExpectationSuiteValidationResult(success=False))
    assert store.get(key_prod_20) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(store.list_keys())
    assert set(store.list_keys()) == {key_prod_100, key_prod_20}

    print(gen_directory_tree_str(path))
    assert (
        gen_directory_tree_str(path)
        == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    )
def validation_result_suite_extended_id():
    """Fixture: a fully-specified ValidationResultIdentifier (run_time + BatchIdentifier)."""
    return ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.default"),
        run_id=RunIdentifier(
            run_name="test_100", run_time="Tue May 08 15:14:45 +0800 2012"
        ),
        batch_identifier=BatchIdentifier(
            batch_identifier="1234", data_asset_name="asset"
        ),
    )
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip two validation results through a sqlite DatabaseStoreBackend."""
    # Use sqlite so we don't require postgres for this test.
    store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": {"drivername": "sqlite"},
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    key_prod_100 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    store.set(key_prod_100, ExpectationSuiteValidationResult(success=True))
    assert store.get(key_prod_100) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    key_prod_200 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    store.set(key_prod_200, ExpectationSuiteValidationResult(success=False))
    assert store.get(key_prod_200) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(store.list_keys()) == {key_prod_100, key_prod_200}
def self_check(self, pretty_print):
    """Smoke-test this store: list existing keys, then write and read a probe value.

    Args:
        pretty_print: when True, narrate each step to stdout.

    Returns:
        dict with the listed ``keys`` and their count ``len_keys``.
    """
    return_obj = {}
    if pretty_print:
        print("Checking for existing keys...")
    return_obj["keys"] = self.list_keys()
    return_obj["len_keys"] = len(return_obj["keys"])
    len_keys = return_obj["len_keys"]
    if pretty_print:
        if return_obj["len_keys"] == 0:
            print(f"\t{len_keys} keys found")
        else:
            print(f"\t{len_keys} keys found:")
            # Show at most the first 10 keys to keep output bounded.
            for key in return_obj["keys"][:10]:
                print(f"		{str(key)}")
            if len_keys > 10:
                print("\t\t...")
        print()
    # Random suffix so repeated self-checks don't collide on the probe key.
    test_key_name = "test-key-" + "".join(
        [random.choice(list("0123456789ABCDEF")) for i in range(20)])
    if self.ge_cloud_mode:
        # Cloud stores are keyed by GE Cloud resource ids.
        test_key: GeCloudIdentifier = self.key_class(
            resource_type=GeCloudRESTResource.CONTRACT,
            ge_cloud_id=str(uuid.uuid4()),
        )
    else:
        test_key: ValidationResultIdentifier = self.key_class(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="temporary_test_suite",
            ),
            run_id="temporary_test_run_id",
            batch_identifier=test_key_name,
        )
    test_value = ExpectationSuiteValidationResult(success=True)
    if pretty_print:
        print(f"Attempting to add a new test key: {test_key}...")
    self.set(key=test_key, value=test_value)
    if pretty_print:
        print("\tTest key successfully added.")
        print()
    if pretty_print:
        print(
            f"Attempting to retrieve the test value associated with key: {test_key}..."
        )
    test_value = self.get(key=test_key, )
    if pretty_print:
        print("\tTest value successfully retrieved.")
        print()
    # NOTE(review): the probe key is not cleaned up after the check — confirm
    # whether leaving it behind is intended.
    return return_obj
def __init__(self, run_id, expectation_suite_identifier, metric_name,
             metric_kwargs, metric_value):
    """Record a metric value scoped to a run and an expectation suite.

    Args:
        run_id: identifier of the validation run this metric belongs to.
        expectation_suite_identifier: ExpectationSuiteIdentifier or a bare
            suite name (wrapped into an identifier if needed).
        metric_name: name of the metric.
        metric_kwargs: kwargs that parameterize the metric.
        metric_value: the observed metric value.
    """
    # Modernized: zero-argument super() replaces the redundant
    # super(ValidationMetric, self) spelling; behavior is identical.
    super().__init__(metric_name, metric_kwargs, metric_value)
    # Accept a bare suite name and wrap it in an identifier.
    if not isinstance(expectation_suite_identifier, ExpectationSuiteIdentifier):
        expectation_suite_identifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_identifier)
    self._run_id = run_id
    self._expectation_suite_identifier = expectation_suite_identifier
def _convert_tuple_to_resource_identifier(self, tuple_):
    """Rehydrate a SiteSectionIdentifier from its (section_name, key_tuple) form."""
    section_name = tuple_[0]
    if section_name == "expectations":
        resource_identifier = ExpectationSuiteIdentifier(*tuple_[1])
    elif section_name == "validations":
        resource_identifier = ValidationResultIdentifier(*tuple_[1])
    else:
        raise Exception("unknown section name: " + tuple_[0])
    return SiteSectionIdentifier(
        site_section_name=tuple_[0],
        resource_identifier=resource_identifier,
    )
def from_tuple(cls, tuple_):
    """Build a ValidationMetricIdentifier from its tuple form.

    Layout: (run_id, *suite_name_parts, metric_name, metric_kwargs_id).
    """
    if len(tuple_) < 4:
        raise GreatExpectationsError(
            "ValidationMetricIdentifier tuple must have at least four components."
        )
    # Everything between the run_id and the trailing two elements is the
    # (possibly dotted, hence variable-length) suite name.
    suite_identifier = ExpectationSuiteIdentifier.from_tuple(tuple_[1:-2])
    return cls(
        run_id=tuple_[0],
        expectation_suite_identifier=suite_identifier,
        metric_name=tuple_[-2],
        metric_kwargs_id=tuple_[-1],
    )
def ge_validation_result_suite_id() -> ValidationResultIdentifier:
    """Fixture: a ValidationResultIdentifier with a fixed UTC run_time."""
    return ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.default"),
        run_id=RunIdentifier(
            run_name="test_100",
            run_time=datetime.fromtimestamp(1640701702, tz=timezone.utc),
        ),
        batch_identifier="010ef8c1cd417910b971f4468f024ec5",
    )
def _profile_to_create_a_suite(
    additional_batch_kwargs,
    batch_kwargs,
    batch_kwargs_generator_name,
    context,
    datasource_name,
    expectation_suite_name,
    data_asset_name,
    profiler_configuration,
):
    """Profile a data asset to seed an example Expectation Suite, after user confirmation."""
    suite_url = context.stores[
        context.expectations_store_name
    ].store_backend.get_url_for_key(
        ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        ).to_tuple()
    )
    cli_message(
        """
Great Expectations will choose a couple of columns and generate expectations about them
to demonstrate some examples of assertions you can make about your data.

Great Expectations will store these expectations in a new Expectation Suite '{:s}' here:

  {:s}
""".format(expectation_suite_name, suite_url)
    )
    confirm_proceed_or_exit()

    # TODO this may not apply
    cli_message("\nGenerating example Expectation Suite...")
    run_id = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
    profiling_results = context.profile_data_asset(
        datasource_name,
        batch_kwargs_generator_name=batch_kwargs_generator_name,
        data_asset_name=data_asset_name,
        batch_kwargs=batch_kwargs,
        profiler=BasicSuiteBuilderProfiler,
        profiler_configuration=profiler_configuration,
        expectation_suite_name=expectation_suite_name,
        run_id=run_id,
        additional_batch_kwargs=additional_batch_kwargs,
    )
    if not profiling_results["success"]:
        _raise_profiling_errors(profiling_results)

    cli_message("\nDone generating example Expectation Suite")
    return profiling_results
def test_expectations_store():
    """In-memory ExpectationsStore: set/get two suites and list their keys."""
    store = ExpectationsStore()

    with pytest.raises(TypeError):
        store.set("not_a_ValidationResultIdentifier")

    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    store.set(warning_key, ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert store.get(warning_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning"
    )

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    store.set(failure_key, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert store.get(failure_key) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
def test_resource_key_passes_run_name_filter():
    """Exercise every supported (and deprecated) run_name_filter operator."""
    resource_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("test_suite"),
        run_id=RunIdentifier(run_name="foofooprofilingfoo"),
        batch_identifier="f14c3d2f6e8028c2db0c25edabdb0d61",
    )

    def passes(run_name_filter):
        # Single shared key: only the filter varies across assertions.
        return resource_key_passes_run_name_filter(
            resource_key, run_name_filter=run_name_filter
        )

    assert passes({"equals": "profiling"}) is False
    assert passes({"equals": "foofooprofilingfoo"}) is True
    assert passes({"not_equals": "profiling"}) is True
    assert passes({"not_equals": "foofooprofilingfoo"}) is False
    assert passes({"includes": "profiling"}) is True
    assert passes({"includes": "foobar"}) is False
    assert passes({"not_includes": "foobar"}) is True
    assert passes({"not_includes": "profiling"}) is False
    assert passes({"matches_regex": "(foo){2}profiling(foo)+"}) is True
    assert passes({"matches_regex": "(foo){3}profiling(foo)+"}) is False

    # Legacy operator aliases still work but emit a DeprecationWarning.
    with pytest.warns(DeprecationWarning):
        assert passes({"eq": "profiling"}) is False
        assert passes({"eq": "foofooprofilingfoo"}) is True
    with pytest.warns(DeprecationWarning):
        assert passes({"ne": "profiling"}) is True
        assert passes({"ne": "foofooprofilingfoo"}) is False
def from_fixed_length_tuple(cls, tuple_):
    """Build a ValidationMetricIdentifier from its 4-element fixed-length tuple.

    Layout: (run_id, suite_name, metric_name, metric_kwargs_id).
    """
    if len(tuple_) != 4:
        raise GreatExpectationsError(
            "ValidationMetricIdentifier fixed length tuple must have exactly four "
            "components."
        )
    run_id, suite_name, metric_name, metric_kwargs_id = tuple_
    return cls(
        run_id=run_id,
        expectation_suite_identifier=ExpectationSuiteIdentifier.from_fixed_length_tuple(
            (suite_name,)
        ),
        metric_name=metric_name,
        metric_kwargs_id=metric_kwargs_id,
    )
def test_expectations_store(empty_data_context):
    """Set/get round-trips suites as dicts that rehydrate to equal ExpectationSuites."""
    context: DataContext = empty_data_context
    store = ExpectationsStore()

    with pytest.raises(TypeError):
        store.set("not_a_ValidationResultIdentifier")

    warning_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    store.set(
        warning_key,
        ExpectationSuite(expectation_suite_name="a.b.c.warning", data_context=context),
    )
    # get() returns a dict; rebuild a suite from it before comparing.
    rehydrated_warning: ExpectationSuite = ExpectationSuite(
        **store.get(warning_key), data_context=context
    )
    assert rehydrated_warning == ExpectationSuite(
        expectation_suite_name="a.b.c.warning", data_context=context
    )

    failure_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    store.set(
        failure_key,
        ExpectationSuite(expectation_suite_name="a.b.c.failure", data_context=context),
    )
    rehydrated_failure: ExpectationSuite = ExpectationSuite(
        **store.get(failure_key), data_context=context
    )
    assert rehydrated_failure == ExpectationSuite(
        expectation_suite_name="a.b.c.failure", data_context=context
    )

    assert set(store.list_keys()) == {warning_key, failure_key}
def suite_delete(suite, directory):
    """Delete an expectation suite from the expectation store.

    Args:
        suite: name of the expectation suite to delete.
        directory: project directory used to load the DataContext.

    Exits with status 1 (after a CLI message) when the named suite does not
    exist; prints a message and returns when no suites exist at all.
    """
    context = load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if len(suite_names) == 0:
        cli_message("No expectation suites found")
        return

    # BUG FIX: the previous implementation tested ``if key:`` on a freshly
    # constructed ExpectationSuiteIdentifier, which is always truthy, so the
    # "not found" branch was unreachable and nonexistent names were never
    # rejected. Check membership in the listed suite names instead.
    if suite not in suite_names:
        cli_message("No matching expectation suites found")
        sys.exit(1)

    expectation_suite = ExpectationSuite(expectation_suite_name=suite)
    context.delete_expectation_suite(expectation_suite)
def test_SlackNotificationAction(data_context_parameterized_expectation_suite):
    """A Slack notification against an unreachable webhook yields an empty payload."""
    slack_action = SlackNotificationAction(
        data_context=data_context_parameterized_expectation_suite,
        renderer={
            "module_name": "great_expectations.render.renderer.slack_renderer",
            "class_name": "SlackRenderer",
        },
        slack_webhook="https://hooks.slack.com/services/test/slack/webhook",
        notify_on="all",
    )

    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )
    validation_result_suite_id = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.default"),
        run_id="test_100",
        batch_identifier="1234",
    )

    # TODO: improve this test - currently it is verifying a failed call to Slack. It returns a "empty" payload
    assert slack_action.run(
        validation_result_suite_identifier=validation_result_suite_id,
        validation_result_suite=validation_result_suite,
        data_asset=None,
    ) == {"slack_notification_result": None}
def test_TupleGCSStoreBackend_base_public_path():
    """
    What does this test and why?

    the base_public_path parameter allows users to point to a custom DNS when hosting Data docs.

    This test will exercise the get_url_for_key method twice to see that we are getting the
    expected url, with or without base_public_path
    """
    bucket = "leakybucket"
    prefix = "this_is_a_test_prefix"
    project = "dummy-project"
    base_public_path = "http://www.test.com/"

    with patch("google.cloud.storage.Client", autospec=True) as mock_gcs_client:
        mock_client = mock_gcs_client.return_value
        mock_bucket = mock_client.get_bucket.return_value
        # Materialize the blob child mock so writes route through it.
        mock_blob = mock_bucket.blob.return_value

        store = TupleGCSStoreBackend(
            filepath_template=None,
            bucket=bucket,
            prefix=prefix,
            project=project,
            base_public_path=base_public_path,
        )
        store.set(("BBB",), b"bbb", content_encoding=None, content_type="image/png")

        run_id = RunIdentifier("my_run_id", datetime.datetime.utcnow())
        key = ValidationResultIdentifier(
            ExpectationSuiteIdentifier(expectation_suite_name="my_suite_name"),
            run_id,
            "my_batch_id",
        )
        run_time_string = run_id.to_tuple()[1]

        url = store.get_public_url_for_key(key.to_tuple())
        assert (
            url
            == "http://www.test.com/leakybucket"
            + f"/this_is_a_test_prefix/my_suite_name/my_run_id/{run_time_string}/my_batch_id"
        )
def create_empty_suite(
    context: DataContext, expectation_suite_name: str, batch_kwargs
) -> None:
    """Create, annotate with a CLI citation, and persist a brand-new empty suite."""
    suite_url = context.stores[
        context.expectations_store_name
    ].store_backend.get_url_for_key(
        ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        ).to_tuple()
    )
    cli_message(
        """
Great Expectations will create a new Expectation Suite '{:s}' and store it here:

  {:s}
""".format(expectation_suite_name, suite_url)
    )
    suite = context.create_expectation_suite(expectation_suite_name)
    suite.add_citation(comment="New suite added via CLI", batch_kwargs=batch_kwargs)
    context.save_expectation_suite(suite, expectation_suite_name)
def test_ExpectationsStore_with_DatabaseStoreBackend_postgres(caplog):
    """Set then update an expectation suite in a postgres-backed store."""
    connection_kwargs = {
        "drivername": "postgresql",
        "username": "******",
        "password": "",
        "host": "localhost",
        "port": "5432",
        "database": "test_ci",
    }

    # First, demonstrate that we pick up default configuration
    store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    initial_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )
    suite_key = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c"))

    # initial set and check if first suite exists
    store.set(suite_key, initial_suite)
    assert store.get(suite_key) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    store.set(suite_key, updated_suite)
    assert store.get(suite_key) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )