def test_profiler_parameter_builder_added(data_context_with_taxi_data):
    """
    What does this test and why?
    This test adds a simple ParameterBuilder to our Rule. More specifically, we use a
    MetricMultiBatchParameterBuilder to compute the column minimum and pass it as the value
    argument to expect_column_values_to_be_greater_than.
    """
    context: DataContext = data_context_with_taxi_data
    batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    # parameter_builder
    numeric_range_parameter_builder: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            data_context=context,
            metric_name="column.min",
            metric_domain_kwargs="$domain.domain_kwargs",
            name="my_column_min",
        )
    )
    config_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_greater_than",
            value="$parameter.my_column_min.value[-1]",
            column="$domain.domain_kwargs.column",
        )
    )
    simple_rule: Rule = Rule(
        name="rule_with_variables_and_parameters",
        variables=None,
        domain_builder=domain_builder,
        parameter_builders=[numeric_range_parameter_builder],
        expectation_configuration_builders=[config_builder],
    )
    my_rbp = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )
    my_rbp.add_rule(rule=simple_rule)
    result: RuleBasedProfilerResult = my_rbp.run(batch_request=batch_request)
    expectation_configurations: List[ExpectationConfiguration] = result.expectation_configurations
    assert len(expectation_configurations) == 4
def test_profile_excludes_citations(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    # Load data context
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = alice_columnar_table_single_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config: dict = yaml.load(yaml_config)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    deserialized_config: dict = ruleBasedProfilerConfigSchema.load(profiler_config)
    serialized_config: dict = ruleBasedProfilerConfigSchema.dump(deserialized_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    serialized_config.pop("class_name")
    serialized_config.pop("module_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **serialized_config,
        data_context=data_context,
    )

    expectation_suite: ExpectationSuite = profiler.run(
        expectation_suite_name=alice_columnar_table_single_batch[
            "expected_expectation_suite_name"
        ],
        include_citation=False,
    )

    assert expectation_suite.meta.get("citations") is None
def __init__(
    self,
    name: str,
    validator: Validator,
) -> None:
    """
    DataAssistant subclasses guide "RuleBasedProfiler" to contain the Rule configurations that
    embody profiling behaviors, corresponding to the intended exploration and validation goals.
    Executing "RuleBasedProfiler.run()" then yields a "RuleBasedProfilerResult" object, containing
    "fully_qualified_parameter_names_by_domain",
    "parameter_values_for_fully_qualified_parameter_names_by_domain",
    "expectation_configurations", and "citation", which are immediately available for composing an
    "ExpectationSuite" and validating the underlying data "Batch" objects.

    Args:
        name: the name of this DataAssistant object
        validator: Validator object, containing loaded Batch objects as well as Expectation and Metric operations
    """
    self._name = name
    self._validator = validator
    self._profiler = RuleBasedProfiler(
        name=self.name,
        config_version=1.0,
        variables=None,
        data_context=self._validator.data_context,
    )
    self._build_profiler()
def test_profile_includes_citations(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    # Load data context
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = alice_columnar_table_single_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config = yaml.load(yaml_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    profiler_config.pop("class_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **profiler_config,
        data_context=data_context,
    )

    expectation_suite: ExpectationSuite = profiler.run(
        expectation_suite_name=alice_columnar_table_single_batch[
            "expected_expectation_suite_name"
        ],
        include_citation=True,
    )

    assert len(expectation_suite.meta["citations"]) > 0
def test_save_profiler(
    mock_data_context: mock.MagicMock,
    populated_profiler_store: ProfilerStore,
    profiler_config_with_placeholder_args: RuleBasedProfilerConfig,
):
    with mock.patch(
        "great_expectations.data_context.store.profiler_store.ProfilerStore.set",
        return_value=profiler_config_with_placeholder_args,
    ):
        mock_data_context.save_profiler(
            profiler=profiler_config_with_placeholder_args,
            profiler_store=populated_profiler_store,
            name="my_profiler",
            ge_cloud_id=None,
        )

    with mock.patch(
        "great_expectations.data_context.store.profiler_store.ProfilerStore.get",
        return_value=profiler_config_with_placeholder_args,
    ):
        profiler = RuleBasedProfiler.get_profiler(
            data_context=mock_data_context,
            profiler_store=populated_profiler_store,
            name="my_profiler",
            ge_cloud_id=None,
        )

    assert isinstance(profiler, RuleBasedProfiler)
def test_bobster_profiler_user_workflow_multi_batch_row_count_range_rule_bootstrap_sampling_method(
    bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000_data_context,
    bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000,
):
    # Load data context
    data_context: DataContext = (
        bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000_data_context
    )

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
        "profiler_config"
    ]

    # Instantiate Profiler
    profiler_config: CommentedMap = yaml.load(yaml_config)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    deserialized_config: dict = ruleBasedProfilerConfigSchema.load(profiler_config)
    serialized_config: dict = ruleBasedProfilerConfigSchema.dump(deserialized_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    serialized_config.pop("class_name")
    serialized_config.pop("module_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **serialized_config,
        data_context=data_context,
    )

    expectation_suite: ExpectationSuite = profiler.run(
        expectation_suite_name=bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
            "test_configuration_bootstrap_sampling_method"
        ]["expectation_suite_name"],
    )

    expect_table_row_count_to_be_between_expectation_configuration_kwargs: dict = (
        expectation_suite.to_json_dict()["expectations"][0]["kwargs"]
    )
    min_value: int = expect_table_row_count_to_be_between_expectation_configuration_kwargs[
        "min_value"
    ]
    max_value: int = expect_table_row_count_to_be_between_expectation_configuration_kwargs[
        "max_value"
    ]

    assert (
        bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
            "test_configuration_bootstrap_sampling_method"
        ]["expect_table_row_count_to_be_between_min_value_mean_value"]
        < min_value
        < bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
            "test_configuration_bootstrap_sampling_method"
        ]["expect_table_row_count_to_be_between_mean_value"]
    )
    assert (
        bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
            "test_configuration_bootstrap_sampling_method"
        ]["expect_table_row_count_to_be_between_mean_value"]
        < max_value
        < bobster_columnar_table_multi_batch_normal_mean_5000_stdev_1000[
            "test_configuration_bootstrap_sampling_method"
        ]["expect_table_row_count_to_be_between_max_value_mean_value"]
    )
def test_get_profiler_run_usage_statistics_without_handler():
    # Without a DataContext, the usage stats handler is not propagated down to the RBP
    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="my_profiler",
        config_version=1.0,
    )

    payload: dict = get_profiler_run_usage_statistics(profiler=profiler)

    assert payload == {}
def __init__(
    self,
    name: str,
    validator: Optional[Validator],
) -> None:
    """
    DataAssistant subclasses guide "RuleBasedProfiler" to contain the Rule configurations that
    embody profiling behaviors, corresponding to the intended exploration and validation goals.
    Executing "RuleBasedProfiler.run()" then yields a "RuleBasedProfilerResult" object, containing
    "fully_qualified_parameter_names_by_domain",
    "parameter_values_for_fully_qualified_parameter_names_by_domain",
    "expectation_configurations", and "citation", which are immediately available for composing an
    "ExpectationSuite" and validating the underlying data "Batch" objects.

    Args:
        name: the name of this DataAssistant object
        validator: Validator object, containing loaded Batch objects as well as Expectation and Metric operations
    """
    self._name = name
    self._validator = validator

    if validator is None:
        self._data_context = None
        self._batches = None
    else:
        self._data_context = self._validator.data_context
        self._batches = self._validator.batches

    variables: Optional[Dict[str, Any]] = self.get_variables() or {}
    self._profiler = RuleBasedProfiler(
        name=self.name,
        config_version=1.0,
        variables=variables,
        data_context=self._data_context,
    )

    self._metrics_parameter_builders_by_domain = {}

    rules: Optional[List[Rule]] = self.get_rules() or []

    rule: Rule
    for rule in rules:
        self.profiler.add_rule(rule=rule)
        self._metrics_parameter_builders_by_domain[
            Domain(
                domain_type=rule.domain_builder.domain_type,
                rule_name=rule.name,
            )
        ] = rule.parameter_builders
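# Illustrative sketch (not part of the module above): the end-to-end workflow that the docstring
# describes, composed only from calls that appear elsewhere in this section. The profiler name,
# batch_request fields, and suite name below are hypothetical placeholders, not values taken from
# the original code.
def sketch_rule_based_profiler_workflow(context: DataContext, rule: Rule) -> ExpectationSuite:
    # Build a profiler, attach a Rule, and run it against a batch_request.
    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="sketch_profiler",  # hypothetical name
        config_version=1.0,
        data_context=context,
    )
    profiler.add_rule(rule=rule)
    result: RuleBasedProfilerResult = profiler.run(
        batch_request={
            "datasource_name": "my_datasource",  # hypothetical batch_request values
            "data_connector_name": "my_data_connector",
            "data_asset_name": "my_data_asset",
        }
    )
    # The result's expectation configurations are immediately available for composing a suite.
    return result.get_expectation_suite(expectation_suite_name="my_suite")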
def test_profile_get_expectation_suite(
    mock_emit,
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    # Load data context
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = alice_columnar_table_single_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config = yaml.load(yaml_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    profiler_config.pop("class_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **profiler_config,
        data_context=data_context,
    )

    # BatchRequest yielding exactly one batch
    alice_single_batch_data_batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }
    result: RuleBasedProfilerResult = profiler.run(
        batch_request=alice_single_batch_data_batch_request
    )

    expectation_suite_name: str = "my_suite"
    suite: ExpectationSuite = result.get_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )
    assert suite is not None and len(suite.expectations) > 0

    assert mock_emit.call_count == 44

    # noinspection PyUnresolvedReferences
    actual_events: List[unittest.mock._Call] = mock_emit.call_args_list
    assert (
        actual_events[-1][0][0]["event"]
        == UsageStatsEvents.RULE_BASED_PROFILER_RESULT_GET_EXPECTATION_SUITE.value
    )
def test_add_rule_and_run_profiler(data_context_with_taxi_data):
    """
    What does this test and why?
    This is the first test where we build a Rule in memory and use the add_rule() method to add it
    to our RuleBasedProfiler before running the profiler. We use the DomainBuilder from the
    previous test (against "_amount" columns) and an ExpectationConfigurationBuilder that uses
    expect_column_values_to_not_be_null, because it only needs a domain value. The test asserts
    that the profiler returns 4 Expectations, one per column in our domain.
    """
    context: DataContext = data_context_with_taxi_data
    batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    default_expectation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_not_be_null",
        column="$domain.domain_kwargs.column",
    )
    simple_rule: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=domain_builder,
        expectation_configuration_builders=[default_expectation_configuration_builder],
    )
    my_rbp: RuleBasedProfiler = RuleBasedProfiler(
        name="my_simple_rbp",
        config_version=1.0,
        data_context=context,
    )
    my_rbp.add_rule(rule=simple_rule)
    result: RuleBasedProfilerResult = my_rbp.run(batch_request=batch_request)
    expectation_configurations: List[ExpectationConfiguration] = result.expectation_configurations
    assert len(expectation_configurations) == 4
def test_profile_includes_citations(
    mock_emit,
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    # Load data context
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = alice_columnar_table_single_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config = yaml.load(yaml_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    profiler_config.pop("class_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **profiler_config,
        data_context=data_context,
    )

    # BatchRequest yielding exactly one batch
    alice_single_batch_data_batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }
    result: RuleBasedProfilerResult = profiler.run(
        batch_request=alice_single_batch_data_batch_request
    )

    assert result.citation is not None and len(result.citation.keys()) > 0

    assert mock_emit.call_count == 43
    assert all(
        payload[0][0]["event"] == "data_context.get_batch_list"
        for payload in mock_emit.call_args_list[:-1]
    )

    # noinspection PyUnresolvedReferences
    actual_events: List[unittest.mock._Call] = mock_emit.call_args_list
    assert actual_events[-1][0][0]["event"] == UsageStatsEvents.RULE_BASED_PROFILER_RUN.value
def test_get_profiler_run_usage_statistics_with_handler_invalid_payload(
    mock_data_context: mock.MagicMock,
):
    # Ensure that real handler gets passed down by the context
    handler: UsageStatisticsHandler = UsageStatisticsHandler(
        mock_data_context, "my_id", "my_url"
    )
    mock_data_context.usage_statistics_handler = handler

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="my_profiler",
        config_version=1.0,
        data_context=mock_data_context,
    )

    payload: dict = get_profiler_run_usage_statistics(profiler=profiler)

    # Payload won't pass schema validation due to a lack of rules but we can confirm that it is anonymized
    assert payload == {
        "anonymized_name": "a0061ec021855cd2b3a994dd8d90fe5d",
        "config_version": 1.0,
        "rule_count": 0,
        "variable_count": 0,
    }
def test_bobby_profiler_user_workflow_multi_batch_row_count_range_rule_and_column_ranges_rule_oneshot_sampling_method(
    bobby_columnar_table_multi_batch_deterministic_data_context,
    bobby_columnar_table_multi_batch,
):
    # Load data context
    data_context: DataContext = bobby_columnar_table_multi_batch_deterministic_data_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = bobby_columnar_table_multi_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config: dict = yaml.load(yaml_config)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    deserialized_config: dict = ruleBasedProfilerConfigSchema.load(profiler_config)
    serialized_config: dict = ruleBasedProfilerConfigSchema.dump(deserialized_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    serialized_config.pop("class_name")
    serialized_config.pop("module_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **serialized_config,
        data_context=data_context,
    )

    expectation_suite: ExpectationSuite = profiler.run(
        expectation_suite_name=bobby_columnar_table_multi_batch[
            "test_configuration_oneshot_sampling_method"
        ]["expectation_suite_name"],
        include_citation=True,
    )

    assert (
        expectation_suite
        == bobby_columnar_table_multi_batch["test_configuration_oneshot_sampling_method"][
            "expected_expectation_suite"
        ]
    )
def test_get_profiler_run_usage_statistics_with_handler_valid_payload(
    mock_data_context: mock.MagicMock,
):
    # Ensure that real handler gets passed down by the context
    handler: UsageStatisticsHandler = UsageStatisticsHandler(
        mock_data_context, "my_id", "my_url"
    )
    mock_data_context.usage_statistics_handler = handler

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="my_profiler",
        config_version=1.0,
        data_context=mock_data_context,
    )

    override_rules: Dict[str, dict] = {
        "my_override_rule": {
            "domain_builder": {
                "class_name": "ColumnDomainBuilder",
                "module_name": "great_expectations.rule_based_profiler.domain_builder",
            },
            "parameter_builders": [
                {
                    "class_name": "MetricMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                },
                {
                    "class_name": "NumericMetricRangeMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_other_parameter",
                    "metric_name": "my_other_metric",
                },
            ],
            "expectation_configuration_builders": [
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_one_arg": "$parameter.my_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_min_to_be_between",
                    "column": "$domain.domain_kwargs.column",
                    "my_another_arg": "$parameter.my_other_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_other_parameter_estimator": "$parameter.my_other_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
    }

    payload: dict = get_profiler_run_usage_statistics(
        profiler=profiler, rules=override_rules
    )

    assert payload == {
        "anonymized_name": "a0061ec021855cd2b3a994dd8d90fe5d",
        "anonymized_rules": [
            {
                "anonymized_domain_builder": {"parent_class": "ColumnDomainBuilder"},
                "anonymized_expectation_configuration_builders": [
                    {
                        "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                        "parent_class": "DefaultExpectationConfigurationBuilder",
                    },
                    {
                        "expectation_type": "expect_column_min_to_be_between",
                        "parent_class": "DefaultExpectationConfigurationBuilder",
                    },
                ],
                "anonymized_name": "bd8a8b4465a94b363caf2b307c080547",
                "anonymized_parameter_builders": [
                    {
                        "anonymized_name": "25dac9e56a1969727bc0f90db6eaa833",
                        "parent_class": "MetricMultiBatchParameterBuilder",
                    },
                    {
                        "anonymized_name": "be5baa3f1064e6e19356f2168968cbeb",
                        "parent_class": "NumericMetricRangeMultiBatchParameterBuilder",
                    },
                ],
            }
        ],
        "config_version": 1.0,
        "rule_count": 1,
        "variable_count": 0,
    }
      column: $domain.domain_kwargs.column
      min_value: $parameter.max_range.value[0]
      max_value: $parameter.max_range.value[1]
      mostly: $variables.mostly
      meta:
        profiler_details: $parameter.max_range.details
"""

data_context = DataContext()

# Instantiate RuleBasedProfiler
full_profiler_config_dict: dict = yaml.load(profiler_config)
rule_based_profiler: RuleBasedProfiler = RuleBasedProfiler(
    name=full_profiler_config_dict["name"],
    config_version=full_profiler_config_dict["config_version"],
    rules=full_profiler_config_dict["rules"],
    variables=full_profiler_config_dict["variables"],
    data_context=data_context,
)

batch_request: dict = {
    "datasource_name": "taxi_pandas",
    "data_connector_name": "monthly",
    "data_asset_name": "my_reports",
    "data_connector_query": {
        "index": "-6:-1",
    },
}

result: RuleBasedProfilerResult = rule_based_profiler.run(batch_request=batch_request)
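# Illustrative follow-up (a sketch, not part of the original snippet): the RuleBasedProfilerResult
# returned above can be inspected directly or composed into an ExpectationSuite, mirroring the
# calls used by the tests in this section. The suite name "my_suite" is an assumed placeholder.
expectation_configurations: List[ExpectationConfiguration] = result.expectation_configurations
suite: ExpectationSuite = result.get_expectation_suite(expectation_suite_name="my_suite")
assert suite is not None and len(suite.expectations) > 0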
def test_profiler_save_and_load(data_context_with_taxi_data):
    """
    What does this test and why?
    This tests whether context.save_profiler() can be invoked to update a profiler that lives in
    the Store, ensuring that any changes we make to the Profiler, such as adding a rule, are
    persisted. It also verifies that context.save_profiler() and context.get_profiler() return
    the expected RBP.
    """
    context: DataContext = data_context_with_taxi_data
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    # parameter_builder
    numeric_range_parameter_builder: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            data_context=context,
            metric_name="column.min",
            metric_domain_kwargs="$domain.domain_kwargs",
            name="my_column_min",
        )
    )
    config_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_greater_than",
            value="$parameter.my_column_min.value[-1]",
            column="$domain.domain_kwargs.column",
        )
    )
    simple_variables_rule: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=domain_builder,
        parameter_builders=[numeric_range_parameter_builder],
        expectation_configuration_builders=[config_builder],
    )
    my_rbp = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )
    res: dict = my_rbp.config.to_json_dict()
    assert res == {
        "class_name": "RuleBasedProfiler",
        "module_name": "great_expectations.rule_based_profiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "rules": None,
        "variables": {},
    }
    my_rbp.add_rule(rule=simple_variables_rule)
    context.save_profiler(name="my_rbp", profiler=my_rbp)

    # load profiler from store
    my_loaded_profiler: RuleBasedProfiler = context.get_profiler(name="my_rbp")
    res = my_loaded_profiler.config.to_json_dict()
    assert res == {
        "module_name": "great_expectations.rule_based_profiler",
        "class_name": "RuleBasedProfiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "variables": {},
        "rules": {
            "rule_with_no_variables_no_parameters": {
                "domain_builder": {
                    "module_name": "great_expectations.rule_based_profiler.domain_builder.column_domain_builder",
                    "class_name": "ColumnDomainBuilder",
                    "include_column_name_suffixes": [
                        "_amount",
                    ],
                },
                "variables": {},
                "parameter_builders": [
                    {
                        "module_name": "great_expectations.rule_based_profiler.parameter_builder.metric_multi_batch_parameter_builder",
                        "class_name": "MetricMultiBatchParameterBuilder",
                        "name": "my_column_min",
                        "metric_name": "column.min",
                        "metric_domain_kwargs": "$domain.domain_kwargs",
                        "enforce_numeric_metric": False,
                        "replace_nan_with_zero": False,
                        "reduce_scalar_metric": True,
                        "evaluation_parameter_builder_configs": None,
                    },
                ],
                "expectation_configuration_builders": [
                    {
                        "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder.default_expectation_configuration_builder",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "expectation_type": "expect_column_values_to_be_greater_than",
                        "meta": {},
                        "column": "$domain.domain_kwargs.column",
                        "validation_parameter_builder_configs": None,
                        "value": "$parameter.my_column_min.value[-1]",
                    },
                ],
            },
        },
    }
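# A possible continuation of the save/load workflow above (sketch only, not asserted by the test):
# a profiler loaded from the Store can be run just like the in-memory one. The batch_request below
# mirrors the one used by the other taxi-data tests in this section; the function name and the
# final assertion are illustrative assumptions.
def sketch_run_loaded_profiler(context: DataContext) -> None:
    my_loaded_profiler: RuleBasedProfiler = context.get_profiler(name="my_rbp")
    batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    result: RuleBasedProfilerResult = my_loaded_profiler.run(batch_request=batch_request)
    assert len(result.expectation_configurations) > 0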
def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
    quentin_columnar_table_multi_batch_data_context,
    quentin_columnar_table_multi_batch,
):
    # Load data context
    data_context: DataContext = quentin_columnar_table_multi_batch_data_context

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = quentin_columnar_table_multi_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config: CommentedMap = yaml.load(yaml_config)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    deserialized_config: dict = ruleBasedProfilerConfigSchema.load(profiler_config)
    serialized_config: dict = ruleBasedProfilerConfigSchema.dump(deserialized_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    serialized_config.pop("class_name")
    serialized_config.pop("module_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **serialized_config,
        data_context=data_context,
    )

    expectation_suite: ExpectationSuite = profiler.run(
        expectation_suite_name=quentin_columnar_table_multi_batch["test_configuration"][
            "expectation_suite_name"
        ],
    )

    expectation_configuration_dict: dict
    column_name: str
    expectation_kwargs: dict
    expect_column_quantile_values_to_be_between_expectation_configurations_kwargs_dict: Dict[
        str, dict
    ] = {
        expectation_configuration_dict["kwargs"]["column"]: expectation_configuration_dict[
            "kwargs"
        ]
        for expectation_configuration_dict in expectation_suite.to_json_dict()["expectations"]
    }
    expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column: Dict[
        str, List[List[Number]]
    ] = {
        column_name: expectation_kwargs["value_ranges"]
        for column_name, expectation_kwargs in expect_column_quantile_values_to_be_between_expectation_configurations_kwargs_dict.items()
    }

    assert (
        expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column[
            "tolls_amount"
        ]
        == quentin_columnar_table_multi_batch["test_configuration"][
            "expect_column_quantile_values_to_be_between_quantile_ranges_by_column"
        ]["tolls_amount"]
    )

    # Measure of "closeness" between "actual" and "desired" is computed as: atol + rtol * abs(desired)
    # (see "https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_allclose.html" for details).
    rtol: float = 1.0e-7
    atol: float = 5.0e-2

    value_range: List[Number]
    paired_quantiles: zip
    column_quantiles: List[List[Number]]
    idx: int
    for (
        column_name,
        column_quantiles,
    ) in expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column.items():
        paired_quantiles = zip(
            column_quantiles,
            quentin_columnar_table_multi_batch["test_configuration"][
                "expect_column_quantile_values_to_be_between_quantile_ranges_by_column"
            ][column_name],
        )
        for value_range in list(paired_quantiles):
            for idx in range(2):
                np.testing.assert_allclose(
                    actual=value_range[0][idx],
                    desired=value_range[1][idx],
                    rtol=rtol,
                    atol=atol,
                    err_msg=f"Actual value of {value_range[0][idx]} differs from expected value of {value_range[1][idx]} by more than {atol + rtol * abs(value_range[1][idx])} tolerance.",
                )