def test_profiler_parameter_builder_added(data_context_with_taxi_data):
    """
    What does this test and why?

    Exercises a Rule that carries a single ParameterBuilder. A
    MetricMultiBatchParameterBuilder computes the "column.min" metric for each
    domain, and the resulting parameter is fed in as the "value" argument of
    expect_column_values_to_be_greater_than. The profiler is run against one
    batch request and the number of produced ExpectationConfigurations is
    checked.
    """
    context: DataContext = data_context_with_taxi_data

    # Domain: columns whose names end with "_amount".
    amount_domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )

    # Parameter: the "column.min" metric, evaluated on each domain's kwargs.
    column_min_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_column_min",
        metric_name="column.min",
        metric_domain_kwargs="$domain.domain_kwargs",
        data_context=context,
    )

    # Expectation: refers to the parameter above by its fully qualified name.
    greater_than_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_greater_than",
        value="$parameter.my_column_min.value[-1]",
        column="$domain.domain_kwargs.column",
    )

    rule: Rule = Rule(
        name="rule_with_variables_and_parameters",
        variables=None,
        domain_builder=amount_domain_builder,
        parameter_builders=[column_min_builder],
        expectation_configuration_builders=[greater_than_builder],
    )

    profiler = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )
    profiler.add_rule(rule=rule)

    taxi_batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    profiler_result: RuleBasedProfilerResult = profiler.run(
        batch_request=taxi_batch_request
    )

    produced_configurations: List[
        ExpectationConfiguration
    ] = profiler_result.expectation_configurations
    # NOTE(review): presumably one configuration per "_amount" column in the
    # taxi fixture data -- confirm against the fixture if this count changes.
    assert len(produced_configurations) == 4
def test_profiler_save_and_load(data_context_with_taxi_data):
    """
    What does this test and why?

    Checks the round trip of a RuleBasedProfiler through the context: a
    profiler is created without rules, its serialized config is verified, a
    rule is added, and the profiler is stored with context.save_profiler().
    The profiler is then fetched back with context.get_profiler() and its
    serialized config must contain the added rule (domain builder, parameter
    builder and expectation configuration builder).
    """
    context: DataContext = data_context_with_taxi_data

    # Domain: columns whose names end with "_amount".
    amount_domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )

    # Parameter: the "column.min" metric, evaluated on each domain's kwargs.
    column_min_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_column_min",
        metric_name="column.min",
        metric_domain_kwargs="$domain.domain_kwargs",
        data_context=context,
    )

    # Expectation: refers to the parameter above by its fully qualified name.
    greater_than_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_greater_than",
        value="$parameter.my_column_min.value[-1]",
        column="$domain.domain_kwargs.column",
    )

    rule_to_persist: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=amount_domain_builder,
        parameter_builders=[column_min_builder],
        expectation_configuration_builders=[greater_than_builder],
    )

    profiler = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )

    # Before any rule is added, the serialized config reports "rules" as None.
    expected_config_without_rules = {
        "class_name": "RuleBasedProfiler",
        "module_name": "great_expectations.rule_based_profiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "rules": None,
        "variables": {},
    }
    initial_config: dict = profiler.config.to_json_dict()
    assert initial_config == expected_config_without_rules

    profiler.add_rule(rule=rule_to_persist)
    context.save_profiler(name="my_rbp", profiler=profiler)

    # load profiler from store
    reloaded_profiler: RuleBasedProfiler = context.get_profiler(name="my_rbp")

    expected_domain_builder_config = {
        "module_name": "great_expectations.rule_based_profiler.domain_builder.column_domain_builder",
        "class_name": "ColumnDomainBuilder",
        "include_column_name_suffixes": [
            "_amount",
        ],
    }
    expected_parameter_builder_config = {
        "module_name": "great_expectations.rule_based_profiler.parameter_builder.metric_multi_batch_parameter_builder",
        "class_name": "MetricMultiBatchParameterBuilder",
        "name": "my_column_min",
        "metric_name": "column.min",
        "metric_domain_kwargs": "$domain.domain_kwargs",
        "enforce_numeric_metric": False,
        "replace_nan_with_zero": False,
        "reduce_scalar_metric": True,
        "evaluation_parameter_builder_configs": None,
    }
    expected_expectation_builder_config = {
        "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder.default_expectation_configuration_builder",
        "class_name": "DefaultExpectationConfigurationBuilder",
        "expectation_type": "expect_column_values_to_be_greater_than",
        "meta": {},
        "column": "$domain.domain_kwargs.column",
        "validation_parameter_builder_configs": None,
        "value": "$parameter.my_column_min.value[-1]",
    }
    expected_config_with_rule = {
        "module_name": "great_expectations.rule_based_profiler",
        "class_name": "RuleBasedProfiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "variables": {},
        "rules": {
            "rule_with_no_variables_no_parameters": {
                "domain_builder": expected_domain_builder_config,
                "variables": {},
                "parameter_builders": [
                    expected_parameter_builder_config,
                ],
                "expectation_configuration_builders": [
                    expected_expectation_builder_config,
                ],
            },
        },
    }

    reloaded_config: dict = reloaded_profiler.config.to_json_dict()
    assert reloaded_config == expected_config_with_rule