def test_instantiation_mean_unexpected_map_metric_multi_batch_parameter_builder_required_arguments_absent(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    """
    Constructing "MeanUnexpectedMapMetricMultiBatchParameterBuilder" without either
    "total_count_parameter_builder_name" or "map_metric_name" must raise "TypeError"
    whose message names the argument that was left out.
    """
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    # Each case pairs an incomplete set of constructor arguments with the message fragment expected in the error.
    incomplete_cases: list = [
        (
            {
                "name": "my_name",
                "map_metric_name": "column_values.nonnull",
                "data_context": data_context,
            },
            "__init__() missing 1 required positional argument: 'total_count_parameter_builder_name'",
        ),
        (
            {
                "name": "my_name",
                "total_count_parameter_builder_name": "my_total_count",
                "data_context": data_context,
            },
            "__init__() missing 1 required positional argument: 'map_metric_name'",
        ),
    ]

    for constructor_kwargs, expected_message in incomplete_cases:
        with pytest.raises(TypeError) as excinfo:
            # noinspection PyArgumentList
            MeanUnexpectedMapMetricMultiBatchParameterBuilder(**constructor_kwargs)

        assert expected_message in str(excinfo.value)
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_check_serialized_keys_no_evaluation_parameter_builder_configs(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    """
    Verify the exact set of keys produced by "to_json_dict()" when the builder is
    constructed with "evaluation_parameter_builder_configs" set to None.
    """
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    builder: ParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name="my_pickup_datetime_count_values_unique_mean_unexpected_map_metric",
        map_metric_name="column_values.nonnull",
        total_count_parameter_builder_name="my_total_count",
        null_count_parameter_builder_name="my_null_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=None,
        data_context=data_context,
    )

    # Note: "evaluation_parameter_builder_configs" is not one of "ParameterBuilder" formal property attributes.
    expected_keys: set = {
        "class_name",
        "module_name",
        "name",
        "map_metric_name",
        "total_count_parameter_builder_name",
        "null_count_parameter_builder_name",
        "metric_domain_kwargs",
        "metric_value_kwargs",
        "evaluation_parameter_builder_configs",
    }
    serialized_keys: set = set(builder.to_json_dict().keys())

    assert serialized_keys == expected_keys
def test_instantiation_mean_unexpected_map_metric_multi_batch_parameter_builder(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    """
    Smoke test: "MeanUnexpectedMapMetricMultiBatchParameterBuilder" can be constructed
    when "name", "map_metric_name", and "total_count_parameter_builder_name" are supplied.
    """
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    # The instance itself is not inspected; the test passes if construction does not raise.
    # noinspection PyUnusedLocal
    _builder: ParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name="my_name",
        map_metric_name="column_values.nonnull",
        total_count_parameter_builder_name="my_total_count",
        data_context=data_context,
    )
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_numeric_dependencies_evaluated_separately(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    """
    Build the "my_total_count" and "my_null_count" dependencies explicitly (the mean builder is
    given no "evaluation_parameter_builder_configs"), then build the mean unexpected map metric
    parameter for the "passenger_count" column and check that its value is close to 0.0.
    """
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    batch_request: dict = {
        "datasource_name": "taxi_pandas",
        "data_connector_name": "monthly",
        "data_asset_name": "my_reports",
    }

    my_total_count_metric_multi_batch_parameter_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_total_count",
        metric_name="table.row_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
        data_context=data_context,
    )
    my_null_count_metric_multi_batch_parameter_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_null_count",
        metric_name="column_values.nonnull.unexpected_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
        data_context=data_context,
    )

    mean_unexpected_map_metric_multi_batch_parameter_builder: ParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name="my_passenger_count_values_not_null_mean_unexpected_map_metric",
            map_metric_name="column_values.nonnull",
            total_count_parameter_builder_name="my_total_count",
            null_count_parameter_builder_name="my_null_count",
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=None,
            data_context=data_context,
        )
    )

    metric_domain_kwargs: dict = {"column": "passenger_count"}
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )

    variables: Optional[ParameterContainer] = None

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    # Order matters: both count parameters are built into "parameters" before the mean builder,
    # which refers to them by name ("my_total_count" and "my_null_count").
    for builder in (
        my_total_count_metric_multi_batch_parameter_builder,
        my_null_count_metric_multi_batch_parameter_builder,
        mean_unexpected_map_metric_multi_batch_parameter_builder,
    ):
        builder.build_parameters(
            domain=domain,
            variables=variables,
            parameters=parameters,
            batch_request=batch_request,
        )

    expected_parameter_value: float = 0.0

    parameter_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=mean_unexpected_map_metric_multi_batch_parameter_builder.json_serialized_fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    rtol: float = RTOL
    atol: float = 5.0e-1 * ATOL
    # "assert_allclose" accepts |actual - desired| <= atol + rtol * |desired|, so the bound reported
    # in the failure message is computed from the expected (desired) value, not the actual one.
    tolerance: float = atol + rtol * abs(expected_parameter_value)
    np.testing.assert_allclose(
        actual=parameter_node.value,
        desired=expected_parameter_value,
        rtol=rtol,
        atol=atol,
        err_msg=f"Actual value of {parameter_node.value} differs from expected value of {expected_parameter_value} by more than {tolerance} tolerance.",
    )
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_check_serialized_keys_with_evaluation_parameter_builder_configs(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    """
    Verify the exact set of keys produced by "to_json_dict()" when the builder is constructed
    with two "ParameterBuilderConfig" entries as its "evaluation_parameter_builder_configs".
    """
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    total_count_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="MetricMultiBatchParameterBuilder",
        name="my_total_count",
        metric_name="table.row_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
    )
    null_count_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="MetricMultiBatchParameterBuilder",
        name="my_null_count",
        metric_name="column_values.nonnull.unexpected_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
    )

    dependency_configs: Optional[List[ParameterBuilderConfig]] = [
        total_count_config,
        null_count_config,
    ]

    builder: ParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name="my_pickup_datetime_count_values_unique_mean_unexpected_map_metric",
        map_metric_name="column_values.nonnull",
        total_count_parameter_builder_name="my_total_count",
        null_count_parameter_builder_name="my_null_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=dependency_configs,
        data_context=data_context,
    )

    # Note: "evaluation_parameter_builder_configs" is not one of "ParameterBuilder" formal property attributes.
    expected_keys: set = {
        "class_name",
        "module_name",
        "name",
        "map_metric_name",
        "total_count_parameter_builder_name",
        "null_count_parameter_builder_name",
        "metric_domain_kwargs",
        "metric_value_kwargs",
        "evaluation_parameter_builder_configs",
    }
    serialized_keys: set = set(builder.to_json_dict().keys())

    assert serialized_keys == expected_keys
def build_map_metric_rule(
    rule_name: str,
    expectation_type: str,
    map_metric_name: str,
    include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    include_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None,
    exclude_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None,
    semantic_type_filter_module_name: Optional[str] = None,
    semantic_type_filter_class_name: Optional[str] = None,
    include_semantic_types: Optional[
        Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
    ] = None,
    exclude_semantic_types: Optional[
        Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
    ] = None,
    max_unexpected_values: Union[str, int] = 0,
    max_unexpected_ratio: Optional[Union[str, float]] = None,
    min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
) -> Rule:
    """
    Assemble "Rule" that emits "ExpectationConfiguration" objects of given "expectation_type" for "map" style metric.

    Args:
        rule_name: name assigned to returned "Rule".
        expectation_type: "expectation_type" handed to "DefaultExpectationConfigurationBuilder".
        map_metric_name: "map" metric name; passed to "MapMetricColumnDomainBuilder" and to
            "MeanUnexpectedMapMetricMultiBatchParameterBuilder" (whose name is "<map_metric_name>.unexpected_value").
        include_column_names, exclude_column_names, include_column_name_suffixes, exclude_column_name_suffixes,
        semantic_type_filter_module_name, semantic_type_filter_class_name, include_semantic_types,
        exclude_semantic_types, max_unexpected_values, max_unexpected_ratio,
        min_max_unexpected_values_proportion: directives forwarded unchanged to "MapMetricColumnDomainBuilder".

    Returns:
        "Rule" with "success_ratio" variable (7.5e-1), "MapMetricColumnDomainBuilder" as "domain_builder", three
        unexpected-count metric "ParameterBuilder" objects, and one "DefaultExpectationConfigurationBuilder".
    """

    # Step-1: "DomainBuilder" -- "MapMetricColumnDomainBuilder" configured with caller-supplied directives.
    domain_builder: MapMetricColumnDomainBuilder = MapMetricColumnDomainBuilder(
        map_metric_name=map_metric_name,
        include_column_names=include_column_names,
        exclude_column_names=exclude_column_names,
        include_column_name_suffixes=include_column_name_suffixes,
        exclude_column_name_suffixes=exclude_column_name_suffixes,
        semantic_type_filter_module_name=semantic_type_filter_module_name,
        semantic_type_filter_class_name=semantic_type_filter_class_name,
        include_semantic_types=include_semantic_types,
        exclude_semantic_types=exclude_semantic_types,
        max_unexpected_values=max_unexpected_values,
        max_unexpected_ratio=max_unexpected_ratio,
        min_max_unexpected_values_proportion=min_max_unexpected_values_proportion,
        data_context=None,
    )

    common_builders = DataAssistant.commonly_used_parameter_builders

    # Step-2: "ParameterBuilder" objects for metrics of interest (requested with "json_serialize=True").
    unique_unexpected_count_for_metrics: ParameterBuilder = common_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )
    nonnull_unexpected_count_for_metrics: ParameterBuilder = common_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )
    null_unexpected_count_for_metrics: ParameterBuilder = common_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )

    # Step-3: "MeanUnexpectedMapMetricMultiBatchParameterBuilder", whose value drives "condition" for emitting
    # "ExpectationConfiguration"; its two dependencies are supplied as "evaluation" "ParameterBuilder" configs.
    total_count_for_evaluations: ParameterBuilder = (
        common_builders.get_table_row_count_metric_multi_batch_parameter_builder(
            json_serialize=False
        )
    )
    nonnull_unexpected_count_for_evaluations: ParameterBuilder = common_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=False
    )

    evaluation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**dependency.to_json_dict())
        for dependency in (
            total_count_for_evaluations,
            nonnull_unexpected_count_for_evaluations,
        )
    ]

    mean_unexpected_value_for_validations: MeanUnexpectedMapMetricMultiBatchParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name=f"{map_metric_name}.unexpected_value",
        map_metric_name=map_metric_name,
        total_count_parameter_builder_name=total_count_for_evaluations.name,
        null_count_parameter_builder_name=nonnull_unexpected_count_for_evaluations.name,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=evaluation_parameter_builder_configs,
        json_serialize=True,
        data_context=None,
    )

    # Step-4: "DefaultExpectationConfigurationBuilder" with mean unexpected value builder as its only "validation"
    # "ParameterBuilder"; "condition" compares that value against "1.0 - success_ratio".
    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**mean_unexpected_value_for_validations.to_json_dict()),
    ]

    mean_unexpected_value_parameter_name: str = (
        mean_unexpected_value_for_validations.fully_qualified_parameter_name
    )

    expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type=expectation_type,
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
        condition=f"{mean_unexpected_value_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio",
        meta={
            "profiler_details": f"{mean_unexpected_value_parameter_name}.{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Combine "variables", "domain_builder", "parameter_builders", and
    # "expectation_configuration_builders" into "Rule" and return it.
    return Rule(
        name=rule_name,
        variables={
            "success_ratio": 7.5e-1,
        },
        domain_builder=domain_builder,
        parameter_builders=[
            unique_unexpected_count_for_metrics,
            nonnull_unexpected_count_for_metrics,
            null_unexpected_count_for_metrics,
        ],
        expectation_configuration_builders=[
            expectation_configuration_builder,
        ],
    )